gpu: nvgpu: hwpm broadcast register support

Add support for hwpm broadcast registers (ltc and lts)

In gr_gk20a_find_priv_offset_in_buffer, replace "Unknown address type" error
with informational message: gr_gk20a_exec_ctx_ops calls
gk20a_get_ctx_buffer_offsets and if that fails,
calls gr_gk20a_get_pm_ctx_buffer_offsets; HWPM registers will fail the first
call, so an error or warning is overkill.

Bug 1648200

Change-Id: I197b82579e9894652add4ff254418f818981415a
Signed-off-by: Peter Daifuku <pdaifuku@nvidia.com>
Reviewed-on: http://git-master/r/1131365
(cherry picked from commit 9f30a92c5d87f6dadd34cc37396a6b10e3a72751)
Reviewed-on: http://git-master/r/1133628
(cherry picked from commit 7eb7cfd998852ba7f7c4c40d3db286f66e83ab3a)
Reviewed-on: http://git-master/r/1127749
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
This commit is contained in:
Peter Daifuku
2016-04-15 18:12:34 -07:00
committed by Terje Bergstrom
parent 5ccaaa73af
commit ce0fe5082e
6 changed files with 177 additions and 9 deletions

View File

@@ -178,6 +178,15 @@ struct gpu_ops {
struct gr_zcull_info *zcull_params);
bool (*is_tpc_addr)(struct gk20a *g, u32 addr);
u32 (*get_tpc_num)(struct gk20a *g, u32 addr);
bool (*is_ltcs_ltss_addr)(struct gk20a *g, u32 addr);
bool (*is_ltcn_ltss_addr)(struct gk20a *g, u32 addr);
bool (*get_lts_in_ltc_shared_base)(void);
void (*split_lts_broadcast_addr)(struct gk20a *g, u32 addr,
u32 *priv_addr_table,
u32 *priv_addr_table_index);
void (*split_ltc_broadcast_addr)(struct gk20a *g, u32 addr,
u32 *priv_addr_table,
u32 *priv_addr_table_index);
void (*detect_sm_arch)(struct gk20a *g);
int (*add_zbc_color)(struct gk20a *g, struct gr_gk20a *gr,
struct zbc_entry *color_val, u32 index);

View File

@@ -6318,6 +6318,13 @@ static int gr_gk20a_decode_priv_addr(struct gk20a *g, u32 addr,
}
*be_num = pri_get_be_num(g, addr);
return 0;
} else if (pri_is_ltc_addr(addr)) {
*addr_type = CTXSW_ADDR_TYPE_LTCS;
if (g->ops.gr.is_ltcs_ltss_addr(g, addr))
*broadcast_flags |= PRI_BROADCAST_FLAGS_LTCS;
else if (g->ops.gr.is_ltcn_ltss_addr(g, addr))
*broadcast_flags |= PRI_BROADCAST_FLAGS_LTSS;
return 0;
} else {
*addr_type = CTXSW_ADDR_TYPE_SYS;
return 0;
@@ -6412,7 +6419,15 @@ static int gr_gk20a_create_priv_addr_table(struct gk20a *g,
pri_gpc_addr(g, pri_gpccs_addr_mask(addr),
gpc_num);
}
} else {
}
if (broadcast_flags & PRI_BROADCAST_FLAGS_LTSS) {
g->ops.gr.split_lts_broadcast_addr(g, addr,
priv_addr_table, &t);
} else if (broadcast_flags & PRI_BROADCAST_FLAGS_LTCS) {
g->ops.gr.split_ltc_broadcast_addr(g, addr,
priv_addr_table, &t);
} else if (!(broadcast_flags & PRI_BROADCAST_FLAGS_GPC)) {
if (broadcast_flags & PRI_BROADCAST_FLAGS_TPC)
for (tpc_num = 0;
tpc_num < g->gr.gpc_tpc_count[gpc_num];
@@ -7296,8 +7311,7 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g,
num_tpcs) << 2);
}
} else {
gk20a_err(dev_from_gk20a(g),
" Unknown address type.\n");
gk20a_dbg_fn("Unknown address type.");
return -EINVAL;
}
err = gr_gk20a_process_context_buffer_priv_segment(g,
@@ -8653,6 +8667,28 @@ static int gr_gk20a_get_preemption_mode_flags(struct gk20a *g,
return 0;
}
/*
 * Default is_ltcs_ltss_addr hook registered by gk20a_init_gr_ops().
 *
 * Always answers "no", so gr_gk20a_decode_priv_addr() never sets
 * PRI_BROADCAST_FLAGS_LTCS when this hook is the one installed.
 * Both arguments are ignored.
 */
static bool gr_gk20a_is_ltcs_ltss_addr_stub(struct gk20a *g, u32 addr)
{
	const bool matches = false;

	return matches;
}
/*
 * Default is_ltcn_ltss_addr hook registered by gk20a_init_gr_ops().
 *
 * Always answers "no", so gr_gk20a_decode_priv_addr() never sets
 * PRI_BROADCAST_FLAGS_LTSS when this hook is the one installed.
 * Both arguments are ignored.
 */
static bool gr_gk20a_is_ltcn_ltss_addr_stub(struct gk20a *g, u32 addr)
{
	const bool matches = false;

	return matches;
}
/*
 * Default split_lts_broadcast_addr hook registered by gk20a_init_gr_ops().
 *
 * Does nothing: priv_addr_table and *priv_addr_table_index are left exactly
 * as the caller passed them.
 */
static void gr_gk20a_split_lts_broadcast_addr_stub(
		struct gk20a *g, u32 addr,
		u32 *priv_addr_table, u32 *priv_addr_table_index)
{
	/* Intentionally empty. */
}
/*
 * Default split_ltc_broadcast_addr hook registered by gk20a_init_gr_ops().
 *
 * Does nothing: priv_addr_table and *priv_addr_table_index are left exactly
 * as the caller passed them.
 */
static void gr_gk20a_split_ltc_broadcast_addr_stub(
		struct gk20a *g, u32 addr,
		u32 *priv_addr_table, u32 *priv_addr_table_index)
{
	/* Intentionally empty. */
}
void gk20a_init_gr_ops(struct gpu_ops *gops)
{
gops->gr.access_smpc_reg = gr_gk20a_access_smpc_reg;
@@ -8723,4 +8759,10 @@ void gk20a_init_gr_ops(struct gpu_ops *gops)
gops->gr.get_preemption_mode_flags = gr_gk20a_get_preemption_mode_flags;
gops->gr.program_active_tpc_counts = gr_gk20a_program_active_tpc_counts;
gops->gr.program_sm_id_numbering = gr_gk20a_program_sm_id_numbering;
gops->gr.is_ltcs_ltss_addr = gr_gk20a_is_ltcs_ltss_addr_stub;
gops->gr.is_ltcn_ltss_addr = gr_gk20a_is_ltcn_ltss_addr_stub;
gops->gr.split_lts_broadcast_addr =
gr_gk20a_split_lts_broadcast_addr_stub;
gops->gr.split_ltc_broadcast_addr =
gr_gk20a_split_ltc_broadcast_addr_stub;
}

View File

@@ -1,7 +1,7 @@
/*
* GK20A Graphics Context Pri Register Addressing
*
* Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -167,12 +167,21 @@ static inline u32 pri_ppc_addr(struct gk20a *g, u32 addr, u32 gpc, u32 ppc)
ppc_in_gpc_base + (ppc * ppc_in_gpc_stride) + addr;
}
/*
* LTC pri addressing
*/
/*
 * Report whether @addr lies in [ltc_pltcg_base_v(), ltc_pltcg_extent_v()).
 *
 * NOTE(review): the upper bound is exclusive, so an address equal to
 * ltc_pltcg_extent_v() is reported as not being an LTC address. Confirm
 * whether the extent value is meant to be inclusive.
 */
static inline bool pri_is_ltc_addr(u32 addr)
{
	if (addr < ltc_pltcg_base_v())
		return false;

	return addr < ltc_pltcg_extent_v();
}
/*
 * Address classes reported through *addr_type by
 * gr_gk20a_decode_priv_addr(). CTXSW_ADDR_TYPE_LTCS is the class assigned to
 * addresses accepted by pri_is_ltc_addr().
 *
 * NOTE(review): this listing appears to show both the previous enumerator
 * lines (ending at PPC) and the updated ones (adding LTCS) - confirm against
 * the actual header.
 */
enum ctxsw_addr_type {
CTXSW_ADDR_TYPE_SYS = 0,
CTXSW_ADDR_TYPE_GPC = 1,
CTXSW_ADDR_TYPE_TPC = 2,
CTXSW_ADDR_TYPE_BE = 3,
CTXSW_ADDR_TYPE_PPC = 4
CTXSW_ADDR_TYPE_SYS = 0,
CTXSW_ADDR_TYPE_GPC = 1,
CTXSW_ADDR_TYPE_TPC = 2,
CTXSW_ADDR_TYPE_BE = 3,
CTXSW_ADDR_TYPE_PPC = 4,
CTXSW_ADDR_TYPE_LTCS = 5
};
#define PRI_BROADCAST_FLAGS_NONE 0
@@ -180,5 +189,7 @@ enum ctxsw_addr_type {
#define PRI_BROADCAST_FLAGS_TPC BIT(1)
#define PRI_BROADCAST_FLAGS_BE BIT(2)
#define PRI_BROADCAST_FLAGS_PPC BIT(3)
#define PRI_BROADCAST_FLAGS_LTCS BIT(4)
#define PRI_BROADCAST_FLAGS_LTSS BIT(5)
#endif /* GR_PRI_GK20A_H */

View File

@@ -50,6 +50,14 @@
#ifndef _hw_ltc_gk20a_h_
#define _hw_ltc_gk20a_h_
/* Value used as the lower bound of the LTC address check. */
static inline u32 ltc_pltcg_base_v(void)
{
	const u32 pltcg_base = 0x00140000;

	return pltcg_base;
}
/* Value used as the upper bound of the LTC address check. */
static inline u32 ltc_pltcg_extent_v(void)
{
	const u32 pltcg_extent = 0x0017ffff;

	return pltcg_extent;
}
static inline u32 ltc_ltcs_lts0_cbc_ctrl1_r(void)
{
return 0x001410c8;

View File

@@ -29,6 +29,7 @@
#include "hw_fifo_gm20b.h"
#include "hw_fb_gm20b.h"
#include "hw_top_gm20b.h"
#include "hw_ltc_gm20b.h"
#include "hw_ctxsw_prog_gm20b.h"
#include "hw_fuse_gm20b.h"
#include "pmu_gm20b.h"
@@ -1402,6 +1403,79 @@ static int gm20b_gr_fuse_override(struct gk20a *g)
return 0;
}
/*
 * Report whether @addr falls inside the window that starts at
 * ltc_ltcs_ltss_v() and is one GPU_LIT_LTS_STRIDE long (end exclusive).
 */
static bool gr_gm20b_is_ltcs_ltss_addr(struct gk20a *g, u32 addr)
{
	u32 window_start = ltc_ltcs_ltss_v();
	u32 window_end = window_start +
			 nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);

	if (addr < window_start)
		return false;

	return addr < window_end;
}
/*
 * Report whether @addr, reduced to its offset within one LTC stride, lands
 * in the window that ltc_ltc0_ltss_v() occupies within its own stride, and
 * is not already matched by gr_gm20b_is_ltcs_ltss_addr().
 *
 * The offset is obtained by masking with (GPU_LIT_LTC_STRIDE - 1);
 * presumably the stride is a power of two - TODO confirm.
 */
static bool gr_gm20b_is_ltcn_ltss_addr(struct gk20a *g, u32 addr)
{
	u32 ltss_base = ltc_ltc0_ltss_v();
	u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
	u32 offset_mask = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE) - 1;
	u32 window_first = ltss_base & offset_mask;
	u32 window_limit = window_first + lts_stride;
	u32 offset_in_ltc = addr & offset_mask;

	/* Addresses in the LTCS/LTSS window are handled by the other check. */
	if (gr_gm20b_is_ltcs_ltss_addr(g, addr))
		return false;

	return (offset_in_ltc >= window_first) &&
	       (offset_in_ltc < window_limit);
}
/*
 * Append one address per LTS slice of LTC @ltc_num to @priv_addr_table.
 *
 * Each entry is ltc_ltc0_lts0_v() + ltc_num * LTC stride + slice * LTS
 * stride + (addr & (LTS stride - 1)). The number of slices comes from
 * g->ops.gr.get_max_lts_per_ltc(). Writing starts at *priv_addr_table_index,
 * which is advanced past the last entry written.
 *
 * The table capacity is not known here; the caller must provide enough room.
 */
static void gr_gm20b_update_ltc_lts_addr(struct gk20a *g, u32 addr, u32 ltc_num,
					 u32 *priv_addr_table,
					 u32 *priv_addr_table_index)
{
	u32 slice_count = g->ops.gr.get_max_lts_per_ltc(g);
	u32 next = *priv_addr_table_index;
	u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
	u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
	/* Terms that do not change from slice to slice. */
	u32 ltc_base = ltc_ltc0_lts0_v() + ltc_num * ltc_stride;
	u32 reg_offset = addr & (lts_stride - 1);
	u32 slice;

	for (slice = 0; slice < slice_count; slice++, next++)
		priv_addr_table[next] = ltc_base + slice * lts_stride +
					reg_offset;

	*priv_addr_table_index = next;
}
/*
 * Expand @addr into one table entry per LTS slice of the LTC that contains
 * it.
 *
 * The containing LTC is found by scanning g->ltc_count consecutive windows
 * of GPU_LIT_LTC_STRIDE bytes starting at ltc_pltcg_base_v(). If no window
 * contains @addr, LTC 0 is used.
 */
static void gr_gm20b_split_lts_broadcast_addr(struct gk20a *g, u32 addr,
					      u32 *priv_addr_table,
					      u32 *priv_addr_table_index)
{
	u32 ltc_total = g->ltc_count;
	u32 pltcg_start = ltc_pltcg_base_v();
	u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
	u32 owner = 0;
	u32 idx;

	for (idx = 0; idx < ltc_total; idx++) {
		u32 window = pltcg_start + idx * ltc_stride;

		if ((addr < window) || (addr >= (window + ltc_stride)))
			continue;

		owner = idx;
		break;
	}

	gr_gm20b_update_ltc_lts_addr(g, addr, owner, priv_addr_table,
				     priv_addr_table_index);
}
/*
 * Expand @addr into table entries for every LTC: calls
 * gr_gm20b_update_ltc_lts_addr() once per LTC index in [0, g->ltc_count),
 * in ascending order, each call appending to @priv_addr_table and advancing
 * *priv_addr_table_index.
 */
static void gr_gm20b_split_ltc_broadcast_addr(struct gk20a *g, u32 addr,
					      u32 *priv_addr_table,
					      u32 *priv_addr_table_index)
{
	const u32 ltc_total = g->ltc_count;
	u32 ltc = 0;

	while (ltc < ltc_total) {
		gr_gm20b_update_ltc_lts_addr(g, addr, ltc, priv_addr_table,
					     priv_addr_table_index);
		ltc++;
	}
}
void gm20b_init_gr(struct gpu_ops *gops)
{
gops->gr.init_gpc_mmu = gr_gm20b_init_gpc_mmu;
@@ -1478,4 +1552,8 @@ void gm20b_init_gr(struct gpu_ops *gops)
gops->gr.fuse_override = gm20b_gr_fuse_override;
gops->gr.load_smid_config = gr_gm20b_load_smid_config;
gops->gr.program_sm_id_numbering = gr_gm20b_program_sm_id_numbering;
gops->gr.is_ltcs_ltss_addr = gr_gm20b_is_ltcs_ltss_addr;
gops->gr.is_ltcn_ltss_addr = gr_gm20b_is_ltcn_ltss_addr;
gops->gr.split_lts_broadcast_addr = gr_gm20b_split_lts_broadcast_addr;
gops->gr.split_ltc_broadcast_addr = gr_gm20b_split_ltc_broadcast_addr;
}

View File

@@ -50,6 +50,26 @@
#ifndef _hw_ltc_gm20b_h_
#define _hw_ltc_gm20b_h_
/* Start of the per-LTC windows scanned by gr_gm20b_split_lts_broadcast_addr(). */
static inline u32 ltc_pltcg_base_v(void)
{
	const u32 pltcg_base = 0x00140000;

	return pltcg_base;
}
/* Value used as the upper bound of the LTC address check. */
static inline u32 ltc_pltcg_extent_v(void)
{
	const u32 pltcg_extent = 0x0017ffff;

	return pltcg_extent;
}
/* Base value used by gr_gm20b_is_ltcn_ltss_addr() to derive its window. */
static inline u32 ltc_ltc0_ltss_v(void)
{
	const u32 ltc0_ltss = 0x00140200;

	return ltc0_ltss;
}
/* Base value from which gr_gm20b_update_ltc_lts_addr() builds addresses. */
static inline u32 ltc_ltc0_lts0_v(void)
{
	const u32 ltc0_lts0 = 0x00140400;

	return ltc0_lts0;
}
/* Start of the window tested by gr_gm20b_is_ltcs_ltss_addr(). */
static inline u32 ltc_ltcs_ltss_v(void)
{
	const u32 ltcs_ltss = 0x0017e200;

	return ltcs_ltss;
}
static inline u32 ltc_ltcs_lts0_cbc_ctrl1_r(void)
{
return 0x0014046c;