gpu: nvgpu: Full chip support for ctxsw

Add the nvgpu changes needed to handle the newly added ctxsw register lists.
Fix regops support for PPC registers.

Squashed from:
Change-Id: I08e6dec3bb2f7aa51de912c9d1c84a350ce07f72
Signed-off-by: neha <njoshi@nvidia.com>
Reviewed-on: http://git-master/r/1151010
(cherry picked from commit fd03ad9f09e66f78db88fb7ece448e26e0515821)

and:
Change-Id: I75a7f810ee0b613c22ac2cef2d936563d8067f97
Signed-off-by: Peter Daifuku <pdaifuku@nvidia.com>
Reviewed-on: http://git-master/r/1158888
(cherry picked from commit f00a7fcc57fb937b800e46760087ff6f7637520c)

Bug 200180000
Bug 1771830

Reviewed-on: http://git-master/r/1164397
(cherry picked from commit 7028f051e4f37edeff90a9923f022cec6c645a8f)
Signed-off-by: Peter Daifuku <pdaifuku@nvidia.com>
Change-Id: I796ddf93ef37170843a4a6b44190cd6780d25852
Reviewed-on: http://git-master/r/1183588
Reviewed-by: Vladislav Buzov <vbuzov@nvidia.com>
GVS: Gerrit_Virtual_Submit
This commit is contained in:
neha
2016-04-11 17:42:39 +05:30
committed by Nirav Patel
parent 51a32d8f2c
commit f3d89a2997
10 changed files with 206 additions and 3 deletions

View File

@@ -100,6 +100,7 @@ enum nvgpu_litter_value {
GPU_LIT_TPC_IN_GPC_SHARED_BASE,
GPU_LIT_PPC_IN_GPC_BASE,
GPU_LIT_PPC_IN_GPC_STRIDE,
GPU_LIT_PPC_IN_GPC_SHARED_BASE,
GPU_LIT_ROP_BASE,
GPU_LIT_ROP_STRIDE,
GPU_LIT_ROP_SHARED_BASE,

View File

@@ -336,6 +336,34 @@ static int gr_gk20a_init_ctx_vars_fw(struct gk20a *g, struct gr_gk20a *gr)
if (err)
goto clean_up;
break;
case NETLIST_REGIONID_NVPERF_SYS_ROUTER:
gk20a_dbg_info("NETLIST_REGIONID_NVPERF_SYS_ROUTER");
err = gr_gk20a_alloc_load_netlist_aiv(
src, size, &g->gr.ctx_vars.ctxsw_regs.perf_sys_router);
if (err)
goto clean_up;
break;
case NETLIST_REGIONID_NVPERF_PMA:
gk20a_dbg_info("NETLIST_REGIONID_NVPERF_PMA");
err = gr_gk20a_alloc_load_netlist_aiv(
src, size, &g->gr.ctx_vars.ctxsw_regs.perf_pma);
if (err)
goto clean_up;
break;
case NETLIST_REGIONID_CTXREG_PMROP:
gk20a_dbg_info("NETLIST_REGIONID_CTXREG_PMROP");
err = gr_gk20a_alloc_load_netlist_aiv(
src, size, &g->gr.ctx_vars.ctxsw_regs.pm_rop);
if (err)
goto clean_up;
break;
case NETLIST_REGIONID_CTXREG_PMUCGPC:
gk20a_dbg_info("NETLIST_REGIONID_CTXREG_PMUCGPC");
err = gr_gk20a_alloc_load_netlist_aiv(
src, size, &g->gr.ctx_vars.ctxsw_regs.pm_ucgpc);
if (err)
goto clean_up;
break;
default:
gk20a_dbg_info("unrecognized region %d skipped", i);
break;
@@ -381,6 +409,11 @@ clean_up:
/*
 * Error path: release every ctxsw register list loaded so far.
 * Each list pointer must be passed to kfree() exactly once; freeing the
 * same non-NULL pointer twice is a double free. kfree(NULL) is a no-op,
 * so lists that were never allocated need no guard.
 */
kfree(g->gr.ctx_vars.ctxsw_regs.gpc_router.l);
kfree(g->gr.ctx_vars.ctxsw_regs.pm_ltc.l);
kfree(g->gr.ctx_vars.ctxsw_regs.pm_fbpa.l);
kfree(g->gr.ctx_vars.ctxsw_regs.perf_sys_router.l);
kfree(g->gr.ctx_vars.ctxsw_regs.perf_pma.l);
kfree(g->gr.ctx_vars.ctxsw_regs.pm_rop.l);
kfree(g->gr.ctx_vars.ctxsw_regs.pm_ucgpc.l);
release_firmware(netlist_fw);
err = -ENOENT;
}

View File

@@ -93,6 +93,11 @@ union __max_name {
#define NETLIST_REGIONID_NVPERF_GPC_ROUTER 25
#define NETLIST_REGIONID_CTXREG_PMLTC 26
#define NETLIST_REGIONID_CTXREG_PMFBPA 27
#define NETLIST_REGIONID_SWVEIDBUNDLEINIT 28
#define NETLIST_REGIONID_NVPERF_SYS_ROUTER 29
#define NETLIST_REGIONID_NVPERF_PMA 30
#define NETLIST_REGIONID_CTXREG_PMROP 31
#define NETLIST_REGIONID_CTXREG_PMUCGPC 32
struct netlist_region {
u32 region_id;

View File

@@ -64,6 +64,8 @@
#define NV_PERF_PMM_FBP_ROUTER_STRIDE 0x0200
#define NV_PERF_PMMGPC_CHIPLET_OFFSET 0x1000
#define NV_PERF_PMMGPCROUTER_STRIDE 0x0200
#define NV_PCFG_BASE 0x00088000
#define NV_XBAR_MXBAR_PRI_GPC_GNIC_STRIDE 0x0020
#define FE_PWR_MODE_TIMEOUT_MAX 2000
#define FE_PWR_MODE_TIMEOUT_DEFAULT 10
#define CTXSW_MEM_SCRUBBING_TIMEOUT_MAX 1000
@@ -6446,6 +6448,13 @@ static int gr_gk20a_decode_priv_addr(struct gk20a *g, u32 addr,
} else
*gpc_num = pri_get_gpc_num(g, addr);
if (pri_is_ppc_addr(g, gpc_addr)) {
*addr_type = CTXSW_ADDR_TYPE_PPC;
if (pri_is_ppc_addr_shared(g, gpc_addr)) {
*broadcast_flags |= PRI_BROADCAST_FLAGS_PPC;
return 0;
}
}
if (g->ops.gr.is_tpc_addr(g, gpc_addr)) {
*addr_type = CTXSW_ADDR_TYPE_TPC;
if (pri_is_tpc_addr_shared(g, gpc_addr)) {
@@ -7490,6 +7499,72 @@ static int map_cmp(const void *a, const void *b)
return 0;
}
/*
 * Append one map entry per register in @regs to @map.
 *
 * Entries are written starting at index *count, with buffer offsets starting
 * at *offset and advancing by 4 per register. The entry address is
 * base + (register address & mask); when that sum is below 0xFFF,
 * NV_PCFG_BASE is added to it.
 *
 * Returns 0 on success, after advancing *count and *offset past the new
 * entries. Returns -EINVAL, leaving @map, *count and *offset untouched, when
 * the list would not fit within max_cnt entries.
 */
static int add_ctxsw_buffer_map_entries_pmsys(struct ctxsw_buf_offset_map_entry *map,
					struct aiv_list_gk20a *regs,
					u32 *count, u32 *offset,
					u32 max_cnt, u32 base, u32 mask)
{
	u32 entry = *count;
	u32 buf_off = *offset;
	u32 i;

	if ((entry + regs->count) > max_cnt)
		return -EINVAL;

	for (i = 0; i < regs->count; i++) {
		u32 reg_addr = base + (regs->l[i].addr & mask);

		/* Low addresses are relocated by NV_PCFG_BASE. */
		if (reg_addr < 0xFFF)
			reg_addr += NV_PCFG_BASE;

		map[entry].addr = reg_addr;
		map[entry].offset = buf_off;
		entry++;
		buf_off += 4;
	}

	*count = entry;
	*offset = buf_off;

	return 0;
}
/*
 * Append one map entry per register in @regs (a pm_gpc list) to @map.
 *
 * Entries are written starting at index *count, with buffer offsets starting
 * at *offset and advancing by 4 per register.
 *
 * NOTE: PPC offsets get added to the pm_gpc list if numPpc <= 1. To handle
 * PPC registers that ended up in the GPC list, every masked address is
 * tested with pri_is_ppc_addr_shared(); matching registers are mapped at
 * base + PPC_IN_GPC_BASE + (address & (PPC_IN_GPC_STRIDE - 1)) instead of
 * with the GPC mask supplied by the caller.
 *
 * Returns 0 on success, after advancing *count and *offset past the new
 * entries. Returns -EINVAL, leaving @map, *count and *offset untouched, when
 * the list would not fit within max_cnt entries.
 */
static int add_ctxsw_buffer_map_entries_pmgpc(struct gk20a *g,
					struct ctxsw_buf_offset_map_entry *map,
					struct aiv_list_gk20a *regs,
					u32 *count, u32 *offset,
					u32 max_cnt, u32 base, u32 mask)
{
	u32 entry = *count;
	u32 buf_off = *offset;
	u32 i;

	if ((entry + regs->count) > max_cnt)
		return -EINVAL;

	for (i = 0; i < regs->count; i++) {
		u32 raw_addr = regs->l[i].addr;
		u32 reg_addr;

		if (pri_is_ppc_addr_shared(g, raw_addr & mask)) {
			u32 ppc_base = nvgpu_get_litter_value(g,
						GPU_LIT_PPC_IN_GPC_BASE);
			u32 ppc_stride = nvgpu_get_litter_value(g,
						GPU_LIT_PPC_IN_GPC_STRIDE);

			/* Use the PPC mask rather than the GPC mask provided. */
			reg_addr = base + ppc_base +
				   (raw_addr & (ppc_stride - 1));
		} else {
			reg_addr = base + (raw_addr & mask);
		}

		map[entry].addr = reg_addr;
		map[entry].offset = buf_off;
		entry++;
		buf_off += 4;
	}

	*count = entry;
	*offset = buf_off;

	return 0;
}
static int add_ctxsw_buffer_map_entries(struct ctxsw_buf_offset_map_entry *map,
struct aiv_list_gk20a *regs,
u32 *count, u32 *offset,
@@ -7577,12 +7652,18 @@ static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g,
return -EINVAL;
base = gpc_base + (gpc_stride * gpc_num);
if (add_ctxsw_buffer_map_entries(map,
if (add_ctxsw_buffer_map_entries_pmgpc(g, map,
&g->gr.ctx_vars.ctxsw_regs.pm_gpc,
count, offset, max_cnt, base,
(gpc_stride - 1)))
return -EINVAL;
base = NV_XBAR_MXBAR_PRI_GPC_GNIC_STRIDE * gpc_num;
if (add_ctxsw_buffer_map_entries(map,
&g->gr.ctx_vars.ctxsw_regs.pm_ucgpc,
count, offset, max_cnt, base, ~0))
return -EINVAL;
base = (NV_PERF_PMMGPC_CHIPLET_OFFSET * gpc_num);
if (add_ctxsw_buffer_map_entries(map,
&g->gr.ctx_vars.ctxsw_regs.perf_gpc,
@@ -7609,6 +7690,12 @@ static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g,
*| |
*| LIST_compressed_nv_perf_ctx_reg_SYS |Space allocated: numRegs words
*|---------------------------------------------|
*| |
*| LIST_compressed_nv_perf_ctx_reg_sysrouter|Space allocated: numRegs words
*|---------------------------------------------|
*| |
*| LIST_compressed_nv_perf_ctx_reg_PMA |Space allocated: numRegs words
*|---------------------------------------------|
*| PADDING for 256 byte alignment |
*|---------------------------------------------|<----256 byte aligned
*| LIST_compressed_nv_perf_fbp_ctx_regs |
@@ -7620,6 +7707,8 @@ static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g,
*| LIST_compressed_pm_fbpa_ctx_regs |
*| |Space allocated: numRegs * n words (for n FB units)
*|---------------------------------------------|
*| LIST_compressed_pm_rop_ctx_regs |
*|---------------------------------------------|
*| LIST_compressed_pm_ltc_ctx_regs |
*| LTC0 LTS0 |
*| LTC1 LTS0 |Space allocated: numRegs * n words (for n LTC units)
@@ -7641,7 +7730,8 @@ static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g,
*| LIST_pm_ctx_reg_PPC REG1 TPCn |
*| * numPpcs REGn TPC0 |
*| LIST_pm_ctx_reg_GPC REGn TPC1 |
*| LIST_nv_perf_ctx_reg_GPC REGn TPCn |
*| List_pm_ctx_reg_uc_GPC REGn TPCn |
*| LIST_nv_perf_ctx_reg_GPC |
*| ---- |--
*| GPC1 . |
*| . |<----
@@ -7679,7 +7769,7 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g)
return -ENOMEM;
/* Add entries from _LIST_pm_ctx_reg_SYS */
if (add_ctxsw_buffer_map_entries(map, &g->gr.ctx_vars.ctxsw_regs.pm_sys,
if (add_ctxsw_buffer_map_entries_pmsys(map, &g->gr.ctx_vars.ctxsw_regs.pm_sys,
&count, &offset, hwpm_ctxsw_reg_count_max, 0, ~0))
goto cleanup;
@@ -7688,6 +7778,16 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g)
&count, &offset, hwpm_ctxsw_reg_count_max, 0, ~0))
goto cleanup;
/* Add entries from _LIST_nv_perf_sysrouter_ctx_reg*/
if (add_ctxsw_buffer_map_entries(map, &g->gr.ctx_vars.ctxsw_regs.perf_sys_router,
&count, &offset, hwpm_ctxsw_reg_count_max, 0, ~0))
goto cleanup;
/* Add entries from _LIST_nv_perf_pma_ctx_reg*/
if (add_ctxsw_buffer_map_entries(map, &g->gr.ctx_vars.ctxsw_regs.perf_pma,
&count, &offset, hwpm_ctxsw_reg_count_max, 0, ~0))
goto cleanup;
offset = ALIGN(offset, 256);
/* Add entries from _LIST_nv_perf_fbp_ctx_regs */
@@ -7714,6 +7814,13 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g)
num_fbpas, fbpa_stride, ~0))
goto cleanup;
/* Add entries from _LIST_nv_pm_rop_ctx_regs */
if (add_ctxsw_buffer_map_entries(map,
&g->gr.ctx_vars.ctxsw_regs.pm_rop,
&count, &offset,
hwpm_ctxsw_reg_count_max, 0, ~0))
goto cleanup;
/* Add entries from _LIST_compressed_nv_pm_ltc_ctx_regs */
if (add_ctxsw_buffer_map_entries_subunits(map,
&g->gr.ctx_vars.ctxsw_regs.pm_ltc,

View File

@@ -248,6 +248,10 @@ struct gr_gk20a {
struct aiv_list_gk20a gpc_router;
struct aiv_list_gk20a pm_ltc;
struct aiv_list_gk20a pm_fbpa;
struct aiv_list_gk20a perf_sys_router;
struct aiv_list_gk20a perf_pma;
struct aiv_list_gk20a pm_rop;
struct aiv_list_gk20a pm_ucgpc;
} ctxsw_regs;
int regs_base_index;
bool valid;

View File

@@ -69,6 +69,35 @@ static inline u32 pri_get_gpc_num(struct gk20a *g, u32 addr)
}
return 0;
}
/*
* PPC pri addressing
*/
/*
 * Return true when @addr lies in the half-open range
 * [PPC_IN_GPC_SHARED_BASE, PPC_IN_GPC_SHARED_BASE + PPC_IN_GPC_STRIDE),
 * with both bounds taken from the chip's litter values.
 */
static inline bool pri_is_ppc_addr_shared(struct gk20a *g, u32 addr)
{
	u32 shared_start = nvgpu_get_litter_value(g,
					GPU_LIT_PPC_IN_GPC_SHARED_BASE);
	u32 window = nvgpu_get_litter_value(g,
					GPU_LIT_PPC_IN_GPC_STRIDE);

	if (addr < shared_start)
		return false;

	return addr < (shared_start + window);
}
/*
 * Return true when @addr is a PPC address: either inside the per-PPC range
 * [PPC_IN_GPC_BASE, PPC_IN_GPC_BASE + NUM_PES_PER_GPC * PPC_IN_GPC_STRIDE)
 * or inside the shared PPC range checked by pri_is_ppc_addr_shared().
 */
static inline bool pri_is_ppc_addr(struct gk20a *g, u32 addr)
{
	u32 first = nvgpu_get_litter_value(g,
					GPU_LIT_PPC_IN_GPC_BASE);
	u32 pes_count = nvgpu_get_litter_value(g,
					GPU_LIT_NUM_PES_PER_GPC);
	u32 window = nvgpu_get_litter_value(g,
					GPU_LIT_PPC_IN_GPC_STRIDE);
	u32 end = first + pes_count * window;

	if ((addr >= first) && (addr < end))
		return true;

	/* Not a per-PPC address; it may still be in the shared range. */
	return pri_is_ppc_addr_shared(g, addr);
}
/*
* TPC pri addressing
*/

View File

@@ -87,9 +87,15 @@ static int gk20a_get_litter_value(struct gk20a *g,
case GPU_LIT_TPC_IN_GPC_SHARED_BASE:
ret = proj_tpc_in_gpc_shared_base_v();
break;
case GPU_LIT_PPC_IN_GPC_BASE:
ret = proj_ppc_in_gpc_base_v();
break;
case GPU_LIT_PPC_IN_GPC_STRIDE:
ret = proj_ppc_in_gpc_stride_v();
break;
case GPU_LIT_PPC_IN_GPC_SHARED_BASE:
ret = proj_ppc_in_gpc_shared_base_v();
break;
case GPU_LIT_ROP_BASE:
ret = proj_rop_base_v();
break;
@@ -118,6 +124,8 @@ static int gk20a_get_litter_value(struct gk20a *g,
ret = proj_fbpa_stride_v();
break;
default:
gk20a_err(dev_from_gk20a(g), "Missing definition %d", value);
BUG();
break;
}

View File

@@ -78,6 +78,10 @@ static inline u32 proj_ppc_in_gpc_base_v(void)
{
return 0x00003000;
}
/* Constant returned for the GPU_LIT_PPC_IN_GPC_SHARED_BASE litter value. */
static inline u32 proj_ppc_in_gpc_shared_base_v(void)
{
	return 0x00003e00U;
}
static inline u32 proj_ppc_in_gpc_stride_v(void)
{
return 0x00000200;

View File

@@ -128,9 +128,15 @@ static int gm20b_get_litter_value(struct gk20a *g,
case GPU_LIT_TPC_IN_GPC_SHARED_BASE:
ret = proj_tpc_in_gpc_shared_base_v();
break;
case GPU_LIT_PPC_IN_GPC_BASE:
ret = proj_ppc_in_gpc_base_v();
break;
case GPU_LIT_PPC_IN_GPC_STRIDE:
ret = proj_ppc_in_gpc_stride_v();
break;
case GPU_LIT_PPC_IN_GPC_SHARED_BASE:
ret = proj_ppc_in_gpc_shared_base_v();
break;
case GPU_LIT_ROP_BASE:
ret = proj_rop_base_v();
break;
@@ -159,6 +165,8 @@ static int gm20b_get_litter_value(struct gk20a *g,
ret = proj_fbpa_stride_v();
break;
default:
gk20a_err(dev_from_gk20a(g), "Missing definition %d", value);
BUG();
break;
}

View File

@@ -78,6 +78,10 @@ static inline u32 proj_ppc_in_gpc_base_v(void)
{
return 0x00003000;
}
/* Constant returned for the GPU_LIT_PPC_IN_GPC_SHARED_BASE litter value. */
static inline u32 proj_ppc_in_gpc_shared_base_v(void)
{
	return 0x00003e00U;
}
static inline u32 proj_ppc_in_gpc_stride_v(void)
{
return 0x00000200;