mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-24 18:42:29 +03:00
gpu: nvgpu: Full chip support for ctxsw
nvgpu changes needed to handle the newly added ctxsw lists.
Fix regops support for ppc registers.

Squashed from:

Change-Id: I08e6dec3bb2f7aa51de912c9d1c84a350ce07f72
Signed-off-by: neha <njoshi@nvidia.com>
Reviewed-on: http://git-master/r/1151010
(cherry picked from commit fd03ad9f09e66f78db88fb7ece448e26e0515821)

and:

Change-Id: I75a7f810ee0b613c22ac2cef2d936563d8067f97
Signed-off-by: Peter Daifuku <pdaifuku@nvidia.com>
Reviewed-on: http://git-master/r/1158888
(cherry picked from commit f00a7fcc57fb937b800e46760087ff6f7637520c)

Bug 200180000
Bug 1771830

Reviewed-on: http://git-master/r/1164397
(cherry picked from commit 7028f051e4f37edeff90a9923f022cec6c645a8f)
Signed-off-by: Peter Daifuku <pdaifuku@nvidia.com>
Change-Id: I796ddf93ef37170843a4a6b44190cd6780d25852
Reviewed-on: http://git-master/r/1183588
Reviewed-by: Vladislav Buzov <vbuzov@nvidia.com>
GVS: Gerrit_Virtual_Submit
This commit is contained in:
@@ -100,6 +100,7 @@ enum nvgpu_litter_value {
|
||||
GPU_LIT_TPC_IN_GPC_SHARED_BASE,
|
||||
GPU_LIT_PPC_IN_GPC_BASE,
|
||||
GPU_LIT_PPC_IN_GPC_STRIDE,
|
||||
GPU_LIT_PPC_IN_GPC_SHARED_BASE,
|
||||
GPU_LIT_ROP_BASE,
|
||||
GPU_LIT_ROP_STRIDE,
|
||||
GPU_LIT_ROP_SHARED_BASE,
|
||||
|
||||
@@ -336,6 +336,34 @@ static int gr_gk20a_init_ctx_vars_fw(struct gk20a *g, struct gr_gk20a *gr)
|
||||
if (err)
|
||||
goto clean_up;
|
||||
break;
|
||||
case NETLIST_REGIONID_NVPERF_SYS_ROUTER:
|
||||
gk20a_dbg_info("NETLIST_REGIONID_NVPERF_SYS_ROUTER");
|
||||
err = gr_gk20a_alloc_load_netlist_aiv(
|
||||
src, size, &g->gr.ctx_vars.ctxsw_regs.perf_sys_router);
|
||||
if (err)
|
||||
goto clean_up;
|
||||
break;
|
||||
case NETLIST_REGIONID_NVPERF_PMA:
|
||||
gk20a_dbg_info("NETLIST_REGIONID_NVPERF_PMA");
|
||||
err = gr_gk20a_alloc_load_netlist_aiv(
|
||||
src, size, &g->gr.ctx_vars.ctxsw_regs.perf_pma);
|
||||
if (err)
|
||||
goto clean_up;
|
||||
break;
|
||||
case NETLIST_REGIONID_CTXREG_PMROP:
|
||||
gk20a_dbg_info("NETLIST_REGIONID_CTXREG_PMROP");
|
||||
err = gr_gk20a_alloc_load_netlist_aiv(
|
||||
src, size, &g->gr.ctx_vars.ctxsw_regs.pm_rop);
|
||||
if (err)
|
||||
goto clean_up;
|
||||
break;
|
||||
case NETLIST_REGIONID_CTXREG_PMUCGPC:
|
||||
gk20a_dbg_info("NETLIST_REGIONID_CTXREG_PMUCGPC");
|
||||
err = gr_gk20a_alloc_load_netlist_aiv(
|
||||
src, size, &g->gr.ctx_vars.ctxsw_regs.pm_ucgpc);
|
||||
if (err)
|
||||
goto clean_up;
|
||||
break;
|
||||
default:
|
||||
gk20a_dbg_info("unrecognized region %d skipped", i);
|
||||
break;
|
||||
@@ -381,6 +409,11 @@ clean_up:
|
||||
kfree(g->gr.ctx_vars.ctxsw_regs.gpc_router.l);
|
||||
kfree(g->gr.ctx_vars.ctxsw_regs.pm_ltc.l);
|
||||
kfree(g->gr.ctx_vars.ctxsw_regs.pm_fbpa.l);
|
||||
kfree(g->gr.ctx_vars.ctxsw_regs.pm_fbpa.l);
|
||||
kfree(g->gr.ctx_vars.ctxsw_regs.perf_sys_router.l);
|
||||
kfree(g->gr.ctx_vars.ctxsw_regs.perf_pma.l);
|
||||
kfree(g->gr.ctx_vars.ctxsw_regs.pm_rop.l);
|
||||
kfree(g->gr.ctx_vars.ctxsw_regs.pm_ucgpc.l);
|
||||
release_firmware(netlist_fw);
|
||||
err = -ENOENT;
|
||||
}
|
||||
|
||||
@@ -93,6 +93,11 @@ union __max_name {
|
||||
#define NETLIST_REGIONID_NVPERF_GPC_ROUTER 25
|
||||
#define NETLIST_REGIONID_CTXREG_PMLTC 26
|
||||
#define NETLIST_REGIONID_CTXREG_PMFBPA 27
|
||||
#define NETLIST_REGIONID_SWVEIDBUNDLEINIT 28
|
||||
#define NETLIST_REGIONID_NVPERF_SYS_ROUTER 29
|
||||
#define NETLIST_REGIONID_NVPERF_PMA 30
|
||||
#define NETLIST_REGIONID_CTXREG_PMROP 31
|
||||
#define NETLIST_REGIONID_CTXREG_PMUCGPC 32
|
||||
|
||||
struct netlist_region {
|
||||
u32 region_id;
|
||||
|
||||
@@ -64,6 +64,8 @@
|
||||
#define NV_PERF_PMM_FBP_ROUTER_STRIDE 0x0200
|
||||
#define NV_PERF_PMMGPC_CHIPLET_OFFSET 0x1000
|
||||
#define NV_PERF_PMMGPCROUTER_STRIDE 0x0200
|
||||
#define NV_PCFG_BASE 0x00088000
|
||||
#define NV_XBAR_MXBAR_PRI_GPC_GNIC_STRIDE 0x0020
|
||||
#define FE_PWR_MODE_TIMEOUT_MAX 2000
|
||||
#define FE_PWR_MODE_TIMEOUT_DEFAULT 10
|
||||
#define CTXSW_MEM_SCRUBBING_TIMEOUT_MAX 1000
|
||||
@@ -6446,6 +6448,13 @@ static int gr_gk20a_decode_priv_addr(struct gk20a *g, u32 addr,
|
||||
} else
|
||||
*gpc_num = pri_get_gpc_num(g, addr);
|
||||
|
||||
if (pri_is_ppc_addr(g, gpc_addr)) {
|
||||
*addr_type = CTXSW_ADDR_TYPE_PPC;
|
||||
if (pri_is_ppc_addr_shared(g, gpc_addr)) {
|
||||
*broadcast_flags |= PRI_BROADCAST_FLAGS_PPC;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
if (g->ops.gr.is_tpc_addr(g, gpc_addr)) {
|
||||
*addr_type = CTXSW_ADDR_TYPE_TPC;
|
||||
if (pri_is_tpc_addr_shared(g, gpc_addr)) {
|
||||
@@ -7490,6 +7499,72 @@ static int map_cmp(const void *a, const void *b)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Append one offset-map entry per register of @regs to @map.
 *
 * @map:     destination array of map entries
 * @regs:    register list; only regs->count and regs->l[i].addr are read
 * @count:   in/out, number of entries already in @map; advanced on success
 * @offset:  in/out, offset assigned to the next entry; advanced by 4 per
 *           register on success
 * @max_cnt: upper bound on the number of entries @map may hold
 * @base:    value added to every masked register address
 * @mask:    AND-mask applied to each register address before adding @base
 *
 * Returns 0 on success, or -EINVAL when the list does not fit, in which case
 * @map, @count and @offset are left untouched.
 */
static int add_ctxsw_buffer_map_entries_pmsys(struct ctxsw_buf_offset_map_entry *map,
					struct aiv_list_gk20a *regs,
					u32 *count, u32 *offset,
					u32 max_cnt, u32 base, u32 mask)
{
	u32 idx;
	u32 cnt = *count;
	u32 off = *offset;

	/*
	 * Compare against the remaining room rather than summing: the sum
	 * cnt + regs->count is unsigned arithmetic and could wrap around
	 * below max_cnt, letting the loop write past the end of map.
	 */
	if (cnt > max_cnt || regs->count > (max_cnt - cnt))
		return -EINVAL;

	for (idx = 0; idx < regs->count; idx++) {
		u32 addr = base + (regs->l[idx].addr & mask);

		/*
		 * Addresses below 0xFFF are relocated by NV_PCFG_BASE.
		 * NOTE(review): presumably these list entries are offsets
		 * relative to the PCFG aperture - confirm against the
		 * netlist generator.
		 */
		if (addr < 0xFFF)
			addr += NV_PCFG_BASE;

		map[cnt].addr = addr;
		map[cnt++].offset = off;
		off += 4;
	}
	*count = cnt;
	*offset = off;
	return 0;
}
|
||||
|
||||
/*
 * Append one offset-map entry per register of the pm_gpc-style list @regs,
 * giving PPC registers that appear in the list their own address mapping.
 *
 * @g:       GPU instance, used for litter-value lookups
 * @map:     destination array of map entries
 * @regs:    register list; only regs->count and regs->l[i].addr are read
 * @count:   in/out, number of entries already in @map; advanced on success
 * @offset:  in/out, offset assigned to the next entry; advanced by 4 per
 *           register on success
 * @max_cnt: upper bound on the number of entries @map may hold
 * @base:    value added to every mapped register address
 * @mask:    AND-mask applied to non-PPC register addresses
 *
 * Returns 0 on success, or -EINVAL when the list does not fit, in which case
 * @map, @count and @offset are left untouched.
 */
static int add_ctxsw_buffer_map_entries_pmgpc(struct gk20a *g,
					struct ctxsw_buf_offset_map_entry *map,
					struct aiv_list_gk20a *regs,
					u32 *count, u32 *offset,
					u32 max_cnt, u32 base, u32 mask)
{
	u32 idx;
	u32 cnt = *count;
	u32 off = *offset;

	/*
	 * Compare against the remaining room rather than summing: the sum
	 * cnt + regs->count is unsigned arithmetic and could wrap around
	 * below max_cnt, letting the loop write past the end of map.
	 */
	if (cnt > max_cnt || regs->count > (max_cnt - cnt))
		return -EINVAL;

	/* NOTE: The PPC offsets get added to the pm_gpc list if numPpc <= 1
	 * To handle the case of PPC registers getting added into GPC, the below
	 * code specifically checks for any PPC offsets and adds them using
	 * proper mask
	 */
	for (idx = 0; idx < regs->count; idx++) {
		u32 reg_addr = regs->l[idx].addr;

		/* Check if the address is PPC address */
		if (pri_is_ppc_addr_shared(g, reg_addr & mask)) {
			u32 ppc_in_gpc_base = nvgpu_get_litter_value(g,
						GPU_LIT_PPC_IN_GPC_BASE);
			u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g,
						GPU_LIT_PPC_IN_GPC_STRIDE);
			/*
			 * Use PPC mask instead of the GPC mask provided.
			 * stride - 1 is only a contiguous bit mask when the
			 * stride is a power of two.
			 */
			u32 ppcmask = ppc_in_gpc_stride - 1;

			map[cnt].addr = base + ppc_in_gpc_base
					+ (reg_addr & ppcmask);
		} else {
			map[cnt].addr = base + (reg_addr & mask);
		}
		map[cnt++].offset = off;
		off += 4;
	}
	*count = cnt;
	*offset = off;
	return 0;
}
|
||||
|
||||
static int add_ctxsw_buffer_map_entries(struct ctxsw_buf_offset_map_entry *map,
|
||||
struct aiv_list_gk20a *regs,
|
||||
u32 *count, u32 *offset,
|
||||
@@ -7577,12 +7652,18 @@ static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g,
|
||||
return -EINVAL;
|
||||
|
||||
base = gpc_base + (gpc_stride * gpc_num);
|
||||
if (add_ctxsw_buffer_map_entries(map,
|
||||
if (add_ctxsw_buffer_map_entries_pmgpc(g, map,
|
||||
&g->gr.ctx_vars.ctxsw_regs.pm_gpc,
|
||||
count, offset, max_cnt, base,
|
||||
(gpc_stride - 1)))
|
||||
return -EINVAL;
|
||||
|
||||
base = NV_XBAR_MXBAR_PRI_GPC_GNIC_STRIDE * gpc_num;
|
||||
if (add_ctxsw_buffer_map_entries(map,
|
||||
&g->gr.ctx_vars.ctxsw_regs.pm_ucgpc,
|
||||
count, offset, max_cnt, base, ~0))
|
||||
return -EINVAL;
|
||||
|
||||
base = (NV_PERF_PMMGPC_CHIPLET_OFFSET * gpc_num);
|
||||
if (add_ctxsw_buffer_map_entries(map,
|
||||
&g->gr.ctx_vars.ctxsw_regs.perf_gpc,
|
||||
@@ -7609,6 +7690,12 @@ static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g,
|
||||
*| |
|
||||
*| LIST_compressed_nv_perf_ctx_reg_SYS |Space allocated: numRegs words
|
||||
*|---------------------------------------------|
|
||||
*| |
|
||||
*| LIST_compressed_nv_perf_ctx_reg_sysrouter|Space allocated: numRegs words
|
||||
*|---------------------------------------------|
|
||||
*| |
|
||||
*| LIST_compressed_nv_perf_ctx_reg_PMA |Space allocated: numRegs words
|
||||
*|---------------------------------------------|
|
||||
*| PADDING for 256 byte alignment |
|
||||
*|---------------------------------------------|<----256 byte aligned
|
||||
*| LIST_compressed_nv_perf_fbp_ctx_regs |
|
||||
@@ -7620,6 +7707,8 @@ static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g,
|
||||
*| LIST_compressed_pm_fbpa_ctx_regs |
|
||||
*| |Space allocated: numRegs * n words (for n FB units)
|
||||
*|---------------------------------------------|
|
||||
*| LIST_compressed_pm_rop_ctx_regs |
|
||||
*|---------------------------------------------|
|
||||
*| LIST_compressed_pm_ltc_ctx_regs |
|
||||
*| LTC0 LTS0 |
|
||||
*| LTC1 LTS0 |Space allocated: numRegs * n words (for n LTC units)
|
||||
@@ -7641,7 +7730,8 @@ static int add_ctxsw_buffer_map_entries_gpcs(struct gk20a *g,
|
||||
*| LIST_pm_ctx_reg_PPC REG1 TPCn |
|
||||
*| * numPpcs REGn TPC0 |
|
||||
*| LIST_pm_ctx_reg_GPC REGn TPC1 |
|
||||
*| LIST_nv_perf_ctx_reg_GPC REGn TPCn |
|
||||
*| List_pm_ctx_reg_uc_GPC REGn TPCn |
|
||||
*| LIST_nv_perf_ctx_reg_GPC |
|
||||
*| ---- |--
|
||||
*| GPC1 . |
|
||||
*| . |<----
|
||||
@@ -7679,7 +7769,7 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g)
|
||||
return -ENOMEM;
|
||||
|
||||
/* Add entries from _LIST_pm_ctx_reg_SYS */
|
||||
if (add_ctxsw_buffer_map_entries(map, &g->gr.ctx_vars.ctxsw_regs.pm_sys,
|
||||
if (add_ctxsw_buffer_map_entries_pmsys(map, &g->gr.ctx_vars.ctxsw_regs.pm_sys,
|
||||
&count, &offset, hwpm_ctxsw_reg_count_max, 0, ~0))
|
||||
goto cleanup;
|
||||
|
||||
@@ -7688,6 +7778,16 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g)
|
||||
&count, &offset, hwpm_ctxsw_reg_count_max, 0, ~0))
|
||||
goto cleanup;
|
||||
|
||||
/* Add entries from _LIST_nv_perf_sysrouter_ctx_reg*/
|
||||
if (add_ctxsw_buffer_map_entries(map, &g->gr.ctx_vars.ctxsw_regs.perf_sys_router,
|
||||
&count, &offset, hwpm_ctxsw_reg_count_max, 0, ~0))
|
||||
goto cleanup;
|
||||
|
||||
/* Add entries from _LIST_nv_perf_pma_ctx_reg*/
|
||||
if (add_ctxsw_buffer_map_entries(map, &g->gr.ctx_vars.ctxsw_regs.perf_pma,
|
||||
&count, &offset, hwpm_ctxsw_reg_count_max, 0, ~0))
|
||||
goto cleanup;
|
||||
|
||||
offset = ALIGN(offset, 256);
|
||||
|
||||
/* Add entries from _LIST_nv_perf_fbp_ctx_regs */
|
||||
@@ -7714,6 +7814,13 @@ static int gr_gk20a_create_hwpm_ctxsw_buffer_offset_map(struct gk20a *g)
|
||||
num_fbpas, fbpa_stride, ~0))
|
||||
goto cleanup;
|
||||
|
||||
/* Add entries from _LIST_nv_pm_rop_ctx_regs */
|
||||
if (add_ctxsw_buffer_map_entries(map,
|
||||
&g->gr.ctx_vars.ctxsw_regs.pm_rop,
|
||||
&count, &offset,
|
||||
hwpm_ctxsw_reg_count_max, 0, ~0))
|
||||
goto cleanup;
|
||||
|
||||
/* Add entries from _LIST_compressed_nv_pm_ltc_ctx_regs */
|
||||
if (add_ctxsw_buffer_map_entries_subunits(map,
|
||||
&g->gr.ctx_vars.ctxsw_regs.pm_ltc,
|
||||
|
||||
@@ -248,6 +248,10 @@ struct gr_gk20a {
|
||||
struct aiv_list_gk20a gpc_router;
|
||||
struct aiv_list_gk20a pm_ltc;
|
||||
struct aiv_list_gk20a pm_fbpa;
|
||||
struct aiv_list_gk20a perf_sys_router;
|
||||
struct aiv_list_gk20a perf_pma;
|
||||
struct aiv_list_gk20a pm_rop;
|
||||
struct aiv_list_gk20a pm_ucgpc;
|
||||
} ctxsw_regs;
|
||||
int regs_base_index;
|
||||
bool valid;
|
||||
|
||||
@@ -69,6 +69,35 @@ static inline u32 pri_get_gpc_num(struct gk20a *g, u32 addr)
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* PPC pri addressing
|
||||
*/
|
||||
/*
 * Report whether @addr falls inside the shared PPC window, i.e. the range
 * [PPC_IN_GPC_SHARED_BASE, PPC_IN_GPC_SHARED_BASE + PPC_IN_GPC_STRIDE).
 * Both values are obtained through nvgpu_get_litter_value().
 */
static inline bool pri_is_ppc_addr_shared(struct gk20a *g, u32 addr)
{
	u32 shared_lo = nvgpu_get_litter_value(g,
				GPU_LIT_PPC_IN_GPC_SHARED_BASE);
	u32 window = nvgpu_get_litter_value(g,
				GPU_LIT_PPC_IN_GPC_STRIDE);

	if (addr < shared_lo)
		return false;

	return addr < (shared_lo + window);
}
|
||||
|
||||
/*
 * Report whether @addr is a PPC address: either inside the per-PPC range
 * [PPC_IN_GPC_BASE, PPC_IN_GPC_BASE + NUM_PES_PER_GPC * PPC_IN_GPC_STRIDE)
 * or inside the shared PPC window checked by pri_is_ppc_addr_shared().
 */
static inline bool pri_is_ppc_addr(struct gk20a *g, u32 addr)
{
	u32 unicast_lo = nvgpu_get_litter_value(g,
				GPU_LIT_PPC_IN_GPC_BASE);
	u32 pes_count = nvgpu_get_litter_value(g,
				GPU_LIT_NUM_PES_PER_GPC);
	u32 stride = nvgpu_get_litter_value(g,
				GPU_LIT_PPC_IN_GPC_STRIDE);

	/* Per-PPC range first; the shared window is only consulted on a miss. */
	if (addr >= unicast_lo && addr < unicast_lo + pes_count * stride)
		return true;

	return pri_is_ppc_addr_shared(g, addr);
}
|
||||
|
||||
/*
|
||||
* TPC pri addressing
|
||||
*/
|
||||
|
||||
@@ -87,9 +87,15 @@ static int gk20a_get_litter_value(struct gk20a *g,
|
||||
case GPU_LIT_TPC_IN_GPC_SHARED_BASE:
|
||||
ret = proj_tpc_in_gpc_shared_base_v();
|
||||
break;
|
||||
case GPU_LIT_PPC_IN_GPC_BASE:
|
||||
ret = proj_ppc_in_gpc_base_v();
|
||||
break;
|
||||
case GPU_LIT_PPC_IN_GPC_STRIDE:
|
||||
ret = proj_ppc_in_gpc_stride_v();
|
||||
break;
|
||||
case GPU_LIT_PPC_IN_GPC_SHARED_BASE:
|
||||
ret = proj_ppc_in_gpc_shared_base_v();
|
||||
break;
|
||||
case GPU_LIT_ROP_BASE:
|
||||
ret = proj_rop_base_v();
|
||||
break;
|
||||
@@ -118,6 +124,8 @@ static int gk20a_get_litter_value(struct gk20a *g,
|
||||
ret = proj_fbpa_stride_v();
|
||||
break;
|
||||
default:
|
||||
gk20a_err(dev_from_gk20a(g), "Missing definition %d", value);
|
||||
BUG();
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
@@ -78,6 +78,10 @@ static inline u32 proj_ppc_in_gpc_base_v(void)
|
||||
{
|
||||
return 0x00003000;
|
||||
}
|
||||
/* Constant returned for the GPU_LIT_PPC_IN_GPC_SHARED_BASE litter lookup. */
static inline u32 proj_ppc_in_gpc_shared_base_v(void)
{
	const u32 shared_base = 0x00003e00;

	return shared_base;
}
|
||||
static inline u32 proj_ppc_in_gpc_stride_v(void)
|
||||
{
|
||||
return 0x00000200;
|
||||
|
||||
@@ -128,9 +128,15 @@ static int gm20b_get_litter_value(struct gk20a *g,
|
||||
case GPU_LIT_TPC_IN_GPC_SHARED_BASE:
|
||||
ret = proj_tpc_in_gpc_shared_base_v();
|
||||
break;
|
||||
case GPU_LIT_PPC_IN_GPC_BASE:
|
||||
ret = proj_ppc_in_gpc_base_v();
|
||||
break;
|
||||
case GPU_LIT_PPC_IN_GPC_STRIDE:
|
||||
ret = proj_ppc_in_gpc_stride_v();
|
||||
break;
|
||||
case GPU_LIT_PPC_IN_GPC_SHARED_BASE:
|
||||
ret = proj_ppc_in_gpc_shared_base_v();
|
||||
break;
|
||||
case GPU_LIT_ROP_BASE:
|
||||
ret = proj_rop_base_v();
|
||||
break;
|
||||
@@ -159,6 +165,8 @@ static int gm20b_get_litter_value(struct gk20a *g,
|
||||
ret = proj_fbpa_stride_v();
|
||||
break;
|
||||
default:
|
||||
gk20a_err(dev_from_gk20a(g), "Missing definition %d", value);
|
||||
BUG();
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
@@ -78,6 +78,10 @@ static inline u32 proj_ppc_in_gpc_base_v(void)
|
||||
{
|
||||
return 0x00003000;
|
||||
}
|
||||
/* Constant returned for the GPU_LIT_PPC_IN_GPC_SHARED_BASE litter lookup. */
static inline u32 proj_ppc_in_gpc_shared_base_v(void)
{
	const u32 shared_base = 0x00003e00;

	return shared_base;
}
|
||||
static inline u32 proj_ppc_in_gpc_stride_v(void)
|
||||
{
|
||||
return 0x00000200;
|
||||
|
||||
Reference in New Issue
Block a user