gpu: nvgpu: register usage optimizations

With hw minimal headers, lot of unwanted hw registers are stripped.
SW needed few updates to use minimal headers:

1. Use stride value to get non zero instance offset:
gr_pri_gpc0_tpc1_tpccs_tpc_activity_0_r() =
	gr_pri_gpc0_tpc0_tpccs_tpc_activity_0_r() +
        nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
gr_pri_be1_becs_be_activity0_r() = gr_pri_be0_becs_be_activity0_r() +
		nvgpu_get_litter_value(g, GPU_LIT_ROP_STRIDE);

2. Broadcast registers should not be used for reading status and
they should be used only for broadcast register writes. Removed
following register reads from gm20b register dump:
NV_PGRAPH_PRI_GPCS_TPC0_TPCCS_TPC_ACTIVITY0
NV_PGRAPH_PRI_GPCS_TPC1_TPCCS_TPC_ACTIVITY0

Above optimizations are done for gm20b, gp10b and gv11b.

JIRA NVGPU-2917
JIRA NVGPU-2918
JIRA NVGPU-2919

Change-Id: Ia8c736639f7cada0cf9f0d227dac372bdf09e55b
Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2088128
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Seshendra Gadagottu
2019-04-02 14:18:59 -07:00
committed by mobile promotions
parent 78b78d4e39
commit 26f98f0956
6 changed files with 27 additions and 96 deletions

View File

@@ -441,12 +441,14 @@ int gr_gm20b_dump_gr_status_regs(struct gk20a *g,
gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity3_r()));
gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n",
gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_activity_0_r()));
if ((gr->config->gpc_tpc_count != NULL) && (gr->config->gpc_tpc_count[0] == 2U)) {
gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n",
gk20a_readl(g, gr_pri_gpc0_tpc1_tpccs_tpc_activity_0_r()));
if ((gr->config->gpc_tpc_count != NULL) &&
(gr->config->gpc_tpc_count[0] == 2U)) {
gk20a_debug_output(o,
"NV_PGRAPH_PRI_GPC0_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n",
gk20a_readl(g,
(gr_pri_gpc0_tpc0_tpccs_tpc_activity_0_r() +
nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE))));
}
gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPCS_TPCCS_TPC_ACTIVITY0: 0x%x\n",
gk20a_readl(g, gr_pri_gpc0_tpcs_tpccs_tpc_activity_0_r()));
gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY0: 0x%x\n",
gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_0_r()));
gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY1: 0x%x\n",
@@ -455,18 +457,13 @@ int gr_gm20b_dump_gr_status_regs(struct gk20a *g,
gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_2_r()));
gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY3: 0x%x\n",
gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_3_r()));
gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n",
gk20a_readl(g, gr_pri_gpcs_tpc0_tpccs_tpc_activity_0_r()));
if ((gr->config->gpc_tpc_count != NULL) && (gr->config->gpc_tpc_count[0] == 2U)) {
gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n",
gk20a_readl(g, gr_pri_gpcs_tpc1_tpccs_tpc_activity_0_r()));
}
gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPCS_TPCCS_TPC_ACTIVITY0: 0x%x\n",
gk20a_readl(g, gr_pri_gpcs_tpcs_tpccs_tpc_activity_0_r()));
gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_BECS_BE_ACTIVITY0: 0x%x\n",
gk20a_readl(g, gr_pri_be0_becs_be_activity0_r()));
gk20a_debug_output(o, "NV_PGRAPH_PRI_BE1_BECS_BE_ACTIVITY0: 0x%x\n",
gk20a_readl(g, gr_pri_be1_becs_be_activity0_r()));
gk20a_readl(g, (gr_pri_be0_becs_be_activity0_r() +
nvgpu_get_litter_value(g, GPU_LIT_ROP_STRIDE))));
gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_BECS_BE_ACTIVITY0: 0x%x\n",
gk20a_readl(g, gr_pri_bes_becs_be_activity0_r()));
gk20a_debug_output(o, "NV_PGRAPH_PRI_DS_MPIPE_STATUS: 0x%x\n",

View File

@@ -598,12 +598,14 @@ int gr_gp10b_dump_gr_status_regs(struct gk20a *g,
gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity3_r()));
gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n",
gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_activity_0_r()));
if ((gr->config->gpc_tpc_count != NULL) && (gr->config->gpc_tpc_count[0] == 2U)) {
gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n",
gk20a_readl(g, gr_pri_gpc0_tpc1_tpccs_tpc_activity_0_r()));
if ((gr->config->gpc_tpc_count != NULL) &&
(gr->config->gpc_tpc_count[0] == 2U)) {
gk20a_debug_output(o,
"NV_PGRAPH_PRI_GPC0_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n",
gk20a_readl(g,
(gr_pri_gpc0_tpc0_tpccs_tpc_activity_0_r() +
nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE))));
}
gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPCS_TPCCS_TPC_ACTIVITY0: 0x%x\n",
gk20a_readl(g, gr_pri_gpc0_tpcs_tpccs_tpc_activity_0_r()));
gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY0: 0x%x\n",
gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_0_r()));
gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY1: 0x%x\n",
@@ -612,18 +614,13 @@ int gr_gp10b_dump_gr_status_regs(struct gk20a *g,
gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_2_r()));
gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY3: 0x%x\n",
gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_3_r()));
gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n",
gk20a_readl(g, gr_pri_gpcs_tpc0_tpccs_tpc_activity_0_r()));
if ((gr->config->gpc_tpc_count != NULL) && (gr->config->gpc_tpc_count[0] == 2U)) {
gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n",
gk20a_readl(g, gr_pri_gpcs_tpc1_tpccs_tpc_activity_0_r()));
}
gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPCS_TPCCS_TPC_ACTIVITY0: 0x%x\n",
gk20a_readl(g, gr_pri_gpcs_tpcs_tpccs_tpc_activity_0_r()));
gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_BECS_BE_ACTIVITY0: 0x%x\n",
gk20a_readl(g, gr_pri_be0_becs_be_activity0_r()));
gk20a_debug_output(o, "NV_PGRAPH_PRI_BE1_BECS_BE_ACTIVITY0: 0x%x\n",
gk20a_readl(g, gr_pri_be1_becs_be_activity0_r()));
gk20a_readl(g, (gr_pri_be0_becs_be_activity0_r() +
nvgpu_get_litter_value(g, GPU_LIT_ROP_STRIDE))));
gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_BECS_BE_ACTIVITY0: 0x%x\n",
gk20a_readl(g, gr_pri_bes_becs_be_activity0_r()));
gk20a_debug_output(o, "NV_PGRAPH_PRI_DS_MPIPE_STATUS: 0x%x\n",

View File

@@ -1161,12 +1161,14 @@ int gr_gv11b_dump_gr_status_regs(struct gk20a *g,
gk20a_readl(g, gr_pri_gpc0_gpccs_gpc_activity3_r()));
gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n",
gk20a_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_activity_0_r()));
if ((gr->config->gpc_tpc_count != NULL) && (gr->config->gpc_tpc_count[0] == 2U)) {
gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n",
gk20a_readl(g, gr_pri_gpc0_tpc1_tpccs_tpc_activity_0_r()));
if ((gr->config->gpc_tpc_count != NULL) &&
(gr->config->gpc_tpc_count[0] == 2U)) {
gk20a_debug_output(o,
"NV_PGRAPH_PRI_GPC0_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n",
gk20a_readl(g,
(gr_pri_gpc0_tpc0_tpccs_tpc_activity_0_r() +
nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE))));
}
gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPCS_TPCCS_TPC_ACTIVITY0: 0x%x\n",
gk20a_readl(g, gr_pri_gpc0_tpcs_tpccs_tpc_activity_0_r()));
gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY0: 0x%x\n",
gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_0_r()));
gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY1: 0x%x\n",
@@ -1175,18 +1177,13 @@ int gr_gv11b_dump_gr_status_regs(struct gk20a *g,
gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_2_r()));
gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY3: 0x%x\n",
gk20a_readl(g, gr_pri_gpcs_gpccs_gpc_activity_3_r()));
gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC0_TPCCS_TPC_ACTIVITY0: 0x%x\n",
gk20a_readl(g, gr_pri_gpcs_tpc0_tpccs_tpc_activity_0_r()));
if ((gr->config->gpc_tpc_count != NULL) && (gr->config->gpc_tpc_count[0] == 2U)) {
gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPC1_TPCCS_TPC_ACTIVITY0: 0x%x\n",
gk20a_readl(g, gr_pri_gpcs_tpc1_tpccs_tpc_activity_0_r()));
}
gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPCS_TPCCS_TPC_ACTIVITY0: 0x%x\n",
gk20a_readl(g, gr_pri_gpcs_tpcs_tpccs_tpc_activity_0_r()));
gk20a_debug_output(o, "NV_PGRAPH_PRI_BE0_BECS_BE_ACTIVITY0: 0x%x\n",
gk20a_readl(g, gr_pri_be0_becs_be_activity0_r()));
gk20a_debug_output(o, "NV_PGRAPH_PRI_BE1_BECS_BE_ACTIVITY0: 0x%x\n",
gk20a_readl(g, gr_pri_be1_becs_be_activity0_r()));
gk20a_readl(g, (gr_pri_be0_becs_be_activity0_r() +
nvgpu_get_litter_value(g, GPU_LIT_ROP_STRIDE))));
gk20a_debug_output(o, "NV_PGRAPH_PRI_BES_BECS_BE_ACTIVITY0: 0x%x\n",
gk20a_readl(g, gr_pri_bes_becs_be_activity0_r()));
gk20a_debug_output(o, "NV_PGRAPH_PRI_DS_MPIPE_STATUS: 0x%x\n",

View File

@@ -434,14 +434,6 @@ static inline u32 gr_pri_gpc0_tpc0_tpccs_tpc_activity_0_r(void)
{
return 0x00504500U;
}
static inline u32 gr_pri_gpc0_tpc1_tpccs_tpc_activity_0_r(void)
{
return 0x00504d00U;
}
static inline u32 gr_pri_gpc0_tpcs_tpccs_tpc_activity_0_r(void)
{
return 0x00501d00U;
}
static inline u32 gr_pri_gpcs_gpccs_gpc_activity_0_r(void)
{
return 0x0041ac80U;
@@ -458,14 +450,6 @@ static inline u32 gr_pri_gpcs_gpccs_gpc_activity_3_r(void)
{
return 0x0041ac8cU;
}
static inline u32 gr_pri_gpcs_tpc0_tpccs_tpc_activity_0_r(void)
{
return 0x0041c500U;
}
static inline u32 gr_pri_gpcs_tpc1_tpccs_tpc_activity_0_r(void)
{
return 0x0041cd00U;
}
static inline u32 gr_pri_gpcs_tpcs_tpccs_tpc_activity_0_r(void)
{
return 0x00419d00U;
@@ -474,10 +458,6 @@ static inline u32 gr_pri_be0_becs_be_activity0_r(void)
{
return 0x00410200U;
}
static inline u32 gr_pri_be1_becs_be_activity0_r(void)
{
return 0x00410600U;
}
static inline u32 gr_pri_bes_becs_be_activity0_r(void)
{
return 0x00408a00U;

View File

@@ -458,14 +458,6 @@ static inline u32 gr_pri_gpc0_tpc0_tpccs_tpc_activity_0_r(void)
{
return 0x00504500U;
}
static inline u32 gr_pri_gpc0_tpc1_tpccs_tpc_activity_0_r(void)
{
return 0x00504d00U;
}
static inline u32 gr_pri_gpc0_tpcs_tpccs_tpc_activity_0_r(void)
{
return 0x00501d00U;
}
static inline u32 gr_pri_gpcs_gpccs_gpc_activity_0_r(void)
{
return 0x0041ac80U;
@@ -482,14 +474,6 @@ static inline u32 gr_pri_gpcs_gpccs_gpc_activity_3_r(void)
{
return 0x0041ac8cU;
}
static inline u32 gr_pri_gpcs_tpc0_tpccs_tpc_activity_0_r(void)
{
return 0x0041c500U;
}
static inline u32 gr_pri_gpcs_tpc1_tpccs_tpc_activity_0_r(void)
{
return 0x0041cd00U;
}
static inline u32 gr_pri_gpcs_tpcs_tpccs_tpc_activity_0_r(void)
{
return 0x00419d00U;
@@ -498,10 +482,6 @@ static inline u32 gr_pri_be0_becs_be_activity0_r(void)
{
return 0x00410200U;
}
static inline u32 gr_pri_be1_becs_be_activity0_r(void)
{
return 0x00410600U;
}
static inline u32 gr_pri_bes_becs_be_activity0_r(void)
{
return 0x00408a00U;

View File

@@ -526,14 +526,6 @@ static inline u32 gr_pri_gpc0_tpc0_tpccs_tpc_activity_0_r(void)
{
return 0x00504500U;
}
static inline u32 gr_pri_gpc0_tpc1_tpccs_tpc_activity_0_r(void)
{
return 0x00504d00U;
}
static inline u32 gr_pri_gpc0_tpcs_tpccs_tpc_activity_0_r(void)
{
return 0x00501d00U;
}
static inline u32 gr_pri_gpcs_gpccs_gpc_activity_0_r(void)
{
return 0x0041ac80U;
@@ -550,14 +542,6 @@ static inline u32 gr_pri_gpcs_gpccs_gpc_activity_3_r(void)
{
return 0x0041ac8cU;
}
static inline u32 gr_pri_gpcs_tpc0_tpccs_tpc_activity_0_r(void)
{
return 0x0041c500U;
}
static inline u32 gr_pri_gpcs_tpc1_tpccs_tpc_activity_0_r(void)
{
return 0x0041cd00U;
}
static inline u32 gr_pri_gpcs_tpcs_tpccs_tpc_activity_0_r(void)
{
return 0x00419d00U;
@@ -566,10 +550,6 @@ static inline u32 gr_pri_be0_becs_be_activity0_r(void)
{
return 0x00410200U;
}
static inline u32 gr_pri_be1_becs_be_activity0_r(void)
{
return 0x00410600U;
}
static inline u32 gr_pri_bes_becs_be_activity0_r(void)
{
return 0x00408a00U;