From 815c102e5dc06e15f3c34d1f73b680a825f2d3b2 Mon Sep 17 00:00:00 2001 From: Vinod G Date: Tue, 9 Apr 2019 17:17:27 -0700 Subject: [PATCH] gpu: nvgpu: move get_nonpes_aware_tpc hal to hal.gr.init Move the get_nonpes_aware_tpc hal to hal.gr.init. This hal is implemented for gv11b. Update the sm_id_numbering hal to take the gr_config struct pointer as a parameter, so that the hal no longer dereferences it from gr. JIRA NVGPU-2951 Change-Id: I1e06b634cc36741e116e41e581a18c7f5b373945 Signed-off-by: Vinod G Reviewed-on: https://git-master.nvidia.com/r/2093835 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/common/gr/fs_state.c | 3 +- .../nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c | 3 +- drivers/gpu/nvgpu/gv100/hal_gv100.c | 3 +- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 41 ++++--------------- drivers/gpu/nvgpu/gv11b/gr_gv11b.h | 1 - drivers/gpu/nvgpu/gv11b/hal_gv11b.c | 3 +- drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c | 4 +- drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h | 4 +- drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c | 36 +++++++++++++--- drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h | 6 ++- drivers/gpu/nvgpu/include/nvgpu/gk20a.h | 8 ++-- drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c | 8 ++-- drivers/gpu/nvgpu/tu104/hal_tu104.c | 3 +- 13 files changed, 68 insertions(+), 55 deletions(-) diff --git a/drivers/gpu/nvgpu/common/gr/fs_state.c b/drivers/gpu/nvgpu/common/gr/fs_state.c index 7d11637c7..3a98ca3dd 100644 --- a/drivers/gpu/nvgpu/common/gr/fs_state.c +++ b/drivers/gpu/nvgpu/common/gr/fs_state.c @@ -114,7 +114,8 @@ int nvgpu_gr_fs_state_init(struct gk20a *g) tpc_index = nvgpu_gr_config_get_sm_info_tpc_index(sm_info); gpc_index = nvgpu_gr_config_get_sm_info_gpc_index(sm_info); - g->ops.gr.init.sm_id_numbering(g, gpc_index, tpc_index, sm_id); + g->ops.gr.init.sm_id_numbering(g, gpc_index, tpc_index, sm_id, + gr_config); } g->ops.gr.init.pd_tpc_per_gpc(g, gr_config); diff --git a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c 
b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c index 7e8fa3f02..dda2cd714 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c +++ b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c @@ -217,7 +217,6 @@ static const struct gpu_ops vgpu_gv11b_ops = { .decode_priv_addr = gr_gv11b_decode_priv_addr, .create_priv_addr_table = gr_gv11b_create_priv_addr_table, .split_fbpa_broadcast_addr = gr_gk20a_split_fbpa_broadcast_addr, - .get_nonpes_aware_tpc = gr_gv11b_get_nonpes_aware_tpc, .get_offset_in_gpccs_segment = gr_gk20a_get_offset_in_gpccs_segment, .set_debug_mode = gm20b_gr_set_debug_mode, @@ -359,6 +358,8 @@ static const struct gpu_ops vgpu_gv11b_ops = { }, #endif /* CONFIG_GK20A_CTXSW_TRACE */ .init = { + .get_nonpes_aware_tpc = + gv11b_gr_init_get_nonpes_aware_tpc, .get_fbp_en_mask = vgpu_gr_get_fbp_en_mask, .fs_state = vgpu_gr_init_fs_state, .get_bundle_cb_default_size = diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index 593e228f0..12fb1dcd3 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c @@ -461,7 +461,6 @@ static const struct gpu_ops gv100_ops = { .decode_priv_addr = gr_gv11b_decode_priv_addr, .create_priv_addr_table = gr_gv11b_create_priv_addr_table, .split_fbpa_broadcast_addr = gr_gv100_split_fbpa_broadcast_addr, - .get_nonpes_aware_tpc = gr_gv11b_get_nonpes_aware_tpc, .get_offset_in_gpccs_segment = gr_gk20a_get_offset_in_gpccs_segment, .set_debug_mode = gm20b_gr_set_debug_mode, @@ -613,6 +612,8 @@ static const struct gpu_ops gv100_ops = { gv100_gr_hwpm_map_get_active_fbpa_mask, }, .init = { + .get_nonpes_aware_tpc = + gv11b_gr_init_get_nonpes_aware_tpc, .wait_initialized = nvgpu_gr_wait_initialized, .ecc_scrub_reg = NULL, .get_fbp_en_mask = gm20b_gr_init_get_fbp_en_mask, diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 9315879eb..31240aa4b 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ 
b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -1632,32 +1632,6 @@ void gr_gv11b_detect_sm_arch(struct gk20a *g) gr_gpc0_tpc0_sm_arch_warp_count_v(v); } -u32 gr_gv11b_get_nonpes_aware_tpc(struct gk20a *g, u32 gpc, u32 tpc) -{ - u32 tpc_new = 0; - u32 temp; - u32 pes; - struct gr_gk20a *gr = &g->gr; - - for (pes = 0U; - pes < nvgpu_gr_config_get_gpc_ppc_count(gr->config, gpc); - pes++) { - if ((nvgpu_gr_config_get_pes_tpc_mask(gr->config, gpc, pes) & - BIT32(tpc)) != 0U) { - break; - } - tpc_new += nvgpu_gr_config_get_pes_tpc_count(gr->config, - gpc, pes); - } - temp = (BIT32(tpc) - 1U) & - nvgpu_gr_config_get_pes_tpc_mask(gr->config, gpc, pes); - temp = (u32)hweight32(temp); - tpc_new += temp; - - nvgpu_log_info(g, "tpc: %d -> new tpc: %d", tpc, tpc_new); - return tpc_new; -} - void gv11b_gr_get_esr_sm_sel(struct gk20a *g, u32 gpc, u32 tpc, u32 *esr_sm_sel) { @@ -1794,10 +1768,11 @@ int gv11b_gr_set_sm_debug_mode(struct gk20a *g, sm_info = nvgpu_gr_config_get_sm_info(g->gr.config, sm_id); gpc = nvgpu_gr_config_get_sm_info_gpc_index(sm_info); - if (g->ops.gr.get_nonpes_aware_tpc != NULL) { - tpc = g->ops.gr.get_nonpes_aware_tpc(g, + if (g->ops.gr.init.get_nonpes_aware_tpc != NULL) { + tpc = g->ops.gr.init.get_nonpes_aware_tpc(g, nvgpu_gr_config_get_sm_info_gpc_index(sm_info), - nvgpu_gr_config_get_sm_info_tpc_index(sm_info)); + nvgpu_gr_config_get_sm_info_tpc_index(sm_info), + g->gr.config); } else { tpc = nvgpu_gr_config_get_sm_info_tpc_index(sm_info); } @@ -3232,11 +3207,13 @@ int gv11b_gr_clear_sm_error_state(struct gk20a *g, if (gk20a_is_channel_ctx_resident(ch)) { struct sm_info *sm_info = nvgpu_gr_config_get_sm_info(g->gr.config, sm_id); + gpc = nvgpu_gr_config_get_sm_info_gpc_index(sm_info); - if (g->ops.gr.get_nonpes_aware_tpc != NULL) { - tpc = g->ops.gr.get_nonpes_aware_tpc(g, + if (g->ops.gr.init.get_nonpes_aware_tpc != NULL) { + tpc = g->ops.gr.init.get_nonpes_aware_tpc(g, nvgpu_gr_config_get_sm_info_gpc_index(sm_info), - 
nvgpu_gr_config_get_sm_info_tpc_index(sm_info)); + nvgpu_gr_config_get_sm_info_tpc_index(sm_info), + g->gr.config); } else { tpc = nvgpu_gr_config_get_sm_info_tpc_index(sm_info); } diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h index bc94641d6..a7230a9be 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.h +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.h @@ -168,7 +168,6 @@ int gr_gv11b_create_priv_addr_table(struct gk20a *g, u32 addr, u32 *priv_addr_table, u32 *num_registers); -u32 gr_gv11b_get_nonpes_aware_tpc(struct gk20a *g, u32 gpc, u32 tpc); void gr_gv11b_powergate_tpc(struct gk20a *g); void gr_gv11b_set_shader_cut_collector(struct gk20a *g, u32 data); diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index a044ec17f..37ad34895 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c @@ -432,7 +432,6 @@ static const struct gpu_ops gv11b_ops = { .decode_priv_addr = gr_gv11b_decode_priv_addr, .create_priv_addr_table = gr_gv11b_create_priv_addr_table, .split_fbpa_broadcast_addr = gr_gk20a_split_fbpa_broadcast_addr, - .get_nonpes_aware_tpc = gr_gv11b_get_nonpes_aware_tpc, .get_offset_in_gpccs_segment = gr_gk20a_get_offset_in_gpccs_segment, .set_debug_mode = gm20b_gr_set_debug_mode, @@ -587,6 +586,8 @@ static const struct gpu_ops gv11b_ops = { gv100_gr_hwpm_map_align_regs_perf_pma, }, .init = { + .get_nonpes_aware_tpc = + gv11b_gr_init_get_nonpes_aware_tpc, .wait_initialized = nvgpu_gr_wait_initialized, .ecc_scrub_reg = gv11b_gr_init_ecc_scrub_reg, .get_fbp_en_mask = gm20b_gr_init_get_fbp_en_mask, diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c index fe2cc05c3..fb08f2085 100644 --- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c +++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c @@ -191,8 +191,8 @@ void gm20b_gr_init_get_access_map(struct gk20a *g, *num_entries = (int)array_size; } -void 
gm20b_gr_init_sm_id_numbering(struct gk20a *g, - u32 gpc, u32 tpc, u32 smid) +void gm20b_gr_init_sm_id_numbering(struct gk20a *g, u32 gpc, u32 tpc, u32 smid, + struct nvgpu_gr_config *gr_config) { u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h index b9aaf74e0..598d15627 100644 --- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h +++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.h @@ -43,8 +43,8 @@ void gm20b_gr_init_gpc_mmu(struct gk20a *g); void gm20b_gr_init_fifo_access(struct gk20a *g, bool enable); void gm20b_gr_init_get_access_map(struct gk20a *g, u32 **whitelist, int *num_entries); -void gm20b_gr_init_sm_id_numbering(struct gk20a *g, - u32 gpc, u32 tpc, u32 smid); +void gm20b_gr_init_sm_id_numbering(struct gk20a *g, u32 gpc, u32 tpc, u32 smid, + struct nvgpu_gr_config *gr_config); u32 gm20b_gr_init_get_sm_id_size(void); int gm20b_gr_init_sm_id_config(struct gk20a *g, u32 *tpc_sm_id, struct nvgpu_gr_config *gr_config); diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c index 1d230877d..6b04a5725 100644 --- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c +++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c @@ -266,6 +266,32 @@ void gv11b_gr_init_ecc_scrub_reg(struct gk20a *g, } +u32 gv11b_gr_init_get_nonpes_aware_tpc(struct gk20a *g, u32 gpc, u32 tpc, + struct nvgpu_gr_config *gr_config) +{ + u32 tpc_new = 0; + u32 temp; + u32 pes; + + for (pes = 0U; + pes < nvgpu_gr_config_get_gpc_ppc_count(gr_config, gpc); + pes++) { + if ((nvgpu_gr_config_get_pes_tpc_mask(gr_config, gpc, pes) & + BIT32(tpc)) != 0U) { + break; + } + tpc_new += nvgpu_gr_config_get_pes_tpc_count(gr_config, + gpc, pes); + } + temp = (BIT32(tpc) - 1U) & + nvgpu_gr_config_get_pes_tpc_mask(gr_config, gpc, pes); + temp = (u32)hweight32(temp); + tpc_new += temp; + + 
nvgpu_log_info(g, "tpc: %d -> new tpc: %d", tpc, tpc_new); + return tpc_new; +} + void gv11b_gr_init_gpc_mmu(struct gk20a *g) { u32 temp; @@ -339,8 +365,8 @@ void gv11b_gr_init_get_access_map(struct gk20a *g, *num_entries = (int)array_size; } -void gv11b_gr_init_sm_id_numbering(struct gk20a *g, - u32 gpc, u32 tpc, u32 smid) +void gv11b_gr_init_sm_id_numbering(struct gk20a *g, u32 gpc, u32 tpc, u32 smid, + struct nvgpu_gr_config *gr_config) { u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE); u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, @@ -349,12 +375,12 @@ void gv11b_gr_init_sm_id_numbering(struct gk20a *g, u32 global_tpc_index; u32 tpc_offset; struct sm_info *sm_info = - nvgpu_gr_config_get_sm_info(g->gr.config, smid); + nvgpu_gr_config_get_sm_info(gr_config, smid); global_tpc_index = nvgpu_gr_config_get_sm_info_global_tpc_index(sm_info); - tpc = g->ops.gr.get_nonpes_aware_tpc(g, gpc, tpc); + tpc = g->ops.gr.init.get_nonpes_aware_tpc(g, gpc, tpc, gr_config); tpc_offset = tpc_in_gpc_stride * tpc; nvgpu_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset, @@ -372,7 +398,7 @@ int gv11b_gr_init_sm_id_config(struct gk20a *g, u32 *tpc_sm_id, u32 tpc_index, gpc_index, tpc_id; u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC); u32 num_gpcs = nvgpu_get_litter_value(g, GPU_LIT_NUM_GPCS); - u32 no_of_sm = nvgpu_gr_config_get_no_of_sm(g->gr.config); + u32 no_of_sm = nvgpu_gr_config_get_no_of_sm(gr_config); /* Each NV_PGRAPH_PRI_CWD_GPC_TPC_ID can store 4 TPCs.*/ for (i = 0U; diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h index 11cc8ba9b..fb2f359f8 100644 --- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h +++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.h @@ -30,13 +30,15 @@ struct nvgpu_gr_config; struct nvgpu_gr_ctx; struct netlist_av_list; +u32 gv11b_gr_init_get_nonpes_aware_tpc(struct gk20a *g, u32 gpc, u32 tpc, + struct nvgpu_gr_config *gr_config); void 
gv11b_gr_init_ecc_scrub_reg(struct gk20a *g, struct nvgpu_gr_config *gr_config); void gv11b_gr_init_gpc_mmu(struct gk20a *g); void gv11b_gr_init_get_access_map(struct gk20a *g, u32 **whitelist, int *num_entries); -void gv11b_gr_init_sm_id_numbering(struct gk20a *g, - u32 gpc, u32 tpc, u32 smid); +void gv11b_gr_init_sm_id_numbering(struct gk20a *g, u32 gpc, u32 tpc, u32 smid, + struct nvgpu_gr_config *gr_config); int gv11b_gr_init_sm_id_config(struct gk20a *g, u32 *tpc_sm_id, struct nvgpu_gr_config *gr_config); void gv11b_gr_init_tpc_mask(struct gk20a *g, u32 gpc_index, u32 pes_tpc_mask); diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index 4fce66430..7345c0248 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h @@ -423,7 +423,6 @@ struct gpu_ops { u32 num_fbpas, u32 *priv_addr_table, u32 *priv_addr_table_index); - u32 (*get_nonpes_aware_tpc)(struct gk20a *g, u32 gpc, u32 tpc); int (*get_offset_in_gpccs_segment)(struct gk20a *g, enum ctxsw_addr_type addr_type, u32 num_tpcs, u32 num_ppcs, u32 reg_list_ppc_count, @@ -679,6 +678,8 @@ struct gpu_ops { } hwpm_map; struct { + u32 (*get_nonpes_aware_tpc)(struct gk20a *g, u32 gpc, + u32 tpc, struct nvgpu_gr_config *gr_config); void (*wait_initialized)(struct gk20a *g); void (*ecc_scrub_reg)(struct gk20a *g, struct nvgpu_gr_config *gr_config); @@ -693,8 +694,9 @@ struct gpu_ops { u32 (*get_sm_id_size)(void); int (*sm_id_config)(struct gk20a *g, u32 *tpc_sm_id, struct nvgpu_gr_config *gr_config); - void (*sm_id_numbering)(struct gk20a *g, - u32 gpc, u32 tpc, u32 smid); + void (*sm_id_numbering)(struct gk20a *g, u32 gpc, + u32 tpc, u32 smid, + struct nvgpu_gr_config *gr_config); void (*tpc_mask)(struct gk20a *g, u32 gpc_index, u32 pes_tpc_mask); int (*rop_mapping)(struct gk20a *g, diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c index 248b201ba..458a9befa 100644 --- 
a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c @@ -817,13 +817,15 @@ static int gk20a_ctrl_vsm_mapping(struct gk20a *g, for (i = 0; i < no_of_sm; i++) { struct sm_info *sm_info = nvgpu_gr_config_get_sm_info(gr->config, i); + vsms_buf[i].gpc_index = nvgpu_gr_config_get_sm_info_gpc_index(sm_info); - if (g->ops.gr.get_nonpes_aware_tpc) + if (g->ops.gr.init.get_nonpes_aware_tpc) vsms_buf[i].tpc_index = - g->ops.gr.get_nonpes_aware_tpc(g, + g->ops.gr.init.get_nonpes_aware_tpc(g, nvgpu_gr_config_get_sm_info_gpc_index(sm_info), - nvgpu_gr_config_get_sm_info_tpc_index(sm_info)); + nvgpu_gr_config_get_sm_info_tpc_index(sm_info), + gr->config); else vsms_buf[i].tpc_index = nvgpu_gr_config_get_sm_info_tpc_index(sm_info); diff --git a/drivers/gpu/nvgpu/tu104/hal_tu104.c b/drivers/gpu/nvgpu/tu104/hal_tu104.c index 2c07f367d..1ea8ca1bf 100644 --- a/drivers/gpu/nvgpu/tu104/hal_tu104.c +++ b/drivers/gpu/nvgpu/tu104/hal_tu104.c @@ -486,7 +486,6 @@ static const struct gpu_ops tu104_ops = { .decode_priv_addr = gr_gv11b_decode_priv_addr, .create_priv_addr_table = gr_gv11b_create_priv_addr_table, .split_fbpa_broadcast_addr = gr_gv100_split_fbpa_broadcast_addr, - .get_nonpes_aware_tpc = gr_gv11b_get_nonpes_aware_tpc, .get_offset_in_gpccs_segment = gr_tu104_get_offset_in_gpccs_segment, .set_debug_mode = gm20b_gr_set_debug_mode, @@ -642,6 +641,8 @@ static const struct gpu_ops tu104_ops = { gv100_gr_hwpm_map_get_active_fbpa_mask, }, .init = { + .get_nonpes_aware_tpc = + gv11b_gr_init_get_nonpes_aware_tpc, .wait_initialized = nvgpu_gr_wait_initialized, .ecc_scrub_reg = NULL, .get_fbp_en_mask = gm20b_gr_init_get_fbp_en_mask,