diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index 8d8f56f69..2b242978d 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -2123,6 +2123,46 @@ void gr_gv11b_detect_sm_arch(struct gk20a *g)
 		gr_gpc0_tpc0_sm_arch_warp_count_v(v);
 }
 
+/*
+ * Map a TPC index within a GPC to the index used for per-GPC SM ID
+ * programming (the "non-PES-aware" TPC index, per the function name).
+ *
+ * The PES units of @gpc are walked in order. The TPC counts of the PES
+ * units preceding the one whose mask has bit @tpc set are summed, and the
+ * number of lower-numbered TPCs set in that owning mask is added on top.
+ *
+ * If no PES mask of @gpc has bit @tpc set, @tpc is returned unchanged
+ * instead of indexing pes_tpc_mask[] at pes == gpc_ppc_count[gpc], which
+ * is past the last PES the loop treats as valid.
+ */
+static u32 gr_gv11b_get_nonpes_aware_tpc(struct gk20a *g, u32 gpc, u32 tpc)
+{
+	u32 tpc_new = 0;
+	u32 temp;
+	u32 pes;
+	struct gr_gk20a *gr = &g->gr;
+
+	for (pes = 0; pes < gr->gpc_ppc_count[gpc]; pes++) {
+		if (gr->pes_tpc_mask[pes][gpc] & BIT(tpc))
+			break;
+		tpc_new += gr->pes_tpc_count[pes][gpc];
+	}
+
+	if (pes == gr->gpc_ppc_count[gpc]) {
+		nvgpu_log_info(g, "gpc: %u tpc: %u not in any pes tpc mask",
+			gpc, tpc);
+		return tpc;
+	}
+
+	/* Count the TPCs below @tpc within the owning PES mask. */
+	temp = (BIT(tpc) - 1) & gr->pes_tpc_mask[pes][gpc];
+	temp = hweight32(temp);
+	tpc_new += temp;
+
+	nvgpu_log_info(g, "tpc: %u -> new tpc: %u", tpc, tpc_new);
+	return tpc_new;
+}
+
 void gr_gv11b_program_sm_id_numbering(struct gk20a *g,
 					u32 gpc, u32 tpc, u32 smid)
 {
@@ -2133,6 +2173,9 @@ void gr_gv11b_program_sm_id_numbering(struct gk20a *g,
 	u32 tpc_offset = tpc_in_gpc_stride * tpc;
 	u32 global_tpc_index = g->gr.sm_to_cluster[smid].global_tpc_index;
 
+	/* tpc_offset above was computed from the incoming tpc, before remap. */
+	tpc = gr_gv11b_get_nonpes_aware_tpc(g, gpc, tpc);
+
 	gk20a_writel(g, gr_gpc0_tpc0_sm_cfg_r() + gpc_offset + tpc_offset,
 			gr_gpc0_tpc0_sm_cfg_tpc_id_f(global_tpc_index));
 	gk20a_writel(g, gr_gpc0_gpm_pd_sm_id_r(tpc) + gpc_offset,