diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 695e3f695..0436c4668 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -172,6 +172,7 @@ struct gpu_ops { u32 (*get_max_ltc_per_fbp)(struct gk20a *g); u32 (*get_max_lts_per_ltc)(struct gk20a *g); u32* (*get_rop_l2_en_mask)(struct gk20a *g); + void (*init_sm_dsm_reg_info)(void); } gr; const char *name; struct { diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 4933d4427..e4e0d1630 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -5995,7 +5995,7 @@ static const u32 _num_sm_dsm_perf_ctrl_regs = 4; static u32 _sm_dsm_perf_regs[5]; static u32 _sm_dsm_perf_ctrl_regs[4]; -static void init_sm_dsm_reg_info(void) +static void init_ovr_perf_reg_info(void) { if (_ovr_perf_regs[0] != 0) return; @@ -6017,7 +6017,12 @@ static void init_sm_dsm_reg_info(void) _ovr_perf_regs[14] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter5_r(); _ovr_perf_regs[15] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter6_r(); _ovr_perf_regs[16] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter7_r(); +} +void gr_gk20a_init_sm_dsm_reg_info(void) +{ + if (_sm_dsm_perf_regs[0] != 0) + return; _sm_dsm_perf_regs[0] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter_status_r(); _sm_dsm_perf_regs[1] = gr_pri_gpc0_tpc0_sm_dsm_perf_counter0_r(); @@ -6050,7 +6055,8 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g, u32 vaddr_hi; u32 tmp; - init_sm_dsm_reg_info(); + init_ovr_perf_reg_info(); + g->ops.gr.init_sm_dsm_reg_info(); gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr); @@ -6274,7 +6280,7 @@ static int gr_gk20a_find_priv_offset_in_ext_buffer(struct gk20a *g, &sm_dsm_perf_regs, &perf_register_stride); - init_sm_dsm_reg_info(); + g->ops.gr.init_sm_dsm_reg_info(); for (i = 0; i < num_sm_dsm_perf_regs; i++) { if ((addr & tpc_gpc_mask) == (sm_dsm_perf_regs[i] & tpc_gpc_mask)) { @@ -7375,4 +7381,5 @@ void gk20a_init_gr_ops(struct gpu_ops *gops) gops->gr.get_max_ltc_per_fbp = gr_gk20a_get_max_ltc_per_fbp; gops->gr.get_max_lts_per_ltc = gr_gk20a_get_max_lts_per_ltc; gops->gr.get_rop_l2_en_mask = gr_gk20a_rop_l2_en_mask; + gops->gr.init_sm_dsm_reg_info = gr_gk20a_init_sm_dsm_reg_info; } diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 59176af8d..1a55e064b 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h @@ -461,14 +461,6 @@ void gr_gk20a_commit_global_pagepool(struct gk20a *g, u64 addr, u32 size, bool patch); void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data); void gr_gk20a_enable_hww_exceptions(struct gk20a *g); -void gr_gk20a_get_sm_dsm_perf_regs(struct gk20a *g, - u32 *num_sm_dsm_perf_regs, - u32 **sm_dsm_perf_regs, - u32 *perf_register_stride); -void gr_gk20a_get_sm_dsm_perf_ctrl_regs(struct gk20a *g, - u32 *num_sm_dsm_perf_regs, - u32 **sm_dsm_perf_regs, - u32 *perf_register_stride); int gr_gk20a_setup_rop_mapping(struct gk20a *g, struct gr_gk20a *gr); int gr_gk20a_init_ctxsw_ucode(struct gk20a *g); int gr_gk20a_load_ctxsw_ucode(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index e5af96d2a..cffc56d11 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c @@ -453,26 +453,46 @@ static bool gr_gm20b_is_valid_class(struct gk20a *g, u32 class_num) return valid; } -static void gr_gm20b_get_sm_dsm_perf_regs(struct gk20a *g, - u32 *num_sm_dsm_perf_regs, - u32 **sm_dsm_perf_regs, - u32 *perf_register_stride) +/* Following are the blocks of registers that the ucode + stores in the extended region.*/ +/* == ctxsw_extended_sm_dsm_perf_counter_register_stride_v() ? */ +static const u32 _num_sm_dsm_perf_regs; +/* == ctxsw_extended_sm_dsm_perf_counter_control_register_stride_v() ?*/ +static const u32 _num_sm_dsm_perf_ctrl_regs = 2; +static u32 *_sm_dsm_perf_regs; +static u32 _sm_dsm_perf_ctrl_regs[2]; + +void gr_gm20b_init_sm_dsm_reg_info(void) { - gr_gk20a_get_sm_dsm_perf_regs(g, num_sm_dsm_perf_regs, - sm_dsm_perf_regs, - perf_register_stride); - *perf_register_stride = ctxsw_prog_extended_sm_dsm_perf_counter_register_stride_v(); + if (_sm_dsm_perf_ctrl_regs[0] != 0) + return; + + _sm_dsm_perf_ctrl_regs[0] = + gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control0_r(); + _sm_dsm_perf_ctrl_regs[1] = + gr_pri_gpc0_tpc0_sm_dsm_perf_counter_control5_r(); } -static void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g, - u32 *num_sm_dsm_perf_regs, - u32 **sm_dsm_perf_regs, - u32 *ctrl_register_stride) +void gr_gm20b_get_sm_dsm_perf_regs(struct gk20a *g, + u32 *num_sm_dsm_perf_regs, + u32 **sm_dsm_perf_regs, + u32 *perf_register_stride) { - gr_gk20a_get_sm_dsm_perf_ctrl_regs(g, num_sm_dsm_perf_regs, - sm_dsm_perf_regs, - ctrl_register_stride); - *ctrl_register_stride = ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v(); + *num_sm_dsm_perf_regs = _num_sm_dsm_perf_regs; + *sm_dsm_perf_regs = _sm_dsm_perf_regs; + *perf_register_stride = 0; +} + +void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g, + u32 *num_sm_dsm_perf_ctrl_regs, + u32 **sm_dsm_perf_ctrl_regs, + u32 *ctrl_register_stride) +{ + *num_sm_dsm_perf_ctrl_regs = _num_sm_dsm_perf_ctrl_regs; + *sm_dsm_perf_ctrl_regs = _sm_dsm_perf_ctrl_regs; + + *ctrl_register_stride = + ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v(); } static u32 gr_gm20b_get_gpc_tpc_mask(struct gk20a *g, u32 gpc_index) @@ -1072,4 +1092,5 @@ void gm20b_init_gr(struct gpu_ops *gops) gops->gr.get_max_lts_per_ltc = gr_gm20b_get_max_lts_per_ltc; gops->gr.get_rop_l2_en_mask = gr_gm20b_rop_l2_en_mask; gops->gr.get_max_fbps_count = gr_gm20b_get_max_fbps_count; + gops->gr.init_sm_dsm_reg_info = gr_gm20b_init_sm_dsm_reg_info; }