gpu: nvgpu: Initialize ctxsw header counters

Initialize the following counters in the context header for all legacy chips: ctxsw_prog_main_image_num_save_ops, ctxsw_prog_main_image_num_restore_ops. This initialization was already present in the code, but it is now moved into a function, gk20a_gr_init_ctxsw_hdr_data, so that it can be reused across chips. Additionally, initialize the following preemption-related counters in the context header for gp10b onwards: ctxsw_prog_main_image_num_wfi_save_ops, ctxsw_prog_main_image_num_cta_save_ops, ctxsw_prog_main_image_num_gfxp_save_ops, ctxsw_prog_main_image_num_cilp_save_ops. Bug 1958308 Change-Id: I0e45ec718a8f9ddb951b52c92137051b4f6a8c60 Signed-off-by: seshendra Gadagottu <sgadagottu@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1562654 Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com> Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com> Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> GVS: Gerrit_Virtual_Submit
2025-12-24 02:22:34 +03:00 · 2017-09-18 11:06:09 -07:00
parent c03ccd89c2
commit c4370d7def
7 changed files with 34 additions and 4 deletions
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -415,6 +415,9 @@ struct gpu_ops {
 				u32 gpc, u32 tpc, u32 sm);
 		void (*resume_all_sms)(struct gk20a *g);
 		void (*disable_rd_coalesce)(struct gk20a *g);
+		void (*init_ctxsw_hdr_data)(struct gk20a *g,
+					struct nvgpu_mem *mem);
+
 	} gr;
 	struct {
 		void (*init_hw)(struct gk20a *g);
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -1821,6 +1821,15 @@ cleanup_pm_buf:
 	return ret;
 }

+void gk20a_gr_init_ctxsw_hdr_data(struct gk20a *g,
+				struct nvgpu_mem *mem)
+{
+	nvgpu_mem_wr(g, mem,
+			ctxsw_prog_main_image_num_save_ops_o(), 0);
+	nvgpu_mem_wr(g, mem,
+			ctxsw_prog_main_image_num_restore_ops_o(), 0);
+}
+
 /* load saved fresh copy of gloden image into channel gr_ctx */
 int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 					struct channel_gk20a *c)
@@ -1860,12 +1869,11 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 		nvgpu_mem_wr_n(g, mem, 0,
 			gr->ctx_vars.local_golden_image,
 			gr->ctx_vars.golden_image_size);
-		nvgpu_mem_wr(g, mem,
-			ctxsw_prog_main_image_num_save_ops_o(), 0);
-		nvgpu_mem_wr(g, mem,
-			ctxsw_prog_main_image_num_restore_ops_o(), 0);
 	}

+	if (g->ops.gr.init_ctxsw_hdr_data)
+		g->ops.gr.init_ctxsw_hdr_data(g, mem);
+
 	if (g->ops.gr.enable_cde_in_fecs && c->cde)
 		g->ops.gr.enable_cde_in_fecs(g, mem);

--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -737,6 +737,8 @@ void gk20a_gr_get_esr_sm_sel(struct gk20a *g, u32 gpc, u32 tpc,
 void gk20a_gr_init_ovr_sm_dsm_perf(void);
 void gk20a_gr_get_ovr_perf_regs(struct gk20a *g, u32 *num_ovr_perf_regs,
 					       u32 **ovr_perf_regs);
+void gk20a_gr_init_ctxsw_hdr_data(struct gk20a *g,
+					struct nvgpu_mem *mem);

 static inline const char *gr_gk20a_graphics_preempt_mode_name(u32 graphics_preempt_mode)
 {
--- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
@@ -283,6 +283,7 @@ static const struct gpu_ops gm20b_ops = {
 		.init_ovr_sm_dsm_perf =  gk20a_gr_init_ovr_sm_dsm_perf,
 		.get_ovr_perf_regs = gk20a_gr_get_ovr_perf_regs,
 		.disable_rd_coalesce = gm20a_gr_disable_rd_coalesce,
+		.init_ctxsw_hdr_data = gk20a_gr_init_ctxsw_hdr_data,
 	},
 	.fb = {
 		.reset = fb_gk20a_reset,
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -2379,3 +2379,17 @@ int gr_gp10b_set_czf_bypass(struct gk20a *g, struct channel_gk20a *ch)

 	return __gr_gk20a_exec_ctx_ops(ch, &ops, 1, 1, 0, false);
 }
+
+void gr_gp10b_init_ctxsw_hdr_data(struct gk20a *g, struct nvgpu_mem *mem)
+{
+	gk20a_gr_init_ctxsw_hdr_data(g, mem);
+
+	nvgpu_mem_wr(g, mem,
+			ctxsw_prog_main_image_num_wfi_save_ops_o(), 0);
+	nvgpu_mem_wr(g, mem,
+			ctxsw_prog_main_image_num_cta_save_ops_o(), 0);
+	nvgpu_mem_wr(g, mem,
+			ctxsw_prog_main_image_num_gfxp_save_ops_o(), 0);
+	nvgpu_mem_wr(g, mem,
+			ctxsw_prog_main_image_num_cilp_save_ops_o(), 0);
+}
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.h
@@ -133,6 +133,7 @@ int gr_gp10b_init_preemption_state(struct gk20a *g);
 void gr_gp10b_set_preemption_buffer_va(struct gk20a *g,
 			struct nvgpu_mem *mem, u64 gpu_va);
 int gr_gp10b_set_czf_bypass(struct gk20a *g, struct channel_gk20a *ch);
+void gr_gp10b_init_ctxsw_hdr_data(struct gk20a *g, struct nvgpu_mem *mem);

 struct gr_t18x {
 	struct {
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -302,6 +302,7 @@ static const struct gpu_ops gp10b_ops = {
 		.set_bes_crop_debug3 = gr_gp10b_set_bes_crop_debug3,
 		.create_gr_sysfs = gr_gp10b_create_sysfs,
 		.set_ctxsw_preemption_mode = gr_gp10b_set_ctxsw_preemption_mode,
+		.init_ctxsw_hdr_data = gr_gp10b_init_ctxsw_hdr_data,
 	},
 	.fb = {
 		.reset = fb_gk20a_reset,