gpu: nvgpu: changes related to ctx header handling

The ctx header holds only the gpu va for each address space. All
other information is held in the main context. The ctx header will
have the gpu va for the following fields, as sketched after the
list:
ctxsw_prog_main_image_context_buffer_ptr
ctxsw_prog_main_image_context_buffer_ptr_hi
ctxsw_prog_main_image_zcull_ptr
ctxsw_prog_main_image_zcull_ptr_hi
ctxsw_prog_main_image_pm_ptr
ctxsw_prog_main_image_pm_ptr_hi
ctxsw_prog_main_image_full_preemption_ptr_hi
ctxsw_prog_main_image_full_preemption_ptr
ctxsw_prog_main_image_full_preemption_ptr_xxxx0
ctxsw_prog_main_image_full_preemption_ptr_xxxx0_v
ctxsw_prog_main_image_patch_adr_lo
ctxsw_prog_main_image_patch_adr_hi
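
For illustration only, a minimal sketch of how one of these lo/hi
pairs gets written into a context image. The helper name
write_image_ptr_pair() is hypothetical; nvgpu_mem_wr(),
u64_lo32()/u64_hi32() and the ctxsw_prog_main_image_* accessors are
the ones already used by this code.

/* Hypothetical helper (sketch only): split a gpu va into lo/hi words
 * and write them into the given image - either the ctx header, which
 * now carries only gpu vas, or the main context image. Assumes the
 * caller has cpu-mapped the image with nvgpu_mem_begin().
 */
static void write_image_ptr_pair(struct gk20a *g, struct nvgpu_mem *mem,
		u32 lo_off, u32 hi_off, u64 gpu_va)
{
	nvgpu_mem_wr(g, mem, lo_off, u64_lo32(gpu_va));
	nvgpu_mem_wr(g, mem, hi_off, u64_hi32(gpu_va));
}

For example, the patch buffer va could be committed by passing
ctxsw_prog_main_image_patch_adr_lo_o()/_hi_o() and the patch buffer
gpu va.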

Changes done as part of this CL:
- Read ctx_id from the main context header.
- Golden context creation:
  Use gold_mem for golden context creation and
  copy the golden context from the saved local
  golden image to the main context. There is no
  need to restore the golden context to the
  context header.
- Write ctx_patch_count and smpc_ctxsw_mode in
  the main context header only.
- Update the preemption mode in the main context
  header and the preemption buffer va in the
  context header.
- Update the patch buffer va in the context
  header (see the sketch after this list).
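
The resulting split, as a minimal sketch (the free-standing helper
below is hypothetical; the fields and accessors are the ones used in
the hunks that follow): the patch count stays in the main context
image, while the patch buffer va goes to the ctx header whenever one
is mapped.

/* Sketch only, not the actual function. Assumes the caller has
 * cpu-mapped both images with nvgpu_mem_begin().
 */
static void commit_patch_buffer_sketch(struct gk20a *g,
		struct channel_ctx_gk20a *ch_ctx)
{
	struct nvgpu_mem *mem = &ch_ctx->gr_ctx->mem;
	struct nvgpu_mem *ctxheader = &ch_ctx->ctx_header.mem;
	u64 vaddr = ch_ctx->patch_ctx.mem.gpu_va;

	/* The data count now lives only in the main context image. */
	nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_patch_count_o(),
		ch_ctx->patch_ctx.data_count);

	/* The gpu va goes to the ctx header if one is mapped. */
	if (ctxheader->gpu_va) {
		nvgpu_mem_wr(g, ctxheader,
			ctxsw_prog_main_image_patch_adr_lo_o(),
			u64_lo32(vaddr));
		nvgpu_mem_wr(g, ctxheader,
			ctxsw_prog_main_image_patch_adr_hi_o(),
			u64_hi32(vaddr));
	} else {
		nvgpu_mem_wr(g, mem,
			ctxsw_prog_main_image_patch_adr_lo_o(),
			u64_lo32(vaddr));
		nvgpu_mem_wr(g, mem,
			ctxsw_prog_main_image_patch_adr_hi_o(),
			u64_hi32(vaddr));
	}
}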

Bug 1958308

Change-Id: Ic076aad8b1802f76f941d2d15cb9a8c07308e3e8
Signed-off-by: seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1562680
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
GVS: Gerrit_Virtual_Submit

drivers/gpu/nvgpu/gk20a/gr_gk20a.c

@@ -104,19 +104,12 @@ int gr_gk20a_get_ctx_id(struct gk20a *g,
 		u32 *ctx_id)
 {
 	struct channel_ctx_gk20a *ch_ctx = &c->ch_ctx;
-	struct ctx_header_desc *ctx_header = &ch_ctx->ctx_header;
-	struct nvgpu_mem *ctx_header_mem = &ctx_header->mem;
-	struct nvgpu_mem *mem;
+	struct nvgpu_mem *mem = &ch_ctx->gr_ctx->mem;
 
 	/* Channel gr_ctx buffer is gpu cacheable.
 	   Flush and invalidate before cpu update. */
 	g->ops.mm.l2_flush(g, true);
 
-	if (ctx_header_mem->gpu_va)
-		mem = ctx_header_mem;
-	else
-		mem = &ch_ctx->gr_ctx->mem;
-
 	if (nvgpu_mem_begin(g, mem))
 		return -ENOMEM;
@@ -681,20 +674,10 @@ int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
 void gr_gk20a_ctx_patch_write_end(struct gk20a *g,
 		struct channel_ctx_gk20a *ch_ctx)
 {
-	struct ctx_header_desc *ctx = &ch_ctx->ctx_header;
-	struct nvgpu_mem *ctxheader = &ctx->mem;
-
 	nvgpu_mem_end(g, &ch_ctx->patch_ctx.mem);
 
 	/* Write context count to context image if it is mapped */
-	if (ctxheader->gpu_va) {
-		if (ctxheader->cpu_va)
-			nvgpu_mem_wr(g, ctxheader,
-				ctxsw_prog_main_image_patch_count_o(),
-				ch_ctx->patch_ctx.data_count);
-	} else if (ch_ctx->gr_ctx->mem.cpu_va) {
+	if (ch_ctx->gr_ctx->mem.cpu_va) {
 		nvgpu_mem_wr(g, &ch_ctx->gr_ctx->mem,
 			ctxsw_prog_main_image_patch_count_o(),
 			ch_ctx->patch_ctx.data_count);
@@ -1357,8 +1340,6 @@ static int gr_gk20a_init_golden_ctx_image(struct gk20a *g,
 	struct aiv_list_gk20a *sw_ctx_load = &g->gr.ctx_vars.sw_ctx_load;
 	struct av_list_gk20a *sw_method_init = &g->gr.ctx_vars.sw_method_init;
 	u32 last_method_data = 0;
-	struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
-	struct nvgpu_mem *ctxheader = &ctx->mem;
 
 	gk20a_dbg_fn("");
@@ -1549,14 +1530,7 @@ restore_fe_go_idle:
 	nvgpu_mem_wr(g, gold_mem, ctxsw_prog_main_image_zcull_o(),
 		ctxsw_prog_main_image_zcull_mode_no_ctxsw_v());
 
-	if (nvgpu_mem_begin(g, ctxheader))
-		goto clean_up;
-
-	if (ctxheader->gpu_va)
-		g->ops.gr.write_zcull_ptr(g, ctxheader, 0);
-	else
-		g->ops.gr.write_zcull_ptr(g, gold_mem, 0);
-	nvgpu_mem_end(g, ctxheader);
+	g->ops.gr.write_zcull_ptr(g, gold_mem, 0);
 
 	err = g->ops.gr.commit_inst(c, ch_ctx->global_ctx_buffer_va[GOLDEN_CTX_VA]);
 	if (err)
@@ -1564,8 +1538,7 @@ restore_fe_go_idle:
 	gr_gk20a_fecs_ctx_image_save(c, gr_fecs_method_push_adr_wfi_golden_save_v());
 
-	if (nvgpu_mem_begin(g, ctxheader))
-		goto clean_up;
 
 	if (gr->ctx_vars.local_golden_image == NULL) {
@@ -1576,17 +1549,11 @@ restore_fe_go_idle:
 			err = -ENOMEM;
 			goto clean_up;
 		}
-		if (ctxheader->gpu_va)
-			nvgpu_mem_rd_n(g, ctxheader, 0,
-				gr->ctx_vars.local_golden_image,
-				gr->ctx_vars.golden_image_size);
-		else
-			nvgpu_mem_rd_n(g, gold_mem, 0,
-				gr->ctx_vars.local_golden_image,
-				gr->ctx_vars.golden_image_size);
+		nvgpu_mem_rd_n(g, gold_mem, 0,
+			gr->ctx_vars.local_golden_image,
+			gr->ctx_vars.golden_image_size);
 	}
-	nvgpu_mem_end(g, ctxheader);
 
 	err = g->ops.gr.commit_inst(c, gr_mem->gpu_va);
 	if (err)
@@ -1618,8 +1585,6 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
 	struct nvgpu_mem *mem;
 	u32 data;
 	int ret;
-	struct ctx_header_desc *ctx = &c->ch_ctx.ctx_header;
-	struct nvgpu_mem *ctxheader = &ctx->mem;
 
 	gk20a_dbg_fn("");
@@ -1651,14 +1616,6 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
 		goto out;
 	}
 
-	if (nvgpu_mem_begin(g, ctxheader)) {
-		ret = -ENOMEM;
-		goto clean_up_mem;
-	}
-
-	if (ctxheader->gpu_va)
-		data = nvgpu_mem_rd(g, ctxheader,
-			ctxsw_prog_main_image_pm_o());
-	else
-		data = nvgpu_mem_rd(g, mem,
-			ctxsw_prog_main_image_pm_o());
+	data = nvgpu_mem_rd(g, mem,
+		ctxsw_prog_main_image_pm_o());
@@ -1667,18 +1624,9 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
 		ctxsw_prog_main_image_pm_smpc_mode_ctxsw_f() :
 		ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f();
 
-	if (ctxheader->gpu_va)
-		nvgpu_mem_wr(g, ctxheader,
-			ctxsw_prog_main_image_pm_o(),
-			data);
-	else
-		nvgpu_mem_wr(g, mem,
-			ctxsw_prog_main_image_pm_o(),
-			data);
+	nvgpu_mem_wr(g, mem,
+		ctxsw_prog_main_image_pm_o(), data);
 
-	nvgpu_mem_end(g, ctxheader);
-clean_up_mem:
 	nvgpu_mem_end(g, mem);
 out:
 	gk20a_enable_channel_tsg(g, c);
@@ -1862,14 +1810,9 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 		goto clean_up_mem;
 	}
 
-	if (ctxheader->gpu_va) {
-		if (g->ops.gr.restore_context_header)
-			g->ops.gr.restore_context_header(g, ctxheader);
-	} else {
-		nvgpu_mem_wr_n(g, mem, 0,
-			gr->ctx_vars.local_golden_image,
-			gr->ctx_vars.golden_image_size);
-	}
+	nvgpu_mem_wr_n(g, mem, 0,
+		gr->ctx_vars.local_golden_image,
+		gr->ctx_vars.golden_image_size);
 
 	if (g->ops.gr.init_ctxsw_hdr_data)
 		g->ops.gr.init_ctxsw_hdr_data(g, mem);
@@ -1923,21 +1866,21 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 	nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_patch_count_o(),
 		ch_ctx->patch_ctx.data_count);
-	nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_patch_adr_lo_o(),
-		virt_addr_lo);
-	nvgpu_mem_wr(g, mem, ctxsw_prog_main_image_patch_adr_hi_o(),
-		virt_addr_hi);
 
 	if (ctxheader->gpu_va) {
 		nvgpu_mem_wr(g, ctxheader,
-			ctxsw_prog_main_image_patch_count_o(),
-			ch_ctx->patch_ctx.data_count);
-		nvgpu_mem_wr(g, ctxheader,
 			ctxsw_prog_main_image_patch_adr_lo_o(),
 			virt_addr_lo);
 		nvgpu_mem_wr(g, ctxheader,
 			ctxsw_prog_main_image_patch_adr_hi_o(),
 			virt_addr_hi);
+	} else {
+		nvgpu_mem_wr(g, mem,
+			ctxsw_prog_main_image_patch_adr_lo_o(),
+			virt_addr_lo);
+		nvgpu_mem_wr(g, mem,
+			ctxsw_prog_main_image_patch_adr_hi_o(),
+			virt_addr_hi);
 	}
 
 	/* Update main header region of the context buffer with the info needed
@@ -6606,6 +6549,8 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
 	u32 *ovr_perf_regs = NULL;
 	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
 	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
+	struct ctx_header_desc *ctx = &ch_ctx->ctx_header;
+	struct nvgpu_mem *ctxheader = &ctx->mem;
 
 	g->ops.gr.init_ovr_sm_dsm_perf();
 	g->ops.gr.init_sm_dsm_reg_info();
@@ -6640,12 +6585,21 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
 			nvgpu_mem_wr(g, mem,
 				ctxsw_prog_main_image_patch_count_o(),
 				ch_ctx->patch_ctx.data_count);
-			nvgpu_mem_wr(g, mem,
-				ctxsw_prog_main_image_patch_adr_lo_o(),
-				vaddr_lo);
-			nvgpu_mem_wr(g, mem,
-				ctxsw_prog_main_image_patch_adr_hi_o(),
-				vaddr_hi);
+			if (ctxheader->gpu_va) {
+				nvgpu_mem_wr(g, ctxheader,
+					ctxsw_prog_main_image_patch_adr_lo_o(),
+					vaddr_lo);
+				nvgpu_mem_wr(g, ctxheader,
+					ctxsw_prog_main_image_patch_adr_hi_o(),
+					vaddr_hi);
+			} else {
+				nvgpu_mem_wr(g, mem,
+					ctxsw_prog_main_image_patch_adr_lo_o(),
+					vaddr_lo);
+				nvgpu_mem_wr(g, mem,
+					ctxsw_prog_main_image_patch_adr_hi_o(),
+					vaddr_hi);
+			}
 
 			/* we're not caching these on cpu side,
 			   but later watch for it */

drivers/gpu/nvgpu/gp10b/gr_gp10b.c

@@ -1168,6 +1168,9 @@ void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
 		struct nvgpu_mem *mem)
 {
 	struct gr_ctx_desc *gr_ctx = ch_ctx->gr_ctx;
+	struct ctx_header_desc *ctx = &ch_ctx->ctx_header;
+	struct nvgpu_mem *ctxheader = &ctx->mem;
+
 	u32 gfxp_preempt_option =
 		ctxsw_prog_main_image_graphics_preemption_options_control_gfxp_f();
 	u32 cilp_preempt_option =
@@ -1204,9 +1207,14 @@ void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
 		u32 size;
 		u32 cbes_reserve;
 
-		if (g->ops.gr.set_preemption_buffer_va)
+		if (g->ops.gr.set_preemption_buffer_va) {
+			if (ctxheader->gpu_va)
+				g->ops.gr.set_preemption_buffer_va(g, ctxheader,
+					gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va);
+			else
 			g->ops.gr.set_preemption_buffer_va(g, mem,
 				gr_ctx->t18x.preempt_ctxsw_buffer.gpu_va);
+		}
 
 		err = gr_gk20a_ctx_patch_write_begin(g, ch_ctx);
 		if (err) {
@@ -2247,10 +2255,6 @@ int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
 		goto enable_ch;
 
 	if (g->ops.gr.update_ctxsw_preemption_mode) {
-		if (ctxheader->gpu_va)
-			g->ops.gr.update_ctxsw_preemption_mode(ch->g,
-				ch_ctx, ctxheader);
-		else
-			g->ops.gr.update_ctxsw_preemption_mode(ch->g,
-				ch_ctx, mem);
+		g->ops.gr.update_ctxsw_preemption_mode(ch->g,
+			ch_ctx, mem);