gpu: nvgpu: don't mem_{begin,end}() for gr

Now that GR buffers always have a kernel mapping, remove the unnecessary
calls to nvgpu_mem_begin() and nvgpu_mem_end() on these buffers:

- global ctx buffer mem in gr
- gr ctx mem in a tsg
- patch ctx mem in a gr ctx
- pm ctx mem in a gr ctx
- ctx_header mem in a channel (subctx header)

Change-Id: Id2a8ad108aef8db8b16dce5bae8003bbcd3b23e4
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1760599
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
2025-12-24 02:22:34 +03:00 · 2018-06-25 15:47:15 +03:00
parent 2dda362e63
commit dd146d42fc
5 changed files with 7 additions and 162 deletions
--- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
@@ -657,9 +657,6 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g,

 	mem = &ch_ctx->mem;

-	if (nvgpu_mem_begin(g, mem))
-		return -ENOMEM;
-
 	nvgpu_log(g, gpu_dbg_ctxsw, "addr_hi=%x addr_lo=%x count=%d", hi,
 		lo, GK20A_FECS_TRACE_NUM_RECORDS);

@@ -668,14 +665,9 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g,
 		ctxsw_prog_main_image_context_timestamp_buffer_control_num_records_f(
 			GK20A_FECS_TRACE_NUM_RECORDS));

-	nvgpu_mem_end(g, mem);
-
 	if (nvgpu_is_enabled(g, NVGPU_FECS_TRACE_VA))
 		mem = &ch->ctx_header.mem;

-	if (nvgpu_mem_begin(g, mem))
-		return -ENOMEM;
-
 	nvgpu_mem_wr(g, mem,
 		ctxsw_prog_main_image_context_timestamp_buffer_ptr_o(),
 		lo);
@@ -684,8 +676,6 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g,
 		ctxsw_prog_main_image_context_timestamp_buffer_ptr_v_f(hi) |
 		aperture_mask);

-	nvgpu_mem_end(g, mem);
-
 	/* pid (process identifier) in user space, corresponds to tgid (thread
 	 * group id) in kernel space.
 	 */
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -111,15 +111,10 @@ int gr_gk20a_get_ctx_id(struct gk20a *g,
 	   Flush and invalidate before cpu update. */
 	g->ops.mm.l2_flush(g, true);

-	if (nvgpu_mem_begin(g, mem))
-		return -ENOMEM;
-
 	*ctx_id = nvgpu_mem_rd(g, mem,
 			ctxsw_prog_main_image_context_id_o());
 	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_intr, "ctx_id: 0x%x", *ctx_id);

-	nvgpu_mem_end(g, mem);
-
 	return 0;
 }

@@ -696,12 +691,6 @@ int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
 					  struct nvgpu_gr_ctx *gr_ctx,
 					  bool update_patch_count)
 {
-	int err = 0;
-
-	err = nvgpu_mem_begin(g, &gr_ctx->patch_ctx.mem);
-	if (err)
-		return err;
-
 	if (update_patch_count) {
 		/* reset patch count if ucode has already processed it */
 		gr_ctx->patch_ctx.data_count = nvgpu_mem_rd(g,
@@ -717,8 +706,6 @@ void gr_gk20a_ctx_patch_write_end(struct gk20a *g,
 					struct nvgpu_gr_ctx *gr_ctx,
 					bool update_patch_count)
 {
-	nvgpu_mem_end(g, &gr_ctx->patch_ctx.mem);
-
 	/* Write context count to context image if it is mapped */
 	if (update_patch_count) {
 		nvgpu_mem_wr(g, &gr_ctx->mem,
@@ -832,31 +819,22 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c)
 	gr_ctx = &tsg->gr_ctx;
 	mem = &gr_ctx->mem;

-	if (nvgpu_mem_begin(g, mem))
-		return -ENOMEM;
-
-	if (nvgpu_mem_begin(g, ctxheader)) {
-		ret = -ENOMEM;
-		goto clean_up_mem;
-	}
-
 	if (gr_ctx->zcull_ctx.gpu_va == 0 &&
 	    gr_ctx->zcull_ctx.ctx_sw_mode ==
 		ctxsw_prog_main_image_zcull_mode_separate_buffer_v()) {
-		ret = -EINVAL;
-		goto clean_up;
+		return -EINVAL;
 	}

 	ret = gk20a_disable_channel_tsg(g, c);
 	if (ret) {
 		nvgpu_err(g, "failed to disable channel/TSG");
-		goto clean_up;
+		return ret;
 	}
 	ret = gk20a_fifo_preempt(g, c);
 	if (ret) {
 		gk20a_enable_channel_tsg(g, c);
 		nvgpu_err(g, "failed to preempt channel/TSG");
-		goto clean_up;
+		return ret;
 	}

 	nvgpu_mem_wr(g, mem,
@@ -871,11 +849,6 @@ static int gr_gk20a_ctx_zcull_setup(struct gk20a *g, struct channel_gk20a *c)

 	gk20a_enable_channel_tsg(g, c);

-clean_up:
-	nvgpu_mem_end(g, ctxheader);
-clean_up_mem:
-	nvgpu_mem_end(g, mem);
-
 	return ret;
 }

@@ -1597,12 +1570,6 @@ restore_fe_go_idle:
 		goto restore_fe_go_idle;
 	}

-	if (nvgpu_mem_begin(g, gold_mem))
-		goto clean_up;
-
-	if (nvgpu_mem_begin(g, gr_mem))
-		goto clean_up;
-
 	ctx_header_words =  roundup(ctx_header_bytes, sizeof(u32));
 	ctx_header_words >>= 2;

@@ -1655,9 +1622,6 @@ clean_up:
 	else
 		nvgpu_log_fn(g, "done");

-	nvgpu_mem_end(g, gold_mem);
-	nvgpu_mem_end(g, gr_mem);
-
 	nvgpu_mutex_release(&gr->ctx_mutex);
 	return err;
 }
@@ -1701,11 +1665,6 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
 	   Flush and invalidate before cpu update. */
 	g->ops.mm.l2_flush(g, true);

-	if (nvgpu_mem_begin(g, mem)) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
 	data = nvgpu_mem_rd(g, mem,
 		ctxsw_prog_main_image_pm_o());

@@ -1717,7 +1676,6 @@ int gr_gk20a_update_smpc_ctxsw_mode(struct gk20a *g,
 	nvgpu_mem_wr(g, mem,
 		ctxsw_prog_main_image_pm_o(), data);

-	nvgpu_mem_end(g, mem);
 out:
 	gk20a_enable_channel_tsg(g, c);
 	return ret;
@@ -1807,24 +1765,7 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
 		}

 		/* Now clear the buffer */
-		if (nvgpu_mem_begin(g, &pm_ctx->mem)) {
-			ret = -ENOMEM;
-			goto cleanup_pm_buf;
-		}
-
 		nvgpu_memset(g, &pm_ctx->mem, 0, 0, pm_ctx->mem.size);
-
-		nvgpu_mem_end(g, &pm_ctx->mem);
-	}
-
-	if (nvgpu_mem_begin(g, gr_mem)) {
-		ret = -ENOMEM;
-		goto cleanup_pm_buf;
-	}
-
-	if (nvgpu_mem_begin(g, ctxheader)) {
-		ret = -ENOMEM;
-		goto clean_up_mem;
 	}

 	data = nvgpu_mem_rd(g, gr_mem, ctxsw_prog_main_image_pm_o());
@@ -1848,22 +1789,10 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g,
 	else
 		g->ops.gr.write_pm_ptr(g, gr_mem, virt_addr);

-	nvgpu_mem_end(g, ctxheader);
-	nvgpu_mem_end(g, gr_mem);
-
 	/* enable channel */
 	gk20a_enable_channel_tsg(g, c);

 	return 0;
-clean_up_mem:
-	nvgpu_mem_end(g, gr_mem);
-cleanup_pm_buf:
-	nvgpu_gmmu_unmap(c->vm, &pm_ctx->mem, pm_ctx->mem.gpu_va);
-	nvgpu_dma_free(g, &pm_ctx->mem);
-	memset(&pm_ctx->mem, 0, sizeof(struct nvgpu_mem));
-
-	gk20a_enable_channel_tsg(g, c);
-	return ret;
 }

 void gk20a_gr_init_ctxsw_hdr_data(struct gk20a *g,
@@ -1904,9 +1833,6 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 	   Flush and invalidate before cpu update. */
 	g->ops.mm.l2_flush(g, true);

-	if (nvgpu_mem_begin(g, mem))
-		return -ENOMEM;
-
 	nvgpu_mem_wr_n(g, mem, 0,
 		gr->ctx_vars.local_golden_image,
 		gr->ctx_vars.golden_image_size);
@@ -1973,7 +1899,6 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,
 		if (gr_ctx->pm_ctx.mem.gpu_va == 0) {
 			nvgpu_err(g,
 				"context switched pm with no pm buffer!");
-			nvgpu_mem_end(g, mem);
 			return -EFAULT;
 		}

@@ -1989,8 +1914,6 @@ int gr_gk20a_load_golden_ctx_image(struct gk20a *g,

 	g->ops.gr.write_pm_ptr(g, mem, virt_addr);

-	nvgpu_mem_end(g, mem);
-
 	return ret;
 }

@@ -4840,12 +4763,6 @@ static int gr_gk20a_init_access_map(struct gk20a *g)
 	u32 *whitelist = NULL;
 	int w, num_entries = 0;

-	if (nvgpu_mem_begin(g, mem)) {
-		nvgpu_err(g,
-			  "failed to map priv access map memory");
-		return -ENOMEM;
-	}
-
 	nvgpu_memset(g, mem, 0, 0, PAGE_SIZE * nr_pages);

 	g->ops.gr.get_access_map(g, &whitelist, &num_entries);
@@ -4864,7 +4781,6 @@ static int gr_gk20a_init_access_map(struct gk20a *g)
 		nvgpu_mem_wr32(g, mem, map_byte / sizeof(u32), x);
 	}

-	nvgpu_mem_end(g, mem);
 	return 0;
 }

@@ -6758,22 +6674,12 @@ static int gr_gk20a_ctx_patch_smpc(struct gk20a *g,
 					 ctxsw_prog_main_image_patch_count_o(),
 					 gr_ctx->patch_ctx.data_count);
 				if (ctxheader->gpu_va) {
-					/*
-					 * Main context can be gr_ctx or pm_ctx.
-					 * CPU access for relevant ctx is taken
-					 * care of in the calling function
-					 * __gr_gk20a_exec_ctx_ops. Need to take
-					 * care of cpu access to ctxheader here.
-					 */
-				if (nvgpu_mem_begin(g, ctxheader))
-						return -ENOMEM;
 					nvgpu_mem_wr(g, ctxheader,
 						ctxsw_prog_main_image_patch_adr_lo_o(),
 						vaddr_lo);
 					nvgpu_mem_wr(g, ctxheader,
 						ctxsw_prog_main_image_patch_adr_hi_o(),
 						vaddr_hi);
-					 nvgpu_mem_end(g, ctxheader);
 				} else {
 					nvgpu_mem_wr(g, mem,
 						ctxsw_prog_main_image_patch_adr_lo_o(),
@@ -8038,17 +7944,8 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 						ctx_ops[i].type == REGOP(TYPE_GR_CTX_QUAD),
 						ctx_ops[i].quad);
 			if (!err) {
-				if (!gr_ctx_ready) {
-					/* would have been a variant of
-					 * gr_gk20a_apply_instmem_overrides,
-					 * recoded in-place instead.
-					 */
-					if (nvgpu_mem_begin(g, &gr_ctx->mem)) {
-						err = -ENOMEM;
-						goto cleanup;
-					}
+				if (!gr_ctx_ready)
 					gr_ctx_ready = true;
-				}
 				current_mem = &gr_ctx->mem;
 			} else {
 				err = gr_gk20a_get_pm_ctx_buffer_offsets(g,
@@ -8072,10 +7969,6 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 						err = -EINVAL;
 						goto cleanup;
 					}
-					if (nvgpu_mem_begin(g, &gr_ctx->pm_ctx.mem)) {
-						err = -ENOMEM;
-						goto cleanup;
-					}
 					pm_ctx_ready = true;
 				}
 				current_mem = &gr_ctx->pm_ctx.mem;
@@ -8148,10 +8041,6 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,

 	if (gr_ctx->patch_ctx.mem.cpu_va)
 		gr_gk20a_ctx_patch_write_end(g, gr_ctx, gr_ctx_ready);
-	if (gr_ctx_ready)
-		nvgpu_mem_end(g, &gr_ctx->mem);
-	if (pm_ctx_ready)
-		nvgpu_mem_end(g, &gr_ctx->pm_ctx.mem);

 	return err;
 }
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -1056,16 +1056,11 @@ int gr_gm20b_update_pc_sampling(struct channel_gk20a *c,
 		return -EINVAL;


-	if (nvgpu_mem_begin(c->g, mem))
-		return -ENOMEM;
-
 	v = nvgpu_mem_rd(c->g, mem, ctxsw_prog_main_image_pm_o());
 	v &= ~ctxsw_prog_main_image_pm_pc_sampling_m();
 	v |= ctxsw_prog_main_image_pm_pc_sampling_f(enable);
 	nvgpu_mem_wr(c->g, mem, ctxsw_prog_main_image_pm_o(), v);

-	nvgpu_mem_end(c->g, mem);
-
 	nvgpu_log_fn(c->g, "done");

 	return 0;
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -1115,10 +1115,6 @@ void gr_gp10b_dump_ctxsw_stats(struct gk20a *g, struct vm_gk20a *vm,
 {
 	struct nvgpu_mem *mem = &gr_ctx->mem;

-	if (nvgpu_mem_begin(g, mem)) {
-		WARN_ON("Cannot map context");
-		return;
-	}
 	nvgpu_err(g, "ctxsw_prog_main_image_magic_value_o : %x (expect %x)",
 		nvgpu_mem_rd(g, mem,
 				ctxsw_prog_main_image_magic_value_o()),
@@ -1159,7 +1155,6 @@ void gr_gp10b_dump_ctxsw_stats(struct gk20a *g, struct vm_gk20a *vm,
 		"image compute preemption option (CTA is 1) %x",
 		nvgpu_mem_rd(g, mem,
 			ctxsw_prog_main_image_compute_preemption_options_o()));
-	nvgpu_mem_end(g, mem);
 }

 void gr_gp10b_update_ctxsw_preemption_mode(struct gk20a *g,
@@ -2175,12 +2170,9 @@ int gr_gp10b_set_boosted_ctx(struct channel_gk20a *ch,
 	gr_ctx->boosted_ctx = boost;
 	mem = &gr_ctx->mem;

-	if (nvgpu_mem_begin(g, mem))
-		return -ENOMEM;
-
 	err = gk20a_disable_channel_tsg(g, ch);
 	if (err)
-		goto unmap_ctx;
+		return err;

 	err = gk20a_fifo_preempt(g, ch);
 	if (err)
@@ -2193,8 +2185,6 @@ int gr_gp10b_set_boosted_ctx(struct channel_gk20a *ch,

 enable_ch:
 	gk20a_enable_channel_tsg(g, ch);
-unmap_ctx:
-	nvgpu_mem_end(g, mem);

 	return err;
 }
@@ -2217,8 +2207,6 @@ int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
 	struct tsg_gk20a *tsg;
 	struct vm_gk20a *vm;
 	struct nvgpu_mem *mem;
-	struct ctx_header_desc *ctx = &ch->ctx_header;
-	struct nvgpu_mem *ctxheader = &ctx->mem;
 	u32 class;
 	int err = 0;

@@ -2263,15 +2251,9 @@ int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,
 		}
 	}

-	if (nvgpu_mem_begin(g, mem))
-		return -ENOMEM;
-
-	if (nvgpu_mem_begin(g, ctxheader))
-		goto unamp_ctx_header;
-
 	err = gk20a_disable_channel_tsg(g, ch);
 	if (err)
-		goto unmap_ctx;
+		return err;

 	err = gk20a_fifo_preempt(g, ch);
 	if (err)
@@ -2292,11 +2274,6 @@ int gr_gp10b_set_preemption_mode(struct channel_gk20a *ch,

 enable_ch:
 	gk20a_enable_channel_tsg(g, ch);
-unmap_ctx:
-	nvgpu_mem_end(g, ctxheader);
-unamp_ctx_header:
-	nvgpu_mem_end(g, mem);
-
 	return err;
 }

--- a/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/subctx_gv11b.c
@@ -82,11 +82,7 @@ int gv11b_alloc_subctx_header(struct channel_gk20a *c)
 			return -ENOMEM;
 		}
 		/* Now clear the buffer */
-		if (nvgpu_mem_begin(g, &ctx->mem))
-			return -ENOMEM;
-
 		nvgpu_memset(g, &ctx->mem, 0, 0, ctx->mem.size);
-		nvgpu_mem_end(g, &ctx->mem);
 	}
 	return ret;
 }
@@ -117,8 +113,6 @@ int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va)

 	gr_mem = &ctx->mem;
 	g->ops.mm.l2_flush(g, true);
-	if (nvgpu_mem_begin(g, gr_mem))
-		return -ENOMEM;

 	/* set priv access map */
 	addr_lo = u64_lo32(gr_ctx->global_ctx_buffer_va[PRIV_ACCESS_MAP_VA]);
@@ -153,7 +147,7 @@ int gv11b_update_subctx_header(struct channel_gk20a *c, u64 gpu_va)
 	nvgpu_mem_wr(g, gr_mem,
                ctxsw_prog_main_image_ctl_o(),
                ctxsw_prog_main_image_ctl_type_per_veid_header_v());
-	nvgpu_mem_end(g, gr_mem);
+
 	return ret;
 }