Mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git (synced 2025-12-22 17:36:20 +03:00)
gpu: nvgpu: fix patch buf count update for vidmem
gr_gk20a_ctx_patch_write_begin() updates the patch buffer data_count when
the associated graphics context memory buffer has been CPU-mapped; it was
doing so by looking for a non-null cpu_va. However, if the graphics context
has been allocated from vidmem, cpu_va is always 0, so we can't tell if
nvgpu_mem_begin() was called for the context buffer or not.

Instead:
- add a cpu_accessible flag to the nvgpu_mem struct and set it in
  nvgpu_mem_begin()
- return the value of that flag in nvgpu_mem_cpu_accessible()
- gr_gk20a_ctx_patch_write_begin() now calls this new function instead of
  checking cpu_va

Bug 2012077
JIRA ESRM-74

Change-Id: I8401699f30b4ae7154111721c25c7ec3ff95d329
Signed-off-by: Peter Daifuku <pdaifuku@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1587293
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
commit de399ccb00
parent 5662236895
committed by mobile promotions
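
For context, here is a minimal sketch (not part of this commit) of the access pattern the new flag supports; read_one_word() and the zero offset are made up for illustration, while nvgpu_mem_begin()/nvgpu_mem_end(), nvgpu_mem_rd() and the new nvgpu_mem_cpu_accessible() are the driver functions this change touches or relies on:

#include <nvgpu/nvgpu_mem.h>

/* Illustrative sketch only: bracket CPU accesses with begin/end and use the
 * new query instead of testing cpu_va, so vidmem buffers are handled too. */
static int read_one_word(struct gk20a *g, struct nvgpu_mem *mem, u32 *out)
{
        int err = nvgpu_mem_begin(g, mem);      /* sets mem->cpu_accessible */

        if (err)
                return err;

        /* With the old cpu_va test, a vidmem buffer (cpu_va == NULL) would
         * wrongly look inaccessible at this point. */
        if (nvgpu_mem_cpu_accessible(mem))
                *out = nvgpu_mem_rd(g, mem, 0);

        nvgpu_mem_end(g, mem);                  /* clears mem->cpu_accessible */
        return 0;
}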
@@ -60,6 +60,14 @@ int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem)
 {
         void *cpu_va;

+        if (WARN_ON(mem->cpu_accessible)) {
+                nvgpu_warn(g, "nested");
+                return -EBUSY;
+        }
+
+        /* flag that the intent is to allow CPU access to the memory. */
+        mem->cpu_accessible = true;
+
         if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin)
                 return 0;

@@ -71,17 +79,14 @@ int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem)
         if (!(mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING))
                 return 0;

-        if (WARN_ON(mem->cpu_va)) {
-                nvgpu_warn(g, "nested");
-                return -EBUSY;
-        }
-
         cpu_va = vmap(mem->priv.pages,
                         PAGE_ALIGN(mem->size) >> PAGE_SHIFT,
                         0, pgprot_writecombine(PAGE_KERNEL));

-        if (WARN_ON(!cpu_va))
+        if (WARN_ON(!cpu_va)) {
+                mem->cpu_accessible = false;
                 return -ENOMEM;
+        }

         mem->cpu_va = cpu_va;
         return 0;
@@ -89,6 +94,8 @@ int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem)

 void nvgpu_mem_end(struct gk20a *g, struct nvgpu_mem *mem)
 {
+        mem->cpu_accessible = false;
+
         if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin)
                 return;

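Worth noting about the ordering in nvgpu_mem_begin() above: cpu_accessible is set before the early return taken for non-SYSMEM apertures (and when PRAMIN access is forced), so a vidmem buffer that never gets a kernel mapping is still reported as accessible between begin and end. A hedged illustration of the difference; mapped_old() and mapped_new() are hypothetical helpers, not driver code:

#include <nvgpu/nvgpu_mem.h>

/*
 * Hypothetical illustration. After a successful nvgpu_mem_begin() under this
 * patch:
 *
 *                           sysmem buffer            vidmem buffer
 *   mem->cpu_va             non-NULL kernel mapping  NULL (early return path)
 *   mem->cpu_accessible     true                     true
 */
static bool mapped_old(struct nvgpu_mem *mem)
{
        return mem->cpu_va != NULL;     /* false for vidmem: the original bug */
}

static bool mapped_new(struct nvgpu_mem *mem)
{
        return mem->cpu_accessible;     /* true for vidmem as well */
}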
@@ -682,7 +682,7 @@ int gr_gk20a_ctx_patch_write_begin(struct gk20a *g,
         if (err)
                 return err;

-        if (ch_ctx->gr_ctx->mem.cpu_va) {
+        if (nvgpu_mem_cpu_accessible(&ch_ctx->gr_ctx->mem)) {
                 /* reset patch count if ucode has already processed it */
                 ch_ctx->patch_ctx.data_count = nvgpu_mem_rd(g,
                         &ch_ctx->gr_ctx->mem,
@@ -699,7 +699,7 @@ void gr_gk20a_ctx_patch_write_end(struct gk20a *g,
         nvgpu_mem_end(g, &ch_ctx->patch_ctx.mem);

         /* Write context count to context image if it is mapped */
-        if (ch_ctx->gr_ctx->mem.cpu_va) {
+        if (nvgpu_mem_cpu_accessible(&ch_ctx->gr_ctx->mem)) {
                 nvgpu_mem_wr(g, &ch_ctx->gr_ctx->mem,
                         ctxsw_prog_main_image_patch_count_o(),
                         ch_ctx->patch_ctx.data_count);
@@ -122,6 +122,7 @@ struct nvgpu_mem {
         size_t aligned_size;
         u64 gpu_va;
         bool skip_wmb;
+        bool cpu_accessible;

         /*
          * Set when a nvgpu_mem struct is not a "real" nvgpu_mem struct. Instead
@@ -210,6 +211,15 @@ static inline bool nvgpu_mem_is_valid(struct nvgpu_mem *mem)

 }

+/*
+ * Returns true if the passed nvgpu_mem can be accessed by the CPU by virtue
+ * of having called nvgpu_mem_begin successfully.
+ */
+static inline bool nvgpu_mem_cpu_accessible(struct nvgpu_mem *mem)
+{
+        return mem->cpu_accessible;
+}
+
 /*
  * Create a nvgpu_sgt of the default implementation
  */
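
A secondary effect of the new helper and flag, sketched below: nested begins are now caught for every aperture, whereas the removed WARN_ON(mem->cpu_va) check could only fire for sysmem buffers on the vmap() path. double_begin() is a hypothetical caller written only to show the error path:

#include <nvgpu/nvgpu_mem.h>

/* Illustrative only: a second nvgpu_mem_begin() before nvgpu_mem_end() now
 * trips WARN_ON(mem->cpu_accessible) and returns -EBUSY, for vidmem too. */
static int double_begin(struct gk20a *g, struct nvgpu_mem *mem)
{
        int err = nvgpu_mem_begin(g, mem);

        if (err)
                return err;

        err = nvgpu_mem_begin(g, mem);          /* warns, returns -EBUSY */
        if (err == -EBUSY)
                nvgpu_mem_end(g, mem);          /* release the first begin */

        return err;
}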