Mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
gpu: nvgpu: Refactor gk20a_vm_alloc_va()
This function is an internal function to the VM manager that allocates
virtual memory space in the GVA allocator. It is unfortunately used in
the vGPU code, though. In any event, this patch cleans up and moves the
implementation of these functions into the VM common code.

JIRA NVGPU-12
JIRA NVGPU-30

Change-Id: I24a3d29b5fcb12615df27d2ac82891d1bacfe541
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1477745
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Committed by: mobile promotions
Parent: 92fe030e52
Commit: b70bad4b9f
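
For readers skimming the diff below: the common-code replacements keep the allocation signature but drop the size argument from the free path, since nvgpu_free() only needs the address. A minimal caller sketch follows, assuming the nvgpu headers touched by this series; example_reserve_va() is a hypothetical helper used only for illustration and is not part of the patch.

/*
 * Minimal sketch of the new common API, assuming the nvgpu headers
 * touched by this change. example_reserve_va() is a hypothetical
 * caller used only for illustration; it is not part of the patch.
 */
#include <nvgpu/vm.h>

static int example_reserve_va(struct vm_gk20a *vm, u64 size)
{
        /* Returns 0 on failure; size is rounded up to the page size. */
        u64 va = __nvgpu_vm_alloc_va(vm, size, gmmu_page_size_kernel);

        if (!va)
                return -ENOMEM;

        /* ... program the GMMU mapping at 'va' ... */

        /* Unlike the old gk20a_vm_free_va(), no size argument is needed. */
        __nvgpu_vm_free_va(vm, va, gmmu_page_size_kernel);
        return 0;
}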
@@ -394,7 +394,7 @@ clean_up:
         }
         nvgpu_kfree(g, mapped_buffer);
         if (va_allocated)
-                gk20a_vm_free_va(vm, map_offset, bfr.size, bfr.pgsz_idx);
+                __nvgpu_vm_free_va(vm, map_offset, bfr.pgsz_idx);
         if (!IS_ERR(bfr.sgt))
                 gk20a_mm_unpin(g->dev, dmabuf, bfr.sgt);
 
@@ -29,6 +29,54 @@ int vm_aspace_id(struct vm_gk20a *vm)
         return vm->as_share ? vm->as_share->id : -1;
 }
 
+u64 __nvgpu_vm_alloc_va(struct vm_gk20a *vm, u64 size,
+                        enum gmmu_pgsz_gk20a pgsz_idx)
+
+{
+        struct gk20a *g = vm->mm->g;
+        struct nvgpu_allocator *vma = NULL;
+        u64 addr;
+        u64 page_size = vm->gmmu_page_sizes[pgsz_idx];
+
+        vma = vm->vma[pgsz_idx];
+
+        if (pgsz_idx >= gmmu_nr_page_sizes) {
+                nvgpu_err(g, "(%s) invalid page size requested", vma->name);
+                return 0;
+        }
+
+        if ((pgsz_idx == gmmu_page_size_big) && !vm->big_pages) {
+                nvgpu_err(g, "(%s) unsupportd page size requested", vma->name);
+                return 0;
+        }
+
+        /* Be certain we round up to page_size if needed */
+        size = (size + ((u64)page_size - 1)) & ~((u64)page_size - 1);
+        nvgpu_log(g, gpu_dbg_map, "size=0x%llx @ pgsz=%dKB", size,
+                  vm->gmmu_page_sizes[pgsz_idx] >> 10);
+
+        addr = nvgpu_alloc(vma, size);
+        if (!addr) {
+                nvgpu_err(g, "(%s) oom: sz=0x%llx", vma->name, size);
+                return 0;
+        }
+
+        nvgpu_log(g, gpu_dbg_map, "(%s) addr: 0x%llx", vma->name, addr);
+        return addr;
+}
+
+int __nvgpu_vm_free_va(struct vm_gk20a *vm, u64 addr,
+                       enum gmmu_pgsz_gk20a pgsz_idx)
+{
+        struct gk20a *g = vm->mm->g;
+        struct nvgpu_allocator *vma = vm->vma[pgsz_idx];
+
+        nvgpu_log(g, gpu_dbg_map, "(%s) addr: 0x%llx", vma->name, addr);
+        nvgpu_free(vma, addr);
+
+        return 0;
+}
+
 void nvgpu_vm_mapping_batch_start(struct vm_gk20a_mapping_batch *mapping_batch)
 {
         memset(mapping_batch, 0, sizeof(*mapping_batch));
@@ -1192,57 +1192,6 @@ static void nvgpu_vm_unmap_user(struct vm_gk20a *vm, u64 offset,
         nvgpu_mutex_release(&vm->update_gmmu_lock);
 }
 
-u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
-                      u64 size,
-                      enum gmmu_pgsz_gk20a gmmu_pgsz_idx)
-
-{
-        struct nvgpu_allocator *vma = vm->vma[gmmu_pgsz_idx];
-        u64 offset;
-        u64 gmmu_page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx];
-        struct gk20a *g = vm->mm->g;
-
-        if (gmmu_pgsz_idx >= gmmu_nr_page_sizes) {
-                nvgpu_warn(g,
-                           "invalid page size requested in gk20a vm alloc");
-                return 0;
-        }
-
-        if ((gmmu_pgsz_idx == gmmu_page_size_big) && !vm->big_pages) {
-                nvgpu_warn(g, "unsupportd page size requested");
-                return 0;
-
-        }
-
-        /* Be certain we round up to gmmu_page_size if needed */
-        size = (size + ((u64)gmmu_page_size - 1)) & ~((u64)gmmu_page_size - 1);
-        gk20a_dbg_info("size=0x%llx @ pgsz=%dKB", size,
-                       vm->gmmu_page_sizes[gmmu_pgsz_idx]>>10);
-
-        offset = nvgpu_alloc(vma, size);
-        if (!offset) {
-                nvgpu_err(vm->mm->g,
-                          "%s oom: sz=0x%llx", vma->name, size);
-                return 0;
-        }
-
-        gk20a_dbg_fn("%s found addr: 0x%llx", vma->name, offset);
-        return offset;
-}
-
-int gk20a_vm_free_va(struct vm_gk20a *vm,
-                     u64 offset, u64 size,
-                     enum gmmu_pgsz_gk20a pgsz_idx)
-{
-        struct nvgpu_allocator *vma = vm->vma[pgsz_idx];
-
-        gk20a_dbg_info("%s free addr=0x%llx, size=0x%llx",
-                       vma->name, offset, size);
-        nvgpu_free(vma, offset);
-
-        return 0;
-}
-
 int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
                                       u32 flags,
                                       struct buffer_attrs *bfr,
@@ -1313,7 +1262,7 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 
         /* Allocate (or validate when map_offset != 0) the virtual address. */
         if (!map_offset) {
-                map_offset = gk20a_vm_alloc_va(vm, size,
+                map_offset = __nvgpu_vm_alloc_va(vm, size,
                                           pgsz_idx);
                 if (!map_offset) {
                         nvgpu_err(g, "failed to allocate va space");
@@ -1364,7 +1313,7 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
         return map_offset;
 fail_validate:
         if (allocated)
-                gk20a_vm_free_va(vm, map_offset, size, pgsz_idx);
+                __nvgpu_vm_free_va(vm, map_offset, pgsz_idx);
 fail_alloc:
         nvgpu_err(g, "%s: failed with err=%d\n", __func__, err);
         return 0;
@@ -1383,7 +1332,7 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
         struct gk20a *g = gk20a_from_vm(vm);
 
         if (va_allocated) {
-                err = gk20a_vm_free_va(vm, vaddr, size, pgsz_idx);
+                err = __nvgpu_vm_free_va(vm, vaddr, pgsz_idx);
                 if (err) {
                         nvgpu_err(g, "failed to free va");
                         return;
@@ -412,14 +412,6 @@ int nvgpu_vm_get_compbits_info(struct vm_gk20a *vm,
                               u32 *mapping_ctagline,
                               u32 *flags);
 
-u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
-                      u64 size,
-                      enum gmmu_pgsz_gk20a gmmu_pgsz_idx);
-
-int gk20a_vm_free_va(struct vm_gk20a *vm,
-                     u64 offset, u64 size,
-                     enum gmmu_pgsz_gk20a pgsz_idx);
-
 /* vm-as interface */
 struct nvgpu_as_alloc_space_args;
 struct nvgpu_as_free_space_args;
@@ -242,4 +242,17 @@ int nvgpu_init_vm(struct mm_gk20a *mm,
                   char *name);
 void nvgpu_deinit_vm(struct vm_gk20a *vm);
 
+/*
+ * These are private to the VM code but are unfortunately used by the vgpu code.
+ * It appears to be used for an optimization in reducing the number of server
+ * requests to the vgpu server. Basically the vgpu implementation of
+ * map_global_ctx_buffers() sends a bunch of VA ranges over to the RM server.
+ * Ideally the RM server can just batch mappings but until such a time this
+ * will be used by the vgpu code.
+ */
+u64 __nvgpu_vm_alloc_va(struct vm_gk20a *vm, u64 size,
+                        enum gmmu_pgsz_gk20a pgsz_idx);
+int __nvgpu_vm_free_va(struct vm_gk20a *vm, u64 addr,
+                       enum gmmu_pgsz_gk20a pgsz_idx);
+
 #endif
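
The comment block added above names the only intended external users of these helpers: the vgpu paths, which reserve GPU VAs locally and then report them to the RM server. A simplified sketch of that pattern follows, loosely modeled on the hunks below; send_va_to_rm_server() stands in for the real vgpu_comm_sendrecv() message plumbing and is not an actual nvgpu function.

/*
 * Simplified illustration of the vgpu usage pattern described in the
 * comment above. send_va_to_rm_server() is a placeholder for the
 * tegra_vgpu message handling done via vgpu_comm_sendrecv() in the
 * real code; it is not an nvgpu API.
 */
static int example_vgpu_reserve_and_report(struct vm_gk20a *vm, u64 size)
{
        int err;
        u64 gpu_va = __nvgpu_vm_alloc_va(vm, size, gmmu_page_size_kernel);

        if (!gpu_va)
                return -ENOMEM;

        err = send_va_to_rm_server(vm, gpu_va, size);  /* placeholder */
        if (err) {
                /* Roll back the local reservation if the server rejects it. */
                __nvgpu_vm_free_va(vm, gpu_va, gmmu_page_size_kernel);
                return err;
        }

        return 0;
}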
@@ -40,8 +40,7 @@ static void vgpu_gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
                 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
                 WARN_ON(err || msg.ret);
 
-                gk20a_vm_free_va(vm, gr_ctx->mem.gpu_va, gr_ctx->mem.size,
-                                 gmmu_page_size_kernel);
+                __nvgpu_vm_free_va(vm, gr_ctx->mem.gpu_va, gmmu_page_size_kernel);
 
                 nvgpu_dma_unmap_free(vm, &gr_ctx->t18x.pagepool_ctxsw_buffer);
                 nvgpu_dma_unmap_free(vm, &gr_ctx->t18x.betacb_ctxsw_buffer);
@@ -81,7 +81,7 @@ static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm,
 
         /* Allocate (or validate when map_offset != 0) the virtual address. */
         if (!map_offset) {
-                map_offset = gk20a_vm_alloc_va(vm, size, pgsz_idx);
+                map_offset = __nvgpu_vm_alloc_va(vm, size, pgsz_idx);
                 if (!map_offset) {
                         nvgpu_err(g, "failed to allocate va space");
                         err = -ENOMEM;
@@ -156,7 +156,7 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g,
         /* FIXME: add VPR support */
 
         /* Circular Buffer */
-        gpu_va = gk20a_vm_alloc_va(ch_vm,
+        gpu_va = __nvgpu_vm_alloc_va(ch_vm,
                                    gr->global_ctx_buffer[CIRCULAR].mem.size,
                                    gmmu_page_size_kernel);
 
@@ -166,7 +166,7 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g,
         g_bfr_size[CIRCULAR_VA] = gr->global_ctx_buffer[CIRCULAR].mem.size;
 
         /* Attribute Buffer */
-        gpu_va = gk20a_vm_alloc_va(ch_vm,
+        gpu_va = __nvgpu_vm_alloc_va(ch_vm,
                                    gr->global_ctx_buffer[ATTRIBUTE].mem.size,
                                    gmmu_page_size_kernel);
 
@@ -176,7 +176,7 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g,
         g_bfr_size[ATTRIBUTE_VA] = gr->global_ctx_buffer[ATTRIBUTE].mem.size;
 
         /* Page Pool */
-        gpu_va = gk20a_vm_alloc_va(ch_vm,
+        gpu_va = __nvgpu_vm_alloc_va(ch_vm,
                                    gr->global_ctx_buffer[PAGEPOOL].mem.size,
                                    gmmu_page_size_kernel);
         if (!gpu_va)
@@ -185,7 +185,7 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g,
         g_bfr_size[PAGEPOOL_VA] = gr->global_ctx_buffer[PAGEPOOL].mem.size;
 
         /* Priv register Access Map */
-        gpu_va = gk20a_vm_alloc_va(ch_vm,
+        gpu_va = __nvgpu_vm_alloc_va(ch_vm,
                                    gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem.size,
                                    gmmu_page_size_kernel);
         if (!gpu_va)
@@ -211,8 +211,8 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g,
 clean_up:
         for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) {
                 if (g_bfr_va[i]) {
-                        gk20a_vm_free_va(ch_vm, g_bfr_va[i],
-                                         g_bfr_size[i], gmmu_page_size_kernel);
+                        __nvgpu_vm_free_va(ch_vm, g_bfr_va[i],
+                                           gmmu_page_size_kernel);
                         g_bfr_va[i] = 0;
                 }
         }
@@ -242,8 +242,8 @@ static void vgpu_gr_unmap_global_ctx_buffers(struct channel_gk20a *c)
 
         for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) {
                 if (g_bfr_va[i]) {
-                        gk20a_vm_free_va(ch_vm, g_bfr_va[i], g_bfr_size[i],
-                                         gmmu_page_size_kernel);
+                        __nvgpu_vm_free_va(ch_vm, g_bfr_va[i],
+                                           gmmu_page_size_kernel);
                         g_bfr_va[i] = 0;
                         g_bfr_size[i] = 0;
                 }
@@ -277,7 +277,7 @@ int vgpu_gr_alloc_gr_ctx(struct gk20a *g,
                 return -ENOMEM;
 
         gr_ctx->mem.size = gr->ctx_vars.buffer_total_size;
-        gr_ctx->mem.gpu_va = gk20a_vm_alloc_va(vm,
+        gr_ctx->mem.gpu_va = __nvgpu_vm_alloc_va(vm,
                                                gr_ctx->mem.size,
                                                gmmu_page_size_kernel);
 
@@ -296,8 +296,8 @@ int vgpu_gr_alloc_gr_ctx(struct gk20a *g,
 
         if (unlikely(err)) {
                 nvgpu_err(g, "fail to alloc gr_ctx");
-                gk20a_vm_free_va(vm, gr_ctx->mem.gpu_va,
-                                 gr_ctx->mem.size, gmmu_page_size_kernel);
+                __nvgpu_vm_free_va(vm, gr_ctx->mem.gpu_va,
+                                   gmmu_page_size_kernel);
                 nvgpu_kfree(g, gr_ctx);
         } else {
                 gr_ctx->virt_ctx = p->gr_ctx_handle;
@@ -323,8 +323,8 @@ void vgpu_gr_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
                 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
                 WARN_ON(err || msg.ret);
 
-                gk20a_vm_free_va(vm, gr_ctx->mem.gpu_va, gr_ctx->mem.size,
-                                 gmmu_page_size_kernel);
+                __nvgpu_vm_free_va(vm, gr_ctx->mem.gpu_va,
+                                   gmmu_page_size_kernel);
                 nvgpu_kfree(g, gr_ctx);
         }
 }
@@ -349,7 +349,7 @@ static int vgpu_gr_alloc_channel_patch_ctx(struct gk20a *g,
         gk20a_dbg_fn("");
 
         patch_ctx->mem.size = 128 * sizeof(u32);
-        patch_ctx->mem.gpu_va = gk20a_vm_alloc_va(ch_vm,
+        patch_ctx->mem.gpu_va = __nvgpu_vm_alloc_va(ch_vm,
                                                   patch_ctx->mem.size,
                                                   gmmu_page_size_kernel);
         if (!patch_ctx->mem.gpu_va)
@@ -361,8 +361,8 @@ static int vgpu_gr_alloc_channel_patch_ctx(struct gk20a *g,
         p->patch_ctx_va = patch_ctx->mem.gpu_va;
         err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
         if (err || msg.ret) {
-                gk20a_vm_free_va(ch_vm, patch_ctx->mem.gpu_va,
-                                 patch_ctx->mem.size, gmmu_page_size_kernel);
+                __nvgpu_vm_free_va(ch_vm, patch_ctx->mem.gpu_va,
+                                   gmmu_page_size_kernel);
                 err = -ENOMEM;
         }
 
@@ -387,8 +387,8 @@ static void vgpu_gr_free_channel_patch_ctx(struct channel_gk20a *c)
                 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
                 WARN_ON(err || msg.ret);
 
-                gk20a_vm_free_va(ch_vm, patch_ctx->mem.gpu_va,
-                                 patch_ctx->mem.size, gmmu_page_size_kernel);
+                __nvgpu_vm_free_va(ch_vm, patch_ctx->mem.gpu_va,
+                                   gmmu_page_size_kernel);
                 patch_ctx->mem.gpu_va = 0;
         }
 }
@@ -413,8 +413,8 @@ static void vgpu_gr_free_channel_pm_ctx(struct channel_gk20a *c)
                 err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
                 WARN_ON(err || msg.ret);
 
-                gk20a_vm_free_va(c->vm, pm_ctx->mem.gpu_va, pm_ctx->mem.size,
-                                 gmmu_page_size_kernel);
+                __nvgpu_vm_free_va(c->vm, pm_ctx->mem.gpu_va,
+                                   gmmu_page_size_kernel);
                 pm_ctx->mem.gpu_va = 0;
         }
 
@@ -1046,7 +1046,7 @@ static int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g,
 
         /* Allocate buffer if necessary */
         if (pm_ctx->mem.gpu_va == 0) {
-                pm_ctx->mem.gpu_va = gk20a_vm_alloc_va(ch->vm,
+                pm_ctx->mem.gpu_va = __nvgpu_vm_alloc_va(ch->vm,
                                                        g->gr.ctx_vars.pm_ctxsw_image_size,
                                                        gmmu_page_size_kernel);
 
@@ -106,7 +106,7 @@ static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm,
 
         /* Allocate (or validate when map_offset != 0) the virtual address. */
         if (!map_offset) {
-                map_offset = gk20a_vm_alloc_va(vm, size,
+                map_offset = __nvgpu_vm_alloc_va(vm, size,
                                           pgsz_idx);
                 if (!map_offset) {
                         nvgpu_err(g, "failed to allocate va space\n");
@@ -180,7 +180,7 @@ static void vgpu_locked_gmmu_unmap(struct vm_gk20a *vm,
         gk20a_dbg_fn("");
 
         if (va_allocated) {
-                err = gk20a_vm_free_va(vm, vaddr, size, pgsz_idx);
+                err = __nvgpu_vm_free_va(vm, vaddr, pgsz_idx);
                 if (err) {
                         dev_err(dev_from_vm(vm),
                                 "failed to free va");