gpu: nvgpu: VM unmap refactoring

Re-organize the unmap code to be better split between OS specific
requirements and common core requirements. The new code flow works
as follows:

  nvgpu_vm_unmap()

Is the primary entry point to the unmap path. It takes a VM and a GPU
virtual address to unmap, plus an optional mapping batch struct.

This function is responsible for making sure a buffer really is mapped
at the given address and, when called on a fixed-offset mapping, for
making sure the mapping will definitely be freed: since buffers are
ref-counted, it waits for the ref-count to drop to 1 before
proceeding. It then decrements the ref-count and returns.
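
For a caller that isn't batching unmaps this boils down to a single
call. A minimal sketch (vm and gpu_va are placeholders for whatever VM
handle and GPU virtual address the caller already holds; the cde and
perfbuf hunks below show real call sites):

  /* NULL batch: this unmap is not part of a batch. */
  nvgpu_vm_unmap(vm, gpu_va, NULL);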

If the ref-count hits zero then __nvgpu_vm_unmap_ref() is called,
which simply calls __nvgpu_vm_unmap() with the relevant batch struct
if one is present. This is where the real work is done:
__nvgpu_vm_unmap() clears the GMMU mapping, removes the mapped buffer
from the various lists and trees it may be in, and then calls
nvgpu_vm_unmap_system(). That function handles any OS specific
teardown and must be defined by every VM OS implementation.
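
As a rough illustration of the shape of this flow only (not the actual
driver code: mapped_buf, unmap(), real_unmap(), unmap_system() and
main() below are made-up stand-ins for nvgpu_mapped_buf,
nvgpu_vm_unmap(), __nvgpu_vm_unmap() and nvgpu_vm_unmap_system(), and
locking, GMMU programming and error handling are left out), the
ref-counted teardown ending in an OS hook looks like:

  #include <stdio.h>
  #include <stdlib.h>

  struct mapped_buf {
          int refcount;   /* stand-in for the nvgpu_ref inside nvgpu_mapped_buf */
  };

  /* Stand-in for nvgpu_vm_unmap_system(): OS specific teardown only. */
  static void unmap_system(struct mapped_buf *buf)
  {
          (void)buf;
          printf("OS specific teardown (unpin, dma_buf_put, ...)\n");
  }

  /* Stand-in for __nvgpu_vm_unmap(): the real work, ending with the OS hook. */
  static void real_unmap(struct mapped_buf *buf)
  {
          printf("clear GMMU mapping, drop buffer from trees and lists\n");
          unmap_system(buf);
          free(buf);
  }

  /* Stand-in for nvgpu_vm_unmap(): just drop one reference. */
  static void unmap(struct mapped_buf *buf)
  {
          if (--buf->refcount == 0)
                  real_unmap(buf);        /* like __nvgpu_vm_unmap_ref() */
  }

  int main(void)
  {
          struct mapped_buf *buf = calloc(1, sizeof(*buf));

          if (!buf)
                  return 1;
          buf->refcount = 2;      /* mapped, then referenced a second time */
          unmap(buf);             /* only drops a reference */
          unmap(buf);             /* last reference: the real teardown runs */
          return 0;
  }

The real __nvgpu_vm_unmap() additionally threads the mapping batch
through and handles vm_area membership, as the diff below shows.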

There's also a shortcut used by some other core VM code to free
mappings without going through nvgpu_vm_unmap(). Those callers simply
drop the mapping ref-count directly with nvgpu_ref_put(), which
invokes __nvgpu_vm_unmap_ref() once the ref-count hits zero.
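
Concretely, this is the pattern the nvgpu_vm_put_buffers() and
nvgpu_vm_area_free() hunks below use (the kref_put_batch assignment is
only needed when a batch is actually in play; vm_area_free skips it):

  vm->kref_put_batch = batch;
  nvgpu_ref_put(&mapped_buffer->ref, __nvgpu_vm_unmap_ref);
  vm->kref_put_batch = NULL;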

JIRA NVGPU-30
JIRA NVGPU-71

Change-Id: Ic626d37ab936819841bab45214f027b40ffa4e5a
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1583982
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
8 changed files with 136 additions and 106 deletions


@@ -1167,7 +1167,7 @@ __releases(&l->cde_app->mutex)
cde_ctx->init_cmd_executed = true;
/* unmap the buffers - channel holds references to them now */
nvgpu_vm_unmap(cde_ctx->vm, map_vaddr);
nvgpu_vm_unmap(cde_ctx->vm, map_vaddr, NULL);
return err;
@@ -1175,7 +1175,7 @@ exit_unmap_surface:
if (surface)
dma_buf_vunmap(compbits_scatter_buf, surface);
exit_unmap_vaddr:
nvgpu_vm_unmap(cde_ctx->vm, map_vaddr);
nvgpu_vm_unmap(cde_ctx->vm, map_vaddr, NULL);
exit_idle:
gk20a_idle(g);
return err;


@@ -118,7 +118,10 @@ static int gk20a_as_ioctl_unmap_buffer(
struct nvgpu_as_unmap_buffer_args *args)
{
gk20a_dbg_fn("");
return nvgpu_vm_unmap_buffer(as_share->vm, args->offset, NULL);
nvgpu_vm_unmap(as_share->vm, args->offset, NULL);
return 0;
}
static int gk20a_as_ioctl_map_buffer_batch(
@@ -155,10 +158,7 @@ static int gk20a_as_ioctl_map_buffer_batch(
break;
}
err = nvgpu_vm_unmap_buffer(as_share->vm, unmap_args.offset,
&batch);
if (err)
break;
nvgpu_vm_unmap(as_share->vm, unmap_args.offset, &batch);
}
if (err) {


@@ -1142,7 +1142,7 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
return 0;
err_unmap:
nvgpu_vm_unmap_buffer(mm->perfbuf.vm, args->offset, NULL);
nvgpu_vm_unmap(mm->perfbuf.vm, args->offset, NULL);
err_remove_vm:
nvgpu_vm_put(mm->perfbuf.vm);
nvgpu_mutex_release(&g->dbg_sessions_lock);
@@ -1386,7 +1386,7 @@ static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset)
err = g->ops.dbg_session_ops.perfbuffer_disable(g);
nvgpu_vm_unmap_buffer(vm, offset, NULL);
nvgpu_vm_unmap(vm, offset, NULL);
nvgpu_free_inst_block(g, &mm->perfbuf.inst_block);
nvgpu_vm_put(vm);


@@ -641,88 +641,20 @@ int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
return err;
}
int nvgpu_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset,
struct vm_gk20a_mapping_batch *batch)
{
struct gk20a *g = vm->mm->g;
struct nvgpu_mapped_buf *mapped_buffer;
nvgpu_mutex_acquire(&vm->update_gmmu_lock);
mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, offset);
if (!mapped_buffer) {
nvgpu_mutex_release(&vm->update_gmmu_lock);
nvgpu_err(g, "invalid addr to unmap 0x%llx", offset);
return 0;
}
if (mapped_buffer->flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
struct nvgpu_timeout timeout;
nvgpu_mutex_release(&vm->update_gmmu_lock);
nvgpu_timeout_init(vm->mm->g, &timeout, 10000,
NVGPU_TIMER_RETRY_TIMER);
do {
if (nvgpu_atomic_read(
&mapped_buffer->ref.refcount) == 1)
break;
nvgpu_udelay(5);
} while (!nvgpu_timeout_expired_msg(&timeout,
"sync-unmap failed on 0x%llx"));
nvgpu_mutex_acquire(&vm->update_gmmu_lock);
}
if (mapped_buffer->user_mapped == 0) {
nvgpu_mutex_release(&vm->update_gmmu_lock);
nvgpu_err(g, "addr already unmapped from user 0x%llx", offset);
return 0;
}
mapped_buffer->user_mapped--;
if (mapped_buffer->user_mapped == 0)
vm->num_user_mapped_buffers--;
vm->kref_put_batch = batch;
nvgpu_ref_put(&mapped_buffer->ref, nvgpu_vm_unmap_locked_ref);
vm->kref_put_batch = NULL;
nvgpu_mutex_release(&vm->update_gmmu_lock);
return 0;
}
/* NOTE! mapped_buffers lock must be held */
void nvgpu_vm_unmap_locked(struct nvgpu_mapped_buf *mapped_buffer,
struct vm_gk20a_mapping_batch *batch)
/*
* This is the function call-back for freeing OS specific components of an
* nvgpu_mapped_buf. This should most likely never be called outside of the
* core MM framework!
*
* Note: the VM lock will be held.
*/
void nvgpu_vm_unmap_system(struct nvgpu_mapped_buf *mapped_buffer)
{
struct vm_gk20a *vm = mapped_buffer->vm;
struct gk20a *g = vm->mm->g;
g->ops.mm.gmmu_unmap(vm,
mapped_buffer->addr,
mapped_buffer->size,
mapped_buffer->pgsz_idx,
mapped_buffer->va_allocated,
gk20a_mem_flag_none,
mapped_buffer->vm_area ?
mapped_buffer->vm_area->sparse : false,
batch);
gk20a_mm_unpin(dev_from_vm(vm), mapped_buffer->dmabuf,
mapped_buffer->sgt);
/* remove from mapped buffer tree and remove list, free */
nvgpu_remove_mapped_buf(vm, mapped_buffer);
if (!nvgpu_list_empty(&mapped_buffer->buffer_list))
nvgpu_list_del(&mapped_buffer->buffer_list);
/* keep track of mapped buffers */
if (mapped_buffer->user_mapped)
vm->num_user_mapped_buffers--;
if (mapped_buffer->own_mem_ref)
dma_buf_put(mapped_buffer->dmabuf);
nvgpu_kfree(g, mapped_buffer);
}


@@ -36,6 +36,9 @@
#include "gk20a/gk20a.h"
#include "gk20a/mm_gk20a.h"
static void __nvgpu_vm_unmap(struct nvgpu_mapped_buf *mapped_buffer,
struct vm_gk20a_mapping_batch *batch);
int vm_aspace_id(struct vm_gk20a *vm)
{
return vm->as_share ? vm->as_share->id : -1;
@@ -538,7 +541,7 @@ static void __nvgpu_vm_remove(struct vm_gk20a *vm)
nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers);
while (node) {
mapped_buffer = mapped_buffer_from_rbtree_node(node);
nvgpu_vm_unmap_locked(mapped_buffer, NULL);
__nvgpu_vm_unmap(mapped_buffer, NULL);
nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers);
}
@@ -702,8 +705,7 @@ void nvgpu_vm_put_buffers(struct vm_gk20a *vm,
vm->kref_put_batch = &batch;
for (i = 0; i < num_buffers; ++i)
nvgpu_ref_put(&mapped_buffers[i]->ref,
nvgpu_vm_unmap_locked_ref);
nvgpu_ref_put(&mapped_buffers[i]->ref, __nvgpu_vm_unmap_ref);
vm->kref_put_batch = NULL;
nvgpu_vm_mapping_batch_finish_locked(vm, &batch);
@@ -712,26 +714,118 @@ void nvgpu_vm_put_buffers(struct vm_gk20a *vm,
nvgpu_big_free(vm->mm->g, mapped_buffers);
}
void nvgpu_vm_unmap_locked_ref(struct nvgpu_ref *ref)
/*
* Really unmap. This does the real GMMU unmap and removes the mapping from the
* VM map tracking tree (and vm_area list if necessary).
*/
static void __nvgpu_vm_unmap(struct nvgpu_mapped_buf *mapped_buffer,
struct vm_gk20a_mapping_batch *batch)
{
struct vm_gk20a *vm = mapped_buffer->vm;
struct gk20a *g = vm->mm->g;
vm->num_user_mapped_buffers--;
g->ops.mm.gmmu_unmap(vm,
mapped_buffer->addr,
mapped_buffer->size,
mapped_buffer->pgsz_idx,
mapped_buffer->va_allocated,
gk20a_mem_flag_none,
mapped_buffer->vm_area ?
mapped_buffer->vm_area->sparse : false,
batch);
/*
* Remove from mapped buffer tree. Then delete the buffer from the
* linked list of mapped buffers; though note: not all mapped buffers
* are part of a vm_area.
*/
nvgpu_remove_mapped_buf(vm, mapped_buffer);
nvgpu_list_del(&mapped_buffer->buffer_list);
/*
* OS specific freeing. This is after the generic freeing in case the
* generic freeing relies on some component of the OS specific
* nvgpu_mapped_buf in some abstraction or the like.
*/
nvgpu_vm_unmap_system(mapped_buffer);
nvgpu_kfree(g, mapped_buffer);
}
void __nvgpu_vm_unmap_ref(struct nvgpu_ref *ref)
{
struct nvgpu_mapped_buf *mapped_buffer =
container_of(ref, struct nvgpu_mapped_buf, ref);
nvgpu_vm_unmap_locked(mapped_buffer, mapped_buffer->vm->kref_put_batch);
__nvgpu_vm_unmap(mapped_buffer, mapped_buffer->vm->kref_put_batch);
}
void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset)
/*
* For fixed-offset buffers we must sync the buffer. That means we wait for the
* buffer to hit a ref-count of 1 before proceeding.
*
* Note: this requires the update_gmmu_lock to be held since we release it and
* re-acquire it in this function.
*/
static int nvgpu_vm_unmap_sync_buffer(struct vm_gk20a *vm,
struct nvgpu_mapped_buf *mapped_buffer)
{
struct nvgpu_timeout timeout;
int ret = 0;
nvgpu_mutex_release(&vm->update_gmmu_lock);
/*
* 500ms timer.
*/
nvgpu_timeout_init(vm->mm->g, &timeout, 50, NVGPU_TIMER_CPU_TIMER);
do {
if (nvgpu_atomic_read(&mapped_buffer->ref.refcount) == 1)
break;
nvgpu_msleep(10);
} while (!nvgpu_timeout_expired_msg(&timeout,
"sync-unmap failed on 0x%llx"));
if (nvgpu_timeout_expired(&timeout))
ret = -ETIMEDOUT;
nvgpu_mutex_acquire(&vm->update_gmmu_lock);
return ret;
}
void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset,
struct vm_gk20a_mapping_batch *batch)
{
struct gk20a *g = vm->mm->g;
struct nvgpu_mapped_buf *mapped_buffer;
nvgpu_mutex_acquire(&vm->update_gmmu_lock);
mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, offset);
if (!mapped_buffer) {
nvgpu_mutex_release(&vm->update_gmmu_lock);
nvgpu_err(g, "invalid addr to unmap 0x%llx", offset);
return;
if (!mapped_buffer)
goto done;
if (mapped_buffer->flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
if (nvgpu_vm_unmap_sync_buffer(vm, mapped_buffer))
/*
* Looks like we have failed... Better not continue in
* case the buffer is in use.
*/
goto done;
}
nvgpu_ref_put(&mapped_buffer->ref, nvgpu_vm_unmap_locked_ref);
/*
* Make sure we have access to the batch if we end up calling through to
* the unmap_ref function.
*/
vm->kref_put_batch = batch;
nvgpu_ref_put(&mapped_buffer->ref, __nvgpu_vm_unmap_ref);
vm->kref_put_batch = NULL;
done:
nvgpu_mutex_release(&vm->update_gmmu_lock);
return;
}


@@ -208,7 +208,7 @@ int nvgpu_vm_area_free(struct vm_gk20a *vm, u64 addr)
&vm_area->buffer_list_head,
nvgpu_mapped_buf, buffer_list) {
nvgpu_list_del(&buffer->buffer_list);
nvgpu_ref_put(&buffer->ref, nvgpu_vm_unmap_locked_ref);
nvgpu_ref_put(&buffer->ref, __nvgpu_vm_unmap_ref);
}
/* if this was a sparse mapping, free the va */


@@ -98,10 +98,6 @@ int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
u64 mapping_size,
struct vm_gk20a_mapping_batch *batch);
/* Note: batch may be NULL if unmap op is not part of a batch */
int nvgpu_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset,
struct vm_gk20a_mapping_batch *batch);
/* find buffer corresponding to va */
int nvgpu_vm_find_buf(struct vm_gk20a *vm, u64 gpu_va,
struct dma_buf **dmabuf,


@@ -207,11 +207,19 @@ void nvgpu_vm_put_buffers(struct vm_gk20a *vm,
struct nvgpu_mapped_buf **mapped_buffers,
int num_buffers);
void nvgpu_vm_unmap_locked(struct nvgpu_mapped_buf *mapped_buffer,
void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset,
struct vm_gk20a_mapping_batch *batch);
void nvgpu_vm_unmap_locked_ref(struct nvgpu_ref *ref);
void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset);
/*
* Implemented by each OS. Called from within the core VM code to handle OS
* specific components of an nvgpu_mapped_buf.
*/
void nvgpu_vm_unmap_system(struct nvgpu_mapped_buf *mapped_buffer);
/*
* Don't use this outside of the core VM code!
*/
void __nvgpu_vm_unmap_ref(struct nvgpu_ref *ref);
/*
* These all require the VM update lock to be held.