Mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
gpu: nvgpu: VM map path refactoring
Final VM mapping refactoring. Move most of the logic in the VM map path
to the common/mm/vm.c code and use the generic APIs previously
implemented to deal with comptags and map caching.

This also updates the mapped_buffer struct to finally be free of the
Linux dma_buf and scatter-gather table pointers. These are replaced by
the nvgpu_os_buffer struct.

JIRA NVGPU-30
JIRA NVGPU-71
JIRA NVGPU-224

Change-Id: If5b32886221c3e5af2f3d7ddd4fa51dd487bb981
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1583987
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Parent: 8428c82c81
Commit: 01c98eb680
Committed by: mobile promotions
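For orientation, the OS abstraction at the center of this change is a pair of
small Linux-side structs, both visible in the header hunks below:
nvgpu_os_buffer describes a buffer handed in by the OS, and
nvgpu_mapped_buf_priv holds the OS-private state of an established mapping.
A minimal sketch (struct contents taken from this diff; the comments are
annotations added here, not part of the commit):

    struct nvgpu_os_buffer {
        struct dma_buf *dmabuf;    /* the buffer being mapped */
        struct device *dev;        /* device used for pinning */
    };

    struct nvgpu_mapped_buf_priv {
        struct dma_buf *dmabuf;    /* ref held for the mapping's lifetime */
        struct sg_table *sgt;      /* pin result, released on unmap */
    };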
@@ -64,7 +64,7 @@ static dma_addr_t gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr)
 	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
 	buffer = __nvgpu_vm_find_mapped_buf(vm, gpu_vaddr);
 	if (buffer)
-		addr = nvgpu_mem_get_addr_sgl(g, buffer->sgt->sgl);
+		addr = nvgpu_mem_get_addr_sgl(g, buffer->os_priv.sgt->sgl);
 	nvgpu_mutex_release(&vm->update_gmmu_lock);

 	return addr;
@@ -49,8 +49,8 @@ static struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_reverse(
 		struct nvgpu_mapped_buf *mapped_buffer =
 			mapped_buffer_from_rbtree_node(node);

-		if (mapped_buffer->dmabuf == dmabuf &&
-		    kind == mapped_buffer->kind)
+		if (mapped_buffer->os_priv.dmabuf == dmabuf &&
+		    mapped_buffer->kind == kind)
 			return mapped_buffer;

 		nvgpu_rbtree_enum_next(&node, node);
@@ -75,7 +75,7 @@ int nvgpu_vm_find_buf(struct vm_gk20a *vm, u64 gpu_va,
 		return -EINVAL;
 	}

-	*dmabuf = mapped_buffer->dmabuf;
+	*dmabuf = mapped_buffer->os_priv.dmabuf;
 	*offset = gpu_va - mapped_buffer->addr;

 	nvgpu_mutex_release(&vm->update_gmmu_lock);
@@ -83,66 +83,68 @@ int nvgpu_vm_find_buf(struct vm_gk20a *vm, u64 gpu_va,
 	return 0;
 }

+u64 nvgpu_os_buf_get_size(struct nvgpu_os_buffer *os_buf)
+{
+	return os_buf->dmabuf->size;
+}
+
 /*
  * vm->update_gmmu_lock must be held. This checks to see if we already have
  * mapped the passed buffer into this VM. If so, just return the existing
  * mapping address.
  */
-static u64 __nvgpu_vm_find_mapping(struct vm_gk20a *vm,
-				   struct dma_buf *dmabuf,
-				   u64 offset_align,
-				   u32 flags,
-				   int kind,
-				   int rw_flag)
+struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm,
+					       struct nvgpu_os_buffer *os_buf,
+					       u64 map_addr,
+					       u32 flags,
+					       int kind)
 {
 	struct gk20a *g = gk20a_from_vm(vm);
 	struct nvgpu_mapped_buf *mapped_buffer = NULL;

 	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
-		mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, offset_align);
+		mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, map_addr);
 		if (!mapped_buffer)
-			return 0;
+			return NULL;

-		if (mapped_buffer->dmabuf != dmabuf ||
+		if (mapped_buffer->os_priv.dmabuf != os_buf->dmabuf ||
 		    mapped_buffer->kind != (u32)kind)
-			return 0;
+			return NULL;
 	} else {
 		mapped_buffer =
-			__nvgpu_vm_find_mapped_buf_reverse(vm, dmabuf, kind);
+			__nvgpu_vm_find_mapped_buf_reverse(vm,
+							   os_buf->dmabuf,
+							   kind);
 		if (!mapped_buffer)
-			return 0;
+			return NULL;
 	}

 	if (mapped_buffer->flags != flags)
-		return 0;
+		return NULL;

 	/*
 	 * If we find the mapping here then that means we have mapped it already
-	 * and already have a dma_buf ref to the underlying buffer. As such
-	 * release the ref taken earlier in the map path.
+	 * and the prior pin and get must be undone.
 	 */
-	dma_buf_put(mapped_buffer->dmabuf);
-
-	nvgpu_ref_get(&mapped_buffer->ref);
+	gk20a_mm_unpin(os_buf->dev, os_buf->dmabuf, mapped_buffer->os_priv.sgt);
+	dma_buf_put(os_buf->dmabuf);

 	nvgpu_log(g, gpu_dbg_map,
 		  "gv: 0x%04x_%08x + 0x%-7zu "
-		  "[dma: 0x%02x_%08x, pa: 0x%02x_%08x] "
+		  "[dma: 0x%010llx, pa: 0x%010llx] "
 		  "pgsz=%-3dKb as=%-2d ctags=%d start=%d "
 		  "flags=0x%x apt=%s (reused)",
 		  u64_hi32(mapped_buffer->addr), u64_lo32(mapped_buffer->addr),
-		  dmabuf->size,
-		  u64_hi32((u64)sg_dma_address(mapped_buffer->sgt->sgl)),
-		  u64_lo32((u64)sg_dma_address(mapped_buffer->sgt->sgl)),
-		  u64_hi32((u64)sg_phys(mapped_buffer->sgt->sgl)),
-		  u64_lo32((u64)sg_phys(mapped_buffer->sgt->sgl)),
+		  os_buf->dmabuf->size,
+		  (u64)sg_dma_address(mapped_buffer->os_priv.sgt->sgl),
+		  (u64)sg_phys(mapped_buffer->os_priv.sgt->sgl),
 		  vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10,
 		  vm_aspace_id(vm),
 		  mapped_buffer->ctag_lines, mapped_buffer->ctag_offset,
 		  mapped_buffer->flags,
-		  nvgpu_aperture_str(gk20a_dmabuf_aperture(g, dmabuf)));
+		  nvgpu_aperture_str(gk20a_dmabuf_aperture(g, os_buf->dmabuf)));

-	return mapped_buffer->addr;
+	return mapped_buffer;
 }

 int nvgpu_vm_map_linux(struct vm_gk20a *vm,
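The reuse path above changes the reference bookkeeping: on a cache hit the
lookup itself undoes the pin and dma_buf reference taken earlier in this map
attempt, and the caller then takes an nvgpu_ref on the existing mapping. A
hedged caller-side sketch of that contract, mirroring how the common code
later in this diff uses the lookup (the helper name is hypothetical):

    /* Hypothetical helper; mirrors nvgpu_vm_map()'s use of the lookup. */
    static struct nvgpu_mapped_buf *try_reuse_mapping(struct vm_gk20a *vm,
                                                      struct nvgpu_os_buffer *os_buf,
                                                      u64 map_addr, u32 flags,
                                                      int kind)
    {
        struct nvgpu_mapped_buf *mapped_buffer;

        nvgpu_mutex_acquire(&vm->update_gmmu_lock);
        /* On a hit this undoes the caller's fresh pin + dma_buf ref. */
        mapped_buffer = nvgpu_vm_find_mapping(vm, os_buf, map_addr,
                                              flags, kind);
        nvgpu_mutex_release(&vm->update_gmmu_lock);

        if (mapped_buffer)
            nvgpu_ref_get(&mapped_buffer->ref); /* one ref per mapping user */

        return mapped_buffer;
    }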
@@ -159,237 +161,62 @@ int nvgpu_vm_map_linux(struct vm_gk20a *vm,
 {
 	struct gk20a *g = gk20a_from_vm(vm);
 	struct device *dev = dev_from_gk20a(g);
-	struct nvgpu_ctag_buffer_info binfo = { 0 };
-	struct gk20a_comptags comptags;
-	struct nvgpu_vm_area *vm_area = NULL;
-	struct nvgpu_sgt *nvgpu_sgt = NULL;
-	struct sg_table *sgt;
-	struct nvgpu_mapped_buf *mapped_buffer = NULL;
 	struct nvgpu_os_buffer os_buf = { dmabuf, dev };
-	enum nvgpu_aperture aperture;
-	bool va_allocated = false;
-	bool clear_ctags = false;
-	u64 map_offset = 0;
-	u64 align;
-	u32 ctag_offset;
+	struct sg_table *sgt;
+	struct nvgpu_sgt *nvgpu_sgt = NULL;
+	struct nvgpu_mapped_buf *mapped_buffer = NULL;
+	u64 map_addr = 0ULL;
 	int err = 0;

-	/*
-	 * The kind used as part of the key for map caching. HW may
-	 * actually be programmed with the fallback kind in case the
-	 * key kind is compressible but we're out of comptags.
-	 */
-	s16 map_key_kind;
-
-	binfo.flags = flags;
-	binfo.size = dmabuf->size;
-	binfo.compr_kind = compr_kind;
-	binfo.incompr_kind = incompr_kind;
-
-	if (compr_kind != NV_KIND_INVALID)
-		map_key_kind = compr_kind;
-	else
-		map_key_kind = incompr_kind;
-
-	if (map_key_kind == NV_KIND_INVALID) {
-		nvgpu_err(g, "Valid kind must be supplied");
-		return -EINVAL;
-	}
-
-	if (vm->userspace_managed &&
-	    !(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) {
-		nvgpu_err(g, "non-fixed-offset mapping not available on "
-			  "userspace managed address spaces");
-		return -EFAULT;
-	}
-
-	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
-
-	/* check if this buffer is already mapped */
-	if (!vm->userspace_managed) {
-		map_offset = __nvgpu_vm_find_mapping(
-			vm, dmabuf, offset_align,
-			flags, map_key_kind, rw_flag);
-		if (map_offset) {
-			nvgpu_mutex_release(&vm->update_gmmu_lock);
-			*gpu_va = map_offset;
-			return 0;
-		}
-	}
+	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)
+		map_addr = offset_align;

 	sgt = gk20a_mm_pin(dev, dmabuf);
 	if (IS_ERR(sgt)) {
-		err = PTR_ERR(sgt);
-		nvgpu_warn(g, "oom allocating tracking buffer");
+		nvgpu_warn(g, "Failed to pin dma_buf!");
+		return PTR_ERR(sgt);
 	}

+	if (gk20a_dmabuf_aperture(g, dmabuf) == APERTURE_INVALID) {
+		err = -EINVAL;
+		goto clean_up;
+	}
+
 	nvgpu_sgt = nvgpu_linux_sgt_create(g, sgt);
-	if (!nvgpu_sgt)
-		goto clean_up;
-
-	aperture = gk20a_dmabuf_aperture(g, dmabuf);
-	if (aperture == APERTURE_INVALID) {
-		err = -EINVAL;
+	if (!nvgpu_sgt) {
+		err = -ENOMEM;
 		goto clean_up;
 	}

-	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)
-		map_offset = offset_align;
-
-	align = nvgpu_sgt_alignment(g, nvgpu_sgt);
-	if (g->mm.disable_bigpage)
-		binfo.pgsz_idx = gmmu_page_size_small;
-	else
-		binfo.pgsz_idx = __get_pte_size(vm, map_offset,
-						min_t(u64, binfo.size, align));
-	mapping_size = mapping_size ? mapping_size : binfo.size;
-	mapping_size = ALIGN(mapping_size, SZ_4K);
-
-	if ((mapping_size > binfo.size) ||
-	    (buffer_offset > (binfo.size - mapping_size))) {
-		err = -EINVAL;
-		goto clean_up;
-	}
-
-	/* Check if we should use a fixed offset for mapping this buffer */
-	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
-		err = nvgpu_vm_area_validate_buffer(vm,
-						    offset_align,
-						    mapping_size,
-						    binfo.pgsz_idx,
-						    &vm_area);
-		if (err)
-			goto clean_up;
-
-		map_offset = offset_align;
-		va_allocated = false;
-	} else {
-		va_allocated = true;
-	}
-
-	err = nvgpu_vm_compute_compression(vm, &binfo);
-	if (err) {
-		nvgpu_err(g, "failure setting up compression");
-		goto clean_up;
-	}
-
-	/* bar1 and pmu vm don't need ctag */
-	if (!vm->enable_ctag)
-		binfo.ctag_lines = 0;
-
-	gk20a_get_comptags(&os_buf, &comptags);
-
-	if (binfo.ctag_lines && !comptags.lines) {
-		/* allocate compression resources if needed */
-		err = gk20a_alloc_comptags(g, &os_buf,
-					   &g->gr.comp_tags,
-					   binfo.ctag_lines);
-		if (err) {
-			/* TBD: we can partially alloc ctags as well... */
-
-			/* prevent compression ... */
-			binfo.compr_kind = NV_KIND_INVALID;
-
-			/* ... and make sure we have the fallback */
-			if (binfo.incompr_kind == NV_KIND_INVALID) {
-				nvgpu_err(g, "comptag alloc failed and no fallback kind specified");
-				goto clean_up;
-			}
-		} else {
-			gk20a_get_comptags(&os_buf, &comptags);
-
-			if (g->ops.ltc.cbc_ctrl)
-				g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear,
-						    comptags.offset,
-						    comptags.offset +
-						    comptags.allocated_lines - 1);
-			else
-				clear_ctags = true;
-		}
-	}
-
-	/*
-	 * Calculate comptag index for this mapping. Differs in
-	 * case of partial mapping.
-	 */
-	ctag_offset = comptags.offset;
-	if (ctag_offset)
-		ctag_offset += buffer_offset >>
-			       ilog2(g->ops.fb.compression_page_size(g));
-
-	/* update gmmu ptes */
-	map_offset = g->ops.mm.gmmu_map(vm,
-					map_offset,
-					nvgpu_sgt,
-					buffer_offset, /* sg offset */
-					mapping_size,
-					binfo.pgsz_idx,
-					(binfo.compr_kind != NV_KIND_INVALID ?
-					 binfo.compr_kind : binfo.incompr_kind),
-					ctag_offset,
-					flags, rw_flag,
-					clear_ctags,
-					false,
-					false,
-					batch,
-					aperture);
-	if (!map_offset)
-		goto clean_up;
+	mapped_buffer = nvgpu_vm_map(vm,
+				     &os_buf,
+				     nvgpu_sgt,
+				     map_addr,
+				     mapping_size,
+				     buffer_offset,
+				     rw_flag,
+				     flags,
+				     compr_kind,
+				     incompr_kind,
+				     batch,
+				     gk20a_dmabuf_aperture(g, dmabuf));

 	nvgpu_sgt_free(g, nvgpu_sgt);

-	mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer));
-	if (!mapped_buffer) {
-		nvgpu_warn(g, "oom allocating tracking buffer");
-		goto clean_up;
-	}
-	mapped_buffer->dmabuf = dmabuf;
-	mapped_buffer->sgt = sgt;
-	mapped_buffer->addr = map_offset;
-	mapped_buffer->size = mapping_size;
-	mapped_buffer->pgsz_idx = binfo.pgsz_idx;
-	mapped_buffer->ctag_offset = ctag_offset;
-	mapped_buffer->ctag_lines = binfo.ctag_lines;
-	mapped_buffer->ctag_allocated_lines = comptags.allocated_lines;
-	mapped_buffer->vm = vm;
-	mapped_buffer->flags = flags;
-	mapped_buffer->kind = map_key_kind;
-	mapped_buffer->va_allocated = va_allocated;
-	nvgpu_init_list_node(&mapped_buffer->buffer_list);
-	nvgpu_ref_init(&mapped_buffer->ref);
-
-	err = nvgpu_insert_mapped_buf(vm, mapped_buffer);
-	if (err) {
-		nvgpu_err(g, "failed to insert into mapped buffer tree");
+	if (IS_ERR(mapped_buffer)) {
+		err = PTR_ERR(mapped_buffer);
 		goto clean_up;
 	}

-	vm->num_user_mapped_buffers++;
+	mapped_buffer->os_priv.dmabuf = dmabuf;
+	mapped_buffer->os_priv.sgt = sgt;

-	if (vm_area) {
-		nvgpu_list_add_tail(&mapped_buffer->buffer_list,
-				    &vm_area->buffer_list_head);
-		mapped_buffer->vm_area = vm_area;
-	}
-
-	nvgpu_mutex_release(&vm->update_gmmu_lock);
-
-	*gpu_va = map_offset;
+	*gpu_va = mapped_buffer->addr;
 	return 0;

 clean_up:
-	nvgpu_kfree(g, mapped_buffer);
-
-	if (nvgpu_sgt)
-		nvgpu_sgt_free(g, nvgpu_sgt);
-	if (va_allocated)
-		__nvgpu_vm_free_va(vm, map_offset, binfo.pgsz_idx);
-	if (!IS_ERR(sgt))
-		gk20a_mm_unpin(dev, dmabuf, sgt);
+	gk20a_mm_unpin(dev, dmabuf, sgt);

-	nvgpu_mutex_release(&vm->update_gmmu_lock);
-	nvgpu_log_info(g, "err=%d", err);
 	return err;
 }
@@ -407,8 +234,6 @@ int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
 	struct dma_buf *dmabuf;
 	u64 ret_va;

-	gk20a_dbg_fn("");
-
 	/* get ref to the mem handle (released on unmap_locked) */
 	dmabuf = dma_buf_get(dmabuf_fd);
 	if (IS_ERR(dmabuf)) {
@@ -465,8 +290,8 @@ void nvgpu_vm_unmap_system(struct nvgpu_mapped_buf *mapped_buffer)
 {
 	struct vm_gk20a *vm = mapped_buffer->vm;

-	gk20a_mm_unpin(dev_from_vm(vm), mapped_buffer->dmabuf,
-		       mapped_buffer->sgt);
+	gk20a_mm_unpin(dev_from_vm(vm), mapped_buffer->os_priv.dmabuf,
+		       mapped_buffer->os_priv.sgt);

-	dma_buf_put(mapped_buffer->dmabuf);
+	dma_buf_put(mapped_buffer->os_priv.dmabuf);
 }
@@ -20,6 +20,7 @@
  * DEALINGS IN THE SOFTWARE.
  */

+#include <nvgpu/bug.h>
 #include <nvgpu/log.h>
 #include <nvgpu/dma.h>
 #include <nvgpu/vm.h>
@@ -712,6 +713,249 @@ void nvgpu_vm_put_buffers(struct vm_gk20a *vm,
 	nvgpu_big_free(vm->mm->g, mapped_buffers);
 }

+struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm,
+				      struct nvgpu_os_buffer *os_buf,
+				      struct nvgpu_sgt *sgt,
+				      u64 map_addr,
+				      u64 map_size,
+				      u64 phys_offset,
+				      int rw,
+				      u32 flags,
+				      s16 compr_kind,
+				      s16 incompr_kind,
+				      struct vm_gk20a_mapping_batch *batch,
+				      enum nvgpu_aperture aperture)
+{
+	struct gk20a *g = gk20a_from_vm(vm);
+	struct nvgpu_mapped_buf *mapped_buffer = NULL;
+	struct nvgpu_ctag_buffer_info binfo = { 0 };
+	struct gk20a_comptags comptags;
+	struct nvgpu_vm_area *vm_area = NULL;
+	int err = 0;
+	u64 align;
+	u32 ctag_offset;
+	bool clear_ctags = false;
+	bool va_allocated = true;
+
+	/*
+	 * The kind used as part of the key for map caching. HW may
+	 * actually be programmed with the fallback kind in case the
+	 * key kind is compressible but we're out of comptags.
+	 */
+	s16 map_key_kind;
+
+	if (vm->userspace_managed &&
+	    !(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) {
+		nvgpu_err(g,
+			  "non-fixed-offset mapping not available on "
+			  "userspace managed address spaces");
+		return ERR_PTR(-EINVAL);
+	}
+
+	binfo.flags = flags;
+	binfo.size = nvgpu_os_buf_get_size(os_buf);
+	binfo.compr_kind = compr_kind;
+	binfo.incompr_kind = incompr_kind;
+
+	if (compr_kind != NV_KIND_INVALID)
+		map_key_kind = compr_kind;
+	else
+		map_key_kind = incompr_kind;
+
+	/*
+	 * Check if this buffer is already mapped.
+	 */
+	if (!vm->userspace_managed) {
+		nvgpu_mutex_acquire(&vm->update_gmmu_lock);
+		mapped_buffer = nvgpu_vm_find_mapping(vm,
+						      os_buf,
+						      map_addr,
+						      flags,
+						      map_key_kind);
+		nvgpu_mutex_release(&vm->update_gmmu_lock);
+
+		if (mapped_buffer) {
+			nvgpu_ref_get(&mapped_buffer->ref);
+			return mapped_buffer;
+		}
+	}
+
+	/*
+	 * Generate a new mapping!
+	 */
+	mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer));
+	if (!mapped_buffer) {
+		nvgpu_warn(g, "oom allocating tracking buffer");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	align = nvgpu_sgt_alignment(g, sgt);
+	if (g->mm.disable_bigpage)
+		binfo.pgsz_idx = gmmu_page_size_small;
+	else
+		binfo.pgsz_idx = __get_pte_size(vm, map_addr,
+						min_t(u64, binfo.size, align));
+	map_size = map_size ? map_size : binfo.size;
+	map_size = ALIGN(map_size, SZ_4K);
+
+	if ((map_size > binfo.size) ||
+	    (phys_offset > (binfo.size - map_size))) {
+		err = -EINVAL;
+		goto clean_up_nolock;
+	}
+
+	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
+
+	/*
+	 * Check if we should use a fixed offset for mapping this buffer.
+	 */
+	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
+		err = nvgpu_vm_area_validate_buffer(vm,
+						    map_addr,
+						    map_size,
+						    binfo.pgsz_idx,
+						    &vm_area);
+		if (err)
+			goto clean_up;
+
+		va_allocated = false;
+	}
+
+	err = nvgpu_vm_compute_compression(vm, &binfo);
+	if (err) {
+		nvgpu_err(g, "failure setting up compression");
+		goto clean_up;
+	}
+
+	/*
+	 * bar1 and pmu VMs don't need ctags.
+	 */
+	if (!vm->enable_ctag)
+		binfo.ctag_lines = 0;
+
+	gk20a_get_comptags(os_buf, &comptags);
+
+	if (binfo.ctag_lines && !comptags.lines) {
+		/*
+		 * Allocate compression resources if needed.
+		 */
+		if (gk20a_alloc_comptags(g,
+					 os_buf,
+					 &g->gr.comp_tags,
+					 binfo.ctag_lines)) {
+
+			/*
+			 * Prevent compression...
+			 */
+			binfo.compr_kind = NV_KIND_INVALID;
+
+			/*
+			 * ... And make sure we have a fallback.
+			 */
+			if (binfo.incompr_kind == NV_KIND_INVALID) {
+				nvgpu_err(g, "comptag alloc failed and no "
+					  "fallback kind specified");
+				err = -ENOMEM;
+
+				/*
+				 * Any alloced comptags are cleaned up when the
+				 * dmabuf is freed.
+				 */
+				goto clean_up;
+			}
+		} else {
+			gk20a_get_comptags(os_buf, &comptags);
+
+			if (g->ops.ltc.cbc_ctrl)
+				g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear,
+						    comptags.offset,
+						    comptags.offset +
+						    comptags.allocated_lines - 1);
+			else
+				clear_ctags = true;
+		}
+	}
+
+	/*
+	 * Calculate comptag index for this mapping. Differs in case of partial
+	 * mapping.
+	 */
+	ctag_offset = comptags.offset;
+	if (ctag_offset)
+		ctag_offset += phys_offset >>
+			       ilog2(g->ops.fb.compression_page_size(g));
+
+	map_addr = g->ops.mm.gmmu_map(vm,
+				      map_addr,
+				      sgt,
+				      phys_offset,
+				      map_size,
+				      binfo.pgsz_idx,
+				      binfo.compr_kind != NV_KIND_INVALID ?
+				      binfo.compr_kind : binfo.incompr_kind,
+				      ctag_offset,
+				      flags,
+				      rw,
+				      clear_ctags,
+				      false,
+				      false,
+				      batch,
+				      aperture);
+	if (!map_addr) {
+		err = -ENOMEM;
+		goto clean_up;
+	}
+
+	nvgpu_init_list_node(&mapped_buffer->buffer_list);
+	nvgpu_ref_init(&mapped_buffer->ref);
+	mapped_buffer->addr = map_addr;
+	mapped_buffer->size = map_size;
+	mapped_buffer->pgsz_idx = binfo.pgsz_idx;
+	mapped_buffer->ctag_offset = ctag_offset;
+	mapped_buffer->ctag_lines = binfo.ctag_lines;
+	mapped_buffer->ctag_allocated_lines = comptags.allocated_lines;
+	mapped_buffer->vm = vm;
+	mapped_buffer->flags = flags;
+	mapped_buffer->kind = map_key_kind;
+	mapped_buffer->va_allocated = va_allocated;
+	mapped_buffer->vm_area = vm_area;
+
+	err = nvgpu_insert_mapped_buf(vm, mapped_buffer);
+	if (err) {
+		nvgpu_err(g, "failed to insert into mapped buffer tree");
+		goto clean_up;
+	}
+
+	vm->num_user_mapped_buffers++;
+
+	if (vm_area) {
+		nvgpu_list_add_tail(&mapped_buffer->buffer_list,
+				    &vm_area->buffer_list_head);
+		mapped_buffer->vm_area = vm_area;
+	}
+
+	nvgpu_mutex_release(&vm->update_gmmu_lock);
+
+	return mapped_buffer;
+
+clean_up:
+	if (mapped_buffer->addr)
+		g->ops.mm.gmmu_unmap(vm,
+				     mapped_buffer->addr,
+				     mapped_buffer->size,
+				     mapped_buffer->pgsz_idx,
+				     mapped_buffer->va_allocated,
+				     gk20a_mem_flag_none,
+				     mapped_buffer->vm_area ?
+				     mapped_buffer->vm_area->sparse : false,
+				     NULL);
+	nvgpu_mutex_release(&vm->update_gmmu_lock);
+clean_up_nolock:
+	nvgpu_kfree(g, mapped_buffer);
+
+	return ERR_PTR(err);
+}
+
 /*
  * Really unmap. This does the real GMMU unmap and removes the mapping from the
  * VM map tracking tree (and vm_area list if necessary).
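One detail worth noting in the new common path above is the comptag index
calculation for partial mappings: the buffer's base comptag offset advances by
one line per compression page skipped at the start of the mapping. A worked
sketch, assuming a 128 KiB compression page size (a hypothetical value; the
real one is whatever g->ops.fb.compression_page_size() reports for the chip):

    /* Illustration only; the 128 KiB compression page is an assumption. */
    u32 example_ctag_offset(u32 base_offset, u64 phys_offset)
    {
        const u64 comp_page_size = 128ULL << 10;

        /* A zero base means the buffer has no comptags at all. */
        if (!base_offset)
            return 0;

        /*
         * E.g. phys_offset = 256 KiB gives 256K >> ilog2(128K) = 2, so
         * the mapping starts two comptag lines into the allocation.
         */
        return base_offset + (u32)(phys_offset >> ilog2(comp_page_size));
    }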
@@ -44,6 +44,11 @@ struct nvgpu_os_buffer {
 	struct device *dev;
 };

+struct nvgpu_mapped_buf_priv {
+	struct dma_buf *dmabuf;
+	struct sg_table *sgt;
+};
+
 /* NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL must be set */
 int nvgpu_vm_map_linux(struct vm_gk20a *vm,
 		       struct dma_buf *dmabuf,
@@ -37,6 +37,18 @@ struct vm_gk20a;
 struct nvgpu_vm_area;
 struct gk20a_comptag_allocator;

+/*
+ * Defined by each OS. Allows the common VM code do things to the OS specific
+ * buffer structures.
+ */
+struct nvgpu_os_buffer;
+
+#ifdef __KERNEL__
+#include <nvgpu/linux/vm.h>
+#else
+/* QNX include goes here. */
+#endif
+
 /**
  * This header contains the OS agnostic APIs for dealing with VMs. Most of the
  * VM implementation is system specific - it must translate from a platform's
@@ -89,13 +101,12 @@ struct nvgpu_mapped_buf {
 	struct vm_gk20a *vm;
 	struct nvgpu_vm_area *vm_area;

+	struct nvgpu_ref ref;
+
 	struct nvgpu_rbtree_node node;
 	struct nvgpu_list_node buffer_list;
 	u64 addr;
 	u64 size;
-	struct dma_buf *dmabuf;
-	struct sg_table *sgt;
-	struct nvgpu_ref ref;

 	u32 pgsz_idx;
 	u32 ctag_offset;
@@ -105,13 +116,16 @@ struct nvgpu_mapped_buf {
 	u32 flags;
 	u32 kind;
 	bool va_allocated;
-};

-/*
- * Defined by each OS. Allows the common VM code do things to the OS specific
- * buffer structures.
- */
-struct nvgpu_os_buffer;
+	/*
+	 * Separate from the nvgpu_os_buffer struct to clearly distinguish
+	 * lifetime. A nvgpu_mapped_buf_priv will _always_ be wrapped by a
+	 * struct nvgpu_mapped_buf; however, there are times when a struct
+	 * nvgpu_os_buffer would be separate. This aims to prevent dangerous
+	 * usage of container_of() or the like in OS code.
+	 */
+	struct nvgpu_mapped_buf_priv os_priv;
+};

 static inline struct nvgpu_mapped_buf *
 nvgpu_mapped_buf_from_buffer_list(struct nvgpu_list_node *node)
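The lifetime comment above is the reason os_priv is a distinct type rather
than an embedded nvgpu_os_buffer: only the always-embedded member may be used
to recover the wrapper. A sketch of the one container_of() use the layout
does permit (the helper name is hypothetical, not part of the commit):

    static inline struct nvgpu_mapped_buf *
    mapped_buf_from_os_priv(struct nvgpu_mapped_buf_priv *priv)
    {
        /* Safe: os_priv is always embedded in a struct nvgpu_mapped_buf. */
        return container_of(priv, struct nvgpu_mapped_buf, os_priv);
    }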
@@ -226,6 +240,25 @@ void nvgpu_vm_put_buffers(struct vm_gk20a *vm,
 			  struct nvgpu_mapped_buf **mapped_buffers,
 			  int num_buffers);

+struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm,
+					       struct nvgpu_os_buffer *os_buf,
+					       u64 map_addr,
+					       u32 flags,
+					       int kind);
+
+struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm,
+				      struct nvgpu_os_buffer *os_buf,
+				      struct nvgpu_sgt *sgt,
+				      u64 map_addr,
+				      u64 map_size,
+				      u64 phys_offset,
+				      int rw,
+				      u32 flags,
+				      s16 compr_kind,
+				      s16 incompr_kind,
+				      struct vm_gk20a_mapping_batch *batch,
+				      enum nvgpu_aperture aperture);
+
 void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset,
 		    struct vm_gk20a_mapping_batch *batch);
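Note that nvgpu_vm_map() reports failure through ERR_PTR() rather than NULL,
as the Linux path earlier in this diff already shows with IS_ERR()/PTR_ERR().
A hedged sketch of a minimal caller; the argument values here are illustrative
assumptions, not defaults defined by this API:

    static int map_whole_buffer(struct vm_gk20a *vm,
                                struct nvgpu_os_buffer *os_buf,
                                struct nvgpu_sgt *sgt, u64 *gpu_va)
    {
        struct nvgpu_mapped_buf *mapped_buffer;

        mapped_buffer = nvgpu_vm_map(vm, os_buf, sgt,
                                     0ULL,             /* map_addr: let the VM pick */
                                     0ULL,             /* map_size: whole buffer */
                                     0ULL,             /* phys_offset */
                                     gk20a_mem_flag_none,   /* rw */
                                     0,                /* flags */
                                     NV_KIND_INVALID,  /* no compressible kind */
                                     0,                /* assumed valid incompressible kind */
                                     NULL,             /* no batch */
                                     APERTURE_SYSMEM); /* assumed aperture */
        if (IS_ERR(mapped_buffer))
            return PTR_ERR(mapped_buffer);

        *gpu_va = mapped_buffer->addr;
        return 0;
    }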
@@ -240,6 +273,8 @@ void nvgpu_vm_unmap_system(struct nvgpu_mapped_buf *mapped_buffer);
  */
 void __nvgpu_vm_unmap_ref(struct nvgpu_ref *ref);

+u64 nvgpu_os_buf_get_size(struct nvgpu_os_buffer *os_buf);
+
 /*
  * These all require the VM update lock to be held.
  */