gpu: nvgpu: Update Linux side VM code for API solidification

Update the Linux-specific code to match the MM API docs in the
previous patch. The user-passed page size is plumbed through the
Linux VM mapping calls but is ultimately ignored once the core VM
code is called. This will be handled in the next patch.
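
As a rough userspace-side illustration of that plumbing, the sketch
below requests 64K pages through the extended map-buffer ioctl. The
struct, field, and ioctl names are assumed from the nvgpu uapi header
rather than taken from this patch, so treat it only as a sketch.

/*
 * Hypothetical userspace sketch: nvgpu_as_map_buffer_ex_args,
 * NVGPU_AS_IOCTL_MAP_BUFFER_EX and <linux/nvgpu.h> are assumed uapi
 * names, not part of this patch.
 */
#include <string.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

static int map_dmabuf_with_64k_pages(int as_fd, int dmabuf_fd)
{
        struct nvgpu_as_map_buffer_ex_args args;

        memset(&args, 0, sizeof(args));
        args.dmabuf_fd = dmabuf_fd;
        args.page_size = 64 * 1024; /* user requested page size */
        /* Non-fixed map: offset, buffer_offset and mapping_size stay 0. */

        /*
         * After this patch the kernel carries page_size down through
         * nvgpu_vm_map_buffer()/nvgpu_vm_map_linux(); the core VM code
         * only starts honouring it in the next patch.
         */
        return ioctl(as_fd, NVGPU_AS_IOCTL_MAP_BUFFER_EX, &args);
}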

This also adds some code to pick the CDE mapping page size
semi-intelligently: in many cases the CDE buffers can be mapped with
large pages.
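
A standalone sketch of that page size decision follows; it mirrors the
gk20a_cde_mapping_page_size() helper added below, with made-up values,
and is illustrative only, not driver code.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SZ_4K  (4u << 10)
#define SZ_64K (64u << 10)

/*
 * Same decision as the CDE helper: big pages only when the GPU sits
 * behind an IOMMU (buffers then look contiguous to the GPU) and both
 * the offset and size are big-page aligned; otherwise fall back to 4K.
 */
static uint32_t pick_page_size(bool iommuable, uint32_t big_page_size,
                               uint32_t map_offset, uint32_t map_size)
{
        if (!iommuable)
                return SZ_4K;

        if ((map_offset & (big_page_size - 1)) ||
            (map_size & (big_page_size - 1)))
                return SZ_4K;

        return big_page_size;
}

int main(void)
{
        /* 192K offset, 256K size: both 64K aligned -> 65536 (big pages). */
        printf("%u\n", pick_page_size(true, SZ_64K, 3 * SZ_64K, 4 * SZ_64K));

        /* Offset off by 4K: not big-page aligned -> 4096 (small pages). */
        printf("%u\n", pick_page_size(true, SZ_64K, SZ_4K, 4 * SZ_64K));

        return 0;
}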

Bug 2011640

Change-Id: I20e78e7d5a841e410864b474179e71da1c2482f4
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1740610
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Author:    Alex Waterman
Date:      2018-06-05 20:53:16 +01:00
Committer: mobile promotions
Commit:    840e039d57 (parent: 46666ed101)

5 changed files with 65 additions and 15 deletions


@@ -54,8 +54,9 @@ struct nvgpu_mapped_buf_priv {
 /* NVGPU_AS_MAP_BUFFER_FLAGS_DIRECT_KIND_CTRL must be set */
 int nvgpu_vm_map_linux(struct vm_gk20a *vm,
                        struct dma_buf *dmabuf,
-                       u64 offset_align,
+                       u64 map_addr,
                        u32 flags,
+                       u32 page_size,
                        s16 compr_kind,
                        s16 incompr_kind,
                        int rw_flag,
@@ -71,8 +72,9 @@ int nvgpu_vm_map_linux(struct vm_gk20a *vm,
  */
 int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
                         int dmabuf_fd,
-                        u64 *offset_align,
+                        u64 *map_addr,
                         u32 flags, /* NVGPU_AS_MAP_BUFFER_FLAGS_ */
+                        u32 page_size,
                         s16 compr_kind,
                         s16 incompr_kind,
                         u64 buffer_offset,


@@ -975,6 +975,30 @@ static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct nvgpu_os_linux *l
         return cde_ctx;
 }
 
+static u32 gk20a_cde_mapping_page_size(struct vm_gk20a *vm,
+                                       u32 map_offset, u32 map_size)
+{
+        struct gk20a *g = gk20a_from_vm(vm);
+
+        /*
+         * To be simple we will just make the map size depend on the
+         * iommu'ability of the driver. If there's an IOMMU we can rely on
+         * buffers being contiguous. If not, then we'll use 4k pages since we
+         * know that will work for any buffer.
+         */
+        if (!nvgpu_iommuable(g))
+                return SZ_4K;
+
+        /*
+         * If map size or offset is not 64K aligned then use small pages.
+         */
+        if (map_size & (vm->big_page_size - 1) ||
+            map_offset & (vm->big_page_size - 1))
+                return SZ_4K;
+
+        return vm->big_page_size;
+}
+
 int gk20a_cde_convert(struct nvgpu_os_linux *l,
                       struct dma_buf *compbits_scatter_buf,
                       u64 compbits_byte_offset,
@@ -1071,7 +1095,10 @@ __releases(&l->cde_app->mutex)
         err = nvgpu_vm_map_linux(cde_ctx->vm, compbits_scatter_buf, 0,
                                  NVGPU_VM_MAP_CACHEABLE |
                                  NVGPU_VM_MAP_DIRECT_KIND_CTRL,
-                                 NVGPU_KIND_INVALID,
+                                 gk20a_cde_mapping_page_size(cde_ctx->vm,
+                                                             map_offset,
+                                                             map_size),
+                                 NV_KIND_INVALID,
                                  compbits_kind, /* incompressible kind */
                                  gk20a_mem_flag_none,
                                  map_offset, map_size,


@@ -111,6 +111,7 @@ static int gk20a_as_ioctl_map_buffer_ex(
         return nvgpu_vm_map_buffer(as_share->vm, args->dmabuf_fd,
                                    &args->offset, args->flags,
+                                   args->page_size,
                                    args->compr_kind,
                                    args->incompr_kind,
                                    args->buffer_offset,
@@ -201,7 +202,7 @@ static int gk20a_as_ioctl_map_buffer_batch(
                 err = nvgpu_vm_map_buffer(
                         as_share->vm, map_args.dmabuf_fd,
-                        &map_args.offset, map_args.flags,
+                        &map_args.offset, map_args.flags, map_args.page_size,
                         compressible_kind, incompressible_kind,
                         map_args.buffer_offset,
                         map_args.mapping_size,


@@ -1372,10 +1372,11 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
                         args->dmabuf_fd,
                         &args->offset,
                         0,
+                        SZ_4K,
                         0,
                         0,
                         0,
                         args->mapping_size,
                         NULL);
         if (err)
                 goto err_remove_vm;


@@ -175,8 +175,9 @@ struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm,
 int nvgpu_vm_map_linux(struct vm_gk20a *vm,
                        struct dma_buf *dmabuf,
-                       u64 offset_align,
+                       u64 map_addr,
                        u32 flags,
+                       u32 page_size,
                        s16 compr_kind,
                        s16 incompr_kind,
                        int rw_flag,
@@ -192,12 +193,8 @@ int nvgpu_vm_map_linux(struct vm_gk20a *vm,
         struct nvgpu_sgt *nvgpu_sgt = NULL;
         struct nvgpu_mapped_buf *mapped_buffer = NULL;
         struct dma_buf_attachment *attachment;
-        u64 map_addr = 0ULL;
         int err = 0;
 
-        if (flags & NVGPU_VM_MAP_FIXED_OFFSET)
-                map_addr = offset_align;
-
         sgt = gk20a_mm_pin(dev, dmabuf, &attachment);
         if (IS_ERR(sgt)) {
                 nvgpu_warn(g, "Failed to pin dma_buf!");
@@ -253,8 +250,9 @@ clean_up:
 int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
                         int dmabuf_fd,
-                        u64 *offset_align,
+                        u64 *map_addr,
                         u32 flags, /*NVGPU_AS_MAP_BUFFER_FLAGS_*/
+                        u32 page_size,
                         s16 compr_kind,
                         s16 incompr_kind,
                         u64 buffer_offset,
@@ -274,6 +272,26 @@ int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
                 return PTR_ERR(dmabuf);
         }
 
+        /*
+         * For regular maps we do not accept either an input address or a
+         * buffer_offset.
+         */
+        if (!(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) &&
+            (buffer_offset || *map_addr)) {
+                nvgpu_err(g,
+                          "Regular map with addr/buf offset is not supported!");
+                return -EINVAL;
+        }
+
+        /*
+         * Map size is always buffer size for non fixed mappings. As such map
+         * size should be left as zero by userspace for non-fixed maps.
+         */
+        if (mapping_size && !(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) {
+                nvgpu_err(g, "map_size && non-fixed-mapping!");
+                return -EINVAL;
+        }
+
         /* verify that we're not overflowing the buffer, i.e.
          * (buffer_offset + mapping_size) > dmabuf->size.
          *
@@ -284,7 +302,7 @@ int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
         if ((mapping_size > dmabuf->size) ||
             (buffer_offset > (dmabuf->size - mapping_size))) {
                 nvgpu_err(g,
-                          "buf size %llx < (offset(%llx) + map_size(%llx))\n",
+                          "buf size %llx < (offset(%llx) + map_size(%llx))",
                           (u64)dmabuf->size, buffer_offset, mapping_size);
                 dma_buf_put(dmabuf);
                 return -EINVAL;
@@ -296,8 +314,9 @@ int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
                 return err;
         }
 
-        err = nvgpu_vm_map_linux(vm, dmabuf, *offset_align,
+        err = nvgpu_vm_map_linux(vm, dmabuf, *map_addr,
                                  nvgpu_vm_translate_linux_flags(g, flags),
+                                 page_size,
                                  compr_kind, incompr_kind,
                                  gk20a_mem_flag_none,
                                  buffer_offset,
@@ -306,7 +325,7 @@ int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
                                  &ret_va);
 
         if (!err)
-                *offset_align = ret_va;
+                *map_addr = ret_va;
         else
                 dma_buf_put(dmabuf);