gpu: nvgpu: User-space managed address space support

Implement NVGPU_GPU_IOCTL_ALLOC_AS_FLAGS_USERSPACE_MANAGED, which
enables creating userspace-managed GPU address spaces.

When an address space is marked as userspace-managed, the following
changes are in effect (a usage sketch follows the list):

- Only fixed-address mappings are allowed.
- VA space allocation for fixed-address mappings is not required,
  except to mark space as sparse.
- Maps and unmaps are always immediate. In particular, the mapping
  ref increments at kickoffs and decrements at job completion are
  skipped.
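
For illustration, a minimal userspace sketch of allocating such an
address space with the new flag. It assumes the usual
/dev/nvhost-ctrl-gpu control node and that NVGPU_GPU_IOCTL_ALLOC_AS and
struct nvgpu_alloc_as_args come from the installed nvgpu UAPI header;
support should first be confirmed via the new
NVGPU_GPU_FLAGS_SUPPORT_USERSPACE_MANAGED_AS bit in the GPU
characteristics. Error handling is trimmed.

/* Sketch only: device node path and header location are assumptions. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

/* Returns an as_fd for a userspace-managed address space, or -1. */
static int alloc_userspace_managed_as(void)
{
	struct nvgpu_alloc_as_args args;
	int ctrl_fd = open("/dev/nvhost-ctrl-gpu", O_RDWR);

	if (ctrl_fd < 0)
		return -1;

	memset(&args, 0, sizeof(args));
	args.big_page_size = 0; /* 0 selects the default big page size */
	args.flags = NVGPU_GPU_IOCTL_ALLOC_AS_FLAGS_USERSPACE_MANAGED;

	if (ioctl(ctrl_fd, NVGPU_GPU_IOCTL_ALLOC_AS, &args) != 0) {
		perror("NVGPU_GPU_IOCTL_ALLOC_AS");
		close(ctrl_fd);
		return -1;
	}

	close(ctrl_fd);
	/* Only fixed-offset mappings will be accepted on args.as_fd. */
	return args.as_fd;
}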

Bug 1614735
Bug 1623949
Bug 1660392

Change-Id: I834fe19b3f65e9b02c268952383eddee0e465759
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: http://git-master/r/738558
Reviewed-on: http://git-master/r/833253
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Author:    Sami Kiminki
Date:      2015-05-04 18:41:23 +03:00
Committer: Terje Bergstrom
Parent:    503d3a0b10
Commit:    9d2c9072c8

9 changed files with 148 additions and 38 deletions

View File

@@ -38,7 +38,8 @@ static void release_as_share_id(struct gk20a_as *as, int id)
}
int gk20a_as_alloc_share(struct gk20a_as *as,
u32 flags, struct gk20a_as_share **out)
u32 big_page_size, u32 flags,
struct gk20a_as_share **out)
{
struct gk20a *g = gk20a_from_as(as);
struct gk20a_as_share *as_share;
@@ -59,7 +60,7 @@ int gk20a_as_alloc_share(struct gk20a_as *as,
err = gk20a_busy(g->dev);
if (err)
goto failed;
err = g->ops.mm.vm_alloc_share(as_share, flags);
err = g->ops.mm.vm_alloc_share(as_share, big_page_size, flags);
gk20a_idle(g->dev);
if (err)
@@ -332,7 +333,7 @@ int gk20a_as_dev_open(struct inode *inode, struct file *filp)
g = container_of(inode->i_cdev, struct gk20a, as.cdev);
err = gk20a_as_alloc_share(&g->as, 0, &as_share);
err = gk20a_as_alloc_share(&g->as, 0, 0, &as_share);
if (err) {
gk20a_dbg_fn("failed to alloc share");
return err;

View File

@@ -1,7 +1,7 @@
/*
* GK20A Address Spaces
*
* Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -42,7 +42,9 @@ int gk20a_as_release_share(struct gk20a_as_share *as_share);
int gk20a_as_dev_open(struct inode *inode, struct file *filp);
int gk20a_as_dev_release(struct inode *inode, struct file *filp);
long gk20a_as_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
int gk20a_as_alloc_share(struct gk20a_as *as,
/* if big_page_size == 0, the default big page size is used */
int gk20a_as_alloc_share(struct gk20a_as *as, u32 big_page_size,
u32 flags, struct gk20a_as_share **out);
#endif

View File

@@ -166,7 +166,8 @@ static int gk20a_ctrl_alloc_as(
goto clean_up;
}
err = gk20a_as_alloc_share(&g->as, args->big_page_size, &as_share);
err = gk20a_as_alloc_share(&g->as, args->big_page_size, args->flags,
&as_share);
if (err)
goto clean_up_file;

View File

@@ -1993,6 +1993,8 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
gk20a_platform_has_syncpoints(g->dev))
gpu->flags |= NVGPU_GPU_FLAGS_HAS_SYNCPOINTS;
gpu->flags |= NVGPU_GPU_FLAGS_SUPPORT_USERSPACE_MANAGED_AS;
gpu->gpc_mask = 1;
g->ops.gr.detect_sm_arch(g);

View File

@@ -370,7 +370,7 @@ struct gpu_ops {
struct vm_gk20a_mapping_batch *batch);
void (*vm_remove)(struct vm_gk20a *vm);
int (*vm_alloc_share)(struct gk20a_as_share *as_share,
u32 flags);
u32 big_page_size, u32 flags);
int (*vm_bind_channel)(struct gk20a_as_share *as_share,
struct channel_gk20a *ch);
int (*fb_flush)(struct gk20a *g);

View File

@@ -767,6 +767,12 @@ int gk20a_vm_get_buffers(struct vm_gk20a *vm,
struct rb_node *node;
int i = 0;
if (vm->userspace_managed) {
*mapped_buffers = NULL;
*num_buffers = 0;
return 0;
}
mutex_lock(&vm->update_gmmu_lock);
buffer_list = nvgpu_alloc(sizeof(*buffer_list) *
@@ -1135,7 +1141,8 @@ static int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
static int validate_fixed_buffer(struct vm_gk20a *vm,
struct buffer_attrs *bfr,
u64 map_offset, u64 map_size)
u64 map_offset, u64 map_size,
struct vm_reserved_va_node **pva_node)
{
struct device *dev = dev_from_vm(vm);
struct vm_reserved_va_node *va_node;
@@ -1154,15 +1161,16 @@ static int validate_fixed_buffer(struct vm_gk20a *vm,
return -EINVAL;
}
/* find the space reservation */
/* Find the space reservation, but it's ok to have none for
* userspace-managed address spaces */
va_node = addr_to_reservation(vm, map_offset);
if (!va_node) {
if (!va_node && !vm->userspace_managed) {
gk20a_warn(dev, "fixed offset mapping without space allocation");
return -EINVAL;
}
/* mapped area should fit inside va */
if (map_end > va_node->vaddr_start + va_node->size) {
/* Mapped area should fit inside va, if there's one */
if (va_node && map_end > va_node->vaddr_start + va_node->size) {
gk20a_warn(dev, "fixed offset mapping size overflows va node");
return -EINVAL;
}
@@ -1177,6 +1185,8 @@ static int validate_fixed_buffer(struct vm_gk20a *vm,
return -EINVAL;
}
*pva_node = va_node;
return 0;
}
@@ -1411,17 +1421,29 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
u64 buf_addr;
u64 ctag_map_win_size = 0;
u32 ctag_map_win_ctagline = 0;
struct vm_reserved_va_node *va_node = NULL;
if (user_mapped && vm->userspace_managed &&
!(flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)) {
gk20a_err(d,
"%s: non-fixed-offset mapping not available on userspace managed address spaces",
__func__);
return -EFAULT;
}
mutex_lock(&vm->update_gmmu_lock);
/* check if this buffer is already mapped */
map_offset = gk20a_vm_map_duplicate_locked(vm, dmabuf, offset_align,
if (!vm->userspace_managed) {
map_offset = gk20a_vm_map_duplicate_locked(
vm, dmabuf, offset_align,
flags, kind, sgt,
user_mapped, rw_flag);
if (map_offset) {
mutex_unlock(&vm->update_gmmu_lock);
return map_offset;
}
}
/* pin buffer to get phys/iovmm addr */
bfr.sgt = gk20a_mm_pin(d, dmabuf);
@@ -1504,7 +1526,8 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
err = validate_fixed_buffer(vm, &bfr,
offset_align, mapping_size);
offset_align, mapping_size,
&va_node);
if (err)
goto clean_up;
@@ -1671,11 +1694,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
gk20a_dbg_info("allocated va @ 0x%llx", map_offset);
if (!va_allocated) {
struct vm_reserved_va_node *va_node;
/* find the space reservation */
va_node = addr_to_reservation(vm, map_offset);
if (va_node) {
list_add_tail(&mapped_buffer->va_buffers_list,
&va_node->va_buffers_list);
mapped_buffer->va_node = va_node;
@@ -1753,18 +1772,27 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm,
struct mapped_buffer_node *mapped_buffer;
struct gk20a *g = gk20a_from_vm(vm);
struct device *d = dev_from_vm(vm);
const bool fixed_mapping =
(flags & NVGPU_AS_MAP_BUFFER_COMPBITS_FLAGS_FIXED_OFFSET) != 0;
if (flags & NVGPU_AS_MAP_BUFFER_COMPBITS_FLAGS_FIXED_OFFSET) {
/* This will be implemented later */
if (vm->userspace_managed && !fixed_mapping) {
gk20a_err(d,
"%s: fixed-offset compbits mapping not yet supported",
"%s: non-fixed-offset mapping is not available on userspace managed address spaces",
__func__);
return -EFAULT;
}
if (fixed_mapping && !vm->userspace_managed) {
gk20a_err(d,
"%s: fixed-offset mapping is available only on userspace managed address spaces",
__func__);
return -EFAULT;
}
mutex_lock(&vm->update_gmmu_lock);
mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, mapping_gva);
mapped_buffer =
find_mapped_buffer_locked(&vm->mapped_buffers, mapping_gva);
if (!mapped_buffer || !mapped_buffer->user_mapped) {
mutex_unlock(&vm->update_gmmu_lock);
@@ -1774,7 +1802,8 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm,
if (!mapped_buffer->ctags_mappable) {
mutex_unlock(&vm->update_gmmu_lock);
gk20a_err(d, "%s: comptags not mappable, offset 0x%llx", __func__, mapping_gva);
gk20a_err(d, "%s: comptags not mappable, offset 0x%llx",
__func__, mapping_gva);
return -EFAULT;
}
@@ -1804,10 +1833,41 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm,
cacheline_offset_start =
cacheline_start * aggregate_cacheline_sz;
if (fixed_mapping) {
struct buffer_attrs bfr;
int err;
struct vm_reserved_va_node *va_node = NULL;
memset(&bfr, 0, sizeof(bfr));
bfr.pgsz_idx = small_pgsz_index;
err = validate_fixed_buffer(
vm, &bfr, *compbits_win_gva,
mapped_buffer->ctag_map_win_size, &va_node);
if (err) {
mutex_unlock(&vm->update_gmmu_lock);
return err;
}
if (va_node) {
/* this would create a dangling GPU VA
* pointer if the space is freed
* before the buffer is
* unmapped */
mutex_unlock(&vm->update_gmmu_lock);
gk20a_err(d,
"%s: comptags cannot be mapped into allocated space",
__func__);
return -EINVAL;
}
}
mapped_buffer->ctag_map_win_addr =
g->ops.mm.gmmu_map(
vm,
0,
!fixed_mapping ? 0 : *compbits_win_gva, /* va */
g->gr.compbit_store.mem.sgt,
cacheline_offset_start, /* sg offset */
mapped_buffer->ctag_map_win_size, /* size */
@@ -1828,6 +1888,15 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm,
__func__, mapping_gva);
return -ENOMEM;
}
} else if (fixed_mapping && *compbits_win_gva &&
mapped_buffer->ctag_map_win_addr != *compbits_win_gva) {
mutex_unlock(&vm->update_gmmu_lock);
gk20a_err(d,
"%s: re-requesting comptags map into mismatching address. buffer offset 0x"
"%llx, existing comptag map at 0x%llx, requested remap 0x%llx",
__func__, mapping_gva,
mapped_buffer->ctag_map_win_addr, *compbits_win_gva);
return -EINVAL;
}
*mapping_iova = gk20a_mm_iova_addr(g, mapped_buffer->sgt->sgl, 0);
@@ -2662,6 +2731,7 @@ int gk20a_init_vm(struct mm_gk20a *mm,
u64 kernel_reserved,
u64 aperture_size,
bool big_pages,
bool userspace_managed,
char *name)
{
int err, i;
@@ -2685,6 +2755,8 @@ int gk20a_init_vm(struct mm_gk20a *mm,
vm->big_page_size = gmmu_page_sizes[gmmu_page_size_big];
vm->userspace_managed = userspace_managed;
vm->mmu_levels = vm->mm->g->ops.mm.get_mmu_levels(vm->mm->g,
vm->big_page_size);
@@ -2821,7 +2893,8 @@ clean_up_pdes:
}
/* address space interfaces for the gk20a module */
int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size)
int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size,
u32 flags)
{
struct gk20a_as *as = as_share->as;
struct gk20a *g = gk20a_from_as(as);
@@ -2829,6 +2902,8 @@ int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size)
struct vm_gk20a *vm;
char name[32];
int err;
const bool userspace_managed =
(flags & NVGPU_GPU_IOCTL_ALLOC_AS_FLAGS_USERSPACE_MANAGED) != 0;
gk20a_dbg_fn("");
@@ -2856,7 +2931,7 @@ int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size)
err = gk20a_init_vm(mm, vm, big_page_size, big_page_size << 10,
mm->channel.kernel_size,
mm->channel.user_size + mm->channel.kernel_size,
!mm->disable_bigpage, name);
!mm->disable_bigpage, userspace_managed, name);
return err;
}
@@ -3235,7 +3310,7 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size);
gk20a_init_vm(mm, vm, big_page_size, SZ_4K,
mm->bar1.aperture_size - SZ_4K,
mm->bar1.aperture_size, false, "bar1");
mm->bar1.aperture_size, false, false, "bar1");
err = gk20a_alloc_inst_block(g, inst_block);
if (err)
@@ -3263,7 +3338,7 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm)
gk20a_init_vm(mm, vm, big_page_size,
SZ_4K * 16, GK20A_PMU_VA_SIZE,
GK20A_PMU_VA_SIZE * 2, false,
GK20A_PMU_VA_SIZE * 2, false, false,
"system");
err = gk20a_alloc_inst_block(g, inst_block);
@@ -3303,7 +3378,7 @@ static int gk20a_init_cde_vm(struct mm_gk20a *mm)
SZ_4K * 16,
NV_MM_DEFAULT_KERNEL_SIZE,
NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
false, "cde");
false, false, "cde");
}
void gk20a_mm_init_pdb(struct gk20a *g, void *inst_ptr, u64 pdb_addr)

View File

@@ -251,6 +251,8 @@ struct vm_gk20a {
u32 big_page_size;
bool userspace_managed;
const struct gk20a_mmu_level *mmu_levels;
struct kref ref;
@@ -586,7 +588,8 @@ int gk20a_vm_free_va(struct vm_gk20a *vm,
/* vm-as interface */
struct nvgpu_as_alloc_space_args;
struct nvgpu_as_free_space_args;
int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 flags);
int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size,
u32 flags);
int gk20a_vm_release_share(struct gk20a_as_share *as_share);
int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
struct nvgpu_as_alloc_space_args *args);
@@ -621,6 +624,7 @@ int gk20a_init_vm(struct mm_gk20a *mm,
u64 kernel_reserved,
u64 aperture_size,
bool big_pages,
bool userspace_managed,
char *name);
void gk20a_deinit_vm(struct vm_gk20a *vm);

View File

@@ -267,7 +267,7 @@ u64 vgpu_bar1_map(struct gk20a *g, struct sg_table **sgt, u64 size)
/* address space interfaces for the gk20a module */
static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
u32 big_page_size)
u32 big_page_size, u32 flags)
{
struct gk20a_as *as = as_share->as;
struct gk20a *g = gk20a_from_as(as);
@@ -280,6 +280,8 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
kernel_vma_start, kernel_vma_limit;
char name[32];
int err, i;
const bool userspace_managed =
(flags & NVGPU_GPU_IOCTL_ALLOC_AS_FLAGS_USERSPACE_MANAGED) != 0;
/* note: keep the page sizes sorted lowest to highest here */
u32 gmmu_page_sizes[gmmu_nr_page_sizes] = {
@@ -290,6 +292,12 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
gk20a_dbg_fn("");
if (userspace_managed) {
gk20a_err(dev_from_gk20a(g),
"userspace-managed address spaces not yet supported");
return -ENOSYS;
}
big_page_size = gmmu_page_sizes[gmmu_page_size_big];
vm = kzalloc(sizeof(*vm), GFP_KERNEL);

View File

@@ -106,6 +106,8 @@ struct nvgpu_gpu_zbc_query_table_args {
#define NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS (1 << 4)
/* NVGPU_IOCTL_CHANNEL_CYCLE_STATS_SNAPSHOT is available */
#define NVGPU_GPU_FLAGS_SUPPORT_CYCLE_STATS_SNAPSHOT (1 << 6)
/* User-space managed address spaces support */
#define NVGPU_GPU_FLAGS_SUPPORT_USERSPACE_MANAGED_AS (1 << 7)
struct nvgpu_gpu_characteristics {
__u32 arch;
@@ -239,7 +241,22 @@ struct nvgpu_gpu_mark_compressible_write_args {
struct nvgpu_alloc_as_args {
__u32 big_page_size;
__s32 as_fd;
__u64 reserved; /* must be zero */
/*
* The GPU address space will be managed by the userspace. This has
* the following changes in functionality:
* 1. All non-fixed-offset user mappings are rejected (i.e.,
* fixed-offset only)
* 2. Address space does not need to be allocated for fixed-offset
* mappings, except to mark sparse address space areas.
* 3. Maps and unmaps are immediate. In particular, mapping ref
* increments at kickoffs and decrements at job completion are
* bypassed.
*/
#define NVGPU_GPU_IOCTL_ALLOC_AS_FLAGS_USERSPACE_MANAGED (1 << 0)
__u32 flags;
__u32 reserved; /* must be zero */
};
struct nvgpu_gpu_open_tsg_args {
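
As a complement, a sketch of the only mapping shape a userspace-managed
address space accepts: a fixed-offset map through the returned as_fd.
NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET is taken from the diff above; the
NVGPU_AS_IOCTL_MAP_BUFFER_EX call and the nvgpu_as_map_buffer_ex_args
field names are assumptions based on the contemporary nvgpu UAPI and
should be checked against the installed header.

/*
 * Sketch only: the as-fd ioctl name and struct layout below are
 * assumptions; consult the installed nvgpu UAPI header.
 */
#include <string.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

/* Map dmabuf_fd at the caller-chosen GPU VA fixed_gpu_va. */
static int map_fixed(int as_fd, int dmabuf_fd, __u64 fixed_gpu_va,
		     __u64 size)
{
	struct nvgpu_as_map_buffer_ex_args args;

	memset(&args, 0, sizeof(args));
	args.flags = NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET;
	args.dmabuf_fd = dmabuf_fd;
	args.offset = fixed_gpu_va;	/* requested (and resulting) GPU VA */
	args.mapping_size = size;

	/*
	 * On a userspace-managed AS no prior space allocation is needed
	 * for this range (allocation is only needed to mark areas
	 * sparse), and the map takes effect immediately.
	 */
	return ioctl(as_fd, NVGPU_AS_IOCTL_MAP_BUFFER_EX, &args);
}

A non-fixed-offset map request on the same fd would be rejected with
-EFAULT, as the gk20a_vm_map() hunk above shows.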