gpu: nvgpu: support usermode submit buffers

Import userd and gpfifo buffers from userspace if provided via
NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO_EX. Also supply the work submit token
(i.e., the hw channel id) to userspace.

To keep the buffers alive, store their dmabuf and attachment/sgt handles
in nvgpu_channel_linux; the common nvgpu_mem does not carry such
OS-specific data, as it is meant for buffers that are mainly in kernel
use. The buffers are freed via a new API in the os_channel interface.

Fix a bug in gk20a_channel_free_usermode_buffers: also unmap the
usermode gpfifo buffer.

Bug 200145225
Bug 200541476

Change-Id: I8416af7085c91b044ac8ccd9faa38e2a6d0c3946
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1795821
Signed-off-by: Debarshi Dutta <ddutta@nvidia.com>
(cherry picked from commit 99b1c6dcdf in dev-main)
Reviewed-on: https://git-master.nvidia.com/r/2170603
GVS: Gerrit_Virtual_Submit
Reviewed-by: Bibek Basu <bbasu@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Author:    Konsta Holtta
Date:      2018-09-11 14:47:51 +03:00
Committed: mobile promotions
Commit:    8b484c0b53 (parent 758cb76e22)

7 changed files with 205 additions and 4 deletions
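
For context, a rough userspace sketch of the new setup path follows (not
part of this change). The struct, fields, and flag match the uapi diff
below; the ioctl request macro name and uapi header path are assumptions:

#include <string.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* assumed uapi header location */

int channel_bind_usermode(int ch_fd, int userd_fd, int gpfifo_fd,
			  uint32_t num_entries, uint32_t *token)
{
	struct nvgpu_channel_setup_bind_args args;
	int err;

	/* zeroing also leaves both dmabuf offsets at 0, as required */
	memset(&args, 0, sizeof(args));
	args.num_gpfifo_entries = num_entries;
	args.userd_dmabuf_fd = userd_fd;
	args.gpfifo_dmabuf_fd = gpfifo_fd;
	args.flags = NVGPU_CHANNEL_SETUP_BIND_FLAGS_USERMODE_SUPPORT;

	/* NVGPU_IOCTL_CHANNEL_SETUP_BIND is an assumed macro name */
	err = ioctl(ch_fd, NVGPU_IOCTL_CHANNEL_SETUP_BIND, &args);
	if (err != 0)
		return err;

	/* token == channel_base + chid; used to ring the doorbell */
	*token = args.work_submit_token;
	return 0;
}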

@@ -1228,7 +1228,9 @@ int nvgpu_channel_setup_bind(struct channel_gk20a *c,
 	nvgpu_log_info(g, "channel %d : gpfifo_base 0x%016llx, size %d",
 		c->chid, gpfifo_gpu_va, c->gpfifo.entry_num);
 
-	g->ops.fifo.setup_userd(c);
+	if (!c->usermode_submit_enabled) {
+		g->ops.fifo.setup_userd(c);
+	}
 
 	if (g->aggressive_sync_destroy_thresh == 0U) {
 		nvgpu_mutex_acquire(&c->sync_lock);
@@ -1326,7 +1328,10 @@ void gk20a_channel_free_usermode_buffers(struct channel_gk20a *c)
 		nvgpu_dma_free(c->g, &c->usermode_userd);
 	}
 	if (nvgpu_mem_is_valid(&c->usermode_gpfifo)) {
-		nvgpu_dma_free(c->g, &c->usermode_gpfifo);
+		nvgpu_dma_unmap_free(c->vm, &c->usermode_gpfifo);
+	}
+	if (c->g->os_channel.free_usermode_buffers != NULL) {
+		c->g->os_channel.free_usermode_buffers(c);
 	}
 }

@@ -95,7 +95,9 @@ struct nvgpu_setup_bind_args {
 	u32 num_gpfifo_entries;
 	u32 num_inflight_jobs;
+	u32 userd_dmabuf_fd;
+	u64 userd_dmabuf_offset;
+	u32 gpfifo_dmabuf_fd;
+	u64 gpfifo_dmabuf_offset;
+	u32 work_submit_token;
 	u32 flags;
 };

@@ -1605,7 +1605,8 @@ struct gk20a {
 				struct nvgpu_gpfifo_userdata userdata,
 				u32 start, u32 length);
 		int (*alloc_usermode_buffers)(struct channel_gk20a *c,
-			struct nvgpu_setup_bind_args *gpfifo_args);
+			struct nvgpu_setup_bind_args *args);
+		void (*free_usermode_buffers)(struct channel_gk20a *c);
 	} os_channel;
 
 	struct gk20a_scale_profile *scale_profile;

@@ -63,6 +63,19 @@ struct nvgpu_os_fence_framework {
 	struct sync_timeline *timeline;
 };
 
+struct nvgpu_usermode_bufs_linux {
+	/*
+	 * Common low level info of these is stored in nvgpu_mems in
+	 * channel_gk20a; these hold lifetimes for the actual dmabuf and its
+	 * dma mapping.
+	 */
+	struct nvgpu_usermode_buf_linux {
+		struct dma_buf *dmabuf;
+		struct dma_buf_attachment *attachment;
+		struct sg_table *sgt;
+	} gpfifo, userd;
+};
+
 struct nvgpu_channel_linux {
 	struct channel_gk20a *ch;
@@ -72,6 +85,8 @@ struct nvgpu_channel_linux {
 	struct nvgpu_error_notifier error_notifier;
 
 	struct dma_buf *cyclestate_buffer_handler;
+
+	struct nvgpu_usermode_bufs_linux usermode;
 };
 
 u32 nvgpu_submit_gpfifo_user_flags_to_common_flags(u32 user_flags);

@@ -590,6 +590,9 @@ static u32 nvgpu_setup_bind_user_flags_to_common_flags(u32 user_flags)
 	if (user_flags & NVGPU_CHANNEL_SETUP_BIND_FLAGS_REPLAYABLE_FAULTS_ENABLE)
 		flags |= NVGPU_SETUP_BIND_FLAGS_REPLAYABLE_FAULTS_ENABLE;
 
+	if (user_flags & NVGPU_CHANNEL_SETUP_BIND_FLAGS_USERMODE_SUPPORT)
+		flags |= NVGPU_SETUP_BIND_FLAGS_USERMODE_SUPPORT;
+
 	return flags;
 }
@@ -601,6 +604,14 @@ static void nvgpu_get_setup_bind_args(
 		channel_setup_bind_args->num_gpfifo_entries;
 	setup_bind_args->num_inflight_jobs =
 		channel_setup_bind_args->num_inflight_jobs;
+	setup_bind_args->userd_dmabuf_fd =
+		channel_setup_bind_args->userd_dmabuf_fd;
+	setup_bind_args->userd_dmabuf_offset =
+		channel_setup_bind_args->userd_dmabuf_offset;
+	setup_bind_args->gpfifo_dmabuf_fd =
+		channel_setup_bind_args->gpfifo_dmabuf_fd;
+	setup_bind_args->gpfifo_dmabuf_offset =
+		channel_setup_bind_args->gpfifo_dmabuf_offset;
 	setup_bind_args->flags = nvgpu_setup_bind_user_flags_to_common_flags(
 		channel_setup_bind_args->flags);
 }
@@ -1156,6 +1167,8 @@ long gk20a_channel_ioctl(struct file *filp,
 			break;
 		}
 		err = nvgpu_channel_setup_bind(ch, &setup_bind_args);
+		channel_setup_bind_args->work_submit_token =
+			setup_bind_args.work_submit_token;
 		gk20a_idle(ch->g);
 		break;
 	}

@@ -20,6 +20,7 @@
 #include <nvgpu/os_sched.h>
 #include <nvgpu/gk20a.h>
 #include <nvgpu/channel.h>
+#include <nvgpu/dma.h>
 
 /*
  * This is required for nvgpu_vm_find_buf() which is used in the tracing
@@ -31,6 +32,7 @@
 #include "channel.h"
 #include "ioctl_channel.h"
 #include "os_linux.h"
+#include "dmabuf.h"
 
 #include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
@@ -383,6 +385,147 @@ static int nvgpu_channel_copy_user_gpfifo(struct nvgpu_gpfifo_entry *dest,
 	return n == 0 ? 0 : -EFAULT;
 }
 
+int nvgpu_usermode_buf_from_dmabuf(struct gk20a *g, int dmabuf_fd,
+		struct nvgpu_mem *mem, struct nvgpu_usermode_buf_linux *buf)
+{
+	struct device *dev = dev_from_gk20a(g);
+	struct dma_buf *dmabuf;
+	struct sg_table *sgt;
+	struct dma_buf_attachment *attachment;
+	int err;
+
+	dmabuf = dma_buf_get(dmabuf_fd);
+	if (IS_ERR(dmabuf)) {
+		return PTR_ERR(dmabuf);
+	}
+
+	if (gk20a_dmabuf_aperture(g, dmabuf) == APERTURE_INVALID) {
+		err = -EINVAL;
+		goto put_dmabuf;
+	}
+
+	err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev);
+	if (err != 0) {
+		goto put_dmabuf;
+	}
+
+	sgt = gk20a_mm_pin(dev, dmabuf, &attachment);
+	if (IS_ERR(sgt)) {
+		nvgpu_warn(g, "Failed to pin dma_buf!");
+		err = PTR_ERR(sgt);
+		goto put_dmabuf;
+	}
+
+	buf->dmabuf = dmabuf;
+	buf->attachment = attachment;
+	buf->sgt = sgt;
+
+	/*
+	 * This mem is unmapped and freed in a common path; for Linux, we'll
+	 * also need to unref the dmabuf stuff (above) but the sgt here is only
+	 * borrowed, so it cannot be freed by nvgpu_mem_*.
+	 */
+	mem->mem_flags = NVGPU_MEM_FLAG_FOREIGN_SGT;
+	mem->aperture = APERTURE_SYSMEM;
+	mem->skip_wmb = 0;
+	mem->size = dmabuf->size;
+
+	mem->priv.flags = 0;
+	mem->priv.pages = NULL;
+	mem->priv.sgt = sgt;
+
+	return 0;
+put_dmabuf:
+	dma_buf_put(dmabuf);
+	return err;
+}
+
+void nvgpu_channel_free_usermode_buffers(struct channel_gk20a *c)
+{
+	struct nvgpu_channel_linux *priv = c->os_priv;
+	struct gk20a *g = c->g;
+	struct device *dev = dev_from_gk20a(g);
+
+	if (priv->usermode.gpfifo.dmabuf != NULL) {
+		gk20a_mm_unpin(dev, priv->usermode.gpfifo.dmabuf,
+				priv->usermode.gpfifo.attachment,
+				priv->usermode.gpfifo.sgt);
+		dma_buf_put(priv->usermode.gpfifo.dmabuf);
+		priv->usermode.gpfifo.dmabuf = NULL;
+	}
+
+	if (priv->usermode.userd.dmabuf != NULL) {
+		gk20a_mm_unpin(dev, priv->usermode.userd.dmabuf,
+				priv->usermode.userd.attachment,
+				priv->usermode.userd.sgt);
+		dma_buf_put(priv->usermode.userd.dmabuf);
+		priv->usermode.userd.dmabuf = NULL;
+	}
+}
+
+static int nvgpu_channel_alloc_usermode_buffers(struct channel_gk20a *c,
+		struct nvgpu_setup_bind_args *args)
+{
+	struct nvgpu_channel_linux *priv = c->os_priv;
+	struct gk20a *g = c->g;
+	struct device *dev = dev_from_gk20a(g);
+	size_t gpfifo_size;
+	int err;
+
+	if (args->gpfifo_dmabuf_fd == 0 || args->userd_dmabuf_fd == 0) {
+		return -EINVAL;
+	}
+
+	if (args->gpfifo_dmabuf_offset != 0 ||
+			args->userd_dmabuf_offset != 0) {
+		/* TODO - not yet supported */
+		return -EINVAL;
+	}
+
+	err = nvgpu_usermode_buf_from_dmabuf(g, args->gpfifo_dmabuf_fd,
+			&c->usermode_gpfifo, &priv->usermode.gpfifo);
+	if (err < 0) {
+		return err;
+	}
+
+	gpfifo_size = max_t(u32, SZ_4K,
+			args->num_gpfifo_entries *
+			nvgpu_get_gpfifo_entry_size());
+
+	if (c->usermode_gpfifo.size < gpfifo_size) {
+		err = -EINVAL;
+		goto free_gpfifo;
+	}
+
+	c->usermode_gpfifo.gpu_va = nvgpu_gmmu_map(c->vm, &c->usermode_gpfifo,
+			c->usermode_gpfifo.size, 0, gk20a_mem_flag_none,
+			false, c->usermode_gpfifo.aperture);
+	if (c->usermode_gpfifo.gpu_va == 0) {
+		err = -ENOMEM;
+		goto unmap_free_gpfifo;
+	}
+
+	err = nvgpu_usermode_buf_from_dmabuf(g, args->userd_dmabuf_fd,
+			&c->usermode_userd, &priv->usermode.userd);
+	if (err < 0) {
+		goto unmap_free_gpfifo;
+	}
+
+	args->work_submit_token = g->fifo.channel_base + c->chid;
+
+	return 0;
+unmap_free_gpfifo:
+	nvgpu_dma_unmap_free(c->vm, &c->usermode_gpfifo);
+free_gpfifo:
+	gk20a_mm_unpin(dev, priv->usermode.gpfifo.dmabuf,
+			priv->usermode.gpfifo.attachment,
+			priv->usermode.gpfifo.sgt);
+	dma_buf_put(priv->usermode.gpfifo.dmabuf);
+	priv->usermode.gpfifo.dmabuf = NULL;
+	return err;
+}
+
 int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l)
 {
 	struct gk20a *g = &l->g;
@@ -417,6 +560,12 @@ int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l)
 	g->os_channel.copy_user_gpfifo =
 		nvgpu_channel_copy_user_gpfifo;
 
+	g->os_channel.alloc_usermode_buffers =
+		nvgpu_channel_alloc_usermode_buffers;
+
+	g->os_channel.free_usermode_buffers =
+		nvgpu_channel_free_usermode_buffers;
+
 	return 0;
 
 err_clean:

@@ -1514,8 +1514,24 @@ struct nvgpu_channel_setup_bind_args {
 #define NVGPU_CHANNEL_SETUP_BIND_FLAGS_DETERMINISTIC		(1 << 1)
 /* enable replayable gmmu faults for this channel */
 #define NVGPU_CHANNEL_SETUP_BIND_FLAGS_REPLAYABLE_FAULTS_ENABLE	(1 << 2)
+/*
+ * Enable usermode submits on this channel.
+ *
+ * Submits in usermode are supported in some environments. If supported and
+ * this flag is set + USERD and GPFIFO buffers are provided here, a submit
+ * token is passed back to be written in the doorbell register in the usermode
+ * region to notify the GPU for new work on this channel. Usermode and
+ * kernelmode submit modes are mutually exclusive; by passing this flag, the
+ * SUBMIT_GPFIFO IOCTL cannot be used.
+ */
+#define NVGPU_CHANNEL_SETUP_BIND_FLAGS_USERMODE_SUPPORT	(1 << 3)
 	__u32 flags;
-	__u32 reserved[16];
+	__s32 userd_dmabuf_fd;		/* in */
+	__s32 gpfifo_dmabuf_fd;		/* in */
+	__u32 work_submit_token;	/* out */
+	__u64 userd_dmabuf_offset;	/* in */
+	__u64 gpfifo_dmabuf_offset;	/* in */
+	__u32 reserved[9];
 };
 
 struct nvgpu_fence {
 	__u32 id; /* syncpoint id or sync fence fd */
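
To illustrate the flag comment above: once setup returns a work submit
token, a usermode submit amounts to roughly the following sketch (not part
of this change). It assumes the GPFIFO and USERD dmabufs plus the usermode
MMIO region are mmapped; the GP_PUT and doorbell offsets vary per chip and
are placeholder assumptions here:

#include <stdint.h>

/* Assumed offsets; check the per-chip hw headers before relying on them. */
#define USERD_GP_PUT		0x8cu	/* PUT pointer word within USERD */
#define USERMODE_DOORBELL	0x90u	/* doorbell within the usermode region */

static void usermode_submit(volatile uint8_t *userd,
			    volatile uint8_t *usermode_region,
			    uint32_t new_gp_put,
			    uint32_t work_submit_token)
{
	/* 1. gpfifo entries were already written to the shared gpfifo buffer */

	/* 2. publish the new PUT pointer through USERD */
	*(volatile uint32_t *)(userd + USERD_GP_PUT) = new_gp_put;

	/* 3. order the PUT update before the doorbell write */
	__sync_synchronize();

	/* 4. ring the doorbell with the token (the hw channel id) so the
	 *    GPU schedules this channel's new work */
	*(volatile uint32_t *)(usermode_region + USERMODE_DOORBELL) =
		work_submit_token;
}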