gpu: nvgpu: support usermode submit buffers

Import userd and gpfifo buffers from userspace if provided via
NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO_EX. Also supply the work submit token
(i.e., the hw channel id) to userspace.

To keep the buffers alive, store their dmabuf and attachment/sgt handles
in nvgpu_channel_linux; the common nvgpu_mem does not carry such
OS-specific data, as it is meant for buffers that are mainly in kernel
use. The buffers are freed via a new API in the os_channel interface.

Fix a bug in gk20a_channel_free_usermode_buffers: also unmap the
usermode gpfifo buffer.

Bug 200145225
Bug 200541476

Change-Id: I8416af7085c91b044ac8ccd9faa38e2a6d0c3946
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1795821
Signed-off-by: Debarshi Dutta <ddutta@nvidia.com>
(cherry picked from commit 99b1c6dcdf in dev-main)
Reviewed-on: https://git-master.nvidia.com/r/2170603
GVS: Gerrit_Virtual_Submit
Reviewed-by: Bibek Basu <bbasu@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Author:    Konsta Holtta
Date:      2018-09-11 14:47:51 +03:00
Committed: mobile promotions
Commit:    8b484c0b53 (parent 758cb76e22)

7 changed files with 205 additions and 4 deletions
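
For context, a rough userspace sketch of the new setup path follows (not
part of this change). The struct, fields, and flag match the uapi diff
below; the ioctl request macro name and uapi header path are assumptions:

#include <string.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* assumed uapi header location */

int channel_bind_usermode(int ch_fd, int userd_fd, int gpfifo_fd,
			  uint32_t num_entries, uint32_t *token)
{
	struct nvgpu_channel_setup_bind_args args;
	int err;

	/* zeroing also leaves both dmabuf offsets at 0, as required */
	memset(&args, 0, sizeof(args));
	args.num_gpfifo_entries = num_entries;
	args.userd_dmabuf_fd = userd_fd;
	args.gpfifo_dmabuf_fd = gpfifo_fd;
	args.flags = NVGPU_CHANNEL_SETUP_BIND_FLAGS_USERMODE_SUPPORT;

	/* NVGPU_IOCTL_CHANNEL_SETUP_BIND is an assumed macro name */
	err = ioctl(ch_fd, NVGPU_IOCTL_CHANNEL_SETUP_BIND, &args);
	if (err != 0)
		return err;

	/* token == channel_base + chid; used to ring the doorbell */
	*token = args.work_submit_token;
	return 0;
}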

@@ -1228,7 +1228,9 @@ int nvgpu_channel_setup_bind(struct channel_gk20a *c,
 	nvgpu_log_info(g, "channel %d : gpfifo_base 0x%016llx, size %d",
 		c->chid, gpfifo_gpu_va, c->gpfifo.entry_num);
 
-	g->ops.fifo.setup_userd(c);
+	if (!c->usermode_submit_enabled) {
+		g->ops.fifo.setup_userd(c);
+	}
 
 	if (g->aggressive_sync_destroy_thresh == 0U) {
 		nvgpu_mutex_acquire(&c->sync_lock);
@@ -1326,7 +1328,10 @@ void gk20a_channel_free_usermode_buffers(struct channel_gk20a *c)
 		nvgpu_dma_free(c->g, &c->usermode_userd);
 	}
 	if (nvgpu_mem_is_valid(&c->usermode_gpfifo)) {
-		nvgpu_dma_free(c->g, &c->usermode_gpfifo);
+		nvgpu_dma_unmap_free(c->vm, &c->usermode_gpfifo);
+	}
+	if (c->g->os_channel.free_usermode_buffers != NULL) {
+		c->g->os_channel.free_usermode_buffers(c);
 	}
 }

@@ -95,7 +95,9 @@ struct nvgpu_setup_bind_args {
 	u32 num_gpfifo_entries;
 	u32 num_inflight_jobs;
+	u32 userd_dmabuf_fd;
+	u64 userd_dmabuf_offset;
+	u32 gpfifo_dmabuf_fd;
+	u64 gpfifo_dmabuf_offset;
+	u32 work_submit_token;
 	u32 flags;
 };

@@ -1605,7 +1605,8 @@ struct gk20a {
 				struct nvgpu_gpfifo_userdata userdata,
 				u32 start, u32 length);
 		int (*alloc_usermode_buffers)(struct channel_gk20a *c,
-			struct nvgpu_setup_bind_args *gpfifo_args);
+			struct nvgpu_setup_bind_args *args);
+		void (*free_usermode_buffers)(struct channel_gk20a *c);
 	} os_channel;
 
 	struct gk20a_scale_profile *scale_profile;

@@ -63,6 +63,19 @@ struct nvgpu_os_fence_framework {
 	struct sync_timeline *timeline;
 };
 
+struct nvgpu_usermode_bufs_linux {
+	/*
+	 * Common low level info of these is stored in nvgpu_mems in
+	 * channel_gk20a; these hold lifetimes for the actual dmabuf and its
+	 * dma mapping.
+	 */
+	struct nvgpu_usermode_buf_linux {
+		struct dma_buf *dmabuf;
+		struct dma_buf_attachment *attachment;
+		struct sg_table *sgt;
+	} gpfifo, userd;
+};
+
 struct nvgpu_channel_linux {
 	struct channel_gk20a *ch;
@@ -72,6 +85,8 @@ struct nvgpu_channel_linux {
 	struct nvgpu_error_notifier error_notifier;
 
 	struct dma_buf *cyclestate_buffer_handler;
+
+	struct nvgpu_usermode_bufs_linux usermode;
 };
 
 u32 nvgpu_submit_gpfifo_user_flags_to_common_flags(u32 user_flags);

@@ -590,6 +590,9 @@ static u32 nvgpu_setup_bind_user_flags_to_common_flags(u32 user_flags)
 	if (user_flags & NVGPU_CHANNEL_SETUP_BIND_FLAGS_REPLAYABLE_FAULTS_ENABLE)
 		flags |= NVGPU_SETUP_BIND_FLAGS_REPLAYABLE_FAULTS_ENABLE;
 
+	if (user_flags & NVGPU_CHANNEL_SETUP_BIND_FLAGS_USERMODE_SUPPORT)
+		flags |= NVGPU_SETUP_BIND_FLAGS_USERMODE_SUPPORT;
+
 	return flags;
 }
@@ -601,6 +604,14 @@ static void nvgpu_get_setup_bind_args(
 		channel_setup_bind_args->num_gpfifo_entries;
 	setup_bind_args->num_inflight_jobs =
 		channel_setup_bind_args->num_inflight_jobs;
+	setup_bind_args->userd_dmabuf_fd =
+		channel_setup_bind_args->userd_dmabuf_fd;
+	setup_bind_args->userd_dmabuf_offset =
+		channel_setup_bind_args->userd_dmabuf_offset;
+	setup_bind_args->gpfifo_dmabuf_fd =
+		channel_setup_bind_args->gpfifo_dmabuf_fd;
+	setup_bind_args->gpfifo_dmabuf_offset =
+		channel_setup_bind_args->gpfifo_dmabuf_offset;
 	setup_bind_args->flags = nvgpu_setup_bind_user_flags_to_common_flags(
 		channel_setup_bind_args->flags);
 }
@@ -1156,6 +1167,8 @@ long gk20a_channel_ioctl(struct file *filp,
 			break;
 		}
 		err = nvgpu_channel_setup_bind(ch, &setup_bind_args);
+		channel_setup_bind_args->work_submit_token =
+			setup_bind_args.work_submit_token;
 		gk20a_idle(ch->g);
 		break;
 	}

@@ -20,6 +20,7 @@
 #include <nvgpu/os_sched.h>
 #include <nvgpu/gk20a.h>
 #include <nvgpu/channel.h>
+#include <nvgpu/dma.h>
 
 /*
  * This is required for nvgpu_vm_find_buf() which is used in the tracing
@@ -31,6 +32,7 @@
 #include "channel.h"
 #include "ioctl_channel.h"
 #include "os_linux.h"
+#include "dmabuf.h"
 
 #include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
@@ -383,6 +385,147 @@ static int nvgpu_channel_copy_user_gpfifo(struct nvgpu_gpfifo_entry *dest,
 	return n == 0 ? 0 : -EFAULT;
 }
 
+int nvgpu_usermode_buf_from_dmabuf(struct gk20a *g, int dmabuf_fd,
+		struct nvgpu_mem *mem, struct nvgpu_usermode_buf_linux *buf)
+{
+	struct device *dev = dev_from_gk20a(g);
+	struct dma_buf *dmabuf;
+	struct sg_table *sgt;
+	struct dma_buf_attachment *attachment;
+	int err;
+
+	dmabuf = dma_buf_get(dmabuf_fd);
+	if (IS_ERR(dmabuf)) {
+		return PTR_ERR(dmabuf);
+	}
+
+	if (gk20a_dmabuf_aperture(g, dmabuf) == APERTURE_INVALID) {
+		err = -EINVAL;
+		goto put_dmabuf;
+	}
+
+	err = gk20a_dmabuf_alloc_drvdata(dmabuf, dev);
+	if (err != 0) {
+		goto put_dmabuf;
+	}
+
+	sgt = gk20a_mm_pin(dev, dmabuf, &attachment);
+	if (IS_ERR(sgt)) {
+		nvgpu_warn(g, "Failed to pin dma_buf!");
+		err = PTR_ERR(sgt);
+		goto put_dmabuf;
+	}
+
+	buf->dmabuf = dmabuf;
+	buf->attachment = attachment;
+	buf->sgt = sgt;
+
+	/*
+	 * This mem is unmapped and freed in a common path; for Linux, we'll
+	 * also need to unref the dmabuf stuff (above) but the sgt here is only
+	 * borrowed, so it cannot be freed by nvgpu_mem_*.
+	 */
+	mem->mem_flags = NVGPU_MEM_FLAG_FOREIGN_SGT;
+	mem->aperture = APERTURE_SYSMEM;
+	mem->skip_wmb = 0;
+	mem->size = dmabuf->size;
+
+	mem->priv.flags = 0;
+	mem->priv.pages = NULL;
+	mem->priv.sgt = sgt;
+
+	return 0;
+put_dmabuf:
+	dma_buf_put(dmabuf);
+	return err;
+}
+
+void nvgpu_channel_free_usermode_buffers(struct channel_gk20a *c)
+{
+	struct nvgpu_channel_linux *priv = c->os_priv;
+	struct gk20a *g = c->g;
+	struct device *dev = dev_from_gk20a(g);
+
+	if (priv->usermode.gpfifo.dmabuf != NULL) {
+		gk20a_mm_unpin(dev, priv->usermode.gpfifo.dmabuf,
+				priv->usermode.gpfifo.attachment,
+				priv->usermode.gpfifo.sgt);
+		dma_buf_put(priv->usermode.gpfifo.dmabuf);
+		priv->usermode.gpfifo.dmabuf = NULL;
+	}
+
+	if (priv->usermode.userd.dmabuf != NULL) {
+		gk20a_mm_unpin(dev, priv->usermode.userd.dmabuf,
+				priv->usermode.userd.attachment,
+				priv->usermode.userd.sgt);
+		dma_buf_put(priv->usermode.userd.dmabuf);
+		priv->usermode.userd.dmabuf = NULL;
+	}
+}
+
+static int nvgpu_channel_alloc_usermode_buffers(struct channel_gk20a *c,
+		struct nvgpu_setup_bind_args *args)
+{
+	struct nvgpu_channel_linux *priv = c->os_priv;
+	struct gk20a *g = c->g;
+	struct device *dev = dev_from_gk20a(g);
+	size_t gpfifo_size;
+	int err;
+
+	if (args->gpfifo_dmabuf_fd == 0 || args->userd_dmabuf_fd == 0) {
+		return -EINVAL;
+	}
+
+	if (args->gpfifo_dmabuf_offset != 0 ||
+			args->userd_dmabuf_offset != 0) {
+		/* TODO - not yet supported */
+		return -EINVAL;
+	}
+
+	err = nvgpu_usermode_buf_from_dmabuf(g, args->gpfifo_dmabuf_fd,
+			&c->usermode_gpfifo, &priv->usermode.gpfifo);
+	if (err < 0) {
+		return err;
+	}
+
+	gpfifo_size = max_t(u32, SZ_4K,
+			args->num_gpfifo_entries *
+			nvgpu_get_gpfifo_entry_size());
+
+	if (c->usermode_gpfifo.size < gpfifo_size) {
+		err = -EINVAL;
+		goto free_gpfifo;
+	}
+
+	c->usermode_gpfifo.gpu_va = nvgpu_gmmu_map(c->vm, &c->usermode_gpfifo,
+			c->usermode_gpfifo.size, 0, gk20a_mem_flag_none,
+			false, c->usermode_gpfifo.aperture);
+	if (c->usermode_gpfifo.gpu_va == 0) {
+		err = -ENOMEM;
+		goto unmap_free_gpfifo;
+	}
+
+	err = nvgpu_usermode_buf_from_dmabuf(g, args->userd_dmabuf_fd,
+			&c->usermode_userd, &priv->usermode.userd);
+	if (err < 0) {
+		goto unmap_free_gpfifo;
+	}
+
+	args->work_submit_token = g->fifo.channel_base + c->chid;
+
+	return 0;
+unmap_free_gpfifo:
+	nvgpu_dma_unmap_free(c->vm, &c->usermode_gpfifo);
+free_gpfifo:
+	gk20a_mm_unpin(dev, priv->usermode.gpfifo.dmabuf,
+			priv->usermode.gpfifo.attachment,
+			priv->usermode.gpfifo.sgt);
+	dma_buf_put(priv->usermode.gpfifo.dmabuf);
+	priv->usermode.gpfifo.dmabuf = NULL;
+	return err;
+}
+
 int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l)
 {
 	struct gk20a *g = &l->g;
@@ -417,6 +560,12 @@ int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l)
 	g->os_channel.copy_user_gpfifo =
 		nvgpu_channel_copy_user_gpfifo;
 
+	g->os_channel.alloc_usermode_buffers =
+		nvgpu_channel_alloc_usermode_buffers;
+
+	g->os_channel.free_usermode_buffers =
+		nvgpu_channel_free_usermode_buffers;
+
 	return 0;
 
 err_clean:

@@ -1514,8 +1514,24 @@ struct nvgpu_channel_setup_bind_args {
 #define NVGPU_CHANNEL_SETUP_BIND_FLAGS_DETERMINISTIC		(1 << 1)
 /* enable replayable gmmu faults for this channel */
 #define NVGPU_CHANNEL_SETUP_BIND_FLAGS_REPLAYABLE_FAULTS_ENABLE	(1 << 2)
+/*
+ * Enable usermode submits on this channel.
+ *
+ * Submits in usermode are supported in some environments. If supported and
+ * this flag is set + USERD and GPFIFO buffers are provided here, a submit
+ * token is passed back to be written in the doorbell register in the usermode
+ * region to notify the GPU for new work on this channel. Usermode and
+ * kernelmode submit modes are mutually exclusive; by passing this flag, the
+ * SUBMIT_GPFIFO IOCTL cannot be used.
+ */
+#define NVGPU_CHANNEL_SETUP_BIND_FLAGS_USERMODE_SUPPORT	(1 << 3)
 	__u32 flags;
-	__u32 reserved[16];
+	__s32 userd_dmabuf_fd;		/* in */
+	__s32 gpfifo_dmabuf_fd;		/* in */
+	__u32 work_submit_token;	/* out */
+	__u64 userd_dmabuf_offset;	/* in */
+	__u64 gpfifo_dmabuf_offset;	/* in */
+	__u32 reserved[9];
 };
 
 struct nvgpu_fence {
 	__u32 id; /* syncpoint id or sync fence fd */
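
To illustrate the flag comment above: once setup returns a work submit
token, a usermode submit amounts to roughly the following sketch (not part
of this change). It assumes the GPFIFO and USERD dmabufs plus the usermode
MMIO region are mmapped; the GP_PUT and doorbell offsets vary per chip and
are placeholder assumptions here:

#include <stdint.h>

/* Assumed offsets; check the per-chip hw headers before relying on them. */
#define USERD_GP_PUT		0x8cu	/* PUT pointer word within USERD */
#define USERMODE_DOORBELL	0x90u	/* doorbell within the usermode region */

static void usermode_submit(volatile uint8_t *userd,
			    volatile uint8_t *usermode_region,
			    uint32_t new_gp_put,
			    uint32_t work_submit_token)
{
	/* 1. gpfifo entries were already written to the shared gpfifo buffer */

	/* 2. publish the new PUT pointer through USERD */
	*(volatile uint32_t *)(userd + USERD_GP_PUT) = new_gp_put;

	/* 3. order the PUT update before the doorbell write */
	__sync_synchronize();

	/* 4. ring the doorbell with the token (the hw channel id) so the
	 *    GPU schedules this channel's new work */
	*(volatile uint32_t *)(usermode_region + USERMODE_DOORBELL) =
		work_submit_token;
}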