diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 83c49d528..a68968fe0 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -376,6 +376,13 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force)
 	if(g->ops.fifo.free_channel_ctx_header)
 		g->ops.fifo.free_channel_ctx_header(ch);
 
+	if (ch->usermode_submit_enabled) {
+		gk20a_channel_free_usermode_buffers(ch);
+		ch->userd_iova = nvgpu_mem_get_addr(g, &f->userd) +
+			ch->chid * f->userd_entry_size;
+		ch->usermode_submit_enabled = false;
+	}
+
 	gk20a_gr_flush_channel_tlb(gr);
 
 	nvgpu_dma_unmap_free(ch_vm, &ch->gpfifo.mem);
@@ -1086,12 +1093,30 @@ int gk20a_channel_alloc_gpfifo(struct channel_gk20a *c,
 		goto clean_up_idle;
 	}
 
+	if (gpfifo_args->flags & NVGPU_GPFIFO_FLAGS_USERMODE_SUPPORT) {
+		if (g->ops.fifo.alloc_usermode_buffers) {
+			err = g->ops.fifo.alloc_usermode_buffers(c,
+					gpfifo_args);
+			if (err) {
+				nvgpu_err(g, "Usermode buffer alloc failed");
+				goto clean_up;
+			}
+			c->userd_iova = nvgpu_mem_get_addr(g,
+					&c->usermode_userd);
+			c->usermode_submit_enabled = true;
+		} else {
+			nvgpu_err(g, "Usermode submit not supported");
+			err = -EINVAL;
+			goto clean_up;
+		}
+	}
+
 	err = nvgpu_dma_alloc_map_sys(ch_vm,
 			gpfifo_size * gpfifo_entry_size,
 			&c->gpfifo.mem);
 	if (err) {
 		nvgpu_err(g, "%s: memory allocation failed", __func__);
-		goto clean_up;
+		goto clean_up_usermode;
 	}
 
 	if (c->gpfifo.mem.aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
@@ -1174,6 +1199,13 @@ clean_up_sync:
 clean_up_unmap:
 	nvgpu_big_free(g, c->gpfifo.pipe);
 	nvgpu_dma_unmap_free(ch_vm, &c->gpfifo.mem);
+clean_up_usermode:
+	if (c->usermode_submit_enabled) {
+		gk20a_channel_free_usermode_buffers(c);
+		c->userd_iova = nvgpu_mem_get_addr(g, &g->fifo.userd) +
+			c->chid * g->fifo.userd_entry_size;
+		c->usermode_submit_enabled = false;
+	}
 clean_up:
 	memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc));
 clean_up_idle:
@@ -1187,6 +1219,12 @@ clean_up_idle:
 	return err;
 }
 
+void gk20a_channel_free_usermode_buffers(struct channel_gk20a *c)
+{
+	if (nvgpu_mem_is_valid(&c->usermode_userd))
+		nvgpu_dma_free(c->g, &c->usermode_userd);
+}
+
 /* Update with this periodically to determine how the gpfifo is draining. */
 static inline u32 update_gp_get(struct gk20a *g,
 				struct channel_gk20a *c)
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index eaa5f81b5..cd5bf7d76 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -45,10 +45,14 @@ struct fifo_profile_gk20a;
 #define NVGPU_GPFIFO_FLAGS_SUPPORT_VPR			(1 << 0)
 #define NVGPU_GPFIFO_FLAGS_SUPPORT_DETERMINISTIC	(1 << 1)
 #define NVGPU_GPFIFO_FLAGS_REPLAYABLE_FAULTS_ENABLE	(1 << 2)
+#define NVGPU_GPFIFO_FLAGS_USERMODE_SUPPORT		(1 << 3)
 
 struct nvgpu_gpfifo_args {
 	u32 num_entries;
 	u32 num_inflight_jobs;
+	u32 userd_dmabuf_fd;
+	u32 gpfifo_dmabuf_fd;
+	u32 work_submit_token;
 	u32 flags;
 };
 
@@ -184,6 +188,7 @@ struct channel_gk20a {
 	/* deterministic, but explicitly idle and submits disallowed */
 	bool deterministic_railgate_allowed;
 	bool cde;
+	bool usermode_submit_enabled;
 	pid_t pid;
 	pid_t tgid;
 	struct nvgpu_mutex ioctl_lock;
@@ -198,6 +203,7 @@ struct channel_gk20a {
 
 	struct gpfifo_desc gpfifo;
 
+	struct nvgpu_mem usermode_userd; /* Used for Usermode Submission */
 	struct nvgpu_mem inst_block;
 
 	u64 userd_iova;
@@ -361,6 +367,7 @@ void free_priv_cmdbuf(struct channel_gk20a *c,
 void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
 					bool clean_all);
 
+void gk20a_channel_free_usermode_buffers(struct channel_gk20a *c);
 u32 nvgpu_get_gpfifo_entry_size(void);
 
 #endif /* CHANNEL_GK20A_H */
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index fd7faa22e..c446e3eac 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -3916,11 +3916,19 @@ void gk20a_fifo_setup_ramfc_for_privileged_channel(struct channel_gk20a *c)
 int gk20a_fifo_setup_userd(struct channel_gk20a *c)
 {
 	struct gk20a *g = c->g;
-	struct nvgpu_mem *mem = &g->fifo.userd;
-	u32 offset = c->chid * g->fifo.userd_entry_size / sizeof(u32);
+	struct nvgpu_mem *mem;
+	u32 offset;
 
 	gk20a_dbg_fn("");
 
+	if (nvgpu_mem_is_valid(&c->usermode_userd)) {
+		mem = &c->usermode_userd;
+		offset = 0;
+	} else {
+		mem = &g->fifo.userd;
+		offset = c->chid * g->fifo.userd_entry_size / sizeof(u32);
+	}
+
 	nvgpu_mem_wr32(g, mem, offset + ram_userd_put_w(), 0);
 	nvgpu_mem_wr32(g, mem, offset + ram_userd_get_w(), 0);
 	nvgpu_mem_wr32(g, mem, offset + ram_userd_ref_w(), 0);
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index d61423d1b..4a1c2f755 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -637,6 +637,8 @@ struct gpu_ops {
 		int (*channel_suspend)(struct gk20a *g);
 		int (*channel_resume)(struct gk20a *g);
 		void (*set_error_notifier)(struct channel_gk20a *ch, u32 error);
+		int (*alloc_usermode_buffers)(struct channel_gk20a *c,
+			struct nvgpu_gpfifo_args *gpfifo_args);
 #ifdef CONFIG_TEGRA_GK20A_NVHOST
 		int (*alloc_syncpt_buf)(struct channel_gk20a *c,
 				u32 syncpt_id, struct nvgpu_mem *syncpt_buf);
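
For reference, a minimal in-driver usage sketch of the new flag and nvgpu_gpfifo_args fields. It is not part of the patch: the helper name, the exact gk20a_channel_alloc_gpfifo() signature, and the assumption that the per-chip hook fills work_submit_token are inferred from the hunks above and from the new struct fields, not confirmed by this diff.

/*
 * Hypothetical caller showing the intended flow (names marked as assumptions
 * above). Only identifiers introduced by this patch or visible in its hunk
 * context are used.
 */
static int example_open_usermode_channel(struct channel_gk20a *ch,
		u32 userd_fd, u32 gpfifo_fd, u32 num_entries)
{
	struct nvgpu_gpfifo_args args = {
		.num_entries = num_entries,
		.num_inflight_jobs = 0,
		.userd_dmabuf_fd = userd_fd,	/* dmabuf backing the USERD */
		.gpfifo_dmabuf_fd = gpfifo_fd,	/* dmabuf backing the GPFIFO */
		.flags = NVGPU_GPFIFO_FLAGS_USERMODE_SUPPORT,
	};
	int err;

	/*
	 * Per the patch, this returns -EINVAL when the chip provides no
	 * ops.fifo.alloc_usermode_buffers hook; second parameter assumed
	 * to be the nvgpu_gpfifo_args pointer used in the hunk body.
	 */
	err = gk20a_channel_alloc_gpfifo(ch, &args);
	if (err)
		return err;

	/*
	 * On success, the chip hook is presumably expected to have filled
	 * args.work_submit_token for userspace to use when submitting work.
	 */
	return 0;
}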