diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 83c49d528..a68968fe0 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -376,6 +376,13 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force)
 	if(g->ops.fifo.free_channel_ctx_header)
 		g->ops.fifo.free_channel_ctx_header(ch);
 
+	if (ch->usermode_submit_enabled) {
+		gk20a_channel_free_usermode_buffers(ch);
+		ch->userd_iova = nvgpu_mem_get_addr(g, &f->userd) +
+			ch->chid * f->userd_entry_size;
+		ch->usermode_submit_enabled = false;
+	}
+
 	gk20a_gr_flush_channel_tlb(gr);
 
 	nvgpu_dma_unmap_free(ch_vm, &ch->gpfifo.mem);
@@ -1086,12 +1093,30 @@ int gk20a_channel_alloc_gpfifo(struct channel_gk20a *c,
 		goto clean_up_idle;
 	}
 
+	if (gpfifo_args->flags & NVGPU_GPFIFO_FLAGS_USERMODE_SUPPORT) {
+		if (g->ops.fifo.alloc_usermode_buffers) {
+			err = g->ops.fifo.alloc_usermode_buffers(c,
+					gpfifo_args);
+			if (err) {
+				nvgpu_err(g, "Usermode buffer alloc failed");
+				goto clean_up;
+			}
+			c->userd_iova = nvgpu_mem_get_addr(g,
+					&c->usermode_userd);
+			c->usermode_submit_enabled = true;
+		} else {
+			nvgpu_err(g, "Usermode submit not supported");
+			err = -EINVAL;
+			goto clean_up;
+		}
+	}
+
 	err = nvgpu_dma_alloc_map_sys(ch_vm,
 			gpfifo_size * gpfifo_entry_size,
 			&c->gpfifo.mem);
 	if (err) {
 		nvgpu_err(g, "%s: memory allocation failed", __func__);
-		goto clean_up;
+		goto clean_up_usermode;
 	}
 
 	if (c->gpfifo.mem.aperture == APERTURE_VIDMEM || g->mm.force_pramin) {
@@ -1174,6 +1199,13 @@ clean_up_sync:
 clean_up_unmap:
 	nvgpu_big_free(g, c->gpfifo.pipe);
 	nvgpu_dma_unmap_free(ch_vm, &c->gpfifo.mem);
+clean_up_usermode:
+	if (c->usermode_submit_enabled) {
+		gk20a_channel_free_usermode_buffers(c);
+		c->userd_iova = nvgpu_mem_get_addr(g, &g->fifo.userd) +
+			c->chid * g->fifo.userd_entry_size;
+		c->usermode_submit_enabled = false;
+	}
 clean_up:
 	memset(&c->gpfifo, 0, sizeof(struct gpfifo_desc));
 clean_up_idle:
@@ -1187,6 +1219,12 @@ clean_up_idle:
 	return err;
 }
 
+void gk20a_channel_free_usermode_buffers(struct channel_gk20a *c)
+{
+	if (nvgpu_mem_is_valid(&c->usermode_userd))
+		nvgpu_dma_free(c->g, &c->usermode_userd);
+}
+
 /* Update with this periodically to determine how the gpfifo is draining. */
 static inline u32 update_gp_get(struct gk20a *g,
 				struct channel_gk20a *c)
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index eaa5f81b5..cd5bf7d76 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -45,10 +45,14 @@ struct fifo_profile_gk20a;
 #define NVGPU_GPFIFO_FLAGS_SUPPORT_VPR			(1 << 0)
 #define NVGPU_GPFIFO_FLAGS_SUPPORT_DETERMINISTIC	(1 << 1)
 #define NVGPU_GPFIFO_FLAGS_REPLAYABLE_FAULTS_ENABLE	(1 << 2)
+#define NVGPU_GPFIFO_FLAGS_USERMODE_SUPPORT		(1 << 3)
 
 struct nvgpu_gpfifo_args {
 	u32 num_entries;
 	u32 num_inflight_jobs;
+	u32 userd_dmabuf_fd;
+	u32 gpfifo_dmabuf_fd;
+	u32 work_submit_token;
 	u32 flags;
 };
 
@@ -184,6 +188,7 @@ struct channel_gk20a {
 	/* deterministic, but explicitly idle and submits disallowed */
 	bool deterministic_railgate_allowed;
 	bool cde;
+	bool usermode_submit_enabled;
 	pid_t pid;
 	pid_t tgid;
 	struct nvgpu_mutex ioctl_lock;
@@ -198,6 +203,7 @@ struct channel_gk20a {
 
 	struct gpfifo_desc gpfifo;
 
+	struct nvgpu_mem usermode_userd; /* Used for Usermode Submission */
 	struct nvgpu_mem inst_block;
 
 	u64 userd_iova;
@@ -361,6 +367,7 @@ void free_priv_cmdbuf(struct channel_gk20a *c,
 void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
 					bool clean_all);
 
+void gk20a_channel_free_usermode_buffers(struct channel_gk20a *c);
 u32 nvgpu_get_gpfifo_entry_size(void);
 
 #endif /* CHANNEL_GK20A_H */
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index fd7faa22e..c446e3eac 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -3916,11 +3916,19 @@ void gk20a_fifo_setup_ramfc_for_privileged_channel(struct channel_gk20a *c)
 int gk20a_fifo_setup_userd(struct channel_gk20a *c)
 {
 	struct gk20a *g = c->g;
-	struct nvgpu_mem *mem = &g->fifo.userd;
-	u32 offset = c->chid * g->fifo.userd_entry_size / sizeof(u32);
+	struct nvgpu_mem *mem;
+	u32 offset;
 
 	gk20a_dbg_fn("");
 
+	if (nvgpu_mem_is_valid(&c->usermode_userd)) {
+		mem = &c->usermode_userd;
+		offset = 0;
+	} else {
+		mem = &g->fifo.userd;
+		offset = c->chid * g->fifo.userd_entry_size / sizeof(u32);
+	}
+
 	nvgpu_mem_wr32(g, mem, offset + ram_userd_put_w(), 0);
 	nvgpu_mem_wr32(g, mem, offset + ram_userd_get_w(), 0);
 	nvgpu_mem_wr32(g, mem, offset + ram_userd_ref_w(), 0);
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index d61423d1b..4a1c2f755 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -637,6 +637,8 @@ struct gpu_ops {
 		int (*channel_suspend)(struct gk20a *g);
 		int (*channel_resume)(struct gk20a *g);
 		void (*set_error_notifier)(struct channel_gk20a *ch, u32 error);
+		int (*alloc_usermode_buffers)(struct channel_gk20a *c,
+			struct nvgpu_gpfifo_args *gpfifo_args);
 #ifdef CONFIG_TEGRA_GK20A_NVHOST
 		int (*alloc_syncpt_buf)(struct channel_gk20a *c,
 				u32 syncpt_id, struct nvgpu_mem *syncpt_buf);
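
For reference, a minimal in-driver usage sketch of the new flag and nvgpu_gpfifo_args fields. It is not part of the patch: the helper name, the exact gk20a_channel_alloc_gpfifo() signature, and the assumption that the per-chip hook fills work_submit_token are inferred from the hunks above and from the new struct fields, not confirmed by this diff.

/*
 * Hypothetical caller showing the intended flow (names marked as assumptions
 * above). Only identifiers introduced by this patch or visible in its hunk
 * context are used.
 */
static int example_open_usermode_channel(struct channel_gk20a *ch,
		u32 userd_fd, u32 gpfifo_fd, u32 num_entries)
{
	struct nvgpu_gpfifo_args args = {
		.num_entries = num_entries,
		.num_inflight_jobs = 0,
		.userd_dmabuf_fd = userd_fd,	/* dmabuf backing the USERD */
		.gpfifo_dmabuf_fd = gpfifo_fd,	/* dmabuf backing the GPFIFO */
		.flags = NVGPU_GPFIFO_FLAGS_USERMODE_SUPPORT,
	};
	int err;

	/*
	 * Per the patch, this returns -EINVAL when the chip provides no
	 * ops.fifo.alloc_usermode_buffers hook; second parameter assumed
	 * to be the nvgpu_gpfifo_args pointer used in the hunk body.
	 */
	err = gk20a_channel_alloc_gpfifo(ch, &args);
	if (err)
		return err;

	/*
	 * On success, the chip hook is presumably expected to have filled
	 * args.work_submit_token for userspace to use when submitting work.
	 */
	return 0;
}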