gpu: nvgpu: add CONFIG_NVGPU_KERNEL_MODE_SUBMIT flag

The following functions belong to the kernel-mode submit path, and the
flag CONFIG_NVGPU_KERNEL_MODE_SUBMIT is used to compile them out of
safety builds (a minimal sketch of the guard pattern follows the list).

channel_gk20a_alloc_priv_cmdbuf
channel_gk20a_free_prealloc_resources
channel_gk20a_joblist_add
channel_gk20a_joblist_delete
channel_gk20a_joblist_peek
channel_gk20a_prealloc_resources
nvgpu_channel
nvgpu_channel_add_job
nvgpu_channel_alloc_job
nvgpu_channel_alloc_priv_cmdbuf
nvgpu_channel_clean_up_jobs
nvgpu_channel_free_job
nvgpu_channel_free_priv_cmd_entry
nvgpu_channel_free_priv_cmd_q
nvgpu_channel_from_worker_item
nvgpu_channel_get_gpfifo_free_count
nvgpu_channel_is_prealloc_enabled
nvgpu_channel_joblist_is_empty
nvgpu_channel_joblist_lock
nvgpu_channel_joblist_unlock
nvgpu_channel_kernelmode_deinit
nvgpu_channel_poll_wdt
nvgpu_channel_set_syncpt
nvgpu_channel_setup_kernelmode
nvgpu_channel_sync_get_ref
nvgpu_channel_sync_incr
nvgpu_channel_sync_incr_user
nvgpu_channel_sync_put_ref_and_check
nvgpu_channel_sync_wait_fence_fd
nvgpu_channel_update
nvgpu_channel_update_gpfifo_get_and_get_free_count
nvgpu_channel_update_priv_cmd_q_and_free_entry
nvgpu_channel_wdt_continue
nvgpu_channel_wdt_handler
nvgpu_channel_wdt_init
nvgpu_channel_wdt_restart_all_channels
nvgpu_channel_wdt_rewind
nvgpu_channel_wdt_start
nvgpu_channel_wdt_stop
nvgpu_channel_worker_deinit
nvgpu_channel_worker_from_worker
nvgpu_channel_worker_init
nvgpu_channel_worker_poll_init
nvgpu_channel_worker_poll_wakeup_post_process_item
nvgpu_channel_worker_poll_wakeup_process_item
nvgpu_submit_channel_gpfifo_kernel
nvgpu_submit_channel_gpfifo_user
gk20a_userd_gp_get
gk20a_userd_pb_get
gk20a_userd_gp_put
nvgpu_fence_alloc
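
The exclusion itself is the usual preprocessor guard around declarations,
definitions, and call sites. A minimal standalone sketch of the pattern,
simplified from the nvgpu_fifo_setup_sw hunk below (the real helpers take
more arguments and do real work):

struct gk20a;   /* opaque here; the real definition lives in the driver */

#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
/* present only when kernel-mode submit is enabled */
static int nvgpu_channel_worker_init(struct gk20a *g)
{
        (void)g;
        /* spawn the background worker that cleans up kernel-mode jobs */
        return 0;
}
#endif

int nvgpu_fifo_setup_sw(struct gk20a *g)
{
        int err = 0;

#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
        /* only compiled (and run) when the flag is defined; safety builds
         * that drop the flag never reference the kernel-mode submit path */
        err = nvgpu_channel_worker_init(g);
#else
        (void)g;
#endif
        return err;
}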

The following members of struct nvgpu_channel are compiled out of the
safety build (see the simplified fragment after the list).

struct gpfifo_desc gpfifo;
struct priv_cmd_queue priv_cmd_q;
struct nvgpu_channel_sync *sync;
struct nvgpu_list_node worker_item;
struct nvgpu_channel_wdt wdt;
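
In the struct itself, the affected members are simply grouped under the
same guard. A simplified, self-contained fragment (the stub member types
below stand in for the real nvgpu definitions, which this patch does not
change; the full layout is in the channel.h hunk near the end of the diff):

/* stand-in types so the fragment compiles on its own */
struct gpfifo_desc { int stub; };
struct priv_cmd_queue { int stub; };
struct nvgpu_channel_sync;
struct nvgpu_list_node { struct nvgpu_list_node *prev, *next; };
struct nvgpu_channel_wdt { int stub; };

struct nvgpu_channel {
        int chid;       /* placeholder for the many unchanged members */

#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
        struct gpfifo_desc gpfifo;
        struct priv_cmd_queue priv_cmd_q;
        struct nvgpu_channel_sync *sync;
        /* for job cleanup handling in the background worker */
        struct nvgpu_list_node worker_item;
#ifdef CONFIG_NVGPU_CHANNEL_WDT
        /* kernel watchdog to kill stuck jobs */
        struct nvgpu_channel_wdt wdt;
#endif /* CONFIG_NVGPU_CHANNEL_WDT */
#endif /* CONFIG_NVGPU_KERNEL_MODE_SUBMIT */

        /* ... remaining members ... */
};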

The following files are compiled out of the safety build.

common/fifo/submit.c
common/sync/channel_sync_semaphore.c
hal/fifo/userd_gv11b.c

Jira NVGPU-3479

Change-Id: If46c936477c6698f4bec3cab93906aaacb0ceabf
Signed-off-by: Debarshi Dutta <ddutta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2127212
GVS: Gerrit_Virtual_Submit
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Commit: f6c96f620f (parent: 2fc673df49)
Author: Debarshi Dutta, 2019-06-24 15:17:47 +05:30
Committed by: mobile promotions
27 changed files with 1031 additions and 904 deletions

View File

@@ -40,6 +40,7 @@ ccflags-y += -DCONFIG_NVGPU_CHANNEL_TSG_SCHEDULING
ccflags-y += -DCONFIG_NVGPU_CHANNEL_TSG_CONTROL
ccflags-y += -DCONFIG_NVGPU_POWER_PG
ccflags-y += -DCONFIG_NVGPU_CE
ccflags-y += -DCONFIG_NVGPU_KERNEL_MODE_SUBMIT
ccflags-y += -DCONFIG_NVGPU_COMPRESSION
ccflags-y += -DCONFIG_NVGPU_SIM
ccflags-y += -DCONFIG_NVGPU_TRACE

View File

@@ -79,6 +79,11 @@ NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_USERD
# Enable Channel WDT for safety build until we switch to user mode submits only
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_CHANNEL_WDT
# Enable Kernel Mode submit for safety build until we switch to user mode
# submits only
CONFIG_NVGPU_KERNEL_MODE_SUBMIT := 1
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_KERNEL_MODE_SUBMIT
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_FIFO_ENGINE_ACTIVITY
# Enable Channel/TSG Scheduling for safety build until devctl whitelisting is done

View File

@@ -125,7 +125,6 @@ srcs += common/utils/enabled.c \
common/ptimer/ptimer.c \
common/sync/channel_sync.c \
common/sync/channel_sync_syncpt.c \
common/sync/channel_sync_semaphore.c \
common/semaphore/semaphore_sea.c \
common/semaphore/semaphore_pool.c \
common/semaphore/semaphore_hw.c \
@@ -137,7 +136,6 @@ srcs += common/utils/enabled.c \
common/rc/rc.c \
common/fifo/fifo.c \
common/fifo/pbdma.c \
common/fifo/submit.c \
common/fifo/tsg.c \
common/fifo/runlist.c \
common/fifo/engine_status.c \
@@ -236,7 +234,6 @@ srcs += common/utils/enabled.c \
hal/fifo/tsg_gk20a.c \
hal/fifo/tsg_gv11b.c \
hal/fifo/userd_gk20a.c \
hal/fifo/userd_gv11b.c \
hal/fifo/fifo_intr_gk20a.c \
hal/fifo/fifo_intr_gv11b.c \
hal/fifo/mmu_fault_gk20a.c \
@@ -332,6 +329,12 @@ ifeq ($(CONFIG_NVGPU_CE),1)
srcs += common/ce/ce.c
endif
ifeq ($(CONFIG_NVGPU_KERNEL_MODE_SUBMIT),1)
srcs += common/fifo/submit.c \
common/sync/channel_sync_semaphore.c \
hal/fifo/userd_gv11b.c
endif
ifeq ($(CONFIG_NVGPU_FECS_TRACE),1)
srcs += common/gr/fecs_trace.c \
hal/gr/fecs_trace/fecs_trace_gm20b.c \

View File

@@ -156,6 +156,7 @@ void nvgpu_fence_pool_free(struct nvgpu_channel *ch)
}
}
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
struct nvgpu_fence_type *nvgpu_fence_alloc(struct nvgpu_channel *ch)
{
struct nvgpu_fence_type *fence = NULL;
@@ -183,6 +184,7 @@ struct nvgpu_fence_type *nvgpu_fence_alloc(struct nvgpu_channel *ch)
return fence;
}
#endif
void nvgpu_fence_init(struct nvgpu_fence_type *f,
const struct nvgpu_fence_ops *ops,

View File

File diff suppressed because it is too large.

View File

@@ -60,7 +60,9 @@ void nvgpu_fifo_cleanup_sw_common(struct gk20a *g)
void nvgpu_fifo_cleanup_sw(struct gk20a *g)
{
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
nvgpu_channel_worker_deinit(g);
#endif
nvgpu_fifo_cleanup_sw_common(g);
}
@@ -169,19 +171,23 @@ int nvgpu_fifo_setup_sw(struct gk20a *g)
return err;
}
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
err = nvgpu_channel_worker_init(g);
if (err != 0) {
nvgpu_err(g, "worker init fail, err=%d", err);
goto clean_up;
}
#endif
f->sw_ready = true;
nvgpu_log_fn(g, "done");
return 0;
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
clean_up:
nvgpu_fifo_cleanup_sw_common(g);
#endif
return err;
}

View File

@@ -460,6 +460,7 @@ void nvgpu_tsg_set_ctx_mmu_error(struct gk20a *g, struct nvgpu_tsg *tsg)
NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT);
}
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
bool nvgpu_tsg_check_ctxsw_timeout(struct nvgpu_tsg *tsg,
bool *debug_dump, u32 *ms)
{
@@ -522,6 +523,7 @@ bool nvgpu_tsg_check_ctxsw_timeout(struct nvgpu_tsg *tsg,
nvgpu_rwsem_up_read(&tsg->ch_list_lock);
return recover;
}
#endif
#ifdef CONFIG_NVGPU_CHANNEL_TSG_SCHEDULING
int nvgpu_tsg_set_interleave(struct nvgpu_tsg *tsg, u32 level)

View File

@@ -45,7 +45,11 @@ struct nvgpu_channel_sync *nvgpu_channel_sync_create(struct nvgpu_channel *c,
if (nvgpu_has_syncpoints(c->g)) {
return nvgpu_channel_sync_syncpt_create(c, user_managed);
} else {
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
return nvgpu_channel_sync_semaphore_create(c, user_managed);
#else
return NULL;
#endif
}
}
@@ -64,6 +68,7 @@ bool nvgpu_has_syncpoints(struct gk20a *g)
#endif
}
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
int nvgpu_channel_sync_wait_fence_fd(struct nvgpu_channel_sync *s, int fd,
struct priv_cmd_entry *entry, u32 max_wait_cmds)
{
@@ -91,6 +96,18 @@ void nvgpu_channel_sync_set_min_eq_max(struct nvgpu_channel_sync *s)
s->set_min_eq_max(s);
}
void nvgpu_channel_sync_get_ref(struct nvgpu_channel_sync *s)
{
nvgpu_atomic_inc(&s->refcount);
}
bool nvgpu_channel_sync_put_ref_and_check(struct nvgpu_channel_sync *s)
{
return nvgpu_atomic_dec_and_test(&s->refcount);
}
#endif /* CONFIG_NVGPU_KERNEL_MODE_SUBMIT */
void nvgpu_channel_sync_set_safe_state(struct nvgpu_channel_sync *s)
{
s->set_safe_state(s);
@@ -105,13 +122,4 @@ void nvgpu_channel_sync_destroy(struct nvgpu_channel_sync *sync,
sync->destroy(sync);
}
void nvgpu_channel_sync_get_ref(struct nvgpu_channel_sync *s)
{
nvgpu_atomic_inc(&s->refcount);
}
bool nvgpu_channel_sync_put_ref_and_check(struct nvgpu_channel_sync *s)
{
return nvgpu_atomic_dec_and_test(&s->refcount);
}

View File

@@ -39,6 +39,7 @@ struct nvgpu_fence_type;
struct nvgpu_channel_sync {
nvgpu_atomic_t refcount;
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
int (*wait_fence_raw)(struct nvgpu_channel_sync *s, u32 id, u32 thresh,
struct priv_cmd_entry *entry);
@@ -58,7 +59,7 @@ struct nvgpu_channel_sync {
bool wfi,
bool need_sync_fence,
bool register_irq);
#endif
void (*set_min_eq_max)(struct nvgpu_channel_sync *s);
void (*set_safe_state)(struct nvgpu_channel_sync *s);

View File

@@ -54,6 +54,7 @@ nvgpu_channel_sync_syncpt_from_ops(struct nvgpu_channel_sync *ops)
offsetof(struct nvgpu_channel_sync_syncpt, ops));
}
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
static int channel_sync_syncpt_gen_wait_cmd(struct nvgpu_channel *c,
u32 id, u32 thresh, struct priv_cmd_entry *wait_cmd,
u32 wait_cmd_size, u32 pos, bool preallocated)
@@ -292,6 +293,14 @@ static int channel_sync_syncpt_incr_user(struct nvgpu_channel_sync *s,
entry, fence, need_sync_fence);
}
int nvgpu_channel_sync_wait_syncpt(struct nvgpu_channel_sync_syncpt *s,
u32 id, u32 thresh, struct priv_cmd_entry *entry)
{
return channel_sync_syncpt_wait_raw(s, id, thresh, entry);
}
#endif /* CONFIG_NVGPU_KERNEL_MODE_SUBMIT */
static void channel_sync_syncpt_set_min_eq_max(struct nvgpu_channel_sync *s)
{
struct nvgpu_channel_sync_syncpt *sp =
@@ -339,18 +348,12 @@ u64 nvgpu_channel_sync_get_syncpt_address(struct nvgpu_channel_sync_syncpt *s)
return channel_sync_syncpt_get_address(s);
}
int nvgpu_channel_sync_wait_syncpt(struct nvgpu_channel_sync_syncpt *s,
u32 id, u32 thresh, struct priv_cmd_entry *entry)
{
return channel_sync_syncpt_wait_raw(s, id, thresh, entry);
}
struct nvgpu_channel_sync_syncpt *
nvgpu_channel_sync_to_syncpt(struct nvgpu_channel_sync *sync)
{
struct nvgpu_channel_sync_syncpt *syncpt = NULL;
if (sync->wait_fence_fd == channel_sync_syncpt_wait_fd) {
if (sync->set_min_eq_max == channel_sync_syncpt_set_min_eq_max) {
syncpt = nvgpu_channel_sync_syncpt_from_ops(sync);
}
@@ -396,9 +399,11 @@ nvgpu_channel_sync_syncpt_create(struct nvgpu_channel *c, bool user_managed)
nvgpu_nvhost_syncpt_set_min_eq_max_ext(sp->nvhost_dev, sp->id);
nvgpu_atomic_set(&sp->ops.refcount, 0);
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
sp->ops.wait_fence_fd = channel_sync_syncpt_wait_fd;
sp->ops.incr = channel_sync_syncpt_incr;
sp->ops.incr_user = channel_sync_syncpt_incr_user;
#endif
sp->ops.set_min_eq_max = channel_sync_syncpt_set_min_eq_max;
sp->ops.set_safe_state = channel_sync_syncpt_set_safe_state;
sp->ops.destroy = channel_sync_syncpt_destroy;

View File

@@ -72,10 +72,12 @@ int vgpu_fifo_setup_sw(struct gk20a *g)
return err;
}
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
err = nvgpu_channel_worker_init(g);
if (err) {
goto clean_up;
}
#endif
f->channel_base = priv->constants.channel_base;
@@ -84,9 +86,11 @@ int vgpu_fifo_setup_sw(struct gk20a *g)
nvgpu_log_fn(g, "done");
return 0;
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
clean_up:
/* FIXME: unmap from bar1 */
nvgpu_fifo_cleanup_sw_common(g);
#endif
return err;
}

View File

@@ -579,7 +579,9 @@ static const struct gpu_ops vgpu_gp10b_ops = {
.unbind_channel_check_hw_state = NULL,
.unbind_channel_check_ctx_reload = NULL,
.unbind_channel_check_eng_faulted = NULL,
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
.check_ctxsw_timeout = nvgpu_tsg_check_ctxsw_timeout,
#endif
.force_reset = vgpu_tsg_force_reset_ch,
.post_event_id = nvgpu_tsg_post_event_id,
.set_timeslice = vgpu_tsg_set_timeslice,

View File

@@ -666,7 +666,9 @@ static const struct gpu_ops vgpu_gv11b_ops = {
.unbind_channel_check_hw_state = NULL,
.unbind_channel_check_ctx_reload = NULL,
.unbind_channel_check_eng_faulted = NULL,
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
.check_ctxsw_timeout = nvgpu_tsg_check_ctxsw_timeout,
#endif
.force_reset = vgpu_tsg_force_reset_ch,
.post_event_id = nvgpu_tsg_post_event_id,
.set_timeslice = vgpu_tsg_set_timeslice,

View File

@@ -115,10 +115,11 @@ bool gk20a_fifo_handle_ctxsw_timeout(struct gk20a *g)
nvgpu_channel_put(ch);
}
}
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
if (tsg != NULL) {
recover = g->ops.tsg.check_ctxsw_timeout(tsg, &debug_dump, &ms);
}
#endif
if (recover) {
nvgpu_err(g,

View File

@@ -221,8 +221,10 @@ bool gv11b_fifo_handle_ctxsw_timeout(struct gk20a *g)
0, GPU_HOST_PFIFO_CTXSW_TIMEOUT_ERROR,
tsgid);
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
recover = g->ops.tsg.check_ctxsw_timeout(tsg,
&debug_dump, &ms);
#endif
if (recover) {
info_status_str = invalid_str;
if (info_status <

View File

@@ -50,6 +50,7 @@ void gk20a_userd_init_mem(struct gk20a *g, struct nvgpu_channel *c)
nvgpu_mem_wr32(g, mem, offset + ram_userd_gp_put_w(), 0);
}
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
u32 gk20a_userd_gp_get(struct gk20a *g, struct nvgpu_channel *c)
{
u64 userd_gpu_va = nvgpu_channel_userd_gpu_va(c);
@@ -82,6 +83,7 @@ void gk20a_userd_gp_put(struct gk20a *g, struct nvgpu_channel *c)
BUG_ON(u64_hi32(addr) != 0U);
nvgpu_bar1_writel(g, (u32)addr, c->gpfifo.put);
}
#endif
u32 gk20a_userd_entry_size(struct gk20a *g)
{

View File

@@ -27,9 +27,11 @@ struct gk20a;
struct nvgpu_channel;
void gk20a_userd_init_mem(struct gk20a *g, struct nvgpu_channel *c);
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
u32 gk20a_userd_gp_get(struct gk20a *g, struct nvgpu_channel *c);
u64 gk20a_userd_pb_get(struct gk20a *g, struct nvgpu_channel *c);
void gk20a_userd_gp_put(struct gk20a *g, struct nvgpu_channel *c);
#endif
u32 gk20a_userd_entry_size(struct gk20a *g);
#endif /* USERD_GK20A_H */

View File

@@ -730,11 +730,13 @@ static const struct gpu_ops gm20b_ops = {
.cleanup_sw = nvgpu_userd_cleanup_sw,
#ifdef CONFIG_NVGPU_USERD
.init_mem = gk20a_userd_init_mem,
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
.gp_get = gk20a_userd_gp_get,
.gp_put = gk20a_userd_gp_put,
.pb_get = gk20a_userd_pb_get,
.entry_size = gk20a_userd_entry_size,
#endif
.entry_size = gk20a_userd_entry_size,
#endif /* CONFIG_NVGPU_USERD */
},
.channel = {
.alloc_inst = nvgpu_channel_alloc_inst,
@@ -764,7 +766,9 @@ static const struct gpu_ops gm20b_ops = {
.unbind_channel_check_ctx_reload =
nvgpu_tsg_unbind_channel_check_ctx_reload,
.unbind_channel_check_eng_faulted = NULL,
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
.check_ctxsw_timeout = nvgpu_tsg_check_ctxsw_timeout,
#endif
#ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL
.force_reset = nvgpu_tsg_force_reset_ch,
.post_event_id = nvgpu_tsg_post_event_id,

View File

@@ -815,11 +815,13 @@ static const struct gpu_ops gp10b_ops = {
.cleanup_sw = nvgpu_userd_cleanup_sw,
#ifdef CONFIG_NVGPU_USERD
.init_mem = gk20a_userd_init_mem,
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
.gp_get = gk20a_userd_gp_get,
.gp_put = gk20a_userd_gp_put,
.pb_get = gk20a_userd_pb_get,
.entry_size = gk20a_userd_entry_size,
#endif
.entry_size = gk20a_userd_entry_size,
#endif /* CONFIG_NVGPU_USERD */
},
.channel = {
.alloc_inst = nvgpu_channel_alloc_inst,
@@ -831,7 +833,9 @@ static const struct gpu_ops gp10b_ops = {
.count = gm20b_channel_count,
.read_state = gk20a_channel_read_state,
.force_ctx_reload = gm20b_channel_force_ctx_reload,
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
.set_syncpt = nvgpu_channel_set_syncpt,
#endif
.abort_clean_up = nvgpu_channel_abort_clean_up,
.suspend_all_serviceable_ch =
nvgpu_channel_suspend_all_serviceable_ch,
@@ -850,7 +854,9 @@ static const struct gpu_ops gp10b_ops = {
.unbind_channel_check_ctx_reload =
nvgpu_tsg_unbind_channel_check_ctx_reload,
.unbind_channel_check_eng_faulted = NULL,
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
.check_ctxsw_timeout = nvgpu_tsg_check_ctxsw_timeout,
#endif
#ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL
.force_reset = nvgpu_tsg_force_reset_ch,
.post_event_id = nvgpu_tsg_post_event_id,

View File

@@ -948,11 +948,13 @@ static const struct gpu_ops gv11b_ops = {
.cleanup_sw = nvgpu_userd_cleanup_sw,
#ifdef CONFIG_NVGPU_USERD
.init_mem = gk20a_userd_init_mem,
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
.gp_get = gv11b_userd_gp_get,
.gp_put = gv11b_userd_gp_put,
.pb_get = gv11b_userd_pb_get,
.entry_size = gk20a_userd_entry_size,
#endif
.entry_size = gk20a_userd_entry_size,
#endif /* CONFIG_NVGPU_USERD */
},
.channel = {
.alloc_inst = nvgpu_channel_alloc_inst,
@@ -989,7 +991,9 @@ static const struct gpu_ops gv11b_ops = {
nvgpu_tsg_unbind_channel_check_ctx_reload,
.unbind_channel_check_eng_faulted =
gv11b_tsg_unbind_channel_check_eng_faulted,
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
.check_ctxsw_timeout = nvgpu_tsg_check_ctxsw_timeout,
#endif
#ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL
.force_reset = nvgpu_tsg_force_reset_ch,
.post_event_id = nvgpu_tsg_post_event_id,

View File

@@ -991,11 +991,13 @@ static const struct gpu_ops tu104_ops = {
.cleanup_sw = nvgpu_userd_cleanup_sw,
#ifdef CONFIG_NVGPU_USERD
.init_mem = gk20a_userd_init_mem,
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
.gp_get = gv11b_userd_gp_get,
.gp_put = gv11b_userd_gp_put,
.pb_get = gv11b_userd_pb_get,
.entry_size = gk20a_userd_entry_size,
#endif
.entry_size = gk20a_userd_entry_size,
#endif /* CONFIG_NVGPU_USERD */
},
.channel = {
.alloc_inst = nvgpu_channel_alloc_inst,
@@ -1032,7 +1034,9 @@ static const struct gpu_ops tu104_ops = {
nvgpu_tsg_unbind_channel_check_ctx_reload,
.unbind_channel_check_eng_faulted =
gv11b_tsg_unbind_channel_check_eng_faulted,
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
.check_ctxsw_timeout = nvgpu_tsg_check_ctxsw_timeout,
#endif
#ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL
.force_reset = nvgpu_tsg_force_reset_ch,
.post_event_id = nvgpu_tsg_post_event_id,

View File

@@ -303,13 +303,24 @@ struct nvgpu_channel {
struct nvgpu_list_node ch_entry; /* channel's entry in TSG */
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
struct nvgpu_channel_joblist joblist;
struct gpfifo_desc gpfifo;
struct priv_cmd_queue priv_cmd_q;
struct nvgpu_channel_sync *sync;
/* for job cleanup handling in the background worker */
struct nvgpu_list_node worker_item;
#ifdef CONFIG_NVGPU_CHANNEL_WDT
/* kernel watchdog to kill stuck jobs */
struct nvgpu_channel_wdt wdt;
#endif /* CONFIG_NVGPU_CHANNEL_WDT */
#endif /* CONFIG_NVGPU_KERNEL_MODE_SUBMIT */
struct nvgpu_allocator fence_allocator;
struct vm_gk20a *vm;
struct gpfifo_desc gpfifo;
struct nvgpu_mem usermode_userd; /* Used for Usermode Submission */
struct nvgpu_mem usermode_gpfifo;
struct nvgpu_mem inst_block;
@@ -319,19 +330,9 @@ struct nvgpu_channel {
struct nvgpu_mem *userd_mem; /* kernel mode userd */
u32 userd_offset; /* in bytes from start of userd_mem */
struct priv_cmd_queue priv_cmd_q;
struct nvgpu_cond notifier_wq;
struct nvgpu_cond semaphore_wq;
#ifdef CONFIG_NVGPU_CHANNEL_WDT
/* kernel watchdog to kill stuck jobs */
struct nvgpu_channel_wdt wdt;
#endif
/* for job cleanup handling in the background worker */
struct nvgpu_list_node worker_item;
#if defined(CONFIG_NVGPU_CYCLESTATS)
struct {
void *cyclestate_buffer;
@@ -346,7 +347,6 @@ struct nvgpu_channel {
struct nvgpu_list_node dbg_s_list;
struct nvgpu_mutex sync_lock;
struct nvgpu_channel_sync *sync;
struct nvgpu_channel_sync *user_sync;
#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
@@ -388,6 +388,66 @@ struct nvgpu_channel {
bool mmu_debug_mode_enabled;
#endif
};
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
static inline struct nvgpu_channel *
nvgpu_channel_from_worker_item(struct nvgpu_list_node *node)
{
return (struct nvgpu_channel *)
((uintptr_t)node - offsetof(struct nvgpu_channel, worker_item));
};
int nvgpu_channel_alloc_priv_cmdbuf(struct nvgpu_channel *c, u32 orig_size,
struct priv_cmd_entry *e);
void nvgpu_channel_update_priv_cmd_q_and_free_entry(
struct nvgpu_channel *ch, struct priv_cmd_entry *e);
int nvgpu_channel_worker_init(struct gk20a *g);
void nvgpu_channel_worker_deinit(struct gk20a *g);
struct nvgpu_channel *nvgpu_channel_get_from_file(int fd);
void nvgpu_channel_update(struct nvgpu_channel *c);
int nvgpu_channel_alloc_job(struct nvgpu_channel *c,
struct nvgpu_channel_job **job_out);
void nvgpu_channel_free_job(struct nvgpu_channel *c,
struct nvgpu_channel_job *job);
u32 nvgpu_channel_update_gpfifo_get_and_get_free_count(
struct nvgpu_channel *ch);
u32 nvgpu_channel_get_gpfifo_free_count(struct nvgpu_channel *ch);
int nvgpu_channel_add_job(struct nvgpu_channel *c,
struct nvgpu_channel_job *job,
bool skip_buffer_refcounting);
void nvgpu_channel_free_priv_cmd_entry(struct nvgpu_channel *c,
struct priv_cmd_entry *e);
void nvgpu_channel_clean_up_jobs(struct nvgpu_channel *c,
bool clean_all);
int nvgpu_submit_channel_gpfifo_user(struct nvgpu_channel *c,
struct nvgpu_gpfifo_userdata userdata,
u32 num_entries,
u32 flags,
struct nvgpu_channel_fence *fence,
struct nvgpu_fence_type **fence_out,
struct nvgpu_profile *profile);
int nvgpu_submit_channel_gpfifo_kernel(struct nvgpu_channel *c,
struct nvgpu_gpfifo_entry *gpfifo,
u32 num_entries,
u32 flags,
struct nvgpu_channel_fence *fence,
struct nvgpu_fence_type **fence_out);
int nvgpu_channel_set_syncpt(struct nvgpu_channel *ch);
void nvgpu_channel_joblist_lock(struct nvgpu_channel *c);
void nvgpu_channel_joblist_unlock(struct nvgpu_channel *c);
bool nvgpu_channel_joblist_is_empty(struct nvgpu_channel *c);
bool nvgpu_channel_is_prealloc_enabled(struct nvgpu_channel *c);
bool nvgpu_channel_update_and_check_ctxsw_timeout(struct nvgpu_channel *ch,
u32 timeout_delta_ms, bool *progress);
#ifdef CONFIG_NVGPU_CHANNEL_WDT
void nvgpu_channel_wdt_restart_all_channels(struct gk20a *g);
#endif
#endif /* CONFIG_NVGPU_KERNEL_MODE_SUBMIT */
static inline struct nvgpu_channel *
nvgpu_channel_from_free_chs(struct nvgpu_list_node *node)
{
@@ -402,13 +462,6 @@ nvgpu_channel_from_ch_entry(struct nvgpu_list_node *node)
((uintptr_t)node - offsetof(struct nvgpu_channel, ch_entry));
};
static inline struct nvgpu_channel *
nvgpu_channel_from_worker_item(struct nvgpu_list_node *node)
{
return (struct nvgpu_channel *)
((uintptr_t)node - offsetof(struct nvgpu_channel, worker_item));
};
static inline bool nvgpu_channel_as_bound(struct nvgpu_channel *ch)
{
return (ch->vm != NULL);
@@ -426,19 +479,12 @@ void nvgpu_channel_set_ctx_mmu_error(struct gk20a *g,
struct nvgpu_channel *ch);
bool nvgpu_channel_mark_error(struct gk20a *g, struct nvgpu_channel *ch);
bool nvgpu_channel_update_and_check_ctxsw_timeout(struct nvgpu_channel *ch,
u32 timeout_delta_ms, bool *progress);
void nvgpu_channel_recover(struct gk20a *g, struct nvgpu_channel *ch,
bool verbose, u32 rc_type);
void nvgpu_channel_abort(struct nvgpu_channel *ch, bool channel_preempt);
void nvgpu_channel_abort_clean_up(struct nvgpu_channel *ch);
void gk20a_channel_semaphore_wakeup(struct gk20a *g, bool post_events);
int nvgpu_channel_alloc_priv_cmdbuf(struct nvgpu_channel *c, u32 orig_size,
struct priv_cmd_entry *e);
void nvgpu_channel_update_priv_cmd_q_and_free_entry(
struct nvgpu_channel *ch, struct priv_cmd_entry *e);
int nvgpu_channel_enable_tsg(struct gk20a *g, struct nvgpu_channel *ch);
int nvgpu_channel_disable_tsg(struct gk20a *g, struct nvgpu_channel *ch);
@@ -449,12 +495,6 @@ void nvgpu_channel_resume_all_serviceable_ch(struct gk20a *g);
void nvgpu_channel_deterministic_idle(struct gk20a *g);
void nvgpu_channel_deterministic_unidle(struct gk20a *g);
int nvgpu_channel_worker_init(struct gk20a *g);
void nvgpu_channel_worker_deinit(struct gk20a *g);
struct nvgpu_channel *nvgpu_channel_get_from_file(int fd);
void nvgpu_channel_update(struct nvgpu_channel *c);
/* returns ch if reference was obtained */
struct nvgpu_channel *__must_check nvgpu_channel_get__func(
struct nvgpu_channel *ch, const char *caller);
@@ -479,51 +519,15 @@ struct nvgpu_channel *gk20a_open_new_channel(struct gk20a *g,
int nvgpu_channel_setup_bind(struct nvgpu_channel *c,
struct nvgpu_setup_bind_args *args);
void nvgpu_channel_wdt_restart_all_channels(struct gk20a *g);
bool nvgpu_channel_is_prealloc_enabled(struct nvgpu_channel *c);
void nvgpu_channel_joblist_lock(struct nvgpu_channel *c);
void nvgpu_channel_joblist_unlock(struct nvgpu_channel *c);
bool nvgpu_channel_joblist_is_empty(struct nvgpu_channel *c);
int nvgpu_channel_update_runlist(struct nvgpu_channel *c, bool add);
void nvgpu_channel_wait_until_counter_is_N(
struct nvgpu_channel *ch, nvgpu_atomic_t *counter, int wait_value,
struct nvgpu_cond *c, const char *caller, const char *counter_name);
int nvgpu_channel_alloc_job(struct nvgpu_channel *c,
struct nvgpu_channel_job **job_out);
void nvgpu_channel_free_job(struct nvgpu_channel *c,
struct nvgpu_channel_job *job);
u32 nvgpu_channel_update_gpfifo_get_and_get_free_count(
struct nvgpu_channel *ch);
u32 nvgpu_channel_get_gpfifo_free_count(struct nvgpu_channel *ch);
int nvgpu_channel_add_job(struct nvgpu_channel *c,
struct nvgpu_channel_job *job,
bool skip_buffer_refcounting);
void nvgpu_channel_free_priv_cmd_entry(struct nvgpu_channel *c,
struct priv_cmd_entry *e);
void nvgpu_channel_clean_up_jobs(struct nvgpu_channel *c,
bool clean_all);
void nvgpu_channel_free_usermode_buffers(struct nvgpu_channel *c);
u32 nvgpu_get_gpfifo_entry_size(void);
int nvgpu_submit_channel_gpfifo_user(struct nvgpu_channel *c,
struct nvgpu_gpfifo_userdata userdata,
u32 num_entries,
u32 flags,
struct nvgpu_channel_fence *fence,
struct nvgpu_fence_type **fence_out,
struct nvgpu_profile *profile);
int nvgpu_submit_channel_gpfifo_kernel(struct nvgpu_channel *c,
struct nvgpu_gpfifo_entry *gpfifo,
u32 num_entries,
u32 flags,
struct nvgpu_channel_fence *fence,
struct nvgpu_fence_type **fence_out);
#ifdef CONFIG_DEBUG_FS
void trace_write_pushbuffers(struct nvgpu_channel *c, u32 count);
#else
@@ -550,7 +554,6 @@ int nvgpu_channel_alloc_inst(struct gk20a *g, struct nvgpu_channel *ch);
void nvgpu_channel_free_inst(struct gk20a *g, struct nvgpu_channel *ch);
void nvgpu_channel_set_error_notifier(struct gk20a *g, struct nvgpu_channel *ch,
u32 error_notifier);
int nvgpu_channel_set_syncpt(struct nvgpu_channel *ch);
struct nvgpu_channel *nvgpu_channel_refch_from_inst_ptr(struct gk20a *g,
u64 inst_ptr);
void nvgpu_channel_debug_dump_all(struct gk20a *g,

View File

@@ -29,6 +29,9 @@
#include <nvgpu/atomic.h>
struct nvgpu_channel_sync;
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
struct priv_cmd_entry;
struct nvgpu_channel;
struct nvgpu_fence_type;
@@ -73,6 +76,17 @@ int nvgpu_channel_sync_incr_user(struct nvgpu_channel_sync *s,
* for semaphores.
*/
void nvgpu_channel_sync_set_min_eq_max(struct nvgpu_channel_sync *s);
/*
* Increment the usage_counter for this instance.
*/
void nvgpu_channel_sync_get_ref(struct nvgpu_channel_sync *s);
/*
* Decrement the usage_counter for this instance and return if equals 0.
*/
bool nvgpu_channel_sync_put_ref_and_check(struct nvgpu_channel_sync *s);
#endif /* CONFIG_NVGPU_KERNEL_MODE_SUBMIT */
/*
* Set the channel syncpoint/semaphore to safe state
* This should be used to reset User managed syncpoint since we don't
@@ -86,16 +100,6 @@ void nvgpu_channel_sync_set_safe_state(struct nvgpu_channel_sync *s);
void nvgpu_channel_sync_destroy(struct nvgpu_channel_sync *sync,
bool set_safe_state);
/*
* Increment the usage_counter for this instance.
*/
void nvgpu_channel_sync_get_ref(struct nvgpu_channel_sync *s);
/*
* Decrement the usage_counter for this instance and return if equals 0.
*/
bool nvgpu_channel_sync_put_ref_and_check(struct nvgpu_channel_sync *s);
/*
* Construct a channel_sync backed by either a syncpoint or a semaphore.
* A channel_sync is by default constructed as backed by a syncpoint

View File

@@ -29,6 +29,8 @@
#include <nvgpu/types.h>
#include <nvgpu/channel_sync.h>
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
struct nvgpu_channel;
struct nvgpu_channel_sync_semaphore;
/*
@@ -46,4 +48,6 @@ struct nvgpu_channel_sync *
nvgpu_channel_sync_semaphore_create(
struct nvgpu_channel *c, bool user_managed);
#endif
#endif /* NVGPU_CHANNEL_SYNC_SEMAPHORE_H */

View File

@@ -80,7 +80,9 @@ int nvgpu_fence_pool_alloc(struct nvgpu_channel *ch, unsigned int count);
void nvgpu_fence_pool_free(struct nvgpu_channel *ch);
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
struct nvgpu_fence_type *nvgpu_fence_alloc(struct nvgpu_channel *ch);
#endif
void nvgpu_fence_init(struct nvgpu_fence_type *f,
const struct nvgpu_fence_ops *ops,

View File

@@ -1118,9 +1118,11 @@ struct gpu_ops {
int (*setup_sw)(struct gk20a *g);
void (*cleanup_sw)(struct gk20a *g);
void (*init_mem)(struct gk20a *g, struct nvgpu_channel *c);
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
u32 (*gp_get)(struct gk20a *g, struct nvgpu_channel *c);
void (*gp_put)(struct gk20a *g, struct nvgpu_channel *c);
u64 (*pb_get)(struct gk20a *g, struct nvgpu_channel *c);
#endif
u32 (*entry_size)(struct gk20a *g);
} userd;
@@ -1229,7 +1231,9 @@ struct gpu_ops {
void (*set_error_notifier)(struct nvgpu_channel *ch, u32 error);
void (*reset_faulted)(struct gk20a *g, struct nvgpu_channel *ch,
bool eng, bool pbdma);
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
int (*set_syncpt)(struct nvgpu_channel *ch);
#endif
void (*debug_dump)(struct gk20a *g,
struct nvgpu_debug_context *o,
struct nvgpu_channel_dump_info *info);
@@ -1257,8 +1261,10 @@ struct gpu_ops {
void (*unbind_channel_check_eng_faulted)(struct nvgpu_tsg *tsg,
struct nvgpu_channel *ch,
struct nvgpu_channel_hw_state *state);
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
bool (*check_ctxsw_timeout)(struct nvgpu_tsg *tsg,
bool *verbose, u32 *ms);
#endif
#ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL
int (*force_reset)(struct nvgpu_channel *ch,
u32 err_code, bool verbose);

View File

@@ -127,8 +127,10 @@ void nvgpu_tsg_set_ctx_mmu_error(struct gk20a *g,
struct nvgpu_tsg *tsg);
bool nvgpu_tsg_mark_error(struct gk20a *g, struct nvgpu_tsg *tsg);
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
bool nvgpu_tsg_check_ctxsw_timeout(struct nvgpu_tsg *tsg,
bool *debug_dump, u32 *ms);
#endif
int nvgpu_tsg_set_runlist_interleave(struct nvgpu_tsg *tsg, u32 level);
#ifdef CONFIG_NVGPU_CHANNEL_TSG_SCHEDULING
int nvgpu_tsg_set_timeslice(struct nvgpu_tsg *tsg, u32 timeslice_us);