From f6c96f620ffa6133656310a1415d710e56fb46d0 Mon Sep 17 00:00:00 2001
From: Debarshi Dutta
Date: Mon, 24 Jun 2019 15:17:47 +0530
Subject: [PATCH] gpu: nvgpu: add CONFIG_NVGPU_KERNEL_MODE_SUBMIT flag

The following functions belong to the kernel-mode submit path, and the
flag CONFIG_NVGPU_KERNEL_MODE_SUBMIT is used to compile them out of
safety builds.

channel_gk20a_alloc_priv_cmdbuf
channel_gk20a_free_prealloc_resources
channel_gk20a_joblist_add
channel_gk20a_joblist_delete
channel_gk20a_joblist_peek
channel_gk20a_prealloc_resources
nvgpu_channel
nvgpu_channel_add_job
nvgpu_channel_alloc_job
nvgpu_channel_alloc_priv_cmdbuf
nvgpu_channel_clean_up_jobs
nvgpu_channel_free_job
nvgpu_channel_free_priv_cmd_entry
nvgpu_channel_free_priv_cmd_q
nvgpu_channel_from_worker_item
nvgpu_channel_get_gpfifo_free_count
nvgpu_channel_is_prealloc_enabled
nvgpu_channel_joblist_is_empty
nvgpu_channel_joblist_lock
nvgpu_channel_joblist_unlock
nvgpu_channel_kernelmode_deinit
nvgpu_channel_poll_wdt
nvgpu_channel_set_syncpt
nvgpu_channel_setup_kernelmode
nvgpu_channel_sync_get_ref
nvgpu_channel_sync_incr
nvgpu_channel_sync_incr_user
nvgpu_channel_sync_put_ref_and_check
nvgpu_channel_sync_wait_fence_fd
nvgpu_channel_update
nvgpu_channel_update_gpfifo_get_and_get_free_count
nvgpu_channel_update_priv_cmd_q_and_free_entry
nvgpu_channel_wdt_continue
nvgpu_channel_wdt_handler
nvgpu_channel_wdt_init
nvgpu_channel_wdt_restart_all_channels
nvgpu_channel_wdt_rewind
nvgpu_channel_wdt_start
nvgpu_channel_wdt_stop
nvgpu_channel_worker_deinit
nvgpu_channel_worker_from_worker
nvgpu_channel_worker_init
nvgpu_channel_worker_poll_init
nvgpu_channel_worker_poll_wakeup_post_process_item
nvgpu_channel_worker_poll_wakeup_process_item
nvgpu_submit_channel_gpfifo_kernel
nvgpu_submit_channel_gpfifo_user
gk20a_userd_gp_get
gk20a_userd_pb_get
gk20a_userd_gp_put
nvgpu_fence_alloc

The following members of struct nvgpu_channel are compiled out of the
safety build:

struct gpfifo_desc gpfifo;
struct priv_cmd_queue priv_cmd_q;
struct nvgpu_channel_sync *sync;
struct nvgpu_list_node worker_item;
struct nvgpu_channel_wdt wdt;

The following files are compiled out of the safety build:

common/fifo/submit.c
common/sync/channel_sync_semaphore.c
hal/fifo/userd_gv11b.c
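As a minimal sketch of the guard pattern applied throughout this change
(illustrative only, not part of the diff below; the function name is
modeled on channel.c but the bodies are placeholders): the full
kernel-mode cleanup is compiled only when CONFIG_NVGPU_KERNEL_MODE_SUBMIT
is defined, while safety builds get a reduced fallback.

    /*
     * Standalone sketch of the compile-time guard. Build with
     * -DCONFIG_NVGPU_KERNEL_MODE_SUBMIT for the kernel-mode submit
     * variant; omit it for the safety-build variant.
     */
    #include <stdio.h>

    #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
    static void channel_abort_clean_up(void)
    {
            /* full cleanup: joblist, priv cmdbufs, sync refs, watchdog */
            printf("kernel-mode submit build: full job cleanup\n");
    }
    #else
    static void channel_abort_clean_up(void)
    {
            /* safety build: only force the user sync to a safe state */
            printf("safety build: set user sync to safe state only\n");
    }
    #endif /* CONFIG_NVGPU_KERNEL_MODE_SUBMIT */

    int main(void)
    {
            channel_abort_clean_up();
            return 0;
    }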
Jira NVGPU-3479

Change-Id: If46c936477c6698f4bec3cab93906aaacb0ceabf
Signed-off-by: Debarshi Dutta
Reviewed-on: https://git-master.nvidia.com/r/2127212
GVS: Gerrit_Virtual_Submit
Reviewed-by: Vijayakumar Subbu
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/Makefile                    |    1 +
 drivers/gpu/nvgpu/Makefile.shared.configs     |    5 +
 drivers/gpu/nvgpu/Makefile.sources            |    9 +-
 drivers/gpu/nvgpu/common/fence/fence.c        |    2 +
 drivers/gpu/nvgpu/common/fifo/channel.c       | 1638 +++++++++--------
 drivers/gpu/nvgpu/common/fifo/fifo.c          |    6 +
 drivers/gpu/nvgpu/common/fifo/tsg.c           |    2 +
 drivers/gpu/nvgpu/common/sync/channel_sync.c  |   26 +-
 .../gpu/nvgpu/common/sync/channel_sync_priv.h |    3 +-
 .../nvgpu/common/sync/channel_sync_syncpt.c   |   19 +-
 .../gpu/nvgpu/common/vgpu/fifo/fifo_vgpu.c    |    4 +
 .../nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c  |    2 +
 .../nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c  |    2 +
 .../gpu/nvgpu/hal/fifo/ctxsw_timeout_gk20a.c  |    3 +-
 .../gpu/nvgpu/hal/fifo/ctxsw_timeout_gv11b.c  |    2 +
 drivers/gpu/nvgpu/hal/fifo/userd_gk20a.c      |    2 +
 drivers/gpu/nvgpu/hal/fifo/userd_gk20a.h      |    2 +
 drivers/gpu/nvgpu/hal/init/hal_gm20b.c        |    6 +-
 drivers/gpu/nvgpu/hal/init/hal_gp10b.c        |    8 +-
 drivers/gpu/nvgpu/hal/init/hal_gv11b.c        |    6 +-
 drivers/gpu/nvgpu/hal/init/hal_tu104.c        |    6 +-
 drivers/gpu/nvgpu/include/nvgpu/channel.h     |  143 +-
 .../gpu/nvgpu/include/nvgpu/channel_sync.h    |   24 +-
 .../include/nvgpu/channel_sync_semaphore.h    |    4 +
 drivers/gpu/nvgpu/include/nvgpu/fence.h       |    2 +
 drivers/gpu/nvgpu/include/nvgpu/gk20a.h       |    6 +
 drivers/gpu/nvgpu/include/nvgpu/tsg.h         |    2 +
 27 files changed, 1031 insertions(+), 904 deletions(-)

diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
index cc92bdca8..940b5c9b9 100644
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -40,6 +40,7 @@ ccflags-y += -DCONFIG_NVGPU_CHANNEL_TSG_SCHEDULING
 ccflags-y += -DCONFIG_NVGPU_CHANNEL_TSG_CONTROL
 ccflags-y += -DCONFIG_NVGPU_POWER_PG
 ccflags-y += -DCONFIG_NVGPU_CE
+ccflags-y += -DCONFIG_NVGPU_KERNEL_MODE_SUBMIT
 ccflags-y += -DCONFIG_NVGPU_COMPRESSION
 ccflags-y += -DCONFIG_NVGPU_SIM
 ccflags-y += -DCONFIG_NVGPU_TRACE
diff --git a/drivers/gpu/nvgpu/Makefile.shared.configs b/drivers/gpu/nvgpu/Makefile.shared.configs
index 34db9c8bc..985466d0a 100644
--- a/drivers/gpu/nvgpu/Makefile.shared.configs
+++ b/drivers/gpu/nvgpu/Makefile.shared.configs
@@ -79,6 +79,11 @@ NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_USERD
 # Enable Channel WDT for safety build until we switch to user mode submits only
 NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_CHANNEL_WDT
 
+# Enable Kernel Mode submit for safety build until we switch to user mode
+# submits only
+CONFIG_NVGPU_KERNEL_MODE_SUBMIT := 1
+NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_KERNEL_MODE_SUBMIT
+
 NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_FIFO_ENGINE_ACTIVITY
 
 # Enable Channel/TSG Scheduling for safety build until devctl whitelisting is done
diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources
index 41b92069c..b43a5ef29 100644
--- a/drivers/gpu/nvgpu/Makefile.sources
+++ b/drivers/gpu/nvgpu/Makefile.sources
@@ -125,7 +125,6 @@ srcs += common/utils/enabled.c \
 	common/ptimer/ptimer.c \
 	common/sync/channel_sync.c \
 	common/sync/channel_sync_syncpt.c \
-	common/sync/channel_sync_semaphore.c \
 	common/semaphore/semaphore_sea.c \
 	common/semaphore/semaphore_pool.c \
 	common/semaphore/semaphore_hw.c \
@@ -137,7 +136,6 @@ srcs += common/utils/enabled.c \
 	common/rc/rc.c \
 	common/fifo/fifo.c \
common/fifo/pbdma.c \ - common/fifo/submit.c \ common/fifo/tsg.c \ common/fifo/runlist.c \ common/fifo/engine_status.c \ @@ -236,7 +234,6 @@ srcs += common/utils/enabled.c \ hal/fifo/tsg_gk20a.c \ hal/fifo/tsg_gv11b.c \ hal/fifo/userd_gk20a.c \ - hal/fifo/userd_gv11b.c \ hal/fifo/fifo_intr_gk20a.c \ hal/fifo/fifo_intr_gv11b.c \ hal/fifo/mmu_fault_gk20a.c \ @@ -332,6 +329,12 @@ ifeq ($(CONFIG_NVGPU_CE),1) srcs += common/ce/ce.c endif +ifeq ($(CONFIG_NVGPU_KERNEL_MODE_SUBMIT),1) +srcs += common/fifo/submit.c \ + common/sync/channel_sync_semaphore.c \ + hal/fifo/userd_gv11b.c +endif + ifeq ($(CONFIG_NVGPU_FECS_TRACE),1) srcs += common/gr/fecs_trace.c \ hal/gr/fecs_trace/fecs_trace_gm20b.c \ diff --git a/drivers/gpu/nvgpu/common/fence/fence.c b/drivers/gpu/nvgpu/common/fence/fence.c index 76f7f48fa..8c827ab5a 100644 --- a/drivers/gpu/nvgpu/common/fence/fence.c +++ b/drivers/gpu/nvgpu/common/fence/fence.c @@ -156,6 +156,7 @@ void nvgpu_fence_pool_free(struct nvgpu_channel *ch) } } +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT struct nvgpu_fence_type *nvgpu_fence_alloc(struct nvgpu_channel *ch) { struct nvgpu_fence_type *fence = NULL; @@ -183,6 +184,7 @@ struct nvgpu_fence_type *nvgpu_fence_alloc(struct nvgpu_channel *ch) return fence; } +#endif void nvgpu_fence_init(struct nvgpu_fence_type *f, const struct nvgpu_fence_ops *ops, diff --git a/drivers/gpu/nvgpu/common/fifo/channel.c b/drivers/gpu/nvgpu/common/fifo/channel.c index e4a5d1469..f30043a77 100644 --- a/drivers/gpu/nvgpu/common/fifo/channel.c +++ b/drivers/gpu/nvgpu/common/fifo/channel.c @@ -61,18 +61,21 @@ static void free_channel(struct nvgpu_fifo *f, struct nvgpu_channel *ch); static void gk20a_channel_dump_ref_actions(struct nvgpu_channel *ch); +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT static void nvgpu_channel_free_priv_cmd_q(struct nvgpu_channel *ch); - static void channel_gk20a_free_prealloc_resources(struct nvgpu_channel *c); - static void channel_gk20a_joblist_add(struct nvgpu_channel *c, struct nvgpu_channel_job *job); static void channel_gk20a_joblist_delete(struct nvgpu_channel *c, struct nvgpu_channel_job *job); static struct nvgpu_channel_job *channel_gk20a_joblist_peek( struct nvgpu_channel *c); - static const struct nvgpu_worker_ops channel_worker_ops; +#endif + +static int nvgpu_channel_setup_ramfc(struct nvgpu_channel *c, + struct nvgpu_setup_bind_args *args, + u64 gpfifo_gpu_va, u32 gpfifo_size); /* allocate GPU channel */ static struct nvgpu_channel *allocate_channel(struct nvgpu_fifo *f) @@ -174,6 +177,7 @@ int nvgpu_channel_disable_tsg(struct gk20a *g, struct nvgpu_channel *ch) } } +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT void nvgpu_channel_abort_clean_up(struct nvgpu_channel *ch) { /* synchronize with actual job cleanup */ @@ -198,67 +202,6 @@ void nvgpu_channel_abort_clean_up(struct nvgpu_channel *ch) nvgpu_channel_update(ch); } -void nvgpu_channel_set_unserviceable(struct nvgpu_channel *ch) -{ - nvgpu_spinlock_acquire(&ch->unserviceable_lock); - ch->unserviceable = true; - nvgpu_spinlock_release(&ch->unserviceable_lock); -} - -bool nvgpu_channel_check_unserviceable(struct nvgpu_channel *ch) -{ - bool unserviceable_status; - - nvgpu_spinlock_acquire(&ch->unserviceable_lock); - unserviceable_status = ch->unserviceable; - nvgpu_spinlock_release(&ch->unserviceable_lock); - - return unserviceable_status; -} - -void nvgpu_channel_abort(struct nvgpu_channel *ch, bool channel_preempt) -{ - struct nvgpu_tsg *tsg = nvgpu_tsg_from_ch(ch); - - nvgpu_log_fn(ch->g, " "); - - if (tsg != NULL) { - return nvgpu_tsg_abort(ch->g, tsg, 
channel_preempt); - } else { - nvgpu_err(ch->g, "chid: %d is not bound to tsg", ch->chid); - } -} - -void nvgpu_channel_wait_until_counter_is_N( - struct nvgpu_channel *ch, nvgpu_atomic_t *counter, int wait_value, - struct nvgpu_cond *c, const char *caller, const char *counter_name) -{ - while (true) { - if (NVGPU_COND_WAIT( - c, - nvgpu_atomic_read(counter) == wait_value, - 5000U) == 0) { - break; - } - - nvgpu_warn(ch->g, - "%s: channel %d, still waiting, %s left: %d, waiting for: %d", - caller, ch->chid, counter_name, - nvgpu_atomic_read(counter), wait_value); - - gk20a_channel_dump_ref_actions(ch); - } -} - -static void nvgpu_channel_usermode_deinit(struct nvgpu_channel *ch) -{ - struct gk20a *g = ch->g; - - nvgpu_channel_free_usermode_buffers(ch); - (void) nvgpu_userd_init_channel(g, ch); - ch->usermode_submit_enabled = false; -} - static void nvgpu_channel_kernelmode_deinit(struct nvgpu_channel *ch) { struct vm_gk20a *ch_vm = ch->vm; @@ -285,497 +228,6 @@ static void nvgpu_channel_kernelmode_deinit(struct nvgpu_channel *ch) nvgpu_mutex_release(&ch->sync_lock); } -/* call ONLY when no references to the channel exist: after the last put */ -static void gk20a_free_channel(struct nvgpu_channel *ch, bool force) -{ - struct gk20a *g = ch->g; - struct nvgpu_tsg *tsg; - struct nvgpu_fifo *f = &g->fifo; - struct vm_gk20a *ch_vm = ch->vm; - unsigned long timeout; -#ifdef CONFIG_NVGPU_DEBUGGER - struct dbg_session_gk20a *dbg_s; - struct dbg_session_data *session_data, *tmp_s; - struct dbg_session_channel_data *ch_data, *tmp; - bool deferred_reset_pending; -#endif - int err; - - if (g == NULL) { - nvgpu_do_assert_print(g, "ch already freed"); - return; - } - - nvgpu_log_fn(g, " "); - - timeout = nvgpu_get_poll_timeout(g); - -#ifdef CONFIG_NVGPU_TRACE - trace_gk20a_free_channel(ch->chid); -#endif - - /* - * Disable channel/TSG and unbind here. This should not be executed if - * HW access is not available during shutdown/removal path as it will - * trigger a timeout - */ - if (!nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) { - /* abort channel and remove from runlist */ - tsg = nvgpu_tsg_from_ch(ch); - if (tsg != NULL) { - /* Between tsg is not null and unbind_channel call, - * ioctl cannot be called anymore because user doesn't - * have an open channel fd anymore to use for the unbind - * ioctl. - */ - err = nvgpu_tsg_unbind_channel(tsg, ch); - if (err != 0) { - nvgpu_err(g, - "failed to unbind channel %d from TSG", - ch->chid); - } - } else { - /* - * Channel is already unbound from TSG by User with - * explicit call - * Nothing to do here in that case - */ - } - } - - /* - * OS channel close may require that syncpoint should be set to some - * safe value before it is called. nvgpu_tsg_unbind_channel(above) is - * internally doing that by calling nvgpu_nvhost_syncpt_set_safe_state - * deep down in the stack. Otherwise os_channel close may block if the - * app is killed abruptly (which was going to do the syncpoint signal). 
- */ - if (g->os_channel.close != NULL) { - g->os_channel.close(ch, force); - } - - /* wait until there's only our ref to the channel */ - if (!force) { - nvgpu_channel_wait_until_counter_is_N( - ch, &ch->ref_count, 1, &ch->ref_count_dec_wq, - __func__, "references"); - } - - /* wait until all pending interrupts for recently completed - * jobs are handled */ - nvgpu_wait_for_deferred_interrupts(g); - - /* prevent new refs */ - nvgpu_spinlock_acquire(&ch->ref_obtain_lock); - if (!ch->referenceable) { - nvgpu_spinlock_release(&ch->ref_obtain_lock); - nvgpu_err(ch->g, - "Extra %s() called to channel %u", - __func__, ch->chid); - return; - } - ch->referenceable = false; - nvgpu_spinlock_release(&ch->ref_obtain_lock); - - /* matches with the initial reference in gk20a_open_new_channel() */ - nvgpu_atomic_dec(&ch->ref_count); - - /* wait until no more refs to the channel */ - if (!force) { - nvgpu_channel_wait_until_counter_is_N( - ch, &ch->ref_count, 0, &ch->ref_count_dec_wq, - __func__, "references"); - } - -#ifdef CONFIG_NVGPU_DEBUGGER - /* if engine reset was deferred, perform it now */ - nvgpu_mutex_acquire(&f->deferred_reset_mutex); - deferred_reset_pending = g->fifo.deferred_reset_pending; - nvgpu_mutex_release(&f->deferred_reset_mutex); - - if (deferred_reset_pending) { - nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was" - " deferred, running now"); - nvgpu_mutex_acquire(&g->fifo.engines_reset_mutex); - - nvgpu_assert(nvgpu_channel_deferred_reset_engines(g, ch) == 0); - - nvgpu_mutex_release(&g->fifo.engines_reset_mutex); - } -#endif - - if (!nvgpu_channel_as_bound(ch)) { - goto unbind; - } - - nvgpu_log_info(g, "freeing bound channel context, timeout=%ld", - timeout); - -#ifdef CONFIG_NVGPU_FECS_TRACE - if (g->ops.gr.fecs_trace.unbind_channel && !ch->vpr) - g->ops.gr.fecs_trace.unbind_channel(g, &ch->inst_block); -#endif - - if (g->ops.gr.setup.free_subctx != NULL) { - g->ops.gr.setup.free_subctx(ch); - ch->subctx = NULL; - } - - g->ops.gr.intr.flush_channel_tlb(g); - - if (ch->usermode_submit_enabled) { - nvgpu_channel_usermode_deinit(ch); - } else { - nvgpu_channel_kernelmode_deinit(ch); - } - - if (ch->user_sync != NULL) { - /* - * Set user managed syncpoint to safe state - * But it's already done if channel is recovered - */ - if (nvgpu_channel_check_unserviceable(ch)) { - nvgpu_channel_sync_destroy(ch->user_sync, false); - } else { - nvgpu_channel_sync_destroy(ch->user_sync, true); - } - ch->user_sync = NULL; - } - nvgpu_mutex_release(&ch->sync_lock); - - /* - * free the channel used semaphore index. - * we need to do this before releasing the address space, - * as the semaphore pool might get freed after that point. - */ - if (ch->hw_sema != NULL) { - nvgpu_hw_semaphore_free(ch); - } - - /* - * When releasing the channel we unbind the VM - so release the ref. 
- */ - nvgpu_vm_put(ch_vm); - - /* make sure we don't have deferred interrupts pending that - * could still touch the channel */ - nvgpu_wait_for_deferred_interrupts(g); - -unbind: - g->ops.channel.unbind(ch); - g->ops.channel.free_inst(g, ch); - - /* put back the channel-wide submit ref from init */ - if (ch->deterministic) { - nvgpu_rwsem_down_read(&g->deterministic_busy); - ch->deterministic = false; - if (!ch->deterministic_railgate_allowed) { - gk20a_idle(g); - } - ch->deterministic_railgate_allowed = false; - - nvgpu_rwsem_up_read(&g->deterministic_busy); - } - - ch->vpr = false; - ch->vm = NULL; - - WARN_ON(ch->sync != NULL); - -#ifdef CONFIG_NVGPU_DEBUGGER - /* unlink all debug sessions */ - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - - nvgpu_list_for_each_entry_safe(session_data, tmp_s, - &ch->dbg_s_list, dbg_session_data, dbg_s_entry) { - dbg_s = session_data->dbg_s; - nvgpu_mutex_acquire(&dbg_s->ch_list_lock); - nvgpu_list_for_each_entry_safe(ch_data, tmp, &dbg_s->ch_list, - dbg_session_channel_data, ch_entry) { - if (ch_data->chid == ch->chid) { - if (ch_data->unbind_single_channel(dbg_s, - ch_data) != 0) { - nvgpu_err(g, - "unbind failed for chid: %d", - ch_data->chid); - } - } - } - nvgpu_mutex_release(&dbg_s->ch_list_lock); - } - - nvgpu_mutex_release(&g->dbg_sessions_lock); -#endif - -#if GK20A_CHANNEL_REFCOUNT_TRACKING - (void) memset(ch->ref_actions, 0, sizeof(ch->ref_actions)); - ch->ref_actions_put = 0; -#endif - - /* make sure we catch accesses of unopened channels in case - * there's non-refcounted channel pointers hanging around */ - ch->g = NULL; - nvgpu_smp_wmb(); - - /* ALWAYS last */ - free_channel(f, ch); -} - -static void gk20a_channel_dump_ref_actions(struct nvgpu_channel *ch) -{ -#if GK20A_CHANNEL_REFCOUNT_TRACKING - size_t i, get; - s64 now = nvgpu_current_time_ms(); - s64 prev = 0; - struct gk20a *g = ch->g; - - nvgpu_spinlock_acquire(&ch->ref_actions_lock); - - nvgpu_info(g, "ch %d: refs %d. Actions, most recent last:", - ch->chid, nvgpu_atomic_read(&ch->ref_count)); - - /* start at the oldest possible entry. put is next insertion point */ - get = ch->ref_actions_put; - - /* - * If the buffer is not full, this will first loop to the oldest entry, - * skipping not-yet-initialized entries. There is no ref_actions_get. - */ - for (i = 0; i < GK20A_CHANNEL_REFCOUNT_TRACKING; i++) { - struct nvgpu_channel_ref_action *act = &ch->ref_actions[get]; - - if (act->trace.nr_entries) { - nvgpu_info(g, - "%s ref %zu steps ago (age %lld ms, diff %lld ms)", - act->type == channel_gk20a_ref_action_get - ? 
"GET" : "PUT", - GK20A_CHANNEL_REFCOUNT_TRACKING - 1 - i, - now - act->timestamp_ms, - act->timestamp_ms - prev); - - print_stack_trace(&act->trace, 0); - prev = act->timestamp_ms; - } - - get = (get + 1) % GK20A_CHANNEL_REFCOUNT_TRACKING; - } - - nvgpu_spinlock_release(&ch->ref_actions_lock); -#endif -} - -static void gk20a_channel_save_ref_source(struct nvgpu_channel *ch, - enum nvgpu_channel_ref_action_type type) -{ -#if GK20A_CHANNEL_REFCOUNT_TRACKING - struct nvgpu_channel_ref_action *act; - - nvgpu_spinlock_acquire(&ch->ref_actions_lock); - - act = &ch->ref_actions[ch->ref_actions_put]; - act->type = type; - act->trace.max_entries = GK20A_CHANNEL_REFCOUNT_TRACKING_STACKLEN; - act->trace.nr_entries = 0; - act->trace.skip = 3; /* onwards from the caller of this */ - act->trace.entries = act->trace_entries; - save_stack_trace(&act->trace); - act->timestamp_ms = nvgpu_current_time_ms(); - ch->ref_actions_put = (ch->ref_actions_put + 1) % - GK20A_CHANNEL_REFCOUNT_TRACKING; - - nvgpu_spinlock_release(&ch->ref_actions_lock); -#endif -} - -/* Try to get a reference to the channel. Return nonzero on success. If fails, - * the channel is dead or being freed elsewhere and you must not touch it. - * - * Always when a nvgpu_channel pointer is seen and about to be used, a - * reference must be held to it - either by you or the caller, which should be - * documented well or otherwise clearly seen. This usually boils down to the - * file from ioctls directly, or an explicit get in exception handlers when the - * channel is found by a chid. - * - * Most global functions in this file require a reference to be held by the - * caller. - */ -struct nvgpu_channel *nvgpu_channel_get__func(struct nvgpu_channel *ch, - const char *caller) -{ - struct nvgpu_channel *ret; - - nvgpu_spinlock_acquire(&ch->ref_obtain_lock); - - if (likely(ch->referenceable)) { - gk20a_channel_save_ref_source(ch, channel_gk20a_ref_action_get); - nvgpu_atomic_inc(&ch->ref_count); - ret = ch; - } else { - ret = NULL; - } - - nvgpu_spinlock_release(&ch->ref_obtain_lock); - - if (ret != NULL) { - trace_nvgpu_channel_get(ch->chid, caller); - } - - return ret; -} - -void nvgpu_channel_put__func(struct nvgpu_channel *ch, const char *caller) -{ - gk20a_channel_save_ref_source(ch, channel_gk20a_ref_action_put); - trace_nvgpu_channel_put(ch->chid, caller); - nvgpu_atomic_dec(&ch->ref_count); - if (nvgpu_cond_broadcast(&ch->ref_count_dec_wq) != 0) { - nvgpu_warn(ch->g, "failed to broadcast"); - } - - /* More puts than gets. Channel is probably going to get - * stuck. */ - WARN_ON(nvgpu_atomic_read(&ch->ref_count) < 0); - - /* Also, more puts than gets. ref_count can go to 0 only if - * the channel is closing. Channel is probably going to get - * stuck. */ - WARN_ON(nvgpu_atomic_read(&ch->ref_count) == 0 && ch->referenceable); -} - -struct nvgpu_channel *nvgpu_channel_from_id__func(struct gk20a *g, - u32 chid, const char *caller) -{ - if (chid == NVGPU_INVALID_CHANNEL_ID) { - return NULL; - } - - return nvgpu_channel_get__func(&g->fifo.channel[chid], caller); -} - -void nvgpu_channel_close(struct nvgpu_channel *ch) -{ - gk20a_free_channel(ch, false); -} - -/* - * Be careful with this - it is meant for terminating channels when we know the - * driver is otherwise dying. Ref counts and the like are ignored by this - * version of the cleanup. 
- */ -void nvgpu_channel_kill(struct nvgpu_channel *ch) -{ - gk20a_free_channel(ch, true); -} - -struct nvgpu_channel *gk20a_open_new_channel(struct gk20a *g, - u32 runlist_id, - bool is_privileged_channel, - pid_t pid, pid_t tid) -{ - struct nvgpu_fifo *f = &g->fifo; - struct nvgpu_channel *ch; - - /* compatibility with existing code */ - if (!nvgpu_engine_is_valid_runlist_id(g, runlist_id)) { - runlist_id = nvgpu_engine_get_gr_runlist_id(g); - } - - nvgpu_log_fn(g, " "); - - ch = allocate_channel(f); - if (ch == NULL) { - /* TBD: we want to make this virtualizable */ - nvgpu_err(g, "out of hw chids"); - return NULL; - } - -#ifdef CONFIG_NVGPU_TRACE - trace_gk20a_open_new_channel(ch->chid); -#endif - - BUG_ON(ch->g != NULL); - ch->g = g; - - /* Runlist for the channel */ - ch->runlist_id = runlist_id; - - /* Channel privilege level */ - ch->is_privileged_channel = is_privileged_channel; - - ch->pid = tid; - ch->tgid = pid; /* process granularity for FECS traces */ - - if (nvgpu_userd_init_channel(g, ch) != 0) { - nvgpu_err(g, "userd init failed"); - goto clean_up; - } - - if (g->ops.channel.alloc_inst(g, ch) != 0) { - nvgpu_err(g, "inst allocation failed"); - goto clean_up; - } - - /* now the channel is in a limbo out of the free list but not marked as - * alive and used (i.e. get-able) yet */ - - /* By default, channel is regular (non-TSG) channel */ - ch->tsgid = NVGPU_INVALID_TSG_ID; - - /* clear ctxsw timeout counter and update timestamp */ - ch->ctxsw_timeout_accumulated_ms = 0; - ch->ctxsw_timeout_gpfifo_get = 0; - /* set gr host default timeout */ - ch->ctxsw_timeout_max_ms = nvgpu_get_poll_timeout(g); - ch->ctxsw_timeout_debug_dump = true; - /* ch is unserviceable until it is bound to tsg */ - ch->unserviceable = true; - -#ifdef CONFIG_NVGPU_CHANNEL_WDT - /* init kernel watchdog timeout */ - ch->wdt.enabled = true; - ch->wdt.limit_ms = g->ch_wdt_init_limit_ms; - ch->wdt.debug_dump = true; -#endif - - ch->obj_class = 0; - ch->subctx_id = 0; - ch->runqueue_sel = 0; - - ch->mmu_nack_handled = false; - - /* The channel is *not* runnable at this point. It still needs to have - * an address space bound and allocate a gpfifo and grctx. */ - - if (nvgpu_cond_init(&ch->notifier_wq) != 0) { - nvgpu_err(g, "cond init failed"); - goto clean_up; - } - if (nvgpu_cond_init(&ch->semaphore_wq) != 0) { - nvgpu_err(g, "cond init failed"); - goto clean_up; - } - - /* Mark the channel alive, get-able, with 1 initial use - * references. The initial reference will be decreased in - * gk20a_free_channel(). - * - * Use the lock, since an asynchronous thread could - * try to access this channel while it's not fully - * initialized. - */ - nvgpu_spinlock_acquire(&ch->ref_obtain_lock); - ch->referenceable = true; - nvgpu_atomic_set(&ch->ref_count, 1); - nvgpu_spinlock_release(&ch->ref_obtain_lock); - - return ch; - -clean_up: - ch->g = NULL; - free_channel(f, ch); - return NULL; -} - /* allocate private cmd buffer. used for inserting commands before/after user submitted buffers. 
*/ static int channel_gk20a_alloc_priv_cmdbuf(struct nvgpu_channel *ch, @@ -1216,78 +668,6 @@ out: return err; } -static int nvgpu_channel_setup_ramfc(struct nvgpu_channel *c, - struct nvgpu_setup_bind_args *args, - u64 gpfifo_gpu_va, u32 gpfifo_size) -{ - int err = 0; - u64 pbdma_acquire_timeout = 0ULL; - struct gk20a *g = c->g; - -#ifdef CONFIG_NVGPU_CHANNEL_WDT - if (c->wdt.enabled && nvgpu_is_timeouts_enabled(c->g)) { - pbdma_acquire_timeout = c->wdt.limit_ms; - } -#else - if (nvgpu_is_timeouts_enabled(c->g)) { - pbdma_acquire_timeout = g->ch_wdt_init_limit_ms; - } -#endif - - err = g->ops.ramfc.setup(c, gpfifo_gpu_va, gpfifo_size, - pbdma_acquire_timeout, args->flags); - - return err; -} - -static int nvgpu_channel_setup_usermode(struct nvgpu_channel *c, - struct nvgpu_setup_bind_args *args) -{ - u32 gpfifo_size = args->num_gpfifo_entries; - int err = 0; - struct gk20a *g = c->g; - u64 gpfifo_gpu_va; - - if (g->os_channel.alloc_usermode_buffers != NULL) { - err = g->os_channel.alloc_usermode_buffers(c, args); - if (err != 0) { - nvgpu_err(g, "Usermode buffer alloc failed"); - goto clean_up; - } - c->userd_iova = nvgpu_mem_get_addr(g, - &c->usermode_userd); - c->usermode_submit_enabled = true; - } else { - nvgpu_err(g, "Usermode submit not supported"); - err = -EINVAL; - goto clean_up; - } - gpfifo_gpu_va = c->usermode_gpfifo.gpu_va; - - nvgpu_log_info(g, "channel %d : gpfifo_base 0x%016llx, size %d", - c->chid, gpfifo_gpu_va, gpfifo_size); - - err = nvgpu_channel_setup_ramfc(c, args, gpfifo_gpu_va, gpfifo_size); - - if (err != 0) { - goto clean_up_unmap; - } - - err = nvgpu_channel_update_runlist(c, true); - if (err != 0) { - goto clean_up_unmap; - } - - return 0; - -clean_up_unmap: - nvgpu_channel_free_usermode_buffers(c); - (void) nvgpu_userd_init_channel(g, c); - c->usermode_submit_enabled = false; -clean_up: - return err; -} - static int nvgpu_channel_setup_kernelmode(struct nvgpu_channel *c, struct nvgpu_setup_bind_args *args) { @@ -1397,99 +777,6 @@ clean_up: } -int nvgpu_channel_setup_bind(struct nvgpu_channel *c, - struct nvgpu_setup_bind_args *args) -{ - struct gk20a *g = c->g; - int err = 0; - -#ifdef CONFIG_NVGPU_VPR - if ((args->flags & NVGPU_SETUP_BIND_FLAGS_SUPPORT_VPR) != 0U) { - c->vpr = true; - } -#else - c->vpr = false; -#endif - - if ((args->flags & NVGPU_SETUP_BIND_FLAGS_SUPPORT_DETERMINISTIC) != 0U) { - nvgpu_rwsem_down_read(&g->deterministic_busy); - /* - * Railgating isn't deterministic; instead of disallowing - * railgating globally, take a power refcount for this - * channel's lifetime. The gk20a_idle() pair for this happens - * when the channel gets freed. - * - * Deterministic flag and this busy must be atomic within the - * busy lock. - */ - err = gk20a_busy(g); - if (err != 0) { - nvgpu_rwsem_up_read(&g->deterministic_busy); - return err; - } - - c->deterministic = true; - nvgpu_rwsem_up_read(&g->deterministic_busy); - } - - /* an address space needs to have been bound at this point. 
*/ - if (!nvgpu_channel_as_bound(c)) { - nvgpu_err(g, - "not bound to an address space at time of setup_bind"); - err = -EINVAL; - goto clean_up_idle; - } - - if (nvgpu_mem_is_valid(&c->gpfifo.mem) || - c->usermode_submit_enabled) { - nvgpu_err(g, "channel %d :" - "gpfifo already allocated", c->chid); - err = -EEXIST; - goto clean_up_idle; - } - - if ((args->flags & NVGPU_SETUP_BIND_FLAGS_USERMODE_SUPPORT) != 0U) { - err = nvgpu_channel_setup_usermode(c, args); - } else { - if (g->os_channel.open != NULL) { - g->os_channel.open(c); - } - err = nvgpu_channel_setup_kernelmode(c, args); - } - - if (err != 0) { - goto clean_up_idle; - } - - g->ops.channel.bind(c); - - nvgpu_log_fn(g, "done"); - return 0; - -clean_up_idle: - if (c->deterministic) { - nvgpu_rwsem_down_read(&g->deterministic_busy); - gk20a_idle(g); - c->deterministic = false; - nvgpu_rwsem_up_read(&g->deterministic_busy); - } - nvgpu_err(g, "fail"); - return err; -} - -void nvgpu_channel_free_usermode_buffers(struct nvgpu_channel *c) -{ - if (nvgpu_mem_is_valid(&c->usermode_userd)) { - nvgpu_dma_free(c->g, &c->usermode_userd); - } - if (nvgpu_mem_is_valid(&c->usermode_gpfifo)) { - nvgpu_dma_unmap_free(c->vm, &c->usermode_gpfifo); - } - if (c->g->os_channel.free_usermode_buffers != NULL) { - c->g->os_channel.free_usermode_buffers(c); - } -} - /* Update with this periodically to determine how the gpfifo is draining. */ static inline u32 nvgpu_channel_update_gpfifo_get(struct gk20a *g, struct nvgpu_channel *c) @@ -1506,79 +793,6 @@ u32 nvgpu_channel_get_gpfifo_free_count(struct nvgpu_channel *ch) ch->gpfifo.entry_num; } -static bool nvgpu_channel_ctxsw_timeout_debug_dump_state(struct gk20a *g, - struct nvgpu_channel *ch) -{ - bool verbose = false; - if (nvgpu_is_error_notifier_set(ch, - NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT)) { - verbose = ch->ctxsw_timeout_debug_dump; - } - - return verbose; -} - -static void nvgpu_channel_set_has_timedout_and_wakeup_wqs(struct gk20a *g, - struct nvgpu_channel *ch) -{ - /* mark channel as faulted */ - nvgpu_channel_set_unserviceable(ch); - - /* unblock pending waits */ - if (nvgpu_cond_broadcast_interruptible(&ch->semaphore_wq) != 0) { - nvgpu_warn(g, "failed to broadcast"); - } - if (nvgpu_cond_broadcast_interruptible(&ch->notifier_wq) != 0) { - nvgpu_warn(g, "failed to broadcast"); - } -} - -bool nvgpu_channel_mark_error(struct gk20a *g, struct nvgpu_channel *ch) -{ - bool verbose; - - verbose = nvgpu_channel_ctxsw_timeout_debug_dump_state(g, ch); - nvgpu_channel_set_has_timedout_and_wakeup_wqs(g, ch); - - return verbose; -} - -void nvgpu_channel_set_error_notifier(struct gk20a *g, struct nvgpu_channel *ch, - u32 error_notifier) -{ - g->ops.channel.set_error_notifier(ch, error_notifier); -} - -void nvgpu_channel_set_ctx_mmu_error(struct gk20a *g, - struct nvgpu_channel *ch) -{ - nvgpu_err(g, - "channel %d generated a mmu fault", ch->chid); - nvgpu_channel_set_error_notifier(g, ch, - NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT); -} - -bool nvgpu_channel_update_and_check_ctxsw_timeout(struct nvgpu_channel *ch, - u32 timeout_delta_ms, bool *progress) -{ - u32 gpfifo_get = nvgpu_channel_update_gpfifo_get(ch->g, ch); - - if (gpfifo_get == ch->ctxsw_timeout_gpfifo_get) { - /* didn't advance since previous ctxsw timeout check */ - ch->ctxsw_timeout_accumulated_ms += timeout_delta_ms; - *progress = false; - } else { - /* first ctxsw timeout isr encountered */ - ch->ctxsw_timeout_accumulated_ms = timeout_delta_ms; - *progress = true; - } - - ch->ctxsw_timeout_gpfifo_get = gpfifo_get; - - return 
nvgpu_is_timeouts_enabled(ch->g) && - ch->ctxsw_timeout_accumulated_ms > ch->ctxsw_timeout_max_ms; -} - u32 nvgpu_channel_update_gpfifo_get_and_get_free_count(struct nvgpu_channel *ch) { (void)nvgpu_channel_update_gpfifo_get(ch->g, ch); @@ -1834,7 +1048,7 @@ static void nvgpu_channel_poll_wdt(struct gk20a *g) } } -#endif +#endif /* CONFIG_NVGPU_CHANNEL_WDT */ static inline struct nvgpu_channel_worker * nvgpu_channel_worker_from_worker(struct nvgpu_worker *worker) @@ -1889,7 +1103,7 @@ static u32 nvgpu_channel_worker_poll_wakeup_condition_get_timeout( return ch_worker->watchdog_interval; } -#endif +#endif /* CONFIG_NVGPU_CHANNEL_WDT */ static void nvgpu_channel_worker_poll_wakeup_process_item( struct nvgpu_list_node *work_item) @@ -2250,6 +1464,826 @@ void nvgpu_channel_update(struct nvgpu_channel *c) gk20a_channel_worker_enqueue(c); } +bool nvgpu_channel_update_and_check_ctxsw_timeout(struct nvgpu_channel *ch, + u32 timeout_delta_ms, bool *progress) +{ + u32 gpfifo_get = nvgpu_channel_update_gpfifo_get(ch->g, ch); + + if (gpfifo_get == ch->ctxsw_timeout_gpfifo_get) { + /* didn't advance since previous ctxsw timeout check */ + ch->ctxsw_timeout_accumulated_ms += timeout_delta_ms; + *progress = false; + } else { + /* first ctxsw timeout isr encountered */ + ch->ctxsw_timeout_accumulated_ms = timeout_delta_ms; + *progress = true; + } + + ch->ctxsw_timeout_gpfifo_get = gpfifo_get; + + return nvgpu_is_timeouts_enabled(ch->g) && + ch->ctxsw_timeout_accumulated_ms > ch->ctxsw_timeout_max_ms; +} + +#else + +void nvgpu_channel_abort_clean_up(struct nvgpu_channel *ch) +{ + /* ensure no fences are pending */ + nvgpu_mutex_acquire(&ch->sync_lock); + if (ch->user_sync != NULL) { + nvgpu_channel_sync_set_safe_state(ch->user_sync); + } + nvgpu_mutex_release(&ch->sync_lock); +} + +#endif /* CONFIG_NVGPU_KERNEL_MODE_SUBMIT */ + +void nvgpu_channel_set_unserviceable(struct nvgpu_channel *ch) +{ + nvgpu_spinlock_acquire(&ch->unserviceable_lock); + ch->unserviceable = true; + nvgpu_spinlock_release(&ch->unserviceable_lock); +} + +bool nvgpu_channel_check_unserviceable(struct nvgpu_channel *ch) +{ + bool unserviceable_status; + + nvgpu_spinlock_acquire(&ch->unserviceable_lock); + unserviceable_status = ch->unserviceable; + nvgpu_spinlock_release(&ch->unserviceable_lock); + + return unserviceable_status; +} + +void nvgpu_channel_abort(struct nvgpu_channel *ch, bool channel_preempt) +{ + struct nvgpu_tsg *tsg = nvgpu_tsg_from_ch(ch); + + nvgpu_log_fn(ch->g, " "); + + if (tsg != NULL) { + return nvgpu_tsg_abort(ch->g, tsg, channel_preempt); + } else { + nvgpu_err(ch->g, "chid: %d is not bound to tsg", ch->chid); + } +} + +void nvgpu_channel_wait_until_counter_is_N( + struct nvgpu_channel *ch, nvgpu_atomic_t *counter, int wait_value, + struct nvgpu_cond *c, const char *caller, const char *counter_name) +{ + while (true) { + if (NVGPU_COND_WAIT( + c, + nvgpu_atomic_read(counter) == wait_value, + 5000U) == 0) { + break; + } + + nvgpu_warn(ch->g, + "%s: channel %d, still waiting, %s left: %d, waiting for: %d", + caller, ch->chid, counter_name, + nvgpu_atomic_read(counter), wait_value); + + gk20a_channel_dump_ref_actions(ch); + } +} + +static void nvgpu_channel_usermode_deinit(struct nvgpu_channel *ch) +{ + struct gk20a *g = ch->g; + + nvgpu_channel_free_usermode_buffers(ch); + (void) nvgpu_userd_init_channel(g, ch); + ch->usermode_submit_enabled = false; +} + +/* call ONLY when no references to the channel exist: after the last put */ +static void gk20a_free_channel(struct nvgpu_channel *ch, bool force) +{ + struct 
gk20a *g = ch->g; + struct nvgpu_tsg *tsg; + struct nvgpu_fifo *f = &g->fifo; + struct vm_gk20a *ch_vm = ch->vm; + unsigned long timeout; +#ifdef CONFIG_NVGPU_DEBUGGER + struct dbg_session_gk20a *dbg_s; + struct dbg_session_data *session_data, *tmp_s; + struct dbg_session_channel_data *ch_data, *tmp; + bool deferred_reset_pending; +#endif + int err; + + if (g == NULL) { + nvgpu_do_assert_print(g, "ch already freed"); + return; + } + + nvgpu_log_fn(g, " "); + + timeout = nvgpu_get_poll_timeout(g); + +#ifdef CONFIG_NVGPU_TRACE + trace_gk20a_free_channel(ch->chid); +#endif + + /* + * Disable channel/TSG and unbind here. This should not be executed if + * HW access is not available during shutdown/removal path as it will + * trigger a timeout + */ + if (!nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) { + /* abort channel and remove from runlist */ + tsg = nvgpu_tsg_from_ch(ch); + if (tsg != NULL) { + /* Between tsg is not null and unbind_channel call, + * ioctl cannot be called anymore because user doesn't + * have an open channel fd anymore to use for the unbind + * ioctl. + */ + err = nvgpu_tsg_unbind_channel(tsg, ch); + if (err != 0) { + nvgpu_err(g, + "failed to unbind channel %d from TSG", + ch->chid); + } + } else { + /* + * Channel is already unbound from TSG by User with + * explicit call + * Nothing to do here in that case + */ + } + } + + /* + * OS channel close may require that syncpoint should be set to some + * safe value before it is called. nvgpu_tsg_unbind_channel(above) is + * internally doing that by calling nvgpu_nvhost_syncpt_set_safe_state + * deep down in the stack. Otherwise os_channel close may block if the + * app is killed abruptly (which was going to do the syncpoint signal). + */ + if (g->os_channel.close != NULL) { + g->os_channel.close(ch, force); + } + + /* wait until there's only our ref to the channel */ + if (!force) { + nvgpu_channel_wait_until_counter_is_N( + ch, &ch->ref_count, 1, &ch->ref_count_dec_wq, + __func__, "references"); + } + + /* wait until all pending interrupts for recently completed + * jobs are handled */ + nvgpu_wait_for_deferred_interrupts(g); + + /* prevent new refs */ + nvgpu_spinlock_acquire(&ch->ref_obtain_lock); + if (!ch->referenceable) { + nvgpu_spinlock_release(&ch->ref_obtain_lock); + nvgpu_err(ch->g, + "Extra %s() called to channel %u", + __func__, ch->chid); + return; + } + ch->referenceable = false; + nvgpu_spinlock_release(&ch->ref_obtain_lock); + + /* matches with the initial reference in gk20a_open_new_channel() */ + nvgpu_atomic_dec(&ch->ref_count); + + /* wait until no more refs to the channel */ + if (!force) { + nvgpu_channel_wait_until_counter_is_N( + ch, &ch->ref_count, 0, &ch->ref_count_dec_wq, + __func__, "references"); + } + +#ifdef CONFIG_NVGPU_DEBUGGER + /* if engine reset was deferred, perform it now */ + nvgpu_mutex_acquire(&f->deferred_reset_mutex); + deferred_reset_pending = g->fifo.deferred_reset_pending; + nvgpu_mutex_release(&f->deferred_reset_mutex); + + if (deferred_reset_pending) { + nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was" + " deferred, running now"); + nvgpu_mutex_acquire(&g->fifo.engines_reset_mutex); + + nvgpu_assert(nvgpu_channel_deferred_reset_engines(g, ch) == 0); + + nvgpu_mutex_release(&g->fifo.engines_reset_mutex); + } +#endif + + if (!nvgpu_channel_as_bound(ch)) { + goto unbind; + } + + nvgpu_log_info(g, "freeing bound channel context, timeout=%ld", + timeout); + +#ifdef CONFIG_NVGPU_FECS_TRACE + if (g->ops.gr.fecs_trace.unbind_channel && !ch->vpr) + 
g->ops.gr.fecs_trace.unbind_channel(g, &ch->inst_block); +#endif + + if (g->ops.gr.setup.free_subctx != NULL) { + g->ops.gr.setup.free_subctx(ch); + ch->subctx = NULL; + } + + g->ops.gr.intr.flush_channel_tlb(g); + + if (ch->usermode_submit_enabled) { + nvgpu_channel_usermode_deinit(ch); + } else { +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT + nvgpu_channel_kernelmode_deinit(ch); +#endif + } + + if (ch->user_sync != NULL) { + /* + * Set user managed syncpoint to safe state + * But it's already done if channel is recovered + */ + if (nvgpu_channel_check_unserviceable(ch)) { + nvgpu_channel_sync_destroy(ch->user_sync, false); + } else { + nvgpu_channel_sync_destroy(ch->user_sync, true); + } + ch->user_sync = NULL; + } + nvgpu_mutex_release(&ch->sync_lock); + + /* + * free the channel used semaphore index. + * we need to do this before releasing the address space, + * as the semaphore pool might get freed after that point. + */ + if (ch->hw_sema != NULL) { + nvgpu_hw_semaphore_free(ch); + } + + /* + * When releasing the channel we unbind the VM - so release the ref. + */ + nvgpu_vm_put(ch_vm); + + /* make sure we don't have deferred interrupts pending that + * could still touch the channel */ + nvgpu_wait_for_deferred_interrupts(g); + +unbind: + g->ops.channel.unbind(ch); + g->ops.channel.free_inst(g, ch); + + /* put back the channel-wide submit ref from init */ + if (ch->deterministic) { + nvgpu_rwsem_down_read(&g->deterministic_busy); + ch->deterministic = false; + if (!ch->deterministic_railgate_allowed) { + gk20a_idle(g); + } + ch->deterministic_railgate_allowed = false; + + nvgpu_rwsem_up_read(&g->deterministic_busy); + } + + ch->vpr = false; + ch->vm = NULL; + +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT + WARN_ON(ch->sync != NULL); +#endif + +#ifdef CONFIG_NVGPU_DEBUGGER + /* unlink all debug sessions */ + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + + nvgpu_list_for_each_entry_safe(session_data, tmp_s, + &ch->dbg_s_list, dbg_session_data, dbg_s_entry) { + dbg_s = session_data->dbg_s; + nvgpu_mutex_acquire(&dbg_s->ch_list_lock); + nvgpu_list_for_each_entry_safe(ch_data, tmp, &dbg_s->ch_list, + dbg_session_channel_data, ch_entry) { + if (ch_data->chid == ch->chid) { + if (ch_data->unbind_single_channel(dbg_s, + ch_data) != 0) { + nvgpu_err(g, + "unbind failed for chid: %d", + ch_data->chid); + } + } + } + nvgpu_mutex_release(&dbg_s->ch_list_lock); + } + + nvgpu_mutex_release(&g->dbg_sessions_lock); +#endif + +#if GK20A_CHANNEL_REFCOUNT_TRACKING + (void) memset(ch->ref_actions, 0, sizeof(ch->ref_actions)); + ch->ref_actions_put = 0; +#endif + + /* make sure we catch accesses of unopened channels in case + * there's non-refcounted channel pointers hanging around */ + ch->g = NULL; + nvgpu_smp_wmb(); + + /* ALWAYS last */ + free_channel(f, ch); +} + +static void gk20a_channel_dump_ref_actions(struct nvgpu_channel *ch) +{ +#if GK20A_CHANNEL_REFCOUNT_TRACKING + size_t i, get; + s64 now = nvgpu_current_time_ms(); + s64 prev = 0; + struct gk20a *g = ch->g; + + nvgpu_spinlock_acquire(&ch->ref_actions_lock); + + nvgpu_info(g, "ch %d: refs %d. Actions, most recent last:", + ch->chid, nvgpu_atomic_read(&ch->ref_count)); + + /* start at the oldest possible entry. put is next insertion point */ + get = ch->ref_actions_put; + + /* + * If the buffer is not full, this will first loop to the oldest entry, + * skipping not-yet-initialized entries. There is no ref_actions_get. 
+ */ + for (i = 0; i < GK20A_CHANNEL_REFCOUNT_TRACKING; i++) { + struct nvgpu_channel_ref_action *act = &ch->ref_actions[get]; + + if (act->trace.nr_entries) { + nvgpu_info(g, + "%s ref %zu steps ago (age %lld ms, diff %lld ms)", + act->type == channel_gk20a_ref_action_get + ? "GET" : "PUT", + GK20A_CHANNEL_REFCOUNT_TRACKING - 1 - i, + now - act->timestamp_ms, + act->timestamp_ms - prev); + + print_stack_trace(&act->trace, 0); + prev = act->timestamp_ms; + } + + get = (get + 1) % GK20A_CHANNEL_REFCOUNT_TRACKING; + } + + nvgpu_spinlock_release(&ch->ref_actions_lock); +#endif +} + +static void gk20a_channel_save_ref_source(struct nvgpu_channel *ch, + enum nvgpu_channel_ref_action_type type) +{ +#if GK20A_CHANNEL_REFCOUNT_TRACKING + struct nvgpu_channel_ref_action *act; + + nvgpu_spinlock_acquire(&ch->ref_actions_lock); + + act = &ch->ref_actions[ch->ref_actions_put]; + act->type = type; + act->trace.max_entries = GK20A_CHANNEL_REFCOUNT_TRACKING_STACKLEN; + act->trace.nr_entries = 0; + act->trace.skip = 3; /* onwards from the caller of this */ + act->trace.entries = act->trace_entries; + save_stack_trace(&act->trace); + act->timestamp_ms = nvgpu_current_time_ms(); + ch->ref_actions_put = (ch->ref_actions_put + 1) % + GK20A_CHANNEL_REFCOUNT_TRACKING; + + nvgpu_spinlock_release(&ch->ref_actions_lock); +#endif +} + +/* Try to get a reference to the channel. Return nonzero on success. If fails, + * the channel is dead or being freed elsewhere and you must not touch it. + * + * Always when a nvgpu_channel pointer is seen and about to be used, a + * reference must be held to it - either by you or the caller, which should be + * documented well or otherwise clearly seen. This usually boils down to the + * file from ioctls directly, or an explicit get in exception handlers when the + * channel is found by a chid. + * + * Most global functions in this file require a reference to be held by the + * caller. + */ +struct nvgpu_channel *nvgpu_channel_get__func(struct nvgpu_channel *ch, + const char *caller) +{ + struct nvgpu_channel *ret; + + nvgpu_spinlock_acquire(&ch->ref_obtain_lock); + + if (likely(ch->referenceable)) { + gk20a_channel_save_ref_source(ch, channel_gk20a_ref_action_get); + nvgpu_atomic_inc(&ch->ref_count); + ret = ch; + } else { + ret = NULL; + } + + nvgpu_spinlock_release(&ch->ref_obtain_lock); + + if (ret != NULL) { + trace_nvgpu_channel_get(ch->chid, caller); + } + + return ret; +} + +void nvgpu_channel_put__func(struct nvgpu_channel *ch, const char *caller) +{ + gk20a_channel_save_ref_source(ch, channel_gk20a_ref_action_put); + trace_nvgpu_channel_put(ch->chid, caller); + nvgpu_atomic_dec(&ch->ref_count); + if (nvgpu_cond_broadcast(&ch->ref_count_dec_wq) != 0) { + nvgpu_warn(ch->g, "failed to broadcast"); + } + + /* More puts than gets. Channel is probably going to get + * stuck. */ + WARN_ON(nvgpu_atomic_read(&ch->ref_count) < 0); + + /* Also, more puts than gets. ref_count can go to 0 only if + * the channel is closing. Channel is probably going to get + * stuck. */ + WARN_ON(nvgpu_atomic_read(&ch->ref_count) == 0 && ch->referenceable); +} + +struct nvgpu_channel *nvgpu_channel_from_id__func(struct gk20a *g, + u32 chid, const char *caller) +{ + if (chid == NVGPU_INVALID_CHANNEL_ID) { + return NULL; + } + + return nvgpu_channel_get__func(&g->fifo.channel[chid], caller); +} + +void nvgpu_channel_close(struct nvgpu_channel *ch) +{ + gk20a_free_channel(ch, false); +} + +/* + * Be careful with this - it is meant for terminating channels when we know the + * driver is otherwise dying. 
Ref counts and the like are ignored by this + * version of the cleanup. + */ +void nvgpu_channel_kill(struct nvgpu_channel *ch) +{ + gk20a_free_channel(ch, true); +} + +struct nvgpu_channel *gk20a_open_new_channel(struct gk20a *g, + u32 runlist_id, + bool is_privileged_channel, + pid_t pid, pid_t tid) +{ + struct nvgpu_fifo *f = &g->fifo; + struct nvgpu_channel *ch; + + /* compatibility with existing code */ + if (!nvgpu_engine_is_valid_runlist_id(g, runlist_id)) { + runlist_id = nvgpu_engine_get_gr_runlist_id(g); + } + + nvgpu_log_fn(g, " "); + + ch = allocate_channel(f); + if (ch == NULL) { + /* TBD: we want to make this virtualizable */ + nvgpu_err(g, "out of hw chids"); + return NULL; + } + +#ifdef CONFIG_NVGPU_TRACE + trace_gk20a_open_new_channel(ch->chid); +#endif + + BUG_ON(ch->g != NULL); + ch->g = g; + + /* Runlist for the channel */ + ch->runlist_id = runlist_id; + + /* Channel privilege level */ + ch->is_privileged_channel = is_privileged_channel; + + ch->pid = tid; + ch->tgid = pid; /* process granularity for FECS traces */ + + if (nvgpu_userd_init_channel(g, ch) != 0) { + nvgpu_err(g, "userd init failed"); + goto clean_up; + } + + if (g->ops.channel.alloc_inst(g, ch) != 0) { + nvgpu_err(g, "inst allocation failed"); + goto clean_up; + } + + /* now the channel is in a limbo out of the free list but not marked as + * alive and used (i.e. get-able) yet */ + + /* By default, channel is regular (non-TSG) channel */ + ch->tsgid = NVGPU_INVALID_TSG_ID; + + /* clear ctxsw timeout counter and update timestamp */ + ch->ctxsw_timeout_accumulated_ms = 0; + ch->ctxsw_timeout_gpfifo_get = 0; + /* set gr host default timeout */ + ch->ctxsw_timeout_max_ms = nvgpu_get_poll_timeout(g); + ch->ctxsw_timeout_debug_dump = true; + /* ch is unserviceable until it is bound to tsg */ + ch->unserviceable = true; + +#ifdef CONFIG_NVGPU_CHANNEL_WDT + /* init kernel watchdog timeout */ + ch->wdt.enabled = true; + ch->wdt.limit_ms = g->ch_wdt_init_limit_ms; + ch->wdt.debug_dump = true; +#endif + + ch->obj_class = 0; + ch->subctx_id = 0; + ch->runqueue_sel = 0; + + ch->mmu_nack_handled = false; + + /* The channel is *not* runnable at this point. It still needs to have + * an address space bound and allocate a gpfifo and grctx. */ + + if (nvgpu_cond_init(&ch->notifier_wq) != 0) { + nvgpu_err(g, "cond init failed"); + goto clean_up; + } + if (nvgpu_cond_init(&ch->semaphore_wq) != 0) { + nvgpu_err(g, "cond init failed"); + goto clean_up; + } + + /* Mark the channel alive, get-able, with 1 initial use + * references. The initial reference will be decreased in + * gk20a_free_channel(). + * + * Use the lock, since an asynchronous thread could + * try to access this channel while it's not fully + * initialized. 
+ */ + nvgpu_spinlock_acquire(&ch->ref_obtain_lock); + ch->referenceable = true; + nvgpu_atomic_set(&ch->ref_count, 1); + nvgpu_spinlock_release(&ch->ref_obtain_lock); + + return ch; + +clean_up: + ch->g = NULL; + free_channel(f, ch); + return NULL; +} + +static int nvgpu_channel_setup_ramfc(struct nvgpu_channel *c, + struct nvgpu_setup_bind_args *args, + u64 gpfifo_gpu_va, u32 gpfifo_size) +{ + int err = 0; + u64 pbdma_acquire_timeout = 0ULL; + struct gk20a *g = c->g; + +#ifdef CONFIG_NVGPU_CHANNEL_WDT + if (c->wdt.enabled && nvgpu_is_timeouts_enabled(c->g)) { + pbdma_acquire_timeout = c->wdt.limit_ms; + } +#else + if (nvgpu_is_timeouts_enabled(c->g)) { + pbdma_acquire_timeout = g->ch_wdt_init_limit_ms; + } +#endif + + err = g->ops.ramfc.setup(c, gpfifo_gpu_va, gpfifo_size, + pbdma_acquire_timeout, args->flags); + + return err; +} + +static int nvgpu_channel_setup_usermode(struct nvgpu_channel *c, + struct nvgpu_setup_bind_args *args) +{ + u32 gpfifo_size = args->num_gpfifo_entries; + int err = 0; + struct gk20a *g = c->g; + u64 gpfifo_gpu_va; + + if (g->os_channel.alloc_usermode_buffers != NULL) { + err = g->os_channel.alloc_usermode_buffers(c, args); + if (err != 0) { + nvgpu_err(g, "Usermode buffer alloc failed"); + goto clean_up; + } + c->userd_iova = nvgpu_mem_get_addr(g, + &c->usermode_userd); + c->usermode_submit_enabled = true; + } else { + nvgpu_err(g, "Usermode submit not supported"); + err = -EINVAL; + goto clean_up; + } + gpfifo_gpu_va = c->usermode_gpfifo.gpu_va; + + nvgpu_log_info(g, "channel %d : gpfifo_base 0x%016llx, size %d", + c->chid, gpfifo_gpu_va, gpfifo_size); + + err = nvgpu_channel_setup_ramfc(c, args, gpfifo_gpu_va, gpfifo_size); + + if (err != 0) { + goto clean_up_unmap; + } + + err = nvgpu_channel_update_runlist(c, true); + if (err != 0) { + goto clean_up_unmap; + } + + return 0; + +clean_up_unmap: + nvgpu_channel_free_usermode_buffers(c); + (void) nvgpu_userd_init_channel(g, c); + c->usermode_submit_enabled = false; +clean_up: + return err; +} + +int nvgpu_channel_setup_bind(struct nvgpu_channel *c, + struct nvgpu_setup_bind_args *args) +{ + struct gk20a *g = c->g; + int err = 0; + +#ifdef CONFIG_NVGPU_VPR + if ((args->flags & NVGPU_SETUP_BIND_FLAGS_SUPPORT_VPR) != 0U) { + c->vpr = true; + } +#else + c->vpr = false; +#endif + + if ((args->flags & NVGPU_SETUP_BIND_FLAGS_SUPPORT_DETERMINISTIC) != 0U) { + nvgpu_rwsem_down_read(&g->deterministic_busy); + /* + * Railgating isn't deterministic; instead of disallowing + * railgating globally, take a power refcount for this + * channel's lifetime. The gk20a_idle() pair for this happens + * when the channel gets freed. + * + * Deterministic flag and this busy must be atomic within the + * busy lock. + */ + err = gk20a_busy(g); + if (err != 0) { + nvgpu_rwsem_up_read(&g->deterministic_busy); + return err; + } + + c->deterministic = true; + nvgpu_rwsem_up_read(&g->deterministic_busy); + } + + /* an address space needs to have been bound at this point. 
*/ + if (!nvgpu_channel_as_bound(c)) { + nvgpu_err(g, + "not bound to an address space at time of setup_bind"); + err = -EINVAL; + goto clean_up_idle; + } + + if (c->usermode_submit_enabled) { + nvgpu_err(g, "channel %d : " + "usermode buffers allocated", c->chid); + err = -EEXIST; + goto clean_up_idle; + } + +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT + if (nvgpu_mem_is_valid(&c->gpfifo.mem)) { + nvgpu_err(g, "channel %d :" + "gpfifo already allocated", c->chid); + err = -EEXIST; + goto clean_up_idle; + } +#endif + + if ((args->flags & NVGPU_SETUP_BIND_FLAGS_USERMODE_SUPPORT) != 0U) { + err = nvgpu_channel_setup_usermode(c, args); + } else { +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT + if (g->os_channel.open != NULL) { + g->os_channel.open(c); + } + err = nvgpu_channel_setup_kernelmode(c, args); +#else + err = -EINVAL; +#endif + } + + if (err != 0) { + goto clean_up_idle; + } + + g->ops.channel.bind(c); + + nvgpu_log_fn(g, "done"); + return 0; + +clean_up_idle: + if (c->deterministic) { + nvgpu_rwsem_down_read(&g->deterministic_busy); + gk20a_idle(g); + c->deterministic = false; + nvgpu_rwsem_up_read(&g->deterministic_busy); + } + nvgpu_err(g, "fail"); + return err; +} + +void nvgpu_channel_free_usermode_buffers(struct nvgpu_channel *c) +{ + if (nvgpu_mem_is_valid(&c->usermode_userd)) { + nvgpu_dma_free(c->g, &c->usermode_userd); + } + if (nvgpu_mem_is_valid(&c->usermode_gpfifo)) { + nvgpu_dma_unmap_free(c->vm, &c->usermode_gpfifo); + } + if (c->g->os_channel.free_usermode_buffers != NULL) { + c->g->os_channel.free_usermode_buffers(c); + } +} + +static bool nvgpu_channel_ctxsw_timeout_debug_dump_state(struct gk20a *g, + struct nvgpu_channel *ch) +{ + bool verbose = false; + if (nvgpu_is_error_notifier_set(ch, + NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT)) { + verbose = ch->ctxsw_timeout_debug_dump; + } + + return verbose; +} + +static void nvgpu_channel_set_has_timedout_and_wakeup_wqs(struct gk20a *g, + struct nvgpu_channel *ch) +{ + /* mark channel as faulted */ + nvgpu_channel_set_unserviceable(ch); + + /* unblock pending waits */ + if (nvgpu_cond_broadcast_interruptible(&ch->semaphore_wq) != 0) { + nvgpu_warn(g, "failed to broadcast"); + } + if (nvgpu_cond_broadcast_interruptible(&ch->notifier_wq) != 0) { + nvgpu_warn(g, "failed to broadcast"); + } +} + +bool nvgpu_channel_mark_error(struct gk20a *g, struct nvgpu_channel *ch) +{ + bool verbose; + + verbose = nvgpu_channel_ctxsw_timeout_debug_dump_state(g, ch); + nvgpu_channel_set_has_timedout_and_wakeup_wqs(g, ch); + + return verbose; +} + +void nvgpu_channel_set_error_notifier(struct gk20a *g, struct nvgpu_channel *ch, + u32 error_notifier) +{ + g->ops.channel.set_error_notifier(ch, error_notifier); +} + +void nvgpu_channel_set_ctx_mmu_error(struct gk20a *g, + struct nvgpu_channel *ch) +{ + nvgpu_err(g, + "channel %d generated a mmu fault", ch->chid); + nvgpu_channel_set_error_notifier(g, ch, + NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT); +} + /* * Stop deterministic channel activity for do_idle() when power needs to go off * momentarily but deterministic channels keep power refs for potentially a @@ -2334,8 +2368,10 @@ void nvgpu_channel_deterministic_unidle(struct gk20a *g) static void nvgpu_channel_destroy(struct gk20a *g, struct nvgpu_channel *c) { nvgpu_mutex_destroy(&c->ioctl_lock); +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT nvgpu_mutex_destroy(&c->joblist.cleanup_lock); nvgpu_mutex_destroy(&c->joblist.pre_alloc.read_lock); +#endif nvgpu_mutex_destroy(&c->sync_lock); #if defined(CONFIG_NVGPU_CYCLESTATS) 
nvgpu_mutex_destroy(&c->cyclestate.cyclestate_buffer_mutex); @@ -2393,18 +2429,20 @@ int nvgpu_channel_init_support(struct gk20a *g, u32 chid) #if GK20A_CHANNEL_REFCOUNT_TRACKING nvgpu_spinlock_init(&c->ref_actions_lock); #endif - nvgpu_spinlock_init(&c->joblist.dynamic.lock); +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT #ifdef CONFIG_NVGPU_CHANNEL_WDT nvgpu_spinlock_init(&c->wdt.lock); #endif - + nvgpu_spinlock_init(&c->joblist.dynamic.lock); nvgpu_init_list_node(&c->joblist.dynamic.jobs); - nvgpu_init_list_node(&c->dbg_s_list); nvgpu_init_list_node(&c->worker_item); - nvgpu_mutex_init(&c->ioctl_lock); nvgpu_mutex_init(&c->joblist.cleanup_lock); nvgpu_mutex_init(&c->joblist.pre_alloc.read_lock); + +#endif /* CONFIG_NVGPU_KERNEL_MODE_SUBMIT */ + nvgpu_init_list_node(&c->dbg_s_list); + nvgpu_mutex_init(&c->ioctl_lock); nvgpu_mutex_init(&c->sync_lock); #if defined(CONFIG_NVGPU_CYCLESTATS) nvgpu_mutex_init(&c->cyclestate.cyclestate_buffer_mutex); @@ -2593,6 +2631,7 @@ void gk20a_channel_semaphore_wakeup(struct gk20a *g, bool post_events) } } #endif +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT /* * Only non-deterministic channels get the * channel_update callback. We don't allow @@ -2607,6 +2646,7 @@ void gk20a_channel_semaphore_wakeup(struct gk20a *g, bool post_events) if (!c->deterministic) { nvgpu_channel_update(c); } +#endif } nvgpu_channel_put(c); } diff --git a/drivers/gpu/nvgpu/common/fifo/fifo.c b/drivers/gpu/nvgpu/common/fifo/fifo.c index 6b7cd5350..78184ecec 100644 --- a/drivers/gpu/nvgpu/common/fifo/fifo.c +++ b/drivers/gpu/nvgpu/common/fifo/fifo.c @@ -60,7 +60,9 @@ void nvgpu_fifo_cleanup_sw_common(struct gk20a *g) void nvgpu_fifo_cleanup_sw(struct gk20a *g) { +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT nvgpu_channel_worker_deinit(g); +#endif nvgpu_fifo_cleanup_sw_common(g); } @@ -169,19 +171,23 @@ int nvgpu_fifo_setup_sw(struct gk20a *g) return err; } +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT err = nvgpu_channel_worker_init(g); if (err != 0) { nvgpu_err(g, "worker init fail, err=%d", err); goto clean_up; } +#endif f->sw_ready = true; nvgpu_log_fn(g, "done"); return 0; +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT clean_up: nvgpu_fifo_cleanup_sw_common(g); +#endif return err; } diff --git a/drivers/gpu/nvgpu/common/fifo/tsg.c b/drivers/gpu/nvgpu/common/fifo/tsg.c index d3ed866a4..86bdaec1e 100644 --- a/drivers/gpu/nvgpu/common/fifo/tsg.c +++ b/drivers/gpu/nvgpu/common/fifo/tsg.c @@ -460,6 +460,7 @@ void nvgpu_tsg_set_ctx_mmu_error(struct gk20a *g, struct nvgpu_tsg *tsg) NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT); } +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT bool nvgpu_tsg_check_ctxsw_timeout(struct nvgpu_tsg *tsg, bool *debug_dump, u32 *ms) { @@ -522,6 +523,7 @@ bool nvgpu_tsg_check_ctxsw_timeout(struct nvgpu_tsg *tsg, nvgpu_rwsem_up_read(&tsg->ch_list_lock); return recover; } +#endif #ifdef CONFIG_NVGPU_CHANNEL_TSG_SCHEDULING int nvgpu_tsg_set_interleave(struct nvgpu_tsg *tsg, u32 level) diff --git a/drivers/gpu/nvgpu/common/sync/channel_sync.c b/drivers/gpu/nvgpu/common/sync/channel_sync.c index 29e32a24d..ca867ec6a 100644 --- a/drivers/gpu/nvgpu/common/sync/channel_sync.c +++ b/drivers/gpu/nvgpu/common/sync/channel_sync.c @@ -45,7 +45,11 @@ struct nvgpu_channel_sync *nvgpu_channel_sync_create(struct nvgpu_channel *c, if (nvgpu_has_syncpoints(c->g)) { return nvgpu_channel_sync_syncpt_create(c, user_managed); } else { +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT return nvgpu_channel_sync_semaphore_create(c, user_managed); +#else + return NULL; +#endif } } @@ -64,6 +68,7 @@ bool nvgpu_has_syncpoints(struct 
gk20a *g) #endif } +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT int nvgpu_channel_sync_wait_fence_fd(struct nvgpu_channel_sync *s, int fd, struct priv_cmd_entry *entry, u32 max_wait_cmds) { @@ -91,6 +96,18 @@ void nvgpu_channel_sync_set_min_eq_max(struct nvgpu_channel_sync *s) s->set_min_eq_max(s); } +void nvgpu_channel_sync_get_ref(struct nvgpu_channel_sync *s) +{ + nvgpu_atomic_inc(&s->refcount); +} + +bool nvgpu_channel_sync_put_ref_and_check(struct nvgpu_channel_sync *s) +{ + return nvgpu_atomic_dec_and_test(&s->refcount); +} + +#endif /* CONFIG_NVGPU_KERNEL_MODE_SUBMIT */ + void nvgpu_channel_sync_set_safe_state(struct nvgpu_channel_sync *s) { s->set_safe_state(s); @@ -105,13 +122,4 @@ void nvgpu_channel_sync_destroy(struct nvgpu_channel_sync *sync, sync->destroy(sync); } -void nvgpu_channel_sync_get_ref(struct nvgpu_channel_sync *s) -{ - nvgpu_atomic_inc(&s->refcount); -} - -bool nvgpu_channel_sync_put_ref_and_check(struct nvgpu_channel_sync *s) -{ - return nvgpu_atomic_dec_and_test(&s->refcount); -} diff --git a/drivers/gpu/nvgpu/common/sync/channel_sync_priv.h b/drivers/gpu/nvgpu/common/sync/channel_sync_priv.h index c92f70bb0..5235a2b65 100644 --- a/drivers/gpu/nvgpu/common/sync/channel_sync_priv.h +++ b/drivers/gpu/nvgpu/common/sync/channel_sync_priv.h @@ -39,6 +39,7 @@ struct nvgpu_fence_type; struct nvgpu_channel_sync { nvgpu_atomic_t refcount; +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT int (*wait_fence_raw)(struct nvgpu_channel_sync *s, u32 id, u32 thresh, struct priv_cmd_entry *entry); @@ -58,7 +59,7 @@ struct nvgpu_channel_sync { bool wfi, bool need_sync_fence, bool register_irq); - +#endif void (*set_min_eq_max)(struct nvgpu_channel_sync *s); void (*set_safe_state)(struct nvgpu_channel_sync *s); diff --git a/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c b/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c index 68280cd0b..f352ad930 100644 --- a/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c +++ b/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c @@ -54,6 +54,7 @@ nvgpu_channel_sync_syncpt_from_ops(struct nvgpu_channel_sync *ops) offsetof(struct nvgpu_channel_sync_syncpt, ops)); } +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT static int channel_sync_syncpt_gen_wait_cmd(struct nvgpu_channel *c, u32 id, u32 thresh, struct priv_cmd_entry *wait_cmd, u32 wait_cmd_size, u32 pos, bool preallocated) @@ -292,6 +293,14 @@ static int channel_sync_syncpt_incr_user(struct nvgpu_channel_sync *s, entry, fence, need_sync_fence); } +int nvgpu_channel_sync_wait_syncpt(struct nvgpu_channel_sync_syncpt *s, + u32 id, u32 thresh, struct priv_cmd_entry *entry) +{ + return channel_sync_syncpt_wait_raw(s, id, thresh, entry); +} + +#endif /* CONFIG_NVGPU_KERNEL_MODE_SUBMIT */ + static void channel_sync_syncpt_set_min_eq_max(struct nvgpu_channel_sync *s) { struct nvgpu_channel_sync_syncpt *sp = @@ -339,18 +348,12 @@ u64 nvgpu_channel_sync_get_syncpt_address(struct nvgpu_channel_sync_syncpt *s) return channel_sync_syncpt_get_address(s); } -int nvgpu_channel_sync_wait_syncpt(struct nvgpu_channel_sync_syncpt *s, - u32 id, u32 thresh, struct priv_cmd_entry *entry) -{ - return channel_sync_syncpt_wait_raw(s, id, thresh, entry); -} - struct nvgpu_channel_sync_syncpt * nvgpu_channel_sync_to_syncpt(struct nvgpu_channel_sync *sync) { struct nvgpu_channel_sync_syncpt *syncpt = NULL; - if (sync->wait_fence_fd == channel_sync_syncpt_wait_fd) { + if (sync->set_min_eq_max == channel_sync_syncpt_set_min_eq_max) { syncpt = nvgpu_channel_sync_syncpt_from_ops(sync); } @@ -396,9 +399,11 @@ 
nvgpu_channel_sync_syncpt_create(struct nvgpu_channel *c, bool user_managed) nvgpu_nvhost_syncpt_set_min_eq_max_ext(sp->nvhost_dev, sp->id); nvgpu_atomic_set(&sp->ops.refcount, 0); +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT sp->ops.wait_fence_fd = channel_sync_syncpt_wait_fd; sp->ops.incr = channel_sync_syncpt_incr; sp->ops.incr_user = channel_sync_syncpt_incr_user; +#endif sp->ops.set_min_eq_max = channel_sync_syncpt_set_min_eq_max; sp->ops.set_safe_state = channel_sync_syncpt_set_safe_state; sp->ops.destroy = channel_sync_syncpt_destroy; diff --git a/drivers/gpu/nvgpu/common/vgpu/fifo/fifo_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/fifo/fifo_vgpu.c index 846b8ab6e..2e1b13017 100644 --- a/drivers/gpu/nvgpu/common/vgpu/fifo/fifo_vgpu.c +++ b/drivers/gpu/nvgpu/common/vgpu/fifo/fifo_vgpu.c @@ -72,10 +72,12 @@ int vgpu_fifo_setup_sw(struct gk20a *g) return err; } +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT err = nvgpu_channel_worker_init(g); if (err) { goto clean_up; } +#endif f->channel_base = priv->constants.channel_base; @@ -84,9 +86,11 @@ int vgpu_fifo_setup_sw(struct gk20a *g) nvgpu_log_fn(g, "done"); return 0; +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT clean_up: /* FIXME: unmap from bar1 */ nvgpu_fifo_cleanup_sw_common(g); +#endif return err; } diff --git a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c index 574abf66d..069cab598 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c +++ b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c @@ -579,7 +579,9 @@ static const struct gpu_ops vgpu_gp10b_ops = { .unbind_channel_check_hw_state = NULL, .unbind_channel_check_ctx_reload = NULL, .unbind_channel_check_eng_faulted = NULL, +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT .check_ctxsw_timeout = nvgpu_tsg_check_ctxsw_timeout, +#endif .force_reset = vgpu_tsg_force_reset_ch, .post_event_id = nvgpu_tsg_post_event_id, .set_timeslice = vgpu_tsg_set_timeslice, diff --git a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c index 796647760..64539e5ca 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c +++ b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c @@ -666,7 +666,9 @@ static const struct gpu_ops vgpu_gv11b_ops = { .unbind_channel_check_hw_state = NULL, .unbind_channel_check_ctx_reload = NULL, .unbind_channel_check_eng_faulted = NULL, +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT .check_ctxsw_timeout = nvgpu_tsg_check_ctxsw_timeout, +#endif .force_reset = vgpu_tsg_force_reset_ch, .post_event_id = nvgpu_tsg_post_event_id, .set_timeslice = vgpu_tsg_set_timeslice, diff --git a/drivers/gpu/nvgpu/hal/fifo/ctxsw_timeout_gk20a.c b/drivers/gpu/nvgpu/hal/fifo/ctxsw_timeout_gk20a.c index e9e0b97b0..0dea390fc 100644 --- a/drivers/gpu/nvgpu/hal/fifo/ctxsw_timeout_gk20a.c +++ b/drivers/gpu/nvgpu/hal/fifo/ctxsw_timeout_gk20a.c @@ -115,10 +115,11 @@ bool gk20a_fifo_handle_ctxsw_timeout(struct gk20a *g) nvgpu_channel_put(ch); } } - +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT if (tsg != NULL) { recover = g->ops.tsg.check_ctxsw_timeout(tsg, &debug_dump, &ms); } +#endif if (recover) { nvgpu_err(g, diff --git a/drivers/gpu/nvgpu/hal/fifo/ctxsw_timeout_gv11b.c b/drivers/gpu/nvgpu/hal/fifo/ctxsw_timeout_gv11b.c index b61e825fa..08ebcea5b 100644 --- a/drivers/gpu/nvgpu/hal/fifo/ctxsw_timeout_gv11b.c +++ b/drivers/gpu/nvgpu/hal/fifo/ctxsw_timeout_gv11b.c @@ -221,8 +221,10 @@ bool gv11b_fifo_handle_ctxsw_timeout(struct gk20a *g) 0, GPU_HOST_PFIFO_CTXSW_TIMEOUT_ERROR, tsgid); +#ifdef 
CONFIG_NVGPU_KERNEL_MODE_SUBMIT recover = g->ops.tsg.check_ctxsw_timeout(tsg, &debug_dump, &ms); +#endif if (recover) { info_status_str = invalid_str; if (info_status < diff --git a/drivers/gpu/nvgpu/hal/fifo/userd_gk20a.c b/drivers/gpu/nvgpu/hal/fifo/userd_gk20a.c index ea2ab1eed..783f03de5 100644 --- a/drivers/gpu/nvgpu/hal/fifo/userd_gk20a.c +++ b/drivers/gpu/nvgpu/hal/fifo/userd_gk20a.c @@ -50,6 +50,7 @@ void gk20a_userd_init_mem(struct gk20a *g, struct nvgpu_channel *c) nvgpu_mem_wr32(g, mem, offset + ram_userd_gp_put_w(), 0); } +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT u32 gk20a_userd_gp_get(struct gk20a *g, struct nvgpu_channel *c) { u64 userd_gpu_va = nvgpu_channel_userd_gpu_va(c); @@ -82,6 +83,7 @@ void gk20a_userd_gp_put(struct gk20a *g, struct nvgpu_channel *c) BUG_ON(u64_hi32(addr) != 0U); nvgpu_bar1_writel(g, (u32)addr, c->gpfifo.put); } +#endif u32 gk20a_userd_entry_size(struct gk20a *g) { diff --git a/drivers/gpu/nvgpu/hal/fifo/userd_gk20a.h b/drivers/gpu/nvgpu/hal/fifo/userd_gk20a.h index 43eddb0c1..45f41b19e 100644 --- a/drivers/gpu/nvgpu/hal/fifo/userd_gk20a.h +++ b/drivers/gpu/nvgpu/hal/fifo/userd_gk20a.h @@ -27,9 +27,11 @@ struct gk20a; struct nvgpu_channel; void gk20a_userd_init_mem(struct gk20a *g, struct nvgpu_channel *c); +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT u32 gk20a_userd_gp_get(struct gk20a *g, struct nvgpu_channel *c); u64 gk20a_userd_pb_get(struct gk20a *g, struct nvgpu_channel *c); void gk20a_userd_gp_put(struct gk20a *g, struct nvgpu_channel *c); +#endif u32 gk20a_userd_entry_size(struct gk20a *g); #endif /* USERD_GK20A_H */ diff --git a/drivers/gpu/nvgpu/hal/init/hal_gm20b.c b/drivers/gpu/nvgpu/hal/init/hal_gm20b.c index 803965ad5..c0d88e013 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gm20b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gm20b.c @@ -730,11 +730,13 @@ static const struct gpu_ops gm20b_ops = { .cleanup_sw = nvgpu_userd_cleanup_sw, #ifdef CONFIG_NVGPU_USERD .init_mem = gk20a_userd_init_mem, +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT .gp_get = gk20a_userd_gp_get, .gp_put = gk20a_userd_gp_put, .pb_get = gk20a_userd_pb_get, - .entry_size = gk20a_userd_entry_size, #endif + .entry_size = gk20a_userd_entry_size, +#endif /* CONFIG_NVGPU_USERD */ }, .channel = { .alloc_inst = nvgpu_channel_alloc_inst, @@ -764,7 +766,9 @@ static const struct gpu_ops gm20b_ops = { .unbind_channel_check_ctx_reload = nvgpu_tsg_unbind_channel_check_ctx_reload, .unbind_channel_check_eng_faulted = NULL, +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT .check_ctxsw_timeout = nvgpu_tsg_check_ctxsw_timeout, +#endif #ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL .force_reset = nvgpu_tsg_force_reset_ch, .post_event_id = nvgpu_tsg_post_event_id, diff --git a/drivers/gpu/nvgpu/hal/init/hal_gp10b.c b/drivers/gpu/nvgpu/hal/init/hal_gp10b.c index 35e3f964f..e841d92db 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gp10b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gp10b.c @@ -815,11 +815,13 @@ static const struct gpu_ops gp10b_ops = { .cleanup_sw = nvgpu_userd_cleanup_sw, #ifdef CONFIG_NVGPU_USERD .init_mem = gk20a_userd_init_mem, +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT .gp_get = gk20a_userd_gp_get, .gp_put = gk20a_userd_gp_put, .pb_get = gk20a_userd_pb_get, - .entry_size = gk20a_userd_entry_size, #endif + .entry_size = gk20a_userd_entry_size, +#endif /* CONFIG_NVGPU_USERD */ }, .channel = { .alloc_inst = nvgpu_channel_alloc_inst, @@ -831,7 +833,9 @@ static const struct gpu_ops gp10b_ops = { .count = gm20b_channel_count, .read_state = gk20a_channel_read_state, .force_ctx_reload = gm20b_channel_force_ctx_reload, +#ifdef 
CONFIG_NVGPU_KERNEL_MODE_SUBMIT .set_syncpt = nvgpu_channel_set_syncpt, +#endif .abort_clean_up = nvgpu_channel_abort_clean_up, .suspend_all_serviceable_ch = nvgpu_channel_suspend_all_serviceable_ch, @@ -850,7 +854,9 @@ static const struct gpu_ops gp10b_ops = { .unbind_channel_check_ctx_reload = nvgpu_tsg_unbind_channel_check_ctx_reload, .unbind_channel_check_eng_faulted = NULL, +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT .check_ctxsw_timeout = nvgpu_tsg_check_ctxsw_timeout, +#endif #ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL .force_reset = nvgpu_tsg_force_reset_ch, .post_event_id = nvgpu_tsg_post_event_id, diff --git a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c index 25cafa8c0..73f8d5c83 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c @@ -948,11 +948,13 @@ static const struct gpu_ops gv11b_ops = { .cleanup_sw = nvgpu_userd_cleanup_sw, #ifdef CONFIG_NVGPU_USERD .init_mem = gk20a_userd_init_mem, +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT .gp_get = gv11b_userd_gp_get, .gp_put = gv11b_userd_gp_put, .pb_get = gv11b_userd_pb_get, - .entry_size = gk20a_userd_entry_size, #endif + .entry_size = gk20a_userd_entry_size, +#endif /* CONFIG_NVGPU_USERD */ }, .channel = { .alloc_inst = nvgpu_channel_alloc_inst, @@ -989,7 +991,9 @@ static const struct gpu_ops gv11b_ops = { nvgpu_tsg_unbind_channel_check_ctx_reload, .unbind_channel_check_eng_faulted = gv11b_tsg_unbind_channel_check_eng_faulted, +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT .check_ctxsw_timeout = nvgpu_tsg_check_ctxsw_timeout, +#endif #ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL .force_reset = nvgpu_tsg_force_reset_ch, .post_event_id = nvgpu_tsg_post_event_id, diff --git a/drivers/gpu/nvgpu/hal/init/hal_tu104.c b/drivers/gpu/nvgpu/hal/init/hal_tu104.c index 86f3ac95a..d4ca7f6d7 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_tu104.c +++ b/drivers/gpu/nvgpu/hal/init/hal_tu104.c @@ -991,11 +991,13 @@ static const struct gpu_ops tu104_ops = { .cleanup_sw = nvgpu_userd_cleanup_sw, #ifdef CONFIG_NVGPU_USERD .init_mem = gk20a_userd_init_mem, +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT .gp_get = gv11b_userd_gp_get, .gp_put = gv11b_userd_gp_put, .pb_get = gv11b_userd_pb_get, - .entry_size = gk20a_userd_entry_size, #endif + .entry_size = gk20a_userd_entry_size, +#endif /* CONFIG_NVGPU_USERD */ }, .channel = { .alloc_inst = nvgpu_channel_alloc_inst, @@ -1032,7 +1034,9 @@ static const struct gpu_ops tu104_ops = { nvgpu_tsg_unbind_channel_check_ctx_reload, .unbind_channel_check_eng_faulted = gv11b_tsg_unbind_channel_check_eng_faulted, +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT .check_ctxsw_timeout = nvgpu_tsg_check_ctxsw_timeout, +#endif #ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL .force_reset = nvgpu_tsg_force_reset_ch, .post_event_id = nvgpu_tsg_post_event_id, diff --git a/drivers/gpu/nvgpu/include/nvgpu/channel.h b/drivers/gpu/nvgpu/include/nvgpu/channel.h index 7311c6f74..d4e49a622 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/channel.h +++ b/drivers/gpu/nvgpu/include/nvgpu/channel.h @@ -303,13 +303,24 @@ struct nvgpu_channel { struct nvgpu_list_node ch_entry; /* channel's entry in TSG */ +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT struct nvgpu_channel_joblist joblist; + struct gpfifo_desc gpfifo; + struct priv_cmd_queue priv_cmd_q; + struct nvgpu_channel_sync *sync; + /* for job cleanup handling in the background worker */ + struct nvgpu_list_node worker_item; + +#ifdef CONFIG_NVGPU_CHANNEL_WDT + /* kernel watchdog to kill stuck jobs */ + struct nvgpu_channel_wdt wdt; +#endif /* CONFIG_NVGPU_CHANNEL_WDT 
*/ +#endif /* CONFIG_NVGPU_KERNEL_MODE_SUBMIT */ + struct nvgpu_allocator fence_allocator; struct vm_gk20a *vm; - struct gpfifo_desc gpfifo; - struct nvgpu_mem usermode_userd; /* Used for Usermode Submission */ struct nvgpu_mem usermode_gpfifo; struct nvgpu_mem inst_block; @@ -319,19 +330,9 @@ struct nvgpu_channel { struct nvgpu_mem *userd_mem; /* kernel mode userd */ u32 userd_offset; /* in bytes from start of userd_mem */ - struct priv_cmd_queue priv_cmd_q; - struct nvgpu_cond notifier_wq; struct nvgpu_cond semaphore_wq; -#ifdef CONFIG_NVGPU_CHANNEL_WDT - /* kernel watchdog to kill stuck jobs */ - struct nvgpu_channel_wdt wdt; -#endif - - /* for job cleanup handling in the background worker */ - struct nvgpu_list_node worker_item; - #if defined(CONFIG_NVGPU_CYCLESTATS) struct { void *cyclestate_buffer; @@ -346,7 +347,6 @@ struct nvgpu_channel { struct nvgpu_list_node dbg_s_list; struct nvgpu_mutex sync_lock; - struct nvgpu_channel_sync *sync; struct nvgpu_channel_sync *user_sync; #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION @@ -388,6 +388,66 @@ struct nvgpu_channel { bool mmu_debug_mode_enabled; #endif }; + +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT + +static inline struct nvgpu_channel * +nvgpu_channel_from_worker_item(struct nvgpu_list_node *node) +{ + return (struct nvgpu_channel *) + ((uintptr_t)node - offsetof(struct nvgpu_channel, worker_item)); +}; +int nvgpu_channel_alloc_priv_cmdbuf(struct nvgpu_channel *c, u32 orig_size, + struct priv_cmd_entry *e); +void nvgpu_channel_update_priv_cmd_q_and_free_entry( + struct nvgpu_channel *ch, struct priv_cmd_entry *e); +int nvgpu_channel_worker_init(struct gk20a *g); +void nvgpu_channel_worker_deinit(struct gk20a *g); +struct nvgpu_channel *nvgpu_channel_get_from_file(int fd); +void nvgpu_channel_update(struct nvgpu_channel *c); +int nvgpu_channel_alloc_job(struct nvgpu_channel *c, + struct nvgpu_channel_job **job_out); +void nvgpu_channel_free_job(struct nvgpu_channel *c, + struct nvgpu_channel_job *job); +u32 nvgpu_channel_update_gpfifo_get_and_get_free_count( + struct nvgpu_channel *ch); +u32 nvgpu_channel_get_gpfifo_free_count(struct nvgpu_channel *ch); +int nvgpu_channel_add_job(struct nvgpu_channel *c, + struct nvgpu_channel_job *job, + bool skip_buffer_refcounting); +void nvgpu_channel_free_priv_cmd_entry(struct nvgpu_channel *c, + struct priv_cmd_entry *e); +void nvgpu_channel_clean_up_jobs(struct nvgpu_channel *c, + bool clean_all); +int nvgpu_submit_channel_gpfifo_user(struct nvgpu_channel *c, + struct nvgpu_gpfifo_userdata userdata, + u32 num_entries, + u32 flags, + struct nvgpu_channel_fence *fence, + struct nvgpu_fence_type **fence_out, + struct nvgpu_profile *profile); + +int nvgpu_submit_channel_gpfifo_kernel(struct nvgpu_channel *c, + struct nvgpu_gpfifo_entry *gpfifo, + u32 num_entries, + u32 flags, + struct nvgpu_channel_fence *fence, + struct nvgpu_fence_type **fence_out); +int nvgpu_channel_set_syncpt(struct nvgpu_channel *ch); +void nvgpu_channel_joblist_lock(struct nvgpu_channel *c); +void nvgpu_channel_joblist_unlock(struct nvgpu_channel *c); +bool nvgpu_channel_joblist_is_empty(struct nvgpu_channel *c); +bool nvgpu_channel_is_prealloc_enabled(struct nvgpu_channel *c); + +bool nvgpu_channel_update_and_check_ctxsw_timeout(struct nvgpu_channel *ch, + u32 timeout_delta_ms, bool *progress); + +#ifdef CONFIG_NVGPU_CHANNEL_WDT +void nvgpu_channel_wdt_restart_all_channels(struct gk20a *g); +#endif + +#endif /* CONFIG_NVGPU_KERNEL_MODE_SUBMIT */ + static inline struct nvgpu_channel * nvgpu_channel_from_free_chs(struct nvgpu_list_node 
*node) { @@ -402,13 +462,6 @@ nvgpu_channel_from_ch_entry(struct nvgpu_list_node *node) ((uintptr_t)node - offsetof(struct nvgpu_channel, ch_entry)); }; -static inline struct nvgpu_channel * -nvgpu_channel_from_worker_item(struct nvgpu_list_node *node) -{ - return (struct nvgpu_channel *) - ((uintptr_t)node - offsetof(struct nvgpu_channel, worker_item)); -}; - static inline bool nvgpu_channel_as_bound(struct nvgpu_channel *ch) { return (ch->vm != NULL); @@ -426,19 +479,12 @@ void nvgpu_channel_set_ctx_mmu_error(struct gk20a *g, struct nvgpu_channel *ch); bool nvgpu_channel_mark_error(struct gk20a *g, struct nvgpu_channel *ch); -bool nvgpu_channel_update_and_check_ctxsw_timeout(struct nvgpu_channel *ch, - u32 timeout_delta_ms, bool *progress); - void nvgpu_channel_recover(struct gk20a *g, struct nvgpu_channel *ch, bool verbose, u32 rc_type); void nvgpu_channel_abort(struct nvgpu_channel *ch, bool channel_preempt); void nvgpu_channel_abort_clean_up(struct nvgpu_channel *ch); void gk20a_channel_semaphore_wakeup(struct gk20a *g, bool post_events); -int nvgpu_channel_alloc_priv_cmdbuf(struct nvgpu_channel *c, u32 orig_size, - struct priv_cmd_entry *e); -void nvgpu_channel_update_priv_cmd_q_and_free_entry( - struct nvgpu_channel *ch, struct priv_cmd_entry *e); int nvgpu_channel_enable_tsg(struct gk20a *g, struct nvgpu_channel *ch); int nvgpu_channel_disable_tsg(struct gk20a *g, struct nvgpu_channel *ch); @@ -449,12 +495,6 @@ void nvgpu_channel_resume_all_serviceable_ch(struct gk20a *g); void nvgpu_channel_deterministic_idle(struct gk20a *g); void nvgpu_channel_deterministic_unidle(struct gk20a *g); -int nvgpu_channel_worker_init(struct gk20a *g); -void nvgpu_channel_worker_deinit(struct gk20a *g); - -struct nvgpu_channel *nvgpu_channel_get_from_file(int fd); -void nvgpu_channel_update(struct nvgpu_channel *c); - /* returns ch if reference was obtained */ struct nvgpu_channel *__must_check nvgpu_channel_get__func( struct nvgpu_channel *ch, const char *caller); @@ -479,51 +519,15 @@ struct nvgpu_channel *gk20a_open_new_channel(struct gk20a *g, int nvgpu_channel_setup_bind(struct nvgpu_channel *c, struct nvgpu_setup_bind_args *args); -void nvgpu_channel_wdt_restart_all_channels(struct gk20a *g); - -bool nvgpu_channel_is_prealloc_enabled(struct nvgpu_channel *c); -void nvgpu_channel_joblist_lock(struct nvgpu_channel *c); -void nvgpu_channel_joblist_unlock(struct nvgpu_channel *c); -bool nvgpu_channel_joblist_is_empty(struct nvgpu_channel *c); - int nvgpu_channel_update_runlist(struct nvgpu_channel *c, bool add); void nvgpu_channel_wait_until_counter_is_N( struct nvgpu_channel *ch, nvgpu_atomic_t *counter, int wait_value, struct nvgpu_cond *c, const char *caller, const char *counter_name); -int nvgpu_channel_alloc_job(struct nvgpu_channel *c, - struct nvgpu_channel_job **job_out); -void nvgpu_channel_free_job(struct nvgpu_channel *c, - struct nvgpu_channel_job *job); -u32 nvgpu_channel_update_gpfifo_get_and_get_free_count( - struct nvgpu_channel *ch); -u32 nvgpu_channel_get_gpfifo_free_count(struct nvgpu_channel *ch); -int nvgpu_channel_add_job(struct nvgpu_channel *c, - struct nvgpu_channel_job *job, - bool skip_buffer_refcounting); -void nvgpu_channel_free_priv_cmd_entry(struct nvgpu_channel *c, - struct priv_cmd_entry *e); -void nvgpu_channel_clean_up_jobs(struct nvgpu_channel *c, - bool clean_all); void nvgpu_channel_free_usermode_buffers(struct nvgpu_channel *c); u32 nvgpu_get_gpfifo_entry_size(void); -int nvgpu_submit_channel_gpfifo_user(struct nvgpu_channel *c, - struct nvgpu_gpfifo_userdata 
userdata,
-				u32 num_entries,
-				u32 flags,
-				struct nvgpu_channel_fence *fence,
-				struct nvgpu_fence_type **fence_out,
-				struct nvgpu_profile *profile);
-
-int nvgpu_submit_channel_gpfifo_kernel(struct nvgpu_channel *c,
-				struct nvgpu_gpfifo_entry *gpfifo,
-				u32 num_entries,
-				u32 flags,
-				struct nvgpu_channel_fence *fence,
-				struct nvgpu_fence_type **fence_out);
-
 #ifdef CONFIG_DEBUG_FS
 void trace_write_pushbuffers(struct nvgpu_channel *c, u32 count);
 #else
@@ -550,7 +554,6 @@ int nvgpu_channel_alloc_inst(struct gk20a *g, struct nvgpu_channel *ch);
 void nvgpu_channel_free_inst(struct gk20a *g, struct nvgpu_channel *ch);
 void nvgpu_channel_set_error_notifier(struct gk20a *g, struct nvgpu_channel *ch,
 		u32 error_notifier);
-int nvgpu_channel_set_syncpt(struct nvgpu_channel *ch);
 struct nvgpu_channel *nvgpu_channel_refch_from_inst_ptr(struct gk20a *g,
 		u64 inst_ptr);
 void nvgpu_channel_debug_dump_all(struct gk20a *g,
diff --git a/drivers/gpu/nvgpu/include/nvgpu/channel_sync.h b/drivers/gpu/nvgpu/include/nvgpu/channel_sync.h
index c76342763..68e676057 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/channel_sync.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/channel_sync.h
@@ -29,6 +29,9 @@
 #include
 
 struct nvgpu_channel_sync;
+
+#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
+
 struct priv_cmd_entry;
 struct nvgpu_channel;
 struct nvgpu_fence_type;
@@ -73,6 +76,17 @@ int nvgpu_channel_sync_incr_user(struct nvgpu_channel_sync *s,
  * for semaphores.
  */
 void nvgpu_channel_sync_set_min_eq_max(struct nvgpu_channel_sync *s);
+/*
+ * Increment the usage_counter for this instance.
+ */
+void nvgpu_channel_sync_get_ref(struct nvgpu_channel_sync *s);
+
+/*
+ * Decrement the usage_counter and return true when it reaches zero.
+ */
+bool nvgpu_channel_sync_put_ref_and_check(struct nvgpu_channel_sync *s);
+#endif /* CONFIG_NVGPU_KERNEL_MODE_SUBMIT */
+
 /*
  * Set the channel syncpoint/semaphore to safe state
  * This should be used to reset User managed syncpoint since we don't
@@ -86,16 +100,6 @@ void nvgpu_channel_sync_set_safe_state(struct nvgpu_channel_sync *s);
 void nvgpu_channel_sync_destroy(struct nvgpu_channel_sync *sync,
 	bool set_safe_state);
 
-/*
- * Increment the usage_counter for this instance.
- */
-void nvgpu_channel_sync_get_ref(struct nvgpu_channel_sync *s);
-
-/*
- * Decrement the usage_counter for this instance and return if equals 0.
- */
-bool nvgpu_channel_sync_put_ref_and_check(struct nvgpu_channel_sync *s);
-
 /*
  * Construct a channel_sync backed by either a syncpoint or a semaphore.
* A channel_sync is by default constructed as backed by a syncpoint diff --git a/drivers/gpu/nvgpu/include/nvgpu/channel_sync_semaphore.h b/drivers/gpu/nvgpu/include/nvgpu/channel_sync_semaphore.h index c2c7249ac..e19c752f4 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/channel_sync_semaphore.h +++ b/drivers/gpu/nvgpu/include/nvgpu/channel_sync_semaphore.h @@ -29,6 +29,8 @@ #include #include +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT + struct nvgpu_channel; struct nvgpu_channel_sync_semaphore; /* @@ -46,4 +48,6 @@ struct nvgpu_channel_sync * nvgpu_channel_sync_semaphore_create( struct nvgpu_channel *c, bool user_managed); +#endif + #endif /* NVGPU_CHANNEL_SYNC_SEMAPHORE_H */ diff --git a/drivers/gpu/nvgpu/include/nvgpu/fence.h b/drivers/gpu/nvgpu/include/nvgpu/fence.h index 04061fd99..48e674d3b 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/fence.h +++ b/drivers/gpu/nvgpu/include/nvgpu/fence.h @@ -80,7 +80,9 @@ int nvgpu_fence_pool_alloc(struct nvgpu_channel *ch, unsigned int count); void nvgpu_fence_pool_free(struct nvgpu_channel *ch); +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT struct nvgpu_fence_type *nvgpu_fence_alloc(struct nvgpu_channel *ch); +#endif void nvgpu_fence_init(struct nvgpu_fence_type *f, const struct nvgpu_fence_ops *ops, diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index 7f78d62f4..e31dc476c 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h @@ -1118,9 +1118,11 @@ struct gpu_ops { int (*setup_sw)(struct gk20a *g); void (*cleanup_sw)(struct gk20a *g); void (*init_mem)(struct gk20a *g, struct nvgpu_channel *c); +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT u32 (*gp_get)(struct gk20a *g, struct nvgpu_channel *c); void (*gp_put)(struct gk20a *g, struct nvgpu_channel *c); u64 (*pb_get)(struct gk20a *g, struct nvgpu_channel *c); +#endif u32 (*entry_size)(struct gk20a *g); } userd; @@ -1229,7 +1231,9 @@ struct gpu_ops { void (*set_error_notifier)(struct nvgpu_channel *ch, u32 error); void (*reset_faulted)(struct gk20a *g, struct nvgpu_channel *ch, bool eng, bool pbdma); +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT int (*set_syncpt)(struct nvgpu_channel *ch); +#endif void (*debug_dump)(struct gk20a *g, struct nvgpu_debug_context *o, struct nvgpu_channel_dump_info *info); @@ -1257,8 +1261,10 @@ struct gpu_ops { void (*unbind_channel_check_eng_faulted)(struct nvgpu_tsg *tsg, struct nvgpu_channel *ch, struct nvgpu_channel_hw_state *state); +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT bool (*check_ctxsw_timeout)(struct nvgpu_tsg *tsg, bool *verbose, u32 *ms); +#endif #ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL int (*force_reset)(struct nvgpu_channel *ch, u32 err_code, bool verbose); diff --git a/drivers/gpu/nvgpu/include/nvgpu/tsg.h b/drivers/gpu/nvgpu/include/nvgpu/tsg.h index 71438eae0..b89bfec65 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/tsg.h +++ b/drivers/gpu/nvgpu/include/nvgpu/tsg.h @@ -127,8 +127,10 @@ void nvgpu_tsg_set_ctx_mmu_error(struct gk20a *g, struct nvgpu_tsg *tsg); bool nvgpu_tsg_mark_error(struct gk20a *g, struct nvgpu_tsg *tsg); +#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT bool nvgpu_tsg_check_ctxsw_timeout(struct nvgpu_tsg *tsg, bool *debug_dump, u32 *ms); +#endif int nvgpu_tsg_set_runlist_interleave(struct nvgpu_tsg *tsg, u32 level); #ifdef CONFIG_NVGPU_CHANNEL_TSG_SCHEDULING int nvgpu_tsg_set_timeslice(struct nvgpu_tsg *tsg, u32 timeslice_us);