diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index c4038a0e3..b73d3ce35 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile @@ -371,6 +371,7 @@ nvgpu-y += \ common/power_features/pg/pg.o \ common/sim.o \ common/sim_pci.o \ + common/fifo/fifo.o \ common/fifo/channel.o \ common/fifo/submit.o \ common/fifo/tsg.o \ diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources index 6580a1d25..6e3464d54 100644 --- a/drivers/gpu/nvgpu/Makefile.sources +++ b/drivers/gpu/nvgpu/Makefile.sources @@ -180,6 +180,7 @@ srcs += common/sim.c \ common/power_features/cg/cg.c \ common/power_features/pg/pg.c \ common/fifo/channel.c \ + common/fifo/fifo.c \ common/fifo/submit.c \ common/fifo/tsg.c \ common/fifo/runlist.c \ diff --git a/drivers/gpu/nvgpu/common/fifo/channel.c b/drivers/gpu/nvgpu/common/fifo/channel.c index 3a07749d9..b85bff6ff 100644 --- a/drivers/gpu/nvgpu/common/fifo/channel.c +++ b/drivers/gpu/nvgpu/common/fifo/channel.c @@ -2321,6 +2321,46 @@ void gk20a_channel_deterministic_unidle(struct gk20a *g) nvgpu_rwsem_up_write(&g->deterministic_busy); } +static void nvgpu_channel_destroy(struct gk20a *g, struct channel_gk20a *c) +{ + nvgpu_mutex_destroy(&c->ioctl_lock); + nvgpu_mutex_destroy(&c->joblist.cleanup_lock); + nvgpu_mutex_destroy(&c->joblist.pre_alloc.read_lock); + nvgpu_mutex_destroy(&c->sync_lock); +#if defined(CONFIG_GK20A_CYCLE_STATS) + nvgpu_mutex_destroy(&c->cyclestate.cyclestate_buffer_mutex); + nvgpu_mutex_destroy(&c->cs_client_mutex); +#endif + nvgpu_mutex_destroy(&c->dbg_s_lock); +} + +void nvgpu_channel_cleanup_sw(struct gk20a *g) +{ + struct fifo_gk20a *f = &g->fifo; + u32 chid; + + /* + * Make sure all channels are closed before deleting them. + */ + for (chid = 0; chid < f->num_channels; chid++) { + struct channel_gk20a *ch = &f->channel[chid]; + + /* + * Could race but worst that happens is we get an error message + * from gk20a_free_channel() complaining about multiple closes. 
+ */ + if (ch->referenceable) { + __gk20a_channel_kill(ch); + } + + nvgpu_channel_destroy(g, ch); + } + + nvgpu_vfree(g, f->channel); + f->channel = NULL; + nvgpu_mutex_destroy(&f->free_chs_mutex); +} + int gk20a_init_channel_support(struct gk20a *g, u32 chid) { struct channel_gk20a *c = g->fifo.channel+chid; @@ -2399,6 +2439,54 @@ fail_1: return err; } +int nvgpu_channel_setup_sw(struct gk20a *g) +{ + struct fifo_gk20a *f = &g->fifo; + u32 chid, i; + int err; + + f->num_channels = g->ops.channel.count(g); + + err = nvgpu_mutex_init(&f->free_chs_mutex); + if (err != 0) { + nvgpu_err(g, "mutex init failed"); + return err; + } + + f->channel = nvgpu_vzalloc(g, f->num_channels * sizeof(*f->channel)); + if (f->channel == NULL) { + nvgpu_err(g, "no mem for channels"); + err = -ENOMEM; + goto clean_up_mutex; + } + + nvgpu_init_list_node(&f->free_chs); + + for (chid = 0; chid < f->num_channels; chid++) { + err = gk20a_init_channel_support(g, chid); + if (err != 0) { + nvgpu_err(g, "channel init failed, chid=%u", chid); + goto clean_up; + } + } + + return 0; + +clean_up: + for (i = 0; i < chid; i++) { + struct channel_gk20a *ch = &f->channel[i]; + + nvgpu_channel_destroy(g, ch); + } + nvgpu_vfree(g, f->channel); + f->channel = NULL; + +clean_up_mutex: + nvgpu_mutex_destroy(&f->free_chs_mutex); + + return err; +} + /* in this context the "channel" is the host1x channel which * maps to *all* gk20a channels */ int gk20a_channel_suspend(struct gk20a *g) diff --git a/drivers/gpu/nvgpu/common/fifo/engines.c b/drivers/gpu/nvgpu/common/fifo/engines.c index 2b6b39594..8499eae68 100644 --- a/drivers/gpu/nvgpu/common/fifo/engines.c +++ b/drivers/gpu/nvgpu/common/fifo/engines.c @@ -446,4 +446,54 @@ int nvgpu_engine_wait_for_idle(struct gk20a *g) #endif /* NVGPU_ENGINE */ +int nvgpu_engine_setup_sw(struct gk20a *g) +{ + struct fifo_gk20a *f = &g->fifo; + int err = 0; + size_t size; + f->max_engines = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_ENGINES); + size = f->max_engines * sizeof(*f->engine_info); + f->engine_info = nvgpu_kzalloc(g, size); + if (f->engine_info == NULL) { + nvgpu_err(g, "no mem for engine info"); + return -ENOMEM; + } + + size = f->max_engines * sizeof(u32); + f->active_engines_list = nvgpu_kzalloc(g, size); + if (f->active_engines_list == NULL) { + nvgpu_err(g, "no mem for active engine list"); + err = -ENOMEM; + goto clean_up_engine_info; + } + (void) memset(f->active_engines_list, 0xff, size); + + err = g->ops.fifo.init_engine_info(f); + if (err != 0) { + nvgpu_err(g, "init engine info failed"); + goto clean_up; + } + + return 0; + +clean_up: + nvgpu_kfree(g, f->active_engines_list); + f->active_engines_list = NULL; + +clean_up_engine_info: + nvgpu_kfree(g, f->engine_info); + f->engine_info = NULL; + + return err; +} + +void nvgpu_engine_cleanup_sw(struct gk20a *g) +{ + struct fifo_gk20a *f = &g->fifo; + + nvgpu_kfree(g, f->engine_info); + f->engine_info = NULL; + nvgpu_kfree(g, f->active_engines_list); + f->active_engines_list = NULL; +} diff --git a/drivers/gpu/nvgpu/common/fifo/fifo.c b/drivers/gpu/nvgpu/common/fifo/fifo.c new file mode 100644 index 000000000..abb1931c2 --- /dev/null +++ b/drivers/gpu/nvgpu/common/fifo/fifo.c @@ -0,0 +1,287 @@ +/* + * FIFO + * + * Copyright (c) 2011-2019, NVIDIA CORPORATION. All rights reserved. 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <nvgpu/gk20a.h>
+
+#include <nvgpu/log.h>
+#include <nvgpu/kmem.h>
+#include <nvgpu/vm_area.h>
+#include <nvgpu/channel.h>
+#include <nvgpu/tsg.h>
+#include <nvgpu/runlist.h>
+#include <nvgpu/engines.h>
+
+#include <gk20a/fifo_gk20a.h>
+
+/* TODO: move to pbdma and userd when available */
+static int nvgpu_pbdma_setup_sw(struct gk20a *g)
+{
+	struct fifo_gk20a *f = &g->fifo;
+	int err;
+
+	if (g->ops.fifo.init_pbdma_info != NULL) {
+		err = g->ops.fifo.init_pbdma_info(f);
+		if (err != 0) {
+			nvgpu_err(g, "failed to init pbdma support");
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+static void nvgpu_pbdma_cleanup_sw(struct gk20a *g)
+{
+	struct fifo_gk20a *f = &g->fifo;
+
+	nvgpu_kfree(g, f->pbdma_map);
+	f->pbdma_map = NULL;
+}
+
+static int nvgpu_userd_setup_sw(struct gk20a *g)
+{
+	struct fifo_gk20a *f = &g->fifo;
+	int err;
+
+	f->userd_entry_size = g->ops.fifo.userd_entry_size(g);
+
+	err = gk20a_fifo_init_userd_slabs(g);
+	if (err != 0) {
+		nvgpu_err(g, "failed to init userd support");
+		return err;
+	}
+
+	return 0;
+}
+
+static void nvgpu_userd_cleanup_sw(struct gk20a *g)
+{
+	struct fifo_gk20a *f = &g->fifo;
+
+	gk20a_fifo_free_userd_slabs(g);
+	if (f->userd_gpu_va != 0ULL) {
+		(void) nvgpu_vm_area_free(g->mm.bar1.vm, f->userd_gpu_va);
+		f->userd_gpu_va = 0ULL;
+	}
+}
+
+static void nvgpu_fifo_cleanup_sw_common(struct fifo_gk20a *f)
+{
+	struct gk20a *g = f->g;
+
+	nvgpu_log_fn(g, " ");
+
+	nvgpu_channel_worker_deinit(g);
+
+	nvgpu_userd_cleanup_sw(g);
+	nvgpu_channel_cleanup_sw(g);
+	nvgpu_tsg_cleanup_sw(g);
+	nvgpu_runlist_cleanup_sw(g);
+	nvgpu_engine_cleanup_sw(g);
+	nvgpu_pbdma_cleanup_sw(g);
+
+	f->deferred_reset_pending = false;
+	nvgpu_mutex_destroy(&f->deferred_reset_mutex);
+	nvgpu_mutex_destroy(&f->engines_reset_mutex);
+	nvgpu_mutex_destroy(&f->intr.isr.mutex);
+}
+
+static int nvgpu_fifo_init_locks(struct gk20a *g, struct fifo_gk20a *f)
+{
+	int err;
+
+	err = nvgpu_mutex_init(&f->intr.isr.mutex);
+	if (err != 0) {
+		goto destroy_0;
+	}
+
+	err = nvgpu_mutex_init(&f->engines_reset_mutex);
+	if (err != 0) {
+		goto destroy_1;
+	}
+
+	err = nvgpu_mutex_init(&f->deferred_reset_mutex);
+	if (err != 0) {
+		goto destroy_2;
+	}
+
+	nvgpu_spinlock_init(&f->runlist_submit_lock);
+
+	return 0;
+
+destroy_2:
+	nvgpu_mutex_destroy(&f->engines_reset_mutex);
+
+destroy_1:
+	nvgpu_mutex_destroy(&f->intr.isr.mutex);
+
+destroy_0:
+	nvgpu_err(g, "failed to init mutex");
+	return err;
+}
+
+int nvgpu_fifo_setup_sw_common(struct gk20a *g)
+{
+	struct fifo_gk20a *f = &g->fifo;
+	int err = 0;
+
+	nvgpu_log_fn(g, " ");
+
+	f->g = g;
+
+	err = nvgpu_fifo_init_locks(g, f);
+	if (err != 0) {
+		nvgpu_err(g, "failed to init mutexes");
+		goto clean_up;
+	}
+
+	err = nvgpu_channel_setup_sw(g);
+	if (err != 0) {
+		nvgpu_err(g, "failed to init channel support");
+		goto clean_up;
+	}
+
+	err = nvgpu_tsg_setup_sw(g);
+	if (err != 0) {
+		nvgpu_err(g, "failed to init tsg support");
+		goto clean_up_channel;
+	}
+
+	err = nvgpu_pbdma_setup_sw(g);
+	if (err != 0) {
+		nvgpu_err(g, "failed to init pbdma support");
+		goto clean_up_tsg;
+	}
+
+	err = nvgpu_engine_setup_sw(g);
+	if (err != 0) {
+		nvgpu_err(g, "failed to init engine support");
+		goto clean_up_pbdma;
+	}
+
+	err = nvgpu_runlist_setup_sw(g);
+	if (err != 0) {
+		nvgpu_err(g, "failed to init runlist support");
+		goto clean_up_engine;
+	}
+
+	err = nvgpu_userd_setup_sw(g);
+	if (err != 0) {
+		nvgpu_err(g, "failed to init userd support");
+		goto clean_up_runlist;
+	}
+
+	f->remove_support = nvgpu_fifo_cleanup_sw_common;
+
+	nvgpu_log_fn(g, "done");
+	return 0;
+
+clean_up_runlist:
+	nvgpu_runlist_cleanup_sw(g);
+
+clean_up_engine:
+	nvgpu_engine_cleanup_sw(g);
+
+clean_up_pbdma:
+	nvgpu_pbdma_cleanup_sw(g);
+
+clean_up_tsg:
+	nvgpu_tsg_cleanup_sw(g);
+
+clean_up_channel:
+	nvgpu_channel_cleanup_sw(g);
+
+clean_up:
+	nvgpu_err(g, "init fifo support failed");
+	return err;
+}
+
+int nvgpu_fifo_setup_sw(struct gk20a *g)
+{
+	struct fifo_gk20a *f = &g->fifo;
+	int err = 0;
+	u32 size;
+	u32 num_pages;
+
+	nvgpu_log_fn(g, " ");
+
+	if (f->sw_ready) {
+		nvgpu_log_fn(g, "skip init");
+		return 0;
+	}
+
+	err = nvgpu_fifo_setup_sw_common(g);
+	if (err != 0) {
+		nvgpu_err(g, "fail: err: %d", err);
+		return err;
+	}
+
+	size = f->num_channels * f->userd_entry_size;
+	num_pages = DIV_ROUND_UP(size, PAGE_SIZE);
+	err = nvgpu_vm_area_alloc(g->mm.bar1.vm,
+			num_pages, PAGE_SIZE, &f->userd_gpu_va, 0);
+	if (err != 0) {
+		nvgpu_err(g, "userd gpu va allocation failed, err=%d", err);
+		goto clean_slabs;
+	}
+
+	err = nvgpu_channel_worker_init(g);
+	if (err != 0) {
+		nvgpu_err(g, "worker init fail, err=%d", err);
+		goto clean_vm_area;
+	}
+
+	f->sw_ready = true;
+
+	nvgpu_log_fn(g, "done");
+	return 0;
+
+clean_vm_area:
+	(void) nvgpu_vm_area_free(g->mm.bar1.vm, f->userd_gpu_va);
+	f->userd_gpu_va = 0ULL;
+
+clean_slabs:
+	gk20a_fifo_free_userd_slabs(g);
+	return err;
+}
+
+int nvgpu_fifo_init_support(struct gk20a *g)
+{
+	int err;
+
+	err = g->ops.fifo.setup_sw(g);
+	if (err != 0) {
+		nvgpu_err(g, "fifo sw setup failed, err=%d", err);
+		return err;
+	}
+
+	if (g->ops.fifo.init_fifo_setup_hw != NULL) {
+		err = g->ops.fifo.init_fifo_setup_hw(g);
+		if (err != 0) {
+			nvgpu_err(g, "fifo hw setup failed, err=%d", err);
+			return err;
+		}
+	}
+
+	return err;
+}
diff --git a/drivers/gpu/nvgpu/common/fifo/runlist.c b/drivers/gpu/nvgpu/common/fifo/runlist.c
index 13db6c160..7da748da0 100644
--- a/drivers/gpu/nvgpu/common/fifo/runlist.c
+++ b/drivers/gpu/nvgpu/common/fifo/runlist.c
@@ -600,11 +600,11 @@ void gk20a_fifo_set_runlist_state(struct gk20a *g, u32 runlists_mask,
 	}
 }
-void gk20a_fifo_delete_runlist(struct fifo_gk20a *f)
+void nvgpu_runlist_cleanup_sw(struct gk20a *g)
 {
+	struct fifo_gk20a *f = &g->fifo;
 	u32 i, j;
 	struct fifo_runlist_info_gk20a *runlist;
-	struct gk20a *g = NULL;
 
 	if ((f == NULL) || (f->runlist_info == NULL)) {
 		return;
 	}
@@ -636,8 +636,49 @@ void gk20a_fifo_delete_runlist(struct fifo_gk20a *f)
 	f->max_runlists = 0;
 }
 
-int nvgpu_init_runlist(struct gk20a *g, struct fifo_gk20a *f)
+static void nvgpu_init_runlist_enginfo(struct gk20a *g, struct fifo_gk20a *f)
 {
+	struct fifo_runlist_info_gk20a *runlist;
+	struct
fifo_engine_info_gk20a *engine_info; + u32 i, active_engine_id, pbdma_id, engine_id; + + nvgpu_log_fn(g, " "); + + if (g->is_virtual) { + return; + } + + for (i = 0; i < f->num_runlists; i++) { + runlist = &f->active_runlist_info[i]; + + for (pbdma_id = 0; pbdma_id < f->num_pbdma; pbdma_id++) { + if ((f->pbdma_map[pbdma_id] & + BIT32(runlist->runlist_id)) != 0U) { + runlist->pbdma_bitmask |= BIT32(pbdma_id); + } + } + nvgpu_log(g, gpu_dbg_info, "runlist %d : pbdma bitmask 0x%x", + runlist->runlist_id, runlist->pbdma_bitmask); + + for (engine_id = 0; engine_id < f->num_engines; ++engine_id) { + active_engine_id = f->active_engines_list[engine_id]; + engine_info = &f->engine_info[active_engine_id]; + + if ((engine_info != NULL) && + (engine_info->runlist_id == runlist->runlist_id)) { + runlist->eng_bitmask |= BIT(active_engine_id); + } + } + nvgpu_log(g, gpu_dbg_info, "runlist %d : act eng bitmask 0x%x", + runlist->runlist_id, runlist->eng_bitmask); + } + + nvgpu_log_fn(g, "done"); +} + +int nvgpu_runlist_setup_sw(struct gk20a *g) +{ + struct fifo_gk20a *f = &g->fifo; struct fifo_runlist_info_gk20a *runlist; unsigned int runlist_id; u32 i, j; @@ -647,6 +688,8 @@ int nvgpu_init_runlist(struct gk20a *g, struct fifo_gk20a *f) nvgpu_log_fn(g, " "); + f->runlist_entry_size = g->ops.runlist.entry_size(g); + f->num_runlist_entries = g->ops.runlist.length_max(g); f->max_runlists = g->ops.runlist.count_max(); f->runlist_info = nvgpu_kzalloc(g, sizeof(*f->runlist_info) * f->max_runlists); @@ -727,11 +770,13 @@ int nvgpu_init_runlist(struct gk20a *g, struct fifo_gk20a *f) runlist->cur_buffer = MAX_RUNLIST_BUFFERS; } + nvgpu_init_runlist_enginfo(g, f); + nvgpu_log_fn(g, "done"); return 0; clean_up_runlist: - gk20a_fifo_delete_runlist(f); + nvgpu_runlist_cleanup_sw(g); nvgpu_log_fn(g, "fail"); return err; } diff --git a/drivers/gpu/nvgpu/common/fifo/runlist_gk20a.c b/drivers/gpu/nvgpu/common/fifo/runlist_gk20a.c index f2bcffff1..361d071c5 100644 --- a/drivers/gpu/nvgpu/common/fifo/runlist_gk20a.c +++ b/drivers/gpu/nvgpu/common/fifo/runlist_gk20a.c @@ -121,11 +121,16 @@ u32 gk20a_runlist_count_max(void) return fifo_eng_runlist_base__size_1_v(); } -u32 gk20a_runlist_entry_size(void) +u32 gk20a_runlist_entry_size(struct gk20a *g) { return ram_rl_entry_size_v(); } +u32 gk20a_runlist_length_max(struct gk20a *g) +{ + return fifo_eng_runlist_length_max_v(); +} + void gk20a_runlist_get_tsg_entry(struct tsg_gk20a *tsg, u32 *runlist) { diff --git a/drivers/gpu/nvgpu/common/fifo/runlist_gk20a.h b/drivers/gpu/nvgpu/common/fifo/runlist_gk20a.h index 5f932b86e..aab9657be 100644 --- a/drivers/gpu/nvgpu/common/fifo/runlist_gk20a.h +++ b/drivers/gpu/nvgpu/common/fifo/runlist_gk20a.h @@ -36,7 +36,8 @@ int gk20a_runlist_set_interleave(struct gk20a *g, u32 runlist_id, u32 new_level); u32 gk20a_runlist_count_max(void); -u32 gk20a_runlist_entry_size(void); +u32 gk20a_runlist_entry_size(struct gk20a *g); +u32 gk20a_runlist_length_max(struct gk20a *g); void gk20a_runlist_get_tsg_entry(struct tsg_gk20a *tsg, u32 *runlist); void gk20a_runlist_get_ch_entry(struct channel_gk20a *ch, u32 *runlist); void gk20a_runlist_hw_submit(struct gk20a *g, u32 runlist_id, diff --git a/drivers/gpu/nvgpu/common/fifo/runlist_gv11b.c b/drivers/gpu/nvgpu/common/fifo/runlist_gv11b.c index f3d0569fa..3dc844766 100644 --- a/drivers/gpu/nvgpu/common/fifo/runlist_gv11b.c +++ b/drivers/gpu/nvgpu/common/fifo/runlist_gv11b.c @@ -43,7 +43,7 @@ u32 gv11b_runlist_count_max(void) return fifo_eng_runlist_base__size_1_v(); } -u32 gv11b_runlist_entry_size(void) 
+u32 gv11b_runlist_entry_size(struct gk20a *g) { return ram_rl_entry_size_v(); } diff --git a/drivers/gpu/nvgpu/common/fifo/runlist_gv11b.h b/drivers/gpu/nvgpu/common/fifo/runlist_gv11b.h index 26283a042..c84be4658 100644 --- a/drivers/gpu/nvgpu/common/fifo/runlist_gv11b.h +++ b/drivers/gpu/nvgpu/common/fifo/runlist_gv11b.h @@ -29,7 +29,7 @@ struct tsg_gk20a; int gv11b_runlist_reschedule(struct channel_gk20a *ch, bool preempt_next); u32 gv11b_runlist_count_max(void); -u32 gv11b_runlist_entry_size(void); +u32 gv11b_runlist_entry_size(struct gk20a *g); void gv11b_runlist_get_tsg_entry(struct tsg_gk20a *tsg, u32 *runlist); void gv11b_runlist_get_ch_entry(struct channel_gk20a *c, u32 *runlist); diff --git a/drivers/gpu/nvgpu/common/fifo/runlist_tu104.c b/drivers/gpu/nvgpu/common/fifo/runlist_tu104.c index 965c8c537..dd8d2fd89 100644 --- a/drivers/gpu/nvgpu/common/fifo/runlist_tu104.c +++ b/drivers/gpu/nvgpu/common/fifo/runlist_tu104.c @@ -37,7 +37,7 @@ u32 tu104_runlist_count_max(void) return fifo_runlist_base_lo__size_1_v(); } -u32 tu104_runlist_entry_size(void) +u32 tu104_runlist_entry_size(struct gk20a *g) { return ram_rl_entry_size_v(); } diff --git a/drivers/gpu/nvgpu/common/fifo/runlist_tu104.h b/drivers/gpu/nvgpu/common/fifo/runlist_tu104.h index afdae3f23..b7c047bda 100644 --- a/drivers/gpu/nvgpu/common/fifo/runlist_tu104.h +++ b/drivers/gpu/nvgpu/common/fifo/runlist_tu104.h @@ -27,7 +27,7 @@ struct gk20a; u32 tu104_runlist_count_max(void); -u32 tu104_runlist_entry_size(void); +u32 tu104_runlist_entry_size(struct gk20a *g); void tu104_runlist_hw_submit(struct gk20a *g, u32 runlist_id, u32 count, u32 buffer_index); int tu104_runlist_wait_pending(struct gk20a *g, u32 runlist_id); diff --git a/drivers/gpu/nvgpu/common/fifo/tsg.c b/drivers/gpu/nvgpu/common/fifo/tsg.c index abe5f2fd5..0c27a2765 100644 --- a/drivers/gpu/nvgpu/common/fifo/tsg.c +++ b/drivers/gpu/nvgpu/common/fifo/tsg.c @@ -247,11 +247,30 @@ void nvgpu_tsg_recover(struct gk20a *g, struct tsg_gk20a *tsg, nvgpu_mutex_release(&g->dbg_sessions_lock); } +static void nvgpu_tsg_destroy(struct gk20a *g, struct tsg_gk20a *tsg) +{ + nvgpu_mutex_destroy(&tsg->event_id_list_lock); +} + +void nvgpu_tsg_cleanup_sw(struct gk20a *g) +{ + struct fifo_gk20a *f = &g->fifo; + u32 tsgid; + + for (tsgid = 0; tsgid < f->num_channels; tsgid++) { + struct tsg_gk20a *tsg = &f->tsg[tsgid]; + + nvgpu_tsg_destroy(g, tsg); + } + + nvgpu_vfree(g, f->tsg); + f->tsg = NULL; + nvgpu_mutex_destroy(&f->tsg_inuse_mutex); +} int gk20a_init_tsg_support(struct gk20a *g, u32 tsgid) { struct tsg_gk20a *tsg = NULL; - int err; if (tsgid >= g->fifo.num_channels) { return -EINVAL; @@ -267,13 +286,51 @@ int gk20a_init_tsg_support(struct gk20a *g, u32 tsgid) nvgpu_rwsem_init(&tsg->ch_list_lock); nvgpu_init_list_node(&tsg->event_id_list); - err = nvgpu_mutex_init(&tsg->event_id_list_lock); + + return nvgpu_mutex_init(&tsg->event_id_list_lock); +} + +int nvgpu_tsg_setup_sw(struct gk20a *g) +{ + struct fifo_gk20a *f = &g->fifo; + u32 tsgid, i; + int err; + + err = nvgpu_mutex_init(&f->tsg_inuse_mutex); if (err != 0) { - tsg->in_use = true; /* make this TSG unusable */ + nvgpu_err(g, "mutex init failed"); return err; } + f->tsg = nvgpu_vzalloc(g, f->num_channels * sizeof(*f->tsg)); + if (f->tsg == NULL) { + nvgpu_err(g, "no mem for tsgs"); + err = -ENOMEM; + goto clean_up_mutex; + } + + for (tsgid = 0; tsgid < f->num_channels; tsgid++) { + err = gk20a_init_tsg_support(g, tsgid); + if (err != 0) { + nvgpu_err(g, "tsg init failed, tsgid=%u", tsgid); + goto clean_up; + } + } + 
return 0; + +clean_up: + for (i = 0; i < tsgid; i++) { + struct tsg_gk20a *tsg = &g->fifo.tsg[i]; + + nvgpu_tsg_destroy(g, tsg); + } + nvgpu_vfree(g, f->tsg); + f->tsg = NULL; + +clean_up_mutex: + nvgpu_mutex_destroy(&f->tsg_inuse_mutex); + return err; } bool nvgpu_tsg_mark_error(struct gk20a *g, diff --git a/drivers/gpu/nvgpu/common/init/nvgpu_init.c b/drivers/gpu/nvgpu/common/init/nvgpu_init.c index 78e5368f1..361d6f880 100644 --- a/drivers/gpu/nvgpu/common/init/nvgpu_init.c +++ b/drivers/gpu/nvgpu/common/init/nvgpu_init.c @@ -269,7 +269,7 @@ int gk20a_finalize_poweron(struct gk20a *g) goto done; } - err = gk20a_init_fifo_support(g); + err = nvgpu_fifo_init_support(g); if (err != 0) { nvgpu_err(g, "failed to init gk20a fifo"); goto done; diff --git a/drivers/gpu/nvgpu/common/vgpu/fifo/fifo_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/fifo/fifo_vgpu.c index e1232604c..74ac0e555 100644 --- a/drivers/gpu/nvgpu/common/vgpu/fifo/fifo_vgpu.c +++ b/drivers/gpu/nvgpu/common/vgpu/fifo/fifo_vgpu.c @@ -222,7 +222,7 @@ int vgpu_fifo_init_engine_info(struct fifo_gk20a *f) return 0; } -static int vgpu_init_fifo_setup_sw(struct gk20a *g) +static int vgpu_fifo_setup_sw(struct gk20a *g) { struct fifo_gk20a *f = &g->fifo; struct vgpu_priv_data *priv = vgpu_get_priv_data(g); @@ -237,18 +237,11 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g) } f->g = g; - f->num_channels = priv->constants.num_channels; - - /* - * This is not the HW format you're looking for (see - * vgpu_fifo_update_runlist_locked(), vgpu_submit_runlist()) - */ - f->runlist_entry_size = (u32)sizeof(u16); - - f->num_runlist_entries = f->num_channels; + f->num_channels = g->ops.channel.count(g); + f->runlist_entry_size = g->ops.runlist.entry_size(g); + f->num_runlist_entries = g->ops.runlist.length_max(g); f->max_engines = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_ENGINES); - - f->userd_entry_size = 1 << ram_userd_base_shift_v(); + f->userd_entry_size = g->ops.fifo.userd_entry_size(g); err = gk20a_fifo_init_userd_slabs(g); if (err != 0) { @@ -272,7 +265,7 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g) g->ops.fifo.init_engine_info(f); - err = nvgpu_init_runlist(g, f); + err = nvgpu_runlist_setup_sw(g); if (err != 0) { nvgpu_err(g, "failed to init runlist"); goto clean_up; @@ -385,7 +378,7 @@ int vgpu_init_fifo_support(struct gk20a *g) nvgpu_log_fn(g, " "); - err = vgpu_init_fifo_setup_sw(g); + err = vgpu_fifo_setup_sw(g); if (err) { return err; } @@ -575,3 +568,10 @@ u32 vgpu_fifo_default_timeslice_us(struct gk20a *g) return priv->constants.default_timeslice_us; } + +u32 vgpu_channel_count(struct gk20a *g) +{ + struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + + return priv->constants.num_channels; +} diff --git a/drivers/gpu/nvgpu/common/vgpu/fifo/fifo_vgpu.h b/drivers/gpu/nvgpu/common/vgpu/fifo/fifo_vgpu.h index 27324c126..3fc0c8a62 100644 --- a/drivers/gpu/nvgpu/common/vgpu/fifo/fifo_vgpu.h +++ b/drivers/gpu/nvgpu/common/vgpu/fifo/fifo_vgpu.h @@ -37,6 +37,7 @@ int vgpu_channel_alloc_inst(struct gk20a *g, struct channel_gk20a *ch); void vgpu_channel_free_inst(struct gk20a *g, struct channel_gk20a *ch); void vgpu_channel_enable(struct channel_gk20a *ch); void vgpu_channel_disable(struct channel_gk20a *ch); +u32 vgpu_channel_count(struct gk20a *g); int vgpu_channel_setup_ramfc(struct channel_gk20a *ch, u64 gpfifo_base, u32 gpfifo_entries, unsigned long acquire_timeout, u32 flags); diff --git a/drivers/gpu/nvgpu/common/vgpu/fifo/runlist_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/fifo/runlist_vgpu.c index e4f11bda5..85c48c4d6 100644 
--- a/drivers/gpu/nvgpu/common/vgpu/fifo/runlist_vgpu.c +++ b/drivers/gpu/nvgpu/common/vgpu/fifo/runlist_vgpu.c @@ -218,3 +218,19 @@ int vgpu_runlist_set_interleave(struct gk20a *g, WARN_ON(err || msg.ret); return err ? err : msg.ret; } + +u32 vgpu_runlist_length_max(struct gk20a *g) +{ + struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + + return priv->constants.num_channels; +} + +u32 vgpu_runlist_entry_size(struct gk20a *g) +{ + /* + * This is not the HW format you're looking for (see + * vgpu_fifo_update_runlist_locked(), vgpu_submit_runlist()) + */ + return (u32)sizeof(u16); +} diff --git a/drivers/gpu/nvgpu/common/vgpu/fifo/runlist_vgpu.h b/drivers/gpu/nvgpu/common/vgpu/fifo/runlist_vgpu.h index 671d5ce40..2a3ceaf00 100644 --- a/drivers/gpu/nvgpu/common/vgpu/fifo/runlist_vgpu.h +++ b/drivers/gpu/nvgpu/common/vgpu/fifo/runlist_vgpu.h @@ -34,3 +34,5 @@ int vgpu_runlist_set_interleave(struct gk20a *g, u32 id, u32 runlist_id, u32 new_level); +u32 vgpu_runlist_length_max(struct gk20a *g); +u32 vgpu_runlist_entry_size(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c index a61859fac..be57c3476 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c +++ b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c @@ -366,6 +366,7 @@ static const struct gpu_ops vgpu_gp10b_ops = { .userd_gp_get = gk20a_fifo_userd_gp_get, .userd_gp_put = gk20a_fifo_userd_gp_put, .userd_pb_get = gk20a_fifo_userd_pb_get, + .userd_entry_size = gk20a_fifo_userd_entry_size, .pbdma_acquire_val = gk20a_fifo_pbdma_acquire_val, .preempt_channel = vgpu_fifo_preempt_channel, .preempt_tsg = vgpu_fifo_preempt_tsg, @@ -390,7 +391,7 @@ static const struct gpu_ops vgpu_gp10b_ops = { .capture_channel_ram_dump = NULL, .intr_0_error_mask = gk20a_fifo_intr_0_error_mask, .is_preempt_pending = NULL, - .init_pbdma_intr_descs = gp10b_fifo_init_pbdma_intr_descs, + .init_pbdma_intr_descs = NULL, .reset_enable_hw = NULL, .teardown_ch_tsg = NULL, .handle_sched_error = NULL, @@ -405,7 +406,7 @@ static const struct gpu_ops vgpu_gp10b_ops = { .channel_suspend = gk20a_channel_suspend, .channel_resume = gk20a_channel_resume, .set_error_notifier = nvgpu_set_error_notifier, - .setup_sw = gk20a_init_fifo_setup_sw, + .setup_sw = nvgpu_fifo_setup_sw, .resetup_ramfc = NULL, .set_sm_exception_type_mask = vgpu_set_sm_exception_type_mask, }, @@ -441,7 +442,8 @@ static const struct gpu_ops vgpu_gp10b_ops = { .reload = vgpu_runlist_reload, .set_interleave = vgpu_runlist_set_interleave, .count_max = gk20a_runlist_count_max, - .entry_size = NULL, + .entry_size = vgpu_runlist_entry_size, + .length_max = vgpu_runlist_length_max, .get_tsg_entry = gk20a_runlist_get_tsg_entry, .get_ch_entry = gk20a_runlist_get_ch_entry, .hw_submit = NULL, @@ -452,7 +454,7 @@ static const struct gpu_ops vgpu_gp10b_ops = { .unbind = vgpu_channel_unbind, .enable = vgpu_channel_enable, .disable = vgpu_channel_disable, - .count = gm20b_channel_count, + .count = vgpu_channel_count, }, .netlist = { .get_netlist_name = gp10b_netlist_get_name, diff --git a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c index 5b6cfeb96..2aea65f58 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c +++ b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c @@ -435,6 +435,7 @@ static const struct gpu_ops vgpu_gv11b_ops = { .userd_gp_get = gv11b_userd_gp_get, .userd_gp_put = gv11b_userd_gp_put, .userd_pb_get = gv11b_userd_pb_get, + 
.userd_entry_size = gk20a_fifo_userd_entry_size, .pbdma_acquire_val = gk20a_fifo_pbdma_acquire_val, .preempt_channel = vgpu_fifo_preempt_channel, .preempt_tsg = vgpu_fifo_preempt_tsg, @@ -461,7 +462,7 @@ static const struct gpu_ops vgpu_gv11b_ops = { .capture_channel_ram_dump = NULL, .intr_0_error_mask = gv11b_fifo_intr_0_error_mask, .is_preempt_pending = gv11b_fifo_is_preempt_pending, - .init_pbdma_intr_descs = gv11b_fifo_init_pbdma_intr_descs, + .init_pbdma_intr_descs = NULL, .reset_enable_hw = NULL, .teardown_ch_tsg = NULL, .handle_sched_error = NULL, @@ -479,7 +480,7 @@ static const struct gpu_ops vgpu_gv11b_ops = { .channel_suspend = gk20a_channel_suspend, .channel_resume = gk20a_channel_resume, .set_error_notifier = nvgpu_set_error_notifier, - .setup_sw = gk20a_init_fifo_setup_sw, + .setup_sw = nvgpu_fifo_setup_sw, .resetup_ramfc = NULL, .free_channel_ctx_header = vgpu_gv11b_free_subctx_header, .handle_ctxsw_timeout = gv11b_fifo_handle_ctxsw_timeout, @@ -520,7 +521,8 @@ static const struct gpu_ops vgpu_gv11b_ops = { .reload = vgpu_runlist_reload, .set_interleave = vgpu_runlist_set_interleave, .count_max = gv11b_runlist_count_max, - .entry_size = NULL, + .entry_size = vgpu_runlist_entry_size, + .length_max = vgpu_runlist_length_max, .get_tsg_entry = gv11b_runlist_get_tsg_entry, .get_ch_entry = gv11b_runlist_get_ch_entry, .hw_submit = NULL, @@ -531,7 +533,7 @@ static const struct gpu_ops vgpu_gv11b_ops = { .unbind = vgpu_channel_unbind, .enable = vgpu_channel_enable, .disable = vgpu_channel_disable, - .count = gv11b_channel_count, + .count = vgpu_channel_count, }, .netlist = { .get_netlist_name = gv11b_netlist_get_name, diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index 40996a953..35fc86d55 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c @@ -221,59 +221,6 @@ static inline u32 gk20a_mmu_id_to_engine_id(struct gk20a *g, u32 fault_id) return active_engine_id; } -static void gk20a_remove_fifo_support(struct fifo_gk20a *f) -{ - struct gk20a *g = f->g; - unsigned int i = 0; - - nvgpu_log_fn(g, " "); - - nvgpu_channel_worker_deinit(g); - /* - * Make sure all channels are closed before deleting them. - */ - for (; i < f->num_channels; i++) { - struct channel_gk20a *c = f->channel + i; - struct tsg_gk20a *tsg = f->tsg + i; - - /* - * Could race but worst that happens is we get an error message - * from gk20a_free_channel() complaining about multiple closes. 
- */ - if (c->referenceable) { - __gk20a_channel_kill(c); - } - - nvgpu_mutex_destroy(&tsg->event_id_list_lock); - - nvgpu_mutex_destroy(&c->ioctl_lock); - nvgpu_mutex_destroy(&c->joblist.cleanup_lock); - nvgpu_mutex_destroy(&c->joblist.pre_alloc.read_lock); - nvgpu_mutex_destroy(&c->sync_lock); -#if defined(CONFIG_GK20A_CYCLE_STATS) - nvgpu_mutex_destroy(&c->cyclestate.cyclestate_buffer_mutex); - nvgpu_mutex_destroy(&c->cs_client_mutex); -#endif - nvgpu_mutex_destroy(&c->dbg_s_lock); - - } - - nvgpu_vfree(g, f->channel); - nvgpu_vfree(g, f->tsg); - gk20a_fifo_free_userd_slabs(g); - (void) nvgpu_vm_area_free(g->mm.bar1.vm, f->userd_gpu_va); - f->userd_gpu_va = 0ULL; - - gk20a_fifo_delete_runlist(f); - - nvgpu_kfree(g, f->pbdma_map); - f->pbdma_map = NULL; - nvgpu_kfree(g, f->engine_info); - f->engine_info = NULL; - nvgpu_kfree(g, f->active_engines_list); - f->active_engines_list = NULL; -} - u32 gk20a_fifo_intr_0_error_mask(struct gk20a *g) { u32 intr_0_error_mask = @@ -382,170 +329,6 @@ int gk20a_init_fifo_reset_enable_hw(struct gk20a *g) return 0; } -static int nvgpu_init_runlist_enginfo(struct gk20a *g, struct fifo_gk20a *f) -{ - struct fifo_runlist_info_gk20a *runlist; - struct fifo_engine_info_gk20a *engine_info; - u32 i, active_engine_id, pbdma_id, engine_id; - - nvgpu_log_fn(g, " "); - - for (i = 0; i < f->num_runlists; i++) { - runlist = &f->active_runlist_info[i]; - - for (pbdma_id = 0; pbdma_id < f->num_pbdma; pbdma_id++) { - if ((f->pbdma_map[pbdma_id] & - BIT32(runlist->runlist_id)) != 0U) { - runlist->pbdma_bitmask |= BIT32(pbdma_id); - } - } - nvgpu_log(g, gpu_dbg_info, "runlist %d : pbdma bitmask 0x%x", - runlist->runlist_id, runlist->pbdma_bitmask); - - for (engine_id = 0; engine_id < f->num_engines; ++engine_id) { - active_engine_id = f->active_engines_list[engine_id]; - engine_info = &f->engine_info[active_engine_id]; - - if ((engine_info != NULL) && - (engine_info->runlist_id == runlist->runlist_id)) { - runlist->eng_bitmask |= BIT(active_engine_id); - } - } - nvgpu_log(g, gpu_dbg_info, "runlist %d : act eng bitmask 0x%x", - runlist->runlist_id, runlist->eng_bitmask); - } - - nvgpu_log_fn(g, "done"); - - return 0; -} - -int gk20a_init_fifo_setup_sw_common(struct gk20a *g) -{ - struct fifo_gk20a *f = &g->fifo; - unsigned int chid, i; - int err = 0; - - nvgpu_log_fn(g, " "); - - f->g = g; - - err = nvgpu_mutex_init(&f->intr.isr.mutex); - if (err != 0) { - nvgpu_err(g, "failed to init isr.mutex"); - return err; - } - - err = nvgpu_mutex_init(&f->engines_reset_mutex); - if (err != 0) { - nvgpu_err(g, "failed to init engines_reset_mutex"); - return err; - } - - nvgpu_spinlock_init(&f->runlist_submit_lock); - - g->ops.fifo.init_pbdma_intr_descs(f); /* just filling in data/tables */ - - f->num_channels = g->ops.channel.count(g); - f->runlist_entry_size = g->ops.runlist.entry_size(); - f->num_runlist_entries = fifo_eng_runlist_length_max_v(); - f->num_pbdma = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA); - f->max_engines = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_ENGINES); - - f->userd_entry_size = BIT16(ram_userd_base_shift_v()); - - f->channel = nvgpu_vzalloc(g, f->num_channels * sizeof(*f->channel)); - f->tsg = nvgpu_vzalloc(g, f->num_channels * sizeof(*f->tsg)); - f->pbdma_map = nvgpu_kzalloc(g, f->num_pbdma * sizeof(*f->pbdma_map)); - f->engine_info = nvgpu_kzalloc(g, f->max_engines * - sizeof(*f->engine_info)); - f->active_engines_list = nvgpu_kzalloc(g, f->max_engines * sizeof(u32)); - - if (!((f->channel != NULL) && - (f->tsg != NULL) && - (f->pbdma_map != NULL) 
&& - (f->engine_info != NULL) && - (f->active_engines_list != NULL))) { - err = -ENOMEM; - goto clean_up; - } - (void) memset(f->active_engines_list, 0xff, - (f->max_engines * sizeof(u32))); - - /* pbdma map needs to be in place before calling engine info init */ - for (i = 0; i < f->num_pbdma; ++i) { - f->pbdma_map[i] = gk20a_readl(g, fifo_pbdma_map_r(i)); - } - - g->ops.fifo.init_engine_info(f); - - err = nvgpu_init_runlist(g, f); - if (err != 0) { - nvgpu_err(g, "failed to init runlist"); - goto clean_up; - } - - nvgpu_init_runlist_enginfo(g, f); - if (err != 0) { - nvgpu_err(g, "failed to init runlist engine info"); - goto clean_up; - } - - nvgpu_init_list_node(&f->free_chs); - - err = nvgpu_mutex_init(&f->free_chs_mutex); - if (err != 0) { - nvgpu_err(g, "failed to init free_chs_mutex"); - goto clean_up; - } - - for (chid = 0; chid < f->num_channels; chid++) { - gk20a_init_channel_support(g, chid); - gk20a_init_tsg_support(g, chid); - } - - err = nvgpu_mutex_init(&f->tsg_inuse_mutex); - if (err != 0) { - nvgpu_err(g, "failed to init tsg_inuse_mutex"); - goto clean_up; - } - - f->remove_support = gk20a_remove_fifo_support; - - f->deferred_reset_pending = false; - - err = nvgpu_mutex_init(&f->deferred_reset_mutex); - if (err != 0) { - nvgpu_err(g, "failed to init deferred_reset_mutex"); - goto clean_up; - } - - err = gk20a_fifo_init_userd_slabs(g); - if (err != 0) { - nvgpu_err(g, "userd slabs init fail, err=%d", err); - goto clean_up; - } - - nvgpu_log_fn(g, "done"); - return 0; - -clean_up: - nvgpu_err(g, "fail"); - - nvgpu_vfree(g, f->channel); - f->channel = NULL; - nvgpu_vfree(g, f->tsg); - f->tsg = NULL; - nvgpu_kfree(g, f->pbdma_map); - f->pbdma_map = NULL; - nvgpu_kfree(g, f->engine_info); - f->engine_info = NULL; - nvgpu_kfree(g, f->active_engines_list); - f->active_engines_list = NULL; - - return err; -} - int gk20a_fifo_init_userd_slabs(struct gk20a *g) { struct fifo_gk20a *f = &g->fifo; @@ -627,55 +410,6 @@ void gk20a_fifo_free_userd_slabs(struct gk20a *g) f->userd_slabs = NULL; } -int gk20a_init_fifo_setup_sw(struct gk20a *g) -{ - struct fifo_gk20a *f = &g->fifo; - int err = 0; - u32 size; - u32 num_pages; - - nvgpu_log_fn(g, " "); - - if (f->sw_ready) { - nvgpu_log_fn(g, "skip init"); - return 0; - } - - err = gk20a_init_fifo_setup_sw_common(g); - if (err != 0) { - nvgpu_err(g, "fail: err: %d", err); - return err; - } - - size = f->num_channels * f->userd_entry_size; - num_pages = DIV_ROUND_UP(size, PAGE_SIZE); - err = nvgpu_vm_area_alloc(g->mm.bar1.vm, - num_pages, PAGE_SIZE, &f->userd_gpu_va, 0); - if (err != 0) { - nvgpu_err(g, "userd gpu va allocation failed, err=%d", err); - goto clean_slabs; - } - - err = nvgpu_channel_worker_init(g); - if (err != 0) { - nvgpu_err(g, "worker init fail, err=%d", err); - goto clean_vm_area; - } - - f->sw_ready = true; - - nvgpu_log_fn(g, "done"); - return 0; - -clean_vm_area: - (void) nvgpu_vm_area_free(g->mm.bar1.vm, f->userd_gpu_va); - f->userd_gpu_va = 0ULL; - -clean_slabs: - gk20a_fifo_free_userd_slabs(g); - return err; -} - void gk20a_fifo_handle_runlist_event(struct gk20a *g) { u32 runlist_event = gk20a_readl(g, fifo_intr_runlist_r()); @@ -708,25 +442,6 @@ int gk20a_init_fifo_setup_hw(struct gk20a *g) return 0; } -int gk20a_init_fifo_support(struct gk20a *g) -{ - int err; - - err = g->ops.fifo.setup_sw(g); - if (err != 0) { - return err; - } - - if (g->ops.fifo.init_fifo_setup_hw != NULL) { - err = g->ops.fifo.init_fifo_setup_hw(g); - } - if (err != 0) { - return err; - } - - return err; -} - /* return with a reference to the 
channel, caller must put it back */ struct channel_gk20a * gk20a_refch_from_inst_ptr(struct gk20a *g, u64 inst_ptr) @@ -2881,6 +2596,11 @@ void gk20a_fifo_userd_gp_put(struct gk20a *g, struct channel_gk20a *c) gk20a_bar1_writel(g, (u32)addr, c->gpfifo.put); } +u32 gk20a_fifo_userd_entry_size(struct gk20a *g) +{ + return BIT32(ram_userd_base_shift_v()); +} + u32 gk20a_fifo_pbdma_acquire_val(u64 timeout) { u32 val, exponent, mantissa; @@ -2944,3 +2664,26 @@ bool gk20a_fifo_find_pbdma_for_runlist(struct fifo_gk20a *f, u32 runlist_id, *pbdma_id = id; return found_pbdma_for_runlist; } + +int gk20a_fifo_init_pbdma_info(struct fifo_gk20a *f) +{ + struct gk20a *g = f->g; + u32 id; + + f->num_pbdma = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA); + + f->pbdma_map = nvgpu_kzalloc(g, f->num_pbdma * sizeof(*f->pbdma_map)); + if (f->pbdma_map == NULL) { + return -ENOMEM; + } + + for (id = 0; id < f->num_pbdma; ++id) { + f->pbdma_map[id] = gk20a_readl(g, fifo_pbdma_map_r(id)); + } + + if (g->ops.fifo.init_pbdma_intr_descs != NULL) { + g->ops.fifo.init_pbdma_intr_descs(f); + } + + return 0; +} diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h index 9a7a9f317..603a86194 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h @@ -246,8 +246,6 @@ struct nvgpu_channel_dump_info { } sema; }; -int gk20a_init_fifo_support(struct gk20a *g); - int gk20a_init_fifo_setup_hw(struct gk20a *g); void gk20a_fifo_isr(struct gk20a *g); @@ -352,8 +350,6 @@ u32 gk20a_fifo_pbdma_acquire_val(u64 timeout); u32 gk20a_fifo_runlist_busy_engines(struct gk20a *g, u32 runlist_id); -int gk20a_init_fifo_setup_sw_common(struct gk20a *g); -int gk20a_init_fifo_setup_sw(struct gk20a *g); void gk20a_fifo_handle_runlist_event(struct gk20a *g); bool gk20a_fifo_should_defer_engine_reset(struct gk20a *g, u32 engine_id, u32 engine_subid, bool fake_fault); @@ -385,9 +381,11 @@ void gk20a_fifo_get_mmu_fault_gpc_desc(struct mmu_fault_info *mmfault); int gk20a_fifo_init_userd_slabs(struct gk20a *g); void gk20a_fifo_free_userd_slabs(struct gk20a *g); int gk20a_fifo_init_userd(struct gk20a *g, struct channel_gk20a *c); +u32 gk20a_fifo_userd_entry_size(struct gk20a *g); bool gk20a_fifo_find_pbdma_for_runlist(struct fifo_gk20a *f, u32 runlist_id, u32 *pbdma_id); u32 gk20a_fifo_read_pbdma_data(struct gk20a *g, u32 pbdma_id); void gk20a_fifo_reset_pbdma_header(struct gk20a *g, u32 pbdma_id); +int gk20a_fifo_init_pbdma_info(struct fifo_gk20a *f); #endif /* FIFO_GK20A_H */ diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c index b8af8a961..07e1627fa 100644 --- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c @@ -515,6 +515,7 @@ static const struct gpu_ops gm20b_ops = { .userd_gp_get = gk20a_fifo_userd_gp_get, .userd_gp_put = gk20a_fifo_userd_gp_put, .userd_pb_get = gk20a_fifo_userd_pb_get, + .userd_entry_size = gk20a_fifo_userd_entry_size, .pbdma_acquire_val = gk20a_fifo_pbdma_acquire_val, .preempt_channel = gk20a_fifo_preempt_channel, .preempt_tsg = gk20a_fifo_preempt_tsg, @@ -530,6 +531,7 @@ static const struct gpu_ops gm20b_ops = { .get_pbdma_signature = gk20a_fifo_get_pbdma_signature, .tsg_set_timeslice = gk20a_fifo_tsg_set_timeslice, .force_reset_ch = gk20a_fifo_force_reset_ch, + .init_pbdma_info = gk20a_fifo_init_pbdma_info, .init_engine_info = gm20b_fifo_init_engine_info, .get_engines_mask_on_id = gk20a_fifo_engines_on_id, .dump_pbdma_status = gk20a_dump_pbdma_status, @@ -554,7 +556,7 @@ static const 
struct gpu_ops gm20b_ops = { .channel_suspend = gk20a_channel_suspend, .channel_resume = gk20a_channel_resume, .set_error_notifier = nvgpu_set_error_notifier, - .setup_sw = gk20a_init_fifo_setup_sw, + .setup_sw = nvgpu_fifo_setup_sw, .set_sm_exception_type_mask = gk20a_tsg_set_sm_exception_type_mask, .runlist_busy_engines = gk20a_fifo_runlist_busy_engines, .find_pbdma_for_runlist = gk20a_fifo_find_pbdma_for_runlist, @@ -596,6 +598,7 @@ static const struct gpu_ops gm20b_ops = { .set_interleave = gk20a_runlist_set_interleave, .count_max = gk20a_runlist_count_max, .entry_size = gk20a_runlist_entry_size, + .length_max = gk20a_runlist_length_max, .get_tsg_entry = gk20a_runlist_get_tsg_entry, .get_ch_entry = gk20a_runlist_get_ch_entry, .hw_submit = gk20a_runlist_hw_submit, diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c index 2a15ef5e0..e83fd69b9 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c @@ -566,6 +566,7 @@ static const struct gpu_ops gp10b_ops = { .userd_gp_get = gk20a_fifo_userd_gp_get, .userd_gp_put = gk20a_fifo_userd_gp_put, .userd_pb_get = gk20a_fifo_userd_pb_get, + .userd_entry_size = gk20a_fifo_userd_entry_size, .pbdma_acquire_val = gk20a_fifo_pbdma_acquire_val, .preempt_channel = gk20a_fifo_preempt_channel, .preempt_tsg = gk20a_fifo_preempt_tsg, @@ -581,6 +582,7 @@ static const struct gpu_ops gp10b_ops = { .get_pbdma_signature = gp10b_fifo_get_pbdma_signature, .tsg_set_timeslice = gk20a_fifo_tsg_set_timeslice, .force_reset_ch = gk20a_fifo_force_reset_ch, + .init_pbdma_info = gk20a_fifo_init_pbdma_info, .init_engine_info = gm20b_fifo_init_engine_info, .get_engines_mask_on_id = gk20a_fifo_engines_on_id, .dump_pbdma_status = gk20a_dump_pbdma_status, @@ -605,7 +607,7 @@ static const struct gpu_ops gp10b_ops = { .channel_suspend = gk20a_channel_suspend, .channel_resume = gk20a_channel_resume, .set_error_notifier = nvgpu_set_error_notifier, - .setup_sw = gk20a_init_fifo_setup_sw, + .setup_sw = nvgpu_fifo_setup_sw, .resetup_ramfc = gp10b_fifo_resetup_ramfc, .set_sm_exception_type_mask = gk20a_tsg_set_sm_exception_type_mask, .runlist_busy_engines = gk20a_fifo_runlist_busy_engines, @@ -650,6 +652,7 @@ static const struct gpu_ops gp10b_ops = { .set_interleave = gk20a_runlist_set_interleave, .count_max = gk20a_runlist_count_max, .entry_size = gk20a_runlist_entry_size, + .length_max = gk20a_runlist_length_max, .get_tsg_entry = gk20a_runlist_get_tsg_entry, .get_ch_entry = gk20a_runlist_get_ch_entry, .hw_submit = gk20a_runlist_hw_submit, diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index add960799..4aa8d6429 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c @@ -734,6 +734,7 @@ static const struct gpu_ops gv100_ops = { .userd_gp_get = gv11b_userd_gp_get, .userd_gp_put = gv11b_userd_gp_put, .userd_pb_get = gv11b_userd_pb_get, + .userd_entry_size = gk20a_fifo_userd_entry_size, .pbdma_acquire_val = gk20a_fifo_pbdma_acquire_val, .preempt_channel = gv11b_fifo_preempt_channel, .preempt_tsg = gv11b_fifo_preempt_tsg, @@ -751,6 +752,7 @@ static const struct gpu_ops gv100_ops = { .tsg_set_timeslice = gk20a_fifo_tsg_set_timeslice, .force_reset_ch = gk20a_fifo_force_reset_ch, .init_engine_info = gm20b_fifo_init_engine_info, + .init_pbdma_info = gk20a_fifo_init_pbdma_info, .get_engines_mask_on_id = gk20a_fifo_engines_on_id, .dump_pbdma_status = gk20a_dump_pbdma_status, .dump_channel_status_ramfc = gv11b_dump_channel_status_ramfc, @@ -777,7 +779,7 
@@ static const struct gpu_ops gv100_ops = { .channel_suspend = gk20a_channel_suspend, .channel_resume = gk20a_channel_resume, .set_error_notifier = nvgpu_set_error_notifier_if_empty, - .setup_sw = gk20a_init_fifo_setup_sw, + .setup_sw = nvgpu_fifo_setup_sw, .resetup_ramfc = NULL, .free_channel_ctx_header = gv11b_free_subctx_header, .ring_channel_doorbell = gv11b_ring_channel_doorbell, @@ -824,6 +826,7 @@ static const struct gpu_ops gv100_ops = { .set_interleave = gk20a_runlist_set_interleave, .count_max = gv100_runlist_count_max, .entry_size = gv11b_runlist_entry_size, + .length_max = gk20a_runlist_length_max, .get_tsg_entry = gv11b_runlist_get_tsg_entry, .get_ch_entry = gv11b_runlist_get_ch_entry, .hw_submit = gk20a_runlist_hw_submit, diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index 7d36be54b..a15589ede 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c @@ -687,6 +687,7 @@ static const struct gpu_ops gv11b_ops = { .userd_gp_get = gv11b_userd_gp_get, .userd_gp_put = gv11b_userd_gp_put, .userd_pb_get = gv11b_userd_pb_get, + .userd_entry_size = gk20a_fifo_userd_entry_size, .pbdma_acquire_val = gk20a_fifo_pbdma_acquire_val, .preempt_channel = gv11b_fifo_preempt_channel, .preempt_tsg = gv11b_fifo_preempt_tsg, @@ -704,6 +705,7 @@ static const struct gpu_ops gv11b_ops = { .tsg_set_timeslice = gk20a_fifo_tsg_set_timeslice, .force_reset_ch = gk20a_fifo_force_reset_ch, .init_engine_info = gm20b_fifo_init_engine_info, + .init_pbdma_info = gk20a_fifo_init_pbdma_info, .get_engines_mask_on_id = gk20a_fifo_engines_on_id, .dump_pbdma_status = gk20a_dump_pbdma_status, .dump_channel_status_ramfc = gv11b_dump_channel_status_ramfc, @@ -730,7 +732,7 @@ static const struct gpu_ops gv11b_ops = { .channel_suspend = gk20a_channel_suspend, .channel_resume = gk20a_channel_resume, .set_error_notifier = nvgpu_set_error_notifier_if_empty, - .setup_sw = gk20a_init_fifo_setup_sw, + .setup_sw = nvgpu_fifo_setup_sw, .resetup_ramfc = NULL, .free_channel_ctx_header = gv11b_free_subctx_header, .handle_ctxsw_timeout = gv11b_fifo_handle_ctxsw_timeout, @@ -780,6 +782,7 @@ static const struct gpu_ops gv11b_ops = { .set_interleave = gk20a_runlist_set_interleave, .count_max = gv11b_runlist_count_max, .entry_size = gv11b_runlist_entry_size, + .length_max = gk20a_runlist_length_max, .get_tsg_entry = gv11b_runlist_get_tsg_entry, .get_ch_entry = gv11b_runlist_get_ch_entry, .hw_submit = gk20a_runlist_hw_submit, diff --git a/drivers/gpu/nvgpu/include/nvgpu/channel.h b/drivers/gpu/nvgpu/include/nvgpu/channel.h index 78cce2cd3..845b665c9 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/channel.h +++ b/drivers/gpu/nvgpu/include/nvgpu/channel.h @@ -361,6 +361,8 @@ static inline bool gk20a_channel_as_bound(struct channel_gk20a *ch) } int channel_gk20a_commit_va(struct channel_gk20a *c); int gk20a_init_channel_support(struct gk20a *g, u32 chid); +int nvgpu_channel_setup_sw(struct gk20a *g); +void nvgpu_channel_cleanup_sw(struct gk20a *g); /* must be inside gk20a_busy()..gk20a_idle() */ void gk20a_channel_close(struct channel_gk20a *ch); diff --git a/drivers/gpu/nvgpu/include/nvgpu/engines.h b/drivers/gpu/nvgpu/include/nvgpu/engines.h index 4bf373f3c..e3fe98516 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/engines.h +++ b/drivers/gpu/nvgpu/include/nvgpu/engines.h @@ -50,6 +50,8 @@ u32 nvgpu_engine_get_gr_id(struct gk20a *g); u32 nvgpu_engine_interrupt_mask(struct gk20a *g); u32 nvgpu_engine_act_interrupt_mask(struct gk20a *g, u32 act_eng_id); u32 
nvgpu_engine_get_all_ce_reset_mask(struct gk20a *g); +int nvgpu_engine_setup_sw(struct gk20a *g); +void nvgpu_engine_cleanup_sw(struct gk20a *g); int nvgpu_engine_enable_activity(struct gk20a *g, struct fifo_engine_info_gk20a *eng_info); diff --git a/drivers/gpu/nvgpu/include/nvgpu/fifo.h b/drivers/gpu/nvgpu/include/nvgpu/fifo.h index 04a9a070a..68ae56ba4 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/fifo.h +++ b/drivers/gpu/nvgpu/include/nvgpu/fifo.h @@ -1,7 +1,7 @@ /* * fifo common definitions (gr host) * - * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2011-2019, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -45,4 +45,8 @@ struct nvgpu_channel_hw_state { const char *status_string; }; +int nvgpu_fifo_init_support(struct gk20a *g); +int nvgpu_fifo_setup_sw(struct gk20a *g); +int nvgpu_fifo_setup_sw_common(struct gk20a *g); + #endif /* NVGPU_FIFO_COMMON_H */ diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index 0f913d0d8..0866288b0 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h @@ -806,12 +806,14 @@ struct gpu_ops { int (*tsg_unbind_channel)(struct channel_gk20a *ch); int (*tsg_open)(struct tsg_gk20a *tsg); void (*tsg_release)(struct tsg_gk20a *tsg); + int (*init_pbdma_info)(struct fifo_gk20a *f); int (*init_engine_info)(struct fifo_gk20a *f); u32 (*get_engines_mask_on_id)(struct gk20a *g, u32 id, bool is_tsg); u32 (*userd_gp_get)(struct gk20a *g, struct channel_gk20a *ch); void (*userd_gp_put)(struct gk20a *g, struct channel_gk20a *ch); u64 (*userd_pb_get)(struct gk20a *g, struct channel_gk20a *ch); + u32 (*userd_entry_size)(struct gk20a *g); void (*free_channel_ctx_header)(struct channel_gk20a *ch); void (*dump_pbdma_status)(struct gk20a *g, struct gk20a_debug_output *o); @@ -886,7 +888,8 @@ struct gpu_ops { int (*set_interleave)(struct gk20a *g, u32 id, u32 runlist_id, u32 new_level); u32 (*count_max)(void); - u32 (*entry_size)(void); + u32 (*entry_size)(struct gk20a *g); + u32 (*length_max)(struct gk20a *g); void (*get_tsg_entry)(struct tsg_gk20a *tsg, u32 *runlist); void (*get_ch_entry)(struct channel_gk20a *ch, u32 *runlist); void (*hw_submit)(struct gk20a *g, u32 runlist_id, diff --git a/drivers/gpu/nvgpu/include/nvgpu/runlist.h b/drivers/gpu/nvgpu/include/nvgpu/runlist.h index 88a732bfe..8994b356f 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/runlist.h +++ b/drivers/gpu/nvgpu/include/nvgpu/runlist.h @@ -55,8 +55,8 @@ const char *gk20a_fifo_interleave_level_name(u32 interleave_level); void gk20a_fifo_set_runlist_state(struct gk20a *g, u32 runlists_mask, u32 runlist_state); -void gk20a_fifo_delete_runlist(struct fifo_gk20a *f); -int nvgpu_init_runlist(struct gk20a *g, struct fifo_gk20a *f); +int nvgpu_runlist_setup_sw(struct gk20a *g); +void nvgpu_runlist_cleanup_sw(struct gk20a *g); void nvgpu_fifo_lock_active_runlists(struct gk20a *g); void nvgpu_fifo_unlock_active_runlists(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/include/nvgpu/tsg.h b/drivers/gpu/nvgpu/include/nvgpu/tsg.h index 435af5529..212b892b6 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/tsg.h +++ b/drivers/gpu/nvgpu/include/nvgpu/tsg.h @@ -87,6 +87,9 @@ void gk20a_tsg_release_common(struct gk20a *g, struct tsg_gk20a *tsg); void gk20a_tsg_release(struct nvgpu_ref *ref); int gk20a_init_tsg_support(struct gk20a *g, u32 tsgid); +int 
nvgpu_tsg_setup_sw(struct gk20a *g); +void nvgpu_tsg_cleanup_sw(struct gk20a *g); + struct tsg_gk20a *tsg_gk20a_from_ch(struct channel_gk20a *ch); int gk20a_enable_tsg(struct tsg_gk20a *tsg); diff --git a/drivers/gpu/nvgpu/os/linux/channel.h b/drivers/gpu/nvgpu/os/linux/channel.h index e6326fad3..0d3fe6128 100644 --- a/drivers/gpu/nvgpu/os/linux/channel.h +++ b/drivers/gpu/nvgpu/os/linux/channel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -90,8 +90,8 @@ struct nvgpu_channel_linux { }; u32 nvgpu_submit_gpfifo_user_flags_to_common_flags(u32 user_flags); -int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l); -void nvgpu_remove_channel_support_linux(struct nvgpu_os_linux *l); +int nvgpu_channel_init_support_linux(struct nvgpu_os_linux *l); +void nvgpu_channel_remove_support_linux(struct nvgpu_os_linux *l); struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g, void (*update_fn)(struct channel_gk20a *, void *), diff --git a/drivers/gpu/nvgpu/os/linux/linux-channel.c b/drivers/gpu/nvgpu/os/linux/linux-channel.c index 05c72f2f8..2ecd9903e 100644 --- a/drivers/gpu/nvgpu/os/linux/linux-channel.c +++ b/drivers/gpu/nvgpu/os/linux/linux-channel.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, NVIDIA Corporation. All rights reserved. + * Copyright (c) 2017-2019, NVIDIA Corporation. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -526,7 +526,7 @@ free_gpfifo: return err; } -int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l) +int nvgpu_channel_init_support_linux(struct nvgpu_os_linux *l) { struct gk20a *g = &l->g; struct fifo_gk20a *f = &g->fifo; @@ -577,7 +577,7 @@ err_clean: return err; } -void nvgpu_remove_channel_support_linux(struct nvgpu_os_linux *l) +void nvgpu_channel_remove_support_linux(struct nvgpu_os_linux *l) { struct gk20a *g = &l->g; struct fifo_gk20a *f = &g->fifo; diff --git a/drivers/gpu/nvgpu/os/linux/module.c b/drivers/gpu/nvgpu/os/linux/module.c index d9602c7ce..89634e69b 100644 --- a/drivers/gpu/nvgpu/os/linux/module.c +++ b/drivers/gpu/nvgpu/os/linux/module.c @@ -204,7 +204,7 @@ int nvgpu_finalize_poweron_linux(struct nvgpu_os_linux *l) if (l->init_done) return 0; - err = nvgpu_init_channel_support_linux(l); + err = nvgpu_channel_init_support_linux(l); if (err) { nvgpu_err(g, "failed to init linux channel support"); return err; @@ -751,7 +751,7 @@ void gk20a_remove_support(struct gk20a *g) nvgpu_kfree(g, g->dbg_regops_tmp_buf); - nvgpu_remove_channel_support_linux(l); + nvgpu_channel_remove_support_linux(l); if (g->pmu.remove_support) g->pmu.remove_support(&g->pmu); diff --git a/drivers/gpu/nvgpu/tu104/hal_tu104.c b/drivers/gpu/nvgpu/tu104/hal_tu104.c index a973c9e0a..15dc1b181 100644 --- a/drivers/gpu/nvgpu/tu104/hal_tu104.c +++ b/drivers/gpu/nvgpu/tu104/hal_tu104.c @@ -763,6 +763,7 @@ static const struct gpu_ops tu104_ops = { .userd_gp_get = gv11b_userd_gp_get, .userd_gp_put = gv11b_userd_gp_put, .userd_pb_get = gv11b_userd_pb_get, + .userd_entry_size = gk20a_fifo_userd_entry_size, .pbdma_acquire_val = gk20a_fifo_pbdma_acquire_val, .preempt_channel = gv11b_fifo_preempt_channel, .preempt_tsg = gv11b_fifo_preempt_tsg, @@ -780,6 +781,7 @@ static const struct gpu_ops 
tu104_ops = { .tsg_set_timeslice = gk20a_fifo_tsg_set_timeslice, .force_reset_ch = gk20a_fifo_force_reset_ch, .init_engine_info = gm20b_fifo_init_engine_info, + .init_pbdma_info = gk20a_fifo_init_pbdma_info, .get_engines_mask_on_id = gk20a_fifo_engines_on_id, .dump_pbdma_status = gk20a_dump_pbdma_status, .dump_channel_status_ramfc = gv11b_dump_channel_status_ramfc, @@ -806,7 +808,7 @@ static const struct gpu_ops tu104_ops = { .channel_suspend = gk20a_channel_suspend, .channel_resume = gk20a_channel_resume, .set_error_notifier = nvgpu_set_error_notifier_if_empty, - .setup_sw = gk20a_init_fifo_setup_sw, + .setup_sw = nvgpu_fifo_setup_sw, .resetup_ramfc = NULL, .free_channel_ctx_header = gv11b_free_subctx_header, .handle_ctxsw_timeout = gv11b_fifo_handle_ctxsw_timeout, @@ -856,6 +858,7 @@ static const struct gpu_ops tu104_ops = { .set_interleave = gk20a_runlist_set_interleave, .count_max = tu104_runlist_count_max, .entry_size = tu104_runlist_entry_size, + .length_max = gk20a_runlist_length_max, .get_tsg_entry = gv11b_runlist_get_tsg_entry, .get_ch_entry = gv11b_runlist_get_ch_entry, .hw_submit = tu104_runlist_hw_submit,
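
Two patterns in this patch may be worth spelling out for review. First, the new nvgpu_fifo_setup_sw_common() builds FIFO state as a chain of per-subsystem setup_sw() calls, where each failure jumps to a label that unwinds only the stages that already succeeded, in reverse order, and the same per-subsystem cleanup_sw() functions are reused by the full-teardown path (nvgpu_fifo_cleanup_sw_common(), installed as f->remove_support). Below is a minimal, self-contained sketch of that goto-unwind idiom; the foo/bar subsystem names are hypothetical stand-ins, not driver code:

/* sketch only: staged setup with reverse-order unwind, as in fifo.c */
#include <stdio.h>

static int foo_setup_sw(void) { puts("foo: setup"); return 0; }
static void foo_cleanup_sw(void) { puts("foo: cleanup"); }
static int bar_setup_sw(void) { puts("bar: setup"); return 0; }
static void bar_cleanup_sw(void) { puts("bar: cleanup"); }

/* mirrors nvgpu_fifo_setup_sw_common(): each stage bails to a label
 * that undoes only the stages that already completed */
static int fifo_setup_sw_common(void)
{
	int err;

	err = foo_setup_sw();
	if (err != 0) {
		goto clean_up;
	}

	err = bar_setup_sw();
	if (err != 0) {
		goto clean_up_foo;	/* undo foo, the only finished stage */
	}

	return 0;

clean_up_foo:
	foo_cleanup_sw();
clean_up:
	return err;
}

/* mirrors nvgpu_fifo_cleanup_sw_common(): full teardown, reverse order */
static void fifo_cleanup_sw_common(void)
{
	bar_cleanup_sw();
	foo_cleanup_sw();
}

int main(void)
{
	if (fifo_setup_sw_common() != 0) {
		return 1;
	}
	fifo_cleanup_sw_common();
	return 0;
}

The payoff is that channel, tsg, pbdma, engine, runlist and userd each own a matching setup_sw()/cleanup_sw() pair, so partial-failure unwinding and normal teardown can no longer drift apart the way the old monolithic gk20a_init_fifo_setup_sw_common()/gk20a_remove_fifo_support() pair could.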
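Second, the reason entry_size(), length_max() and userd_entry_size() gain a struct gk20a * argument and move behind gpu_ops is that vgpu must answer them differently: a vgpu runlist entry is a u16 channel id and length_max comes from priv->constants.num_channels, not from a hardware register. Here is a sketch of that op-table indirection; the types and values are simplified illustrations, not the real nvgpu definitions:

/* sketch only: per-flavor HAL ops replace open-coded constants */
#include <stdint.h>
#include <stdio.h>

struct gpu;

struct runlist_ops {
	uint32_t (*entry_size)(struct gpu *g);
	uint32_t (*length_max)(struct gpu *g);
};

struct gpu {
	struct runlist_ops runlist;
	uint32_t num_channels;	/* stands in for vgpu priv->constants */
};

/* native chip: values would come from hardware headers */
static uint32_t hw_entry_size(struct gpu *g) { (void)g; return 16U; }
static uint32_t hw_length_max(struct gpu *g) { (void)g; return 512U; }

/* virtualized GPU: runlist entries are u16 channel ids */
static uint32_t vgpu_entry_size(struct gpu *g)
{
	(void)g;
	return (uint32_t)sizeof(uint16_t);
}

static uint32_t vgpu_length_max(struct gpu *g)
{
	return g->num_channels;
}

int main(void)
{
	struct gpu native = { { hw_entry_size, hw_length_max }, 0 };
	struct gpu vgpu = { { vgpu_entry_size, vgpu_length_max }, 512 };

	/* common setup code no longer cares which flavor it drives */
	printf("native: %u x %u\n",
	       (unsigned)native.runlist.entry_size(&native),
	       (unsigned)native.runlist.length_max(&native));
	printf("vgpu:   %u x %u\n",
	       (unsigned)vgpu.runlist.entry_size(&vgpu),
	       (unsigned)vgpu.runlist.length_max(&vgpu));
	return 0;
}

With these values behind ops, vgpu_fifo_setup_sw() drops its open-coded sizeof(u16) and num_channels assignments in favor of the same g->ops.runlist/ g->ops.fifo calls the native path uses, which is what lets both paths converge on nvgpu_fifo_setup_sw().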