gpu: nvgpu: move fifo init/deinit code to common

Add a fifo sub-unit to common.fifo to handle init/deinit code
and global support functions.

Split init into:
- nvgpu_channel_setup_sw
- nvgpu_tsg_setup_sw
- nvgpu_fifo_setup_sw
- nvgpu_runlist_setup_sw
- nvgpu_engine_setup_sw
- nvgpu_userd_setup_sw
- nvgpu_pbdma_setup_sw

Split de-init into (call ordering for both lists is sketched below):
- nvgpu_channel_cleanup_sw
- nvgpu_tsg_cleanup_sw
- nvgpu_fifo_cleanup_sw
- nvgpu_runlist_cleanup_sw
- nvgpu_engine_cleanup_sw
- nvgpu_userd_cleanup_sw
- nvgpu_pbdma_cleanup_sw
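
The pairs are symmetric: nvgpu_fifo_setup_sw_common (added below in
common/fifo/fifo.c) calls the setup functions in dependency order and
unwinds in reverse on failure. A condensed sketch of that ordering,
with the error messages of the real function omitted:

int nvgpu_fifo_setup_sw_common(struct gk20a *g)
{
	int err;

	/* channels first: later units size their tables from f->num_channels */
	err = nvgpu_channel_setup_sw(g);
	if (err != 0) {
		return err;
	}
	err = nvgpu_tsg_setup_sw(g);
	if (err != 0) {
		goto clean_up_channel;
	}
	err = nvgpu_pbdma_setup_sw(g);
	if (err != 0) {
		goto clean_up_tsg;
	}
	/* engine info init reads the pbdma map filled in just above */
	err = nvgpu_engine_setup_sw(g);
	if (err != 0) {
		goto clean_up_pbdma;
	}
	err = nvgpu_runlist_setup_sw(g);
	if (err != 0) {
		goto clean_up_engine;
	}
	err = nvgpu_userd_setup_sw(g);
	if (err != 0) {
		goto clean_up_runlist;
	}
	return 0;

clean_up_runlist:
	nvgpu_runlist_cleanup_sw(g);
clean_up_engine:
	nvgpu_engine_cleanup_sw(g);
clean_up_pbdma:
	nvgpu_pbdma_cleanup_sw(g);
clean_up_tsg:
	nvgpu_tsg_cleanup_sw(g);
clean_up_channel:
	nvgpu_channel_cleanup_sw(g);
	return err;
}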

Add the following HALs:
- runlist.length_max
- fifo.init_pbdma_info
- fifo.userd_entry_size

The last two HALs should be moved to the pbdma and userd sub-units,
respectively, once those sub-units exist; the new gpu_ops declarations
are excerpted below.
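
The new gpu_ops members, excerpted from the gk20a.h hunk further down
(note that runlist.entry_size also gains a struct gk20a * parameter):

	/* fifo sub-struct */
	int (*init_pbdma_info)(struct fifo_gk20a *f);
	u32 (*userd_entry_size)(struct gk20a *g);

	/* runlist sub-struct */
	u32 (*entry_size)(struct gk20a *g);	/* was: u32 (*entry_size)(void) */
	u32 (*length_max)(struct gk20a *g);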

Add vgpu implementations of the above HALs:
- vgpu_runlist_length_max
- vgpu_userd_entry_size
- vgpu_channel_count

Use these HALs in vgpu_fifo_setup_sw (sketch below).
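
The relevant lines of vgpu_fifo_setup_sw after this change, taken from
the vgpu fifo hunk further down; the open-coded constants are replaced
by HAL calls:

	f->num_channels = g->ops.channel.count(g);		/* vgpu_channel_count */
	f->runlist_entry_size = g->ops.runlist.entry_size(g);	/* vgpu_runlist_entry_size */
	f->num_runlist_entries = g->ops.runlist.length_max(g);	/* vgpu_runlist_length_max */
	f->userd_entry_size = g->ops.fifo.userd_entry_size(g);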

Jira NVGPU-1306

Change-Id: I954f56be724eee280d7b5f171b1790d33c810470
Signed-off-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2029620
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Author: Thomas Fleury <tfleury@nvidia.com>
Date: 2019-02-27 10:06:57 -08:00
Committed by: mobile promotions
Parent: f087ec0826
Commit: ffed5095db
36 changed files with 670 additions and 342 deletions

@@ -371,6 +371,7 @@ nvgpu-y += \
 	common/power_features/pg/pg.o \
 	common/sim.o \
 	common/sim_pci.o \
+	common/fifo/fifo.o \
 	common/fifo/channel.o \
 	common/fifo/submit.o \
 	common/fifo/tsg.o \

@@ -180,6 +180,7 @@ srcs += common/sim.c \
 	common/power_features/cg/cg.c \
 	common/power_features/pg/pg.c \
 	common/fifo/channel.c \
+	common/fifo/fifo.c \
 	common/fifo/submit.c \
 	common/fifo/tsg.c \
 	common/fifo/runlist.c \

@@ -2321,6 +2321,46 @@ void gk20a_channel_deterministic_unidle(struct gk20a *g)
 	nvgpu_rwsem_up_write(&g->deterministic_busy);
 }
 
+static void nvgpu_channel_destroy(struct gk20a *g, struct channel_gk20a *c)
+{
+	nvgpu_mutex_destroy(&c->ioctl_lock);
+	nvgpu_mutex_destroy(&c->joblist.cleanup_lock);
+	nvgpu_mutex_destroy(&c->joblist.pre_alloc.read_lock);
+	nvgpu_mutex_destroy(&c->sync_lock);
+#if defined(CONFIG_GK20A_CYCLE_STATS)
+	nvgpu_mutex_destroy(&c->cyclestate.cyclestate_buffer_mutex);
+	nvgpu_mutex_destroy(&c->cs_client_mutex);
+#endif
+	nvgpu_mutex_destroy(&c->dbg_s_lock);
+}
+
+void nvgpu_channel_cleanup_sw(struct gk20a *g)
+{
+	struct fifo_gk20a *f = &g->fifo;
+	u32 chid;
+
+	/*
+	 * Make sure all channels are closed before deleting them.
+	 */
+	for (chid = 0; chid < f->num_channels; chid++) {
+		struct channel_gk20a *ch = &f->channel[chid];
+
+		/*
+		 * Could race but worst that happens is we get an error message
+		 * from gk20a_free_channel() complaining about multiple closes.
+		 */
+		if (ch->referenceable) {
+			__gk20a_channel_kill(ch);
+		}
+
+		nvgpu_channel_destroy(g, ch);
+	}
+
+	nvgpu_vfree(g, f->channel);
+	f->channel = NULL;
+	nvgpu_mutex_destroy(&f->free_chs_mutex);
+}
+
 int gk20a_init_channel_support(struct gk20a *g, u32 chid)
 {
 	struct channel_gk20a *c = g->fifo.channel+chid;
@@ -2399,6 +2439,54 @@ fail_1:
 	return err;
 }
 
+int nvgpu_channel_setup_sw(struct gk20a *g)
+{
+	struct fifo_gk20a *f = &g->fifo;
+	u32 chid, i;
+	int err;
+
+	f->num_channels = g->ops.channel.count(g);
+
+	err = nvgpu_mutex_init(&f->free_chs_mutex);
+	if (err != 0) {
+		nvgpu_err(g, "mutex init failed");
+		return err;
+	}
+
+	f->channel = nvgpu_vzalloc(g, f->num_channels * sizeof(*f->channel));
+	if (f->channel == NULL) {
+		nvgpu_err(g, "no mem for channels");
+		err = -ENOMEM;
+		goto clean_up_mutex;
+	}
+
+	nvgpu_init_list_node(&f->free_chs);
+
+	for (chid = 0; chid < f->num_channels; chid++) {
+		err = gk20a_init_channel_support(g, chid);
+		if (err != 0) {
+			nvgpu_err(g, "channel init failed, chid=%u", chid);
+			goto clean_up;
+		}
+	}
+
+	return 0;
+
+clean_up:
+	for (i = 0; i < chid; i++) {
+		struct channel_gk20a *ch = &f->channel[i];
+
+		nvgpu_channel_destroy(g, ch);
+	}
+	nvgpu_vfree(g, f->channel);
+	f->channel = NULL;
+
+clean_up_mutex:
+	nvgpu_mutex_destroy(&f->free_chs_mutex);
+
+	return err;
+}
+
 /* in this context the "channel" is the host1x channel which
  * maps to *all* gk20a channels */
 int gk20a_channel_suspend(struct gk20a *g)

@@ -446,4 +446,54 @@ int nvgpu_engine_wait_for_idle(struct gk20a *g)
 
 #endif /* NVGPU_ENGINE */
 
+int nvgpu_engine_setup_sw(struct gk20a *g)
+{
+	struct fifo_gk20a *f = &g->fifo;
+	int err = 0;
+	size_t size;
+
+	f->max_engines = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_ENGINES);
+	size = f->max_engines * sizeof(*f->engine_info);
+	f->engine_info = nvgpu_kzalloc(g, size);
+	if (f->engine_info == NULL) {
+		nvgpu_err(g, "no mem for engine info");
+		return -ENOMEM;
+	}
+
+	size = f->max_engines * sizeof(u32);
+	f->active_engines_list = nvgpu_kzalloc(g, size);
+	if (f->active_engines_list == NULL) {
+		nvgpu_err(g, "no mem for active engine list");
+		err = -ENOMEM;
+		goto clean_up_engine_info;
+	}
+	(void) memset(f->active_engines_list, 0xff, size);
+
+	err = g->ops.fifo.init_engine_info(f);
+	if (err != 0) {
+		nvgpu_err(g, "init engine info failed");
+		goto clean_up;
+	}
+
+	return 0;
+
+clean_up:
+	nvgpu_kfree(g, f->active_engines_list);
+	f->active_engines_list = NULL;
+
+clean_up_engine_info:
+	nvgpu_kfree(g, f->engine_info);
+	f->engine_info = NULL;
+
+	return err;
+}
+
+void nvgpu_engine_cleanup_sw(struct gk20a *g)
+{
+	struct fifo_gk20a *f = &g->fifo;
+
+	nvgpu_kfree(g, f->engine_info);
+	f->engine_info = NULL;
+	nvgpu_kfree(g, f->active_engines_list);
+	f->active_engines_list = NULL;
+}

@@ -0,0 +1,287 @@
+/*
+ * FIFO
+ *
+ * Copyright (c) 2011-2019, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <trace/events/gk20a.h>
+
+#include <nvgpu/dma.h>
+#include <nvgpu/fifo.h>
+#include <nvgpu/runlist.h>
+#include <nvgpu/gk20a.h>
+#include <nvgpu/channel.h>
+#include <nvgpu/tsg.h>
+#include <nvgpu/vm_area.h>
+
+#include <gk20a/fifo_gk20a.h>
+
+/* TODO: move to pbdma and userd when available */
+static int nvgpu_pbdma_setup_sw(struct gk20a *g)
+{
+	struct fifo_gk20a *f = &g->fifo;
+	int err;
+
+	if (g->ops.fifo.init_pbdma_info != NULL) {
+		err = g->ops.fifo.init_pbdma_info(f);
+		if (err != 0) {
+			nvgpu_err(g, "failed to init pbdma support");
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+static void nvgpu_pbdma_cleanup_sw(struct gk20a *g)
+{
+	struct fifo_gk20a *f = &g->fifo;
+
+	nvgpu_kfree(g, f->pbdma_map);
+	f->pbdma_map = NULL;
+}
+
+static int nvgpu_userd_setup_sw(struct gk20a *g)
+{
+	struct fifo_gk20a *f = &g->fifo;
+	int err;
+
+	f->userd_entry_size = g->ops.fifo.userd_entry_size(g);
+
+	err = gk20a_fifo_init_userd_slabs(g);
+	if (err != 0) {
+		nvgpu_err(g, "failed to init userd support");
+		return err;
+	}
+
+	return 0;
+}
+
+static void nvgpu_userd_cleanup_sw(struct gk20a *g)
+{
+	struct fifo_gk20a *f = &g->fifo;
+
+	gk20a_fifo_free_userd_slabs(g);
+
+	if (f->userd_gpu_va != 0ULL) {
+		(void) nvgpu_vm_area_free(g->mm.bar1.vm, f->userd_gpu_va);
+		f->userd_gpu_va = 0ULL;
+	}
+}
+
+static void nvgpu_fifo_cleanup_sw_common(struct fifo_gk20a *f)
+{
+	struct gk20a *g = f->g;
+
+	nvgpu_log_fn(g, " ");
+
+	nvgpu_channel_worker_deinit(g);
+	nvgpu_userd_cleanup_sw(g);
+	nvgpu_channel_cleanup_sw(g);
+	nvgpu_tsg_cleanup_sw(g);
+	nvgpu_runlist_cleanup_sw(g);
+	nvgpu_engine_cleanup_sw(g);
+	nvgpu_pbdma_cleanup_sw(g);
+
+	f->deferred_reset_pending = false;
+	nvgpu_mutex_destroy(&f->deferred_reset_mutex);
+	nvgpu_mutex_destroy(&f->engines_reset_mutex);
+	nvgpu_mutex_destroy(&f->intr.isr.mutex);
+}
+
+static int nvgpu_fifo_init_locks(struct gk20a *g, struct fifo_gk20a *f)
+{
+	int err;
+
+	err = nvgpu_mutex_init(&f->intr.isr.mutex);
+	if (err != 0) {
+		goto destroy_0;
+	}
+
+	err = nvgpu_mutex_init(&f->engines_reset_mutex);
+	if (err != 0) {
+		goto destroy_1;
+	}
+
+	err = nvgpu_mutex_init(&f->deferred_reset_mutex);
+	if (err != 0) {
+		goto destroy_2;
+	}
+
+	nvgpu_spinlock_init(&f->runlist_submit_lock);
+
+	return 0;
+
+destroy_2:
+	nvgpu_mutex_destroy(&f->engines_reset_mutex);
+destroy_1:
+	nvgpu_mutex_destroy(&f->intr.isr.mutex);
+destroy_0:
+	nvgpu_err(g, "failed to init mutex");
+	return err;
+}
+
+int nvgpu_fifo_setup_sw_common(struct gk20a *g)
+{
+	struct fifo_gk20a *f = &g->fifo;
+	int err = 0;
+
+	nvgpu_log_fn(g, " ");
+
+	f->g = g;
+
+	err = nvgpu_fifo_init_locks(g, f);
+	if (err != 0) {
+		nvgpu_err(g, "failed to init mutexes");
+	}
+
+	err = nvgpu_channel_setup_sw(g);
+	if (err != 0) {
+		nvgpu_err(g, "failed to init channel support");
+		goto clean_up;
+	}
+
+	err = nvgpu_tsg_setup_sw(g);
+	if (err != 0) {
+		nvgpu_err(g, "failed to init tsg support");
+		goto clean_up_channel;
+	}
+
+	err = nvgpu_pbdma_setup_sw(g);
+	if (err != 0) {
+		nvgpu_err(g, "failed to init pbdma support");
+		goto clean_up_tsg;
+	}
+
+	err = nvgpu_engine_setup_sw(g);
+	if (err != 0) {
+		nvgpu_err(g, "failed to init engine support");
+		goto clean_up_pbdma;
+	}
+
+	err = nvgpu_runlist_setup_sw(g);
+	if (err != 0) {
+		nvgpu_err(g, "failed to init runlist support");
+		goto clean_up_engine;
+	}
+
+	err = nvgpu_userd_setup_sw(g);
+	if (err != 0) {
+		nvgpu_err(g, "failed to init userd support");
+		goto clean_up_runlist;
+	}
+
+	f->remove_support = nvgpu_fifo_cleanup_sw_common;
+
+	nvgpu_log_fn(g, "done");
+	return 0;
+
+clean_up_runlist:
+	nvgpu_runlist_cleanup_sw(g);
+
+clean_up_engine:
+	nvgpu_engine_cleanup_sw(g);
+
+clean_up_pbdma:
+	nvgpu_pbdma_cleanup_sw(g);
+
+clean_up_tsg:
+	nvgpu_tsg_cleanup_sw(g);
+
+clean_up_channel:
+	nvgpu_channel_cleanup_sw(g);
+
+clean_up:
+	nvgpu_err(g, "init fifo support failed");
+	return err;
+}
+
+int nvgpu_fifo_setup_sw(struct gk20a *g)
+{
+	struct fifo_gk20a *f = &g->fifo;
+	int err = 0;
+	u32 size;
+	u32 num_pages;
+
+	nvgpu_log_fn(g, " ");
+
+	if (f->sw_ready) {
+		nvgpu_log_fn(g, "skip init");
+		return 0;
+	}
+
+	err = nvgpu_fifo_setup_sw_common(g);
+	if (err != 0) {
+		nvgpu_err(g, "fail: err: %d", err);
+		return err;
+	}
+
+	size = f->num_channels * f->userd_entry_size;
+	num_pages = DIV_ROUND_UP(size, PAGE_SIZE);
+	err = nvgpu_vm_area_alloc(g->mm.bar1.vm,
+			num_pages, PAGE_SIZE, &f->userd_gpu_va, 0);
+	if (err != 0) {
+		nvgpu_err(g, "userd gpu va allocation failed, err=%d", err);
+		goto clean_slabs;
+	}
+
+	err = nvgpu_channel_worker_init(g);
+	if (err != 0) {
+		nvgpu_err(g, "worker init fail, err=%d", err);
+		goto clean_vm_area;
+	}
+
+	f->sw_ready = true;
+
+	nvgpu_log_fn(g, "done");
+	return 0;
+
+clean_vm_area:
+	(void) nvgpu_vm_area_free(g->mm.bar1.vm, f->userd_gpu_va);
+	f->userd_gpu_va = 0ULL;
+
+clean_slabs:
+	gk20a_fifo_free_userd_slabs(g);
+
+	return err;
+}
+
+int nvgpu_fifo_init_support(struct gk20a *g)
+{
+	int err;
+
+	err = g->ops.fifo.setup_sw(g);
+	if (err != 0) {
+		nvgpu_err(g, "fifo sw setup failed, err=%d", err);
+		return err;
+	}
+
+	if (g->ops.fifo.init_fifo_setup_hw != NULL) {
+		err = g->ops.fifo.init_fifo_setup_hw(g);
+		if (err != 0) {
+			nvgpu_err(g, "fifo hw setup failed, err=%d", err);
+			return err;
+		}
+	}
+
+	return err;
+}

@@ -600,11 +600,11 @@ void gk20a_fifo_set_runlist_state(struct gk20a *g, u32 runlists_mask,
 	}
 }
 
-void gk20a_fifo_delete_runlist(struct fifo_gk20a *f)
+void nvgpu_runlist_cleanup_sw(struct gk20a *g)
 {
+	struct fifo_gk20a *f = &g->fifo;
 	u32 i, j;
 	struct fifo_runlist_info_gk20a *runlist;
-	struct gk20a *g = NULL;
 
 	if ((f == NULL) || (f->runlist_info == NULL)) {
 		return;
@@ -636,8 +636,49 @@ void gk20a_fifo_delete_runlist(struct fifo_gk20a *f)
 	f->max_runlists = 0;
 }
 
-int nvgpu_init_runlist(struct gk20a *g, struct fifo_gk20a *f)
+static void nvgpu_init_runlist_enginfo(struct gk20a *g, struct fifo_gk20a *f)
 {
+	struct fifo_runlist_info_gk20a *runlist;
+	struct fifo_engine_info_gk20a *engine_info;
+	u32 i, active_engine_id, pbdma_id, engine_id;
+
+	nvgpu_log_fn(g, " ");
+
+	if (g->is_virtual) {
+		return;
+	}
+
+	for (i = 0; i < f->num_runlists; i++) {
+		runlist = &f->active_runlist_info[i];
+
+		for (pbdma_id = 0; pbdma_id < f->num_pbdma; pbdma_id++) {
+			if ((f->pbdma_map[pbdma_id] &
+					BIT32(runlist->runlist_id)) != 0U) {
+				runlist->pbdma_bitmask |= BIT32(pbdma_id);
+			}
+		}
+		nvgpu_log(g, gpu_dbg_info, "runlist %d : pbdma bitmask 0x%x",
+			runlist->runlist_id, runlist->pbdma_bitmask);
+
+		for (engine_id = 0; engine_id < f->num_engines; ++engine_id) {
+			active_engine_id = f->active_engines_list[engine_id];
+			engine_info = &f->engine_info[active_engine_id];
+
+			if ((engine_info != NULL) &&
+			    (engine_info->runlist_id == runlist->runlist_id)) {
+				runlist->eng_bitmask |= BIT(active_engine_id);
+			}
+		}
+		nvgpu_log(g, gpu_dbg_info, "runlist %d : act eng bitmask 0x%x",
+			runlist->runlist_id, runlist->eng_bitmask);
+	}
+
+	nvgpu_log_fn(g, "done");
+}
+
+int nvgpu_runlist_setup_sw(struct gk20a *g)
+{
+	struct fifo_gk20a *f = &g->fifo;
 	struct fifo_runlist_info_gk20a *runlist;
 	unsigned int runlist_id;
 	u32 i, j;
@@ -647,6 +688,8 @@ int nvgpu_init_runlist(struct gk20a *g, struct fifo_gk20a *f)
 
 	nvgpu_log_fn(g, " ");
 
+	f->runlist_entry_size = g->ops.runlist.entry_size(g);
+	f->num_runlist_entries = g->ops.runlist.length_max(g);
 	f->max_runlists = g->ops.runlist.count_max();
 	f->runlist_info = nvgpu_kzalloc(g,
 			sizeof(*f->runlist_info) * f->max_runlists);
@@ -727,11 +770,13 @@ int nvgpu_init_runlist(struct gk20a *g, struct fifo_gk20a *f)
 		runlist->cur_buffer = MAX_RUNLIST_BUFFERS;
 	}
 
+	nvgpu_init_runlist_enginfo(g, f);
+
 	nvgpu_log_fn(g, "done");
 	return 0;
 
 clean_up_runlist:
-	gk20a_fifo_delete_runlist(f);
+	nvgpu_runlist_cleanup_sw(g);
 	nvgpu_log_fn(g, "fail");
 	return err;
 }

@@ -121,11 +121,16 @@ u32 gk20a_runlist_count_max(void)
 	return fifo_eng_runlist_base__size_1_v();
 }
 
-u32 gk20a_runlist_entry_size(void)
+u32 gk20a_runlist_entry_size(struct gk20a *g)
 {
 	return ram_rl_entry_size_v();
 }
 
+u32 gk20a_runlist_length_max(struct gk20a *g)
+{
+	return fifo_eng_runlist_length_max_v();
+}
+
 void gk20a_runlist_get_tsg_entry(struct tsg_gk20a *tsg, u32 *runlist)
 {

@@ -36,7 +36,8 @@ int gk20a_runlist_set_interleave(struct gk20a *g,
 		u32 runlist_id,
 		u32 new_level);
 u32 gk20a_runlist_count_max(void);
-u32 gk20a_runlist_entry_size(void);
+u32 gk20a_runlist_entry_size(struct gk20a *g);
+u32 gk20a_runlist_length_max(struct gk20a *g);
 void gk20a_runlist_get_tsg_entry(struct tsg_gk20a *tsg, u32 *runlist);
 void gk20a_runlist_get_ch_entry(struct channel_gk20a *ch, u32 *runlist);
 void gk20a_runlist_hw_submit(struct gk20a *g, u32 runlist_id,

@@ -43,7 +43,7 @@ u32 gv11b_runlist_count_max(void)
 	return fifo_eng_runlist_base__size_1_v();
 }
 
-u32 gv11b_runlist_entry_size(void)
+u32 gv11b_runlist_entry_size(struct gk20a *g)
 {
 	return ram_rl_entry_size_v();
 }

@@ -29,7 +29,7 @@ struct tsg_gk20a;
 int gv11b_runlist_reschedule(struct channel_gk20a *ch, bool preempt_next);
 u32 gv11b_runlist_count_max(void);
-u32 gv11b_runlist_entry_size(void);
+u32 gv11b_runlist_entry_size(struct gk20a *g);
 void gv11b_runlist_get_tsg_entry(struct tsg_gk20a *tsg, u32 *runlist);
 void gv11b_runlist_get_ch_entry(struct channel_gk20a *c, u32 *runlist);

@@ -37,7 +37,7 @@ u32 tu104_runlist_count_max(void)
 	return fifo_runlist_base_lo__size_1_v();
 }
 
-u32 tu104_runlist_entry_size(void)
+u32 tu104_runlist_entry_size(struct gk20a *g)
 {
 	return ram_rl_entry_size_v();
 }

@@ -27,7 +27,7 @@
 struct gk20a;
 
 u32 tu104_runlist_count_max(void);
-u32 tu104_runlist_entry_size(void);
+u32 tu104_runlist_entry_size(struct gk20a *g);
 void tu104_runlist_hw_submit(struct gk20a *g, u32 runlist_id,
 		u32 count, u32 buffer_index);
 int tu104_runlist_wait_pending(struct gk20a *g, u32 runlist_id);

@@ -247,11 +247,30 @@ void nvgpu_tsg_recover(struct gk20a *g, struct tsg_gk20a *tsg,
 	nvgpu_mutex_release(&g->dbg_sessions_lock);
 }
 
+static void nvgpu_tsg_destroy(struct gk20a *g, struct tsg_gk20a *tsg)
+{
+	nvgpu_mutex_destroy(&tsg->event_id_list_lock);
+}
+
+void nvgpu_tsg_cleanup_sw(struct gk20a *g)
+{
+	struct fifo_gk20a *f = &g->fifo;
+	u32 tsgid;
+
+	for (tsgid = 0; tsgid < f->num_channels; tsgid++) {
+		struct tsg_gk20a *tsg = &f->tsg[tsgid];
+
+		nvgpu_tsg_destroy(g, tsg);
+	}
+
+	nvgpu_vfree(g, f->tsg);
+	f->tsg = NULL;
+	nvgpu_mutex_destroy(&f->tsg_inuse_mutex);
+}
+
 int gk20a_init_tsg_support(struct gk20a *g, u32 tsgid)
 {
 	struct tsg_gk20a *tsg = NULL;
-	int err;
 
 	if (tsgid >= g->fifo.num_channels) {
 		return -EINVAL;
@@ -267,13 +286,51 @@ int gk20a_init_tsg_support(struct gk20a *g, u32 tsgid)
 	nvgpu_rwsem_init(&tsg->ch_list_lock);
 
 	nvgpu_init_list_node(&tsg->event_id_list);
-	err = nvgpu_mutex_init(&tsg->event_id_list_lock);
+	return nvgpu_mutex_init(&tsg->event_id_list_lock);
+}
+
+int nvgpu_tsg_setup_sw(struct gk20a *g)
+{
+	struct fifo_gk20a *f = &g->fifo;
+	u32 tsgid, i;
+	int err;
+
+	err = nvgpu_mutex_init(&f->tsg_inuse_mutex);
 	if (err != 0) {
-		tsg->in_use = true; /* make this TSG unusable */
+		nvgpu_err(g, "mutex init failed");
 		return err;
 	}
+
+	f->tsg = nvgpu_vzalloc(g, f->num_channels * sizeof(*f->tsg));
+	if (f->tsg == NULL) {
+		nvgpu_err(g, "no mem for tsgs");
+		err = -ENOMEM;
+		goto clean_up_mutex;
+	}
+
+	for (tsgid = 0; tsgid < f->num_channels; tsgid++) {
+		err = gk20a_init_tsg_support(g, tsgid);
+		if (err != 0) {
+			nvgpu_err(g, "tsg init failed, tsgid=%u", tsgid);
+			goto clean_up;
+		}
+	}
 
 	return 0;
+
+clean_up:
+	for (i = 0; i < tsgid; i++) {
+		struct tsg_gk20a *tsg = &g->fifo.tsg[i];
+
+		nvgpu_tsg_destroy(g, tsg);
+	}
+	nvgpu_vfree(g, f->tsg);
+	f->tsg = NULL;
+
+clean_up_mutex:
+	nvgpu_mutex_destroy(&f->tsg_inuse_mutex);
+
+	return err;
 }
 
 bool nvgpu_tsg_mark_error(struct gk20a *g,

@@ -269,7 +269,7 @@ int gk20a_finalize_poweron(struct gk20a *g)
 		goto done;
 	}
 
-	err = gk20a_init_fifo_support(g);
+	err = nvgpu_fifo_init_support(g);
 	if (err != 0) {
 		nvgpu_err(g, "failed to init gk20a fifo");
 		goto done;

@@ -222,7 +222,7 @@ int vgpu_fifo_init_engine_info(struct fifo_gk20a *f)
 	return 0;
 }
 
-static int vgpu_init_fifo_setup_sw(struct gk20a *g)
+static int vgpu_fifo_setup_sw(struct gk20a *g)
 {
 	struct fifo_gk20a *f = &g->fifo;
 	struct vgpu_priv_data *priv = vgpu_get_priv_data(g);
@@ -237,18 +237,11 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g)
 	}
 
 	f->g = g;
-	f->num_channels = priv->constants.num_channels;
-
-	/*
-	 * This is not the HW format you're looking for (see
-	 * vgpu_fifo_update_runlist_locked(), vgpu_submit_runlist())
-	 */
-	f->runlist_entry_size = (u32)sizeof(u16);
-	f->num_runlist_entries = f->num_channels;
+	f->num_channels = g->ops.channel.count(g);
+	f->runlist_entry_size = g->ops.runlist.entry_size(g);
+	f->num_runlist_entries = g->ops.runlist.length_max(g);
 	f->max_engines = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_ENGINES);
-
-	f->userd_entry_size = 1 << ram_userd_base_shift_v();
+	f->userd_entry_size = g->ops.fifo.userd_entry_size(g);
 
 	err = gk20a_fifo_init_userd_slabs(g);
 	if (err != 0) {
@@ -272,7 +265,7 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g)
 
 	g->ops.fifo.init_engine_info(f);
 
-	err = nvgpu_init_runlist(g, f);
+	err = nvgpu_runlist_setup_sw(g);
 	if (err != 0) {
 		nvgpu_err(g, "failed to init runlist");
 		goto clean_up;
@@ -385,7 +378,7 @@ int vgpu_init_fifo_support(struct gk20a *g)
 	nvgpu_log_fn(g, " ");
 
-	err = vgpu_init_fifo_setup_sw(g);
+	err = vgpu_fifo_setup_sw(g);
 	if (err) {
 		return err;
 	}
@@ -575,3 +568,10 @@ u32 vgpu_fifo_default_timeslice_us(struct gk20a *g)
 
 	return priv->constants.default_timeslice_us;
 }
+
+u32 vgpu_channel_count(struct gk20a *g)
+{
+	struct vgpu_priv_data *priv = vgpu_get_priv_data(g);
+
+	return priv->constants.num_channels;
+}

@@ -37,6 +37,7 @@ int vgpu_channel_alloc_inst(struct gk20a *g, struct channel_gk20a *ch);
 void vgpu_channel_free_inst(struct gk20a *g, struct channel_gk20a *ch);
 void vgpu_channel_enable(struct channel_gk20a *ch);
 void vgpu_channel_disable(struct channel_gk20a *ch);
+u32 vgpu_channel_count(struct gk20a *g);
 int vgpu_channel_setup_ramfc(struct channel_gk20a *ch, u64 gpfifo_base,
 		u32 gpfifo_entries,
 		unsigned long acquire_timeout, u32 flags);

@@ -218,3 +218,19 @@ int vgpu_runlist_set_interleave(struct gk20a *g,
 	WARN_ON(err || msg.ret);
 	return err ? err : msg.ret;
 }
+
+u32 vgpu_runlist_length_max(struct gk20a *g)
+{
+	struct vgpu_priv_data *priv = vgpu_get_priv_data(g);
+
+	return priv->constants.num_channels;
+}
+
+u32 vgpu_runlist_entry_size(struct gk20a *g)
+{
+	/*
+	 * This is not the HW format you're looking for (see
+	 * vgpu_fifo_update_runlist_locked(), vgpu_submit_runlist())
+	 */
+	return (u32)sizeof(u16);
+}

@@ -34,3 +34,5 @@ int vgpu_runlist_set_interleave(struct gk20a *g,
 		u32 id,
 		u32 runlist_id,
 		u32 new_level);
+u32 vgpu_runlist_length_max(struct gk20a *g);
+u32 vgpu_runlist_entry_size(struct gk20a *g);

@@ -366,6 +366,7 @@ static const struct gpu_ops vgpu_gp10b_ops = {
 		.userd_gp_get = gk20a_fifo_userd_gp_get,
 		.userd_gp_put = gk20a_fifo_userd_gp_put,
 		.userd_pb_get = gk20a_fifo_userd_pb_get,
+		.userd_entry_size = gk20a_fifo_userd_entry_size,
 		.pbdma_acquire_val = gk20a_fifo_pbdma_acquire_val,
 		.preempt_channel = vgpu_fifo_preempt_channel,
 		.preempt_tsg = vgpu_fifo_preempt_tsg,
@@ -390,7 +391,7 @@ static const struct gpu_ops vgpu_gp10b_ops = {
 		.capture_channel_ram_dump = NULL,
 		.intr_0_error_mask = gk20a_fifo_intr_0_error_mask,
 		.is_preempt_pending = NULL,
-		.init_pbdma_intr_descs = gp10b_fifo_init_pbdma_intr_descs,
+		.init_pbdma_intr_descs = NULL,
 		.reset_enable_hw = NULL,
 		.teardown_ch_tsg = NULL,
 		.handle_sched_error = NULL,
@@ -405,7 +406,7 @@ static const struct gpu_ops vgpu_gp10b_ops = {
 		.channel_suspend = gk20a_channel_suspend,
 		.channel_resume = gk20a_channel_resume,
 		.set_error_notifier = nvgpu_set_error_notifier,
-		.setup_sw = gk20a_init_fifo_setup_sw,
+		.setup_sw = nvgpu_fifo_setup_sw,
 		.resetup_ramfc = NULL,
 		.set_sm_exception_type_mask = vgpu_set_sm_exception_type_mask,
 	},
@@ -441,7 +442,8 @@ static const struct gpu_ops vgpu_gp10b_ops = {
 		.reload = vgpu_runlist_reload,
 		.set_interleave = vgpu_runlist_set_interleave,
 		.count_max = gk20a_runlist_count_max,
-		.entry_size = NULL,
+		.entry_size = vgpu_runlist_entry_size,
+		.length_max = vgpu_runlist_length_max,
 		.get_tsg_entry = gk20a_runlist_get_tsg_entry,
 		.get_ch_entry = gk20a_runlist_get_ch_entry,
 		.hw_submit = NULL,
@@ -452,7 +454,7 @@ static const struct gpu_ops vgpu_gp10b_ops = {
 		.unbind = vgpu_channel_unbind,
 		.enable = vgpu_channel_enable,
 		.disable = vgpu_channel_disable,
-		.count = gm20b_channel_count,
+		.count = vgpu_channel_count,
 	},
 	.netlist = {
 		.get_netlist_name = gp10b_netlist_get_name,

@@ -435,6 +435,7 @@ static const struct gpu_ops vgpu_gv11b_ops = {
 		.userd_gp_get = gv11b_userd_gp_get,
 		.userd_gp_put = gv11b_userd_gp_put,
 		.userd_pb_get = gv11b_userd_pb_get,
+		.userd_entry_size = gk20a_fifo_userd_entry_size,
 		.pbdma_acquire_val = gk20a_fifo_pbdma_acquire_val,
 		.preempt_channel = vgpu_fifo_preempt_channel,
 		.preempt_tsg = vgpu_fifo_preempt_tsg,
@@ -461,7 +462,7 @@ static const struct gpu_ops vgpu_gv11b_ops = {
 		.capture_channel_ram_dump = NULL,
 		.intr_0_error_mask = gv11b_fifo_intr_0_error_mask,
 		.is_preempt_pending = gv11b_fifo_is_preempt_pending,
-		.init_pbdma_intr_descs = gv11b_fifo_init_pbdma_intr_descs,
+		.init_pbdma_intr_descs = NULL,
 		.reset_enable_hw = NULL,
 		.teardown_ch_tsg = NULL,
 		.handle_sched_error = NULL,
@@ -479,7 +480,7 @@ static const struct gpu_ops vgpu_gv11b_ops = {
 		.channel_suspend = gk20a_channel_suspend,
 		.channel_resume = gk20a_channel_resume,
 		.set_error_notifier = nvgpu_set_error_notifier,
-		.setup_sw = gk20a_init_fifo_setup_sw,
+		.setup_sw = nvgpu_fifo_setup_sw,
 		.resetup_ramfc = NULL,
 		.free_channel_ctx_header = vgpu_gv11b_free_subctx_header,
 		.handle_ctxsw_timeout = gv11b_fifo_handle_ctxsw_timeout,
@@ -520,7 +521,8 @@ static const struct gpu_ops vgpu_gv11b_ops = {
 		.reload = vgpu_runlist_reload,
 		.set_interleave = vgpu_runlist_set_interleave,
 		.count_max = gv11b_runlist_count_max,
-		.entry_size = NULL,
+		.entry_size = vgpu_runlist_entry_size,
+		.length_max = vgpu_runlist_length_max,
 		.get_tsg_entry = gv11b_runlist_get_tsg_entry,
 		.get_ch_entry = gv11b_runlist_get_ch_entry,
 		.hw_submit = NULL,
@@ -531,7 +533,7 @@ static const struct gpu_ops vgpu_gv11b_ops = {
 		.unbind = vgpu_channel_unbind,
 		.enable = vgpu_channel_enable,
 		.disable = vgpu_channel_disable,
-		.count = gv11b_channel_count,
+		.count = vgpu_channel_count,
 	},
 	.netlist = {
 		.get_netlist_name = gv11b_netlist_get_name,

@@ -221,59 +221,6 @@ static inline u32 gk20a_mmu_id_to_engine_id(struct gk20a *g, u32 fault_id)
 	return active_engine_id;
 }
 
-static void gk20a_remove_fifo_support(struct fifo_gk20a *f)
-{
-	struct gk20a *g = f->g;
-	unsigned int i = 0;
-
-	nvgpu_log_fn(g, " ");
-
-	nvgpu_channel_worker_deinit(g);
-	/*
-	 * Make sure all channels are closed before deleting them.
-	 */
-	for (; i < f->num_channels; i++) {
-		struct channel_gk20a *c = f->channel + i;
-		struct tsg_gk20a *tsg = f->tsg + i;
-
-		/*
-		 * Could race but worst that happens is we get an error message
-		 * from gk20a_free_channel() complaining about multiple closes.
-		 */
-		if (c->referenceable) {
-			__gk20a_channel_kill(c);
-		}
-
-		nvgpu_mutex_destroy(&tsg->event_id_list_lock);
-
-		nvgpu_mutex_destroy(&c->ioctl_lock);
-		nvgpu_mutex_destroy(&c->joblist.cleanup_lock);
-		nvgpu_mutex_destroy(&c->joblist.pre_alloc.read_lock);
-		nvgpu_mutex_destroy(&c->sync_lock);
-#if defined(CONFIG_GK20A_CYCLE_STATS)
-		nvgpu_mutex_destroy(&c->cyclestate.cyclestate_buffer_mutex);
-		nvgpu_mutex_destroy(&c->cs_client_mutex);
-#endif
-		nvgpu_mutex_destroy(&c->dbg_s_lock);
-	}
-
-	nvgpu_vfree(g, f->channel);
-	nvgpu_vfree(g, f->tsg);
-	gk20a_fifo_free_userd_slabs(g);
-	(void) nvgpu_vm_area_free(g->mm.bar1.vm, f->userd_gpu_va);
-	f->userd_gpu_va = 0ULL;
-
-	gk20a_fifo_delete_runlist(f);
-
-	nvgpu_kfree(g, f->pbdma_map);
-	f->pbdma_map = NULL;
-	nvgpu_kfree(g, f->engine_info);
-	f->engine_info = NULL;
-	nvgpu_kfree(g, f->active_engines_list);
-	f->active_engines_list = NULL;
-}
-
 u32 gk20a_fifo_intr_0_error_mask(struct gk20a *g)
 {
 	u32 intr_0_error_mask =
@@ -382,170 +329,6 @@ int gk20a_init_fifo_reset_enable_hw(struct gk20a *g)
 	return 0;
 }
 
-static int nvgpu_init_runlist_enginfo(struct gk20a *g, struct fifo_gk20a *f)
-{
-	struct fifo_runlist_info_gk20a *runlist;
-	struct fifo_engine_info_gk20a *engine_info;
-	u32 i, active_engine_id, pbdma_id, engine_id;
-
-	nvgpu_log_fn(g, " ");
-
-	for (i = 0; i < f->num_runlists; i++) {
-		runlist = &f->active_runlist_info[i];
-
-		for (pbdma_id = 0; pbdma_id < f->num_pbdma; pbdma_id++) {
-			if ((f->pbdma_map[pbdma_id] &
-					BIT32(runlist->runlist_id)) != 0U) {
-				runlist->pbdma_bitmask |= BIT32(pbdma_id);
-			}
-		}
-		nvgpu_log(g, gpu_dbg_info, "runlist %d : pbdma bitmask 0x%x",
-			runlist->runlist_id, runlist->pbdma_bitmask);
-
-		for (engine_id = 0; engine_id < f->num_engines; ++engine_id) {
-			active_engine_id = f->active_engines_list[engine_id];
-			engine_info = &f->engine_info[active_engine_id];
-
-			if ((engine_info != NULL) &&
-			    (engine_info->runlist_id == runlist->runlist_id)) {
-				runlist->eng_bitmask |= BIT(active_engine_id);
-			}
-		}
-		nvgpu_log(g, gpu_dbg_info, "runlist %d : act eng bitmask 0x%x",
-			runlist->runlist_id, runlist->eng_bitmask);
-	}
-
-	nvgpu_log_fn(g, "done");
-	return 0;
-}
-
-int gk20a_init_fifo_setup_sw_common(struct gk20a *g)
-{
-	struct fifo_gk20a *f = &g->fifo;
-	unsigned int chid, i;
-	int err = 0;
-
-	nvgpu_log_fn(g, " ");
-
-	f->g = g;
-
-	err = nvgpu_mutex_init(&f->intr.isr.mutex);
-	if (err != 0) {
-		nvgpu_err(g, "failed to init isr.mutex");
-		return err;
-	}
-
-	err = nvgpu_mutex_init(&f->engines_reset_mutex);
-	if (err != 0) {
-		nvgpu_err(g, "failed to init engines_reset_mutex");
-		return err;
-	}
-
-	nvgpu_spinlock_init(&f->runlist_submit_lock);
-
-	g->ops.fifo.init_pbdma_intr_descs(f); /* just filling in data/tables */
-
-	f->num_channels = g->ops.channel.count(g);
-	f->runlist_entry_size = g->ops.runlist.entry_size();
-	f->num_runlist_entries = fifo_eng_runlist_length_max_v();
-	f->num_pbdma = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA);
-	f->max_engines = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_ENGINES);
-
-	f->userd_entry_size = BIT16(ram_userd_base_shift_v());
-
-	f->channel = nvgpu_vzalloc(g, f->num_channels * sizeof(*f->channel));
-	f->tsg = nvgpu_vzalloc(g, f->num_channels * sizeof(*f->tsg));
-	f->pbdma_map = nvgpu_kzalloc(g, f->num_pbdma * sizeof(*f->pbdma_map));
-	f->engine_info = nvgpu_kzalloc(g, f->max_engines *
-			sizeof(*f->engine_info));
-	f->active_engines_list = nvgpu_kzalloc(g, f->max_engines * sizeof(u32));
-
-	if (!((f->channel != NULL) &&
-	      (f->tsg != NULL) &&
-	      (f->pbdma_map != NULL) &&
-	      (f->engine_info != NULL) &&
-	      (f->active_engines_list != NULL))) {
-		err = -ENOMEM;
-		goto clean_up;
-	}
-	(void) memset(f->active_engines_list, 0xff,
-		(f->max_engines * sizeof(u32)));
-
-	/* pbdma map needs to be in place before calling engine info init */
-	for (i = 0; i < f->num_pbdma; ++i) {
-		f->pbdma_map[i] = gk20a_readl(g, fifo_pbdma_map_r(i));
-	}
-
-	g->ops.fifo.init_engine_info(f);
-
-	err = nvgpu_init_runlist(g, f);
-	if (err != 0) {
-		nvgpu_err(g, "failed to init runlist");
-		goto clean_up;
-	}
-
-	nvgpu_init_runlist_enginfo(g, f);
-	if (err != 0) {
-		nvgpu_err(g, "failed to init runlist engine info");
-		goto clean_up;
-	}
-
-	nvgpu_init_list_node(&f->free_chs);
-
-	err = nvgpu_mutex_init(&f->free_chs_mutex);
-	if (err != 0) {
-		nvgpu_err(g, "failed to init free_chs_mutex");
-		goto clean_up;
-	}
-
-	for (chid = 0; chid < f->num_channels; chid++) {
-		gk20a_init_channel_support(g, chid);
-		gk20a_init_tsg_support(g, chid);
-	}
-
-	err = nvgpu_mutex_init(&f->tsg_inuse_mutex);
-	if (err != 0) {
-		nvgpu_err(g, "failed to init tsg_inuse_mutex");
-		goto clean_up;
-	}
-
-	f->remove_support = gk20a_remove_fifo_support;
-
-	f->deferred_reset_pending = false;
-	err = nvgpu_mutex_init(&f->deferred_reset_mutex);
-	if (err != 0) {
-		nvgpu_err(g, "failed to init deferred_reset_mutex");
-		goto clean_up;
-	}
-
-	err = gk20a_fifo_init_userd_slabs(g);
-	if (err != 0) {
-		nvgpu_err(g, "userd slabs init fail, err=%d", err);
-		goto clean_up;
-	}
-
-	nvgpu_log_fn(g, "done");
-	return 0;
-
-clean_up:
-	nvgpu_err(g, "fail");
-
-	nvgpu_vfree(g, f->channel);
-	f->channel = NULL;
-	nvgpu_vfree(g, f->tsg);
-	f->tsg = NULL;
-	nvgpu_kfree(g, f->pbdma_map);
-	f->pbdma_map = NULL;
-	nvgpu_kfree(g, f->engine_info);
-	f->engine_info = NULL;
-	nvgpu_kfree(g, f->active_engines_list);
-	f->active_engines_list = NULL;
-
-	return err;
-}
-
 int gk20a_fifo_init_userd_slabs(struct gk20a *g)
 {
 	struct fifo_gk20a *f = &g->fifo;
@@ -627,55 +410,6 @@ void gk20a_fifo_free_userd_slabs(struct gk20a *g)
 	f->userd_slabs = NULL;
 }
 
-int gk20a_init_fifo_setup_sw(struct gk20a *g)
-{
-	struct fifo_gk20a *f = &g->fifo;
-	int err = 0;
-	u32 size;
-	u32 num_pages;
-
-	nvgpu_log_fn(g, " ");
-
-	if (f->sw_ready) {
-		nvgpu_log_fn(g, "skip init");
-		return 0;
-	}
-
-	err = gk20a_init_fifo_setup_sw_common(g);
-	if (err != 0) {
-		nvgpu_err(g, "fail: err: %d", err);
-		return err;
-	}
-
-	size = f->num_channels * f->userd_entry_size;
-	num_pages = DIV_ROUND_UP(size, PAGE_SIZE);
-	err = nvgpu_vm_area_alloc(g->mm.bar1.vm,
-			num_pages, PAGE_SIZE, &f->userd_gpu_va, 0);
-	if (err != 0) {
-		nvgpu_err(g, "userd gpu va allocation failed, err=%d", err);
-		goto clean_slabs;
-	}
-
-	err = nvgpu_channel_worker_init(g);
-	if (err != 0) {
-		nvgpu_err(g, "worker init fail, err=%d", err);
-		goto clean_vm_area;
-	}
-
-	f->sw_ready = true;
-
-	nvgpu_log_fn(g, "done");
-	return 0;
-
-clean_vm_area:
-	(void) nvgpu_vm_area_free(g->mm.bar1.vm, f->userd_gpu_va);
-	f->userd_gpu_va = 0ULL;
-
-clean_slabs:
-	gk20a_fifo_free_userd_slabs(g);
-
-	return err;
-}
-
 void gk20a_fifo_handle_runlist_event(struct gk20a *g)
 {
 	u32 runlist_event = gk20a_readl(g, fifo_intr_runlist_r());
@@ -708,25 +442,6 @@ int gk20a_init_fifo_setup_hw(struct gk20a *g)
 	return 0;
 }
 
-int gk20a_init_fifo_support(struct gk20a *g)
-{
-	int err;
-
-	err = g->ops.fifo.setup_sw(g);
-	if (err != 0) {
-		return err;
-	}
-
-	if (g->ops.fifo.init_fifo_setup_hw != NULL) {
-		err = g->ops.fifo.init_fifo_setup_hw(g);
-	}
-	if (err != 0) {
-		return err;
-	}
-
-	return err;
-}
-
 /* return with a reference to the channel, caller must put it back */
 struct channel_gk20a *
 gk20a_refch_from_inst_ptr(struct gk20a *g, u64 inst_ptr)
@@ -2881,6 +2596,11 @@ void gk20a_fifo_userd_gp_put(struct gk20a *g, struct channel_gk20a *c)
 	gk20a_bar1_writel(g, (u32)addr, c->gpfifo.put);
 }
 
+u32 gk20a_fifo_userd_entry_size(struct gk20a *g)
+{
+	return BIT32(ram_userd_base_shift_v());
+}
+
 u32 gk20a_fifo_pbdma_acquire_val(u64 timeout)
 {
 	u32 val, exponent, mantissa;
@@ -2944,3 +2664,26 @@ bool gk20a_fifo_find_pbdma_for_runlist(struct fifo_gk20a *f, u32 runlist_id,
 	*pbdma_id = id;
 	return found_pbdma_for_runlist;
 }
+
+int gk20a_fifo_init_pbdma_info(struct fifo_gk20a *f)
+{
+	struct gk20a *g = f->g;
+	u32 id;
+
+	f->num_pbdma = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA);
+
+	f->pbdma_map = nvgpu_kzalloc(g, f->num_pbdma * sizeof(*f->pbdma_map));
+	if (f->pbdma_map == NULL) {
+		return -ENOMEM;
+	}
+
+	for (id = 0; id < f->num_pbdma; ++id) {
+		f->pbdma_map[id] = gk20a_readl(g, fifo_pbdma_map_r(id));
+	}
+
+	if (g->ops.fifo.init_pbdma_intr_descs != NULL) {
+		g->ops.fifo.init_pbdma_intr_descs(f);
+	}
+
+	return 0;
+}

@@ -246,8 +246,6 @@ struct nvgpu_channel_dump_info {
 	} sema;
 };
 
-int gk20a_init_fifo_support(struct gk20a *g);
-
 int gk20a_init_fifo_setup_hw(struct gk20a *g);
 void gk20a_fifo_isr(struct gk20a *g);
@@ -352,8 +350,6 @@ u32 gk20a_fifo_pbdma_acquire_val(u64 timeout);
 u32 gk20a_fifo_runlist_busy_engines(struct gk20a *g, u32 runlist_id);
 
-int gk20a_init_fifo_setup_sw_common(struct gk20a *g);
-int gk20a_init_fifo_setup_sw(struct gk20a *g);
 void gk20a_fifo_handle_runlist_event(struct gk20a *g);
 bool gk20a_fifo_should_defer_engine_reset(struct gk20a *g, u32 engine_id,
 		u32 engine_subid, bool fake_fault);
@@ -385,9 +381,11 @@ void gk20a_fifo_get_mmu_fault_gpc_desc(struct mmu_fault_info *mmfault);
 int gk20a_fifo_init_userd_slabs(struct gk20a *g);
 void gk20a_fifo_free_userd_slabs(struct gk20a *g);
 int gk20a_fifo_init_userd(struct gk20a *g, struct channel_gk20a *c);
+u32 gk20a_fifo_userd_entry_size(struct gk20a *g);
 bool gk20a_fifo_find_pbdma_for_runlist(struct fifo_gk20a *f, u32 runlist_id,
 		u32 *pbdma_id);
 u32 gk20a_fifo_read_pbdma_data(struct gk20a *g, u32 pbdma_id);
 void gk20a_fifo_reset_pbdma_header(struct gk20a *g, u32 pbdma_id);
+int gk20a_fifo_init_pbdma_info(struct fifo_gk20a *f);
 
 #endif /* FIFO_GK20A_H */

@@ -515,6 +515,7 @@ static const struct gpu_ops gm20b_ops = {
 		.userd_gp_get = gk20a_fifo_userd_gp_get,
 		.userd_gp_put = gk20a_fifo_userd_gp_put,
 		.userd_pb_get = gk20a_fifo_userd_pb_get,
+		.userd_entry_size = gk20a_fifo_userd_entry_size,
 		.pbdma_acquire_val = gk20a_fifo_pbdma_acquire_val,
 		.preempt_channel = gk20a_fifo_preempt_channel,
 		.preempt_tsg = gk20a_fifo_preempt_tsg,
@@ -530,6 +531,7 @@ static const struct gpu_ops gm20b_ops = {
 		.get_pbdma_signature = gk20a_fifo_get_pbdma_signature,
 		.tsg_set_timeslice = gk20a_fifo_tsg_set_timeslice,
 		.force_reset_ch = gk20a_fifo_force_reset_ch,
+		.init_pbdma_info = gk20a_fifo_init_pbdma_info,
 		.init_engine_info = gm20b_fifo_init_engine_info,
 		.get_engines_mask_on_id = gk20a_fifo_engines_on_id,
 		.dump_pbdma_status = gk20a_dump_pbdma_status,
@@ -554,7 +556,7 @@ static const struct gpu_ops gm20b_ops = {
 		.channel_suspend = gk20a_channel_suspend,
 		.channel_resume = gk20a_channel_resume,
 		.set_error_notifier = nvgpu_set_error_notifier,
-		.setup_sw = gk20a_init_fifo_setup_sw,
+		.setup_sw = nvgpu_fifo_setup_sw,
 		.set_sm_exception_type_mask = gk20a_tsg_set_sm_exception_type_mask,
 		.runlist_busy_engines = gk20a_fifo_runlist_busy_engines,
 		.find_pbdma_for_runlist = gk20a_fifo_find_pbdma_for_runlist,
@@ -596,6 +598,7 @@ static const struct gpu_ops gm20b_ops = {
 		.set_interleave = gk20a_runlist_set_interleave,
 		.count_max = gk20a_runlist_count_max,
 		.entry_size = gk20a_runlist_entry_size,
+		.length_max = gk20a_runlist_length_max,
 		.get_tsg_entry = gk20a_runlist_get_tsg_entry,
 		.get_ch_entry = gk20a_runlist_get_ch_entry,
 		.hw_submit = gk20a_runlist_hw_submit,

@@ -566,6 +566,7 @@ static const struct gpu_ops gp10b_ops = {
 		.userd_gp_get = gk20a_fifo_userd_gp_get,
 		.userd_gp_put = gk20a_fifo_userd_gp_put,
 		.userd_pb_get = gk20a_fifo_userd_pb_get,
+		.userd_entry_size = gk20a_fifo_userd_entry_size,
 		.pbdma_acquire_val = gk20a_fifo_pbdma_acquire_val,
 		.preempt_channel = gk20a_fifo_preempt_channel,
 		.preempt_tsg = gk20a_fifo_preempt_tsg,
@@ -581,6 +582,7 @@ static const struct gpu_ops gp10b_ops = {
 		.get_pbdma_signature = gp10b_fifo_get_pbdma_signature,
 		.tsg_set_timeslice = gk20a_fifo_tsg_set_timeslice,
 		.force_reset_ch = gk20a_fifo_force_reset_ch,
+		.init_pbdma_info = gk20a_fifo_init_pbdma_info,
 		.init_engine_info = gm20b_fifo_init_engine_info,
 		.get_engines_mask_on_id = gk20a_fifo_engines_on_id,
 		.dump_pbdma_status = gk20a_dump_pbdma_status,
@@ -605,7 +607,7 @@ static const struct gpu_ops gp10b_ops = {
 		.channel_suspend = gk20a_channel_suspend,
 		.channel_resume = gk20a_channel_resume,
 		.set_error_notifier = nvgpu_set_error_notifier,
-		.setup_sw = gk20a_init_fifo_setup_sw,
+		.setup_sw = nvgpu_fifo_setup_sw,
 		.resetup_ramfc = gp10b_fifo_resetup_ramfc,
 		.set_sm_exception_type_mask = gk20a_tsg_set_sm_exception_type_mask,
 		.runlist_busy_engines = gk20a_fifo_runlist_busy_engines,
@@ -650,6 +652,7 @@ static const struct gpu_ops gp10b_ops = {
 		.set_interleave = gk20a_runlist_set_interleave,
 		.count_max = gk20a_runlist_count_max,
 		.entry_size = gk20a_runlist_entry_size,
+		.length_max = gk20a_runlist_length_max,
 		.get_tsg_entry = gk20a_runlist_get_tsg_entry,
 		.get_ch_entry = gk20a_runlist_get_ch_entry,
 		.hw_submit = gk20a_runlist_hw_submit,

@@ -734,6 +734,7 @@ static const struct gpu_ops gv100_ops = {
 		.userd_gp_get = gv11b_userd_gp_get,
 		.userd_gp_put = gv11b_userd_gp_put,
 		.userd_pb_get = gv11b_userd_pb_get,
+		.userd_entry_size = gk20a_fifo_userd_entry_size,
 		.pbdma_acquire_val = gk20a_fifo_pbdma_acquire_val,
 		.preempt_channel = gv11b_fifo_preempt_channel,
 		.preempt_tsg = gv11b_fifo_preempt_tsg,
@@ -751,6 +752,7 @@ static const struct gpu_ops gv100_ops = {
 		.tsg_set_timeslice = gk20a_fifo_tsg_set_timeslice,
 		.force_reset_ch = gk20a_fifo_force_reset_ch,
 		.init_engine_info = gm20b_fifo_init_engine_info,
+		.init_pbdma_info = gk20a_fifo_init_pbdma_info,
 		.get_engines_mask_on_id = gk20a_fifo_engines_on_id,
 		.dump_pbdma_status = gk20a_dump_pbdma_status,
 		.dump_channel_status_ramfc = gv11b_dump_channel_status_ramfc,
@@ -777,7 +779,7 @@ static const struct gpu_ops gv100_ops = {
 		.channel_suspend = gk20a_channel_suspend,
 		.channel_resume = gk20a_channel_resume,
 		.set_error_notifier = nvgpu_set_error_notifier_if_empty,
-		.setup_sw = gk20a_init_fifo_setup_sw,
+		.setup_sw = nvgpu_fifo_setup_sw,
 		.resetup_ramfc = NULL,
 		.free_channel_ctx_header = gv11b_free_subctx_header,
 		.ring_channel_doorbell = gv11b_ring_channel_doorbell,
@@ -824,6 +826,7 @@ static const struct gpu_ops gv100_ops = {
 		.set_interleave = gk20a_runlist_set_interleave,
 		.count_max = gv100_runlist_count_max,
 		.entry_size = gv11b_runlist_entry_size,
+		.length_max = gk20a_runlist_length_max,
 		.get_tsg_entry = gv11b_runlist_get_tsg_entry,
 		.get_ch_entry = gv11b_runlist_get_ch_entry,
 		.hw_submit = gk20a_runlist_hw_submit,

@@ -687,6 +687,7 @@ static const struct gpu_ops gv11b_ops = {
 		.userd_gp_get = gv11b_userd_gp_get,
 		.userd_gp_put = gv11b_userd_gp_put,
 		.userd_pb_get = gv11b_userd_pb_get,
+		.userd_entry_size = gk20a_fifo_userd_entry_size,
 		.pbdma_acquire_val = gk20a_fifo_pbdma_acquire_val,
 		.preempt_channel = gv11b_fifo_preempt_channel,
 		.preempt_tsg = gv11b_fifo_preempt_tsg,
@@ -704,6 +705,7 @@ static const struct gpu_ops gv11b_ops = {
 		.tsg_set_timeslice = gk20a_fifo_tsg_set_timeslice,
 		.force_reset_ch = gk20a_fifo_force_reset_ch,
 		.init_engine_info = gm20b_fifo_init_engine_info,
+		.init_pbdma_info = gk20a_fifo_init_pbdma_info,
 		.get_engines_mask_on_id = gk20a_fifo_engines_on_id,
 		.dump_pbdma_status = gk20a_dump_pbdma_status,
 		.dump_channel_status_ramfc = gv11b_dump_channel_status_ramfc,
@@ -730,7 +732,7 @@ static const struct gpu_ops gv11b_ops = {
 		.channel_suspend = gk20a_channel_suspend,
 		.channel_resume = gk20a_channel_resume,
 		.set_error_notifier = nvgpu_set_error_notifier_if_empty,
-		.setup_sw = gk20a_init_fifo_setup_sw,
+		.setup_sw = nvgpu_fifo_setup_sw,
 		.resetup_ramfc = NULL,
 		.free_channel_ctx_header = gv11b_free_subctx_header,
 		.handle_ctxsw_timeout = gv11b_fifo_handle_ctxsw_timeout,
@@ -780,6 +782,7 @@ static const struct gpu_ops gv11b_ops = {
 		.set_interleave = gk20a_runlist_set_interleave,
 		.count_max = gv11b_runlist_count_max,
 		.entry_size = gv11b_runlist_entry_size,
+		.length_max = gk20a_runlist_length_max,
 		.get_tsg_entry = gv11b_runlist_get_tsg_entry,
 		.get_ch_entry = gv11b_runlist_get_ch_entry,
 		.hw_submit = gk20a_runlist_hw_submit,

@@ -361,6 +361,8 @@ static inline bool gk20a_channel_as_bound(struct channel_gk20a *ch)
 }
 
 int channel_gk20a_commit_va(struct channel_gk20a *c);
 int gk20a_init_channel_support(struct gk20a *g, u32 chid);
+int nvgpu_channel_setup_sw(struct gk20a *g);
+void nvgpu_channel_cleanup_sw(struct gk20a *g);
 
 /* must be inside gk20a_busy()..gk20a_idle() */
 void gk20a_channel_close(struct channel_gk20a *ch);

@@ -50,6 +50,8 @@ u32 nvgpu_engine_get_gr_id(struct gk20a *g);
 u32 nvgpu_engine_interrupt_mask(struct gk20a *g);
 u32 nvgpu_engine_act_interrupt_mask(struct gk20a *g, u32 act_eng_id);
 u32 nvgpu_engine_get_all_ce_reset_mask(struct gk20a *g);
+int nvgpu_engine_setup_sw(struct gk20a *g);
+void nvgpu_engine_cleanup_sw(struct gk20a *g);
 int nvgpu_engine_enable_activity(struct gk20a *g,
 		struct fifo_engine_info_gk20a *eng_info);

@@ -1,7 +1,7 @@
 /*
  * fifo common definitions (gr host)
  *
- * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2011-2019, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -45,4 +45,8 @@ struct nvgpu_channel_hw_state {
 	const char *status_string;
 };
 
+int nvgpu_fifo_init_support(struct gk20a *g);
+int nvgpu_fifo_setup_sw(struct gk20a *g);
+int nvgpu_fifo_setup_sw_common(struct gk20a *g);
+
 #endif /* NVGPU_FIFO_COMMON_H */

@@ -806,12 +806,14 @@ struct gpu_ops {
 		int (*tsg_unbind_channel)(struct channel_gk20a *ch);
 		int (*tsg_open)(struct tsg_gk20a *tsg);
 		void (*tsg_release)(struct tsg_gk20a *tsg);
+		int (*init_pbdma_info)(struct fifo_gk20a *f);
 		int (*init_engine_info)(struct fifo_gk20a *f);
 		u32 (*get_engines_mask_on_id)(struct gk20a *g,
 			u32 id, bool is_tsg);
 		u32 (*userd_gp_get)(struct gk20a *g, struct channel_gk20a *ch);
 		void (*userd_gp_put)(struct gk20a *g, struct channel_gk20a *ch);
 		u64 (*userd_pb_get)(struct gk20a *g, struct channel_gk20a *ch);
+		u32 (*userd_entry_size)(struct gk20a *g);
 		void (*free_channel_ctx_header)(struct channel_gk20a *ch);
 		void (*dump_pbdma_status)(struct gk20a *g,
 			struct gk20a_debug_output *o);
@@ -886,7 +888,8 @@ struct gpu_ops {
 		int (*set_interleave)(struct gk20a *g, u32 id,
 			u32 runlist_id, u32 new_level);
 		u32 (*count_max)(void);
-		u32 (*entry_size)(void);
+		u32 (*entry_size)(struct gk20a *g);
+		u32 (*length_max)(struct gk20a *g);
 		void (*get_tsg_entry)(struct tsg_gk20a *tsg, u32 *runlist);
 		void (*get_ch_entry)(struct channel_gk20a *ch, u32 *runlist);
 		void (*hw_submit)(struct gk20a *g, u32 runlist_id,

@@ -55,8 +55,8 @@ const char *gk20a_fifo_interleave_level_name(u32 interleave_level);
 void gk20a_fifo_set_runlist_state(struct gk20a *g, u32 runlists_mask,
 		u32 runlist_state);
 
-void gk20a_fifo_delete_runlist(struct fifo_gk20a *f);
-int nvgpu_init_runlist(struct gk20a *g, struct fifo_gk20a *f);
+int nvgpu_runlist_setup_sw(struct gk20a *g);
+void nvgpu_runlist_cleanup_sw(struct gk20a *g);
 
 void nvgpu_fifo_lock_active_runlists(struct gk20a *g);
 void nvgpu_fifo_unlock_active_runlists(struct gk20a *g);

@@ -87,6 +87,9 @@ void gk20a_tsg_release_common(struct gk20a *g, struct tsg_gk20a *tsg);
 void gk20a_tsg_release(struct nvgpu_ref *ref);
 
 int gk20a_init_tsg_support(struct gk20a *g, u32 tsgid);
+int nvgpu_tsg_setup_sw(struct gk20a *g);
+void nvgpu_tsg_cleanup_sw(struct gk20a *g);
+
 struct tsg_gk20a *tsg_gk20a_from_ch(struct channel_gk20a *ch);
 int gk20a_enable_tsg(struct tsg_gk20a *tsg);

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -90,8 +90,8 @@ struct nvgpu_channel_linux {
 };
 
 u32 nvgpu_submit_gpfifo_user_flags_to_common_flags(u32 user_flags);
-int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l);
-void nvgpu_remove_channel_support_linux(struct nvgpu_os_linux *l);
+int nvgpu_channel_init_support_linux(struct nvgpu_os_linux *l);
+void nvgpu_channel_remove_support_linux(struct nvgpu_os_linux *l);
 
 struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g,
 		void (*update_fn)(struct channel_gk20a *, void *),

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018, NVIDIA Corporation. All rights reserved.
+ * Copyright (c) 2017-2019, NVIDIA Corporation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -526,7 +526,7 @@ free_gpfifo:
 	return err;
 }
 
-int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l)
+int nvgpu_channel_init_support_linux(struct nvgpu_os_linux *l)
 {
 	struct gk20a *g = &l->g;
 	struct fifo_gk20a *f = &g->fifo;
@@ -577,7 +577,7 @@ err_clean:
 	return err;
 }
 
-void nvgpu_remove_channel_support_linux(struct nvgpu_os_linux *l)
+void nvgpu_channel_remove_support_linux(struct nvgpu_os_linux *l)
 {
 	struct gk20a *g = &l->g;
 	struct fifo_gk20a *f = &g->fifo;

@@ -204,7 +204,7 @@ int nvgpu_finalize_poweron_linux(struct nvgpu_os_linux *l)
 	if (l->init_done)
 		return 0;
 
-	err = nvgpu_init_channel_support_linux(l);
+	err = nvgpu_channel_init_support_linux(l);
 	if (err) {
 		nvgpu_err(g, "failed to init linux channel support");
 		return err;
@@ -751,7 +751,7 @@ void gk20a_remove_support(struct gk20a *g)
 
 	nvgpu_kfree(g, g->dbg_regops_tmp_buf);
 
-	nvgpu_remove_channel_support_linux(l);
+	nvgpu_channel_remove_support_linux(l);
 
 	if (g->pmu.remove_support)
 		g->pmu.remove_support(&g->pmu);

@@ -763,6 +763,7 @@ static const struct gpu_ops tu104_ops = {
 		.userd_gp_get = gv11b_userd_gp_get,
 		.userd_gp_put = gv11b_userd_gp_put,
 		.userd_pb_get = gv11b_userd_pb_get,
+		.userd_entry_size = gk20a_fifo_userd_entry_size,
 		.pbdma_acquire_val = gk20a_fifo_pbdma_acquire_val,
 		.preempt_channel = gv11b_fifo_preempt_channel,
 		.preempt_tsg = gv11b_fifo_preempt_tsg,
@@ -780,6 +781,7 @@ static const struct gpu_ops tu104_ops = {
 		.tsg_set_timeslice = gk20a_fifo_tsg_set_timeslice,
 		.force_reset_ch = gk20a_fifo_force_reset_ch,
 		.init_engine_info = gm20b_fifo_init_engine_info,
+		.init_pbdma_info = gk20a_fifo_init_pbdma_info,
 		.get_engines_mask_on_id = gk20a_fifo_engines_on_id,
 		.dump_pbdma_status = gk20a_dump_pbdma_status,
 		.dump_channel_status_ramfc = gv11b_dump_channel_status_ramfc,
@@ -806,7 +808,7 @@ static const struct gpu_ops tu104_ops = {
 		.channel_suspend = gk20a_channel_suspend,
 		.channel_resume = gk20a_channel_resume,
 		.set_error_notifier = nvgpu_set_error_notifier_if_empty,
-		.setup_sw = gk20a_init_fifo_setup_sw,
+		.setup_sw = nvgpu_fifo_setup_sw,
 		.resetup_ramfc = NULL,
 		.free_channel_ctx_header = gv11b_free_subctx_header,
 		.handle_ctxsw_timeout = gv11b_fifo_handle_ctxsw_timeout,
@@ -856,6 +858,7 @@ static const struct gpu_ops tu104_ops = {
 		.set_interleave = gk20a_runlist_set_interleave,
 		.count_max = tu104_runlist_count_max,
 		.entry_size = tu104_runlist_entry_size,
+		.length_max = gk20a_runlist_length_max,
 		.get_tsg_entry = gv11b_runlist_get_tsg_entry,
 		.get_ch_entry = gv11b_runlist_get_ch_entry,
 		.hw_submit = tu104_runlist_hw_submit,