Open source GPL/LGPL release

This commit is contained in:
svcmobrel-release
2022-07-21 16:03:29 -07:00
commit f338182221
2260 changed files with 576813 additions and 0 deletions

File diff suppressed because it is too large

@@ -0,0 +1,199 @@
/*
* Copyright (c) 2015-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "channel_wdt.h"
#include "channel_worker.h"
#include <nvgpu/watchdog.h>
#include <nvgpu/channel.h>
#include <nvgpu/error_notifier.h>
#include <nvgpu/gk20a.h>
void nvgpu_channel_set_wdt_debug_dump(struct nvgpu_channel *ch, bool dump)
{
ch->wdt_debug_dump = dump;
}
static struct nvgpu_channel_wdt_state nvgpu_channel_collect_wdt_state(
struct nvgpu_channel *ch)
{
struct gk20a *g = ch->g;
struct nvgpu_channel_wdt_state state = { 0, 0 };
/*
* Note: just checking for nvgpu_channel_wdt_enabled() is not enough at
* the moment because system suspend puts g->regs away but doesn't stop
* the worker thread that runs the watchdog. This might need to be
* cleared up in the future.
*/
if (nvgpu_channel_wdt_running(ch->wdt)) {
/*
* Read the state only if the wdt is on to avoid unnecessary
* accesses. The kernel mem for userd may not even exist; this
* channel could be in usermode submit mode.
*/
state.gp_get = g->ops.userd.gp_get(g, ch);
state.pb_get = g->ops.userd.pb_get(g, ch);
}
return state;
}
void nvgpu_channel_launch_wdt(struct nvgpu_channel *ch)
{
struct nvgpu_channel_wdt_state state = nvgpu_channel_collect_wdt_state(ch);
/*
* FIXME: channel recovery can race the submit path and can start even
* after this, but this check is the best we can do for now.
*/
if (!nvgpu_channel_check_unserviceable(ch)) {
nvgpu_channel_wdt_start(ch->wdt, &state);
}
}
void nvgpu_channel_restart_all_wdts(struct gk20a *g)
{
struct nvgpu_fifo *f = &g->fifo;
u32 chid;
for (chid = 0; chid < f->num_channels; chid++) {
struct nvgpu_channel *ch = nvgpu_channel_from_id(g, chid);
if (ch != NULL) {
if ((ch->wdt != NULL) &&
!nvgpu_channel_check_unserviceable(ch)) {
struct nvgpu_channel_wdt_state state =
nvgpu_channel_collect_wdt_state(ch);
nvgpu_channel_wdt_rewind(ch->wdt, &state);
}
nvgpu_channel_put(ch);
}
}
}
static void nvgpu_channel_recover_from_wdt(struct nvgpu_channel *ch)
{
struct gk20a *g = ch->g;
nvgpu_log_fn(g, " ");
if (nvgpu_channel_check_unserviceable(ch)) {
/* channel is already recovered */
nvgpu_info(g, "chid: %d unserviceable but wdt was ON", ch->chid);
return;
}
nvgpu_err(g, "Job on channel %d timed out", ch->chid);
/* force reset calls gk20a_debug_dump but not this */
if (ch->wdt_debug_dump) {
gk20a_gr_debug_dump(g);
}
#ifdef CONFIG_NVGPU_CHANNEL_TSG_CONTROL
if (g->ops.tsg.force_reset(ch,
NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT,
ch->wdt_debug_dump) != 0) {
nvgpu_err(g, "failed tsg force reset for chid: %d", ch->chid);
}
#endif
}
/*
* Test the watchdog progress. If the channel is stuck, reset it.
*
* The gpu is implicitly on at this point because the watchdog can only run on
* channels that have submitted jobs pending for cleanup.
*/
static void nvgpu_channel_check_wdt(struct nvgpu_channel *ch)
{
struct nvgpu_channel_wdt_state state = nvgpu_channel_collect_wdt_state(ch);
if (nvgpu_channel_wdt_check(ch->wdt, &state)) {
nvgpu_channel_recover_from_wdt(ch);
}
}
void nvgpu_channel_worker_poll_init(struct nvgpu_worker *worker)
{
struct nvgpu_channel_worker *ch_worker =
nvgpu_channel_worker_from_worker(worker);
int ret;
ch_worker->watchdog_interval = 100U;
ret = nvgpu_timeout_init(worker->g, &ch_worker->timeout,
ch_worker->watchdog_interval, NVGPU_TIMER_CPU_TIMER);
if (ret != 0) {
nvgpu_err(worker->g, "timeout_init failed: %d", ret);
}
}
/**
* Loop over every living channel, check its timeout and handle stuck channels.
*/
static void nvgpu_channel_poll_wdt(struct gk20a *g)
{
unsigned int chid;
for (chid = 0; chid < g->fifo.num_channels; chid++) {
struct nvgpu_channel *ch = nvgpu_channel_from_id(g, chid);
if (ch != NULL) {
if (!nvgpu_channel_check_unserviceable(ch)) {
nvgpu_channel_check_wdt(ch);
}
nvgpu_channel_put(ch);
}
}
}
void nvgpu_channel_worker_poll_wakeup_post_process_item(
struct nvgpu_worker *worker)
{
struct gk20a *g = worker->g;
struct nvgpu_channel_worker *ch_worker =
nvgpu_channel_worker_from_worker(worker);
int ret;
if (nvgpu_timeout_peek_expired(&ch_worker->timeout)) {
nvgpu_channel_poll_wdt(g);
ret = nvgpu_timeout_init(g, &ch_worker->timeout,
ch_worker->watchdog_interval,
NVGPU_TIMER_CPU_TIMER);
if (ret != 0) {
nvgpu_err(g, "timeout_init failed: %d", ret);
}
}
}
u32 nvgpu_channel_worker_poll_wakeup_condition_get_timeout(
struct nvgpu_worker *worker)
{
struct nvgpu_channel_worker *ch_worker =
nvgpu_channel_worker_from_worker(worker);
return ch_worker->watchdog_interval;
}
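/*
 * Illustrative sketch, not part of the driver: the watchdog logic above
 * amounts to snapshotting GP_GET/PB_GET when a job is submitted and
 * comparing that snapshot against a fresh one when the poll timer fires.
 * The type and helper below are hypothetical stand-ins for
 * nvgpu_channel_wdt_state and the progress check, assuming that any
 * movement of either pointer counts as forward progress.
 */
struct wdt_state_sketch {
        unsigned long gp_get;
        unsigned long pb_get;
};

static bool wdt_sketch_made_progress(const struct wdt_state_sketch *prev,
                                     const struct wdt_state_sketch *now)
{
        /* Progress means the GP FIFO or pushbuffer get pointer moved. */
        return (prev->gp_get != now->gp_get) ||
               (prev->pb_get != now->pb_get);
}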

@@ -0,0 +1,42 @@
/*
* Copyright (c) 2015-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef NVGPU_COMMON_FIFO_CHANNEL_WDT_H
#define NVGPU_COMMON_FIFO_CHANNEL_WDT_H
#include <nvgpu/types.h>
struct nvgpu_channel;
#ifdef CONFIG_NVGPU_CHANNEL_WDT
struct nvgpu_worker;
void nvgpu_channel_launch_wdt(struct nvgpu_channel *ch);
void nvgpu_channel_worker_poll_init(struct nvgpu_worker *worker);
void nvgpu_channel_worker_poll_wakeup_post_process_item(
struct nvgpu_worker *worker);
u32 nvgpu_channel_worker_poll_wakeup_condition_get_timeout(
struct nvgpu_worker *worker);
#else
static inline void nvgpu_channel_launch_wdt(struct nvgpu_channel *ch) {}
#endif /* CONFIG_NVGPU_CHANNEL_WDT */
#endif /* NVGPU_COMMON_FIFO_CHANNEL_WDT_H */

@@ -0,0 +1,118 @@
/*
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include "channel_worker.h"
#include "channel_wdt.h"
#include <nvgpu/worker.h>
#include <nvgpu/channel.h>
static inline struct nvgpu_channel *
nvgpu_channel_from_worker_item(struct nvgpu_list_node *node)
{
return (struct nvgpu_channel *)
((uintptr_t)node - offsetof(struct nvgpu_channel, worker_item));
};
static void nvgpu_channel_worker_poll_wakeup_process_item(
struct nvgpu_list_node *work_item)
{
struct nvgpu_channel *ch = nvgpu_channel_from_worker_item(work_item);
nvgpu_assert(ch != NULL);
nvgpu_log_fn(ch->g, " ");
nvgpu_channel_clean_up_jobs(ch);
/* ref taken when enqueued */
nvgpu_channel_put(ch);
}
static const struct nvgpu_worker_ops channel_worker_ops = {
#ifdef CONFIG_NVGPU_CHANNEL_WDT
.pre_process = nvgpu_channel_worker_poll_init,
.wakeup_post_process =
nvgpu_channel_worker_poll_wakeup_post_process_item,
.wakeup_timeout =
nvgpu_channel_worker_poll_wakeup_condition_get_timeout,
#endif
.wakeup_early_exit = NULL,
.wakeup_process_item =
nvgpu_channel_worker_poll_wakeup_process_item,
.wakeup_condition = NULL,
};
/**
* Initialize the channel worker's metadata and start the background thread.
*/
int nvgpu_channel_worker_init(struct gk20a *g)
{
struct nvgpu_worker *worker = &g->channel_worker.worker;
nvgpu_worker_init_name(worker, "nvgpu_channel_poll", g->name);
return nvgpu_worker_init(g, worker, &channel_worker_ops);
}
void nvgpu_channel_worker_deinit(struct gk20a *g)
{
struct nvgpu_worker *worker = &g->channel_worker.worker;
nvgpu_worker_deinit(worker);
}
/**
* Append a channel to the worker's list, if not there already.
*
* The worker thread processes work items (channels in its work list) and polls
* for other things. This adds @ch to the end of the list and wakes the worker
* up immediately. If the channel already existed in the list, it's not added,
* because in that case it has been scheduled already but has not yet been
* processed.
*/
void nvgpu_channel_worker_enqueue(struct nvgpu_channel *ch)
{
struct gk20a *g = ch->g;
int ret;
nvgpu_log_fn(g, " ");
/*
* Ref released when this item gets processed. The caller should hold
* one ref already, so normally shouldn't fail, but the channel could
* end up being freed between the time the caller got its reference and
* the time we end up here (e.g., if the client got killed); if so, just
* return.
*/
if (nvgpu_channel_get(ch) == NULL) {
nvgpu_info(g, "cannot get ch ref for worker!");
return;
}
ret = nvgpu_worker_enqueue(&g->channel_worker.worker,
&ch->worker_item);
if (ret != 0) {
nvgpu_channel_put(ch);
return;
}
}
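/*
 * Hypothetical usage sketch, not part of the driver: a completion path
 * that already holds a channel reference defers job cleanup to the
 * worker. The function name is illustrative only; the real callers live
 * in the submit and interrupt paths.
 */
static void example_defer_job_cleanup(struct nvgpu_channel *ch)
{
        /*
         * The enqueue takes its own reference (or logs and bails out if
         * the channel is already gone), so the caller's reference can be
         * dropped independently afterwards.
         */
        nvgpu_channel_worker_enqueue(ch);
}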

@@ -0,0 +1,37 @@
/*
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef NVGPU_COMMON_FIFO_CHANNEL_WORKER_H
#define NVGPU_COMMON_FIFO_CHANNEL_WORKER_H
#include <nvgpu/gk20a.h>
void nvgpu_channel_worker_enqueue(struct nvgpu_channel *ch);
static inline struct nvgpu_channel_worker *
nvgpu_channel_worker_from_worker(struct nvgpu_worker *worker)
{
return (struct nvgpu_channel_worker *)
((uintptr_t)worker - offsetof(struct nvgpu_channel_worker, worker));
};
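/*
 * Illustrative, self-contained sketch of the container-of pattern used
 * above, with hypothetical types: given a pointer to an embedded member,
 * subtracting the member's offset recovers the containing structure.
 * This assumes the member is always embedded by value, never allocated
 * separately (offsetof comes from <stddef.h>, uintptr_t from <stdint.h>).
 */
struct member_sketch { int dummy; };

struct container_sketch {
        int other_state;
        struct member_sketch member; /* embedded by value */
};

static inline struct container_sketch *
container_from_member(struct member_sketch *m)
{
        return (struct container_sketch *)
                ((uintptr_t)m - offsetof(struct container_sketch, member));
}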
#endif /* NVGPU_COMMON_FIFO_CHANNEL_WORKER_H */

@@ -0,0 +1,88 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/io.h>
#include <nvgpu/engine_status.h>
bool nvgpu_engine_status_is_ctxsw_switch(struct nvgpu_engine_status_info
*engine_status)
{
return engine_status->ctxsw_status == NVGPU_CTX_STATUS_CTXSW_SWITCH;
}
bool nvgpu_engine_status_is_ctxsw_load(struct nvgpu_engine_status_info
*engine_status)
{
return engine_status->ctxsw_status == NVGPU_CTX_STATUS_CTXSW_LOAD;
}
bool nvgpu_engine_status_is_ctxsw_save(struct nvgpu_engine_status_info
*engine_status)
{
return engine_status->ctxsw_status == NVGPU_CTX_STATUS_CTXSW_SAVE;
}
bool nvgpu_engine_status_is_ctxsw(struct nvgpu_engine_status_info
*engine_status)
{
return (nvgpu_engine_status_is_ctxsw_switch(engine_status) ||
nvgpu_engine_status_is_ctxsw_load(engine_status) ||
nvgpu_engine_status_is_ctxsw_save(engine_status));
}
bool nvgpu_engine_status_is_ctxsw_invalid(struct nvgpu_engine_status_info
*engine_status)
{
return engine_status->ctxsw_status == NVGPU_CTX_STATUS_INVALID;
}
bool nvgpu_engine_status_is_ctxsw_valid(struct nvgpu_engine_status_info
*engine_status)
{
return engine_status->ctxsw_status == NVGPU_CTX_STATUS_VALID;
}
bool nvgpu_engine_status_is_ctx_type_tsg(struct nvgpu_engine_status_info
*engine_status)
{
return engine_status->ctx_id_type == ENGINE_STATUS_CTX_ID_TYPE_TSGID;
}
bool nvgpu_engine_status_is_next_ctx_type_tsg(struct nvgpu_engine_status_info
*engine_status)
{
return engine_status->ctx_next_id_type ==
ENGINE_STATUS_CTX_NEXT_ID_TYPE_TSGID;
}
void nvgpu_engine_status_get_ctx_id_type(struct nvgpu_engine_status_info
*engine_status, u32 *ctx_id, u32 *ctx_type)
{
*ctx_id = engine_status->ctx_id;
*ctx_type = engine_status->ctx_id_type;
}
void nvgpu_engine_status_get_next_ctx_id_type(struct nvgpu_engine_status_info
*engine_status, u32 *ctx_next_id,
u32 *ctx_next_type)
{
*ctx_next_id = engine_status->ctx_next_id;
*ctx_next_type = engine_status->ctx_next_id_type;
}

@@ -0,0 +1,960 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/log.h>
#include <nvgpu/errno.h>
#include <nvgpu/timers.h>
#include <nvgpu/bitops.h>
#ifdef CONFIG_NVGPU_LS_PMU
#include <nvgpu/pmu.h>
#include <nvgpu/pmu/mutex.h>
#endif
#include <nvgpu/runlist.h>
#include <nvgpu/engines.h>
#include <nvgpu/engine_status.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/pbdma_status.h>
#include <nvgpu/power_features/pg.h>
#include <nvgpu/channel.h>
#include <nvgpu/soc.h>
#include <nvgpu/device.h>
#include <nvgpu/gr/gr_falcon.h>
#include <nvgpu/gr/gr.h>
#include <nvgpu/gr/gr_instances.h>
#include <nvgpu/fifo.h>
#include <nvgpu/static_analysis.h>
#include <nvgpu/swprofile.h>
#include <nvgpu/fifo/swprofile.h>
#define FECS_METHOD_WFI_RESTORE 0x80000U
enum nvgpu_fifo_engine nvgpu_engine_enum_from_dev(struct gk20a *g,
const struct nvgpu_device *dev)
{
enum nvgpu_fifo_engine ret = NVGPU_ENGINE_INVAL;
if (nvgpu_device_is_graphics(g, dev)) {
ret = NVGPU_ENGINE_GR;
} else if (nvgpu_device_is_ce(g, dev)) {
/* For now, all CE engines have separate runlists. We can
* identify the NVGPU_ENGINE_GRCE type CE by comparing its
* runlist_id with the GR runlist_id in init_info().
*/
ret = NVGPU_ENGINE_ASYNC_CE;
} else {
ret = NVGPU_ENGINE_INVAL;
}
return ret;
}
const struct nvgpu_device *nvgpu_engine_get_active_eng_info(
struct gk20a *g, u32 engine_id)
{
struct nvgpu_fifo *f = &g->fifo;
if (engine_id >= f->max_engines) {
return NULL;
}
return f->host_engines[engine_id];
}
bool nvgpu_engine_check_valid_id(struct gk20a *g, u32 engine_id)
{
struct nvgpu_fifo *f = &g->fifo;
if (engine_id >= f->max_engines) {
return false;
}
return f->host_engines[engine_id] != NULL;
}
u32 nvgpu_engine_get_gr_id_for_inst(struct gk20a *g, u32 inst_id)
{
const struct nvgpu_device *dev;
dev = nvgpu_device_get(g, NVGPU_DEVTYPE_GRAPHICS, inst_id);
if (dev == NULL) {
nvgpu_warn(g, "No GR devices on this GPU for inst[%u]?!",
inst_id);
return NVGPU_INVALID_ENG_ID;
}
return dev->engine_id;
}
u32 nvgpu_engine_get_gr_id(struct gk20a *g)
{
/* Consider 1st available GR engine */
return nvgpu_engine_get_gr_id_for_inst(g, 0U);
}
u32 nvgpu_engine_act_interrupt_mask(struct gk20a *g, u32 engine_id)
{
const struct nvgpu_device *dev = NULL;
dev = nvgpu_engine_get_active_eng_info(g, engine_id);
if (dev == NULL) {
return 0;
}
return BIT32(dev->intr_id);
}
u32 nvgpu_gr_engine_interrupt_mask(struct gk20a *g)
{
const struct nvgpu_device *dev;
u32 intr_mask = 0U;
u32 i;
for (i = 0U; i < g->num_gr_instances; i++) {
dev = nvgpu_device_get(g, NVGPU_DEVTYPE_GRAPHICS,
nvgpu_gr_get_syspipe_id(g, i));
if (dev == NULL) {
continue;
}
intr_mask |= BIT32(dev->intr_id);
}
return intr_mask;
}
u32 nvgpu_ce_engine_interrupt_mask(struct gk20a *g)
{
const struct nvgpu_device *dev;
u32 i;
u32 mask = 0U;
/*
* For old chips - pre-Pascal - we have COPY[0-2], for new chips we
* have some number of LCE instances. For the purpose of this code we
* imagine a system that could have both; in reality that'll never be
* the case.
*
* This can be cleaned up in the future by defining a SW type for CE and
* hiding this ugliness in the device management code.
*/
for (i = NVGPU_DEVTYPE_COPY0; i <= NVGPU_DEVTYPE_COPY2; i++) {
dev = nvgpu_device_get(g, i, i - NVGPU_DEVTYPE_COPY0);
if (dev == NULL) {
continue;
}
mask |= BIT32(dev->intr_id);
}
/*
* Now take care of LCEs.
*/
for (i = 0U; i < nvgpu_device_count(g, NVGPU_DEVTYPE_LCE); i++) {
dev = nvgpu_device_get(g, NVGPU_DEVTYPE_LCE, i);
nvgpu_assert(dev != NULL);
mask |= BIT32(dev->intr_id);
}
return mask;
}
#ifdef CONFIG_NVGPU_FIFO_ENGINE_ACTIVITY
static void nvgpu_engine_enable_activity(struct gk20a *g,
const struct nvgpu_device *dev)
{
nvgpu_runlist_set_state(g, BIT32(dev->runlist_id), RUNLIST_ENABLED);
}
void nvgpu_engine_enable_activity_all(struct gk20a *g)
{
u32 i;
for (i = 0; i < g->fifo.num_engines; i++) {
nvgpu_engine_enable_activity(g, g->fifo.active_engines[i]);
}
}
int nvgpu_engine_disable_activity(struct gk20a *g,
const struct nvgpu_device *dev,
bool wait_for_idle)
{
u32 pbdma_chid = NVGPU_INVALID_CHANNEL_ID;
u32 engine_chid = NVGPU_INVALID_CHANNEL_ID;
#ifdef CONFIG_NVGPU_LS_PMU
u32 token = PMU_INVALID_MUTEX_OWNER_ID;
int mutex_ret = -EINVAL;
#endif
int err = 0;
struct nvgpu_channel *ch = NULL;
struct nvgpu_engine_status_info engine_status;
struct nvgpu_pbdma_status_info pbdma_status;
unsigned long runlist_served_pbdmas;
unsigned long bit;
u32 pbdma_id;
struct nvgpu_fifo *f = &g->fifo;
nvgpu_log_fn(g, " ");
g->ops.engine_status.read_engine_status_info(g, dev->engine_id,
&engine_status);
if (engine_status.is_busy && !wait_for_idle) {
return -EBUSY;
}
#ifdef CONFIG_NVGPU_LS_PMU
if (g->ops.pmu.is_pmu_supported(g)) {
mutex_ret = nvgpu_pmu_lock_acquire(g, g->pmu,
PMU_MUTEX_ID_FIFO, &token);
}
#endif
nvgpu_runlist_set_state(g, BIT32(dev->runlist_id),
RUNLIST_DISABLED);
runlist_served_pbdmas = f->runlists[dev->runlist_id]->pbdma_bitmask;
for_each_set_bit(bit, &runlist_served_pbdmas,
nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA)) {
pbdma_id = U32(bit);
/* chid from pbdma status */
g->ops.pbdma_status.read_pbdma_status_info(g,
pbdma_id,
&pbdma_status);
if (nvgpu_pbdma_status_is_chsw_valid(&pbdma_status) ||
nvgpu_pbdma_status_is_chsw_save(&pbdma_status)) {
pbdma_chid = pbdma_status.id;
} else if (nvgpu_pbdma_status_is_chsw_load(&pbdma_status) ||
nvgpu_pbdma_status_is_chsw_switch(&pbdma_status)) {
pbdma_chid = pbdma_status.next_id;
} else {
/* Nothing to do here */
}
if (pbdma_chid != NVGPU_INVALID_CHANNEL_ID) {
ch = nvgpu_channel_from_id(g, pbdma_chid);
if (ch != NULL) {
err = g->ops.fifo.preempt_channel(g, ch);
nvgpu_channel_put(ch);
}
if (err != 0) {
goto clean_up;
}
}
}
/* chid from engine status */
g->ops.engine_status.read_engine_status_info(g, dev->engine_id,
&engine_status);
if (nvgpu_engine_status_is_ctxsw_valid(&engine_status) ||
nvgpu_engine_status_is_ctxsw_save(&engine_status)) {
engine_chid = engine_status.ctx_id;
} else if (nvgpu_engine_status_is_ctxsw_switch(&engine_status) ||
nvgpu_engine_status_is_ctxsw_load(&engine_status)) {
engine_chid = engine_status.ctx_next_id;
} else {
/* Nothing to do here */
}
if (engine_chid != NVGPU_INVALID_ENG_ID && engine_chid != pbdma_chid) {
ch = nvgpu_channel_from_id(g, engine_chid);
if (ch != NULL) {
err = g->ops.fifo.preempt_channel(g, ch);
nvgpu_channel_put(ch);
}
if (err != 0) {
goto clean_up;
}
}
clean_up:
#ifdef CONFIG_NVGPU_LS_PMU
if (mutex_ret == 0) {
if (nvgpu_pmu_lock_release(g, g->pmu,
PMU_MUTEX_ID_FIFO, &token) != 0){
nvgpu_err(g, "failed to release PMU lock");
}
}
#endif
if (err != 0) {
nvgpu_log_fn(g, "failed");
nvgpu_engine_enable_activity(g, dev);
} else {
nvgpu_log_fn(g, "done");
}
return err;
}
int nvgpu_engine_disable_activity_all(struct gk20a *g,
bool wait_for_idle)
{
unsigned int i;
int err = 0, ret = 0;
for (i = 0; i < g->fifo.num_engines; i++) {
err = nvgpu_engine_disable_activity(g,
g->fifo.active_engines[i],
wait_for_idle);
if (err != 0) {
nvgpu_err(g, "failed to disable engine %d activity",
g->fifo.active_engines[i]->engine_id);
ret = err;
break;
}
}
if (err != 0) {
while (i-- != 0U) {
nvgpu_engine_enable_activity(g,
g->fifo.active_engines[i]);
}
}
return ret;
}
int nvgpu_engine_wait_for_idle(struct gk20a *g)
{
struct nvgpu_timeout timeout;
u32 delay = POLL_DELAY_MIN_US;
int ret = 0, err = 0;
u32 i, host_num_engines;
struct nvgpu_engine_status_info engine_status;
nvgpu_log_fn(g, " ");
host_num_engines =
nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_ENGINES);
err = nvgpu_timeout_init(g, &timeout, nvgpu_get_poll_timeout(g),
NVGPU_TIMER_CPU_TIMER);
if (err != 0) {
return -EINVAL;
}
for (i = 0; i < host_num_engines; i++) {
if (!nvgpu_engine_check_valid_id(g, i)) {
continue;
}
ret = -ETIMEDOUT;
do {
g->ops.engine_status.read_engine_status_info(g, i,
&engine_status);
if (!engine_status.is_busy) {
ret = 0;
break;
}
nvgpu_usleep_range(delay, delay * 2U);
delay = min_t(u32,
delay << 1U, POLL_DELAY_MAX_US);
} while (nvgpu_timeout_expired(&timeout) == 0);
if (ret != 0) {
/* Possible causes:
* check the register settings programmed in the HAL by
* elcg_init_idle_filters and init_therm_setup_hw.
*/
nvgpu_err(g, "cannot idle engine: %u "
"engine_status: 0x%08x", i,
engine_status.reg_data);
break;
}
}
nvgpu_log_fn(g, "done");
return ret;
}
#endif /* CONFIG_NVGPU_FIFO_ENGINE_ACTIVITY */
int nvgpu_engine_setup_sw(struct gk20a *g)
{
struct nvgpu_fifo *f = &g->fifo;
int err = 0;
size_t size;
f->max_engines = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_ENGINES);
size = nvgpu_safe_mult_u64(f->max_engines,
sizeof(struct nvgpu_device *));
/*
* Allocate the two device lists for host devices.
*/
f->host_engines = nvgpu_kzalloc(g, size);
if (f->host_engines == NULL) {
nvgpu_err(g, "OOM allocating host engine list");
return -ENOMEM;
}
f->active_engines = nvgpu_kzalloc(g, size);
if (f->active_engines == NULL) {
nvgpu_err(g, "no mem for active engine list");
err = -ENOMEM;
goto clean_up_engine_info;
}
err = nvgpu_engine_init_info(f);
if (err != 0) {
nvgpu_err(g, "init engine info failed");
goto clean_up;
}
return 0;
clean_up:
nvgpu_kfree(g, f->active_engines);
f->active_engines = NULL;
clean_up_engine_info:
nvgpu_kfree(g, f->host_engines);
f->host_engines = NULL;
return err;
}
void nvgpu_engine_cleanup_sw(struct gk20a *g)
{
struct nvgpu_fifo *f = &g->fifo;
f->num_engines = 0;
nvgpu_kfree(g, f->host_engines);
f->host_engines = NULL;
nvgpu_kfree(g, f->active_engines);
f->active_engines = NULL;
}
#ifdef CONFIG_NVGPU_ENGINE_RESET
static void nvgpu_engine_gr_reset(struct gk20a *g)
{
struct nvgpu_swprofiler *prof = &g->fifo.eng_reset_profiler;
int err = 0;
nvgpu_swprofile_snapshot(prof, PROF_ENG_RESET_PREAMBLE);
#ifdef CONFIG_NVGPU_POWER_PG
if (nvgpu_pg_elpg_disable(g) != 0) {
nvgpu_err(g, "failed to disable elpg");
}
#endif
nvgpu_swprofile_snapshot(prof, PROF_ENG_RESET_ELPG_DISABLE);
#ifdef CONFIG_NVGPU_FECS_TRACE
/*
* Resetting engine will alter read/write index. Need to flush
* circular buffer before re-enabling FECS.
*/
if (g->ops.gr.fecs_trace.reset != NULL) {
if (g->ops.gr.fecs_trace.reset(g) != 0) {
nvgpu_warn(g, "failed to reset fecs traces");
}
}
#endif
nvgpu_swprofile_snapshot(prof, PROF_ENG_RESET_FECS_TRACE_RESET);
/*
* The HALT_PIPELINE method and GR reset during recovery are supported
* starting with nvgpu-next simulation.
*/
err = g->ops.gr.falcon.ctrl_ctxsw(g,
NVGPU_GR_FALCON_METHOD_HALT_PIPELINE, 0U, NULL);
if (err != 0) {
nvgpu_err(g, "failed to halt gr pipe");
}
nvgpu_swprofile_snapshot(prof, PROF_ENG_RESET_HALT_PIPELINE);
/*
* Resetting only the engine is not enough; do the full init
* sequence.
*/
nvgpu_log(g, gpu_dbg_rec, "resetting gr engine");
err = nvgpu_gr_reset(g);
if (err != 0) {
nvgpu_err(g, "failed to reset gr engine");
}
#ifdef CONFIG_NVGPU_POWER_PG
if (nvgpu_pg_elpg_enable(g) != 0) {
nvgpu_err(g, "failed to set enable elpg");
}
nvgpu_swprofile_snapshot(prof, PROF_ENG_RESET_ELPG_REENABLE);
#endif
}
void nvgpu_engine_reset(struct gk20a *g, u32 engine_id)
{
struct nvgpu_swprofiler *prof = &g->fifo.eng_reset_profiler;
const struct nvgpu_device *dev;
int err = 0;
u32 gr_instance_id;
nvgpu_log_fn(g, " ");
if (g == NULL) {
return;
}
nvgpu_swprofile_begin_sample(prof);
dev = nvgpu_engine_get_active_eng_info(g, engine_id);
if (dev == NULL) {
nvgpu_err(g, "unsupported engine_id %d", engine_id);
return;
}
if (!nvgpu_device_is_ce(g, dev) &&
!nvgpu_device_is_graphics(g, dev)) {
nvgpu_warn(g, "Ignoring reset for non-host engine.");
return;
}
/*
* Simple case first: reset a copy engine.
*/
if (nvgpu_device_is_ce(g, dev)) {
err = nvgpu_mc_reset_dev(g, dev);
if (err != 0) {
nvgpu_log_info(g, "CE engine [id:%u] reset failed",
dev->engine_id);
}
return;
}
/*
* Now reset a GR engine.
*/
gr_instance_id =
nvgpu_grmgr_get_gr_instance_id_for_syspipe(
g, dev->inst_id);
nvgpu_gr_exec_for_instance(g,
gr_instance_id, nvgpu_engine_gr_reset(g));
}
#endif
u32 nvgpu_engine_get_fast_ce_runlist_id(struct gk20a *g)
{
const struct nvgpu_device *dev;
u32 nr_lces;
u32 i;
/*
* Obtain a runlist ID for the fastest available CE. The priority order
* is:
*
* 1. Last available LCE
* 2. Last available COPY[0-2]
* 3. GRAPHICS runlist as a last resort.
*/
nr_lces = nvgpu_device_count(g, NVGPU_DEVTYPE_LCE);
if (nr_lces > 0U) {
dev = nvgpu_device_get(g,
NVGPU_DEVTYPE_LCE,
nr_lces - 1U);
nvgpu_assert(dev != NULL);
return dev->runlist_id;
}
/*
* Note: this only works since NVGPU_DEVTYPE_GRAPHICS is 0 and the COPYx
* are all > 0.
*/
for (i = NVGPU_DEVTYPE_COPY2; i >= NVGPU_DEVTYPE_COPY0; i--) {
dev = nvgpu_device_get(g, i, i - NVGPU_DEVTYPE_COPY0);
if (dev != NULL) {
return dev->runlist_id;
}
}
/*
* Fall back to GR.
*/
dev = nvgpu_device_get(g, NVGPU_DEVTYPE_GRAPHICS, 0);
nvgpu_assert(dev != NULL);
return dev->runlist_id;
}
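/*
 * Worked example, illustrative only: on a GPU that exposes LCE0..LCE8
 * the function above returns the runlist ID of LCE8; on an older chip
 * with only COPY0..COPY2 it returns COPY2's runlist; and a chip with
 * neither would fall back to the GR runlist. The engine counts here are
 * assumptions for illustration, not a statement about any particular
 * chip.
 */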
u32 nvgpu_engine_get_gr_runlist_id(struct gk20a *g)
{
const struct nvgpu_device *dev;
dev = nvgpu_device_get(g, NVGPU_DEVTYPE_GRAPHICS, 0);
if (dev == NULL) {
nvgpu_warn(g, "No GR device on this GPU?!");
return NVGPU_INVALID_RUNLIST_ID;
}
return dev->runlist_id;
}
bool nvgpu_engine_is_valid_runlist_id(struct gk20a *g, u32 runlist_id)
{
u32 i;
struct nvgpu_fifo *f = &g->fifo;
for (i = 0U; i < f->num_engines; i++) {
const struct nvgpu_device *dev = f->active_engines[i];
if (dev->runlist_id == runlist_id) {
return true;
}
}
return false;
}
/*
* Link engine IDs to MMU fault IDs and vice versa.
*/
u32 nvgpu_engine_id_to_mmu_fault_id(struct gk20a *g, u32 engine_id)
{
const struct nvgpu_device *dev;
dev = nvgpu_engine_get_active_eng_info(g, engine_id);
if (dev == NULL) {
nvgpu_err(g,
"engine_id: %u is not in active list",
engine_id);
return NVGPU_INVALID_ENG_ID;
}
return dev->fault_id;
}
u32 nvgpu_engine_mmu_fault_id_to_engine_id(struct gk20a *g, u32 fault_id)
{
u32 i;
const struct nvgpu_device *dev;
struct nvgpu_fifo *f = &g->fifo;
for (i = 0U; i < f->num_engines; i++) {
dev = f->active_engines[i];
if (dev->fault_id == fault_id) {
return dev->engine_id;
}
}
return NVGPU_INVALID_ENG_ID;
}
u32 nvgpu_engine_get_mask_on_id(struct gk20a *g, u32 id, bool is_tsg)
{
unsigned int i;
u32 engines = 0;
struct nvgpu_engine_status_info engine_status;
u32 ctx_id;
u32 type;
bool busy;
for (i = 0; i < g->fifo.num_engines; i++) {
const struct nvgpu_device *dev = g->fifo.active_engines[i];
g->ops.engine_status.read_engine_status_info(g,
dev->engine_id, &engine_status);
if (nvgpu_engine_status_is_ctxsw_load(
&engine_status)) {
nvgpu_engine_status_get_next_ctx_id_type(
&engine_status, &ctx_id, &type);
} else {
nvgpu_engine_status_get_ctx_id_type(
&engine_status, &ctx_id, &type);
}
busy = engine_status.is_busy;
if (!busy || !(ctx_id == id)) {
continue;
}
if ((is_tsg && (type == ENGINE_STATUS_CTX_ID_TYPE_TSGID)) ||
(!is_tsg && (type == ENGINE_STATUS_CTX_ID_TYPE_CHID))) {
engines |= BIT32(dev->engine_id);
}
}
return engines;
}
static int nvgpu_engine_init_one_dev(struct nvgpu_fifo *f,
const struct nvgpu_device *dev)
{
bool found;
struct nvgpu_device *dev_rw;
struct gk20a *g = f->g;
dev_rw = (struct nvgpu_device *)dev;
/*
* Populate the PBDMA info for this device; ideally it'd be done
* during device init, but the FIFO unit is not out of reset that
* early in the nvgpu_finalize_poweron() sequence.
*
* We only need to do this for native; vGPU already has pbdma_id
* populated during device initialization.
*/
if (g->ops.fifo.find_pbdma_for_runlist != NULL) {
found = g->ops.fifo.find_pbdma_for_runlist(g,
dev->runlist_id,
&dev_rw->pbdma_id);
if (!found) {
nvgpu_err(g, "busted pbdma map");
return -EINVAL;
}
}
#if defined(CONFIG_NVGPU_NEXT)
{
int err = nvgpu_next_engine_init_one_dev(g, dev);
if (err != 0) {
return err;
}
}
#endif
f->host_engines[dev->engine_id] = dev;
f->active_engines[f->num_engines] = dev;
++f->num_engines;
return 0;
}
int nvgpu_engine_init_info(struct nvgpu_fifo *f)
{
int err;
struct gk20a *g = f->g;
const struct nvgpu_device *dev;
f->num_engines = 0;
nvgpu_log(g, gpu_dbg_device, "Loading host engines from device list");
nvgpu_log(g, gpu_dbg_device, " GFX devices: %u",
nvgpu_device_count(g, NVGPU_DEVTYPE_GRAPHICS));
nvgpu_device_for_each(g, dev, NVGPU_DEVTYPE_GRAPHICS) {
err = nvgpu_engine_init_one_dev(f, dev);
if (err != 0) {
return err;
}
}
return g->ops.engine.init_ce_info(f);
}
void nvgpu_engine_get_id_and_type(struct gk20a *g, u32 engine_id,
u32 *id, u32 *type)
{
struct nvgpu_engine_status_info engine_status;
g->ops.engine_status.read_engine_status_info(g, engine_id,
&engine_status);
/* use next_id if context load is failing */
if (nvgpu_engine_status_is_ctxsw_load(
&engine_status)) {
nvgpu_engine_status_get_next_ctx_id_type(
&engine_status, id, type);
} else {
nvgpu_engine_status_get_ctx_id_type(
&engine_status, id, type);
}
}
u32 nvgpu_engine_find_busy_doing_ctxsw(struct gk20a *g,
u32 *id_ptr, bool *is_tsg_ptr)
{
u32 i;
u32 id = U32_MAX;
bool is_tsg = false;
u32 mailbox2;
struct nvgpu_engine_status_info engine_status;
const struct nvgpu_device *dev = NULL;
for (i = 0U; i < g->fifo.num_engines; i++) {
dev = g->fifo.active_engines[i];
g->ops.engine_status.read_engine_status_info(g, dev->engine_id,
&engine_status);
/*
* we are interested in busy engines that
* are doing context switch
*/
if (!engine_status.is_busy ||
!nvgpu_engine_status_is_ctxsw(&engine_status)) {
continue;
}
if (nvgpu_engine_status_is_ctxsw_load(&engine_status)) {
id = engine_status.ctx_next_id;
is_tsg = nvgpu_engine_status_is_next_ctx_type_tsg(
&engine_status);
} else if (nvgpu_engine_status_is_ctxsw_switch(&engine_status)) {
mailbox2 = g->ops.gr.falcon.read_fecs_ctxsw_mailbox(g,
NVGPU_GR_FALCON_FECS_CTXSW_MAILBOX2);
if ((mailbox2 & FECS_METHOD_WFI_RESTORE) != 0U) {
id = engine_status.ctx_next_id;
is_tsg = nvgpu_engine_status_is_next_ctx_type_tsg(
&engine_status);
} else {
id = engine_status.ctx_id;
is_tsg = nvgpu_engine_status_is_ctx_type_tsg(
&engine_status);
}
} else {
id = engine_status.ctx_id;
is_tsg = nvgpu_engine_status_is_ctx_type_tsg(
&engine_status);
}
break;
}
*id_ptr = id;
*is_tsg_ptr = is_tsg;
return dev->engine_id;
}
u32 nvgpu_engine_get_runlist_busy_engines(struct gk20a *g, u32 runlist_id)
{
struct nvgpu_fifo *f = &g->fifo;
u32 i, eng_bitmask = 0U;
struct nvgpu_engine_status_info engine_status;
for (i = 0U; i < f->num_engines; i++) {
const struct nvgpu_device *dev = f->active_engines[i];
g->ops.engine_status.read_engine_status_info(g, dev->engine_id,
&engine_status);
if (engine_status.is_busy && (dev->runlist_id == runlist_id)) {
eng_bitmask |= BIT32(dev->engine_id);
}
}
return eng_bitmask;
}
#ifdef CONFIG_NVGPU_DEBUGGER
bool nvgpu_engine_should_defer_reset(struct gk20a *g, u32 engine_id,
u32 engine_subid, bool fake_fault)
{
const struct nvgpu_device *dev;
dev = nvgpu_engine_get_active_eng_info(g, engine_id);
if (dev == NULL) {
return false;
}
/*
* Channel recovery is only deferred if an SM debugger
* is attached and MMU debug mode is enabled.
*/
if (!g->ops.gr.sm_debugger_attached(g) ||
!g->ops.fb.is_debug_mode_enabled(g)) {
return false;
}
/* if this fault is fake (due to RC recovery), don't defer recovery */
if (fake_fault) {
return false;
}
if (dev->type != NVGPU_DEVTYPE_GRAPHICS) {
return false;
}
return g->ops.engine.is_fault_engine_subid_gpc(g, engine_subid);
}
#endif
u32 nvgpu_engine_mmu_fault_id_to_veid(struct gk20a *g, u32 mmu_fault_id,
u32 gr_eng_fault_id)
{
struct nvgpu_fifo *f = &g->fifo;
u32 num_subctx;
u32 veid = INVAL_ID;
num_subctx = f->max_subctx_count;
if ((mmu_fault_id >= gr_eng_fault_id) &&
(mmu_fault_id < nvgpu_safe_add_u32(gr_eng_fault_id,
num_subctx))) {
veid = mmu_fault_id - gr_eng_fault_id;
}
return veid;
}
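/*
 * Worked example with made-up numbers: if the GR engine's base MMU fault
 * ID were 0x100 and max_subctx_count were 64, fault IDs 0x100..0x13f
 * would map to VEIDs 0..63, and anything outside that window yields
 * INVAL_ID. The helper below models the same range check with plain
 * arithmetic (no overflow-safe add); it is a sketch, not driver code.
 */
static inline u32 veid_sketch(u32 mmu_fault_id, u32 gr_base, u32 num_subctx)
{
        if ((mmu_fault_id >= gr_base) &&
            (mmu_fault_id < (gr_base + num_subctx))) {
                return mmu_fault_id - gr_base;
        }
        return INVAL_ID;
}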
static u32 nvgpu_engine_mmu_fault_id_to_eng_id_and_veid(struct gk20a *g,
u32 mmu_fault_id, u32 *veid)
{
u32 i;
u32 engine_id = INVAL_ID;
const struct nvgpu_device *dev;
struct nvgpu_fifo *f = &g->fifo;
for (i = 0U; i < f->num_engines; i++) {
dev = f->active_engines[i];
if (dev->type == NVGPU_DEVTYPE_GRAPHICS) {
*veid = nvgpu_engine_mmu_fault_id_to_veid(g,
mmu_fault_id, dev->fault_id);
if (*veid != INVAL_ID) {
engine_id = dev->engine_id;
break;
}
} else {
if (dev->fault_id == mmu_fault_id) {
engine_id = dev->engine_id;
*veid = INVAL_ID;
break;
}
}
}
return engine_id;
}
void nvgpu_engine_mmu_fault_id_to_eng_ve_pbdma_id(struct gk20a *g,
u32 mmu_fault_id, u32 *engine_id, u32 *veid, u32 *pbdma_id)
{
*engine_id = nvgpu_engine_mmu_fault_id_to_eng_id_and_veid(g,
mmu_fault_id, veid);
if (*engine_id == INVAL_ID) {
*pbdma_id = g->ops.fifo.mmu_fault_id_to_pbdma_id(g,
mmu_fault_id);
} else {
*pbdma_id = INVAL_ID;
}
}

@@ -0,0 +1,315 @@
/*
* FIFO
*
* Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/trace.h>
#include <nvgpu/dma.h>
#include <nvgpu/fifo.h>
#include <nvgpu/engines.h>
#include <nvgpu/runlist.h>
#include <nvgpu/preempt.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/channel.h>
#include <nvgpu/pbdma.h>
#include <nvgpu/tsg.h>
#include <nvgpu/vm_area.h>
#include <nvgpu/nvgpu_err.h>
#include <nvgpu/cic.h>
#include <nvgpu/mc.h>
#include <nvgpu/swprofile.h>
#include <nvgpu/fifo/swprofile.h>
static const char *nvgpu_fifo_kickoff_profile_events[] = {
NVGPU_FIFO_KICKOFF_PROFILE_EVENTS,
};
static const char *nvgpu_fifo_recovery_profile_events[] = {
NVGPU_FIFO_RECOVERY_PROFILE_EVENTS,
};
static const char *nvgpu_fifo_engine_reset_events[] = {
NVGPU_FIFO_ENGINE_RESET_EVENTS,
};
void nvgpu_fifo_cleanup_sw_common(struct gk20a *g)
{
struct nvgpu_fifo *f = &g->fifo;
nvgpu_log_fn(g, " ");
#ifdef CONFIG_NVGPU_USERD
g->ops.userd.cleanup_sw(g);
#endif
nvgpu_channel_cleanup_sw(g);
nvgpu_tsg_cleanup_sw(g);
nvgpu_runlist_cleanup_sw(g);
nvgpu_engine_cleanup_sw(g);
if (g->ops.pbdma.cleanup_sw != NULL) {
g->ops.pbdma.cleanup_sw(g);
}
#ifdef CONFIG_NVGPU_DEBUGGER
f->deferred_reset_pending = false;
nvgpu_mutex_destroy(&f->deferred_reset_mutex);
#endif
nvgpu_mutex_destroy(&f->engines_reset_mutex);
nvgpu_mutex_destroy(&f->intr.isr.mutex);
f->sw_ready = false;
}
void nvgpu_fifo_cleanup_sw(struct gk20a *g)
{
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
nvgpu_channel_worker_deinit(g);
#endif
nvgpu_fifo_cleanup_sw_common(g);
}
static void nvgpu_fifo_remove_support(struct nvgpu_fifo *f)
{
struct gk20a *g = f->g;
g->ops.fifo.cleanup_sw(g);
}
int nvgpu_fifo_setup_sw_common(struct gk20a *g)
{
struct nvgpu_fifo *f = &g->fifo;
int err = 0;
nvgpu_log_fn(g, " ");
f->g = g;
nvgpu_mutex_init(&f->intr.isr.mutex);
nvgpu_mutex_init(&f->engines_reset_mutex);
#ifdef CONFIG_NVGPU_DEBUGGER
nvgpu_mutex_init(&f->deferred_reset_mutex);
#endif
nvgpu_swprofile_initialize(g, &f->kickoff_profiler,
nvgpu_fifo_kickoff_profile_events);
nvgpu_swprofile_initialize(g, &f->recovery_profiler,
nvgpu_fifo_recovery_profile_events);
nvgpu_swprofile_initialize(g, &f->eng_reset_profiler,
nvgpu_fifo_engine_reset_events);
err = nvgpu_channel_setup_sw(g);
if (err != 0) {
nvgpu_err(g, "failed to init channel support");
goto clean_up;
}
err = nvgpu_tsg_setup_sw(g);
if (err != 0) {
nvgpu_err(g, "failed to init tsg support");
goto clean_up_channel;
}
if (g->ops.pbdma.setup_sw != NULL) {
err = g->ops.pbdma.setup_sw(g);
if (err != 0) {
nvgpu_err(g, "failed to init pbdma support");
goto clean_up_tsg;
}
}
err = nvgpu_engine_setup_sw(g);
if (err != 0) {
nvgpu_err(g, "failed to init engine support");
goto clean_up_pbdma;
}
err = nvgpu_runlist_setup_sw(g);
if (err != 0) {
nvgpu_err(g, "failed to init runlist support");
goto clean_up_engine;
}
#ifdef CONFIG_NVGPU_USERD
err = g->ops.userd.setup_sw(g);
if (err != 0) {
nvgpu_err(g, "failed to init userd support");
goto clean_up_runlist;
}
#endif
f->remove_support = nvgpu_fifo_remove_support;
nvgpu_log_fn(g, "done");
return 0;
#ifdef CONFIG_NVGPU_USERD
clean_up_runlist:
nvgpu_runlist_cleanup_sw(g);
#endif
clean_up_engine:
nvgpu_engine_cleanup_sw(g);
clean_up_pbdma:
if (g->ops.pbdma.cleanup_sw != NULL) {
g->ops.pbdma.cleanup_sw(g);
}
clean_up_tsg:
nvgpu_tsg_cleanup_sw(g);
clean_up_channel:
nvgpu_channel_cleanup_sw(g);
clean_up:
nvgpu_err(g, "init fifo support failed");
return err;
}
int nvgpu_fifo_setup_sw(struct gk20a *g)
{
struct nvgpu_fifo *f = &g->fifo;
int err = 0;
nvgpu_log_fn(g, " ");
if (f->sw_ready) {
nvgpu_log_fn(g, "skip init");
return 0;
}
err = nvgpu_fifo_setup_sw_common(g);
if (err != 0) {
nvgpu_err(g, "fifo common sw setup failed, err=%d", err);
return err;
}
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
err = nvgpu_channel_worker_init(g);
if (err != 0) {
nvgpu_err(g, "worker init fail, err=%d", err);
goto clean_up;
}
#endif
f->sw_ready = true;
nvgpu_log_fn(g, "done");
return 0;
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
clean_up:
nvgpu_fifo_cleanup_sw_common(g);
return err;
#endif
}
int nvgpu_fifo_init_support(struct gk20a *g)
{
int err;
err = g->ops.fifo.setup_sw(g);
if (err != 0) {
nvgpu_err(g, "fifo sw setup failed, err=%d", err);
return err;
}
if (g->ops.fifo.init_fifo_setup_hw != NULL) {
err = g->ops.fifo.init_fifo_setup_hw(g);
if (err != 0) {
nvgpu_err(g, "fifo hw setup failed, err=%d", err);
goto clean_up;
}
}
return 0;
clean_up:
nvgpu_fifo_cleanup_sw_common(g);
return err;
}
static const char * const pbdma_ch_eng_status_str[] = {
"invalid",
"valid",
"NA",
"NA",
"NA",
"load",
"save",
"switch",
};
static const char * const not_found_str[] = {
"NOT FOUND"
};
const char *nvgpu_fifo_decode_pbdma_ch_eng_status(u32 index)
{
if (index >= ARRAY_SIZE(pbdma_ch_eng_status_str)) {
return not_found_str[0];
} else {
return pbdma_ch_eng_status_str[index];
}
}
static void disable_fifo_interrupts(struct gk20a *g)
{
/* Disable fifo interrupts */
g->ops.fifo.intr_0_enable(g, false);
g->ops.fifo.intr_1_enable(g, false);
if (g->ops.fifo.intr_top_enable == NULL) {
nvgpu_cic_intr_stall_unit_config(g, NVGPU_CIC_INTR_UNIT_FIFO,
NVGPU_CIC_INTR_DISABLE);
nvgpu_cic_intr_nonstall_unit_config(g, NVGPU_CIC_INTR_UNIT_FIFO,
NVGPU_CIC_INTR_DISABLE);
} else {
g->ops.fifo.intr_top_enable(g, NVGPU_CIC_INTR_DISABLE);
}
}
int nvgpu_fifo_suspend(struct gk20a *g)
{
nvgpu_log_fn(g, " ");
if (g->ops.mm.is_bar1_supported(g)) {
g->ops.fifo.bar1_snooping_disable(g);
}
disable_fifo_interrupts(g);
nvgpu_log_fn(g, "done");
return 0;
}
void nvgpu_fifo_sw_quiesce(struct gk20a *g)
{
u32 runlist_mask = U32_MAX;
g->ops.runlist.write_state(g, runlist_mask, RUNLIST_DISABLED);
/* Preempt all runlists */
nvgpu_fifo_preempt_runlists_for_rc(g, runlist_mask);
}

@@ -0,0 +1,149 @@
/*
* Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/log.h>
#include <nvgpu/lock.h>
#include <nvgpu/kmem.h>
#include <nvgpu/barrier.h>
#include <nvgpu/circ_buf.h>
#include <nvgpu/channel.h>
#include <nvgpu/job.h>
#include <nvgpu/priv_cmdbuf.h>
#include <nvgpu/fence.h>
static inline struct nvgpu_channel_job *
channel_gk20a_job_from_list(struct nvgpu_list_node *node)
{
return (struct nvgpu_channel_job *)
((uintptr_t)node - offsetof(struct nvgpu_channel_job, list));
};
int nvgpu_channel_alloc_job(struct nvgpu_channel *c,
struct nvgpu_channel_job **job_out)
{
unsigned int put = c->joblist.pre_alloc.put;
unsigned int get = c->joblist.pre_alloc.get;
unsigned int next = (put + 1) % c->joblist.pre_alloc.length;
bool full = next == get;
if (full) {
return -EAGAIN;
}
*job_out = &c->joblist.pre_alloc.jobs[put];
(void) memset(*job_out, 0, sizeof(**job_out));
return 0;
}
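/*
 * Worked example, illustrative only: with pre_alloc.length == 5 the
 * queue holds at most 4 jobs. Starting from get == put == 0, four
 * successful allocations leave put == 4; the fifth computes
 * next == (4 + 1) % 5 == 0 == get, so the queue reports full and
 * -EAGAIN is returned until a completed job advances get.
 */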
void nvgpu_channel_free_job(struct nvgpu_channel *c,
struct nvgpu_channel_job *job)
{
/*
* Nothing needed for now. The job contents are preallocated. The
* completion fence may briefly outlive the job, but the job memory is
* reclaimed only when a new submit comes in and the ringbuffer has run
* out of space.
*/
}
void nvgpu_channel_joblist_lock(struct nvgpu_channel *c)
{
nvgpu_mutex_acquire(&c->joblist.pre_alloc.read_lock);
}
void nvgpu_channel_joblist_unlock(struct nvgpu_channel *c)
{
nvgpu_mutex_release(&c->joblist.pre_alloc.read_lock);
}
struct nvgpu_channel_job *nvgpu_channel_joblist_peek(struct nvgpu_channel *c)
{
unsigned int get = c->joblist.pre_alloc.get;
unsigned int put = c->joblist.pre_alloc.put;
bool empty = get == put;
return empty ? NULL : &c->joblist.pre_alloc.jobs[get];
}
void nvgpu_channel_joblist_add(struct nvgpu_channel *c,
struct nvgpu_channel_job *job)
{
c->joblist.pre_alloc.put = (c->joblist.pre_alloc.put + 1U) %
(c->joblist.pre_alloc.length);
}
void nvgpu_channel_joblist_delete(struct nvgpu_channel *c,
struct nvgpu_channel_job *job)
{
c->joblist.pre_alloc.get = (c->joblist.pre_alloc.get + 1U) %
(c->joblist.pre_alloc.length);
}
int nvgpu_channel_joblist_init(struct nvgpu_channel *c, u32 num_jobs)
{
int err;
u32 size;
size = (u32)sizeof(struct nvgpu_channel_job);
if (num_jobs > nvgpu_safe_sub_u32(U32_MAX / size, 1U)) {
err = -ERANGE;
goto clean_up;
}
/*
* The max capacity of this ring buffer is the alloc size minus one (in
* units of item slot), so allocate a size of (num_jobs + 1) * size
* bytes.
*/
c->joblist.pre_alloc.jobs = nvgpu_vzalloc(c->g,
nvgpu_safe_mult_u32(
nvgpu_safe_add_u32(num_jobs, 1U),
size));
if (c->joblist.pre_alloc.jobs == NULL) {
err = -ENOMEM;
goto clean_up;
}
/*
* length is the allocation size of the ringbuffer; the number of jobs
* that fit is one less.
*/
c->joblist.pre_alloc.length = nvgpu_safe_add_u32(num_jobs, 1U);
c->joblist.pre_alloc.put = 0;
c->joblist.pre_alloc.get = 0;
return 0;
clean_up:
nvgpu_vfree(c->g, c->joblist.pre_alloc.jobs);
(void) memset(&c->joblist.pre_alloc, 0, sizeof(c->joblist.pre_alloc));
return err;
}
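/*
 * Standalone sketch of the one-slot-empty ring buffer convention used
 * above, with plain integers: allocating num_jobs + 1 slots lets
 * put == get mean "empty" and (put + 1) % length == get mean "full",
 * so no separate element count is needed. This mirrors the logic in
 * nvgpu_channel_alloc_job() and joblist_peek() but is illustrative only.
 */
static inline bool joblist_sketch_empty(u32 get, u32 put)
{
        return get == put;
}

static inline bool joblist_sketch_full(u32 get, u32 put, u32 length)
{
        return ((put + 1U) % length) == get;
}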
void nvgpu_channel_joblist_deinit(struct nvgpu_channel *c)
{
if (c->joblist.pre_alloc.jobs != NULL) {
nvgpu_vfree(c->g, c->joblist.pre_alloc.jobs);
c->joblist.pre_alloc.jobs = NULL;
}
}

@@ -0,0 +1,55 @@
/*
* Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/gk20a.h>
#include <nvgpu/pbdma.h>
static void nvgpu_pbdma_init_intr_descs(struct gk20a *g)
{
struct nvgpu_fifo *f = &g->fifo;
if (g->ops.pbdma.device_fatal_0_intr_descs != NULL) {
f->intr.pbdma.device_fatal_0 =
g->ops.pbdma.device_fatal_0_intr_descs();
}
if (g->ops.pbdma.channel_fatal_0_intr_descs != NULL) {
f->intr.pbdma.channel_fatal_0 =
g->ops.pbdma.channel_fatal_0_intr_descs();
}
if (g->ops.pbdma.restartable_0_intr_descs != NULL) {
f->intr.pbdma.restartable_0 =
g->ops.pbdma.restartable_0_intr_descs();
}
}
int nvgpu_pbdma_setup_sw(struct gk20a *g)
{
nvgpu_pbdma_init_intr_descs(g);
return 0;
}
void nvgpu_pbdma_cleanup_sw(struct gk20a *g)
{
return;
}

@@ -0,0 +1,54 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/pbdma_status.h>
bool nvgpu_pbdma_status_is_chsw_switch(struct nvgpu_pbdma_status_info
*pbdma_status)
{
return pbdma_status->chsw_status == NVGPU_PBDMA_CHSW_STATUS_SWITCH;
}
bool nvgpu_pbdma_status_is_chsw_load(struct nvgpu_pbdma_status_info
*pbdma_status)
{
return pbdma_status->chsw_status == NVGPU_PBDMA_CHSW_STATUS_LOAD;
}
bool nvgpu_pbdma_status_is_chsw_save(struct nvgpu_pbdma_status_info
*pbdma_status)
{
return pbdma_status->chsw_status == NVGPU_PBDMA_CHSW_STATUS_SAVE;
}
bool nvgpu_pbdma_status_is_chsw_valid(struct nvgpu_pbdma_status_info
*pbdma_status)
{
return pbdma_status->chsw_status == NVGPU_PBDMA_CHSW_STATUS_VALID;
}
bool nvgpu_pbdma_status_is_id_type_tsg(struct nvgpu_pbdma_status_info
*pbdma_status)
{
return pbdma_status->id_type == PBDMA_STATUS_ID_TYPE_TSGID;
}
bool nvgpu_pbdma_status_is_next_id_type_tsg(struct nvgpu_pbdma_status_info
*pbdma_status)
{
return pbdma_status->next_id_type == PBDMA_STATUS_NEXT_ID_TYPE_TSGID;
}

@@ -0,0 +1,220 @@
/*
* Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/soc.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/errata.h>
#include <nvgpu/runlist.h>
#include <nvgpu/types.h>
#include <nvgpu/channel.h>
#include <nvgpu/tsg.h>
#include <nvgpu/preempt.h>
#include <nvgpu/nvgpu_err.h>
#include <nvgpu/cic.h>
#include <nvgpu/rc.h>
#ifdef CONFIG_NVGPU_LS_PMU
#include <nvgpu/pmu/mutex.h>
#endif
u32 nvgpu_preempt_get_timeout(struct gk20a *g)
{
return g->ctxsw_timeout_period_ms;
}
int nvgpu_fifo_preempt_tsg(struct gk20a *g, struct nvgpu_tsg *tsg)
{
int ret = 0;
u32 preempt_retry_count = 10U;
u32 preempt_retry_timeout =
nvgpu_preempt_get_timeout(g) / preempt_retry_count;
#ifdef CONFIG_NVGPU_LS_PMU
u32 token = PMU_INVALID_MUTEX_OWNER_ID;
int mutex_ret = 0;
#endif
nvgpu_log_fn(g, "tsgid: %d", tsg->tsgid);
if (tsg->runlist == NULL) {
return 0;
}
do {
nvgpu_mutex_acquire(&tsg->runlist->runlist_lock);
if (nvgpu_is_errata_present(g, NVGPU_ERRATA_2016608)) {
nvgpu_runlist_set_state(g, BIT32(tsg->runlist->id),
RUNLIST_DISABLED);
}
#ifdef CONFIG_NVGPU_LS_PMU
mutex_ret = nvgpu_pmu_lock_acquire(g, g->pmu,
PMU_MUTEX_ID_FIFO, &token);
#endif
g->ops.fifo.preempt_trigger(g, tsg->tsgid, ID_TYPE_TSG);
/*
* Poll for preempt done. If stalling interrupts are pending
* while the preempt is in progress, poll for the stalling
* interrupts to finish (based on the return value of this
* function) and retry the preempt.
* If the HW is hung, the last retry attempts to identify the
* hung engines, sets the runlist reset_eng_bitmask and marks
* the preemption complete.
*/
ret = g->ops.fifo.is_preempt_pending(g, tsg->tsgid,
ID_TYPE_TSG, preempt_retry_count > 1U);
#ifdef CONFIG_NVGPU_LS_PMU
if (mutex_ret == 0) {
int err = nvgpu_pmu_lock_release(g, g->pmu,
PMU_MUTEX_ID_FIFO, &token);
if (err != 0) {
nvgpu_err(g, "PMU_MUTEX_ID_FIFO not released err=%d", err);
}
}
#endif
if (nvgpu_is_errata_present(g, NVGPU_ERRATA_2016608)) {
nvgpu_runlist_set_state(g, BIT32(tsg->runlist->id),
RUNLIST_ENABLED);
}
nvgpu_mutex_release(&tsg->runlist->runlist_lock);
if (ret != -EAGAIN) {
break;
}
ret = nvgpu_cic_wait_for_stall_interrupts(g, preempt_retry_timeout);
if (ret != 0) {
nvgpu_log_info(g, "wait for stall interrupts failed %d", ret);
}
} while (--preempt_retry_count != 0U);
if (ret != 0) {
if (nvgpu_platform_is_silicon(g)) {
nvgpu_err(g, "preempt timed out for tsgid: %u, "
"ctxsw timeout will trigger recovery if needed",
tsg->tsgid);
} else {
nvgpu_rc_preempt_timeout(g, tsg);
}
}
return ret;
}
int nvgpu_preempt_channel(struct gk20a *g, struct nvgpu_channel *ch)
{
int err;
struct nvgpu_tsg *tsg = nvgpu_tsg_from_ch(ch);
if (tsg != NULL) {
err = g->ops.fifo.preempt_tsg(ch->g, tsg);
} else {
err = g->ops.fifo.preempt_channel(ch->g, ch);
}
return err;
}
/* called from rc */
int nvgpu_preempt_poll_tsg_on_pbdma(struct gk20a *g,
struct nvgpu_tsg *tsg)
{
unsigned long runlist_served_pbdmas;
unsigned long pbdma_id_bit;
u32 tsgid, pbdma_id;
if (g->ops.fifo.preempt_poll_pbdma == NULL) {
return 0;
}
tsgid = tsg->tsgid;
runlist_served_pbdmas = tsg->runlist->pbdma_bitmask;
for_each_set_bit(pbdma_id_bit, &runlist_served_pbdmas,
nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA)) {
pbdma_id = U32(pbdma_id_bit);
/*
* If the PBDMA preempt fails, the only option is to reset the
* GPU. Any sort of hang here indicates that the entire GPU's
* memory system would be blocked.
*/
if (g->ops.fifo.preempt_poll_pbdma(g, tsgid, pbdma_id) != 0) {
nvgpu_err(g, "PBDMA preempt failed");
return -EBUSY;
}
}
return 0;
}
/*
* This should be called with the runlist_lock held for all the
* runlists set in runlists_bitmask.
*/
void nvgpu_fifo_preempt_runlists_for_rc(struct gk20a *g, u32 runlists_bitmask)
{
struct nvgpu_fifo *f = &g->fifo;
u32 i;
#ifdef CONFIG_NVGPU_LS_PMU
u32 token = PMU_INVALID_MUTEX_OWNER_ID;
int mutex_ret = 0;
#endif
/* runlist_locks are already held by teardown and scheduling is disabled too */
nvgpu_log_fn(g, "preempt runlists_bitmask:0x%08x", runlists_bitmask);
#ifdef CONFIG_NVGPU_LS_PMU
mutex_ret = nvgpu_pmu_lock_acquire(g, g->pmu,
PMU_MUTEX_ID_FIFO, &token);
#endif
for (i = 0U; i < f->num_runlists; i++) {
struct nvgpu_runlist *runlist;
runlist = &f->active_runlists[i];
if ((BIT32(runlist->id) & runlists_bitmask) == 0U) {
continue;
}
/* issue runlist preempt */
g->ops.fifo.preempt_trigger(g, runlist->id,
ID_TYPE_RUNLIST);
#ifdef CONFIG_NVGPU_RECOVERY
/*
* Preemption will never complete in RC due to some
* fatal condition. Do not poll for preemption to
* complete. Reset engines served by runlists.
*/
runlist->reset_eng_bitmask = runlist->eng_bitmask;
#endif
}
#ifdef CONFIG_NVGPU_LS_PMU
if (mutex_ret == 0) {
int err = nvgpu_pmu_lock_release(g, g->pmu, PMU_MUTEX_ID_FIFO,
&token);
if (err != 0) {
nvgpu_err(g, "PMU_MUTEX_ID_FIFO not released err=%d",
err);
}
}
#endif
}

View File

@@ -0,0 +1,333 @@
/*
* Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/log.h>
#include <nvgpu/utils.h>
#include <nvgpu/log2.h>
#include <nvgpu/barrier.h>
#include <nvgpu/dma.h>
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/kmem.h>
#include <nvgpu/vm.h>
#include <nvgpu/priv_cmdbuf.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/trace.h>
#include <nvgpu/circ_buf.h>
struct priv_cmd_entry {
struct nvgpu_mem *mem;
u32 off; /* offset in mem, in u32 entries */
u32 fill_off; /* write offset from off, in u32 entries */
u32 size; /* in words */
u32 alloc_size;
};
struct priv_cmd_queue {
struct vm_gk20a *vm;
struct nvgpu_mem mem; /* pushbuf */
u32 size; /* allocated length in words */
u32 put; /* next entry will begin here */
u32 get; /* next entry to free begins here */
/* an entry is a fragment of the pushbuf memory */
struct priv_cmd_entry *entries;
u32 entries_len; /* allocated length */
u32 entry_put;
u32 entry_get;
};
/*
* Allocate the private command buffer queue, used for inserting commands
* before/after user submitted buffers.
*/
int nvgpu_priv_cmdbuf_queue_alloc(struct vm_gk20a *vm,
u32 job_count, struct priv_cmd_queue **queue)
{
struct gk20a *g = vm->mm->g;
struct priv_cmd_queue *q;
u64 size, tmp_size;
int err = 0;
u32 wait_size, incr_size;
u32 mem_per_job;
/*
* Sema size is at least as much as syncpt size, but semas may not be
* enabled in the build. If neither semas nor syncpts are enabled, priv
* cmdbufs, and thus kernel-mode submits with job tracking, are not
* supported.
*/
#ifdef CONFIG_NVGPU_SW_SEMAPHORE
wait_size = g->ops.sync.sema.get_wait_cmd_size();
incr_size = g->ops.sync.sema.get_incr_cmd_size();
#else
wait_size = g->ops.sync.syncpt.get_wait_cmd_size();
incr_size = g->ops.sync.syncpt.get_incr_cmd_size(true);
#endif
/*
* Compute the amount of priv_cmdbuf space we need. In general the
* worst case is the kernel inserts both a semaphore pre-fence and
* post-fence. Any sync-pt fences will take less memory so we can
* ignore them unless they're the only supported type. Jobs can also
* have more than one pre-fence, but that's abnormal and we'll return
* -EAGAIN if such jobs would fill the queue.
*
* A semaphore ACQ (fence-wait) is 8 words: semaphore_a, semaphore_b,
* semaphore_c, and semaphore_d. A semaphore INCR (fence-get) will be
* 10 words: all the same as an ACQ plus a non-stalling intr which is
* another 2 words. In reality these numbers vary by chip but we'll use
* 8 and 10 as examples.
*
* Given the job count, cmdbuf space is allocated such that each job
* can get one wait command and one increment command:
*
* job_count * (8 + 10) * 4 bytes
*
* These cmdbufs are inserted as gpfifo entries right before and after
* the user submitted gpfifo entries per submit.
*
* One extra slot is added to the queue length so that the requested
* job count can actually be allocated. This ring buffer implementation
* is full when the number of consumed entries is one less than the
* allocation size:
*
* alloc bytes = job_count * (wait + incr + 1) * slot in bytes
*/
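/*
* As a worked example (illustrative numbers only, not taken from any
* particular chip): with job_count = 128, wait_size = 8 and
* incr_size = 10, the request below is 128 * (8 + 10 + 1) * 4 = 9728
* bytes, which the power-of-two/page-align rounding turns into a
* 16 KiB allocation.
*/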
mem_per_job = nvgpu_safe_mult_u32(
nvgpu_safe_add_u32(
nvgpu_safe_add_u32(wait_size, incr_size),
1U),
(u32)sizeof(u32));
/* both operands are 32-bit values and mem_per_job is small, so the u64 product cannot overflow */
size = nvgpu_safe_mult_u64((u64)job_count, (u64)mem_per_job);
tmp_size = PAGE_ALIGN(roundup_pow_of_two(size));
if (tmp_size > U32_MAX) {
return -ERANGE;
}
size = (u32)tmp_size;
q = nvgpu_kzalloc(g, sizeof(*q));
if (q == NULL) {
return -ENOMEM;
}
q->vm = vm;
if (job_count > U32_MAX / 2U - 1U) {
err = -ERANGE;
goto err_free_queue;
}
/* One extra to account for the full condition: 2 * job_count + 1 */
q->entries_len = nvgpu_safe_mult_u32(2U,
nvgpu_safe_add_u32(job_count, 1U));
q->entries = nvgpu_vzalloc(g,
nvgpu_safe_mult_u64((u64)q->entries_len,
sizeof(*q->entries)));
if (q->entries == NULL) {
err = -ENOMEM;
goto err_free_queue;
}
err = nvgpu_dma_alloc_map_sys(vm, size, &q->mem);
if (err != 0) {
nvgpu_err(g, "%s: memory allocation failed", __func__);
goto err_free_entries;
}
tmp_size = q->mem.size / sizeof(u32);
nvgpu_assert(tmp_size <= U32_MAX);
q->size = (u32)tmp_size;
*queue = q;
return 0;
err_free_entries:
nvgpu_vfree(g, q->entries);
err_free_queue:
nvgpu_kfree(g, q);
return err;
}
void nvgpu_priv_cmdbuf_queue_free(struct priv_cmd_queue *q)
{
struct vm_gk20a *vm = q->vm;
struct gk20a *g = vm->mm->g;
nvgpu_dma_unmap_free(vm, &q->mem);
nvgpu_vfree(g, q->entries);
nvgpu_kfree(g, q);
}
/* allocate a cmd buffer with given size. size is number of u32 entries */
static int nvgpu_priv_cmdbuf_alloc_buf(struct priv_cmd_queue *q, u32 orig_size,
struct priv_cmd_entry *e)
{
struct gk20a *g = q->vm->mm->g;
u32 size = orig_size;
u32 free_count;
nvgpu_log_fn(g, "size %d", orig_size);
/*
* If free space in the end is less than requested, increase the size
* to make the real allocated space start from beginning. The hardware
* expects each cmdbuf to be contiguous in the dma space.
*
* This too-small leftover space at the end can happen because the
* requested wait and incr command buffers do not necessarily align
* with the whole buffer capacity. They don't always align because the
* buffer size is rounded up to the next power of two and because not
* all jobs necessarily use exactly one wait command.
*/
if (nvgpu_safe_add_u32(q->put, size) > q->size) {
size = orig_size + (q->size - q->put);
}
nvgpu_log_info(g, "priv cmd queue get:put %d:%d",
q->get, q->put);
nvgpu_assert(q->put < q->size);
nvgpu_assert(q->get < q->size);
nvgpu_assert(q->size > 0U);
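/*
* Standard power-of-two ring buffer accounting: q->size is assumed to
* be a power of two (see the rounding in queue alloc), so the mask
* yields the free space between put and get while keeping one word
* unused to distinguish a full queue from an empty one.
*/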
free_count = (q->size - q->put + q->get - 1U) & (q->size - 1U);
if (size > free_count) {
return -EAGAIN;
}
e->fill_off = 0;
e->size = orig_size;
e->alloc_size = size;
e->mem = &q->mem;
/*
* If we have increased the size to skip the free space at the end, set
* put to the beginning of the cmd buffer + orig_size, as if the
* previous put had been at position 0.
*/
if (size != orig_size) {
e->off = 0;
q->put = orig_size;
} else {
e->off = q->put;
q->put = (q->put + orig_size) & (q->size - 1U);
}
/* we already handled q->put + size > q->size so BUG_ON this */
BUG_ON(q->put > q->size);
nvgpu_log_fn(g, "done");
return 0;
}
int nvgpu_priv_cmdbuf_alloc(struct priv_cmd_queue *q, u32 size,
struct priv_cmd_entry **e)
{
u32 next_put = nvgpu_safe_add_u32(q->entry_put, 1U) % q->entries_len;
struct priv_cmd_entry *entry;
int err;
if (next_put == q->entry_get) {
return -EAGAIN;
}
entry = &q->entries[q->entry_put];
err = nvgpu_priv_cmdbuf_alloc_buf(q, size, entry);
if (err != 0) {
return err;
}
q->entry_put = next_put;
*e = entry;
return 0;
}
void nvgpu_priv_cmdbuf_rollback(struct priv_cmd_queue *q,
struct priv_cmd_entry *e)
{
nvgpu_assert(q->put < q->size);
nvgpu_assert(q->size > 0U);
nvgpu_assert(e->alloc_size <= q->size);
q->put = (q->put + q->size - e->alloc_size) & (q->size - 1U);
(void)memset(e, 0, sizeof(*e));
nvgpu_assert(q->entry_put < q->entries_len);
nvgpu_assert(q->entries_len > 0U);
q->entry_put = (q->entry_put + q->entries_len - 1U)
% q->entries_len;
}
void nvgpu_priv_cmdbuf_free(struct priv_cmd_queue *q, struct priv_cmd_entry *e)
{
struct gk20a *g = q->vm->mm->g;
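/*
* Entries are expected to be freed in allocation order. e->off == 0 is
* tolerated here because a wrapped allocation restarts at the beginning
* of the buffer before get has caught up.
*/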
if ((q->get != e->off) && e->off != 0U) {
nvgpu_err(g, "priv cmdbuf requests out-of-order");
}
nvgpu_assert(q->size > 0U);
q->get = nvgpu_safe_add_u32(e->off, e->size) & (q->size - 1U);
q->entry_get = nvgpu_safe_add_u32(q->entry_get, 1U) % q->entries_len;
(void)memset(e, 0, sizeof(*e));
}
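/*
* Copy 'entries' words of 'data' into this entry at its current fill
* offset. The caller must have allocated at least this much space in
* the entry; the assert below enforces that.
*/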
void nvgpu_priv_cmdbuf_append(struct gk20a *g, struct priv_cmd_entry *e,
u32 *data, u32 entries)
{
nvgpu_assert(e->fill_off + entries <= e->size);
nvgpu_mem_wr_n(g, e->mem, (e->off + e->fill_off) * sizeof(u32),
data, entries * sizeof(u32));
e->fill_off += entries;
}
void nvgpu_priv_cmdbuf_append_zeros(struct gk20a *g, struct priv_cmd_entry *e,
u32 entries)
{
nvgpu_assert(e->fill_off + entries <= e->size);
nvgpu_memset(g, e->mem, (e->off + e->fill_off) * sizeof(u32),
0, entries * sizeof(u32));
e->fill_off += entries;
}
void nvgpu_priv_cmdbuf_finish(struct gk20a *g, struct priv_cmd_entry *e,
u64 *gva, u32 *size)
{
/*
* The size is written to the pushbuf entry, so make sure this buffer
* is complete at this point. The responsibility of the channel sync is
* to be consistent in allocation and usage, and the matching size and
* add gops (e.g., get_wait_cmd_size, add_wait_cmd) help there.
*/
nvgpu_assert(e->fill_off == e->size);
#ifdef CONFIG_NVGPU_TRACE
if (e->mem->aperture == APERTURE_SYSMEM) {
trace_gk20a_push_cmdbuf(g->name, 0, e->size, 0,
(u32 *)e->mem->cpu_va + e->off);
}
#endif
*gva = nvgpu_safe_add_u64(e->mem->gpu_va,
nvgpu_safe_mult_u64((u64)e->off, sizeof(u32)));
*size = e->size;
}

View File

@@ -0,0 +1,914 @@
/*
* Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/gk20a.h>
#include <nvgpu/channel.h>
#include <nvgpu/fifo.h>
#include <nvgpu/engines.h>
#include <nvgpu/device.h>
#include <nvgpu/runlist.h>
#include <nvgpu/ptimer.h>
#include <nvgpu/bug.h>
#include <nvgpu/dma.h>
#include <nvgpu/rc.h>
#include <nvgpu/static_analysis.h>
#ifdef CONFIG_NVGPU_LS_PMU
#include <nvgpu/pmu/mutex.h>
#endif
void nvgpu_runlist_lock_active_runlists(struct gk20a *g)
{
struct nvgpu_fifo *f = &g->fifo;
struct nvgpu_runlist *runlist;
u32 i;
nvgpu_log_info(g, "acquire runlist_lock for active runlists");
for (i = 0; i < g->fifo.num_runlists; i++) {
runlist = &f->active_runlists[i];
nvgpu_mutex_acquire(&runlist->runlist_lock);
}
}
void nvgpu_runlist_unlock_active_runlists(struct gk20a *g)
{
struct nvgpu_fifo *f = &g->fifo;
struct nvgpu_runlist *runlist;
u32 i;
nvgpu_log_info(g, "release runlist_lock for active runlists");
for (i = 0; i < g->fifo.num_runlists; i++) {
runlist = &f->active_runlists[i];
nvgpu_mutex_release(&runlist->runlist_lock);
}
}
static u32 nvgpu_runlist_append_tsg(struct gk20a *g,
struct nvgpu_runlist *runlist,
u32 **runlist_entry,
u32 *entries_left,
struct nvgpu_tsg *tsg)
{
struct nvgpu_fifo *f = &g->fifo;
u32 runlist_entry_words = f->runlist_entry_size / (u32)sizeof(u32);
struct nvgpu_channel *ch;
u32 count = 0;
u32 timeslice;
nvgpu_log_fn(f->g, " ");
if (*entries_left == 0U) {
return RUNLIST_APPEND_FAILURE;
}
/* add TSG entry */
nvgpu_log_info(g, "add TSG %d to runlist", tsg->tsgid);
/*
* timeslice is measured with PTIMER.
* On some platforms, PTIMER is lower than 1GHz.
*/
timeslice = scale_ptimer(tsg->timeslice_us,
ptimer_scalingfactor10x(g->ptimer_src_freq));
g->ops.runlist.get_tsg_entry(tsg, *runlist_entry, timeslice);
nvgpu_log_info(g, "tsg rl entries left %d runlist [0] %x [1] %x",
*entries_left,
(*runlist_entry)[0], (*runlist_entry)[1]);
*runlist_entry += runlist_entry_words;
count++;
(*entries_left)--;
nvgpu_rwsem_down_read(&tsg->ch_list_lock);
/* add runnable channels bound to this TSG */
nvgpu_list_for_each_entry(ch, &tsg->ch_list,
nvgpu_channel, ch_entry) {
if (!nvgpu_test_bit(ch->chid,
runlist->active_channels)) {
continue;
}
if (*entries_left == 0U) {
nvgpu_rwsem_up_read(&tsg->ch_list_lock);
return RUNLIST_APPEND_FAILURE;
}
nvgpu_log_info(g, "add channel %d to runlist",
ch->chid);
g->ops.runlist.get_ch_entry(ch, *runlist_entry);
nvgpu_log_info(g, "rl entries left %d runlist [0] %x [1] %x",
*entries_left,
(*runlist_entry)[0], (*runlist_entry)[1]);
count = nvgpu_safe_add_u32(count, 1U);
*runlist_entry += runlist_entry_words;
(*entries_left)--;
}
nvgpu_rwsem_up_read(&tsg->ch_list_lock);
return count;
}
static u32 nvgpu_runlist_append_prio(struct nvgpu_fifo *f,
struct nvgpu_runlist *runlist,
u32 **runlist_entry,
u32 *entries_left,
u32 interleave_level)
{
u32 count = 0;
unsigned long tsgid;
nvgpu_log_fn(f->g, " ");
for_each_set_bit(tsgid, runlist->active_tsgs, f->num_channels) {
struct nvgpu_tsg *tsg = nvgpu_tsg_get_from_id(f->g, (u32)tsgid);
u32 entries;
if (tsg->interleave_level == interleave_level) {
entries = nvgpu_runlist_append_tsg(f->g, runlist,
runlist_entry, entries_left, tsg);
if (entries == RUNLIST_APPEND_FAILURE) {
return RUNLIST_APPEND_FAILURE;
}
count += entries;
}
}
return count;
}
static u32 nvgpu_runlist_append_hi(struct nvgpu_fifo *f,
struct nvgpu_runlist *runlist,
u32 **runlist_entry,
u32 *entries_left)
{
nvgpu_log_fn(f->g, " ");
/*
* No higher levels - this is where the "recursion" ends; just add all
* active TSGs at this level.
*/
return nvgpu_runlist_append_prio(f, runlist, runlist_entry,
entries_left,
NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_HIGH);
}
static u32 nvgpu_runlist_append_med(struct nvgpu_fifo *f,
struct nvgpu_runlist *runlist,
u32 **runlist_entry,
u32 *entries_left)
{
u32 count = 0;
unsigned long tsgid;
nvgpu_log_fn(f->g, " ");
for_each_set_bit(tsgid, runlist->active_tsgs, f->num_channels) {
struct nvgpu_tsg *tsg = nvgpu_tsg_get_from_id(f->g, (u32)tsgid);
u32 entries;
if (tsg->interleave_level !=
NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_MEDIUM) {
continue;
}
/* LEVEL_MEDIUM list starts with a LEVEL_HIGH, if any */
entries = nvgpu_runlist_append_hi(f, runlist,
runlist_entry, entries_left);
if (entries == RUNLIST_APPEND_FAILURE) {
return RUNLIST_APPEND_FAILURE;
}
count += entries;
entries = nvgpu_runlist_append_tsg(f->g, runlist,
runlist_entry, entries_left, tsg);
if (entries == RUNLIST_APPEND_FAILURE) {
return RUNLIST_APPEND_FAILURE;
}
count += entries;
}
return count;
}
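/*
* Interleaved construction with the lowest priority level as the outer
* loop. As an illustration, with TSGs H1 and H2 (high), M1 (medium) and
* L1 (low) the emitted order is H1 H2 M1 H1 H2 L1, so higher-level TSGs
* reappear around every lower-level entry and get scheduled more often.
*/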
static u32 nvgpu_runlist_append_low(struct nvgpu_fifo *f,
struct nvgpu_runlist *runlist,
u32 **runlist_entry,
u32 *entries_left)
{
u32 count = 0;
unsigned long tsgid;
nvgpu_log_fn(f->g, " ");
for_each_set_bit(tsgid, runlist->active_tsgs, f->num_channels) {
struct nvgpu_tsg *tsg = nvgpu_tsg_get_from_id(f->g, (u32)tsgid);
u32 entries;
if (tsg->interleave_level !=
NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW) {
continue;
}
/* The medium level starts with the highs, if any. */
entries = nvgpu_runlist_append_med(f, runlist,
runlist_entry, entries_left);
if (entries == RUNLIST_APPEND_FAILURE) {
return RUNLIST_APPEND_FAILURE;
}
count += entries;
entries = nvgpu_runlist_append_hi(f, runlist,
runlist_entry, entries_left);
if (entries == RUNLIST_APPEND_FAILURE) {
return RUNLIST_APPEND_FAILURE;
}
count += entries;
entries = nvgpu_runlist_append_tsg(f->g, runlist,
runlist_entry, entries_left, tsg);
if (entries == RUNLIST_APPEND_FAILURE) {
return RUNLIST_APPEND_FAILURE;
}
count += entries;
}
if (count == 0U) {
/*
* No transitions to fill with higher levels, so add
* the next level once. If that's empty too, we have only
* LEVEL_HIGH jobs.
*/
count = nvgpu_runlist_append_med(f, runlist,
runlist_entry, entries_left);
if (count == 0U) {
count = nvgpu_runlist_append_hi(f, runlist,
runlist_entry, entries_left);
}
}
return count;
}
static u32 nvgpu_runlist_append_flat(struct nvgpu_fifo *f,
struct nvgpu_runlist *runlist,
u32 **runlist_entry,
u32 *entries_left)
{
u32 count = 0, entries, i;
nvgpu_log_fn(f->g, " ");
/* Group by priority but don't interleave. High comes first. */
for (i = 0; i < NVGPU_FIFO_RUNLIST_INTERLEAVE_NUM_LEVELS; i++) {
u32 level = NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_HIGH - i;
entries = nvgpu_runlist_append_prio(f, runlist, runlist_entry,
entries_left, level);
if (entries == RUNLIST_APPEND_FAILURE) {
return RUNLIST_APPEND_FAILURE;
}
count += entries;
}
return count;
}
u32 nvgpu_runlist_construct_locked(struct nvgpu_fifo *f,
struct nvgpu_runlist *runlist,
u32 buf_id,
u32 max_entries)
{
u32 *runlist_entry_base = runlist->mem[buf_id].cpu_va;
/*
* The entry pointer and capacity counter that live on the stack here
* keep track of the current position and the remaining space when tsg
* and channel entries are ultimately appended.
*/
if (f->g->runlist_interleave) {
return nvgpu_runlist_append_low(f, runlist,
&runlist_entry_base, &max_entries);
} else {
return nvgpu_runlist_append_flat(f, runlist,
&runlist_entry_base, &max_entries);
}
}
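/*
* Mark a channel active or inactive in the runlist bitmaps and keep the
* TSG bookkeeping in sync. Returns true if the runlist contents actually
* changed and thus need to be reconstructed.
*/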
static bool nvgpu_runlist_modify_active_locked(struct gk20a *g,
struct nvgpu_runlist *runlist,
struct nvgpu_channel *ch, bool add)
{
struct nvgpu_tsg *tsg = NULL;
tsg = nvgpu_tsg_from_ch(ch);
if (tsg == NULL) {
/*
* Unsupported condition, but shouldn't break anything. Warn
* and tell the caller that nothing has changed.
*/
nvgpu_warn(g, "Bare channel in runlist update");
return false;
}
if (add) {
if (nvgpu_test_and_set_bit(ch->chid,
runlist->active_channels)) {
/* was already there */
return false;
} else {
/* new, and belongs to a tsg */
nvgpu_set_bit(tsg->tsgid, runlist->active_tsgs);
tsg->num_active_channels = nvgpu_safe_add_u32(
tsg->num_active_channels, 1U);
}
} else {
if (!nvgpu_test_and_clear_bit(ch->chid,
runlist->active_channels)) {
/* wasn't there */
return false;
} else {
tsg->num_active_channels = nvgpu_safe_sub_u32(
tsg->num_active_channels, 1U);
if (tsg->num_active_channels == 0U) {
/* was the only member of this tsg */
nvgpu_clear_bit(tsg->tsgid,
runlist->active_tsgs);
}
}
}
return true;
}
static int nvgpu_runlist_reconstruct_locked(struct gk20a *g,
struct nvgpu_runlist *runlist,
u32 buf_id, bool add_entries)
{
u32 num_entries;
struct nvgpu_fifo *f = &g->fifo;
rl_dbg(g, "[%u] switch to new buffer 0x%16llx",
runlist->id, (u64)nvgpu_mem_get_addr(g, &runlist->mem[buf_id]));
if (!add_entries) {
runlist->count = 0;
return 0;
}
num_entries = nvgpu_runlist_construct_locked(f, runlist, buf_id,
f->num_runlist_entries);
if (num_entries == RUNLIST_APPEND_FAILURE) {
return -E2BIG;
}
runlist->count = num_entries;
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 10_3), "Bug 2277532")
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 14_4), "Bug 2277532")
NVGPU_COV_WHITELIST_BLOCK_BEGIN(false_positive, 1, NVGPU_MISRA(Rule, 15_6), "Bug 2277532")
WARN_ON(runlist->count > f->num_runlist_entries);
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 10_3))
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 14_4))
NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 15_6))
return 0;
}
int nvgpu_runlist_update_locked(struct gk20a *g, struct nvgpu_runlist *rl,
struct nvgpu_channel *ch, bool add,
bool wait_for_finish)
{
int ret = 0;
u32 buf_id;
bool add_entries;
if (ch != NULL) {
bool update = nvgpu_runlist_modify_active_locked(g, rl, ch, add);
if (!update) {
/* no change in runlist contents */
return 0;
}
/* had a channel to update, so reconstruct */
add_entries = true;
} else {
/* no channel; add means update all, !add means clear all */
add_entries = add;
}
/* double buffering, swap to next */
buf_id = (rl->cur_buffer == 0U) ? 1U : 0U;
ret = nvgpu_runlist_reconstruct_locked(g, rl, buf_id, add_entries);
if (ret != 0) {
return ret;
}
g->ops.runlist.hw_submit(g, rl->id, rl->count, buf_id);
if (wait_for_finish) {
ret = g->ops.runlist.wait_pending(g, rl->id);
if (ret == -ETIMEDOUT) {
nvgpu_err(g, "runlist %d update timeout", rl->id);
/* let the caller trigger runlist update timeout recovery */
return ret;
} else {
if (ret == -EINTR) {
nvgpu_err(g, "runlist update interrupted");
}
}
}
rl->cur_buffer = buf_id;
return ret;
}
#ifdef CONFIG_NVGPU_CHANNEL_TSG_SCHEDULING
/* trigger host to expire current timeslice and reschedule runlist from front */
int nvgpu_runlist_reschedule(struct nvgpu_channel *ch, bool preempt_next,
bool wait_preempt)
{
struct gk20a *g = ch->g;
struct nvgpu_runlist *runlist;
#ifdef CONFIG_NVGPU_LS_PMU
u32 token = PMU_INVALID_MUTEX_OWNER_ID;
int mutex_ret = 0;
#endif
int ret = 0;
runlist = ch->runlist;
if (nvgpu_mutex_tryacquire(&runlist->runlist_lock) == 0) {
return -EBUSY;
}
#ifdef CONFIG_NVGPU_LS_PMU
mutex_ret = nvgpu_pmu_lock_acquire(
g, g->pmu, PMU_MUTEX_ID_FIFO, &token);
#endif
g->ops.runlist.hw_submit(
g, runlist->id, runlist->count, runlist->cur_buffer);
if (preempt_next) {
if (g->ops.runlist.reschedule_preempt_next_locked(ch,
wait_preempt) != 0) {
nvgpu_err(g, "reschedule preempt next failed");
}
}
if (g->ops.runlist.wait_pending(g, runlist->id) != 0) {
nvgpu_err(g, "wait pending failed for runlist %u",
runlist->id);
}
#ifdef CONFIG_NVGPU_LS_PMU
if (mutex_ret == 0) {
if (nvgpu_pmu_lock_release(g, g->pmu,
PMU_MUTEX_ID_FIFO, &token) != 0) {
nvgpu_err(g, "failed to release PMU lock");
}
}
#endif
nvgpu_mutex_release(&runlist->runlist_lock);
return ret;
}
#endif
/*
* Add/remove a channel from the runlist. Special cases below:
* runlist->active_channels will NOT be changed.
* (ch == NULL && !add) means remove all active channels from the runlist.
* (ch == NULL && add) means restore all active channels on the runlist.
*/
static int nvgpu_runlist_do_update(struct gk20a *g, struct nvgpu_runlist *rl,
struct nvgpu_channel *ch,
bool add, bool wait_for_finish)
{
#ifdef CONFIG_NVGPU_LS_PMU
u32 token = PMU_INVALID_MUTEX_OWNER_ID;
int mutex_ret = 0;
#endif
int ret = 0;
nvgpu_log_fn(g, " ");
nvgpu_mutex_acquire(&rl->runlist_lock);
#ifdef CONFIG_NVGPU_LS_PMU
mutex_ret = nvgpu_pmu_lock_acquire(g, g->pmu,
PMU_MUTEX_ID_FIFO, &token);
#endif
ret = nvgpu_runlist_update_locked(g, rl, ch, add, wait_for_finish);
#ifdef CONFIG_NVGPU_LS_PMU
if (mutex_ret == 0) {
if (nvgpu_pmu_lock_release(g, g->pmu,
PMU_MUTEX_ID_FIFO, &token) != 0) {
nvgpu_err(g, "failed to release PMU lock");
}
}
#endif
nvgpu_mutex_release(&rl->runlist_lock);
if (ret == -ETIMEDOUT) {
nvgpu_rc_runlist_update(g, rl->id);
}
return ret;
}
int nvgpu_runlist_update(struct gk20a *g, struct nvgpu_runlist *rl,
struct nvgpu_channel *ch,
bool add, bool wait_for_finish)
{
nvgpu_assert(ch != NULL);
return nvgpu_runlist_do_update(g, rl, ch, add, wait_for_finish);
}
int nvgpu_runlist_reload(struct gk20a *g, struct nvgpu_runlist *rl,
bool add, bool wait_for_finish)
{
return nvgpu_runlist_do_update(g, rl, NULL, add, wait_for_finish);
}
int nvgpu_runlist_reload_ids(struct gk20a *g, u32 runlist_ids, bool add)
{
struct nvgpu_fifo *f = &g->fifo;
int ret = -EINVAL;
unsigned long runlist_id = 0;
int errcode;
unsigned long ulong_runlist_ids = (unsigned long)runlist_ids;
if (g == NULL) {
goto end;
}
ret = 0;
for_each_set_bit(runlist_id, &ulong_runlist_ids, 32U) {
/* Capture the last failure error code */
errcode = g->ops.runlist.reload(g,
f->runlists[runlist_id], add, true);
if (errcode != 0) {
nvgpu_err(g,
"failed to update_runlist %lu %d",
runlist_id, errcode);
ret = errcode;
}
}
end:
return ret;
}
const char *nvgpu_runlist_interleave_level_name(u32 interleave_level)
{
const char *ret_string = NULL;
switch (interleave_level) {
case NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_LOW:
ret_string = "LOW";
break;
case NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_MEDIUM:
ret_string = "MEDIUM";
break;
case NVGPU_FIFO_RUNLIST_INTERLEAVE_LEVEL_HIGH:
ret_string = "HIGH";
break;
default:
ret_string = "?";
break;
}
return ret_string;
}
void nvgpu_runlist_set_state(struct gk20a *g, u32 runlists_mask,
u32 runlist_state)
{
#ifdef CONFIG_NVGPU_LS_PMU
u32 token = PMU_INVALID_MUTEX_OWNER_ID;
int mutex_ret = 0;
#endif
nvgpu_log(g, gpu_dbg_info, "runlist mask = 0x%08x state = 0x%08x",
runlists_mask, runlist_state);
#ifdef CONFIG_NVGPU_LS_PMU
mutex_ret = nvgpu_pmu_lock_acquire(g, g->pmu,
PMU_MUTEX_ID_FIFO, &token);
#endif
g->ops.runlist.write_state(g, runlists_mask, runlist_state);
#ifdef CONFIG_NVGPU_LS_PMU
if (mutex_ret == 0) {
if (nvgpu_pmu_lock_release(g, g->pmu,
PMU_MUTEX_ID_FIFO, &token) != 0) {
nvgpu_err(g, "failed to release PMU lock");
}
}
#endif
}
void nvgpu_runlist_cleanup_sw(struct gk20a *g)
{
struct nvgpu_fifo *f = &g->fifo;
u32 i, j;
struct nvgpu_runlist *runlist;
if ((f->runlists == NULL) || (f->active_runlists == NULL)) {
return;
}
g = f->g;
for (i = 0; i < f->num_runlists; i++) {
runlist = &f->active_runlists[i];
for (j = 0; j < MAX_RUNLIST_BUFFERS; j++) {
nvgpu_dma_free(g, &runlist->mem[j]);
}
nvgpu_kfree(g, runlist->active_channels);
runlist->active_channels = NULL;
nvgpu_kfree(g, runlist->active_tsgs);
runlist->active_tsgs = NULL;
nvgpu_mutex_destroy(&runlist->runlist_lock);
f->runlists[runlist->id] = NULL;
}
nvgpu_kfree(g, f->active_runlists);
f->active_runlists = NULL;
f->num_runlists = 0;
nvgpu_kfree(g, f->runlists);
f->runlists = NULL;
f->max_runlists = 0;
}
void nvgpu_runlist_init_enginfo(struct gk20a *g, struct nvgpu_fifo *f)
{
struct nvgpu_runlist *runlist;
const struct nvgpu_device *dev;
u32 i, j;
nvgpu_log_fn(g, " ");
if (g->is_virtual) {
return;
}
for (i = 0; i < f->num_runlists; i++) {
runlist = &f->active_runlists[i];
(void) g->ops.fifo.find_pbdma_for_runlist(g,
runlist->id,
&runlist->pbdma_bitmask);
nvgpu_log(g, gpu_dbg_info, "runlist %d: pbdma bitmask 0x%x",
runlist->id, runlist->pbdma_bitmask);
for (j = 0; j < f->num_engines; j++) {
dev = f->active_engines[j];
if (dev->runlist_id == runlist->id) {
runlist->eng_bitmask |= BIT32(dev->engine_id);
}
}
nvgpu_log(g, gpu_dbg_info, "runlist %d: act eng bitmask 0x%x",
runlist->id, runlist->eng_bitmask);
}
nvgpu_log_fn(g, "done");
}
static int nvgpu_init_active_runlist_mapping(struct gk20a *g)
{
struct nvgpu_runlist *runlist;
struct nvgpu_fifo *f = &g->fifo;
unsigned int runlist_id;
size_t runlist_size;
u32 i, j;
int err = 0;
rl_dbg(g, "Building active runlist map.");
/*
* In most cases we want to loop through active runlists only. Here,
* however, we need to loop through all possible runlists to build the
* mapping between runlists[runlist_id] and active_runlists[i].
*/
i = 0U;
for (runlist_id = 0; runlist_id < f->max_runlists; runlist_id++) {
if (!nvgpu_engine_is_valid_runlist_id(g, runlist_id)) {
/* skip inactive runlist */
rl_dbg(g, " Skipping invalid runlist: %d", runlist_id);
continue;
}
rl_dbg(g, " Configuring HW runlist: %u", runlist_id);
rl_dbg(g, " SW runlist index to HW: %u -> %u", i, runlist_id);
runlist = &f->active_runlists[i];
runlist->id = runlist_id;
f->runlists[runlist_id] = runlist;
i = nvgpu_safe_add_u32(i, 1U);
runlist->active_channels =
nvgpu_kzalloc(g, DIV_ROUND_UP(f->num_channels,
BITS_PER_BYTE));
if (runlist->active_channels == NULL) {
err = -ENOMEM;
goto clean_up_runlist;
}
runlist->active_tsgs =
nvgpu_kzalloc(g, DIV_ROUND_UP(f->num_channels,
BITS_PER_BYTE));
if (runlist->active_tsgs == NULL) {
err = -ENOMEM;
goto clean_up_runlist;
}
runlist_size = (size_t)f->runlist_entry_size *
(size_t)f->num_runlist_entries;
rl_dbg(g, " RL entries: %d", f->num_runlist_entries);
rl_dbg(g, " RL size %zu", runlist_size);
for (j = 0; j < MAX_RUNLIST_BUFFERS; j++) {
err = nvgpu_dma_alloc_flags_sys(g,
g->is_virtual ?
0ULL : NVGPU_DMA_PHYSICALLY_ADDRESSED,
runlist_size,
&runlist->mem[j]);
if (err != 0) {
nvgpu_err(g, "memory allocation failed");
err = -ENOMEM;
goto clean_up_runlist;
}
}
nvgpu_mutex_init(&runlist->runlist_lock);
/*
* None of the buffers is pinned if this value doesn't change.
* Otherwise, one of them (cur_buffer) has been pinned.
*/
runlist->cur_buffer = MAX_RUNLIST_BUFFERS;
}
return 0;
clean_up_runlist:
return err;
}
int nvgpu_runlist_setup_sw(struct gk20a *g)
{
struct nvgpu_fifo *f = &g->fifo;
u32 num_runlists = 0U;
unsigned int runlist_id;
int err = 0;
rl_dbg(g, "Initializing Runlists");
nvgpu_spinlock_init(&f->runlist_submit_lock);
f->runlist_entry_size = g->ops.runlist.entry_size(g);
f->num_runlist_entries = g->ops.runlist.length_max(g);
f->max_runlists = g->ops.runlist.count_max(g);
f->runlists = nvgpu_kzalloc(g, nvgpu_safe_mult_u64(
sizeof(*f->runlists), f->max_runlists));
if (f->runlists == NULL) {
err = -ENOMEM;
goto clean_up_runlist;
}
for (runlist_id = 0; runlist_id < f->max_runlists; runlist_id++) {
if (nvgpu_engine_is_valid_runlist_id(g, runlist_id)) {
num_runlists = nvgpu_safe_add_u32(num_runlists, 1U);
}
}
f->num_runlists = num_runlists;
f->active_runlists = nvgpu_kzalloc(g, nvgpu_safe_mult_u64(
sizeof(*f->active_runlists), num_runlists));
if (f->active_runlists == NULL) {
err = -ENOMEM;
goto clean_up_runlist;
}
rl_dbg(g, " Max runlists: %u", f->max_runlists);
rl_dbg(g, " Active runlists: %u", f->num_runlists);
rl_dbg(g, " RL entry size: %u bytes", f->runlist_entry_size);
rl_dbg(g, " Max RL entries: %u", f->num_runlist_entries);
err = nvgpu_init_active_runlist_mapping(g);
if (err != 0) {
goto clean_up_runlist;
}
g->ops.runlist.init_enginfo(g, f);
return 0;
clean_up_runlist:
nvgpu_runlist_cleanup_sw(g);
rl_dbg(g, "fail");
return err;
}
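/*
* Build a bitmask of runlists affected by the given id (TSG or channel),
* engine bitmask and/or pbdma bitmask. If none of these are known, all
* active runlists are included in the mask.
*/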
u32 nvgpu_runlist_get_runlists_mask(struct gk20a *g, u32 id,
unsigned int id_type, u32 act_eng_bitmask, u32 pbdma_bitmask)
{
u32 i, runlists_mask = 0;
struct nvgpu_fifo *f = &g->fifo;
struct nvgpu_runlist *runlist;
bool bitmask_disabled = ((act_eng_bitmask == 0U) &&
(pbdma_bitmask == 0U));
/* engine and/or pbdma ids are known */
if (!bitmask_disabled) {
for (i = 0U; i < f->num_runlists; i++) {
runlist = &f->active_runlists[i];
if ((runlist->eng_bitmask & act_eng_bitmask) != 0U) {
runlists_mask |= BIT32(runlist->id);
}
if ((runlist->pbdma_bitmask & pbdma_bitmask) != 0U) {
runlists_mask |= BIT32(runlist->id);
}
}
}
if (id_type != ID_TYPE_UNKNOWN) {
if (id_type == ID_TYPE_TSG) {
runlist = f->tsg[id].runlist;
} else {
runlist = f->channel[id].runlist;
}
if (runlist == NULL) {
/* Warning on Linux, real assert on QNX. */
nvgpu_assert(runlist != NULL);
} else {
runlists_mask |= BIT32(runlist->id);
}
} else {
if (bitmask_disabled) {
nvgpu_log(g, gpu_dbg_info, "id_type_unknown, engine "
"and pbdma ids are unknown");
for (i = 0U; i < f->num_runlists; i++) {
runlist = &f->active_runlists[i];
runlists_mask |= BIT32(runlist->id);
}
} else {
nvgpu_log(g, gpu_dbg_info, "id_type_unknown, engine "
"and/or pbdma ids are known");
}
}
nvgpu_log(g, gpu_dbg_info, "runlists_mask = 0x%08x", runlists_mask);
return runlists_mask;
}
void nvgpu_runlist_unlock_runlists(struct gk20a *g, u32 runlists_mask)
{
struct nvgpu_fifo *f = &g->fifo;
struct nvgpu_runlist *runlist;
u32 i;
nvgpu_log_info(g, "release runlist_lock for runlists set in "
"runlists_mask: 0x%08x", runlists_mask);
for (i = 0U; i < f->num_runlists; i++) {
runlist = &f->active_runlists[i];
if ((BIT32(i) & runlists_mask) != 0U) {
nvgpu_mutex_release(&runlist->runlist_lock);
}
}
}

View File

@@ -0,0 +1,837 @@
/*
* Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/gk20a.h>
#include <nvgpu/nvgpu_init.h>
#include <nvgpu/channel.h>
#include <nvgpu/ltc.h>
#include <nvgpu/os_sched.h>
#include <nvgpu/utils.h>
#include <nvgpu/channel.h>
#include <nvgpu/channel_sync.h>
#include <nvgpu/channel_sync_syncpt.h>
#include <nvgpu/watchdog.h>
#include <nvgpu/job.h>
#include <nvgpu/priv_cmdbuf.h>
#include <nvgpu/bug.h>
#include <nvgpu/fence.h>
#include <nvgpu/swprofile.h>
#include <nvgpu/vpr.h>
#include <nvgpu/trace.h>
#include <nvgpu/nvhost.h>
#include <nvgpu/user_fence.h>
#include <nvgpu/fifo/swprofile.h>
/*
* We might need two extra gpfifo entries per submit - one for pre fence and
* one for post fence.
*/
#define EXTRA_GPFIFO_ENTRIES 2U
static int nvgpu_submit_create_wait_cmd(struct nvgpu_channel *c,
struct nvgpu_channel_fence *fence,
struct priv_cmd_entry **wait_cmd, bool flag_sync_fence)
{
/*
* A single input sync fd may contain multiple fences. The preallocated
* priv cmdbuf space allows exactly one per submit in the worst case.
* Require at most one wait for consistent deterministic submits; if
* there are more and not enough space, the submit returns -EAGAIN in
* non-deterministic mode.
*/
u32 max_wait_cmds = nvgpu_channel_is_deterministic(c) ?
1U : 0U;
int err;
if (flag_sync_fence) {
nvgpu_assert(fence->id <= (u32)INT_MAX);
err = nvgpu_channel_sync_wait_fence_fd(c->sync,
(int)fence->id, wait_cmd, max_wait_cmds);
} else {
struct nvgpu_channel_sync_syncpt *sync_syncpt;
sync_syncpt = nvgpu_channel_sync_to_syncpt(c->sync);
if (sync_syncpt != NULL) {
err = nvgpu_channel_sync_wait_syncpt(sync_syncpt,
fence->id, fence->value, wait_cmd);
} else {
err = -EINVAL;
}
}
return err;
}
static int nvgpu_submit_create_incr_cmd(struct nvgpu_channel *c,
struct priv_cmd_entry **incr_cmd,
struct nvgpu_fence_type *post_fence, bool flag_fence_get,
bool need_wfi, bool need_sync_fence)
{
int err;
if (flag_fence_get) {
err = nvgpu_channel_sync_incr_user(c->sync, incr_cmd,
post_fence, need_wfi, need_sync_fence);
} else {
err = nvgpu_channel_sync_incr(c->sync, incr_cmd,
post_fence, need_sync_fence);
}
return err;
}
/*
* Handle the submit synchronization - pre-fences and post-fences.
*/
static int nvgpu_submit_prepare_syncs(struct nvgpu_channel *c,
struct nvgpu_channel_fence *fence,
struct nvgpu_channel_job *job,
u32 flags)
{
struct gk20a *g = c->g;
bool need_sync_fence;
bool new_sync_created = false;
int err = 0;
bool need_wfi = (flags & NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI) == 0U;
bool flag_fence_get = (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) != 0U;
bool flag_sync_fence = (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE) != 0U;
bool flag_fence_wait = (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) != 0U;
if (g->aggressive_sync_destroy_thresh != 0U) {
nvgpu_mutex_acquire(&c->sync_lock);
if (c->sync == NULL) {
c->sync = nvgpu_channel_sync_create(c);
if (c->sync == NULL) {
err = -ENOMEM;
goto clean_up_unlock;
}
new_sync_created = true;
}
nvgpu_channel_sync_get_ref(c->sync);
}
if ((g->ops.channel.set_syncpt != NULL) && new_sync_created) {
err = g->ops.channel.set_syncpt(c);
if (err != 0) {
goto clean_up_put_sync;
}
}
/*
* Optionally insert syncpt/semaphore wait in the beginning of gpfifo
* submission when user requested.
*/
if (flag_fence_wait) {
err = nvgpu_submit_create_wait_cmd(c, fence, &job->wait_cmd,
flag_sync_fence);
if (err != 0) {
goto clean_up_put_sync;
}
}
need_sync_fence = flag_fence_get && flag_sync_fence;
/*
* Always generate an increment at the end of a GPFIFO submission. When
* we do job tracking, post fences are needed for various reasons even
* if not requested by user.
*/
err = nvgpu_submit_create_incr_cmd(c, &job->incr_cmd, &job->post_fence,
flag_fence_get, need_wfi, need_sync_fence);
if (err != 0) {
goto clean_up_wait_cmd;
}
if (g->aggressive_sync_destroy_thresh != 0U) {
nvgpu_mutex_release(&c->sync_lock);
}
return 0;
clean_up_wait_cmd:
if (job->wait_cmd != NULL) {
nvgpu_priv_cmdbuf_rollback(c->priv_cmd_q, job->wait_cmd);
}
job->wait_cmd = NULL;
clean_up_put_sync:
if (g->aggressive_sync_destroy_thresh != 0U) {
if (nvgpu_channel_sync_put_ref_and_check(c->sync)
&& g->aggressive_sync_destroy) {
nvgpu_channel_sync_destroy(c->sync);
}
}
clean_up_unlock:
if (g->aggressive_sync_destroy_thresh != 0U) {
nvgpu_mutex_release(&c->sync_lock);
}
return err;
}
static void nvgpu_submit_append_priv_cmdbuf(struct nvgpu_channel *c,
struct priv_cmd_entry *cmd)
{
struct gk20a *g = c->g;
struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem;
struct nvgpu_gpfifo_entry gpfifo_entry;
u64 gva;
u32 size;
nvgpu_priv_cmdbuf_finish(g, cmd, &gva, &size);
g->ops.pbdma.format_gpfifo_entry(g, &gpfifo_entry, gva, size);
nvgpu_mem_wr_n(g, gpfifo_mem,
c->gpfifo.put * (u32)sizeof(gpfifo_entry),
&gpfifo_entry, (u32)sizeof(gpfifo_entry));
c->gpfifo.put = (c->gpfifo.put + 1U) & (c->gpfifo.entry_num - 1U);
}
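/*
* Copy user gpfifo entries straight into the gpfifo ring buffer,
* splitting the copy in two when the destination wraps around the end
* of the ring.
*/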
static int nvgpu_submit_append_gpfifo_user_direct(struct nvgpu_channel *c,
struct nvgpu_gpfifo_userdata userdata,
u32 num_entries)
{
struct gk20a *g = c->g;
struct nvgpu_gpfifo_entry *gpfifo_cpu = c->gpfifo.mem.cpu_va;
u32 gpfifo_size = c->gpfifo.entry_num;
u32 len = num_entries;
u32 start = c->gpfifo.put;
u32 end = start + len; /* exclusive */
int err;
nvgpu_speculation_barrier();
if (end > gpfifo_size) {
/* wrap-around */
u32 length0 = gpfifo_size - start;
u32 length1 = len - length0;
err = g->os_channel.copy_user_gpfifo(
&gpfifo_cpu[start], userdata,
0, length0);
if (err != 0) {
return err;
}
err = g->os_channel.copy_user_gpfifo(
gpfifo_cpu, userdata,
length0, length1);
if (err != 0) {
return err;
}
} else {
err = g->os_channel.copy_user_gpfifo(
&gpfifo_cpu[start], userdata,
0, len);
if (err != 0) {
return err;
}
}
return 0;
}
static void nvgpu_submit_append_gpfifo_common(struct nvgpu_channel *c,
struct nvgpu_gpfifo_entry *src, u32 num_entries)
{
struct gk20a *g = c->g;
struct nvgpu_mem *gpfifo_mem = &c->gpfifo.mem;
/* in bytes */
u32 gpfifo_size =
c->gpfifo.entry_num * (u32)sizeof(struct nvgpu_gpfifo_entry);
u32 len = num_entries * (u32)sizeof(struct nvgpu_gpfifo_entry);
u32 start = c->gpfifo.put * (u32)sizeof(struct nvgpu_gpfifo_entry);
u32 end = start + len; /* exclusive */
if (end > gpfifo_size) {
/* wrap-around */
u32 length0 = gpfifo_size - start;
u32 length1 = len - length0;
struct nvgpu_gpfifo_entry *src2 = &src[length0];
nvgpu_mem_wr_n(g, gpfifo_mem, start, src, length0);
nvgpu_mem_wr_n(g, gpfifo_mem, 0, src2, length1);
} else {
nvgpu_mem_wr_n(g, gpfifo_mem, start, src, len);
}
}
/*
* Copy source gpfifo entries into the gpfifo ring buffer, potentially
* splitting into two memcpys to handle wrap-around.
*/
static int nvgpu_submit_append_gpfifo(struct nvgpu_channel *c,
struct nvgpu_gpfifo_entry *kern_gpfifo,
struct nvgpu_gpfifo_userdata userdata,
u32 num_entries)
{
int err;
if ((kern_gpfifo == NULL)
#ifdef CONFIG_NVGPU_DGPU
&& (c->gpfifo.pipe == NULL)
#endif
) {
/*
* This path (from userspace to sysmem) is special in order to
* avoid two copies unnecessarily (from user to pipe, then from
* pipe to gpu sysmem buffer).
*/
err = nvgpu_submit_append_gpfifo_user_direct(c, userdata,
num_entries);
if (err != 0) {
return err;
}
}
#ifdef CONFIG_NVGPU_DGPU
else if (kern_gpfifo == NULL) {
/* from userspace to vidmem, use the common path */
err = c->g->os_channel.copy_user_gpfifo(c->gpfifo.pipe,
userdata, 0, num_entries);
if (err != 0) {
return err;
}
nvgpu_submit_append_gpfifo_common(c, c->gpfifo.pipe,
num_entries);
}
#endif
else {
/*
* From kernel to either sysmem or vidmem; copy_user_gpfifo is not
* needed, so use the common path.
*/
nvgpu_submit_append_gpfifo_common(c, kern_gpfifo, num_entries);
}
trace_write_pushbuffers(c, num_entries);
c->gpfifo.put = (c->gpfifo.put + num_entries) &
(c->gpfifo.entry_num - 1U);
return 0;
}
static int nvgpu_submit_prepare_gpfifo_track(struct nvgpu_channel *c,
struct nvgpu_gpfifo_entry *gpfifo,
struct nvgpu_gpfifo_userdata userdata,
u32 num_entries,
u32 flags,
struct nvgpu_channel_fence *fence,
struct nvgpu_fence_type **fence_out,
struct nvgpu_swprofiler *profiler,
bool need_deferred_cleanup)
{
bool skip_buffer_refcounting = (flags &
NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING) != 0U;
struct nvgpu_channel_job *job = NULL;
int err;
nvgpu_channel_joblist_lock(c);
err = nvgpu_channel_alloc_job(c, &job);
nvgpu_channel_joblist_unlock(c);
if (err != 0) {
return err;
}
err = nvgpu_submit_prepare_syncs(c, fence, job, flags);
if (err != 0) {
goto clean_up_job;
}
nvgpu_swprofile_snapshot(profiler, PROF_KICKOFF_JOB_TRACKING);
/*
* wait_cmd can be unset even if flag_fence_wait exists; the
* android sync framework for example can provide entirely
* empty fences that act like trivially expired waits.
*/
if (job->wait_cmd != NULL) {
nvgpu_submit_append_priv_cmdbuf(c, job->wait_cmd);
}
err = nvgpu_submit_append_gpfifo(c, gpfifo, userdata, num_entries);
if (err != 0) {
goto clean_up_gpfifo_wait;
}
nvgpu_submit_append_priv_cmdbuf(c, job->incr_cmd);
err = nvgpu_channel_add_job(c, job, skip_buffer_refcounting);
if (err != 0) {
goto clean_up_gpfifo_incr;
}
nvgpu_channel_sync_mark_progress(c->sync, need_deferred_cleanup);
if (fence_out != NULL) {
/* This fence ref is going somewhere else but it's owned by the
* job; the caller is expected to release it promptly, so that
* a subsequent job cannot reclaim its memory.
*/
*fence_out = nvgpu_fence_get(&job->post_fence);
}
return 0;
clean_up_gpfifo_incr:
/*
* undo the incr priv cmdbuf and the user entries:
* new gp.put =
* (gp.put - (1 + num_entries)) & (gp.entry_num - 1) =
* (gp.put + (gp.entry_num - (1 + num_entries))) & (gp.entry_num - 1)
* the + entry_num does not affect the result but avoids wrapping below
* zero for MISRA, although it would be well defined.
*/
c->gpfifo.put =
(nvgpu_safe_add_u32(c->gpfifo.put,
nvgpu_safe_sub_u32(c->gpfifo.entry_num,
nvgpu_safe_add_u32(1U, num_entries)))) &
nvgpu_safe_sub_u32(c->gpfifo.entry_num, 1U);
clean_up_gpfifo_wait:
if (job->wait_cmd != NULL) {
/*
* undo the wait priv cmdbuf entry:
* gp.put =
* (gp.put - 1) & (gp.entry_num - 1) =
* (gp.put + (gp.entry_num - 1)) & (gp.entry_num - 1)
* same as above with the gp.entry_num on the left side.
*/
c->gpfifo.put =
nvgpu_safe_add_u32(c->gpfifo.put,
nvgpu_safe_sub_u32(c->gpfifo.entry_num, 1U)) &
nvgpu_safe_sub_u32(c->gpfifo.entry_num, 1U);
}
nvgpu_fence_put(&job->post_fence);
nvgpu_priv_cmdbuf_rollback(c->priv_cmd_q, job->incr_cmd);
if (job->wait_cmd != NULL) {
nvgpu_priv_cmdbuf_rollback(c->priv_cmd_q, job->wait_cmd);
}
clean_up_job:
nvgpu_channel_free_job(c, job);
return err;
}
static int nvgpu_submit_prepare_gpfifo_notrack(struct nvgpu_channel *c,
struct nvgpu_gpfifo_entry *gpfifo,
struct nvgpu_gpfifo_userdata userdata,
u32 num_entries,
struct nvgpu_fence_type **fence_out,
struct nvgpu_swprofiler *profiler)
{
int err;
nvgpu_swprofile_snapshot(profiler, PROF_KICKOFF_JOB_TRACKING);
err = nvgpu_submit_append_gpfifo(c, gpfifo, userdata,
num_entries);
if (err != 0) {
return err;
}
if (fence_out != NULL) {
*fence_out = NULL;
}
return 0;
}
static int check_gpfifo_capacity(struct nvgpu_channel *c, u32 required)
{
/*
* Make sure we have enough space for the gpfifo entries. Check the
* cached values first and then read from HW. If there is no space,
* return -EAGAIN and let userspace decide whether to retry the request.
*/
if (nvgpu_channel_get_gpfifo_free_count(c) < required) {
if (nvgpu_channel_update_gpfifo_get_and_get_free_count(c) <
required) {
return -EAGAIN;
}
}
return 0;
}
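/*
* Common submit path: verify gpfifo capacity (including the extra
* kernel entries), append the entries with or without job tracking, and
* finally publish the new GP_PUT to the hardware.
*/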
static int nvgpu_do_submit(struct nvgpu_channel *c,
struct nvgpu_gpfifo_entry *gpfifo,
struct nvgpu_gpfifo_userdata userdata,
u32 num_entries,
u32 flags,
struct nvgpu_channel_fence *fence,
struct nvgpu_fence_type **fence_out,
struct nvgpu_swprofiler *profiler,
bool need_job_tracking,
bool need_deferred_cleanup)
{
struct gk20a *g = c->g;
int err;
#ifdef CONFIG_NVGPU_TRACE
trace_gk20a_channel_submit_gpfifo(g->name,
c->chid,
num_entries,
flags,
fence ? fence->id : 0,
fence ? fence->value : 0);
#endif
nvgpu_log_info(g, "pre-submit put %d, get %d, size %d",
c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
err = check_gpfifo_capacity(c, num_entries + EXTRA_GPFIFO_ENTRIES);
if (err != 0) {
return err;
}
if (need_job_tracking) {
err = nvgpu_submit_prepare_gpfifo_track(c, gpfifo,
userdata, num_entries, flags, fence,
fence_out, profiler, need_deferred_cleanup);
} else {
err = nvgpu_submit_prepare_gpfifo_notrack(c, gpfifo,
userdata, num_entries, fence_out, profiler);
}
if (err != 0) {
return err;
}
nvgpu_swprofile_snapshot(profiler, PROF_KICKOFF_APPEND);
g->ops.userd.gp_put(g, c);
return 0;
}
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
static int nvgpu_submit_deterministic(struct nvgpu_channel *c,
struct nvgpu_gpfifo_entry *gpfifo,
struct nvgpu_gpfifo_userdata userdata,
u32 num_entries,
u32 flags,
struct nvgpu_channel_fence *fence,
struct nvgpu_fence_type **fence_out,
struct nvgpu_swprofiler *profiler)
{
bool skip_buffer_refcounting = (flags &
NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING) != 0U;
bool flag_fence_wait = (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) != 0U;
bool flag_fence_get = (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) != 0U;
bool flag_sync_fence = (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE) != 0U;
struct gk20a *g = c->g;
bool need_job_tracking;
int err = 0;
nvgpu_assert(nvgpu_channel_is_deterministic(c));
/* sync framework on post fences would not be deterministic */
if (flag_fence_get && flag_sync_fence) {
return -EINVAL;
}
/* this would be O(n) */
if (!skip_buffer_refcounting) {
return -EINVAL;
}
/* the watchdog needs periodic job cleanup */
if (nvgpu_channel_wdt_enabled(c->wdt)) {
return -EINVAL;
}
/*
* Job tracking is necessary on deterministic channels if and only if
* pre- or post-fence functionality is needed. If not, a fast submit
* can be done (ie. only need to write out userspace GPFIFO entries and
* update GP_PUT).
*/
need_job_tracking = flag_fence_wait || flag_fence_get;
if (need_job_tracking) {
/* nvgpu_semaphore is dynamically allocated, not pooled */
if (!nvgpu_has_syncpoints(g)) {
return -EINVAL;
}
/* dynamic sync allocation wouldn't be deterministic */
if (g->aggressive_sync_destroy_thresh != 0U) {
return -EINVAL;
}
/*
* (Try to) clean up a single job, if available. Each job
* requires the same amount of metadata, so this is enough for
* the job list, fence pool, and private command buffers that
* this submit will need.
*
* This submit might still need more gpfifo space than what the
* previous has used. The job metadata doesn't look at it
* though - the hw GP_GET pointer can be much further away than
* our metadata pointers; gpfifo space is "freed" by the HW.
*/
nvgpu_channel_clean_up_deterministic_job(c);
}
/* Grab access to HW to deal with do_idle */
nvgpu_rwsem_down_read(&g->deterministic_busy);
if (c->deterministic_railgate_allowed) {
/*
* Nope - this channel has dropped its own power ref. As
* deterministic submits don't hold power on per each submitted
* job like normal ones do, the GPU might railgate any time now
* and thus submit is disallowed.
*/
err = -EINVAL;
goto clean_up;
}
err = nvgpu_do_submit(c, gpfifo, userdata, num_entries, flags, fence,
fence_out, profiler, need_job_tracking, false);
if (err != 0) {
goto clean_up;
}
/* No hw access beyond this point */
nvgpu_rwsem_up_read(&g->deterministic_busy);
return 0;
clean_up:
nvgpu_log_fn(g, "fail %d", err);
nvgpu_rwsem_up_read(&g->deterministic_busy);
return err;
}
#endif
static int nvgpu_submit_nondeterministic(struct nvgpu_channel *c,
struct nvgpu_gpfifo_entry *gpfifo,
struct nvgpu_gpfifo_userdata userdata,
u32 num_entries,
u32 flags,
struct nvgpu_channel_fence *fence,
struct nvgpu_fence_type **fence_out,
struct nvgpu_swprofiler *profiler)
{
bool skip_buffer_refcounting = (flags &
NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING) != 0U;
bool flag_fence_wait = (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) != 0U;
bool flag_fence_get = (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) != 0U;
struct gk20a *g = c->g;
bool need_job_tracking;
int err = 0;
nvgpu_assert(!nvgpu_channel_is_deterministic(c));
/*
* Job tracking is necessary for any of the following conditions on
* non-deterministic channels:
* - pre- or post-fence functionality
* - GPU rail-gating
* - VPR resize enabled
* - buffer refcounting
* - channel watchdog
*
* If none of the conditions are met, then job tracking is not
* required and a fast submit can be done (ie. only need to write
* out userspace GPFIFO entries and update GP_PUT).
*/
need_job_tracking = flag_fence_wait ||
flag_fence_get ||
nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE) ||
nvgpu_is_vpr_resize_enabled() ||
!skip_buffer_refcounting ||
nvgpu_channel_wdt_enabled(c->wdt);
if (need_job_tracking) {
/*
* Get a power ref because this isn't a deterministic
* channel that holds them during the channel lifetime.
* This one is released by nvgpu_channel_clean_up_jobs,
* via syncpt or sema interrupt, whichever is used.
*/
err = gk20a_busy(g);
if (err != 0) {
nvgpu_err(g,
"failed to host gk20a to submit gpfifo");
nvgpu_print_current(g, NULL, NVGPU_ERROR);
return err;
}
}
err = nvgpu_do_submit(c, gpfifo, userdata, num_entries, flags, fence,
fence_out, profiler, need_job_tracking, true);
if (err != 0) {
goto clean_up;
}
return 0;
clean_up:
nvgpu_log_fn(g, "fail %d", err);
gk20a_idle(g);
return err;
}
static int check_submit_allowed(struct nvgpu_channel *c)
{
struct gk20a *g = c->g;
if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) {
return -ENODEV;
}
if (nvgpu_channel_check_unserviceable(c)) {
return -ETIMEDOUT;
}
if (c->usermode_submit_enabled) {
return -EINVAL;
}
if (!nvgpu_mem_is_valid(&c->gpfifo.mem)) {
return -ENOMEM;
}
/* an address space needs to have been bound at this point. */
if (!nvgpu_channel_as_bound(c)) {
nvgpu_err(g,
"not bound to an address space at time of gpfifo"
" submission.");
return -EINVAL;
}
return 0;
}
static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c,
struct nvgpu_gpfifo_entry *gpfifo,
struct nvgpu_gpfifo_userdata userdata,
u32 num_entries,
u32 flags,
struct nvgpu_channel_fence *fence,
struct nvgpu_fence_type **fence_out,
struct nvgpu_swprofiler *profiler)
{
struct gk20a *g = c->g;
int err;
err = check_submit_allowed(c);
if (err != 0) {
return err;
}
/*
* The fifo is not large enough for the request; return an error
* immediately. The kernel can insert gpfifo entries before and after
* the user gpfifos, so account for those extra entries on top of the
* user request. Also, HW with a fifo of size N can accept only N-1
* entries.
*/
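/*
* For example (illustrative size only): with entry_num = 1024, at most
* 1024 - 1 - EXTRA_GPFIFO_ENTRIES = 1021 user entries fit in a single
* submit.
*/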
if (c->gpfifo.entry_num - 1U < num_entries + EXTRA_GPFIFO_ENTRIES) {
nvgpu_err(g, "not enough gpfifo space allocated");
return -ENOMEM;
}
nvgpu_swprofile_snapshot(profiler, PROF_KICKOFF_ENTRY);
/* update debug settings */
nvgpu_ltc_sync_enabled(g);
nvgpu_log_info(g, "channel %d", c->chid);
#ifdef CONFIG_NVGPU_DETERMINISTIC_CHANNELS
if (c->deterministic) {
err = nvgpu_submit_deterministic(c, gpfifo, userdata,
num_entries, flags, fence, fence_out, profiler);
} else
#endif
{
err = nvgpu_submit_nondeterministic(c, gpfifo, userdata,
num_entries, flags, fence, fence_out, profiler);
}
if (err != 0) {
return err;
}
#ifdef CONFIG_NVGPU_TRACE
if (fence_out != NULL && *fence_out != NULL) {
/*
* This is not a good example on how to use the fence type.
* Don't touch the priv data. The debug trace is special.
*/
#ifdef CONFIG_TEGRA_GK20A_NVHOST
trace_gk20a_channel_submitted_gpfifo(g->name,
c->chid, num_entries, flags,
(*fence_out)->priv.syncpt_id,
(*fence_out)->priv.syncpt_value);
#else
trace_gk20a_channel_submitted_gpfifo(g->name,
c->chid, num_entries, flags,
0, 0);
#endif
} else {
trace_gk20a_channel_submitted_gpfifo(g->name,
c->chid, num_entries, flags,
0, 0);
}
#endif
nvgpu_log_info(g, "post-submit put %d, get %d, size %d",
c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
nvgpu_swprofile_snapshot(profiler, PROF_KICKOFF_END);
nvgpu_log_fn(g, "done");
return err;
}
int nvgpu_submit_channel_gpfifo_user(struct nvgpu_channel *c,
struct nvgpu_gpfifo_userdata userdata,
u32 num_entries,
u32 flags,
struct nvgpu_channel_fence *fence,
struct nvgpu_user_fence *fence_out,
struct nvgpu_swprofiler *profiler)
{
struct nvgpu_fence_type *fence_internal = NULL;
int err;
err = nvgpu_submit_channel_gpfifo(c, NULL, userdata, num_entries,
flags, fence, &fence_internal, profiler);
if (err == 0 && fence_internal != NULL) {
*fence_out = nvgpu_fence_extract_user(fence_internal);
nvgpu_fence_put(fence_internal);
}
return err;
}
int nvgpu_submit_channel_gpfifo_kernel(struct nvgpu_channel *c,
struct nvgpu_gpfifo_entry *gpfifo,
u32 num_entries,
u32 flags,
struct nvgpu_channel_fence *fence,
struct nvgpu_fence_type **fence_out)
{
struct nvgpu_gpfifo_userdata userdata = { NULL, NULL };
return nvgpu_submit_channel_gpfifo(c, gpfifo, userdata, num_entries,
flags, fence, fence_out, NULL);
}

View File

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,157 @@
/*
* USERD
*
* Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/trace.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/channel.h>
#include <nvgpu/fifo.h>
#include <nvgpu/fifo/userd.h>
#include <nvgpu/vm_area.h>
#include <nvgpu/dma.h>
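/*
* USERD entries are grouped into page-sized slabs; each slab holds
* NVGPU_CPU_PAGE_SIZE / entry_size channels worth of USERD. The backing
* memory of a slab is allocated lazily in nvgpu_userd_init_channel().
*/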
int nvgpu_userd_init_slabs(struct gk20a *g)
{
struct nvgpu_fifo *f = &g->fifo;
int err;
nvgpu_mutex_init(&f->userd_mutex);
f->num_channels_per_slab = NVGPU_CPU_PAGE_SIZE / g->ops.userd.entry_size(g);
f->num_userd_slabs =
DIV_ROUND_UP(f->num_channels, f->num_channels_per_slab);
f->userd_slabs = nvgpu_big_zalloc(g, f->num_userd_slabs *
sizeof(struct nvgpu_mem));
if (f->userd_slabs == NULL) {
nvgpu_err(g, "could not allocate userd slabs");
err = -ENOMEM;
goto clean_up;
}
return 0;
clean_up:
nvgpu_mutex_destroy(&f->userd_mutex);
return err;
}
void nvgpu_userd_free_slabs(struct gk20a *g)
{
struct nvgpu_fifo *f = &g->fifo;
u32 slab;
for (slab = 0; slab < f->num_userd_slabs; slab++) {
nvgpu_dma_free(g, &f->userd_slabs[slab]);
}
nvgpu_big_free(g, f->userd_slabs);
f->userd_slabs = NULL;
nvgpu_mutex_destroy(&f->userd_mutex);
}
int nvgpu_userd_init_channel(struct gk20a *g, struct nvgpu_channel *c)
{
struct nvgpu_fifo *f = &g->fifo;
struct nvgpu_mem *mem;
u32 slab = c->chid / f->num_channels_per_slab;
int err = 0;
if (slab >= f->num_userd_slabs) {
nvgpu_err(g, "chid %u, slab %u out of range (max=%u)",
c->chid, slab, f->num_userd_slabs);
return -EINVAL;
}
mem = &g->fifo.userd_slabs[slab];
nvgpu_mutex_acquire(&f->userd_mutex);
if (!nvgpu_mem_is_valid(mem)) {
err = nvgpu_dma_alloc_sys(g, NVGPU_CPU_PAGE_SIZE, mem);
if (err != 0) {
nvgpu_err(g, "userd allocation failed, err=%d", err);
goto done;
}
if (g->ops.mm.is_bar1_supported(g)) {
mem->gpu_va = g->ops.mm.bar1_map_userd(g, mem,
slab * NVGPU_CPU_PAGE_SIZE);
}
}
c->userd_mem = mem;
c->userd_offset = (c->chid % f->num_channels_per_slab) *
g->ops.userd.entry_size(g);
c->userd_iova = nvgpu_channel_userd_addr(c);
nvgpu_log(g, gpu_dbg_info,
"chid=%u slab=%u mem=%p offset=%u addr=%llx gpu_va=%llx",
c->chid, slab, mem, c->userd_offset,
nvgpu_channel_userd_addr(c),
nvgpu_channel_userd_gpu_va(c));
done:
nvgpu_mutex_release(&f->userd_mutex);
return err;
}
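/*
 * One-time USERD software setup: initialize the slab bookkeeping and
 * reserve a BAR1 VA area large enough to map USERD for every channel.
 */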
int nvgpu_userd_setup_sw(struct gk20a *g)
{
struct nvgpu_fifo *f = &g->fifo;
int err;
u32 size, num_pages;
err = nvgpu_userd_init_slabs(g);
if (err != 0) {
nvgpu_err(g, "failed to init userd support");
return err;
}
size = f->num_channels * g->ops.userd.entry_size(g);
num_pages = DIV_ROUND_UP(size, NVGPU_CPU_PAGE_SIZE);
err = nvgpu_vm_area_alloc(g->mm.bar1.vm,
num_pages, NVGPU_CPU_PAGE_SIZE, &f->userd_gpu_va, 0);
if (err != 0) {
nvgpu_err(g, "userd gpu va allocation failed, err=%d", err);
goto clean_up;
}
return 0;
clean_up:
nvgpu_userd_free_slabs(g);
return err;
}
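/*
 * Undo nvgpu_userd_setup_sw(): release the BAR1 VA area, if one was
 * reserved, and free the slabs.
 */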
void nvgpu_userd_cleanup_sw(struct gk20a *g)
{
struct nvgpu_fifo *f = &g->fifo;
if (f->userd_gpu_va != 0ULL) {
(void) nvgpu_vm_area_free(g->mm.bar1.vm, f->userd_gpu_va);
f->userd_gpu_va = 0ULL;
}
nvgpu_userd_free_slabs(g);
}
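
For a concrete feel for the slab math above, assume (purely for illustration) a 4 KiB CPU page and a 512-byte USERD entry: each slab then covers 4096 / 512 = 8 channels, and a GPU with 512 channels needs DIV_ROUND_UP(512, 8) = 64 slabs, i.e. 64 pages of USERD backing rather than one page per channel.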

View File

@@ -0,0 +1,278 @@
/*
* Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/gk20a.h>
#include <nvgpu/channel.h>
#include <nvgpu/watchdog.h>
#include <nvgpu/error_notifier.h>
#include <nvgpu/string.h>
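/*
 * Per-channel watchdog: a snapshot of the channel's progress state is
 * compared against the previous one on every check, and the channel is
 * reported as stuck once the time limit expires without any change.
 */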
struct nvgpu_channel_wdt {
struct gk20a *g;
/* lock protects the running timer state */
struct nvgpu_spinlock lock;
struct nvgpu_timeout timer;
bool running;
struct nvgpu_channel_wdt_state ch_state;
/* lock not needed */
u32 limit_ms;
bool enabled;
};
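/*
 * Allocate a watchdog instance, enabled by default and using the platform's
 * initial time limit. Free it with nvgpu_channel_wdt_destroy().
 */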
struct nvgpu_channel_wdt *nvgpu_channel_wdt_alloc(struct gk20a *g)
{
struct nvgpu_channel_wdt *wdt = nvgpu_kzalloc(g, sizeof(*wdt));
if (wdt == NULL) {
return NULL;
}
wdt->g = g;
nvgpu_spinlock_init(&wdt->lock);
wdt->enabled = true;
wdt->limit_ms = g->ch_wdt_init_limit_ms;
return wdt;
}
void nvgpu_channel_wdt_destroy(struct nvgpu_channel_wdt *wdt)
{
nvgpu_kfree(wdt->g, wdt);
}
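/*
 * The policy accessors below are not protected by the lock; see the
 * "lock not needed" note in struct nvgpu_channel_wdt.
 */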
void nvgpu_channel_wdt_enable(struct nvgpu_channel_wdt *wdt)
{
wdt->enabled = true;
}
void nvgpu_channel_wdt_disable(struct nvgpu_channel_wdt *wdt)
{
wdt->enabled = false;
}
bool nvgpu_channel_wdt_enabled(struct nvgpu_channel_wdt *wdt)
{
return wdt->enabled;
}
void nvgpu_channel_wdt_set_limit(struct nvgpu_channel_wdt *wdt, u32 limit_ms)
{
wdt->limit_ms = limit_ms;
}
u32 nvgpu_channel_wdt_limit(struct nvgpu_channel_wdt *wdt)
{
return wdt->limit_ms;
}
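/*
 * Arm the CPU timer with the current limit and snapshot the channel state
 * that later progress checks compare against. Called with wdt->lock held.
 */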
static void nvgpu_channel_wdt_init(struct nvgpu_channel_wdt *wdt,
struct nvgpu_channel_wdt_state *state)
{
struct gk20a *g = wdt->g;
int ret;
ret = nvgpu_timeout_init(g, &wdt->timer,
wdt->limit_ms,
NVGPU_TIMER_CPU_TIMER);
if (ret != 0) {
nvgpu_err(g, "timeout_init failed: %d", ret);
return;
}
wdt->ch_state = *state;
wdt->running = true;
}
/**
* Start a timeout counter (watchdog) on this channel.
*
* Trigger a watchdog to recover the channel after the per-platform timeout
* duration (but strictly no earlier) if the channel hasn't advanced within
* that time.
*
* If the timeout is already running, do nothing. This should be called when
* new jobs are submitted. The timeout will stop when the last tracked job
* finishes, making the channel idle.
*/
void nvgpu_channel_wdt_start(struct nvgpu_channel_wdt *wdt,
struct nvgpu_channel_wdt_state *state)
{
if (!nvgpu_is_timeouts_enabled(wdt->g)) {
return;
}
if (!wdt->enabled) {
return;
}
nvgpu_spinlock_acquire(&wdt->lock);
if (wdt->running) {
nvgpu_spinlock_release(&wdt->lock);
return;
}
nvgpu_channel_wdt_init(wdt, state);
nvgpu_spinlock_release(&wdt->lock);
}
/**
* Stop a running timeout counter (watchdog) on this channel.
*
* Make the watchdog consider the channel not running, so that it won't get
* recovered even if no progress is detected. Progress is not tracked if the
* watchdog is turned off.
*
 * No guarantees are made about concurrent execution of the timeout handler;
 * this should be called from an update handler running in the same thread
 * as the watchdog.
*/
bool nvgpu_channel_wdt_stop(struct nvgpu_channel_wdt *wdt)
{
bool was_running;
nvgpu_spinlock_acquire(&wdt->lock);
was_running = wdt->running;
wdt->running = false;
nvgpu_spinlock_release(&wdt->lock);
return was_running;
}
/**
* Continue a previously stopped timeout
*
* Enable the timeout again but don't reinitialize its timer.
*
 * No guarantees are made about concurrent execution of the timeout handler;
 * this should be called from an update handler running in the same thread
 * as the watchdog.
*/
void nvgpu_channel_wdt_continue(struct nvgpu_channel_wdt *wdt)
{
nvgpu_spinlock_acquire(&wdt->lock);
wdt->running = true;
nvgpu_spinlock_release(&wdt->lock);
}
/**
* Reset the counter of a timeout that is in effect.
*
* If this channel has an active timeout, act as if something happened on the
* channel right now.
*
 * Rewinding a stopped counter is a no-op. A stopped timeout can only be
 * started (which is effectively a rewind as well) or continued (in which
 * case the stop acted as a pause).
*/
void nvgpu_channel_wdt_rewind(struct nvgpu_channel_wdt *wdt,
struct nvgpu_channel_wdt_state *state)
{
nvgpu_spinlock_acquire(&wdt->lock);
if (wdt->running) {
nvgpu_channel_wdt_init(wdt, state);
}
nvgpu_spinlock_release(&wdt->lock);
}
/**
* Check if the watchdog is running.
*
* A running watchdog means one that is requested to run and expire in the
* future. The state of a running watchdog has to be checked periodically to
* see if it's expired.
*/
bool nvgpu_channel_wdt_running(struct nvgpu_channel_wdt *wdt)
{
bool running;
nvgpu_spinlock_acquire(&wdt->lock);
running = wdt->running;
nvgpu_spinlock_release(&wdt->lock);
return running;
}
/**
* Check if a channel has been stuck for the watchdog limit.
*
* Test if this channel has really got stuck at this point by checking if its
* {gp,pb}_get have advanced or not. If progress was detected, start the timer
* from zero again. If no {gp,pb}_get action happened in the watchdog time
* limit, return true. Else return false.
*/
static bool nvgpu_channel_wdt_handler(struct nvgpu_channel_wdt *wdt,
struct nvgpu_channel_wdt_state *state)
{
struct gk20a *g = wdt->g;
struct nvgpu_channel_wdt_state previous_state;
nvgpu_log_fn(g, " ");
/* Get status but keep timer running */
nvgpu_spinlock_acquire(&wdt->lock);
previous_state = wdt->ch_state;
nvgpu_spinlock_release(&wdt->lock);
if (nvgpu_memcmp((const u8 *)state,
(const u8 *)&previous_state,
sizeof(*state)) != 0) {
/* Channel has advanced, timer keeps going but resets */
nvgpu_channel_wdt_rewind(wdt, state);
return false;
}
if (!nvgpu_timeout_peek_expired(&wdt->timer)) {
/* Seems stuck but waiting to time out */
return false;
}
return true;
}
/**
* Test if the per-channel watchdog is on; check the timeout in that case.
*
* Each channel has an expiration time based watchdog. The timer is
* (re)initialized in two situations: when a new job is submitted on an idle
* channel and when the timeout is checked but progress is detected. The
* watchdog timeout limit is a coarse sliding window.
*
* The timeout is stopped (disabled) after the last job in a row finishes
* and marks the channel idle.
*/
bool nvgpu_channel_wdt_check(struct nvgpu_channel_wdt *wdt,
struct nvgpu_channel_wdt_state *state)
{
bool running;
nvgpu_spinlock_acquire(&wdt->lock);
running = wdt->running;
nvgpu_spinlock_release(&wdt->lock);
if (running) {
return nvgpu_channel_wdt_handler(wdt, state);
} else {
return false;
}
}
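
To show how this API is meant to be driven, here is a minimal polling sketch (illustration only, not part of the original file; collecting the current channel state and the actual recovery path are assumed to happen elsewhere):

/* Hypothetical polling step, for illustration only. */
static void example_wdt_poll(struct gk20a *g, struct nvgpu_channel_wdt *wdt,
		struct nvgpu_channel_wdt_state *current_state)
{
	/*
	 * nvgpu_channel_wdt_check() returns true only when the watchdog is
	 * running, no progress was seen since the last snapshot, and the
	 * time limit has expired; otherwise it rewinds the timer or keeps
	 * waiting.
	 */
	if (nvgpu_channel_wdt_check(wdt, current_state)) {
		nvgpu_err(g, "channel watchdog expired, triggering recovery");
		/* ...recovery of the stuck channel would start here... */
	}
}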