Open source GPL/LGPL release

This commit is contained in:
svcmobrel-release
2025-12-19 15:25:44 -08:00
commit 9fc87a7ec7
2261 changed files with 576825 additions and 0 deletions

/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/log.h>
#include <nvgpu/errno.h>
#include <nvgpu/timers.h>
#include <nvgpu/bitops.h>
#ifdef CONFIG_NVGPU_LS_PMU
#include <nvgpu/pmu.h>
#include <nvgpu/pmu/mutex.h>
#endif
#include <nvgpu/runlist.h>
#include <nvgpu/engines.h>
#include <nvgpu/engine_status.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/pbdma_status.h>
#include <nvgpu/power_features/pg.h>
#include <nvgpu/channel.h>
#include <nvgpu/soc.h>
#include <nvgpu/device.h>
#include <nvgpu/gr/gr_falcon.h>
#include <nvgpu/gr/gr.h>
#include <nvgpu/gr/gr_instances.h>
#include <nvgpu/fifo.h>
#include <nvgpu/static_analysis.h>
#include <nvgpu/swprofile.h>
#include <nvgpu/fifo/swprofile.h>
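/*
 * Flag checked against FECS ctxsw mailbox 2 in
 * nvgpu_engine_find_busy_doing_ctxsw() below: when it is set during a
 * context switch, the incoming context (ctx_next_id) is reported instead
 * of the outgoing one.
 */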
#define FECS_METHOD_WFI_RESTORE 0x80000U
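/*
 * Classify a device into the SW engine type used by the FIFO code: GR for
 * graphics devices, ASYNC_CE for copy engines (a GRCE, i.e. a CE sharing
 * the GR runlist, is distinguished later in init_info() by runlist_id),
 * and INVAL for any other device type.
 */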
enum nvgpu_fifo_engine nvgpu_engine_enum_from_dev(struct gk20a *g,
const struct nvgpu_device *dev)
{
enum nvgpu_fifo_engine ret = NVGPU_ENGINE_INVAL;
if (nvgpu_device_is_graphics(g, dev)) {
ret = NVGPU_ENGINE_GR;
} else if (nvgpu_device_is_ce(g, dev)) {
		/* For now, all CE engines have separate runlists. We can
		 * identify the NVGPU_ENGINE_GRCE type CE by comparing its
		 * runlist_id with the GR runlist_id in init_info().
		 */
ret = NVGPU_ENGINE_ASYNC_CE;
} else {
ret = NVGPU_ENGINE_INVAL;
}
return ret;
}
const struct nvgpu_device *nvgpu_engine_get_active_eng_info(
struct gk20a *g, u32 engine_id)
{
struct nvgpu_fifo *f = &g->fifo;
if (engine_id >= f->max_engines) {
return NULL;
}
return f->host_engines[engine_id];
}
bool nvgpu_engine_check_valid_id(struct gk20a *g, u32 engine_id)
{
struct nvgpu_fifo *f = &g->fifo;
if (engine_id >= f->max_engines) {
return false;
}
return f->host_engines[engine_id] != NULL;
}
u32 nvgpu_engine_get_gr_id_for_inst(struct gk20a *g, u32 inst_id)
{
const struct nvgpu_device *dev;
dev = nvgpu_device_get(g, NVGPU_DEVTYPE_GRAPHICS, inst_id);
if (dev == NULL) {
nvgpu_warn(g, "No GR devices on this GPU for inst[%u]?!",
inst_id);
return NVGPU_INVALID_ENG_ID;
}
return dev->engine_id;
}
u32 nvgpu_engine_get_gr_id(struct gk20a *g)
{
/* Consider 1st available GR engine */
return nvgpu_engine_get_gr_id_for_inst(g, 0U);
}
u32 nvgpu_engine_act_interrupt_mask(struct gk20a *g, u32 engine_id)
{
const struct nvgpu_device *dev = NULL;
dev = nvgpu_engine_get_active_eng_info(g, engine_id);
if (dev == NULL) {
return 0;
}
return BIT32(dev->intr_id);
}
u32 nvgpu_gr_engine_interrupt_mask(struct gk20a *g)
{
const struct nvgpu_device *dev;
u32 intr_mask = 0U;
u32 i;
for (i = 0U; i < g->num_gr_instances; i++) {
dev = nvgpu_device_get(g, NVGPU_DEVTYPE_GRAPHICS,
nvgpu_gr_get_syspipe_id(g, i));
if (dev == NULL) {
continue;
}
intr_mask |= BIT32(dev->intr_id);
}
return intr_mask;
}
u32 nvgpu_ce_engine_interrupt_mask(struct gk20a *g)
{
const struct nvgpu_device *dev;
u32 i;
u32 mask = 0U;
/*
	 * For old chips (pre-Pascal) we have COPY[0-2]; for new chips we
	 * have some number of LCE instances. For the purpose of this code we
* imagine a system that could have both; in reality that'll never be
* the case.
*
* This can be cleaned up in the future by defining a SW type for CE and
* hiding this ugliness in the device management code.
*/
for (i = NVGPU_DEVTYPE_COPY0; i <= NVGPU_DEVTYPE_COPY2; i++) {
dev = nvgpu_device_get(g, i, i - NVGPU_DEVTYPE_COPY0);
if (dev == NULL) {
continue;
}
mask |= BIT32(dev->intr_id);
}
/*
* Now take care of LCEs.
*/
for (i = 0U; i < nvgpu_device_count(g, NVGPU_DEVTYPE_LCE); i++) {
dev = nvgpu_device_get(g, NVGPU_DEVTYPE_LCE, i);
nvgpu_assert(dev != NULL);
mask |= BIT32(dev->intr_id);
}
return mask;
}
#ifdef CONFIG_NVGPU_FIFO_ENGINE_ACTIVITY
static void nvgpu_engine_enable_activity(struct gk20a *g,
const struct nvgpu_device *dev)
{
nvgpu_runlist_set_state(g, BIT32(dev->runlist_id), RUNLIST_ENABLED);
}
void nvgpu_engine_enable_activity_all(struct gk20a *g)
{
u32 i;
for (i = 0; i < g->fifo.num_engines; i++) {
nvgpu_engine_enable_activity(g, g->fifo.active_engines[i]);
}
}
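/*
 * Disable activity on a single engine. In outline, the sequence below is:
 *   1. Bail out with -EBUSY if the engine is busy and the caller did not
 *      ask to wait for idle.
 *   2. Take the PMU FIFO mutex when a PMU is present.
 *   3. Disable the engine's runlist.
 *   4. Preempt any channel resident on the PBDMAs serving that runlist,
 *      then any channel resident on the engine itself.
 *   5. Release the PMU mutex; on failure, re-enable the runlist.
 */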
int nvgpu_engine_disable_activity(struct gk20a *g,
const struct nvgpu_device *dev,
bool wait_for_idle)
{
u32 pbdma_chid = NVGPU_INVALID_CHANNEL_ID;
u32 engine_chid = NVGPU_INVALID_CHANNEL_ID;
#ifdef CONFIG_NVGPU_LS_PMU
u32 token = PMU_INVALID_MUTEX_OWNER_ID;
int mutex_ret = -EINVAL;
#endif
int err = 0;
struct nvgpu_channel *ch = NULL;
struct nvgpu_engine_status_info engine_status;
struct nvgpu_pbdma_status_info pbdma_status;
unsigned long runlist_served_pbdmas;
unsigned long bit;
u32 pbdma_id;
struct nvgpu_fifo *f = &g->fifo;
nvgpu_log_fn(g, " ");
g->ops.engine_status.read_engine_status_info(g, dev->engine_id,
&engine_status);
if (engine_status.is_busy && !wait_for_idle) {
return -EBUSY;
}
#ifdef CONFIG_NVGPU_LS_PMU
if (g->ops.pmu.is_pmu_supported(g)) {
mutex_ret = nvgpu_pmu_lock_acquire(g, g->pmu,
PMU_MUTEX_ID_FIFO, &token);
}
#endif
nvgpu_runlist_set_state(g, BIT32(dev->runlist_id),
RUNLIST_DISABLED);
runlist_served_pbdmas = f->runlists[dev->runlist_id]->pbdma_bitmask;
for_each_set_bit(bit, &runlist_served_pbdmas,
nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA)) {
pbdma_id = U32(bit);
/* chid from pbdma status */
g->ops.pbdma_status.read_pbdma_status_info(g,
pbdma_id,
&pbdma_status);
if (nvgpu_pbdma_status_is_chsw_valid(&pbdma_status) ||
nvgpu_pbdma_status_is_chsw_save(&pbdma_status)) {
pbdma_chid = pbdma_status.id;
} else if (nvgpu_pbdma_status_is_chsw_load(&pbdma_status) ||
nvgpu_pbdma_status_is_chsw_switch(&pbdma_status)) {
pbdma_chid = pbdma_status.next_id;
} else {
/* Nothing to do here */
}
if (pbdma_chid != NVGPU_INVALID_CHANNEL_ID) {
ch = nvgpu_channel_from_id(g, pbdma_chid);
if (ch != NULL) {
err = g->ops.fifo.preempt_channel(g, ch);
nvgpu_channel_put(ch);
}
if (err != 0) {
goto clean_up;
}
}
}
/* chid from engine status */
g->ops.engine_status.read_engine_status_info(g, dev->engine_id,
&engine_status);
if (nvgpu_engine_status_is_ctxsw_valid(&engine_status) ||
nvgpu_engine_status_is_ctxsw_save(&engine_status)) {
engine_chid = engine_status.ctx_id;
} else if (nvgpu_engine_status_is_ctxsw_switch(&engine_status) ||
nvgpu_engine_status_is_ctxsw_load(&engine_status)) {
engine_chid = engine_status.ctx_next_id;
} else {
/* Nothing to do here */
}
	if ((engine_chid != NVGPU_INVALID_CHANNEL_ID) &&
	    (engine_chid != pbdma_chid)) {
ch = nvgpu_channel_from_id(g, engine_chid);
if (ch != NULL) {
err = g->ops.fifo.preempt_channel(g, ch);
nvgpu_channel_put(ch);
}
if (err != 0) {
goto clean_up;
}
}
clean_up:
#ifdef CONFIG_NVGPU_LS_PMU
if (mutex_ret == 0) {
if (nvgpu_pmu_lock_release(g, g->pmu,
PMU_MUTEX_ID_FIFO, &token) != 0){
nvgpu_err(g, "failed to release PMU lock");
}
}
#endif
if (err != 0) {
nvgpu_log_fn(g, "failed");
nvgpu_engine_enable_activity(g, dev);
} else {
nvgpu_log_fn(g, "done");
}
return err;
}
int nvgpu_engine_disable_activity_all(struct gk20a *g,
bool wait_for_idle)
{
unsigned int i;
int err = 0, ret = 0;
for (i = 0; i < g->fifo.num_engines; i++) {
err = nvgpu_engine_disable_activity(g,
g->fifo.active_engines[i],
wait_for_idle);
if (err != 0) {
nvgpu_err(g, "failed to disable engine %d activity",
g->fifo.active_engines[i]->engine_id);
ret = err;
break;
}
}
if (err != 0) {
while (i-- != 0U) {
nvgpu_engine_enable_activity(g,
g->fifo.active_engines[i]);
}
}
return ret;
}
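/*
 * Poll every valid host engine until it reports idle, backing off
 * exponentially from POLL_DELAY_MIN_US up to POLL_DELAY_MAX_US, and give
 * up with -ETIMEDOUT once the poll timeout expires for any one engine.
 */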
int nvgpu_engine_wait_for_idle(struct gk20a *g)
{
struct nvgpu_timeout timeout;
u32 delay = POLL_DELAY_MIN_US;
int ret = 0, err = 0;
u32 i, host_num_engines;
struct nvgpu_engine_status_info engine_status;
nvgpu_log_fn(g, " ");
host_num_engines =
nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_ENGINES);
err = nvgpu_timeout_init(g, &timeout, nvgpu_get_poll_timeout(g),
NVGPU_TIMER_CPU_TIMER);
if (err != 0) {
return -EINVAL;
}
for (i = 0; i < host_num_engines; i++) {
if (!nvgpu_engine_check_valid_id(g, i)) {
continue;
}
ret = -ETIMEDOUT;
do {
g->ops.engine_status.read_engine_status_info(g, i,
&engine_status);
if (!engine_status.is_busy) {
ret = 0;
break;
}
nvgpu_usleep_range(delay, delay * 2U);
delay = min_t(u32,
delay << 1U, POLL_DELAY_MAX_US);
} while (nvgpu_timeout_expired(&timeout) == 0);
if (ret != 0) {
			/* Possible causes: check the register settings
			 * programmed by the HAL in elcg_init_idle_filters
			 * and init_therm_setup_hw.
			 */
nvgpu_err(g, "cannot idle engine: %u "
"engine_status: 0x%08x", i,
engine_status.reg_data);
break;
}
}
nvgpu_log_fn(g, "done");
return ret;
}
#endif /* CONFIG_NVGPU_FIFO_ENGINE_ACTIVITY */
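/*
 * Allocate the two engine tables used by the FIFO code: host_engines is
 * indexed by engine_id (sparse, NULL for unused IDs) and active_engines is
 * a dense list of the num_engines devices actually present, both filled in
 * by nvgpu_engine_init_info().
 */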
int nvgpu_engine_setup_sw(struct gk20a *g)
{
struct nvgpu_fifo *f = &g->fifo;
int err = 0;
size_t size;
f->max_engines = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_ENGINES);
size = nvgpu_safe_mult_u64(f->max_engines,
sizeof(struct nvgpu_device *));
/*
* Allocate the two device lists for host devices.
*/
f->host_engines = nvgpu_kzalloc(g, size);
if (f->host_engines == NULL) {
nvgpu_err(g, "OOM allocating host engine list");
return -ENOMEM;
}
f->active_engines = nvgpu_kzalloc(g, size);
if (f->active_engines == NULL) {
nvgpu_err(g, "no mem for active engine list");
err = -ENOMEM;
goto clean_up_engine_info;
}
err = nvgpu_engine_init_info(f);
if (err != 0) {
nvgpu_err(g, "init engine info failed");
goto clean_up;
}
return 0;
clean_up:
nvgpu_kfree(g, f->active_engines);
f->active_engines = NULL;
clean_up_engine_info:
nvgpu_kfree(g, f->host_engines);
f->host_engines = NULL;
return err;
}
void nvgpu_engine_cleanup_sw(struct gk20a *g)
{
struct nvgpu_fifo *f = &g->fifo;
f->num_engines = 0;
nvgpu_kfree(g, f->host_engines);
f->host_engines = NULL;
nvgpu_kfree(g, f->active_engines);
f->active_engines = NULL;
}
#ifdef CONFIG_NVGPU_ENGINE_RESET
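/*
 * GR engine recovery path: disable ELPG, flush the FECS trace buffer if
 * present, halt the FECS pipeline, run the full GR init sequence via
 * nvgpu_gr_reset(), and finally re-enable ELPG. SW profile snapshots are
 * taken between the individual steps.
 */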
static void nvgpu_engine_gr_reset(struct gk20a *g)
{
struct nvgpu_swprofiler *prof = &g->fifo.eng_reset_profiler;
int err = 0;
nvgpu_swprofile_snapshot(prof, PROF_ENG_RESET_PREAMBLE);
#ifdef CONFIG_NVGPU_POWER_PG
	if (nvgpu_pg_elpg_disable(g) != 0) {
		nvgpu_err(g, "failed to disable elpg");
	}
#endif
nvgpu_swprofile_snapshot(prof, PROF_ENG_RESET_ELPG_DISABLE);
#ifdef CONFIG_NVGPU_FECS_TRACE
/*
* Resetting engine will alter read/write index. Need to flush
* circular buffer before re-enabling FECS.
*/
if (g->ops.gr.fecs_trace.reset != NULL) {
if (g->ops.gr.fecs_trace.reset(g) != 0) {
nvgpu_warn(g, "failed to reset fecs traces");
}
}
#endif
nvgpu_swprofile_snapshot(prof, PROF_ENG_RESET_FECS_TRACE_RESET);
	/*
	 * The HALT_PIPELINE method and GR reset during recovery are
	 * supported starting with nvgpu-next simulation.
	 */
err = g->ops.gr.falcon.ctrl_ctxsw(g,
NVGPU_GR_FALCON_METHOD_HALT_PIPELINE, 0U, NULL);
if (err != 0) {
nvgpu_err(g, "failed to halt gr pipe");
}
nvgpu_swprofile_snapshot(prof, PROF_ENG_RESET_HALT_PIPELINE);
	/*
	 * Resetting only the engine is not enough; we run the full GR init
	 * sequence.
	 */
nvgpu_log(g, gpu_dbg_rec, "resetting gr engine");
err = nvgpu_gr_reset(g);
if (err != 0) {
nvgpu_err(g, "failed to reset gr engine");
}
#ifdef CONFIG_NVGPU_POWER_PG
if (nvgpu_pg_elpg_enable(g) != 0) {
nvgpu_err(g, "failed to set enable elpg");
}
nvgpu_swprofile_snapshot(prof, PROF_ENG_RESET_ELPG_REENABLE);
#endif
}
void nvgpu_engine_reset(struct gk20a *g, u32 engine_id)
{
	struct nvgpu_swprofiler *prof;
	const struct nvgpu_device *dev;
	int err = 0;
	u32 gr_instance_id;
	if (g == NULL) {
		return;
	}
	prof = &g->fifo.eng_reset_profiler;
	nvgpu_log_fn(g, " ");
	nvgpu_swprofile_begin_sample(prof);
dev = nvgpu_engine_get_active_eng_info(g, engine_id);
if (dev == NULL) {
nvgpu_err(g, "unsupported engine_id %d", engine_id);
return;
}
if (!nvgpu_device_is_ce(g, dev) &&
!nvgpu_device_is_graphics(g, dev)) {
nvgpu_warn(g, "Ignoring reset for non-host engine.");
return;
}
/*
* Simple case first: reset a copy engine.
*/
if (nvgpu_device_is_ce(g, dev)) {
err = nvgpu_mc_reset_dev(g, dev);
if (err != 0) {
nvgpu_log_info(g, "CE engine [id:%u] reset failed",
dev->engine_id);
}
return;
}
/*
* Now reset a GR engine.
*/
gr_instance_id =
nvgpu_grmgr_get_gr_instance_id_for_syspipe(
g, dev->inst_id);
nvgpu_gr_exec_for_instance(g,
gr_instance_id, nvgpu_engine_gr_reset(g));
}
#endif
u32 nvgpu_engine_get_fast_ce_runlist_id(struct gk20a *g)
{
const struct nvgpu_device *dev;
u32 nr_lces;
u32 i;
/*
* Obtain a runlist ID for the fastest available CE. The priority order
* is:
*
* 1. Last available LCE
* 2. Last available COPY[0-2]
* 3. GRAPHICS runlist as a last resort.
*/
nr_lces = nvgpu_device_count(g, NVGPU_DEVTYPE_LCE);
if (nr_lces > 0U) {
dev = nvgpu_device_get(g,
NVGPU_DEVTYPE_LCE,
nr_lces - 1U);
nvgpu_assert(dev != NULL);
return dev->runlist_id;
}
	/*
	 * Note: this loop only works because NVGPU_DEVTYPE_GRAPHICS is 0 and
	 * the COPYx types are all > 0, so the unsigned index cannot wrap
	 * before the i >= NVGPU_DEVTYPE_COPY0 check fails.
	 */
for (i = NVGPU_DEVTYPE_COPY2; i >= NVGPU_DEVTYPE_COPY0; i--) {
dev = nvgpu_device_get(g, i, i - NVGPU_DEVTYPE_COPY0);
if (dev != NULL) {
return dev->runlist_id;
}
}
/*
* Fall back to GR.
*/
dev = nvgpu_device_get(g, NVGPU_DEVTYPE_GRAPHICS, 0);
nvgpu_assert(dev != NULL);
return dev->runlist_id;
}
u32 nvgpu_engine_get_gr_runlist_id(struct gk20a *g)
{
const struct nvgpu_device *dev;
dev = nvgpu_device_get(g, NVGPU_DEVTYPE_GRAPHICS, 0);
if (dev == NULL) {
nvgpu_warn(g, "No GR device on this GPU?!");
return NVGPU_INVALID_RUNLIST_ID;
}
return dev->runlist_id;
}
bool nvgpu_engine_is_valid_runlist_id(struct gk20a *g, u32 runlist_id)
{
u32 i;
struct nvgpu_fifo *f = &g->fifo;
for (i = 0U; i < f->num_engines; i++) {
const struct nvgpu_device *dev = f->active_engines[i];
if (dev->runlist_id == runlist_id) {
return true;
}
}
return false;
}
/*
 * Map engine IDs to MMU fault IDs and vice versa.
 */
u32 nvgpu_engine_id_to_mmu_fault_id(struct gk20a *g, u32 engine_id)
{
const struct nvgpu_device *dev;
dev = nvgpu_engine_get_active_eng_info(g, engine_id);
if (dev == NULL) {
nvgpu_err(g,
"engine_id: %u is not in active list",
engine_id);
return NVGPU_INVALID_ENG_ID;
}
return dev->fault_id;
}
u32 nvgpu_engine_mmu_fault_id_to_engine_id(struct gk20a *g, u32 fault_id)
{
u32 i;
const struct nvgpu_device *dev;
struct nvgpu_fifo *f = &g->fifo;
for (i = 0U; i < f->num_engines; i++) {
dev = f->active_engines[i];
if (dev->fault_id == fault_id) {
return dev->engine_id;
}
}
return NVGPU_INVALID_ENG_ID;
}
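/*
 * Return a bitmask of engine IDs that are currently busy with the given
 * TSG or channel ID. For engines in the middle of a context load, the
 * incoming (next) context is the one compared against the ID.
 */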
u32 nvgpu_engine_get_mask_on_id(struct gk20a *g, u32 id, bool is_tsg)
{
unsigned int i;
u32 engines = 0;
struct nvgpu_engine_status_info engine_status;
u32 ctx_id;
u32 type;
bool busy;
for (i = 0; i < g->fifo.num_engines; i++) {
const struct nvgpu_device *dev = g->fifo.active_engines[i];
g->ops.engine_status.read_engine_status_info(g,
dev->engine_id, &engine_status);
if (nvgpu_engine_status_is_ctxsw_load(
&engine_status)) {
nvgpu_engine_status_get_next_ctx_id_type(
&engine_status, &ctx_id, &type);
} else {
nvgpu_engine_status_get_ctx_id_type(
&engine_status, &ctx_id, &type);
}
busy = engine_status.is_busy;
		if (!busy || (ctx_id != id)) {
continue;
}
if ((is_tsg && (type == ENGINE_STATUS_CTX_ID_TYPE_TSGID)) ||
(!is_tsg && (type == ENGINE_STATUS_CTX_ID_TYPE_CHID))) {
engines |= BIT32(dev->engine_id);
}
}
return engines;
}
static int nvgpu_engine_init_one_dev(struct nvgpu_fifo *f,
const struct nvgpu_device *dev)
{
bool found;
struct nvgpu_device *dev_rw;
struct gk20a *g = f->g;
dev_rw = (struct nvgpu_device *)dev;
/*
* Populate the PBDMA info for this device; ideally it'd be done
* during device init, but the FIFO unit is not out of reset that
* early in the nvgpu_finalize_poweron() sequence.
*
* We only need to do this for native; vGPU already has pbdma_id
* populated during device initialization.
*/
if (g->ops.fifo.find_pbdma_for_runlist != NULL) {
found = g->ops.fifo.find_pbdma_for_runlist(g,
dev->runlist_id,
&dev_rw->pbdma_id);
if (!found) {
nvgpu_err(g, "busted pbdma map");
return -EINVAL;
}
}
#if defined(CONFIG_NVGPU_NEXT)
{
int err = nvgpu_next_engine_init_one_dev(g, dev);
if (err != 0) {
return err;
}
}
#endif
f->host_engines[dev->engine_id] = dev;
f->active_engines[f->num_engines] = dev;
++f->num_engines;
return 0;
}
int nvgpu_engine_init_info(struct nvgpu_fifo *f)
{
int err;
struct gk20a *g = f->g;
const struct nvgpu_device *dev;
f->num_engines = 0;
nvgpu_log(g, gpu_dbg_device, "Loading host engines from device list");
nvgpu_log(g, gpu_dbg_device, " GFX devices: %u",
nvgpu_device_count(g, NVGPU_DEVTYPE_GRAPHICS));
nvgpu_device_for_each(g, dev, NVGPU_DEVTYPE_GRAPHICS) {
err = nvgpu_engine_init_one_dev(f, dev);
if (err != 0) {
return err;
}
}
return g->ops.engine.init_ce_info(f);
}
void nvgpu_engine_get_id_and_type(struct gk20a *g, u32 engine_id,
u32 *id, u32 *type)
{
struct nvgpu_engine_status_info engine_status;
g->ops.engine_status.read_engine_status_info(g, engine_id,
&engine_status);
	/* use next_id if a context load is in progress */
if (nvgpu_engine_status_is_ctxsw_load(
&engine_status)) {
nvgpu_engine_status_get_next_ctx_id_type(
&engine_status, id, type);
} else {
nvgpu_engine_status_get_ctx_id_type(
&engine_status, id, type);
}
}
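/*
 * Find the first busy engine that is in a context-switch state and report
 * the channel or TSG involved. During a load, or during a switch with
 * FECS_METHOD_WFI_RESTORE set in FECS ctxsw mailbox 2, the incoming
 * context is reported; otherwise the outgoing one. The returned engine ID
 * is only meaningful when *id_ptr comes back different from U32_MAX.
 */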
u32 nvgpu_engine_find_busy_doing_ctxsw(struct gk20a *g,
u32 *id_ptr, bool *is_tsg_ptr)
{
u32 i;
u32 id = U32_MAX;
bool is_tsg = false;
u32 mailbox2;
struct nvgpu_engine_status_info engine_status;
const struct nvgpu_device *dev = NULL;
for (i = 0U; i < g->fifo.num_engines; i++) {
dev = g->fifo.active_engines[i];
g->ops.engine_status.read_engine_status_info(g, dev->engine_id,
&engine_status);
		/*
		 * We are only interested in busy engines that are in the
		 * middle of a context switch.
		 */
if (!engine_status.is_busy ||
!nvgpu_engine_status_is_ctxsw(&engine_status)) {
continue;
}
if (nvgpu_engine_status_is_ctxsw_load(&engine_status)) {
id = engine_status.ctx_next_id;
is_tsg = nvgpu_engine_status_is_next_ctx_type_tsg(
&engine_status);
} else if (nvgpu_engine_status_is_ctxsw_switch(&engine_status)) {
mailbox2 = g->ops.gr.falcon.read_fecs_ctxsw_mailbox(g,
NVGPU_GR_FALCON_FECS_CTXSW_MAILBOX2);
if ((mailbox2 & FECS_METHOD_WFI_RESTORE) != 0U) {
id = engine_status.ctx_next_id;
is_tsg = nvgpu_engine_status_is_next_ctx_type_tsg(
&engine_status);
} else {
id = engine_status.ctx_id;
is_tsg = nvgpu_engine_status_is_ctx_type_tsg(
&engine_status);
}
} else {
id = engine_status.ctx_id;
is_tsg = nvgpu_engine_status_is_ctx_type_tsg(
&engine_status);
}
break;
}
*id_ptr = id;
*is_tsg_ptr = is_tsg;
	return (dev != NULL) ? dev->engine_id : NVGPU_INVALID_ENG_ID;
}
u32 nvgpu_engine_get_runlist_busy_engines(struct gk20a *g, u32 runlist_id)
{
struct nvgpu_fifo *f = &g->fifo;
u32 i, eng_bitmask = 0U;
struct nvgpu_engine_status_info engine_status;
for (i = 0U; i < f->num_engines; i++) {
const struct nvgpu_device *dev = f->active_engines[i];
g->ops.engine_status.read_engine_status_info(g, dev->engine_id,
&engine_status);
if (engine_status.is_busy && (dev->runlist_id == runlist_id)) {
eng_bitmask |= BIT32(dev->engine_id);
}
}
return eng_bitmask;
}
#ifdef CONFIG_NVGPU_DEBUGGER
bool nvgpu_engine_should_defer_reset(struct gk20a *g, u32 engine_id,
u32 engine_subid, bool fake_fault)
{
const struct nvgpu_device *dev;
dev = nvgpu_engine_get_active_eng_info(g, engine_id);
if (dev == NULL) {
return false;
}
	/*
	 * Channel recovery is only deferred if an SM debugger is attached
	 * and MMU debug mode is enabled.
	 */
if (!g->ops.gr.sm_debugger_attached(g) ||
!g->ops.fb.is_debug_mode_enabled(g)) {
return false;
}
/* if this fault is fake (due to RC recovery), don't defer recovery */
if (fake_fault) {
return false;
}
if (dev->type != NVGPU_DEVTYPE_GRAPHICS) {
return false;
}
return g->ops.engine.is_fault_engine_subid_gpc(g, engine_subid);
}
#endif
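/*
 * A GR engine with subcontexts owns a contiguous range of MMU fault IDs,
 * one per VEID, starting at its base fault ID. Illustrative numbers only:
 * with gr_eng_fault_id = 0x100 and max_subctx_count = 64, mmu_fault_id
 * 0x105 maps to VEID 5, while anything outside [0x100, 0x140) returns
 * INVAL_ID.
 */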
u32 nvgpu_engine_mmu_fault_id_to_veid(struct gk20a *g, u32 mmu_fault_id,
u32 gr_eng_fault_id)
{
struct nvgpu_fifo *f = &g->fifo;
u32 num_subctx;
u32 veid = INVAL_ID;
num_subctx = f->max_subctx_count;
if ((mmu_fault_id >= gr_eng_fault_id) &&
(mmu_fault_id < nvgpu_safe_add_u32(gr_eng_fault_id,
num_subctx))) {
veid = mmu_fault_id - gr_eng_fault_id;
}
return veid;
}
static u32 nvgpu_engine_mmu_fault_id_to_eng_id_and_veid(struct gk20a *g,
u32 mmu_fault_id, u32 *veid)
{
u32 i;
u32 engine_id = INVAL_ID;
const struct nvgpu_device *dev;
struct nvgpu_fifo *f = &g->fifo;
for (i = 0U; i < f->num_engines; i++) {
dev = f->active_engines[i];
if (dev->type == NVGPU_DEVTYPE_GRAPHICS) {
*veid = nvgpu_engine_mmu_fault_id_to_veid(g,
mmu_fault_id, dev->fault_id);
if (*veid != INVAL_ID) {
engine_id = dev->engine_id;
break;
}
} else {
if (dev->fault_id == mmu_fault_id) {
engine_id = dev->engine_id;
*veid = INVAL_ID;
break;
}
}
}
return engine_id;
}
void nvgpu_engine_mmu_fault_id_to_eng_ve_pbdma_id(struct gk20a *g,
u32 mmu_fault_id, u32 *engine_id, u32 *veid, u32 *pbdma_id)
{
*engine_id = nvgpu_engine_mmu_fault_id_to_eng_id_and_veid(g,
mmu_fault_id, veid);
if (*engine_id == INVAL_ID) {
*pbdma_id = g->ops.fifo.mmu_fault_id_to_pbdma_id(g,
mmu_fault_id);
} else {
*pbdma_id = INVAL_ID;
}
}