gpu: nvgpu: move mmu fault handling to hal/fifo

Move chip-specific MMU fault handling from fifo_gk20a.c to hal/fifo/mmu_fault_gk20a.c. Move gk20a_fifo_teardown_ch_tsg to hal/rc/rc_gk20a.c. JIRA NVGPU-1314 Change-Id: Idf88b1c312bc9f46c2508f2c63e948d71d622297 Signed-off-by: Seema Khowala <seemaj@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/2094051 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
2025-12-24 10:34:43 +03:00 · 2019-04-10 00:03:40 -07:00
parent 6ba1f5db3b
commit da9dee85e2
10 changed files with 399 additions and 306 deletions
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -237,6 +237,7 @@ nvgpu-y += \
 	hal/fuse/fuse_gm20b.o \
 	hal/fuse/fuse_gp10b.o \
 	hal/fuse/fuse_gp106.o \
+	hal/rc/rc_gk20a.o \
 	hal/fifo/usermode_gv11b.o \
 	hal/fifo/usermode_tu104.o \
 	hal/fifo/engines_gm20b.o \
--- a/drivers/gpu/nvgpu/Makefile.sources
+++ b/drivers/gpu/nvgpu/Makefile.sources
@@ -358,6 +358,7 @@ srcs += common/sim.c \
 	hal/fuse/fuse_gm20b.c \
 	hal/fuse/fuse_gp10b.c \
 	hal/fuse/fuse_gp106.c \
+	hal/rc/rc_gk20a.c \
 	hal/fifo/usermode_gv11b.c \
 	hal/fifo/usermode_tu104.c \
 	hal/fifo/engines_gm20b.c \
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -119,301 +119,6 @@ int gk20a_init_fifo_setup_hw(struct gk20a *g)
 	return 0;
 }

-static bool gk20a_fifo_handle_mmu_fault_locked(
-	struct gk20a *g,
-	u32 mmu_fault_engines, /* queried from HW if 0 */
-	u32 hw_id, /* queried from HW if ~(u32)0 OR mmu_fault_engines == 0*/
-	bool id_is_tsg)
-{
-	bool fake_fault;
-	unsigned long fault_id;
-	unsigned long engine_mmu_fault_id;
-	bool verbose = true;
-	struct nvgpu_engine_status_info engine_status;
-	bool deferred_reset_pending = false;
-	struct fifo_gk20a *f = &g->fifo;
-
-	nvgpu_log_fn(g, " ");
-
-	if (nvgpu_cg_pg_disable(g) != 0) {
-		nvgpu_warn(g, "fail to disable power mgmt");
-	}
-
-	/* Disable fifo access */
-	g->ops.gr.init.fifo_access(g, false);
-
-	if (mmu_fault_engines != 0U) {
-		fault_id = mmu_fault_engines;
-		fake_fault = true;
-	} else {
-		fault_id = gk20a_readl(g, fifo_intr_mmu_fault_id_r());
-		fake_fault = false;
-	}
-	nvgpu_mutex_acquire(&f->deferred_reset_mutex);
-	g->fifo.deferred_reset_pending = false;
-	nvgpu_mutex_release(&f->deferred_reset_mutex);
-
-	/* go through all faulted engines */
-	for_each_set_bit(engine_mmu_fault_id, &fault_id, 32U) {
-		/* bits in fifo_intr_mmu_fault_id_r do not correspond 1:1 to
-		 * engines. Convert engine_mmu_id to engine_id */
-		u32 engine_id = nvgpu_engine_mmu_fault_id_to_engine_id(g,
-					(u32)engine_mmu_fault_id);
-		struct mmu_fault_info mmfault_info;
-		struct channel_gk20a *ch = NULL;
-		struct tsg_gk20a *tsg = NULL;
-		struct channel_gk20a *refch = NULL;
-		bool ctxsw;
-		/* read and parse engine status */
-		g->ops.engine_status.read_engine_status_info(g, engine_id,
-			&engine_status);
-
-		ctxsw = nvgpu_engine_status_is_ctxsw(&engine_status);
-
-		gk20a_fifo_mmu_fault_info_dump(g, engine_id,
-				(u32)engine_mmu_fault_id,
-				fake_fault, &mmfault_info);
-
-		if (ctxsw) {
-			g->ops.gr.falcon.dump_stats(g);
-			nvgpu_err(g, "  gr_status_r: 0x%x",
-				  gk20a_readl(g, gr_status_r()));
-		}
-
-		/* get the channel/TSG */
-		if (fake_fault) {
-			/* use next_id if context load is failing */
-			u32 id, type;
-
-			if (hw_id == ~(u32)0) {
-				if (nvgpu_engine_status_is_ctxsw_load(
-					&engine_status)) {
-					nvgpu_engine_status_get_next_ctx_id_type(
-						&engine_status, &id, &type);
-				} else {
-					nvgpu_engine_status_get_ctx_id_type(
-						&engine_status, &id, &type);
-				}
-			} else {
-				id = hw_id;
-				type = id_is_tsg ?
-					ENGINE_STATUS_CTX_ID_TYPE_TSGID :
-					ENGINE_STATUS_CTX_ID_TYPE_CHID;
-			}
-
-			if (type == ENGINE_STATUS_CTX_ID_TYPE_TSGID) {
-				tsg = &g->fifo.tsg[id];
-			} else if (type == ENGINE_STATUS_CTX_ID_TYPE_CHID) {
-				ch = &g->fifo.channel[id];
-				refch = gk20a_channel_get(ch);
-				if (refch != NULL) {
-					tsg = tsg_gk20a_from_ch(refch);
-				}
-			}
-		} else {
-			/* Look up channel from the inst block pointer. */
-			ch = nvgpu_channel_refch_from_inst_ptr(g,
-					mmfault_info.inst_ptr);
-			refch = ch;
-			if (refch != NULL) {
-				tsg = tsg_gk20a_from_ch(refch);
-			}
-		}
-
-		/* check if engine reset should be deferred */
-		if (engine_id != FIFO_INVAL_ENGINE_ID) {
-			bool defer = nvgpu_engine_should_defer_reset(g,
-					engine_id, mmfault_info.client_type,
-					fake_fault);
-			if (((ch != NULL) || (tsg != NULL)) && defer) {
-				g->fifo.deferred_fault_engines |= BIT(engine_id);
-
-				/* handled during channel free */
-				nvgpu_mutex_acquire(&f->deferred_reset_mutex);
-				g->fifo.deferred_reset_pending = true;
-				nvgpu_mutex_release(&f->deferred_reset_mutex);
-
-				deferred_reset_pending = true;
-
-				nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
-					   "sm debugger attached,"
-					   " deferring channel recovery to channel free");
-			} else {
-				nvgpu_engine_reset(g, engine_id);
-			}
-		}
-
-#ifdef CONFIG_GK20A_CTXSW_TRACE
-		if (tsg != NULL) {
-			nvgpu_gr_fecs_trace_add_tsg_reset(g, tsg);
-		}
-#endif
-		/*
-		 * Disable the channel/TSG from hw and increment syncpoints.
-		 */
-		if (tsg != NULL) {
-			if (deferred_reset_pending) {
-				g->ops.tsg.disable(tsg);
-			} else {
-				if (!fake_fault) {
-					nvgpu_tsg_set_ctx_mmu_error(g, tsg);
-				}
-				verbose = nvgpu_tsg_mark_error(g, tsg);
-				nvgpu_tsg_abort(g, tsg, false);
-			}
-
-			/* put back the ref taken early above */
-			if (refch != NULL) {
-				gk20a_channel_put(ch);
-			}
-		} else if (refch != NULL) {
-			nvgpu_err(g, "mmu error in unbound channel %d",
-					  ch->chid);
-			gk20a_channel_put(ch);
-		} else if (mmfault_info.inst_ptr ==
-				nvgpu_inst_block_addr(g,
-					&g->mm.bar1.inst_block)) {
-			nvgpu_err(g, "mmu fault from bar1");
-		} else if (mmfault_info.inst_ptr ==
-				nvgpu_inst_block_addr(g,
-					&g->mm.pmu.inst_block)) {
-			nvgpu_err(g, "mmu fault from pmu");
-		} else {
-			nvgpu_err(g, "couldn't locate channel for mmu fault");
-		}
-	}
-
-	if (!fake_fault) {
-		gk20a_debug_dump(g);
-	}
-
-	/* clear interrupt */
-	gk20a_writel(g, fifo_intr_mmu_fault_id_r(), (u32)fault_id);
-
-	/* resume scheduler */
-	gk20a_writel(g, fifo_error_sched_disable_r(),
-		     gk20a_readl(g, fifo_error_sched_disable_r()));
-
-	/* Re-enable fifo access */
-	g->ops.gr.init.fifo_access(g, true);
-
-	if (nvgpu_cg_pg_enable(g) != 0) {
-		nvgpu_warn(g, "fail to enable power mgmt");
-	}
-	return verbose;
-}
-
-bool gk20a_fifo_handle_mmu_fault(
-	struct gk20a *g,
-	u32 mmu_fault_engines, /* queried from HW if 0 */
-	u32 hw_id, /* queried from HW if ~(u32)0 OR mmu_fault_engines == 0*/
-	bool id_is_tsg)
-{
-	bool verbose;
-
-	nvgpu_log_fn(g, " ");
-
-	nvgpu_log_info(g, "acquire engines_reset_mutex");
-	nvgpu_mutex_acquire(&g->fifo.engines_reset_mutex);
-
-	nvgpu_fifo_lock_active_runlists(g);
-
-	verbose = gk20a_fifo_handle_mmu_fault_locked(g, mmu_fault_engines,
-			hw_id, id_is_tsg);
-
-	nvgpu_fifo_unlock_active_runlists(g);
-
-	nvgpu_log_info(g, "release engines_reset_mutex");
-	nvgpu_mutex_release(&g->fifo.engines_reset_mutex);
-
-	return verbose;
-}
-
-void gk20a_fifo_teardown_ch_tsg(struct gk20a *g, u32 __engine_ids,
-			u32 hw_id, unsigned int id_type, unsigned int rc_type,
-			 struct mmu_fault_info *mmfault)
-{
-	unsigned long engine_id, i;
-	unsigned long _engine_ids = __engine_ids;
-	unsigned long engine_ids = 0;
-	u32 mmu_fault_engines = 0;
-	u32 ref_type;
-	u32 ref_id;
-	bool ref_id_is_tsg = false;
-	bool id_is_known = (id_type != ID_TYPE_UNKNOWN) ? true : false;
-	bool id_is_tsg = (id_type == ID_TYPE_TSG) ? true : false;
-
-	nvgpu_log_info(g, "acquire engines_reset_mutex");
-	nvgpu_mutex_acquire(&g->fifo.engines_reset_mutex);
-
-	nvgpu_fifo_lock_active_runlists(g);
-
-	if (id_is_known) {
-		engine_ids = g->ops.engine.get_mask_on_id(g,
-				hw_id, id_is_tsg);
-		ref_id = hw_id;
-		ref_type = id_is_tsg ?
-			fifo_engine_status_id_type_tsgid_v() :
-			fifo_engine_status_id_type_chid_v();
-		ref_id_is_tsg = id_is_tsg;
-		/* atleast one engine will get passed during sched err*/
-		engine_ids |= __engine_ids;
-		for_each_set_bit(engine_id, &engine_ids, 32U) {
-			u32 mmu_id = nvgpu_engine_id_to_mmu_fault_id(g,
-							(u32)engine_id);
-
-			if (mmu_id != FIFO_INVAL_ENGINE_ID) {
-				mmu_fault_engines |= BIT(mmu_id);
-			}
-		}
-	} else {
-		/* store faulted engines in advance */
-		for_each_set_bit(engine_id, &_engine_ids, 32U) {
-			nvgpu_engine_get_id_and_type(g, (u32)engine_id,
-						      &ref_id, &ref_type);
-			if (ref_type == fifo_engine_status_id_type_tsgid_v()) {
-				ref_id_is_tsg = true;
-			} else {
-				ref_id_is_tsg = false;
-			}
-			/* Reset *all* engines that use the
-			 * same channel as faulty engine */
-			for (i = 0; i < g->fifo.num_engines; i++) {
-				u32 active_engine_id = g->fifo.active_engines_list[i];
-				u32 type;
-				u32 id;
-
-				nvgpu_engine_get_id_and_type(g,
-					active_engine_id, &id, &type);
-				if (ref_type == type && ref_id == id) {
-					u32 mmu_id = nvgpu_engine_id_to_mmu_fault_id(g,
-							active_engine_id);
-
-					engine_ids |= BIT(active_engine_id);
-					if (mmu_id != FIFO_INVAL_ENGINE_ID) {
-						mmu_fault_engines |= BIT(mmu_id);
-					}
-				}
-			}
-		}
-	}
-
-	if (mmu_fault_engines != 0U) {
-		g->ops.fifo.intr_set_recover_mask(g);
-
-		g->ops.fifo.trigger_mmu_fault(g, engine_ids);
-		gk20a_fifo_handle_mmu_fault_locked(g, mmu_fault_engines, ref_id,
-				ref_id_is_tsg);
-
-		g->ops.fifo.intr_unset_recover_mask(g);
-	}
-
-	nvgpu_fifo_unlock_active_runlists(g);
-
-	nvgpu_log_info(g, "release engines_reset_mutex");
-	nvgpu_mutex_release(&g->fifo.engines_reset_mutex);
-}
-
 void gk20a_fifo_issue_preempt(struct gk20a *g, u32 id, bool is_tsg)
 {
 	if (is_tsg) {
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
@@ -29,7 +29,6 @@
 #include <nvgpu/engines.h>

 struct gk20a_debug_output;
-struct mmu_fault_info;
 struct nvgpu_semaphore;
 struct channel_gk20a;
 struct tsg_gk20a;
@@ -211,9 +210,6 @@ struct fifo_gk20a {

 int gk20a_init_fifo_setup_hw(struct gk20a *g);

-void gk20a_fifo_isr(struct gk20a *g);
-u32 gk20a_fifo_nonstall_isr(struct gk20a *g);
-
 int gk20a_fifo_preempt_channel(struct gk20a *g, struct channel_gk20a *ch);
 int gk20a_fifo_preempt_tsg(struct gk20a *g, struct tsg_gk20a *tsg);
 int gk20a_fifo_preempt(struct gk20a *g, struct channel_gk20a *ch);
@@ -255,14 +251,8 @@ int gk20a_fifo_is_preempt_pending(struct gk20a *g, u32 id,
 			unsigned int id_type);
 int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg);

-void gk20a_fifo_teardown_ch_tsg(struct gk20a *g, u32 __engine_ids,
-			u32 hw_id, unsigned int id_type, unsigned int rc_type,
-			 struct mmu_fault_info *mmfault);
-
 u32 gk20a_fifo_default_timeslice_us(struct gk20a *g);

 int gk20a_fifo_init_pbdma_map(struct gk20a *g, u32 *pbdma_map, u32 num_pbdma);
-bool gk20a_fifo_handle_mmu_fault(struct gk20a *g,
-	u32 mmu_fault_engines, u32 hw_id, bool id_is_tsg);

 #endif /* FIFO_GK20A_H */
--- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
@@ -75,6 +75,7 @@
 #include "hal/fifo/ctxsw_timeout_gk20a.h"
 #include "hal/fifo/mmu_fault_gk20a.h"
 #include "hal/fifo/mmu_fault_gm20b.h"
+#include "hal/rc/rc_gk20a.h"
 #include "hal/gr/zbc/zbc_gm20b.h"
 #include "hal/gr/zcull/zcull_gm20b.h"
 #include "hal/gr/falcon/gr_falcon_gm20b.h"
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -88,6 +88,7 @@
 #include "hal/fifo/mmu_fault_gm20b.h"
 #include "hal/fifo/mmu_fault_gp10b.h"
 #include "hal/fifo/ctxsw_timeout_gk20a.h"
+#include "hal/rc/rc_gk20a.h"
 #include "hal/gr/ecc/ecc_gp10b.h"
 #include "hal/gr/fecs_trace/fecs_trace_gm20b.h"
 #include "hal/gr/fecs_trace/fecs_trace_gp10b.h"
--- a/drivers/gpu/nvgpu/hal/fifo/mmu_fault_gk20a.c
+++ b/drivers/gpu/nvgpu/hal/fifo/mmu_fault_gk20a.c
@@ -25,12 +25,23 @@
 #include <nvgpu/timers.h>
 #include <nvgpu/log.h>
 #include <nvgpu/io.h>
+#include <nvgpu/debug.h>
 #include <nvgpu/fifo.h>
+#include <nvgpu/runlist.h>
 #include <nvgpu/engines.h>
+#include <nvgpu/engine_status.h>
+#include <nvgpu/power_features/cg.h>
+#include <nvgpu/power_features/pg.h>
+#include <nvgpu/power_features/power_features.h>
+#include <nvgpu/gr/fecs_trace.h>
+#include <nvgpu/channel.h>
+#include <nvgpu/tsg.h>

 #include <hal/fifo/mmu_fault_gk20a.h>

 #include <nvgpu/hw/gk20a/hw_fifo_gk20a.h>
+/* TODO: remove gr_status_r */
+#include <nvgpu/hw/gk20a/hw_gr_gk20a.h>

 /* fault info/descriptions */

@@ -213,7 +224,220 @@ void gk20a_fifo_mmu_fault_info_dump(struct gk20a *g, u32 engine_id,

 void gk20a_fifo_handle_dropped_mmu_fault(struct gk20a *g)
 {
-	u32 fault_id = gk20a_readl(g, fifo_intr_mmu_fault_id_r());
+	u32 fault_id = nvgpu_readl(g, fifo_intr_mmu_fault_id_r());

 	nvgpu_err(g, "dropped mmu fault (0x%08x)", fault_id);
 }
+
+bool gk20a_fifo_handle_mmu_fault_locked(
+	struct gk20a *g,
+	u32 mmu_fault_engines, /* MMU fault id mask; read from HW if 0 */
+	u32 hw_id, /* ch/TSG id; from HW if ~(u32)0 or mmu_fault_engines == 0 */
+	bool id_is_tsg)
+{
+	bool fake_fault;
+	unsigned long fault_id;
+	unsigned long engine_mmu_fault_id;
+	bool debug_dump = true;
+	struct nvgpu_engine_status_info engine_status;
+	bool deferred_reset_pending = false;
+	struct fifo_gk20a *f = &g->fifo;
+
+	nvgpu_log_fn(g, " ");
+
+	if (nvgpu_cg_pg_disable(g) != 0) {
+		nvgpu_warn(g, "fail to disable power mgmt");
+	}
+
+	/* Disable fifo access; re-enabled before returning */
+	g->ops.gr.init.fifo_access(g, false);
+
+	if (mmu_fault_engines != 0U) {
+		fault_id = mmu_fault_engines;
+		fake_fault = true;
+	} else {
+		fault_id = nvgpu_readl(g, fifo_intr_mmu_fault_id_r());
+		fake_fault = false;
+	}
+	nvgpu_mutex_acquire(&f->deferred_reset_mutex);
+	g->fifo.deferred_reset_pending = false;
+	nvgpu_mutex_release(&f->deferred_reset_mutex);
+
+	/* go through every set bit (faulted engine) in fault_id */
+	for_each_set_bit(engine_mmu_fault_id, &fault_id, 32U) {
+		/*
+		 * Bits in fifo_intr_mmu_fault_id_r do not correspond 1:1 to
+		 * engines. Convert the mmu fault id to an engine_id.
+		 */
+		u32 engine_id = nvgpu_engine_mmu_fault_id_to_engine_id(g,
+					(u32)engine_mmu_fault_id);
+		struct mmu_fault_info mmfault_info;
+		struct channel_gk20a *ch = NULL;
+		struct tsg_gk20a *tsg = NULL;
+		struct channel_gk20a *refch = NULL;
+		bool ctxsw;
+
+		/* read and parse engine status */
+		g->ops.engine_status.read_engine_status_info(g, engine_id,
+			&engine_status);
+
+		ctxsw = nvgpu_engine_status_is_ctxsw(&engine_status);
+
+		gk20a_fifo_mmu_fault_info_dump(g, engine_id,
+				(u32)engine_mmu_fault_id,
+				fake_fault, &mmfault_info);
+
+		if (ctxsw) {
+			g->ops.gr.falcon.dump_stats(g);
+			nvgpu_err(g, "  gr_status_r: 0x%x",
+				  nvgpu_readl(g, gr_status_r()));
+		}
+
+		/* resolve the faulting channel/TSG */
+		if (fake_fault) {
+			/* use next_id if context load is failing */
+			u32 id, type;
+
+			if (hw_id == ~(u32)0) {
+				if (nvgpu_engine_status_is_ctxsw_load(
+					&engine_status)) {
+					nvgpu_engine_status_get_next_ctx_id_type(
+						&engine_status, &id, &type);
+				} else {
+					nvgpu_engine_status_get_ctx_id_type(
+						&engine_status, &id, &type);
+				}
+			} else {
+				id = hw_id;
+				type = id_is_tsg ?
+					ENGINE_STATUS_CTX_ID_TYPE_TSGID :
+					ENGINE_STATUS_CTX_ID_TYPE_CHID;
+			}
+
+			if (type == ENGINE_STATUS_CTX_ID_TYPE_TSGID) {
+				tsg = &g->fifo.tsg[id];
+			} else if (type == ENGINE_STATUS_CTX_ID_TYPE_CHID) {
+				ch = &g->fifo.channel[id];
+				refch = gk20a_channel_get(ch);
+				if (refch != NULL) {
+					tsg = tsg_gk20a_from_ch(refch);
+				}
+			}
+		} else {
+			/* Look up channel from the inst block pointer. */
+			ch = nvgpu_channel_refch_from_inst_ptr(g,
+					mmfault_info.inst_ptr);
+			refch = ch;
+			if (refch != NULL) {
+				tsg = tsg_gk20a_from_ch(refch);
+			}
+		}
+
+		/* check if engine reset should be deferred */
+		if (engine_id != FIFO_INVAL_ENGINE_ID) {
+			bool defer = nvgpu_engine_should_defer_reset(g,
+					engine_id, mmfault_info.client_type,
+					fake_fault);
+			if (((ch != NULL) || (tsg != NULL)) && defer) {
+				g->fifo.deferred_fault_engines |= BIT(engine_id);
+
+				/* handled during channel free */
+				nvgpu_mutex_acquire(&f->deferred_reset_mutex);
+				g->fifo.deferred_reset_pending = true;
+				nvgpu_mutex_release(&f->deferred_reset_mutex);
+
+				deferred_reset_pending = true;
+
+				nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
+					   "sm debugger attached,"
+					   " deferring channel recovery to channel free");
+			} else {
+				nvgpu_engine_reset(g, engine_id);
+			}
+		}
+
+#ifdef CONFIG_GK20A_CTXSW_TRACE
+		if (tsg != NULL) {
+			nvgpu_gr_fecs_trace_add_tsg_reset(g, tsg);
+		}
+#endif
+		/*
+		 * Disable the channel/TSG from hw and increment syncpoints.
+		 */
+		if (tsg != NULL) {
+			if (deferred_reset_pending) {
+				g->ops.tsg.disable(tsg);
+			} else {
+				if (!fake_fault) {
+					nvgpu_tsg_set_ctx_mmu_error(g, tsg);
+				}
+				debug_dump = nvgpu_tsg_mark_error(g, tsg);
+				nvgpu_tsg_abort(g, tsg, false);
+			}
+
+			/* drop the channel reference taken above */
+			if (refch != NULL) {
+				gk20a_channel_put(ch);
+			}
+		} else if (refch != NULL) {
+			nvgpu_err(g, "mmu error in unbound channel %d",
+					  ch->chid);
+			gk20a_channel_put(ch);
+		} else if (mmfault_info.inst_ptr ==
+				nvgpu_inst_block_addr(g,
+					&g->mm.bar1.inst_block)) {
+			nvgpu_err(g, "mmu fault from bar1");
+		} else if (mmfault_info.inst_ptr ==
+				nvgpu_inst_block_addr(g,
+					&g->mm.pmu.inst_block)) {
+			nvgpu_err(g, "mmu fault from pmu");
+		} else {
+			nvgpu_err(g, "couldn't locate channel for mmu fault");
+		}
+	}
+
+	if (!fake_fault) {
+		gk20a_debug_dump(g);
+	}
+
+	/* clear interrupt */
+	nvgpu_writel(g, fifo_intr_mmu_fault_id_r(), (u32)fault_id);
+
+	/* resume scheduler */
+	nvgpu_writel(g, fifo_error_sched_disable_r(),
+		     nvgpu_readl(g, fifo_error_sched_disable_r()));
+
+	/* Re-enable fifo access */
+	g->ops.gr.init.fifo_access(g, true);
+
+	if (nvgpu_cg_pg_enable(g) != 0) {
+		nvgpu_warn(g, "fail to enable power mgmt");
+	}
+	return debug_dump;
+}
+
+bool gk20a_fifo_handle_mmu_fault(
+	struct gk20a *g,
+	u32 mmu_fault_engines, /* MMU fault id mask; read from HW if 0 */
+	u32 hw_id, /* ch/TSG id; from HW if ~(u32)0 or mmu_fault_engines == 0 */
+	bool id_is_tsg)
+{
+	bool debug_dump;
+
+	nvgpu_log_fn(g, " ");
+
+	nvgpu_log_info(g, "acquire engines_reset_mutex");
+	nvgpu_mutex_acquire(&g->fifo.engines_reset_mutex);
+
+	nvgpu_fifo_lock_active_runlists(g);
+
+	debug_dump = gk20a_fifo_handle_mmu_fault_locked(g, mmu_fault_engines,
+			hw_id, id_is_tsg);
+
+	nvgpu_fifo_unlock_active_runlists(g);
+
+	nvgpu_log_info(g, "release engines_reset_mutex");
+	nvgpu_mutex_release(&g->fifo.engines_reset_mutex);
+
+	return debug_dump;
+}
--- a/drivers/gpu/nvgpu/hal/fifo/mmu_fault_gk20a.h
+++ b/drivers/gpu/nvgpu/hal/fifo/mmu_fault_gk20a.h
@@ -39,4 +39,10 @@ void gk20a_fifo_mmu_fault_info_dump(struct gk20a *g, u32 engine_id,

 void gk20a_fifo_handle_dropped_mmu_fault(struct gk20a *g);

+bool gk20a_fifo_handle_mmu_fault(struct gk20a *g, u32 mmu_fault_engines,
+	u32 hw_id, bool id_is_tsg);
+
+bool gk20a_fifo_handle_mmu_fault_locked(struct gk20a *g, u32 mmu_fault_engines,
+	u32 hw_id, bool id_is_tsg);
+
 #endif /* NVGPU_FIFO_MMU_FAULT_GK20A_H */
--- a/drivers/gpu/nvgpu/hal/rc/rc_gk20a.c
+++ b/drivers/gpu/nvgpu/hal/rc/rc_gk20a.c
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2011-2019, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <nvgpu/log.h>
+#include <nvgpu/debug.h>
+#include <nvgpu/utils.h>
+#include <nvgpu/fifo.h>
+#include <nvgpu/rc.h>
+#include <nvgpu/runlist.h>
+#include <nvgpu/gk20a.h>
+#include <nvgpu/channel.h>
+#include <nvgpu/unit.h>
+#include <nvgpu/types.h>
+#include <nvgpu/engine_status.h>
+#include <nvgpu/engines.h>
+#include <nvgpu/power_features/cg.h>
+#include <nvgpu/power_features/pg.h>
+#include <nvgpu/power_features/power_features.h>
+#include <nvgpu/gr/fecs_trace.h>
+
+#include <hal/fifo/mmu_fault_gk20a.h>
+#include <hal/rc/rc_gk20a.h>
+
+#include <nvgpu/hw/gk20a/hw_fifo_gk20a.h>
+
+
+void gk20a_fifo_teardown_ch_tsg(struct gk20a *g, u32 eng_bitmask,
+			u32 hw_id, unsigned int id_type, unsigned int rc_type,
+			 struct mmu_fault_info *mmufault)
+{
+	unsigned long engine_id, i;
+	unsigned long _engine_ids = eng_bitmask;
+	unsigned long engine_ids = 0UL;
+	u32 mmu_fault_engines = 0U;
+	u32 ref_type;
+	u32 ref_id;
+	bool ref_id_is_tsg = false;
+	bool id_is_known = (id_type != ID_TYPE_UNKNOWN) ? true : false;
+	bool id_is_tsg = (id_type == ID_TYPE_TSG) ? true : false;
+
+	nvgpu_log_info(g, "acquire engines_reset_mutex");
+	nvgpu_mutex_acquire(&g->fifo.engines_reset_mutex);
+
+	nvgpu_fifo_lock_active_runlists(g);
+
+	if (id_is_known) {
+		engine_ids = g->ops.engine.get_mask_on_id(g,
+				hw_id, id_is_tsg);
+		ref_id = hw_id;
+		ref_type = id_is_tsg ?
+			fifo_engine_status_id_type_tsgid_v() :
+			fifo_engine_status_id_type_chid_v();
+		ref_id_is_tsg = id_is_tsg;
+		/* at least one engine will get passed during sched err */
+		engine_ids |= eng_bitmask;
+		for_each_set_bit(engine_id, &engine_ids, 32U) {
+			u32 mmu_id = nvgpu_engine_id_to_mmu_fault_id(g,
+							(u32)engine_id);
+
+			if (mmu_id != FIFO_INVAL_ENGINE_ID) {
+				mmu_fault_engines |= BIT(mmu_id);
+			}
+		}
+	} else {
+		/* id unknown: take id and type from each engine passed in */
+		for_each_set_bit(engine_id, &_engine_ids, 32U) {
+			nvgpu_engine_get_id_and_type(g, (u32)engine_id,
+						      &ref_id, &ref_type);
+			if (ref_type == fifo_engine_status_id_type_tsgid_v()) {
+				ref_id_is_tsg = true;
+			} else {
+				ref_id_is_tsg = false;
+			}
+			/*
+			 * Reset *all* active engines that report the
+			 * same id and type as the faulty engine
+			 */
+			for (i = 0; i < g->fifo.num_engines; i++) {
+				u32 active_engine_id = g->fifo.active_engines_list[i];
+				u32 type;
+				u32 id;
+
+				nvgpu_engine_get_id_and_type(g,
+					active_engine_id, &id, &type);
+				if (ref_type == type && ref_id == id) {
+					u32 mmu_id = nvgpu_engine_id_to_mmu_fault_id(g,
+							active_engine_id);
+
+					engine_ids |= BIT(active_engine_id);
+					if (mmu_id != FIFO_INVAL_ENGINE_ID) {
+						mmu_fault_engines |= BIT(mmu_id);
+					}
+				}
+			}
+		}
+	}
+
+	if (mmu_fault_engines != 0U) {
+		g->ops.fifo.intr_set_recover_mask(g);
+
+		g->ops.fifo.trigger_mmu_fault(g, engine_ids);
+		gk20a_fifo_handle_mmu_fault_locked(g, mmu_fault_engines, ref_id,
+				ref_id_is_tsg);
+
+		g->ops.fifo.intr_unset_recover_mask(g);
+	}
+
+	nvgpu_fifo_unlock_active_runlists(g);
+
+	nvgpu_log_info(g, "release engines_reset_mutex");
+	nvgpu_mutex_release(&g->fifo.engines_reset_mutex);
+}
--- a/drivers/gpu/nvgpu/hal/rc/rc_gk20a.h
+++ b/drivers/gpu/nvgpu/hal/rc/rc_gk20a.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2011-2019, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#ifndef RC_GK20A_H
+#define RC_GK20A_H
+
+#include <nvgpu/types.h>
+
+struct mmu_fault_info;
+
+void gk20a_fifo_teardown_ch_tsg(struct gk20a *g, u32 eng_bitmask,
+			u32 hw_id, unsigned int id_type, unsigned int rc_type,
+			 struct mmu_fault_info *mmufault);
+
+#endif /* RC_GK20A_H */