diff --git a/drivers/gpu/nvgpu/common/fb/fb_gv11b.c b/drivers/gpu/nvgpu/common/fb/fb_gv11b.c
index 69a715759..26dabd724 100644
--- a/drivers/gpu/nvgpu/common/fb/fb_gv11b.c
+++ b/drivers/gpu/nvgpu/common/fb/fb_gv11b.c
@@ -792,10 +792,11 @@ static void gv11b_fb_copy_from_hw_fault_buf(struct gk20a *g,
 static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g,
 		struct mmu_fault_info *mmfault, u32 *invalidate_replay_val)
 {
-	unsigned int id_type;
+	unsigned int id_type = ID_TYPE_UNKNOWN;
 	u32 num_lce, act_eng_bitmask = 0;
 	int err = 0;
-	u32 id = ((u32)~0);
+	u32 id = FIFO_INVAL_TSG_ID;
+	unsigned int rc_type = RC_TYPE_NO_RC;
 
 	if (!mmfault->valid)
 		return;
@@ -810,18 +811,23 @@ static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g,
 		/* CE page faults are not reported as replayable */
 		nvgpu_log(g, gpu_dbg_intr, "CE Faulted");
 		err = gv11b_fb_fix_page_fault(g, mmfault);
-		gv11b_fifo_reset_pbdma_and_eng_faulted(g, mmfault->refch,
-				mmfault->faulted_pbdma, mmfault->faulted_engine);
+		if (mmfault->refch &&
+			(u32)mmfault->refch->tsgid != FIFO_INVAL_TSG_ID) {
+			gv11b_fifo_reset_pbdma_and_eng_faulted(g,
+					&g->fifo.tsg[mmfault->refch->tsgid],
+					mmfault->faulted_pbdma,
+					mmfault->faulted_engine);
+		}
 		if (!err) {
 			nvgpu_log(g, gpu_dbg_intr, "CE Page Fault Fixed");
 			*invalidate_replay_val = 0;
-			/* refch in mmfault is assigned at the time of copying
-			 * fault info from snap reg or bar2 fault buf
-			 */
-			gk20a_channel_put(mmfault->refch);
+			if (mmfault->refch) {
+				gk20a_channel_put(mmfault->refch);
+				mmfault->refch = NULL;
+			}
 			return;
 		}
-		/* Do recovery. Channel recovery needs refch */
+		/* Do recovery */
 		nvgpu_log(g, gpu_dbg_intr, "CE Page Fault Not Fixed");
 	}
 
@@ -833,16 +839,9 @@ static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g,
 		 * instance block, the fault cannot be isolated to a
 		 * single context so we need to reset the entire runlist
 		 */
-		id_type = ID_TYPE_UNKNOWN;
+		rc_type = RC_TYPE_MMU_FAULT;
 	} else if (mmfault->refch) {
-		if (gk20a_is_channel_marked_as_tsg(mmfault->refch)) {
-			id = mmfault->refch->tsgid;
-			id_type = ID_TYPE_TSG;
-		} else {
-			id = mmfault->chid;
-			id_type = ID_TYPE_CHANNEL;
-		}
 		if (mmfault->refch->mmu_nack_handled) {
 			/* We have already recovered for the same
 			 * context, skip doing another recovery.
@@ -863,19 +862,40 @@ static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g,
 			 */
 			gk20a_channel_put(mmfault->refch);
 			return;
+		} else {
+			/* Indicate recovery is handled if mmu fault is
+			 * a result of mmu nack.
+			 */
+			mmfault->refch->mmu_nack_handled = true;
 		}
-	} else {
-		id_type = ID_TYPE_UNKNOWN;
-	}
-	if (mmfault->faulted_engine != FIFO_INVAL_ENGINE_ID)
-		act_eng_bitmask = BIT(mmfault->faulted_engine);
-	/* Indicate recovery is handled if mmu fault is a result of
-	 * mmu nack.
+		rc_type = RC_TYPE_MMU_FAULT;
+		if (gk20a_is_channel_marked_as_tsg(mmfault->refch)) {
+			id = mmfault->refch->tsgid;
+			if (id != FIFO_INVAL_TSG_ID)
+				id_type = ID_TYPE_TSG;
+		} else {
+			nvgpu_err(g, "bare channels not supported");
+		}
+	}
+
+	/* engine is faulted */
+	if (mmfault->faulted_engine != FIFO_INVAL_ENGINE_ID) {
+		act_eng_bitmask = BIT(mmfault->faulted_engine);
+		rc_type = RC_TYPE_MMU_FAULT;
+	}
+
+	/* refch in mmfault is assigned at the time of copying
+	 * fault info from snap reg or bar2 fault buf
 	 */
-	mmfault->refch->mmu_nack_handled = true;
-	g->ops.fifo.teardown_ch_tsg(g, act_eng_bitmask,
-		id, id_type, RC_TYPE_MMU_FAULT, mmfault);
+	if (mmfault->refch) {
+		gk20a_channel_put(mmfault->refch);
+		mmfault->refch = NULL;
+	}
+
+	if (rc_type != RC_TYPE_NO_RC)
+		g->ops.fifo.teardown_ch_tsg(g, act_eng_bitmask,
+				id, id_type, rc_type, mmfault);
 	} else {
 		if (mmfault->fault_type == gmmu_fault_type_pte_v()) {
 			nvgpu_log(g, gpu_dbg_intr, "invalid pte! try to fix");
@@ -894,7 +914,10 @@ static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g,
 		/* refch in mmfault is assigned at the time of copying
 		 * fault info from snap reg or bar2 fault buf
 		 */
-		gk20a_channel_put(mmfault->refch);
+		if (mmfault->refch) {
+			gk20a_channel_put(mmfault->refch);
+			mmfault->refch = NULL;
+		}
 	}
 }
 
@@ -985,8 +1008,10 @@ void gv11b_fb_handle_mmu_nonreplay_replay_fault(struct gk20a *g,
 			next_fault_addr = mmfault->fault_addr;
 			if (prev_fault_addr == next_fault_addr) {
 				nvgpu_log(g, gpu_dbg_intr, "pte already scanned");
-				if (mmfault->refch)
+				if (mmfault->refch) {
 					gk20a_channel_put(mmfault->refch);
+					mmfault->refch = NULL;
+				}
 				continue;
 			}
 		}
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index cd54baf1d..57cb0019b 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -55,9 +55,7 @@
 #define FECS_METHOD_WFI_RESTORE 0x80000
 #define FECS_MAILBOX_0_ACK_RESTORE 0x4
 
-static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
-					    u32 chid, bool add,
-					    bool wait_for_finish);
+
 static u32 gk20a_fifo_engines_on_id(struct gk20a *g, u32 id, bool is_tsg);
 
 static const char *const pbdma_intr_fault_type_desc[] = {
@@ -2708,7 +2706,7 @@ void gk20a_fifo_issue_preempt(struct gk20a *g, u32 id, bool is_tsg)
 }
 
 int gk20a_fifo_is_preempt_pending(struct gk20a *g, u32 id,
-		unsigned int id_type, unsigned int timeout_rc_type)
+		unsigned int id_type)
 {
 	struct nvgpu_timeout timeout;
 	u32 delay = GR_IDLE_CHECK_DEFAULT;
@@ -2781,8 +2779,8 @@ int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg)
 	id_type = is_tsg ? ID_TYPE_TSG : ID_TYPE_CHANNEL;
 
 	/* wait for preempt */
-	ret = g->ops.fifo.is_preempt_pending(g, id, id_type,
-			PREEMPT_TIMEOUT_RC);
+	ret = g->ops.fifo.is_preempt_pending(g, id, id_type);
+
 	return ret;
 }
 
@@ -3279,7 +3277,7 @@ void gk20a_fifo_runlist_hw_submit(struct gk20a *g, u32 runlist_id,
 			fifo_eng_runlist_length_f(count));
 }
 
-static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
+int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
 					    u32 chid, bool add,
 					    bool wait_for_finish)
 {
@@ -3452,8 +3450,7 @@ static int __locked_fifo_reschedule_preempt_next(struct channel_gk20a *ch,
 			gk20a_readl(g, fifo_preempt_r()));
 #endif
 	if (wait_preempt) {
-		g->ops.fifo.is_preempt_pending(
-			g, preempt_id, preempt_type, PREEMPT_TIMEOUT_RC);
+		g->ops.fifo.is_preempt_pending(g, preempt_id, preempt_type);
 	}
 #ifdef TRACEPOINTS_ENABLED
 	trace_gk20a_reschedule_preempted_next(ch->chid);
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
index bccd15f61..77030c944 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
@@ -50,9 +50,6 @@ enum {
 #define ID_TYPE_TSG 1
 #define ID_TYPE_UNKNOWN ((u32)~0)
 
-#define PREEMPT_TIMEOUT_RC 1
-#define PREEMPT_TIMEOUT_NORC 0
-
 #define RC_YES 1
 #define RC_NO 0
 
@@ -257,6 +254,9 @@ int nvgpu_fifo_reschedule_runlist(struct channel_gk20a *ch, bool preempt_next,
 int gk20a_fifo_update_runlist(struct gk20a *g, u32 engine_id, u32 chid,
 			      bool add, bool wait_for_finish);
 
+int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
+				     u32 chid, bool add,
+				     bool wait_for_finish);
 int gk20a_fifo_suspend(struct gk20a *g);
 
 bool gk20a_fifo_mmu_fault_pending(struct gk20a *g);
@@ -390,8 +390,8 @@ void gk20a_fifo_channel_unbind(struct channel_gk20a *ch_gk20a);
 
 u32 gk20a_fifo_intr_0_error_mask(struct gk20a *g);
 
-int gk20a_fifo_is_preempt_pending(struct gk20a *g, u32 id, unsigned int id_type,
-		unsigned int timeout_rc_type);
+int gk20a_fifo_is_preempt_pending(struct gk20a *g, u32 id,
+		unsigned int id_type);
 int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg);
 void gk20a_fifo_preempt_timeout_rc(struct gk20a *g, u32 id, unsigned int id_type);
 
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index d6e0342bd..17b0a60b8 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -685,9 +685,9 @@ struct gpu_ops {
 				struct ch_state *ch_state);
 		u32 (*intr_0_error_mask)(struct gk20a *g);
 		int (*is_preempt_pending)(struct gk20a *g, u32 id,
-			unsigned int id_type, unsigned int timeout_rc_type);
+			unsigned int id_type);
 		int (*preempt_ch_tsg)(struct gk20a *g, u32 id,
-			unsigned int id_type, unsigned int timeout_rc_type);
+			unsigned int id_type);
 		void (*init_pbdma_intr_descs)(struct fifo_gk20a *f);
 		int (*reset_enable_hw)(struct gk20a *g);
 		int (*setup_userd)(struct channel_gk20a *c);
@@ -1132,7 +1132,7 @@ struct gpu_ops {
 		bool (*is_intr_hub_pending)(struct gk20a *g, u32 mc_intr);
 		bool (*is_intr_nvlink_pending)(struct gk20a *g, u32 mc_intr);
 		bool (*is_stall_and_eng_intr_pending)(struct gk20a *g,
-					u32 act_eng_id);
+					u32 act_eng_id, u32 *eng_intr_pending);
 		u32 (*intr_stall)(struct gk20a *g);
 		void (*intr_stall_pause)(struct gk20a *g);
 		void (*intr_stall_resume)(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/gv100/mc_gv100.c b/drivers/gpu/nvgpu/gv100/mc_gv100.c
index 46af100a0..7d38a3fb3 100644
--- a/drivers/gpu/nvgpu/gv100/mc_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/mc_gv100.c
@@ -66,15 +66,14 @@ bool gv100_mc_is_intr_nvlink_pending(struct gk20a *g, u32 mc_intr_0)
 	return (((mc_intr_0 & mc_intr_nvlink_pending_f()) != 0U) ? true : false);
 }
 
-bool gv100_mc_is_stall_and_eng_intr_pending(struct gk20a *g, u32 act_eng_id)
+bool gv100_mc_is_stall_and_eng_intr_pending(struct gk20a *g, u32 act_eng_id,
+					u32 *eng_intr_pending)
 {
 	u32 mc_intr_0 = gk20a_readl(g, mc_intr_r(0));
 	u32 stall_intr, eng_intr_mask;
 
 	eng_intr_mask = gk20a_fifo_act_eng_interrupt_mask(g, act_eng_id);
-	if ((mc_intr_0 & eng_intr_mask) != 0U) {
-		return true;
-	}
+	*eng_intr_pending = mc_intr_0 & eng_intr_mask;
 
 	stall_intr = mc_intr_pfifo_pending_f() |
 			mc_intr_hub_pending_f() |
@@ -82,9 +81,10 @@ bool gv100_mc_is_stall_and_eng_intr_pending(struct gk20a *g, u32 act_eng_id)
 			mc_intr_pbus_pending_f() |
 			mc_intr_ltc_pending_f() |
 			mc_intr_nvlink_pending_f();
-	if ((mc_intr_0 & stall_intr) != 0U) {
-		return true;
-	}
-	return false;
+	nvgpu_log(g, gpu_dbg_info | gpu_dbg_intr,
+		"mc_intr_0 = 0x%08x, eng_intr = 0x%08x",
+		mc_intr_0 & stall_intr, *eng_intr_pending);
+
+	return (mc_intr_0 & (eng_intr_mask | stall_intr)) != 0U;
 }
diff --git a/drivers/gpu/nvgpu/gv100/mc_gv100.h b/drivers/gpu/nvgpu/gv100/mc_gv100.h
index 4aff4a366..e90692588 100644
--- a/drivers/gpu/nvgpu/gv100/mc_gv100.h
+++ b/drivers/gpu/nvgpu/gv100/mc_gv100.h
@@ -26,5 +26,6 @@ struct gk20a;
 void mc_gv100_intr_enable(struct gk20a *g);
 bool gv100_mc_is_intr_nvlink_pending(struct gk20a *g, u32 mc_intr_0);
-bool gv100_mc_is_stall_and_eng_intr_pending(struct gk20a *g, u32 act_eng_id);
+bool gv100_mc_is_stall_and_eng_intr_pending(struct gk20a *g, u32 act_eng_id,
+					u32 *eng_intr_pending);
 #endif
diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
index 4edaaac11..f30f2ae14 100644
--- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c
@@ -387,17 +387,24 @@ u32 gv11b_fifo_intr_0_error_mask(struct gk20a *g)
 
 u32 gv11b_fifo_get_preempt_timeout(struct gk20a *g)
 {
-	return gk20a_get_gr_idle_timeout(g);
+	/* if timeouts are enabled, using 3000ms timeout
+	 * for polling pdma/eng/runlist might kick in
+	 * timeout handler in the cases where preempt
+	 * is stuck. Use 1000ms timeout for polling when
+	 * timeouts are enabled */
+	return nvgpu_is_timeouts_enabled(g) ? PREEMPT_TIMEOUT_1000_MS :
+			g->gr_idle_timeout_default;
 }
 
 static int gv11b_fifo_poll_pbdma_chan_status(struct gk20a *g, u32 id,
-		 u32 pbdma_id, unsigned int timeout_rc_type)
+		 u32 pbdma_id)
 {
 	struct nvgpu_timeout timeout;
 	unsigned long delay = GR_IDLE_CHECK_DEFAULT; /* in micro seconds */
 	u32 pbdma_stat;
 	u32 chan_stat;
 	int ret = -EBUSY;
+	unsigned int loop_count = 0;
 
 	/* timeout in milli seconds */
 	nvgpu_timeout_init(g, &timeout, g->ops.fifo.get_preempt_timeout(g),
@@ -406,6 +413,14 @@ static int gv11b_fifo_poll_pbdma_chan_status(struct gk20a *g, u32 id,
 	nvgpu_log(g, gpu_dbg_info, "wait preempt pbdma %d", pbdma_id);
 	/* Verify that ch/tsg is no longer on the pbdma */
 	do {
+		if (!nvgpu_platform_is_silicon(g)) {
+			if (loop_count >= MAX_PRE_SI_RETRIES) {
+				nvgpu_err(g, "preempt pbdma retries: %u",
+						loop_count);
+				break;
+			}
+			loop_count++;
+		}
 		/*
 		 * If the PBDMA has a stalling interrupt and receives a NACK,
 		 * the PBDMA won't save out until the STALLING interrupt is
@@ -458,21 +473,24 @@ static int gv11b_fifo_poll_pbdma_chan_status(struct gk20a *g, u32 id,
 		nvgpu_usleep_range(delay, delay * 2);
 		delay = min_t(unsigned long,
 				delay << 1, GR_IDLE_CHECK_MAX);
-	} while (!nvgpu_timeout_expired_msg(&timeout,
-				"preempt timeout pbdma"));
+	} while (!nvgpu_timeout_expired(&timeout));
+
+	if (ret)
+		nvgpu_err(g, "preempt timeout pbdma: %u pbdma_stat: %u "
+				"tsgid: %u", pbdma_id, pbdma_stat, id);
 	return ret;
 }
 
 static int gv11b_fifo_poll_eng_ctx_status(struct gk20a *g, u32 id,
-		 u32 act_eng_id, u32 *reset_eng_bitmask,
-		 unsigned int timeout_rc_type)
+		 u32 act_eng_id, u32 *reset_eng_bitmask)
 {
 	struct nvgpu_timeout timeout;
 	unsigned long delay = GR_IDLE_CHECK_DEFAULT; /* in micro seconds */
 	u32 eng_stat;
 	u32 ctx_stat;
 	int ret = -EBUSY;
-	bool stall_intr = false;
+	unsigned int loop_count = 0;
+	u32 eng_intr_pending;
 
 	/* timeout in milli seconds */
 	nvgpu_timeout_init(g, &timeout, g->ops.fifo.get_preempt_timeout(g),
@@ -482,20 +500,56 @@ static int gv11b_fifo_poll_eng_ctx_status(struct gk20a *g, u32 id,
 			act_eng_id);
 	/* Check if ch/tsg has saved off the engine or if ctxsw is hung */
 	do {
+		if (!nvgpu_platform_is_silicon(g)) {
+			if (loop_count >= MAX_PRE_SI_RETRIES) {
+				nvgpu_err(g, "preempt eng retries: %u",
+						loop_count);
+				break;
+			}
+			loop_count++;
+		}
 		eng_stat = gk20a_readl(g, fifo_engine_status_r(act_eng_id));
 		ctx_stat = fifo_engine_status_ctx_status_v(eng_stat);
 
-		if (g->ops.mc.is_stall_and_eng_intr_pending(g, act_eng_id)) {
-			stall_intr = true;
+		if (g->ops.mc.is_stall_and_eng_intr_pending(g, act_eng_id,
+				&eng_intr_pending)) {
+		/* From h/w team
+		 * Engine save can be blocked by eng stalling interrupts.
+		 * FIFO interrupts shouldn't block an engine save from
+		 * finishing, but could block FIFO from reporting preempt done.
+		 * No immediate reason to reset the engine if FIFO interrupt is
+		 * pending.
+		 * The hub, priv_ring, and ltc interrupts could block context
+		 * switch (or memory), but don't necessarily have to.
+		 * For Hub interrupts they just report access counters and page
+		 * faults. Neither of these necessarily blocks context switch
+		 * or preemption, but they could.
+		 * For example a page fault for graphics would prevent graphics
+		 * from saving out. An access counter interrupt is a
+		 * notification and has no effect.
+		 * SW should handle page faults though for preempt to complete.
+		 * PRI interrupt (due to a failed PRI transaction) will result
+		 * in ctxsw failure reported to HOST.
+		 * LTC interrupts are generally ECC related and if so,
+		 * certainly don't block preemption/ctxsw but they could.
+		 * Bus interrupts shouldn't have anything to do with preemption
+		 * state as they are part of the Host EXT pipe, though they may
+		 * exhibit a symptom that indicates that GPU is in a bad state.
+		 * To be completely fair, when an engine is preempting SW
+		 * really should just handle other interrupts as they come in.
+		 * It's generally bad to just poll and wait on a preempt
+		 * to complete since there are many things in the GPU which may
+		 * cause a system to hang/stop responding.
+		 */
 			nvgpu_log(g, gpu_dbg_info | gpu_dbg_intr, "stall intr set, "
-				"preemption will not finish");
+				"preemption might not finish");
 		}
 
 		if (ctx_stat ==
			 fifo_engine_status_ctx_status_ctxsw_switch_v()) {
 			/* Eng save hasn't started yet. Continue polling */
-			if (stall_intr) {
-				/* if stall intr stop polling */
+			if (eng_intr_pending) {
+				/* if eng intr, stop polling */
 				*reset_eng_bitmask |= BIT(act_eng_id);
 				ret = 0;
 				break;
@@ -507,8 +561,7 @@ static int gv11b_fifo_poll_eng_ctx_status(struct gk20a *g, u32 id,
			 fifo_engine_status_ctx_status_ctxsw_save_v()) {
 
 			if (id == fifo_engine_status_id_v(eng_stat)) {
-				if (stall_intr ||
-					timeout_rc_type == PREEMPT_TIMEOUT_NORC) {
+				if (eng_intr_pending) {
 					/* preemption will not finish */
 					*reset_eng_bitmask |= BIT(act_eng_id);
 					ret = 0;
@@ -524,9 +577,7 @@ static int gv11b_fifo_poll_eng_ctx_status(struct gk20a *g, u32 id,
			 fifo_engine_status_ctx_status_ctxsw_load_v()) {
 
 			if (id == fifo_engine_status_next_id_v(eng_stat)) {
-
-				if (stall_intr ||
-					timeout_rc_type == PREEMPT_TIMEOUT_NORC) {
+				if (eng_intr_pending) {
 					/* preemption will not finish */
 					*reset_eng_bitmask |= BIT(act_eng_id);
 					ret = 0;
@@ -546,8 +597,21 @@ static int gv11b_fifo_poll_eng_ctx_status(struct gk20a *g, u32 id,
 		nvgpu_usleep_range(delay, delay * 2);
 		delay = min_t(unsigned long,
 				delay << 1, GR_IDLE_CHECK_MAX);
-	} while (!nvgpu_timeout_expired_msg(&timeout,
-			"preempt timeout eng"));
+	} while (!nvgpu_timeout_expired(&timeout));
+
+	if (ret) {
+		/*
+		 * The reasons a preempt can fail are:
+		 * 1.Some other stalling interrupt is asserted preventing
+		 *   channel or context save.
+		 * 2.The memory system hangs.
+		 * 3.The engine hangs during CTXSW.
+		 */
+		nvgpu_err(g, "preempt timeout eng: %u ctx_stat: %u tsgid: %u",
+			act_eng_id, ctx_stat, id);
+		*reset_eng_bitmask |= BIT(act_eng_id);
+	}
+
 	return ret;
 }
 
@@ -594,29 +658,19 @@ static void gv11b_reset_pbdma_faulted_tsg(struct tsg_gk20a *tsg)
 }
 
 void gv11b_fifo_reset_pbdma_and_eng_faulted(struct gk20a *g,
-			struct channel_gk20a *refch,
+			struct tsg_gk20a *tsg,
 			u32 faulted_pbdma, u32 faulted_engine)
 {
-	struct tsg_gk20a *tsg;
+	if (!tsg)
+		return;
 
 	nvgpu_log(g, gpu_dbg_intr, "reset faulted pbdma:0x%x eng:0x%x",
 			faulted_pbdma, faulted_engine);
 
-	if (!refch)
-		return;
-
-	if (gk20a_is_channel_marked_as_tsg(refch)) {
-		tsg = &g->fifo.tsg[refch->tsgid];
-		if (faulted_pbdma != FIFO_INVAL_PBDMA_ID)
-			gv11b_reset_pbdma_faulted_tsg(tsg);
-		if (faulted_engine != FIFO_INVAL_ENGINE_ID)
-			gv11b_reset_eng_faulted_tsg(tsg);
-	} else {
-		if (faulted_pbdma != FIFO_INVAL_PBDMA_ID)
-			gv11b_reset_pbdma_faulted_ch(g, refch->chid);
-		if (faulted_engine != FIFO_INVAL_ENGINE_ID)
-			gv11b_reset_eng_faulted_ch(g, refch->chid);
-	}
+	if (faulted_pbdma != FIFO_INVAL_PBDMA_ID)
+		gv11b_reset_pbdma_faulted_tsg(tsg);
+	if (faulted_engine != FIFO_INVAL_ENGINE_ID)
+		gv11b_reset_eng_faulted_tsg(tsg);
 }
 
 static u32 gv11b_fifo_get_runlists_mask(struct gk20a *g, u32 act_eng_bitmask,
@@ -626,7 +680,7 @@ static u32 gv11b_fifo_get_runlists_mask(struct gk20a *g, u32 act_eng_bitmask,
 	u32 runlists_mask = 0;
 	struct fifo_gk20a *f = &g->fifo;
 	struct fifo_runlist_info_gk20a *runlist;
-	u32 pbdma_bitmask = 0;
+	u32 rlid, pbdma_bitmask = 0;
 
 	if (id_type != ID_TYPE_UNKNOWN) {
 		if (id_type == ID_TYPE_TSG)
@@ -641,31 +695,31 @@ static u32 gv11b_fifo_get_runlists_mask(struct gk20a *g, u32 act_eng_bitmask,
 		if (mmfault->faulted_pbdma != FIFO_INVAL_PBDMA_ID)
 			pbdma_bitmask = BIT(mmfault->faulted_pbdma);
 
-		for (id = 0; id < f->max_runlists; id++) {
+		for (rlid = 0; rlid < f->max_runlists; rlid++) {
 
-			runlist = &f->runlist_info[id];
+			runlist = &f->runlist_info[rlid];
 
 			if (runlist->eng_bitmask & act_eng_bitmask)
 				runlists_mask |=
-					fifo_sched_disable_runlist_m(id);
+					fifo_sched_disable_runlist_m(rlid);
 
 			if (runlist->pbdma_bitmask & pbdma_bitmask)
 				runlists_mask |=
-					fifo_sched_disable_runlist_m(id);
+					fifo_sched_disable_runlist_m(rlid);
 		}
 	}
 
 	if (id_type == ID_TYPE_UNKNOWN) {
-		for (id = 0; id < f->max_runlists; id++) {
+		for (rlid = 0; rlid < f->max_runlists; rlid++) {
 			if (act_eng_bitmask) {
 				/* eng ids are known */
-				runlist = &f->runlist_info[id];
+				runlist = &f->runlist_info[rlid];
 				if (runlist->eng_bitmask & act_eng_bitmask)
 					runlists_mask |=
-					fifo_sched_disable_runlist_m(id);
+					fifo_sched_disable_runlist_m(rlid);
 			} else {
 				runlists_mask |=
-					fifo_sched_disable_runlist_m(id);
+					fifo_sched_disable_runlist_m(rlid);
 			}
 		}
 	}
@@ -697,10 +751,20 @@ static int gv11b_fifo_poll_runlist_preempt_pending(struct gk20a *g,
 	struct nvgpu_timeout timeout;
 	u32 delay = GR_IDLE_CHECK_DEFAULT;
 	int ret = -EBUSY;
+	unsigned int loop_count = 0;
 
 	nvgpu_timeout_init(g, &timeout, g->ops.fifo.get_preempt_timeout(g),
			   NVGPU_TIMER_CPU_TIMER);
 	do {
+		if (!nvgpu_platform_is_silicon(g)) {
+			if (loop_count >= MAX_PRE_SI_RETRIES) {
+				nvgpu_err(g, "preempt runlist retries: %u",
+						loop_count);
+				break;
+			}
+			loop_count++;
+		}
+
 		if (!((gk20a_readl(g, fifo_runlist_preempt_r())) &
				 runlists_mask)) {
 			ret = 0;
@@ -710,13 +774,16 @@ static int gv11b_fifo_poll_runlist_preempt_pending(struct gk20a *g,
 		nvgpu_usleep_range(delay, delay * 2);
 		delay = min_t(unsigned long,
 				delay << 1, GR_IDLE_CHECK_MAX);
-	} while (!nvgpu_timeout_expired_msg(&timeout,
-			"runlist preempt timeout"));
+	} while (!nvgpu_timeout_expired(&timeout));
+
+	if (ret)
+		nvgpu_err(g, "preempt runlist timeout, runlists_mask:0x%08x",
+				runlists_mask);
 	return ret;
 }
 
 int gv11b_fifo_is_preempt_pending(struct gk20a *g, u32 id,
-		 unsigned int id_type, unsigned int timeout_rc_type)
+		 unsigned int id_type)
 {
 	struct fifo_gk20a *f = &g->fifo;
 	unsigned long runlist_served_pbdmas;
@@ -724,7 +791,6 @@ int gv11b_fifo_is_preempt_pending(struct gk20a *g, u32 id,
 	u32 pbdma_id;
 	u32 act_eng_id;
 	u32 runlist_id;
-	int func_ret;
 	int ret = 0;
 	u32 tsgid;
 
@@ -741,30 +807,14 @@ int gv11b_fifo_is_preempt_pending(struct gk20a *g, u32 id,
 	runlist_served_pbdmas = f->runlist_info[runlist_id].pbdma_bitmask;
 	runlist_served_engines = f->runlist_info[runlist_id].eng_bitmask;
 
-	for_each_set_bit(pbdma_id, &runlist_served_pbdmas, f->num_pbdma) {
-
-		func_ret = gv11b_fifo_poll_pbdma_chan_status(g, tsgid, pbdma_id,
-				timeout_rc_type);
-		if (func_ret != 0) {
-			nvgpu_log_info(g, "preempt timeout pbdma %d", pbdma_id);
-			ret |= func_ret;
-		}
-	}
+	for_each_set_bit(pbdma_id, &runlist_served_pbdmas, f->num_pbdma)
+		ret |= gv11b_fifo_poll_pbdma_chan_status(g, tsgid, pbdma_id);
 
 	f->runlist_info[runlist_id].reset_eng_bitmask = 0;
 
-	for_each_set_bit(act_eng_id, &runlist_served_engines, f->max_engines) {
-
-		func_ret = gv11b_fifo_poll_eng_ctx_status(g, tsgid, act_eng_id,
-				&f->runlist_info[runlist_id].reset_eng_bitmask,
-				timeout_rc_type);
-
-		if (func_ret != 0) {
-			nvgpu_log_info(g, "preempt timeout engine %d", act_eng_id);
-			ret |= func_ret;
-		}
-	}
-
+	for_each_set_bit(act_eng_id, &runlist_served_engines, f->max_engines)
+		ret |= gv11b_fifo_poll_eng_ctx_status(g, tsgid, act_eng_id,
+			&f->runlist_info[runlist_id].reset_eng_bitmask);
 	return ret;
 }
 
@@ -848,6 +898,9 @@ int gv11b_fifo_preempt_tsg(struct gk20a *g, u32 tsgid)
 
 	nvgpu_mutex_acquire(&f->runlist_info[runlist_id].runlist_lock);
 
+	/* WAR for Bug 2065990 */
+	gk20a_fifo_disable_tsg_sched(g, &f->tsg[tsgid]);
+
 	mutex_ret = nvgpu_pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
 
 	ret = __locked_fifo_preempt(g, tsgid, true);
@@ -855,6 +908,9 @@ int gv11b_fifo_preempt_tsg(struct gk20a *g, u32 tsgid)
 	if (!mutex_ret)
 		nvgpu_pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
 
+	/* WAR for Bug 2065990 */
+	gk20a_fifo_enable_tsg_sched(g, &f->tsg[tsgid]);
+
 	nvgpu_mutex_release(&f->runlist_info[runlist_id].runlist_lock);
 
 	if (ret)
@@ -863,44 +919,36 @@ int gv11b_fifo_preempt_tsg(struct gk20a *g, u32 tsgid)
 	return ret;
 }
 
-static int gv11b_fifo_preempt_runlists(struct gk20a *g, u32 runlists_mask)
+static void gv11b_fifo_locked_preempt_runlists(struct gk20a *g, u32 runlists_mask)
 {
 	int ret = 0;
 	u32 token = PMU_INVALID_MUTEX_OWNER_ID;
 	u32 mutex_ret = 0;
-	u32 runlist_id;
+	u32 rlid;
 
-	nvgpu_log_fn(g, " ");
-
-	for (runlist_id = 0; runlist_id < g->fifo.max_runlists; runlist_id++) {
-		if (runlists_mask & fifo_runlist_preempt_runlist_m(runlist_id))
-			nvgpu_mutex_acquire(&g->fifo.
-				runlist_info[runlist_id].runlist_lock);
-	}
+	/* runlist_locks are held by teardown and sched is disabled too */
+	nvgpu_log_fn(g, "preempt runlists_mask:0x%08x", runlists_mask);
 
 	mutex_ret = nvgpu_pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
 
 	ret = __locked_fifo_preempt_runlists(g, runlists_mask);
 
-	if (!mutex_ret)
-		nvgpu_pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
-
-	for (runlist_id = 0; runlist_id < g->fifo.max_runlists; runlist_id++) {
-		if (runlists_mask &
-				fifo_runlist_preempt_runlist_m(runlist_id)) {
-			/* during recovery reset engs served by this runlist */
-			g->fifo.runlist_info[runlist_id].reset_eng_bitmask =
-				g->fifo.runlist_info[runlist_id].eng_bitmask;
-			nvgpu_mutex_release(&g->fifo.
-				runlist_info[runlist_id].runlist_lock);
+	if (ret) {
+		/* if preempt timed out, reset engs served by runlists */
+		for (rlid = 0; rlid < g->fifo.max_runlists; rlid++) {
+			if (runlists_mask &
+				fifo_runlist_preempt_runlist_m(rlid))
+				g->fifo.runlist_info[rlid].reset_eng_bitmask =
+				g->fifo.runlist_info[rlid].eng_bitmask;
 		}
 	}
 
-	return ret;
+	if (!mutex_ret)
+		nvgpu_pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
 }
 
 static int __locked_fifo_preempt_ch_tsg(struct gk20a *g, u32 id,
-			unsigned int id_type, unsigned int timeout_rc_type)
+			unsigned int id_type)
 {
 	int ret;
 	struct fifo_gk20a *f = &g->fifo;
@@ -914,63 +962,164 @@ static int __locked_fifo_preempt_ch_tsg(struct gk20a *g, u32 id,
 	gk20a_fifo_issue_preempt(g, id, true);
 
 	/* wait for preempt */
-	ret = g->ops.fifo.is_preempt_pending(g, id, id_type,
-					timeout_rc_type);
+	ret = g->ops.fifo.is_preempt_pending(g, id, id_type);
 
-	if (ret && (timeout_rc_type == PREEMPT_TIMEOUT_RC))
-		gk20a_fifo_preempt_timeout_rc(g, id, id_type);
+	/* No recovery even if preempt timed out since
+	 * this is called from recovery path
+	 */
 
 	return ret;
 }
 
 int gv11b_fifo_preempt_ch_tsg(struct gk20a *g, u32 id,
-			unsigned int id_type, unsigned int timeout_rc_type)
+			unsigned int id_type)
 {
-	struct fifo_gk20a *f = &g->fifo;
 	u32 ret = 0;
 	u32 token = PMU_INVALID_MUTEX_OWNER_ID;
 	u32 mutex_ret = 0;
-	u32 runlist_id;
-
-	if (id_type == ID_TYPE_TSG)
-		runlist_id = f->tsg[id].runlist_id;
-	else if (id_type == ID_TYPE_CHANNEL)
-		runlist_id = f->channel[id].runlist_id;
-	else
-		return -EINVAL;
-
-	if (runlist_id >= g->fifo.max_runlists) {
-		nvgpu_log_info(g, "runlist_id = %d", runlist_id);
-		return -EINVAL;
-	}
-
-	nvgpu_log_fn(g, "preempt id = %d, runlist_id = %d", id, runlist_id);
-
-	nvgpu_mutex_acquire(&f->runlist_info[runlist_id].runlist_lock);
 
 	mutex_ret = nvgpu_pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
-
-	ret = __locked_fifo_preempt_ch_tsg(g, id, id_type, timeout_rc_type);
+	/*
+	 * This is called from teardown path only. runlist_lock
+	 * is already acquired before calling this function.
+	 */
+	ret = __locked_fifo_preempt_ch_tsg(g, id, id_type);
 
 	if (!mutex_ret)
 		nvgpu_pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
 
-	nvgpu_mutex_release(&f->runlist_info[runlist_id].runlist_lock);
-
 	return ret;
 }
 
+static void gv11b_fifo_locked_abort_runlist_active_tsgs(struct gk20a *g,
+			unsigned int rc_type,
+			u32 runlists_mask)
+{
+	struct tsg_gk20a *tsg = NULL;
+	u32 rlid, tsgid;
+	struct fifo_runlist_info_gk20a *runlist = NULL;
+	u32 token = PMU_INVALID_MUTEX_OWNER_ID;
+	u32 mutex_ret = 0;
+	bool add = false, wait_for_finish = false;
+	int err;
+
+	nvgpu_err(g, "runlist id unknown, abort active tsgs in runlists");
+
+	/* runlist_lock are locked by teardown */
+	mutex_ret = nvgpu_pmu_mutex_acquire(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
+
+	for (rlid = 0; rlid < g->fifo.max_runlists;
+						rlid++) {
+		if (!(runlists_mask & BIT(rlid)))
+			continue;
+		nvgpu_log(g, gpu_dbg_info, "abort runlist id %d",
+				rlid);
+		runlist = &g->fifo.runlist_info[rlid];
+
+		for_each_set_bit(tsgid, runlist->active_tsgs,
+			g->fifo.num_channels) {
+			nvgpu_log(g, gpu_dbg_info, "abort tsg id %d", tsgid);
+			tsg = &g->fifo.tsg[tsgid];
+			gk20a_disable_tsg(tsg);
+
+			/* assume all pbdma and eng faulted are set */
+			nvgpu_log(g, gpu_dbg_info, "reset pbdma and eng faulted");
+			gv11b_reset_pbdma_faulted_tsg(tsg);
+			gv11b_reset_eng_faulted_tsg(tsg);
+
+#ifdef CONFIG_GK20A_CTXSW_TRACE
+			gk20a_ctxsw_trace_tsg_reset(g, tsg);
+#endif
+			if (!g->fifo.deferred_reset_pending) {
+				if (rc_type == RC_TYPE_MMU_FAULT) {
+					gk20a_fifo_set_ctx_mmu_error_tsg(g, tsg);
+					gk20a_fifo_error_tsg(g, tsg);
+				}
+			}
+
+			/* (chid == ~0 && !add) remove all act ch from runlist*/
+			err = gk20a_fifo_update_runlist_locked(g, rlid,
+					FIFO_INVAL_CHANNEL_ID, add, wait_for_finish);
+			if (err)
+				nvgpu_err(g, "runlist id %d is not cleaned up",
+					rlid);
+
+			gk20a_fifo_abort_tsg(g, tsg->tsgid, false);
+
+			nvgpu_log(g, gpu_dbg_info, "aborted tsg id %d", tsgid);
+		}
+	}
+	if (!mutex_ret)
+		nvgpu_pmu_mutex_release(&g->pmu, PMU_MUTEX_ID_FIFO, &token);
+}
+
 void gv11b_fifo_teardown_ch_tsg(struct gk20a *g, u32 act_eng_bitmask,
 			u32 id, unsigned int id_type, unsigned int rc_type,
 			struct mmu_fault_info *mmfault)
 {
 	struct tsg_gk20a *tsg = NULL;
-	struct channel_gk20a *refch = NULL;
-	u32 runlists_mask, runlist_id;
+	u32 runlists_mask, rlid;
 	struct fifo_runlist_info_gk20a *runlist = NULL;
 	u32 engine_id, client_type = ~0;
+	struct fifo_gk20a *f = &g->fifo;
+	u32 runlist_id = FIFO_INVAL_RUNLIST_ID;
+	u32 num_runlists = 0;
+
+	nvgpu_log_fn(g, "acquire runlist_lock for all runlists");
+	for (rlid = 0; rlid < g->fifo.max_runlists; rlid++)
+		nvgpu_mutex_acquire(&f->runlist_info[rlid].
+			runlist_lock);
+
+	/* get runlist id and tsg */
+	if (id_type == ID_TYPE_TSG) {
+		if (id != FIFO_INVAL_TSG_ID) {
+			tsg = &g->fifo.tsg[id];
+			runlist_id = tsg->runlist_id;
+			if (runlist_id != FIFO_INVAL_RUNLIST_ID)
+				num_runlists++;
+			else
+				nvgpu_log_fn(g, "tsg runlist id is invalid");
+		} else {
+			nvgpu_log_fn(g, "id type is tsg but tsg id is inval");
+		}
+	} else {
+		/*
+		 * id type is unknown, get runlist_id if eng mask is such that
+		 * it corresponds to single runlist id. If eng mask corresponds
+		 * to multiple runlists, then abort all runlists
+		 */
+		for (rlid = 0; rlid < f->max_runlists; rlid++) {
+			if (act_eng_bitmask) {
+				/* eng ids are known */
+				runlist = &f->runlist_info[rlid];
+				if (runlist->eng_bitmask & act_eng_bitmask) {
+					runlist_id = rlid;
+					num_runlists++;
+				}
+			} else {
+				break;
+			}
+		}
+		if (num_runlists > 1) /* abort all runlists */
+			runlist_id = FIFO_INVAL_RUNLIST_ID;
+	}
+
+	/* if runlist_id is valid and there is only single runlist to be
+	 * aborted, release the runlist locks that are not
+	 * needed for this recovery
+	 */
+	if (runlist_id != FIFO_INVAL_RUNLIST_ID && num_runlists == 1) {
+		for (rlid = 0; rlid < g->fifo.max_runlists; rlid++) {
+			if (rlid != runlist_id) {
+				nvgpu_log_fn(g, "release runlist_lock for "
+					"unused runlist id: %d", rlid);
+				nvgpu_mutex_release(&f->runlist_info[rlid].
+					runlist_lock);
+			}
+		}
+	}
 
 	nvgpu_log(g, gpu_dbg_info, "id = %d, id_type = %d, rc_type = %d, "
 			"act_eng_bitmask = 0x%x, mmfault ptr = 0x%p",
@@ -979,6 +1128,7 @@ void gv11b_fifo_teardown_ch_tsg(struct gk20a *g, u32 act_eng_bitmask,
 	runlists_mask = gv11b_fifo_get_runlists_mask(g, act_eng_bitmask, id,
 				id_type, rc_type, mmfault);
 
+	/* Disable runlist scheduler */
 	gk20a_fifo_set_runlist_state(g, runlists_mask, RUNLIST_DISABLED);
 
 	g->fifo.deferred_reset_pending = false;
@@ -1000,41 +1150,41 @@ void gv11b_fifo_teardown_ch_tsg(struct gk20a *g, u32 act_eng_bitmask,
 
 	gr_gk20a_init_cg_mode(g, ELCG_MODE, ELCG_RUN);
 
-	/* Get tsg/ch */
 	if (rc_type == RC_TYPE_MMU_FAULT) {
 		gk20a_debug_dump(g);
-		refch = mmfault->refch;
 		client_type = mmfault->client_type;
-		gv11b_fifo_reset_pbdma_and_eng_faulted(g, refch,
-				mmfault->faulted_pbdma,
-				mmfault->faulted_engine);
+		gv11b_fifo_reset_pbdma_and_eng_faulted(g, tsg,
+				mmfault->faulted_pbdma,
+				mmfault->faulted_engine);
 	}
 
-	if (id_type == ID_TYPE_TSG) {
-		tsg = &g->fifo.tsg[id];
-	} else if (id_type == ID_TYPE_CHANNEL) {
-		if (refch == NULL)
-			refch = gk20a_channel_get(&g->fifo.channel[id]);
-	}
-	/* Disable tsg/ch */
 	if (tsg)
 		gk20a_disable_tsg(tsg);
-	else if (refch)
-		g->ops.fifo.disable_channel(refch);
 
-	/* Preempt tsg/ch */
-	if (id_type == ID_TYPE_TSG || id_type == ID_TYPE_CHANNEL) {
-		g->ops.fifo.preempt_ch_tsg(g, id, id_type,
-					PREEMPT_TIMEOUT_NORC);
+	/*
+	 * Even though TSG preempt timed out, the RC sequence would by design
+	 * require s/w to issue another preempt.
+	 * If recovery includes an ENGINE_RESET, to not have race conditions,
+	 * use RUNLIST_PREEMPT to kick all work off, and cancel any context
+	 * load which may be pending. This is also needed to make sure
+	 * that all PBDMAs serving the engine are not loaded when engine is
+	 * reset.
+	 */
+	if (tsg) {
+		int preempt_failed;
+
+		preempt_failed = g->ops.fifo.preempt_ch_tsg(g, id, id_type);
+		if (preempt_failed)
+			gv11b_fifo_locked_preempt_runlists(g, runlists_mask);
 	} else {
-		gv11b_fifo_preempt_runlists(g, runlists_mask);
+		gv11b_fifo_locked_preempt_runlists(g, runlists_mask);
 	}
 
 	/* check if engine reset should be deferred */
-	for (runlist_id = 0; runlist_id < g->fifo.max_runlists; runlist_id++) {
+	for (rlid = 0; rlid < g->fifo.max_runlists; rlid++) {
 
-		runlist = &g->fifo.runlist_info[runlist_id];
-		if ((runlists_mask & BIT(runlist_id)) &&
+		runlist = &g->fifo.runlist_info[rlid];
+		if ((runlists_mask & BIT(rlid)) &&
				runlist->reset_eng_bitmask) {
 
 			unsigned long __reset_eng_bitmask =
@@ -1042,7 +1192,7 @@ void gv11b_fifo_teardown_ch_tsg(struct gk20a *g, u32 act_eng_bitmask,
 			for_each_set_bit(engine_id, &__reset_eng_bitmask,
							g->fifo.max_engines) {
-				if ((refch || tsg) &&
+				if (tsg &&
					gk20a_fifo_should_defer_engine_reset(g,
					engine_id, client_type, false)) {
 
@@ -1074,13 +1224,9 @@ void gv11b_fifo_teardown_ch_tsg(struct gk20a *g, u32 act_eng_bitmask,
 	}
 
#ifdef CONFIG_GK20A_CTXSW_TRACE
-	/* tsg and refch both could be valid for mmu fault. Check tsg first */
 	if (tsg)
 		gk20a_ctxsw_trace_tsg_reset(g, tsg);
-	else if (refch)
-		gk20a_ctxsw_trace_channel_reset(g, refch);
#endif
-
 	if (tsg) {
 		if (g->fifo.deferred_reset_pending) {
 			gk20a_disable_tsg(tsg);
@@ -1090,26 +1236,9 @@ void gv11b_fifo_teardown_ch_tsg(struct gk20a *g, u32 act_eng_bitmask,
 			gk20a_fifo_abort_tsg(g, tsg->tsgid, false);
 		}
 
-		if (refch)
-			gk20a_channel_put(refch);
-	} else if (refch) {
-		if (g->fifo.deferred_reset_pending) {
-			g->ops.fifo.disable_channel(refch);
-		} else {
-			if (rc_type == RC_TYPE_MMU_FAULT)
-				gk20a_fifo_set_ctx_mmu_error_ch(g, refch);
-
-			gk20a_channel_abort(refch, false);
-		}
-		gk20a_channel_put(refch);
 	} else {
-		nvgpu_err(g, "id unknown, abort runlist");
-		for (runlist_id = 0; runlist_id < g->fifo.max_runlists;
-						runlist_id++) {
-			if (runlists_mask & BIT(runlist_id))
-				g->ops.fifo.update_runlist(g, runlist_id,
-					FIFO_INVAL_CHANNEL_ID, false, true);
-		}
+		gv11b_fifo_locked_abort_runlist_active_tsgs(g, rc_type,
+				runlists_mask);
 	}
 
 	gk20a_fifo_set_runlist_state(g, runlists_mask, RUNLIST_ENABLED);
@@ -1117,6 +1246,18 @@ void gv11b_fifo_teardown_ch_tsg(struct gk20a *g, u32 act_eng_bitmask,
 	/* It is safe to enable ELPG again. */
 	if (g->support_pmu && g->elpg_enabled)
 		nvgpu_pmu_enable_elpg(g);
+
+	/* release runlist_lock */
+	if (runlist_id != FIFO_INVAL_RUNLIST_ID) {
+		nvgpu_log_fn(g, "release runlist_lock runlist_id = %d",
+				runlist_id);
+		nvgpu_mutex_release(&f->runlist_info[runlist_id].runlist_lock);
+	} else {
+		nvgpu_log_fn(g, "release runlist_lock for all runlists");
+		for (rlid = 0; rlid < g->fifo.max_runlists; rlid++)
+			nvgpu_mutex_release(&f->runlist_info[rlid].
+				runlist_lock);
+	}
 }
 
 void gv11b_fifo_init_pbdma_intr_descs(struct fifo_gk20a *f)
diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.h b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.h
index 1ae3c93e3..aee7aef29 100644
--- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.h
+++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.h
@@ -50,10 +50,13 @@
 #define CHANNEL_INFO_VEID0		0
 
+#define MAX_PRE_SI_RETRIES	200000	/* 1G/500KHz * 100 */
+#define PREEMPT_TIMEOUT_1000_MS	1000
+
 struct gpu_ops;
 
 void gv11b_fifo_reset_pbdma_and_eng_faulted(struct gk20a *g,
-		struct channel_gk20a *refch,
+		struct tsg_gk20a *tsg,
 		u32 faulted_pbdma, u32 faulted_engine);
 void gv11b_mmu_fault_id_to_eng_pbdma_id_and_veid(struct gk20a *g,
	u32 mmu_fault_id, u32 *active_engine_id, u32 *veid, u32 *pbdma_id);
@@ -78,12 +81,11 @@ void gv11b_dump_eng_status(struct gk20a *g,
 u32 gv11b_fifo_intr_0_error_mask(struct gk20a *g);
 int gv11b_fifo_reschedule_runlist(struct channel_gk20a *ch, bool preempt_next);
 int gv11b_fifo_is_preempt_pending(struct gk20a *g, u32 id,
-		unsigned int id_type, unsigned int timeout_rc_type);
+		unsigned int id_type);
 int gv11b_fifo_preempt_channel(struct gk20a *g, u32 chid);
 int gv11b_fifo_preempt_tsg(struct gk20a *g, u32 tsgid);
 int gv11b_fifo_enable_tsg(struct tsg_gk20a *tsg);
-int gv11b_fifo_preempt_ch_tsg(struct gk20a *g, u32 id,
-		unsigned int id_type, unsigned int timeout_rc_type);
+int gv11b_fifo_preempt_ch_tsg(struct gk20a *g, u32 id, unsigned int id_type);
 void gv11b_fifo_teardown_ch_tsg(struct gk20a *g, u32 act_eng_bitmask,
 		u32 id, unsigned int id_type, unsigned int rc_type,
 		struct mmu_fault_info *mmfault);
diff --git a/drivers/gpu/nvgpu/gv11b/mc_gv11b.c b/drivers/gpu/nvgpu/gv11b/mc_gv11b.c
index 64680fc60..bc802c2d5 100644
--- a/drivers/gpu/nvgpu/gv11b/mc_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/mc_gv11b.c
@@ -66,24 +66,24 @@ bool gv11b_mc_is_intr_hub_pending(struct gk20a *g, u32 mc_intr_0)
 	return (((mc_intr_0 & mc_intr_hub_pending_f()) != 0U) ? true : false);
 }
 
-bool gv11b_mc_is_stall_and_eng_intr_pending(struct gk20a *g, u32 act_eng_id)
+bool gv11b_mc_is_stall_and_eng_intr_pending(struct gk20a *g, u32 act_eng_id,
+					u32 *eng_intr_pending)
 {
 	u32 mc_intr_0 = gk20a_readl(g, mc_intr_r(0));
 	u32 stall_intr, eng_intr_mask;
 
 	eng_intr_mask = gk20a_fifo_act_eng_interrupt_mask(g, act_eng_id);
-	if ((mc_intr_0 & eng_intr_mask) != 0U) {
-		return true;
-	}
+	*eng_intr_pending = mc_intr_0 & eng_intr_mask;
 
 	stall_intr = mc_intr_pfifo_pending_f() |
			mc_intr_hub_pending_f() |
			mc_intr_priv_ring_pending_f() |
			mc_intr_pbus_pending_f() |
			mc_intr_ltc_pending_f();
-	if ((mc_intr_0 & stall_intr) != 0U) {
-		return true;
-	}
-	return false;
+	nvgpu_log(g, gpu_dbg_info | gpu_dbg_intr,
+		"mc_intr_0 = 0x%08x, eng_intr = 0x%08x",
+		mc_intr_0 & stall_intr, *eng_intr_pending);
+
+	return (mc_intr_0 & (eng_intr_mask | stall_intr)) != 0U;
 }
diff --git a/drivers/gpu/nvgpu/gv11b/mc_gv11b.h b/drivers/gpu/nvgpu/gv11b/mc_gv11b.h
index eb9d0e4ea..faa4d38d4 100644
--- a/drivers/gpu/nvgpu/gv11b/mc_gv11b.h
+++ b/drivers/gpu/nvgpu/gv11b/mc_gv11b.h
@@ -26,5 +26,6 @@ struct gk20a;
 void mc_gv11b_intr_enable(struct gk20a *g);
 bool gv11b_mc_is_intr_hub_pending(struct gk20a *g, u32 mc_intr_0);
-bool gv11b_mc_is_stall_and_eng_intr_pending(struct gk20a *g, u32 act_eng_id);
+bool gv11b_mc_is_stall_and_eng_intr_pending(struct gk20a *g, u32 act_eng_id,
+					u32 *eng_intr_pending);
 #endif
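
The sketch below is editorial and not part of the patch: it shows, in simplified form, how a polling caller is expected to consume the new eng_intr_pending out-parameter of g->ops.mc.is_stall_and_eng_intr_pending(), mirroring the decision made in gv11b_fifo_poll_eng_ctx_status() above (the ctx_status checks are omitted here, and the wrapper name poll_eng_once() is hypothetical).

/*
 * Illustrative sketch only -- not part of the patch above.
 * Returns true when polling should stop because the engine itself has a
 * pending interrupt; in that case the engine is marked for reset, matching
 * the reset_eng_bitmask handling in gv11b_fifo_poll_eng_ctx_status().
 */
static bool poll_eng_once(struct gk20a *g, u32 act_eng_id,
		u32 *reset_eng_bitmask)
{
	u32 eng_intr_pending = 0U;

	/* true if any stalling or engine interrupt is pending; the
	 * engine-specific bits are reported through eng_intr_pending */
	if (g->ops.mc.is_stall_and_eng_intr_pending(g, act_eng_id,
			&eng_intr_pending)) {
		if (eng_intr_pending != 0U) {
			/* engine interrupt pending: preemption will not
			 * finish, stop polling and schedule an engine reset */
			*reset_eng_bitmask |= BIT(act_eng_id);
			return true;
		}
		/* some other stall interrupt: preemption might still
		 * complete, so keep polling until the preempt timeout */
	}
	return false;
}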