diff --git a/drivers/gpu/nvgpu/common/fifo/channel.c b/drivers/gpu/nvgpu/common/fifo/channel.c
index 08a808bba..40af71ad4 100644
--- a/drivers/gpu/nvgpu/common/fifo/channel.c
+++ b/drivers/gpu/nvgpu/common/fifo/channel.c
@@ -1694,12 +1694,9 @@ static bool nvgpu_channel_ctxsw_timeout_debug_dump_state(
 	return verbose;
 }
 
-static void nvgpu_channel_set_has_timedout_and_wakeup_wqs(struct gk20a *g,
-		struct nvgpu_channel *ch)
+void nvgpu_channel_wakeup_wqs(struct gk20a *g,
+		struct nvgpu_channel *ch)
 {
-	/* mark channel as faulted */
-	nvgpu_channel_set_unserviceable(ch);
-
 	/* unblock pending waits */
 	if (nvgpu_cond_broadcast_interruptible(&ch->semaphore_wq) != 0) {
 		nvgpu_warn(g, "failed to broadcast");
@@ -1714,7 +1711,11 @@ bool nvgpu_channel_mark_error(struct gk20a *g, struct nvgpu_channel *ch)
 	bool verbose;
 
 	verbose = nvgpu_channel_ctxsw_timeout_debug_dump_state(ch);
-	nvgpu_channel_set_has_timedout_and_wakeup_wqs(g, ch);
+
+	/* mark channel as faulted */
+	nvgpu_channel_set_unserviceable(ch);
+
+	nvgpu_channel_wakeup_wqs(g, ch);
 
 	return verbose;
 }
@@ -1736,7 +1737,8 @@ void nvgpu_channel_sw_quiesce(struct gk20a *g)
 		if (ch != NULL) {
 			nvgpu_channel_set_error_notifier(g, ch,
 				NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
-			nvgpu_channel_set_has_timedout_and_wakeup_wqs(g, ch);
+			nvgpu_channel_set_unserviceable(ch);
+			nvgpu_channel_wakeup_wqs(g, ch);
 			nvgpu_channel_put(ch);
 		}
 	}
diff --git a/drivers/gpu/nvgpu/common/fifo/tsg.c b/drivers/gpu/nvgpu/common/fifo/tsg.c
index a01139a7d..692499099 100644
--- a/drivers/gpu/nvgpu/common/fifo/tsg.c
+++ b/drivers/gpu/nvgpu/common/fifo/tsg.c
@@ -423,6 +423,36 @@ clean_up_mutex:
 	return err;
 }
 
+void nvgpu_tsg_set_unserviceable(struct gk20a *g,
+		struct nvgpu_tsg *tsg)
+{
+	struct nvgpu_channel *ch = NULL;
+
+	nvgpu_rwsem_down_read(&tsg->ch_list_lock);
+	nvgpu_list_for_each_entry(ch, &tsg->ch_list, nvgpu_channel, ch_entry) {
+		if (nvgpu_channel_get(ch) != NULL) {
+			nvgpu_channel_set_unserviceable(ch);
+			nvgpu_channel_put(ch);
+		}
+	}
+	nvgpu_rwsem_up_read(&tsg->ch_list_lock);
+}
+
+void nvgpu_tsg_wakeup_wqs(struct gk20a *g,
+		struct nvgpu_tsg *tsg)
+{
+	struct nvgpu_channel *ch = NULL;
+
+	nvgpu_rwsem_down_read(&tsg->ch_list_lock);
+	nvgpu_list_for_each_entry(ch, &tsg->ch_list, nvgpu_channel, ch_entry) {
+		if (nvgpu_channel_get(ch) != NULL) {
+			nvgpu_channel_wakeup_wqs(g, ch);
+			nvgpu_channel_put(ch);
+		}
+	}
+	nvgpu_rwsem_up_read(&tsg->ch_list_lock);
+}
+
 bool nvgpu_tsg_mark_error(struct gk20a *g,
 		struct nvgpu_tsg *tsg)
 {
diff --git a/drivers/gpu/nvgpu/hal/fifo/fifo_intr_gk20a.c b/drivers/gpu/nvgpu/hal/fifo/fifo_intr_gk20a.c
index edd61a6d2..e8e62001e 100644
--- a/drivers/gpu/nvgpu/hal/fifo/fifo_intr_gk20a.c
+++ b/drivers/gpu/nvgpu/hal/fifo/fifo_intr_gk20a.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011-2019, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -188,7 +188,7 @@ void gk20a_fifo_intr_0_isr(struct gk20a *g)
 	}
 
 	if ((fifo_intr & fifo_intr_0_mmu_fault_pending_f()) != 0U) {
-		(void) gk20a_fifo_handle_mmu_fault(g, 0, INVAL_ID, false);
+		gk20a_fifo_handle_mmu_fault(g, 0, INVAL_ID, false);
 		clear_intr |= fifo_intr_0_mmu_fault_pending_f();
 	}
 
diff --git a/drivers/gpu/nvgpu/hal/fifo/mmu_fault_gk20a.c b/drivers/gpu/nvgpu/hal/fifo/mmu_fault_gk20a.c
index 1545fe1ea..f29bd6c78 100644
--- a/drivers/gpu/nvgpu/hal/fifo/mmu_fault_gk20a.c
+++ b/drivers/gpu/nvgpu/hal/fifo/mmu_fault_gk20a.c
@@ -231,7 +231,7 @@ void gk20a_fifo_handle_dropped_mmu_fault(struct gk20a *g)
 	nvgpu_err(g, "dropped mmu fault (0x%08x)", fault_id);
 }
 
-bool gk20a_fifo_handle_mmu_fault_locked(
+void gk20a_fifo_handle_mmu_fault_locked(
 	struct gk20a *g,
 	u32 mmu_fault_engines, /* queried from HW if 0 */
 	u32 hw_id, /* queried from HW if ~(u32)0 OR mmu_fault_engines == 0*/
@@ -240,7 +240,6 @@ bool gk20a_fifo_handle_mmu_fault_locked(
 	bool fake_fault;
 	unsigned long fault_id;
 	unsigned long engine_mmu_fault_id;
-	bool debug_dump = true;
 	struct nvgpu_engine_status_info engine_status;
 	bool deferred_reset_pending = false;
 
@@ -340,6 +339,28 @@ bool gk20a_fifo_handle_mmu_fault_locked(
 		}
 	}
 
+	/* Set the unserviceable flag right at the start of recovery to
+	 * reduce the window of the race between job submit and recovery
+	 * on the same TSG.
+	 * The unserviceable flag is checked during job submit and
+	 * prevents new jobs from being submitted to a TSG which is
+	 * headed for teardown.
+	 */
+	if (tsg != NULL) {
+		/* Set the error notifier before letting userspace
+		 * know about the faulty channel.
+		 * The unserviceable flag is moved early to
+		 * disallow submits on the broken channel. If
+		 * userspace checks the notifier code when a
+		 * submit fails, we need it set to convey to
+		 * userspace that the channel is no longer usable.
+		 */
+		if (!fake_fault) {
+			nvgpu_tsg_set_ctx_mmu_error(g, tsg);
+		}
+		nvgpu_tsg_set_unserviceable(g, tsg);
+	}
+
 	/* check if engine reset should be deferred */
 	if (engine_id != NVGPU_INVALID_ENG_ID) {
 #ifdef CONFIG_NVGPU_DEBUGGER
@@ -381,10 +402,7 @@ bool gk20a_fifo_handle_mmu_fault_locked(
 		if (deferred_reset_pending) {
 			g->ops.tsg.disable(tsg);
 		} else {
-			if (!fake_fault) {
-				nvgpu_tsg_set_ctx_mmu_error(g, tsg);
-			}
-			debug_dump = nvgpu_tsg_mark_error(g, tsg);
+			nvgpu_tsg_wakeup_wqs(g, tsg);
 			nvgpu_tsg_abort(g, tsg, false);
 		}
 
@@ -426,17 +444,14 @@ bool gk20a_fifo_handle_mmu_fault_locked(
 	if (nvgpu_cg_pg_enable(g) != 0) {
 		nvgpu_warn(g, "fail to enable power mgmt");
 	}
-	return debug_dump;
 }
 
-bool gk20a_fifo_handle_mmu_fault(
+void gk20a_fifo_handle_mmu_fault(
 	struct gk20a *g,
 	u32 mmu_fault_engines, /* queried from HW if 0 */
 	u32 hw_id, /* queried from HW if ~(u32)0 OR mmu_fault_engines == 0*/
 	bool id_is_tsg)
 {
-	bool debug_dump;
-
 	nvgpu_log_fn(g, " ");
 
 	nvgpu_log_info(g, "acquire engines_reset_mutex");
@@ -444,13 +459,11 @@ bool gk20a_fifo_handle_mmu_fault(
 
 	nvgpu_runlist_lock_active_runlists(g);
 
-	debug_dump = gk20a_fifo_handle_mmu_fault_locked(g, mmu_fault_engines,
+	gk20a_fifo_handle_mmu_fault_locked(g, mmu_fault_engines,
 			hw_id, id_is_tsg);
 
 	nvgpu_runlist_unlock_active_runlists(g);
 
 	nvgpu_log_info(g, "release engines_reset_mutex");
 	nvgpu_mutex_release(&g->fifo.engines_reset_mutex);
-
-	return debug_dump;
 }
diff --git a/drivers/gpu/nvgpu/hal/fifo/mmu_fault_gk20a.h b/drivers/gpu/nvgpu/hal/fifo/mmu_fault_gk20a.h
index 6813a5fe4..3386720be 100644
--- a/drivers/gpu/nvgpu/hal/fifo/mmu_fault_gk20a.h
+++ b/drivers/gpu/nvgpu/hal/fifo/mmu_fault_gk20a.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011-2019, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -39,10 +39,10 @@ void gk20a_fifo_mmu_fault_info_dump(struct gk20a *g, u32 engine_id,
 
 void gk20a_fifo_handle_dropped_mmu_fault(struct gk20a *g);
 
-bool gk20a_fifo_handle_mmu_fault(struct gk20a *g, u32 mmu_fault_engines,
+void gk20a_fifo_handle_mmu_fault(struct gk20a *g, u32 mmu_fault_engines,
 	u32 hw_id, bool id_is_tsg);
 
-bool gk20a_fifo_handle_mmu_fault_locked(struct gk20a *g, u32 mmu_fault_engines,
+void gk20a_fifo_handle_mmu_fault_locked(struct gk20a *g, u32 mmu_fault_engines,
 	u32 hw_id, bool id_is_tsg);
 
 #endif /* NVGPU_FIFO_MMU_FAULT_GK20A_H */
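Note (illustrative sketch, not part of the patch): the comment added in gk20a_fifo_handle_mmu_fault_locked() explains that the unserviceable flag is checked during job submit so that new work is rejected on a TSG headed for teardown. A submit-side gate built on the existing nvgpu_channel_check_unserviceable() helper declared in include/nvgpu/channel.h could look roughly like the following; the wrapper function, its name and the chosen error code are assumptions for illustration only.

/* Illustrative only: reject submits on a channel that recovery has
 * already marked unserviceable. Because the error notifier is posted
 * before the flag is set, userspace that sees this submit fail can
 * read a meaningful notifier code.
 */
static int example_submit_gate(struct gk20a *g, struct nvgpu_channel *ch)
{
	if (nvgpu_channel_check_unserviceable(ch)) {
		nvgpu_err(g, "channel unserviceable, rejecting submit");
		return -ETIMEDOUT;
	}
	return 0;
}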
diff --git a/drivers/gpu/nvgpu/hal/rc/rc_gk20a.c b/drivers/gpu/nvgpu/hal/rc/rc_gk20a.c
index 622e7d186..cca661460 100644
--- a/drivers/gpu/nvgpu/hal/rc/rc_gk20a.c
+++ b/drivers/gpu/nvgpu/hal/rc/rc_gk20a.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011-2019, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
@@ -116,12 +116,8 @@ void gk20a_fifo_recover(struct gk20a *g, u32 eng_bitmask,
 		g->ops.fifo.intr_set_recover_mask(g);
 
 		g->ops.fifo.trigger_mmu_fault(g, engine_ids);
-		/*
-		 * Ignore the "Verbose" flag from
-		 * gk20a_fifo_handle_mmu_fault_locked since it is not needed
-		 * here
-		 */
-		(void) gk20a_fifo_handle_mmu_fault_locked(g, mmu_fault_engines,
+
+		gk20a_fifo_handle_mmu_fault_locked(g, mmu_fault_engines,
 				ref_id, ref_id_is_tsg);
 
 		g->ops.fifo.intr_unset_recover_mask(g);
diff --git a/drivers/gpu/nvgpu/hal/rc/rc_gv11b.c b/drivers/gpu/nvgpu/hal/rc/rc_gv11b.c
index e19d94ede..f216dcaf9 100644
--- a/drivers/gpu/nvgpu/hal/rc/rc_gv11b.c
+++ b/drivers/gpu/nvgpu/hal/rc/rc_gv11b.c
@@ -175,6 +175,28 @@ void gv11b_fifo_recover(struct gk20a *g, u32 act_eng_bitmask,
 		"act_eng_bitmask = 0x%x, mmufault ptr = 0x%p",
 		 id, id_type, rc_type, act_eng_bitmask, mmufault);
 
+	/* Set the unserviceable flag right at the start of recovery to
+	 * reduce the window of the race between job submit and recovery
+	 * on the same TSG.
+	 * The unserviceable flag is checked during job submit and
+	 * prevents new jobs from being submitted to a TSG which is
+	 * headed for teardown.
+	 */
+	if (tsg != NULL) {
+		/* Set the error notifier before letting userspace
+		 * know about the faulty channel.
+		 * The unserviceable flag is moved early to
+		 * disallow submits on the broken channel. If
+		 * userspace checks the notifier code when a
+		 * submit fails, we need it set to convey to
+		 * userspace that the channel is no longer usable.
+		 */
+		if (rc_type == RC_TYPE_MMU_FAULT) {
+			nvgpu_tsg_set_ctx_mmu_error(g, tsg);
+		}
+		nvgpu_tsg_set_unserviceable(g, tsg);
+	}
+
 	if (rc_type == RC_TYPE_MMU_FAULT && mmufault != NULL) {
 		if(mmufault->faulted_pbdma != INVAL_ID) {
 			pbdma_bitmask = BIT32(mmufault->faulted_pbdma);
@@ -290,10 +312,7 @@ void gv11b_fifo_recover(struct gk20a *g, u32 act_eng_bitmask,
 			g->ops.tsg.disable(tsg);
 		} else {
 #endif
-			if (rc_type == RC_TYPE_MMU_FAULT) {
-				nvgpu_tsg_set_ctx_mmu_error(g, tsg);
-			}
-			(void)nvgpu_tsg_mark_error(g, tsg);
+			nvgpu_tsg_wakeup_wqs(g, tsg);
 			nvgpu_tsg_abort(g, tsg, false);
 #ifdef CONFIG_NVGPU_DEBUGGER
 		}
diff --git a/drivers/gpu/nvgpu/include/nvgpu/channel.h b/drivers/gpu/nvgpu/include/nvgpu/channel.h
index 2805eca3d..631ba2110 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/channel.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/channel.h
@@ -1050,6 +1050,18 @@ void nvgpu_channel_set_unserviceable(struct nvgpu_channel *ch);
 */
 bool nvgpu_channel_check_unserviceable(struct nvgpu_channel *ch);
 
+/**
+ * @brief Signal on wait queues (notify_wq and semaphore_wq).
+ *
+ * @param g [in]	Pointer to GPU driver struct.
+ * @param ch [in]	Channel pointer.
+ *
+ * Unblock pending waits on this channel (semaphore and error
+ * notifier wait queues).
+ *
+ */
+void nvgpu_channel_wakeup_wqs(struct gk20a *g, struct nvgpu_channel *ch);
+
 #ifdef CONFIG_NVGPU_USERD
 /**
 * @brief Channel userd physical address.
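Note (illustrative sketch, not part of the patch): nvgpu_channel_wakeup_wqs() only broadcasts; it is the combination with the unserviceable flag that lets a blocked waiter exit early once recovery has marked the channel dead. The sketch below assumes the NVGPU_COND_WAIT_INTERRUPTIBLE(cond, condition, timeout_ms) macro from include/nvgpu/cond.h, and payload_ready() is a placeholder for whatever condition the real waiter polls.

/* Illustrative waiter: sleeps on the semaphore wait queue and wakes up
 * either when its payload condition is met or when recovery marks the
 * channel unserviceable and broadcasts via nvgpu_channel_wakeup_wqs().
 */
static int example_wait_for_payload(struct nvgpu_channel *ch)
{
	return NVGPU_COND_WAIT_INTERRUPTIBLE(&ch->semaphore_wq,
			payload_ready(ch) ||
			nvgpu_channel_check_unserviceable(ch),
			0U);
}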
diff --git a/drivers/gpu/nvgpu/include/nvgpu/tsg.h b/drivers/gpu/nvgpu/include/nvgpu/tsg.h
index ed76a6195..f1b189799 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/tsg.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/tsg.h
@@ -541,6 +541,49 @@ void nvgpu_tsg_disable_sched(struct gk20a *g, struct nvgpu_tsg *tsg);
 int nvgpu_tsg_alloc_sm_error_states_mem(struct gk20a *g,
 		struct nvgpu_tsg *tsg,
 		u32 num_sm);
+
+/**
+ * @brief Mark all the referenceable channels of the tsg's channel
+ * list as unserviceable.
+ *
+ * @param g [in]	The GPU driver struct.
+ * @param tsg [in]	Pointer to TSG struct.
+ *
+ * The channels are set unserviceable to convey that an uncorrectable
+ * error has occurred on the channel. It is not possible to take more
+ * references on this channel and the only available option for
+ * userspace is to close the channel fd.
+ * - Acquire #nvgpu_tsg.ch_list_lock of the tsg.
+ * - For each entry of the channels in #nvgpu_tsg.ch_list of the tsg,
+ *   -- Get reference to the channel.
+ *   -- If the channel is referenceable,
+ *      --- Call #nvgpu_channel_set_unserviceable.
+ *      --- Put reference to the channel.
+ * - Release #nvgpu_tsg.ch_list_lock of the tsg.
+ *
+ */
+void nvgpu_tsg_set_unserviceable(struct gk20a *g, struct nvgpu_tsg *tsg);
+
+/**
+ * @brief Release waiters for all the referenceable channels of the
+ * tsg's channel list.
+ *
+ * @param g [in]	The GPU driver struct.
+ * @param tsg [in]	Pointer to TSG struct.
+ *
+ * Unblock pending waits on each channel (semaphore and error
+ * notifier wait queues).
+ * - Acquire #nvgpu_tsg.ch_list_lock of the tsg.
+ * - For each entry of the channels in #nvgpu_tsg.ch_list of the tsg,
+ *   -- Get reference to the channel.
+ *   -- If the channel is referenceable,
+ *      --- Call #nvgpu_channel_wakeup_wqs.
+ *      --- Put reference to the channel.
+ * - Release #nvgpu_tsg.ch_list_lock of the tsg.
+ *
+ */
+void nvgpu_tsg_wakeup_wqs(struct gk20a *g, struct nvgpu_tsg *tsg);
+
 #ifdef CONFIG_NVGPU_DEBUGGER
 int nvgpu_tsg_set_sm_exception_type_mask(struct nvgpu_channel *ch,
 		u32 exception_mask);
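Note (illustrative sketch, not part of the patch): taken together, the recovery paths in mmu_fault_gk20a.c and rc_gv11b.c now follow one ordering, summarized below using the helpers this patch adds or already relies on. This is a simplified view; the deferred-reset branch is omitted and the per-path fault-type checks are reduced to a single flag.

/* Simplified view of the TSG recovery ordering after this change. */
static void example_recovery_order(struct gk20a *g, struct nvgpu_tsg *tsg,
		bool real_mmu_fault)
{
	/* 1. Post the error notifier so userspace can see why it failed. */
	if (real_mmu_fault) {
		nvgpu_tsg_set_ctx_mmu_error(g, tsg);
	}

	/* 2. Block new submits as early as possible to shrink the
	 * submit-vs-recovery race window.
	 */
	nvgpu_tsg_set_unserviceable(g, tsg);

	/* ... engine reset / preemption handling happens here ... */

	/* 3. Wake anything sleeping on the channels' wait queues. */
	nvgpu_tsg_wakeup_wqs(g, tsg);

	/* 4. Abort the TSG. */
	nvgpu_tsg_abort(g, tsg, false);
}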