diff --git a/drivers/gpu/nvgpu/common/fifo/tsg.c b/drivers/gpu/nvgpu/common/fifo/tsg.c
index 984a7d23f..793bf94d3 100644
--- a/drivers/gpu/nvgpu/common/fifo/tsg.c
+++ b/drivers/gpu/nvgpu/common/fifo/tsg.c
@@ -185,18 +185,45 @@ int gk20a_tsg_unbind_channel(struct channel_gk20a *ch)
 void nvgpu_tsg_recover(struct gk20a *g, struct tsg_gk20a *tsg,
                        bool verbose, u32 rc_type)
 {
-       u32 engines;
+       u32 engines_mask = 0U;
+       int err;
 
-       /* stop context switching to prevent engine assignments from
-          changing until TSG is recovered */
        nvgpu_mutex_acquire(&g->dbg_sessions_lock);
-       gr_gk20a_disable_ctxsw(g);
 
-       engines = g->ops.fifo.get_engines_mask_on_id(g, tsg->tsgid, true);
+       /* disable tsg so that it does not get scheduled again */
+       g->ops.fifo.disable_tsg(tsg);
 
-       if (engines != 0U) {
-               gk20a_fifo_recover(g, engines, tsg->tsgid, true, true, verbose,
-                                       rc_type);
+       /*
+        * stop context switching to prevent engine assignments from
+        * changing until engine status is checked to make sure tsg
+        * being recovered is not loaded on the engines
+        */
+       err = gr_gk20a_disable_ctxsw(g);
+
+       if (err != 0) {
+               /* if failed to disable ctxsw, just abort tsg */
+               nvgpu_err(g, "failed to disable ctxsw");
+       } else {
+               /* recover engines if tsg is loaded on the engines */
+               engines_mask = g->ops.fifo.get_engines_mask_on_id(g,
+                               tsg->tsgid, true);
+
+               /*
+                * it is ok to enable ctxsw before tsg is recovered. If engines
+                * is 0, no engine recovery is needed and if it is non zero,
+                * gk20a_fifo_recover will call get_engines_mask_on_id again.
+                * By that time if tsg is not on the engine, engine need not
+                * be reset.
+                */
+               err = gr_gk20a_enable_ctxsw(g);
+               if (err != 0) {
+                       nvgpu_err(g, "failed to enable ctxsw");
+               }
+       }
+
+       if (engines_mask != 0U) {
+               gk20a_fifo_recover(g, engines_mask, tsg->tsgid, true, true,
+                       verbose, rc_type);
        } else {
                if (nvgpu_tsg_mark_error(g, tsg) && verbose) {
                        gk20a_debug_dump(g);
@@ -205,7 +232,6 @@ void nvgpu_tsg_recover(struct gk20a *g, struct tsg_gk20a *tsg,
                gk20a_fifo_abort_tsg(g, tsg, false);
        }
 
-       gr_gk20a_enable_ctxsw(g);
        nvgpu_mutex_release(&g->dbg_sessions_lock);
 }
 
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 108155222..cc00c5fc7 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -528,7 +528,14 @@ static int gr_gk20a_ctrl_ctxsw(struct gk20a *g, u32 fecs_method, u32 *ret)
                      .cond.fail = GR_IS_UCODE_OP_EQUAL }, true);
 }
 
-/* Stop processing (stall) context switches at FECS. */
+/**
+ * Stop processing (stall) context switches at FECS:-
+ * If fecs is sent stop_ctxsw method, elpg entry/exit cannot happen
+ * and may timeout. It could manifest as different error signatures
+ * depending on when stop_ctxsw fecs method gets sent with respect
+ * to pmu elpg sequence. It could come as pmu halt or abort or
+ * maybe ext error too.
+ */
 int gr_gk20a_disable_ctxsw(struct gk20a *g)
 {
        int err = 0;
@@ -538,8 +545,24 @@ int gr_gk20a_disable_ctxsw(struct gk20a *g)
        nvgpu_mutex_acquire(&g->ctxsw_disable_lock);
        g->ctxsw_disable_count++;
        if (g->ctxsw_disable_count == 1) {
-               err = gr_gk20a_ctrl_ctxsw(g,
+               err = nvgpu_pg_elpg_disable(g);
+               if (err != 0) {
+                       nvgpu_err(g, "failed to disable elpg. not safe to "
+                                       "stop_ctxsw");
+                       /* stop ctxsw command is not sent */
+                       g->ctxsw_disable_count--;
+               } else {
+                       err = gr_gk20a_ctrl_ctxsw(g,
                                gr_fecs_method_push_adr_stop_ctxsw_v(), NULL);
+                       if (err != 0) {
+                               nvgpu_err(g, "failed to stop fecs ctxsw");
+                               /* stop ctxsw failed */
+                               g->ctxsw_disable_count--;
+                       }
+               }
+       } else {
+               nvgpu_log_info(g, "ctxsw disabled, ctxsw_disable_count: %d",
+                       g->ctxsw_disable_count);
        }
 
        nvgpu_mutex_release(&g->ctxsw_disable_lock);
@@ -554,12 +577,27 @@ int gr_gk20a_enable_ctxsw(struct gk20a *g)
        nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
 
        nvgpu_mutex_acquire(&g->ctxsw_disable_lock);
+       if (g->ctxsw_disable_count == 0) {
+               goto ctxsw_already_enabled;
+       }
        g->ctxsw_disable_count--;
        WARN_ON(g->ctxsw_disable_count < 0);
        if (g->ctxsw_disable_count == 0) {
                err = gr_gk20a_ctrl_ctxsw(g,
                        gr_fecs_method_push_adr_start_ctxsw_v(), NULL);
+               if (err != 0) {
+                       nvgpu_err(g, "failed to start fecs ctxsw");
+               } else {
+                       if (nvgpu_pg_elpg_enable(g) != 0) {
+                               nvgpu_err(g, "failed to enable elpg "
+                                       "after start_ctxsw");
+                       }
+               }
+       } else {
+               nvgpu_log_info(g, "ctxsw_disable_count: %d is not 0 yet",
+                       g->ctxsw_disable_count);
        }
+ctxsw_already_enabled:
        nvgpu_mutex_release(&g->ctxsw_disable_lock);
 
        return err;
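
Note: after this change gr_gk20a_disable_ctxsw() can fail (either nvgpu_pg_elpg_disable() or the stop_ctxsw FECS method may return an error), and on failure it rolls ctxsw_disable_count back, while gr_gk20a_enable_ctxsw() now returns early when the count is already zero. The sketch below is a minimal, standalone model of that refcount behaviour for illustration only; it is not nvgpu code, and the names mock_gpu, elpg_disable_fails and fecs_stop_fails are hypothetical stand-ins for the real driver state and failure paths.

/*
 * Standalone sketch (not nvgpu code) of the disable/enable refcount
 * behaviour introduced by the patch: a failed disable rolls the count
 * back, and enable becomes a no-op when the count is already zero.
 */
#include <stdio.h>
#include <stdbool.h>

struct mock_gpu {
        int ctxsw_disable_count;
        bool elpg_disable_fails;        /* stands in for nvgpu_pg_elpg_disable() failing */
        bool fecs_stop_fails;           /* stands in for the stop_ctxsw FECS method failing */
};

static int mock_disable_ctxsw(struct mock_gpu *g)
{
        int err = 0;

        g->ctxsw_disable_count++;
        if (g->ctxsw_disable_count == 1) {
                if (g->elpg_disable_fails) {
                        err = -1;
                        g->ctxsw_disable_count--;       /* ELPG still on: stop_ctxsw not sent */
                } else if (g->fecs_stop_fails) {
                        err = -1;
                        g->ctxsw_disable_count--;       /* stop_ctxsw failed: roll back */
                }
        }
        return err;
}

static int mock_enable_ctxsw(struct mock_gpu *g)
{
        if (g->ctxsw_disable_count == 0) {
                return 0;       /* already enabled: nothing to undo */
        }
        g->ctxsw_disable_count--;
        /* the real code restarts FECS ctxsw and re-enables ELPG when the count hits 0 */
        return 0;
}

int main(void)
{
        struct mock_gpu g = { .elpg_disable_fails = true };

        if (mock_disable_ctxsw(&g) != 0) {
                printf("disable failed, count=%d\n", g.ctxsw_disable_count);
        }
        /* an unmatched enable from an error path is now harmless */
        mock_enable_ctxsw(&g);
        printf("after enable, count=%d\n", g.ctxsw_disable_count);
        return 0;
}

Rolling the count back on failure keeps disable/enable calls balanced, and the early return in enable means a caller that never managed to stop ctxsw (such as the error leg in nvgpu_tsg_recover above) cannot drive ctxsw_disable_count negative.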