diff --git a/drivers/gpu/nvgpu/common/linux/driver_common.c b/drivers/gpu/nvgpu/common/linux/driver_common.c
index 9029366ff..695bb3072 100644
--- a/drivers/gpu/nvgpu/common/linux/driver_common.c
+++ b/drivers/gpu/nvgpu/common/linux/driver_common.c
@@ -62,6 +62,7 @@ static void nvgpu_init_vars(struct gk20a *g)
 	nvgpu_mutex_init(&g->client_lock);
 	nvgpu_mutex_init(&g->poweron_lock);
 	nvgpu_mutex_init(&g->poweroff_lock);
+	nvgpu_mutex_init(&g->ctxsw_disable_lock);
 
 	l->regs_saved = l->regs;
 	l->bar1_saved = l->bar1;
diff --git a/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_linux.c b/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_linux.c
index 1e5efa381..6f040603b 100644
--- a/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_linux.c
+++ b/drivers/gpu/nvgpu/common/linux/vgpu/vgpu_linux.c
@@ -76,6 +76,7 @@ static void vgpu_init_vars(struct gk20a *g, struct gk20a_platform *platform)
 
 	nvgpu_mutex_init(&g->poweron_lock);
 	nvgpu_mutex_init(&g->poweroff_lock);
+	nvgpu_mutex_init(&g->ctxsw_disable_lock);
 	l->regs_saved = l->regs;
 	l->bar1_saved = l->bar1;
 
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 3442861cb..4ebdb6a4e 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -1191,6 +1191,9 @@ struct gk20a {
 
 	nvgpu_atomic_t usage_count;
 
+	struct nvgpu_mutex ctxsw_disable_lock;
+	int ctxsw_disable_count;
+
 	struct nvgpu_ref refcount;
 
 	const char *name;
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 619751062..8a3f98af6 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -616,22 +616,74 @@ static int gr_gk20a_ctrl_ctxsw(struct gk20a *g, u32 fecs_method, u32 *ret)
 		      .cond.fail = GR_IS_UCODE_OP_EQUAL }, true);
 }
 
-/* Stop processing (stall) context switches at FECS.
- * The caller must hold the dbg_sessions_lock, else if mutliple stop methods
- * are sent to the ucode in sequence, it can get into an undefined state. */
+/*
+ * Stop processing (stall) context switches at FECS.
+ *
+ * Requests are counted under ctxsw_disable_lock and only the first one sends
+ * the stop method, so the ucode never sees several stop methods in sequence
+ * (which can leave it in an undefined state). Every successful call must be
+ * paired with a call to gr_gk20a_enable_ctxsw().
+ *
+ * Returns 0 on success, otherwise the error from gr_gk20a_ctrl_ctxsw(); on
+ * failure the request is not counted, so no enable call is owed for it.
+ */
 int gr_gk20a_disable_ctxsw(struct gk20a *g)
 {
+	int err = 0;
+
 	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
-	return gr_gk20a_ctrl_ctxsw(g,
-			gr_fecs_method_push_adr_stop_ctxsw_v(), NULL);
+
+	nvgpu_mutex_acquire(&g->ctxsw_disable_lock);
+	g->ctxsw_disable_count++;
+	if (g->ctxsw_disable_count == 1) {
+		err = gr_gk20a_ctrl_ctxsw(g,
+				gr_fecs_method_push_adr_stop_ctxsw_v(), NULL);
+		/*
+		 * Do not count a request whose stop method failed; otherwise
+		 * later callers would skip the stop method and wrongly assume
+		 * that context switching is stalled.
+		 */
+		if (err)
+			g->ctxsw_disable_count--;
+	}
+	nvgpu_mutex_release(&g->ctxsw_disable_lock);
+
+	return err;
 }
 
-/* Start processing (continue) context switches at FECS */
+/*
+ * Start processing (continue) context switches at FECS.
+ *
+ * Drops one request taken by gr_gk20a_disable_ctxsw(); the start method is
+ * sent only when the last outstanding request is dropped. A call without a
+ * matching disable triggers a warning and is otherwise ignored.
+ *
+ * Returns 0, or the error from gr_gk20a_ctrl_ctxsw() when the start method
+ * was sent and failed.
+ */
 int gr_gk20a_enable_ctxsw(struct gk20a *g)
 {
+	int err = 0;
+
 	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "");
-	return gr_gk20a_ctrl_ctxsw(g,
-			gr_fecs_method_push_adr_start_ctxsw_v(), NULL);
+
+	nvgpu_mutex_acquire(&g->ctxsw_disable_lock);
+	if (g->ctxsw_disable_count <= 0) {
+		/*
+		 * Unbalanced enable: warn, but keep the count at zero so that
+		 * the next disable still sends the stop method.
+		 */
+		WARN_ON(1);
+		g->ctxsw_disable_count = 0;
+	} else {
+		g->ctxsw_disable_count--;
+		if (g->ctxsw_disable_count == 0)
+			err = gr_gk20a_ctrl_ctxsw(g,
+				gr_fecs_method_push_adr_start_ctxsw_v(), NULL);
+	}
+	nvgpu_mutex_release(&g->ctxsw_disable_lock);
+
+	return err;
 }
 
 int gr_gk20a_halt_pipe(struct gk20a *g)