diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index dbed9880f..78f777aec 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -2410,6 +2410,34 @@ bool gk20a_fifo_handle_sched_error(struct gk20a *g)
 	sched_error = gk20a_readl(g, fifo_intr_sched_error_r());
 
 	engine_id = gk20a_fifo_get_failing_engine_data(g, &id, &is_tsg);
+	/*
+	 * Could not find the engine
+	 * Possible Causes:
+	 * a)
+	 * On hitting engine reset, h/w drops the ctxsw_status to INVALID in
+	 * fifo_engine_status register. Also while the engine is held in reset
+	 * h/w passes busy/idle straight through. fifo_engine_status registers
+	 * are correct in that there is no context switch outstanding
+	 * as the CTXSW is aborted when reset is asserted.
+	 * This is just a side effect of how gv100 and earlier versions of
+	 * ctxsw_timeout behave.
+	 * With gv11b and later, h/w snaps the context at the point of error
+	 * so that s/w can see the tsg_id which caused the HW timeout.
+	 * b)
+	 * If engines are not busy and ctxsw state is valid, then the intr
+	 * occurred in the past. If the ctxsw state has moved on to VALID from
+	 * LOAD or SAVE, whatever timed out eventually finished anyway. The
+	 * problem with this is that s/w cannot conclude which context caused
+	 * the problem, as more switches may have occurred before the intr is
+	 * handled.
+	 */
+	if (engine_id == FIFO_INVAL_ENGINE_ID) {
+		nvgpu_info(g, "fifo sched error: 0x%08x, failed to find engine "
+				"that is busy doing ctxsw. "
+				"May be ctxsw already happened", sched_error);
+		ret = false;
+		goto err;
+	}
 
 	/* could not find the engine - should never happen */
 	if (!gk20a_fifo_is_valid_engine_id(g, engine_id)) {