diff --git a/drivers/gpu/nvgpu/common/fifo/preempt.c b/drivers/gpu/nvgpu/common/fifo/preempt.c
index 17d40ffaf..72ff3f933 100644
--- a/drivers/gpu/nvgpu/common/fifo/preempt.c
+++ b/drivers/gpu/nvgpu/common/fifo/preempt.c
@@ -42,6 +42,9 @@ u32 nvgpu_preempt_get_timeout(struct gk20a *g)
 int nvgpu_fifo_preempt_tsg(struct gk20a *g, struct nvgpu_tsg *tsg)
 {
 	int ret = 0;
+	u32 preempt_retry_count = 10U;
+	u32 preempt_retry_timeout =
+			nvgpu_preempt_get_timeout(g) / preempt_retry_count;
 #ifdef CONFIG_NVGPU_LS_PMU
 	u32 token = PMU_INVALID_MUTEX_OWNER_ID;
 	int mutex_ret = 0;
@@ -53,40 +56,57 @@ int nvgpu_fifo_preempt_tsg(struct gk20a *g, struct nvgpu_tsg *tsg)
 		return 0;
 	}
 
-	nvgpu_mutex_acquire(&tsg->runlist->runlist_lock);
+	do {
+		nvgpu_mutex_acquire(&tsg->runlist->runlist_lock);
 
-	if (nvgpu_is_errata_present(g, NVGPU_ERRATA_2016608)) {
-		nvgpu_runlist_set_state(g, BIT32(tsg->runlist->id),
-					RUNLIST_DISABLED);
-	}
-
-#ifdef CONFIG_NVGPU_LS_PMU
-	mutex_ret = nvgpu_pmu_lock_acquire(g, g->pmu,
-						PMU_MUTEX_ID_FIFO, &token);
-#endif
-	nvgpu_log_fn(g, "preempt id: %d", tsg->tsgid);
-
-	g->ops.fifo.preempt_trigger(g, tsg->tsgid, ID_TYPE_TSG);
-
-	/* poll for preempt done */
-	ret = g->ops.fifo.is_preempt_pending(g, tsg->tsgid, ID_TYPE_TSG);
-
-#ifdef CONFIG_NVGPU_LS_PMU
-	if (mutex_ret == 0) {
-		int err = nvgpu_pmu_lock_release(g, g->pmu, PMU_MUTEX_ID_FIFO,
-				&token);
-		if (err != 0) {
-			nvgpu_err(g, "PMU_MUTEX_ID_FIFO not released err=%d",
-					err);
+		if (nvgpu_is_errata_present(g, NVGPU_ERRATA_2016608)) {
+			nvgpu_runlist_set_state(g, BIT32(tsg->runlist->id),
+						RUNLIST_DISABLED);
 		}
-	}
-#endif
-	if (nvgpu_is_errata_present(g, NVGPU_ERRATA_2016608)) {
-		nvgpu_runlist_set_state(g, BIT32(tsg->runlist->id),
-					RUNLIST_ENABLED);
-	}
 
-	nvgpu_mutex_release(&tsg->runlist->runlist_lock);
+#ifdef CONFIG_NVGPU_LS_PMU
+		mutex_ret = nvgpu_pmu_lock_acquire(g, g->pmu,
+						   PMU_MUTEX_ID_FIFO, &token);
+#endif
+		g->ops.fifo.preempt_trigger(g, tsg->tsgid, ID_TYPE_TSG);
+
+		/*
+		 * Poll for preempt done. If stalling interrupts are pending
+		 * while the preempt is in progress, is_preempt_pending()
+		 * returns -EAGAIN; we then wait for the stalling interrupts
+		 * to be handled and retry the preempt.
+		 * If HW is hung, on the last retry we identify the hung
+		 * engines, set them in the runlist's reset_eng_bitmask and
+		 * report the preemption as complete.
+		 */
+		ret = g->ops.fifo.is_preempt_pending(g, tsg->tsgid,
+					ID_TYPE_TSG, preempt_retry_count > 1U);
+
+#ifdef CONFIG_NVGPU_LS_PMU
+		if (mutex_ret == 0) {
+			int err = nvgpu_pmu_lock_release(g, g->pmu,
+						PMU_MUTEX_ID_FIFO, &token);
+			if (err != 0) {
+				nvgpu_err(g, "PMU_MUTEX_ID_FIFO not released err=%d", err);
+			}
+		}
+#endif
+		if (nvgpu_is_errata_present(g, NVGPU_ERRATA_2016608)) {
+			nvgpu_runlist_set_state(g, BIT32(tsg->runlist->id),
+						RUNLIST_ENABLED);
+		}
+
+		nvgpu_mutex_release(&tsg->runlist->runlist_lock);
+
+		if (ret != -EAGAIN) {
+			break;
+		}
+
+		ret = nvgpu_wait_for_stall_interrupts(g, preempt_retry_timeout);
+		if (ret != 0) {
+			nvgpu_log_info(g, "wait for stall interrupts failed %d", ret);
+		}
+	} while (--preempt_retry_count != 0U);
 
 	if (ret != 0) {
 		if (nvgpu_platform_is_silicon(g)) {
diff --git a/drivers/gpu/nvgpu/hal/fifo/preempt_gk20a.c b/drivers/gpu/nvgpu/hal/fifo/preempt_gk20a.c
index cd216ee4a..bf68b975f 100644
--- a/drivers/gpu/nvgpu/hal/fifo/preempt_gk20a.c
+++ b/drivers/gpu/nvgpu/hal/fifo/preempt_gk20a.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011-2019, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2011-2021, NVIDIA CORPORATION.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -62,11 +62,11 @@ static int gk20a_fifo_preempt_locked(struct gk20a *g, u32 id,
 	g->ops.fifo.preempt_trigger(g, id, id_type);
 
 	/* wait for preempt */
-	return g->ops.fifo.is_preempt_pending(g, id, id_type);
+	return g->ops.fifo.is_preempt_pending(g, id, id_type, false);
 }
 
 int gk20a_fifo_is_preempt_pending(struct gk20a *g, u32 id,
-		unsigned int id_type)
+		unsigned int id_type, bool preempt_retries_left)
 {
 	struct nvgpu_timeout timeout;
 	u32 delay = POLL_DELAY_MIN_US;
diff --git a/drivers/gpu/nvgpu/hal/fifo/preempt_gk20a.h b/drivers/gpu/nvgpu/hal/fifo/preempt_gk20a.h
index f6472bfa8..4ff4de918 100644
--- a/drivers/gpu/nvgpu/hal/fifo/preempt_gk20a.h
+++ b/drivers/gpu/nvgpu/hal/fifo/preempt_gk20a.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011-2019, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2011-2021, NVIDIA CORPORATION.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -32,6 +32,6 @@ void gk20a_fifo_preempt_trigger(struct gk20a *g, u32 id, unsigned int id_type);
 int  gk20a_fifo_preempt_channel(struct gk20a *g, struct nvgpu_channel *ch);
 int  gk20a_fifo_preempt_tsg(struct gk20a *g, struct nvgpu_tsg *tsg);
 int  gk20a_fifo_is_preempt_pending(struct gk20a *g, u32 id,
-			unsigned int id_type);
+			unsigned int id_type, bool preempt_retries_left);
 
 #endif /* FIFO_PREEMPT_GK20A_H */
diff --git a/drivers/gpu/nvgpu/hal/fifo/preempt_gv11b.h b/drivers/gpu/nvgpu/hal/fifo/preempt_gv11b.h
index 0d8a17130..3ce660151 100644
--- a/drivers/gpu/nvgpu/hal/fifo/preempt_gv11b.h
+++ b/drivers/gpu/nvgpu/hal/fifo/preempt_gv11b.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2016-2021, NVIDIA CORPORATION.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -33,7 +33,7 @@ struct nvgpu_tsg;
 void gv11b_fifo_preempt_trigger(struct gk20a *g, u32 id, unsigned int id_type);
 int  gv11b_fifo_preempt_channel(struct gk20a *g, struct nvgpu_channel *ch);
 int  gv11b_fifo_is_preempt_pending(struct gk20a *g, u32 id,
-			unsigned int id_type);
+			unsigned int id_type, bool preempt_retries_left);
 int gv11b_fifo_preempt_poll_pbdma(struct gk20a *g, u32 tsgid, u32 pbdma_id);
 
 #endif /* FIFO_PREEMPT_GV11B_H */
diff --git a/drivers/gpu/nvgpu/hal/fifo/preempt_gv11b_fusa.c b/drivers/gpu/nvgpu/hal/fifo/preempt_gv11b_fusa.c
index 74fdb83e2..a60643aa5 100644
--- a/drivers/gpu/nvgpu/hal/fifo/preempt_gv11b_fusa.c
+++ b/drivers/gpu/nvgpu/hal/fifo/preempt_gv11b_fusa.c
@@ -163,16 +163,16 @@ int gv11b_fifo_preempt_poll_pbdma(struct gk20a *g, u32 tsgid,
 static int gv11b_fifo_check_eng_intr_pending(struct gk20a *g, u32 id,
 			struct nvgpu_engine_status_info *engine_status,
 			u32 eng_intr_pending,
-			u32 engine_id, u32 *reset_eng_bitmask)
+			u32 engine_id, u32 *reset_eng_bitmask,
+			bool preempt_retries_left)
 {
+	bool check_preempt_retry = false;
 	int ret = -EBUSY;
 
 	if (engine_status->ctxsw_status == NVGPU_CTX_STATUS_CTXSW_SWITCH) {
 		/* Eng save hasn't started yet. Continue polling */
 		if (eng_intr_pending != 0U) {
-			/* if eng intr, stop polling */
-			*reset_eng_bitmask |= BIT32(engine_id);
-			ret = 0;
+			check_preempt_retry = true;
 		}
 
 	} else if ((engine_status->ctxsw_status == NVGPU_CTX_STATUS_VALID) ||
@@ -180,9 +180,7 @@ static int gv11b_fifo_check_eng_intr_pending(struct gk20a *g, u32 id,
 
 		if (id == engine_status->ctx_id) {
 			if (eng_intr_pending != 0U) {
-				/* preemption will not finish */
-				*reset_eng_bitmask |= BIT32(engine_id);
-				ret = 0;
+				check_preempt_retry = true;
 			}
 		} else {
 			/* context is not running on the engine */
@@ -193,24 +191,38 @@ static int gv11b_fifo_check_eng_intr_pending(struct gk20a *g, u32 id,
 
 		if (id == engine_status->ctx_next_id) {
 			if (eng_intr_pending != 0U) {
-				/* preemption will not finish */
-				*reset_eng_bitmask |= BIT32(engine_id);
-				ret = 0;
+				check_preempt_retry = true;
 			}
 		} else {
 			/* context is not running on the engine */
 			ret = 0;
 		}
 	} else {
-		/* Preempt should be finished */
-		ret = 0;
+		if (eng_intr_pending != 0U) {
+			check_preempt_retry = true;
+		} else {
+			/* Preempt should be finished */
+			ret = 0;
+		}
+	}
+
+	/* if eng intr, stop polling and check if we can retry preempts. */
+	if (check_preempt_retry) {
+		if (preempt_retries_left) {
+			ret = -EAGAIN;
+		} else {
+			/* preemption will not finish */
+			*reset_eng_bitmask |= BIT32(engine_id);
+			ret = 0;
+		}
 	}
 
 	return ret;
 }
 
 static int gv11b_fifo_preempt_poll_eng(struct gk20a *g, u32 id,
-			 u32 engine_id, u32 *reset_eng_bitmask)
+			 u32 engine_id, u32 *reset_eng_bitmask,
+			 bool preempt_retries_left)
 {
 	struct nvgpu_timeout timeout;
 	u32 delay = POLL_DELAY_MIN_US;
@@ -283,8 +295,8 @@ static int gv11b_fifo_preempt_poll_eng(struct gk20a *g, u32 id,
 		}
 		ret = gv11b_fifo_check_eng_intr_pending(g, id, &engine_status,
 				eng_intr_pending, engine_id,
-				reset_eng_bitmask);
-		if (ret == 0) {
+				reset_eng_bitmask, preempt_retries_left);
+		if (ret == 0 || ret == -EAGAIN) {
 			break;
 		}
 
@@ -292,7 +304,7 @@ static int gv11b_fifo_preempt_poll_eng(struct gk20a *g, u32 id,
 		delay = min_t(u32, delay << 1U, POLL_DELAY_MAX_US);
 	} while (nvgpu_timeout_expired(&timeout) == 0);
 
-	if (ret != 0) {
+	if (ret != 0 && ret != -EAGAIN) {
 		/*
 		 * The reasons a preempt can fail are:
 		 * 1.Some other stalling interrupt is asserted preventing
@@ -309,7 +321,7 @@ static int gv11b_fifo_preempt_poll_eng(struct gk20a *g, u32 id,
 }
 
 int gv11b_fifo_is_preempt_pending(struct gk20a *g, u32 id,
-		 unsigned int id_type)
+		 unsigned int id_type, bool preempt_retries_left)
 {
 	struct nvgpu_fifo *f = &g->fifo;
 	struct nvgpu_runlist *rl;
@@ -350,7 +362,7 @@ int gv11b_fifo_is_preempt_pending(struct gk20a *g, u32 id,
 		engine_id = U32(bit);
 		err = gv11b_fifo_preempt_poll_eng(g,
 			tsgid, engine_id,
-			&rl->reset_eng_bitmask);
+			&rl->reset_eng_bitmask, preempt_retries_left);
 		if ((err != 0) && (ret == 0)) {
 			ret = err;
 		}
diff --git a/drivers/gpu/nvgpu/hal/fifo/runlist_fifo_gk20a.c b/drivers/gpu/nvgpu/hal/fifo/runlist_fifo_gk20a.c
index 585451bf3..1eef14cdf 100644
--- a/drivers/gpu/nvgpu/hal/fifo/runlist_fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/hal/fifo/runlist_fifo_gk20a.c
@@ -109,7 +109,7 @@ int gk20a_fifo_reschedule_preempt_next(struct nvgpu_channel *ch,
 #endif
 	if (wait_preempt) {
 		if (g->ops.fifo.is_preempt_pending(g, preempt_id,
-			preempt_type) != 0) {
+			preempt_type, false) != 0) {
 			nvgpu_err(g, "fifo preempt timed out");
 			/*
 			 * This function does not care if preempt
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gops/fifo.h b/drivers/gpu/nvgpu/include/nvgpu/gops/fifo.h
index 0905e55fd..6393a9d05 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gops/fifo.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gops/fifo.h
@@ -177,7 +177,7 @@ struct gops_fifo {
 	int (*preempt_poll_pbdma)(struct gk20a *g, u32 tsgid,
 			 u32 pbdma_id);
 	int (*is_preempt_pending)(struct gk20a *g, u32 id,
-		unsigned int id_type);
+		unsigned int id_type, bool preempt_retries_left);
 	void (*intr_set_recover_mask)(struct gk20a *g);
 	void (*intr_unset_recover_mask)(struct gk20a *g);
 	void (*intr_top_enable)(struct gk20a *g, bool enable);
diff --git a/userspace/units/fifo/preempt/gv11b/nvgpu-preempt-gv11b.c b/userspace/units/fifo/preempt/gv11b/nvgpu-preempt-gv11b.c
index ba4f3bc77..5b492df3c 100644
--- a/userspace/units/fifo/preempt/gv11b/nvgpu-preempt-gv11b.c
+++ b/userspace/units/fifo/preempt/gv11b/nvgpu-preempt-gv11b.c
@@ -205,13 +205,15 @@ static void stub_fifo_preempt_trigger(struct gk20a *g, u32 id,
 }
 
 static int stub_fifo_is_preempt_pending_ebusy(struct gk20a *g, u32 id,
-							unsigned int id_type)
+						unsigned int id_type,
+						bool preempt_retries_left)
 {
 	return -EBUSY;
 }
 
 static int stub_fifo_is_preempt_pending_pass(struct gk20a *g, u32 id,
-							unsigned int id_type)
+						unsigned int id_type,
+						bool preempt_retries_left)
 {
 	return 0;
 }
@@ -463,7 +465,7 @@ int test_gv11b_fifo_is_preempt_pending(struct unit_module *m, struct gk20a *g,
 		/* Modify eng_stat for engine 0 */
 		nvgpu_writel(g, fifo_engine_status_r(0U), stub.eng_stat);
 
-		err = gv11b_fifo_is_preempt_pending(g, 0U, id_type);
+		err = gv11b_fifo_is_preempt_pending(g, 0U, id_type, false);
 
 		if (branches & F_PREEMPT_PENDING_POLL_PBDMA_FAIL) {
 			unit_assert(err == -ETIMEDOUT, goto done);