gpu: nvgpu: move preempt timeout rc from fifo to rc

Move the preempt timeout recovery handling from fifo to common/rc.
Remove nvgpu_channel_recover, since bare channels (not bound to a TSG)
are no longer recovered; on a preempt timeout, recover the TSG that the
channel is bound to.

JIRA NVGPU-1314

Change-Id: Ic1f94b321d0404eea86dd6d6d990529b2f3a8d57
Signed-off-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2093682
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
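
For context, a minimal sketch of the recovery path after this change, using only helpers visible in the hunks below (tsg_gk20a_from_ch, nvgpu_rc_preempt_timeout, nvgpu_err); the wrapper function name is hypothetical and the snippet assumes the usual nvgpu channel/TSG types:

/* Sketch: how a preempt timeout on a channel is handled after this change. */
static void example_channel_preempt_timeout(struct gk20a *g,
		struct channel_gk20a *ch)
{
	struct tsg_gk20a *tsg = tsg_gk20a_from_ch(ch);

	if (tsg != NULL) {
		/* Common recovery path: sets the idle-timeout error
		 * notifier on the TSG and triggers TSG recovery. */
		nvgpu_rc_preempt_timeout(g, tsg);
	} else {
		/* Bare channels are no longer recovered; only report. */
		nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid);
	}
}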


@@ -1474,42 +1474,6 @@ bool nvgpu_channel_update_and_check_ctxsw_timeout(struct channel_gk20a *ch,
 		ch->ctxsw_timeout_accumulated_ms > ch->ctxsw_timeout_max_ms;
 }
 
-void nvgpu_channel_recover(struct gk20a *g, struct channel_gk20a *ch,
-			bool verbose, u32 rc_type)
-{
-	u32 engines;
-	int err;
-
-	/* stop context switching to prevent engine assignments from
-	   changing until channel is recovered */
-	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
-	err = g->ops.gr.falcon.disable_ctxsw(g, g->gr.falcon);
-	if (err != 0) {
-		nvgpu_err(g, "failed to disable ctxsw");
-		goto fail;
-	}
-
-	engines = g->ops.engine.get_mask_on_id(g, ch->chid, false);
-
-	if (engines != 0U) {
-		gk20a_fifo_recover(g, engines, ch->chid, false, true, verbose,
-				rc_type);
-	} else {
-		gk20a_channel_abort(ch, false);
-
-		if (nvgpu_channel_mark_error(g, ch)) {
-			gk20a_debug_dump(g);
-		}
-	}
-
-	err = g->ops.gr.falcon.enable_ctxsw(g, g->gr.falcon);
-	if (err != 0) {
-		nvgpu_err(g, "failed to enable ctxsw");
-	}
-
-fail:
-	nvgpu_mutex_release(&g->dbg_sessions_lock);
-}
-
 u32 nvgpu_get_gp_free_count(struct channel_gk20a *c)
 {
 	update_gp_get(c->g, c);


@@ -95,3 +95,11 @@ void nvgpu_rc_runlist_update(struct gk20a *g, u32 runlist_id)
 				RC_TYPE_RUNLIST_UPDATE_TIMEOUT);
 	}
 }
+
+void nvgpu_rc_preempt_timeout(struct gk20a *g, struct tsg_gk20a *tsg)
+{
+	nvgpu_tsg_set_error_notifier(g, tsg,
+		NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
+
+	nvgpu_tsg_recover(g, tsg, true, RC_TYPE_PREEMPT_TIMEOUT);
+}
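
This helper replaces the per-channel loop of the removed gk20a_fifo_preempt_timeout_rc_tsg: the error notifier is now set once at TSG level via nvgpu_tsg_set_error_notifier instead of iterating the TSG's channel list. A minimal, hypothetical caller (the actual call sites are in the fifo hunks below) would reduce to roughly:

/* Hypothetical caller: a TSG preempt request that timed out. */
static void example_tsg_preempt_timeout(struct gk20a *g, struct tsg_gk20a *tsg)
{
	nvgpu_err(g, "preempt TSG %d timeout", tsg->tsgid);
	nvgpu_rc_preempt_timeout(g, tsg);
}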


@@ -688,35 +688,6 @@ int gk20a_fifo_is_preempt_pending(struct gk20a *g, u32 id,
 	return ret;
 }
 
-void gk20a_fifo_preempt_timeout_rc_tsg(struct gk20a *g, struct tsg_gk20a *tsg)
-{
-	struct channel_gk20a *ch = NULL;
-
-	nvgpu_err(g, "preempt TSG %d timeout", tsg->tsgid);
-
-	nvgpu_rwsem_down_read(&tsg->ch_list_lock);
-	nvgpu_list_for_each_entry(ch, &tsg->ch_list,
-			channel_gk20a, ch_entry) {
-		if (gk20a_channel_get(ch) == NULL) {
-			continue;
-		}
-		g->ops.channel.set_error_notifier(ch,
-			NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
-		gk20a_channel_put(ch);
-	}
-	nvgpu_rwsem_up_read(&tsg->ch_list_lock);
-
-	nvgpu_tsg_recover(g, tsg, true, RC_TYPE_PREEMPT_TIMEOUT);
-}
-
-void gk20a_fifo_preempt_timeout_rc(struct gk20a *g, struct channel_gk20a *ch)
-{
-	nvgpu_err(g, "preempt channel %d timeout", ch->chid);
-	g->ops.channel.set_error_notifier(ch,
-		NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
-	nvgpu_channel_recover(g, ch, true, RC_TYPE_PREEMPT_TIMEOUT);
-}
 
 int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg)
 {
 	int ret;
@@ -763,12 +734,20 @@ int gk20a_fifo_preempt_channel(struct gk20a *g, struct channel_gk20a *ch)
 				"ctxsw timeout will trigger recovery if needed",
 				ch->chid);
 		} else {
-			gk20a_fifo_preempt_timeout_rc(g, ch);
+			struct tsg_gk20a *tsg;
+
+			nvgpu_err(g, "preempt channel %d timeout", ch->chid);
+			tsg = tsg_gk20a_from_ch(ch);
+			if (tsg != NULL) {
+				nvgpu_rc_preempt_timeout(g, tsg);
+			} else {
+				nvgpu_err(g, "chid: %d is not bound to tsg",
+					ch->chid);
+			}
 		}
 	}
 
 	return ret;
 }
@@ -800,7 +779,8 @@ int gk20a_fifo_preempt_tsg(struct gk20a *g, struct tsg_gk20a *tsg)
 				"ctxsw timeout will trigger recovery if needed",
 				tsg->tsgid);
 		} else {
-			gk20a_fifo_preempt_timeout_rc_tsg(g, tsg);
+			nvgpu_err(g, "preempt TSG %d timeout", tsg->tsgid);
+			nvgpu_rc_preempt_timeout(g, tsg);
 		}
 	}


@@ -266,8 +266,6 @@ static inline void gk20a_fifo_profile_snapshot(
 int gk20a_fifo_is_preempt_pending(struct gk20a *g, u32 id,
 		unsigned int id_type);
 int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg);
-void gk20a_fifo_preempt_timeout_rc_tsg(struct gk20a *g, struct tsg_gk20a *tsg);
-void gk20a_fifo_preempt_timeout_rc(struct gk20a *g, struct channel_gk20a *ch);
 u32 gk20a_fifo_runlist_busy_engines(struct gk20a *g, u32 runlist_id);
 bool gk20a_fifo_should_defer_engine_reset(struct gk20a *g, u32 engine_id,


@@ -499,7 +499,7 @@ int gv11b_fifo_preempt_tsg(struct gk20a *g, struct tsg_gk20a *tsg)
 		nvgpu_err(g, "preempt timed out for tsgid: %u, "
 		"ctxsw timeout will trigger recovery if needed", tsg->tsgid);
 	} else {
-		gk20a_fifo_preempt_timeout_rc_tsg(g, tsg);
+		nvgpu_rc_preempt_timeout(g, tsg);
 	}
 }


@@ -49,4 +49,5 @@ void nvgpu_rc_pbdma_fault(struct gk20a *g, struct fifo_gk20a *f,
 void nvgpu_rc_runlist_update(struct gk20a *g, u32 runlist_id);
+void nvgpu_rc_preempt_timeout(struct gk20a *g, struct tsg_gk20a *tsg);
 
 #endif /* NVGPU_RC_H */