Mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git (synced 2025-12-23 09:57:08 +03:00)

gpu: nvgpu: move nvgpu_tsg_recover to common/rc

Moved from common/tsg to common/rc and renamed
nvgpu_tsg_recover -> nvgpu_rc_tsg_and_related_engines

JIRA NVGPU-1314

Change-Id: I887d5fcdb15def13cc74e2993312b3b36119c97c
Signed-off-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2095622
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-misra <svc-mobile-misra@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Committed by: mobile promotions
Parent: c570ba99ed
Commit: 03b521d9d7
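
For reference, a minimal, hypothetical call-site sketch of the rename (not part of the commit): the wrapper example_force_reset() is invented for illustration, and g, tsg and verbose are assumed to come from the caller, as in nvgpu_tsg_force_reset_ch() in the diff below. Only the header and the renamed call reflect this change.

/*
 * Hypothetical migration sketch -- not part of this commit.
 * nvgpu_rc_tsg_and_related_engines() is declared in <nvgpu/rc.h>;
 * the old nvgpu_tsg_recover() declaration is removed from <nvgpu/tsg.h>.
 */
#include <nvgpu/gk20a.h>
#include <nvgpu/tsg.h>
#include <nvgpu/rc.h>

static void example_force_reset(struct gk20a *g, struct tsg_gk20a *tsg,
                bool verbose)
{
        /* before: nvgpu_tsg_recover(g, tsg, verbose, RC_TYPE_FORCE_RESET); */
        nvgpu_rc_tsg_and_related_engines(g, tsg, verbose,
                        RC_TYPE_FORCE_RESET);
}
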
@@ -264,71 +264,6 @@ void nvgpu_tsg_unbind_channel_check_ctx_reload(struct tsg_gk20a *tsg,
         }
 }
 
-void nvgpu_tsg_recover(struct gk20a *g, struct tsg_gk20a *tsg,
-                bool verbose, u32 rc_type)
-{
-        u32 engines_mask = 0U;
-        int err;
-
-        nvgpu_mutex_acquire(&g->dbg_sessions_lock);
-
-        /* disable tsg so that it does not get scheduled again */
-        g->ops.tsg.disable(tsg);
-
-        /*
-         * On hitting engine reset, h/w drops the ctxsw_status to INVALID in
-         * fifo_engine_status register. Also while the engine is held in reset
-         * h/w passes busy/idle straight through. fifo_engine_status registers
-         * are correct in that there is no context switch outstanding
-         * as the CTXSW is aborted when reset is asserted.
-         */
-        nvgpu_log_info(g, "acquire engines_reset_mutex");
-        nvgpu_mutex_acquire(&g->fifo.engines_reset_mutex);
-
-        /*
-         * stop context switching to prevent engine assignments from
-         * changing until engine status is checked to make sure tsg
-         * being recovered is not loaded on the engines
-         */
-        err = g->ops.gr.falcon.disable_ctxsw(g, g->gr.falcon);
-
-        if (err != 0) {
-                /* if failed to disable ctxsw, just abort tsg */
-                nvgpu_err(g, "failed to disable ctxsw");
-        } else {
-                /* recover engines if tsg is loaded on the engines */
-                engines_mask = g->ops.engine.get_mask_on_id(g,
-                                tsg->tsgid, true);
-
-                /*
-                 * it is ok to enable ctxsw before tsg is recovered. If engines
-                 * is 0, no engine recovery is needed and if it is non zero,
-                 * gk20a_fifo_recover will call get_mask_on_id again.
-                 * By that time if tsg is not on the engine, engine need not
-                 * be reset.
-                 */
-                err = g->ops.gr.falcon.enable_ctxsw(g, g->gr.falcon);
-                if (err != 0) {
-                        nvgpu_err(g, "failed to enable ctxsw");
-                }
-        }
-        nvgpu_log_info(g, "release engines_reset_mutex");
-        nvgpu_mutex_release(&g->fifo.engines_reset_mutex);
-
-        if (engines_mask != 0U) {
-                gk20a_fifo_recover(g, engines_mask, tsg->tsgid, true, true,
-                                verbose, rc_type);
-        } else {
-                if (nvgpu_tsg_mark_error(g, tsg) && verbose) {
-                        gk20a_debug_dump(g);
-                }
-
-                nvgpu_tsg_abort(g, tsg, false);
-        }
-
-        nvgpu_mutex_release(&g->dbg_sessions_lock);
-}
-
 static void nvgpu_tsg_destroy(struct gk20a *g, struct tsg_gk20a *tsg)
 {
         nvgpu_mutex_destroy(&tsg->event_id_list_lock);

@@ -344,7 +279,8 @@ int nvgpu_tsg_force_reset_ch(struct channel_gk20a *ch,
 
         if (tsg != NULL) {
                 nvgpu_tsg_set_error_notifier(g, tsg, err_code);
-                nvgpu_tsg_recover(g, tsg, verbose, RC_TYPE_FORCE_RESET);
+                nvgpu_rc_tsg_and_related_engines(g, tsg, verbose,
+                                RC_TYPE_FORCE_RESET);
         } else {
                 nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid);
         }

@@ -24,6 +24,7 @@
 #include <nvgpu/gk20a.h>
 #include <nvgpu/fifo.h>
 #include <nvgpu/engines.h>
+#include <nvgpu/debug.h>
 #include <nvgpu/channel.h>
 #include <nvgpu/tsg.h>
 #include <nvgpu/error_notifier.h>

@@ -64,7 +65,8 @@ void nvgpu_rc_pbdma_fault(struct gk20a *g, struct fifo_gk20a *f,
                 struct tsg_gk20a *tsg = &f->tsg[id];
 
                 nvgpu_tsg_set_error_notifier(g, tsg, error_notifier);
-                nvgpu_tsg_recover(g, tsg, true, RC_TYPE_PBDMA_FAULT);
+                nvgpu_rc_tsg_and_related_engines(g, tsg, true,
+                                RC_TYPE_PBDMA_FAULT);
         } else if(pbdma_status.id_type == PBDMA_STATUS_ID_TYPE_CHID) {
                 struct channel_gk20a *ch = gk20a_channel_from_id(g, id);
                 struct tsg_gk20a *tsg;

@@ -76,7 +78,8 @@ void nvgpu_rc_pbdma_fault(struct gk20a *g, struct fifo_gk20a *f,
                 tsg = tsg_gk20a_from_ch(ch);
                 if (tsg != NULL) {
                         nvgpu_tsg_set_error_notifier(g, tsg, error_notifier);
-                        nvgpu_tsg_recover(g, tsg, true, RC_TYPE_PBDMA_FAULT);
+                        nvgpu_rc_tsg_and_related_engines(g, tsg, true,
+                                        RC_TYPE_PBDMA_FAULT);
                 } else {
                         nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid);
                 }

@@ -102,7 +105,7 @@ void nvgpu_rc_preempt_timeout(struct gk20a *g, struct tsg_gk20a *tsg)
         nvgpu_tsg_set_error_notifier(g, tsg,
                         NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
 
-        nvgpu_tsg_recover(g, tsg, true, RC_TYPE_PREEMPT_TIMEOUT);
+        nvgpu_rc_tsg_and_related_engines(g, tsg, true, RC_TYPE_PREEMPT_TIMEOUT);
 }
 
 void nvgpu_rc_gr_fault(struct gk20a *g, struct tsg_gk20a *tsg,

@@ -135,5 +138,70 @@ void nvgpu_rc_sched_error_bad_tsg(struct gk20a *g)
 {
         /* id is unknown, preempt all runlists and do recovery */
         gk20a_fifo_recover(g, 0, INVAL_ID, false, false, false,
-                        RC_TYPE_SCHED_ERR);
+                        RC_TYPE_SCHED_ERR);
 }
+
+void nvgpu_rc_tsg_and_related_engines(struct gk20a *g, struct tsg_gk20a *tsg,
+                bool debug_dump, u32 rc_type)
+{
+        u32 eng_bitmask = 0U;
+        int err;
+
+        nvgpu_mutex_acquire(&g->dbg_sessions_lock);
+
+        /* disable tsg so that it does not get scheduled again */
+        g->ops.tsg.disable(tsg);
+
+        /*
+         * On hitting engine reset, h/w drops the ctxsw_status to INVALID in
+         * fifo_engine_status register. Also while the engine is held in reset
+         * h/w passes busy/idle straight through. fifo_engine_status registers
+         * are correct in that there is no context switch outstanding
+         * as the CTXSW is aborted when reset is asserted.
+         */
+        nvgpu_log_info(g, "acquire engines_reset_mutex");
+        nvgpu_mutex_acquire(&g->fifo.engines_reset_mutex);
+
+        /*
+         * stop context switching to prevent engine assignments from
+         * changing until engine status is checked to make sure tsg
+         * being recovered is not loaded on the engines
+         */
+        err = g->ops.gr.falcon.disable_ctxsw(g, g->gr.falcon);
+
+        if (err != 0) {
+                /* if failed to disable ctxsw, just abort tsg */
+                nvgpu_err(g, "failed to disable ctxsw");
+        } else {
+                /* recover engines if tsg is loaded on the engines */
+                eng_bitmask = g->ops.engine.get_mask_on_id(g,
+                                tsg->tsgid, true);
+
+                /*
+                 * it is ok to enable ctxsw before tsg is recovered. If engines
+                 * is 0, no engine recovery is needed and if it is non zero,
+                 * gk20a_fifo_recover will call get_mask_on_id again.
+                 * By that time if tsg is not on the engine, engine need not
+                 * be reset.
+                 */
+                err = g->ops.gr.falcon.enable_ctxsw(g, g->gr.falcon);
+                if (err != 0) {
+                        nvgpu_err(g, "failed to enable ctxsw");
+                }
+        }
+        nvgpu_log_info(g, "release engines_reset_mutex");
+        nvgpu_mutex_release(&g->fifo.engines_reset_mutex);
+
+        if (eng_bitmask != 0U) {
+                gk20a_fifo_recover(g, eng_bitmask, tsg->tsgid, true, true,
+                                debug_dump, rc_type);
+        } else {
+                if (nvgpu_tsg_mark_error(g, tsg) && debug_dump) {
+                        gk20a_debug_dump(g);
+                }
+
+                nvgpu_tsg_abort(g, tsg, false);
+        }
+
+        nvgpu_mutex_release(&g->dbg_sessions_lock);
+}

@@ -54,5 +54,7 @@ void nvgpu_rc_preempt_timeout(struct gk20a *g, struct tsg_gk20a *tsg);
 void nvgpu_rc_gr_fault(struct gk20a *g,
                 struct tsg_gk20a *tsg, struct channel_gk20a *ch);
 void nvgpu_rc_sched_error_bad_tsg(struct gk20a *g);
+void nvgpu_rc_tsg_and_related_engines(struct gk20a *g, struct tsg_gk20a *tsg,
+                bool debug_dump, u32 rc_type);
 
 #endif /* NVGPU_RC_H */

@@ -106,9 +106,6 @@ void nvgpu_tsg_unbind_channel_check_ctx_reload(struct tsg_gk20a *tsg,
                 struct nvgpu_channel_hw_state *hw_state);
 int nvgpu_tsg_force_reset_ch(struct channel_gk20a *ch,
                 u32 err_code, bool verbose);
-void nvgpu_tsg_recover(struct gk20a *g, struct tsg_gk20a *tsg,
-                bool verbose, u32 rc_type);
-
 void nvgpu_tsg_set_ctx_mmu_error(struct gk20a *g,
                 struct tsg_gk20a *tsg);
 bool nvgpu_tsg_mark_error(struct gk20a *g, struct tsg_gk20a *tsg);