gpu: nvgpu: move nvgpu_tsg_recover to common/rc

Moved nvgpu_tsg_recover() from common/tsg to common/rc and renamed it to
nvgpu_rc_tsg_and_related_engines().

JIRA NVGPU-1314

Change-Id: I887d5fcdb15def13cc74e2993312b3b36119c97c
Signed-off-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2095622
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-misra <svc-mobile-misra@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Author:    Seema Khowala <seemaj@nvidia.com>
Date:      2019-04-11 13:54:24 -07:00
Committer: mobile promotions
Commit:    03b521d9d7
Parent:    c570ba99ed

4 changed files with 76 additions and 73 deletions
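
For callers, this is a pure move and rename: the recovery entry point keeps the
same parameters (the bool verbose flag becomes debug_dump) but is now declared
in nvgpu/rc.h instead of nvgpu/tsg.h. A minimal sketch of an updated call site
follows; the helper name example_force_reset is hypothetical and only
illustrates the rename shown in the hunks below, and the include list simply
mirrors what common/rc code uses here (it is assumed to provide the RC_TYPE_*
recovery-type defines).

#include <nvgpu/gk20a.h>
#include <nvgpu/fifo.h>
#include <nvgpu/tsg.h>
#include <nvgpu/error_notifier.h>
#include <nvgpu/rc.h>

/* Hypothetical caller, for illustration only. */
static void example_force_reset(struct gk20a *g, struct tsg_gk20a *tsg,
        u32 err_code, bool verbose)
{
    nvgpu_tsg_set_error_notifier(g, tsg, err_code);

    /* was: nvgpu_tsg_recover(g, tsg, verbose, RC_TYPE_FORCE_RESET); */
    nvgpu_rc_tsg_and_related_engines(g, tsg, verbose,
            RC_TYPE_FORCE_RESET);
}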


@@ -264,71 +264,6 @@ void nvgpu_tsg_unbind_channel_check_ctx_reload(struct tsg_gk20a *tsg,
     }
 }
 
-void nvgpu_tsg_recover(struct gk20a *g, struct tsg_gk20a *tsg,
-        bool verbose, u32 rc_type)
-{
-    u32 engines_mask = 0U;
-    int err;
-
-    nvgpu_mutex_acquire(&g->dbg_sessions_lock);
-
-    /* disable tsg so that it does not get scheduled again */
-    g->ops.tsg.disable(tsg);
-
-    /*
-     * On hitting engine reset, h/w drops the ctxsw_status to INVALID in
-     * fifo_engine_status register. Also while the engine is held in reset
-     * h/w passes busy/idle straight through. fifo_engine_status registers
-     * are correct in that there is no context switch outstanding
-     * as the CTXSW is aborted when reset is asserted.
-     */
-    nvgpu_log_info(g, "acquire engines_reset_mutex");
-    nvgpu_mutex_acquire(&g->fifo.engines_reset_mutex);
-
-    /*
-     * stop context switching to prevent engine assignments from
-     * changing until engine status is checked to make sure tsg
-     * being recovered is not loaded on the engines
-     */
-    err = g->ops.gr.falcon.disable_ctxsw(g, g->gr.falcon);
-    if (err != 0) {
-        /* if failed to disable ctxsw, just abort tsg */
-        nvgpu_err(g, "failed to disable ctxsw");
-    } else {
-        /* recover engines if tsg is loaded on the engines */
-        engines_mask = g->ops.engine.get_mask_on_id(g,
-                tsg->tsgid, true);
-
-        /*
-         * it is ok to enable ctxsw before tsg is recovered. If engines
-         * is 0, no engine recovery is needed and if it is non zero,
-         * gk20a_fifo_recover will call get_mask_on_id again.
-         * By that time if tsg is not on the engine, engine need not
-         * be reset.
-         */
-        err = g->ops.gr.falcon.enable_ctxsw(g, g->gr.falcon);
-        if (err != 0) {
-            nvgpu_err(g, "failed to enable ctxsw");
-        }
-    }
-
-    nvgpu_log_info(g, "release engines_reset_mutex");
-    nvgpu_mutex_release(&g->fifo.engines_reset_mutex);
-
-    if (engines_mask != 0U) {
-        gk20a_fifo_recover(g, engines_mask, tsg->tsgid, true, true,
-                verbose, rc_type);
-    } else {
-        if (nvgpu_tsg_mark_error(g, tsg) && verbose) {
-            gk20a_debug_dump(g);
-        }
-        nvgpu_tsg_abort(g, tsg, false);
-    }
-
-    nvgpu_mutex_release(&g->dbg_sessions_lock);
-}
-
 static void nvgpu_tsg_destroy(struct gk20a *g, struct tsg_gk20a *tsg)
 {
     nvgpu_mutex_destroy(&tsg->event_id_list_lock);
@@ -344,7 +279,8 @@ int nvgpu_tsg_force_reset_ch(struct channel_gk20a *ch,
     if (tsg != NULL) {
         nvgpu_tsg_set_error_notifier(g, tsg, err_code);
-        nvgpu_tsg_recover(g, tsg, verbose, RC_TYPE_FORCE_RESET);
+        nvgpu_rc_tsg_and_related_engines(g, tsg, verbose,
+                RC_TYPE_FORCE_RESET);
     } else {
         nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid);
     }


@@ -24,6 +24,7 @@
 #include <nvgpu/gk20a.h>
 #include <nvgpu/fifo.h>
 #include <nvgpu/engines.h>
+#include <nvgpu/debug.h>
 #include <nvgpu/channel.h>
 #include <nvgpu/tsg.h>
 #include <nvgpu/error_notifier.h>
@@ -64,7 +65,8 @@ void nvgpu_rc_pbdma_fault(struct gk20a *g, struct fifo_gk20a *f,
         struct tsg_gk20a *tsg = &f->tsg[id];
 
         nvgpu_tsg_set_error_notifier(g, tsg, error_notifier);
-        nvgpu_tsg_recover(g, tsg, true, RC_TYPE_PBDMA_FAULT);
+        nvgpu_rc_tsg_and_related_engines(g, tsg, true,
+                RC_TYPE_PBDMA_FAULT);
     } else if(pbdma_status.id_type == PBDMA_STATUS_ID_TYPE_CHID) {
         struct channel_gk20a *ch = gk20a_channel_from_id(g, id);
         struct tsg_gk20a *tsg;
@@ -76,7 +78,8 @@ void nvgpu_rc_pbdma_fault(struct gk20a *g, struct fifo_gk20a *f,
         tsg = tsg_gk20a_from_ch(ch);
         if (tsg != NULL) {
             nvgpu_tsg_set_error_notifier(g, tsg, error_notifier);
-            nvgpu_tsg_recover(g, tsg, true, RC_TYPE_PBDMA_FAULT);
+            nvgpu_rc_tsg_and_related_engines(g, tsg, true,
+                    RC_TYPE_PBDMA_FAULT);
         } else {
             nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid);
         }
@@ -102,7 +105,7 @@ void nvgpu_rc_preempt_timeout(struct gk20a *g, struct tsg_gk20a *tsg)
     nvgpu_tsg_set_error_notifier(g, tsg,
             NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
-    nvgpu_tsg_recover(g, tsg, true, RC_TYPE_PREEMPT_TIMEOUT);
+    nvgpu_rc_tsg_and_related_engines(g, tsg, true, RC_TYPE_PREEMPT_TIMEOUT);
 }
 
 void nvgpu_rc_gr_fault(struct gk20a *g, struct tsg_gk20a *tsg,
@@ -135,5 +138,70 @@ void nvgpu_rc_sched_error_bad_tsg(struct gk20a *g)
 {
     /* id is unknown, preempt all runlists and do recovery */
     gk20a_fifo_recover(g, 0, INVAL_ID, false, false, false,
             RC_TYPE_SCHED_ERR);
 }
+
+void nvgpu_rc_tsg_and_related_engines(struct gk20a *g, struct tsg_gk20a *tsg,
+        bool debug_dump, u32 rc_type)
+{
+    u32 eng_bitmask = 0U;
+    int err;
+
+    nvgpu_mutex_acquire(&g->dbg_sessions_lock);
+
+    /* disable tsg so that it does not get scheduled again */
+    g->ops.tsg.disable(tsg);
+
+    /*
+     * On hitting engine reset, h/w drops the ctxsw_status to INVALID in
+     * fifo_engine_status register. Also while the engine is held in reset
+     * h/w passes busy/idle straight through. fifo_engine_status registers
+     * are correct in that there is no context switch outstanding
+     * as the CTXSW is aborted when reset is asserted.
+     */
+    nvgpu_log_info(g, "acquire engines_reset_mutex");
+    nvgpu_mutex_acquire(&g->fifo.engines_reset_mutex);
+
+    /*
+     * stop context switching to prevent engine assignments from
+     * changing until engine status is checked to make sure tsg
+     * being recovered is not loaded on the engines
+     */
+    err = g->ops.gr.falcon.disable_ctxsw(g, g->gr.falcon);
+    if (err != 0) {
+        /* if failed to disable ctxsw, just abort tsg */
+        nvgpu_err(g, "failed to disable ctxsw");
+    } else {
+        /* recover engines if tsg is loaded on the engines */
+        eng_bitmask = g->ops.engine.get_mask_on_id(g,
+                tsg->tsgid, true);
+
+        /*
+         * it is ok to enable ctxsw before tsg is recovered. If engines
+         * is 0, no engine recovery is needed and if it is non zero,
+         * gk20a_fifo_recover will call get_mask_on_id again.
+         * By that time if tsg is not on the engine, engine need not
+         * be reset.
+         */
+        err = g->ops.gr.falcon.enable_ctxsw(g, g->gr.falcon);
+        if (err != 0) {
+            nvgpu_err(g, "failed to enable ctxsw");
+        }
+    }
+
+    nvgpu_log_info(g, "release engines_reset_mutex");
+    nvgpu_mutex_release(&g->fifo.engines_reset_mutex);
+
+    if (eng_bitmask != 0U) {
+        gk20a_fifo_recover(g, eng_bitmask, tsg->tsgid, true, true,
+                debug_dump, rc_type);
+    } else {
+        if (nvgpu_tsg_mark_error(g, tsg) && debug_dump) {
+            gk20a_debug_dump(g);
+        }
+        nvgpu_tsg_abort(g, tsg, false);
+    }
+
+    nvgpu_mutex_release(&g->dbg_sessions_lock);
+}


@@ -54,5 +54,7 @@ void nvgpu_rc_preempt_timeout(struct gk20a *g, struct tsg_gk20a *tsg);
 void nvgpu_rc_gr_fault(struct gk20a *g,
         struct tsg_gk20a *tsg, struct channel_gk20a *ch);
 void nvgpu_rc_sched_error_bad_tsg(struct gk20a *g);
+void nvgpu_rc_tsg_and_related_engines(struct gk20a *g, struct tsg_gk20a *tsg,
+        bool debug_dump, u32 rc_type);
 
 #endif /* NVGPU_RC_H */


@@ -106,9 +106,6 @@ void nvgpu_tsg_unbind_channel_check_ctx_reload(struct tsg_gk20a *tsg,
         struct nvgpu_channel_hw_state *hw_state);
 int nvgpu_tsg_force_reset_ch(struct channel_gk20a *ch,
         u32 err_code, bool verbose);
-void nvgpu_tsg_recover(struct gk20a *g, struct tsg_gk20a *tsg,
-        bool verbose, u32 rc_type);
 void nvgpu_tsg_set_ctx_mmu_error(struct gk20a *g,
         struct tsg_gk20a *tsg);
 bool nvgpu_tsg_mark_error(struct gk20a *g, struct tsg_gk20a *tsg);