gpu: nvgpu: move force_reset_ch to hal.tsg unit

force_reset_ch obtains a tsg from a channel first before proceeding
with other work. Thus, force_reset_ch is moved as part of tsg unit to
avoid circular dependency between channel and tsg. TSGs can depend on
channels but channels cannot depend on TSGs.

Jira NVGPU-2978

Change-Id: Ib1879681287971d2a4dbeb26ca852d6b59b50f6a
Signed-off-by: Debarshi Dutta <ddutta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2084927
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Debarshi Dutta
2019-03-29 17:03:19 +05:30
committed by mobile promotions
parent eaab8ad1f2
commit 29b656f9b2
16 changed files with 34 additions and 46 deletions

View File

@@ -1695,7 +1695,7 @@ static void nvgpu_channel_wdt_handler(struct channel_gk20a *ch)
gk20a_gr_debug_dump(g);
}
g->ops.fifo.force_reset_ch(ch,
g->ops.tsg.force_reset(ch,
NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT,
ch->wdt.debug_dump);
}

View File

@@ -212,6 +212,24 @@ static void nvgpu_tsg_destroy(struct gk20a *g, struct tsg_gk20a *tsg)
nvgpu_mutex_destroy(&tsg->event_id_list_lock);
}
/* force reset tsg that the channel is bound to */
/*
 * Force-reset the TSG that the given channel is bound to.
 *
 * Resolves the channel's TSG, posts the supplied error notifier code to
 * the TSG, then triggers TSG recovery with RC_TYPE_FORCE_RESET. If the
 * channel is not bound to any TSG, only an error is logged.
 *
 * @ch       channel whose owning TSG should be reset
 * @err_code error-notifier code delivered to the TSG
 * @verbose  passed through to recovery for debug verbosity
 *
 * Always returns 0, including the not-bound case.
 */
int nvgpu_tsg_force_reset_ch(struct channel_gk20a *ch,
		u32 err_code, bool verbose)
{
	struct gk20a *g = ch->g;
	struct tsg_gk20a *tsg = tsg_gk20a_from_ch(ch);

	if (tsg == NULL) {
		nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid);
		return 0;
	}

	nvgpu_tsg_set_error_notifier(g, tsg, err_code);
	nvgpu_tsg_recover(g, tsg, verbose, RC_TYPE_FORCE_RESET);

	return 0;
}
void nvgpu_tsg_cleanup_sw(struct gk20a *g)
{
struct fifo_gk20a *f = &g->fifo;

View File

@@ -353,7 +353,7 @@ int vgpu_fifo_preempt_tsg(struct gk20a *g, struct tsg_gk20a *tsg)
return err;
}
int vgpu_fifo_force_reset_ch(struct channel_gk20a *ch,
int vgpu_tsg_force_reset_ch(struct channel_gk20a *ch,
u32 err_code, bool verbose)
{
struct tsg_gk20a *tsg = NULL;
@@ -373,7 +373,7 @@ int vgpu_fifo_force_reset_ch(struct channel_gk20a *ch,
nvgpu_list_for_each_entry(ch_tsg, &tsg->ch_list,
channel_gk20a, ch_entry) {
if (gk20a_channel_get(ch_tsg)) {
g->ops.channel.set_error_notifier(ch_tsg,
nvgpu_channel_set_error_notifier(g, ch_tsg,
err_code);
gk20a_channel_set_unserviceable(ch_tsg);
gk20a_channel_put(ch_tsg);

View File

@@ -47,7 +47,7 @@ int vgpu_fifo_init_engine_info(struct fifo_gk20a *f);
int vgpu_fifo_preempt_channel(struct gk20a *g, struct channel_gk20a *ch);
int vgpu_fifo_preempt_tsg(struct gk20a *g, struct tsg_gk20a *tsg);
int vgpu_channel_set_timeslice(struct channel_gk20a *ch, u32 timeslice);
int vgpu_fifo_force_reset_ch(struct channel_gk20a *ch,
int vgpu_tsg_force_reset_ch(struct channel_gk20a *ch,
u32 err_code, bool verbose);
u32 vgpu_fifo_default_timeslice_us(struct gk20a *g);
int vgpu_tsg_open(struct tsg_gk20a *tsg);

View File

@@ -421,7 +421,6 @@ static const struct gpu_ops vgpu_gp10b_ops = {
.tsg_set_timeslice = vgpu_tsg_set_timeslice,
.tsg_open = vgpu_tsg_open,
.tsg_release = vgpu_tsg_release,
.force_reset_ch = vgpu_fifo_force_reset_ch,
.dump_channel_status_ramfc = NULL,
.is_preempt_pending = NULL,
.reset_enable_hw = NULL,
@@ -548,6 +547,7 @@ static const struct gpu_ops vgpu_gp10b_ops = {
.enable = vgpu_tsg_enable,
.disable = nvgpu_tsg_disable,
.check_ctxsw_timeout = nvgpu_tsg_check_ctxsw_timeout,
.force_reset = vgpu_tsg_force_reset_ch,
},
.netlist = {
.get_netlist_name = gp10b_netlist_get_name,

View File

@@ -503,7 +503,6 @@ static const struct gpu_ops vgpu_gv11b_ops = {
.tsg_set_timeslice = vgpu_tsg_set_timeslice,
.tsg_open = vgpu_tsg_open,
.tsg_release = vgpu_tsg_release,
.force_reset_ch = vgpu_fifo_force_reset_ch,
.dump_channel_status_ramfc = NULL,
.is_preempt_pending = gv11b_fifo_is_preempt_pending,
.reset_enable_hw = NULL,
@@ -635,6 +634,7 @@ static const struct gpu_ops vgpu_gv11b_ops = {
.enable = gv11b_tsg_enable,
.disable = nvgpu_tsg_disable,
.check_ctxsw_timeout = nvgpu_tsg_check_ctxsw_timeout,
.force_reset = vgpu_tsg_force_reset_ch,
},
.netlist = {
.get_netlist_name = gv11b_netlist_get_name,

View File

@@ -634,36 +634,6 @@ void gk20a_fifo_recover(struct gk20a *g, u32 engine_ids,
rc_type, NULL);
}
/* force reset channel and tsg */
/*
 * Force-reset the channel's TSG and notify every bound channel.
 *
 * Resolves the TSG from the channel, then — under the TSG channel-list
 * read lock — delivers the error notifier to each channel still
 * referencable via gk20a_channel_get(), and finally triggers TSG
 * recovery with RC_TYPE_FORCE_RESET. If the channel is not bound to a
 * TSG, only an error is logged.
 *
 * @ch       channel whose owning TSG should be reset
 * @err_code error-notifier code delivered to each bound channel
 * @verbose  passed through to recovery for debug verbosity
 *
 * Always returns 0, including the not-bound case.
 */
int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch,
		u32 err_code, bool verbose)
{
	struct gk20a *g = ch->g;
	struct tsg_gk20a *tsg = tsg_gk20a_from_ch(ch);
	struct channel_gk20a *ch_tsg = NULL;

	if (tsg == NULL) {
		nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid);
		return 0;
	}

	/* Notify each channel of the TSG; skip ones we cannot ref. */
	nvgpu_rwsem_down_read(&tsg->ch_list_lock);
	nvgpu_list_for_each_entry(ch_tsg, &tsg->ch_list,
			channel_gk20a, ch_entry) {
		if (gk20a_channel_get(ch_tsg) != NULL) {
			g->ops.channel.set_error_notifier(ch_tsg, err_code);
			gk20a_channel_put(ch_tsg);
		}
	}
	nvgpu_rwsem_up_read(&tsg->ch_list_lock);

	nvgpu_tsg_recover(g, tsg, verbose, RC_TYPE_FORCE_RESET);

	return 0;
}
int gk20a_fifo_tsg_unbind_channel_verify_status(struct channel_gk20a *ch)
{
struct gk20a *g = ch->g;

View File

@@ -264,8 +264,6 @@ void gk20a_fifo_recover(struct gk20a *g,
u32 hw_id, /* if ~0, will be queried from HW */
bool id_is_tsg, /* ignored if hw_id == ~0 */
bool id_is_known, bool verbose, u32 rc_type);
int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch,
u32 err_code, bool verbose);
int gk20a_init_fifo_reset_enable_hw(struct gk20a *g);
int gk20a_fifo_tsg_unbind_channel(struct channel_gk20a *ch);

View File

@@ -627,7 +627,6 @@ static const struct gpu_ops gm20b_ops = {
.tsg_verify_channel_status = gk20a_fifo_tsg_unbind_channel_verify_status,
.tsg_verify_status_ctx_reload = gm20b_fifo_tsg_verify_status_ctx_reload,
.tsg_set_timeslice = gk20a_fifo_tsg_set_timeslice,
.force_reset_ch = gk20a_fifo_force_reset_ch,
.init_pbdma_info = gk20a_fifo_init_pbdma_info,
.dump_channel_status_ramfc = gk20a_dump_channel_status_ramfc,
.is_preempt_pending = gk20a_fifo_is_preempt_pending,
@@ -766,6 +765,7 @@ static const struct gpu_ops gm20b_ops = {
.enable = gk20a_tsg_enable,
.disable = nvgpu_tsg_disable,
.check_ctxsw_timeout = nvgpu_tsg_check_ctxsw_timeout,
.force_reset = nvgpu_tsg_force_reset_ch,
},
.netlist = {
.get_netlist_name = gm20b_netlist_get_name,

View File

@@ -715,7 +715,6 @@ static const struct gpu_ops gp10b_ops = {
.tsg_verify_channel_status = gk20a_fifo_tsg_unbind_channel_verify_status,
.tsg_verify_status_ctx_reload = gm20b_fifo_tsg_verify_status_ctx_reload,
.tsg_set_timeslice = gk20a_fifo_tsg_set_timeslice,
.force_reset_ch = gk20a_fifo_force_reset_ch,
.init_pbdma_info = gk20a_fifo_init_pbdma_info,
.dump_channel_status_ramfc = gk20a_dump_channel_status_ramfc,
.is_preempt_pending = gk20a_fifo_is_preempt_pending,
@@ -857,6 +856,7 @@ static const struct gpu_ops gp10b_ops = {
.enable = gk20a_tsg_enable,
.disable = nvgpu_tsg_disable,
.check_ctxsw_timeout = nvgpu_tsg_check_ctxsw_timeout,
.force_reset = nvgpu_tsg_force_reset_ch,
},
.netlist = {
.get_netlist_name = gp10b_netlist_get_name,

View File

@@ -893,7 +893,6 @@ static const struct gpu_ops gv100_ops = {
.tsg_verify_status_ctx_reload = gm20b_fifo_tsg_verify_status_ctx_reload,
.tsg_verify_status_faulted = gv11b_fifo_tsg_verify_status_faulted,
.tsg_set_timeslice = gk20a_fifo_tsg_set_timeslice,
.force_reset_ch = gk20a_fifo_force_reset_ch,
.init_pbdma_info = gk20a_fifo_init_pbdma_info,
.dump_channel_status_ramfc = gv11b_dump_channel_status_ramfc,
.is_preempt_pending = gv11b_fifo_is_preempt_pending,
@@ -1039,6 +1038,7 @@ static const struct gpu_ops gv100_ops = {
.enable = gv11b_tsg_enable,
.disable = nvgpu_tsg_disable,
.check_ctxsw_timeout = nvgpu_tsg_check_ctxsw_timeout,
.force_reset = nvgpu_tsg_force_reset_ch,
},
.netlist = {
.get_netlist_name = gv100_netlist_get_name,

View File

@@ -850,7 +850,6 @@ static const struct gpu_ops gv11b_ops = {
.tsg_verify_status_ctx_reload = gm20b_fifo_tsg_verify_status_ctx_reload,
.tsg_verify_status_faulted = gv11b_fifo_tsg_verify_status_faulted,
.tsg_set_timeslice = gk20a_fifo_tsg_set_timeslice,
.force_reset_ch = gk20a_fifo_force_reset_ch,
.init_pbdma_info = gk20a_fifo_init_pbdma_info,
.dump_channel_status_ramfc = gv11b_dump_channel_status_ramfc,
.is_preempt_pending = gv11b_fifo_is_preempt_pending,
@@ -998,6 +997,7 @@ static const struct gpu_ops gv11b_ops = {
.enable = gv11b_tsg_enable,
.disable = nvgpu_tsg_disable,
.check_ctxsw_timeout = nvgpu_tsg_check_ctxsw_timeout,
.force_reset = nvgpu_tsg_force_reset_ch,
},
.netlist = {
.get_netlist_name = gv11b_netlist_get_name,

View File

@@ -952,8 +952,6 @@ struct gpu_ops {
void (*apply_pb_timeout)(struct gk20a *g);
int (*tsg_set_timeslice)(struct tsg_gk20a *tsg, u32 timeslice);
u32 (*default_timeslice_us)(struct gk20a *g);
int (*force_reset_ch)(struct channel_gk20a *ch,
u32 err_code, bool verbose);
int (*tsg_bind_channel)(struct tsg_gk20a *tsg,
struct channel_gk20a *ch);
int (*tsg_unbind_channel)(struct channel_gk20a *ch);
@@ -1154,6 +1152,8 @@ struct gpu_ops {
void (*disable)(struct tsg_gk20a *tsg);
bool (*check_ctxsw_timeout)(struct tsg_gk20a *tsg,
bool *verbose, u32 *ms);
int (*force_reset)(struct channel_gk20a *ch,
u32 err_code, bool verbose);
} tsg;
struct {
void (*read_engine_status_info) (struct gk20a *g,

View File

@@ -96,6 +96,8 @@ void nvgpu_tsg_disable(struct tsg_gk20a *tsg);
int gk20a_tsg_bind_channel(struct tsg_gk20a *tsg,
struct channel_gk20a *ch);
int gk20a_tsg_unbind_channel(struct channel_gk20a *ch);
int nvgpu_tsg_force_reset_ch(struct channel_gk20a *ch,
u32 err_code, bool verbose);
void nvgpu_tsg_recover(struct gk20a *g, struct tsg_gk20a *tsg,
bool verbose, u32 rc_type);

View File

@@ -1342,7 +1342,7 @@ long gk20a_channel_ioctl(struct file *filp,
__func__, cmd);
break;
}
err = ch->g->ops.fifo.force_reset_ch(ch,
err = ch->g->ops.tsg.force_reset(ch,
NVGPU_ERR_NOTIFIER_RESETCHANNEL_VERIF_ERROR, true);
gk20a_idle(ch->g);
break;

View File

@@ -928,7 +928,6 @@ static const struct gpu_ops tu104_ops = {
.tsg_verify_status_ctx_reload = gm20b_fifo_tsg_verify_status_ctx_reload,
.tsg_verify_status_faulted = gv11b_fifo_tsg_verify_status_faulted,
.tsg_set_timeslice = gk20a_fifo_tsg_set_timeslice,
.force_reset_ch = gk20a_fifo_force_reset_ch,
.init_pbdma_info = gk20a_fifo_init_pbdma_info,
.dump_channel_status_ramfc = gv11b_dump_channel_status_ramfc,
.is_preempt_pending = gv11b_fifo_is_preempt_pending,
@@ -1076,6 +1075,7 @@ static const struct gpu_ops tu104_ops = {
.enable = gv11b_tsg_enable,
.disable = nvgpu_tsg_disable,
.check_ctxsw_timeout = nvgpu_tsg_check_ctxsw_timeout,
.force_reset = nvgpu_tsg_force_reset_ch,
},
.netlist = {
.get_netlist_name = tu104_netlist_get_name,