mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
gpu: nvgpu: wait ACK for FECS watchdog timeout
On Volta, nvgpu needs to wait for an explicit ACK from CTXSW while
setting the FECS watchdog timeout.
This is a manual port of fixes 4d7e5026e38528b88a4a168eca9a8b180475b368
and ad89436b03428a42e43042b6a849c15843fdebc4 from dev-main, since a clean
cherry-pick is not possible due to large file and structural differences.
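For reference, the "explicit ACK" is FECS reporting completion of the SET_WATCHDOG_TIMEOUT method through ctxsw mailbox 0: the gv11b path in the diff below submits the method with gr_gk20a_submit_fecs_method_op_locked() and waits for gr_fecs_ctxsw_mailbox_value_pass_v(). The stand-alone C sketch below only illustrates that submit-then-poll-for-ACK pattern; none of its names, registers, or values are nvgpu code.

/*
 * Illustrative user-space sketch of "push the FECS method, then poll a
 * mailbox for the ACK". All names and values are made up for this example;
 * the driver uses gk20a_writel()/gk20a_readl() on gr_fecs_method_*_r() and
 * gr_fecs_ctxsw_mailbox_r(0), and gr_gk20a_ctx_wait_ucode() for the wait.
 */
#include <stdint.h>
#include <stdio.h>

#define MAILBOX_VALUE_PASS 0x1u   /* stand-in for gr_fecs_ctxsw_mailbox_value_pass_v() */
#define POLL_LIMIT         1000u  /* stand-in for the driver's timeout handling */

static uint32_t fake_mailbox0;    /* models ctxsw mailbox 0 */

/* Models the method push; a real FECS would set the mailbox asynchronously. */
static void fake_push_set_watchdog_timeout(uint32_t data)
{
        (void)data;
        fake_mailbox0 = MAILBOX_VALUE_PASS;
}

/* Poll mailbox 0 until it reports PASS or the poll budget runs out. */
static int wait_for_ack(void)
{
        for (uint32_t i = 0U; i < POLL_LIMIT; i++) {
                if (fake_mailbox0 == MAILBOX_VALUE_PASS) {
                        return 0;
                }
        }
        return -1; /* no ACK: on Volta this must be treated as an error */
}

int main(void)
{
        fake_mailbox0 = 0U;                        /* clear the mailbox first */
        fake_push_set_watchdog_timeout(0x7fffffffU);
        if (wait_for_ack() != 0) {
                fprintf(stderr, "fail to set watchdog timeout\n");
                return 1;
        }
        printf("watchdog timeout acknowledged\n");
        return 0;
}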
Bug 200603566
Bug 200660258
Change-Id: Icba69998ab45eee5fdf2a29e1ac1067589301be6
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2371708
(cherry picked from commit e878686302)
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2423367
Tested-by: Sagar Kamble <skamble@nvidia.com>
Reviewed-by: Sagar Kamble <skamble@nvidia.com>
Reviewed-by: automaticguardword <automaticguardword@nvidia.com>
Reviewed-by: Bibek Basu <bbasu@nvidia.com>
GVS: Gerrit_Virtual_Submit
Committed by: Amulya Yarlagadda
Parent: 524bab9732
Commit: bb946cb769
@@ -541,17 +541,12 @@ int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id,
 	return 0;
 }
 
-/* The following is a less brittle way to call gr_gk20a_submit_fecs_method(...)
- * We should replace most, if not all, fecs method calls to this instead. */
-int gr_gk20a_submit_fecs_method_op(struct gk20a *g,
+int gr_gk20a_submit_fecs_method_op_locked(struct gk20a *g,
 			struct fecs_method_op_gk20a op,
 			bool sleepduringwait)
 {
-	struct gr_gk20a *gr = &g->gr;
 	int ret;
 
-	nvgpu_mutex_acquire(&gr->fecs_mutex);
-
 	if (op.mailbox.id != 0) {
 		gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(op.mailbox.id),
 			op.mailbox.data);
@@ -579,6 +574,22 @@ int gr_gk20a_submit_fecs_method_op(struct gk20a *g,
 			op.method.data, op.method.addr);
 	}
 
+	return ret;
+}
+
+/* The following is a less brittle way to call gr_gk20a_submit_fecs_method(...)
+ * We should replace most, if not all, fecs method calls to this instead. */
+int gr_gk20a_submit_fecs_method_op(struct gk20a *g,
+			struct fecs_method_op_gk20a op,
+			bool sleepduringwait)
+{
+	struct gr_gk20a *gr = &g->gr;
+	int ret;
+
+	nvgpu_mutex_acquire(&gr->fecs_mutex);
+
+	ret = gr_gk20a_submit_fecs_method_op_locked(g, op, sleepduringwait);
+
 	nvgpu_mutex_release(&gr->fecs_mutex);
 
 	return ret;
@@ -2486,6 +2497,16 @@ int gr_gk20a_load_ctxsw_ucode(struct gk20a *g)
 	return 0;
 }
 
+int gr_gk20a_set_fecs_watchdog_timeout(struct gk20a *g)
+{
+	gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), 0xffffffff);
+	gk20a_writel(g, gr_fecs_method_data_r(), 0x7fffffff);
+	gk20a_writel(g, gr_fecs_method_push_r(),
+		gr_fecs_method_push_adr_set_watchdog_timeout_f());
+
+	return 0;
+}
+
 static int gr_gk20a_wait_ctxsw_ready(struct gk20a *g)
 {
 	u32 ret;
@@ -2507,10 +2528,11 @@ static int gr_gk20a_wait_ctxsw_ready(struct gk20a *g)
 			gr_fecs_current_ctx_valid_false_f());
 	}
 
-	gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), 0xffffffff);
-	gk20a_writel(g, gr_fecs_method_data_r(), 0x7fffffff);
-	gk20a_writel(g, gr_fecs_method_push_r(),
-		gr_fecs_method_push_adr_set_watchdog_timeout_f());
+	ret = g->ops.gr.set_fecs_watchdog_timeout(g);
+	if (ret) {
+		nvgpu_err(g, "fail to set watchdog timeout");
+		return ret;
+	}
 
 	nvgpu_log_fn(g, "done");
 	return 0;

@@ -702,6 +702,9 @@ int gr_gk20a_init_ctx_state(struct gk20a *g);
 int gr_gk20a_submit_fecs_method_op(struct gk20a *g,
 			struct fecs_method_op_gk20a op,
 			bool sleepduringwait);
+int gr_gk20a_submit_fecs_method_op_locked(struct gk20a *g,
+			struct fecs_method_op_gk20a op,
+			bool sleepduringwait);
 int gr_gk20a_submit_fecs_sideband_method_op(struct gk20a *g,
 			struct fecs_method_op_gk20a op);
 int gr_gk20a_alloc_gr_ctx(struct gk20a *g,
@@ -844,4 +847,5 @@ void gk20a_gr_destroy_ctx_buffer(struct gk20a *g,
 int gk20a_gr_alloc_ctx_buffer(struct gk20a *g,
 			struct gr_ctx_buffer_desc *desc, size_t size);
 void gk20a_gr_flush_channel_tlb(struct gr_gk20a *gr);
+int gr_gk20a_set_fecs_watchdog_timeout(struct gk20a *g);
 #endif /*__GR_GK20A_H__*/

@@ -332,6 +332,7 @@ static const struct gpu_ops gm20b_ops = {
 		.get_offset_in_gpccs_segment =
 			gr_gk20a_get_offset_in_gpccs_segment,
 		.set_debug_mode = gm20b_gr_set_debug_mode,
+		.set_fecs_watchdog_timeout = gr_gk20a_set_fecs_watchdog_timeout,
 	},
 	.fb = {
 		.init_hw = gm20b_fb_init_hw,

@@ -366,6 +366,7 @@ static const struct gpu_ops gp10b_ops = {
 		.get_offset_in_gpccs_segment =
 			gr_gk20a_get_offset_in_gpccs_segment,
 		.set_debug_mode = gm20b_gr_set_debug_mode,
+		.set_fecs_watchdog_timeout = gr_gk20a_set_fecs_watchdog_timeout,
 	},
 	.fb = {
 		.init_hw = gm20b_fb_init_hw,

@@ -490,6 +490,7 @@ static const struct gpu_ops gv100_ops = {
 		.get_offset_in_gpccs_segment =
 			gr_gk20a_get_offset_in_gpccs_segment,
 		.set_debug_mode = gm20b_gr_set_debug_mode,
+		.set_fecs_watchdog_timeout = gr_gv11b_set_fecs_watchdog_timeout,
 	},
 	.fb = {
 		.init_hw = gv11b_fb_init_hw,

@@ -5071,3 +5071,17 @@ fail:
 	nvgpu_mutex_release(&g->dbg_sessions_lock);
 	return err;
 }
+
+int gr_gv11b_set_fecs_watchdog_timeout(struct gk20a *g)
+{
+	return gr_gk20a_submit_fecs_method_op_locked(g,
+		(struct fecs_method_op_gk20a) {
+			.method.addr = gr_fecs_method_push_adr_set_watchdog_timeout_f(),
+			.method.data = 0x7fffffff,
+			.mailbox = { .id = 0,
+				.data = ~0, .clr = ~0, .ret = NULL,
+				.ok = gr_fecs_ctxsw_mailbox_value_pass_v(),
+				.fail = 0, },
+			.cond.ok = GR_IS_UCODE_OP_EQUAL,
+			.cond.fail = GR_IS_UCODE_OP_SKIP}, false);
+}

@@ -257,4 +257,5 @@ void gr_gv11b_set_skedcheck(struct gk20a *g, u32 data);
 void gr_gv11b_set_go_idle_timeout(struct gk20a *g, u32 data);
 void gr_gv11b_set_coalesce_buffer_size(struct gk20a *g, u32 data);
 void gr_gv11b_set_tex_in_dbg(struct gk20a *g, u32 data);
+int gr_gv11b_set_fecs_watchdog_timeout(struct gk20a *g);
 #endif /* NVGPU_GR_GV11B_H */

@@ -452,6 +452,7 @@ static const struct gpu_ops gv11b_ops = {
 		.get_offset_in_gpccs_segment =
 			gr_gk20a_get_offset_in_gpccs_segment,
 		.set_debug_mode = gm20b_gr_set_debug_mode,
+		.set_fecs_watchdog_timeout = gr_gv11b_set_fecs_watchdog_timeout,
 	},
 	.fb = {
 		.init_hw = gv11b_fb_init_hw,

@@ -531,6 +531,7 @@ struct gpu_ops {
 		void (*set_debug_mode)(struct gk20a *g, bool enable);
 		int (*set_mmu_debug_mode)(struct gk20a *g,
 				struct channel_gk20a *ch, bool enable);
+		int (*set_fecs_watchdog_timeout)(struct gk20a *g);
 	} gr;
 	struct {
 		void (*init_hw)(struct gk20a *g);
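The per-chip wiring above follows nvgpu's HAL pattern: every chip's gpu_ops table carries a gr.set_fecs_watchdog_timeout pointer, set to gr_gk20a_set_fecs_watchdog_timeout (fire-and-forget register writes) on gm20b/gp10b and to gr_gv11b_set_fecs_watchdog_timeout (submit the method and wait for the mailbox ACK) on gv100/gv11b, so common code such as gr_gk20a_wait_ctxsw_ready() stays chip-agnostic. The stand-alone sketch below illustrates that dispatch with simplified, made-up types and names; it is not nvgpu source.

/*
 * Stand-alone sketch of the HAL wiring: each chip's ops table selects either
 * a fire-and-forget or an ACK-waiting implementation behind one function
 * pointer. The types and functions here are stand-ins for struct gpu_ops,
 * gr_gk20a_set_fecs_watchdog_timeout() and gr_gv11b_set_fecs_watchdog_timeout().
 */
#include <stdio.h>

struct fake_gr_ops {
        int (*set_fecs_watchdog_timeout)(void);
};

/* Pre-Volta style: write the method registers and return immediately. */
static int fake_gk20a_set_watchdog_timeout(void)
{
        printf("gk20a: method pushed, no ACK expected\n");
        return 0;
}

/* Volta style: push the method and report whether CTXSW acknowledged it. */
static int fake_gv11b_set_watchdog_timeout(void)
{
        printf("gv11b: method pushed, waiting for mailbox ACK\n");
        return 0; /* would return an error if the ACK never arrived */
}

static const struct fake_gr_ops gm20b_like = {
        .set_fecs_watchdog_timeout = fake_gk20a_set_watchdog_timeout,
};

static const struct fake_gr_ops gv11b_like = {
        .set_fecs_watchdog_timeout = fake_gv11b_set_watchdog_timeout,
};

int main(void)
{
        /* Common code calls through the pointer, as gr_gk20a_wait_ctxsw_ready()
         * now does via g->ops.gr.set_fecs_watchdog_timeout(g). */
        const struct fake_gr_ops *ops = &gv11b_like;

        if (ops->set_fecs_watchdog_timeout() != 0) {
                fprintf(stderr, "fail to set watchdog timeout\n");
        }

        ops = &gm20b_like;
        (void)ops->set_fecs_watchdog_timeout();
        return 0;
}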