gpu: nvgpu: add fecs_host_intr hals

Add three hals in gr.falcon
- fecs_host_intr_status
 This reads the fecs_host_intr_status register, sets the variables in the
 nvgpu_fecs_host_intr_status struct, and reports them back so that gr can
 handle the interrupts properly
- fecs_host_clear_intr
 This clears the required bits in fecs_host_intr.
- read_fecs_ctxsw_mailbox
 This reads the ctxsw_mailbox register based on register index.

Use these hals in gk20a_gr_handle_fecs_error and
gp10b_gr_handle_fecs_error functions.

JIRA NVGPU-1881

Change-Id: Ia02a254acc38e7e25c7c3605e9f1dda4da898543
Signed-off-by: Vinod G <vinodg@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2093917
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Vinod G
2019-04-09 19:47:37 -07:00
committed by mobile promotions
parent 815c102e5d
commit 3ef8e6b099
12 changed files with 128 additions and 25 deletions

View File

@@ -379,39 +379,42 @@ static int gk20a_gr_handle_illegal_method(struct gk20a *g,
int gk20a_gr_handle_fecs_error(struct gk20a *g, struct channel_gk20a *ch,
struct nvgpu_gr_isr_data *isr_data)
{
u32 gr_fecs_intr = gk20a_readl(g, gr_fecs_host_int_status_r());
u32 gr_fecs_intr, mailbox_value;
int ret = 0;
struct nvgpu_fecs_host_intr_status fecs_host_intr;
u32 chid = isr_data->ch != NULL ?
isr_data->ch->chid : FIFO_INVAL_CHANNEL_ID;
u32 mailbox_id = NVGPU_GR_FALCON_FECS_CTXSW_MAILBOX6;
gr_fecs_intr = g->ops.gr.falcon.fecs_host_intr_status(g,
&fecs_host_intr);
if (gr_fecs_intr == 0U) {
return 0;
}
if ((gr_fecs_intr &
gr_fecs_host_int_status_umimp_firmware_method_f(1)) != 0U) {
if (fecs_host_intr.unimp_fw_method_active) {
mailbox_value = g->ops.gr.falcon.read_fecs_ctxsw_mailbox(g,
mailbox_id);
gk20a_gr_set_error_notifier(g, isr_data,
NVGPU_ERR_NOTIFIER_FECS_ERR_UNIMP_FIRMWARE_METHOD);
nvgpu_err(g,
"firmware method error 0x%08x for offset 0x%04x",
gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(6)),
mailbox_value,
isr_data->data_lo);
ret = -1;
} else if ((gr_fecs_intr &
gr_fecs_host_int_status_watchdog_active_f()) != 0U) {
} else if (fecs_host_intr.watchdog_active) {
gr_report_ctxsw_error(g, GPU_FECS_CTXSW_WATCHDOG_TIMEOUT,
chid, 0);
/* currently, recovery is not initiated */
nvgpu_err(g, "fecs watchdog triggered for channel %u, "
"cannot ctxsw anymore !!", chid);
g->ops.gr.falcon.dump_stats(g);
} else if ((gr_fecs_intr &
gr_fecs_host_int_status_ctxsw_intr_f(CTXSW_INTR0)) != 0U) {
u32 mailbox_value = gk20a_readl(g, gr_fecs_ctxsw_mailbox_r(6));
} else if (fecs_host_intr.ctxsw_intr0 != 0U) {
mailbox_value = g->ops.gr.falcon.read_fecs_ctxsw_mailbox(g,
mailbox_id);
#ifdef CONFIG_GK20A_CTXSW_TRACE
if (mailbox_value ==
g->ops.gr.fecs_trace.get_buffer_full_mailbox_val()) {
g->ops.gr.fecs_trace.get_buffer_full_mailbox_val()) {
nvgpu_info(g, "ctxsw intr0 set by ucode, "
"timestamp buffer full");
nvgpu_gr_fecs_trace_reset_buffer(g);
@@ -421,9 +424,9 @@ int gk20a_gr_handle_fecs_error(struct gk20a *g, struct channel_gk20a *ch,
* The mailbox values may vary across chips hence keeping it
* as a HAL.
*/
if (g->ops.gr.get_ctxsw_checksum_mismatch_mailbox_val
!= NULL && mailbox_value ==
g->ops.gr.get_ctxsw_checksum_mismatch_mailbox_val()) {
if ((g->ops.gr.get_ctxsw_checksum_mismatch_mailbox_val != NULL)
&& (mailbox_value ==
g->ops.gr.get_ctxsw_checksum_mismatch_mailbox_val())) {
gr_report_ctxsw_error(g, GPU_FECS_CTXSW_CRC_MISMATCH,
chid, mailbox_value);
@@ -443,8 +446,7 @@ int gk20a_gr_handle_fecs_error(struct gk20a *g, struct channel_gk20a *ch,
mailbox_value);
ret = -1;
}
} else if ((gr_fecs_intr &
gr_fecs_host_int_status_fault_during_ctxsw_f(1)) != 0U) {
} else if (fecs_host_intr.fault_during_ctxsw_active) {
gr_report_ctxsw_error(g, GPU_FECS_FAULT_DURING_CTXSW,
chid, 0);
nvgpu_err(g, "fecs fault during ctxsw for channel %u", chid);
@@ -456,7 +458,8 @@ int gk20a_gr_handle_fecs_error(struct gk20a *g, struct channel_gk20a *ch,
g->ops.gr.falcon.dump_stats(g);
}
gk20a_writel(g, gr_fecs_host_int_clear_r(), gr_fecs_intr);
g->ops.gr.falcon.fecs_host_clear_intr(g, gr_fecs_intr);
return ret;
}

View File

@@ -44,9 +44,6 @@
#define NVGPU_OBJ_CTX_FLAGS_SUPPORT_GFXP BIT32(1)
#define NVGPU_OBJ_CTX_FLAGS_SUPPORT_CILP BIT32(2)
#define CTXSW_INTR0 BIT32(0)
#define CTXSW_INTR1 BIT32(1)
struct tsg_gk20a;
struct nvgpu_gr_ctx;
struct channel_gk20a;

View File

@@ -507,6 +507,12 @@ static const struct gpu_ops gm20b_ops = {
gm20ab_gr_intr_tpc_exception_sm_disable,
},
.falcon = {
.read_fecs_ctxsw_mailbox =
gm20b_gr_falcon_read_fecs_ctxsw_mailbox,
.fecs_host_clear_intr =
gm20b_gr_falcon_fecs_host_clear_intr,
.fecs_host_intr_status =
gm20b_gr_falcon_fecs_host_intr_status,
.fecs_base_addr = gm20b_gr_falcon_fecs_base_addr,
.gpccs_base_addr = gm20b_gr_falcon_gpccs_base_addr,
.set_current_ctx_invalid =

View File

@@ -903,26 +903,32 @@ int gr_gp10b_handle_fecs_error(struct gk20a *g,
struct channel_gk20a *__ch,
struct nvgpu_gr_isr_data *isr_data)
{
u32 gr_fecs_intr = gk20a_readl(g, gr_fecs_host_int_status_r());
struct channel_gk20a *ch;
u32 chid = FIFO_INVAL_CHANNEL_ID;
int ret = 0;
struct tsg_gk20a *tsg;
struct nvgpu_fecs_host_intr_status fecs_host_intr;
u32 gr_fecs_intr = g->ops.gr.falcon.fecs_host_intr_status(g,
&fecs_host_intr);
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr, " ");
if (gr_fecs_intr == 0U) {
return 0;
}
/*
* INTR1 (bit 1 of the HOST_INT_STATUS_CTXSW_INTR)
* indicates that a CILP ctxsw save has finished
*/
if ((gr_fecs_intr &
gr_fecs_host_int_status_ctxsw_intr_f(CTXSW_INTR1)) != 0U) {
if (fecs_host_intr.ctxsw_intr1 != 0U) {
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg | gpu_dbg_intr,
"CILP: ctxsw save completed!\n");
/* now clear the interrupt */
gk20a_writel(g, gr_fecs_host_int_clear_r(),
gr_fecs_host_int_clear_ctxsw_intr1_clear_f());
g->ops.gr.falcon.fecs_host_clear_intr(g,
fecs_host_intr.ctxsw_intr1);
ret = gr_gp10b_get_cilp_preempt_pending_chid(g, &chid);
if ((ret != 0) || (chid == FIFO_INVAL_CHANNEL_ID)) {

View File

@@ -602,6 +602,12 @@ static const struct gpu_ops gp10b_ops = {
gm20ab_gr_intr_tpc_exception_sm_disable,
},
.falcon = {
.read_fecs_ctxsw_mailbox =
gm20b_gr_falcon_read_fecs_ctxsw_mailbox,
.fecs_host_clear_intr =
gm20b_gr_falcon_fecs_host_clear_intr,
.fecs_host_intr_status =
gm20b_gr_falcon_fecs_host_intr_status,
.fecs_base_addr = gm20b_gr_falcon_fecs_base_addr,
.gpccs_base_addr = gm20b_gr_falcon_gpccs_base_addr,
.set_current_ctx_invalid =

View File

@@ -740,6 +740,12 @@ static const struct gpu_ops gv100_ops = {
gm20ab_gr_intr_tpc_exception_sm_disable,
},
.falcon = {
.read_fecs_ctxsw_mailbox =
gm20b_gr_falcon_read_fecs_ctxsw_mailbox,
.fecs_host_clear_intr =
gm20b_gr_falcon_fecs_host_clear_intr,
.fecs_host_intr_status =
gm20b_gr_falcon_fecs_host_intr_status,
.fecs_base_addr = gm20b_gr_falcon_fecs_base_addr,
.gpccs_base_addr = gm20b_gr_falcon_gpccs_base_addr,
.set_current_ctx_invalid =

View File

@@ -715,6 +715,12 @@ static const struct gpu_ops gv11b_ops = {
gm20ab_gr_intr_tpc_exception_sm_disable,
},
.falcon = {
.read_fecs_ctxsw_mailbox =
gm20b_gr_falcon_read_fecs_ctxsw_mailbox,
.fecs_host_clear_intr =
gm20b_gr_falcon_fecs_host_clear_intr,
.fecs_host_intr_status =
gm20b_gr_falcon_fecs_host_intr_status,
.fecs_base_addr = gm20b_gr_falcon_fecs_base_addr,
.gpccs_base_addr = gm20b_gr_falcon_gpccs_base_addr,
.set_current_ctx_invalid =

View File

@@ -39,6 +39,8 @@
#define CTXSW_MEM_SCRUBBING_TIMEOUT_DEFAULT_US 10U
#define CTXSW_WDT_DEFAULT_VALUE 0x7FFFFFFFU
#define CTXSW_INTR0 BIT32(0)
#define CTXSW_INTR1 BIT32(1)
void gm20b_gr_falcon_load_gpccs_dmem(struct gk20a *g,
const u32 *ucode_u32_data, u32 ucode_u32_size)
@@ -957,3 +959,42 @@ void gm20b_gr_falcon_fecs_host_int_enable(struct gk20a *g)
gr_fecs_host_int_enable_umimp_illegal_method_enable_f() |
gr_fecs_host_int_enable_watchdog_enable_f());
}
/*
 * Read one FECS CTXSW mailbox register.
 *
 * @g:         GPU device pointer.
 * @reg_index: mailbox index (NVGPU_GR_FALCON_FECS_CTXSW_MAILBOX*).
 *
 * Returns the raw 32-bit contents of gr_fecs_ctxsw_mailbox(reg_index).
 */
u32 gm20b_gr_falcon_read_fecs_ctxsw_mailbox(struct gk20a *g, u32 reg_index)
{
	u32 mailbox_val;

	mailbox_val = nvgpu_readl(g, gr_fecs_ctxsw_mailbox_r(reg_index));

	return mailbox_val;
}
/*
 * Acknowledge FECS-to-host interrupts.
 *
 * @g:         GPU device pointer.
 * @fecs_intr: mask of interrupt bits to clear; written verbatim to the
 *             gr_fecs_host_int_clear register.
 */
void gm20b_gr_falcon_fecs_host_clear_intr(struct gk20a *g, u32 fecs_intr)
{
	u32 clear_mask = fecs_intr;

	nvgpu_writel(g, gr_fecs_host_int_clear_r(), clear_mask);
}
/*
 * Decode the pending FECS-to-host interrupt status.
 *
 * Reads gr_fecs_host_int_status and fills in @fecs_host_intr so callers
 * can dispatch handling without touching the raw register layout.
 *
 * Only the highest-priority pending condition is flagged: the checks form
 * an else-if chain (unimplemented fw method, watchdog, ctxsw_intr0,
 * ctxsw_intr1, fault during ctxsw), mirroring the handling order in the
 * gr ISR paths.
 *
 * Returns the raw interrupt status word (0 when nothing is pending).
 */
u32 gm20b_gr_falcon_fecs_host_intr_status(struct gk20a *g,
		struct nvgpu_fecs_host_intr_status *fecs_host_intr)
{
	u32 intr = nvgpu_readl(g, gr_fecs_host_int_status_r());

	(void) memset(fecs_host_intr, 0, sizeof(*fecs_host_intr));

	if ((intr &
	     gr_fecs_host_int_status_umimp_firmware_method_f(1)) != 0U) {
		fecs_host_intr->unimp_fw_method_active = true;
	} else if ((intr &
		    gr_fecs_host_int_status_watchdog_active_f()) != 0U) {
		fecs_host_intr->watchdog_active = true;
	} else if ((intr &
		    gr_fecs_host_int_status_ctxsw_intr_f(CTXSW_INTR0)) != 0U) {
		fecs_host_intr->ctxsw_intr0 =
			gr_fecs_host_int_status_ctxsw_intr_f(CTXSW_INTR0);
	} else if ((intr &
		    gr_fecs_host_int_status_ctxsw_intr_f(CTXSW_INTR1)) != 0U) {
		/*
		 * Report the *clear*-register encoding for intr1 so the
		 * caller can pass it straight to fecs_host_clear_intr.
		 */
		fecs_host_intr->ctxsw_intr1 =
			gr_fecs_host_int_clear_ctxsw_intr1_clear_f();
	} else if ((intr &
		    gr_fecs_host_int_status_fault_during_ctxsw_f(1)) != 0U) {
		fecs_host_intr->fault_during_ctxsw_active = true;
	}

	return intr;
}

View File

@@ -27,7 +27,12 @@
struct gk20a;
struct nvgpu_fecs_method_op;
struct nvgpu_fecs_host_intr_status;
u32 gm20b_gr_falcon_read_fecs_ctxsw_mailbox(struct gk20a *g, u32 reg_index);
void gm20b_gr_falcon_fecs_host_clear_intr(struct gk20a *g, u32 fecs_intr);
u32 gm20b_gr_falcon_fecs_host_intr_status(struct gk20a *g,
struct nvgpu_fecs_host_intr_status *fecs_host_intr);
u32 gm20b_gr_falcon_fecs_base_addr(void);
u32 gm20b_gr_falcon_gpccs_base_addr(void);
void gm20b_gr_falcon_fecs_dump_stats(struct gk20a *g);

View File

@@ -83,6 +83,7 @@ struct nvgpu_dbg_reg_op;
struct gk20a_cs_snapshot;
struct nvgpu_preemption_modes_rec;
struct nvgpu_gr_ctx;
struct nvgpu_fecs_host_intr_status;
typedef void (*global_ctx_mem_destroy_fn)(struct gk20a *g,
struct nvgpu_mem *mem);
@@ -537,6 +538,12 @@ struct gpu_ops {
} config;
struct {
u32 (*read_fecs_ctxsw_mailbox)(struct gk20a *g,
u32 reg_index);
void (*fecs_host_clear_intr)(struct gk20a *g,
u32 fecs_intr);
u32 (*fecs_host_intr_status)(struct gk20a *g,
struct nvgpu_fecs_host_intr_status *fecs_host_intr);
u32 (*fecs_base_addr)(void);
u32 (*gpccs_base_addr)(void);
void (*set_current_ctx_invalid)(struct gk20a *g);

View File

@@ -42,6 +42,20 @@ struct gk20a;
#define NVGPU_GR_FALCON_METHOD_PREEMPT_IMAGE_SIZE 12
#define NVGPU_GR_FALCON_METHOD_CONFIGURE_CTXSW_INTR 13
#define NVGPU_GR_FALCON_FECS_CTXSW_MAILBOX0 0U
#define NVGPU_GR_FALCON_FECS_CTXSW_MAILBOX1 1U
#define NVGPU_GR_FALCON_FECS_CTXSW_MAILBOX2 2U
#define NVGPU_GR_FALCON_FECS_CTXSW_MAILBOX4 4U
#define NVGPU_GR_FALCON_FECS_CTXSW_MAILBOX6 6U
#define NVGPU_GR_FALCON_FECS_CTXSW_MAILBOX7 7U
/*
 * Decoded FECS-to-host interrupt state, filled in by the
 * gr.falcon.fecs_host_intr_status HAL and consumed by the gr ISR
 * handlers (gk20a/gp10b gr_handle_fecs_error).
 */
struct nvgpu_fecs_host_intr_status {
	/* Non-zero when CTXSW interrupt 0 is pending; holds the
	 * status-register field value for that bit. */
	u32 ctxsw_intr0;
	/* Non-zero when CTXSW interrupt 1 is pending; holds the
	 * clear-register field value, ready to pass to
	 * fecs_host_clear_intr. */
	u32 ctxsw_intr1;
	/* True when a fault occurred during context switch. */
	bool fault_during_ctxsw_active;
	/* True when ucode reported an unimplemented firmware method. */
	bool unimp_fw_method_active;
	/* True when the FECS watchdog interrupt is active. */
	bool watchdog_active;
};
int nvgpu_gr_falcon_bind_fecs_elpg(struct gk20a *g);
int nvgpu_gr_falcon_init_ctxsw(struct gk20a *g);

View File

@@ -773,6 +773,12 @@ static const struct gpu_ops tu104_ops = {
gm20ab_gr_intr_tpc_exception_sm_disable,
},
.falcon = {
.read_fecs_ctxsw_mailbox =
gm20b_gr_falcon_read_fecs_ctxsw_mailbox,
.fecs_host_clear_intr =
gm20b_gr_falcon_fecs_host_clear_intr,
.fecs_host_intr_status =
gm20b_gr_falcon_fecs_host_intr_status,
.fecs_base_addr = gm20b_gr_falcon_fecs_base_addr,
.gpccs_base_addr = gm20b_gr_falcon_gpccs_base_addr,
.set_current_ctx_invalid =