gpu: nvgpu: gsp: host routed interrupt handling

Changes:
- Add support for watchdog timer interrupt handling.
- Modify the code to support GSP scheduler interrupts for ECC errors
  directed to the host.
- Handle interrupts such as IMEM, DMEM, EMEM, Delayed Lock Step and
  incorrect register access.

NVGPU-9273

Change-Id: I93a2ef0961aaa40e76ca7efe8450ce07a6709453
Signed-off-by: vivekku <vivekku@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2818202
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: Ramesh Mylavarapu <rmylavarapu@nvidia.com>
Reviewed-by: Ankur Kishore <ankkishore@nvidia.com>
Tested-by: Ramesh Mylavarapu <rmylavarapu@nvidia.com>
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
2025-12-22 09:12:24 +03:00 · 2022-11-29 05:37:33 +00:00
parent f5f13778ad
commit 470c1738b5
2 changed files with 43 additions and 9 deletions
--- a/drivers/gpu/nvgpu/hal/gsp/gsp_ga10b.c
+++ b/drivers/gpu/nvgpu/hal/gsp/gsp_ga10b.c
@@ -64,13 +64,13 @@ int ga10b_gsp_engine_reset(struct gk20a *g)
 	return 0;
 }

-static int ga10b_gsp_handle_ecc(struct gk20a *g, u32 ecc_status)
+static int ga10b_gsp_handle_ecc(struct gk20a *g, u32 ecc_status, u32 err_module)
 {
 	int ret = 0;

 	if ((ecc_status &
 		pgsp_falcon_ecc_status_uncorrected_err_imem_m()) != 0U) {
-		nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_GSP_ACR,
+		nvgpu_report_err_to_sdl(g, err_module,
 					GPU_GSP_ACR_IMEM_ECC_UNCORRECTED);
 		nvgpu_err(g, "imem ecc error uncorrected");
 		ret = -EFAULT;
@@ -78,7 +78,7 @@ static int ga10b_gsp_handle_ecc(struct gk20a *g, u32 ecc_status)

 	if ((ecc_status &
 		pgsp_falcon_ecc_status_uncorrected_err_dmem_m()) != 0U) {
-		nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_GSP_ACR,
+		nvgpu_report_err_to_sdl(g, err_module,
 					GPU_GSP_ACR_DMEM_ECC_UNCORRECTED);
 		nvgpu_err(g, "dmem ecc error uncorrected");
 		ret = -EFAULT;
@@ -86,7 +86,7 @@ static int ga10b_gsp_handle_ecc(struct gk20a *g, u32 ecc_status)

 	if ((ecc_status &
 		pgsp_falcon_ecc_status_uncorrected_err_dcls_m()) != 0U) {
-		nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_GSP_ACR,
+		nvgpu_report_err_to_sdl(g, err_module,
 					GPU_GSP_ACR_DCLS_UNCORRECTED);
 		nvgpu_err(g, "dcls ecc error uncorrected");
 		ret = -EFAULT;
@@ -94,7 +94,7 @@ static int ga10b_gsp_handle_ecc(struct gk20a *g, u32 ecc_status)

 	if ((ecc_status &
 		pgsp_falcon_ecc_status_uncorrected_err_reg_m()) != 0U) {
-		nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_GSP_ACR,
+		nvgpu_report_err_to_sdl(g, err_module,
 					GPU_GSP_ACR_REG_ECC_UNCORRECTED);
 		nvgpu_err(g, "reg ecc error uncorrected");
 		ret = -EFAULT;
@@ -102,7 +102,7 @@ static int ga10b_gsp_handle_ecc(struct gk20a *g, u32 ecc_status)

 	if ((ecc_status &
 		pgsp_falcon_ecc_status_uncorrected_err_emem_m()) != 0U) {
-		nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_GSP_ACR,
+		nvgpu_report_err_to_sdl(g, err_module,
 					GPU_GSP_ACR_EMEM_ECC_UNCORRECTED);
 		nvgpu_err(g, "emem ecc error uncorrected");
 		ret = -EFAULT;
@@ -117,7 +117,7 @@ bool ga10b_gsp_validate_mem_integrity(struct gk20a *g)

 	ecc_status = nvgpu_readl(g, pgsp_falcon_ecc_status_r());

-	return ((ga10b_gsp_handle_ecc(g, ecc_status) == 0) ? true :
+	return ((ga10b_gsp_handle_ecc(g, ecc_status, NVGPU_ERR_MODULE_GSP_ACR) == 0) ? true :
 			false);
 }

@@ -206,8 +206,12 @@ static bool ga10b_gsp_is_interrupted(struct gk20a *g, u32 *intr)
 	u32 intr_stat = gk20a_readl(g, pgsp_falcon_irqstat_r());

 	supported_gsp_int = pgsp_falcon_irqstat_halt_true_f() |
-			pgsp_falcon_irqstat_swgen1_true_f() |
-			pgsp_falcon_irqstat_swgen0_true_f() |
+			pgsp_falcon_irqstat_swgen1_true_f()		|
+			pgsp_falcon_irqstat_swgen0_true_f()		|
+			pgsp_falcon_irqstat_wdtmr_true_f()		|
+			pgsp_falcon_irqstat_extirq7_true_f()	|
+			pgsp_falcon_irqstat_memerr_true_f()		|
+			pgsp_falcon_irqstat_iopmp_true_f()		|
 			pgsp_falcon_irqstat_exterr_true_f();

 	*intr = intr_stat;
@@ -262,6 +266,8 @@ static void ga10b_gsp_handle_interrupts(struct gk20a *g, u32 intr)
 #ifndef CONFIG_NVGPU_MON_PRESENT
 	int err = 0;
 #endif
+	u32 ecc_status = 0U;
+
 	nvgpu_log_fn(g, " ");

 	/* swgen1 interrupt handle */
@@ -284,6 +290,13 @@ static void ga10b_gsp_handle_interrupts(struct gk20a *g, u32 intr)
 				~pgsp_falcon_exterrstat_valid_m());
 	}

+	/* watchdog timer interrupt handle */
+	if ((intr & pgsp_falcon_irqstat_wdtmr_true_f()) != 0U) {
+		nvgpu_err(g, "gsp watchdog timeout.");
+		nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_GSP_SCHED,
+			GPU_GSP_SCHED_WDT_UNCORRECTED);
+	}
+
 #ifndef CONFIG_NVGPU_MON_PRESENT
 	/* swgen0 interrupt handle */
 	if ((intr & pgsp_falcon_irqstat_swgen0_true_f()) != 0U) {
@@ -294,6 +307,23 @@ static void ga10b_gsp_handle_interrupts(struct gk20a *g, u32 intr)
 		}
 	}
 #endif
+
+	/* handling ecc error by extirq7 */
+	if ((intr & pgsp_falcon_irqstat_extirq7_true_f()) != 0U) {
+		nvgpu_err(g, "ECC error detected.");
+		ecc_status = nvgpu_readl(g, pgsp_falcon_ecc_status_r());
+		if (ga10b_gsp_handle_ecc(g, ecc_status, NVGPU_ERR_MODULE_GSP_SCHED) != 0) {
+			nvgpu_err(g, "nvgpu ecc error handling failed err=");
+		}
+	}
+
+	if ((intr & pgsp_falcon_irqstat_iopmp_true_f()) != 0U) {
+		nvgpu_err(g, "GSP Pri access failure IOPMP");
+	}
+
+	if ((intr & pgsp_falcon_irqstat_memerr_true_f()) != 0U) {
+		nvgpu_err(g, "GSP Pri access failure MEMERR");
+	}
 }

 void ga10b_gsp_isr(struct gk20a *g, struct nvgpu_gsp *gsp)
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/ga10b/hw_pgsp_ga10b.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/ga10b/hw_pgsp_ga10b.h
@@ -65,10 +65,14 @@
 #define pgsp_falcon_engine_reset_true_f()                                 (0x1U)
 #define pgsp_falcon_engine_reset_false_f()                                (0x0U)
 #define pgsp_falcon_irqstat_r()                                    (0x00110008U)
+#define pgsp_falcon_irqstat_wdtmr_true_f()                                (0x2U)
 #define pgsp_falcon_irqstat_halt_true_f()                                (0x10U)
 #define pgsp_falcon_irqstat_exterr_true_f()                              (0x20U)
 #define pgsp_falcon_irqstat_swgen0_true_f()                              (0x40U)
 #define pgsp_falcon_irqstat_swgen1_true_f()                              (0x80U)
+#define pgsp_falcon_irqstat_extirq7_true_f()                           (0x4000U)
+#define pgsp_falcon_irqstat_memerr_true_f()                           (0x40000U)
+#define pgsp_falcon_irqstat_iopmp_true_f()                           (0x800000U)
 #define pgsp_falcon_irqsclr_r()                                    (0x00110004U)
 #define pgsp_falcon_irqmode_r()                                    (0x0011000cU)
 #define pgsp_riscv_irqmset_r()                                     (0x00111520U)