From 0b9dc3dbc39e033d0f9e9cc3f4ac393c48a38598 Mon Sep 17 00:00:00 2001
From: mkumbar <mkumbar@nvidia.com>
Date: Thu, 24 Mar 2022 19:32:44 +0530
Subject: [PATCH] gpu: nvgpu: PMU NVRISCV engine HSI support

The HSI errors listed below are handled in the PMU ISR handler;
each of them triggers an interrupt from its individual unit when it occurs.

- Add ECC checks for IMEM, DMEM, DCLS, REG, and MPU as per
  the HSI requirement
-Add MEMERR check for GPU_PMU_ACCESS_TIMEOUT_UNCORRECTED
 PMU HSI id
-Add IOPMP check for GPU_PMU_ILLEGAL_ACCESS_UNCORRECTED
 PMU HSI id
-Add WDT check for GPU_PMU_WDT_UNCORRECTED PMU HSI id

Bug 3491596
Bug 3366818

Change-Id: I751d653e447017ac62a2459da2c6bb9da506f438
Signed-off-by: mkumbar <mkumbar@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2686566
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/hal/init/hal_ga10b.c        |  2 +-
 drivers/gpu/nvgpu/hal/pmu/pmu_ga10b.c         | 84 +++++++++++++++++++
 drivers/gpu/nvgpu/hal/pmu/pmu_ga10b.h         |  3 +-
 .../include/nvgpu/hw/ga10b/hw_pwr_ga10b.h     |  8 +-
 4 files changed, 94 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/nvgpu/hal/init/hal_ga10b.c b/drivers/gpu/nvgpu/hal/init/hal_ga10b.c
index d1df42833..83b6aa5ce 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_ga10b.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_ga10b.c
@@ -1352,7 +1352,7 @@ static const struct gops_pmu ga10b_ops_pmu = {
 	.get_irqdest = gv11b_pmu_get_irqdest,
 	.get_irqmask = ga10b_pmu_get_irqmask,
 	.pmu_isr = gk20a_pmu_isr,
-	.handle_ext_irq = gv11b_pmu_handle_ext_irq,
+	.handle_ext_irq = ga10b_pmu_handle_ext_irq,
 #ifdef CONFIG_NVGPU_LS_PMU
 	.get_inst_block_config = ga10b_pmu_get_inst_block_config,
 	/* Init */
diff --git a/drivers/gpu/nvgpu/hal/pmu/pmu_ga10b.c b/drivers/gpu/nvgpu/hal/pmu/pmu_ga10b.c
index 418de2a20..69713394d 100644
--- a/drivers/gpu/nvgpu/hal/pmu/pmu_ga10b.c
+++ b/drivers/gpu/nvgpu/hal/pmu/pmu_ga10b.c
@@ -429,3 +429,87 @@ void ga10b_pmu_enable_irq(struct nvgpu_pmu *pmu, bool enable)
 		gv11b_pmu_enable_irq(pmu, enable);
 	}
 }
+
+static int ga10b_pmu_handle_ecc(struct gk20a *g)
+{ /* Report each uncorrected PMU ECC error; returns 0 or -EFAULT. */
+	int ret = 0; /* stays 0 unless an uncorrected bit is found below */
+	u32 ecc_status = 0;
+
+	ecc_status = nvgpu_readl(g, pwr_pmu_falcon_ecc_status_r());
+
+	if ((ecc_status &
+		pwr_pmu_falcon_ecc_status_uncorrected_err_imem_m()) != 0U) {
+		nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_PMU,
+					GPU_PMU_IMEM_ECC_UNCORRECTED);
+		nvgpu_err(g, "imem ecc error uncorrected ");
+		ret = -EFAULT;
+	}
+
+	if ((ecc_status &
+		pwr_pmu_falcon_ecc_status_uncorrected_err_dmem_m()) != 0U) {
+		nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_PMU,
+					GPU_PMU_DMEM_ECC_UNCORRECTED);
+		nvgpu_err(g, "dmem ecc error uncorrected");
+		ret = -EFAULT;
+	}
+
+	if ((ecc_status &
+		pwr_pmu_falcon_ecc_status_uncorrected_err_dcls_m()) != 0U) {
+		nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_PMU,
+					GPU_PMU_DCLS_UNCORRECTED);
+		nvgpu_err(g, "dcls ecc error uncorrected");
+		ret = -EFAULT;
+	}
+
+	if ((ecc_status &
+		pwr_pmu_falcon_ecc_status_uncorrected_err_reg_m()) != 0U) {
+		nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_PMU,
+					GPU_PMU_REG_ECC_UNCORRECTED);
+		nvgpu_err(g, "reg ecc error uncorrected");
+		ret = -EFAULT;
+	}
+
+	if ((ecc_status &
+		pwr_pmu_falcon_ecc_status_uncorrected_err_mpu_ram_m()) != 0U) {
+		nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_PMU,
+					GPU_PMU_MPU_ECC_UNCORRECTED);
+		nvgpu_err(g, "mpu ecc error uncorrected");
+		ret = -EFAULT;
+	}
+
+	if (ret != 0) { /* any uncorrected error: also log the ECC address */
+		nvgpu_err(g, "ecc_addr(0x%x)",
+			nvgpu_readl(g, pwr_pmu_falcon_ecc_address_r()));
+	}
+
+	return ret; /* NOTE(review): ECC status is read, not cleared, here */
+}
+
+void ga10b_pmu_handle_ext_irq(struct gk20a *g, u32 intr0)
+{ /* Check intr0 against each irqstat bit; every set bit is handled. */
+	/* ECC/parity: decode via ga10b_pmu_handle_ecc(); its result is unused */
+	if ((intr0 & pwr_falcon_irqstat_ext_ecc_parity_true_f()) != 0U) {
+		ga10b_pmu_handle_ecc(g);
+	}
+
+	/* MEMERR: reported to SDL as GPU_PMU_ACCESS_TIMEOUT_UNCORRECTED */
+	if ((intr0 & pwr_falcon_irqstat_memerr_true_f()) != 0U) {
+		nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_PMU,
+				GPU_PMU_ACCESS_TIMEOUT_UNCORRECTED);
+		nvgpu_err(g, "memerr/access timeout error uncorrected");
+	}
+
+	/* IOPMP: reported to SDL as GPU_PMU_ILLEGAL_ACCESS_UNCORRECTED */
+	if ((intr0 & pwr_falcon_irqstat_iopmp_true_f()) != 0U) {
+		nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_PMU,
+				GPU_PMU_ILLEGAL_ACCESS_UNCORRECTED);
+		nvgpu_err(g, "iopmp/illegal access error uncorrected");
+	}
+
+	/* WDT: reported to SDL as GPU_PMU_WDT_UNCORRECTED */
+	if ((intr0 & pwr_falcon_irqstat_wdt_true_f()) != 0U) {
+		nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_PMU,
+				GPU_PMU_WDT_UNCORRECTED);
+		nvgpu_err(g, "wdt error uncorrected");
+	}
+}
diff --git a/drivers/gpu/nvgpu/hal/pmu/pmu_ga10b.h b/drivers/gpu/nvgpu/hal/pmu/pmu_ga10b.h
index 246123a2d..723f31dae 100644
--- a/drivers/gpu/nvgpu/hal/pmu/pmu_ga10b.h
+++ b/drivers/gpu/nvgpu/hal/pmu/pmu_ga10b.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2021, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -57,4 +57,5 @@ void ga10b_pmu_handle_swgen1_irq(struct gk20a *g, u32 intr);
 bool ga10b_pmu_is_interrupted(struct nvgpu_pmu *pmu);
 #endif
 void ga10b_pmu_enable_irq(struct nvgpu_pmu *pmu, bool enable);
+void ga10b_pmu_handle_ext_irq(struct gk20a *g, u32 intr0);
 #endif /* NVGPU_PMU_GA10B_H */
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/ga10b/hw_pwr_ga10b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/ga10b/hw_pwr_ga10b.h
index ff573664a..4559a6c91 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/ga10b/hw_pwr_ga10b.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/ga10b/hw_pwr_ga10b.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -64,11 +64,14 @@
 #define pwr_falcon_irqsset_swgen0_set_f()                                (0x40U)
 #define pwr_falcon_irqsclr_r()                                     (0x0010a004U)
 #define pwr_falcon_irqstat_r()                                     (0x0010a008U)
+#define pwr_falcon_irqstat_wdt_true_f()                                   (0x2U)
 #define pwr_falcon_irqstat_halt_true_f()                                 (0x10U)
 #define pwr_falcon_irqstat_exterr_true_f()                               (0x20U)
 #define pwr_falcon_irqstat_swgen0_true_f()                               (0x40U)
 #define pwr_falcon_irqstat_ext_ecc_parity_true_f()                      (0x400U)
 #define pwr_falcon_irqstat_swgen1_true_f()                               (0x80U)
+#define pwr_falcon_irqstat_memerr_true_f()                            (0x40000U)
+#define pwr_falcon_irqstat_iopmp_true_f()                            (0x800000U)
 #define pwr_pmu_ecc_intr_status_r()                                (0x0010abfcU)
 #define pwr_pmu_ecc_intr_status_corrected_m()                  (U32(0x1U) << 0U)
 #define pwr_pmu_ecc_intr_status_uncorrected_m()                (U32(0x1U) << 1U)
@@ -233,6 +236,9 @@
 #define pwr_pmu_falcon_ecc_status_corrected_err_dmem_m()       (U32(0x1U) << 1U)
 #define pwr_pmu_falcon_ecc_status_uncorrected_err_imem_m()     (U32(0x1U) << 8U)
 #define pwr_pmu_falcon_ecc_status_uncorrected_err_dmem_m()     (U32(0x1U) << 9U)
+#define pwr_pmu_falcon_ecc_status_uncorrected_err_mpu_ram_m() (U32(0x1U) << 10U)
+#define pwr_pmu_falcon_ecc_status_uncorrected_err_dcls_m()    (U32(0x1U) << 11U)
+#define pwr_pmu_falcon_ecc_status_uncorrected_err_reg_m()     (U32(0x1U) << 12U)
 #define pwr_pmu_falcon_ecc_status_corrected_err_total_counter_overflow_m()\
 				(U32(0x1U) << 16U)
 #define pwr_pmu_falcon_ecc_status_uncorrected_err_total_counter_overflow_m()\