gpu: nvgpu: PMU NVRISCV engine HSI support

The HSIs listed below are handled by the PMU ISR handler;
each of them triggers an interrupt from its individual unit when an issue occurs.

-Add ECC check for IMEM, DMEM, DCLS, REG, and MPU as per
 HSI req
-Add MEMERR check for GPU_PMU_ACCESS_TIMEOUT_UNCORRECTED
 PMU HSI id
-Add IOPMP check for GPU_PMU_ILLEGAL_ACCESS_UNCORRECTED
 PMU HSI id
-Add WDT check for GPU_PMU_WDT_UNCORRECTED PMU HSI id

Bug 3491596
Bug 3366818

Change-Id: I751d653e447017ac62a2459da2c6bb9da506f438
Signed-off-by: mkumbar <mkumbar@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2686566
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
mkumbar
2022-03-24 19:32:44 +05:30
committed by mobile promotions
parent 8cce8dea70
commit 0b9dc3dbc3
4 changed files with 94 additions and 3 deletions

View File

@@ -1352,7 +1352,7 @@ static const struct gops_pmu ga10b_ops_pmu = {
.get_irqdest = gv11b_pmu_get_irqdest,
.get_irqmask = ga10b_pmu_get_irqmask,
.pmu_isr = gk20a_pmu_isr,
.handle_ext_irq = gv11b_pmu_handle_ext_irq,
.handle_ext_irq = ga10b_pmu_handle_ext_irq,
#ifdef CONFIG_NVGPU_LS_PMU
.get_inst_block_config = ga10b_pmu_get_inst_block_config,
/* Init */

View File

@@ -429,3 +429,87 @@ void ga10b_pmu_enable_irq(struct nvgpu_pmu *pmu, bool enable)
gv11b_pmu_enable_irq(pmu, enable);
}
}
static int ga10b_pmu_handle_ecc(struct gk20a *g)
{
int ret = 0;
u32 ecc_status = 0;
ecc_status = nvgpu_readl(g, pwr_pmu_falcon_ecc_status_r());
if ((ecc_status &
pwr_pmu_falcon_ecc_status_uncorrected_err_imem_m()) != 0U) {
nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_PMU,
GPU_PMU_IMEM_ECC_UNCORRECTED);
nvgpu_err(g, "imem ecc error uncorrected ");
ret = -EFAULT;
}
if ((ecc_status &
pwr_pmu_falcon_ecc_status_uncorrected_err_dmem_m()) != 0U) {
nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_PMU,
GPU_PMU_DMEM_ECC_UNCORRECTED);
nvgpu_err(g, "dmem ecc error uncorrected");
ret = -EFAULT;
}
if ((ecc_status &
pwr_pmu_falcon_ecc_status_uncorrected_err_dcls_m()) != 0U) {
nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_PMU,
GPU_PMU_DCLS_UNCORRECTED);
nvgpu_err(g, "dcls ecc error uncorrected");
ret = -EFAULT;
}
if ((ecc_status &
pwr_pmu_falcon_ecc_status_uncorrected_err_reg_m()) != 0U) {
nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_PMU,
GPU_PMU_REG_ECC_UNCORRECTED);
nvgpu_err(g, "reg ecc error uncorrected");
ret = -EFAULT;
}
if ((ecc_status &
pwr_pmu_falcon_ecc_status_uncorrected_err_mpu_ram_m()) != 0U) {
nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_PMU,
GPU_PMU_MPU_ECC_UNCORRECTED);
nvgpu_err(g, "mpu ecc error uncorrected");
ret = -EFAULT;
}
if (ret != 0) {
nvgpu_err(g, "ecc_addr(0x%x)",
nvgpu_readl(g, pwr_pmu_falcon_ecc_address_r()));
}
return ret;
}
/*
 * ga10b_pmu_handle_ext_irq - service the PMU NVRISCV HSI extended interrupts.
 *
 * Decodes the pending bits in @intr0 (a pwr_falcon_irqstat value) and, for
 * each flagged HSI condition, reports the corresponding error id to SDL and
 * logs it:
 *  - ECC/parity  -> per-unit ids via ga10b_pmu_handle_ecc()
 *  - MEMERR      -> GPU_PMU_ACCESS_TIMEOUT_UNCORRECTED
 *  - IOPMP       -> GPU_PMU_ILLEGAL_ACCESS_UNCORRECTED
 *  - WDT         -> GPU_PMU_WDT_UNCORRECTED
 */
void ga10b_pmu_handle_ext_irq(struct gk20a *g, u32 intr0)
{
	/* handle the ECC interrupt */
	if ((intr0 & pwr_falcon_irqstat_ext_ecc_parity_true_f()) != 0U) {
		/*
		 * Return value explicitly discarded (MISRA C:2012 17.7):
		 * the helper already reports each ECC error to SDL and
		 * logs it, and this void ISR path has no status to
		 * propagate.
		 */
		(void) ga10b_pmu_handle_ecc(g);
	}

	/* handle the MEMERR interrupt */
	if ((intr0 & pwr_falcon_irqstat_memerr_true_f()) != 0U) {
		nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_PMU,
				GPU_PMU_ACCESS_TIMEOUT_UNCORRECTED);
		nvgpu_err(g, "memerr/access timeout error uncorrected");
	}

	/* handle the IOPMP interrupt */
	if ((intr0 & pwr_falcon_irqstat_iopmp_true_f()) != 0U) {
		nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_PMU,
				GPU_PMU_ILLEGAL_ACCESS_UNCORRECTED);
		nvgpu_err(g, "iopmp/illegal access error uncorrected");
	}

	/* handle the WDT interrupt */
	if ((intr0 & pwr_falcon_irqstat_wdt_true_f()) != 0U) {
		nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_PMU,
				GPU_PMU_WDT_UNCORRECTED);
		nvgpu_err(g, "wdt error uncorrected");
	}
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -57,4 +57,5 @@ void ga10b_pmu_handle_swgen1_irq(struct gk20a *g, u32 intr);
bool ga10b_pmu_is_interrupted(struct nvgpu_pmu *pmu);
#endif
void ga10b_pmu_enable_irq(struct nvgpu_pmu *pmu, bool enable);
void ga10b_pmu_handle_ext_irq(struct gk20a *g, u32 intr0);
#endif /* NVGPU_PMU_GA10B_H */

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -64,11 +64,14 @@
#define pwr_falcon_irqsset_swgen0_set_f() (0x40U)
#define pwr_falcon_irqsclr_r() (0x0010a004U)
#define pwr_falcon_irqstat_r() (0x0010a008U)
#define pwr_falcon_irqstat_wdt_true_f() (0x2U)
#define pwr_falcon_irqstat_halt_true_f() (0x10U)
#define pwr_falcon_irqstat_exterr_true_f() (0x20U)
#define pwr_falcon_irqstat_swgen0_true_f() (0x40U)
#define pwr_falcon_irqstat_ext_ecc_parity_true_f() (0x400U)
#define pwr_falcon_irqstat_swgen1_true_f() (0x80U)
#define pwr_falcon_irqstat_memerr_true_f() (0x40000U)
#define pwr_falcon_irqstat_iopmp_true_f() (0x800000U)
#define pwr_pmu_ecc_intr_status_r() (0x0010abfcU)
#define pwr_pmu_ecc_intr_status_corrected_m() (U32(0x1U) << 0U)
#define pwr_pmu_ecc_intr_status_uncorrected_m() (U32(0x1U) << 1U)
@@ -233,6 +236,9 @@
#define pwr_pmu_falcon_ecc_status_corrected_err_dmem_m() (U32(0x1U) << 1U)
#define pwr_pmu_falcon_ecc_status_uncorrected_err_imem_m() (U32(0x1U) << 8U)
#define pwr_pmu_falcon_ecc_status_uncorrected_err_dmem_m() (U32(0x1U) << 9U)
#define pwr_pmu_falcon_ecc_status_uncorrected_err_mpu_ram_m() (U32(0x1U) << 10U)
#define pwr_pmu_falcon_ecc_status_uncorrected_err_dcls_m() (U32(0x1U) << 11U)
#define pwr_pmu_falcon_ecc_status_uncorrected_err_reg_m() (U32(0x1U) << 12U)
#define pwr_pmu_falcon_ecc_status_corrected_err_total_counter_overflow_m()\
(U32(0x1U) << 16U)
#define pwr_pmu_falcon_ecc_status_uncorrected_err_total_counter_overflow_m()\