gpu: nvgpu: Add new error IDs for GA10B

Add new error IDs for GA10B and remove some
unused error IDs.

Change-Id: Id5e360b9da9b6e352167575810b460e743cf8eb7
Signed-off-by: Dinesh T <dt@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2676757
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Dinesh T
2022-03-02 11:17:52 +00:00
committed by mobile promotions
parent 0a1bc4cca5
commit 162ad1bebf
4 changed files with 14 additions and 35 deletions

View File

@@ -453,7 +453,7 @@ struct nvgpu_err_hw_module ga10b_err_lut[] = {
.name = "ltc", .name = "ltc",
.hw_unit = (u32)NVGPU_ERR_MODULE_LTC, .hw_unit = (u32)NVGPU_ERR_MODULE_LTC,
.num_instances = 1U, .num_instances = 1U,
.num_errs = 8U, .num_errs = 4U,
.errs = (struct nvgpu_err_desc[]) { .errs = (struct nvgpu_err_desc[]) {
GPU_NONCRITERR("cache_dstg_ecc_corrected", GPU_NONCRITERR("cache_dstg_ecc_corrected",
GPU_LTC_CACHE_DSTG_ECC_CORRECTED, GPU_LTC_CACHE_DSTG_ECC_CORRECTED,
@@ -465,30 +465,14 @@ struct nvgpu_err_hw_module ga10b_err_lut[] = {
INJECT_SW, INJECT_SW,
NULL, NULL, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_NONCRITERR("cache_tstg_ecc_corrected",
0, INJECT_NONE,
NULL, NULL,
NULL, NULL, 0, 0),
GPU_CRITERR("cache_tstg_ecc_uncorrected", GPU_CRITERR("cache_tstg_ecc_uncorrected",
GPU_LTC_CACHE_TSTG_ECC_UNCORRECTED, GPU_LTC_CACHE_TSTG_ECC_UNCORRECTED,
INJECT_SW, INJECT_SW,
NULL, NULL, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_NONCRITERR("cache_rstg_ecc_corrected", GPU_CRITERR("cache_rstg_cbc_ecc_uncorrected",
0, INJECT_NONE, GPU_LTC_CACHE_RSTG_CBC_ECC_UNCORRECTED,
NULL, NULL, INJECT_NONE,
NULL, NULL, 0, 0),
GPU_CRITERR("cache_rstg_ecc_uncorrected",
0, INJECT_NONE,
NULL, NULL,
NULL, NULL, 0, 0),
GPU_NONCRITERR("cache_dstg_be_ecc_corrected",
0, INJECT_NONE,
NULL, NULL,
NULL, NULL, 0, 0),
GPU_CRITERR("cache_dstg_be_ecc_uncorrected",
GPU_LTC_CACHE_DSTG_BE_ECC_UNCORRECTED,
INJECT_SW,
NULL, NULL, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
}, },

View File

@@ -412,7 +412,7 @@ static void ga10b_ltc_intr_handle_rstg_ecc_interrupts(struct gk20a *g,
g->ecc.ltc.rstg_ecc_parity_count[ltc][slice].counter, g->ecc.ltc.rstg_ecc_parity_count[ltc][slice].counter,
uncorrected_delta); uncorrected_delta);
nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_LTC, nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_LTC,
GPU_LTC_CACHE_RSTG_ECC_UNCORRECTED); GPU_LTC_CACHE_RSTG_CBC_ECC_UNCORRECTED);
} }
if ((ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_rstg_m()) != 0U) { if ((ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_rstg_m()) != 0U) {
@@ -552,8 +552,6 @@ static void ga10b_ltc_intr_handle_dstg_ecc_interrupts(struct gk20a *g,
g->ecc.ltc.dstg_be_ecc_parity_count[ltc][slice].counter, g->ecc.ltc.dstg_be_ecc_parity_count[ltc][slice].counter,
uncorrected_delta); uncorrected_delta);
nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_LTC,
GPU_LTC_CACHE_DSTG_BE_ECC_UNCORRECTED);
} else { } else {
nvgpu_err(g, "unsupported uncorrected dstg ecc error"); nvgpu_err(g, "unsupported uncorrected dstg ecc error");
BUG(); BUG();

View File

@@ -146,8 +146,6 @@ void gv11b_ltc_intr_handle_dstg_ecc_interrupts(struct gk20a *g,
g->ecc.ltc.dstg_be_ecc_parity_count[ltc][slice].counter, g->ecc.ltc.dstg_be_ecc_parity_count[ltc][slice].counter,
uncorrected_delta); uncorrected_delta);
nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_LTC,
GPU_LTC_CACHE_DSTG_BE_ECC_UNCORRECTED);
nvgpu_err(g, "dstg be ecc error uncorrected. " nvgpu_err(g, "dstg be ecc error uncorrected. "
"ecc_addr(0x%x)", ecc_addr); "ecc_addr(0x%x)", ecc_addr);
} }

View File

@@ -140,8 +140,8 @@ struct mmu_fault_info;
* Macros used to assign unique index to errors reported from the MMU unit. * Macros used to assign unique index to errors reported from the MMU unit.
* @{ * @{
*/ */
#define GPU_MMU_L1TLB_SA_DATA_ECC_UNCORRECTED (1U) #define GPU_MMU_L1TLB_SA_DATA_ECC_UNCORRECTED (0U)
#define GPU_MMU_L1TLB_FA_DATA_ECC_UNCORRECTED (3U) #define GPU_MMU_L1TLB_FA_DATA_ECC_UNCORRECTED (1U)
/** /**
* @} * @}
*/ */
@@ -225,9 +225,8 @@ struct mmu_fault_info;
*/ */
#define GPU_LTC_CACHE_DSTG_ECC_CORRECTED (0U) #define GPU_LTC_CACHE_DSTG_ECC_CORRECTED (0U)
#define GPU_LTC_CACHE_DSTG_ECC_UNCORRECTED (1U) #define GPU_LTC_CACHE_DSTG_ECC_UNCORRECTED (1U)
#define GPU_LTC_CACHE_TSTG_ECC_UNCORRECTED (3U) #define GPU_LTC_CACHE_TSTG_ECC_UNCORRECTED (2U)
#define GPU_LTC_CACHE_RSTG_ECC_UNCORRECTED (5U) #define GPU_LTC_CACHE_RSTG_CBC_ECC_UNCORRECTED (3U)
#define GPU_LTC_CACHE_DSTG_BE_ECC_UNCORRECTED (7U)
/** /**
* @} * @}
*/ */
@@ -237,11 +236,11 @@ struct mmu_fault_info;
* Macros used to assign unique index to errors reported from the HUBMMU unit. * Macros used to assign unique index to errors reported from the HUBMMU unit.
* @{ * @{
*/ */
#define GPU_HUBMMU_L2TLB_SA_DATA_ECC_UNCORRECTED (1U) #define GPU_HUBMMU_L2TLB_SA_DATA_ECC_UNCORRECTED (0U)
#define GPU_HUBMMU_TLB_SA_DATA_ECC_UNCORRECTED (3U) #define GPU_HUBMMU_TLB_SA_DATA_ECC_UNCORRECTED (1U)
#define GPU_HUBMMU_PTE_DATA_ECC_UNCORRECTED (5U) #define GPU_HUBMMU_PTE_DATA_ECC_UNCORRECTED (2U)
#define GPU_HUBMMU_PDE0_DATA_ECC_UNCORRECTED (7U) #define GPU_HUBMMU_PDE0_DATA_ECC_UNCORRECTED (3U)
#define GPU_HUBMMU_PAGE_FAULT_ERROR (8U) #define GPU_HUBMMU_PAGE_FAULT_ERROR (4U)
/** /**
* @} * @}
*/ */