diff --git a/drivers/gpu/nvgpu/common/ltc/ltc_gv11b.c b/drivers/gpu/nvgpu/common/ltc/ltc_gv11b.c index 9906c3255..51380549b 100644 --- a/drivers/gpu/nvgpu/common/ltc/ltc_gv11b.c +++ b/drivers/gpu/nvgpu/common/ltc/ltc_gv11b.c @@ -24,6 +24,7 @@ #include #include +#include #include "ltc_gp10b.h" #include "ltc_gv11b.h" @@ -35,6 +36,31 @@ #include +static void gv11b_ltc_report_ecc_error(struct gk20a *g, u32 ltc, u32 slice, + u32 err_type, u64 err_addr, u64 err_cnt) +{ + int ret = 0; + u32 inst = 0U; + + if (g->ops.ltc.err_ops.report_ecc_parity_err == NULL) { + return; + } + if (slice < 256U) { + inst = (ltc << 8U) | slice; + } else { + nvgpu_err(g, "Invalid slice id=%u", slice); + return; + } + ret = g->ops.ltc.err_ops.report_ecc_parity_err(g, + NVGPU_ERR_MODULE_LTC, inst, err_type, err_addr, + err_cnt); + if (ret != 0) { + nvgpu_err(g, "Failed to report LTC error: inst=%u, " + "err_type=%u, err_addr=%llu, err_cnt=%llu", + inst, err_type, err_addr, err_cnt); + } +} + /* * Sets the ZBC stencil for the passed index. 
*/ @@ -174,21 +200,39 @@ void gv11b_ltc_lts_isr(struct gk20a *g, unsigned int ltc, unsigned int slice) "ltc:%d lts: %d cache ecc interrupt intr: 0x%x", ltc, slice, ltc_intr3); if ((ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_rstg_m()) != 0U) { + gv11b_ltc_report_ecc_error(g, ltc, slice, + LTC_CACHE_RSTG_ECC_CORRECTED, ecc_addr, + g->ecc.ltc.ecc_sec_count[ltc][slice].counter); nvgpu_log(g, gpu_dbg_intr, "rstg ecc error corrected"); } if ((ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_rstg_m()) != 0U) { + gv11b_ltc_report_ecc_error(g, ltc, slice, + LTC_CACHE_RSTG_ECC_UNCORRECTED, ecc_addr, + g->ecc.ltc.ecc_ded_count[ltc][slice].counter); nvgpu_log(g, gpu_dbg_intr, "rstg ecc error uncorrected"); } if ((ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_tstg_m()) != 0U) { + gv11b_ltc_report_ecc_error(g, ltc, slice, + LTC_CACHE_TSTG_ECC_CORRECTED, ecc_addr, + g->ecc.ltc.ecc_sec_count[ltc][slice].counter); nvgpu_log(g, gpu_dbg_intr, "tstg ecc error corrected"); } if ((ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_tstg_m()) != 0U) { + gv11b_ltc_report_ecc_error(g, ltc, slice, + LTC_CACHE_TSTG_ECC_UNCORRECTED, ecc_addr, + g->ecc.ltc.ecc_ded_count[ltc][slice].counter); nvgpu_log(g, gpu_dbg_intr, "tstg ecc error uncorrected"); } if ((ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_dstg_m()) != 0U) { + gv11b_ltc_report_ecc_error(g, ltc, slice, + LTC_CACHE_DSTG_ECC_CORRECTED, ecc_addr, + g->ecc.ltc.ecc_sec_count[ltc][slice].counter); nvgpu_log(g, gpu_dbg_intr, "dstg ecc error corrected"); } if ((ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_dstg_m()) != 0U) { + gv11b_ltc_report_ecc_error(g, ltc, slice, + LTC_CACHE_DSTG_ECC_UNCORRECTED, ecc_addr, + g->ecc.ltc.ecc_ded_count[ltc][slice].counter); nvgpu_log(g, gpu_dbg_intr, "dstg ecc error uncorrected"); } diff --git a/drivers/gpu/nvgpu/common/pmu/pmu_gv11b.c b/drivers/gpu/nvgpu/common/pmu/pmu_gv11b.c index 837f5ff24..a1f365216 100644 --- 
a/drivers/gpu/nvgpu/common/pmu/pmu_gv11b.c +++ b/drivers/gpu/nvgpu/common/pmu/pmu_gv11b.c @@ -29,6 +29,7 @@ #include #include #include +#include #include "pmu_gp10b.h" #include "pmu_gp106.h" @@ -115,6 +116,24 @@ static struct pg_init_sequence_list _pginitseq_gv11b[] = { {0x00020004, 0x00000000} , }; +static void gv11b_pmu_report_ecc_error(struct gk20a *g, u32 inst, + u32 err_type, u64 err_addr, u64 err_cnt) +{ + int ret = 0; + + if (g->ops.pmu.err_ops.report_ecc_parity_err == NULL) { + return; + } + ret = g->ops.pmu.err_ops.report_ecc_parity_err(g, + NVGPU_ERR_MODULE_PWR, inst, err_type, err_addr, + err_cnt); + if (ret != 0) { + nvgpu_err(g, "Failed to report PMU error: inst=%u, " + "err_type=%u, err_addr=%llu, err_cnt=%llu", + inst, err_type, err_addr, err_cnt); + } +} + int gv11b_pmu_setup_elpg(struct gk20a *g) { int ret = 0; @@ -354,18 +373,34 @@ void gv11b_pmu_handle_ext_irq(struct gk20a *g, u32 intr0) "pmu ecc interrupt intr1: 0x%x", intr1); if ((ecc_status & pwr_pmu_falcon_ecc_status_corrected_err_imem_m()) != 0U) { + gv11b_pmu_report_ecc_error(g, 0, + GPU_PMU_FALCON_IMEM_ECC_CORRECTED, + ecc_addr, + g->ecc.pmu.pmu_ecc_corrected_err_count[0].counter); nvgpu_log(g, gpu_dbg_intr, "imem ecc error corrected"); } if ((ecc_status & pwr_pmu_falcon_ecc_status_uncorrected_err_imem_m()) != 0U) { + gv11b_pmu_report_ecc_error(g, 0, + GPU_PMU_FALCON_IMEM_ECC_UNCORRECTED, + ecc_addr, + g->ecc.pmu.pmu_ecc_uncorrected_err_count[0].counter); nvgpu_log(g, gpu_dbg_intr, "imem ecc error uncorrected"); } if ((ecc_status & pwr_pmu_falcon_ecc_status_corrected_err_dmem_m()) != 0U) { + gv11b_pmu_report_ecc_error(g, 0, + GPU_PMU_FALCON_DMEM_ECC_CORRECTED, + ecc_addr, + g->ecc.pmu.pmu_ecc_corrected_err_count[0].counter); nvgpu_log(g, gpu_dbg_intr, "dmem ecc error corrected"); } if ((ecc_status & pwr_pmu_falcon_ecc_status_uncorrected_err_dmem_m()) != 0U) { + gv11b_pmu_report_ecc_error(g, 0, + GPU_PMU_FALCON_DMEM_ECC_UNCORRECTED, + ecc_addr, + 
g->ecc.pmu.pmu_ecc_uncorrected_err_count[0].counter); nvgpu_log(g, gpu_dbg_intr, "dmem ecc error uncorrected"); } diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 8e912d24d..f0903305f 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -38,6 +38,7 @@ #include #include #include +#include #include "gk20a/gr_gk20a.h" #include "gk20a/regops_gk20a.h" @@ -71,6 +72,33 @@ */ #define GR_TPCS_INFO_FOR_MAPREGISTER 6U +static void gv11b_gr_report_ecc_error(struct gk20a *g, u32 hw_module, + u32 gpc, u32 tpc, u32 err_type, + u64 err_addr, u64 err_cnt) +{ + int ret = 0; + u32 inst = 0U; + + if (g->ops.gr.err_ops.report_ecc_parity_err == NULL) { + return; + } + if (tpc < 256U) { + inst = (gpc << 8U) | tpc; + } else { + nvgpu_err(g, "Invalid tpc id=%u", tpc); + return; + } + ret = g->ops.gr.err_ops.report_ecc_parity_err(g, + hw_module, inst, err_type, + err_addr, err_cnt); + if (ret != 0) { + nvgpu_err(g, "Failed to report GR error: hw_module=%u, " + "inst=%u, err_type=%u, err_addr=%llu, " + "err_cnt=%llu", hw_module, inst, err_type, + err_addr, err_cnt); + } +} + bool gr_gv11b_is_valid_class(struct gk20a *g, u32 class_num) { bool valid = false; @@ -222,6 +250,9 @@ static int gr_gv11b_handle_l1_tag_exception(struct gk20a *g, u32 gpc, u32 tpc, } g->ecc.gr.sm_l1_tag_ecc_corrected_err_count[gpc][tpc].counter += l1_tag_corrected_err_count_delta; + gv11b_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc, + GPU_SM_L1_TAG_ECC_CORRECTED, 0, + g->ecc.gr.sm_l1_tag_ecc_corrected_err_count[gpc][tpc].counter); gk20a_writel(g, gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_r() + offset, 0); @@ -238,6 +269,9 @@ static int gr_gv11b_handle_l1_tag_exception(struct gk20a *g, u32 gpc, u32 tpc, } g->ecc.gr.sm_l1_tag_ecc_uncorrected_err_count[gpc][tpc].counter += l1_tag_uncorrected_err_count_delta; + gv11b_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc, + GPU_SM_L1_TAG_ECC_UNCORRECTED, 0, + 
g->ecc.gr.sm_l1_tag_ecc_uncorrected_err_count[gpc][tpc].counter); gk20a_writel(g, gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_r() + offset, 0); @@ -317,6 +351,9 @@ static int gr_gv11b_handle_lrf_exception(struct gk20a *g, u32 gpc, u32 tpc, } g->ecc.gr.sm_lrf_ecc_single_err_count[gpc][tpc].counter += lrf_corrected_err_count_delta; + gv11b_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc, + GPU_SM_LRF_ECC_CORRECTED, 0, + g->ecc.gr.sm_lrf_ecc_single_err_count[gpc][tpc].counter); gk20a_writel(g, gr_pri_gpc0_tpc0_sm_lrf_ecc_corrected_err_count_r() + offset, 0); @@ -333,6 +370,9 @@ static int gr_gv11b_handle_lrf_exception(struct gk20a *g, u32 gpc, u32 tpc, } g->ecc.gr.sm_lrf_ecc_double_err_count[gpc][tpc].counter += lrf_uncorrected_err_count_delta; + gv11b_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc, + GPU_SM_LRF_ECC_UNCORRECTED, 0, + g->ecc.gr.sm_lrf_ecc_double_err_count[gpc][tpc].counter); gk20a_writel(g, gr_pri_gpc0_tpc0_sm_lrf_ecc_uncorrected_err_count_r() + offset, 0); @@ -479,6 +519,9 @@ static int gr_gv11b_handle_cbu_exception(struct gk20a *g, u32 gpc, u32 tpc, } g->ecc.gr.sm_cbu_ecc_corrected_err_count[gpc][tpc].counter += cbu_corrected_err_count_delta; + gv11b_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc, + GPU_SM_CBU_ECC_CORRECTED, + 0, g->ecc.gr.sm_cbu_ecc_corrected_err_count[gpc][tpc].counter); gk20a_writel(g, gr_pri_gpc0_tpc0_sm_cbu_ecc_corrected_err_count_r() + offset, 0); @@ -495,6 +538,9 @@ static int gr_gv11b_handle_cbu_exception(struct gk20a *g, u32 gpc, u32 tpc, } g->ecc.gr.sm_cbu_ecc_uncorrected_err_count[gpc][tpc].counter += cbu_uncorrected_err_count_delta; + gv11b_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc, + GPU_SM_CBU_ECC_UNCORRECTED, + 0, g->ecc.gr.sm_cbu_ecc_uncorrected_err_count[gpc][tpc].counter); gk20a_writel(g, gr_pri_gpc0_tpc0_sm_cbu_ecc_uncorrected_err_count_r() + offset, 0); @@ -562,6 +608,9 @@ static int gr_gv11b_handle_l1_data_exception(struct gk20a *g, u32 gpc, u32 tpc, } 
g->ecc.gr.sm_l1_data_ecc_corrected_err_count[gpc][tpc].counter += l1_data_corrected_err_count_delta; + gv11b_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc, + GPU_SM_L1_DATA_ECC_CORRECTED, + 0, g->ecc.gr.sm_l1_data_ecc_corrected_err_count[gpc][tpc].counter); gk20a_writel(g, gr_pri_gpc0_tpc0_sm_l1_data_ecc_corrected_err_count_r() + offset, 0); @@ -578,11 +627,13 @@ static int gr_gv11b_handle_l1_data_exception(struct gk20a *g, u32 gpc, u32 tpc, } g->ecc.gr.sm_l1_data_ecc_uncorrected_err_count[gpc][tpc].counter += l1_data_uncorrected_err_count_delta; + gv11b_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc, + GPU_SM_L1_DATA_ECC_UNCORRECTED, + 0, g->ecc.gr.sm_l1_data_ecc_uncorrected_err_count[gpc][tpc].counter); gk20a_writel(g, gr_pri_gpc0_tpc0_sm_l1_data_ecc_uncorrected_err_count_r() + offset, 0); } - gk20a_writel(g, gr_pri_gpc0_tpc0_sm_l1_data_ecc_status_r() + offset, gr_pri_gpc0_tpc0_sm_l1_data_ecc_status_reset_task_f()); @@ -652,6 +703,30 @@ static int gr_gv11b_handle_icache_exception(struct gk20a *g, u32 gpc, u32 tpc, gk20a_writel(g, gr_pri_gpc0_tpc0_sm_icache_ecc_corrected_err_count_r() + offset, 0); + if ((icache_ecc_status & + gr_pri_gpc0_tpc0_sm_icache_ecc_status_corrected_err_l0_data_m()) != 0U) { + gv11b_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc, + GPU_SM_ICACHE_L0_DATA_ECC_CORRECTED, + 0, g->ecc.gr.sm_icache_ecc_corrected_err_count[gpc][tpc].counter); + } + if ((icache_ecc_status & + gr_pri_gpc0_tpc0_sm_icache_ecc_status_corrected_err_l0_predecode_m()) != 0U) { + gv11b_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc, + GPU_SM_ICACHE_L0_PREDECODE_ECC_CORRECTED, + 0, g->ecc.gr.sm_icache_ecc_corrected_err_count[gpc][tpc].counter); + } + if ((icache_ecc_status & + gr_pri_gpc0_tpc0_sm_icache_ecc_status_corrected_err_l1_data_m()) != 0U) { + gv11b_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc, + GPU_SM_ICACHE_L1_DATA_ECC_CORRECTED, + 0, g->ecc.gr.sm_icache_ecc_corrected_err_count[gpc][tpc].counter); + } + if ((icache_ecc_status & + 
gr_pri_gpc0_tpc0_sm_icache_ecc_status_corrected_err_l1_predecode_m()) != 0U) { + gv11b_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc, + GPU_SM_ICACHE_L1_PREDECODE_ECC_CORRECTED, + 0, g->ecc.gr.sm_icache_ecc_corrected_err_count[gpc][tpc].counter); + } } if ((icache_uncorrected_err_count_delta > 0U) || is_icache_ecc_uncorrected_total_err_overflow) { nvgpu_log(g, gpu_dbg_fn | gpu_dbg_intr, @@ -668,6 +743,30 @@ static int gr_gv11b_handle_icache_exception(struct gk20a *g, u32 gpc, u32 tpc, gk20a_writel(g, gr_pri_gpc0_tpc0_sm_icache_ecc_uncorrected_err_count_r() + offset, 0); + if ((icache_ecc_status & + gr_pri_gpc0_tpc0_sm_icache_ecc_status_uncorrected_err_l0_data_m()) != 0U) { + gv11b_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc, + GPU_SM_ICACHE_L0_DATA_ECC_UNCORRECTED, + 0, g->ecc.gr.sm_icache_ecc_uncorrected_err_count[gpc][tpc].counter); + } + if ((icache_ecc_status & + gr_pri_gpc0_tpc0_sm_icache_ecc_status_uncorrected_err_l0_predecode_m()) != 0U) { + gv11b_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc, + GPU_SM_ICACHE_L0_PREDECODE_ECC_UNCORRECTED, + 0, g->ecc.gr.sm_icache_ecc_uncorrected_err_count[gpc][tpc].counter); + } + if ((icache_ecc_status & + gr_pri_gpc0_tpc0_sm_icache_ecc_status_uncorrected_err_l1_data_m()) != 0U) { + gv11b_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc, + GPU_SM_ICACHE_L1_DATA_ECC_UNCORRECTED, + 0, g->ecc.gr.sm_icache_ecc_uncorrected_err_count[gpc][tpc].counter); + } + if ((icache_ecc_status & + gr_pri_gpc0_tpc0_sm_icache_ecc_status_uncorrected_err_l1_predecode_m()) != 0U) { + gv11b_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc, + GPU_SM_ICACHE_L1_PREDECODE_ECC_UNCORRECTED, + 0, g->ecc.gr.sm_icache_ecc_uncorrected_err_count[gpc][tpc].counter); + } } gk20a_writel(g, gr_pri_gpc0_tpc0_sm_icache_ecc_status_r() + offset, @@ -756,6 +855,9 @@ int gr_gv11b_handle_gcc_exception(struct gk20a *g, u32 gpc, u32 tpc, } g->ecc.gr.gcc_l15_ecc_corrected_err_count[gpc].counter += gcc_l15_corrected_err_count_delta; + 
gv11b_gr_report_ecc_error(g, NVGPU_ERR_MODULE_GCC, gpc, tpc, + GPU_GCC_L15_ECC_CORRECTED, + 0, g->ecc.gr.gcc_l15_ecc_corrected_err_count[gpc].counter); gk20a_writel(g, gr_pri_gpc0_gcc_l15_ecc_corrected_err_count_r() + offset, 0); @@ -772,6 +874,9 @@ int gr_gv11b_handle_gcc_exception(struct gk20a *g, u32 gpc, u32 tpc, } g->ecc.gr.gcc_l15_ecc_uncorrected_err_count[gpc].counter += gcc_l15_uncorrected_err_count_delta; + gv11b_gr_report_ecc_error(g, NVGPU_ERR_MODULE_GCC, gpc, tpc, + GPU_GCC_L15_ECC_UNCORRECTED, + 0, g->ecc.gr.gcc_l15_ecc_uncorrected_err_count[gpc].counter); gk20a_writel(g, gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_r() + offset, 0); @@ -844,7 +949,6 @@ static int gr_gv11b_handle_gpcmmu_ecc_exception(struct gk20a *g, u32 gpc, uncorrected_delta += (0x1UL << gr_gpc0_mmu_l1tlb_ecc_uncorrected_err_count_total_s()); } - g->ecc.gr.mmu_l1tlb_ecc_corrected_err_count[gpc].counter += corrected_delta; g->ecc.gr.mmu_l1tlb_ecc_uncorrected_err_count[gpc].counter += @@ -854,18 +958,30 @@ static int gr_gv11b_handle_gpcmmu_ecc_exception(struct gk20a *g, u32 gpc, if ((ecc_status & gr_gpc0_mmu_l1tlb_ecc_status_corrected_err_l1tlb_sa_data_m()) != 0U) { + gv11b_gr_report_ecc_error(g, NVGPU_ERR_MODULE_MMU, gpc, 0, + GPU_MMU_L1TLB_SA_DATA_ECC_CORRECTED, + 0, g->ecc.gr.mmu_l1tlb_ecc_corrected_err_count[gpc].counter); nvgpu_log(g, gpu_dbg_intr, "corrected ecc sa data error"); } if ((ecc_status & gr_gpc0_mmu_l1tlb_ecc_status_uncorrected_err_l1tlb_sa_data_m()) != 0U) { + gv11b_gr_report_ecc_error(g, NVGPU_ERR_MODULE_MMU, gpc, 0, + GPU_MMU_L1TLB_SA_DATA_ECC_UNCORRECTED, + 0, g->ecc.gr.mmu_l1tlb_ecc_uncorrected_err_count[gpc].counter); nvgpu_log(g, gpu_dbg_intr, "uncorrected ecc sa data error"); } if ((ecc_status & gr_gpc0_mmu_l1tlb_ecc_status_corrected_err_l1tlb_fa_data_m()) != 0U) { + gv11b_gr_report_ecc_error(g, NVGPU_ERR_MODULE_MMU, gpc, 0, + GPU_MMU_L1TLB_FA_DATA_ECC_CORRECTED, + 0, g->ecc.gr.mmu_l1tlb_ecc_corrected_err_count[gpc].counter); nvgpu_log(g, gpu_dbg_intr, 
"corrected ecc fa data error"); } if ((ecc_status & gr_gpc0_mmu_l1tlb_ecc_status_uncorrected_err_l1tlb_fa_data_m()) != 0U) { + gv11b_gr_report_ecc_error(g, NVGPU_ERR_MODULE_MMU, gpc, 0, + GPU_MMU_L1TLB_FA_DATA_ECC_UNCORRECTED, + 0, g->ecc.gr.mmu_l1tlb_ecc_uncorrected_err_count[gpc].counter); nvgpu_log(g, gpu_dbg_intr, "uncorrected ecc fa data error"); } if ((corrected_overflow != 0U) || (uncorrected_overflow != 0U)) { @@ -941,21 +1057,32 @@ static int gr_gv11b_handle_gpccs_ecc_exception(struct gk20a *g, u32 gpc, uncorrected_delta; nvgpu_log(g, gpu_dbg_intr, "gppcs gpc:%d ecc interrupt intr: 0x%x", gpc, hww_esr); - if ((ecc_status & gr_gpc0_gpccs_falcon_ecc_status_corrected_err_imem_m()) != 0U) { + gv11b_gr_report_ecc_error(g, NVGPU_ERR_MODULE_GPCCS, gpc, 0, + GPU_GPCCS_FALCON_IMEM_ECC_CORRECTED, + ecc_addr, g->ecc.gr.gpccs_ecc_corrected_err_count[gpc].counter); nvgpu_log(g, gpu_dbg_intr, "imem ecc error corrected"); } if ((ecc_status & gr_gpc0_gpccs_falcon_ecc_status_uncorrected_err_imem_m()) != 0U) { + gv11b_gr_report_ecc_error(g, NVGPU_ERR_MODULE_GPCCS, gpc, 0, + GPU_GPCCS_FALCON_IMEM_ECC_UNCORRECTED, + ecc_addr, g->ecc.gr.gpccs_ecc_uncorrected_err_count[gpc].counter); nvgpu_log(g, gpu_dbg_intr, "imem ecc error uncorrected"); } if ((ecc_status & gr_gpc0_gpccs_falcon_ecc_status_corrected_err_dmem_m()) != 0U) { + gv11b_gr_report_ecc_error(g, NVGPU_ERR_MODULE_GPCCS, gpc, 0, + GPU_GPCCS_FALCON_DMEM_ECC_CORRECTED, + ecc_addr, g->ecc.gr.gpccs_ecc_corrected_err_count[gpc].counter); nvgpu_log(g, gpu_dbg_intr, "dmem ecc error corrected"); } if ((ecc_status & gr_gpc0_gpccs_falcon_ecc_status_uncorrected_err_dmem_m()) != 0U) { + gv11b_gr_report_ecc_error(g, NVGPU_ERR_MODULE_GPCCS, gpc, 0, + GPU_GPCCS_FALCON_DMEM_ECC_UNCORRECTED, + ecc_addr, g->ecc.gr.gpccs_ecc_uncorrected_err_count[gpc].counter); nvgpu_log(g, gpu_dbg_intr, "dmem ecc error uncorrected"); } if ((corrected_overflow != 0U) || (uncorrected_overflow != 0U)) { @@ -2516,22 +2643,33 @@ static void 
gr_gv11b_handle_fecs_ecc_error(struct gk20a *g, u32 intr) nvgpu_log(g, gpu_dbg_intr, "fecs ecc interrupt intr: 0x%x", intr); - if ((ecc_status & gr_fecs_falcon_ecc_status_corrected_err_imem_m()) != 0U) { + gv11b_gr_report_ecc_error(g, NVGPU_ERR_MODULE_FECS, 0, 0, + GPU_FECS_FALCON_IMEM_ECC_CORRECTED, + ecc_addr, g->ecc.gr.fecs_ecc_corrected_err_count[0].counter); nvgpu_log(g, gpu_dbg_intr, "imem ecc error corrected"); } if ((ecc_status & gr_fecs_falcon_ecc_status_uncorrected_err_imem_m()) != 0U) { + gv11b_gr_report_ecc_error(g, NVGPU_ERR_MODULE_FECS, 0, 0, + GPU_FECS_FALCON_IMEM_ECC_UNCORRECTED, + ecc_addr, g->ecc.gr.fecs_ecc_uncorrected_err_count[0].counter); nvgpu_log(g, gpu_dbg_intr, "imem ecc error uncorrected"); } if ((ecc_status & gr_fecs_falcon_ecc_status_corrected_err_dmem_m()) != 0U) { + gv11b_gr_report_ecc_error(g, NVGPU_ERR_MODULE_FECS, 0, 0, + GPU_FECS_FALCON_DMEM_ECC_CORRECTED, + ecc_addr, g->ecc.gr.fecs_ecc_corrected_err_count[0].counter); nvgpu_log(g, gpu_dbg_intr, "dmem ecc error corrected"); } if ((ecc_status & gr_fecs_falcon_ecc_status_uncorrected_err_dmem_m()) != 0U) { + gv11b_gr_report_ecc_error(g, NVGPU_ERR_MODULE_FECS, 0, 0, + GPU_FECS_FALCON_DMEM_ECC_UNCORRECTED, + ecc_addr, g->ecc.gr.fecs_ecc_uncorrected_err_count[0].counter); nvgpu_log(g, gpu_dbg_intr, "dmem ecc error uncorrected"); }