From 094395ed380f5a2fa5da143a16c2556a3f95ed0e Mon Sep 17 00:00:00 2001 From: Rajesh Devaraj Date: Thu, 7 Feb 2019 16:17:47 +0530 Subject: [PATCH] gpu: nvgpu: Enable the reporting of ECC errors for S2R RAM Enable the reporting of parity errors on S2R RAM. These errors will be reported to the underlying safety service. Jira NVGPU-1930 Change-Id: I17e7b5a051f5ed8055b0f551b8e3b2add0687c46 Signed-off-by: Rajesh Devaraj Reviewed-on: https://git-master.nvidia.com/r/2013835 Reviewed-by: svc-mobile-coverity Reviewed-by: svc-mobile-misra Reviewed-by: Antony Clince Alex GVS: Gerrit_Virtual_Submit Reviewed-by: Vijayakumar Subbu Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gv11b/gr_gv11b.c | 46 +++++++++++++++++---- drivers/gpu/nvgpu/include/nvgpu/nvgpu_err.h | 8 ++-- 2 files changed, 44 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c index 554f07e7d..72e0cebff 100644 --- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c @@ -85,7 +85,7 @@ static void gv11b_gr_report_ecc_error(struct gk20a *g, u32 hw_module, u32 inst = 0U; if (g->ops.gr.err_ops.report_ecc_parity_err == NULL) { - return ; + return; } if (tpc < 256U) { inst = (gpc << 8) | tpc; @@ -255,9 +255,25 @@ static void gr_gv11b_handle_l1_tag_exception(struct gk20a *g, u32 gpc, u32 tpc, } g->ecc.gr.sm_l1_tag_ecc_corrected_err_count[gpc][tpc].counter += l1_tag_corrected_err_count_delta; - gv11b_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc, - GPU_SM_L1_TAG_ECC_CORRECTED, 0, - g->ecc.gr.sm_l1_tag_ecc_corrected_err_count[gpc][tpc].counter); + if ((l1_tag_ecc_status & + (gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_corrected_err_el1_0_m() | + gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_corrected_err_el1_1_m())) != 0U) { + gv11b_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc, + GPU_SM_L1_TAG_ECC_CORRECTED, 0, + g->ecc.gr.sm_l1_tag_ecc_corrected_err_count[gpc][tpc].counter); + } + if 
((l1_tag_ecc_status & + gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_corrected_err_miss_fifo_m()) != 0U) { + gv11b_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc, + GPU_SM_L1_TAG_MISS_FIFO_ECC_CORRECTED, 0, + g->ecc.gr.sm_l1_tag_ecc_corrected_err_count[gpc][tpc].counter); + } + if ((l1_tag_ecc_status & + gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_corrected_err_pixrpf_m()) != 0U) { + gv11b_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc, + GPU_SM_L1_TAG_S2R_PIXPRF_ECC_CORRECTED, 0, + g->ecc.gr.sm_l1_tag_ecc_corrected_err_count[gpc][tpc].counter); + } gk20a_writel(g, gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_r() + offset, 0); @@ -274,9 +290,25 @@ static void gr_gv11b_handle_l1_tag_exception(struct gk20a *g, u32 gpc, u32 tpc, } g->ecc.gr.sm_l1_tag_ecc_uncorrected_err_count[gpc][tpc].counter += l1_tag_uncorrected_err_count_delta; - gv11b_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc, - GPU_SM_L1_TAG_ECC_UNCORRECTED, 0, - g->ecc.gr.sm_l1_tag_ecc_uncorrected_err_count[gpc][tpc].counter); + if ((l1_tag_ecc_status & + (gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_uncorrected_err_el1_0_m() | + gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_uncorrected_err_el1_1_m())) != 0U) { + gv11b_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc, + GPU_SM_L1_TAG_ECC_UNCORRECTED, 0, + g->ecc.gr.sm_l1_tag_ecc_uncorrected_err_count[gpc][tpc].counter); + } + if ((l1_tag_ecc_status & + gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_uncorrected_err_miss_fifo_m()) != 0U) { + gv11b_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc, + GPU_SM_L1_TAG_MISS_FIFO_ECC_UNCORRECTED, 0, + g->ecc.gr.sm_l1_tag_ecc_uncorrected_err_count[gpc][tpc].counter); + } + if ((l1_tag_ecc_status & + gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_uncorrected_err_pixrpf_m()) != 0U) { + gv11b_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc, + GPU_SM_L1_TAG_S2R_PIXPRF_ECC_UNCORRECTED, 0, + g->ecc.gr.sm_l1_tag_ecc_uncorrected_err_count[gpc][tpc].counter); + } gk20a_writel(g, gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_r() 
+ offset, 0); diff --git a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_err.h b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_err.h index 81f72d80c..0b682a94a 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_err.h +++ b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_err.h @@ -71,9 +71,11 @@ #define GPU_SM_ICACHE_L0_PREDECODE_ECC_UNCORRECTED 13U #define GPU_SM_L1_TAG_MISS_FIFO_ECC_CORRECTED 14U #define GPU_SM_L1_TAG_MISS_FIFO_ECC_UNCORRECTED 15U -#define GPU_SM_ICACHE_L1_PREDECODE_ECC_CORRECTED 16U -#define GPU_SM_ICACHE_L1_PREDECODE_ECC_UNCORRECTED 17U -#define GPU_SM_INVALID_ERROR 18U +#define GPU_SM_L1_TAG_S2R_PIXPRF_ECC_CORRECTED 16U +#define GPU_SM_L1_TAG_S2R_PIXPRF_ECC_UNCORRECTED 17U +#define GPU_SM_ICACHE_L1_PREDECODE_ECC_CORRECTED 18U +#define GPU_SM_ICACHE_L1_PREDECODE_ECC_UNCORRECTED 19U +#define GPU_SM_INVALID_ERROR 20U #define GPU_FECS_FALCON_IMEM_ECC_CORRECTED 0U #define GPU_FECS_FALCON_IMEM_ECC_UNCORRECTED 1U