gpu: nvgpu: correct debug messages for fecs ecc errors

The following error message is printed even when there are no FECS
ECC errors:
nvgpu: 17000000.ga10x gv11b_gr_intr_handle_fecs_ecc_error:114
 [ERR]  error count corrected: 0, uncorrected 0

To avoid confusion, print the error counts only when FECS ECC errors
are actually reported.

Bug 3417834

Change-Id: I96317555b11e1976f33add4b1dc8d84c936c26fb
Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2625723
Reviewed-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
This commit is contained in:
Seshendra Gadagottu
2021-11-13 07:25:25 -08:00
committed by mobile promotions
parent 430b9bef58
commit c2901b6835

View File

@@ -91,17 +91,20 @@ static void gv11b_gr_intr_handle_fecs_ecc_error(struct gk20a *g)
GPU_FECS_FALCON_IMEM_ECC_CORRECTED,
fecs_ecc_status.ecc_addr,
g->ecc.gr.fecs_ecc_corrected_err_count[0].counter);
nvgpu_err(g, "imem ecc error corrected");
nvgpu_err(g, "imem ecc error corrected - error count:%d",
g->ecc.gr.fecs_ecc_corrected_err_count[0].counter);
}
if (fecs_ecc_status.imem_uncorrected_err) {
nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_FECS, 0,
GPU_FECS_FALCON_IMEM_ECC_UNCORRECTED,
fecs_ecc_status.ecc_addr,
g->ecc.gr.fecs_ecc_uncorrected_err_count[0].counter);
nvgpu_err(g, "imem ecc error uncorrected");
nvgpu_err(g, "imem ecc error uncorrected - error count:%d",
g->ecc.gr.fecs_ecc_uncorrected_err_count[0].counter);
}
if (fecs_ecc_status.dmem_corrected_err) {
nvgpu_err(g, "unexpected dmem ecc error corrected");
nvgpu_err(g, "unexpected dmem ecc error corrected - count: %d",
g->ecc.gr.fecs_ecc_corrected_err_count[0].counter);
/* This error is not expected to occur in gv11b and hence,
* this scenario is considered as a fatal error.
*/
@@ -112,12 +115,9 @@ static void gv11b_gr_intr_handle_fecs_ecc_error(struct gk20a *g)
GPU_FECS_FALCON_DMEM_ECC_UNCORRECTED,
fecs_ecc_status.ecc_addr,
g->ecc.gr.fecs_ecc_uncorrected_err_count[0].counter);
nvgpu_err(g, "dmem ecc error uncorrected");
nvgpu_err(g, "dmem ecc error uncorrected - error count %d",
g->ecc.gr.fecs_ecc_uncorrected_err_count[0].counter);
}
nvgpu_err(g, "error count corrected: %d, uncorrected %d",
g->ecc.gr.fecs_ecc_corrected_err_count[0].counter,
g->ecc.gr.fecs_ecc_uncorrected_err_count[0].counter);
}
int gv11b_gr_intr_handle_fecs_error(struct gk20a *g,