From fac998940c0d6147d0da41ccd4c5e8e060e2f0a9 Mon Sep 17 00:00:00 2001 From: Rajesh Devaraj Date: Wed, 4 May 2022 11:47:24 +0000 Subject: [PATCH] gpu: nvgpu: enable polling support for error reporting in AV+L As per Safety_Services, a client must perform polling to ensure that the previously reported errors are cleared at FSI, in case of back-to-back error reporting. However, to minimize the polling overhead, the NvGPU driver performs polling only when the error to be reported is a corrected error, to ensure that it is not overwriting the previously reported uncorrected/corrected error. Uncorrected errors are reported without polling. This leads to a failure in error reporting when uncorrected errors are reported back-to-back. This is acceptable for safety builds, where SW quiesce will be triggered immediately after the reporting of the first uncorrected error. In case of other build configurations, MCU/SEH takes the decision on encountering uncorrected errors. To handle such build configurations, polling is enabled for all types of errors, in all build configurations. This patch also removes an unused macro "ERR_TYPE_MASK". 
Bug 3622420 Change-Id: I750b0406faec9b229d8d0c74e986807234362cb9 Signed-off-by: Rajesh Devaraj Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2707105 Reviewed-by: Tejal Kudav Reviewed-by: Vaibhav Kachore GVS: Gerrit_Virtual_Submit --- drivers/gpu/nvgpu/include/nvgpu/cic_mon.h | 1 - .../gpu/nvgpu/os/linux/cic/cic_report_err.c | 53 +++++++++---------- 2 files changed, 24 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/nvgpu/include/nvgpu/cic_mon.h b/drivers/gpu/nvgpu/include/nvgpu/cic_mon.h index 14555342c..7fcbacaa9 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/cic_mon.h +++ b/drivers/gpu/nvgpu/include/nvgpu/cic_mon.h @@ -35,7 +35,6 @@ #define ERR_REPORT_TIMEOUT_US (5000U) #define SS_WAIT_DURATION_US (500U) #define MAX_SS_RETRIES (ERR_REPORT_TIMEOUT_US / SS_WAIT_DURATION_US) -#define ERR_TYPE_MASK ((1U) << (CORRECTED_BIT_FIELD_SHIFT)) #define U32_BITS 32U #define DIV_BY_U32_BITS(x) ((x) / U32_BITS) diff --git a/drivers/gpu/nvgpu/os/linux/cic/cic_report_err.c b/drivers/gpu/nvgpu/os/linux/cic/cic_report_err.c index 4f874947c..1a186d622 100644 --- a/drivers/gpu/nvgpu/os/linux/cic/cic_report_err.c +++ b/drivers/gpu/nvgpu/os/linux/cic/cic_report_err.c @@ -35,43 +35,38 @@ int nvgpu_cic_mon_report_err_safety_services(struct gk20a *g, u32 err_id) struct device *dev = dev_from_gk20a(g); if (g->enable_polling == true) { - /** - * Do polling, if the error to be reported is corrected one. - */ - if ((err_id & ERR_TYPE_MASK) == 0U) { - for (ss_retries = MAX_SS_RETRIES; ss_retries > 0U; + for (ss_retries = MAX_SS_RETRIES; ss_retries > 0U; ss_retries--) { - ret = epl_get_misc_ec_err_status(dev, + ret = epl_get_misc_ec_err_status(dev, MISC_EC_SW_ERR_CODE_0, &ss_status); - if (ret == 0) { - if (ss_status == true) { - /** - * Previously reported error is - * cleared at Safety_Services. 
- */ - break; - } else { - nvgpu_udelay(SS_WAIT_DURATION_US); - continue; - } - } else if (ret == -ENODEV) { - nvgpu_err(g, "Error reporting is not " - "supported in this platform"); - ret = 0; - return ret; + if (ret == 0) { + if (ss_status == true) { + /** + * Previously reported error is cleared. + */ + break; } else { - nvgpu_err(g, "Error reporting failed"); - return ret; + nvgpu_info(g, "Polling is in progress"); + nvgpu_udelay(SS_WAIT_DURATION_US); + continue; } - } - if (ss_retries == 0U) { - nvgpu_err(g, "Error reporting failed: previous" - "error is not cleared"); - ret = -1; + } else if (ret == -ENODEV) { + nvgpu_err(g, "Error reporting is not " + "supported in this platform"); + ret = 0; + return ret; + } else { + nvgpu_err(g, "Error reporting failed"); return ret; } } + if (ss_retries == 0U) { + nvgpu_err(g, "Error reporting failed: previous" + "error is not cleared after retries"); + ret = -1; + return ret; + } } /**