gpu: nvgpu: enable polling support for error reporting in AV+L

As per Safety_Services, a client must perform polling to ensure that
previously reported errors have been cleared at FSI when errors are
reported back-to-back. However, to minimize the polling overhead, the
NvGPU driver performs polling only when the error to be reported is a
corrected error, to ensure that it does not overwrite a previously
reported uncorrected/corrected error. Uncorrected errors are reported
without polling. As a result, error reporting fails when uncorrected
errors are reported back-to-back. This is acceptable for safety builds,
where SW quiesce is triggered immediately after the first uncorrected
error is reported. In other build configurations, MCU/SEH decides what
action to take when uncorrected errors are encountered. To handle such
build configurations, polling is enabled for all types of errors, in all
build configurations.

This patch also removes an unused macro "ERR_TYPE_MASK".

Bug 3622420

Change-Id: I750b0406faec9b229d8d0c74e986807234362cb9
Signed-off-by: Rajesh Devaraj <rdevaraj@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2707105
Reviewed-by: Tejal Kudav <tkudav@nvidia.com>
Reviewed-by: Vaibhav Kachore <vkachore@nvidia.com>
GVS: Gerrit_Virtual_Submit
This commit is contained in:
Rajesh Devaraj
2022-05-04 11:47:24 +00:00
committed by mobile promotions
parent 657daaee9e
commit fac998940c
2 changed files with 24 additions and 30 deletions

View File

@@ -35,7 +35,6 @@
#define ERR_REPORT_TIMEOUT_US (5000U)
#define SS_WAIT_DURATION_US (500U)
#define MAX_SS_RETRIES (ERR_REPORT_TIMEOUT_US / SS_WAIT_DURATION_US)
#define ERR_TYPE_MASK ((1U) << (CORRECTED_BIT_FIELD_SHIFT))
#define U32_BITS 32U
#define DIV_BY_U32_BITS(x) ((x) / U32_BITS)

View File

@@ -35,43 +35,38 @@ int nvgpu_cic_mon_report_err_safety_services(struct gk20a *g, u32 err_id)
struct device *dev = dev_from_gk20a(g);
if (g->enable_polling == true) {
/**
* Do polling, if the error to be reported is corrected one.
*/
if ((err_id & ERR_TYPE_MASK) == 0U) {
for (ss_retries = MAX_SS_RETRIES; ss_retries > 0U;
for (ss_retries = MAX_SS_RETRIES; ss_retries > 0U;
ss_retries--) {
ret = epl_get_misc_ec_err_status(dev,
ret = epl_get_misc_ec_err_status(dev,
MISC_EC_SW_ERR_CODE_0,
&ss_status);
if (ret == 0) {
if (ss_status == true) {
/**
* Previously reported error is
* cleared at Safety_Services.
*/
break;
} else {
nvgpu_udelay(SS_WAIT_DURATION_US);
continue;
}
} else if (ret == -ENODEV) {
nvgpu_err(g, "Error reporting is not "
"supported in this platform");
ret = 0;
return ret;
if (ret == 0) {
if (ss_status == true) {
/**
* Previously reported error is cleared.
*/
break;
} else {
nvgpu_err(g, "Error reporting failed");
return ret;
nvgpu_info(g, "Polling is in progress");
nvgpu_udelay(SS_WAIT_DURATION_US);
continue;
}
}
if (ss_retries == 0U) {
nvgpu_err(g, "Error reporting failed: previous"
"error is not cleared");
ret = -1;
} else if (ret == -ENODEV) {
nvgpu_err(g, "Error reporting is not "
"supported in this platform");
ret = 0;
return ret;
} else {
nvgpu_err(g, "Error reporting failed");
return ret;
}
}
if (ss_retries == 0U) {
nvgpu_err(g, "Error reporting failed: previous"
"error is not cleared after retries");
ret = -1;
return ret;
}
}
/**