mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 17:36:20 +03:00
gpu: nvgpu: add polling for back-to-back error reporting in av+l
When an error is reported to Safety_Services, it will be cleared at FSI and reported to SEH (System Error Handler). Since the MISC_EC interface provides only one register for error reporting, the status of the previously reported error must be polled before the next error is reported. For this purpose, this patch adds logic in AV+L to perform polling using epl_get_misc_ec_err_status(). JIRA NVGPU-8094 Bug 200729736 Change-Id: Ia01a2fc42a7ce586b7965a82c90027a9a2dd252b Signed-off-by: Rajesh Devaraj <rdevaraj@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2684141 Reviewed-by: Dinesh T <dt@nvidia.com> Reviewed-by: Ankur Kishore <ankkishore@nvidia.com> GVS: Gerrit_Virtual_Submit
This commit is contained in:
committed by
mobile promotions
parent
9edbac4494
commit
4652f96a6f
@@ -19,6 +19,7 @@
|
||||
|
||||
#ifdef CONFIG_NVGPU_ENABLE_MISC_EC
|
||||
#include <linux/tegra-epl.h>
|
||||
#include <nvgpu/timers.h>
|
||||
#include "os/linux/os_linux.h"
|
||||
#endif
|
||||
|
||||
@@ -26,11 +27,60 @@ struct gk20a;
|
||||
|
||||
int nvgpu_cic_mon_report_err_safety_services(struct gk20a *g, u32 err_id)
|
||||
{
|
||||
int ret = 0U;
|
||||
int ret = 0;
|
||||
|
||||
#ifdef CONFIG_NVGPU_ENABLE_MISC_EC
|
||||
u32 ss_retries = 0U;
|
||||
bool ss_status;
|
||||
struct device *dev = dev_from_gk20a(g);
|
||||
|
||||
if (g->enable_polling == true) {
|
||||
/**
|
||||
* Do polling, if the error to be reported is corrected one.
|
||||
*/
|
||||
if ((err_id & ERR_TYPE_MASK) == 0U) {
|
||||
for (ss_retries = MAX_SS_RETRIES; ss_retries > 0U;
|
||||
ss_retries--) {
|
||||
ret = epl_get_misc_ec_err_status(dev,
|
||||
MISC_EC_SW_ERR_CODE_0,
|
||||
&ss_status);
|
||||
if (ret == 0) {
|
||||
if (ss_status == true) {
|
||||
/**
|
||||
* Previously reported error is
|
||||
* cleared at Safety_Services.
|
||||
*/
|
||||
break;
|
||||
} else {
|
||||
nvgpu_udelay(SS_WAIT_DURATION_US);
|
||||
continue;
|
||||
}
|
||||
} else if (ret == -ENODEV) {
|
||||
nvgpu_err(g, "Error reporting is not "
|
||||
"supported in this platform");
|
||||
ret = 0;
|
||||
return ret;
|
||||
} else {
|
||||
nvgpu_err(g, "Error reporting failed");
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
if (ss_retries == 0U) {
|
||||
nvgpu_err(g, "Error reporting failed: previous"
|
||||
"error is not cleared");
|
||||
ret = -1;
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Enable polling immediately after reporting of first error from boot.
|
||||
*/
|
||||
if (g->enable_polling == false) {
|
||||
g->enable_polling = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* MISC_EC_SW_ERR_CODE_0 register has been allocated for NvGPU
|
||||
* to report GPU HW errors to Safety_Services via MISC_EC interface.
|
||||
|
||||
@@ -452,6 +452,10 @@ int gk20a_pm_finalize_poweron(struct device *dev)
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_ENABLE_MISC_EC
|
||||
g->enable_polling = false;
|
||||
#endif
|
||||
|
||||
err = gk20a_restore_registers(g);
|
||||
if (err)
|
||||
goto done;
|
||||
|
||||
Reference in New Issue
Block a user