mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 17:36:20 +03:00
gpu: nvgpu: add polling for back-to-back error reporting in av+l
When an error is reported to Safety_Services, it is cleared at FSI and reported to SEH (System Error Handler). Since the MISC_EC interface provides only one register for error reporting, the status of the previously reported error must be polled before the next error is reported. For this purpose, this patch adds logic to perform that polling in AV+L using epl_get_misc_ec_err_status(). JIRA NVGPU-8094 Bug 200729736 Change-Id: Ia01a2fc42a7ce586b7965a82c90027a9a2dd252b Signed-off-by: Rajesh Devaraj <rdevaraj@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2684141 Reviewed-by: Dinesh T <dt@nvidia.com> Reviewed-by: Ankur Kishore <ankkishore@nvidia.com> GVS: Gerrit_Virtual_Submit
This commit is contained in:
committed by
mobile promotions
parent
9edbac4494
commit
4652f96a6f
@@ -19,6 +19,7 @@
|
|||||||
|
|
||||||
#ifdef CONFIG_NVGPU_ENABLE_MISC_EC
|
#ifdef CONFIG_NVGPU_ENABLE_MISC_EC
|
||||||
#include <linux/tegra-epl.h>
|
#include <linux/tegra-epl.h>
|
||||||
|
#include <nvgpu/timers.h>
|
||||||
#include "os/linux/os_linux.h"
|
#include "os/linux/os_linux.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -26,11 +27,60 @@ struct gk20a;
|
|||||||
|
|
||||||
int nvgpu_cic_mon_report_err_safety_services(struct gk20a *g, u32 err_id)
|
int nvgpu_cic_mon_report_err_safety_services(struct gk20a *g, u32 err_id)
|
||||||
{
|
{
|
||||||
int ret = 0U;
|
int ret = 0;
|
||||||
|
|
||||||
#ifdef CONFIG_NVGPU_ENABLE_MISC_EC
|
#ifdef CONFIG_NVGPU_ENABLE_MISC_EC
|
||||||
|
u32 ss_retries = 0U;
|
||||||
|
bool ss_status;
|
||||||
struct device *dev = dev_from_gk20a(g);
|
struct device *dev = dev_from_gk20a(g);
|
||||||
|
|
||||||
|
if (g->enable_polling == true) {
|
||||||
|
/**
|
||||||
|
* Do polling, if the error to be reported is corrected one.
|
||||||
|
*/
|
||||||
|
if ((err_id & ERR_TYPE_MASK) == 0U) {
|
||||||
|
for (ss_retries = MAX_SS_RETRIES; ss_retries > 0U;
|
||||||
|
ss_retries--) {
|
||||||
|
ret = epl_get_misc_ec_err_status(dev,
|
||||||
|
MISC_EC_SW_ERR_CODE_0,
|
||||||
|
&ss_status);
|
||||||
|
if (ret == 0) {
|
||||||
|
if (ss_status == true) {
|
||||||
|
/**
|
||||||
|
* Previously reported error is
|
||||||
|
* cleared at Safety_Services.
|
||||||
|
*/
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
nvgpu_udelay(SS_WAIT_DURATION_US);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
} else if (ret == -ENODEV) {
|
||||||
|
nvgpu_err(g, "Error reporting is not "
|
||||||
|
"supported in this platform");
|
||||||
|
ret = 0;
|
||||||
|
return ret;
|
||||||
|
} else {
|
||||||
|
nvgpu_err(g, "Error reporting failed");
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (ss_retries == 0U) {
|
||||||
|
nvgpu_err(g, "Error reporting failed: previous"
|
||||||
|
"error is not cleared");
|
||||||
|
ret = -1;
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Enable polling immediately after reporting of first error from boot.
|
||||||
|
*/
|
||||||
|
if (g->enable_polling == false) {
|
||||||
|
g->enable_polling = true;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* MISC_EC_SW_ERR_CODE_0 register has been allocated for NvGPU
|
* MISC_EC_SW_ERR_CODE_0 register has been allocated for NvGPU
|
||||||
* to report GPU HW errors to Safety_Services via MISC_EC interface.
|
* to report GPU HW errors to Safety_Services via MISC_EC interface.
|
||||||
|
|||||||
@@ -452,6 +452,10 @@ int gk20a_pm_finalize_poweron(struct device *dev)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_NVGPU_ENABLE_MISC_EC
|
||||||
|
g->enable_polling = false;
|
||||||
|
#endif
|
||||||
|
|
||||||
err = gk20a_restore_registers(g);
|
err = gk20a_restore_registers(g);
|
||||||
if (err)
|
if (err)
|
||||||
goto done;
|
goto done;
|
||||||
|
|||||||
Reference in New Issue
Block a user