gpu: nvgpu: Add ACR error reporting to SDL

- Add checks for uncorrected ECC/parity errors in IMEM, DMEM, EMEM, DCLS and REG
  for ACR running on the GSP engine.
  The EXTIRQ3 external interrupt is raised by ACR and directed towards the host.
- Add a function to check the error type when ACR or Bootrom execution fails
  and report it to SDL with the relevant error code.

This is a part of HSI safety requirements.

Bug 3564039
Jira NVGPU-8108

Change-Id: I65407371f7a1d1ba50a10bdf443ef6b903eeaa36
Signed-off-by: mpoojary <mpoojary@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2678100
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
mpoojary
2022-03-08 11:11:25 +00:00
committed by mobile promotions
parent 358f62a9d7
commit c1a995403a
8 changed files with 148 additions and 16 deletions

View File

@@ -26,6 +26,7 @@
#include <nvgpu/timers.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/bug.h>
#include <nvgpu/nvgpu_err.h>
#ifdef CONFIG_NVGPU_GSP_SCHEDULER
#include <nvgpu/gsp.h>
#include <nvgpu/string.h>
@@ -59,6 +60,63 @@ int ga10b_gsp_engine_reset(struct gk20a *g)
return 0;
}
/*
 * Decode the uncorrected-error fields of a GSP falcon ECC status value.
 *
 * Each of the IMEM, DMEM, DCLS, REG and EMEM uncorrected-error masks is
 * tested against ecc_status in turn. Every field that is set is reported to
 * SDL under NVGPU_ERR_MODULE_GSP_ACR with its own error id and is logged via
 * nvgpu_err(). All five fields are always examined, so several errors may be
 * reported from a single call.
 *
 * @param g           GPU driver context passed to the report/log helpers.
 * @param ecc_status  Status value to decode (the caller in this file passes
 *                    the value read from pgsp_falcon_ecc_status_r()).
 *
 * @return 0 when none of the tested fields is set, -EFAULT otherwise.
 */
static int ga10b_gsp_handle_ecc(struct gk20a *g, u32 ecc_status)
{
	bool fault_seen = false;

	/* Instruction memory. */
	if ((ecc_status &
		pgsp_falcon_ecc_status_uncorrected_err_imem_m()) != 0U) {
		nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_GSP_ACR,
				GPU_GSP_ACR_IMEM_ECC_UNCORRECTED);
		nvgpu_err(g, "imem ecc error uncorrected");
		fault_seen = true;
	}

	/* Data memory. */
	if ((ecc_status &
		pgsp_falcon_ecc_status_uncorrected_err_dmem_m()) != 0U) {
		nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_GSP_ACR,
				GPU_GSP_ACR_DMEM_ECC_UNCORRECTED);
		nvgpu_err(g, "dmem ecc error uncorrected");
		fault_seen = true;
	}

	/* DCLS field. */
	if ((ecc_status &
		pgsp_falcon_ecc_status_uncorrected_err_dcls_m()) != 0U) {
		nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_GSP_ACR,
				GPU_GSP_ACR_DCLS_UNCORRECTED);
		nvgpu_err(g, "dcls ecc error uncorrected");
		fault_seen = true;
	}

	/* REG field. */
	if ((ecc_status &
		pgsp_falcon_ecc_status_uncorrected_err_reg_m()) != 0U) {
		nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_GSP_ACR,
				GPU_GSP_ACR_REG_ECC_UNCORRECTED);
		nvgpu_err(g, "reg ecc error uncorrected");
		fault_seen = true;
	}

	/* EMEM field. */
	if ((ecc_status &
		pgsp_falcon_ecc_status_uncorrected_err_emem_m()) != 0U) {
		nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_GSP_ACR,
				GPU_GSP_ACR_EMEM_ECC_UNCORRECTED);
		nvgpu_err(g, "emem ecc error uncorrected");
		fault_seen = true;
	}

	return fault_seen ? -EFAULT : 0;
}
/*
 * Check the GSP falcon ECC status register for uncorrected errors.
 *
 * Reads pgsp_falcon_ecc_status_r() and hands the value to
 * ga10b_gsp_handle_ecc(), which reports and logs any error it finds.
 *
 * @param g  GPU driver context used for the register read and reporting.
 *
 * @return true when ga10b_gsp_handle_ecc() returns 0 (no error flagged),
 *         false otherwise.
 */
bool ga10b_gsp_validate_mem_integrity(struct gk20a *g)
{
	u32 status_reg = nvgpu_readl(g, pgsp_falcon_ecc_status_r());
	int err = ga10b_gsp_handle_ecc(g, status_reg);

	return err == 0;
}
#ifdef CONFIG_NVGPU_GSP_SCHEDULER
u32 ga10b_gsp_queue_head_r(u32 i)
{