mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 09:12:24 +03:00
gpu: nvgpu: Add ACR error reporting to SDL
- Add a check for ECC parity errors in IMEM, DMEM, EMEM, DCLS and REG for ACR running on the GSP engine. The EXTIRQ3 external interrupt is set by ACR and points towards the host.
- Add a function that checks the error type when ACR or Bootrom execution fails and reports it to SDL with the relevant error code. This is part of the HSI safety requirements.

Bug 3564039
Jira NVGPU-8108

Change-Id: I65407371f7a1d1ba50a10bdf443ef6b903eeaa36
Signed-off-by: mpoojary <mpoojary@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2678100
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
358f62a9d7
commit
c1a995403a
@@ -33,10 +33,62 @@
|
||||
#include <nvgpu/soc.h>
|
||||
#include <nvgpu/riscv.h>
|
||||
#include <nvgpu/io.h>
|
||||
#include <nvgpu/nvgpu_err.h>
|
||||
|
||||
#include "acr_bootstrap.h"
|
||||
#include "acr_priv.h"
|
||||
|
||||
/*
 * Map an ACR failure code onto an SDL error report.
 *
 * @g:          GPU driver struct handed to the reporting and logging calls.
 * @error:      failure code, compared against the ACR_ERROR_* values.
 * @error_type: qualifier for @error. It is only consulted for
 *              ACR_ERROR_LS_SIG_VERIF_FAIL, where it is compared against
 *              falcon ids to select the matching LS signature error.
 *
 * Every recognised failure is first reported against
 * NVGPU_ERR_MODULE_GSP_ACR and then logged with nvgpu_err(). Failure codes
 * and falcon ids without a mapping are ignored.
 */
static void acr_report_error_to_sdl(struct gk20a *g, u32 error, u32 error_type)
{
	if (error == ACR_ERROR_WDT) {
		nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_GSP_ACR,
				GPU_GSP_ACR_WDT_UNCORRECTED);
		nvgpu_err(g, "ACR GSP watchdog timeout");
	} else if (error == ACR_ERROR_REG_ACCESS_FAILURE) {
		nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_GSP_ACR,
				GPU_GSP_ACR_REG_ACCESS_TIMEOUT_UNCORRECTED);
		nvgpu_err(g, "ACR register access failure");
	} else if (error == ACR_ERROR_RISCV_EXCEPTION) {
		nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_GSP_ACR,
				GPU_GSP_ACR_ILLEGAL_ACCESS_UNCORRECTED);
		nvgpu_err(g, "ACR riscv exception");
	} else if (error == ACR_ERROR_LS_SIG_VERIF_FAIL) {
		/* The qualifier selects which LS falcon failed verification. */
		if (error_type == FALCON_ID_PMU_NEXT_CORE) {
			nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_GSP_ACR,
					GPU_GSP_ACR_LSPMU_PKC_LSSIG_FAILURE);
			nvgpu_err(g, "LSPMU pkc signature verification failed");
		} else if (error_type == FALCON_ID_FECS) {
			nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_GSP_ACR,
					GPU_GSP_ACR_FECS_PKC_LSSIG_FAILURE);
			nvgpu_err(g, "FECS pkc signature verification failed");
		} else if (error_type == FALCON_ID_GPCCS) {
			nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_GSP_ACR,
					GPU_GSP_ACR_GPCCS_PKC_LSSIG_FAILURE);
			nvgpu_err(g, "GPCCS pkc signature verification failed");
		} else {
			/* No SDL error is mapped to this falcon id. */
		}
	} else {
		/* No SDL error is mapped to this failure code. */
	}
}
|
||||
|
||||
int nvgpu_acr_wait_for_completion(struct gk20a *g, struct hs_acr *acr_desc,
|
||||
u32 timeout)
|
||||
{
|
||||
@@ -70,8 +122,24 @@ int nvgpu_acr_wait_for_completion(struct gk20a *g, struct hs_acr *acr_desc,
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* When engine-falcon is used for ACR bootstrap, validate the integrity
|
||||
* of falcon IMEM and DMEM.
|
||||
*/
|
||||
if (acr_desc->acr_validate_mem_integrity != NULL) {
|
||||
if (!acr_desc->acr_validate_mem_integrity(g)) {
|
||||
nvgpu_err(g, "flcn-%d: memcheck failed", flcn_id);
|
||||
completion = -EAGAIN;
|
||||
error_type = ACR_BOOT_FAILED;
|
||||
}
|
||||
}
|
||||
|
||||
data = nvgpu_falcon_mailbox_read(acr_desc->acr_flcn, FALCON_MAILBOX_0);
|
||||
if (data != 0U) {
|
||||
error_type = nvgpu_falcon_mailbox_read(acr_desc->acr_flcn, FALCON_MAILBOX_1);
|
||||
if (nvgpu_is_enabled(g, NVGPU_ACR_NEXT_CORE_ENABLED)) {
|
||||
acr_report_error_to_sdl(g, data, error_type);
|
||||
}
|
||||
nvgpu_err(g, "flcn-%d: HS ucode boot failed, err %x", flcn_id,
|
||||
data);
|
||||
nvgpu_err(g, "flcn-%d: Mailbox-1 : 0x%x", flcn_id,
|
||||
@@ -87,18 +155,6 @@ int nvgpu_acr_wait_for_completion(struct gk20a *g, struct hs_acr *acr_desc,
|
||||
nvgpu_falcon_mailbox_read(acr_desc->acr_flcn,
|
||||
FALCON_MAILBOX_1));
|
||||
|
||||
/*
|
||||
* When engine-falcon is used for ACR bootstrap, validate the integrity
|
||||
* of falcon IMEM and DMEM.
|
||||
*/
|
||||
if (acr_desc->acr_validate_mem_integrity != NULL) {
|
||||
if (!acr_desc->acr_validate_mem_integrity(g)) {
|
||||
nvgpu_err(g, "flcn-%d: memcheck failed", flcn_id);
|
||||
completion = -EAGAIN;
|
||||
error_type = ACR_BOOT_FAILED;
|
||||
}
|
||||
}
|
||||
|
||||
exit:
|
||||
|
||||
#ifdef CONFIG_NVGPU_FALCON_NON_FUSA
|
||||
@@ -392,6 +448,8 @@ int nvgpu_acr_bootstrap_hs_ucode_riscv(struct gk20a *g, struct nvgpu_acr *acr)
|
||||
nvgpu_acr_dbg(g, "RISCV BROM passed");
|
||||
nvgpu_riscv_dump_brom_stats(flcn);
|
||||
} else {
|
||||
nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_GSP_ACR,
|
||||
GPU_GSP_ACR_NVRISCV_BROM_FAILURE);
|
||||
if (err == -ENOTRECOVERABLE) {
|
||||
nvgpu_err(g, "RISCV BROM Failed");
|
||||
} else {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -109,6 +109,14 @@ struct wpr_carveout_info;
|
||||
#define ACR_COMPLETION_TIMEOUT_NON_SILICON_MS 10000U /*in msec */
|
||||
#define ACR_COMPLETION_TIMEOUT_SILICON_MS 100 /*in msec */
|
||||
|
||||
/*
|
||||
* The ACR firmware returns these error codes when the corresponding error occurs.
|
||||
*/
|
||||
# define ACR_ERROR_WDT 0x66U
|
||||
# define ACR_ERROR_REG_ACCESS_FAILURE 0x1BU
|
||||
# define ACR_ERROR_RISCV_EXCEPTION 0x84U
|
||||
# define ACR_ERROR_LS_SIG_VERIF_FAIL 0x0BU
|
||||
|
||||
struct acr_lsf_config {
|
||||
u32 falcon_id;
|
||||
u32 falcon_dma_idx;
|
||||
|
||||
@@ -320,7 +320,7 @@ static void ga10b_acr_default_sw_init(struct gk20a *g, struct hs_acr *riscv_hs)
|
||||
nvgpu_pmu_report_bar0_pri_err_status;
|
||||
riscv_hs->acr_engine_bus_err_status =
|
||||
g->ops.pmu.bar0_error_status;
|
||||
riscv_hs->acr_validate_mem_integrity = g->ops.pmu.validate_mem_integrity;
|
||||
riscv_hs->acr_validate_mem_integrity = g->ops.gsp.validate_mem_integrity;
|
||||
}
|
||||
|
||||
static void ga10b_acr_sw_init(struct gk20a *g, struct nvgpu_acr *acr)
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#include <nvgpu/timers.h>
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/bug.h>
|
||||
#include <nvgpu/nvgpu_err.h>
|
||||
#ifdef CONFIG_NVGPU_GSP_SCHEDULER
|
||||
#include <nvgpu/gsp.h>
|
||||
#include <nvgpu/string.h>
|
||||
@@ -59,6 +60,63 @@ int ga10b_gsp_engine_reset(struct gk20a *g)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Report every uncorrected-error bit that is set in a GSP falcon ECC status
 * value.
 *
 * @g:          GPU driver struct handed to the reporting and logging calls.
 * @ecc_status: value tested against the
 *              pgsp_falcon_ecc_status_uncorrected_err_*_m() masks.
 *
 * The IMEM, DMEM, DCLS, REG and EMEM bits are examined in that order. Each
 * bit that is set is reported against NVGPU_ERR_MODULE_GSP_ACR and logged;
 * the bits are independent, so several reports may be raised by one call.
 *
 * Return: 0 when none of the bits is set, -EFAULT otherwise.
 */
static int ga10b_gsp_handle_ecc(struct gk20a *g, u32 ecc_status)
{
	const bool imem_fault = (ecc_status &
		pgsp_falcon_ecc_status_uncorrected_err_imem_m()) != 0U;
	const bool dmem_fault = (ecc_status &
		pgsp_falcon_ecc_status_uncorrected_err_dmem_m()) != 0U;
	const bool dcls_fault = (ecc_status &
		pgsp_falcon_ecc_status_uncorrected_err_dcls_m()) != 0U;
	const bool reg_fault = (ecc_status &
		pgsp_falcon_ecc_status_uncorrected_err_reg_m()) != 0U;
	const bool emem_fault = (ecc_status &
		pgsp_falcon_ecc_status_uncorrected_err_emem_m()) != 0U;
	bool fault_seen = false;

	if (imem_fault) {
		nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_GSP_ACR,
				GPU_GSP_ACR_IMEM_ECC_UNCORRECTED);
		nvgpu_err(g, "imem ecc error uncorrected");
		fault_seen = true;
	}

	if (dmem_fault) {
		nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_GSP_ACR,
				GPU_GSP_ACR_DMEM_ECC_UNCORRECTED);
		nvgpu_err(g, "dmem ecc error uncorrected");
		fault_seen = true;
	}

	if (dcls_fault) {
		nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_GSP_ACR,
				GPU_GSP_ACR_DCLS_UNCORRECTED);
		nvgpu_err(g, "dcls ecc error uncorrected");
		fault_seen = true;
	}

	if (reg_fault) {
		nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_GSP_ACR,
				GPU_GSP_ACR_REG_ECC_UNCORRECTED);
		nvgpu_err(g, "reg ecc error uncorrected");
		fault_seen = true;
	}

	if (emem_fault) {
		nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_GSP_ACR,
				GPU_GSP_ACR_EMEM_ECC_UNCORRECTED);
		nvgpu_err(g, "emem ecc error uncorrected");
		fault_seen = true;
	}

	return fault_seen ? -EFAULT : 0;
}
|
||||
|
||||
bool ga10b_gsp_validate_mem_integrity(struct gk20a *g)
|
||||
{
|
||||
u32 ecc_status;
|
||||
|
||||
ecc_status = nvgpu_readl(g, pgsp_falcon_ecc_status_r());
|
||||
|
||||
return ((ga10b_gsp_handle_ecc(g, ecc_status) == 0) ? true :
|
||||
false);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_GSP_SCHEDULER
|
||||
u32 ga10b_gsp_queue_head_r(u32 i)
|
||||
{
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
u32 ga10b_gsp_falcon_base_addr(void);
|
||||
u32 ga10b_gsp_falcon2_base_addr(void);
|
||||
int ga10b_gsp_engine_reset(struct gk20a *g);
|
||||
bool ga10b_gsp_validate_mem_integrity(struct gk20a *g);
|
||||
#ifdef CONFIG_NVGPU_GSP_SCHEDULER
|
||||
void ga10b_gsp_flcn_setup_boot_config(struct gk20a *g);
|
||||
|
||||
|
||||
@@ -1295,6 +1295,7 @@ static const struct gops_gsp ga10b_ops_gsp = {
|
||||
.falcon_base_addr = ga10b_gsp_falcon_base_addr,
|
||||
.falcon2_base_addr = ga10b_gsp_falcon2_base_addr,
|
||||
.gsp_reset = ga10b_gsp_engine_reset,
|
||||
.validate_mem_integrity = ga10b_gsp_validate_mem_integrity,
|
||||
#ifdef CONFIG_NVGPU_GSP_SCHEDULER
|
||||
/* interrupt */
|
||||
.enable_irq = ga10b_gsp_enable_irq,
|
||||
|
||||
@@ -34,6 +34,7 @@ struct gops_gsp {
|
||||
u32 (*falcon2_base_addr)(void);
|
||||
void (*falcon_setup_boot_config)(struct gk20a *g);
|
||||
int (*gsp_reset)(struct gk20a *g);
|
||||
bool (*validate_mem_integrity)(struct gk20a *g);
|
||||
#ifdef CONFIG_NVGPU_GSP_SCHEDULER
|
||||
u32 (*gsp_get_queue_head)(u32 i);
|
||||
u32 (*gsp_get_queue_head_size)(void);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -61,7 +61,6 @@
|
||||
|
||||
#define pgsp_falcon2_gsp_base_r() (0x00111000U)
|
||||
#define pgsp_falcon_irqsset_r() (0x00110000U)
|
||||
#define pgsp_falcon_irqsclr_r() (0x00110004U)
|
||||
#define pgsp_falcon_engine_r() (0x001103c0U)
|
||||
#define pgsp_falcon_engine_reset_true_f() (0x1U)
|
||||
#define pgsp_falcon_engine_reset_false_f() (0x0U)
|
||||
@@ -167,4 +166,10 @@
|
||||
#define pgsp_falcon_exterrstat_valid_m() (U32(0x1U) << 31U)
|
||||
#define pgsp_falcon_exterrstat_valid_v(r) (((r) >> 31U) & 0x1U)
|
||||
#define pgsp_falcon_exterrstat_valid_true_v() (0x00000001U)
|
||||
#define pgsp_falcon_ecc_status_r() (0x00110878U)
|
||||
#define pgsp_falcon_ecc_status_uncorrected_err_imem_m() (U32(0x1U) << 8U)
|
||||
#define pgsp_falcon_ecc_status_uncorrected_err_dmem_m() (U32(0x1U) << 9U)
|
||||
#define pgsp_falcon_ecc_status_uncorrected_err_emem_m() (U32(0x1U) << 13U)
|
||||
#define pgsp_falcon_ecc_status_uncorrected_err_dcls_m() (U32(0x1U) << 11U)
|
||||
#define pgsp_falcon_ecc_status_uncorrected_err_reg_m() (U32(0x1U) << 12U)
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user