mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 09:12:24 +03:00
gpu: nvgpu: Err injection utility support
The HSI error injection utility is an on-bench debug and test utility which can be used by customers and SQA to test the end-to-end error detection and reporting path. Implement a callback function to integrate with this utility and allow injecting GPU HSI related errors. As part of the callback function hsierrrpt_inj(), invoke the driver's error-reporting logic which uses the EPD MISC_EC APIs. In the future, we can enhance the callback function to trigger the driver's error handling logic incrementally for different errors. Bug 3413214 Change-Id: I2d050b6c850d6151b40095f243a6733b4ba74f47 Signed-off-by: Tejal Kudav <tkudav@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2727198 Tested-by: mobile promotions <svcmobile_promotions@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
8b4bc0e51c
commit
494dc19ee8
@@ -197,6 +197,12 @@ CONFIG_GK20A_DEVFREQ := y
|
||||
CONFIG_GK20A_PM_QOS := n
|
||||
endif
|
||||
|
||||
# Turn on FSI error injection only when both CONFIG_TEGRA_HSIERRRPTINJ and
# CONFIG_NVGPU_ENABLE_MISC_EC are set to y.
ifeq ($(CONFIG_TEGRA_HSIERRRPTINJ),y)
|
||||
ifeq ($(CONFIG_NVGPU_ENABLE_MISC_EC),y)
|
||||
CONFIG_NVGPU_FSI_ERR_INJECTION := y
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CONFIG_GK20A_PMU),y)
|
||||
ccflags-y += -DCONFIG_GK20A_PMU
|
||||
endif
|
||||
@@ -308,3 +314,6 @@ endif
|
||||
ifeq ($(CONFIG_NVGPU_ENABLE_MISC_EC),y)
|
||||
ccflags-y += -DCONFIG_NVGPU_ENABLE_MISC_EC
|
||||
endif
|
||||
# When CONFIG_NVGPU_FSI_ERR_INJECTION is y, add the matching -D define to
# ccflags-y so C sources can test it with #ifdef.
ifeq ($(CONFIG_NVGPU_FSI_ERR_INJECTION),y)
|
||||
ccflags-y += -DCONFIG_NVGPU_FSI_ERR_INJECTION
|
||||
endif
|
||||
|
||||
@@ -167,6 +167,16 @@ NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_TRACE
|
||||
CONFIG_NVGPU_FALCON_DEBUG := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_FALCON_DEBUG
|
||||
|
||||
# Enable FSI Error injection support on safety debug and regular build
|
||||
# when the HSI error injection utility and NVGPU's MISC_EC support are
|
||||
# enabled.
|
||||
ifeq ($(CONFIG_TEGRA_HSIERRRPTINJ),1)
|
||||
ifeq ($(CONFIG_NVGPU_ENABLE_MISC_EC),1)
|
||||
CONFIG_NVGPU_FSI_ERR_INJECTION := 1
|
||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_FSI_ERR_INJECTION
|
||||
endif
|
||||
endif
|
||||
|
||||
#
|
||||
# Flags enabled only for regular build profile.
|
||||
#
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -24,7 +24,6 @@
|
||||
#include <nvgpu/kmem.h>
|
||||
#include <nvgpu/log.h>
|
||||
#include <nvgpu/cic_mon.h>
|
||||
|
||||
#include "cic_mon_priv.h"
|
||||
|
||||
int nvgpu_cic_mon_setup(struct gk20a *g)
|
||||
@@ -45,6 +44,21 @@ int nvgpu_cic_mon_setup(struct gk20a *g)
|
||||
}
|
||||
|
||||
g->cic_mon = cic_mon;
|
||||
|
||||
#ifdef CONFIG_NVGPU_FSI_ERR_INJECTION
|
||||
err = nvgpu_cic_mon_reg_errinj_cb(g);
|
||||
if (err != 0) {
|
||||
nvgpu_err(g,
|
||||
"Err inj callback registration failed: %d",
|
||||
err);
|
||||
/* Continue CIC init despite err inj utility
|
||||
* registration failure, as the err inj support
|
||||
* is meant only for debug purposes.
|
||||
*/
|
||||
err = 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
cic_dbg(g, "CIC_MON unit initialization done.");
|
||||
return err;
|
||||
}
|
||||
|
||||
@@ -643,4 +643,7 @@ void nvgpu_cic_mon_intr_nonstall_resume(struct gk20a *g);
|
||||
|
||||
void nvgpu_cic_mon_intr_enable(struct gk20a *g);
|
||||
|
||||
#ifdef CONFIG_NVGPU_FSI_ERR_INJECTION
|
||||
/**
 * @brief Register the error-injection callback for the given GPU.
 *
 * @param g [in]	The GPU driver struct.
 *
 * @return 0 when registration succeeded. nvgpu_cic_mon_setup() treats any
 * non-zero value as a registration failure, logs it and continues.
 */
int nvgpu_cic_mon_reg_errinj_cb(struct gk20a *g);
|
||||
#endif
|
||||
#endif /* NVGPU_CIC_MON_H */
|
||||
|
||||
@@ -21,7 +21,12 @@
|
||||
#include <linux/tegra-epl.h>
|
||||
#include <nvgpu/timers.h>
|
||||
#include "os/linux/os_linux.h"
|
||||
#ifdef CONFIG_NVGPU_FSI_ERR_INJECTION
|
||||
#include <linux/tegra-hsierrrptinj.h>
|
||||
#define NVGPU_FSI_REPORTER_ID 0x8016
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
struct gk20a;
|
||||
|
||||
@@ -99,3 +104,48 @@ int nvgpu_cic_mon_report_err_safety_services(struct gk20a *g, u32 err_id)
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_FSI_ERR_INJECTION
|
||||
/*
 * gk20a pointer saved by nvgpu_cic_mon_reg_errinj_cb() so that the
 * error-injection callback below, which is not passed a gk20a pointer,
 * can reach the driver context.
 */
static struct gk20a *g_err_inj;
|
||||
|
||||
/*
 * Error-injection callback handed to hsierrrpt_reg_cb() by
 * nvgpu_cic_mon_reg_errinj_cb().
 *
 * The request is rejected with -EINVAL unless err_rpt_frame.reporter_id
 * equals NVGPU_FSI_REPORTER_ID and inst_id is 0. Otherwise
 * err_rpt_frame.error_code is forwarded to
 * nvgpu_cic_mon_report_err_safety_services() using the gk20a pointer held
 * in g_err_inj.
 *
 * Returns 0 on success, -EINVAL for a rejected request, and -EFAULT when
 * nvgpu_cic_mon_report_err_safety_services() returns non-zero.
 */
static int nvgpu_cic_mon_inject_err_fsi(uint32_t inst_id,
|
||||
struct epl_error_report_frame err_rpt_frame)
|
||||
{
|
||||
struct gk20a *g = g_err_inj;
|
||||
int err = 0;
|
||||
|
||||
/* Accept only frames whose reporter ID is NVGPU_FSI_REPORTER_ID. */
|
||||
if (err_rpt_frame.reporter_id != NVGPU_FSI_REPORTER_ID) {
|
||||
nvgpu_err(g, "Invalid Input -> Reporter ID = %u",
|
||||
err_rpt_frame.reporter_id);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Instance 0 is the only instance ID accepted here. */
|
||||
if (inst_id != 0U) {
|
||||
nvgpu_err(g, "Invalid Input -> instance ID = %u", inst_id);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
err = nvgpu_cic_mon_report_err_safety_services(g,
|
||||
err_rpt_frame.error_code);
|
||||
/*
 * The value returned by the reporting call is not propagated; any
 * failure is reported to the caller as -EFAULT.
 */
if (err != 0) {
|
||||
nvgpu_err(g, "Error injection failed for err_id: %u",
|
||||
err_rpt_frame.error_code);
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
 * Register nvgpu_cic_mon_inject_err_fsi() through hsierrrpt_reg_cb() for
 * IP_GPU, instance 0.
 *
 * g is stored in g_err_inj before the registration call so the callback
 * can use it later. g_err_inj is left set even when registration fails.
 *
 * Returns whatever hsierrrpt_reg_cb() returns.
 */
int nvgpu_cic_mon_reg_errinj_cb(struct gk20a *g)
|
||||
{
|
||||
hsierrrpt_ipid_t ip_id = IP_GPU;
|
||||
unsigned int inst_id = 0U;
|
||||
|
||||
/* Save NvGPU context which can be used during error injection */
|
||||
g_err_inj = g;
|
||||
|
||||
return hsierrrpt_reg_cb(ip_id, inst_id, nvgpu_cic_mon_inject_err_fsi);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -56,3 +56,11 @@ int nvgpu_cic_mon_report_err_safety_services(struct gk20a *g,
|
||||
(void)err_id;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NVGPU_FSI_ERR_INJECTION
|
||||
/*
 * No-op implementation of nvgpu_cic_mon_reg_errinj_cb(): nothing is
 * registered, g is ignored and 0 is always returned.
 * NOTE(review): presumably the stub used where the HSI error injection
 * utility is unavailable - confirm against the file this hunk belongs to.
 */
int nvgpu_cic_mon_reg_errinj_cb(struct gk20a *g)
|
||||
{
|
||||
(void)g;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user