mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 09:12:24 +03:00
gpu: nvgpu: Err injection utility support
The HSI error injection utility is an on-bench debug and test utility which can be used by customers and SQA to test end-to-end error detection and reporting path. Implement callback function to integrate with this utility and allow injecting GPU HSI related errors. As part of callback function hsierrrpt_inj(), invoke the driver's error-reporting logic which uses the EPD MISC_EC APIs. In future, we can enhance the callback function to trigger driver's error handling logic incrementally for different errors. Bug 3413214 Change-Id: I2d050b6c850d6151b40095f243a6733b4ba74f47 Signed-off-by: Tejal Kudav <tkudav@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2727198 Tested-by: mobile promotions <svcmobile_promotions@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
8b4bc0e51c
commit
494dc19ee8
@@ -197,6 +197,12 @@ CONFIG_GK20A_DEVFREQ := y
|
|||||||
CONFIG_GK20A_PM_QOS := n
|
CONFIG_GK20A_PM_QOS := n
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
# Enable FSI error injection only when both CONFIG_TEGRA_HSIERRRPTINJ and
# CONFIG_NVGPU_ENABLE_MISC_EC are set to y.
ifeq ($(CONFIG_TEGRA_HSIERRRPTINJ),y)
|
||||||
|
ifeq ($(CONFIG_NVGPU_ENABLE_MISC_EC),y)
|
||||||
|
CONFIG_NVGPU_FSI_ERR_INJECTION := y
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
ifeq ($(CONFIG_GK20A_PMU),y)
|
ifeq ($(CONFIG_GK20A_PMU),y)
|
||||||
ccflags-y += -DCONFIG_GK20A_PMU
|
ccflags-y += -DCONFIG_GK20A_PMU
|
||||||
endif
|
endif
|
||||||
@@ -308,3 +314,6 @@ endif
|
|||||||
ifeq ($(CONFIG_NVGPU_ENABLE_MISC_EC),y)
|
ifeq ($(CONFIG_NVGPU_ENABLE_MISC_EC),y)
|
||||||
ccflags-y += -DCONFIG_NVGPU_ENABLE_MISC_EC
|
ccflags-y += -DCONFIG_NVGPU_ENABLE_MISC_EC
|
||||||
endif
|
endif
|
||||||
|
# Pass CONFIG_NVGPU_FSI_ERR_INJECTION to the compiler as a preprocessor
# define when the option is set to y.
ifeq ($(CONFIG_NVGPU_FSI_ERR_INJECTION),y)
|
||||||
|
ccflags-y += -DCONFIG_NVGPU_FSI_ERR_INJECTION
|
||||||
|
endif
|
||||||
|
|||||||
@@ -167,6 +167,16 @@ NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_TRACE
|
|||||||
CONFIG_NVGPU_FALCON_DEBUG := 1
|
CONFIG_NVGPU_FALCON_DEBUG := 1
|
||||||
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_FALCON_DEBUG
|
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_FALCON_DEBUG
|
||||||
|
|
||||||
|
# Enable FSI Error injection support on safety debug and regular build
|
||||||
|
# when the HSI error injection utility and NVGPU's MISC_EC support is
|
||||||
|
# enabled.
|
||||||
|
ifeq ($(CONFIG_TEGRA_HSIERRRPTINJ),1)
|
||||||
|
ifeq ($(CONFIG_NVGPU_ENABLE_MISC_EC),1)
|
||||||
|
CONFIG_NVGPU_FSI_ERR_INJECTION := 1
|
||||||
|
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_FSI_ERR_INJECTION
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
#
|
#
|
||||||
# Flags enabled only for regular build profile.
|
# Flags enabled only for regular build profile.
|
||||||
#
|
#
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
* Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
|
||||||
*
|
*
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
* copy of this software and associated documentation files (the "Software"),
|
* copy of this software and associated documentation files (the "Software"),
|
||||||
@@ -24,7 +24,6 @@
|
|||||||
#include <nvgpu/kmem.h>
|
#include <nvgpu/kmem.h>
|
||||||
#include <nvgpu/log.h>
|
#include <nvgpu/log.h>
|
||||||
#include <nvgpu/cic_mon.h>
|
#include <nvgpu/cic_mon.h>
|
||||||
|
|
||||||
#include "cic_mon_priv.h"
|
#include "cic_mon_priv.h"
|
||||||
|
|
||||||
int nvgpu_cic_mon_setup(struct gk20a *g)
|
int nvgpu_cic_mon_setup(struct gk20a *g)
|
||||||
@@ -45,6 +44,21 @@ int nvgpu_cic_mon_setup(struct gk20a *g)
|
|||||||
}
|
}
|
||||||
|
|
||||||
g->cic_mon = cic_mon;
|
g->cic_mon = cic_mon;
|
||||||
|
|
||||||
|
#ifdef CONFIG_NVGPU_FSI_ERR_INJECTION
|
||||||
|
err = nvgpu_cic_mon_reg_errinj_cb(g);
|
||||||
|
if (err != 0) {
|
||||||
|
nvgpu_err(g,
|
||||||
|
"Err inj callback registration failed: %d",
|
||||||
|
err);
|
||||||
|
/* Continue CIC init despite err inj utility
|
||||||
|
* registration failure, as the err inj support
|
||||||
|
* is meant only for debug purposes.
|
||||||
|
*/
|
||||||
|
err = 0;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
cic_dbg(g, "CIC_MON unit initialization done.");
|
cic_dbg(g, "CIC_MON unit initialization done.");
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -643,4 +643,7 @@ void nvgpu_cic_mon_intr_nonstall_resume(struct gk20a *g);
|
|||||||
|
|
||||||
void nvgpu_cic_mon_intr_enable(struct gk20a *g);
|
void nvgpu_cic_mon_intr_enable(struct gk20a *g);
|
||||||
|
|
||||||
|
#ifdef CONFIG_NVGPU_FSI_ERR_INJECTION
|
||||||
|
/*
 * Register the error injection callback for this GPU.
 *
 * Declared only when CONFIG_NVGPU_FSI_ERR_INJECTION is defined.
 * nvgpu_cic_mon_setup() treats a non-zero return value as a registration
 * failure, logs it, and continues CIC init.
 */
int nvgpu_cic_mon_reg_errinj_cb(struct gk20a *g);
|
||||||
|
#endif
|
||||||
#endif /* NVGPU_CIC_MON_H */
|
#endif /* NVGPU_CIC_MON_H */
|
||||||
|
|||||||
@@ -21,7 +21,12 @@
|
|||||||
#include <linux/tegra-epl.h>
|
#include <linux/tegra-epl.h>
|
||||||
#include <nvgpu/timers.h>
|
#include <nvgpu/timers.h>
|
||||||
#include "os/linux/os_linux.h"
|
#include "os/linux/os_linux.h"
|
||||||
|
#ifdef CONFIG_NVGPU_FSI_ERR_INJECTION
|
||||||
|
#include <linux/tegra-hsierrrptinj.h>
|
||||||
|
#define NVGPU_FSI_REPORTER_ID 0x8016
|
||||||
#endif
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
struct gk20a;
|
struct gk20a;
|
||||||
|
|
||||||
@@ -99,3 +104,48 @@ int nvgpu_cic_mon_report_err_safety_services(struct gk20a *g, u32 err_id)
|
|||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_NVGPU_FSI_ERR_INJECTION
|
||||||
|
static struct gk20a *g_err_inj;
|
||||||
|
|
||||||
|
/*
 * Error injection callback registered through hsierrrpt_reg_cb().
 *
 * Validates the request and then reports err_rpt_frame.error_code via
 * nvgpu_cic_mon_report_err_safety_services(), using the gk20a context
 * saved in g_err_inj by nvgpu_cic_mon_reg_errinj_cb().
 *
 * Returns 0 on success, -EINVAL when reporter_id is not
 * NVGPU_FSI_REPORTER_ID or inst_id is not 0, and -EFAULT when the report
 * call returns non-zero (the underlying error value is not propagated).
 */
static int nvgpu_cic_mon_inject_err_fsi(uint32_t inst_id,
|
||||||
|
struct epl_error_report_frame err_rpt_frame)
|
||||||
|
{
|
||||||
|
struct gk20a *g = g_err_inj;
|
||||||
|
int err = 0;
|
||||||
|
|
||||||
|
/* Sanity check reporter_id */
|
||||||
|
if (err_rpt_frame.reporter_id != NVGPU_FSI_REPORTER_ID) {
|
||||||
|
nvgpu_err(g, "Invalid Input -> Reporter ID = %u",
|
||||||
|
err_rpt_frame.reporter_id);
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Sanity check inst_id */
|
||||||
|
if (inst_id != 0U) {
|
||||||
|
nvgpu_err(g, "Invalid Input -> instance ID = %u", inst_id);
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = nvgpu_cic_mon_report_err_safety_services(g,
|
||||||
|
err_rpt_frame.error_code);
|
||||||
|
if (err != 0) {
|
||||||
|
nvgpu_err(g, "Error injection failed for err_id: %u",
|
||||||
|
err_rpt_frame.error_code);
|
||||||
|
return -EFAULT;
|
||||||
|
}
|
||||||
|
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Register nvgpu_cic_mon_inject_err_fsi() as the error injection callback
 * for IP_GPU, instance 0.
 *
 * Stores g in the file-scope g_err_inj pointer before registering, so the
 * callback can use it. The pointer is not cleared if registration fails.
 *
 * Returns the value returned by hsierrrpt_reg_cb().
 */
int nvgpu_cic_mon_reg_errinj_cb(struct gk20a *g)
|
||||||
|
{
|
||||||
|
hsierrrpt_ipid_t ip_id = IP_GPU;
|
||||||
|
unsigned int inst_id = 0U;
|
||||||
|
|
||||||
|
/* Save NvGPU context which can be used during error injection */
|
||||||
|
g_err_inj = g;
|
||||||
|
|
||||||
|
return hsierrrpt_reg_cb(ip_id, inst_id, nvgpu_cic_mon_inject_err_fsi);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|||||||
@@ -56,3 +56,11 @@ int nvgpu_cic_mon_report_err_safety_services(struct gk20a *g,
|
|||||||
(void)err_id;
|
(void)err_id;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_NVGPU_FSI_ERR_INJECTION
|
||||||
|
/*
 * Stub implementation of the error injection callback registration:
 * ignores g, registers nothing, and always returns 0.
 */
int nvgpu_cic_mon_reg_errinj_cb(struct gk20a *g)
|
||||||
|
{
|
||||||
|
(void)g;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|||||||
Reference in New Issue
Block a user