mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-23 09:57:08 +03:00
gpu: nvgpu: gops initialization for SDL
This patch moves gops init related to SDL from qnx to common-core. For this purpose, it does the following changes: - Adds stub functions for linux and posix. - Updates nvgpu_init.c for mapping err_ops with report error APIs. - Updates nvgpu_err.h header file to include prototypes related to error reporting APIs. - Updates nvgpu-linux.yaml file to include sdl_stub file. Jira NVGPU-3237 Change-Id: Idbdbe6f8437bf53504b29dc2d50214484ad18d6f Signed-off-by: Rajesh Devaraj <rdevaraj@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/2119681 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
31cbde4412
commit
fcb7635a92
@@ -213,6 +213,9 @@ vgpu:
|
||||
vm:
|
||||
sources: [ os/linux/vm.c ]
|
||||
|
||||
sdl:
|
||||
sources: [ os/linux/sdl/sdl_stub.c ]
|
||||
|
||||
# Group all the Linux headers for now.
|
||||
headers:
|
||||
sources: [ include/nvgpu/linux/atomic.h,
|
||||
|
||||
@@ -403,7 +403,8 @@ nvgpu-y += \
|
||||
os/linux/dt.o \
|
||||
os/linux/ecc_sysfs.o \
|
||||
os/linux/os_ops_tu104.o \
|
||||
os/linux/bsearch.o
|
||||
os/linux/bsearch.o \
|
||||
os/linux/sdl/sdl_stub.o
|
||||
|
||||
nvgpu-$(CONFIG_GK20A_VIDMEM) += \
|
||||
os/linux/dmabuf_vidmem.o
|
||||
|
||||
@@ -386,91 +386,3 @@ void nvgpu_ecc_remove_support(struct gk20a *g)
|
||||
nvgpu_ecc_sysfs_remove(g);
|
||||
nvgpu_ecc_free(g);
|
||||
}
|
||||
|
||||
/*
 * Forward a HUBMMU ECC/parity error to the FB error-reporting hook.
 *
 * Does nothing when g->ops.fb.err_ops.report_ecc_parity_err is not set.
 * Otherwise the hook is called with NVGPU_ERR_MODULE_HUBMMU and the
 * caller's inst, err_type, err_addr and err_cnt; a non-zero return from
 * the hook is logged and otherwise ignored.
 */
void nvgpu_hubmmu_report_ecc_error(struct gk20a *g, u32 inst,
		u32 err_type, u64 err_addr, u64 err_cnt)
{
	int status;

	if (g->ops.fb.err_ops.report_ecc_parity_err != NULL) {
		status = g->ops.fb.err_ops.report_ecc_parity_err(g,
				NVGPU_ERR_MODULE_HUBMMU, inst, err_type,
				err_addr, err_cnt);
		if (status != 0) {
			nvgpu_err(g, "Failed to report HUBMMU error: inst=%u, "
				"err_type=%u, err_addr=%llu, err_cnt=%llu",
				inst, err_type, err_addr, err_cnt);
		}
	}
}
|
||||
|
||||
/*
 * Forward an LTC ECC/parity error to the LTC error-reporting hook.
 *
 * Does nothing when g->ops.ltc.err_ops.report_ecc_parity_err is not set.
 * The instance passed to the hook packs the slice into the low 8 bits and
 * the ltc index above it, so a slice of 256 or more is rejected with an
 * error log and nothing is reported. A non-zero return from the hook is
 * logged and otherwise ignored.
 */
void nvgpu_ltc_report_ecc_error(struct gk20a *g, u32 ltc, u32 slice,
		u32 err_type, u64 err_addr, u64 err_cnt)
{
	int ret = 0;
	u32 inst = 0U;

	if (g->ops.ltc.err_ops.report_ecc_parity_err == NULL) {
		return;
	}
	if (slice < 256U) {
		inst = (ltc << 8U) | slice;
	} else {
		nvgpu_err(g, "Invalid slice id=%u", slice);
		return;
	}
	ret = g->ops.ltc.err_ops.report_ecc_parity_err(g,
			NVGPU_ERR_MODULE_LTC, inst, err_type, err_addr,
			err_cnt);
	if (ret != 0) {
		/*
		 * Adjacent string literals are used instead of a
		 * backslash-newline inside the literal, which would splice
		 * the next line's indentation into the logged message.
		 */
		nvgpu_err(g, "Failed to report LTC error: inst=%u, "
			"err_type=%u, err_addr=%llu, err_cnt=%llu",
			inst, err_type, err_addr, err_cnt);
	}
}
|
||||
|
||||
/*
 * Forward a PMU ECC/parity error to the PMU error-reporting hook.
 *
 * Does nothing when g->ops.pmu.err_ops.report_ecc_parity_err is not set.
 * Otherwise the hook is called with NVGPU_ERR_MODULE_PWR and the caller's
 * inst, err_type, err_addr and err_cnt; a non-zero return from the hook
 * is logged and otherwise ignored.
 */
void nvgpu_pmu_report_ecc_error(struct gk20a *g, u32 inst,
		u32 err_type, u64 err_addr, u64 err_cnt)
{
	int ret = 0;

	if (g->ops.pmu.err_ops.report_ecc_parity_err == NULL) {
		return;
	}
	ret = g->ops.pmu.err_ops.report_ecc_parity_err(g,
			NVGPU_ERR_MODULE_PWR, inst, err_type, err_addr,
			err_cnt);
	if (ret != 0) {
		/*
		 * Adjacent string literals are used instead of a
		 * backslash-newline inside the literal, which would splice
		 * the next line's indentation into the logged message.
		 */
		nvgpu_err(g, "Failed to report PMU error: inst=%u, "
			"err_type=%u, err_addr=%llu, err_cnt=%llu",
			inst, err_type, err_addr, err_cnt);
	}
}
|
||||
|
||||
/*
 * Forward a GR ECC/parity error to the GR error-reporting hook.
 *
 * Does nothing when g->ops.gr.err_ops.report_ecc_parity_err is not set.
 * The instance passed to the hook packs the tpc into the low 8 bits and
 * the gpc index above it, so a tpc of 256 or more is rejected with an
 * error log and nothing is reported. hw_module is passed through to the
 * hook unchanged. A non-zero return from the hook is logged and otherwise
 * ignored.
 */
void nvgpu_gr_report_ecc_error(struct gk20a *g, u32 hw_module,
		u32 gpc, u32 tpc, u32 err_type,
		u64 err_addr, u64 err_cnt)
{
	int ret = 0;
	u32 inst = 0U;

	if (g->ops.gr.err_ops.report_ecc_parity_err == NULL) {
		return;
	}
	if (tpc < 256U) {
		inst = (gpc << 8U) | tpc;
	} else {
		nvgpu_err(g, "Invalid tpc id=%u", tpc);
		return;
	}
	ret = g->ops.gr.err_ops.report_ecc_parity_err(g,
			hw_module, inst, err_type,
			err_addr, err_cnt);
	if (ret != 0) {
		/*
		 * Adjacent string literals are used instead of a
		 * backslash-newline inside the literal, which would splice
		 * the next line's indentation into the logged message.
		 */
		nvgpu_err(g, "Failed to report GR error: hw_module=%u, "
			"inst=%u, err_type=%u, err_addr=%llu, "
			"err_cnt=%llu", hw_module, inst, err_type,
			err_addr, err_cnt);
	}
}
|
||||
|
||||
@@ -239,23 +239,6 @@ clean_up:
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
 * Forward a HOST error to the FIFO error-reporting hook.
 *
 * Does nothing when g->ops.fifo.err_ops.report_host_err is not set.
 * Otherwise the hook is called with NVGPU_ERR_MODULE_HOST and the caller's
 * inst, err_id and intr_info; a non-zero return from the hook is logged
 * together with the arguments and otherwise ignored.
 */
void nvgpu_report_host_error(struct gk20a *g, u32 inst,
		u32 err_id, u32 intr_info)
{
	int ret;

	if (g->ops.fifo.err_ops.report_host_err == NULL) {
		return;
	}
	ret = g->ops.fifo.err_ops.report_host_err(g,
			NVGPU_ERR_MODULE_HOST, inst, err_id, intr_info);
	if (ret != 0) {
		/*
		 * Adjacent string literals are used instead of a
		 * backslash-newline inside the literal, which would splice
		 * the next line's indentation into the logged message.
		 */
		nvgpu_err(g, "Failed to report HOST error: "
			"inst=%u, err_id=%u, intr_info=%u, ret=%d",
			inst, err_id, intr_info, ret);
	}
}
|
||||
|
||||
static const char * const pbdma_ch_eng_status_str[] = {
|
||||
"invalid",
|
||||
"valid",
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#include <nvgpu/channel.h>
|
||||
#include <nvgpu/tsg.h>
|
||||
#include <nvgpu/preempt.h>
|
||||
#include <nvgpu/nvgpu_err.h>
|
||||
|
||||
|
||||
u32 nvgpu_preempt_get_timeout(struct gk20a *g)
|
||||
@@ -78,9 +79,9 @@ void nvgpu_preempt_poll_tsg_on_pbdma(struct gk20a *g,
|
||||
*/
|
||||
if (g->ops.fifo.preempt_poll_pbdma(g, tsgid,
|
||||
pbdma_id) != 0) {
|
||||
nvgpu_report_host_error(g, 0,
|
||||
GPU_HOST_PBDMA_PREEMPT_ERROR,
|
||||
pbdma_id);
|
||||
(void) nvgpu_report_host_err(g, NVGPU_ERR_MODULE_HOST,
|
||||
pbdma_id,
|
||||
GPU_HOST_PBDMA_PREEMPT_ERROR, 0);
|
||||
nvgpu_err(g, "PBDMA preempt failed");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -21,6 +21,7 @@
|
||||
*/
|
||||
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/nvgpu_err.h>
|
||||
#include <nvgpu/io.h>
|
||||
#include <nvgpu/channel.h>
|
||||
#include <nvgpu/rc.h>
|
||||
@@ -43,7 +44,6 @@
|
||||
static void gr_intr_report_ctxsw_error(struct gk20a *g, u32 err_type, u32 chid,
|
||||
u32 mailbox_value)
|
||||
{
|
||||
int ret = 0;
|
||||
struct ctxsw_err_info err_info;
|
||||
|
||||
err_info.curr_ctx = g->ops.gr.falcon.get_current_ctx(g);
|
||||
@@ -52,15 +52,8 @@ static void gr_intr_report_ctxsw_error(struct gk20a *g, u32 err_type, u32 chid,
|
||||
err_info.mailbox_value = mailbox_value;
|
||||
err_info.chid = chid;
|
||||
|
||||
if (g->ops.gr.err_ops.report_ctxsw_err != NULL) {
|
||||
ret = g->ops.gr.err_ops.report_ctxsw_err(g,
|
||||
NVGPU_ERR_MODULE_FECS,
|
||||
err_type, (void *)&err_info);
|
||||
if (ret != 0) {
|
||||
nvgpu_err(g, "Failed to report FECS CTXSW error: %d",
|
||||
err_type);
|
||||
}
|
||||
}
|
||||
(void) nvgpu_report_ctxsw_err(g, NVGPU_ERR_MODULE_FECS,
|
||||
err_type, (void *)&err_info);
|
||||
}
|
||||
|
||||
static int gr_intr_handle_tpc_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
||||
@@ -188,16 +181,11 @@ static int gr_intr_handle_class_error(struct gk20a *g,
|
||||
static void gr_intr_report_sm_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
||||
u32 sm, u32 hww_warp_esr_status, u64 hww_warp_esr_pc)
|
||||
{
|
||||
int ret;
|
||||
struct gr_sm_mcerr_info err_info;
|
||||
struct nvgpu_channel *ch;
|
||||
struct gr_err_info info;
|
||||
u32 tsgid, chid, curr_ctx, inst = 0;
|
||||
|
||||
if (g->ops.gr.err_ops.report_gr_err == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
tsgid = NVGPU_INVALID_TSG_ID;
|
||||
curr_ctx = g->ops.gr.falcon.get_current_ctx(g);
|
||||
ch = nvgpu_gr_intr_get_channel_from_ctx(g, curr_ctx, &tsgid);
|
||||
@@ -217,14 +205,8 @@ static void gr_intr_report_sm_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
||||
err_info.tpc = tpc;
|
||||
err_info.sm = sm;
|
||||
info.sm_mcerr_info = &err_info;
|
||||
ret = g->ops.gr.err_ops.report_gr_err(g,
|
||||
NVGPU_ERR_MODULE_SM, inst, GPU_SM_MACHINE_CHECK_ERROR,
|
||||
&info);
|
||||
if (ret != 0) {
|
||||
nvgpu_err(g, "failed to report SM_EXCEPTION "
|
||||
"gpc=%u, tpc=%u, sm=%u, esr_status=%x",
|
||||
gpc, tpc, sm, hww_warp_esr_status);
|
||||
}
|
||||
(void) nvgpu_report_gr_err(g, NVGPU_ERR_MODULE_SM, inst,
|
||||
GPU_SM_MACHINE_CHECK_ERROR, &info);
|
||||
}
|
||||
|
||||
/* Used by sw interrupt thread to translate current ctx to chid.
|
||||
@@ -314,16 +296,11 @@ unlock:
|
||||
void nvgpu_gr_intr_report_exception(struct gk20a *g, u32 inst,
|
||||
u32 err_type, u32 status)
|
||||
{
|
||||
int ret = 0;
|
||||
struct nvgpu_channel *ch;
|
||||
struct gr_exception_info err_info;
|
||||
struct gr_err_info info;
|
||||
u32 tsgid, chid, curr_ctx;
|
||||
|
||||
if (g->ops.gr.err_ops.report_gr_err == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
tsgid = NVGPU_INVALID_TSG_ID;
|
||||
curr_ctx = g->ops.gr.falcon.get_current_ctx(g);
|
||||
ch = nvgpu_gr_intr_get_channel_from_ctx(g, curr_ctx, &tsgid);
|
||||
@@ -339,14 +316,8 @@ void nvgpu_gr_intr_report_exception(struct gk20a *g, u32 inst,
|
||||
err_info.tsgid = tsgid;
|
||||
err_info.status = status;
|
||||
info.exception_info = &err_info;
|
||||
ret = g->ops.gr.err_ops.report_gr_err(g,
|
||||
NVGPU_ERR_MODULE_PGRAPH, inst, err_type,
|
||||
&info);
|
||||
if (ret != 0) {
|
||||
nvgpu_err(g, "Failed to report PGRAPH exception: "
|
||||
"inst=%u, err_type=%u, status=%u",
|
||||
inst, err_type, status);
|
||||
}
|
||||
(void) nvgpu_report_gr_err(g, NVGPU_ERR_MODULE_PGRAPH,
|
||||
inst, err_type, &info);
|
||||
}
|
||||
|
||||
void nvgpu_gr_intr_set_error_notifier(struct gk20a *g,
|
||||
|
||||
@@ -43,6 +43,7 @@
|
||||
#include <nvgpu/boardobj.h>
|
||||
#include <nvgpu/boardobjgrp.h>
|
||||
#include <nvgpu/pmu/pmu_pstate.h>
|
||||
#include <nvgpu/nvgpu_err.h>
|
||||
|
||||
/* PMU locks used to sync with PMU-RTOS */
|
||||
int nvgpu_pmu_lock_acquire(struct gk20a *g, struct nvgpu_pmu *pmu,
|
||||
@@ -387,27 +388,11 @@ void nvgpu_pmu_remove_support(struct gk20a *g, struct nvgpu_pmu *pmu)
|
||||
}
|
||||
|
||||
/* PMU H/W error functions */
|
||||
/*
 * Forward a PMU error to the PMU error-reporting hook, if one is set.
 *
 * The hook g->ops.pmu.err_ops.report_pmu_err is called with
 * NVGPU_ERR_MODULE_PWR followed by err_type, status and pmu_err_type in
 * that order. A non-zero return from the hook is logged and otherwise
 * ignored.
 */
static void pmu_report_error(struct gk20a *g, u32 err_type,
		u32 status, u32 pmu_err_type)
{
	int ret = 0;

	if (g->ops.pmu.err_ops.report_pmu_err != NULL) {
		ret = g->ops.pmu.err_ops.report_pmu_err(g,
			NVGPU_ERR_MODULE_PWR, err_type, status, pmu_err_type);
		if (ret != 0) {
			/* err_type is u32, so print it unsigned. */
			nvgpu_err(g, "Failed to report PMU error: %u",
					err_type);
		}
	}
}
|
||||
|
||||
void nvgpu_pmu_report_bar0_pri_err_status(struct gk20a *g, u32 bar0_status,
|
||||
u32 error_type)
|
||||
{
|
||||
pmu_report_error(g,
|
||||
GPU_PMU_BAR0_ERROR_TIMEOUT, bar0_status, error_type);
|
||||
return;
|
||||
(void) nvgpu_report_pmu_err(g, NVGPU_ERR_MODULE_PMU,
|
||||
GPU_PMU_BAR0_ERROR_TIMEOUT, error_type, bar0_status);
|
||||
}
|
||||
|
||||
/* PMU engine reset functions */
|
||||
|
||||
@@ -75,7 +75,8 @@ void gk20a_bus_isr(struct gk20a *g)
|
||||
*/
|
||||
err_type = GPU_HOST_PBUS_TIMEOUT_ERROR;
|
||||
}
|
||||
nvgpu_report_host_error(g, 0, err_type, val);
|
||||
(void) nvgpu_report_host_err(g, NVGPU_ERR_MODULE_HOST,
|
||||
0, err_type, val);
|
||||
gk20a_writel(g, bus_intr_0_r(), val);
|
||||
}
|
||||
|
||||
|
||||
@@ -39,14 +39,14 @@ void gp10b_ce_stall_isr(struct gk20a *g, u32 inst_id, u32 pri_base)
|
||||
|
||||
/* clear blocking interrupts: they exhibit broken behavior */
|
||||
if ((ce_intr & ce_intr_status_blockpipe_pending_f()) != 0U) {
|
||||
nvgpu_report_ce_error(g, inst_id,
|
||||
(void) nvgpu_report_ce_err(g, NVGPU_ERR_MODULE_CE, inst_id,
|
||||
GPU_CE_BLOCK_PIPE, ce_intr);
|
||||
nvgpu_log(g, gpu_dbg_intr, "ce blocking pipe interrupt");
|
||||
clear_intr |= ce_intr_status_blockpipe_pending_f();
|
||||
}
|
||||
|
||||
if ((ce_intr & ce_intr_status_launcherr_pending_f()) != 0U) {
|
||||
nvgpu_report_ce_error(g, inst_id,
|
||||
(void) nvgpu_report_ce_err(g, NVGPU_ERR_MODULE_CE, inst_id,
|
||||
GPU_CE_LAUNCH_ERROR, ce_intr);
|
||||
nvgpu_log(g, gpu_dbg_intr, "ce launch error interrupt");
|
||||
clear_intr |= ce_intr_status_launcherr_pending_f();
|
||||
@@ -65,7 +65,7 @@ u32 gp10b_ce_nonstall_isr(struct gk20a *g, u32 inst_id, u32 pri_base)
|
||||
ce_intr, inst_id);
|
||||
|
||||
if ((ce_intr & ce_intr_status_nonblockpipe_pending_f()) != 0U) {
|
||||
nvgpu_report_ce_error(g, inst_id,
|
||||
(void) nvgpu_report_ce_err(g, NVGPU_ERR_MODULE_CE, inst_id,
|
||||
GPU_CE_NONBLOCK_PIPE, ce_intr);
|
||||
nvgpu_writel(g, ce_intr_status_r(inst_id),
|
||||
ce_intr_status_nonblockpipe_pending_f());
|
||||
|
||||
@@ -33,23 +33,6 @@
|
||||
|
||||
#include <nvgpu/hw/gv11b/hw_ce_gv11b.h>
|
||||
|
||||
/*
 * Forward a CE error to the CE error-reporting hook.
 *
 * Does nothing when g->ops.ce.err_ops.report_ce_err is not set. Otherwise
 * the hook is called with NVGPU_ERR_MODULE_CE and the caller's inst,
 * err_type and status; a non-zero return from the hook is logged and
 * otherwise ignored.
 */
void nvgpu_report_ce_error(struct gk20a *g, u32 inst,
		u32 err_type, u32 status)
{
	int rc;

	if (g->ops.ce.err_ops.report_ce_err != NULL) {
		rc = g->ops.ce.err_ops.report_ce_err(g,
				NVGPU_ERR_MODULE_CE, inst, err_type, status);
		if (rc != 0) {
			nvgpu_err(g,
				"report_ce_err failed inst=%u err_type=%u status=%u",
				inst, err_type, status);
		}
	}
}
|
||||
|
||||
u32 gv11b_ce_get_num_pce(struct gk20a *g)
|
||||
{
|
||||
/*
|
||||
@@ -78,7 +61,7 @@ void gv11b_ce_stall_isr(struct gk20a *g, u32 inst_id, u32 pri_base)
|
||||
* reset to get back to a working state.
|
||||
*/
|
||||
if ((ce_intr & ce_intr_status_invalid_config_pending_f()) != 0U) {
|
||||
nvgpu_report_ce_error(g, inst_id,
|
||||
(void) nvgpu_report_ce_err(g, NVGPU_ERR_MODULE_CE, inst_id,
|
||||
GPU_CE_INVALID_CONFIG, ce_intr);
|
||||
nvgpu_log(g, gpu_dbg_intr,
|
||||
"ce: inst %d: invalid config", inst_id);
|
||||
@@ -92,7 +75,7 @@ void gv11b_ce_stall_isr(struct gk20a *g, u32 inst_id, u32 pri_base)
|
||||
* reset before operations can start again, if not the entire GPU.
|
||||
*/
|
||||
if ((ce_intr & ce_intr_status_mthd_buffer_fault_pending_f()) != 0U) {
|
||||
nvgpu_report_ce_error(g, inst_id,
|
||||
(void) nvgpu_report_ce_err(g, NVGPU_ERR_MODULE_CE, inst_id,
|
||||
GPU_CE_METHOD_BUFFER_FAULT, ce_intr);
|
||||
nvgpu_log(g, gpu_dbg_intr,
|
||||
"ce: inst %d: mthd buffer fault", inst_id);
|
||||
|
||||
@@ -83,8 +83,8 @@ static void gv11b_fb_intr_handle_ecc_l2tlb(struct gk20a *g, u32 ecc_status)
|
||||
if ((ecc_status &
|
||||
fb_mmu_l2tlb_ecc_status_corrected_err_l2tlb_sa_data_m())
|
||||
!= 0U) {
|
||||
nvgpu_hubmmu_report_ecc_error(g, 0,
|
||||
GPU_HUBMMU_L2TLB_SA_DATA_ECC_CORRECTED,
|
||||
(void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_HUBMMU,
|
||||
0, GPU_HUBMMU_L2TLB_SA_DATA_ECC_CORRECTED,
|
||||
ecc_addr,
|
||||
g->ecc.fb.mmu_l2tlb_ecc_corrected_err_count[0].counter);
|
||||
nvgpu_log(g, gpu_dbg_intr, "corrected ecc sa data error");
|
||||
@@ -92,8 +92,8 @@ static void gv11b_fb_intr_handle_ecc_l2tlb(struct gk20a *g, u32 ecc_status)
|
||||
if ((ecc_status &
|
||||
fb_mmu_l2tlb_ecc_status_uncorrected_err_l2tlb_sa_data_m())
|
||||
!= 0U) {
|
||||
nvgpu_hubmmu_report_ecc_error(g, 0,
|
||||
GPU_HUBMMU_L2TLB_SA_DATA_ECC_UNCORRECTED,
|
||||
(void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_HUBMMU,
|
||||
0, GPU_HUBMMU_L2TLB_SA_DATA_ECC_UNCORRECTED,
|
||||
ecc_addr,
|
||||
g->ecc.fb.mmu_l2tlb_ecc_uncorrected_err_count[0].counter);
|
||||
nvgpu_log(g, gpu_dbg_intr, "uncorrected ecc sa data error");
|
||||
@@ -161,16 +161,16 @@ static void gv11b_fb_intr_handle_ecc_hubtlb(struct gk20a *g, u32 ecc_status)
|
||||
|
||||
if ((ecc_status &
|
||||
fb_mmu_hubtlb_ecc_status_corrected_err_sa_data_m()) != 0U) {
|
||||
nvgpu_hubmmu_report_ecc_error(g, 0,
|
||||
GPU_HUBMMU_TLB_SA_DATA_ECC_CORRECTED,
|
||||
(void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_HUBMMU,
|
||||
0, GPU_HUBMMU_TLB_SA_DATA_ECC_CORRECTED,
|
||||
ecc_addr,
|
||||
g->ecc.fb.mmu_hubtlb_ecc_corrected_err_count[0].counter);
|
||||
nvgpu_log(g, gpu_dbg_intr, "corrected ecc sa data error");
|
||||
}
|
||||
if ((ecc_status &
|
||||
fb_mmu_hubtlb_ecc_status_uncorrected_err_sa_data_m()) != 0U) {
|
||||
nvgpu_hubmmu_report_ecc_error(g, 0,
|
||||
GPU_HUBMMU_TLB_SA_DATA_ECC_UNCORRECTED,
|
||||
(void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_HUBMMU,
|
||||
0, GPU_HUBMMU_TLB_SA_DATA_ECC_UNCORRECTED,
|
||||
ecc_addr,
|
||||
g->ecc.fb.mmu_hubtlb_ecc_uncorrected_err_count[0].counter);
|
||||
nvgpu_log(g, gpu_dbg_intr, "uncorrected ecc sa data error");
|
||||
@@ -239,8 +239,8 @@ static void gv11b_fb_intr_handle_ecc_fillunit(struct gk20a *g, u32 ecc_status)
|
||||
|
||||
if ((ecc_status &
|
||||
fb_mmu_fillunit_ecc_status_corrected_err_pte_data_m()) != 0U) {
|
||||
nvgpu_hubmmu_report_ecc_error(g, 0,
|
||||
GPU_HUBMMU_PTE_DATA_ECC_CORRECTED,
|
||||
(void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_HUBMMU,
|
||||
0, GPU_HUBMMU_PTE_DATA_ECC_CORRECTED,
|
||||
ecc_addr,
|
||||
g->ecc.fb.mmu_fillunit_ecc_corrected_err_count[0].counter);
|
||||
nvgpu_log(g, gpu_dbg_intr, "corrected ecc pte data error");
|
||||
@@ -248,16 +248,16 @@ static void gv11b_fb_intr_handle_ecc_fillunit(struct gk20a *g, u32 ecc_status)
|
||||
if ((ecc_status &
|
||||
fb_mmu_fillunit_ecc_status_uncorrected_err_pte_data_m())
|
||||
!= 0U) {
|
||||
nvgpu_hubmmu_report_ecc_error(g, 0,
|
||||
GPU_HUBMMU_PTE_DATA_ECC_UNCORRECTED,
|
||||
(void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_HUBMMU,
|
||||
0, GPU_HUBMMU_PTE_DATA_ECC_UNCORRECTED,
|
||||
ecc_addr,
|
||||
g->ecc.fb.mmu_fillunit_ecc_uncorrected_err_count[0].counter);
|
||||
nvgpu_log(g, gpu_dbg_intr, "uncorrected ecc pte data error");
|
||||
}
|
||||
if ((ecc_status &
|
||||
fb_mmu_fillunit_ecc_status_corrected_err_pde0_data_m()) != 0U) {
|
||||
nvgpu_hubmmu_report_ecc_error(g, 0,
|
||||
GPU_HUBMMU_PDE0_DATA_ECC_CORRECTED,
|
||||
(void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_HUBMMU,
|
||||
0, GPU_HUBMMU_PDE0_DATA_ECC_CORRECTED,
|
||||
ecc_addr,
|
||||
g->ecc.fb.mmu_fillunit_ecc_corrected_err_count[0].counter);
|
||||
nvgpu_log(g, gpu_dbg_intr, "corrected ecc pde0 data error");
|
||||
@@ -265,8 +265,8 @@ static void gv11b_fb_intr_handle_ecc_fillunit(struct gk20a *g, u32 ecc_status)
|
||||
if ((ecc_status &
|
||||
fb_mmu_fillunit_ecc_status_uncorrected_err_pde0_data_m())
|
||||
!= 0U) {
|
||||
nvgpu_hubmmu_report_ecc_error(g, 0,
|
||||
GPU_HUBMMU_PDE0_DATA_ECC_UNCORRECTED,
|
||||
(void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_HUBMMU,
|
||||
0, GPU_HUBMMU_PDE0_DATA_ECC_UNCORRECTED,
|
||||
ecc_addr,
|
||||
g->ecc.fb.mmu_fillunit_ecc_uncorrected_err_count[0].counter);
|
||||
nvgpu_log(g, gpu_dbg_intr, "uncorrected ecc pde0 data error");
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
#include <nvgpu/ptimer.h>
|
||||
#include <nvgpu/tsg.h>
|
||||
#include <nvgpu/rc.h>
|
||||
#include <nvgpu/nvgpu_err.h>
|
||||
|
||||
#include <hal/fifo/ctxsw_timeout_gv11b.h>
|
||||
|
||||
@@ -188,7 +189,6 @@ bool gv11b_fifo_handle_ctxsw_timeout(struct gk20a *g)
|
||||
const char *info_status_str;
|
||||
struct nvgpu_tsg *tsg = NULL;
|
||||
|
||||
|
||||
/* get ctxsw timedout engines */
|
||||
ctxsw_timeout_engines = nvgpu_readl(g, fifo_intr_ctxsw_timeout_r());
|
||||
if (ctxsw_timeout_engines == 0U) {
|
||||
@@ -217,8 +217,8 @@ bool gv11b_fifo_handle_ctxsw_timeout(struct gk20a *g)
|
||||
continue;
|
||||
}
|
||||
|
||||
nvgpu_report_host_error(g, 0,
|
||||
GPU_HOST_PFIFO_CTXSW_TIMEOUT_ERROR,
|
||||
(void) nvgpu_report_host_err(g, NVGPU_ERR_MODULE_HOST,
|
||||
0, GPU_HOST_PFIFO_CTXSW_TIMEOUT_ERROR,
|
||||
tsgid);
|
||||
|
||||
recover = g->ops.tsg.check_ctxsw_timeout(tsg,
|
||||
|
||||
@@ -149,8 +149,8 @@ void gk20a_fifo_intr_handle_chsw_error(struct gk20a *g)
|
||||
u32 intr;
|
||||
|
||||
intr = nvgpu_readl(g, fifo_intr_chsw_error_r());
|
||||
nvgpu_report_host_error(g, 0,
|
||||
GPU_HOST_PFIFO_CHSW_ERROR, intr);
|
||||
(void) nvgpu_report_host_err(g, NVGPU_ERR_MODULE_HOST,
|
||||
0, GPU_HOST_PFIFO_CHSW_ERROR, intr);
|
||||
nvgpu_err(g, "chsw: %08x", intr);
|
||||
g->ops.gr.falcon.dump_stats(g);
|
||||
nvgpu_writel(g, fifo_intr_chsw_error_r(), intr);
|
||||
|
||||
@@ -135,8 +135,8 @@ bool gv11b_fifo_handle_sched_error(struct gk20a *g)
|
||||
nvgpu_err(g, "fifo sched error code not supported");
|
||||
}
|
||||
|
||||
nvgpu_report_host_error(g, 0,
|
||||
GPU_HOST_PFIFO_SCHED_ERROR, sched_error);
|
||||
(void) nvgpu_report_host_err(g, NVGPU_ERR_MODULE_HOST,
|
||||
0, GPU_HOST_PFIFO_SCHED_ERROR, sched_error);
|
||||
|
||||
if (sched_error == SCHED_ERROR_CODE_BAD_TSG) {
|
||||
/* id is unknown, preempt all runlists and do recovery */
|
||||
@@ -154,7 +154,7 @@ static u32 gv11b_fifo_intr_handle_errors(struct gk20a *g, u32 fifo_intr)
|
||||
|
||||
if ((fifo_intr & fifo_intr_0_bind_error_pending_f()) != 0U) {
|
||||
u32 bind_error = nvgpu_readl(g, fifo_intr_bind_error_r());
|
||||
nvgpu_report_host_error(g, 0,
|
||||
(void) nvgpu_report_host_err(g, NVGPU_ERR_MODULE_HOST, 0,
|
||||
GPU_HOST_PFIFO_BIND_ERROR, bind_error);
|
||||
nvgpu_err(g, "fifo bind error: 0x%08x", bind_error);
|
||||
handled |= fifo_intr_0_bind_error_pending_f();
|
||||
@@ -166,7 +166,7 @@ static u32 gv11b_fifo_intr_handle_errors(struct gk20a *g, u32 fifo_intr)
|
||||
}
|
||||
|
||||
if ((fifo_intr & fifo_intr_0_memop_timeout_pending_f()) != 0U) {
|
||||
nvgpu_report_host_error(g, 0,
|
||||
(void) nvgpu_report_host_err(g, NVGPU_ERR_MODULE_HOST, 0,
|
||||
GPU_HOST_PFIFO_MEMOP_TIMEOUT_ERROR, 0);
|
||||
nvgpu_err(g, "fifo memop timeout error");
|
||||
handled |= fifo_intr_0_memop_timeout_pending_f();
|
||||
@@ -175,7 +175,7 @@ static u32 gv11b_fifo_intr_handle_errors(struct gk20a *g, u32 fifo_intr)
|
||||
if ((fifo_intr & fifo_intr_0_lb_error_pending_f()) != 0U) {
|
||||
u32 lb_error = nvgpu_readl(g, fifo_intr_lb_error_r());
|
||||
|
||||
nvgpu_report_host_error(g, 0,
|
||||
(void) nvgpu_report_host_err(g, NVGPU_ERR_MODULE_HOST, 0,
|
||||
GPU_HOST_PFIFO_LB_ERROR, lb_error);
|
||||
nvgpu_err(g, "fifo lb error");
|
||||
handled |= fifo_intr_0_lb_error_pending_f();
|
||||
|
||||
@@ -86,8 +86,8 @@ static void report_pbdma_error(struct gk20a *g, u32 pbdma_id,
|
||||
err_type = GPU_HOST_PBDMA_SIGNATURE_ERROR;
|
||||
}
|
||||
if (err_type != GPU_HOST_INVALID_ERROR) {
|
||||
nvgpu_report_host_error(g, pbdma_id,
|
||||
err_type, pbdma_intr_0);
|
||||
(void) nvgpu_report_host_err(g, NVGPU_ERR_MODULE_HOST,
|
||||
pbdma_id, err_type, pbdma_intr_0);
|
||||
}
|
||||
return;
|
||||
}
|
||||
@@ -188,7 +188,7 @@ bool gv11b_pbdma_handle_intr_1(struct gk20a *g, u32 pbdma_id, u32 pbdma_intr_1,
|
||||
|
||||
recover = true;
|
||||
|
||||
nvgpu_report_host_error(g, pbdma_id,
|
||||
(void) nvgpu_report_host_err(g, NVGPU_ERR_MODULE_HOST, pbdma_id,
|
||||
GPU_HOST_PBDMA_HCE_ERROR, pbdma_intr_1);
|
||||
|
||||
if ((pbdma_intr_1 & pbdma_intr_1_ctxnotvalid_pending_f()) != 0U) {
|
||||
|
||||
@@ -42,6 +42,7 @@
|
||||
#include <nvgpu/engines.h>
|
||||
#include <nvgpu/engine_status.h>
|
||||
#include <nvgpu/fbp.h>
|
||||
#include <nvgpu/nvgpu_err.h>
|
||||
|
||||
#include "gr_pri_gk20a.h"
|
||||
#include "gr_pri_gv11b.h"
|
||||
@@ -153,19 +154,25 @@ static void gr_gv11b_handle_l1_tag_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
||||
if ((l1_tag_ecc_status &
|
||||
(gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_corrected_err_el1_0_m() |
|
||||
gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_corrected_err_el1_1_m())) != 0U) {
|
||||
nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc,
|
||||
(void) nvgpu_report_ecc_parity_err(g,
|
||||
NVGPU_ERR_MODULE_SM,
|
||||
(gpc << 8) | tpc,
|
||||
GPU_SM_L1_TAG_ECC_CORRECTED, 0,
|
||||
g->ecc.gr.sm_l1_tag_ecc_corrected_err_count[gpc][tpc].counter);
|
||||
}
|
||||
if ((l1_tag_ecc_status &
|
||||
gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_corrected_err_miss_fifo_m()) != 0U) {
|
||||
nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc,
|
||||
(void) nvgpu_report_ecc_parity_err(g,
|
||||
NVGPU_ERR_MODULE_SM,
|
||||
(gpc << 8) | tpc,
|
||||
GPU_SM_L1_TAG_MISS_FIFO_ECC_CORRECTED, 0,
|
||||
g->ecc.gr.sm_l1_tag_ecc_corrected_err_count[gpc][tpc].counter);
|
||||
}
|
||||
if ((l1_tag_ecc_status &
|
||||
gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_corrected_err_pixrpf_m()) != 0U) {
|
||||
nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc,
|
||||
(void) nvgpu_report_ecc_parity_err(g,
|
||||
NVGPU_ERR_MODULE_SM,
|
||||
(gpc << 8) | tpc,
|
||||
GPU_SM_L1_TAG_S2R_PIXPRF_ECC_CORRECTED, 0,
|
||||
g->ecc.gr.sm_l1_tag_ecc_corrected_err_count[gpc][tpc].counter);
|
||||
}
|
||||
@@ -188,19 +195,25 @@ static void gr_gv11b_handle_l1_tag_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
||||
if ((l1_tag_ecc_status &
|
||||
(gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_uncorrected_err_el1_0_m() |
|
||||
gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_uncorrected_err_el1_1_m())) != 0U) {
|
||||
nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc,
|
||||
(void) nvgpu_report_ecc_parity_err(g,
|
||||
NVGPU_ERR_MODULE_SM,
|
||||
(gpc << 8) | tpc,
|
||||
GPU_SM_L1_TAG_ECC_UNCORRECTED, 0,
|
||||
g->ecc.gr.sm_l1_tag_ecc_uncorrected_err_count[gpc][tpc].counter);
|
||||
}
|
||||
if ((l1_tag_ecc_status &
|
||||
gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_uncorrected_err_miss_fifo_m()) != 0U) {
|
||||
nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc,
|
||||
(void) nvgpu_report_ecc_parity_err(g,
|
||||
NVGPU_ERR_MODULE_SM,
|
||||
(gpc << 8) | tpc,
|
||||
GPU_SM_L1_TAG_MISS_FIFO_ECC_UNCORRECTED, 0,
|
||||
g->ecc.gr.sm_l1_tag_ecc_uncorrected_err_count[gpc][tpc].counter);
|
||||
}
|
||||
if ((l1_tag_ecc_status &
|
||||
gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_uncorrected_err_pixrpf_m()) != 0U) {
|
||||
nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc,
|
||||
(void) nvgpu_report_ecc_parity_err(g,
|
||||
NVGPU_ERR_MODULE_SM,
|
||||
(gpc << 8) | tpc,
|
||||
GPU_SM_L1_TAG_S2R_PIXPRF_ECC_UNCORRECTED, 0,
|
||||
g->ecc.gr.sm_l1_tag_ecc_uncorrected_err_count[gpc][tpc].counter);
|
||||
}
|
||||
@@ -280,7 +293,9 @@ static void gr_gv11b_handle_lrf_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
||||
}
|
||||
g->ecc.gr.sm_lrf_ecc_single_err_count[gpc][tpc].counter +=
|
||||
lrf_corrected_err_count_delta;
|
||||
nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc,
|
||||
(void) nvgpu_report_ecc_parity_err(g,
|
||||
NVGPU_ERR_MODULE_SM,
|
||||
(gpc << 8) | tpc,
|
||||
GPU_SM_LRF_ECC_CORRECTED, 0,
|
||||
g->ecc.gr.sm_lrf_ecc_single_err_count[gpc][tpc].counter);
|
||||
gk20a_writel(g,
|
||||
@@ -299,7 +314,9 @@ static void gr_gv11b_handle_lrf_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
||||
}
|
||||
g->ecc.gr.sm_lrf_ecc_double_err_count[gpc][tpc].counter +=
|
||||
lrf_uncorrected_err_count_delta;
|
||||
nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc,
|
||||
(void) nvgpu_report_ecc_parity_err(g,
|
||||
NVGPU_ERR_MODULE_SM,
|
||||
(gpc << 8) | tpc,
|
||||
GPU_SM_LRF_ECC_UNCORRECTED, 0,
|
||||
g->ecc.gr.sm_lrf_ecc_double_err_count[gpc][tpc].counter);
|
||||
gk20a_writel(g,
|
||||
@@ -370,7 +387,9 @@ static void gr_gv11b_handle_cbu_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
||||
}
|
||||
g->ecc.gr.sm_cbu_ecc_corrected_err_count[gpc][tpc].counter +=
|
||||
cbu_corrected_err_count_delta;
|
||||
nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc,
|
||||
(void) nvgpu_report_ecc_parity_err(g,
|
||||
NVGPU_ERR_MODULE_SM,
|
||||
(gpc << 8) | tpc,
|
||||
GPU_SM_CBU_ECC_CORRECTED,
|
||||
0, g->ecc.gr.sm_cbu_ecc_corrected_err_count[gpc][tpc].counter);
|
||||
gk20a_writel(g,
|
||||
@@ -389,7 +408,9 @@ static void gr_gv11b_handle_cbu_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
||||
}
|
||||
g->ecc.gr.sm_cbu_ecc_uncorrected_err_count[gpc][tpc].counter +=
|
||||
cbu_uncorrected_err_count_delta;
|
||||
nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc,
|
||||
(void) nvgpu_report_ecc_parity_err(g,
|
||||
NVGPU_ERR_MODULE_SM,
|
||||
(gpc << 8) | tpc,
|
||||
GPU_SM_CBU_ECC_UNCORRECTED,
|
||||
0, g->ecc.gr.sm_cbu_ecc_uncorrected_err_count[gpc][tpc].counter);
|
||||
gk20a_writel(g,
|
||||
@@ -456,7 +477,9 @@ static void gr_gv11b_handle_l1_data_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
||||
}
|
||||
g->ecc.gr.sm_l1_data_ecc_corrected_err_count[gpc][tpc].counter +=
|
||||
l1_data_corrected_err_count_delta;
|
||||
nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc,
|
||||
(void) nvgpu_report_ecc_parity_err(g,
|
||||
NVGPU_ERR_MODULE_SM,
|
||||
(gpc << 8) | tpc,
|
||||
GPU_SM_L1_DATA_ECC_CORRECTED,
|
||||
0, g->ecc.gr.sm_l1_data_ecc_corrected_err_count[gpc][tpc].counter);
|
||||
gk20a_writel(g,
|
||||
@@ -475,7 +498,9 @@ static void gr_gv11b_handle_l1_data_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
||||
}
|
||||
g->ecc.gr.sm_l1_data_ecc_uncorrected_err_count[gpc][tpc].counter +=
|
||||
l1_data_uncorrected_err_count_delta;
|
||||
nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc,
|
||||
(void) nvgpu_report_ecc_parity_err(g,
|
||||
NVGPU_ERR_MODULE_SM,
|
||||
(gpc << 8) | tpc,
|
||||
GPU_SM_L1_DATA_ECC_UNCORRECTED,
|
||||
0, g->ecc.gr.sm_l1_data_ecc_uncorrected_err_count[gpc][tpc].counter);
|
||||
gk20a_writel(g,
|
||||
@@ -550,25 +575,33 @@ static void gr_gv11b_handle_icache_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
||||
0);
|
||||
if ((icache_ecc_status &
|
||||
gr_pri_gpc0_tpc0_sm_icache_ecc_status_corrected_err_l0_data_m()) != 0U) {
|
||||
nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc,
|
||||
(void) nvgpu_report_ecc_parity_err(g,
|
||||
NVGPU_ERR_MODULE_SM,
|
||||
(gpc << 8) | tpc,
|
||||
GPU_SM_ICACHE_L0_DATA_ECC_CORRECTED,
|
||||
0, g->ecc.gr.sm_icache_ecc_corrected_err_count[gpc][tpc].counter);
|
||||
}
|
||||
if ((icache_ecc_status &
|
||||
gr_pri_gpc0_tpc0_sm_icache_ecc_status_corrected_err_l0_predecode_m()) != 0U) {
|
||||
nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc,
|
||||
(void) nvgpu_report_ecc_parity_err(g,
|
||||
NVGPU_ERR_MODULE_SM,
|
||||
(gpc << 8) | tpc,
|
||||
GPU_SM_ICACHE_L0_PREDECODE_ECC_CORRECTED,
|
||||
0, g->ecc.gr.sm_icache_ecc_corrected_err_count[gpc][tpc].counter);
|
||||
}
|
||||
if ((icache_ecc_status &
|
||||
gr_pri_gpc0_tpc0_sm_icache_ecc_status_corrected_err_l1_data_m()) != 0U) {
|
||||
nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc,
|
||||
(void) nvgpu_report_ecc_parity_err(g,
|
||||
NVGPU_ERR_MODULE_SM,
|
||||
(gpc << 8) | tpc,
|
||||
GPU_SM_ICACHE_L1_DATA_ECC_CORRECTED,
|
||||
0, g->ecc.gr.sm_icache_ecc_corrected_err_count[gpc][tpc].counter);
|
||||
}
|
||||
if ((icache_ecc_status &
|
||||
gr_pri_gpc0_tpc0_sm_icache_ecc_status_corrected_err_l1_predecode_m()) != 0U) {
|
||||
nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc,
|
||||
(void) nvgpu_report_ecc_parity_err(g,
|
||||
NVGPU_ERR_MODULE_SM,
|
||||
(gpc << 8) | tpc,
|
||||
GPU_SM_ICACHE_L1_PREDECODE_ECC_CORRECTED,
|
||||
0, g->ecc.gr.sm_icache_ecc_corrected_err_count[gpc][tpc].counter);
|
||||
}
|
||||
@@ -590,25 +623,33 @@ static void gr_gv11b_handle_icache_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
||||
0);
|
||||
if ((icache_ecc_status &
|
||||
gr_pri_gpc0_tpc0_sm_icache_ecc_status_uncorrected_err_l0_data_m()) != 0U) {
|
||||
nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc,
|
||||
(void) nvgpu_report_ecc_parity_err(g,
|
||||
NVGPU_ERR_MODULE_SM,
|
||||
(gpc << 8) | tpc,
|
||||
GPU_SM_ICACHE_L0_DATA_ECC_UNCORRECTED,
|
||||
0, g->ecc.gr.sm_icache_ecc_uncorrected_err_count[gpc][tpc].counter);
|
||||
}
|
||||
if ((icache_ecc_status &
|
||||
gr_pri_gpc0_tpc0_sm_icache_ecc_status_uncorrected_err_l0_predecode_m()) != 0U) {
|
||||
nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc,
|
||||
(void) nvgpu_report_ecc_parity_err(g,
|
||||
NVGPU_ERR_MODULE_SM,
|
||||
(gpc << 8) | tpc,
|
||||
GPU_SM_ICACHE_L0_PREDECODE_ECC_UNCORRECTED,
|
||||
0, g->ecc.gr.sm_icache_ecc_uncorrected_err_count[gpc][tpc].counter);
|
||||
}
|
||||
if ((icache_ecc_status &
|
||||
gr_pri_gpc0_tpc0_sm_icache_ecc_status_uncorrected_err_l1_data_m()) != 0U) {
|
||||
nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc,
|
||||
(void) nvgpu_report_ecc_parity_err(g,
|
||||
NVGPU_ERR_MODULE_SM,
|
||||
(gpc << 8) | tpc,
|
||||
GPU_SM_ICACHE_L1_DATA_ECC_UNCORRECTED,
|
||||
0, g->ecc.gr.sm_icache_ecc_uncorrected_err_count[gpc][tpc].counter);
|
||||
}
|
||||
if ((icache_ecc_status &
|
||||
gr_pri_gpc0_tpc0_sm_icache_ecc_status_uncorrected_err_l1_predecode_m()) != 0U) {
|
||||
nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc,
|
||||
(void) nvgpu_report_ecc_parity_err(g,
|
||||
NVGPU_ERR_MODULE_SM,
|
||||
(gpc << 8) | tpc,
|
||||
GPU_SM_ICACHE_L1_PREDECODE_ECC_UNCORRECTED,
|
||||
0, g->ecc.gr.sm_icache_ecc_uncorrected_err_count[gpc][tpc].counter);
|
||||
}
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
#include <nvgpu/io.h>
|
||||
#include <nvgpu/class.h>
|
||||
#include <nvgpu/safe_ops.h>
|
||||
#include <nvgpu/nvgpu_err.h>
|
||||
|
||||
#include <nvgpu/gr/config.h>
|
||||
#include <nvgpu/gr/gr.h>
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
#include <nvgpu/io.h>
|
||||
#include <nvgpu/class.h>
|
||||
#include <nvgpu/safe_ops.h>
|
||||
#include <nvgpu/nvgpu_err.h>
|
||||
|
||||
#include <nvgpu/gr/config.h>
|
||||
#include <nvgpu/gr/gr.h>
|
||||
@@ -49,28 +50,28 @@ static void gv11b_gr_intr_handle_fecs_ecc_error(struct gk20a *g)
|
||||
fecs_ecc_status.uncorrected_delta;
|
||||
|
||||
if (fecs_ecc_status.imem_corrected_err) {
|
||||
nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_FECS, 0, 0,
|
||||
(void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_FECS, 0,
|
||||
GPU_FECS_FALCON_IMEM_ECC_CORRECTED,
|
||||
fecs_ecc_status.ecc_addr,
|
||||
g->ecc.gr.fecs_ecc_corrected_err_count[0].counter);
|
||||
nvgpu_log(g, gpu_dbg_intr, "imem ecc error corrected");
|
||||
}
|
||||
if (fecs_ecc_status.imem_uncorrected_err) {
|
||||
nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_FECS, 0, 0,
|
||||
(void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_FECS, 0,
|
||||
GPU_FECS_FALCON_IMEM_ECC_UNCORRECTED,
|
||||
fecs_ecc_status.ecc_addr,
|
||||
g->ecc.gr.fecs_ecc_uncorrected_err_count[0].counter);
|
||||
nvgpu_log(g, gpu_dbg_intr, "imem ecc error uncorrected");
|
||||
}
|
||||
if (fecs_ecc_status.dmem_corrected_err) {
|
||||
nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_FECS, 0, 0,
|
||||
(void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_FECS, 0,
|
||||
GPU_FECS_FALCON_DMEM_ECC_CORRECTED,
|
||||
fecs_ecc_status.ecc_addr,
|
||||
g->ecc.gr.fecs_ecc_corrected_err_count[0].counter);
|
||||
nvgpu_log(g, gpu_dbg_intr, "dmem ecc error corrected");
|
||||
}
|
||||
if (fecs_ecc_status.dmem_uncorrected_err) {
|
||||
nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_FECS, 0, 0,
|
||||
(void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_FECS, 0,
|
||||
GPU_FECS_FALCON_DMEM_ECC_UNCORRECTED,
|
||||
fecs_ecc_status.ecc_addr,
|
||||
g->ecc.gr.fecs_ecc_uncorrected_err_count[0].counter);
|
||||
@@ -319,7 +320,7 @@ void gv11b_gr_intr_handle_gcc_exception(struct gk20a *g, u32 gpc,
|
||||
);
|
||||
}
|
||||
*corrected_err += gcc_l15_corrected_err_count_delta;
|
||||
nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_GCC, gpc, 0,
|
||||
(void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_GCC, gpc,
|
||||
GPU_GCC_L15_ECC_CORRECTED,
|
||||
0, *corrected_err);
|
||||
nvgpu_writel(g,
|
||||
@@ -341,7 +342,7 @@ void gv11b_gr_intr_handle_gcc_exception(struct gk20a *g, u32 gpc,
|
||||
);
|
||||
}
|
||||
*uncorrected_err += gcc_l15_uncorrected_err_count_delta;
|
||||
nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_GCC, gpc, 0,
|
||||
(void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_GCC, gpc,
|
||||
GPU_GCC_L15_ECC_UNCORRECTED,
|
||||
0, *uncorrected_err);
|
||||
nvgpu_writel(g,
|
||||
@@ -429,7 +430,7 @@ void gv11b_gr_intr_handle_gpc_gpcmmu_exception(struct gk20a *g, u32 gpc,
|
||||
if ((ecc_status &
|
||||
gr_gpc0_mmu_l1tlb_ecc_status_corrected_err_l1tlb_sa_data_m()) !=
|
||||
0U) {
|
||||
nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_MMU, gpc, 0,
|
||||
(void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_MMU, gpc,
|
||||
GPU_MMU_L1TLB_SA_DATA_ECC_CORRECTED,
|
||||
0, (u32)*corrected_err);
|
||||
nvgpu_log(g, gpu_dbg_intr, "corrected ecc sa data error");
|
||||
@@ -437,7 +438,7 @@ void gv11b_gr_intr_handle_gpc_gpcmmu_exception(struct gk20a *g, u32 gpc,
|
||||
if ((ecc_status &
|
||||
gr_gpc0_mmu_l1tlb_ecc_status_uncorrected_err_l1tlb_sa_data_m()) !=
|
||||
0U) {
|
||||
nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_MMU, gpc, 0,
|
||||
(void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_MMU, gpc,
|
||||
GPU_MMU_L1TLB_SA_DATA_ECC_UNCORRECTED,
|
||||
0, (u32)*uncorrected_err);
|
||||
nvgpu_log(g, gpu_dbg_intr, "uncorrected ecc sa data error");
|
||||
@@ -445,7 +446,7 @@ void gv11b_gr_intr_handle_gpc_gpcmmu_exception(struct gk20a *g, u32 gpc,
|
||||
if ((ecc_status &
|
||||
gr_gpc0_mmu_l1tlb_ecc_status_corrected_err_l1tlb_fa_data_m()) !=
|
||||
0U) {
|
||||
nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_MMU, gpc, 0,
|
||||
(void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_MMU, gpc,
|
||||
GPU_MMU_L1TLB_FA_DATA_ECC_CORRECTED,
|
||||
0, (u32)*corrected_err);
|
||||
nvgpu_log(g, gpu_dbg_intr, "corrected ecc fa data error");
|
||||
@@ -453,7 +454,7 @@ void gv11b_gr_intr_handle_gpc_gpcmmu_exception(struct gk20a *g, u32 gpc,
|
||||
if ((ecc_status &
|
||||
gr_gpc0_mmu_l1tlb_ecc_status_uncorrected_err_l1tlb_fa_data_m()) !=
|
||||
0U) {
|
||||
nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_MMU, gpc, 0,
|
||||
(void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_MMU, gpc,
|
||||
GPU_MMU_L1TLB_FA_DATA_ECC_UNCORRECTED,
|
||||
0, (u32)*uncorrected_err);
|
||||
nvgpu_log(g, gpu_dbg_intr, "uncorrected ecc fa data error");
|
||||
@@ -536,29 +537,29 @@ void gv11b_gr_intr_handle_gpc_gpccs_exception(struct gk20a *g, u32 gpc,
|
||||
|
||||
if ((ecc_status &
|
||||
gr_gpc0_gpccs_falcon_ecc_status_corrected_err_imem_m()) != 0U) {
|
||||
nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_GPCCS, gpc, 0,
|
||||
GPU_GPCCS_FALCON_IMEM_ECC_CORRECTED,
|
||||
(void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_GPCCS,
|
||||
gpc, GPU_GPCCS_FALCON_IMEM_ECC_CORRECTED,
|
||||
ecc_addr, (u32)*corrected_err);
|
||||
nvgpu_log(g, gpu_dbg_intr, "imem ecc error corrected");
|
||||
}
|
||||
if ((ecc_status &
|
||||
gr_gpc0_gpccs_falcon_ecc_status_uncorrected_err_imem_m()) != 0U) {
|
||||
nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_GPCCS, gpc, 0,
|
||||
GPU_GPCCS_FALCON_IMEM_ECC_UNCORRECTED,
|
||||
(void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_GPCCS,
|
||||
gpc, GPU_GPCCS_FALCON_IMEM_ECC_UNCORRECTED,
|
||||
ecc_addr, (u32)*uncorrected_err);
|
||||
nvgpu_log(g, gpu_dbg_intr, "imem ecc error uncorrected");
|
||||
}
|
||||
if ((ecc_status &
|
||||
gr_gpc0_gpccs_falcon_ecc_status_corrected_err_dmem_m()) != 0U) {
|
||||
nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_GPCCS, gpc, 0,
|
||||
GPU_GPCCS_FALCON_DMEM_ECC_CORRECTED,
|
||||
(void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_GPCCS,
|
||||
gpc, GPU_GPCCS_FALCON_DMEM_ECC_CORRECTED,
|
||||
ecc_addr, (u32)*corrected_err);
|
||||
nvgpu_log(g, gpu_dbg_intr, "dmem ecc error corrected");
|
||||
}
|
||||
if ((ecc_status &
|
||||
gr_gpc0_gpccs_falcon_ecc_status_uncorrected_err_dmem_m()) != 0U) {
|
||||
nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_GPCCS, gpc, 0,
|
||||
GPU_GPCCS_FALCON_DMEM_ECC_UNCORRECTED,
|
||||
(void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_GPCCS,
|
||||
gpc, GPU_GPCCS_FALCON_DMEM_ECC_UNCORRECTED,
|
||||
ecc_addr, (u32)*uncorrected_err);
|
||||
nvgpu_log(g, gpu_dbg_intr, "dmem ecc error uncorrected");
|
||||
}
|
||||
|
||||
@@ -154,28 +154,36 @@ static void gv11b_ltc_intr_handle_lts_interrupts(struct gk20a *g,
|
||||
|
||||
if ((ecc_status &
|
||||
ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_rstg_m()) != 0U) {
|
||||
nvgpu_ltc_report_ecc_error(g, ltc, slice,
|
||||
(void) nvgpu_report_ecc_parity_err(g,
|
||||
NVGPU_ERR_MODULE_LTC,
|
||||
(ltc << 8U) | slice,
|
||||
GPU_LTC_CACHE_RSTG_ECC_CORRECTED, ecc_addr,
|
||||
g->ecc.ltc.ecc_sec_count[ltc][slice].counter);
|
||||
nvgpu_log(g, gpu_dbg_intr, "rstg ecc error corrected");
|
||||
}
|
||||
if ((ecc_status &
|
||||
ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_rstg_m()) != 0U) {
|
||||
nvgpu_ltc_report_ecc_error(g, ltc, slice,
|
||||
(void) nvgpu_report_ecc_parity_err(g,
|
||||
NVGPU_ERR_MODULE_LTC,
|
||||
(ltc << 8U) | slice,
|
||||
GPU_LTC_CACHE_RSTG_ECC_UNCORRECTED, ecc_addr,
|
||||
g->ecc.ltc.ecc_ded_count[ltc][slice].counter);
|
||||
nvgpu_log(g, gpu_dbg_intr, "rstg ecc error uncorrected");
|
||||
}
|
||||
if ((ecc_status &
|
||||
ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_tstg_m()) != 0U) {
|
||||
nvgpu_ltc_report_ecc_error(g, ltc, slice,
|
||||
(void) nvgpu_report_ecc_parity_err(g,
|
||||
NVGPU_ERR_MODULE_LTC,
|
||||
(ltc << 8U) | slice,
|
||||
GPU_LTC_CACHE_TSTG_ECC_CORRECTED, ecc_addr,
|
||||
g->ecc.ltc.ecc_sec_count[ltc][slice].counter);
|
||||
nvgpu_log(g, gpu_dbg_intr, "tstg ecc error corrected");
|
||||
}
|
||||
if ((ecc_status &
|
||||
ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_tstg_m()) != 0U) {
|
||||
nvgpu_ltc_report_ecc_error(g, ltc, slice,
|
||||
(void) nvgpu_report_ecc_parity_err(g,
|
||||
NVGPU_ERR_MODULE_LTC,
|
||||
(ltc << 8U) | slice,
|
||||
GPU_LTC_CACHE_TSTG_ECC_UNCORRECTED, ecc_addr,
|
||||
g->ecc.ltc.ecc_ded_count[ltc][slice].counter);
|
||||
nvgpu_log(g, gpu_dbg_intr,
|
||||
@@ -185,11 +193,15 @@ static void gv11b_ltc_intr_handle_lts_interrupts(struct gk20a *g,
|
||||
ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_dstg_m()) != 0U) {
|
||||
if ((dstg_ecc_addr &
|
||||
ltc_ltc0_lts0_dstg_ecc_address_info_ram_m()) == 0U) {
|
||||
nvgpu_ltc_report_ecc_error(g, ltc, slice,
|
||||
(void) nvgpu_report_ecc_parity_err(g,
|
||||
NVGPU_ERR_MODULE_LTC,
|
||||
(ltc << 8U) | slice,
|
||||
GPU_LTC_CACHE_DSTG_ECC_CORRECTED, ecc_addr,
|
||||
g->ecc.ltc.ecc_sec_count[ltc][slice].counter);
|
||||
} else {
|
||||
nvgpu_ltc_report_ecc_error(g, ltc, slice,
|
||||
(void) nvgpu_report_ecc_parity_err(g,
|
||||
NVGPU_ERR_MODULE_LTC,
|
||||
(ltc << 8U) | slice,
|
||||
GPU_LTC_CACHE_DSTG_BE_ECC_CORRECTED, ecc_addr,
|
||||
g->ecc.ltc.ecc_sec_count[ltc][slice].counter);
|
||||
}
|
||||
@@ -197,11 +209,15 @@ static void gv11b_ltc_intr_handle_lts_interrupts(struct gk20a *g,
|
||||
}
|
||||
if ((ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_dstg_m()) != 0U) {
|
||||
if ((dstg_ecc_addr & ltc_ltc0_lts0_dstg_ecc_address_info_ram_m()) == 0U) {
|
||||
nvgpu_ltc_report_ecc_error(g, ltc, slice,
|
||||
(void) nvgpu_report_ecc_parity_err(g,
|
||||
NVGPU_ERR_MODULE_LTC,
|
||||
(ltc << 8U) | slice,
|
||||
GPU_LTC_CACHE_DSTG_ECC_UNCORRECTED, ecc_addr,
|
||||
g->ecc.ltc.ecc_ded_count[ltc][slice].counter);
|
||||
} else {
|
||||
nvgpu_ltc_report_ecc_error(g, ltc, slice,
|
||||
(void) nvgpu_report_ecc_parity_err(g,
|
||||
NVGPU_ERR_MODULE_LTC,
|
||||
(ltc << 8U) | slice,
|
||||
GPU_LTC_CACHE_DSTG_BE_ECC_UNCORRECTED, ecc_addr,
|
||||
g->ecc.ltc.ecc_ded_count[ltc][slice].counter);
|
||||
}
|
||||
|
||||
@@ -242,7 +242,7 @@ static int gv11b_pmu_correct_ecc(struct gk20a *g, u32 ecc_status, u32 ecc_addr)
|
||||
|
||||
if ((ecc_status &
|
||||
pwr_pmu_falcon_ecc_status_corrected_err_imem_m()) != 0U) {
|
||||
nvgpu_pmu_report_ecc_error(g, 0,
|
||||
(void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_PMU, 0,
|
||||
GPU_PMU_FALCON_IMEM_ECC_CORRECTED,
|
||||
ecc_addr,
|
||||
g->ecc.pmu.pmu_ecc_corrected_err_count[0].counter);
|
||||
@@ -250,7 +250,7 @@ static int gv11b_pmu_correct_ecc(struct gk20a *g, u32 ecc_status, u32 ecc_addr)
|
||||
}
|
||||
if ((ecc_status &
|
||||
pwr_pmu_falcon_ecc_status_uncorrected_err_imem_m()) != 0U) {
|
||||
nvgpu_pmu_report_ecc_error(g, 0,
|
||||
(void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_PMU, 0,
|
||||
GPU_PMU_FALCON_IMEM_ECC_UNCORRECTED,
|
||||
ecc_addr,
|
||||
g->ecc.pmu.pmu_ecc_uncorrected_err_count[0].counter);
|
||||
@@ -259,7 +259,7 @@ static int gv11b_pmu_correct_ecc(struct gk20a *g, u32 ecc_status, u32 ecc_addr)
|
||||
}
|
||||
if ((ecc_status &
|
||||
pwr_pmu_falcon_ecc_status_corrected_err_dmem_m()) != 0U) {
|
||||
nvgpu_pmu_report_ecc_error(g, 0,
|
||||
(void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_PMU, 0,
|
||||
GPU_PMU_FALCON_DMEM_ECC_CORRECTED,
|
||||
ecc_addr,
|
||||
g->ecc.pmu.pmu_ecc_corrected_err_count[0].counter);
|
||||
@@ -267,7 +267,7 @@ static int gv11b_pmu_correct_ecc(struct gk20a *g, u32 ecc_status, u32 ecc_addr)
|
||||
}
|
||||
if ((ecc_status &
|
||||
pwr_pmu_falcon_ecc_status_uncorrected_err_dmem_m()) != 0U) {
|
||||
nvgpu_pmu_report_ecc_error(g, 0,
|
||||
(void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_PMU, 0,
|
||||
GPU_PMU_FALCON_DMEM_ECC_UNCORRECTED,
|
||||
ecc_addr,
|
||||
g->ecc.pmu.pmu_ecc_uncorrected_err_count[0].counter);
|
||||
|
||||
@@ -28,6 +28,7 @@
|
||||
#include <nvgpu/io.h>
|
||||
#include <nvgpu/utils.h>
|
||||
#include <nvgpu/gk20a.h>
|
||||
#include <nvgpu/nvgpu_err.h>
|
||||
|
||||
#include <nvgpu/hw/gp10b/hw_pri_ringmaster_gp10b.h>
|
||||
#include <nvgpu/hw/gp10b/hw_pri_ringstation_sys_gp10b.h>
|
||||
@@ -68,20 +69,9 @@ void gp10b_priv_ring_decode_error_code(struct gk20a *g,
|
||||
u32 error_code)
|
||||
{
|
||||
u32 error_type_index;
|
||||
int ret = 0;
|
||||
|
||||
if (g->ops.priv_ring.err_ops.report_access_violation != NULL) {
|
||||
ret = g->ops.priv_ring.err_ops.report_access_violation (g,
|
||||
NVGPU_ERR_MODULE_PRI,
|
||||
0U,
|
||||
GPU_PRI_ACCESS_VIOLATION,
|
||||
0U,
|
||||
error_code);
|
||||
if (ret != 0) {
|
||||
nvgpu_err(g, "Failed to report PRI access violation: "
|
||||
"err_code=%u", error_code);
|
||||
}
|
||||
}
|
||||
(void) nvgpu_report_pri_err(g, NVGPU_ERR_MODULE_PRI, 0,
|
||||
GPU_PRI_ACCESS_VIOLATION, 0, error_code);
|
||||
|
||||
error_type_index = (error_code & 0x00000f00U) >> 8U;
|
||||
error_code = error_code & 0xBADFf000U;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -32,7 +32,6 @@
|
||||
void gk20a_ptimer_isr(struct gk20a *g)
|
||||
{
|
||||
u32 save0, save1, fecs_errcode = 0;
|
||||
int ret = 0;
|
||||
u32 inst = 0U;
|
||||
u32 error_addr;
|
||||
|
||||
@@ -69,19 +68,9 @@ void gk20a_ptimer_isr(struct gk20a *g)
|
||||
error_addr = 0U;
|
||||
}
|
||||
|
||||
if (g->ops.priv_ring.err_ops.report_timeout_err != NULL) {
|
||||
ret = g->ops.priv_ring.err_ops.report_timeout_err(g,
|
||||
NVGPU_ERR_MODULE_PRI,
|
||||
inst,
|
||||
GPU_PRI_TIMEOUT_ERROR,
|
||||
error_addr,
|
||||
fecs_errcode);
|
||||
if (ret != 0) {
|
||||
nvgpu_err(g, "Failed to report PRI Timout error: "
|
||||
"inst=%u, err_addr=%u, err_code=%u",
|
||||
inst, error_addr, fecs_errcode);
|
||||
}
|
||||
}
|
||||
(void) nvgpu_report_pri_err(g, NVGPU_ERR_MODULE_PRI,
|
||||
inst, GPU_PRI_TIMEOUT_ERROR,
|
||||
error_addr, fecs_errcode);
|
||||
}
|
||||
|
||||
int gk20a_read_ptimer(struct gk20a *g, u64 *value)
|
||||
|
||||
@@ -115,7 +115,6 @@ enum ctxsw_addr_type;
|
||||
#include <nvgpu/sec2/sec2.h>
|
||||
#include <nvgpu/cbc.h>
|
||||
#include <nvgpu/ltc.h>
|
||||
#include <nvgpu/nvgpu_err.h>
|
||||
#include <nvgpu/worker.h>
|
||||
#include <nvgpu/bios.h>
|
||||
#include <nvgpu/semaphore.h>
|
||||
@@ -240,11 +239,6 @@ struct gpu_ops {
|
||||
void (*en_illegal_compstat)(struct gk20a *g,
|
||||
bool enable);
|
||||
} intr;
|
||||
struct {
|
||||
int (*report_ecc_parity_err)(struct gk20a *g,
|
||||
u32 hw_id, u32 inst, u32 err_id,
|
||||
u64 err_addr, u64 count);
|
||||
} err_ops;
|
||||
} ltc;
|
||||
struct {
|
||||
void (*init)(struct gk20a *g, struct nvgpu_cbc *cbc);
|
||||
@@ -260,11 +254,6 @@ struct gpu_ops {
|
||||
u32 (*isr_nonstall)(struct gk20a *g, u32 inst_id, u32 pri_base);
|
||||
u32 (*get_num_pce)(struct gk20a *g);
|
||||
void (*mthd_buffer_fault_in_bar2_fault)(struct gk20a *g);
|
||||
struct {
|
||||
int (*report_ce_err)(struct gk20a *g,
|
||||
u32 hw_id, u32 inst, u32 err_id,
|
||||
u32 status);
|
||||
} err_ops;
|
||||
} ce;
|
||||
struct {
|
||||
u32 (*get_gr_status)(struct gk20a *g);
|
||||
@@ -845,18 +834,6 @@ struct gpu_ops {
|
||||
} intr;
|
||||
|
||||
u32 (*get_ctxsw_checksum_mismatch_mailbox_val)(void);
|
||||
struct {
|
||||
int (*report_ecc_parity_err)(struct gk20a *g,
|
||||
u32 hw_id, u32 inst, u32 err_id,
|
||||
u64 err_addr,
|
||||
u64 err_count);
|
||||
int (*report_gr_err)(struct gk20a *g,
|
||||
u32 hw_id, u32 inst, u32 err_id,
|
||||
struct gr_err_info *err_info);
|
||||
int (*report_ctxsw_err)(struct gk20a *g,
|
||||
u32 hw_id, u32 err_id,
|
||||
void *data);
|
||||
} err_ops;
|
||||
} gr;
|
||||
|
||||
struct {
|
||||
@@ -933,12 +910,6 @@ struct gpu_ops {
|
||||
void (*fault_buf_configure_hw)(struct gk20a *g, u32 index);
|
||||
size_t (*get_vidmem_size)(struct gk20a *g);
|
||||
int (*apply_pdb_cache_war)(struct gk20a *g);
|
||||
struct {
|
||||
int (*report_ecc_parity_err)(struct gk20a *g,
|
||||
u32 hw_id, u32 inst,
|
||||
u32 err_id, u64 err_addr,
|
||||
u64 err_cnt);
|
||||
} err_ops;
|
||||
struct {
|
||||
void (*enable)(struct gk20a *g);
|
||||
void (*disable)(struct gk20a *g);
|
||||
@@ -999,11 +970,6 @@ struct gpu_ops {
|
||||
void (*intr_unset_recover_mask)(struct gk20a *g);
|
||||
int (*set_sm_exception_type_mask)(struct nvgpu_channel *ch,
|
||||
u32 exception_mask);
|
||||
struct {
|
||||
int (*report_host_err)(struct gk20a *g,
|
||||
u32 hw_id, u32 inst, u32 err_id,
|
||||
u32 intr_info);
|
||||
} err_ops;
|
||||
|
||||
void (*intr_0_enable)(struct gk20a *g, bool enable);
|
||||
void (*intr_0_isr)(struct gk20a *g);
|
||||
@@ -1400,15 +1366,6 @@ struct gpu_ops {
|
||||
void (*pmu_reset_idle_counter)(struct gk20a *g, u32 counter_id);
|
||||
/* PG */
|
||||
void (*pmu_setup_elpg)(struct gk20a *g);
|
||||
struct {
|
||||
int (*report_ecc_parity_err)(struct gk20a *g,
|
||||
u32 hw_id, u32 inst,
|
||||
u32 err_id, u64 err_addr,
|
||||
u64 err_cnt);
|
||||
int (*report_pmu_err)(struct gk20a *g,
|
||||
u32 hw_id, u32 err_id, u32 status,
|
||||
u32 pmu_err_type);
|
||||
} err_ops;
|
||||
void (*pmu_clear_bar0_host_err_status)(struct gk20a *g);
|
||||
int (*bar0_error_status)(struct gk20a *g, u32 *bar0_status,
|
||||
u32 *etype);
|
||||
@@ -1670,14 +1627,6 @@ struct gpu_ops {
|
||||
u32 (*enum_ltc)(struct gk20a *g);
|
||||
u32 (*get_gpc_count)(struct gk20a *g);
|
||||
u32 (*get_fbp_count)(struct gk20a *g);
|
||||
struct {
|
||||
int (*report_access_violation)(struct gk20a *g,
|
||||
u32 hw_id, u32 inst, u32 err_id,
|
||||
u32 err_addr, u32 error_code);
|
||||
int (*report_timeout_err)(struct gk20a *g,
|
||||
u32 hw_id, u32 inst, u32 err_id,
|
||||
u32 err_addr, u32 error_code);
|
||||
} err_ops;
|
||||
} priv_ring;
|
||||
struct {
|
||||
int (*check_priv_security)(struct gk20a *g);
|
||||
|
||||
@@ -23,13 +23,17 @@
|
||||
#ifndef NVGPU_NVGPU_ERR_H
|
||||
#define NVGPU_NVGPU_ERR_H
|
||||
|
||||
#include <nvgpu/types.h>
|
||||
|
||||
struct gk20a;
|
||||
|
||||
#define NVGPU_ERR_MODULE_HOST 0U
|
||||
#define NVGPU_ERR_MODULE_SM 1U
|
||||
#define NVGPU_ERR_MODULE_FECS 2U
|
||||
#define NVGPU_ERR_MODULE_GPCCS 3U
|
||||
#define NVGPU_ERR_MODULE_MMU 4U
|
||||
#define NVGPU_ERR_MODULE_GCC 5U
|
||||
#define NVGPU_ERR_MODULE_PWR 6U
|
||||
#define NVGPU_ERR_MODULE_PMU 6U
|
||||
#define NVGPU_ERR_MODULE_PGRAPH 7U
|
||||
#define NVGPU_ERR_MODULE_LTC 8U
|
||||
#define NVGPU_ERR_MODULE_HUBMMU 9U
|
||||
@@ -171,22 +175,26 @@ struct gr_err_info {
|
||||
struct gr_exception_info *exception_info;
|
||||
};
|
||||
|
||||
void nvgpu_report_host_error(struct gk20a *g,
|
||||
u32 inst, u32 err_id, u32 intr_info);
|
||||
/* Functions to report errors to 3LSS */
|
||||
int nvgpu_report_host_err(struct gk20a *g, u32 hw_unit,
|
||||
u32 inst, u32 err_id, u32 intr_info);
|
||||
|
||||
void nvgpu_report_ce_error(struct gk20a *g, u32 inst,
|
||||
u32 err_type, u32 status);
|
||||
int nvgpu_report_ce_err(struct gk20a *g, u32 hw_unit,
|
||||
u32 inst, u32 err_id, u32 intr_info);
|
||||
|
||||
void nvgpu_hubmmu_report_ecc_error(struct gk20a *g, u32 inst,
|
||||
u32 err_type, u64 err_addr, u64 err_cnt);
|
||||
int nvgpu_report_ecc_parity_err(struct gk20a *g, u32 hw_unit, u32 inst,
|
||||
u32 err_type, u64 err_addr, u64 err_count);
|
||||
|
||||
void nvgpu_ltc_report_ecc_error(struct gk20a *g, u32 ltc, u32 slice,
|
||||
u32 err_type, u64 err_addr, u64 err_cnt);
|
||||
int nvgpu_report_ctxsw_err(struct gk20a *g, u32 hw_unit, u32 err_id,
|
||||
void *data);
|
||||
|
||||
void nvgpu_pmu_report_ecc_error(struct gk20a *g, u32 inst,
|
||||
u32 err_type, u64 err_addr, u64 err_cnt);
|
||||
int nvgpu_report_gr_err(struct gk20a *g, u32 hw_unit, u32 inst,
|
||||
u32 err_type, struct gr_err_info *err_info);
|
||||
|
||||
int nvgpu_report_pmu_err(struct gk20a *g, u32 hw_unit, u32 err_id,
|
||||
u32 sub_err_type, u32 status);
|
||||
|
||||
int nvgpu_report_pri_err(struct gk20a *g, u32 hw_unit, u32 inst,
|
||||
u32 err_type, u32 err_addr, u32 err_code);
|
||||
|
||||
void nvgpu_gr_report_ecc_error(struct gk20a *g, u32 hw_module,
|
||||
u32 gpc, u32 tpc, u32 err_type,
|
||||
u64 err_addr, u64 err_cnt);
|
||||
#endif
|
||||
|
||||
67
drivers/gpu/nvgpu/os/linux/sdl/sdl_stub.c
Normal file
67
drivers/gpu/nvgpu/os/linux/sdl/sdl_stub.c
Normal file
@@ -0,0 +1,67 @@
|
||||
/*
|
||||
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <nvgpu/nvgpu_err.h>
|
||||
|
||||
struct gk20a;
|
||||
|
||||
/*
 * Stub for host error reporting.
 *
 * Every argument is discarded and 0 is returned unconditionally.
 */
int nvgpu_report_host_err(struct gk20a *g, u32 hw_unit,
		u32 inst, u32 err_id, u32 intr_info)
{
	/* Nothing is reported here; mark the parameters as deliberately unused. */
	(void)g;
	(void)hw_unit;
	(void)inst;
	(void)err_id;
	(void)intr_info;

	return 0;
}
|
||||
|
||||
/*
 * Stub for ECC/parity error reporting.
 *
 * Every argument is discarded and 0 is returned unconditionally.
 */
int nvgpu_report_ecc_parity_err(struct gk20a *g, u32 hw_unit, u32 inst,
		u32 err_type, u64 err_addr, u64 err_count)
{
	/* Nothing is reported here; mark the parameters as deliberately unused. */
	(void)g;
	(void)hw_unit;
	(void)inst;
	(void)err_type;
	(void)err_addr;
	(void)err_count;

	return 0;
}
|
||||
|
||||
/*
 * Stub for GR error reporting.
 *
 * Every argument (including err_info, which is never dereferenced) is
 * discarded and 0 is returned unconditionally.
 */
int nvgpu_report_gr_err(struct gk20a *g, u32 hw_unit, u32 inst,
		u32 err_type, struct gr_err_info *err_info)
{
	/* Nothing is reported here; mark the parameters as deliberately unused. */
	(void)g;
	(void)hw_unit;
	(void)inst;
	(void)err_type;
	(void)err_info;

	return 0;
}
|
||||
|
||||
/*
 * Stub for PMU error reporting.
 *
 * Every argument is discarded and 0 is returned unconditionally.
 */
int nvgpu_report_pmu_err(struct gk20a *g, u32 hw_unit, u32 err_id,
		u32 sub_err_type, u32 status)
{
	/* Nothing is reported here; mark the parameters as deliberately unused. */
	(void)g;
	(void)hw_unit;
	(void)err_id;
	(void)sub_err_type;
	(void)status;

	return 0;
}
|
||||
|
||||
/*
 * Stub for CE error reporting.
 *
 * Every argument is discarded and 0 is returned unconditionally.
 */
int nvgpu_report_ce_err(struct gk20a *g, u32 hw_unit,
		u32 inst, u32 err_id, u32 intr_info)
{
	/* Nothing is reported here; mark the parameters as deliberately unused. */
	(void)g;
	(void)hw_unit;
	(void)inst;
	(void)err_id;
	(void)intr_info;

	return 0;
}
|
||||
|
||||
/*
 * Stub for PRI error reporting.
 *
 * Every argument is discarded and 0 is returned unconditionally.
 */
int nvgpu_report_pri_err(struct gk20a *g, u32 hw_unit, u32 inst,
		u32 err_type, u32 err_addr, u32 err_code)
{
	/* Nothing is reported here; mark the parameters as deliberately unused. */
	(void)g;
	(void)hw_unit;
	(void)inst;
	(void)err_type;
	(void)err_addr;
	(void)err_code;

	return 0;
}
|
||||
|
||||
/*
 * Stub for context-switch error reporting.
 *
 * Every argument (including the opaque data pointer, which is never
 * dereferenced) is discarded and 0 is returned unconditionally.
 */
int nvgpu_report_ctxsw_err(struct gk20a *g, u32 hw_unit, u32 err_id,
		void *data)
{
	/* Nothing is reported here; mark the parameters as deliberately unused. */
	(void)g;
	(void)hw_unit;
	(void)err_id;
	(void)data;

	return 0;
}
|
||||
@@ -27,7 +27,9 @@
|
||||
|
||||
#include <nvgpu/ecc.h>
|
||||
#include <nvgpu/debugger.h>
|
||||
#include <nvgpu/nvgpu_err.h>
|
||||
|
||||
struct gk20a;
|
||||
|
||||
void nvgpu_dbg_session_post_event(struct dbg_session_gk20a *dbg_s)
|
||||
{
|
||||
@@ -41,3 +43,45 @@ int nvgpu_ecc_sysfs_init(struct gk20a *g)
|
||||
/*
 * Stub for ECC sysfs teardown: the body is intentionally empty.
 */
void nvgpu_ecc_sysfs_remove(struct gk20a *g)
{
	/* Nothing to remove; mark the parameter as deliberately unused. */
	(void)g;
}
|
||||
|
||||
/*
 * Stub for host error reporting.
 *
 * Every argument is discarded and 0 is returned unconditionally.
 */
int nvgpu_report_host_err(struct gk20a *g, u32 hw_unit,
		u32 inst, u32 err_id, u32 intr_info)
{
	/* Nothing is reported here; mark the parameters as deliberately unused. */
	(void)g;
	(void)hw_unit;
	(void)inst;
	(void)err_id;
	(void)intr_info;

	return 0;
}
|
||||
|
||||
/*
 * Stub for ECC/parity error reporting.
 *
 * Every argument is discarded and 0 is returned unconditionally.
 */
int nvgpu_report_ecc_parity_err(struct gk20a *g, u32 hw_unit, u32 inst,
		u32 err_type, u64 err_addr, u64 err_count)
{
	/* Nothing is reported here; mark the parameters as deliberately unused. */
	(void)g;
	(void)hw_unit;
	(void)inst;
	(void)err_type;
	(void)err_addr;
	(void)err_count;

	return 0;
}
|
||||
|
||||
/*
 * Stub for GR error reporting.
 *
 * Every argument (including err_info, which is never dereferenced) is
 * discarded and 0 is returned unconditionally.
 */
int nvgpu_report_gr_err(struct gk20a *g, u32 hw_unit, u32 inst,
		u32 err_type, struct gr_err_info *err_info)
{
	/* Nothing is reported here; mark the parameters as deliberately unused. */
	(void)g;
	(void)hw_unit;
	(void)inst;
	(void)err_type;
	(void)err_info;

	return 0;
}
|
||||
|
||||
/*
 * Stub for PMU error reporting.
 *
 * Every argument is discarded and 0 is returned unconditionally.
 */
int nvgpu_report_pmu_err(struct gk20a *g, u32 hw_unit, u32 err_id,
		u32 sub_err_type, u32 status)
{
	/* Nothing is reported here; mark the parameters as deliberately unused. */
	(void)g;
	(void)hw_unit;
	(void)err_id;
	(void)sub_err_type;
	(void)status;

	return 0;
}
|
||||
|
||||
/*
 * Stub for CE error reporting.
 *
 * Every argument is discarded and 0 is returned unconditionally.
 */
int nvgpu_report_ce_err(struct gk20a *g, u32 hw_unit,
		u32 inst, u32 err_id, u32 intr_info)
{
	/* Nothing is reported here; mark the parameters as deliberately unused. */
	(void)g;
	(void)hw_unit;
	(void)inst;
	(void)err_id;
	(void)intr_info;

	return 0;
}
|
||||
|
||||
/*
 * Stub for PRI error reporting.
 *
 * Every argument is discarded and 0 is returned unconditionally.
 */
int nvgpu_report_pri_err(struct gk20a *g, u32 hw_unit, u32 inst,
		u32 err_type, u32 err_addr, u32 err_code)
{
	/* Nothing is reported here; mark the parameters as deliberately unused. */
	(void)g;
	(void)hw_unit;
	(void)inst;
	(void)err_type;
	(void)err_addr;
	(void)err_code;

	return 0;
}
|
||||
|
||||
/*
 * Stub for context-switch error reporting.
 *
 * Every argument (including the opaque data pointer, which is never
 * dereferenced) is discarded and 0 is returned unconditionally.
 */
int nvgpu_report_ctxsw_err(struct gk20a *g, u32 hw_unit, u32 err_id,
		void *data)
{
	/* Nothing is reported here; mark the parameters as deliberately unused. */
	(void)g;
	(void)hw_unit;
	(void)err_id;
	(void)data;

	return 0;
}
|
||||
|
||||
Reference in New Issue
Block a user