gpu: nvgpu: compile-out unused apis from safety build

This patch does the following changes:
- Compiles out unused error reporting APIs and the related
  data structures from the safety build. For this purpose, it
  introduces a new flag: CONFIG_NVGPU_INTR_DEBUG
- Updates the nvgpu_report_err_to_sdl() API with one more argument,
  hw_unit_id. This aids in determining from the LUT whether an error
  to be reported is corrected or uncorrected.
- Triggers SW quiesce in the safety build if an uncorrected error
  is reported to Safety_Services.
- Renames files in cic folder by replacing gv11b with ga10b,
  since error reporting for gv11b is not supported in dev-main.

JIRA NVGPU-8002

Change-Id: Ic01e73b0208252abba1f615a2c98d770cdf41ca4
Signed-off-by: Rajesh Devaraj <rdevaraj@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2668466
Reviewed-by: Tejal Kudav <tkudav@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-misra <svc-mobile-misra@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: Vaibhav Kachore <vkachore@nvidia.com>
GVS: Gerrit_Virtual_Submit
This commit is contained in:
Rajesh Devaraj
2022-02-11 05:59:52 +00:00
committed by mobile promotions
parent 81c220b95b
commit 0699220b85
49 changed files with 456 additions and 316 deletions

View File

@@ -1,4 +1,4 @@
# Copyright (c) 2019-2021, NVIDIA CORPORATION. All Rights Reserved. # Copyright (c) 2019-2022, NVIDIA CORPORATION. All Rights Reserved.
# #
# HAL units. These are the units that have access to HW. # HAL units. These are the units that have access to HW.
# #
@@ -1024,9 +1024,9 @@ tpc:
cic: cic:
safe: yes safe: yes
owner: Tejal K owner: Tejal K
sources: [ hal/cic/mon/init_gv11b_fusa.c, sources: [ hal/cic/mon/init_ga10b_fusa.c,
hal/cic/mon/lut_gv11b_fusa.c, hal/cic/mon/lut_ga10b_fusa.c,
hal/cic/mon/cic_gv11b.h ] hal/cic/mon/cic_ga10b.h ]
grmgr: grmgr:
safe: no safe: no

View File

@@ -333,15 +333,6 @@ nvgpu-y += \
common/cic/mon/mon_init.o \ common/cic/mon/mon_init.o \
common/cic/mon/mon_lut.o \ common/cic/mon/mon_lut.o \
common/cic/mon/mon_intr.o \ common/cic/mon/mon_intr.o \
common/cic/mon/mon_ce.o \
common/cic/mon/mon_ctxsw.o \
common/cic/mon/mon_msg.o \
common/cic/mon/mon_ecc.o \
common/cic/mon/mon_host.o \
common/cic/mon/mon_gr.o \
common/cic/mon/mon_pri.o \
common/cic/mon/mon_pmu.o \
common/cic/mon/mon_mmu.o \
common/cic/mon/mon_report_err.o \ common/cic/mon/mon_report_err.o \
common/cic/rm/rm_init.o \ common/cic/rm/rm_init.o \
common/cic/rm/rm_intr.o \ common/cic/rm/rm_intr.o \
@@ -417,8 +408,21 @@ nvgpu-y += \
hal/top/top_gp10b.o \ hal/top/top_gp10b.o \
hal/tpc/tpc_gv11b.o \ hal/tpc/tpc_gv11b.o \
hal/priv_ring/priv_ring_gv11b.o \ hal/priv_ring/priv_ring_gv11b.o \
hal/cic/mon/init_gv11b_fusa.o \ hal/cic/mon/init_ga10b_fusa.o \
hal/cic/mon/lut_gv11b_fusa.o hal/cic/mon/lut_ga10b_fusa.o
ifeq ($(CONFIG_NVGPU_INTR_DEBUG),y)
nvgpu-$(CONFIG_NVGPU_INTR_DEBUG) += \
common/cic/mon/mon_ce.o \
common/cic/mon/mon_ctxsw.o \
common/cic/mon/mon_msg.o \
common/cic/mon/mon_ecc.o \
common/cic/mon/mon_host.o \
common/cic/mon/mon_gr.o \
common/cic/mon/mon_pri.o \
common/cic/mon/mon_pmu.o \
common/cic/mon/mon_mmu.o
endif
ifeq ($(CONFIG_NVGPU_GSP_SCHEDULER),y) ifeq ($(CONFIG_NVGPU_GSP_SCHEDULER),y)
nvgpu-$(CONFIG_NVGPU_GSP_SCHEDULER) += \ nvgpu-$(CONFIG_NVGPU_GSP_SCHEDULER) += \

View File

@@ -304,6 +304,13 @@ NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_MIG
CONFIG_NVGPU_GSP_SCHEDULER := 1 CONFIG_NVGPU_GSP_SCHEDULER := 1
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_GSP_SCHEDULER NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_GSP_SCHEDULER
# Code to encapsulate the error information into a uniform interface via
# nvgpu_err_msg is currently unused, but might be needed in future by nvgpu_rm
# process. Retain this code under CONFIG_NVGPU_INTR_DEBUG which is enabled
# only in standard build for now.
CONFIG_NVGPU_INTR_DEBUG := 1
NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_INTR_DEBUG
# Enable GSP stress test # Enable GSP stress test
ifeq ($(CONFIG_NVGPU_GSP_SCHEDULER),1) ifeq ($(CONFIG_NVGPU_GSP_SCHEDULER),1)
CONFIG_NVGPU_GSP_STRESS_TEST := 1 CONFIG_NVGPU_GSP_STRESS_TEST := 1

View File

@@ -159,15 +159,6 @@ srcs += common/device.c \
common/cic/mon/mon_init.c \ common/cic/mon/mon_init.c \
common/cic/mon/mon_lut.c \ common/cic/mon/mon_lut.c \
common/cic/mon/mon_intr.c \ common/cic/mon/mon_intr.c \
common/cic/mon/mon_ce.c \
common/cic/mon/mon_ctxsw.c \
common/cic/mon/mon_msg.c \
common/cic/mon/mon_ecc.c \
common/cic/mon/mon_host.c \
common/cic/mon/mon_gr.c \
common/cic/mon/mon_pri.c \
common/cic/mon/mon_pmu.c \
common/cic/mon/mon_mmu.c \
common/cic/mon/mon_report_err.c \ common/cic/mon/mon_report_err.c \
common/cic/rm/rm_init.c \ common/cic/rm/rm_init.c \
common/cic/rm/rm_intr.c \ common/cic/rm/rm_intr.c \
@@ -179,6 +170,18 @@ srcs += common/device.c \
hal/fifo/userd_gk20a.c \ hal/fifo/userd_gk20a.c \
hal/sync/syncpt_cmdbuf_gv11b.c hal/sync/syncpt_cmdbuf_gv11b.c
ifeq ($(CONFIG_NVGPU_INTR_DEBUG),1)
srcs += common/cic/mon/mon_ce.c \
common/cic/mon/mon_ctxsw.c \
common/cic/mon/mon_msg.c \
common/cic/mon/mon_ecc.c \
common/cic/mon/mon_host.c \
common/cic/mon/mon_gr.c \
common/cic/mon/mon_pri.c \
common/cic/mon/mon_pmu.c \
common/cic/mon/mon_mmu.c
endif
ifeq ($(CONFIG_NVGPU_GSP_SCHEDULER),1) ifeq ($(CONFIG_NVGPU_GSP_SCHEDULER),1)
srcs += common/gsp/gsp_init.c \ srcs += common/gsp/gsp_init.c \
common/gsp/gsp_bootstrap.c \ common/gsp/gsp_bootstrap.c \
@@ -281,8 +284,8 @@ srcs += hal/mm/mm_gv11b_fusa.c \
hal/therm/therm_gv11b_fusa.c \ hal/therm/therm_gv11b_fusa.c \
hal/top/top_gm20b_fusa.c \ hal/top/top_gm20b_fusa.c \
hal/top/top_gv11b_fusa.c \ hal/top/top_gv11b_fusa.c \
hal/cic/mon/init_gv11b_fusa.c \ hal/cic/mon/init_ga10b_fusa.c \
hal/cic/mon/lut_gv11b_fusa.c hal/cic/mon/lut_ga10b_fusa.c
# Source files below are not guaranteed to be functionaly safe (FuSa) and are # Source files below are not guaranteed to be functionaly safe (FuSa) and are
# only included in the normal build. # only included in the normal build.

View File

@@ -28,15 +28,49 @@
#include "cic_mon_priv.h" #include "cic_mon_priv.h"
void nvgpu_report_err_to_sdl(struct gk20a *g, u32 err_id) void nvgpu_report_err_to_sdl(struct gk20a *g, u32 hw_unit_id, u32 err_id)
{ {
int32_t err = 0;
if (g->ops.cic_mon.report_err == NULL) { if (g->ops.cic_mon.report_err == NULL) {
return; return;
} }
err = nvgpu_cic_mon_bound_check_err_id(g, hw_unit_id, err_id);
if (err != 0) {
nvgpu_err(g, "Invalid hw_unit_id/err_id"
"hw_unit_id = 0x%x, err_id=0x%x",
hw_unit_id, err_id);
goto handle_report_failure;
}
if (g->ops.cic_mon.report_err(g, err_id) != 0) { if (g->ops.cic_mon.report_err(g, err_id) != 0) {
nvgpu_err(g, "Failed to report an error: err_id=%x", nvgpu_err(g, "Failed to report an error: "
err_id); "hw_unit_id = 0x%x, err_id=0x%x",
hw_unit_id, err_id);
goto handle_report_failure;
}
#ifndef CONFIG_NVGPU_RECOVERY
/*
 * Trigger SW quiesce if an uncorrected error is reported
 * to Safety_Services, in the safety build.
*/
if (g->cic_mon->err_lut[hw_unit_id].errs[err_id].is_critical) {
nvgpu_sw_quiesce(g); nvgpu_sw_quiesce(g);
} }
#endif
return;
handle_report_failure:
#ifdef CONFIG_NVGPU_BUILD_CONFIGURATION_IS_SAFETY
/*
 * Trigger SW quiesce if a SW error is encountered during
 * error reporting to Safety_Services, in the safety build.
*/
nvgpu_sw_quiesce(g);
#endif
return;
} }

View File

@@ -817,7 +817,8 @@ static int gr_init_ctxsw_falcon_support(struct gk20a *g, struct nvgpu_gr *gr)
err = nvgpu_gr_falcon_init_ctxsw(g, gr->falcon); err = nvgpu_gr_falcon_init_ctxsw(g, gr->falcon);
if (err != 0) { if (err != 0) {
nvgpu_report_err_to_sdl(g, GPU_FECS_CTXSW_INIT_ERROR); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_FECS,
GPU_FECS_CTXSW_INIT_ERROR);
return err; return err;
} }

View File

@@ -360,7 +360,8 @@ int nvgpu_gr_intr_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, u32 sm,
* Check and report any fatal warp errors. * Check and report any fatal warp errors.
*/ */
if (is_global_esr_error(global_esr, global_mask)) { if (is_global_esr_error(global_esr, global_mask)) {
nvgpu_report_err_to_sdl(g, GPU_SM_MACHINE_CHECK_ERROR); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_SM,
GPU_SM_MACHINE_CHECK_ERROR);
nvgpu_err(g, "sm machine check err. gpc_id(%d), tpc_id(%d), " nvgpu_err(g, "sm machine check err. gpc_id(%d), tpc_id(%d), "
"offset(%d)", gpc, tpc, offset); "offset(%d)", gpc, tpc, offset);
} }
@@ -478,7 +479,8 @@ int nvgpu_gr_intr_handle_fecs_error(struct gk20a *g, struct nvgpu_channel *ch,
&& (mailbox_value == && (mailbox_value ==
g->ops.gr.intr.get_ctxsw_checksum_mismatch_mailbox_val())) { g->ops.gr.intr.get_ctxsw_checksum_mismatch_mailbox_val())) {
nvgpu_report_err_to_sdl(g, GPU_FECS_CTXSW_CRC_MISMATCH); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_FECS,
GPU_FECS_CTXSW_CRC_MISMATCH);
nvgpu_err(g, "ctxsw intr0 set by ucode, " nvgpu_err(g, "ctxsw intr0 set by ucode, "
"ctxsw checksum mismatch"); "ctxsw checksum mismatch");
ret = -1; ret = -1;
@@ -488,7 +490,8 @@ int nvgpu_gr_intr_handle_fecs_error(struct gk20a *g, struct nvgpu_channel *ch,
* recovery is initiated and error is reported to * recovery is initiated and error is reported to
* 3LSS. * 3LSS.
*/ */
nvgpu_report_err_to_sdl(g, GPU_FECS_FAULT_DURING_CTXSW); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_FECS,
GPU_FECS_FAULT_DURING_CTXSW);
nvgpu_err(g, nvgpu_err(g,
"ctxsw intr0 set by ucode, error_code: 0x%08x", "ctxsw intr0 set by ucode, error_code: 0x%08x",
mailbox_value); mailbox_value);
@@ -497,13 +500,15 @@ int nvgpu_gr_intr_handle_fecs_error(struct gk20a *g, struct nvgpu_channel *ch,
} }
if (fecs_host_intr->fault_during_ctxsw_active) { if (fecs_host_intr->fault_during_ctxsw_active) {
nvgpu_report_err_to_sdl(g, GPU_FECS_FAULT_DURING_CTXSW); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_FECS,
GPU_FECS_FAULT_DURING_CTXSW);
nvgpu_err(g, "fecs fault during ctxsw for channel %u", chid); nvgpu_err(g, "fecs fault during ctxsw for channel %u", chid);
ret = -1; ret = -1;
} }
if (fecs_host_intr->watchdog_active) { if (fecs_host_intr->watchdog_active) {
nvgpu_report_err_to_sdl(g, GPU_FECS_CTXSW_WATCHDOG_TIMEOUT); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_FECS,
GPU_FECS_CTXSW_WATCHDOG_TIMEOUT);
/* currently, recovery is not initiated */ /* currently, recovery is not initiated */
nvgpu_err(g, "fecs watchdog triggered for channel %u, " nvgpu_err(g, "fecs watchdog triggered for channel %u, "
"cannot ctxsw anymore !!", chid); "cannot ctxsw anymore !!", chid);
@@ -770,7 +775,8 @@ static u32 gr_intr_handle_illegal_interrupts(struct gk20a *g,
if (intr_info->illegal_notify != 0U) { if (intr_info->illegal_notify != 0U) {
nvgpu_err(g, "illegal notify pending"); nvgpu_err(g, "illegal notify pending");
nvgpu_report_err_to_sdl(g, GPU_PGRAPH_ILLEGAL_ERROR); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_PGRAPH,
GPU_PGRAPH_ILLEGAL_ERROR);
nvgpu_gr_intr_set_error_notifier(g, isr_data, nvgpu_gr_intr_set_error_notifier(g, isr_data,
NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY); NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY);
do_reset = 1U; do_reset = 1U;
@@ -779,7 +785,8 @@ static u32 gr_intr_handle_illegal_interrupts(struct gk20a *g,
if (intr_info->illegal_method != 0U) { if (intr_info->illegal_method != 0U) {
if (gr_intr_handle_illegal_method(g, isr_data) != 0) { if (gr_intr_handle_illegal_method(g, isr_data) != 0) {
nvgpu_report_err_to_sdl(g, GPU_PGRAPH_ILLEGAL_ERROR); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_PGRAPH,
GPU_PGRAPH_ILLEGAL_ERROR);
do_reset = 1U; do_reset = 1U;
} }
@@ -787,7 +794,8 @@ static u32 gr_intr_handle_illegal_interrupts(struct gk20a *g,
} }
if (intr_info->illegal_class != 0U) { if (intr_info->illegal_class != 0U) {
nvgpu_report_err_to_sdl(g, GPU_PGRAPH_ILLEGAL_ERROR); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_PGRAPH,
GPU_PGRAPH_ILLEGAL_ERROR);
nvgpu_err(g, "invalid class 0x%08x, offset 0x%08x", nvgpu_err(g, "invalid class 0x%08x, offset 0x%08x",
isr_data->class_num, isr_data->offset); isr_data->class_num, isr_data->offset);
@@ -817,7 +825,8 @@ static u32 gr_intr_handle_error_interrupts(struct gk20a *g,
} }
if (intr_info->class_error != 0U) { if (intr_info->class_error != 0U) {
nvgpu_report_err_to_sdl(g, GPU_PGRAPH_ILLEGAL_ERROR); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_PGRAPH,
GPU_PGRAPH_ILLEGAL_ERROR);
gr_intr_handle_class_error(g, isr_data); gr_intr_handle_class_error(g, isr_data);
do_reset = 1U; do_reset = 1U;
*clear_intr &= ~intr_info->class_error; *clear_intr &= ~intr_info->class_error;

View File

@@ -34,7 +34,8 @@
void nvgpu_pmu_report_bar0_pri_err_status(struct gk20a *g, u32 bar0_status, void nvgpu_pmu_report_bar0_pri_err_status(struct gk20a *g, u32 bar0_status,
u32 error_type) u32 error_type)
{ {
nvgpu_report_err_to_sdl(g, GPU_PMU_BAR0_ERROR_TIMEOUT); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_PMU,
GPU_PMU_BAR0_ERROR_TIMEOUT);
nvgpu_err(g, "Falcon mem scrubbing timeout. status(0x%x), " nvgpu_err(g, "Falcon mem scrubbing timeout. status(0x%x), "
"error_type(0x%x)", bar0_status, error_type); "error_type(0x%x)", bar0_status, error_type);
} }

View File

@@ -156,6 +156,6 @@ void ga10b_bus_isr(struct gk20a *g)
bus_intr_0 & ~bus_intr_0_handled); bus_intr_0 & ~bus_intr_0_handled);
} }
nvgpu_report_err_to_sdl(g, err_type); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_HOST, err_type);
nvgpu_writel(g, bus_intr_0_r(), bus_intr_0); nvgpu_writel(g, bus_intr_0_r(), bus_intr_0);
} }

View File

@@ -89,6 +89,6 @@ void gk20a_bus_isr(struct gk20a *g)
*/ */
err_type = GPU_HOST_PBUS_TIMEOUT_ERROR; err_type = GPU_HOST_PBUS_TIMEOUT_ERROR;
} }
nvgpu_report_err_to_sdl(g, err_type); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_HOST, err_type);
nvgpu_writel(g, bus_intr_0_r(), val); nvgpu_writel(g, bus_intr_0_r(), val);
} }

View File

@@ -43,13 +43,15 @@ void gp10b_ce_stall_isr(struct gk20a *g, u32 inst_id, u32 pri_base)
/* clear blocking interrupts: they exibit broken behavior */ /* clear blocking interrupts: they exibit broken behavior */
if ((ce_intr & ce_intr_status_blockpipe_pending_f()) != 0U) { if ((ce_intr & ce_intr_status_blockpipe_pending_f()) != 0U) {
nvgpu_report_err_to_sdl(g, GPU_CE_BLOCK_PIPE); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_CE,
GPU_CE_BLOCK_PIPE);
nvgpu_err(g, "ce blocking pipe interrupt"); nvgpu_err(g, "ce blocking pipe interrupt");
clear_intr |= ce_intr_status_blockpipe_pending_f(); clear_intr |= ce_intr_status_blockpipe_pending_f();
} }
if ((ce_intr & ce_intr_status_launcherr_pending_f()) != 0U) { if ((ce_intr & ce_intr_status_launcherr_pending_f()) != 0U) {
nvgpu_report_err_to_sdl(g, GPU_CE_LAUNCH_ERROR); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_CE,
GPU_CE_LAUNCH_ERROR);
nvgpu_err(g, "ce launch error interrupt"); nvgpu_err(g, "ce launch error interrupt");
clear_intr |= ce_intr_status_launcherr_pending_f(); clear_intr |= ce_intr_status_launcherr_pending_f();
} }

View File

@@ -61,7 +61,8 @@ void gv11b_ce_stall_isr(struct gk20a *g, u32 inst_id, u32 pri_base)
* reset to get back to a working state. * reset to get back to a working state.
*/ */
if ((ce_intr & ce_intr_status_invalid_config_pending_f()) != 0U) { if ((ce_intr & ce_intr_status_invalid_config_pending_f()) != 0U) {
nvgpu_report_err_to_sdl(g, GPU_CE_INVALID_CONFIG); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_CE,
GPU_CE_INVALID_CONFIG);
nvgpu_err(g, "ce: inst %d: invalid config", inst_id); nvgpu_err(g, "ce: inst %d: invalid config", inst_id);
clear_intr |= ce_intr_status_invalid_config_reset_f(); clear_intr |= ce_intr_status_invalid_config_reset_f();
} }
@@ -73,7 +74,8 @@ void gv11b_ce_stall_isr(struct gk20a *g, u32 inst_id, u32 pri_base)
* reset before operations can start again, if not the entire GPU. * reset before operations can start again, if not the entire GPU.
*/ */
if ((ce_intr & ce_intr_status_mthd_buffer_fault_pending_f()) != 0U) { if ((ce_intr & ce_intr_status_mthd_buffer_fault_pending_f()) != 0U) {
nvgpu_report_err_to_sdl(g, GPU_CE_METHOD_BUFFER_FAULT); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_CE,
GPU_CE_METHOD_BUFFER_FAULT);
nvgpu_err(g, "ce: inst %d: mthd buffer fault", inst_id); nvgpu_err(g, "ce: inst %d: mthd buffer fault", inst_id);
clear_intr |= ce_intr_status_mthd_buffer_fault_reset_f(); clear_intr |= ce_intr_status_mthd_buffer_fault_reset_f();
} }

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@@ -20,17 +20,17 @@
* DEALINGS IN THE SOFTWARE. * DEALINGS IN THE SOFTWARE.
*/ */
#ifndef CIC_GV11B_H #ifndef CIC_GA10B_H
#define CIC_GV11B_H #define CIC_GA10B_H
#include <nvgpu/nvgpu_err_info.h> #include <nvgpu/nvgpu_err_info.h>
struct gk20a; struct gk20a;
struct nvgpu_cic_mon; struct nvgpu_cic_mon;
extern struct nvgpu_err_hw_module gv11b_err_lut[]; extern struct nvgpu_err_hw_module ga10b_err_lut[];
extern u32 size_of_gv11b_lut; extern u32 size_of_ga10b_lut;
int gv11b_cic_mon_init(struct gk20a *g, struct nvgpu_cic_mon *cic_mon); int ga10b_cic_mon_init(struct gk20a *g, struct nvgpu_cic_mon *cic_mon);
#endif /* CIC_GV11B_H */ #endif /* CIC_GA10B_H */

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@@ -23,16 +23,16 @@
#include <nvgpu/log.h> #include <nvgpu/log.h>
#include "common/cic/mon/cic_mon_priv.h" #include "common/cic/mon/cic_mon_priv.h"
#include "cic_gv11b.h" #include "cic_ga10b.h"
int gv11b_cic_mon_init(struct gk20a *g, struct nvgpu_cic_mon *cic_mon) int ga10b_cic_mon_init(struct gk20a *g, struct nvgpu_cic_mon *cic_mon)
{ {
if (cic_mon == NULL) { if (cic_mon == NULL) {
nvgpu_err(g, "Invalid CIC reference pointer."); nvgpu_err(g, "Invalid CIC reference pointer.");
return -EINVAL; return -EINVAL;
} }
cic_mon->err_lut = gv11b_err_lut; cic_mon->err_lut = ga10b_err_lut;
cic_mon->num_hw_modules = size_of_gv11b_lut; cic_mon->num_hw_modules = size_of_ga10b_lut;
return 0; return 0;
} }

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@@ -24,7 +24,7 @@
#include <nvgpu/nvgpu_err_info.h> #include <nvgpu/nvgpu_err_info.h>
#include <nvgpu/static_analysis.h> #include <nvgpu/static_analysis.h>
#include "common/cic/mon/cic_mon_priv.h" #include "common/cic/mon/cic_mon_priv.h"
#include "cic_gv11b.h" #include "cic_ga10b.h"
/* /*
* A flag to enable/disable hw error injection. * A flag to enable/disable hw error injection.
@@ -40,7 +40,7 @@
* In case of hw error injection support, this initialization will be overriden * In case of hw error injection support, this initialization will be overriden
* by the values provided from the hal layes of corresponding hw units. * by the values provided from the hal layes of corresponding hw units.
*/ */
struct nvgpu_err_hw_module gv11b_err_lut[] = { struct nvgpu_err_hw_module ga10b_err_lut[] = {
{ {
.name = "host", .name = "host",
.hw_unit = (u32)NVGPU_ERR_MODULE_HOST, .hw_unit = (u32)NVGPU_ERR_MODULE_HOST,
@@ -49,84 +49,84 @@ struct nvgpu_err_hw_module gv11b_err_lut[] = {
.errs = (struct nvgpu_err_desc[]) { .errs = (struct nvgpu_err_desc[]) {
GPU_CRITERR("pfifo_bind_error", GPU_CRITERR("pfifo_bind_error",
GPU_HOST_PFIFO_BIND_ERROR, INJECT_SW, GPU_HOST_PFIFO_BIND_ERROR, INJECT_SW,
NULL, nvgpu_inject_host_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_CRITERR("pfifo_sched_error", GPU_CRITERR("pfifo_sched_error",
GPU_HOST_PFIFO_SCHED_ERROR, INJECT_SW, GPU_HOST_PFIFO_SCHED_ERROR, INJECT_SW,
NULL, nvgpu_inject_host_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_CRITERR("pfifo_chsw_error", GPU_CRITERR("pfifo_chsw_error",
GPU_HOST_PFIFO_CHSW_ERROR, INJECT_SW, GPU_HOST_PFIFO_CHSW_ERROR, INJECT_SW,
NULL, nvgpu_inject_host_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_CRITERR("pfifo_memop_error", GPU_CRITERR("pfifo_memop_error",
GPU_HOST_PFIFO_MEMOP_TIMEOUT_ERROR, GPU_HOST_PFIFO_MEMOP_TIMEOUT_ERROR,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_host_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_CRITERR("pfifo_lb_error", GPU_CRITERR("pfifo_lb_error",
GPU_HOST_PFIFO_LB_ERROR, INJECT_SW, GPU_HOST_PFIFO_LB_ERROR, INJECT_SW,
NULL, nvgpu_inject_host_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_CRITERR("pbus_squash_error", GPU_CRITERR("pbus_squash_error",
GPU_HOST_PBUS_SQUASH_ERROR, GPU_HOST_PBUS_SQUASH_ERROR,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_host_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_CRITERR("pbus_fecs_error", GPU_CRITERR("pbus_fecs_error",
GPU_HOST_PBUS_FECS_ERROR, GPU_HOST_PBUS_FECS_ERROR,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_host_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_CRITERR("pbus_timeout_error", GPU_CRITERR("pbus_timeout_error",
GPU_HOST_PBUS_TIMEOUT_ERROR, GPU_HOST_PBUS_TIMEOUT_ERROR,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_host_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_CRITERR("pbdma_timeout_error", GPU_CRITERR("pbdma_timeout_error",
GPU_HOST_PBDMA_TIMEOUT_ERROR, GPU_HOST_PBDMA_TIMEOUT_ERROR,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_host_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_CRITERR("pbdma_extra_error", GPU_CRITERR("pbdma_extra_error",
GPU_HOST_PBDMA_EXTRA_ERROR, GPU_HOST_PBDMA_EXTRA_ERROR,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_host_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_CRITERR("pbdma_gpfifo_pb_error", GPU_CRITERR("pbdma_gpfifo_pb_error",
GPU_HOST_PBDMA_GPFIFO_PB_ERROR, GPU_HOST_PBDMA_GPFIFO_PB_ERROR,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_host_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_CRITERR("pbdma_method_error", GPU_CRITERR("pbdma_method_error",
GPU_HOST_PBDMA_METHOD_ERROR, GPU_HOST_PBDMA_METHOD_ERROR,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_host_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_CRITERR("pbdma_signature_error", GPU_CRITERR("pbdma_signature_error",
GPU_HOST_PBDMA_SIGNATURE_ERROR, GPU_HOST_PBDMA_SIGNATURE_ERROR,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_host_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_CRITERR("pbdma_hce_error", GPU_CRITERR("pbdma_hce_error",
GPU_HOST_PBDMA_HCE_ERROR, GPU_HOST_PBDMA_HCE_ERROR,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_host_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_CRITERR("pbdma_preempt_error", GPU_CRITERR("pbdma_preempt_error",
GPU_HOST_PBDMA_PREEMPT_ERROR, GPU_HOST_PBDMA_PREEMPT_ERROR,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_host_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_NONCRITERR("pfifo_ctxsw_timeout", GPU_NONCRITERR("pfifo_ctxsw_timeout",
GPU_HOST_PFIFO_CTXSW_TIMEOUT_ERROR, GPU_HOST_PFIFO_CTXSW_TIMEOUT_ERROR,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_host_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_CRITERR("pfifo_fb_flush_timeout", GPU_CRITERR("pfifo_fb_flush_timeout",
GPU_HOST_PFIFO_FB_FLUSH_TIMEOUT_ERROR, GPU_HOST_PFIFO_FB_FLUSH_TIMEOUT_ERROR,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_host_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
}, },
}, },
@@ -139,12 +139,12 @@ struct nvgpu_err_hw_module gv11b_err_lut[] = {
GPU_NONCRITERR("l1_tag_ecc_corrected", GPU_NONCRITERR("l1_tag_ecc_corrected",
GPU_SM_L1_TAG_ECC_CORRECTED, GPU_SM_L1_TAG_ECC_CORRECTED,
INJECT_TYPE, INJECT_TYPE,
NULL, nvgpu_inject_ecc_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_CRITERR("l1_tag_ecc_uncorrected", GPU_CRITERR("l1_tag_ecc_uncorrected",
GPU_SM_L1_TAG_ECC_UNCORRECTED, GPU_SM_L1_TAG_ECC_UNCORRECTED,
INJECT_TYPE, INJECT_TYPE,
NULL, nvgpu_inject_ecc_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_NONCRITERR("cbu_ecc_corrected", GPU_NONCRITERR("cbu_ecc_corrected",
0, INJECT_NONE, 0, INJECT_NONE,
@@ -153,7 +153,7 @@ struct nvgpu_err_hw_module gv11b_err_lut[] = {
GPU_CRITERR("cbu_ecc_uncorrected", GPU_CRITERR("cbu_ecc_uncorrected",
GPU_SM_CBU_ECC_UNCORRECTED, GPU_SM_CBU_ECC_UNCORRECTED,
INJECT_TYPE, INJECT_TYPE,
NULL, nvgpu_inject_ecc_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_NONCRITERR("lrf_ecc_corrected", GPU_NONCRITERR("lrf_ecc_corrected",
0, INJECT_NONE, 0, INJECT_NONE,
@@ -162,7 +162,7 @@ struct nvgpu_err_hw_module gv11b_err_lut[] = {
GPU_CRITERR("lrf_ecc_uncorrected", GPU_CRITERR("lrf_ecc_uncorrected",
GPU_SM_LRF_ECC_UNCORRECTED, GPU_SM_LRF_ECC_UNCORRECTED,
INJECT_TYPE, INJECT_TYPE,
NULL, nvgpu_inject_ecc_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_NONCRITERR("l1_data_ecc_corrected", GPU_NONCRITERR("l1_data_ecc_corrected",
0, INJECT_NONE, 0, INJECT_NONE,
@@ -171,7 +171,7 @@ struct nvgpu_err_hw_module gv11b_err_lut[] = {
GPU_CRITERR("l1_data_ecc_uncorrected", GPU_CRITERR("l1_data_ecc_uncorrected",
GPU_SM_L1_DATA_ECC_UNCORRECTED, GPU_SM_L1_DATA_ECC_UNCORRECTED,
INJECT_TYPE, INJECT_TYPE,
NULL, nvgpu_inject_ecc_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_NONCRITERR("icache_l0_data_ecc_corrected", GPU_NONCRITERR("icache_l0_data_ecc_corrected",
0, INJECT_NONE, 0, INJECT_NONE,
@@ -180,7 +180,7 @@ struct nvgpu_err_hw_module gv11b_err_lut[] = {
GPU_CRITERR("icache_l0_data_ecc_uncorrected", GPU_CRITERR("icache_l0_data_ecc_uncorrected",
GPU_SM_ICACHE_L0_DATA_ECC_UNCORRECTED, GPU_SM_ICACHE_L0_DATA_ECC_UNCORRECTED,
INJECT_TYPE, INJECT_TYPE,
NULL, nvgpu_inject_ecc_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_NONCRITERR("icache_l1_data_ecc_corrected", GPU_NONCRITERR("icache_l1_data_ecc_corrected",
0, INJECT_NONE, 0, INJECT_NONE,
@@ -189,7 +189,7 @@ struct nvgpu_err_hw_module gv11b_err_lut[] = {
GPU_CRITERR("icache_l1_data_ecc_uncorrected", GPU_CRITERR("icache_l1_data_ecc_uncorrected",
GPU_SM_ICACHE_L1_DATA_ECC_UNCORRECTED, GPU_SM_ICACHE_L1_DATA_ECC_UNCORRECTED,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_ecc_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_NONCRITERR("icache_l0_predecode_ecc_corrected", GPU_NONCRITERR("icache_l0_predecode_ecc_corrected",
0, INJECT_NONE, 0, INJECT_NONE,
@@ -198,7 +198,7 @@ struct nvgpu_err_hw_module gv11b_err_lut[] = {
GPU_CRITERR("icache_l0_predecode_ecc_uncorrected", GPU_CRITERR("icache_l0_predecode_ecc_uncorrected",
GPU_SM_ICACHE_L0_PREDECODE_ECC_UNCORRECTED, GPU_SM_ICACHE_L0_PREDECODE_ECC_UNCORRECTED,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_ecc_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_NONCRITERR("l1_tag_miss_fifo_ecc_corrected", GPU_NONCRITERR("l1_tag_miss_fifo_ecc_corrected",
0, INJECT_NONE, 0, INJECT_NONE,
@@ -207,7 +207,7 @@ struct nvgpu_err_hw_module gv11b_err_lut[] = {
GPU_CRITERR("l1_tag_miss_fifo_ecc_uncorrected", GPU_CRITERR("l1_tag_miss_fifo_ecc_uncorrected",
GPU_SM_L1_TAG_MISS_FIFO_ECC_UNCORRECTED, GPU_SM_L1_TAG_MISS_FIFO_ECC_UNCORRECTED,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_ecc_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_NONCRITERR("l1_tag_s2r_pixprf_ecc_corrected", GPU_NONCRITERR("l1_tag_s2r_pixprf_ecc_corrected",
0, INJECT_NONE, 0, INJECT_NONE,
@@ -216,12 +216,12 @@ struct nvgpu_err_hw_module gv11b_err_lut[] = {
GPU_CRITERR("l1_tag_s2r_pixprf_ecc_uncorrected", GPU_CRITERR("l1_tag_s2r_pixprf_ecc_uncorrected",
GPU_SM_L1_TAG_S2R_PIXPRF_ECC_UNCORRECTED, GPU_SM_L1_TAG_S2R_PIXPRF_ECC_UNCORRECTED,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_ecc_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_CRITERR("machine_check_error", GPU_CRITERR("machine_check_error",
GPU_SM_MACHINE_CHECK_ERROR, GPU_SM_MACHINE_CHECK_ERROR,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_gr_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_NONCRITERR("icache_l1_predecode_ecc_corrected", GPU_NONCRITERR("icache_l1_predecode_ecc_corrected",
0, INJECT_NONE, 0, INJECT_NONE,
@@ -230,7 +230,7 @@ struct nvgpu_err_hw_module gv11b_err_lut[] = {
GPU_CRITERR("icache_l1_predecode_ecc_uncorrected", GPU_CRITERR("icache_l1_predecode_ecc_uncorrected",
GPU_SM_ICACHE_L1_PREDECODE_ECC_UNCORRECTED, GPU_SM_ICACHE_L1_PREDECODE_ECC_UNCORRECTED,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_ecc_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
}, },
}, },
@@ -243,12 +243,12 @@ struct nvgpu_err_hw_module gv11b_err_lut[] = {
GPU_NONCRITERR("falcon_imem_ecc_corrected", GPU_NONCRITERR("falcon_imem_ecc_corrected",
GPU_FECS_FALCON_IMEM_ECC_CORRECTED, GPU_FECS_FALCON_IMEM_ECC_CORRECTED,
INJECT_TYPE, INJECT_TYPE,
NULL, nvgpu_inject_ecc_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_CRITERR("falcon_imem_ecc_uncorrected", GPU_CRITERR("falcon_imem_ecc_uncorrected",
GPU_FECS_FALCON_IMEM_ECC_UNCORRECTED, GPU_FECS_FALCON_IMEM_ECC_UNCORRECTED,
INJECT_TYPE, INJECT_TYPE,
NULL, nvgpu_inject_ecc_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_NONCRITERR("falcon_dmem_ecc_corrected", GPU_NONCRITERR("falcon_dmem_ecc_corrected",
0, INJECT_NONE, 0, INJECT_NONE,
@@ -257,27 +257,27 @@ struct nvgpu_err_hw_module gv11b_err_lut[] = {
GPU_CRITERR("falcon_dmem_ecc_uncorrected", GPU_CRITERR("falcon_dmem_ecc_uncorrected",
GPU_FECS_FALCON_DMEM_ECC_UNCORRECTED, GPU_FECS_FALCON_DMEM_ECC_UNCORRECTED,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_ecc_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_CRITERR("ctxsw_watchdog_timeout", GPU_CRITERR("ctxsw_watchdog_timeout",
GPU_FECS_CTXSW_WATCHDOG_TIMEOUT, GPU_FECS_CTXSW_WATCHDOG_TIMEOUT,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_ctxsw_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_CRITERR("ctxsw_crc_mismatch", GPU_CRITERR("ctxsw_crc_mismatch",
GPU_FECS_CTXSW_CRC_MISMATCH, GPU_FECS_CTXSW_CRC_MISMATCH,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_ctxsw_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_CRITERR("fault_during_ctxsw", GPU_CRITERR("fault_during_ctxsw",
GPU_FECS_FAULT_DURING_CTXSW, GPU_FECS_FAULT_DURING_CTXSW,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_ctxsw_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_CRITERR("ctxsw_init_error", GPU_CRITERR("ctxsw_init_error",
GPU_FECS_CTXSW_INIT_ERROR, GPU_FECS_CTXSW_INIT_ERROR,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_ctxsw_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
}, },
}, },
@@ -290,12 +290,12 @@ struct nvgpu_err_hw_module gv11b_err_lut[] = {
GPU_NONCRITERR("falcon_imem_ecc_corrected", GPU_NONCRITERR("falcon_imem_ecc_corrected",
GPU_GPCCS_FALCON_IMEM_ECC_CORRECTED, GPU_GPCCS_FALCON_IMEM_ECC_CORRECTED,
INJECT_TYPE, INJECT_TYPE,
NULL, nvgpu_inject_ecc_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_CRITERR("falcon_imem_ecc_uncorrected", GPU_CRITERR("falcon_imem_ecc_uncorrected",
GPU_GPCCS_FALCON_IMEM_ECC_UNCORRECTED, GPU_GPCCS_FALCON_IMEM_ECC_UNCORRECTED,
INJECT_TYPE, INJECT_TYPE,
NULL, nvgpu_inject_ecc_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_NONCRITERR("falcon_dmem_ecc_corrected", GPU_NONCRITERR("falcon_dmem_ecc_corrected",
0, INJECT_NONE, 0, INJECT_NONE,
@@ -304,7 +304,7 @@ struct nvgpu_err_hw_module gv11b_err_lut[] = {
GPU_CRITERR("falcon_dmem_ecc_uncorrected", GPU_CRITERR("falcon_dmem_ecc_uncorrected",
GPU_GPCCS_FALCON_DMEM_ECC_UNCORRECTED, GPU_GPCCS_FALCON_DMEM_ECC_UNCORRECTED,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_ecc_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
}, },
}, },
@@ -321,7 +321,7 @@ struct nvgpu_err_hw_module gv11b_err_lut[] = {
GPU_CRITERR("l1tlb_sa_data_ecc_uncorrected", GPU_CRITERR("l1tlb_sa_data_ecc_uncorrected",
GPU_MMU_L1TLB_SA_DATA_ECC_UNCORRECTED, GPU_MMU_L1TLB_SA_DATA_ECC_UNCORRECTED,
INJECT_TYPE, INJECT_TYPE,
NULL, nvgpu_inject_ecc_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_NONCRITERR("l1tlb_fa_data_ecc_corrected", GPU_NONCRITERR("l1tlb_fa_data_ecc_corrected",
0, INJECT_NONE, 0, INJECT_NONE,
@@ -330,7 +330,7 @@ struct nvgpu_err_hw_module gv11b_err_lut[] = {
GPU_CRITERR("l1tlb_fa_data_ecc_uncorrected", GPU_CRITERR("l1tlb_fa_data_ecc_uncorrected",
GPU_MMU_L1TLB_FA_DATA_ECC_UNCORRECTED, GPU_MMU_L1TLB_FA_DATA_ECC_UNCORRECTED,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_ecc_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
}, },
}, },
@@ -347,7 +347,7 @@ struct nvgpu_err_hw_module gv11b_err_lut[] = {
GPU_CRITERR("l15_ecc_uncorrected", GPU_CRITERR("l15_ecc_uncorrected",
GPU_GCC_L15_ECC_UNCORRECTED, GPU_GCC_L15_ECC_UNCORRECTED,
INJECT_TYPE, INJECT_TYPE,
NULL, nvgpu_inject_ecc_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
}, },
}, },
@@ -360,12 +360,12 @@ struct nvgpu_err_hw_module gv11b_err_lut[] = {
GPU_NONCRITERR("falcon_imem_ecc_corrected", GPU_NONCRITERR("falcon_imem_ecc_corrected",
GPU_PMU_FALCON_IMEM_ECC_CORRECTED, GPU_PMU_FALCON_IMEM_ECC_CORRECTED,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_ecc_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_CRITERR("falcon_imem_ecc_uncorrected", GPU_CRITERR("falcon_imem_ecc_uncorrected",
GPU_PMU_FALCON_IMEM_ECC_UNCORRECTED, GPU_PMU_FALCON_IMEM_ECC_UNCORRECTED,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_ecc_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_NONCRITERR("falcon_dmem_ecc_corrected", GPU_NONCRITERR("falcon_dmem_ecc_corrected",
0, INJECT_NONE, 0, INJECT_NONE,
@@ -374,11 +374,11 @@ struct nvgpu_err_hw_module gv11b_err_lut[] = {
GPU_CRITERR("falcon_dmem_ecc_uncorrected", GPU_CRITERR("falcon_dmem_ecc_uncorrected",
GPU_PMU_FALCON_DMEM_ECC_UNCORRECTED, GPU_PMU_FALCON_DMEM_ECC_UNCORRECTED,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_ecc_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_CRITERR("bar0_error_timeout", GPU_CRITERR("bar0_error_timeout",
GPU_PMU_BAR0_ERROR_TIMEOUT, INJECT_SW, GPU_PMU_BAR0_ERROR_TIMEOUT, INJECT_SW,
NULL, nvgpu_inject_pmu_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
}, },
}, },
@@ -391,62 +391,62 @@ struct nvgpu_err_hw_module gv11b_err_lut[] = {
GPU_CRITERR("fe_exception", GPU_CRITERR("fe_exception",
GPU_PGRAPH_FE_EXCEPTION, GPU_PGRAPH_FE_EXCEPTION,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_gr_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_CRITERR("memfmt_exception", GPU_CRITERR("memfmt_exception",
GPU_PGRAPH_MEMFMT_EXCEPTION, GPU_PGRAPH_MEMFMT_EXCEPTION,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_gr_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_CRITERR("pd_exception", GPU_CRITERR("pd_exception",
GPU_PGRAPH_PD_EXCEPTION, GPU_PGRAPH_PD_EXCEPTION,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_gr_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_CRITERR("scc_exception", GPU_CRITERR("scc_exception",
GPU_PGRAPH_SCC_EXCEPTION, GPU_PGRAPH_SCC_EXCEPTION,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_gr_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_CRITERR("ds_exception", GPU_CRITERR("ds_exception",
GPU_PGRAPH_DS_EXCEPTION, GPU_PGRAPH_DS_EXCEPTION,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_gr_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_CRITERR("ssync_exception", GPU_CRITERR("ssync_exception",
GPU_PGRAPH_SSYNC_EXCEPTION, GPU_PGRAPH_SSYNC_EXCEPTION,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_gr_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_CRITERR("mme_exception", GPU_CRITERR("mme_exception",
GPU_PGRAPH_MME_EXCEPTION, GPU_PGRAPH_MME_EXCEPTION,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_gr_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_CRITERR("sked_exception", GPU_CRITERR("sked_exception",
GPU_PGRAPH_SKED_EXCEPTION, GPU_PGRAPH_SKED_EXCEPTION,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_gr_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_CRITERR("be_exception", GPU_CRITERR("be_exception",
GPU_PGRAPH_BE_EXCEPTION, GPU_PGRAPH_BE_EXCEPTION,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_gr_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_CRITERR("mpc_exception", GPU_CRITERR("mpc_exception",
GPU_PGRAPH_MPC_EXCEPTION, GPU_PGRAPH_MPC_EXCEPTION,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_gr_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_CRITERR("illegal_error", GPU_CRITERR("illegal_error",
GPU_PGRAPH_ILLEGAL_ERROR, GPU_PGRAPH_ILLEGAL_ERROR,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_gr_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_CRITERR("gpc_gfx_exception", GPU_CRITERR("gpc_gfx_exception",
GPU_PGRAPH_GPC_GFX_EXCEPTION, GPU_PGRAPH_GPC_GFX_EXCEPTION,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_gr_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
}, },
}, },
@@ -459,12 +459,12 @@ struct nvgpu_err_hw_module gv11b_err_lut[] = {
GPU_NONCRITERR("cache_dstg_ecc_corrected", GPU_NONCRITERR("cache_dstg_ecc_corrected",
GPU_LTC_CACHE_DSTG_ECC_CORRECTED, GPU_LTC_CACHE_DSTG_ECC_CORRECTED,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_ecc_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_CRITERR("cache_dstg_ecc_uncorrected", GPU_CRITERR("cache_dstg_ecc_uncorrected",
GPU_LTC_CACHE_DSTG_ECC_UNCORRECTED, GPU_LTC_CACHE_DSTG_ECC_UNCORRECTED,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_ecc_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_NONCRITERR("cache_tstg_ecc_corrected", GPU_NONCRITERR("cache_tstg_ecc_corrected",
0, INJECT_NONE, 0, INJECT_NONE,
@@ -473,7 +473,7 @@ struct nvgpu_err_hw_module gv11b_err_lut[] = {
GPU_CRITERR("cache_tstg_ecc_uncorrected", GPU_CRITERR("cache_tstg_ecc_uncorrected",
GPU_LTC_CACHE_TSTG_ECC_UNCORRECTED, GPU_LTC_CACHE_TSTG_ECC_UNCORRECTED,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_ecc_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_NONCRITERR("cache_rstg_ecc_corrected", GPU_NONCRITERR("cache_rstg_ecc_corrected",
0, INJECT_NONE, 0, INJECT_NONE,
@@ -490,7 +490,7 @@ struct nvgpu_err_hw_module gv11b_err_lut[] = {
GPU_CRITERR("cache_dstg_be_ecc_uncorrected", GPU_CRITERR("cache_dstg_be_ecc_uncorrected",
GPU_LTC_CACHE_DSTG_BE_ECC_UNCORRECTED, GPU_LTC_CACHE_DSTG_BE_ECC_UNCORRECTED,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_ecc_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
}, },
}, },
@@ -507,7 +507,7 @@ struct nvgpu_err_hw_module gv11b_err_lut[] = {
GPU_CRITERR("hubmmu_l2tlb_sa_data_ecc_uncorrected", GPU_CRITERR("hubmmu_l2tlb_sa_data_ecc_uncorrected",
GPU_HUBMMU_L2TLB_SA_DATA_ECC_UNCORRECTED, GPU_HUBMMU_L2TLB_SA_DATA_ECC_UNCORRECTED,
INJECT_TYPE, INJECT_TYPE,
NULL, nvgpu_inject_ecc_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_NONCRITERR("hubmmu_tlb_sa_data_ecc_corrected", GPU_NONCRITERR("hubmmu_tlb_sa_data_ecc_corrected",
0, INJECT_NONE, 0, INJECT_NONE,
@@ -516,7 +516,7 @@ struct nvgpu_err_hw_module gv11b_err_lut[] = {
GPU_CRITERR("hubmmu_tlb_sa_data_ecc_uncorrected", GPU_CRITERR("hubmmu_tlb_sa_data_ecc_uncorrected",
GPU_HUBMMU_TLB_SA_DATA_ECC_UNCORRECTED, GPU_HUBMMU_TLB_SA_DATA_ECC_UNCORRECTED,
INJECT_TYPE, INJECT_TYPE,
NULL, nvgpu_inject_ecc_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_NONCRITERR("hubmmu_pte_data_ecc_corrected", GPU_NONCRITERR("hubmmu_pte_data_ecc_corrected",
0, INJECT_NONE, 0, INJECT_NONE,
@@ -525,7 +525,7 @@ struct nvgpu_err_hw_module gv11b_err_lut[] = {
GPU_CRITERR("hubmmu_pte_data_ecc_uncorrected", GPU_CRITERR("hubmmu_pte_data_ecc_uncorrected",
GPU_HUBMMU_PTE_DATA_ECC_UNCORRECTED, GPU_HUBMMU_PTE_DATA_ECC_UNCORRECTED,
INJECT_TYPE, INJECT_TYPE,
NULL, nvgpu_inject_ecc_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_NONCRITERR("hubmmu_pde0_data_ecc_corrected", GPU_NONCRITERR("hubmmu_pde0_data_ecc_corrected",
0, INJECT_NONE, 0, INJECT_NONE,
@@ -534,12 +534,12 @@ struct nvgpu_err_hw_module gv11b_err_lut[] = {
GPU_CRITERR("hubmmu_pde0_data_ecc_uncorrected", GPU_CRITERR("hubmmu_pde0_data_ecc_uncorrected",
GPU_HUBMMU_PDE0_DATA_ECC_UNCORRECTED, GPU_HUBMMU_PDE0_DATA_ECC_UNCORRECTED,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_ecc_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_CRITERR("hubmmu_page_fault_error", GPU_CRITERR("hubmmu_page_fault_error",
GPU_HUBMMU_PAGE_FAULT_ERROR, GPU_HUBMMU_PAGE_FAULT_ERROR,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_mmu_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
}, },
}, },
@@ -552,12 +552,12 @@ struct nvgpu_err_hw_module gv11b_err_lut[] = {
GPU_CRITERR("pri_timeout_error", GPU_CRITERR("pri_timeout_error",
GPU_PRI_TIMEOUT_ERROR, GPU_PRI_TIMEOUT_ERROR,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_pri_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_CRITERR("pri_access_violation", GPU_CRITERR("pri_access_violation",
GPU_PRI_ACCESS_VIOLATION, GPU_PRI_ACCESS_VIOLATION,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_pri_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
}, },
}, },
@@ -570,12 +570,12 @@ struct nvgpu_err_hw_module gv11b_err_lut[] = {
GPU_CRITERR("ce_launch_error", GPU_CRITERR("ce_launch_error",
GPU_CE_LAUNCH_ERROR, GPU_CE_LAUNCH_ERROR,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_ce_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_CRITERR("ce_block_pipe", GPU_CRITERR("ce_block_pipe",
GPU_CE_BLOCK_PIPE, GPU_CE_BLOCK_PIPE,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_ce_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_NONCRITERR("ce_nonblock_pipe", GPU_NONCRITERR("ce_nonblock_pipe",
0, INJECT_NONE, 0, INJECT_NONE,
@@ -584,16 +584,16 @@ struct nvgpu_err_hw_module gv11b_err_lut[] = {
GPU_CRITERR("ce_invalid_config", GPU_CRITERR("ce_invalid_config",
GPU_CE_INVALID_CONFIG, GPU_CE_INVALID_CONFIG,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_ce_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
GPU_CRITERR("ce_method_buffer_fault", GPU_CRITERR("ce_method_buffer_fault",
GPU_CE_METHOD_BUFFER_FAULT, GPU_CE_METHOD_BUFFER_FAULT,
INJECT_SW, INJECT_SW,
NULL, nvgpu_inject_ce_swerror, NULL, NULL,
NULL, NULL, 0, 0), NULL, NULL, 0, 0),
}, },
}, },
}; };
u32 size_of_gv11b_lut = sizeof(gv11b_err_lut) / u32 size_of_ga10b_lut = sizeof(ga10b_err_lut) /
sizeof(struct nvgpu_err_hw_module); sizeof(struct nvgpu_err_hw_module);

View File

@@ -511,7 +511,8 @@ void gv11b_fb_handle_mmu_fault(struct gk20a *g, u32 niso_intr)
if ((niso_intr & if ((niso_intr &
fb_niso_intr_mmu_other_fault_notify_m()) != 0U) { fb_niso_intr_mmu_other_fault_notify_m()) != 0U) {
nvgpu_report_err_to_sdl(g, GPU_HUBMMU_PAGE_FAULT_ERROR); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_HUBMMU,
GPU_HUBMMU_PAGE_FAULT_ERROR);
nvgpu_err(g, "GPU_HUBMMU_PAGE_FAULT_ERROR. " nvgpu_err(g, "GPU_HUBMMU_PAGE_FAULT_ERROR. "
"sub-err: OTHER_FAULT_NOTIFY. " "sub-err: OTHER_FAULT_NOTIFY. "
"fault_status(0x%x)", fault_status); "fault_status(0x%x)", fault_status);
@@ -539,7 +540,8 @@ void gv11b_fb_handle_mmu_fault(struct gk20a *g, u32 niso_intr)
if ((niso_intr & if ((niso_intr &
fb_niso_intr_mmu_nonreplayable_fault_overflow_m()) != 0U) { fb_niso_intr_mmu_nonreplayable_fault_overflow_m()) != 0U) {
nvgpu_report_err_to_sdl(g, GPU_HUBMMU_PAGE_FAULT_ERROR); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_HUBMMU,
GPU_HUBMMU_PAGE_FAULT_ERROR);
nvgpu_err(g, "GPU_HUBMMU_PAGE_FAULT_ERROR. " nvgpu_err(g, "GPU_HUBMMU_PAGE_FAULT_ERROR. "
"sub-err: NONREPLAYABLE_FAULT_OVERFLOW. " "sub-err: NONREPLAYABLE_FAULT_OVERFLOW. "
"fault_status(0x%x)", fault_status); "fault_status(0x%x)", fault_status);
@@ -563,7 +565,8 @@ void gv11b_fb_handle_mmu_fault(struct gk20a *g, u32 niso_intr)
if ((niso_intr & if ((niso_intr &
fb_niso_intr_mmu_replayable_fault_overflow_m()) != 0U) { fb_niso_intr_mmu_replayable_fault_overflow_m()) != 0U) {
nvgpu_report_err_to_sdl(g, GPU_HUBMMU_PAGE_FAULT_ERROR); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_HUBMMU,
GPU_HUBMMU_PAGE_FAULT_ERROR);
nvgpu_err(g, "GPU_HUBMMU_PAGE_FAULT_ERROR. " nvgpu_err(g, "GPU_HUBMMU_PAGE_FAULT_ERROR. "
"sub-err: REPLAYABLE_FAULT_OVERFLOW. " "sub-err: REPLAYABLE_FAULT_OVERFLOW. "
"fault_status(0x%x)", fault_status); "fault_status(0x%x)", fault_status);

View File

@@ -49,7 +49,8 @@ static void gv11b_fb_intr_handle_ecc_l2tlb_errs(struct gk20a *g,
BUG(); BUG();
} }
if ((ecc_status & uncorrected_error_mask) != 0U) { if ((ecc_status & uncorrected_error_mask) != 0U) {
nvgpu_report_err_to_sdl(g, GPU_HUBMMU_L2TLB_SA_DATA_ECC_UNCORRECTED); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_HUBMMU,
GPU_HUBMMU_L2TLB_SA_DATA_ECC_UNCORRECTED);
nvgpu_err(g, "uncorrected ecc sa data error. " nvgpu_err(g, "uncorrected ecc sa data error. "
"ecc_addr(0x%x)", ecc_addr); "ecc_addr(0x%x)", ecc_addr);
} }
@@ -135,7 +136,8 @@ static void gv11b_fb_intr_handle_ecc_hubtlb_errs(struct gk20a *g,
} }
if ((ecc_status & if ((ecc_status &
fb_mmu_hubtlb_ecc_status_uncorrected_err_sa_data_m()) != 0U) { fb_mmu_hubtlb_ecc_status_uncorrected_err_sa_data_m()) != 0U) {
nvgpu_report_err_to_sdl(g, GPU_HUBMMU_TLB_SA_DATA_ECC_UNCORRECTED); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_HUBMMU,
GPU_HUBMMU_TLB_SA_DATA_ECC_UNCORRECTED);
nvgpu_err(g, "uncorrected ecc sa data error. " nvgpu_err(g, "uncorrected ecc sa data error. "
"ecc_addr(0x%x)", ecc_addr); "ecc_addr(0x%x)", ecc_addr);
} }
@@ -224,7 +226,8 @@ static void gv11b_fb_intr_handle_ecc_fillunit_errors(struct gk20a *g,
if ((ecc_status & if ((ecc_status &
fb_mmu_fillunit_ecc_status_uncorrected_err_pte_data_m()) fb_mmu_fillunit_ecc_status_uncorrected_err_pte_data_m())
!= 0U) { != 0U) {
nvgpu_report_err_to_sdl(g, GPU_HUBMMU_PTE_DATA_ECC_UNCORRECTED); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_HUBMMU,
GPU_HUBMMU_PTE_DATA_ECC_UNCORRECTED);
nvgpu_err(g, "uncorrected ecc pte data error. " nvgpu_err(g, "uncorrected ecc pte data error. "
"ecc_addr(0x%x)", ecc_addr); "ecc_addr(0x%x)", ecc_addr);
} }
@@ -241,7 +244,8 @@ static void gv11b_fb_intr_handle_ecc_fillunit_errors(struct gk20a *g,
if ((ecc_status & if ((ecc_status &
fb_mmu_fillunit_ecc_status_uncorrected_err_pde0_data_m()) fb_mmu_fillunit_ecc_status_uncorrected_err_pde0_data_m())
!= 0U) { != 0U) {
nvgpu_report_err_to_sdl(g, GPU_HUBMMU_PDE0_DATA_ECC_UNCORRECTED); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_HUBMMU,
GPU_HUBMMU_PDE0_DATA_ECC_UNCORRECTED);
nvgpu_err(g, "uncorrected ecc pde0 data error. " nvgpu_err(g, "uncorrected ecc pde0 data error. "
"ecc_addr(0x%x)", ecc_addr); "ecc_addr(0x%x)", ecc_addr);
} }

View File

@@ -272,7 +272,8 @@ void ga10b_fifo_ctxsw_timeout_isr(struct gk20a *g,
continue; continue;
} }
nvgpu_report_err_to_sdl(g, GPU_HOST_PFIFO_CTXSW_TIMEOUT_ERROR); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_HOST,
GPU_HOST_PFIFO_CTXSW_TIMEOUT_ERROR);
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
recover = g->ops.tsg.check_ctxsw_timeout(tsg, &debug_dump, &ms); recover = g->ops.tsg.check_ctxsw_timeout(tsg, &debug_dump, &ms);

View File

@@ -215,7 +215,8 @@ bool gv11b_fifo_handle_ctxsw_timeout(struct gk20a *g)
continue; continue;
} }
nvgpu_report_err_to_sdl(g, GPU_HOST_PFIFO_CTXSW_TIMEOUT_ERROR); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_HOST,
GPU_HOST_PFIFO_CTXSW_TIMEOUT_ERROR);
#ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
recover = g->ops.tsg.check_ctxsw_timeout(tsg, recover = g->ops.tsg.check_ctxsw_timeout(tsg,

View File

@@ -294,7 +294,8 @@ static void ga10b_fifo_handle_bad_tsg(struct gk20a *g,
nvgpu_err(g, "runlist bad tsg error code not supported"); nvgpu_err(g, "runlist bad tsg error code not supported");
} }
nvgpu_report_err_to_sdl(g, GPU_HOST_PFIFO_SCHED_ERROR); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_HOST,
GPU_HOST_PFIFO_SCHED_ERROR);
/* id is unknown, preempt all runlists and do recovery */ /* id is unknown, preempt all runlists and do recovery */
/* TBD: nvgpu_rc_sched_error_bad_tsg(g); */ /* TBD: nvgpu_rc_sched_error_bad_tsg(g); */

View File

@@ -142,7 +142,8 @@ static u32 gk20a_fifo_intr_handle_errors(struct gk20a *g, u32 fifo_intr)
} }
if ((fifo_intr & fifo_intr_0_fb_flush_timeout_pending_f()) != 0U) { if ((fifo_intr & fifo_intr_0_fb_flush_timeout_pending_f()) != 0U) {
nvgpu_report_err_to_sdl(g, GPU_HOST_PFIFO_FB_FLUSH_TIMEOUT_ERROR); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_HOST,
GPU_HOST_PFIFO_FB_FLUSH_TIMEOUT_ERROR);
nvgpu_err(g, "fifo fb flush timeout error"); nvgpu_err(g, "fifo fb flush timeout error");
handled |= fifo_intr_0_fb_flush_timeout_pending_f(); handled |= fifo_intr_0_fb_flush_timeout_pending_f();
} }

View File

@@ -70,7 +70,8 @@ void gk20a_fifo_intr_handle_chsw_error(struct gk20a *g)
u32 intr; u32 intr;
intr = nvgpu_readl(g, fifo_intr_chsw_error_r()); intr = nvgpu_readl(g, fifo_intr_chsw_error_r());
nvgpu_report_err_to_sdl(g, GPU_HOST_PFIFO_CHSW_ERROR); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_HOST,
GPU_HOST_PFIFO_CHSW_ERROR);
nvgpu_err(g, "chsw: %08x", intr); nvgpu_err(g, "chsw: %08x", intr);
g->ops.gr.falcon.dump_stats(g); g->ops.gr.falcon.dump_stats(g);
nvgpu_writel(g, fifo_intr_chsw_error_r(), intr); nvgpu_writel(g, fifo_intr_chsw_error_r(), intr);

View File

@@ -132,7 +132,8 @@ bool gv11b_fifo_handle_sched_error(struct gk20a *g)
nvgpu_err(g, "fifo sched error code not supported"); nvgpu_err(g, "fifo sched error code not supported");
} }
nvgpu_report_err_to_sdl(g, GPU_HOST_PFIFO_SCHED_ERROR); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_HOST,
GPU_HOST_PFIFO_SCHED_ERROR);
if (sched_error == SCHED_ERROR_CODE_BAD_TSG) { if (sched_error == SCHED_ERROR_CODE_BAD_TSG) {
/* id is unknown, preempt all runlists and do recovery */ /* id is unknown, preempt all runlists and do recovery */
@@ -150,7 +151,8 @@ static u32 gv11b_fifo_intr_handle_errors(struct gk20a *g, u32 fifo_intr)
if ((fifo_intr & fifo_intr_0_bind_error_pending_f()) != 0U) { if ((fifo_intr & fifo_intr_0_bind_error_pending_f()) != 0U) {
u32 bind_error = nvgpu_readl(g, fifo_intr_bind_error_r()); u32 bind_error = nvgpu_readl(g, fifo_intr_bind_error_r());
nvgpu_report_err_to_sdl(g, GPU_HOST_PFIFO_BIND_ERROR); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_HOST,
GPU_HOST_PFIFO_BIND_ERROR);
nvgpu_err(g, "fifo bind error: 0x%08x", bind_error); nvgpu_err(g, "fifo bind error: 0x%08x", bind_error);
handled |= fifo_intr_0_bind_error_pending_f(); handled |= fifo_intr_0_bind_error_pending_f();
} }
@@ -161,13 +163,15 @@ static u32 gv11b_fifo_intr_handle_errors(struct gk20a *g, u32 fifo_intr)
} }
if ((fifo_intr & fifo_intr_0_memop_timeout_pending_f()) != 0U) { if ((fifo_intr & fifo_intr_0_memop_timeout_pending_f()) != 0U) {
nvgpu_report_err_to_sdl(g, GPU_HOST_PFIFO_MEMOP_TIMEOUT_ERROR); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_HOST,
GPU_HOST_PFIFO_MEMOP_TIMEOUT_ERROR);
nvgpu_err(g, "fifo memop timeout error"); nvgpu_err(g, "fifo memop timeout error");
handled |= fifo_intr_0_memop_timeout_pending_f(); handled |= fifo_intr_0_memop_timeout_pending_f();
} }
if ((fifo_intr & fifo_intr_0_lb_error_pending_f()) != 0U) { if ((fifo_intr & fifo_intr_0_lb_error_pending_f()) != 0U) {
nvgpu_report_err_to_sdl(g, GPU_HOST_PFIFO_LB_ERROR); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_HOST,
GPU_HOST_PFIFO_LB_ERROR);
nvgpu_err(g, "fifo lb error"); nvgpu_err(g, "fifo lb error");
handled |= fifo_intr_0_lb_error_pending_f(); handled |= fifo_intr_0_lb_error_pending_f();
} }

View File

@@ -328,7 +328,7 @@ static void report_pbdma_error(struct gk20a *g, u32 pbdma_id,
if (err_type != GPU_HOST_INVALID_ERROR) { if (err_type != GPU_HOST_INVALID_ERROR) {
nvgpu_err(g, "pbdma_intr_0(%d)= 0x%08x ", nvgpu_err(g, "pbdma_intr_0(%d)= 0x%08x ",
pbdma_id, pbdma_intr_0); pbdma_id, pbdma_intr_0);
nvgpu_report_err_to_sdl(g, err_type); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_HOST, err_type);
} }
return; return;
} }
@@ -537,7 +537,8 @@ bool ga10b_pbdma_handle_intr_1(struct gk20a *g, u32 pbdma_id, u32 pbdma_intr_1,
recover = true; recover = true;
nvgpu_report_err_to_sdl(g, GPU_HOST_PBDMA_HCE_ERROR); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_HOST,
GPU_HOST_PBDMA_HCE_ERROR);
if ((pbdma_intr_1 & pbdma_intr_1_ctxnotvalid_pending_f()) != 0U) { if ((pbdma_intr_1 & pbdma_intr_1_ctxnotvalid_pending_f()) != 0U) {
nvgpu_log(g, gpu_dbg_intr, "ctxnotvalid intr on pbdma id %d", nvgpu_log(g, gpu_dbg_intr, "ctxnotvalid intr on pbdma id %d",

View File

@@ -88,7 +88,7 @@ static void report_pbdma_error(struct gk20a *g, u32 pbdma_id,
} }
if (err_type != GPU_HOST_INVALID_ERROR) { if (err_type != GPU_HOST_INVALID_ERROR) {
nvgpu_log_info(g, "pbdma id:%u", pbdma_id); nvgpu_log_info(g, "pbdma id:%u", pbdma_id);
nvgpu_report_err_to_sdl(g, err_type); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_HOST, err_type);
} }
return; return;
} }
@@ -190,7 +190,8 @@ bool gv11b_pbdma_handle_intr_1(struct gk20a *g, u32 pbdma_id, u32 pbdma_intr_1,
recover = true; recover = true;
nvgpu_report_err_to_sdl(g, GPU_HOST_PBDMA_HCE_ERROR); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_HOST,
GPU_HOST_PBDMA_HCE_ERROR);
if ((pbdma_intr_1 & pbdma_intr_1_ctxnotvalid_pending_f()) != 0U) { if ((pbdma_intr_1 & pbdma_intr_1_ctxnotvalid_pending_f()) != 0U) {
nvgpu_log(g, gpu_dbg_intr, "ctxnotvalid intr on pbdma id %d", nvgpu_log(g, gpu_dbg_intr, "ctxnotvalid intr on pbdma id %d",

View File

@@ -195,7 +195,8 @@ static u32 ga10b_gr_intr_check_gr_mme_fe1_exception(struct gk20a *g,
info_mthd = nvgpu_readl(g, gr_mme_fe1_hww_esr_info_mthd_r()); info_mthd = nvgpu_readl(g, gr_mme_fe1_hww_esr_info_mthd_r());
info_mthd2 = nvgpu_readl(g, gr_mme_fe1_hww_esr_info_mthd2_r()); info_mthd2 = nvgpu_readl(g, gr_mme_fe1_hww_esr_info_mthd2_r());
nvgpu_report_err_to_sdl(g, GPU_PGRAPH_MME_FE1_EXCEPTION); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_PGRAPH,
GPU_PGRAPH_MME_FE1_EXCEPTION);
nvgpu_err(g, "mme_fe1 exception: esr 0x%08x, info 0x%08x," nvgpu_err(g, "mme_fe1 exception: esr 0x%08x, info 0x%08x,"
"info_mthd 0x%08x, info_mthd2 0x%08x", "info_mthd 0x%08x, info_mthd2 0x%08x",
mme_fe1_hww_esr, info, info_mthd, info_mthd2); mme_fe1_hww_esr, info, info_mthd, info_mthd2);
@@ -374,7 +375,8 @@ static void ga10b_gr_intr_report_gpcmmu_ecc_err(struct gk20a *g,
} }
if ((ecc_status & if ((ecc_status &
gr_gpc0_mmu0_l1tlb_ecc_status_uncorrected_err_l1tlb_sa_data_m()) != 0U) { gr_gpc0_mmu0_l1tlb_ecc_status_uncorrected_err_l1tlb_sa_data_m()) != 0U) {
nvgpu_report_err_to_sdl(g, GPU_MMU_L1TLB_SA_DATA_ECC_UNCORRECTED); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_MMU,
GPU_MMU_L1TLB_SA_DATA_ECC_UNCORRECTED);
nvgpu_err(g, "uncorrected ecc sa data error" nvgpu_err(g, "uncorrected ecc sa data error"
"gpc_id(%d)", gpc); "gpc_id(%d)", gpc);
} }
@@ -385,7 +387,8 @@ static void ga10b_gr_intr_report_gpcmmu_ecc_err(struct gk20a *g,
} }
if ((ecc_status & if ((ecc_status &
gr_gpc0_mmu0_l1tlb_ecc_status_uncorrected_err_l1tlb_fa_data_m()) != 0U) { gr_gpc0_mmu0_l1tlb_ecc_status_uncorrected_err_l1tlb_fa_data_m()) != 0U) {
nvgpu_report_err_to_sdl(g, GPU_MMU_L1TLB_FA_DATA_ECC_UNCORRECTED); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_MMU,
GPU_MMU_L1TLB_FA_DATA_ECC_UNCORRECTED);
nvgpu_err(g, "uncorrected ecc fa data error" nvgpu_err(g, "uncorrected ecc fa data error"
"gpc_id(%d)", gpc); "gpc_id(%d)", gpc);
} }
@@ -742,11 +745,13 @@ static void ga10b_gr_intr_report_tpc_sm_rams_ecc_err(struct gk20a *g,
for (i = 0U; i < ecc_status->err_count; i++) { for (i = 0U; i < ecc_status->err_count; i++) {
if (ecc_status->err_id[i] == GPU_SM_RAMS_ECC_CORRECTED) { if (ecc_status->err_id[i] == GPU_SM_RAMS_ECC_CORRECTED) {
nvgpu_report_err_to_sdl(g, GPU_SM_L1_TAG_ECC_CORRECTED); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_SM,
GPU_SM_L1_TAG_ECC_CORRECTED);
nvgpu_err(g, "sm_l1_tag_ecc_corrected. " nvgpu_err(g, "sm_l1_tag_ecc_corrected. "
"gpc_id(%d), tpc_id(%d)", gpc, tpc); "gpc_id(%d), tpc_id(%d)", gpc, tpc);
} else { } else {
nvgpu_report_err_to_sdl(g, GPU_SM_L1_TAG_ECC_UNCORRECTED); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_SM,
GPU_SM_L1_TAG_ECC_UNCORRECTED);
nvgpu_err(g, "sm_l1_tag_ecc_uncorrected. " nvgpu_err(g, "sm_l1_tag_ecc_uncorrected. "
"gpc_id(%d), tpc_id(%d)", gpc, tpc); "gpc_id(%d), tpc_id(%d)", gpc, tpc);
} }

View File

@@ -106,7 +106,8 @@ u32 gm20b_gr_intr_check_gr_ssync_exception(struct gk20a *g, u32 exception)
g->ops.gr.intr.handle_ssync_hww(g, &ssync_esr); g->ops.gr.intr.handle_ssync_hww(g, &ssync_esr);
reset_gpc = 1U; reset_gpc = 1U;
} }
nvgpu_report_err_to_sdl(g, GPU_PGRAPH_SSYNC_EXCEPTION); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_PGRAPH,
GPU_PGRAPH_SSYNC_EXCEPTION);
} }
return reset_gpc; return reset_gpc;
} }
@@ -117,7 +118,8 @@ u32 gm20b_gr_intr_check_gr_mme_exception(struct gk20a *g, u32 exception)
u32 mme = nvgpu_readl(g, gr_mme_hww_esr_r()); u32 mme = nvgpu_readl(g, gr_mme_hww_esr_r());
u32 info = nvgpu_readl(g, gr_mme_hww_esr_info_r()); u32 info = nvgpu_readl(g, gr_mme_hww_esr_info_r());
nvgpu_report_err_to_sdl(g, GPU_PGRAPH_MME_EXCEPTION); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_PGRAPH,
GPU_PGRAPH_MME_EXCEPTION);
nvgpu_err(g, "mme exception: esr 0x%08x info:0x%08x", nvgpu_err(g, "mme exception: esr 0x%08x info:0x%08x",
mme, info); mme, info);
#ifdef CONFIG_NVGPU_DGPU #ifdef CONFIG_NVGPU_DGPU
@@ -137,7 +139,8 @@ u32 gm20b_gr_intr_check_gr_sked_exception(struct gk20a *g, u32 exception)
if ((exception & gr_exception_sked_m()) != 0U) { if ((exception & gr_exception_sked_m()) != 0U) {
u32 sked = nvgpu_readl(g, gr_sked_hww_esr_r()); u32 sked = nvgpu_readl(g, gr_sked_hww_esr_r());
nvgpu_report_err_to_sdl(g, GPU_PGRAPH_SKED_EXCEPTION); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_PGRAPH,
GPU_PGRAPH_SKED_EXCEPTION);
nvgpu_err(g, "sked exception: esr 0x%08x", sked); nvgpu_err(g, "sked exception: esr 0x%08x", sked);
nvgpu_writel(g, gr_sked_hww_esr_r(), nvgpu_writel(g, gr_sked_hww_esr_r(),
gr_sked_hww_esr_reset_active_f()); gr_sked_hww_esr_reset_active_f());
@@ -152,7 +155,8 @@ static u32 gr_gm20b_intr_check_gr_be_crop_exception(struct gk20a *g,
if ((exception & gr_pri_be0_becs_be_exception_crop_m()) != 0U) { if ((exception & gr_pri_be0_becs_be_exception_crop_m()) != 0U) {
u32 crop = nvgpu_readl(g, gr_crop_hww_esr_r()); u32 crop = nvgpu_readl(g, gr_crop_hww_esr_r());
nvgpu_report_err_to_sdl(g, GPU_PGRAPH_BE_EXCEPTION); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_PGRAPH,
GPU_PGRAPH_BE_EXCEPTION);
nvgpu_err(g, "BE exception: crop exception: esr 0x%08x", crop); nvgpu_err(g, "BE exception: crop exception: esr 0x%08x", crop);
nvgpu_writel(g, gr_crop_hww_esr_r(), nvgpu_writel(g, gr_crop_hww_esr_r(),
gr_crop_hww_esr_reset_active_f()); gr_crop_hww_esr_reset_active_f());
@@ -167,7 +171,8 @@ static u32 gr_gm20b_intr_check_gr_be_zrop_exception(struct gk20a *g,
if ((exception & gr_pri_be0_becs_be_exception_zrop_m()) != 0U) { if ((exception & gr_pri_be0_becs_be_exception_zrop_m()) != 0U) {
u32 zrop = nvgpu_readl(g, gr_zrop_hww_esr_r()); u32 zrop = nvgpu_readl(g, gr_zrop_hww_esr_r());
nvgpu_report_err_to_sdl(g, GPU_PGRAPH_BE_EXCEPTION); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_PGRAPH,
GPU_PGRAPH_BE_EXCEPTION);
nvgpu_err(g, "BE exception: zrop exception: esr 0x%08x", zrop); nvgpu_err(g, "BE exception: zrop exception: esr 0x%08x", zrop);
nvgpu_writel(g, gr_zrop_hww_esr_r(), nvgpu_writel(g, gr_zrop_hww_esr_r(),
gr_zrop_hww_esr_reset_active_f()); gr_zrop_hww_esr_reset_active_f());
@@ -182,7 +187,8 @@ u32 gm20b_gr_intr_check_gr_fe_exception(struct gk20a *g, u32 exception)
u32 fe = nvgpu_readl(g, gr_fe_hww_esr_r()); u32 fe = nvgpu_readl(g, gr_fe_hww_esr_r());
u32 info = nvgpu_readl(g, gr_fe_hww_esr_info_r()); u32 info = nvgpu_readl(g, gr_fe_hww_esr_info_r());
nvgpu_report_err_to_sdl(g, GPU_PGRAPH_FE_EXCEPTION); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_PGRAPH,
GPU_PGRAPH_FE_EXCEPTION);
nvgpu_err(g, "fe exception: esr 0x%08x, info 0x%08x", nvgpu_err(g, "fe exception: esr 0x%08x, info 0x%08x",
fe, info); fe, info);
nvgpu_writel(g, gr_fe_hww_esr_r(), nvgpu_writel(g, gr_fe_hww_esr_r(),
@@ -197,7 +203,8 @@ u32 gm20b_gr_intr_check_gr_memfmt_exception(struct gk20a *g, u32 exception)
if ((exception & gr_exception_memfmt_m()) != 0U) { if ((exception & gr_exception_memfmt_m()) != 0U) {
u32 memfmt = nvgpu_readl(g, gr_memfmt_hww_esr_r()); u32 memfmt = nvgpu_readl(g, gr_memfmt_hww_esr_r());
nvgpu_report_err_to_sdl(g, GPU_PGRAPH_MEMFMT_EXCEPTION); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_PGRAPH,
GPU_PGRAPH_MEMFMT_EXCEPTION);
nvgpu_err(g, "memfmt exception: esr %08x", memfmt); nvgpu_err(g, "memfmt exception: esr %08x", memfmt);
nvgpu_writel(g, gr_memfmt_hww_esr_r(), nvgpu_writel(g, gr_memfmt_hww_esr_r(),
gr_memfmt_hww_esr_reset_active_f()); gr_memfmt_hww_esr_reset_active_f());
@@ -211,7 +218,8 @@ u32 gm20b_gr_intr_check_gr_pd_exception(struct gk20a *g, u32 exception)
if ((exception & gr_exception_pd_m()) != 0U) { if ((exception & gr_exception_pd_m()) != 0U) {
u32 pd = nvgpu_readl(g, gr_pd_hww_esr_r()); u32 pd = nvgpu_readl(g, gr_pd_hww_esr_r());
nvgpu_report_err_to_sdl(g, GPU_PGRAPH_PD_EXCEPTION); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_PGRAPH,
GPU_PGRAPH_PD_EXCEPTION);
nvgpu_err(g, "pd exception: esr 0x%08x", pd); nvgpu_err(g, "pd exception: esr 0x%08x", pd);
nvgpu_writel(g, gr_pd_hww_esr_r(), nvgpu_writel(g, gr_pd_hww_esr_r(),
gr_pd_hww_esr_reset_active_f()); gr_pd_hww_esr_reset_active_f());
@@ -225,7 +233,8 @@ u32 gm20b_gr_intr_check_gr_scc_exception(struct gk20a *g, u32 exception)
if ((exception & gr_exception_scc_m()) != 0U) { if ((exception & gr_exception_scc_m()) != 0U) {
u32 scc = nvgpu_readl(g, gr_scc_hww_esr_r()); u32 scc = nvgpu_readl(g, gr_scc_hww_esr_r());
nvgpu_report_err_to_sdl(g, GPU_PGRAPH_SCC_EXCEPTION); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_PGRAPH,
GPU_PGRAPH_SCC_EXCEPTION);
nvgpu_err(g, "scc exception: esr 0x%08x", scc); nvgpu_err(g, "scc exception: esr 0x%08x", scc);
nvgpu_writel(g, gr_scc_hww_esr_r(), nvgpu_writel(g, gr_scc_hww_esr_r(),
gr_scc_hww_esr_reset_active_f()); gr_scc_hww_esr_reset_active_f());
@@ -239,7 +248,8 @@ u32 gm20b_gr_intr_check_gr_ds_exception(struct gk20a *g, u32 exception)
if ((exception & gr_exception_ds_m()) != 0U) { if ((exception & gr_exception_ds_m()) != 0U) {
u32 ds = nvgpu_readl(g, gr_ds_hww_esr_r()); u32 ds = nvgpu_readl(g, gr_ds_hww_esr_r());
nvgpu_report_err_to_sdl(g, GPU_PGRAPH_DS_EXCEPTION); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_PGRAPH,
GPU_PGRAPH_DS_EXCEPTION);
nvgpu_err(g, "ds exception: esr: 0x%08x", ds); nvgpu_err(g, "ds exception: esr: 0x%08x", ds);
nvgpu_writel(g, gr_ds_hww_esr_r(), nvgpu_writel(g, gr_ds_hww_esr_r(),
gr_ds_hww_esr_reset_task_f()); gr_ds_hww_esr_reset_task_f());

View File

@@ -88,12 +88,14 @@ static void gv11b_gr_intr_handle_fecs_ecc_error(struct gk20a *g)
fecs_ecc_status.uncorrected_delta); fecs_ecc_status.uncorrected_delta);
if (fecs_ecc_status.imem_corrected_err) { if (fecs_ecc_status.imem_corrected_err) {
nvgpu_report_err_to_sdl(g, GPU_FECS_FALCON_IMEM_ECC_CORRECTED); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_FECS,
GPU_FECS_FALCON_IMEM_ECC_CORRECTED);
nvgpu_err(g, "imem ecc error corrected - error count:%d", nvgpu_err(g, "imem ecc error corrected - error count:%d",
g->ecc.gr.fecs_ecc_corrected_err_count[0].counter); g->ecc.gr.fecs_ecc_corrected_err_count[0].counter);
} }
if (fecs_ecc_status.imem_uncorrected_err) { if (fecs_ecc_status.imem_uncorrected_err) {
nvgpu_report_err_to_sdl(g, GPU_FECS_FALCON_IMEM_ECC_UNCORRECTED); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_FECS,
GPU_FECS_FALCON_IMEM_ECC_UNCORRECTED);
nvgpu_err(g, "imem ecc error uncorrected - error count:%d", nvgpu_err(g, "imem ecc error uncorrected - error count:%d",
g->ecc.gr.fecs_ecc_uncorrected_err_count[0].counter); g->ecc.gr.fecs_ecc_uncorrected_err_count[0].counter);
} }
@@ -106,7 +108,8 @@ static void gv11b_gr_intr_handle_fecs_ecc_error(struct gk20a *g)
BUG(); BUG();
} }
if (fecs_ecc_status.dmem_uncorrected_err) { if (fecs_ecc_status.dmem_uncorrected_err) {
nvgpu_report_err_to_sdl(g, GPU_FECS_FALCON_DMEM_ECC_UNCORRECTED); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_FECS,
GPU_FECS_FALCON_DMEM_ECC_UNCORRECTED);
nvgpu_err(g, "dmem ecc error uncorrected - error count %d", nvgpu_err(g, "dmem ecc error uncorrected - error count %d",
g->ecc.gr.fecs_ecc_uncorrected_err_count[0].counter); g->ecc.gr.fecs_ecc_uncorrected_err_count[0].counter);
} }
@@ -341,7 +344,8 @@ void gv11b_gr_intr_handle_gcc_exception(struct gk20a *g, u32 gpc,
} }
*uncorrected_err = nvgpu_safe_add_u32(*uncorrected_err, *uncorrected_err = nvgpu_safe_add_u32(*uncorrected_err,
gcc_l15_uncorrected_err_count_delta); gcc_l15_uncorrected_err_count_delta);
nvgpu_report_err_to_sdl(g, GPU_GCC_L15_ECC_UNCORRECTED); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_GCC,
GPU_GCC_L15_ECC_UNCORRECTED);
nvgpu_writel(g, nvgpu_safe_add_u32( nvgpu_writel(g, nvgpu_safe_add_u32(
gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_r(), offset), gr_pri_gpc0_gcc_l15_ecc_uncorrected_err_count_r(), offset),
0); 0);
@@ -367,7 +371,8 @@ static void gv11b_gr_intr_report_gpcmmu_ecc_err(struct gk20a *g,
if ((ecc_status & if ((ecc_status &
gr_gpc0_mmu_l1tlb_ecc_status_uncorrected_err_l1tlb_sa_data_m()) != gr_gpc0_mmu_l1tlb_ecc_status_uncorrected_err_l1tlb_sa_data_m()) !=
0U) { 0U) {
nvgpu_report_err_to_sdl(g, GPU_MMU_L1TLB_SA_DATA_ECC_UNCORRECTED); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_MMU,
GPU_MMU_L1TLB_SA_DATA_ECC_UNCORRECTED);
nvgpu_err(g, "uncorrected ecc sa data error. gpc_id(%d)", gpc); nvgpu_err(g, "uncorrected ecc sa data error. gpc_id(%d)", gpc);
} }
if ((ecc_status & if ((ecc_status &
@@ -382,7 +387,8 @@ static void gv11b_gr_intr_report_gpcmmu_ecc_err(struct gk20a *g,
if ((ecc_status & if ((ecc_status &
gr_gpc0_mmu_l1tlb_ecc_status_uncorrected_err_l1tlb_fa_data_m()) != gr_gpc0_mmu_l1tlb_ecc_status_uncorrected_err_l1tlb_fa_data_m()) !=
0U) { 0U) {
nvgpu_report_err_to_sdl(g, GPU_MMU_L1TLB_FA_DATA_ECC_UNCORRECTED); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_MMU,
GPU_MMU_L1TLB_FA_DATA_ECC_UNCORRECTED);
nvgpu_err(g, "uncorrected ecc fa data error. gpc_id(%d)", gpc); nvgpu_err(g, "uncorrected ecc fa data error. gpc_id(%d)", gpc);
} }
} }
@@ -476,13 +482,15 @@ static void gv11b_gr_intr_report_gpccs_ecc_err(struct gk20a *g,
{ {
if ((ecc_status & if ((ecc_status &
gr_gpc0_gpccs_falcon_ecc_status_corrected_err_imem_m()) != 0U) { gr_gpc0_gpccs_falcon_ecc_status_corrected_err_imem_m()) != 0U) {
nvgpu_report_err_to_sdl(g, GPU_GPCCS_FALCON_IMEM_ECC_CORRECTED); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_GPCCS,
GPU_GPCCS_FALCON_IMEM_ECC_CORRECTED);
nvgpu_err(g, "imem ecc error corrected" nvgpu_err(g, "imem ecc error corrected"
"ecc_addr(0x%x), gpc_id(%d)", ecc_addr, gpc); "ecc_addr(0x%x), gpc_id(%d)", ecc_addr, gpc);
} }
if ((ecc_status & if ((ecc_status &
gr_gpc0_gpccs_falcon_ecc_status_uncorrected_err_imem_m()) != 0U) { gr_gpc0_gpccs_falcon_ecc_status_uncorrected_err_imem_m()) != 0U) {
nvgpu_report_err_to_sdl(g, GPU_GPCCS_FALCON_IMEM_ECC_UNCORRECTED); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_GPCCS,
GPU_GPCCS_FALCON_IMEM_ECC_UNCORRECTED);
nvgpu_err(g, "imem ecc error uncorrected" nvgpu_err(g, "imem ecc error uncorrected"
"ecc_addr(0x%x), gpc_id(%d)", ecc_addr, gpc); "ecc_addr(0x%x), gpc_id(%d)", ecc_addr, gpc);
} }
@@ -496,7 +504,8 @@ static void gv11b_gr_intr_report_gpccs_ecc_err(struct gk20a *g,
} }
if ((ecc_status & if ((ecc_status &
gr_gpc0_gpccs_falcon_ecc_status_uncorrected_err_dmem_m()) != 0U) { gr_gpc0_gpccs_falcon_ecc_status_uncorrected_err_dmem_m()) != 0U) {
nvgpu_report_err_to_sdl(g, GPU_GPCCS_FALCON_DMEM_ECC_UNCORRECTED); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_GPCCS,
GPU_GPCCS_FALCON_DMEM_ECC_UNCORRECTED);
nvgpu_err(g, "dmem ecc error uncorrected" nvgpu_err(g, "dmem ecc error uncorrected"
"ecc_addr(0x%x), gpc_id(%d)", ecc_addr, gpc); "ecc_addr(0x%x), gpc_id(%d)", ecc_addr, gpc);
} }
@@ -515,7 +524,8 @@ void gv11b_gr_intr_handle_gpc_prop_exception(struct gk20a *g, u32 gpc,
hww_esr = nvgpu_readl(g, hww_esr = nvgpu_readl(g,
nvgpu_safe_add_u32(gr_gpc0_prop_hww_esr_r(), offset)); nvgpu_safe_add_u32(gr_gpc0_prop_hww_esr_r(), offset));
nvgpu_report_err_to_sdl(g, GPU_PGRAPH_GPC_GFX_EXCEPTION); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_PGRAPH,
GPU_PGRAPH_GPC_GFX_EXCEPTION);
/* /*
* print additional diagnostic information. * print additional diagnostic information.
@@ -559,7 +569,8 @@ void gv11b_gr_intr_handle_gpc_zcull_exception(struct gk20a *g, u32 gpc,
hww_esr = nvgpu_readl(g, hww_esr = nvgpu_readl(g,
nvgpu_safe_add_u32(gr_gpc0_zcull_hww_esr_r(), offset)); nvgpu_safe_add_u32(gr_gpc0_zcull_hww_esr_r(), offset));
nvgpu_report_err_to_sdl(g, GPU_PGRAPH_GPC_GFX_EXCEPTION); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_PGRAPH,
GPU_PGRAPH_GPC_GFX_EXCEPTION);
/* clear the interrupt */ /* clear the interrupt */
nvgpu_writel(g, nvgpu_safe_add_u32( nvgpu_writel(g, nvgpu_safe_add_u32(
@@ -583,7 +594,8 @@ void gv11b_gr_intr_handle_gpc_setup_exception(struct gk20a *g, u32 gpc,
hww_esr = nvgpu_readl(g, hww_esr = nvgpu_readl(g,
nvgpu_safe_add_u32(gr_gpc0_setup_hww_esr_r(), offset)); nvgpu_safe_add_u32(gr_gpc0_setup_hww_esr_r(), offset));
nvgpu_report_err_to_sdl(g, GPU_PGRAPH_GPC_GFX_EXCEPTION); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_PGRAPH,
GPU_PGRAPH_GPC_GFX_EXCEPTION);
/* clear the interrupt */ /* clear the interrupt */
nvgpu_writel(g, nvgpu_safe_add_u32( nvgpu_writel(g, nvgpu_safe_add_u32(
@@ -609,7 +621,8 @@ void gv11b_gr_intr_handle_gpc_pes_exception(struct gk20a *g, u32 gpc,
hww_esr = nvgpu_readl(g, hww_esr = nvgpu_readl(g,
nvgpu_safe_add_u32(gr_gpc0_ppc0_pes_hww_esr_r(), offset)); nvgpu_safe_add_u32(gr_gpc0_ppc0_pes_hww_esr_r(), offset));
nvgpu_report_err_to_sdl(g, GPU_PGRAPH_GPC_GFX_EXCEPTION); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_PGRAPH,
GPU_PGRAPH_GPC_GFX_EXCEPTION);
/* clear the interrupt */ /* clear the interrupt */
nvgpu_writel(g, nvgpu_safe_add_u32( nvgpu_writel(g, nvgpu_safe_add_u32(
@@ -713,7 +726,8 @@ void gv11b_gr_intr_handle_tpc_mpc_exception(struct gk20a *g, u32 gpc, u32 tpc)
offset)); offset));
nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, "mpc hww esr 0x%08x", esr); nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, "mpc hww esr 0x%08x", esr);
nvgpu_report_err_to_sdl(g, GPU_PGRAPH_MPC_EXCEPTION); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_PGRAPH,
GPU_PGRAPH_MPC_EXCEPTION);
esr = nvgpu_readl(g, esr = nvgpu_readl(g,
nvgpu_safe_add_u32(gr_gpc0_tpc0_mpc_hww_esr_info_r(), nvgpu_safe_add_u32(gr_gpc0_tpc0_mpc_hww_esr_info_r(),
@@ -739,7 +753,8 @@ void gv11b_gr_intr_handle_tpc_pe_exception(struct gk20a *g, u32 gpc, u32 tpc)
esr = nvgpu_readl(g, nvgpu_safe_add_u32(gr_gpc0_tpc0_pe_hww_esr_r(), esr = nvgpu_readl(g, nvgpu_safe_add_u32(gr_gpc0_tpc0_pe_hww_esr_r(),
offset)); offset));
nvgpu_report_err_to_sdl(g, GPU_PGRAPH_GPC_GFX_EXCEPTION); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_PGRAPH,
GPU_PGRAPH_GPC_GFX_EXCEPTION);
nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, "pe hww esr 0x%08x", esr); nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, "pe hww esr 0x%08x", esr);
@@ -896,19 +911,22 @@ static void gv11b_gr_intr_report_l1_tag_uncorrected_err(struct gk20a *g,
if (ecc_status->err_id[i] == GPU_SM_L1_TAG_ECC_UNCORRECTED) { if (ecc_status->err_id[i] == GPU_SM_L1_TAG_ECC_UNCORRECTED) {
nvgpu_err(g, "sm_l1_tag_ecc_uncorrected " nvgpu_err(g, "sm_l1_tag_ecc_uncorrected "
"gpc_id(%d), tpc_id(%d)", gpc, tpc); "gpc_id(%d), tpc_id(%d)", gpc, tpc);
nvgpu_report_err_to_sdl(g, GPU_SM_L1_TAG_ECC_UNCORRECTED); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_SM,
GPU_SM_L1_TAG_ECC_UNCORRECTED);
} }
if (ecc_status->err_id[i] == GPU_SM_L1_TAG_MISS_FIFO_ECC_UNCORRECTED) { if (ecc_status->err_id[i] == GPU_SM_L1_TAG_MISS_FIFO_ECC_UNCORRECTED) {
nvgpu_err(g, "sm_l1_tag_miss_fifo_ecc_uncorrected " nvgpu_err(g, "sm_l1_tag_miss_fifo_ecc_uncorrected "
"gpc_id(%d), tpc_id(%d)", gpc, tpc); "gpc_id(%d), tpc_id(%d)", gpc, tpc);
nvgpu_report_err_to_sdl(g, GPU_SM_L1_TAG_MISS_FIFO_ECC_UNCORRECTED); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_SM,
GPU_SM_L1_TAG_MISS_FIFO_ECC_UNCORRECTED);
} }
if (ecc_status->err_id[i] == GPU_SM_L1_TAG_S2R_PIXPRF_ECC_UNCORRECTED) { if (ecc_status->err_id[i] == GPU_SM_L1_TAG_S2R_PIXPRF_ECC_UNCORRECTED) {
nvgpu_err(g, "sm_l1_tag_s2r_pixprf_ecc_uncorrected " nvgpu_err(g, "sm_l1_tag_s2r_pixprf_ecc_uncorrected "
"gpc_id(%d), tpc_id(%d)", gpc, tpc); "gpc_id(%d), tpc_id(%d)", gpc, tpc);
nvgpu_report_err_to_sdl(g, GPU_SM_L1_TAG_S2R_PIXPRF_ECC_UNCORRECTED); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_SM,
GPU_SM_L1_TAG_S2R_PIXPRF_ECC_UNCORRECTED);
} }
} }
} }
@@ -928,7 +946,8 @@ static void gv11b_gr_intr_report_l1_tag_corrected_err(struct gk20a *g,
if (ecc_status->err_id[i] == GPU_SM_L1_TAG_ECC_CORRECTED) { if (ecc_status->err_id[i] == GPU_SM_L1_TAG_ECC_CORRECTED) {
nvgpu_err(g, "sm_l1_tag_ecc_corrected " nvgpu_err(g, "sm_l1_tag_ecc_corrected "
"gpc_id(%d), tpc_id(%d)", gpc, tpc); "gpc_id(%d), tpc_id(%d)", gpc, tpc);
nvgpu_report_err_to_sdl(g, GPU_SM_L1_TAG_ECC_CORRECTED); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_SM,
GPU_SM_L1_TAG_ECC_CORRECTED);
} }
} }
} }
@@ -1248,7 +1267,8 @@ static void gv11b_gr_intr_handle_lrf_exception(struct gk20a *g, u32 gpc, u32 tpc
nvgpu_safe_add_u32( nvgpu_safe_add_u32(
g->ecc.gr.sm_lrf_ecc_double_err_count[gpc][tpc].counter, g->ecc.gr.sm_lrf_ecc_double_err_count[gpc][tpc].counter,
lrf_uncorrected_err_count_delta); lrf_uncorrected_err_count_delta);
nvgpu_report_err_to_sdl(g, GPU_SM_LRF_ECC_UNCORRECTED); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_SM,
GPU_SM_LRF_ECC_UNCORRECTED);
nvgpu_writel(g, nvgpu_safe_add_u32( nvgpu_writel(g, nvgpu_safe_add_u32(
gr_pri_gpc0_tpc0_sm_lrf_ecc_uncorrected_err_count_r(), offset), gr_pri_gpc0_tpc0_sm_lrf_ecc_uncorrected_err_count_r(), offset),
0U); 0U);
@@ -1380,7 +1400,8 @@ static void gv11b_gr_intr_handle_cbu_exception(struct gk20a *g, u32 gpc, u32 tpc
nvgpu_safe_add_u32( nvgpu_safe_add_u32(
g->ecc.gr.sm_cbu_ecc_uncorrected_err_count[gpc][tpc].counter, g->ecc.gr.sm_cbu_ecc_uncorrected_err_count[gpc][tpc].counter,
cbu_uncorrected_err_count_delta); cbu_uncorrected_err_count_delta);
nvgpu_report_err_to_sdl(g, GPU_SM_CBU_ECC_UNCORRECTED); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_SM,
GPU_SM_CBU_ECC_UNCORRECTED);
nvgpu_writel(g, nvgpu_safe_add_u32( nvgpu_writel(g, nvgpu_safe_add_u32(
gr_pri_gpc0_tpc0_sm_cbu_ecc_uncorrected_err_count_r(), offset), gr_pri_gpc0_tpc0_sm_cbu_ecc_uncorrected_err_count_r(), offset),
0U); 0U);
@@ -1508,7 +1529,8 @@ static void gv11b_gr_intr_handle_l1_data_exception(struct gk20a *g, u32 gpc, u32
nvgpu_safe_add_u32( nvgpu_safe_add_u32(
g->ecc.gr.sm_l1_data_ecc_uncorrected_err_count[gpc][tpc].counter, g->ecc.gr.sm_l1_data_ecc_uncorrected_err_count[gpc][tpc].counter,
l1_data_uncorrected_err_count_delta); l1_data_uncorrected_err_count_delta);
nvgpu_report_err_to_sdl(g, GPU_SM_L1_DATA_ECC_UNCORRECTED); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_SM,
GPU_SM_L1_DATA_ECC_UNCORRECTED);
nvgpu_writel(g, nvgpu_safe_add_u32( nvgpu_writel(g, nvgpu_safe_add_u32(
gr_pri_gpc0_tpc0_sm_l1_data_ecc_uncorrected_err_count_r(), offset), gr_pri_gpc0_tpc0_sm_l1_data_ecc_uncorrected_err_count_r(), offset),
0U); 0U);
@@ -1533,25 +1555,29 @@ static void gv11b_gr_intr_report_icache_uncorrected_err(struct gk20a *g,
if (ecc_status->err_id[i] == GPU_SM_ICACHE_L0_DATA_ECC_UNCORRECTED) { if (ecc_status->err_id[i] == GPU_SM_ICACHE_L0_DATA_ECC_UNCORRECTED) {
nvgpu_err(g, "sm_icache_l0_data_ecc_uncorrected. " nvgpu_err(g, "sm_icache_l0_data_ecc_uncorrected. "
"gpc_id(%d), tpc_id(%d)", gpc, tpc); "gpc_id(%d), tpc_id(%d)", gpc, tpc);
nvgpu_report_err_to_sdl(g, GPU_SM_ICACHE_L0_DATA_ECC_UNCORRECTED); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_SM,
GPU_SM_ICACHE_L0_DATA_ECC_UNCORRECTED);
} }
if (ecc_status->err_id[i] == GPU_SM_ICACHE_L0_PREDECODE_ECC_UNCORRECTED) { if (ecc_status->err_id[i] == GPU_SM_ICACHE_L0_PREDECODE_ECC_UNCORRECTED) {
nvgpu_err(g, "sm_icache_l0_predecode_ecc_uncorrected. " nvgpu_err(g, "sm_icache_l0_predecode_ecc_uncorrected. "
"gpc_id(%d), tpc_id(%d)", gpc, tpc); "gpc_id(%d), tpc_id(%d)", gpc, tpc);
nvgpu_report_err_to_sdl(g, GPU_SM_ICACHE_L0_PREDECODE_ECC_UNCORRECTED); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_SM,
GPU_SM_ICACHE_L0_PREDECODE_ECC_UNCORRECTED);
} }
if (ecc_status->err_id[i] == GPU_SM_ICACHE_L1_DATA_ECC_UNCORRECTED) { if (ecc_status->err_id[i] == GPU_SM_ICACHE_L1_DATA_ECC_UNCORRECTED) {
nvgpu_err(g, "sm_icache_l1_data_ecc_uncorrected. " nvgpu_err(g, "sm_icache_l1_data_ecc_uncorrected. "
"gpc_id(%d), tpc_id(%d)", gpc, tpc); "gpc_id(%d), tpc_id(%d)", gpc, tpc);
nvgpu_report_err_to_sdl(g, GPU_SM_ICACHE_L1_DATA_ECC_UNCORRECTED); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_SM,
GPU_SM_ICACHE_L1_DATA_ECC_UNCORRECTED);
} }
if (ecc_status->err_id[i] == GPU_SM_ICACHE_L1_PREDECODE_ECC_UNCORRECTED) { if (ecc_status->err_id[i] == GPU_SM_ICACHE_L1_PREDECODE_ECC_UNCORRECTED) {
nvgpu_err(g, "sm_icache_l1_predecode_ecc_uncorrected. " nvgpu_err(g, "sm_icache_l1_predecode_ecc_uncorrected. "
"gpc_id(%d), tpc_id(%d)", gpc, tpc); "gpc_id(%d), tpc_id(%d)", gpc, tpc);
nvgpu_report_err_to_sdl(g, GPU_SM_ICACHE_L1_PREDECODE_ECC_UNCORRECTED); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_SM,
GPU_SM_ICACHE_L1_PREDECODE_ECC_UNCORRECTED);
} }
} }
} }

View File

@@ -285,7 +285,7 @@
#include <nvgpu/grmgr.h> #include <nvgpu/grmgr.h>
#endif #endif
#include "hal/cic/mon/cic_gv11b.h" #include "hal/cic/mon/cic_ga10b.h"
#include <nvgpu/cic_mon.h> #include <nvgpu/cic_mon.h>
static int ga10b_init_gpu_characteristics(struct gk20a *g) static int ga10b_init_gpu_characteristics(struct gk20a *g)
@@ -1720,7 +1720,7 @@ static const struct gops_mssnvlink ga10b_ops_mssnvlink = {
#endif #endif
static const struct gops_cic_mon ga10b_ops_cic_mon = { static const struct gops_cic_mon ga10b_ops_cic_mon = {
.init = gv11b_cic_mon_init, .init = ga10b_cic_mon_init,
.report_err = nvgpu_cic_mon_report_err_safety_services .report_err = nvgpu_cic_mon_report_err_safety_services
}; };

View File

@@ -191,8 +191,6 @@
#include "hal/fifo/channel_gm20b.h" #include "hal/fifo/channel_gm20b.h"
#include "hal/fifo/channel_gv11b.h" #include "hal/fifo/channel_gv11b.h"
#include "hal/cic/mon/cic_gv11b.h"
#ifdef CONFIG_NVGPU_STATIC_POWERGATE #ifdef CONFIG_NVGPU_STATIC_POWERGATE
#include "hal/tpc/tpc_gv11b.h" #include "hal/tpc/tpc_gv11b.h"
#endif #endif
@@ -214,7 +212,6 @@
#include <nvgpu/gr/gr_intr.h> #include <nvgpu/gr/gr_intr.h>
#include <nvgpu/nvgpu_init.h> #include <nvgpu/nvgpu_init.h>
#include <nvgpu/grmgr.h> #include <nvgpu/grmgr.h>
#include <nvgpu/cic_mon.h>
#include <nvgpu/hw/gv11b/hw_pwr_gv11b.h> #include <nvgpu/hw/gv11b/hw_pwr_gv11b.h>
@@ -1473,10 +1470,6 @@ static const struct gops_grmgr gv11b_ops_grmgr = {
.init_gr_manager = nvgpu_init_gr_manager, .init_gr_manager = nvgpu_init_gr_manager,
}; };
static const struct gops_cic_mon gv11b_ops_cic_mon = {
.init = gv11b_cic_mon_init,
};
int gv11b_init_hal(struct gk20a *g) int gv11b_init_hal(struct gk20a *g)
{ {
struct gpu_ops *gops = &g->ops; struct gpu_ops *gops = &g->ops;
@@ -1574,7 +1567,6 @@ int gv11b_init_hal(struct gk20a *g)
gops->gpc_pg = gv11b_ops_gpc_pg; gops->gpc_pg = gv11b_ops_gpc_pg;
#endif #endif
gops->grmgr = gv11b_ops_grmgr; gops->grmgr = gv11b_ops_grmgr;
gops->cic_mon = gv11b_ops_cic_mon;
gops->chip_init_gpu_characteristics = gv11b_init_gpu_characteristics; gops->chip_init_gpu_characteristics = gv11b_init_gpu_characteristics;
gops->get_litter_value = gv11b_get_litter_value; gops->get_litter_value = gv11b_get_litter_value;
gops->semaphore_wakeup = nvgpu_channel_semaphore_wakeup; gops->semaphore_wakeup = nvgpu_channel_semaphore_wakeup;

View File

@@ -411,7 +411,8 @@ static void ga10b_ltc_intr_handle_rstg_ecc_interrupts(struct gk20a *g,
nvgpu_wrapping_add_u32( nvgpu_wrapping_add_u32(
g->ecc.ltc.rstg_ecc_parity_count[ltc][slice].counter, g->ecc.ltc.rstg_ecc_parity_count[ltc][slice].counter,
uncorrected_delta); uncorrected_delta);
nvgpu_report_err_to_sdl(g, GPU_LTC_CACHE_RSTG_ECC_UNCORRECTED); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_LTC,
GPU_LTC_CACHE_RSTG_ECC_UNCORRECTED);
} }
if ((ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_rstg_m()) != 0U) { if ((ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_rstg_m()) != 0U) {
@@ -442,7 +443,8 @@ static void ga10b_ltc_intr_handle_tstg_ecc_interrupts(struct gk20a *g,
nvgpu_wrapping_add_u32( nvgpu_wrapping_add_u32(
g->ecc.ltc.tstg_ecc_parity_count[ltc][slice].counter, g->ecc.ltc.tstg_ecc_parity_count[ltc][slice].counter,
uncorrected_delta); uncorrected_delta);
nvgpu_report_err_to_sdl(g, GPU_LTC_CACHE_TSTG_ECC_UNCORRECTED); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_LTC,
GPU_LTC_CACHE_TSTG_ECC_UNCORRECTED);
} }
if ((ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_tstg_m()) != 0U) { if ((ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_tstg_m()) != 0U) {
@@ -508,7 +510,8 @@ static void ga10b_ltc_intr_handle_dstg_ecc_interrupts(struct gk20a *g,
g->ecc.ltc.ecc_sec_count[ltc][slice].counter, g->ecc.ltc.ecc_sec_count[ltc][slice].counter,
corrected_delta); corrected_delta);
nvgpu_report_err_to_sdl(g, GPU_LTC_CACHE_DSTG_ECC_CORRECTED); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_LTC,
GPU_LTC_CACHE_DSTG_ECC_CORRECTED);
/* /*
* Using a SEC code will allow correction of an SBE (Single Bit * Using a SEC code will allow correction of an SBE (Single Bit
@@ -539,7 +542,8 @@ static void ga10b_ltc_intr_handle_dstg_ecc_interrupts(struct gk20a *g,
g->ecc.ltc.ecc_ded_count[ltc][slice].counter, g->ecc.ltc.ecc_ded_count[ltc][slice].counter,
uncorrected_delta); uncorrected_delta);
nvgpu_report_err_to_sdl(g, GPU_LTC_CACHE_DSTG_ECC_UNCORRECTED); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_LTC,
GPU_LTC_CACHE_DSTG_ECC_UNCORRECTED);
} else if (ga10b_ltc_intr_is_dstg_be_ram(ecc_addr)) { } else if (ga10b_ltc_intr_is_dstg_be_ram(ecc_addr)) {
nvgpu_log(g, gpu_dbg_intr, "dstg be ecc error uncorrected"); nvgpu_log(g, gpu_dbg_intr, "dstg be ecc error uncorrected");
@@ -548,7 +552,8 @@ static void ga10b_ltc_intr_handle_dstg_ecc_interrupts(struct gk20a *g,
g->ecc.ltc.dstg_be_ecc_parity_count[ltc][slice].counter, g->ecc.ltc.dstg_be_ecc_parity_count[ltc][slice].counter,
uncorrected_delta); uncorrected_delta);
nvgpu_report_err_to_sdl(g, GPU_LTC_CACHE_DSTG_BE_ECC_UNCORRECTED); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_LTC,
GPU_LTC_CACHE_DSTG_BE_ECC_UNCORRECTED);
} else { } else {
nvgpu_err(g, "unsupported uncorrected dstg ecc error"); nvgpu_err(g, "unsupported uncorrected dstg ecc error");
BUG(); BUG();

View File

@@ -126,7 +126,8 @@ void gv11b_ltc_intr_handle_tstg_ecc_interrupts(struct gk20a *g,
g->ecc.ltc.tstg_ecc_parity_count[ltc][slice].counter, g->ecc.ltc.tstg_ecc_parity_count[ltc][slice].counter,
uncorrected_delta); uncorrected_delta);
nvgpu_report_err_to_sdl(g, GPU_LTC_CACHE_TSTG_ECC_UNCORRECTED); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_LTC,
GPU_LTC_CACHE_TSTG_ECC_UNCORRECTED);
nvgpu_err(g, "tstg ecc error uncorrected. " nvgpu_err(g, "tstg ecc error uncorrected. "
"ecc_addr(0x%x)", ecc_addr); "ecc_addr(0x%x)", ecc_addr);
} }
@@ -145,7 +146,8 @@ void gv11b_ltc_intr_handle_dstg_ecc_interrupts(struct gk20a *g,
g->ecc.ltc.dstg_be_ecc_parity_count[ltc][slice].counter, g->ecc.ltc.dstg_be_ecc_parity_count[ltc][slice].counter,
uncorrected_delta); uncorrected_delta);
nvgpu_report_err_to_sdl(g, GPU_LTC_CACHE_DSTG_BE_ECC_UNCORRECTED); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_LTC,
GPU_LTC_CACHE_DSTG_BE_ECC_UNCORRECTED);
nvgpu_err(g, "dstg be ecc error uncorrected. " nvgpu_err(g, "dstg be ecc error uncorrected. "
"ecc_addr(0x%x)", ecc_addr); "ecc_addr(0x%x)", ecc_addr);
} }
@@ -281,7 +283,8 @@ static void gv11b_ltc_intr_handle_ecc_sec_ded_interrupts(struct gk20a *g, u32 lt
ltc_ltc0_lts0_dstg_ecc_report_r(), offset), ltc_ltc0_lts0_dstg_ecc_report_r(), offset),
ecc_stats_reg_val); ecc_stats_reg_val);
nvgpu_report_err_to_sdl(g, GPU_LTC_CACHE_DSTG_ECC_CORRECTED); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_LTC,
GPU_LTC_CACHE_DSTG_ECC_CORRECTED);
nvgpu_err(g, "dstg ecc error corrected. " nvgpu_err(g, "dstg ecc error corrected. "
"ecc_addr(0x%x)", dstg_ecc_addr); "ecc_addr(0x%x)", dstg_ecc_addr);
@@ -327,7 +330,8 @@ static void gv11b_ltc_intr_handle_ecc_sec_ded_interrupts(struct gk20a *g, u32 lt
ltc_ltc0_lts0_dstg_ecc_report_r(), offset), ltc_ltc0_lts0_dstg_ecc_report_r(), offset),
ecc_stats_reg_val); ecc_stats_reg_val);
nvgpu_report_err_to_sdl(g, GPU_LTC_CACHE_DSTG_ECC_UNCORRECTED); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_LTC,
GPU_LTC_CACHE_DSTG_ECC_UNCORRECTED);
nvgpu_err(g, "dstg ecc error uncorrected. " nvgpu_err(g, "dstg ecc error uncorrected. "
"ecc_addr(0x%x)", dstg_ecc_addr); "ecc_addr(0x%x)", dstg_ecc_addr);
} }

View File

@@ -521,7 +521,8 @@ static void gv11b_mm_mmu_fault_handle_buf_valid_entry(struct gk20a *g,
} }
#endif #endif
nvgpu_report_err_to_sdl(g, GPU_HUBMMU_PAGE_FAULT_ERROR); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_HUBMMU,
GPU_HUBMMU_PAGE_FAULT_ERROR);
nvgpu_err(g, "sub_er_type = 0x%x, " nvgpu_err(g, "sub_er_type = 0x%x, "
"fault_status = 0x%x", "fault_status = 0x%x",
sub_err_type, fault_status); sub_err_type, fault_status);

View File

@@ -141,13 +141,15 @@ static int gv11b_pmu_correct_ecc(struct gk20a *g, u32 ecc_status, u32 ecc_addr)
if ((ecc_status & if ((ecc_status &
pwr_pmu_falcon_ecc_status_corrected_err_imem_m()) != 0U) { pwr_pmu_falcon_ecc_status_corrected_err_imem_m()) != 0U) {
nvgpu_report_err_to_sdl(g, GPU_PMU_FALCON_IMEM_ECC_CORRECTED); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_PMU,
GPU_PMU_FALCON_IMEM_ECC_CORRECTED);
nvgpu_err(g, "falcon imem ecc error corrected. " nvgpu_err(g, "falcon imem ecc error corrected. "
"ecc_addr(0x%x)", ecc_addr); "ecc_addr(0x%x)", ecc_addr);
} }
if ((ecc_status & if ((ecc_status &
pwr_pmu_falcon_ecc_status_uncorrected_err_imem_m()) != 0U) { pwr_pmu_falcon_ecc_status_uncorrected_err_imem_m()) != 0U) {
nvgpu_report_err_to_sdl(g, GPU_PMU_FALCON_IMEM_ECC_UNCORRECTED); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_PMU,
GPU_PMU_FALCON_IMEM_ECC_UNCORRECTED);
nvgpu_err(g, "falcon imem ecc error uncorrected. " nvgpu_err(g, "falcon imem ecc error uncorrected. "
"ecc_addr(0x%x)", ecc_addr); "ecc_addr(0x%x)", ecc_addr);
ret = -EFAULT; ret = -EFAULT;
@@ -163,7 +165,8 @@ static int gv11b_pmu_correct_ecc(struct gk20a *g, u32 ecc_status, u32 ecc_addr)
} }
if ((ecc_status & if ((ecc_status &
pwr_pmu_falcon_ecc_status_uncorrected_err_dmem_m()) != 0U) { pwr_pmu_falcon_ecc_status_uncorrected_err_dmem_m()) != 0U) {
nvgpu_report_err_to_sdl(g, GPU_PMU_FALCON_DMEM_ECC_UNCORRECTED); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_PMU,
GPU_PMU_FALCON_DMEM_ECC_UNCORRECTED);
nvgpu_err(g, "falcon dmem ecc error uncorrected. " nvgpu_err(g, "falcon dmem ecc error uncorrected. "
"ecc_addr(0x%x)", ecc_addr); "ecc_addr(0x%x)", ecc_addr);
ret = -EFAULT; ret = -EFAULT;

View File

@@ -378,7 +378,8 @@ void ga10b_priv_ring_decode_error_code(struct gk20a *g, u32 error_code)
size_t lookup_table_size = 1; size_t lookup_table_size = 1;
size_t index = 0; size_t index = 0;
nvgpu_report_err_to_sdl(g, GPU_PRI_ACCESS_VIOLATION); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_PRI,
GPU_PRI_ACCESS_VIOLATION);
err_code = pri_sys_pri_error_code_v(error_code); err_code = pri_sys_pri_error_code_v(error_code);
error_extra = pri_sys_pri_error_extra_v(error_code); error_extra = pri_sys_pri_error_extra_v(error_code);

View File

@@ -71,7 +71,8 @@ void gp10b_priv_ring_decode_error_code(struct gk20a *g,
{ {
u32 error_type_index; u32 error_type_index;
nvgpu_report_err_to_sdl(g, GPU_PRI_ACCESS_VIOLATION); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_PRI,
GPU_PRI_ACCESS_VIOLATION);
error_type_index = (error_code & 0x00000f00U) >> 8U; error_type_index = (error_code & 0x00000f00U) >> 8U;
error_code = error_code & 0xBADFf000U; error_code = error_code & 0xBADFf000U;

View File

@@ -63,5 +63,6 @@ void ga10b_ptimer_isr(struct gk20a *g)
} }
} }
nvgpu_report_err_to_sdl(g, GPU_PRI_TIMEOUT_ERROR); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_PRI,
GPU_PRI_TIMEOUT_ERROR);
} }

View File

@@ -65,7 +65,8 @@ void gk20a_ptimer_isr(struct gk20a *g)
gk20a_writel(g, timer_pri_timeout_save_0_r(), 0); gk20a_writel(g, timer_pri_timeout_save_0_r(), 0);
gk20a_writel(g, timer_pri_timeout_save_1_r(), 0); gk20a_writel(g, timer_pri_timeout_save_1_r(), 0);
nvgpu_report_err_to_sdl(g, GPU_PRI_TIMEOUT_ERROR); nvgpu_report_err_to_sdl(g, NVGPU_ERR_MODULE_PRI,
GPU_PRI_TIMEOUT_ERROR);
} }
#ifdef CONFIG_NVGPU_IOCTL_NON_FUSA #ifdef CONFIG_NVGPU_IOCTL_NON_FUSA

View File

@@ -128,6 +128,7 @@ struct gops_ltc_intr {
* -# Report to |qnx.sdl| unit by calling \ref nvgpu_report_err_to_sdl * -# Report to |qnx.sdl| unit by calling \ref nvgpu_report_err_to_sdl
* "nvgpu_report_err_to_sdl" with following parameters: * "nvgpu_report_err_to_sdl" with following parameters:
* -# \a g * -# \a g
* -# \ref NVGPU_ERR_MODULE_LTC
* -# \ref GPU_LTC_CACHE_RSTG_ECC_UNCORRECTED * -# \ref GPU_LTC_CACHE_RSTG_ECC_UNCORRECTED
* "GPU_LTC_CACHE_RSTG_ECC_UNCORRECTED" * "GPU_LTC_CACHE_RSTG_ECC_UNCORRECTED"
* -# If ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_rstg_m() is * -# If ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_rstg_m() is
@@ -142,6 +143,7 @@ struct gops_ltc_intr {
* -# Report to |qnx.sdl| unit by calling \ref nvgpu_report_err_to_sdl * -# Report to |qnx.sdl| unit by calling \ref nvgpu_report_err_to_sdl
* "nvgpu_report_err_to_sdl" with following parameters: * "nvgpu_report_err_to_sdl" with following parameters:
* -# \a g * -# \a g
* -# \ref NVGPU_ERR_MODULE_LTC
* -# \ref GPU_LTC_CACHE_TSTG_ECC_UNCORRECTED * -# \ref GPU_LTC_CACHE_TSTG_ECC_UNCORRECTED
* "GPU_LTC_CACHE_TSTG_ECC_UNCORRECTED" * "GPU_LTC_CACHE_TSTG_ECC_UNCORRECTED"
* -# If ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_tstg_m() is * -# If ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_tstg_m() is
@@ -157,6 +159,7 @@ struct gops_ltc_intr {
* -# Report to |qnx.sdl| unit by calling \ref nvgpu_report_err_to_sdl * -# Report to |qnx.sdl| unit by calling \ref nvgpu_report_err_to_sdl
* "nvgpu_report_err_to_sdl" with following parameters: * "nvgpu_report_err_to_sdl" with following parameters:
* -# \a g * -# \a g
* -# \ref NVGPU_ERR_MODULE_LTC
* -# \ref GPU_LTC_CACHE_DSTG_ECC_CORRECTED * -# \ref GPU_LTC_CACHE_DSTG_ECC_CORRECTED
* "GPU_LTC_CACHE_DSTG_ECC_CORRECTED" * "GPU_LTC_CACHE_DSTG_ECC_CORRECTED"
* -# Flush the L2 cache by calling * -# Flush the L2 cache by calling
@@ -173,6 +176,7 @@ struct gops_ltc_intr {
* -# Report to |qnx.sdl| unit by calling \ref nvgpu_report_err_to_sdl * -# Report to |qnx.sdl| unit by calling \ref nvgpu_report_err_to_sdl
* "nvgpu_report_err_to_sdl" with following parameters: * "nvgpu_report_err_to_sdl" with following parameters:
* -# \a g * -# \a g
* -# \ref NVGPU_ERR_MODULE_LTC
* -# \ref GPU_LTC_CACHE_DSTG_ECC_UNCORRECTED * -# \ref GPU_LTC_CACHE_DSTG_ECC_UNCORRECTED
* "GPU_LTC_CACHE_DSTG_ECC_UNCORRECTED" * "GPU_LTC_CACHE_DSTG_ECC_UNCORRECTED"
* -# Else if the ECC address correspongs to DSTG BE RAM: * -# Else if the ECC address correspongs to DSTG BE RAM:
@@ -182,6 +186,7 @@ struct gops_ltc_intr {
* -# Report to |qnx.sdl| unit by calling \ref nvgpu_report_err_to_sdl * -# Report to |qnx.sdl| unit by calling \ref nvgpu_report_err_to_sdl
* "nvgpu_report_err_to_sdl" with following parameters: * "nvgpu_report_err_to_sdl" with following parameters:
* -# \a g * -# \a g
* -# \ref NVGPU_ERR_MODULE_LTC
* -# \ref GPU_LTC_CACHE_DSTG_BE_ECC_UNCORRECTED * -# \ref GPU_LTC_CACHE_DSTG_BE_ECC_UNCORRECTED
* "GPU_LTC_CACHE_DSTG_BE_ECC_UNCORRECTED" * "GPU_LTC_CACHE_DSTG_BE_ECC_UNCORRECTED"
* -# Else call \ref BUG "BUG()" as this type of ECC error is not supported. * -# Else call \ref BUG "BUG()" as this type of ECC error is not supported.

View File

@@ -108,28 +108,6 @@ struct mmu_fault_info;
* @} * @}
*/ */
/**
* This structure is used to store SM machine check related information.
*/
struct gr_sm_mcerr_info {
/** PC which triggered the machine check error. */
u64 hww_warp_esr_pc;
/** Error status register. */
u32 hww_warp_esr_status;
/** GR engine context of the faulted channel. */
u32 curr_ctx;
/** Channel to which the context belongs. */
u32 chid;
/** TSG to which the channel is bound. */
u32 tsgid;
/** IDs of TPC, GPC, and SM. */
u32 tpc, gpc, sm;
};
/** /**
* @defgroup LIST_OF_ERRORS_REPORTED_FROM_FECS * @defgroup LIST_OF_ERRORS_REPORTED_FROM_FECS
@@ -147,32 +125,6 @@ struct gr_sm_mcerr_info {
* @} * @}
*/ */
/**
* This structure is used to store CTXSW error related information.
*/
struct ctxsw_err_info {
/** GR engine context of the faulted channel. */
u32 curr_ctx;
/** Context-switch status register-0. */
u32 ctxsw_status0;
/** Context-switch status register-1. */
u32 ctxsw_status1;
/** Channel to which the context belongs. */
u32 chid;
/**
* In case of any fault during context-switch transaction,
* context-switch error interrupt is set and the FECS firmware
* writes error code into FECS mailbox 6. This exception
* is handled at GR unit.
*/
u32 mailbox_value;
};
/** /**
* @defgroup LIST_OF_ERRORS_REPORTED_FROM_GPCCS * @defgroup LIST_OF_ERRORS_REPORTED_FROM_GPCCS
* Macros used to assign unique index to errors reported from the GPCCS unit. * Macros used to assign unique index to errors reported from the GPCCS unit.
@@ -268,23 +220,6 @@ struct ctxsw_err_info {
#define GPU_PGRAPH_ILLEGAL_CLASS (2U) #define GPU_PGRAPH_ILLEGAL_CLASS (2U)
#define GPU_PGRAPH_CLASS_ERROR (3U) #define GPU_PGRAPH_CLASS_ERROR (3U)
/**
* This structure is used to store GR exception related information.
*/
struct gr_exception_info {
/** GR engine context of the faulted channel. */
u32 curr_ctx;
/** Channel bound to the context. */
u32 chid;
/** TSG to which the channel is bound. */
u32 tsgid;
/** GR interrupt status. */
u32 status;
};
/** /**
* @defgroup LIST_OF_ERRORS_REPORTED_FROM_LTC * @defgroup LIST_OF_ERRORS_REPORTED_FROM_LTC
* Macros used to assign unique index to errors reported from the LTC unit. * Macros used to assign unique index to errors reported from the LTC unit.
@@ -347,17 +282,6 @@ struct gr_exception_info {
* @} * @}
*/ */
/**
* This structure is used to store GR error related information.
*/
struct gr_err_info {
/** SM machine check error information. */
struct gr_sm_mcerr_info *sm_mcerr_info;
/** GR exception related information. */
struct gr_exception_info *exception_info;
};
/** /**
* This macro is used to initialize the members of nvgpu_hw_err_inject_info * This macro is used to initialize the members of nvgpu_hw_err_inject_info
* struct. * struct.
@@ -392,6 +316,85 @@ struct nvgpu_hw_err_inject_info_desc {
u32 info_size; u32 info_size;
}; };
#ifdef CONFIG_NVGPU_INTR_DEBUG
/**
* This structure is used to store SM machine check related information.
*/
struct gr_sm_mcerr_info {
/** PC which triggered the machine check error. */
u64 hww_warp_esr_pc;
/** Error status register. */
u32 hww_warp_esr_status;
/** GR engine context of the faulted channel. */
u32 curr_ctx;
/** Channel to which the context belongs. */
u32 chid;
/** TSG to which the channel is bound. */
u32 tsgid;
/** IDs of TPC, GPC, and SM. */
u32 tpc, gpc, sm;
};
/**
* This structure is used to store CTXSW error related information.
*/
struct ctxsw_err_info {
/** GR engine context of the faulted channel. */
u32 curr_ctx;
/** Context-switch status register-0. */
u32 ctxsw_status0;
/** Context-switch status register-1. */
u32 ctxsw_status1;
/** Channel to which the context belongs. */
u32 chid;
/**
* In case of any fault during context-switch transaction,
* context-switch error interrupt is set and the FECS firmware
* writes error code into FECS mailbox 6. This exception
* is handled at GR unit.
*/
u32 mailbox_value;
};
/**
* This structure is used to store GR exception related information.
*/
struct gr_exception_info {
/** GR engine context of the faulted channel. */
u32 curr_ctx;
/** Channel bound to the context. */
u32 chid;
/** TSG to which the channel is bound. */
u32 tsgid;
/** GR interrupt status. */
u32 status;
};
/**
* This structure is used to store GR error related information.
*/
struct gr_err_info {
/** SM machine check error information. */
struct gr_sm_mcerr_info *sm_mcerr_info;
/** GR exception related information. */
struct gr_exception_info *exception_info;
};
/** /**
* @brief This function provides an interface to report errors from HOST * @brief This function provides an interface to report errors from HOST
* (PFIFO/PBDMA/PBUS) unit to SDL unit. * (PFIFO/PBDMA/PBUS) unit to SDL unit.
@@ -1194,17 +1197,19 @@ void nvgpu_report_mmu_err(struct gk20a *g, u32 hw_unit,
*/ */
void gr_intr_report_ctxsw_error(struct gk20a *g, u32 err_type, u32 chid, void gr_intr_report_ctxsw_error(struct gk20a *g, u32 err_type, u32 chid,
u32 mailbox_value); u32 mailbox_value);
#endif /* CONFIG_NVGPU_INTR_DEBUG */
/** /**
* @brief This is a wrapper function to report ECC errors from HUBMMU to SDL. * @brief This is a wrapper function to report ECC errors from HUBMMU to SDL.
* *
* @param g [in] - The GPU driver struct. * @param g [in] - The GPU driver struct.
* @param hw_unit_id [in] - HW Unit ID.
* @param err_id [in] - Error ID. * @param err_id [in] - Error ID.
* *
* Calls nvgpu_report_err_to_ss to report errors to Safety_Services. * Calls nvgpu_report_err_to_ss to report errors to Safety_Services.
* *
* @return None * @return None
*/ */
void nvgpu_report_err_to_sdl(struct gk20a *g, u32 err_id); void nvgpu_report_err_to_sdl(struct gk20a *g, u32 hw_unit_id, u32 err_id);
#endif /* NVGPU_NVGPU_ERR_H */ #endif /* NVGPU_NVGPU_ERR_H */

View File

@@ -20,11 +20,10 @@
struct gk20a; struct gk20a;
int nvgpu_cic_mon_report_err_safety_services(struct gk20a *g, int nvgpu_cic_mon_report_err_safety_services(struct gk20a *g,
u32 metadata) u32 err_id)
{ {
/** /**
* ToDo: Add MISC_EC API to report error. * ToDo: Add MISC_EC API to report error.
* Decide on triggering SW quiesce for UE.
*/ */
return 0; return 0;
} }

View File

@@ -247,7 +247,7 @@ gv11b_blcg_hshub_get_gating_prod
gv11b_netlist_is_firmware_defined gv11b_netlist_is_firmware_defined
gv11b_top_get_num_lce gv11b_top_get_num_lce
gv11b_bus_configure_debug_bus gv11b_bus_configure_debug_bus
gv11b_cic_mon_init ga10b_cic_mon_init
mc_gp10b_intr_stall_unit_config mc_gp10b_intr_stall_unit_config
mc_gp10b_intr_nonstall_unit_config mc_gp10b_intr_nonstall_unit_config
nvgpu_acr_bootstrap_hs_acr nvgpu_acr_bootstrap_hs_acr

View File

@@ -36,7 +36,7 @@
#include <hal/bus/bus_gm20b.h> #include <hal/bus/bus_gm20b.h>
#include <hal/bus/bus_gp10b.h> #include <hal/bus/bus_gp10b.h>
#include <hal/bus/bus_gv11b.h> #include <hal/bus/bus_gv11b.h>
#include <hal/cic/mon/cic_gv11b.h> #include <hal/cic/mon/cic_ga10b.h>
#include <nvgpu/hw/gv11b/hw_mc_gv11b.h> #include <nvgpu/hw/gv11b/hw_mc_gv11b.h>
#include <nvgpu/hw/gv11b/hw_bus_gv11b.h> #include <nvgpu/hw/gv11b/hw_bus_gv11b.h>
@@ -130,7 +130,7 @@ int test_bus_setup(struct unit_module *m, struct gk20a *g, void *args)
g->ops.mc.intr_nonstall_unit_config = g->ops.mc.intr_nonstall_unit_config =
mc_gp10b_intr_nonstall_unit_config; mc_gp10b_intr_nonstall_unit_config;
g->ops.ptimer.isr = gk20a_ptimer_isr; g->ops.ptimer.isr = gk20a_ptimer_isr;
g->ops.cic_mon.init = gv11b_cic_mon_init; g->ops.cic_mon.init = ga10b_cic_mon_init;
/* Map register space NV_PRIV_MASTER */ /* Map register space NV_PRIV_MASTER */
if (nvgpu_posix_io_add_reg_space(g, NV_PBUS_START, NV_PBUS_SIZE) != 0) { if (nvgpu_posix_io_add_reg_space(g, NV_PBUS_START, NV_PBUS_SIZE) != 0) {

View File

@@ -30,7 +30,7 @@
#include <nvgpu/cic_mon.h> #include <nvgpu/cic_mon.h>
#include <hal/ce/ce_gp10b.h> #include <hal/ce/ce_gp10b.h>
#include <hal/ce/ce_gv11b.h> #include <hal/ce/ce_gv11b.h>
#include <hal/cic/mon/cic_gv11b.h> #include <hal/cic/mon/cic_ga10b.h>
#include <nvgpu/hw/gv11b/hw_ce_gv11b.h> #include <nvgpu/hw/gv11b/hw_ce_gv11b.h>
#include "nvgpu-ce.h" #include "nvgpu-ce.h"
@@ -128,7 +128,7 @@ int test_ce_setup_env(struct unit_module *m,
g->blcg_enabled = false; g->blcg_enabled = false;
nvgpu_spinlock_init(&g->mc.intr_lock); nvgpu_spinlock_init(&g->mc.intr_lock);
g->ops.cic_mon.init = gv11b_cic_mon_init; g->ops.cic_mon.init = ga10b_cic_mon_init;
if (nvgpu_cic_mon_setup(g) != 0) { if (nvgpu_cic_mon_setup(g) != 0) {
unit_err(m, "%s: failed to initialize CIC\n", unit_err(m, "%s: failed to initialize CIC\n",

View File

@@ -35,7 +35,7 @@
#include "hal/fb/ecc/fb_ecc_gv11b.h" #include "hal/fb/ecc/fb_ecc_gv11b.h"
#include "hal/fb/intr/fb_intr_gv11b.h" #include "hal/fb/intr/fb_intr_gv11b.h"
#include "hal/fb/intr/fb_intr_ecc_gv11b.h" #include "hal/fb/intr/fb_intr_ecc_gv11b.h"
#include "hal/cic/mon/cic_gv11b.h" #include "hal/cic/mon/cic_ga10b.h"
#include <nvgpu/hw/gv11b/hw_fb_gv11b.h> #include <nvgpu/hw/gv11b/hw_fb_gv11b.h>
#include "fb_fusa.h" #include "fb_fusa.h"
@@ -64,7 +64,7 @@ int fb_gv11b_init_test(struct unit_module *m, struct gk20a *g, void *args)
g->ops.mc.intr_nonstall_unit_config = g->ops.mc.intr_nonstall_unit_config =
mc_gp10b_intr_nonstall_unit_config; mc_gp10b_intr_nonstall_unit_config;
g->ops.fb.intr.enable = gv11b_fb_intr_enable; g->ops.fb.intr.enable = gv11b_fb_intr_enable;
g->ops.cic_mon.init = gv11b_cic_mon_init; g->ops.cic_mon.init = ga10b_cic_mon_init;
/* /*
* Define some arbitrary addresses for test purposes. * Define some arbitrary addresses for test purposes.

View File

@@ -39,7 +39,7 @@
#include "common/gr/gr_falcon_priv.h" #include "common/gr/gr_falcon_priv.h"
#include "hal/init/hal_gv11b.h" #include "hal/init/hal_gv11b.h"
#include "hal/cic/mon/cic_gv11b.h" #include "hal/cic/mon/cic_ga10b.h"
#include "nvgpu-gr.h" #include "nvgpu-gr.h"
#include "nvgpu-gr-gv11b.h" #include "nvgpu-gr-gv11b.h"
@@ -168,7 +168,7 @@ int test_gr_init_setup_ready(struct unit_module *m,
nvgpu_device_init(g); nvgpu_device_init(g);
nvgpu_fifo_setup_sw(g); nvgpu_fifo_setup_sw(g);
g->ops.cic_mon.init = gv11b_cic_mon_init; g->ops.cic_mon.init = ga10b_cic_mon_init;
err = nvgpu_cic_mon_setup(g); err = nvgpu_cic_mon_setup(g);
if (err != 0) { if (err != 0) {

View File

@@ -61,7 +61,7 @@
#include "hal/mm/gmmu/gmmu_gv11b.h" #include "hal/mm/gmmu/gmmu_gv11b.h"
#include "hal/mm/mm_gp10b.h" #include "hal/mm/mm_gp10b.h"
#include "hal/mm/mm_gv11b.h" #include "hal/mm/mm_gv11b.h"
#include "hal/cic/mon/cic_gv11b.h" #include "hal/cic/mon/cic_ga10b.h"
#include "hal/mm/mmu_fault/mmu_fault_gv11b.h" #include "hal/mm/mmu_fault/mmu_fault_gv11b.h"
#include "mmu-fault-gv11b-fusa.h" #include "mmu-fault-gv11b-fusa.h"
@@ -222,7 +222,7 @@ int test_env_init_mm_mmu_fault_gv11b_fusa(struct unit_module *m,
unit_return_fail(m, "nvgpu_init_mm_support failed\n"); unit_return_fail(m, "nvgpu_init_mm_support failed\n");
} }
g->ops.cic_mon.init = gv11b_cic_mon_init; g->ops.cic_mon.init = ga10b_cic_mon_init;
if (nvgpu_cic_mon_setup(g) != 0) { if (nvgpu_cic_mon_setup(g) != 0) {
unit_return_fail(m, "Failed to initialize CIC\n"); unit_return_fail(m, "Failed to initialize CIC\n");

View File

@@ -31,7 +31,7 @@
#include <hal/priv_ring/priv_ring_gp10b.h> #include <hal/priv_ring/priv_ring_gp10b.h>
#include <hal/init/hal_gv11b_litter.h> #include <hal/init/hal_gv11b_litter.h>
#include <hal/mc/mc_gp10b.h> #include <hal/mc/mc_gp10b.h>
#include "hal/cic/mon/cic_gv11b.h" #include "hal/cic/mon/cic_ga10b.h"
#include <nvgpu/hw/gv11b/hw_pri_ringstation_sys_gv11b.h> #include <nvgpu/hw/gv11b/hw_pri_ringstation_sys_gv11b.h>
#include <nvgpu/hw/gv11b/hw_pri_ringstation_gpc_gv11b.h> #include <nvgpu/hw/gv11b/hw_pri_ringstation_gpc_gv11b.h>
@@ -126,7 +126,7 @@ int test_priv_ring_setup(struct unit_module *m, struct gk20a *g, void *args)
g->ops.get_litter_value = gv11b_get_litter_value; g->ops.get_litter_value = gv11b_get_litter_value;
g->ops.mc.intr_stall_unit_config = g->ops.mc.intr_stall_unit_config =
mc_gp10b_intr_stall_unit_config; mc_gp10b_intr_stall_unit_config;
g->ops.cic_mon.init = gv11b_cic_mon_init; g->ops.cic_mon.init = ga10b_cic_mon_init;
/* Map register space NV_PRIV_MASTER */ /* Map register space NV_PRIV_MASTER */
if (nvgpu_posix_io_add_reg_space(g, NV_PRIV_MASTER_START, if (nvgpu_posix_io_add_reg_space(g, NV_PRIV_MASTER_START,

View File

@@ -28,7 +28,7 @@
#include <nvgpu/ptimer.h> #include <nvgpu/ptimer.h>
#include <nvgpu/cic_mon.h> #include <nvgpu/cic_mon.h>
#include <hal/ptimer/ptimer_gk20a.h> #include <hal/ptimer/ptimer_gk20a.h>
#include <hal/cic/mon/cic_gv11b.h> #include <hal/cic/mon/cic_ga10b.h>
#include <nvgpu/hw/gk20a/hw_timer_gk20a.h> #include <nvgpu/hw/gk20a/hw_timer_gk20a.h>
#include "nvgpu-ptimer.h" #include "nvgpu-ptimer.h"
@@ -89,7 +89,7 @@ int test_setup_env(struct unit_module *m,
/* Setup HAL */ /* Setup HAL */
g->ops.ptimer.isr = gk20a_ptimer_isr; g->ops.ptimer.isr = gk20a_ptimer_isr;
g->ops.cic_mon.init = gv11b_cic_mon_init; g->ops.cic_mon.init = ga10b_cic_mon_init;
/* Create ptimer register space */ /* Create ptimer register space */
if (nvgpu_posix_io_add_reg_space(g, PTIMER_REG_SPACE_START, if (nvgpu_posix_io_add_reg_space(g, PTIMER_REG_SPACE_START,