From fcb7635a922654446e9d24f7b29917412344e228 Mon Sep 17 00:00:00 2001 From: Rajesh Devaraj Date: Wed, 15 May 2019 23:54:18 +0530 Subject: [PATCH] gpu: nvgpu: gops initialization for SDL This patch moves gops init related to SDL from qnx to common-core. For this purpose, it does the following changes: - Adds stub functions for linux and posix. - Updates nvgpu_init.c for mapping err_ops with report error APIs. - Updates nvgpu_err.h header file to include prototypes related to error reporting APIs. - Updates nvgpu-linux.yaml file to include sdl_stub file. Jira NVGPU-3237 Change-Id: Idbdbe6f8437bf53504b29dc2d50214484ad18d6f Signed-off-by: Rajesh Devaraj Reviewed-on: https://git-master.nvidia.com/r/2119681 Reviewed-by: mobile promotions Tested-by: mobile promotions --- arch/nvgpu-linux.yaml | 3 + drivers/gpu/nvgpu/Makefile | 3 +- drivers/gpu/nvgpu/common/ecc.c | 88 ------------------- drivers/gpu/nvgpu/common/fifo/fifo.c | 17 ---- drivers/gpu/nvgpu/common/fifo/preempt.c | 7 +- drivers/gpu/nvgpu/common/gr/gr_intr.c | 43 ++------- drivers/gpu/nvgpu/common/pmu/pmu.c | 21 +---- drivers/gpu/nvgpu/hal/bus/bus_gk20a.c | 3 +- drivers/gpu/nvgpu/hal/ce/ce_gp10b.c | 6 +- drivers/gpu/nvgpu/hal/ce/ce_gv11b.c | 21 +---- .../gpu/nvgpu/hal/fb/intr/fb_intr_ecc_gv11b.c | 32 +++---- .../gpu/nvgpu/hal/fifo/ctxsw_timeout_gv11b.c | 6 +- drivers/gpu/nvgpu/hal/fifo/fifo_intr_gk20a.c | 4 +- drivers/gpu/nvgpu/hal/fifo/fifo_intr_gv11b.c | 10 +-- drivers/gpu/nvgpu/hal/fifo/pbdma_gv11b.c | 6 +- drivers/gpu/nvgpu/hal/gr/gr/gr_gv11b.c | 81 ++++++++++++----- drivers/gpu/nvgpu/hal/gr/intr/gr_intr_gm20b.c | 1 + drivers/gpu/nvgpu/hal/gr/intr/gr_intr_gv11b.c | 37 ++++---- .../gpu/nvgpu/hal/ltc/intr/ltc_intr_gv11b.c | 32 +++++-- drivers/gpu/nvgpu/hal/pmu/pmu_gv11b.c | 8 +- .../gpu/nvgpu/hal/priv_ring/priv_ring_gp10b.c | 16 +--- drivers/gpu/nvgpu/hal/ptimer/ptimer_gk20a.c | 19 +--- drivers/gpu/nvgpu/include/nvgpu/gk20a.h | 51 ----------- drivers/gpu/nvgpu/include/nvgpu/nvgpu_err.h | 36 +++++--- drivers/gpu/nvgpu/os/linux/sdl/sdl_stub.c | 67 ++++++++++++++ drivers/gpu/nvgpu/os/posix/stubs.c | 44 ++++++++++ 26 files changed, 304 insertions(+), 358 deletions(-) create mode 100644 drivers/gpu/nvgpu/os/linux/sdl/sdl_stub.c diff --git a/arch/nvgpu-linux.yaml b/arch/nvgpu-linux.yaml index 47e75e394..30ce2d3c6 100644 --- a/arch/nvgpu-linux.yaml +++ b/arch/nvgpu-linux.yaml @@ -213,6 +213,9 @@ vgpu: vm: sources: [ os/linux/vm.c ] +sdl: + sources: [ os/linux/sdl/sdl_stub.c ] + # Group all the Linux headers for now. headers: sources: [ include/nvgpu/linux/atomic.h, diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index 39ce03340..e2edb26d9 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile @@ -403,7 +403,8 @@ nvgpu-y += \ os/linux/dt.o \ os/linux/ecc_sysfs.o \ os/linux/os_ops_tu104.o \ - os/linux/bsearch.o + os/linux/bsearch.o \ + os/linux/sdl/sdl_stub.o nvgpu-$(CONFIG_GK20A_VIDMEM) += \ os/linux/dmabuf_vidmem.o diff --git a/drivers/gpu/nvgpu/common/ecc.c b/drivers/gpu/nvgpu/common/ecc.c index f0e8b7d0a..c4f60e9b9 100644 --- a/drivers/gpu/nvgpu/common/ecc.c +++ b/drivers/gpu/nvgpu/common/ecc.c @@ -386,91 +386,3 @@ void nvgpu_ecc_remove_support(struct gk20a *g) nvgpu_ecc_sysfs_remove(g); nvgpu_ecc_free(g); } - -void nvgpu_hubmmu_report_ecc_error(struct gk20a *g, u32 inst, - u32 err_type, u64 err_addr, u64 err_cnt) -{ - int ret = 0; - - if (g->ops.fb.err_ops.report_ecc_parity_err == NULL) { - return; - } - ret = g->ops.fb.err_ops.report_ecc_parity_err(g, - NVGPU_ERR_MODULE_HUBMMU, inst, err_type, err_addr, - err_cnt); - if (ret != 0) { - nvgpu_err(g, "Failed to report HUBMMU error: inst=%u, " - "err_type=%u, err_addr=%llu, err_cnt=%llu", - inst, err_type, err_addr, err_cnt); - } -} - -void nvgpu_ltc_report_ecc_error(struct gk20a *g, u32 ltc, u32 slice, - u32 err_type, u64 err_addr, u64 err_cnt) -{ - int ret = 0; - u32 inst = 0U; - - if (g->ops.ltc.err_ops.report_ecc_parity_err == NULL) { - return; - } - if (slice < 256U) { - inst = (ltc << 8U) | slice; - } else { - nvgpu_err(g, "Invalid slice id=%u", slice); - return; - } - ret = g->ops.ltc.err_ops.report_ecc_parity_err(g, - NVGPU_ERR_MODULE_LTC, inst, err_type, err_addr, - err_cnt); - if (ret != 0) { - nvgpu_err(g, "Failed to report LTC error: inst=%u, \ - err_type=%u, err_addr=%llu, err_cnt=%llu", - inst, err_type, err_addr, err_cnt); - } -} - -void nvgpu_pmu_report_ecc_error(struct gk20a *g, u32 inst, - u32 err_type, u64 err_addr, u64 err_cnt) -{ - int ret = 0; - - if (g->ops.pmu.err_ops.report_ecc_parity_err == NULL) { - return; - } - ret = g->ops.pmu.err_ops.report_ecc_parity_err(g, - NVGPU_ERR_MODULE_PWR, inst, err_type, err_addr, - err_cnt); - if (ret != 0) { - nvgpu_err(g, "Failed to report PMU error: inst=%u, \ - err_type=%u, err_addr=%llu, err_cnt=%llu", - inst, err_type, err_addr, err_cnt); - } -} - -void nvgpu_gr_report_ecc_error(struct gk20a *g, u32 hw_module, - u32 gpc, u32 tpc, u32 err_type, - u64 err_addr, u64 err_cnt) -{ - int ret = 0; - u32 inst = 0U; - - if (g->ops.gr.err_ops.report_ecc_parity_err == NULL) { - return; - } - if (tpc < 256U) { - inst = (gpc << 8) | tpc; - } else { - nvgpu_err(g, "Invalid tpc id=%u", tpc); - return; - } - ret = g->ops.gr.err_ops.report_ecc_parity_err(g, - hw_module, inst, err_type, - err_addr, err_cnt); - if (ret != 0) { - nvgpu_err(g, "Failed to report GR error: hw_module=%u, \ - inst=%u, err_type=%u, err_addr=%llu, \ - err_cnt=%llu", hw_module, inst, err_type, - err_addr, err_cnt); - } -} diff --git a/drivers/gpu/nvgpu/common/fifo/fifo.c b/drivers/gpu/nvgpu/common/fifo/fifo.c index 7a0d859db..fd14b50b0 100644 --- a/drivers/gpu/nvgpu/common/fifo/fifo.c +++ b/drivers/gpu/nvgpu/common/fifo/fifo.c @@ -239,23 +239,6 @@ clean_up: return err; } -void nvgpu_report_host_error(struct gk20a *g, u32 inst, - u32 err_id, u32 intr_info) -{ - int ret; - - if (g->ops.fifo.err_ops.report_host_err == NULL) { - return; - } - ret = g->ops.fifo.err_ops.report_host_err(g, - NVGPU_ERR_MODULE_HOST, inst, err_id, intr_info); - if (ret != 0) { - nvgpu_err(g, "Failed to report HOST error: \ - inst=%u, err_id=%u, intr_info=%u, ret=%d", - inst, err_id, intr_info, ret); - } -} - static const char * const pbdma_ch_eng_status_str[] = { "invalid", "valid", diff --git a/drivers/gpu/nvgpu/common/fifo/preempt.c b/drivers/gpu/nvgpu/common/fifo/preempt.c index de9aa235e..5e93cb022 100644 --- a/drivers/gpu/nvgpu/common/fifo/preempt.c +++ b/drivers/gpu/nvgpu/common/fifo/preempt.c @@ -26,6 +26,7 @@ #include #include #include +#include u32 nvgpu_preempt_get_timeout(struct gk20a *g) @@ -78,9 +79,9 @@ void nvgpu_preempt_poll_tsg_on_pbdma(struct gk20a *g, */ if (g->ops.fifo.preempt_poll_pbdma(g, tsgid, pbdma_id) != 0) { - nvgpu_report_host_error(g, 0, - GPU_HOST_PBDMA_PREEMPT_ERROR, - pbdma_id); + (void) nvgpu_report_host_err(g, NVGPU_ERR_MODULE_HOST, + pbdma_id, + GPU_HOST_PBDMA_PREEMPT_ERROR, 0); nvgpu_err(g, "PBDMA preempt failed"); } } diff --git a/drivers/gpu/nvgpu/common/gr/gr_intr.c b/drivers/gpu/nvgpu/common/gr/gr_intr.c index 8fe8a6d91..078315679 100644 --- a/drivers/gpu/nvgpu/common/gr/gr_intr.c +++ b/drivers/gpu/nvgpu/common/gr/gr_intr.c @@ -21,6 +21,7 @@ */ #include +#include #include #include #include @@ -43,7 +44,6 @@ static void gr_intr_report_ctxsw_error(struct gk20a *g, u32 err_type, u32 chid, u32 mailbox_value) { - int ret = 0; struct ctxsw_err_info err_info; err_info.curr_ctx = g->ops.gr.falcon.get_current_ctx(g); @@ -52,15 +52,8 @@ static void gr_intr_report_ctxsw_error(struct gk20a *g, u32 err_type, u32 chid, err_info.mailbox_value = mailbox_value; err_info.chid = chid; - if (g->ops.gr.err_ops.report_ctxsw_err != NULL) { - ret = g->ops.gr.err_ops.report_ctxsw_err(g, - NVGPU_ERR_MODULE_FECS, - err_type, (void *)&err_info); - if (ret != 0) { - nvgpu_err(g, "Failed to report FECS CTXSW error: %d", - err_type); - } - } + (void) nvgpu_report_ctxsw_err(g, NVGPU_ERR_MODULE_FECS, + err_type, (void *)&err_info); } static int gr_intr_handle_tpc_exception(struct gk20a *g, u32 gpc, u32 tpc, @@ -188,16 +181,11 @@ static int gr_intr_handle_class_error(struct gk20a *g, static void gr_intr_report_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, u32 sm, u32 hww_warp_esr_status, u64 hww_warp_esr_pc) { - int ret; struct gr_sm_mcerr_info err_info; struct nvgpu_channel *ch; struct gr_err_info info; u32 tsgid, chid, curr_ctx, inst = 0; - if (g->ops.gr.err_ops.report_gr_err == NULL) { - return; - } - tsgid = NVGPU_INVALID_TSG_ID; curr_ctx = g->ops.gr.falcon.get_current_ctx(g); ch = nvgpu_gr_intr_get_channel_from_ctx(g, curr_ctx, &tsgid); @@ -217,14 +205,8 @@ static void gr_intr_report_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, err_info.tpc = tpc; err_info.sm = sm; info.sm_mcerr_info = &err_info; - ret = g->ops.gr.err_ops.report_gr_err(g, - NVGPU_ERR_MODULE_SM, inst, GPU_SM_MACHINE_CHECK_ERROR, - &info); - if (ret != 0) { - nvgpu_err(g, "failed to report SM_EXCEPTION " - "gpc=%u, tpc=%u, sm=%u, esr_status=%x", - gpc, tpc, sm, hww_warp_esr_status); - } + (void) nvgpu_report_gr_err(g, NVGPU_ERR_MODULE_SM, inst, + GPU_SM_MACHINE_CHECK_ERROR, &info); } /* Used by sw interrupt thread to translate current ctx to chid. @@ -314,16 +296,11 @@ unlock: void nvgpu_gr_intr_report_exception(struct gk20a *g, u32 inst, u32 err_type, u32 status) { - int ret = 0; struct nvgpu_channel *ch; struct gr_exception_info err_info; struct gr_err_info info; u32 tsgid, chid, curr_ctx; - if (g->ops.gr.err_ops.report_gr_err == NULL) { - return; - } - tsgid = NVGPU_INVALID_TSG_ID; curr_ctx = g->ops.gr.falcon.get_current_ctx(g); ch = nvgpu_gr_intr_get_channel_from_ctx(g, curr_ctx, &tsgid); @@ -339,14 +316,8 @@ void nvgpu_gr_intr_report_exception(struct gk20a *g, u32 inst, err_info.tsgid = tsgid; err_info.status = status; info.exception_info = &err_info; - ret = g->ops.gr.err_ops.report_gr_err(g, - NVGPU_ERR_MODULE_PGRAPH, inst, err_type, - &info); - if (ret != 0) { - nvgpu_err(g, "Failed to report PGRAPH exception: " - "inst=%u, err_type=%u, status=%u", - inst, err_type, status); - } + (void) nvgpu_report_gr_err(g, NVGPU_ERR_MODULE_PGRAPH, + inst, err_type, &info); } void nvgpu_gr_intr_set_error_notifier(struct gk20a *g, diff --git a/drivers/gpu/nvgpu/common/pmu/pmu.c b/drivers/gpu/nvgpu/common/pmu/pmu.c index 1420ff45b..5aa7e394e 100644 --- a/drivers/gpu/nvgpu/common/pmu/pmu.c +++ b/drivers/gpu/nvgpu/common/pmu/pmu.c @@ -43,6 +43,7 @@ #include #include #include +#include /* PMU locks used to sync with PMU-RTOS */ int nvgpu_pmu_lock_acquire(struct gk20a *g, struct nvgpu_pmu *pmu, @@ -387,27 +388,11 @@ void nvgpu_pmu_remove_support(struct gk20a *g, struct nvgpu_pmu *pmu) } /* PMU H/W error functions */ -static void pmu_report_error(struct gk20a *g, u32 err_type, - u32 status, u32 pmu_err_type) -{ - int ret = 0; - - if (g->ops.pmu.err_ops.report_pmu_err != NULL) { - ret = g->ops.pmu.err_ops.report_pmu_err(g, - NVGPU_ERR_MODULE_PWR, err_type, status, pmu_err_type); - if (ret != 0) { - nvgpu_err(g, "Failed to report PMU error: %d", - err_type); - } - } -} - void nvgpu_pmu_report_bar0_pri_err_status(struct gk20a *g, u32 bar0_status, u32 error_type) { - pmu_report_error(g, - GPU_PMU_BAR0_ERROR_TIMEOUT, bar0_status, error_type); - return; + (void) nvgpu_report_pmu_err(g, NVGPU_ERR_MODULE_PMU, + GPU_PMU_BAR0_ERROR_TIMEOUT, error_type, bar0_status); } /* PMU engine reset functions */ diff --git a/drivers/gpu/nvgpu/hal/bus/bus_gk20a.c b/drivers/gpu/nvgpu/hal/bus/bus_gk20a.c index 9306b5076..37d2e4a4b 100644 --- a/drivers/gpu/nvgpu/hal/bus/bus_gk20a.c +++ b/drivers/gpu/nvgpu/hal/bus/bus_gk20a.c @@ -75,7 +75,8 @@ void gk20a_bus_isr(struct gk20a *g) */ err_type = GPU_HOST_PBUS_TIMEOUT_ERROR; } - nvgpu_report_host_error(g, 0, err_type, val); + (void) nvgpu_report_host_err(g, NVGPU_ERR_MODULE_HOST, + 0, err_type, val); gk20a_writel(g, bus_intr_0_r(), val); } diff --git a/drivers/gpu/nvgpu/hal/ce/ce_gp10b.c b/drivers/gpu/nvgpu/hal/ce/ce_gp10b.c index 0060dffc5..c222ea06f 100644 --- a/drivers/gpu/nvgpu/hal/ce/ce_gp10b.c +++ b/drivers/gpu/nvgpu/hal/ce/ce_gp10b.c @@ -39,14 +39,14 @@ void gp10b_ce_stall_isr(struct gk20a *g, u32 inst_id, u32 pri_base) /* clear blocking interrupts: they exibit broken behavior */ if ((ce_intr & ce_intr_status_blockpipe_pending_f()) != 0U) { - nvgpu_report_ce_error(g, inst_id, + (void) nvgpu_report_ce_err(g, NVGPU_ERR_MODULE_CE, inst_id, GPU_CE_BLOCK_PIPE, ce_intr); nvgpu_log(g, gpu_dbg_intr, "ce blocking pipe interrupt"); clear_intr |= ce_intr_status_blockpipe_pending_f(); } if ((ce_intr & ce_intr_status_launcherr_pending_f()) != 0U) { - nvgpu_report_ce_error(g, inst_id, + (void) nvgpu_report_ce_err(g, NVGPU_ERR_MODULE_CE, inst_id, GPU_CE_LAUNCH_ERROR, ce_intr); nvgpu_log(g, gpu_dbg_intr, "ce launch error interrupt"); clear_intr |= ce_intr_status_launcherr_pending_f(); @@ -65,7 +65,7 @@ u32 gp10b_ce_nonstall_isr(struct gk20a *g, u32 inst_id, u32 pri_base) ce_intr, inst_id); if ((ce_intr & ce_intr_status_nonblockpipe_pending_f()) != 0U) { - nvgpu_report_ce_error(g, inst_id, + (void) nvgpu_report_ce_err(g, NVGPU_ERR_MODULE_CE, inst_id, GPU_CE_NONBLOCK_PIPE, ce_intr); nvgpu_writel(g, ce_intr_status_r(inst_id), ce_intr_status_nonblockpipe_pending_f()); diff --git a/drivers/gpu/nvgpu/hal/ce/ce_gv11b.c b/drivers/gpu/nvgpu/hal/ce/ce_gv11b.c index 7899eecc4..8f901390d 100644 --- a/drivers/gpu/nvgpu/hal/ce/ce_gv11b.c +++ b/drivers/gpu/nvgpu/hal/ce/ce_gv11b.c @@ -33,23 +33,6 @@ #include -void nvgpu_report_ce_error(struct gk20a *g, u32 inst, - u32 err_type, u32 status) -{ - int ret = 0; - - if (g->ops.ce.err_ops.report_ce_err == NULL) { - return; - } - ret = g->ops.ce.err_ops.report_ce_err(g, - NVGPU_ERR_MODULE_CE, inst, err_type, status); - if (ret != 0) { - nvgpu_err(g, - "report_ce_err failed inst=%u err_type=%u status=%u", - inst, err_type, status); - } -} - u32 gv11b_ce_get_num_pce(struct gk20a *g) { /* @@ -78,7 +61,7 @@ void gv11b_ce_stall_isr(struct gk20a *g, u32 inst_id, u32 pri_base) * reset to get back to a working state. */ if ((ce_intr & ce_intr_status_invalid_config_pending_f()) != 0U) { - nvgpu_report_ce_error(g, inst_id, + (void) nvgpu_report_ce_err(g, NVGPU_ERR_MODULE_CE, inst_id, GPU_CE_INVALID_CONFIG, ce_intr); nvgpu_log(g, gpu_dbg_intr, "ce: inst %d: invalid config", inst_id); @@ -92,7 +75,7 @@ void gv11b_ce_stall_isr(struct gk20a *g, u32 inst_id, u32 pri_base) * reset before operations can start again, if not the entire GPU. */ if ((ce_intr & ce_intr_status_mthd_buffer_fault_pending_f()) != 0U) { - nvgpu_report_ce_error(g, inst_id, + (void) nvgpu_report_ce_err(g, NVGPU_ERR_MODULE_CE, inst_id, GPU_CE_METHOD_BUFFER_FAULT, ce_intr); nvgpu_log(g, gpu_dbg_intr, "ce: inst %d: mthd buffer fault", inst_id); diff --git a/drivers/gpu/nvgpu/hal/fb/intr/fb_intr_ecc_gv11b.c b/drivers/gpu/nvgpu/hal/fb/intr/fb_intr_ecc_gv11b.c index 6b74b4f8a..be1d26126 100644 --- a/drivers/gpu/nvgpu/hal/fb/intr/fb_intr_ecc_gv11b.c +++ b/drivers/gpu/nvgpu/hal/fb/intr/fb_intr_ecc_gv11b.c @@ -83,8 +83,8 @@ static void gv11b_fb_intr_handle_ecc_l2tlb(struct gk20a *g, u32 ecc_status) if ((ecc_status & fb_mmu_l2tlb_ecc_status_corrected_err_l2tlb_sa_data_m()) != 0U) { - nvgpu_hubmmu_report_ecc_error(g, 0, - GPU_HUBMMU_L2TLB_SA_DATA_ECC_CORRECTED, + (void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_HUBMMU, + 0, GPU_HUBMMU_L2TLB_SA_DATA_ECC_CORRECTED, ecc_addr, g->ecc.fb.mmu_l2tlb_ecc_corrected_err_count[0].counter); nvgpu_log(g, gpu_dbg_intr, "corrected ecc sa data error"); @@ -92,8 +92,8 @@ static void gv11b_fb_intr_handle_ecc_l2tlb(struct gk20a *g, u32 ecc_status) if ((ecc_status & fb_mmu_l2tlb_ecc_status_uncorrected_err_l2tlb_sa_data_m()) != 0U) { - nvgpu_hubmmu_report_ecc_error(g, 0, - GPU_HUBMMU_L2TLB_SA_DATA_ECC_UNCORRECTED, + (void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_HUBMMU, + 0, GPU_HUBMMU_L2TLB_SA_DATA_ECC_UNCORRECTED, ecc_addr, g->ecc.fb.mmu_l2tlb_ecc_uncorrected_err_count[0].counter); nvgpu_log(g, gpu_dbg_intr, "uncorrected ecc sa data error"); @@ -161,16 +161,16 @@ static void gv11b_fb_intr_handle_ecc_hubtlb(struct gk20a *g, u32 ecc_status) if ((ecc_status & fb_mmu_hubtlb_ecc_status_corrected_err_sa_data_m()) != 0U) { - nvgpu_hubmmu_report_ecc_error(g, 0, - GPU_HUBMMU_TLB_SA_DATA_ECC_CORRECTED, + (void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_HUBMMU, + 0, GPU_HUBMMU_TLB_SA_DATA_ECC_CORRECTED, ecc_addr, g->ecc.fb.mmu_hubtlb_ecc_corrected_err_count[0].counter); nvgpu_log(g, gpu_dbg_intr, "corrected ecc sa data error"); } if ((ecc_status & fb_mmu_hubtlb_ecc_status_uncorrected_err_sa_data_m()) != 0U) { - nvgpu_hubmmu_report_ecc_error(g, 0, - GPU_HUBMMU_TLB_SA_DATA_ECC_UNCORRECTED, + (void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_HUBMMU, + 0, GPU_HUBMMU_TLB_SA_DATA_ECC_UNCORRECTED, ecc_addr, g->ecc.fb.mmu_hubtlb_ecc_uncorrected_err_count[0].counter); nvgpu_log(g, gpu_dbg_intr, "uncorrected ecc sa data error"); @@ -239,8 +239,8 @@ static void gv11b_fb_intr_handle_ecc_fillunit(struct gk20a *g, u32 ecc_status) if ((ecc_status & fb_mmu_fillunit_ecc_status_corrected_err_pte_data_m()) != 0U) { - nvgpu_hubmmu_report_ecc_error(g, 0, - GPU_HUBMMU_PTE_DATA_ECC_CORRECTED, + (void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_HUBMMU, + 0, GPU_HUBMMU_PTE_DATA_ECC_CORRECTED, ecc_addr, g->ecc.fb.mmu_fillunit_ecc_corrected_err_count[0].counter); nvgpu_log(g, gpu_dbg_intr, "corrected ecc pte data error"); @@ -248,16 +248,16 @@ static void gv11b_fb_intr_handle_ecc_fillunit(struct gk20a *g, u32 ecc_status) if ((ecc_status & fb_mmu_fillunit_ecc_status_uncorrected_err_pte_data_m()) != 0U) { - nvgpu_hubmmu_report_ecc_error(g, 0, - GPU_HUBMMU_PTE_DATA_ECC_UNCORRECTED, + (void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_HUBMMU, + 0, GPU_HUBMMU_PTE_DATA_ECC_UNCORRECTED, ecc_addr, g->ecc.fb.mmu_fillunit_ecc_uncorrected_err_count[0].counter); nvgpu_log(g, gpu_dbg_intr, "uncorrected ecc pte data error"); } if ((ecc_status & fb_mmu_fillunit_ecc_status_corrected_err_pde0_data_m()) != 0U) { - nvgpu_hubmmu_report_ecc_error(g, 0, - GPU_HUBMMU_PDE0_DATA_ECC_CORRECTED, + (void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_HUBMMU, + 0, GPU_HUBMMU_PDE0_DATA_ECC_CORRECTED, ecc_addr, g->ecc.fb.mmu_fillunit_ecc_corrected_err_count[0].counter); nvgpu_log(g, gpu_dbg_intr, "corrected ecc pde0 data error"); @@ -265,8 +265,8 @@ static void gv11b_fb_intr_handle_ecc_fillunit(struct gk20a *g, u32 ecc_status) if ((ecc_status & fb_mmu_fillunit_ecc_status_uncorrected_err_pde0_data_m()) != 0U) { - nvgpu_hubmmu_report_ecc_error(g, 0, - GPU_HUBMMU_PDE0_DATA_ECC_UNCORRECTED, + (void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_HUBMMU, + 0, GPU_HUBMMU_PDE0_DATA_ECC_UNCORRECTED, ecc_addr, g->ecc.fb.mmu_fillunit_ecc_uncorrected_err_count[0].counter); nvgpu_log(g, gpu_dbg_intr, "uncorrected ecc pde0 data error"); diff --git a/drivers/gpu/nvgpu/hal/fifo/ctxsw_timeout_gv11b.c b/drivers/gpu/nvgpu/hal/fifo/ctxsw_timeout_gv11b.c index 77491161d..b61e825fa 100644 --- a/drivers/gpu/nvgpu/hal/fifo/ctxsw_timeout_gv11b.c +++ b/drivers/gpu/nvgpu/hal/fifo/ctxsw_timeout_gv11b.c @@ -27,6 +27,7 @@ #include #include #include +#include #include @@ -188,7 +189,6 @@ bool gv11b_fifo_handle_ctxsw_timeout(struct gk20a *g) const char *info_status_str; struct nvgpu_tsg *tsg = NULL; - /* get ctxsw timedout engines */ ctxsw_timeout_engines = nvgpu_readl(g, fifo_intr_ctxsw_timeout_r()); if (ctxsw_timeout_engines == 0U) { @@ -217,8 +217,8 @@ bool gv11b_fifo_handle_ctxsw_timeout(struct gk20a *g) continue; } - nvgpu_report_host_error(g, 0, - GPU_HOST_PFIFO_CTXSW_TIMEOUT_ERROR, + (void) nvgpu_report_host_err(g, NVGPU_ERR_MODULE_HOST, + 0, GPU_HOST_PFIFO_CTXSW_TIMEOUT_ERROR, tsgid); recover = g->ops.tsg.check_ctxsw_timeout(tsg, diff --git a/drivers/gpu/nvgpu/hal/fifo/fifo_intr_gk20a.c b/drivers/gpu/nvgpu/hal/fifo/fifo_intr_gk20a.c index 8e20229c6..d75ff7ffb 100644 --- a/drivers/gpu/nvgpu/hal/fifo/fifo_intr_gk20a.c +++ b/drivers/gpu/nvgpu/hal/fifo/fifo_intr_gk20a.c @@ -149,8 +149,8 @@ void gk20a_fifo_intr_handle_chsw_error(struct gk20a *g) u32 intr; intr = nvgpu_readl(g, fifo_intr_chsw_error_r()); - nvgpu_report_host_error(g, 0, - GPU_HOST_PFIFO_CHSW_ERROR, intr); + (void) nvgpu_report_host_err(g, NVGPU_ERR_MODULE_HOST, + 0, GPU_HOST_PFIFO_CHSW_ERROR, intr); nvgpu_err(g, "chsw: %08x", intr); g->ops.gr.falcon.dump_stats(g); nvgpu_writel(g, fifo_intr_chsw_error_r(), intr); diff --git a/drivers/gpu/nvgpu/hal/fifo/fifo_intr_gv11b.c b/drivers/gpu/nvgpu/hal/fifo/fifo_intr_gv11b.c index 29f32a4dc..7d1641d89 100644 --- a/drivers/gpu/nvgpu/hal/fifo/fifo_intr_gv11b.c +++ b/drivers/gpu/nvgpu/hal/fifo/fifo_intr_gv11b.c @@ -135,8 +135,8 @@ bool gv11b_fifo_handle_sched_error(struct gk20a *g) nvgpu_err(g, "fifo sched error code not supported"); } - nvgpu_report_host_error(g, 0, - GPU_HOST_PFIFO_SCHED_ERROR, sched_error); + (void) nvgpu_report_host_err(g, NVGPU_ERR_MODULE_HOST, + 0, GPU_HOST_PFIFO_SCHED_ERROR, sched_error); if (sched_error == SCHED_ERROR_CODE_BAD_TSG) { /* id is unknown, preempt all runlists and do recovery */ @@ -154,7 +154,7 @@ static u32 gv11b_fifo_intr_handle_errors(struct gk20a *g, u32 fifo_intr) if ((fifo_intr & fifo_intr_0_bind_error_pending_f()) != 0U) { u32 bind_error = nvgpu_readl(g, fifo_intr_bind_error_r()); - nvgpu_report_host_error(g, 0, + (void) nvgpu_report_host_err(g, NVGPU_ERR_MODULE_HOST, 0, GPU_HOST_PFIFO_BIND_ERROR, bind_error); nvgpu_err(g, "fifo bind error: 0x%08x", bind_error); handled |= fifo_intr_0_bind_error_pending_f(); @@ -166,7 +166,7 @@ static u32 gv11b_fifo_intr_handle_errors(struct gk20a *g, u32 fifo_intr) } if ((fifo_intr & fifo_intr_0_memop_timeout_pending_f()) != 0U) { - nvgpu_report_host_error(g, 0, + (void) nvgpu_report_host_err(g, NVGPU_ERR_MODULE_HOST, 0, GPU_HOST_PFIFO_MEMOP_TIMEOUT_ERROR, 0); nvgpu_err(g, "fifo memop timeout error"); handled |= fifo_intr_0_memop_timeout_pending_f(); @@ -175,7 +175,7 @@ static u32 gv11b_fifo_intr_handle_errors(struct gk20a *g, u32 fifo_intr) if ((fifo_intr & fifo_intr_0_lb_error_pending_f()) != 0U) { u32 lb_error = nvgpu_readl(g, fifo_intr_lb_error_r()); - nvgpu_report_host_error(g, 0, + (void) nvgpu_report_host_err(g, NVGPU_ERR_MODULE_HOST, 0, GPU_HOST_PFIFO_LB_ERROR, lb_error); nvgpu_err(g, "fifo lb error"); handled |= fifo_intr_0_lb_error_pending_f(); diff --git a/drivers/gpu/nvgpu/hal/fifo/pbdma_gv11b.c b/drivers/gpu/nvgpu/hal/fifo/pbdma_gv11b.c index 012f25dde..9ce663eb9 100644 --- a/drivers/gpu/nvgpu/hal/fifo/pbdma_gv11b.c +++ b/drivers/gpu/nvgpu/hal/fifo/pbdma_gv11b.c @@ -86,8 +86,8 @@ static void report_pbdma_error(struct gk20a *g, u32 pbdma_id, err_type = GPU_HOST_PBDMA_SIGNATURE_ERROR; } if (err_type != GPU_HOST_INVALID_ERROR) { - nvgpu_report_host_error(g, pbdma_id, - err_type, pbdma_intr_0); + (void) nvgpu_report_host_err(g, NVGPU_ERR_MODULE_HOST, + pbdma_id, err_type, pbdma_intr_0); } return; } @@ -188,7 +188,7 @@ bool gv11b_pbdma_handle_intr_1(struct gk20a *g, u32 pbdma_id, u32 pbdma_intr_1, recover = true; - nvgpu_report_host_error(g, pbdma_id, + (void) nvgpu_report_host_err(g, NVGPU_ERR_MODULE_HOST, pbdma_id, GPU_HOST_PBDMA_HCE_ERROR, pbdma_intr_1); if ((pbdma_intr_1 & pbdma_intr_1_ctxnotvalid_pending_f()) != 0U) { diff --git a/drivers/gpu/nvgpu/hal/gr/gr/gr_gv11b.c b/drivers/gpu/nvgpu/hal/gr/gr/gr_gv11b.c index a30d68009..913633774 100644 --- a/drivers/gpu/nvgpu/hal/gr/gr/gr_gv11b.c +++ b/drivers/gpu/nvgpu/hal/gr/gr/gr_gv11b.c @@ -42,6 +42,7 @@ #include #include #include +#include #include "gr_pri_gk20a.h" #include "gr_pri_gv11b.h" @@ -153,19 +154,25 @@ static void gr_gv11b_handle_l1_tag_exception(struct gk20a *g, u32 gpc, u32 tpc, if ((l1_tag_ecc_status & (gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_corrected_err_el1_0_m() | gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_corrected_err_el1_1_m())) != 0U) { - nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc, + (void) nvgpu_report_ecc_parity_err(g, + NVGPU_ERR_MODULE_SM, + (gpc << 8) | tpc, GPU_SM_L1_TAG_ECC_CORRECTED, 0, g->ecc.gr.sm_l1_tag_ecc_corrected_err_count[gpc][tpc].counter); } if ((l1_tag_ecc_status & gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_corrected_err_miss_fifo_m()) != 0U) { - nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc, + (void) nvgpu_report_ecc_parity_err(g, + NVGPU_ERR_MODULE_SM, + (gpc << 8) | tpc, GPU_SM_L1_TAG_MISS_FIFO_ECC_CORRECTED, 0, g->ecc.gr.sm_l1_tag_ecc_corrected_err_count[gpc][tpc].counter); } if ((l1_tag_ecc_status & gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_corrected_err_pixrpf_m()) != 0U) { - nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc, + (void) nvgpu_report_ecc_parity_err(g, + NVGPU_ERR_MODULE_SM, + (gpc << 8) | tpc, GPU_SM_L1_TAG_S2R_PIXPRF_ECC_CORRECTED, 0, g->ecc.gr.sm_l1_tag_ecc_corrected_err_count[gpc][tpc].counter); } @@ -188,19 +195,25 @@ static void gr_gv11b_handle_l1_tag_exception(struct gk20a *g, u32 gpc, u32 tpc, if ((l1_tag_ecc_status & (gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_uncorrected_err_el1_0_m() | gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_uncorrected_err_el1_1_m())) != 0U) { - nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc, + (void) nvgpu_report_ecc_parity_err(g, + NVGPU_ERR_MODULE_SM, + (gpc << 8) | tpc, GPU_SM_L1_TAG_ECC_UNCORRECTED, 0, g->ecc.gr.sm_l1_tag_ecc_uncorrected_err_count[gpc][tpc].counter); } if ((l1_tag_ecc_status & gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_uncorrected_err_miss_fifo_m()) != 0U) { - nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc, + (void) nvgpu_report_ecc_parity_err(g, + NVGPU_ERR_MODULE_SM, + (gpc << 8) | tpc, GPU_SM_L1_TAG_MISS_FIFO_ECC_UNCORRECTED, 0, g->ecc.gr.sm_l1_tag_ecc_uncorrected_err_count[gpc][tpc].counter); } if ((l1_tag_ecc_status & gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_uncorrected_err_pixrpf_m()) != 0U) { - nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc, + (void) nvgpu_report_ecc_parity_err(g, + NVGPU_ERR_MODULE_SM, + (gpc << 8) | tpc, GPU_SM_L1_TAG_S2R_PIXPRF_ECC_UNCORRECTED, 0, g->ecc.gr.sm_l1_tag_ecc_uncorrected_err_count[gpc][tpc].counter); } @@ -280,7 +293,9 @@ static void gr_gv11b_handle_lrf_exception(struct gk20a *g, u32 gpc, u32 tpc, } g->ecc.gr.sm_lrf_ecc_single_err_count[gpc][tpc].counter += lrf_corrected_err_count_delta; - nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc, + (void) nvgpu_report_ecc_parity_err(g, + NVGPU_ERR_MODULE_SM, + (gpc << 8) | tpc, GPU_SM_LRF_ECC_CORRECTED, 0, g->ecc.gr.sm_lrf_ecc_single_err_count[gpc][tpc].counter); gk20a_writel(g, @@ -299,7 +314,9 @@ static void gr_gv11b_handle_lrf_exception(struct gk20a *g, u32 gpc, u32 tpc, } g->ecc.gr.sm_lrf_ecc_double_err_count[gpc][tpc].counter += lrf_uncorrected_err_count_delta; - nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc, + (void) nvgpu_report_ecc_parity_err(g, + NVGPU_ERR_MODULE_SM, + (gpc << 8) | tpc, GPU_SM_LRF_ECC_UNCORRECTED, 0, g->ecc.gr.sm_lrf_ecc_double_err_count[gpc][tpc].counter); gk20a_writel(g, @@ -370,7 +387,9 @@ static void gr_gv11b_handle_cbu_exception(struct gk20a *g, u32 gpc, u32 tpc, } g->ecc.gr.sm_cbu_ecc_corrected_err_count[gpc][tpc].counter += cbu_corrected_err_count_delta; - nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc, + (void) nvgpu_report_ecc_parity_err(g, + NVGPU_ERR_MODULE_SM, + (gpc << 8) | tpc, GPU_SM_CBU_ECC_CORRECTED, 0, g->ecc.gr.sm_cbu_ecc_corrected_err_count[gpc][tpc].counter); gk20a_writel(g, @@ -389,7 +408,9 @@ static void gr_gv11b_handle_cbu_exception(struct gk20a *g, u32 gpc, u32 tpc, } g->ecc.gr.sm_cbu_ecc_uncorrected_err_count[gpc][tpc].counter += cbu_uncorrected_err_count_delta; - nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc, + (void) nvgpu_report_ecc_parity_err(g, + NVGPU_ERR_MODULE_SM, + (gpc << 8) | tpc, GPU_SM_CBU_ECC_UNCORRECTED, 0, g->ecc.gr.sm_cbu_ecc_uncorrected_err_count[gpc][tpc].counter); gk20a_writel(g, @@ -456,7 +477,9 @@ static void gr_gv11b_handle_l1_data_exception(struct gk20a *g, u32 gpc, u32 tpc, } g->ecc.gr.sm_l1_data_ecc_corrected_err_count[gpc][tpc].counter += l1_data_corrected_err_count_delta; - nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc, + (void) nvgpu_report_ecc_parity_err(g, + NVGPU_ERR_MODULE_SM, + (gpc << 8) | tpc, GPU_SM_L1_DATA_ECC_CORRECTED, 0, g->ecc.gr.sm_l1_data_ecc_corrected_err_count[gpc][tpc].counter); gk20a_writel(g, @@ -475,7 +498,9 @@ static void gr_gv11b_handle_l1_data_exception(struct gk20a *g, u32 gpc, u32 tpc, } g->ecc.gr.sm_l1_data_ecc_uncorrected_err_count[gpc][tpc].counter += l1_data_uncorrected_err_count_delta; - nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc, + (void) nvgpu_report_ecc_parity_err(g, + NVGPU_ERR_MODULE_SM, + (gpc << 8) | tpc, GPU_SM_L1_DATA_ECC_UNCORRECTED, 0, g->ecc.gr.sm_l1_data_ecc_uncorrected_err_count[gpc][tpc].counter); gk20a_writel(g, @@ -550,25 +575,33 @@ static void gr_gv11b_handle_icache_exception(struct gk20a *g, u32 gpc, u32 tpc, 0); if ((icache_ecc_status & gr_pri_gpc0_tpc0_sm_icache_ecc_status_corrected_err_l0_data_m()) != 0U) { - nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc, + (void) nvgpu_report_ecc_parity_err(g, + NVGPU_ERR_MODULE_SM, + (gpc << 8) | tpc, GPU_SM_ICACHE_L0_DATA_ECC_CORRECTED, 0, g->ecc.gr.sm_icache_ecc_corrected_err_count[gpc][tpc].counter); } if ((icache_ecc_status & gr_pri_gpc0_tpc0_sm_icache_ecc_status_corrected_err_l0_predecode_m()) != 0U) { - nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc, + (void) nvgpu_report_ecc_parity_err(g, + NVGPU_ERR_MODULE_SM, + (gpc << 8) | tpc, GPU_SM_ICACHE_L0_PREDECODE_ECC_CORRECTED, 0, g->ecc.gr.sm_icache_ecc_corrected_err_count[gpc][tpc].counter); } if ((icache_ecc_status & gr_pri_gpc0_tpc0_sm_icache_ecc_status_corrected_err_l1_data_m()) != 0U) { - nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc, + (void) nvgpu_report_ecc_parity_err(g, + NVGPU_ERR_MODULE_SM, + (gpc << 8) | tpc, GPU_SM_ICACHE_L1_DATA_ECC_CORRECTED, 0, g->ecc.gr.sm_icache_ecc_corrected_err_count[gpc][tpc].counter); } if ((icache_ecc_status & gr_pri_gpc0_tpc0_sm_icache_ecc_status_corrected_err_l1_predecode_m()) != 0U) { - nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc, + (void) nvgpu_report_ecc_parity_err(g, + NVGPU_ERR_MODULE_SM, + (gpc << 8) | tpc, GPU_SM_ICACHE_L1_PREDECODE_ECC_CORRECTED, 0, g->ecc.gr.sm_icache_ecc_corrected_err_count[gpc][tpc].counter); } @@ -590,25 +623,33 @@ static void gr_gv11b_handle_icache_exception(struct gk20a *g, u32 gpc, u32 tpc, 0); if ((icache_ecc_status & gr_pri_gpc0_tpc0_sm_icache_ecc_status_uncorrected_err_l0_data_m()) != 0U) { - nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc, + (void) nvgpu_report_ecc_parity_err(g, + NVGPU_ERR_MODULE_SM, + (gpc << 8) | tpc, GPU_SM_ICACHE_L0_DATA_ECC_UNCORRECTED, 0, g->ecc.gr.sm_icache_ecc_uncorrected_err_count[gpc][tpc].counter); } if ((icache_ecc_status & gr_pri_gpc0_tpc0_sm_icache_ecc_status_uncorrected_err_l0_predecode_m()) != 0U) { - nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc, + (void) nvgpu_report_ecc_parity_err(g, + NVGPU_ERR_MODULE_SM, + (gpc << 8) | tpc, GPU_SM_ICACHE_L0_PREDECODE_ECC_UNCORRECTED, 0, g->ecc.gr.sm_icache_ecc_uncorrected_err_count[gpc][tpc].counter); } if ((icache_ecc_status & gr_pri_gpc0_tpc0_sm_icache_ecc_status_uncorrected_err_l1_data_m()) != 0U) { - nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc, + (void) nvgpu_report_ecc_parity_err(g, + NVGPU_ERR_MODULE_SM, + (gpc << 8) | tpc, GPU_SM_ICACHE_L1_DATA_ECC_UNCORRECTED, 0, g->ecc.gr.sm_icache_ecc_uncorrected_err_count[gpc][tpc].counter); } if ((icache_ecc_status & gr_pri_gpc0_tpc0_sm_icache_ecc_status_uncorrected_err_l1_predecode_m()) != 0U) { - nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_SM, gpc, tpc, + (void) nvgpu_report_ecc_parity_err(g, + NVGPU_ERR_MODULE_SM, + (gpc << 8) | tpc, GPU_SM_ICACHE_L1_PREDECODE_ECC_UNCORRECTED, 0, g->ecc.gr.sm_icache_ecc_uncorrected_err_count[gpc][tpc].counter); } diff --git a/drivers/gpu/nvgpu/hal/gr/intr/gr_intr_gm20b.c b/drivers/gpu/nvgpu/hal/gr/intr/gr_intr_gm20b.c index e88488cb8..61d0c5703 100644 --- a/drivers/gpu/nvgpu/hal/gr/intr/gr_intr_gm20b.c +++ b/drivers/gpu/nvgpu/hal/gr/intr/gr_intr_gm20b.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include diff --git a/drivers/gpu/nvgpu/hal/gr/intr/gr_intr_gv11b.c b/drivers/gpu/nvgpu/hal/gr/intr/gr_intr_gv11b.c index abb621463..e0ed351c1 100644 --- a/drivers/gpu/nvgpu/hal/gr/intr/gr_intr_gv11b.c +++ b/drivers/gpu/nvgpu/hal/gr/intr/gr_intr_gv11b.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -49,28 +50,28 @@ static void gv11b_gr_intr_handle_fecs_ecc_error(struct gk20a *g) fecs_ecc_status.uncorrected_delta; if (fecs_ecc_status.imem_corrected_err) { - nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_FECS, 0, 0, + (void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_FECS, 0, GPU_FECS_FALCON_IMEM_ECC_CORRECTED, fecs_ecc_status.ecc_addr, g->ecc.gr.fecs_ecc_corrected_err_count[0].counter); nvgpu_log(g, gpu_dbg_intr, "imem ecc error corrected"); } if (fecs_ecc_status.imem_uncorrected_err) { - nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_FECS, 0, 0, + (void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_FECS, 0, GPU_FECS_FALCON_IMEM_ECC_UNCORRECTED, fecs_ecc_status.ecc_addr, g->ecc.gr.fecs_ecc_uncorrected_err_count[0].counter); nvgpu_log(g, gpu_dbg_intr, "imem ecc error uncorrected"); } if (fecs_ecc_status.dmem_corrected_err) { - nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_FECS, 0, 0, + (void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_FECS, 0, GPU_FECS_FALCON_DMEM_ECC_CORRECTED, fecs_ecc_status.ecc_addr, g->ecc.gr.fecs_ecc_corrected_err_count[0].counter); nvgpu_log(g, gpu_dbg_intr, "dmem ecc error corrected"); } if (fecs_ecc_status.dmem_uncorrected_err) { - nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_FECS, 0, 0, + (void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_FECS, 0, GPU_FECS_FALCON_DMEM_ECC_UNCORRECTED, fecs_ecc_status.ecc_addr, g->ecc.gr.fecs_ecc_uncorrected_err_count[0].counter); @@ -319,7 +320,7 @@ void gv11b_gr_intr_handle_gcc_exception(struct gk20a *g, u32 gpc, ); } *corrected_err += gcc_l15_corrected_err_count_delta; - nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_GCC, gpc, 0, + (void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_GCC, gpc, GPU_GCC_L15_ECC_CORRECTED, 0, *corrected_err); nvgpu_writel(g, @@ -341,7 +342,7 @@ void gv11b_gr_intr_handle_gcc_exception(struct gk20a *g, u32 gpc, ); } *uncorrected_err += gcc_l15_uncorrected_err_count_delta; - nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_GCC, gpc, 0, + (void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_GCC, gpc, GPU_GCC_L15_ECC_UNCORRECTED, 0, *uncorrected_err); nvgpu_writel(g, @@ -429,7 +430,7 @@ void gv11b_gr_intr_handle_gpc_gpcmmu_exception(struct gk20a *g, u32 gpc, if ((ecc_status & gr_gpc0_mmu_l1tlb_ecc_status_corrected_err_l1tlb_sa_data_m()) != 0U) { - nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_MMU, gpc, 0, + (void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_MMU, gpc, GPU_MMU_L1TLB_SA_DATA_ECC_CORRECTED, 0, (u32)*corrected_err); nvgpu_log(g, gpu_dbg_intr, "corrected ecc sa data error"); @@ -437,7 +438,7 @@ void gv11b_gr_intr_handle_gpc_gpcmmu_exception(struct gk20a *g, u32 gpc, if ((ecc_status & gr_gpc0_mmu_l1tlb_ecc_status_uncorrected_err_l1tlb_sa_data_m()) != 0U) { - nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_MMU, gpc, 0, + (void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_MMU, gpc, GPU_MMU_L1TLB_SA_DATA_ECC_UNCORRECTED, 0, (u32)*uncorrected_err); nvgpu_log(g, gpu_dbg_intr, "uncorrected ecc sa data error"); @@ -445,7 +446,7 @@ void gv11b_gr_intr_handle_gpc_gpcmmu_exception(struct gk20a *g, u32 gpc, if ((ecc_status & gr_gpc0_mmu_l1tlb_ecc_status_corrected_err_l1tlb_fa_data_m()) != 0U) { - nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_MMU, gpc, 0, + (void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_MMU, gpc, GPU_MMU_L1TLB_FA_DATA_ECC_CORRECTED, 0, (u32)*corrected_err); nvgpu_log(g, gpu_dbg_intr, "corrected ecc fa data error"); @@ -453,7 +454,7 @@ void gv11b_gr_intr_handle_gpc_gpcmmu_exception(struct gk20a *g, u32 gpc, if ((ecc_status & gr_gpc0_mmu_l1tlb_ecc_status_uncorrected_err_l1tlb_fa_data_m()) != 0U) { - nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_MMU, gpc, 0, + (void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_MMU, gpc, GPU_MMU_L1TLB_FA_DATA_ECC_UNCORRECTED, 0, (u32)*uncorrected_err); nvgpu_log(g, gpu_dbg_intr, "uncorrected ecc fa data error"); @@ -536,29 +537,29 @@ void gv11b_gr_intr_handle_gpc_gpccs_exception(struct gk20a *g, u32 gpc, if ((ecc_status & gr_gpc0_gpccs_falcon_ecc_status_corrected_err_imem_m()) != 0U) { - nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_GPCCS, gpc, 0, - GPU_GPCCS_FALCON_IMEM_ECC_CORRECTED, + (void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_GPCCS, + gpc, GPU_GPCCS_FALCON_IMEM_ECC_CORRECTED, ecc_addr, (u32)*corrected_err); nvgpu_log(g, gpu_dbg_intr, "imem ecc error corrected"); } if ((ecc_status & gr_gpc0_gpccs_falcon_ecc_status_uncorrected_err_imem_m()) != 0U) { - nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_GPCCS, gpc, 0, - GPU_GPCCS_FALCON_IMEM_ECC_UNCORRECTED, + (void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_GPCCS, + gpc, GPU_GPCCS_FALCON_IMEM_ECC_UNCORRECTED, ecc_addr, (u32)*uncorrected_err); nvgpu_log(g, gpu_dbg_intr, "imem ecc error uncorrected"); } if ((ecc_status & gr_gpc0_gpccs_falcon_ecc_status_corrected_err_dmem_m()) != 0U) { - nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_GPCCS, gpc, 0, - GPU_GPCCS_FALCON_DMEM_ECC_CORRECTED, + (void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_GPCCS, + gpc, GPU_GPCCS_FALCON_DMEM_ECC_CORRECTED, ecc_addr, (u32)*corrected_err); nvgpu_log(g, gpu_dbg_intr, "dmem ecc error corrected"); } if ((ecc_status & gr_gpc0_gpccs_falcon_ecc_status_uncorrected_err_dmem_m()) != 0U) { - nvgpu_gr_report_ecc_error(g, NVGPU_ERR_MODULE_GPCCS, gpc, 0, - GPU_GPCCS_FALCON_DMEM_ECC_UNCORRECTED, + (void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_GPCCS, + gpc, GPU_GPCCS_FALCON_DMEM_ECC_UNCORRECTED, ecc_addr, (u32)*uncorrected_err); nvgpu_log(g, gpu_dbg_intr, "dmem ecc error uncorrected"); } diff --git a/drivers/gpu/nvgpu/hal/ltc/intr/ltc_intr_gv11b.c b/drivers/gpu/nvgpu/hal/ltc/intr/ltc_intr_gv11b.c index 766d25725..85c87ae16 100644 --- a/drivers/gpu/nvgpu/hal/ltc/intr/ltc_intr_gv11b.c +++ b/drivers/gpu/nvgpu/hal/ltc/intr/ltc_intr_gv11b.c @@ -154,28 +154,36 @@ static void gv11b_ltc_intr_handle_lts_interrupts(struct gk20a *g, if ((ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_rstg_m()) != 0U) { - nvgpu_ltc_report_ecc_error(g, ltc, slice, + (void) nvgpu_report_ecc_parity_err(g, + NVGPU_ERR_MODULE_LTC, + (ltc << 8U) | slice, GPU_LTC_CACHE_RSTG_ECC_CORRECTED, ecc_addr, g->ecc.ltc.ecc_sec_count[ltc][slice].counter); nvgpu_log(g, gpu_dbg_intr, "rstg ecc error corrected"); } if ((ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_rstg_m()) != 0U) { - nvgpu_ltc_report_ecc_error(g, ltc, slice, + (void) nvgpu_report_ecc_parity_err(g, + NVGPU_ERR_MODULE_LTC, + (ltc << 8U) | slice, GPU_LTC_CACHE_RSTG_ECC_UNCORRECTED, ecc_addr, g->ecc.ltc.ecc_ded_count[ltc][slice].counter); nvgpu_log(g, gpu_dbg_intr, "rstg ecc error uncorrected"); } if ((ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_tstg_m()) != 0U) { - nvgpu_ltc_report_ecc_error(g, ltc, slice, + (void) nvgpu_report_ecc_parity_err(g, + NVGPU_ERR_MODULE_LTC, + (ltc << 8U) | slice, GPU_LTC_CACHE_TSTG_ECC_CORRECTED, ecc_addr, g->ecc.ltc.ecc_sec_count[ltc][slice].counter); nvgpu_log(g, gpu_dbg_intr, "tstg ecc error corrected"); } if ((ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_tstg_m()) != 0U) { - nvgpu_ltc_report_ecc_error(g, ltc, slice, + (void) nvgpu_report_ecc_parity_err(g, + NVGPU_ERR_MODULE_LTC, + (ltc << 8U) | slice, GPU_LTC_CACHE_TSTG_ECC_UNCORRECTED, ecc_addr, g->ecc.ltc.ecc_ded_count[ltc][slice].counter); nvgpu_log(g, gpu_dbg_intr, @@ -185,11 +193,15 @@ static void gv11b_ltc_intr_handle_lts_interrupts(struct gk20a *g, ltc_ltc0_lts0_l2_cache_ecc_status_corrected_err_dstg_m()) != 0U) { if ((dstg_ecc_addr & ltc_ltc0_lts0_dstg_ecc_address_info_ram_m()) == 0U) { - nvgpu_ltc_report_ecc_error(g, ltc, slice, + (void) nvgpu_report_ecc_parity_err(g, + NVGPU_ERR_MODULE_LTC, + (ltc << 8U) | slice, GPU_LTC_CACHE_DSTG_ECC_CORRECTED, ecc_addr, g->ecc.ltc.ecc_sec_count[ltc][slice].counter); } else { - nvgpu_ltc_report_ecc_error(g, ltc, slice, + (void) nvgpu_report_ecc_parity_err(g, + NVGPU_ERR_MODULE_LTC, + (ltc << 8U) | slice, GPU_LTC_CACHE_DSTG_BE_ECC_CORRECTED, ecc_addr, g->ecc.ltc.ecc_sec_count[ltc][slice].counter); } @@ -197,11 +209,15 @@ static void gv11b_ltc_intr_handle_lts_interrupts(struct gk20a *g, } if ((ecc_status & ltc_ltc0_lts0_l2_cache_ecc_status_uncorrected_err_dstg_m()) != 0U) { if ((dstg_ecc_addr & ltc_ltc0_lts0_dstg_ecc_address_info_ram_m()) == 0U) { - nvgpu_ltc_report_ecc_error(g, ltc, slice, + (void) nvgpu_report_ecc_parity_err(g, + NVGPU_ERR_MODULE_LTC, + (ltc << 8U) | slice, GPU_LTC_CACHE_DSTG_ECC_UNCORRECTED, ecc_addr, g->ecc.ltc.ecc_ded_count[ltc][slice].counter); } else { - nvgpu_ltc_report_ecc_error(g, ltc, slice, + (void) nvgpu_report_ecc_parity_err(g, + NVGPU_ERR_MODULE_LTC, + (ltc << 8U) | slice, GPU_LTC_CACHE_DSTG_BE_ECC_UNCORRECTED, ecc_addr, g->ecc.ltc.ecc_ded_count[ltc][slice].counter); } diff --git a/drivers/gpu/nvgpu/hal/pmu/pmu_gv11b.c b/drivers/gpu/nvgpu/hal/pmu/pmu_gv11b.c index ec1126654..547143c54 100644 --- a/drivers/gpu/nvgpu/hal/pmu/pmu_gv11b.c +++ b/drivers/gpu/nvgpu/hal/pmu/pmu_gv11b.c @@ -242,7 +242,7 @@ static int gv11b_pmu_correct_ecc(struct gk20a *g, u32 ecc_status, u32 ecc_addr) if ((ecc_status & pwr_pmu_falcon_ecc_status_corrected_err_imem_m()) != 0U) { - nvgpu_pmu_report_ecc_error(g, 0, + (void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_PMU, 0, GPU_PMU_FALCON_IMEM_ECC_CORRECTED, ecc_addr, g->ecc.pmu.pmu_ecc_corrected_err_count[0].counter); @@ -250,7 +250,7 @@ static int gv11b_pmu_correct_ecc(struct gk20a *g, u32 ecc_status, u32 ecc_addr) } if ((ecc_status & pwr_pmu_falcon_ecc_status_uncorrected_err_imem_m()) != 0U) { - nvgpu_pmu_report_ecc_error(g, 0, + (void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_PMU, 0, GPU_PMU_FALCON_IMEM_ECC_UNCORRECTED, ecc_addr, g->ecc.pmu.pmu_ecc_uncorrected_err_count[0].counter); @@ -259,7 +259,7 @@ static int gv11b_pmu_correct_ecc(struct gk20a *g, u32 ecc_status, u32 ecc_addr) } if ((ecc_status & pwr_pmu_falcon_ecc_status_corrected_err_dmem_m()) != 0U) { - nvgpu_pmu_report_ecc_error(g, 0, + (void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_PMU, 0, GPU_PMU_FALCON_DMEM_ECC_CORRECTED, ecc_addr, g->ecc.pmu.pmu_ecc_corrected_err_count[0].counter); @@ -267,7 +267,7 @@ static int gv11b_pmu_correct_ecc(struct gk20a *g, u32 ecc_status, u32 ecc_addr) } if ((ecc_status & pwr_pmu_falcon_ecc_status_uncorrected_err_dmem_m()) != 0U) { - nvgpu_pmu_report_ecc_error(g, 0, + (void) nvgpu_report_ecc_parity_err(g, NVGPU_ERR_MODULE_PMU, 0, GPU_PMU_FALCON_DMEM_ECC_UNCORRECTED, ecc_addr, g->ecc.pmu.pmu_ecc_uncorrected_err_count[0].counter); diff --git a/drivers/gpu/nvgpu/hal/priv_ring/priv_ring_gp10b.c b/drivers/gpu/nvgpu/hal/priv_ring/priv_ring_gp10b.c index cdd7082b6..b2d21aaaa 100644 --- a/drivers/gpu/nvgpu/hal/priv_ring/priv_ring_gp10b.c +++ b/drivers/gpu/nvgpu/hal/priv_ring/priv_ring_gp10b.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -68,20 +69,9 @@ void gp10b_priv_ring_decode_error_code(struct gk20a *g, u32 error_code) { u32 error_type_index; - int ret = 0; - if (g->ops.priv_ring.err_ops.report_access_violation != NULL) { - ret = g->ops.priv_ring.err_ops.report_access_violation (g, - NVGPU_ERR_MODULE_PRI, - 0U, - GPU_PRI_ACCESS_VIOLATION, - 0U, - error_code); - if (ret != 0) { - nvgpu_err(g, "Failed to report PRI access violation: " - "err_code=%u", error_code); - } - } + (void) nvgpu_report_pri_err(g, NVGPU_ERR_MODULE_PRI, 0, + GPU_PRI_ACCESS_VIOLATION, 0, error_code); error_type_index = (error_code & 0x00000f00U) >> 8U; error_code = error_code & 0xBADFf000U; diff --git a/drivers/gpu/nvgpu/hal/ptimer/ptimer_gk20a.c b/drivers/gpu/nvgpu/hal/ptimer/ptimer_gk20a.c index eb93c3924..59ebaa131 100644 --- a/drivers/gpu/nvgpu/hal/ptimer/ptimer_gk20a.c +++ b/drivers/gpu/nvgpu/hal/ptimer/ptimer_gk20a.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -32,7 +32,6 @@ void gk20a_ptimer_isr(struct gk20a *g) { u32 save0, save1, fecs_errcode = 0; - int ret = 0; u32 inst = 0U; u32 error_addr; @@ -69,19 +68,9 @@ void gk20a_ptimer_isr(struct gk20a *g) error_addr = 0U; } - if (g->ops.priv_ring.err_ops.report_timeout_err != NULL) { - ret = g->ops.priv_ring.err_ops.report_timeout_err(g, - NVGPU_ERR_MODULE_PRI, - inst, - GPU_PRI_TIMEOUT_ERROR, - error_addr, - fecs_errcode); - if (ret != 0) { - nvgpu_err(g, "Failed to report PRI Timout error: " - "inst=%u, err_addr=%u, err_code=%u", - inst, error_addr, fecs_errcode); - } - } + (void) nvgpu_report_pri_err(g, NVGPU_ERR_MODULE_PRI, + inst, GPU_PRI_TIMEOUT_ERROR, + error_addr, fecs_errcode); } int gk20a_read_ptimer(struct gk20a *g, u64 *value) diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index ff58f1144..0622274da 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h @@ -115,7 +115,6 @@ enum ctxsw_addr_type; #include #include #include -#include #include #include #include @@ -240,11 +239,6 @@ struct gpu_ops { void (*en_illegal_compstat)(struct gk20a *g, bool enable); } intr; - struct { - int (*report_ecc_parity_err)(struct gk20a *g, - u32 hw_id, u32 inst, u32 err_id, - u64 err_addr, u64 count); - } err_ops; } ltc; struct { void (*init)(struct gk20a *g, struct nvgpu_cbc *cbc); @@ -260,11 +254,6 @@ struct gpu_ops { u32 (*isr_nonstall)(struct gk20a *g, u32 inst_id, u32 pri_base); u32 (*get_num_pce)(struct gk20a *g); void (*mthd_buffer_fault_in_bar2_fault)(struct gk20a *g); - struct { - int (*report_ce_err)(struct gk20a *g, - u32 hw_id, u32 inst, u32 err_id, - u32 status); - } err_ops; } ce; struct { u32 (*get_gr_status)(struct gk20a *g); @@ -845,18 +834,6 @@ struct gpu_ops { } intr; u32 (*get_ctxsw_checksum_mismatch_mailbox_val)(void); - struct { - int (*report_ecc_parity_err)(struct gk20a *g, - u32 hw_id, u32 inst, u32 err_id, - u64 err_addr, - u64 err_count); - int (*report_gr_err)(struct gk20a *g, - u32 hw_id, u32 inst, u32 err_id, - struct gr_err_info *err_info); - int (*report_ctxsw_err)(struct gk20a *g, - u32 hw_id, u32 err_id, - void *data); - } err_ops; } gr; struct { @@ -933,12 +910,6 @@ struct gpu_ops { void (*fault_buf_configure_hw)(struct gk20a *g, u32 index); size_t (*get_vidmem_size)(struct gk20a *g); int (*apply_pdb_cache_war)(struct gk20a *g); - struct { - int (*report_ecc_parity_err)(struct gk20a *g, - u32 hw_id, u32 inst, - u32 err_id, u64 err_addr, - u64 err_cnt); - } err_ops; struct { void (*enable)(struct gk20a *g); void (*disable)(struct gk20a *g); @@ -999,11 +970,6 @@ struct gpu_ops { void (*intr_unset_recover_mask)(struct gk20a *g); int (*set_sm_exception_type_mask)(struct nvgpu_channel *ch, u32 exception_mask); - struct { - int (*report_host_err)(struct gk20a *g, - u32 hw_id, u32 inst, u32 err_id, - u32 intr_info); - } err_ops; void (*intr_0_enable)(struct gk20a *g, bool enable); void (*intr_0_isr)(struct gk20a *g); @@ -1400,15 +1366,6 @@ struct gpu_ops { void (*pmu_reset_idle_counter)(struct gk20a *g, u32 counter_id); /* PG */ void (*pmu_setup_elpg)(struct gk20a *g); - struct { - int (*report_ecc_parity_err)(struct gk20a *g, - u32 hw_id, u32 inst, - u32 err_id, u64 err_addr, - u64 err_cnt); - int (*report_pmu_err)(struct gk20a *g, - u32 hw_id, u32 err_id, u32 status, - u32 pmu_err_type); - } err_ops; void (*pmu_clear_bar0_host_err_status)(struct gk20a *g); int (*bar0_error_status)(struct gk20a *g, u32 *bar0_status, u32 *etype); @@ -1670,14 +1627,6 @@ struct gpu_ops { u32 (*enum_ltc)(struct gk20a *g); u32 (*get_gpc_count)(struct gk20a *g); u32 (*get_fbp_count)(struct gk20a *g); - struct { - int (*report_access_violation)(struct gk20a *g, - u32 hw_id, u32 inst, u32 err_id, - u32 err_addr, u32 error_code); - int (*report_timeout_err)(struct gk20a *g, - u32 hw_id, u32 inst, u32 err_id, - u32 err_addr, u32 error_code); - } err_ops; } priv_ring; struct { int (*check_priv_security)(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_err.h b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_err.h index f14b265e5..5f46b76d7 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_err.h +++ b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_err.h @@ -23,13 +23,17 @@ #ifndef NVGPU_NVGPU_ERR_H #define NVGPU_NVGPU_ERR_H +#include + +struct gk20a; + #define NVGPU_ERR_MODULE_HOST 0U #define NVGPU_ERR_MODULE_SM 1U #define NVGPU_ERR_MODULE_FECS 2U #define NVGPU_ERR_MODULE_GPCCS 3U #define NVGPU_ERR_MODULE_MMU 4U #define NVGPU_ERR_MODULE_GCC 5U -#define NVGPU_ERR_MODULE_PWR 6U +#define NVGPU_ERR_MODULE_PMU 6U #define NVGPU_ERR_MODULE_PGRAPH 7U #define NVGPU_ERR_MODULE_LTC 8U #define NVGPU_ERR_MODULE_HUBMMU 9U @@ -171,22 +175,26 @@ struct gr_err_info { struct gr_exception_info *exception_info; }; -void nvgpu_report_host_error(struct gk20a *g, - u32 inst, u32 err_id, u32 intr_info); +/* Functions to report errors to 3LSS */ +int nvgpu_report_host_err(struct gk20a *g, u32 hw_unit, + u32 inst, u32 err_id, u32 intr_info); -void nvgpu_report_ce_error(struct gk20a *g, u32 inst, - u32 err_type, u32 status); +int nvgpu_report_ce_err(struct gk20a *g, u32 hw_unit, + u32 inst, u32 err_id, u32 intr_info); -void nvgpu_hubmmu_report_ecc_error(struct gk20a *g, u32 inst, - u32 err_type, u64 err_addr, u64 err_cnt); +int nvgpu_report_ecc_parity_err(struct gk20a *g, u32 hw_unit, u32 inst, + u32 err_type, u64 err_addr, u64 err_count); -void nvgpu_ltc_report_ecc_error(struct gk20a *g, u32 ltc, u32 slice, - u32 err_type, u64 err_addr, u64 err_cnt); +int nvgpu_report_ctxsw_err(struct gk20a *g, u32 hw_unit, u32 err_id, + void *data); -void nvgpu_pmu_report_ecc_error(struct gk20a *g, u32 inst, - u32 err_type, u64 err_addr, u64 err_cnt); +int nvgpu_report_gr_err(struct gk20a *g, u32 hw_unit, u32 inst, + u32 err_type, struct gr_err_info *err_info); + +int nvgpu_report_pmu_err(struct gk20a *g, u32 hw_unit, u32 err_id, + u32 sub_err_type, u32 status); + +int nvgpu_report_pri_err(struct gk20a *g, u32 hw_unit, u32 inst, + u32 err_type, u32 err_addr, u32 err_code); -void nvgpu_gr_report_ecc_error(struct gk20a *g, u32 hw_module, - u32 gpc, u32 tpc, u32 err_type, - u64 err_addr, u64 err_cnt); #endif diff --git a/drivers/gpu/nvgpu/os/linux/sdl/sdl_stub.c b/drivers/gpu/nvgpu/os/linux/sdl/sdl_stub.c new file mode 100644 index 000000000..9249e5988 --- /dev/null +++ b/drivers/gpu/nvgpu/os/linux/sdl/sdl_stub.c @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include + +struct gk20a; + +int nvgpu_report_host_err(struct gk20a *g, u32 hw_unit, + u32 inst, u32 err_id, u32 intr_info) +{ + return 0; +} + +int nvgpu_report_ecc_parity_err(struct gk20a *g, u32 hw_unit, u32 inst, + u32 err_type, u64 err_addr, u64 err_count) +{ + return 0; +} + +int nvgpu_report_gr_err(struct gk20a *g, u32 hw_unit, u32 inst, + u32 err_type, struct gr_err_info *err_info) +{ + return 0; +} + +int nvgpu_report_pmu_err(struct gk20a *g, u32 hw_unit, u32 err_id, + u32 sub_err_type, u32 status) +{ + return 0; +} + +int nvgpu_report_ce_err(struct gk20a *g, u32 hw_unit, + u32 inst, u32 err_id, u32 intr_info) +{ + return 0; +} + +int nvgpu_report_pri_err(struct gk20a *g, u32 hw_unit, u32 inst, + u32 err_type, u32 err_addr, u32 err_code) +{ + return 0; +} + +int nvgpu_report_ctxsw_err(struct gk20a *g, u32 hw_unit, u32 err_id, + void *data) +{ + return 0; +} diff --git a/drivers/gpu/nvgpu/os/posix/stubs.c b/drivers/gpu/nvgpu/os/posix/stubs.c index 46cf0abf7..a771965ce 100644 --- a/drivers/gpu/nvgpu/os/posix/stubs.c +++ b/drivers/gpu/nvgpu/os/posix/stubs.c @@ -27,7 +27,9 @@ #include #include +#include +struct gk20a; void nvgpu_dbg_session_post_event(struct dbg_session_gk20a *dbg_s) { @@ -41,3 +43,45 @@ int nvgpu_ecc_sysfs_init(struct gk20a *g) void nvgpu_ecc_sysfs_remove(struct gk20a *g) { } + +int nvgpu_report_host_err(struct gk20a *g, u32 hw_unit, + u32 inst, u32 err_id, u32 intr_info) +{ + return 0; +} + +int nvgpu_report_ecc_parity_err(struct gk20a *g, u32 hw_unit, u32 inst, + u32 err_type, u64 err_addr, u64 err_count) +{ + return 0; +} + +int nvgpu_report_gr_err(struct gk20a *g, u32 hw_unit, u32 inst, + u32 err_type, struct gr_err_info *err_info) +{ + return 0; +} + +int nvgpu_report_pmu_err(struct gk20a *g, u32 hw_unit, u32 err_id, + u32 sub_err_type, u32 status) +{ + return 0; +} + +int nvgpu_report_ce_err(struct gk20a *g, u32 hw_unit, + u32 inst, u32 err_id, u32 intr_info) +{ + return 0; +} + +int nvgpu_report_pri_err(struct gk20a *g, u32 hw_unit, u32 inst, + u32 err_type, u32 err_addr, u32 err_code) +{ + return 0; +} + +int nvgpu_report_ctxsw_err(struct gk20a *g, u32 hw_unit, u32 err_id, + void *data) +{ + return 0; +}