From cd4fa084c1444d3c3bdc1ebdae1d2f37179cf0a5 Mon Sep 17 00:00:00 2001 From: Rajesh Devaraj Date: Tue, 25 Jun 2019 09:52:43 +0530 Subject: [PATCH] gpu: nvgpu: report MMU page fault errors to 3LSS This patch adds support to report MMU page fault errors to 3LSS. JIRA NVGPU-3459 Change-Id: I3f06e594a75ae79bf4deef9acdc1829a002ea869 Signed-off-by: Rajesh Devaraj Reviewed-on: https://git-master.nvidia.com/r/2142742 GVS: Gerrit_Virtual_Submit Reviewed-by: Antony Clince Alex Reviewed-by: Ankur Kishore Reviewed-by: mobile promotions Tested-by: mobile promotions --- .../gpu/nvgpu/hal/fb/fb_mmu_fault_gv11b_fusa.c | 18 ++++++++++++++++++ .../hal/mm/mmu_fault/mmu_fault_gv11b_fusa.c | 13 +++++++++++++ drivers/gpu/nvgpu/include/nvgpu/nvgpu_err.h | 13 +++++++++++++ drivers/gpu/nvgpu/os/linux/sdl/sdl_stub.c | 8 ++++++++ drivers/gpu/nvgpu/os/posix/stubs.c | 8 ++++++++ 5 files changed, 60 insertions(+) diff --git a/drivers/gpu/nvgpu/hal/fb/fb_mmu_fault_gv11b_fusa.c b/drivers/gpu/nvgpu/hal/fb/fb_mmu_fault_gv11b_fusa.c index 1aea1a707..07f9e94fc 100644 --- a/drivers/gpu/nvgpu/hal/fb/fb_mmu_fault_gv11b_fusa.c +++ b/drivers/gpu/nvgpu/hal/fb/fb_mmu_fault_gv11b_fusa.c @@ -581,6 +581,12 @@ void gv11b_fb_handle_mmu_fault(struct gk20a *g, u32 niso_intr) if ((niso_intr & fb_niso_intr_mmu_other_fault_notify_m()) != 0U) { + (void) nvgpu_report_mmu_err(g, NVGPU_ERR_MODULE_HUBMMU, + GPU_HUBMMU_PAGE_FAULT_ERROR, + NULL, + fault_status, + GPU_HUBMMU_OTHER_FAULT_NOTIFY); /* fault_info is NULL here; the report's return value is explicitly discarded */ + gv11b_fb_handle_dropped_mmu_fault(g, fault_status); gv11b_mm_mmu_fault_handle_other_fault_notify(g, fault_status); @@ -604,6 +610,12 @@ void gv11b_fb_handle_mmu_fault(struct gk20a *g, u32 niso_intr) if ((niso_intr & fb_niso_intr_mmu_nonreplayable_fault_overflow_m()) != 0U) { + (void) nvgpu_report_mmu_err(g, NVGPU_ERR_MODULE_HUBMMU, + GPU_HUBMMU_PAGE_FAULT_ERROR, + NULL, + fault_status, + GPU_HUBMMU_NONREPLAYABLE_FAULT_OVERFLOW); + gv11b_fb_handle_nonreplay_fault_overflow(g, fault_status); } @@ -622,6 +634,12 @@ void 
gv11b_fb_handle_mmu_fault(struct gk20a *g, u32 niso_intr) if ((niso_intr & fb_niso_intr_mmu_replayable_fault_overflow_m()) != 0U) { + (void) nvgpu_report_mmu_err(g, NVGPU_ERR_MODULE_HUBMMU, + GPU_HUBMMU_PAGE_FAULT_ERROR, + NULL, + fault_status, + GPU_HUBMMU_REPLAYABLE_FAULT_OVERFLOW); + gv11b_fb_handle_replay_fault_overflow(g, fault_status); } diff --git a/drivers/gpu/nvgpu/hal/mm/mmu_fault/mmu_fault_gv11b_fusa.c b/drivers/gpu/nvgpu/hal/mm/mmu_fault/mmu_fault_gv11b_fusa.c index b1001a9c7..cf221efaf 100644 --- a/drivers/gpu/nvgpu/hal/mm/mmu_fault/mmu_fault_gv11b_fusa.c +++ b/drivers/gpu/nvgpu/hal/mm/mmu_fault/mmu_fault_gv11b_fusa.c @@ -449,6 +449,7 @@ void gv11b_mm_mmu_fault_handle_nonreplay_replay_fault(struct gk20a *g, u32 invalidate_replay_val = 0U; u64 prev_fault_addr = 0ULL; u64 next_fault_addr = 0ULL; + u32 sub_err_type = 0U; /* sub-error id handed to nvgpu_report_mmu_err(); chosen from index below */ if (gv11b_fb_is_fault_buffer_empty(g, index, &get_indx)) { nvgpu_log(g, gpu_dbg_intr, @@ -481,6 +482,18 @@ void gv11b_mm_mmu_fault_handle_nonreplay_replay_fault(struct gk20a *g, gv11b_fb_copy_from_hw_fault_buf(g, mem, offset, mmufault); + if (index == NVGPU_MMU_FAULT_REPLAY_REG_INDX) { + sub_err_type = GPU_HUBMMU_REPLAYABLE_FAULT_NOTIFY; + } else { + sub_err_type = GPU_HUBMMU_NONREPLAYABLE_FAULT_NOTIFY; + } + + (void) nvgpu_report_mmu_err(g, NVGPU_ERR_MODULE_HUBMMU, + GPU_HUBMMU_PAGE_FAULT_ERROR, + mmufault, + fault_status, + sub_err_type); /* mmufault is the entry just passed to gv11b_fb_copy_from_hw_fault_buf(); return value is discarded */ + nvgpu_assert(get_indx < U32_MAX); get_indx = (get_indx + 1U) % entries; nvgpu_log(g, gpu_dbg_intr, "new get index = %d", get_indx); diff --git a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_err.h b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_err.h index 8ebc46303..0cbb8b0e6 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_err.h +++ b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_err.h @@ -26,6 +26,7 @@ #include struct gk20a; +struct mmu_fault_info; #define NVGPU_ERR_MODULE_HOST (0U) #define NVGPU_ERR_MODULE_SM (1U) @@ -169,6 +170,14 @@ struct gr_exception_info { #define GPU_HUBMMU_PTE_DATA_ECC_UNCORRECTED (5U) #define 
GPU_HUBMMU_PDE0_DATA_ECC_CORRECTED (6U) #define GPU_HUBMMU_PDE0_DATA_ECC_UNCORRECTED (7U) +#define GPU_HUBMMU_PAGE_FAULT_ERROR (8U) /* err_type used by every nvgpu_report_mmu_err() call added in this patch */ + +/* Sub-errors in GPU_HUBMMU_PAGE_FAULT_ERROR: values passed as the sub_err_type argument of nvgpu_report_mmu_err() */ +#define GPU_HUBMMU_REPLAYABLE_FAULT_OVERFLOW (0U) +#define GPU_HUBMMU_REPLAYABLE_FAULT_NOTIFY (1U) +#define GPU_HUBMMU_NONREPLAYABLE_FAULT_OVERFLOW (2U) +#define GPU_HUBMMU_NONREPLAYABLE_FAULT_NOTIFY (3U) +#define GPU_HUBMMU_OTHER_FAULT_NOTIFY (4U) #define GPU_PRI_TIMEOUT_ERROR (0U) #define GPU_PRI_ACCESS_VIOLATION (1U) @@ -227,4 +236,8 @@ int nvgpu_report_pmu_err(struct gk20a *g, u32 hw_unit, u32 err_id, int nvgpu_report_pri_err(struct gk20a *g, u32 hw_unit, u32 inst, u32 err_type, u32 err_addr, u32 err_code); +int nvgpu_report_mmu_err(struct gk20a *g, u32 hw_unit, + u32 err_type, struct mmu_fault_info *fault_info, + u32 status, u32 sub_err_type); /* callers in this patch pass NULL for fault_info when no fault entry is available */ + #endif diff --git a/drivers/gpu/nvgpu/os/linux/sdl/sdl_stub.c b/drivers/gpu/nvgpu/os/linux/sdl/sdl_stub.c index e6779fe56..853c23aa8 100644 --- a/drivers/gpu/nvgpu/os/linux/sdl/sdl_stub.c +++ b/drivers/gpu/nvgpu/os/linux/sdl/sdl_stub.c @@ -23,6 +23,7 @@ #include struct gk20a; +struct mmu_fault_info; int nvgpu_report_host_err(struct gk20a *g, u32 hw_unit, u32 inst, u32 err_id, u32 intr_info) @@ -65,3 +66,10 @@ int nvgpu_report_ctxsw_err(struct gk20a *g, u32 hw_unit, u32 err_id, { return 0; } + +int nvgpu_report_mmu_err(struct gk20a *g, u32 hw_unit, + u32 err_type, struct mmu_fault_info *fault_info, + u32 status, u32 sub_err_type) +{ + return 0; /* stub: ignores every argument and returns 0 */ +} diff --git a/drivers/gpu/nvgpu/os/posix/stubs.c b/drivers/gpu/nvgpu/os/posix/stubs.c index 99402508c..9fc5a2255 100644 --- a/drivers/gpu/nvgpu/os/posix/stubs.c +++ b/drivers/gpu/nvgpu/os/posix/stubs.c @@ -30,6 +30,7 @@ #include struct gk20a; +struct mmu_fault_info; #ifdef CONFIG_NVGPU_DEBUGGER void nvgpu_dbg_session_post_event(struct dbg_session_gk20a *dbg_s) @@ -87,3 +88,10 @@ int nvgpu_report_ctxsw_err(struct gk20a *g, u32 hw_unit, u32 err_id, { return 0; } + +int nvgpu_report_mmu_err(struct 
gk20a *g, u32 hw_unit, + u32 err_type, struct mmu_fault_info *fault_info, + u32 status, u32 sub_err_type) +{ + return 0; /* stub: ignores every argument and returns 0 */ +}