diff --git a/drivers/gpu/nvgpu/common/gr/gr_intr.c b/drivers/gpu/nvgpu/common/gr/gr_intr.c index 87ab3c8f0..432fd673c 100644 --- a/drivers/gpu/nvgpu/common/gr/gr_intr.c +++ b/drivers/gpu/nvgpu/common/gr/gr_intr.c @@ -208,7 +208,7 @@ static void gr_intr_report_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, err_info.sm = sm; info.sm_mcerr_info = &err_info; (void) nvgpu_report_gr_err(g, NVGPU_ERR_MODULE_SM, inst, - GPU_SM_MACHINE_CHECK_ERROR, &info); + GPU_SM_MACHINE_CHECK_ERROR, &info, 0U); } /* Used by sw interrupt thread to translate current ctx to chid. @@ -296,7 +296,7 @@ unlock: } void nvgpu_gr_intr_report_exception(struct gk20a *g, u32 inst, - u32 err_type, u32 status) + u32 err_type, u32 status, u32 sub_err_type) { struct nvgpu_channel *ch; struct gr_exception_info err_info; @@ -319,7 +319,7 @@ void nvgpu_gr_intr_report_exception(struct gk20a *g, u32 inst, err_info.status = status; info.exception_info = &err_info; (void) nvgpu_report_gr_err(g, NVGPU_ERR_MODULE_PGRAPH, - inst, err_type, &info); + inst, err_type, &info, sub_err_type); } void nvgpu_gr_intr_set_error_notifier(struct gk20a *g, @@ -735,6 +735,9 @@ int nvgpu_gr_intr_stall_isr(struct gk20a *g) if (intr_info.illegal_notify != 0U) { nvgpu_err(g, "illegal notify pending"); + nvgpu_gr_intr_report_exception(g, 0U, + GPU_PGRAPH_ILLEGAL_ERROR, gr_intr, + GPU_PGRAPH_ILLEGAL_NOTIFY); nvgpu_gr_intr_set_error_notifier(g, &isr_data, NVGPU_ERR_NOTIFIER_GR_ILLEGAL_NOTIFY); need_reset = true; @@ -742,6 +745,9 @@ int nvgpu_gr_intr_stall_isr(struct gk20a *g) } if (intr_info.illegal_method != 0U) { + nvgpu_gr_intr_report_exception(g, 0U, + GPU_PGRAPH_ILLEGAL_ERROR, gr_intr, + GPU_PGRAPH_ILLEGAL_METHOD); if (gr_intr_handle_illegal_method(g, &isr_data) != 0) { need_reset = true; } @@ -749,6 +755,9 @@ int nvgpu_gr_intr_stall_isr(struct gk20a *g) } if (intr_info.illegal_class != 0U) { + nvgpu_gr_intr_report_exception(g, 0U, + GPU_PGRAPH_ILLEGAL_ERROR, gr_intr, + GPU_PGRAPH_ILLEGAL_CLASS); nvgpu_err(g, "invalid class 0x%08x, offset 0x%08x", isr_data.class_num, isr_data.offset); @@ -766,6 +775,9 @@ int nvgpu_gr_intr_stall_isr(struct gk20a *g) } if (intr_info.class_error != 0U) { + nvgpu_gr_intr_report_exception(g, 0U, + GPU_PGRAPH_ILLEGAL_ERROR, gr_intr, + GPU_PGRAPH_CLASS_ERROR); if (gr_intr_handle_class_error(g, &isr_data) != 0) { need_reset = true; } diff --git a/drivers/gpu/nvgpu/hal/gr/intr/gr_intr_gm20b.c b/drivers/gpu/nvgpu/hal/gr/intr/gr_intr_gm20b.c index 6282d5d41..b41487639 100644 --- a/drivers/gpu/nvgpu/hal/gr/intr/gr_intr_gm20b.c +++ b/drivers/gpu/nvgpu/hal/gr/intr/gr_intr_gm20b.c @@ -231,7 +231,7 @@ bool gm20b_gr_intr_handle_exceptions(struct gk20a *g, bool *is_gpc_exception) nvgpu_gr_intr_report_exception(g, 0, GPU_PGRAPH_FE_EXCEPTION, - fe); + fe, 0); nvgpu_err(g, "fe exception: esr 0x%08x, info 0x%08x", fe, info); nvgpu_writel(g, gr_fe_hww_esr_r(), @@ -244,7 +244,7 @@ bool gm20b_gr_intr_handle_exceptions(struct gk20a *g, bool *is_gpc_exception) nvgpu_gr_intr_report_exception(g, 0, GPU_PGRAPH_MEMFMT_EXCEPTION, - memfmt); + memfmt, 0); nvgpu_err(g, "memfmt exception: esr %08x", memfmt); nvgpu_writel(g, gr_memfmt_hww_esr_r(), gr_memfmt_hww_esr_reset_active_f()); @@ -256,7 +256,7 @@ bool gm20b_gr_intr_handle_exceptions(struct gk20a *g, bool *is_gpc_exception) nvgpu_gr_intr_report_exception(g, 0, GPU_PGRAPH_PD_EXCEPTION, - pd); + pd, 0); nvgpu_err(g, "pd exception: esr 0x%08x", pd); nvgpu_writel(g, gr_pd_hww_esr_r(), gr_pd_hww_esr_reset_active_f()); @@ -268,7 +268,7 @@ bool gm20b_gr_intr_handle_exceptions(struct gk20a *g, bool *is_gpc_exception) nvgpu_gr_intr_report_exception(g, 0, GPU_PGRAPH_SCC_EXCEPTION, - scc); + scc, 0); nvgpu_err(g, "scc exception: esr 0x%08x", scc); nvgpu_writel(g, gr_scc_hww_esr_r(), gr_scc_hww_esr_reset_active_f()); @@ -280,7 +280,7 @@ bool gm20b_gr_intr_handle_exceptions(struct gk20a *g, bool *is_gpc_exception) nvgpu_gr_intr_report_exception(g, 0, GPU_PGRAPH_DS_EXCEPTION, - ds); + ds, 0); nvgpu_err(g, "ds exception: esr: 0x%08x", ds); nvgpu_writel(g, gr_ds_hww_esr_r(), gr_ds_hww_esr_reset_task_f()); @@ -300,7 +300,7 @@ bool gm20b_gr_intr_handle_exceptions(struct gk20a *g, bool *is_gpc_exception) } nvgpu_gr_intr_report_exception(g, 0, GPU_PGRAPH_SSYNC_EXCEPTION, - ssync_esr); + ssync_esr, 0); } if ((exception & gr_exception_mme_m()) != 0U) { @@ -309,7 +309,7 @@ bool gm20b_gr_intr_handle_exceptions(struct gk20a *g, bool *is_gpc_exception) nvgpu_gr_intr_report_exception(g, 0, GPU_PGRAPH_MME_EXCEPTION, - mme); + mme, 0); nvgpu_err(g, "mme exception: esr 0x%08x info:0x%08x", mme, info); if (g->ops.gr.intr.log_mme_exception != NULL) { @@ -326,7 +326,7 @@ bool gm20b_gr_intr_handle_exceptions(struct gk20a *g, bool *is_gpc_exception) nvgpu_gr_intr_report_exception(g, 0, GPU_PGRAPH_SKED_EXCEPTION, - sked); + sked, 0); nvgpu_err(g, "sked exception: esr 0x%08x", sked); nvgpu_writel(g, gr_sked_hww_esr_r(), gr_sked_hww_esr_reset_active_f()); diff --git a/drivers/gpu/nvgpu/hal/gr/intr/gr_intr_gv11b.c b/drivers/gpu/nvgpu/hal/gr/intr/gr_intr_gv11b.c index cfb420f32..c802cd68f 100644 --- a/drivers/gpu/nvgpu/hal/gr/intr/gr_intr_gv11b.c +++ b/drivers/gpu/nvgpu/hal/gr/intr/gr_intr_gv11b.c @@ -607,7 +607,7 @@ void gv11b_gr_intr_handle_tpc_mpc_exception(struct gk20a *g, u32 gpc, u32 tpc) nvgpu_gr_intr_report_exception(g, ((gpc << 8U) | tpc), GPU_PGRAPH_MPC_EXCEPTION, - esr); + esr, 0); esr = nvgpu_readl(g, nvgpu_safe_add_u32(gr_gpc0_tpc0_mpc_hww_esr_info_r(), diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/gr_intr.h b/drivers/gpu/nvgpu/include/nvgpu/gr/gr_intr.h index 6b7f2163d..3b5f249ec 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gr/gr_intr.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gr/gr_intr.h @@ -42,7 +42,7 @@ void nvgpu_gr_intr_handle_notify_pending(struct gk20a *g, void nvgpu_gr_intr_handle_semaphore_pending(struct gk20a *g, struct nvgpu_gr_isr_data *isr_data); void nvgpu_gr_intr_report_exception(struct gk20a *g, u32 inst, - u32 err_type, u32 status); + u32 err_type, u32 status, u32 sub_err_type); struct nvgpu_channel *nvgpu_gr_intr_get_channel_from_ctx(struct gk20a *g, u32 curr_ctx, u32 *curr_tsgid); void nvgpu_gr_intr_set_error_notifier(struct gk20a *g, diff --git a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_err.h b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_err.h index 43478ed9e..64b7b471a 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_err.h +++ b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_err.h @@ -136,6 +136,14 @@ struct ctxsw_err_info { #define GPU_PGRAPH_SKED_EXCEPTION (7U) #define GPU_PGRAPH_BE_EXCEPTION (8U) #define GPU_PGRAPH_MPC_EXCEPTION (9U) +#define GPU_PGRAPH_ILLEGAL_ERROR (10U) + +/* Sub-errors in GPU_PGRAPH_ILLEGAL_ERROR */ +#define GPU_PGRAPH_ILLEGAL_NOTIFY (0U) +#define GPU_PGRAPH_ILLEGAL_METHOD (1U) +#define GPU_PGRAPH_ILLEGAL_CLASS (2U) +#define GPU_PGRAPH_CLASS_ERROR (3U) + struct gr_exception_info { u32 curr_ctx; /* Context which triggered the exception */ u32 chid; /* Channel bound to the context */ @@ -210,7 +218,7 @@ int nvgpu_report_ctxsw_err(struct gk20a *g, u32 hw_unit, u32 err_id, void *data); int nvgpu_report_gr_err(struct gk20a *g, u32 hw_unit, u32 inst, - u32 err_type, struct gr_err_info *err_info); + u32 err_type, struct gr_err_info *err_info, u32 sub_err_type); int nvgpu_report_pmu_err(struct gk20a *g, u32 hw_unit, u32 err_id, u32 sub_err_type, u32 status); diff --git a/drivers/gpu/nvgpu/os/linux/sdl/sdl_stub.c b/drivers/gpu/nvgpu/os/linux/sdl/sdl_stub.c index caf73778e..e6779fe56 100644 --- a/drivers/gpu/nvgpu/os/linux/sdl/sdl_stub.c +++ b/drivers/gpu/nvgpu/os/linux/sdl/sdl_stub.c @@ -37,7 +37,7 @@ int nvgpu_report_ecc_err(struct gk20a *g, u32 hw_unit, u32 inst, } int nvgpu_report_gr_err(struct gk20a *g, u32 hw_unit, u32 inst, - u32 err_type, struct gr_err_info *err_info) + u32 err_type, struct gr_err_info *err_info, u32 sub_err_type) { return 0; } diff --git a/drivers/gpu/nvgpu/os/posix/stubs.c b/drivers/gpu/nvgpu/os/posix/stubs.c index 5956d0539..778fbb38b 100644 --- a/drivers/gpu/nvgpu/os/posix/stubs.c +++ b/drivers/gpu/nvgpu/os/posix/stubs.c @@ -57,7 +57,7 @@ int nvgpu_report_ecc_err(struct gk20a *g, u32 hw_unit, u32 inst, } int nvgpu_report_gr_err(struct gk20a *g, u32 hw_unit, u32 inst, - u32 err_type, struct gr_err_info *err_info) + u32 err_type, struct gr_err_info *err_info, u32 sub_err_type) { return 0; }