mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-24 10:34:43 +03:00
gpu: nvgpu: handle and report graphics exceptions
This patch adds support for handling graphics-related exceptions and reporting them to 3LSS. Specifically, it adds handling for the following exceptions:
- NV_PGRAPH_PRI_BE0_BECS_BE_EXCEPTION_CROP
- NV_PGRAPH_PRI_BE0_BECS_BE_EXCEPTION_ZROP
- NV_PGRAPH_PRI_GPC0_GPCCS_GPC_EXCEPTION_PROP
- NV_PGRAPH_PRI_GPC0_GPCCS_GPC_EXCEPTION_ZCULL
- NV_PGRAPH_PRI_GPC0_GPCCS_GPC_EXCEPTION_SETUP
- NV_PGRAPH_PRI_GPC0_GPCCS_GPC_EXCEPTION_PES0
- NV_PGRAPH_PRI_GPC0_GPCCS_GPC_EXCEPTION_PES1
- NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_EXCEPTION_PE

JIRA NVGPU-3457
Change-Id: Ib24b67ed33ae139317ec85bba3fbb80ba51fd384
Signed-off-by: Rajesh Devaraj <rdevaraj@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2158609
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
5a08b51559
commit
2793e76c06
@@ -129,6 +129,15 @@ static int gr_intr_handle_tpc_exception(struct gk20a *g, u32 gpc, u32 tpc,
|
||||
}
|
||||
}
|
||||
|
||||
/* check if a pe exception is pending */
|
||||
if (pending_tpc.pe_exception) {
|
||||
nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
|
||||
"GPC%d TPC%d: PE exception pending", gpc, tpc);
|
||||
if (g->ops.gr.intr.handle_tpc_pe_exception != NULL) {
|
||||
g->ops.gr.intr.handle_tpc_pe_exception(g, gpc, tpc);
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -612,6 +621,30 @@ int nvgpu_gr_intr_handle_gpc_exception(struct gk20a *g, bool *post_event,
|
||||
&g->ecc.gr.mmu_l1tlb_ecc_uncorrected_err_count[gpc].counter);
|
||||
}
|
||||
|
||||
/* Handle PROP exception */
|
||||
if (g->ops.gr.intr.handle_gpc_prop_exception != NULL) {
|
||||
g->ops.gr.intr.handle_gpc_prop_exception(g, gpc,
|
||||
gpc_exception);
|
||||
}
|
||||
|
||||
/* Handle ZCULL exception */
|
||||
if (g->ops.gr.intr.handle_gpc_zcull_exception != NULL) {
|
||||
g->ops.gr.intr.handle_gpc_zcull_exception(g, gpc,
|
||||
gpc_exception);
|
||||
}
|
||||
|
||||
/* Handle SETUP exception */
|
||||
if (g->ops.gr.intr.handle_gpc_setup_exception != NULL) {
|
||||
g->ops.gr.intr.handle_gpc_setup_exception(g, gpc,
|
||||
gpc_exception);
|
||||
}
|
||||
|
||||
/* Handle PES exception */
|
||||
if (g->ops.gr.intr.handle_gpc_pes_exception != NULL) {
|
||||
g->ops.gr.intr.handle_gpc_pes_exception(g, gpc,
|
||||
gpc_exception);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
||||
@@ -44,6 +44,7 @@ struct nvgpu_gr_tpc_exception {
|
||||
bool tex_exception;
|
||||
bool sm_exception;
|
||||
bool mpc_exception;
|
||||
bool pe_exception;
|
||||
};
|
||||
|
||||
struct nvgpu_gr_isr_data {
|
||||
|
||||
@@ -184,6 +184,40 @@ static u32 gr_gm20b_intr_check_gr_sked_exception(struct gk20a *g,
|
||||
return 0U;
|
||||
}
|
||||
|
||||
/*
 * Check the BE CROP bit in @exception and service it if set.
 *
 * When the bit is set, the CROP HWW ESR is read, reported through
 * nvgpu_gr_intr_report_exception() as GPU_PGRAPH_BE_EXCEPTION with
 * sub-error GPU_PGRAPH_BE_EXCEPTION_CROP, logged as an error, and the ESR
 * register is then written with gr_crop_hww_esr_reset_active_f().
 *
 * Returns 1U when the CROP bit was set in @exception, 0U otherwise.
 *
 * NOTE(review): the mask used here is named after the per-BE exception
 * register, whereas the neighbouring checks use gr_exception_*_m() masks.
 * Confirm that callers pass a value read from the matching register.
 */
static u32 gr_gm20b_intr_check_gr_be_crop_exception(struct gk20a *g,
						    u32 exception)
{
	u32 crop_esr;

	if ((exception & gr_pri_be0_becs_be_exception_crop_m()) == 0U) {
		return 0U;
	}

	crop_esr = nvgpu_readl(g, gr_crop_hww_esr_r());

	nvgpu_gr_intr_report_exception(g, 0, GPU_PGRAPH_BE_EXCEPTION,
				       crop_esr, GPU_PGRAPH_BE_EXCEPTION_CROP);
	nvgpu_err(g, "crop exception: esr 0x%08x", crop_esr);

	nvgpu_writel(g, gr_crop_hww_esr_r(), gr_crop_hww_esr_reset_active_f());

	return 1U;
}
|
||||
|
||||
/*
 * Check the BE ZROP bit in @exception and service it if set.
 *
 * When the bit is set, the ZROP HWW ESR is read, reported through
 * nvgpu_gr_intr_report_exception() as GPU_PGRAPH_BE_EXCEPTION with
 * sub-error GPU_PGRAPH_BE_EXCEPTION_ZROP, logged as an error, and the ESR
 * register is then written with gr_zrop_hww_esr_reset_active_f().
 *
 * Returns 1U when the ZROP bit was set in @exception, 0U otherwise.
 *
 * NOTE(review): the mask used here is named after the per-BE exception
 * register, whereas the neighbouring checks use gr_exception_*_m() masks.
 * Confirm that callers pass a value read from the matching register.
 */
static u32 gr_gm20b_intr_check_gr_be_zrop_exception(struct gk20a *g,
						    u32 exception)
{
	u32 zrop_esr;

	if ((exception & gr_pri_be0_becs_be_exception_zrop_m()) == 0U) {
		return 0U;
	}

	zrop_esr = nvgpu_readl(g, gr_zrop_hww_esr_r());

	nvgpu_gr_intr_report_exception(g, 0, GPU_PGRAPH_BE_EXCEPTION,
				       zrop_esr, GPU_PGRAPH_BE_EXCEPTION_ZROP);
	nvgpu_err(g, "zrop exception: esr 0x%08x", zrop_esr);

	nvgpu_writel(g, gr_zrop_hww_esr_r(), gr_zrop_hww_esr_reset_active_f());

	return 1U;
}
|
||||
|
||||
static u32 gr_gm20b_intr_check_gr_fe_exception(struct gk20a *g, u32 exception)
|
||||
{
|
||||
if ((exception & gr_exception_fe_m()) != 0U) {
|
||||
@@ -286,6 +320,8 @@ bool gm20b_gr_intr_handle_exceptions(struct gk20a *g, bool *is_gpc_exception)
|
||||
gpc_reset |= gr_gm20b_intr_check_gr_ssync_exception(g, exception);
|
||||
gpc_reset |= gr_gm20b_intr_check_gr_mme_exception(g, exception);
|
||||
gpc_reset |= gr_gm20b_intr_check_gr_sked_exception(g, exception);
|
||||
gpc_reset |= gr_gm20b_intr_check_gr_be_crop_exception(g, exception);
|
||||
gpc_reset |= gr_gm20b_intr_check_gr_be_zrop_exception(g, exception);
|
||||
|
||||
/* check if a gpc exception has occurred */
|
||||
if ((exception & gr_exception_gpc_m()) != 0U) {
|
||||
@@ -353,6 +389,10 @@ u32 gm20b_gr_intr_get_tpc_exception(struct gk20a *g, u32 offset,
|
||||
pending_tpc->mpc_exception = true;
|
||||
}
|
||||
|
||||
if ((tpc_exception & gr_gpc0_tpc0_tpccs_tpc_exception_pe_m()) != 0U) {
|
||||
pending_tpc->pe_exception = true;
|
||||
}
|
||||
|
||||
return tpc_exception;
|
||||
}
|
||||
|
||||
|
||||
@@ -71,9 +71,18 @@ void gv11b_gr_intr_handle_gcc_exception(struct gk20a *g, u32 gpc,
|
||||
u32 *corrected_err, u32 *uncorrected_err);
|
||||
void gv11b_gr_intr_handle_gpc_gpcmmu_exception(struct gk20a *g, u32 gpc,
|
||||
u32 gpc_exception, u32 *corrected_err, u32 *uncorrected_err);
|
||||
void gv11b_gr_intr_handle_gpc_prop_exception(struct gk20a *g, u32 gpc,
|
||||
u32 gpc_exception);
|
||||
void gv11b_gr_intr_handle_gpc_zcull_exception(struct gk20a *g, u32 gpc,
|
||||
u32 gpc_exception);
|
||||
void gv11b_gr_intr_handle_gpc_setup_exception(struct gk20a *g, u32 gpc,
|
||||
u32 gpc_exception);
|
||||
void gv11b_gr_intr_handle_gpc_pes_exception(struct gk20a *g, u32 gpc,
|
||||
u32 gpc_exception);
|
||||
void gv11b_gr_intr_handle_gpc_gpccs_exception(struct gk20a *g, u32 gpc,
|
||||
u32 gpc_exception, u32 *corrected_err, u32 *uncorrected_err);
|
||||
void gv11b_gr_intr_handle_tpc_mpc_exception(struct gk20a *g, u32 gpc, u32 tpc);
|
||||
void gv11b_gr_intr_handle_tpc_pe_exception(struct gk20a *g, u32 gpc, u32 tpc);
|
||||
void gv11b_gr_intr_enable_hww_exceptions(struct gk20a *g);
|
||||
void gv11b_gr_intr_enable_exceptions(struct gk20a *g,
|
||||
struct nvgpu_gr_config *gr_config,
|
||||
|
||||
@@ -526,6 +526,121 @@ static void gv11b_gr_intr_report_gpccs_ecc_err(struct gk20a *g,
|
||||
nvgpu_log(g, gpu_dbg_intr, "dmem ecc error uncorrected");
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Service a PROP exception for GPC @gpc.
 *
 * Does nothing unless the PROP bit is set in @gpc_exception. Otherwise the
 * PROP HWW ESR at this GPC's register offset is read, reported as
 * GPU_PGRAPH_GPC_GFX_EXCEPTION / GPU_PGRAPH_GPC_GFX_EXCEPTION_PROP with
 * (gpc << 8) as the instance, cleared, and logged.
 */
void gv11b_gr_intr_handle_gpc_prop_exception(struct gk20a *g, u32 gpc,
		u32 gpc_exception)
{
	u32 gpc_offset = nvgpu_gr_gpc_offset(g, gpc);
	u32 esr_addr;
	u32 esr_val;

	if ((gpc_exception & gr_gpc0_gpccs_gpc_exception_prop_m()) == 0U) {
		return;
	}

	/* Per-GPC address of the PROP HWW ESR, used for the read and the clear. */
	esr_addr = nvgpu_safe_add_u32(gr_prop_hww_esr_r(), gpc_offset);
	esr_val = nvgpu_readl(g, esr_addr);

	nvgpu_gr_intr_report_exception(g, (gpc << 8U),
			GPU_PGRAPH_GPC_GFX_EXCEPTION,
			esr_val, GPU_PGRAPH_GPC_GFX_EXCEPTION_PROP);

	/* clear the interrupt */
	nvgpu_writel(g, esr_addr, gr_prop_hww_esr_reset_active_f());

	nvgpu_log(g, gpu_dbg_intr,
			"gpc:%d prop interrupt intr: 0x%x", gpc, esr_val);
}
|
||||
|
||||
/*
 * Service a ZCULL exception for GPC @gpc.
 *
 * Does nothing unless the ZCULL bit is set in @gpc_exception. Otherwise the
 * ZCULL HWW ESR at this GPC's register offset is read, reported as
 * GPU_PGRAPH_GPC_GFX_EXCEPTION / GPU_PGRAPH_GPC_GFX_EXCEPTION_ZCULL with
 * (gpc << 8) as the instance, cleared, and logged.
 */
void gv11b_gr_intr_handle_gpc_zcull_exception(struct gk20a *g, u32 gpc,
		u32 gpc_exception)
{
	u32 gpc_offset = nvgpu_gr_gpc_offset(g, gpc);
	u32 esr_addr;
	u32 esr_val;

	if ((gpc_exception & gr_gpc0_gpccs_gpc_exception_zcull_m()) == 0U) {
		return;
	}

	/* Per-GPC address of the ZCULL HWW ESR, used for the read and the clear. */
	esr_addr = nvgpu_safe_add_u32(gr_zcull_hww_esr_r(), gpc_offset);
	esr_val = nvgpu_readl(g, esr_addr);

	nvgpu_gr_intr_report_exception(g, (gpc << 8U),
			GPU_PGRAPH_GPC_GFX_EXCEPTION,
			esr_val, GPU_PGRAPH_GPC_GFX_EXCEPTION_ZCULL);

	/* clear the interrupt */
	nvgpu_writel(g, esr_addr, gr_zcull_hww_esr_reset_active_f());

	nvgpu_log(g, gpu_dbg_intr,
			"gpc:%d zcull interrupt intr: 0x%x", gpc, esr_val);
}
|
||||
|
||||
/*
 * Service a SETUP exception for GPC @gpc.
 *
 * Does nothing unless the SETUP bit is set in @gpc_exception. Otherwise the
 * SETUP HWW ESR at this GPC's register offset is read, reported as
 * GPU_PGRAPH_GPC_GFX_EXCEPTION / GPU_PGRAPH_GPC_GFX_EXCEPTION_SETUP with
 * (gpc << 8) as the instance, cleared, and logged.
 */
void gv11b_gr_intr_handle_gpc_setup_exception(struct gk20a *g, u32 gpc,
		u32 gpc_exception)
{
	u32 gpc_offset = nvgpu_gr_gpc_offset(g, gpc);
	u32 esr_addr;
	u32 esr_val;

	if ((gpc_exception & gr_gpc0_gpccs_gpc_exception_setup_m()) == 0U) {
		return;
	}

	/* Per-GPC address of the SETUP HWW ESR, used for the read and the clear. */
	esr_addr = nvgpu_safe_add_u32(gr_setup_hww_esr_r(), gpc_offset);
	esr_val = nvgpu_readl(g, esr_addr);

	nvgpu_gr_intr_report_exception(g, (gpc << 8U),
			GPU_PGRAPH_GPC_GFX_EXCEPTION,
			esr_val, GPU_PGRAPH_GPC_GFX_EXCEPTION_SETUP);

	/* clear the interrupt */
	nvgpu_writel(g, esr_addr, gr_setup_hww_esr_reset_active_f());

	nvgpu_log(g, gpu_dbg_intr,
			"gpc:%d setup interrupt intr: 0x%x", gpc, esr_val);
}
|
||||
|
||||
/*
 * Service PES0/PES1 exceptions for GPC @gpc.
 *
 * Does nothing unless at least one of the PES0 or PES1 bits is set in
 * @gpc_exception. Otherwise the PES HWW ESR at this GPC's register offset is
 * read once and a GPU_PGRAPH_GPC_GFX_EXCEPTION is reported for every pending
 * PES unit (sub-error GPU_PGRAPH_GPC_GFX_EXCEPTION_PES0 and/or
 * GPU_PGRAPH_GPC_GFX_EXCEPTION_PES1), each carrying the same ESR value and
 * (gpc << 8) as the instance. The ESR is then cleared and logged.
 *
 * Each pending unit gets its own report so that a simultaneous PES0 + PES1
 * exception does not lose the PES0 report.
 */
void gv11b_gr_intr_handle_gpc_pes_exception(struct gk20a *g, u32 gpc,
		u32 gpc_exception)
{
	u32 offset = nvgpu_gr_gpc_offset(g, gpc);
	bool pes0_pending =
		(gpc_exception & gr_gpc0_gpccs_gpc_exception_pes0_m()) != 0U;
	bool pes1_pending =
		(gpc_exception & gr_gpc0_gpccs_gpc_exception_pes1_m()) != 0U;
	u32 esr_addr;
	u32 hww_esr;

	if (!pes0_pending && !pes1_pending) {
		return;
	}

	/* Per-GPC address of the PES HWW ESR, used for the read and the clear. */
	esr_addr = nvgpu_safe_add_u32(gr_pes_hww_esr_r(), offset);
	hww_esr = nvgpu_readl(g, esr_addr);

	if (pes0_pending) {
		nvgpu_gr_intr_report_exception(g, (gpc << 8U),
				GPU_PGRAPH_GPC_GFX_EXCEPTION,
				hww_esr, GPU_PGRAPH_GPC_GFX_EXCEPTION_PES0);
	}

	if (pes1_pending) {
		nvgpu_gr_intr_report_exception(g, (gpc << 8U),
				GPU_PGRAPH_GPC_GFX_EXCEPTION,
				hww_esr, GPU_PGRAPH_GPC_GFX_EXCEPTION_PES1);
	}

	/* clear the interrupt */
	nvgpu_writel(g, esr_addr, gr_pes_hww_esr_reset_task_f());

	nvgpu_log(g, gpu_dbg_intr,
			"gpc:%d pes interrupt intr: 0x%x", gpc, hww_esr);
}
|
||||
|
||||
void gv11b_gr_intr_handle_gpc_gpccs_exception(struct gk20a *g, u32 gpc,
|
||||
u32 gpc_exception, u32 *corrected_err, u32 *uncorrected_err)
|
||||
{
|
||||
@@ -637,6 +752,24 @@ void gv11b_gr_intr_handle_tpc_mpc_exception(struct gk20a *g, u32 gpc, u32 tpc)
|
||||
gr_gpc0_tpc0_mpc_hww_esr_reset_trigger_f());
|
||||
}
|
||||
|
||||
/*
 * Service a PE exception for TPC @tpc of GPC @gpc.
 *
 * Reads the PE HWW ESR at the combined GPC + TPC register offset, reports it
 * as GPU_PGRAPH_GPC_GFX_EXCEPTION / GPU_PGRAPH_GPC_GFX_EXCEPTION_TPC_PE with
 * ((gpc << 8) | tpc) as the instance, logs the value, and then writes
 * gr_gpc0_tpc0_pe_hww_esr_reset_task_f() to the ESR register.
 */
void gv11b_gr_intr_handle_tpc_pe_exception(struct gk20a *g, u32 gpc, u32 tpc)
{
	u32 unit_offset = nvgpu_safe_add_u32(nvgpu_gr_gpc_offset(g, gpc),
					     nvgpu_gr_tpc_offset(g, tpc));
	/* Address of this TPC's PE HWW ESR, used for the read and the write. */
	u32 esr_addr = nvgpu_safe_add_u32(gr_gpc0_tpc0_pe_hww_esr_r(),
					  unit_offset);
	u32 esr_val = nvgpu_readl(g, esr_addr);

	nvgpu_gr_intr_report_exception(g, ((gpc << 8U) | tpc),
			GPU_PGRAPH_GPC_GFX_EXCEPTION,
			esr_val, GPU_PGRAPH_GPC_GFX_EXCEPTION_TPC_PE);

	nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, "pe hww esr 0x%08x",
			esr_val);

	nvgpu_writel(g, esr_addr, gr_gpc0_tpc0_pe_hww_esr_reset_task_f());
}
|
||||
|
||||
void gv11b_gr_intr_enable_hww_exceptions(struct gk20a *g)
|
||||
{
|
||||
/* enable exceptions */
|
||||
@@ -688,7 +821,9 @@ void gv11b_gr_intr_enable_exceptions(struct gk20a *g,
|
||||
*/
|
||||
|
||||
/* enable exceptions */
|
||||
nvgpu_writel(g, gr_exception2_en_r(), 0x0U); /* BE not enabled */
|
||||
reg_val = gr_exception2_en_be_enabled_f();
|
||||
nvgpu_log(g, gpu_dbg_info, "gr_exception2_en 0x%08x", reg_val);
|
||||
nvgpu_writel(g, gr_exception2_en_r(), reg_val);
|
||||
|
||||
reg_val = (u32)BIT32(nvgpu_gr_config_get_gpc_count(gr_config));
|
||||
nvgpu_writel(g, gr_exception1_en_r(),
|
||||
@@ -716,6 +851,7 @@ void gv11b_gr_intr_enable_gpc_exceptions(struct gk20a *g,
|
||||
|
||||
nvgpu_writel(g, gr_gpcs_tpcs_tpccs_tpc_exception_en_r(),
|
||||
gr_gpcs_tpcs_tpccs_tpc_exception_en_sm_enabled_f() |
|
||||
gr_gpcs_tpcs_tpccs_tpc_exception_en_pe_enabled_f() |
|
||||
gr_gpcs_tpcs_tpccs_tpc_exception_en_mpc_enabled_f());
|
||||
|
||||
tpc_mask_calc = (u32)BIT32(
|
||||
|
||||
@@ -586,11 +586,21 @@ static const struct gpu_ops gv11b_ops = {
|
||||
gv11b_gr_intr_handle_gcc_exception,
|
||||
.handle_gpc_gpcmmu_exception =
|
||||
gv11b_gr_intr_handle_gpc_gpcmmu_exception,
|
||||
.handle_gpc_prop_exception =
|
||||
gv11b_gr_intr_handle_gpc_prop_exception,
|
||||
.handle_gpc_zcull_exception =
|
||||
gv11b_gr_intr_handle_gpc_zcull_exception,
|
||||
.handle_gpc_setup_exception =
|
||||
gv11b_gr_intr_handle_gpc_setup_exception,
|
||||
.handle_gpc_pes_exception =
|
||||
gv11b_gr_intr_handle_gpc_pes_exception,
|
||||
.handle_gpc_gpccs_exception =
|
||||
gv11b_gr_intr_handle_gpc_gpccs_exception,
|
||||
.get_tpc_exception = gm20b_gr_intr_get_tpc_exception,
|
||||
.handle_tpc_mpc_exception =
|
||||
gv11b_gr_intr_handle_tpc_mpc_exception,
|
||||
.handle_tpc_pe_exception =
|
||||
gv11b_gr_intr_handle_tpc_pe_exception,
|
||||
.handle_tex_exception = NULL,
|
||||
.enable_hww_exceptions =
|
||||
gv11b_gr_intr_enable_hww_exceptions,
|
||||
|
||||
@@ -850,6 +850,14 @@ struct gpu_ops {
|
||||
void (*handle_gpc_gpcmmu_exception)(struct gk20a *g,
|
||||
u32 gpc, u32 gpc_exception,
|
||||
u32 *corrected_err, u32 *uncorrected_err);
|
||||
void (*handle_gpc_prop_exception)(struct gk20a *g,
|
||||
u32 gpc, u32 gpc_exception);
|
||||
void (*handle_gpc_zcull_exception)(struct gk20a *g,
|
||||
u32 gpc, u32 gpc_exception);
|
||||
void (*handle_gpc_setup_exception)(struct gk20a *g,
|
||||
u32 gpc, u32 gpc_exception);
|
||||
void (*handle_gpc_pes_exception)(struct gk20a *g,
|
||||
u32 gpc, u32 gpc_exception);
|
||||
void (*handle_gpc_gpccs_exception)(struct gk20a *g,
|
||||
u32 gpc, u32 gpc_exception,
|
||||
u32 *corrected_err, u32 *uncorrected_err);
|
||||
@@ -857,6 +865,8 @@ struct gpu_ops {
|
||||
struct nvgpu_gr_tpc_exception *pending_tpc);
|
||||
void (*handle_tpc_mpc_exception)(struct gk20a *g,
|
||||
u32 gpc, u32 tpc);
|
||||
void (*handle_tpc_pe_exception)(struct gk20a *g,
|
||||
u32 gpc, u32 tpc);
|
||||
void (*handle_tex_exception)(struct gk20a *g,
|
||||
u32 gpc, u32 tpc);
|
||||
void (*enable_hww_exceptions)(struct gk20a *g);
|
||||
|
||||
@@ -192,6 +192,19 @@ struct ctxsw_err_info {
|
||||
#define GPU_PGRAPH_BE_EXCEPTION (8U)
|
||||
#define GPU_PGRAPH_MPC_EXCEPTION (9U)
|
||||
#define GPU_PGRAPH_ILLEGAL_ERROR (10U)
|
||||
#define GPU_PGRAPH_GPC_GFX_EXCEPTION (11U)
|
||||
|
||||
/** Sub-errors in GPU_PGRAPH_BE_EXCEPTION. */
|
||||
#define GPU_PGRAPH_BE_EXCEPTION_CROP (0U)
|
||||
#define GPU_PGRAPH_BE_EXCEPTION_ZROP (1U)
|
||||
|
||||
/** Sub-errors in GPU_PGRAPH_GPC_GFX_EXCEPTION. */
|
||||
#define GPU_PGRAPH_GPC_GFX_EXCEPTION_PROP (0U)
|
||||
#define GPU_PGRAPH_GPC_GFX_EXCEPTION_ZCULL (1U)
|
||||
#define GPU_PGRAPH_GPC_GFX_EXCEPTION_SETUP (2U)
|
||||
#define GPU_PGRAPH_GPC_GFX_EXCEPTION_PES0 (3U)
|
||||
#define GPU_PGRAPH_GPC_GFX_EXCEPTION_PES1 (4U)
|
||||
#define GPU_PGRAPH_GPC_GFX_EXCEPTION_TPC_PE (5U)
|
||||
|
||||
/** Sub-errors in GPU_PGRAPH_ILLEGAL_ERROR. */
|
||||
#define GPU_PGRAPH_ILLEGAL_NOTIFY (0U)
|
||||
|
||||
Reference in New Issue
Block a user