From b0973eacbb3e396ffee09c0dfc4073c82327d60c Mon Sep 17 00:00:00 2001 From: Vinod G Date: Mon, 8 Apr 2019 17:01:51 -0700 Subject: [PATCH] gpu: nvgpu: Add handle_class_error hal Add handle_class_error hal, which reports more data regarding class error. Move all register access code in gk20a_gr_handle_class_error function to this hal. JIRA NVGPU-3016 Change-Id: I868268267f1879974795c2829e816a6956551b58 Signed-off-by: Vinod G Reviewed-on: https://git-master.nvidia.com/r/2092877 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 24 ++------------- drivers/gpu/nvgpu/gm20b/hal_gm20b.c | 2 ++ drivers/gpu/nvgpu/gp10b/hal_gp10b.c | 2 ++ drivers/gpu/nvgpu/gv100/hal_gv100.c | 2 ++ drivers/gpu/nvgpu/gv11b/hal_gv11b.c | 2 ++ drivers/gpu/nvgpu/hal/gr/intr/gr_intr_gm20b.c | 29 +++++++++++++++++++ drivers/gpu/nvgpu/hal/gr/intr/gr_intr_gm20b.h | 2 ++ drivers/gpu/nvgpu/include/nvgpu/gk20a.h | 2 ++ drivers/gpu/nvgpu/tu104/hal_tu104.c | 2 ++ 9 files changed, 45 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 1b9c898dc..274b88e13 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -463,35 +463,15 @@ int gk20a_gr_handle_fecs_error(struct gk20a *g, struct channel_gk20a *ch, static int gk20a_gr_handle_class_error(struct gk20a *g, struct nvgpu_gr_isr_data *isr_data) { - u32 gr_class_error; u32 chid = isr_data->ch != NULL ? 
isr_data->ch->chid : FIFO_INVAL_CHANNEL_ID; nvgpu_log_fn(g, " "); - gr_class_error = - gr_class_error_code_v(gk20a_readl(g, gr_class_error_r())); + g->ops.gr.intr.handle_class_error(g, chid, isr_data); + gk20a_gr_set_error_notifier(g, isr_data, NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY); - nvgpu_err(g, "class error 0x%08x, offset 0x%08x," - "sub channel 0x%08x mme generated %d," - " mme pc 0x%08xdata high %d priv status %d" - " unhandled intr 0x%08x for channel %u", - isr_data->class_num, (isr_data->offset << 2), - gr_trapped_addr_subch_v(isr_data->addr), - gr_trapped_addr_mme_generated_v(isr_data->addr), - gr_trapped_data_mme_pc_v( - gk20a_readl(g, gr_trapped_data_mme_r())), - gr_trapped_addr_datahigh_v(isr_data->addr), - gr_trapped_addr_priv_v(isr_data->addr), - gr_class_error, chid); - - nvgpu_err(g, "trapped data low 0x%08x", - gk20a_readl(g, gr_trapped_data_lo_r())); - if (gr_trapped_addr_datahigh_v(isr_data->addr) != 0U) { - nvgpu_err(g, "trapped data high 0x%08x", - gk20a_readl(g, gr_trapped_data_hi_r())); - } return -EINVAL; } diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c index 2edd2bf64..7e5eecac4 100644 --- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c @@ -472,6 +472,8 @@ static const struct gpu_ops gm20b_ops = { .get_gfxp_rtv_cb_size = NULL, }, .intr = { + .handle_class_error = + gm20b_gr_intr_handle_class_error, .clear_pending_interrupts = gm20b_gr_intr_clear_pending_interrupts, .read_pending_interrupts = diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c index bc1431f28..4a848b45b 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c @@ -567,6 +567,8 @@ static const struct gpu_ops gp10b_ops = { gp10b_gr_init_commit_cbes_reserve, }, .intr = { + .handle_class_error = + gm20b_gr_intr_handle_class_error, .clear_pending_interrupts = gm20b_gr_intr_clear_pending_interrupts, .read_pending_interrupts = diff --git 
a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index 27541efa9..371b29e9b 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c @@ -697,6 +697,8 @@ static const struct gpu_ops gv100_ops = { gv11b_gr_init_commit_gfxp_wfi_timeout, }, .intr = { + .handle_class_error = + gm20b_gr_intr_handle_class_error, .clear_pending_interrupts = gm20b_gr_intr_clear_pending_interrupts, .read_pending_interrupts = diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index bff43d5d5..e0fcb5e32 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c @@ -672,6 +672,8 @@ static const struct gpu_ops gv11b_ops = { gv11b_gr_init_commit_gfxp_wfi_timeout, }, .intr = { + .handle_class_error = + gm20b_gr_intr_handle_class_error, .clear_pending_interrupts = gm20b_gr_intr_clear_pending_interrupts, .read_pending_interrupts = diff --git a/drivers/gpu/nvgpu/hal/gr/intr/gr_intr_gm20b.c b/drivers/gpu/nvgpu/hal/gr/intr/gr_intr_gm20b.c index c52c31ecd..9c78bca79 100644 --- a/drivers/gpu/nvgpu/hal/gr/intr/gr_intr_gm20b.c +++ b/drivers/gpu/nvgpu/hal/gr/intr/gr_intr_gm20b.c @@ -31,6 +31,35 @@ #include +void gm20b_gr_intr_handle_class_error(struct gk20a *g, u32 chid, + struct nvgpu_gr_isr_data *isr_data) +{ + u32 gr_class_error; + + gr_class_error = + gr_class_error_code_v(nvgpu_readl(g, gr_class_error_r())); + + nvgpu_err(g, "class error 0x%08x, offset 0x%08x," + " sub channel 0x%08x mme generated %d," + " mme pc 0x%08x data high %d priv status %d" + " unhandled intr 0x%08x for channel %u", + isr_data->class_num, (isr_data->offset << 2), + gr_trapped_addr_subch_v(isr_data->addr), + gr_trapped_addr_mme_generated_v(isr_data->addr), + gr_trapped_data_mme_pc_v( + nvgpu_readl(g, gr_trapped_data_mme_r())), + gr_trapped_addr_datahigh_v(isr_data->addr), + gr_trapped_addr_priv_v(isr_data->addr), + gr_class_error, chid); + + nvgpu_err(g, "trapped data low 0x%08x", + nvgpu_readl(g, 
gr_trapped_data_lo_r())); + if (gr_trapped_addr_datahigh_v(isr_data->addr) != 0U) { + nvgpu_err(g, "trapped data high 0x%08x", + nvgpu_readl(g, gr_trapped_data_hi_r())); + } +} + void gm20b_gr_intr_clear_pending_interrupts(struct gk20a *g, u32 gr_intr) { nvgpu_writel(g, gr_intr_r(), gr_intr); diff --git a/drivers/gpu/nvgpu/hal/gr/intr/gr_intr_gm20b.h b/drivers/gpu/nvgpu/hal/gr/intr/gr_intr_gm20b.h index fd7956db4..c961395a7 100644 --- a/drivers/gpu/nvgpu/hal/gr/intr/gr_intr_gm20b.h +++ b/drivers/gpu/nvgpu/hal/gr/intr/gr_intr_gm20b.h @@ -31,6 +31,8 @@ struct nvgpu_gr_tpc_exception; struct nvgpu_gr_isr_data; struct nvgpu_gr_intr_info; +void gm20b_gr_intr_handle_class_error(struct gk20a *g, u32 chid, + struct nvgpu_gr_isr_data *isr_data); void gm20b_gr_intr_clear_pending_interrupts(struct gk20a *g, u32 gr_intr); u32 gm20b_gr_intr_read_pending_interrupts(struct gk20a *g, struct nvgpu_gr_intr_info *intr_info); diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index b5cc46ab9..b00a462b8 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h @@ -783,6 +783,8 @@ struct gpu_ops { } init; struct { + void (*handle_class_error)(struct gk20a *g, u32 chid, + struct nvgpu_gr_isr_data *isr_data); void (*clear_pending_interrupts)(struct gk20a *g, u32 gr_intr); u32 (*read_pending_interrupts)(struct gk20a *g, diff --git a/drivers/gpu/nvgpu/tu104/hal_tu104.c b/drivers/gpu/nvgpu/tu104/hal_tu104.c index 6424e9f36..87bf995e4 100644 --- a/drivers/gpu/nvgpu/tu104/hal_tu104.c +++ b/drivers/gpu/nvgpu/tu104/hal_tu104.c @@ -731,6 +731,8 @@ static const struct gpu_ops tu104_ops = { gv11b_gr_init_commit_gfxp_wfi_timeout, }, .intr = { + .handle_class_error = + gm20b_gr_intr_handle_class_error, .clear_pending_interrupts = gm20b_gr_intr_clear_pending_interrupts, .read_pending_interrupts =