gpu: nvgpu: Add handle_class_error hal

Add a handle_class_error HAL, which reports more data
regarding the class error. Move all register access code from
the gk20a_gr_handle_class_error function into this HAL.

JIRA NVGPU-3016

Change-Id: I868268267f1879974795c2829e816a6956551b58
Signed-off-by: Vinod G <vinodg@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2092877
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Vinod G
2019-04-08 17:01:51 -07:00
committed by mobile promotions
parent 1095f0eea3
commit b0973eacbb
9 changed files with 45 additions and 22 deletions

View File

@@ -463,35 +463,15 @@ int gk20a_gr_handle_fecs_error(struct gk20a *g, struct channel_gk20a *ch,
/*
 * Handle a GR class error interrupt.
 *
 * Derives the channel ID from isr_data (FIFO_INVAL_CHANNEL_ID when no channel
 * is attached), calls the handle_class_error HAL op, and sets the
 * NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY error notifier through
 * gk20a_gr_set_error_notifier().
 *
 * Always returns -EINVAL.
 *
 * NOTE(review): this hunk is rendered without +/- markers, so lines removed
 * by the commit and lines added by it appear interleaved. The direct register
 * reads and the nvgpu_err() logging below are presumably the removed lines
 * (the commit moves register access into the HAL) - confirm against the
 * actual file before relying on this listing.
 */
static int gk20a_gr_handle_class_error(struct gk20a *g,
struct nvgpu_gr_isr_data *isr_data)
{
u32 gr_class_error;
/* Fall back to the invalid channel ID when the ISR data has no channel. */
u32 chid = isr_data->ch != NULL ?
isr_data->ch->chid : FIFO_INVAL_CHANNEL_ID;
nvgpu_log_fn(g, " ");
gr_class_error =
gr_class_error_code_v(gk20a_readl(g, gr_class_error_r()));
/* Delegate the class error reporting to the HAL op. */
g->ops.gr.intr.handle_class_error(g, chid, isr_data);
gk20a_gr_set_error_notifier(g, isr_data,
NVGPU_ERR_NOTIFIER_GR_ERROR_SW_NOTIFY);
nvgpu_err(g, "class error 0x%08x, offset 0x%08x,"
"sub channel 0x%08x mme generated %d,"
" mme pc 0x%08xdata high %d priv status %d"
" unhandled intr 0x%08x for channel %u",
isr_data->class_num, (isr_data->offset << 2),
gr_trapped_addr_subch_v(isr_data->addr),
gr_trapped_addr_mme_generated_v(isr_data->addr),
gr_trapped_data_mme_pc_v(
gk20a_readl(g, gr_trapped_data_mme_r())),
gr_trapped_addr_datahigh_v(isr_data->addr),
gr_trapped_addr_priv_v(isr_data->addr),
gr_class_error, chid);
nvgpu_err(g, "trapped data low 0x%08x",
gk20a_readl(g, gr_trapped_data_lo_r()));
/* The high data word is only logged when the datahigh field is non-zero. */
if (gr_trapped_addr_datahigh_v(isr_data->addr) != 0U) {
nvgpu_err(g, "trapped data high 0x%08x",
gk20a_readl(g, gr_trapped_data_hi_r()));
}
return -EINVAL;
}

View File

@@ -472,6 +472,8 @@ static const struct gpu_ops gm20b_ops = {
.get_gfxp_rtv_cb_size = NULL,
},
.intr = {
.handle_class_error =
gm20b_gr_intr_handle_class_error,
.clear_pending_interrupts =
gm20b_gr_intr_clear_pending_interrupts,
.read_pending_interrupts =

View File

@@ -567,6 +567,8 @@ static const struct gpu_ops gp10b_ops = {
gp10b_gr_init_commit_cbes_reserve,
},
.intr = {
.handle_class_error =
gm20b_gr_intr_handle_class_error,
.clear_pending_interrupts =
gm20b_gr_intr_clear_pending_interrupts,
.read_pending_interrupts =

View File

@@ -697,6 +697,8 @@ static const struct gpu_ops gv100_ops = {
gv11b_gr_init_commit_gfxp_wfi_timeout,
},
.intr = {
.handle_class_error =
gm20b_gr_intr_handle_class_error,
.clear_pending_interrupts =
gm20b_gr_intr_clear_pending_interrupts,
.read_pending_interrupts =

View File

@@ -672,6 +672,8 @@ static const struct gpu_ops gv11b_ops = {
gv11b_gr_init_commit_gfxp_wfi_timeout,
},
.intr = {
.handle_class_error =
gm20b_gr_intr_handle_class_error,
.clear_pending_interrupts =
gm20b_gr_intr_clear_pending_interrupts,
.read_pending_interrupts =

View File

@@ -31,6 +31,35 @@
#include <nvgpu/hw/gm20b/hw_gr_gm20b.h>
/*
 * Log the details of a GR class error.
 *
 * Reads the class error code and the trapped MME data register, then logs
 * them together with the class number, method offset and the fields decoded
 * from the trapped address held in isr_data. The trapped data low word is
 * always logged; the high word is logged only when the trapped address
 * datahigh field is non-zero.
 *
 * @g:        GPU instance used for the register reads and logging.
 * @chid:     channel ID printed in the log message.
 * @isr_data: ISR data supplying class_num, offset and the trapped address.
 *            Must not be NULL; it is dereferenced unconditionally.
 */
void gm20b_gr_intr_handle_class_error(struct gk20a *g, u32 chid,
				       struct nvgpu_gr_isr_data *isr_data)
{
	/* Register reads are kept in the original order: error code, then MME. */
	u32 gr_class_error =
		gr_class_error_code_v(nvgpu_readl(g, gr_class_error_r()));
	u32 mme_pc =
		gr_trapped_data_mme_pc_v(nvgpu_readl(g, gr_trapped_data_mme_r()));
	u32 subch = gr_trapped_addr_subch_v(isr_data->addr);
	u32 mme_generated = gr_trapped_addr_mme_generated_v(isr_data->addr);
	u32 data_high = gr_trapped_addr_datahigh_v(isr_data->addr);
	u32 priv_status = gr_trapped_addr_priv_v(isr_data->addr);

	/*
	 * Each string fragment ends with its own separator so adjacent fields
	 * do not run together once the literals are concatenated.
	 */
	nvgpu_err(g, "class error 0x%08x, offset 0x%08x, "
		"sub channel 0x%08x mme generated %u, "
		"mme pc 0x%08x data high %u priv status %u "
		"unhandled intr 0x%08x for channel %u",
		isr_data->class_num, (isr_data->offset << 2),
		subch, mme_generated, mme_pc, data_high, priv_status,
		gr_class_error, chid);

	nvgpu_err(g, "trapped data low 0x%08x",
		nvgpu_readl(g, gr_trapped_data_lo_r()));

	/* The high data word is only meaningful when datahigh is set. */
	if (data_high != 0U) {
		nvgpu_err(g, "trapped data high 0x%08x",
			nvgpu_readl(g, gr_trapped_data_hi_r()));
	}
}
void gm20b_gr_intr_clear_pending_interrupts(struct gk20a *g, u32 gr_intr)
{
nvgpu_writel(g, gr_intr_r(), gr_intr);

View File

@@ -31,6 +31,8 @@ struct nvgpu_gr_tpc_exception;
struct nvgpu_gr_isr_data;
struct nvgpu_gr_intr_info;
/*
 * Log the details of a GR class error for channel chid, using the class
 * number, offset and trapped address carried in isr_data.
 */
void gm20b_gr_intr_handle_class_error(struct gk20a *g, u32 chid,
struct nvgpu_gr_isr_data *isr_data);
void gm20b_gr_intr_clear_pending_interrupts(struct gk20a *g, u32 gr_intr);
u32 gm20b_gr_intr_read_pending_interrupts(struct gk20a *g,
struct nvgpu_gr_intr_info *intr_info);

View File

@@ -783,6 +783,8 @@ struct gpu_ops {
} init;
struct {
void (*handle_class_error)(struct gk20a *g, u32 chid,
struct nvgpu_gr_isr_data *isr_data);
void (*clear_pending_interrupts)(struct gk20a *g,
u32 gr_intr);
u32 (*read_pending_interrupts)(struct gk20a *g,

View File

@@ -731,6 +731,8 @@ static const struct gpu_ops tu104_ops = {
gv11b_gr_init_commit_gfxp_wfi_timeout,
},
.intr = {
.handle_class_error =
gm20b_gr_intr_handle_class_error,
.clear_pending_interrupts =
gm20b_gr_intr_clear_pending_interrupts,
.read_pending_interrupts =