gpu: nvgpu: enhance pbus error reporting

-Dump timeout save0 and save1 even if they could
 be unreliable when fecs_tgt is set in save0. This
 is good to have for debug purposes.
-Add priv_ring hal for decode_error_code
-Decode fecs error code for supported error types

Bug 1998067

Change-Id: I60cb6902d099df4a7df45fa624e44d9e0d46360f
Signed-off-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1683014
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Seema Khowala
2018-03-27 11:52:27 -07:00
committed by mobile promotions
parent f81d83690f
commit aa7ee8dac0
8 changed files with 43 additions and 24 deletions

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@@ -65,7 +65,7 @@ void gk20a_bus_init_hw(struct gk20a *g)
void gk20a_bus_isr(struct gk20a *g) void gk20a_bus_isr(struct gk20a *g)
{ {
u32 val, save0, save1, err_code; u32 val, save0, save1, fecs_errcode = 0;
val = gk20a_readl(g, bus_intr_0_r()); val = gk20a_readl(g, bus_intr_0_r());
@@ -78,29 +78,37 @@ void gk20a_bus_isr(struct gk20a *g)
save0 = gk20a_readl(g, timer_pri_timeout_save_0_r()); save0 = gk20a_readl(g, timer_pri_timeout_save_0_r());
if (timer_pri_timeout_save_0_fecs_tgt_v(save0)) { if (timer_pri_timeout_save_0_fecs_tgt_v(save0)) {
/*
err_code = gk20a_readl(g, * write & addr fields in timeout_save0
* might not be reliable
*/
fecs_errcode = gk20a_readl(g,
timer_pri_timeout_fecs_errcode_r()); timer_pri_timeout_fecs_errcode_r());
/* write and addr fields are not reliable */
nvgpu_err(g, "NV_PBUS_INTR_0: 0x%08x "
"FECS_ERRCODE 0x%08x", val, err_code);
if ((err_code & 0xffffff00) == 0xbadf1300)
nvgpu_err(g, "NV_PGRAPH_PRI_GPC0_GPCCS_FS_GPC: "
"0x%08x",
gk20a_readl(g, gr_gpc0_fs_gpc_r()));
} else {
save1 = gk20a_readl(g, timer_pri_timeout_save_1_r());
nvgpu_err(g, "NV_PBUS_INTR_0: 0x%08x ADR 0x%08x "
"R/W %s DATA 0x%08x",
val,
timer_pri_timeout_save_0_addr_v(save0) << 2,
timer_pri_timeout_save_0_write_v(save0) ?
"WRITE" : "READ", save1);
} }
save1 = gk20a_readl(g, timer_pri_timeout_save_1_r());
nvgpu_err(g, "NV_PBUS_INTR_0: 0x%08x ADR 0x%08x "
"%s DATA 0x%08x ",
val,
timer_pri_timeout_save_0_addr_v(save0) << 2,
timer_pri_timeout_save_0_write_v(save0) ?
"WRITE" : "READ", save1);
gk20a_writel(g, timer_pri_timeout_save_0_r(), 0); gk20a_writel(g, timer_pri_timeout_save_0_r(), 0);
gk20a_writel(g, timer_pri_timeout_save_1_r(), 0); gk20a_writel(g, timer_pri_timeout_save_1_r(), 0);
if (fecs_errcode) {
nvgpu_err(g, "FECS_ERRCODE 0x%08x", fecs_errcode);
if (g->ops.priv_ring.decode_error_code)
g->ops.priv_ring.decode_error_code(g,
fecs_errcode);
if ((fecs_errcode & 0xffffff00) == 0xbadf1300)
nvgpu_err(g, "NV_PGRAPH_PRI_GPC0_GPCCS_FS_GPC: "
"0x%08x",
gk20a_readl(g, gr_gpc0_fs_gpc_r()));
}
} else { } else {
nvgpu_err(g, "Unhandled NV_PBUS_INTR_0: 0x%08x", val); nvgpu_err(g, "Unhandled NV_PBUS_INTR_0: 0x%08x", val);
} }

View File

@@ -1113,6 +1113,7 @@ struct gpu_ops {
} falcon; } falcon;
struct { struct {
void (*isr)(struct gk20a *g); void (*isr)(struct gk20a *g);
void (*decode_error_code)(struct gk20a *g, u32 error_code);
} priv_ring; } priv_ring;
struct { struct {
int (*check_priv_security)(struct gk20a *g); int (*check_priv_security)(struct gk20a *g);

View File

@@ -49,6 +49,7 @@
#include "gp10b/fb_gp10b.h" #include "gp10b/fb_gp10b.h"
#include "gp10b/pmu_gp10b.h" #include "gp10b/pmu_gp10b.h"
#include "gp10b/gr_gp10b.h" #include "gp10b/gr_gp10b.h"
#include "gp10b/priv_ring_gp10b.h"
#include "gp106/fifo_gp106.h" #include "gp106/fifo_gp106.h"
#include "gp106/regops_gp106.h" #include "gp106/regops_gp106.h"
@@ -729,6 +730,7 @@ static const struct gpu_ops gp106_ops = {
}, },
.priv_ring = { .priv_ring = {
.isr = gp10b_priv_ring_isr, .isr = gp10b_priv_ring_isr,
.decode_error_code = gp10b_priv_ring_decode_error_code,
}, },
.fuse = { .fuse = {
.check_priv_security = gp106_fuse_check_priv_security, .check_priv_security = gp106_fuse_check_priv_security,

View File

@@ -646,6 +646,7 @@ static const struct gpu_ops gp10b_ops = {
}, },
.priv_ring = { .priv_ring = {
.isr = gp10b_priv_ring_isr, .isr = gp10b_priv_ring_isr,
.decode_error_code = gp10b_priv_ring_decode_error_code,
}, },
.fuse = { .fuse = {
.check_priv_security = gp10b_fuse_check_priv_security, .check_priv_security = gp10b_fuse_check_priv_security,

View File

@@ -64,7 +64,7 @@ static const char *const error_type_badf5xyy[] = {
"pri route error" "pri route error"
}; };
static void gp10b_priv_ring_decode_error_code(struct gk20a *g, void gp10b_priv_ring_decode_error_code(struct gk20a *g,
u32 error_code) u32 error_code)
{ {
u32 error_type, error_type_index; u32 error_type, error_type_index;
@@ -141,7 +141,8 @@ void gp10b_priv_ring_isr(struct gk20a *g)
pri_ringstation_sys_priv_error_info_subid_v(error_info), pri_ringstation_sys_priv_error_info_subid_v(error_info),
pri_ringstation_sys_priv_error_info_priv_level_v(error_info), pri_ringstation_sys_priv_error_info_priv_level_v(error_info),
error_code); error_code);
gp10b_priv_ring_decode_error_code(g, error_code); if (g->ops.priv_ring.decode_error_code)
g->ops.priv_ring.decode_error_code(g, error_code);
} }
if (status1) { if (status1) {
@@ -166,7 +167,9 @@ void gp10b_priv_ring_isr(struct gk20a *g)
pri_ringstation_gpc_gpc0_priv_error_info_priv_level_v(error_info), pri_ringstation_gpc_gpc0_priv_error_info_priv_level_v(error_info),
error_code); error_code);
gp10b_priv_ring_decode_error_code(g, error_code); if (g->ops.priv_ring.decode_error_code)
g->ops.priv_ring.decode_error_code(g,
error_code);
status1 = status1 & (~(BIT(gpc))); status1 = status1 & (~(BIT(gpc)));
if (!status1) if (!status1)

View File

@@ -1,7 +1,7 @@
/* /*
* GP10B PRIV ringmaster * GP10B PRIV ringmaster
* *
* Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"), * copy of this software and associated documentation files (the "Software"),
@@ -27,5 +27,7 @@
struct gk20a; struct gk20a;
void gp10b_priv_ring_isr(struct gk20a *g); void gp10b_priv_ring_isr(struct gk20a *g);
void gp10b_priv_ring_decode_error_code(struct gk20a *g,
u32 error_code);
#endif /*__PRIV_RING_GP10B_H__*/ #endif /*__PRIV_RING_GP10B_H__*/

View File

@@ -734,6 +734,7 @@ static const struct gpu_ops gv100_ops = {
}, },
.priv_ring = { .priv_ring = {
.isr = gp10b_priv_ring_isr, .isr = gp10b_priv_ring_isr,
.decode_error_code = gp10b_priv_ring_decode_error_code,
}, },
.nvlink = { .nvlink = {
.discover_ioctrl = gv100_nvlink_discover_ioctrl, .discover_ioctrl = gv100_nvlink_discover_ioctrl,

View File

@@ -711,6 +711,7 @@ static const struct gpu_ops gv11b_ops = {
}, },
.priv_ring = { .priv_ring = {
.isr = gp10b_priv_ring_isr, .isr = gp10b_priv_ring_isr,
.decode_error_code = gp10b_priv_ring_decode_error_code,
}, },
.fuse = { .fuse = {
.check_priv_security = gp10b_fuse_check_priv_security, .check_priv_security = gp10b_fuse_check_priv_security,