gpu: nvgpu: Update debug crash dump

Update the debug crash dump to be clearer and more concise, and to
avoid many of the formatting issues that have crept in over
the last couple of years.

This also moves the debug prints from the Linux kernel's
pr_info() to nvgpu_err(). This makes it easier to
filter all nvgpu messages in a log file with a single grep
command.

Change-Id: I00ca9e6c32da7a79c8f6903a139bf6b43e89618a
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1940515
GVS: Gerrit_Virtual_Submit
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Alex Waterman
2018-10-31 12:28:47 -07:00
committed by mobile promotions
parent ac5763eb0c
commit 032b37bee5
2 changed files with 92 additions and 74 deletions

View File

@@ -4126,51 +4126,53 @@ void gk20a_dump_channel_status_ramfc(struct gk20a *g,
syncpointa = info->inst.syncpointa; syncpointa = info->inst.syncpointa;
syncpointb = info->inst.syncpointb; syncpointb = info->inst.syncpointb;
gk20a_debug_output(o, "%d-%s, TSG: %u, pid %d, refs %d%s: ", gk20a_debug_output(o, "Channel ID: %d, TSG ID: %u, pid %d, refs %d; deterministic = %s",
info->chid, info->chid,
g->name, info->tsgid,
info->tsgid, info->pid,
info->pid, info->refs,
info->refs, info->deterministic ? "yes" : "no");
info->deterministic ? ", deterministic" : ""); gk20a_debug_output(o, " In use: %-3s busy: %-3s status: %s",
gk20a_debug_output(o, "channel status: %s in use %s %s\n", (ccsr_channel_enable_v(info->channel_reg) ==
(ccsr_channel_enable_v(info->channel_reg) == ccsr_channel_enable_in_use_v()) ? "yes" : "no",
ccsr_channel_enable_in_use_v()) ? "" : "not", (ccsr_channel_busy_v(info->channel_reg) ==
gk20a_decode_ccsr_chan_status(status), ccsr_channel_busy_true_v()) ? "yes" : "no",
(ccsr_channel_busy_v(info->channel_reg) == gk20a_decode_ccsr_chan_status(status));
ccsr_channel_busy_true_v()) ? "busy" : "not busy");
gk20a_debug_output(o, gk20a_debug_output(o,
"RAMFC : TOP: %016llx PUT: %016llx GET: %016llx " " TOP %016llx"
"FETCH: %016llx\n" " PUT %016llx GET %016llx",
"HEADER: %08x COUNT: %08x\n" info->inst.pb_top_level_get,
"SYNCPOINT %08x %08x SEMAPHORE %08x %08x %08x %08x\n", info->inst.pb_put,
info->inst.pb_top_level_get, info->inst.pb_get);
info->inst.pb_put, gk20a_debug_output(o,
info->inst.pb_get, " FETCH %016llx"
info->inst.pb_fetch, " HEADER %08x COUNT %08x",
info->inst.pb_header, info->inst.pb_fetch,
info->inst.pb_count, info->inst.pb_header,
syncpointa, info->inst.pb_count);
syncpointb, gk20a_debug_output(o,
info->inst.semaphorea, " SYNCPOINT %08x %08x "
info->inst.semaphoreb, "SEMAPHORE %08x %08x %08x %08x",
info->inst.semaphorec, syncpointa,
info->inst.semaphored); syncpointb,
info->inst.semaphorea,
info->inst.semaphoreb,
info->inst.semaphorec,
info->inst.semaphored);
if (info->sema.addr != 0ULL) { if (info->sema.addr == 0ULL) {
gk20a_debug_output(o, "SEMA STATE: value: 0x%08x " gk20a_debug_output(o,
"next_val: 0x%08x addr: 0x%010llx\n", " SEMA STATE: val: %u next_val: %u addr: 0x%010llx",
info->sema.value, info->sema.value,
info->sema.next, info->sema.next,
info->sema.addr); info->sema.addr);
} }
#ifdef CONFIG_TEGRA_GK20A_NVHOST #ifdef CONFIG_TEGRA_GK20A_NVHOST
if ((pbdma_syncpointb_op_v(syncpointb) == pbdma_syncpointb_op_wait_v()) if ((pbdma_syncpointb_op_v(syncpointb) == pbdma_syncpointb_op_wait_v())
&& (pbdma_syncpointb_wait_switch_v(syncpointb) == && (pbdma_syncpointb_wait_switch_v(syncpointb) ==
pbdma_syncpointb_wait_switch_en_v())) pbdma_syncpointb_wait_switch_en_v()))
gk20a_debug_output(o, "%s on syncpt %u (%s) val %u\n", gk20a_debug_output(o, "%s on syncpt %u (%s) val %u",
(status == 3 || status == 8) ? "Waiting" : "Waited", (status == 3 || status == 8) ? "Waiting" : "Waited",
pbdma_syncpointb_syncpt_index_v(syncpointb), pbdma_syncpointb_syncpt_index_v(syncpointb),
nvgpu_nvhost_syncpt_get_name(g->nvhost_dev, nvgpu_nvhost_syncpt_get_name(g->nvhost_dev,
@@ -4178,7 +4180,7 @@ void gk20a_dump_channel_status_ramfc(struct gk20a *g,
pbdma_syncpointa_payload_v(syncpointa)); pbdma_syncpointa_payload_v(syncpointa));
#endif #endif
gk20a_debug_output(o, "\n"); gk20a_debug_output(o, " ");
} }
void gk20a_debug_dump_all_channel_status_ramfc(struct gk20a *g, void gk20a_debug_dump_all_channel_status_ramfc(struct gk20a *g,
@@ -4239,6 +4241,8 @@ void gk20a_debug_dump_all_channel_status_ramfc(struct gk20a *g,
gk20a_channel_put(ch); gk20a_channel_put(ch);
} }
gk20a_debug_output(o, "Channel Status - chip %-5s", g->name);
gk20a_debug_output(o, "---------------------------");
for (chid = 0; chid < f->num_channels; chid++) { for (chid = 0; chid < f->num_channels; chid++) {
struct nvgpu_channel_dump_info *info = infos[chid]; struct nvgpu_channel_dump_info *info = infos[chid];
@@ -4247,6 +4251,7 @@ void gk20a_debug_dump_all_channel_status_ramfc(struct gk20a *g,
nvgpu_kfree(g, info); nvgpu_kfree(g, info);
} }
} }
gk20a_debug_output(o, " ");
nvgpu_kfree(g, infos); nvgpu_kfree(g, infos);
} }
@@ -4258,39 +4263,46 @@ void gk20a_dump_pbdma_status(struct gk20a *g,
host_num_pbdma = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA); host_num_pbdma = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_PBDMA);
gk20a_debug_output(o, "PBDMA Status - chip %-5s", g->name);
gk20a_debug_output(o, "-------------------------");
for (i = 0; i < host_num_pbdma; i++) { for (i = 0; i < host_num_pbdma; i++) {
u32 status = gk20a_readl(g, fifo_pbdma_status_r(i)); u32 status = gk20a_readl(g, fifo_pbdma_status_r(i));
u32 chan_status = fifo_pbdma_status_chan_status_v(status); u32 chan_status = fifo_pbdma_status_chan_status_v(status);
gk20a_debug_output(o, "%s pbdma %d: ", g->name, i); gk20a_debug_output(o, "pbdma %d:", i);
gk20a_debug_output(o, gk20a_debug_output(o,
"id: %d (%s), next_id: %d (%s) chan status: %s\n", " id: %d - %-9s next_id: - %d %-9s | status: %s",
fifo_pbdma_status_id_v(status), fifo_pbdma_status_id_v(status),
(fifo_pbdma_status_id_type_v(status) == (fifo_pbdma_status_id_type_v(status) ==
fifo_pbdma_status_id_type_tsgid_v()) ? fifo_pbdma_status_id_type_tsgid_v()) ?
"tsg" : "channel", "[tsg]" : "[channel]",
fifo_pbdma_status_next_id_v(status), fifo_pbdma_status_next_id_v(status),
(fifo_pbdma_status_next_id_type_v(status) == (fifo_pbdma_status_next_id_type_v(status) ==
fifo_pbdma_status_next_id_type_tsgid_v()) ? fifo_pbdma_status_next_id_type_tsgid_v()) ?
"tsg" : "channel", "[tsg]" : "[channel]",
gk20a_decode_pbdma_chan_eng_ctx_status(chan_status)); gk20a_decode_pbdma_chan_eng_ctx_status(chan_status));
gk20a_debug_output(o, "PBDMA_PUT: %016llx PBDMA_GET: %016llx " gk20a_debug_output(o,
"GP_PUT: %08x GP_GET: %08x " " PBDMA_PUT %016llx PBDMA_GET %016llx",
"FETCH: %08x HEADER: %08x\n"
"HDR: %08x SHADOW0: %08x SHADOW1: %08x",
(u64)gk20a_readl(g, pbdma_put_r(i)) + (u64)gk20a_readl(g, pbdma_put_r(i)) +
((u64)gk20a_readl(g, pbdma_put_hi_r(i)) << 32ULL), ((u64)gk20a_readl(g, pbdma_put_hi_r(i)) << 32ULL),
(u64)gk20a_readl(g, pbdma_get_r(i)) + (u64)gk20a_readl(g, pbdma_get_r(i)) +
((u64)gk20a_readl(g, pbdma_get_hi_r(i)) << 32ULL), ((u64)gk20a_readl(g, pbdma_get_hi_r(i)) << 32ULL));
gk20a_debug_output(o,
" GP_PUT %08x GP_GET %08x "
"FETCH %08x HEADER %08x",
gk20a_readl(g, pbdma_gp_put_r(i)), gk20a_readl(g, pbdma_gp_put_r(i)),
gk20a_readl(g, pbdma_gp_get_r(i)), gk20a_readl(g, pbdma_gp_get_r(i)),
gk20a_readl(g, pbdma_gp_fetch_r(i)), gk20a_readl(g, pbdma_gp_fetch_r(i)),
gk20a_readl(g, pbdma_pb_header_r(i)), gk20a_readl(g, pbdma_pb_header_r(i)));
gk20a_debug_output(o,
" HDR %08x SHADOW0 %08x SHADOW1 %08x",
gk20a_readl(g, pbdma_hdr_shadow_r(i)), gk20a_readl(g, pbdma_hdr_shadow_r(i)),
gk20a_readl(g, pbdma_gp_shadow_0_r(i)), gk20a_readl(g, pbdma_gp_shadow_0_r(i)),
gk20a_readl(g, pbdma_gp_shadow_1_r(i))); gk20a_readl(g, pbdma_gp_shadow_1_r(i)));
} }
gk20a_debug_output(o, "\n");
gk20a_debug_output(o, " ");
} }
void gk20a_dump_eng_status(struct gk20a *g, void gk20a_dump_eng_status(struct gk20a *g,
@@ -4300,30 +4312,33 @@ void gk20a_dump_eng_status(struct gk20a *g,
host_num_engines = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_ENGINES); host_num_engines = nvgpu_get_litter_value(g, GPU_LIT_HOST_NUM_ENGINES);
gk20a_debug_output(o, "Engine status - chip %-5s", g->name);
gk20a_debug_output(o, "--------------------------");
for (i = 0; i < host_num_engines; i++) { for (i = 0; i < host_num_engines; i++) {
u32 status = gk20a_readl(g, fifo_engine_status_r(i)); u32 status = gk20a_readl(g, fifo_engine_status_r(i));
u32 ctx_status = fifo_engine_status_ctx_status_v(status); u32 ctx_status = fifo_engine_status_ctx_status_v(status);
gk20a_debug_output(o, "%s eng %d: ", g->name, i);
gk20a_debug_output(o, gk20a_debug_output(o,
"id: %d (%s), next_id: %d (%s), ctx status: %s ", "Engine %d | "
"ID: %d - %-9s next_id: %d %-9s | status: %s",
i,
fifo_engine_status_id_v(status), fifo_engine_status_id_v(status),
(fifo_engine_status_id_type_v(status) == (fifo_engine_status_id_type_v(status) ==
fifo_engine_status_id_type_tsgid_v()) ? fifo_engine_status_id_type_tsgid_v()) ?
"tsg" : "channel", "[tsg]" : "[channel]",
fifo_engine_status_next_id_v(status), fifo_engine_status_next_id_v(status),
(fifo_engine_status_next_id_type_v(status) == (fifo_engine_status_next_id_type_v(status) ==
fifo_engine_status_next_id_type_tsgid_v()) ? fifo_engine_status_next_id_type_tsgid_v()) ?
"tsg" : "channel", "[tsg]" : "[channel]",
gk20a_decode_pbdma_chan_eng_ctx_status(ctx_status)); gk20a_decode_pbdma_chan_eng_ctx_status(ctx_status));
if (fifo_engine_status_faulted_v(status) != 0U) { if (fifo_engine_status_faulted_v(status) != 0U) {
gk20a_debug_output(o, "faulted "); gk20a_debug_output(o, " State: faulted");
} }
if (fifo_engine_status_engine_v(status) != 0U) { if (fifo_engine_status_engine_v(status) != 0U) {
gk20a_debug_output(o, "busy "); gk20a_debug_output(o, " State: busy");
} }
gk20a_debug_output(o, "\n");
} }
gk20a_debug_output(o, "\n"); gk20a_debug_output(o, "\n");
} }

View File

@@ -40,7 +40,9 @@ unsigned int gk20a_debug_trace_cmdbuf;
static inline void gk20a_debug_write_printk(void *ctx, const char *str, static inline void gk20a_debug_write_printk(void *ctx, const char *str,
size_t len) size_t len)
{ {
pr_info("%s", str); struct gk20a *g = ctx;
nvgpu_err(g, str);
} }
static inline void gk20a_debug_write_to_seqfile(void *ctx, const char *str, static inline void gk20a_debug_write_to_seqfile(void *ctx, const char *str,
@@ -49,8 +51,7 @@ static inline void gk20a_debug_write_to_seqfile(void *ctx, const char *str,
seq_write((struct seq_file *)ctx, str, len); seq_write((struct seq_file *)ctx, str, len);
} }
void gk20a_debug_output(struct gk20a_debug_output *o, void gk20a_debug_output(struct gk20a_debug_output *o, const char *fmt, ...)
const char *fmt, ...)
{ {
va_list args; va_list args;
int len; int len;
@@ -61,6 +62,13 @@ void gk20a_debug_output(struct gk20a_debug_output *o,
o->fn(o->ctx, o->buf, len); o->fn(o->ctx, o->buf, len);
} }
void gk20a_debug_show_dump(struct gk20a *g, struct gk20a_debug_output *o)
{
gk20a_debug_dump_all_channel_status_ramfc(g, o);
g->ops.fifo.dump_pbdma_status(g, o);
g->ops.fifo.dump_eng_status(g, o);
}
static int gk20a_gr_dump_regs(struct gk20a *g, static int gk20a_gr_dump_regs(struct gk20a *g,
struct gk20a_debug_output *o) struct gk20a_debug_output *o)
{ {
@@ -73,7 +81,8 @@ static int gk20a_gr_dump_regs(struct gk20a *g,
int gk20a_gr_debug_dump(struct gk20a *g) int gk20a_gr_debug_dump(struct gk20a *g)
{ {
struct gk20a_debug_output o = { struct gk20a_debug_output o = {
.fn = gk20a_debug_write_printk .fn = gk20a_debug_write_printk,
.ctx = g,
}; };
gk20a_gr_dump_regs(g, &o); gk20a_gr_dump_regs(g, &o);
@@ -108,7 +117,8 @@ void gk20a_debug_dump(struct gk20a *g)
{ {
struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g)); struct gk20a_platform *platform = gk20a_get_platform(dev_from_gk20a(g));
struct gk20a_debug_output o = { struct gk20a_debug_output o = {
.fn = gk20a_debug_write_printk .fn = gk20a_debug_write_printk,
.ctx = g,
}; };
/* HAL only initialized after 1st power-on */ /* HAL only initialized after 1st power-on */
@@ -169,13 +179,6 @@ static const struct file_operations gk20a_debug_fops = {
.release = single_release, .release = single_release,
}; };
void gk20a_debug_show_dump(struct gk20a *g, struct gk20a_debug_output *o)
{
gk20a_debug_dump_all_channel_status_ramfc(g, o);
g->ops.fifo.dump_pbdma_status(g, o);
g->ops.fifo.dump_eng_status(g, o);
}
static ssize_t disable_bigpage_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) static ssize_t disable_bigpage_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos)
{ {
char buf[3]; char buf[3];