gpu: nvgpu: gp10b: enhance priv ring error reporting

-Add detection and reporting of the start_conn, disconnect and
 overflow priv ring fault types
-For busy looping in interrupt context, use nvgpu_udelay()
 instead of nvgpu_usleep_range(), since sleeping is not allowed
 in interrupt context

Bug 200350539

Change-Id: I0d0da86d5688bca36817d445151818632c5ea4f1
Signed-off-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1569589
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Seema Khowala
2017-09-27 11:15:38 -07:00
committed by mobile promotions
parent 31e594befe
commit 5f8cfaa250

View File

@@ -39,10 +39,12 @@ void gp10b_priv_ring_isr(struct gk20a *g)
u32 cmd;
s32 retry = 100;
u32 gpc;
u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
u32 gpc_stride, offset;
if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL))
if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
nvgpu_info(g, "unhandled priv ring intr");
return;
}
status0 = gk20a_readl(g, pri_ringmaster_intr_status0_r());
status1 = gk20a_readl(g, pri_ringmaster_intr_status1_r());
@@ -50,6 +52,16 @@ void gp10b_priv_ring_isr(struct gk20a *g)
nvgpu_err(g, "ringmaster intr status0: 0x%08x,"
"status1: 0x%08x", status0, status1);
if (pri_ringmaster_intr_status0_ring_start_conn_fault_v(status0) != 0)
nvgpu_err(g,
"BUG: connectivity problem on the startup sequence");
if (pri_ringmaster_intr_status0_disconnect_fault_v(status0) != 0)
nvgpu_err(g, "ring disconnected");
if (pri_ringmaster_intr_status0_overflow_fault_v(status0) != 0)
nvgpu_err(g, "ring overflowed");
if (pri_ringmaster_intr_status0_gbl_write_error_sys_v(status0) != 0) {
nvgpu_err(g, "SYS write error. ADR %08x WRDAT %08x INFO %08x, CODE %08x",
gk20a_readl(g, pri_ringstation_sys_priv_error_adr_r()),
@@ -58,27 +70,43 @@ void gp10b_priv_ring_isr(struct gk20a *g)
gk20a_readl(g, pri_ringstation_sys_priv_error_code_r()));
}
for (gpc = 0; gpc < g->gr.gpc_count; gpc++) {
if (status1 & BIT(gpc)) {
nvgpu_err(g, "GPC%u write error. ADR %08x WRDAT %08x INFO %08x, CODE %08x", gpc,
gk20a_readl(g, pri_ringstation_gpc_gpc0_priv_error_adr_r() + gpc * gpc_stride),
gk20a_readl(g, pri_ringstation_gpc_gpc0_priv_error_wrdat_r() + gpc * gpc_stride),
gk20a_readl(g, pri_ringstation_gpc_gpc0_priv_error_info_r() + gpc * gpc_stride),
gk20a_readl(g, pri_ringstation_gpc_gpc0_priv_error_code_r() + gpc * gpc_stride));
if (status1) {
gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
for (gpc = 0; gpc < g->gr.gpc_count; gpc++) {
offset = gpc * gpc_stride;
if (status1 & BIT(gpc)) {
nvgpu_err(g, "GPC%u write error. ADR %08x "
"WRDAT %08x INFO %08x, CODE %08x", gpc,
gk20a_readl(g,
pri_ringstation_gpc_gpc0_priv_error_adr_r() + offset),
gk20a_readl(g,
pri_ringstation_gpc_gpc0_priv_error_wrdat_r() + offset),
gk20a_readl(g,
pri_ringstation_gpc_gpc0_priv_error_info_r() + offset),
gk20a_readl(g,
pri_ringstation_gpc_gpc0_priv_error_code_r() + offset));
status1 = status1 & (~(BIT(gpc)));
if (!status1)
break;
}
}
}
/* clear interrupt */
cmd = gk20a_readl(g, pri_ringmaster_command_r());
cmd = set_field(cmd, pri_ringmaster_command_cmd_m(),
pri_ringmaster_command_cmd_ack_interrupt_f());
gk20a_writel(g, pri_ringmaster_command_r(), cmd);
do {
/* poll for clear interrupt done */
cmd = pri_ringmaster_command_cmd_v(
gk20a_readl(g, pri_ringmaster_command_r()));
while (cmd != pri_ringmaster_command_cmd_no_cmd_v() && retry) {
nvgpu_udelay(20);
cmd = pri_ringmaster_command_cmd_v(
gk20a_readl(g, pri_ringmaster_command_r()));
nvgpu_usleep_range(20, 40);
} while (cmd != pri_ringmaster_command_cmd_no_cmd_v() && --retry);
retry--;
}
if (retry <= 0)
nvgpu_warn(g, "priv ringmaster cmd ack too many retries");
if (retry == 0)
nvgpu_err(g, "priv ringmaster intr ack failed");
}