gpu: nvgpu: mask intr before gpu power off

Once the GPU is powered off, i.e. power_on is set to false, the nvgpu ISR
no longer handles stall/nonstall IRQs. Depending on the state
of the GPU, this can result in either of the following errors:

1) irq 458: nobody cared (try booting with the "irqpoll" option)
2) "HSM ERROR 42, GPU" from SCE if it detects that an interrupt is
not handled in time.

Fix these by masking all interrupts just before gpu power off
as nvgpu won't be handling any irq anymore.

While masking interrupts, if there are any pending interrupts,
then report those with a log message.

Bug 1987855
Bug 200424832

Change-Id: I95b087f5c24d439e5da26c6e4fff74d8a525f291
Signed-off-by: Nitin Kumbhar <nkumbhar@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1770802
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Nitin Kumbhar
2018-07-04 22:56:58 +05:30
committed by mobile promotions
parent 2d454db04f
commit 13cc7ea93d
13 changed files with 83 additions and 1 deletions

View File

@@ -93,6 +93,17 @@ int gk20a_detect_chip(struct gk20a *g)
return gpu_init_hal(g);
}
/*
 * Mask GPU interrupts through the chip HAL, then report what is still pending.
 *
 * Both HAL hooks are optional: a NULL hook is silently skipped. The mask hook
 * runs first, the logging hook second.
 */
static void gk20a_mask_interrupts(struct gk20a *g)
{
	void (*hook)(struct gk20a *g);

	/* Step 1: stop further interrupt delivery, if the chip provides a way. */
	hook = g->ops.mc.intr_mask;
	if (hook != NULL) {
		hook(g);
	}

	/* Step 2: leave a trace of anything that was pending at this point. */
	hook = g->ops.mc.log_pending_intrs;
	if (hook != NULL) {
		hook(g);
	}
}
int gk20a_prepare_poweroff(struct gk20a *g)
{
int ret = 0;
@@ -122,6 +133,8 @@ int gk20a_prepare_poweroff(struct gk20a *g)
if (nvgpu_is_enabled(g, NVGPU_PMU_PSTATE))
gk20a_deinit_pstate_support(g);
gk20a_mask_interrupts(g);
g->power_on = false;
return ret;

View File

@@ -1119,6 +1119,7 @@ struct gpu_ops {
int (*apply_smpc_war)(struct dbg_session_gk20a *dbg_s);
} regops;
struct {
void (*intr_mask)(struct gk20a *g);
void (*intr_enable)(struct gk20a *g);
void (*intr_unit_config)(struct gk20a *g,
bool enable, bool is_stalling, u32 unit);
@@ -1139,6 +1140,7 @@ struct gpu_ops {
void (*reset)(struct gk20a *g, u32 units);
u32 (*boot_0)(struct gk20a *g, u32 *arch, u32 *impl, u32 *rev);
bool (*is_intr1_pending)(struct gk20a *g, enum nvgpu_unit unit, u32 mc_intr_1);
void (*log_pending_intrs)(struct gk20a *g);
} mc;
struct {
void (*show_dump)(struct gk20a *g,

View File

@@ -121,6 +121,14 @@ u32 mc_gk20a_isr_nonstall(struct gk20a *g)
return ops;
}
/*
 * Mask interrupts on gk20a-style MC hardware.
 *
 * Writes the "inta disabled" field value to the mc_intr_en_0 register and
 * then to the mc_intr_en_1 register. No state is saved here, so the previous
 * enable configuration is not restored by this function.
 * NOTE(review): intr_en_0 / intr_en_1 presumably gate the stalling and
 * non-stalling interrupt lines respectively - confirm against the hw headers.
 */
void mc_gk20a_intr_mask(struct gk20a *g)
{
/* First enable register: write the disabled value. */
nvgpu_writel(g, mc_intr_en_0_r(),
mc_intr_en_0_inta_disabled_f());
/* Second enable register: same treatment. */
nvgpu_writel(g, mc_intr_en_1_r(),
mc_intr_en_1_inta_disabled_f());
}
void mc_gk20a_intr_enable(struct gk20a *g)
{
u32 eng_intr_mask = gk20a_fifo_engine_interrupt_mask(g);
@@ -292,6 +300,21 @@ bool mc_gk20a_is_intr1_pending(struct gk20a *g,
return is_pending;
}
/*
 * Report pending interrupts on gk20a-style MC hardware.
 *
 * Queries the intr_stall HAL hook and then the intr_nonstall HAL hook, and
 * emits one nvgpu_info() line for each that returns a non-zero value. A zero
 * result produces no output.
 * NOTE(review): both hooks are called without a NULL check, so they are
 * assumed to be populated for every chip that installs this function.
 */
void mc_gk20a_log_pending_intrs(struct gk20a *g)
{
	u32 stall_pending;
	u32 nonstall_pending;

	stall_pending = g->ops.mc.intr_stall(g);
	if (stall_pending != 0U) {
		nvgpu_info(g, "Pending stall intr0=0x%08x", stall_pending);
	}

	nonstall_pending = g->ops.mc.intr_nonstall(g);
	if (nonstall_pending != 0U) {
		nvgpu_info(g, "Pending nonstall intr1=0x%08x",
			nonstall_pending);
	}
}
void mc_gk20a_handle_intr_nonstall(struct gk20a *g, u32 ops)
{
bool semaphore_wakeup, post_events;

View File

@@ -24,6 +24,7 @@
#define MC_GK20A_H
struct gk20a;
void mc_gk20a_intr_mask(struct gk20a *g);
void mc_gk20a_intr_enable(struct gk20a *g);
void mc_gk20a_intr_unit_config(struct gk20a *g, bool enable,
bool is_stalling, u32 mask);
@@ -41,5 +42,7 @@ void gk20a_mc_reset(struct gk20a *g, u32 units);
u32 gk20a_mc_boot_0(struct gk20a *g, u32 *arch, u32 *impl, u32 *rev);
bool mc_gk20a_is_intr1_pending(struct gk20a *g,
enum nvgpu_unit unit, u32 mc_intr_1);
void mc_gk20a_log_pending_intrs(struct gk20a *g);
void mc_gk20a_handle_intr_nonstall(struct gk20a *g, u32 ops);
#endif /* MC_GK20A_H */

View File

@@ -578,6 +578,7 @@ static const struct gpu_ops gm20b_ops = {
.apply_smpc_war = gm20b_apply_smpc_war,
},
.mc = {
.intr_mask = mc_gk20a_intr_mask,
.intr_enable = mc_gk20a_intr_enable,
.intr_unit_config = mc_gk20a_intr_unit_config,
.isr_stall = mc_gk20a_isr_stall,
@@ -593,6 +594,7 @@ static const struct gpu_ops gm20b_ops = {
.reset = gk20a_mc_reset,
.boot_0 = gk20a_mc_boot_0,
.is_intr1_pending = mc_gk20a_is_intr1_pending,
.log_pending_intrs = mc_gk20a_log_pending_intrs,
},
.debug = {
.show_dump = gk20a_debug_show_dump,

View File

@@ -694,6 +694,7 @@ static const struct gpu_ops gp106_ops = {
.apply_smpc_war = gp106_apply_smpc_war,
},
.mc = {
.intr_mask = mc_gp10b_intr_mask,
.intr_enable = mc_gp10b_intr_enable,
.intr_unit_config = mc_gp10b_intr_unit_config,
.isr_stall = mc_gp10b_isr_stall,
@@ -709,6 +710,7 @@ static const struct gpu_ops gp106_ops = {
.reset = gk20a_mc_reset,
.boot_0 = gk20a_mc_boot_0,
.is_intr1_pending = mc_gp10b_is_intr1_pending,
.log_pending_intrs = mc_gp10b_log_pending_intrs,
},
.debug = {
.show_dump = gk20a_debug_show_dump,

View File

@@ -623,6 +623,7 @@ static const struct gpu_ops gp10b_ops = {
.apply_smpc_war = gp10b_apply_smpc_war,
},
.mc = {
.intr_mask = mc_gp10b_intr_mask,
.intr_enable = mc_gp10b_intr_enable,
.intr_unit_config = mc_gp10b_intr_unit_config,
.isr_stall = mc_gp10b_isr_stall,
@@ -638,6 +639,7 @@ static const struct gpu_ops gp10b_ops = {
.reset = gk20a_mc_reset,
.boot_0 = gk20a_mc_boot_0,
.is_intr1_pending = mc_gp10b_is_intr1_pending,
.log_pending_intrs = mc_gp10b_log_pending_intrs,
},
.debug = {
.show_dump = gk20a_debug_show_dump,

View File

@@ -32,6 +32,17 @@
#include <nvgpu/hw/gp10b/hw_mc_gp10b.h>
#define MAX_MC_INTR_REGS 2U
/*
 * Mask interrupts on gp10b-style MC hardware.
 *
 * Writes 0xffffffff to the interrupt enable-clear register selected by
 * NVGPU_MC_INTR_STALLING and then to the one selected by
 * NVGPU_MC_INTR_NONSTALLING.
 * NOTE(review): presumably each set bit written to an en_clear register
 * disables the matching interrupt source, so all-ones masks everything -
 * confirm against the hw manual.
 */
void mc_gp10b_intr_mask(struct gk20a *g)
{
/* Stalling interrupts: clear every enable bit. */
nvgpu_writel(g, mc_intr_en_clear_r(NVGPU_MC_INTR_STALLING),
0xffffffffU);
/* Non-stalling interrupts: clear every enable bit. */
nvgpu_writel(g, mc_intr_en_clear_r(NVGPU_MC_INTR_NONSTALLING),
0xffffffffU);
}
void mc_gp10b_intr_enable(struct gk20a *g)
{
u32 eng_intr_mask = gk20a_fifo_engine_interrupt_mask(g);
@@ -195,3 +206,17 @@ bool mc_gp10b_is_intr1_pending(struct gk20a *g,
return is_pending;
}
/*
 * Report pending interrupts on gp10b-style MC hardware.
 *
 * Reads mc_intr_r(i) for each index below MAX_MC_INTR_REGS and emits one
 * nvgpu_info() line per register whose value is non-zero; registers that read
 * back as zero are skipped. Only register reads and log calls are issued
 * here.
 */
void mc_gp10b_log_pending_intrs(struct gk20a *g)
{
	u32 i, intr;

	for (i = 0U; i < MAX_MC_INTR_REGS; i++) {
		intr = nvgpu_readl(g, mc_intr_r(i));
		if (intr == 0U) {
			continue;
		}
		/* The register index is unsigned, so it is printed with %u. */
		nvgpu_info(g, "Pending intr%u=0x%08x", i, intr);
	}
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -27,6 +27,7 @@ struct gk20a;
#define NVGPU_MC_INTR_STALLING 0U
#define NVGPU_MC_INTR_NONSTALLING 1U
void mc_gp10b_intr_mask(struct gk20a *g);
void mc_gp10b_intr_enable(struct gk20a *g);
void mc_gp10b_intr_unit_config(struct gk20a *g, bool enable,
bool is_stalling, u32 mask);
@@ -34,6 +35,7 @@ void mc_gp10b_isr_stall(struct gk20a *g);
bool mc_gp10b_is_intr1_pending(struct gk20a *g,
enum nvgpu_unit unit, u32 mc_intr_1);
void mc_gp10b_log_pending_intrs(struct gk20a *g);
u32 mc_gp10b_intr_stall(struct gk20a *g);
void mc_gp10b_intr_stall_pause(struct gk20a *g);
void mc_gp10b_intr_stall_resume(struct gk20a *g);

View File

@@ -785,6 +785,7 @@ static const struct gpu_ops gv100_ops = {
.apply_smpc_war = gv100_apply_smpc_war,
},
.mc = {
.intr_mask = mc_gp10b_intr_mask,
.intr_enable = mc_gv100_intr_enable,
.intr_unit_config = mc_gp10b_intr_unit_config,
.isr_stall = mc_gp10b_isr_stall,
@@ -799,6 +800,7 @@ static const struct gpu_ops gv100_ops = {
.disable = gk20a_mc_disable,
.reset = gk20a_mc_reset,
.boot_0 = gk20a_mc_boot_0,
.log_pending_intrs = mc_gp10b_log_pending_intrs,
.is_intr1_pending = mc_gp10b_is_intr1_pending,
.is_intr_hub_pending = gv11b_mc_is_intr_hub_pending,
.is_intr_nvlink_pending = gv100_mc_is_intr_nvlink_pending,

View File

@@ -718,6 +718,7 @@ static const struct gpu_ops gv11b_ops = {
.apply_smpc_war = gv11b_apply_smpc_war,
},
.mc = {
.intr_mask = mc_gp10b_intr_mask,
.intr_enable = mc_gv11b_intr_enable,
.intr_unit_config = mc_gp10b_intr_unit_config,
.isr_stall = mc_gp10b_isr_stall,
@@ -733,6 +734,7 @@ static const struct gpu_ops gv11b_ops = {
.reset = gk20a_mc_reset,
.boot_0 = gk20a_mc_boot_0,
.is_intr1_pending = mc_gp10b_is_intr1_pending,
.log_pending_intrs = mc_gp10b_log_pending_intrs,
.is_intr_hub_pending = gv11b_mc_is_intr_hub_pending,
.is_stall_and_eng_intr_pending =
gv11b_mc_is_stall_and_eng_intr_pending,

View File

@@ -489,6 +489,7 @@ static const struct gpu_ops vgpu_gp10b_ops = {
.apply_smpc_war = gp10b_apply_smpc_war,
},
.mc = {
.intr_mask = mc_gp10b_intr_mask,
.intr_enable = mc_gp10b_intr_enable,
.intr_unit_config = mc_gp10b_intr_unit_config,
.isr_stall = mc_gp10b_isr_stall,
@@ -504,6 +505,7 @@ static const struct gpu_ops vgpu_gp10b_ops = {
.reset = gk20a_mc_reset,
.boot_0 = gk20a_mc_boot_0,
.is_intr1_pending = mc_gp10b_is_intr1_pending,
.log_pending_intrs = mc_gp10b_log_pending_intrs,
},
.debug = {
.show_dump = NULL,

View File

@@ -558,6 +558,7 @@ static const struct gpu_ops vgpu_gv11b_ops = {
.apply_smpc_war = gv11b_apply_smpc_war,
},
.mc = {
.intr_mask = mc_gp10b_intr_mask,
.intr_enable = mc_gv11b_intr_enable,
.intr_unit_config = mc_gp10b_intr_unit_config,
.isr_stall = mc_gp10b_isr_stall,
@@ -574,6 +575,7 @@ static const struct gpu_ops vgpu_gv11b_ops = {
.boot_0 = gk20a_mc_boot_0,
.is_intr1_pending = mc_gp10b_is_intr1_pending,
.is_intr_hub_pending = gv11b_mc_is_intr_hub_pending,
.log_pending_intrs = mc_gp10b_log_pending_intrs,
},
.debug = {
.show_dump = NULL,