gpu: nvgpu: Fault engines on PBDMA error

On a PBDMA error, the engine might not be wedged, but we still need to
kick the channel off the engine. Add that logic. Also, when the channel
is not resident on any engine, remove it from the runlists.
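
In outline, the new path behaves as below. This is a condensed sketch of
the gk20a_fifo_recover_ch() helper added in the diff that follows, with
locking and error notification omitted; the function name here is just a
label for the sketch:

	/* Sketch only: the real implementation is gk20a_fifo_recover_ch()
	 * in fifo code below. */
	void recover_ch_sketch(struct gk20a *g, u32 hw_chid, bool verbose)
	{
		u32 engines = gk20a_fifo_engines_on_ch(g, hw_chid);

		if (engines) {
			/* Channel is loaded on at least one engine:
			 * fault those engines to kick the channel out. */
			gk20a_fifo_recover(g, engines, verbose);
		} else {
			/* Channel is not on any engine: disable it and
			 * drop it from every runlist. */
			struct channel_gk20a *ch = &g->fifo.channel[hw_chid];
			int i;

			gk20a_disable_channel_no_update(ch);
			for (i = 0; i < g->fifo.max_runlists; i++)
				gk20a_fifo_update_runlist(g, i, hw_chid,
						false, false);
		}
	}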

Bug 1498688

Change-Id: I5939feb41d0a90635ba313b265c7e3b5d3f48622
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/417682
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-by: Kevin Huang (Eng-SW) <kevinh@nvidia.com>
Reviewed-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Author: Terje Bergstrom <tbergstrom@nvidia.com>
Date: 2014-06-02 12:02:26 +03:00
Committed-by: Dan Willemsen
Parent: bcf8c6411c
Commit: 28c35a1b99

3 changed files with 58 additions and 48 deletions


@@ -1145,6 +1145,51 @@ static void gk20a_fifo_trigger_mmu_fault(struct gk20a *g,
 	gk20a_writel(g, fifo_intr_en_0_r(), 0x7FFFFFFF);
 }
 
+u32 gk20a_fifo_engines_on_ch(struct gk20a *g, u32 hw_chid)
+{
+	int i;
+	u32 engines = 0;
+
+	for (i = 0; i < g->fifo.max_engines; i++) {
+		u32 status = gk20a_readl(g, fifo_engine_status_r(i));
+		u32 ctx_status =
+			fifo_engine_status_ctx_status_v(status);
+		bool type_ch = fifo_pbdma_status_id_type_v(status) ==
+			fifo_pbdma_status_id_type_chid_v();
+		bool busy = fifo_engine_status_engine_v(status) ==
+			fifo_engine_status_engine_busy_v();
+		u32 id = (ctx_status ==
+			fifo_engine_status_ctx_status_ctxsw_load_v()) ?
+			fifo_engine_status_next_id_v(status) :
+			fifo_engine_status_id_v(status);
+
+		if (type_ch && busy && id == hw_chid)
+			engines |= BIT(i);
+	}
+
+	return engines;
+}
+
+void gk20a_fifo_recover_ch(struct gk20a *g, u32 hw_chid, bool verbose)
+{
+	u32 engines = gk20a_fifo_engines_on_ch(g, hw_chid);
+
+	if (engines)
+		gk20a_fifo_recover(g, engines, verbose);
+	else {
+		int i;
+		struct channel_gk20a *ch =
+			g->fifo.channel + hw_chid;
+
+		gk20a_disable_channel_no_update(ch);
+		for (i = 0; i < g->fifo.max_runlists; i++)
+			gk20a_fifo_update_runlist(g, i,
+					hw_chid, false, false);
+
+		if (gk20a_fifo_set_ctx_mmu_error(g, ch))
+			gk20a_debug_dump(g->dev);
+	}
+}
+
 void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids,
 		bool verbose)
 {
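
A note on gk20a_fifo_engines_on_ch() above: while an engine is in the
middle of a context-switch load, the status register reports the incoming
channel in the next_id field rather than id, which is why the helper reads
next_id whenever ctx_status equals the ctxsw_load value. A hypothetical
caller, for illustration only (the logging is not part of this change):

	/* Log every engine on which hw_chid is currently resident. */
	u32 engines = gk20a_fifo_engines_on_ch(g, hw_chid);
	int i;

	for (i = 0; i < g->fifo.max_engines; i++)
		if (engines & BIT(i))
			gk20a_err(dev_from_gk20a(g),
				"channel %d busy on engine %d",
				hw_chid, i);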
@@ -1326,7 +1371,6 @@ static u32 fifo_error_isr(struct gk20a *g, u32 fifo_intr)
 	return handled;
 }
 
-
 static u32 gk20a_fifo_handle_pbdma_intr(struct device *dev,
 		struct gk20a *g,
 		struct fifo_gk20a *f,
@@ -1345,9 +1389,11 @@ static u32 gk20a_fifo_handle_pbdma_intr(struct device *dev,
 	if ((f->intr.pbdma.device_fatal_0 |
 			f->intr.pbdma.channel_fatal_0 |
 			f->intr.pbdma.restartable_0) & pbdma_intr_0) {
-		dev_err(dev, "pbdma_intr_0(%d):0x%08x PBH: %08x M0: %08x",
+		gk20a_err(dev_from_gk20a(g),
+			"pbdma_intr_0(%d):0x%08x PBH: %08x SHADOW: %08x M0: %08x",
 			pbdma_id, pbdma_intr_0,
 			gk20a_readl(g, pbdma_pb_header_r(pbdma_id)),
+			gk20a_readl(g, pbdma_hdr_shadow_r(pbdma_id)),
 			gk20a_readl(g, pbdma_method0_r(pbdma_id)));
 		reset = true;
 		handled |= ((f->intr.pbdma.device_fatal_0 |
@@ -1371,32 +1417,10 @@ static u32 gk20a_fifo_handle_pbdma_intr(struct device *dev,
 	if (reset) {
 		/* Remove the channel from runlist */
 		u32 status = gk20a_readl(g, fifo_pbdma_status_r(pbdma_id));
+		u32 hw_chid = fifo_pbdma_status_id_v(status);
 		if (fifo_pbdma_status_id_type_v(status)
 				== fifo_pbdma_status_id_type_chid_v()) {
-			struct channel_gk20a *ch = g->fifo.channel +
-				fifo_pbdma_status_id_v(status);
-			struct fifo_runlist_info_gk20a *runlist =
-				g->fifo.runlist_info;
-			int i;
-			bool verbose;
-
-			/* disable the channel from hw and increment
-			 * syncpoints */
-			gk20a_disable_channel_no_update(ch);
-
-			/* remove the channel from runlist */
-			clear_bit(ch->hw_chid,
-				runlist->active_channels);
-			ch->has_timedout = true;
-
-			/* Recreate the runlist */
-			for (i = 0; i < g->fifo.max_runlists; i++)
-				gk20a_fifo_update_runlist(g,
-					0, ~0, false, false);
-
-			verbose = gk20a_fifo_set_ctx_mmu_error(g, ch);
-			if (verbose)
-				gk20a_debug_dump(g->dev);
-
+			gk20a_fifo_recover_ch(g, hw_chid, true);
 		}
 	}
 
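
Worth noting in the removed block above: the runlist loop iterated i over
max_runlists but passed a hard-coded runlist 0 (and channel id ~0) to
gk20a_fifo_update_runlist() on every pass. gk20a_fifo_recover_ch(), added
earlier in this file, issues the per-runlist call that loop appears to
have intended:

	/* From gk20a_fifo_recover_ch() above: drop this channel from
	 * each runlist in turn (add = false, wait_for_finish = false). */
	for (i = 0; i < g->fifo.max_runlists; i++)
		gk20a_fifo_update_runlist(g, i, hw_chid, false, false);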
@@ -1523,34 +1547,14 @@ int gk20a_fifo_preempt_channel(struct gk20a *g, u32 hw_chid)
 			!tegra_platform_is_silicon());
 
 	if (ret) {
-		int i;
-		u32 engines = 0;
-		struct fifo_gk20a *f = &g->fifo;
-		struct channel_gk20a *ch = &f->channel[hw_chid];
+		struct channel_gk20a *ch = &g->fifo.channel[hw_chid];
 
 		gk20a_err(dev_from_gk20a(g), "preempt channel %d timeout\n",
 			hw_chid);
 
-		/* forcefully reset all busy engines using this channel */
-		for (i = 0; i < g->fifo.max_engines; i++) {
-			u32 status = gk20a_readl(g, fifo_engine_status_r(i));
-			u32 ctx_status =
-				fifo_engine_status_ctx_status_v(status);
-			bool type_ch = fifo_pbdma_status_id_type_v(status) ==
-				fifo_pbdma_status_id_type_chid_v();
-			bool busy = fifo_engine_status_engine_v(status) ==
-				fifo_engine_status_engine_busy_v();
-			u32 id = (ctx_status ==
-				fifo_engine_status_ctx_status_ctxsw_load_v()) ?
-				fifo_engine_status_next_id_v(status) :
-				fifo_engine_status_id_v(status);
-			if (type_ch && busy && id == hw_chid)
-				engines |= BIT(i);
-		}
-
 		gk20a_set_error_notifier(ch,
 			NVHOST_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT);
-		gk20a_fifo_recover(g, engines, true);
+		gk20a_fifo_recover_ch(g, hw_chid, true);
 	}
 
 	/* re-enable elpg or release pmu mutex */


@@ -149,6 +149,7 @@ int gk20a_fifo_enable_engine_activity(struct gk20a *g,
 int gk20a_fifo_disable_engine_activity(struct gk20a *g,
 		struct fifo_engine_info_gk20a *eng_info,
 		bool wait_for_idle);
+u32 gk20a_fifo_engines_on_ch(struct gk20a *g, u32 hw_chid);
 
 int gk20a_fifo_update_runlist(struct gk20a *g, u32 engine_id, u32 hw_chid,
 		bool add, bool wait_for_finish);
@@ -157,6 +158,7 @@ int gk20a_fifo_suspend(struct gk20a *g);
 bool gk20a_fifo_mmu_fault_pending(struct gk20a *g);
 void gk20a_fifo_recover(struct gk20a *g, u32 engine_ids, bool verbose);
+void gk20a_fifo_recover_ch(struct gk20a *g, u32 hw_chid, bool verbose);
 int gk20a_init_fifo_reset_enable_hw(struct gk20a *g);
 
 void gk20a_init_fifo(struct gpu_ops *gops);


@@ -174,6 +174,10 @@ static inline u32 pbdma_pb_header_type_inc_f(void)
 {
 	return 0x20000000;
 }
+static inline u32 pbdma_hdr_shadow_r(u32 i)
+{
+	return 0x00040118 + i*8192;
+}
 static inline u32 pbdma_subdevice_r(u32 i)
 {
 	return 0x00040094 + i*8192;
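
The new accessor follows the usual pattern in these generated hw_*.h
headers: a base offset plus an 8192-byte (0x2000) per-PBDMA stride. A
worked example of the read the fault print above performs (the addresses
are simple arithmetic from the accessor, not measured values):

	/* PBDMA 0: 0x00040118 + 0*8192 = 0x00040118
	 * PBDMA 1: 0x00040118 + 1*8192 = 0x00042118 */
	u32 shadow = gk20a_readl(g, pbdma_hdr_shadow_r(pbdma_id));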