gpu: nvgpu: cancel all wdt timeouts while handling SCHED errors

A SCHED error might cause multiple channels' watchdogs to trigger
simultaneously.

Hence, to avoid this conflict, cancel the watchdog timeout on all
channels before recovering from SCHED errors.

Also, define the API gk20a_channel_timeout_stop_all_channels()
to cancel the watchdog (wdt) timeout on all channels.

Bug 200133289

Change-Id: I8324c397891f0a711327b77d0677cd6718af6d01
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/810959
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
This commit is contained in:
Deepak Nibade
2015-10-05 13:54:37 +05:30
committed by Terje Bergstrom
parent 21508ec801
commit d01a0249c4
3 changed files with 22 additions and 0 deletions

View File

@@ -1573,6 +1573,21 @@ static void gk20a_channel_timeout_stop(struct channel_gk20a *ch)
mutex_unlock(&ch->timeout.lock);
}
/*
 * Stop the watchdog timeout on every channel owned by @g.
 *
 * Walks all g->fifo.num_channels entries of the fifo's channel array.
 * A channel is only touched when gk20a_channel_get() succeeds on it
 * (presumably taking a reference -- confirm against its definition);
 * the matching gk20a_channel_put() is issued once the timeout has been
 * stopped. Channels for which the get fails are skipped.
 */
void gk20a_channel_timeout_stop_all_channels(struct gk20a *g)
{
	struct fifo_gk20a *fifo = &g->fifo;
	u32 idx;

	for (idx = 0; idx < fifo->num_channels; idx++) {
		struct channel_gk20a *chan = &fifo->channel[idx];

		/* Skip channels we cannot get hold of. */
		if (!gk20a_channel_get(chan))
			continue;

		gk20a_channel_timeout_stop(chan);
		gk20a_channel_put(chan);
	}
}
static void gk20a_channel_timeout_handler(struct work_struct *work)
{
struct channel_gk20a_job *job;

View File

@@ -256,4 +256,5 @@ void channel_gk20a_free_inst(struct gk20a *g, struct channel_gk20a *ch);
int channel_gk20a_setup_ramfc(struct channel_gk20a *c,
u64 gpfifo_base, u32 gpfifo_entries, u32 flags);
void channel_gk20a_enable(struct channel_gk20a *ch);
/* Cancel the watchdog timeout on all channels of @g. */
void gk20a_channel_timeout_stop_all_channels(struct gk20a *g);
#endif /* CHANNEL_GK20A_H */

View File

@@ -1450,6 +1450,7 @@ static bool gk20a_fifo_handle_sched_error(struct gk20a *g)
struct channel_gk20a *ch = &f->channel[id];
if (is_tsg) {
gk20a_channel_timeout_stop_all_channels(g);
gk20a_fifo_recover(g, BIT(engine_id), id, true,
true, true);
ret = true;
@@ -1467,6 +1468,11 @@ static bool gk20a_fifo_handle_sched_error(struct gk20a *g)
"fifo sched ctxsw timeout error:"
"engine = %u, ch = %d", engine_id, id);
gk20a_gr_debug_dump(g->dev);
/*
* Cancel all channels' timeout since SCHED error might
* trigger multiple watchdogs at a time
*/
gk20a_channel_timeout_stop_all_channels(g);
gk20a_fifo_recover(g, BIT(engine_id), id, false,
true, ch->timeout_debug_dump);
ret = true;