gpu: nvgpu: Rework the channel timeout handler messages

Rework the messages in the channel timeout handler to be a little more
verbose and clearer about what is happening.

Bug 1732449
JIRA DNVGPU-12

Change-Id: Ifc018d99c647b3036caa8ad453e5e3dfc4151396
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1153669
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
This commit is contained in:
Alex Waterman
2016-05-03 10:46:48 -07:00
committed by Terje Bergstrom
parent 5f36481371
commit 832e4fce1e

View File

@@ -1697,9 +1697,8 @@ static void gk20a_channel_timeout_handler(struct work_struct *work)
/* Need global lock since multiple channels can timeout at a time */
mutex_lock(&g->ch_wdt_lock);
gk20a_debug_dump(g->dev);
gk20a_gr_debug_dump(g->dev);
gk20a_err(dev_from_gk20a(g), "Possible job timeout on ch=%d",
ch->hw_chid);
/* Get timed out job and reset the timer */
mutex_lock(&ch->timeout.lock);
@@ -1707,14 +1706,23 @@ static void gk20a_channel_timeout_handler(struct work_struct *work)
ch->timeout.initialized = false;
mutex_unlock(&ch->timeout.lock);
if (gr_gk20a_disable_ctxsw(g))
if (gr_gk20a_disable_ctxsw(g)) {
gk20a_err(dev_from_gk20a(g), "Unable to disable ctxsw!");
goto fail_unlock;
}
if (gk20a_fence_is_expired(job->post_fence))
if (gk20a_fence_is_expired(job->post_fence)) {
gk20a_err(dev_from_gk20a(g),
"Timed out fence is expired on c=%d!",
ch->hw_chid);
goto fail_enable_ctxsw;
}
gk20a_err(dev_from_gk20a(g), "Job on channel %d timed out\n",
ch->hw_chid);
gk20a_err(dev_from_gk20a(g), "Confirmed: job on channel %d timed out",
ch->hw_chid);
gk20a_debug_dump(g->dev);
gk20a_gr_debug_dump(g->dev);
/* Get failing engine data */
engine_id = gk20a_fifo_get_failing_engine_data(g, &id, &is_tsg);