mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-24 02:22:34 +03:00
gpu: nvgpu: fix deadlock between clean up and timeout worker
If a job completes right around the timeout boundary, it is possible that we launch both the clean-up worker and the timeout worker for the same job. In the clean-up worker we then try to cancel the timeout worker, and in the timeout worker we try to wait for the clean-up to finish, which leads to a deadlock with the stacks below.

stack 1:
[<ffffffc0000bb484>] cancel_delayed_work_sync+0x10/0x18
[<ffffffc0004f820c>] gk20a_channel_cancel_job_clean_up+0x20/0x44
[<ffffffc0004fc794>] gk20a_channel_abort_clean_up+0x34/0x31c
[<ffffffc0004fcb30>] gk20a_channel_abort+0xb4/0xc0
[<ffffffc0004f3d18>] gk20a_fifo_recover_ch+0x9c/0xec
[<ffffffc0004f3f04>] gk20a_fifo_force_reset_ch+0xdc/0xf8
[<ffffffc0004fa8c4>] gk20a_channel_timeout_handler+0xf8/0x128

stack 2:
[<ffffffc0000bb484>] cancel_delayed_work_sync+0x10/0x18
[<ffffffc0004f82c4>] gk20a_channel_timeout_stop+0x40/0x60
[<ffffffc0004fc488>] gk20a_channel_clean_up_jobs+0x7c/0x238

To fix this, cancel the timeout worker in gk20a_channel_update() itself instead of cancelling it in gk20a_channel_clean_up_jobs().

Bug 200246829
Change-Id: Idef9de3cae29668f4e25beb564422cf2e3736182
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1259963
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
bc5a258049
commit
53a9eceab7
@@ -2218,7 +2218,6 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
|
||||
platform = gk20a_get_platform(g->dev);
|
||||
|
||||
gk20a_channel_cancel_job_clean_up(c, false);
|
||||
gk20a_channel_timeout_stop(c);
|
||||
|
||||
while (1) {
|
||||
bool completed;
|
||||
@@ -2244,8 +2243,6 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
|
||||
break;
|
||||
}
|
||||
|
||||
gk20a_channel_timeout_stop(c);
|
||||
|
||||
WARN_ON(!c->sync);
|
||||
|
||||
if (c->sync) {
|
||||
@@ -2317,6 +2314,7 @@ void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
|
||||
}
|
||||
|
||||
trace_gk20a_channel_update(c->hw_chid);
|
||||
gk20a_channel_timeout_stop(c);
|
||||
gk20a_channel_schedule_job_clean_up(c);
|
||||
|
||||
gk20a_channel_put(c);
|
||||
|
||||
Reference in New Issue
Block a user