gpu: nvgpu: fix race for channel sync read/write

The CTS test dEQP-VK.api.object_management.max_concurrent.device_group
crashes with an invalid userspace memory access.
Currently, nvgpu_submit_prepare_syncs() races with
nvgpu_channel_clean_up_jobs(), and this race condition is exposed when
aggressive_sync_destroy_thresh is set to a non-zero value.
nvgpu_submit_prepare_syncs() takes a ref on c->sync in order to submit a
job and then releases the channel's sync_lock. Meanwhile,
nvgpu_worker_poll_work() triggers nvgpu_channel_clean_up_jobs(), which
destroys the ref'd c->sync pointer.
This patch protects the channel's sync pointer by holding the channel's
sync_lock for the entire execution of nvgpu_submit_prepare_syncs().

Bug 2613870

Change-Id: I6f3d48aff361d1cb38c30d2ce5de276d0c55fb6f
Signed-off-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2176929
Reviewed-by: Vedashree Vidwans <vvidwans@nvidia.com>
Reviewed-by: Debarshi Dutta <ddutta@nvidia.com>
GVS: Gerrit_Virtual_Submit
Tested-by: Vedashree Vidwans <vvidwans@nvidia.com>
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Seema Khowala
2019-08-16 10:39:18 -07:00
committed by mobile promotions
parent 26d955be23
commit e22d743a20
2 changed files with 6 additions and 5 deletions

View File

@@ -59,19 +59,17 @@ static int nvgpu_submit_prepare_syncs(struct nvgpu_channel *c,
bool flag_sync_fence = (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE) != 0U;
bool flag_fence_wait = (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) != 0U;
nvgpu_mutex_acquire(&c->sync_lock);
if (g->aggressive_sync_destroy_thresh != 0U) {
nvgpu_mutex_acquire(&c->sync_lock);
if (c->sync == NULL) {
c->sync = nvgpu_channel_sync_create(c, false);
if (c->sync == NULL) {
err = -ENOMEM;
nvgpu_mutex_release(&c->sync_lock);
goto fail;
}
new_sync_created = true;
}
nvgpu_channel_sync_get_ref(c->sync);
nvgpu_mutex_release(&c->sync_lock);
}
if ((g->ops.channel.set_syncpt != NULL) && new_sync_created) {
@@ -163,6 +161,7 @@ static int nvgpu_submit_prepare_syncs(struct nvgpu_channel *c,
goto clean_up_incr_cmd;
}
nvgpu_mutex_release(&c->sync_lock);
return 0;
clean_up_incr_cmd:
@@ -181,6 +180,7 @@ clean_up_wait_cmd:
job->wait_cmd = NULL;
}
fail:
nvgpu_mutex_release(&c->sync_lock);
*wait_cmd = NULL;
return err;
}