mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-24 10:34:43 +03:00
gpu: nvgpu: Fix semaphore race condition
A race condition existed in gk20a_channel_semaphore_wait_fd(). In some instances, the semaphore underlying the sync_fence being waited on would have already signaled. This would cause the subsequent sync_fence_wait_async() call to return 1 and do nothing. Normally, the sync_fence_wait_async() call would release the newly created semaphore, but in the above case that would not happen, hanging any channel waiting on that semaphore. To fix this problem, if sync_fence_wait_async() returns 1, immediately release the newly created semaphore. Bug 1604892 Change-Id: I1f5e811695bb099f71b7762835aba4a7e27362ec Signed-off-by: Alex Waterman <alexw@nvidia.com> Reviewed-on: http://git-master/r/935910 Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> GVS: Gerrit_Virtual_Submit
This commit is contained in:
@@ -456,7 +456,7 @@ static int gk20a_channel_semaphore_wait_fd(
|
||||
struct priv_cmd_entry *wait_cmd = NULL;
|
||||
struct wait_fence_work *w;
|
||||
int written;
|
||||
int err;
|
||||
int err, ret;
|
||||
u64 va;
|
||||
|
||||
sync_fence = gk20a_sync_fence_fdget(fd);
|
||||
@@ -490,8 +490,18 @@ static int gk20a_channel_semaphore_wait_fd(
|
||||
va = gk20a_semaphore_gpu_va(w->sema, c->vm);
|
||||
/* GPU unblocked when when the semaphore value becomes 1. */
|
||||
written = add_sema_cmd(wait_cmd->ptr, va, 1, true, false);
|
||||
|
||||
WARN_ON(written != wait_cmd->size);
|
||||
sync_fence_wait_async(sync_fence, &w->waiter);
|
||||
ret = sync_fence_wait_async(sync_fence, &w->waiter);
|
||||
|
||||
/*
|
||||
* If the sync_fence has already signaled then the above async_wait
|
||||
* will never trigger. This causes the semaphore release op to never
|
||||
* happen which, in turn, hangs the GPU. That's bad. So let's just
|
||||
* do the semaphore_release right now.
|
||||
*/
|
||||
if (ret == 1)
|
||||
gk20a_semaphore_release(w->sema);
|
||||
|
||||
/* XXX - this fixes an actual bug, we need to hold a ref to this
|
||||
semaphore while the job is in flight. */
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms and conditions of the GNU General Public License,
|
||||
@@ -154,7 +154,9 @@ struct gk20a_fence *gk20a_fence_from_semaphore(
|
||||
|
||||
#ifdef CONFIG_SYNC
|
||||
sync_fence = gk20a_sync_fence_create(timeline, semaphore,
|
||||
dependency, "fence");
|
||||
dependency, "f-gk20a-0x%04llx",
|
||||
((u64)(void *)semaphore->value) &
|
||||
0xffff);
|
||||
if (!sync_fence)
|
||||
return NULL;
|
||||
#endif
|
||||
|
||||
@@ -47,8 +47,9 @@ struct gk20a_sync_pt {
|
||||
ktime_t dep_timestamp;
|
||||
|
||||
/*
|
||||
* A spinlock is necessary since there are times when this lock
|
||||
* will be acquired in interrupt context.
|
||||
* Use a spin lock here since it will have better performance
|
||||
* than a mutex - there should be very little contention on this
|
||||
* lock.
|
||||
*/
|
||||
spinlock_t lock;
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user