diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c index 952e6e6af..bba18789e 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c @@ -456,7 +456,7 @@ static int gk20a_channel_semaphore_wait_fd( struct priv_cmd_entry *wait_cmd = NULL; struct wait_fence_work *w; int written; - int err; + int err, ret; u64 va; sync_fence = gk20a_sync_fence_fdget(fd); @@ -490,8 +490,18 @@ static int gk20a_channel_semaphore_wait_fd( va = gk20a_semaphore_gpu_va(w->sema, c->vm); /* GPU unblocked when when the semaphore value becomes 1. */ written = add_sema_cmd(wait_cmd->ptr, va, 1, true, false); + WARN_ON(written != wait_cmd->size); - sync_fence_wait_async(sync_fence, &w->waiter); + ret = sync_fence_wait_async(sync_fence, &w->waiter); + + /* + * If the sync_fence has already signaled then the above async_wait + * will never trigger. This causes the semaphore release op to never + * happen which, in turn, hangs the GPU. That's bad. So let's just + * do the semaphore_release right now. + */ + if (ret == 1) + gk20a_semaphore_release(w->sema); /* XXX - this fixes an actual bug, we need to hold a ref to this semaphore while the job is in flight. */ diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c index 7a3f90e96..70666407a 100644 --- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -154,7 +154,9 @@ struct gk20a_fence *gk20a_fence_from_semaphore( #ifdef CONFIG_SYNC sync_fence = gk20a_sync_fence_create(timeline, semaphore, - dependency, "fence"); + dependency, "f-gk20a-0x%04llx", + ((u64)(void *)semaphore->value) & + 0xffff); if (!sync_fence) return NULL; #endif diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c index 8740f0e23..e01c0e9aa 100644 --- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c @@ -47,8 +47,9 @@ struct gk20a_sync_pt { ktime_t dep_timestamp; /* - * A spinlock is necessary since there are times when this lock - * will be acquired in interrupt context. + * Use a spin lock here since it will have better performance + * than a mutex - there should be very little contention on this + * lock. */ spinlock_t lock; };