gpu: nvgpu: decrease refcount when sync-unmap fails

When nvgpu_vm_unmap_sync_buffer fails, nvgpu_vm_unmap currently bails
out without decreasing the buffer refcount. This prevents the buffer
from being released if a deferred job completes after the
timeout (which was observed twice during overnight
stress tests). This also means that the fixed address is not
reusable.

Emit a warning when nvgpu_vm_unmap_sync_buffer fails, but proceed
with decreasing the refcount.

Bug 200578193

Change-Id: Ie0cc7caa7d12ca0a3b42123a5f7a28bda72dabbc
Signed-off-by: ddutta <ddutta@nvidia.com>
(cherry picked from commit a433f26d5b
in dev-main)
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2291352
Tested-by: Naveen Kumar S <nkumars@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: automaticguardword <automaticguardword@nvidia.com>
Reviewed-by: Bibek Basu <bbasu@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
ddutta
2020-03-03 14:49:36 +05:30
committed by mobile promotions
parent fbad02d5e0
commit bb2c8ef511

View File

@@ -1180,6 +1180,7 @@ static int nvgpu_vm_unmap_sync_buffer(struct vm_gk20a *vm,
{
struct nvgpu_timeout timeout;
int ret = 0;
bool done = false;
nvgpu_mutex_release(&vm->update_gmmu_lock);
@@ -1189,16 +1190,18 @@ static int nvgpu_vm_unmap_sync_buffer(struct vm_gk20a *vm,
nvgpu_timeout_init(vm->mm->g, &timeout, 100, NVGPU_TIMER_CPU_TIMER);
do {
if (nvgpu_atomic_read(&mapped_buffer->ref.refcount) == 1) {
break;
}
nvgpu_msleep(10);
} while (nvgpu_timeout_expired_msg(&timeout,
if (nvgpu_atomic_read(&mapped_buffer->ref.refcount) <= 1) {
done = true;
} else if (nvgpu_timeout_expired_msg(&timeout,
"sync-unmap failed on 0x%llx",
mapped_buffer->addr) == 0);
mapped_buffer->addr) != 0) {
done = true;
} else {
nvgpu_msleep(10);
}
} while (!done);
if (nvgpu_atomic_read(&mapped_buffer->ref.refcount) != 1 &&
nvgpu_timeout_expired(&timeout)) {
if (nvgpu_atomic_read(&mapped_buffer->ref.refcount) > 1) {
ret = -ETIMEDOUT;
}
@@ -1221,11 +1224,9 @@ void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset,
if (mapped_buffer->flags & NVGPU_VM_MAP_FIXED_OFFSET) {
if (nvgpu_vm_unmap_sync_buffer(vm, mapped_buffer)) {
/*
* Looks like we have failed... Better not continue in
* case the buffer is in use.
*/
goto done;
nvgpu_warn(vm->mm->g, "%d references remaining on 0x%llx",
nvgpu_atomic_read(&mapped_buffer->ref.refcount),
mapped_buffer->addr);
}
}