mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-23 18:16:01 +03:00
gpu: nvgpu: decrease refcount when sync-unmap fails
When nvgpu_vm_unmap_sync_buffer fails, nvgpu_vm_unmap currently bails
out without decreasing the buffer refcount. This prevents the buffer
from being released if a deferred job completes after the
timeout (which was observed twice during overnight
stress tests). This also means that the fixed address is not
reusable.
Emit a warning when nvgpu_vm_unmap_sync_buffer fails, but proceed
with decreasing the refcount.
Bug 200578193
Change-Id: Ie0cc7caa7d12ca0a3b42123a5f7a28bda72dabbc
Signed-off-by: ddutta <ddutta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2291352
(cherry picked from commit bb2c8ef511)
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2307940
Reviewed-by: automaticguardword <automaticguardword@nvidia.com>
Reviewed-by: Amulya Yarlagadda <ayarlagadda@nvidia.com>
GVS: Gerrit_Virtual_Submit
Tested-by: Amulya Yarlagadda <ayarlagadda@nvidia.com>
This commit is contained in:
committed by
Amulya Yarlagadda
parent
aaecfae53f
commit
af9af422c6
@@ -1180,6 +1180,7 @@ static int nvgpu_vm_unmap_sync_buffer(struct vm_gk20a *vm,
|
|||||||
{
|
{
|
||||||
struct nvgpu_timeout timeout;
|
struct nvgpu_timeout timeout;
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
|
bool done = false;
|
||||||
|
|
||||||
nvgpu_mutex_release(&vm->update_gmmu_lock);
|
nvgpu_mutex_release(&vm->update_gmmu_lock);
|
||||||
|
|
||||||
@@ -1189,16 +1190,18 @@ static int nvgpu_vm_unmap_sync_buffer(struct vm_gk20a *vm,
|
|||||||
nvgpu_timeout_init(vm->mm->g, &timeout, 100, NVGPU_TIMER_CPU_TIMER);
|
nvgpu_timeout_init(vm->mm->g, &timeout, 100, NVGPU_TIMER_CPU_TIMER);
|
||||||
|
|
||||||
do {
|
do {
|
||||||
if (nvgpu_atomic_read(&mapped_buffer->ref.refcount) == 1) {
|
if (nvgpu_atomic_read(&mapped_buffer->ref.refcount) <= 1) {
|
||||||
break;
|
done = true;
|
||||||
}
|
} else if (nvgpu_timeout_expired_msg(&timeout,
|
||||||
nvgpu_msleep(10);
|
|
||||||
} while (nvgpu_timeout_expired_msg(&timeout,
|
|
||||||
"sync-unmap failed on 0x%llx",
|
"sync-unmap failed on 0x%llx",
|
||||||
mapped_buffer->addr) == 0);
|
mapped_buffer->addr) != 0) {
|
||||||
|
done = true;
|
||||||
|
} else {
|
||||||
|
nvgpu_msleep(10);
|
||||||
|
}
|
||||||
|
} while (!done);
|
||||||
|
|
||||||
if (nvgpu_atomic_read(&mapped_buffer->ref.refcount) != 1 &&
|
if (nvgpu_atomic_read(&mapped_buffer->ref.refcount) > 1) {
|
||||||
nvgpu_timeout_expired(&timeout)) {
|
|
||||||
ret = -ETIMEDOUT;
|
ret = -ETIMEDOUT;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1221,11 +1224,9 @@ void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset,
|
|||||||
|
|
||||||
if (mapped_buffer->flags & NVGPU_VM_MAP_FIXED_OFFSET) {
|
if (mapped_buffer->flags & NVGPU_VM_MAP_FIXED_OFFSET) {
|
||||||
if (nvgpu_vm_unmap_sync_buffer(vm, mapped_buffer)) {
|
if (nvgpu_vm_unmap_sync_buffer(vm, mapped_buffer)) {
|
||||||
/*
|
nvgpu_warn(vm->mm->g, "%d references remaining on 0x%llx",
|
||||||
* Looks like we have failed... Better not continue in
|
nvgpu_atomic_read(&mapped_buffer->ref.refcount),
|
||||||
* case the buffer is in use.
|
mapped_buffer->addr);
|
||||||
*/
|
|
||||||
goto done;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user