gpu: nvgpu: VM unmap refactoring

Re-organize the unmap code to be better split between OS specific
requirements and common core requirements. The new code flow works
as follows:

  nvgpu_vm_unmap()

Is the primary entry point to the unmap path. It takes a VM and a GPU
virtual address to unmap, plus an optional mapping batch struct.

This function is responsible for making sure a buffer really is mapped
at the given address and, when called on a fixed-offset mapping, for
making sure the mapping will definitely be freed: since buffers are
ref-counted, it waits for the ref-count to drop to 1 before
proceeding. It then decrements the ref-count and returns.
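
For a caller that isn't batching unmaps this boils down to a single
call. A minimal sketch (vm and gpu_va are placeholders for whatever VM
handle and GPU virtual address the caller already holds; the cde and
perfbuf hunks below show real call sites):

  /* NULL batch: this unmap is not part of a batch. */
  nvgpu_vm_unmap(vm, gpu_va, NULL);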

If the ref-count hits zero then __nvgpu_vm_unmap_ref() is called,
which simply calls __nvgpu_vm_unmap() with the relevant batch struct
if one is present. This is where the real work is done:
__nvgpu_vm_unmap() clears the GMMU mapping, removes the mapped buffer
from the various lists and trees it may be in, and then calls
nvgpu_vm_unmap_system(). That function handles any OS specific
teardown and must be defined by every VM OS implementation.
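
As a rough illustration of the shape of this flow only (not the actual
driver code: mapped_buf, unmap(), real_unmap(), unmap_system() and
main() below are made-up stand-ins for nvgpu_mapped_buf,
nvgpu_vm_unmap(), __nvgpu_vm_unmap() and nvgpu_vm_unmap_system(), and
locking, GMMU programming and error handling are left out), the
ref-counted teardown ending in an OS hook looks like:

  #include <stdio.h>
  #include <stdlib.h>

  struct mapped_buf {
          int refcount;   /* stand-in for the nvgpu_ref inside nvgpu_mapped_buf */
  };

  /* Stand-in for nvgpu_vm_unmap_system(): OS specific teardown only. */
  static void unmap_system(struct mapped_buf *buf)
  {
          (void)buf;
          printf("OS specific teardown (unpin, dma_buf_put, ...)\n");
  }

  /* Stand-in for __nvgpu_vm_unmap(): the real work, ending with the OS hook. */
  static void real_unmap(struct mapped_buf *buf)
  {
          printf("clear GMMU mapping, drop buffer from trees and lists\n");
          unmap_system(buf);
          free(buf);
  }

  /* Stand-in for nvgpu_vm_unmap(): just drop one reference. */
  static void unmap(struct mapped_buf *buf)
  {
          if (--buf->refcount == 0)
                  real_unmap(buf);        /* like __nvgpu_vm_unmap_ref() */
  }

  int main(void)
  {
          struct mapped_buf *buf = calloc(1, sizeof(*buf));

          if (!buf)
                  return 1;
          buf->refcount = 2;      /* mapped, then referenced a second time */
          unmap(buf);             /* only drops a reference */
          unmap(buf);             /* last reference: the real teardown runs */
          return 0;
  }

The real __nvgpu_vm_unmap() additionally threads the mapping batch
through and handles vm_area membership, as the diff below shows.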

There's also a shortcut used by some other core VM code to free
mappings without going through nvgpu_vm_unmap(). Those callers simply
drop the mapping ref-count directly with nvgpu_ref_put(), which
invokes __nvgpu_vm_unmap_ref() once the ref-count hits zero.
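
Concretely, this is the pattern the nvgpu_vm_put_buffers() and
nvgpu_vm_area_free() hunks below use (the kref_put_batch assignment is
only needed when a batch is actually in play; vm_area_free skips it):

  vm->kref_put_batch = batch;
  nvgpu_ref_put(&mapped_buffer->ref, __nvgpu_vm_unmap_ref);
  vm->kref_put_batch = NULL;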

JIRA NVGPU-30
JIRA NVGPU-71

Change-Id: Ic626d37ab936819841bab45214f027b40ffa4e5a
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1583982
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
8 changed files with 136 additions and 106 deletions


@@ -1167,7 +1167,7 @@ __releases(&l->cde_app->mutex)
cde_ctx->init_cmd_executed = true;
/* unmap the buffers - channel holds references to them now */
nvgpu_vm_unmap(cde_ctx->vm, map_vaddr);
nvgpu_vm_unmap(cde_ctx->vm, map_vaddr, NULL);
return err;
@@ -1175,7 +1175,7 @@ exit_unmap_surface:
if (surface)
dma_buf_vunmap(compbits_scatter_buf, surface);
exit_unmap_vaddr:
nvgpu_vm_unmap(cde_ctx->vm, map_vaddr);
nvgpu_vm_unmap(cde_ctx->vm, map_vaddr, NULL);
exit_idle:
gk20a_idle(g);
return err;


@@ -118,7 +118,10 @@ static int gk20a_as_ioctl_unmap_buffer(
struct nvgpu_as_unmap_buffer_args *args)
{
gk20a_dbg_fn("");
return nvgpu_vm_unmap_buffer(as_share->vm, args->offset, NULL);
nvgpu_vm_unmap(as_share->vm, args->offset, NULL);
return 0;
}
static int gk20a_as_ioctl_map_buffer_batch(
@@ -155,10 +158,7 @@ static int gk20a_as_ioctl_map_buffer_batch(
break;
}
err = nvgpu_vm_unmap_buffer(as_share->vm, unmap_args.offset,
&batch);
if (err)
break;
nvgpu_vm_unmap(as_share->vm, unmap_args.offset, &batch);
}
if (err) {


@@ -1142,7 +1142,7 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
return 0;
err_unmap:
nvgpu_vm_unmap_buffer(mm->perfbuf.vm, args->offset, NULL);
nvgpu_vm_unmap(mm->perfbuf.vm, args->offset, NULL);
err_remove_vm:
nvgpu_vm_put(mm->perfbuf.vm);
nvgpu_mutex_release(&g->dbg_sessions_lock);
@@ -1386,7 +1386,7 @@ static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset)
err = g->ops.dbg_session_ops.perfbuffer_disable(g);
nvgpu_vm_unmap_buffer(vm, offset, NULL);
nvgpu_vm_unmap(vm, offset, NULL);
nvgpu_free_inst_block(g, &mm->perfbuf.inst_block);
nvgpu_vm_put(vm);


@@ -641,88 +641,20 @@ int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
return err;
}
int nvgpu_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset,
struct vm_gk20a_mapping_batch *batch)
{
struct gk20a *g = vm->mm->g;
struct nvgpu_mapped_buf *mapped_buffer;
nvgpu_mutex_acquire(&vm->update_gmmu_lock);
mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, offset);
if (!mapped_buffer) {
nvgpu_mutex_release(&vm->update_gmmu_lock);
nvgpu_err(g, "invalid addr to unmap 0x%llx", offset);
return 0;
}
if (mapped_buffer->flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
struct nvgpu_timeout timeout;
nvgpu_mutex_release(&vm->update_gmmu_lock);
nvgpu_timeout_init(vm->mm->g, &timeout, 10000,
NVGPU_TIMER_RETRY_TIMER);
do {
if (nvgpu_atomic_read(
&mapped_buffer->ref.refcount) == 1)
break;
nvgpu_udelay(5);
} while (!nvgpu_timeout_expired_msg(&timeout,
"sync-unmap failed on 0x%llx"));
nvgpu_mutex_acquire(&vm->update_gmmu_lock);
}
if (mapped_buffer->user_mapped == 0) {
nvgpu_mutex_release(&vm->update_gmmu_lock);
nvgpu_err(g, "addr already unmapped from user 0x%llx", offset);
return 0;
}
mapped_buffer->user_mapped--;
if (mapped_buffer->user_mapped == 0)
vm->num_user_mapped_buffers--;
vm->kref_put_batch = batch;
nvgpu_ref_put(&mapped_buffer->ref, nvgpu_vm_unmap_locked_ref);
vm->kref_put_batch = NULL;
nvgpu_mutex_release(&vm->update_gmmu_lock);
return 0;
}
/* NOTE! mapped_buffers lock must be held */
void nvgpu_vm_unmap_locked(struct nvgpu_mapped_buf *mapped_buffer,
struct vm_gk20a_mapping_batch *batch)
/*
* This is the function call-back for freeing OS specific components of an
* nvgpu_mapped_buf. This should most likely never be called outside of the
* core MM framework!
*
* Note: the VM lock will be held.
*/
void nvgpu_vm_unmap_system(struct nvgpu_mapped_buf *mapped_buffer)
{
struct vm_gk20a *vm = mapped_buffer->vm;
struct gk20a *g = vm->mm->g;
g->ops.mm.gmmu_unmap(vm,
mapped_buffer->addr,
mapped_buffer->size,
mapped_buffer->pgsz_idx,
mapped_buffer->va_allocated,
gk20a_mem_flag_none,
mapped_buffer->vm_area ?
mapped_buffer->vm_area->sparse : false,
batch);
gk20a_mm_unpin(dev_from_vm(vm), mapped_buffer->dmabuf,
mapped_buffer->sgt);
/* remove from mapped buffer tree and remove list, free */
nvgpu_remove_mapped_buf(vm, mapped_buffer);
if (!nvgpu_list_empty(&mapped_buffer->buffer_list))
nvgpu_list_del(&mapped_buffer->buffer_list);
/* keep track of mapped buffers */
if (mapped_buffer->user_mapped)
vm->num_user_mapped_buffers--;
if (mapped_buffer->own_mem_ref)
dma_buf_put(mapped_buffer->dmabuf);
nvgpu_kfree(g, mapped_buffer);
}


@@ -36,6 +36,9 @@
#include "gk20a/gk20a.h"
#include "gk20a/mm_gk20a.h"
static void __nvgpu_vm_unmap(struct nvgpu_mapped_buf *mapped_buffer,
struct vm_gk20a_mapping_batch *batch);
int vm_aspace_id(struct vm_gk20a *vm)
{
return vm->as_share ? vm->as_share->id : -1;
@@ -538,7 +541,7 @@ static void __nvgpu_vm_remove(struct vm_gk20a *vm)
nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers);
while (node) {
mapped_buffer = mapped_buffer_from_rbtree_node(node);
nvgpu_vm_unmap_locked(mapped_buffer, NULL);
__nvgpu_vm_unmap(mapped_buffer, NULL);
nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers);
}
@@ -702,8 +705,7 @@ void nvgpu_vm_put_buffers(struct vm_gk20a *vm,
vm->kref_put_batch = &batch;
for (i = 0; i < num_buffers; ++i)
nvgpu_ref_put(&mapped_buffers[i]->ref,
nvgpu_vm_unmap_locked_ref);
nvgpu_ref_put(&mapped_buffers[i]->ref, __nvgpu_vm_unmap_ref);
vm->kref_put_batch = NULL;
nvgpu_vm_mapping_batch_finish_locked(vm, &batch);
@@ -712,26 +714,118 @@ void nvgpu_vm_put_buffers(struct vm_gk20a *vm,
nvgpu_big_free(vm->mm->g, mapped_buffers);
}
void nvgpu_vm_unmap_locked_ref(struct nvgpu_ref *ref)
/*
* Really unmap. This does the real GMMU unmap and removes the mapping from the
* VM map tracking tree (and vm_area list if necessary).
*/
static void __nvgpu_vm_unmap(struct nvgpu_mapped_buf *mapped_buffer,
struct vm_gk20a_mapping_batch *batch)
{
struct vm_gk20a *vm = mapped_buffer->vm;
struct gk20a *g = vm->mm->g;
vm->num_user_mapped_buffers--;
g->ops.mm.gmmu_unmap(vm,
mapped_buffer->addr,
mapped_buffer->size,
mapped_buffer->pgsz_idx,
mapped_buffer->va_allocated,
gk20a_mem_flag_none,
mapped_buffer->vm_area ?
mapped_buffer->vm_area->sparse : false,
batch);
/*
* Remove from mapped buffer tree. Then delete the buffer from the
* linked list of mapped buffers; though note: not all mapped buffers
* are part of a vm_area.
*/
nvgpu_remove_mapped_buf(vm, mapped_buffer);
nvgpu_list_del(&mapped_buffer->buffer_list);
/*
* OS specific freeing. This is after the generic freeing in case the
* generic freeing relies on some component of the OS specific
* nvgpu_mapped_buf in some abstraction or the like.
*/
nvgpu_vm_unmap_system(mapped_buffer);
nvgpu_kfree(g, mapped_buffer);
}
void __nvgpu_vm_unmap_ref(struct nvgpu_ref *ref)
{
struct nvgpu_mapped_buf *mapped_buffer =
container_of(ref, struct nvgpu_mapped_buf, ref);
nvgpu_vm_unmap_locked(mapped_buffer, mapped_buffer->vm->kref_put_batch);
__nvgpu_vm_unmap(mapped_buffer, mapped_buffer->vm->kref_put_batch);
}
void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset)
/*
* For fixed-offset buffers we must sync the buffer. That means we wait for the
* buffer to hit a ref-count of 1 before proceeding.
*
* Note: this requires the update_gmmu_lock to be held since we release it and
* re-acquire it in this function.
*/
static int nvgpu_vm_unmap_sync_buffer(struct vm_gk20a *vm,
struct nvgpu_mapped_buf *mapped_buffer)
{
struct nvgpu_timeout timeout;
int ret = 0;
nvgpu_mutex_release(&vm->update_gmmu_lock);
/*
* 500ms timer.
*/
nvgpu_timeout_init(vm->mm->g, &timeout, 50, NVGPU_TIMER_CPU_TIMER);
do {
if (nvgpu_atomic_read(&mapped_buffer->ref.refcount) == 1)
break;
nvgpu_msleep(10);
} while (!nvgpu_timeout_expired_msg(&timeout,
"sync-unmap failed on 0x%llx"));
if (nvgpu_timeout_expired(&timeout))
ret = -ETIMEDOUT;
nvgpu_mutex_acquire(&vm->update_gmmu_lock);
return ret;
}
void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset,
struct vm_gk20a_mapping_batch *batch)
{
struct gk20a *g = vm->mm->g;
struct nvgpu_mapped_buf *mapped_buffer;
nvgpu_mutex_acquire(&vm->update_gmmu_lock);
mapped_buffer = __nvgpu_vm_find_mapped_buf(vm, offset);
if (!mapped_buffer) {
nvgpu_mutex_release(&vm->update_gmmu_lock);
nvgpu_err(g, "invalid addr to unmap 0x%llx", offset);
return;
if (!mapped_buffer)
goto done;
if (mapped_buffer->flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
if (nvgpu_vm_unmap_sync_buffer(vm, mapped_buffer))
/*
* Looks like we have failed... Better not continue in
* case the buffer is in use.
*/
goto done;
}
nvgpu_ref_put(&mapped_buffer->ref, nvgpu_vm_unmap_locked_ref);
/*
* Make sure we have access to the batch if we end up calling through to
* the unmap_ref function.
*/
vm->kref_put_batch = batch;
nvgpu_ref_put(&mapped_buffer->ref, __nvgpu_vm_unmap_ref);
vm->kref_put_batch = NULL;
done:
nvgpu_mutex_release(&vm->update_gmmu_lock);
return;
}


@@ -208,7 +208,7 @@ int nvgpu_vm_area_free(struct vm_gk20a *vm, u64 addr)
&vm_area->buffer_list_head,
nvgpu_mapped_buf, buffer_list) {
nvgpu_list_del(&buffer->buffer_list);
nvgpu_ref_put(&buffer->ref, nvgpu_vm_unmap_locked_ref);
nvgpu_ref_put(&buffer->ref, __nvgpu_vm_unmap_ref);
}
/* if this was a sparse mapping, free the va */


@@ -98,10 +98,6 @@ int nvgpu_vm_map_buffer(struct vm_gk20a *vm,
u64 mapping_size,
struct vm_gk20a_mapping_batch *batch);
/* Note: batch may be NULL if unmap op is not part of a batch */
int nvgpu_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset,
struct vm_gk20a_mapping_batch *batch);
/* find buffer corresponding to va */
int nvgpu_vm_find_buf(struct vm_gk20a *vm, u64 gpu_va,
struct dma_buf **dmabuf,


@@ -207,11 +207,19 @@ void nvgpu_vm_put_buffers(struct vm_gk20a *vm,
struct nvgpu_mapped_buf **mapped_buffers,
int num_buffers);
void nvgpu_vm_unmap_locked(struct nvgpu_mapped_buf *mapped_buffer,
void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset,
struct vm_gk20a_mapping_batch *batch);
void nvgpu_vm_unmap_locked_ref(struct nvgpu_ref *ref);
void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset);
/*
* Implemented by each OS. Called from within the core VM code to handle OS
* specific components of an nvgpu_mapped_buf.
*/
void nvgpu_vm_unmap_system(struct nvgpu_mapped_buf *mapped_buffer);
/*
* Don't use this outside of the core VM code!
*/
void __nvgpu_vm_unmap_ref(struct nvgpu_ref *ref);
/*
* These all require the VM update lock to be held.