gpu: nvgpu: use nvgpu rbtree to store mapped buffers

Use the nvgpu rbtree instead of the Linux rbtree to store
mapped buffers for each VM.

Switch from "struct rb_node" to "struct nvgpu_rbtree_node",
and use the rbtree APIs from <nvgpu/rbtree.h> instead of the
Linux APIs.

Jira NVGPU-13

Change-Id: Id96ba76e20fa9ecad016cd5d5a6a7d40579a70f2
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1453043
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>

Author:    Deepak Nibade
Date:      2017-03-31 12:16:33 +05:30
Committer: mobile promotions
Commit:    ce3c30f14f (parent 6dda47a114)

3 changed files with 81 additions and 99 deletions
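For context on the conversion: unlike the Linux rbtree, the nvgpu rbtree keys each node by an explicit key_start/key_end range and hands back generic node pointers, so callers recover the containing structure with an offsetof()-based helper (mapped_buffer_from_rbtree_node, added in the header below). The following is a minimal sketch of that usage pattern, assuming only the <nvgpu/rbtree.h> calls exercised in this diff; demo_buf, demo_buf_from_node and demo_usage are illustrative names and not part of this change.

/* Sketch only: illustrates the nvgpu rbtree pattern this commit adopts.
 * Assumes <nvgpu/rbtree.h> and the usual kernel types (u64, uintptr_t,
 * offsetof); demo_buf is a stand-in for mapped_buffer_node. */
#include <nvgpu/rbtree.h>

struct demo_buf {
	u64 addr;
	u64 size;
	struct nvgpu_rbtree_node node;	/* tree node embedded in the tracked object */
};

static inline struct demo_buf *demo_buf_from_node(struct nvgpu_rbtree_node *node)
{
	/* Same offsetof() trick as mapped_buffer_from_rbtree_node(). */
	return (struct demo_buf *)
		((uintptr_t)node - offsetof(struct demo_buf, node));
}

static void demo_usage(struct demo_buf *buf, struct nvgpu_rbtree_node **root)
{
	struct nvgpu_rbtree_node *node = NULL;

	/* Key the node by its GPU VA range before inserting. */
	buf->node.key_start = buf->addr;
	buf->node.key_end = buf->addr + buf->size;
	nvgpu_rbtree_insert(&buf->node, root);

	/* Exact-key lookup replaces the hand-rolled rb_node walk. */
	nvgpu_rbtree_search(buf->addr, &node, *root);
	if (node)
		(void)demo_buf_from_node(node);

	/* In-order enumeration replaces rb_first()/rb_next(). */
	nvgpu_rbtree_enum_start(0, &node, *root);
	while (node) {
		(void)demo_buf_from_node(node);
		nvgpu_rbtree_enum_next(&node, node);
	}

	/* Removal replaces rb_erase(). */
	nvgpu_rbtree_unlink(&buf->node, root);
}

Range and floor lookups (nvgpu_rbtree_range_search(), nvgpu_rbtree_less_than_search()) follow the same shape, as the converted find_mapped_buffer_*_locked() helpers below show.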

File 1 of 3:

@@ -311,9 +311,9 @@ static inline u32 lo32(u64 f)
 }
 
 static struct mapped_buffer_node *find_mapped_buffer_locked(
-	struct rb_root *root, u64 addr);
+	struct nvgpu_rbtree_node *root, u64 addr);
 static struct mapped_buffer_node *find_mapped_buffer_reverse_locked(
-	struct rb_root *root, struct dma_buf *dmabuf,
+	struct nvgpu_rbtree_node *root, struct dma_buf *dmabuf,
 	u32 kind);
 static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 	enum gmmu_pgsz_gk20a pgsz_idx,
@@ -1289,7 +1289,7 @@ int gk20a_vm_get_buffers(struct vm_gk20a *vm,
 {
 	struct mapped_buffer_node *mapped_buffer;
 	struct mapped_buffer_node **buffer_list;
-	struct rb_node *node;
+	struct nvgpu_rbtree_node *node = NULL;
 	int i = 0;
 
 	if (vm->userspace_managed) {
@@ -1307,16 +1307,15 @@ int gk20a_vm_get_buffers(struct vm_gk20a *vm,
 		return -ENOMEM;
 	}
 
-	node = rb_first(&vm->mapped_buffers);
+	nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers);
 	while (node) {
-		mapped_buffer =
-			container_of(node, struct mapped_buffer_node, node);
+		mapped_buffer = mapped_buffer_from_rbtree_node(node);
 		if (mapped_buffer->user_mapped) {
 			buffer_list[i] = mapped_buffer;
 			kref_get(&mapped_buffer->ref);
 			i++;
 		}
-		node = rb_next(&mapped_buffer->node);
+		nvgpu_rbtree_enum_next(&node, node);
 	}
 
 	BUG_ON(i != vm->num_user_mapped_buffers);
@@ -1396,7 +1395,7 @@ static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset,
 	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
-	mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, offset);
+	mapped_buffer = find_mapped_buffer_locked(vm->mapped_buffers, offset);
 	if (!mapped_buffer) {
 		nvgpu_mutex_release(&vm->update_gmmu_lock);
 		gk20a_err(d, "invalid addr to unmap 0x%llx", offset);
@@ -1488,104 +1487,81 @@ int gk20a_vm_free_va(struct vm_gk20a *vm,
 	return 0;
 }
 
-static int insert_mapped_buffer(struct rb_root *root,
+static void remove_mapped_buffer(struct vm_gk20a *vm,
 				struct mapped_buffer_node *mapped_buffer)
 {
-	struct rb_node **new_node = &(root->rb_node), *parent = NULL;
-	/* Figure out where to put new node */
-	while (*new_node) {
-		struct mapped_buffer_node *cmp_with =
-			container_of(*new_node, struct mapped_buffer_node,
-				     node);
-		parent = *new_node;
-		if (cmp_with->addr > mapped_buffer->addr) /* u64 cmp */
-			new_node = &((*new_node)->rb_left);
-		else if (cmp_with->addr != mapped_buffer->addr) /* u64 cmp */
-			new_node = &((*new_node)->rb_right);
-		else
-			return -EINVAL; /* no fair dup'ing */
-	}
-	/* Add new node and rebalance tree. */
-	rb_link_node(&mapped_buffer->node, parent, new_node);
-	rb_insert_color(&mapped_buffer->node, root);
+	nvgpu_rbtree_unlink(&mapped_buffer->node, &vm->mapped_buffers);
+}
+
+static int insert_mapped_buffer(struct vm_gk20a *vm,
+				struct mapped_buffer_node *mapped_buffer)
+{
+	mapped_buffer->node.key_start = mapped_buffer->addr;
+	mapped_buffer->node.key_end = mapped_buffer->addr + mapped_buffer->size;
+
+	nvgpu_rbtree_insert(&mapped_buffer->node, &vm->mapped_buffers);
 
 	return 0;
 }
 
 static struct mapped_buffer_node *find_mapped_buffer_reverse_locked(
-				struct rb_root *root, struct dma_buf *dmabuf,
+				struct nvgpu_rbtree_node *root, struct dma_buf *dmabuf,
 				u32 kind)
 {
-	struct rb_node *node = rb_first(root);
+	struct nvgpu_rbtree_node *node = NULL;
+
+	nvgpu_rbtree_enum_start(0, &node, root);
+
 	while (node) {
 		struct mapped_buffer_node *mapped_buffer =
-			container_of(node, struct mapped_buffer_node, node);
+			mapped_buffer_from_rbtree_node(node);
+
 		if (mapped_buffer->dmabuf == dmabuf &&
 		    kind == mapped_buffer->kind)
 			return mapped_buffer;
-		node = rb_next(&mapped_buffer->node);
+
+		nvgpu_rbtree_enum_next(&node, node);
 	}
+
 	return NULL;
 }
 
 static struct mapped_buffer_node *find_mapped_buffer_locked(
-	struct rb_root *root, u64 addr)
+	struct nvgpu_rbtree_node *root, u64 addr)
 {
-	struct rb_node *node = root->rb_node;
-	while (node) {
-		struct mapped_buffer_node *mapped_buffer =
-			container_of(node, struct mapped_buffer_node, node);
-		if (mapped_buffer->addr > addr) /* u64 cmp */
-			node = node->rb_left;
-		else if (mapped_buffer->addr != addr) /* u64 cmp */
-			node = node->rb_right;
-		else
-			return mapped_buffer;
-	}
-	return NULL;
+	struct nvgpu_rbtree_node *node = NULL;
+
+	nvgpu_rbtree_search(addr, &node, root);
+	if (!node)
+		return NULL;
+
+	return mapped_buffer_from_rbtree_node(node);
 }
 
 static struct mapped_buffer_node *find_mapped_buffer_range_locked(
-	struct rb_root *root, u64 addr)
+	struct nvgpu_rbtree_node *root, u64 addr)
 {
-	struct rb_node *node = root->rb_node;
-	while (node) {
-		struct mapped_buffer_node *m =
-			container_of(node, struct mapped_buffer_node, node);
-		if (m->addr <= addr && m->addr + m->size > addr)
-			return m;
-		else if (m->addr > addr) /* u64 cmp */
-			node = node->rb_left;
-		else
-			node = node->rb_right;
-	}
-	return NULL;
+	struct nvgpu_rbtree_node *node = NULL;
+
+	nvgpu_rbtree_range_search(addr, &node, root);
+	if (!node)
+		return NULL;
+
+	return mapped_buffer_from_rbtree_node(node);
 }
 
 /* find the first mapped buffer with GPU VA less than addr */
 static struct mapped_buffer_node *find_mapped_buffer_less_than_locked(
-	struct rb_root *root, u64 addr)
+	struct nvgpu_rbtree_node *root, u64 addr)
 {
-	struct rb_node *node = root->rb_node;
-	struct mapped_buffer_node *ret = NULL;
-	while (node) {
-		struct mapped_buffer_node *mapped_buffer =
-			container_of(node, struct mapped_buffer_node, node);
-		if (mapped_buffer->addr >= addr)
-			node = node->rb_left;
-		else {
-			ret = mapped_buffer;
-			node = node->rb_right;
-		}
-	}
-	return ret;
+	struct nvgpu_rbtree_node *node = NULL;
+
+	nvgpu_rbtree_less_than_search(addr, &node, root);
+	if (!node)
+		return NULL;
+
+	return mapped_buffer_from_rbtree_node(node);
 }
 
 #define BFR_ATTRS (sizeof(nvmap_bfr_param)/sizeof(nvmap_bfr_param[0]))
@@ -1693,7 +1669,7 @@ static int validate_fixed_buffer(struct vm_gk20a *vm,
 	 * mappings by checking the buffer with the highest GPU VA
 	 * that is less than our buffer end */
 	buffer = find_mapped_buffer_less_than_locked(
-		&vm->mapped_buffers, map_offset + map_size);
+		vm->mapped_buffers, map_offset + map_size);
 	if (buffer && buffer->addr + buffer->size > map_offset) {
 		gk20a_warn(dev, "overlapping buffer map requested");
 		return -EINVAL;
@@ -1877,7 +1853,7 @@ static u64 gk20a_vm_map_duplicate_locked(struct vm_gk20a *vm,
 	struct mapped_buffer_node *mapped_buffer = NULL;
 
 	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
-		mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers,
+		mapped_buffer = find_mapped_buffer_locked(vm->mapped_buffers,
 							  offset_align);
 		if (!mapped_buffer)
 			return 0;
@@ -1887,7 +1863,7 @@ static u64 gk20a_vm_map_duplicate_locked(struct vm_gk20a *vm,
 			return 0;
 	} else {
 		mapped_buffer =
-			find_mapped_buffer_reverse_locked(&vm->mapped_buffers,
+			find_mapped_buffer_reverse_locked(vm->mapped_buffers,
 						dmabuf, kind);
 		if (!mapped_buffer)
 			return 0;
@@ -2433,7 +2409,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 	nvgpu_init_list_node(&mapped_buffer->va_buffers_list);
 	kref_init(&mapped_buffer->ref);
 
-	err = insert_mapped_buffer(&vm->mapped_buffers, mapped_buffer);
+	err = insert_mapped_buffer(vm, mapped_buffer);
 	if (err) {
 		gk20a_err(d, "failed to insert into mapped buffer tree");
 		goto clean_up;
@@ -2456,7 +2432,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 clean_up:
 	if (inserted) {
-		rb_erase(&mapped_buffer->node, &vm->mapped_buffers);
+		remove_mapped_buffer(vm, mapped_buffer);
 		if (user_mapped)
 			vm->num_user_mapped_buffers--;
 	}
@@ -2483,7 +2459,7 @@ int gk20a_vm_get_compbits_info(struct vm_gk20a *vm,
 	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
 
-	mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, mapping_gva);
+	mapped_buffer = find_mapped_buffer_locked(vm->mapped_buffers, mapping_gva);
 
 	if (!mapped_buffer || !mapped_buffer->user_mapped)
 	{
@@ -2542,7 +2518,7 @@ int gk20a_vm_map_compbits(struct vm_gk20a *vm,
 	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
 
 	mapped_buffer =
-		find_mapped_buffer_locked(&vm->mapped_buffers, mapping_gva);
+		find_mapped_buffer_locked(vm->mapped_buffers, mapping_gva);
 
 	if (!mapped_buffer || !mapped_buffer->user_mapped) {
 		nvgpu_mutex_release(&vm->update_gmmu_lock);
@@ -3274,7 +3250,7 @@ dma_addr_t gk20a_mm_gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr)
 	struct gk20a *g = gk20a_from_vm(vm);
 
 	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
-	buffer = find_mapped_buffer_locked(&vm->mapped_buffers, gpu_vaddr);
+	buffer = find_mapped_buffer_locked(vm->mapped_buffers, gpu_vaddr);
 	if (buffer)
 		addr = g->ops.mm.get_iova_addr(g, buffer->sgt->sgl,
 					       buffer->flags);
@@ -3886,7 +3862,7 @@ void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer,
 				   mapped_buffer->sgt);
 
 	/* remove from mapped buffer tree and remove list, free */
-	rb_erase(&mapped_buffer->node, &vm->mapped_buffers);
+	remove_mapped_buffer(vm, mapped_buffer);
 	if (!nvgpu_list_empty(&mapped_buffer->va_buffers_list))
 		nvgpu_list_del(&mapped_buffer->va_buffers_list);
@@ -3908,7 +3884,7 @@ void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset)
 	struct mapped_buffer_node *mapped_buffer;
 
 	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
-	mapped_buffer = find_mapped_buffer_locked(&vm->mapped_buffers, offset);
+	mapped_buffer = find_mapped_buffer_locked(vm->mapped_buffers, offset);
 	if (!mapped_buffer) {
 		nvgpu_mutex_release(&vm->update_gmmu_lock);
 		gk20a_err(d, "invalid addr to unmap 0x%llx", offset);
@@ -3939,7 +3915,7 @@ static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm)
 {
 	struct mapped_buffer_node *mapped_buffer;
 	struct vm_reserved_va_node *va_node, *va_node_tmp;
-	struct rb_node *node;
+	struct nvgpu_rbtree_node *node = NULL;
 	struct gk20a *g = vm->mm->g;
 
 	gk20a_dbg_fn("");
@@ -3961,12 +3937,11 @@ static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm)
 	/* TBD: add a flag here for the unmap code to recognize teardown
 	 * and short-circuit any otherwise expensive operations. */
 
-	node = rb_first(&vm->mapped_buffers);
+	nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers);
 	while (node) {
-		mapped_buffer =
-			container_of(node, struct mapped_buffer_node, node);
+		mapped_buffer = mapped_buffer_from_rbtree_node(node);
 		gk20a_vm_unmap_locked(mapped_buffer, NULL);
-		node = rb_first(&vm->mapped_buffers);
+		nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers);
 	}
 
 	/* destroy remaining reserved memory areas */
@@ -4402,7 +4377,7 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 	if (err)
 		goto clean_up_allocators;
 
-	vm->mapped_buffers = RB_ROOT;
+	vm->mapped_buffers = NULL;
 
 	nvgpu_mutex_init(&vm->update_gmmu_lock);
 	kref_init(&vm->ref);
@@ -5199,7 +5174,7 @@ int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va,
 	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
-	mapped_buffer = find_mapped_buffer_range_locked(&vm->mapped_buffers,
+	mapped_buffer = find_mapped_buffer_range_locked(vm->mapped_buffers,
 							gpu_va);
 	if (!mapped_buffer) {
 		nvgpu_mutex_release(&vm->update_gmmu_lock);

File 2 of 3:

@@ -26,6 +26,7 @@
 #include <nvgpu/allocator.h>
 #include <nvgpu/list.h>
+#include <nvgpu/rbtree.h>
 
 #ifdef CONFIG_ARM64
 #define outer_flush_range(a, b)
@@ -196,7 +197,7 @@ struct priv_cmd_entry {
 struct mapped_buffer_node {
 	struct vm_gk20a *vm;
-	struct rb_node node;
+	struct nvgpu_rbtree_node node;
 	struct list_head unmap_list;
 	struct nvgpu_list_node va_buffers_list;
 	struct vm_reserved_va_node *va_node;
@@ -231,6 +232,13 @@ mapped_buffer_node_from_va_buffers_list(struct nvgpu_list_node *node)
 		((uintptr_t)node - offsetof(struct mapped_buffer_node, va_buffers_list));
 };
 
+static inline struct mapped_buffer_node *
+mapped_buffer_from_rbtree_node(struct nvgpu_rbtree_node *node)
+{
+	return (struct mapped_buffer_node *)
+		((uintptr_t)node - offsetof(struct mapped_buffer_node, node));
+};
+
 struct vm_reserved_va_node {
 	struct nvgpu_list_node reserved_va_list;
 	struct nvgpu_list_node va_buffers_list;
@@ -306,7 +314,7 @@ struct vm_gk20a {
 	struct nvgpu_allocator user;
 	struct nvgpu_allocator user_lp;
 
-	struct rb_root mapped_buffers;
+	struct nvgpu_rbtree_node *mapped_buffers;
 
 	struct nvgpu_list_node reserved_va_list;

File 3 of 3:

@@ -202,7 +202,7 @@ static void vgpu_vm_remove_support(struct vm_gk20a *vm)
 	struct vm_reserved_va_node *va_node, *va_node_tmp;
 	struct tegra_vgpu_cmd_msg msg;
 	struct tegra_vgpu_as_share_params *p = &msg.params.as_share;
-	struct rb_node *node;
+	struct nvgpu_rbtree_node *node = NULL;
 	int err;
 
 	gk20a_dbg_fn("");
@@ -211,12 +211,11 @@ static void vgpu_vm_remove_support(struct vm_gk20a *vm)
 	/* TBD: add a flag here for the unmap code to recognize teardown
 	 * and short-circuit any otherwise expensive operations. */
 
-	node = rb_first(&vm->mapped_buffers);
+	nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers);
 	while (node) {
-		mapped_buffer =
-			container_of(node, struct mapped_buffer_node, node);
+		mapped_buffer = mapped_buffer_from_rbtree_node(node);
 		gk20a_vm_unmap_locked(mapped_buffer, NULL);
-		node = rb_first(&vm->mapped_buffers);
+		nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers);
 	}
 
 	/* destroy remaining reserved memory areas */
@@ -406,7 +405,7 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
 	if (err)
 		goto clean_up_user_allocator;
 
-	vm->mapped_buffers = RB_ROOT;
+	vm->mapped_buffers = NULL;
 
 	nvgpu_mutex_init(&vm->update_gmmu_lock);
 	kref_init(&vm->ref);