diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.c b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
index d832b792d..038fa4c80 100644
--- a/drivers/gpu/nvgpu/gk20a/as_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
@@ -199,14 +199,21 @@ static int gk20a_as_ioctl_get_va_regions(
for (i = 0; i < write_entries; ++i) {
struct nvgpu_as_va_region region;
+ u32 base, limit;
memset(&region, 0, sizeof(struct nvgpu_as_va_region));
+ if (!vm->vma[i].constraint.enable) {
+ base = vm->vma[i].base;
+ limit = vm->vma[i].limit;
+ } else {
+ base = vm->vma[i].constraint.base;
+ limit = vm->vma[i].constraint.limit;
+ }
+
region.page_size = vm->gmmu_page_sizes[i];
- region.offset = vm->vma[i].base;
- /* No __aeabi_uldivmod() on some platforms... */
- region.pages = (vm->vma[i].end - vm->vma[i].start) >>
- ilog2(region.page_size);
+ region.offset = (u64)base * region.page_size;
+ region.pages = limit - base; /* NOTE: limit is exclusive */
if (copy_to_user(user_region_ptr + i, &region, sizeof(region)))
return -EFAULT;
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index eb52f28de..f6c9f9018 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -59,7 +59,6 @@
#include "hw_fb_gk20a.h"
#include "gk20a_scale.h"
#include "dbg_gpu_gk20a.h"
-#include "gk20a_allocator.h"
#include "hal.h"
#include "vgpu/vgpu.h"
@@ -1511,7 +1510,6 @@ static int gk20a_probe(struct platform_device *dev)
&gk20a->mm.disable_bigpage);
gk20a_pmu_debugfs_init(dev);
gk20a_cde_debugfs_init(dev);
- gk20a_alloc_debugfs_init(dev);
#endif
gk20a_init_gr(gk20a);
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c
index 56fb22df3..0037257c8 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.c
@@ -1,7 +1,7 @@
/*
* gk20a allocator
*
- * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -16,1149 +16,112 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#include
-#include
-#include
-#include
-
-#include "platform_gk20a.h"
#include "gk20a_allocator.h"
+#include
-#include "mm_gk20a.h"
-
-static struct dentry *balloc_debugfs_root;
-
-static struct kmem_cache *buddy_cache; /* slab cache for meta data. */
-
-static u32 balloc_tracing_on;
-
-#define balloc_trace_func() \
- do { \
- if (balloc_tracing_on) \
- trace_printk("%s\n", __func__); \
- } while (0)
-
-#define balloc_trace_func_done() \
- do { \
- if (balloc_tracing_on) \
- trace_printk("%s_done\n", __func__); \
- } while (0)
-
-
-static void balloc_init_alloc_debug(struct gk20a_allocator *a);
-static void balloc_print_stats(struct gk20a_allocator *a, struct seq_file *s,
- int lock);
-static struct gk20a_buddy *balloc_free_buddy(struct gk20a_allocator *a,
- u64 addr);
-static void balloc_coalesce(struct gk20a_allocator *a, struct gk20a_buddy *b);
-static void __balloc_do_free_fixed(struct gk20a_allocator *a,
- struct gk20a_fixed_alloc *falloc);
-
-/*
- * This function is not present in older kernel's list.h code.
- */
-#ifndef list_last_entry
-#define list_last_entry(ptr, type, member) \
- list_entry((ptr)->prev, type, member)
-#endif
-
-/*
- * GPU buddy allocator for various address spaces.
- *
- * Current limitations:
- * o A fixed allocation could potentially be made that borders PDEs with
- * different PTE sizes. This would require that fixed buffer to have
- * different sized PTEs for different parts of the allocation. Probably
- * best to just require PDE alignment for fixed address allocs.
- *
- * o It is currently possible to make an allocator that has a buddy alignment
- * out of sync with the PDE block size alignment. A simple example is a
- * 32GB address space starting at byte 1. Every buddy is shifted off by 1
- * which means each buddy corresponf to more than one actual GPU page. The
- * best way to fix this is probably just require PDE blocksize alignment
- * for the start of the address space. At the moment all allocators are
- * easily PDE aligned so this hasn't been a problem.
- */
-
-/*
- * Pick a suitable maximum order for this allocator.
- *
- * Hueristic: Just guessing that the best max order is the largest single
- * block that will fit in the address space.
- */
-static void balloc_compute_max_order(struct gk20a_allocator *a)
+/* init allocator struct */
+int gk20a_allocator_init(struct gk20a_allocator *allocator,
+ const char *name, u32 start, u32 len)
{
- u64 true_max_order = ilog2(a->blks);
+ memset(allocator, 0, sizeof(struct gk20a_allocator));
- if (a->max_order > true_max_order)
- a->max_order = true_max_order;
- if (a->max_order > GPU_BALLOC_MAX_ORDER)
- a->max_order = GPU_BALLOC_MAX_ORDER;
-}
+ strncpy(allocator->name, name, 32);
-/*
- * Since we can only allocate in chucks of a->blk_size we need to trim off
- * any excess data that is not aligned to a->blk_size.
- */
-static void balloc_allocator_align(struct gk20a_allocator *a)
-{
- a->start = ALIGN(a->base, a->blk_size);
- a->end = (a->base + a->length) & ~(a->blk_size - 1);
- a->count = a->end - a->start;
- a->blks = a->count >> a->blk_shift;
-}
+ allocator->base = start;
+ allocator->limit = start + len - 1;
-/*
- * Pass NULL for parent if you want a top level buddy.
- */
-static struct gk20a_buddy *balloc_new_buddy(struct gk20a_allocator *a,
- struct gk20a_buddy *parent,
- u64 start, u64 order)
-{
- struct gk20a_buddy *new_buddy;
-
- new_buddy = kmem_cache_alloc(buddy_cache, GFP_KERNEL);
- if (!new_buddy)
- return NULL;
-
- memset(new_buddy, 0, sizeof(struct gk20a_buddy));
-
- new_buddy->parent = parent;
- new_buddy->start = start;
- new_buddy->order = order;
- new_buddy->end = start + (1 << order) * a->blk_size;
-
- return new_buddy;
-}
-
-static void __balloc_buddy_list_add(struct gk20a_allocator *a,
- struct gk20a_buddy *b,
- struct list_head *list)
-{
- if (buddy_is_in_list(b)) {
- balloc_dbg(a, "Oops: adding added buddy (%llu:0x%llx)\n",
- b->order, b->start);
- BUG();
- }
-
- /*
- * Add big PTE blocks to the tail, small to the head for GVA spaces.
- * This lets the code that checks if there are available blocks check
- * without cycling through the entire list.
- */
- if (a->flags & GPU_BALLOC_GVA_SPACE &&
- b->pte_size == BALLOC_PTE_SIZE_BIG)
- list_add_tail(&b->buddy_entry, list);
- else
- list_add(&b->buddy_entry, list);
-
- buddy_set_in_list(b);
-}
-
-static void __balloc_buddy_list_rem(struct gk20a_allocator *a,
- struct gk20a_buddy *b)
-{
- if (!buddy_is_in_list(b)) {
- balloc_dbg(a, "Oops: removing removed buddy (%llu:0x%llx)\n",
- b->order, b->start);
- BUG();
- }
-
- list_del_init(&b->buddy_entry);
- buddy_clr_in_list(b);
-}
-
-/*
- * Add a buddy to one of the buddy lists and deal with the necessary
- * book keeping. Adds the buddy to the list specified by the buddy's order.
- */
-static void balloc_blist_add(struct gk20a_allocator *a, struct gk20a_buddy *b)
-{
- __balloc_buddy_list_add(a, b, balloc_get_order_list(a, b->order));
- a->buddy_list_len[b->order]++;
-}
-
-static void balloc_blist_rem(struct gk20a_allocator *a, struct gk20a_buddy *b)
-{
- __balloc_buddy_list_rem(a, b);
- a->buddy_list_len[b->order]--;
-}
-
-static u64 balloc_get_order(struct gk20a_allocator *a, u64 len)
-{
- if (len == 0)
- return 0;
-
- len--;
- len >>= a->blk_shift;
-
- return fls(len);
-}
-
-static u64 __balloc_max_order_in(struct gk20a_allocator *a, u64 start, u64 end)
-{
- u64 size = (end - start) >> a->blk_shift;
-
- if (size > 0)
- return min_t(u64, ilog2(size), a->max_order);
- else
- return GPU_BALLOC_MAX_ORDER;
-}
-
-/*
- * Initialize the buddy lists.
- */
-static int balloc_init_lists(struct gk20a_allocator *a)
-{
- int i;
- u64 bstart, bend, order;
- struct gk20a_buddy *buddy;
-
- bstart = a->start;
- bend = a->end;
-
- /* First make sure the LLs are valid. */
- for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++)
- INIT_LIST_HEAD(balloc_get_order_list(a, i));
-
- while (bstart < bend) {
- order = __balloc_max_order_in(a, bstart, bend);
-
- buddy = balloc_new_buddy(a, NULL, bstart, order);
- if (!buddy)
- goto cleanup;
-
- balloc_blist_add(a, buddy);
- bstart += balloc_order_to_len(a, order);
- }
-
- return 0;
-
-cleanup:
- for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) {
- if (!list_empty(balloc_get_order_list(a, i))) {
- buddy = list_first_entry(balloc_get_order_list(a, i),
- struct gk20a_buddy, buddy_entry);
- balloc_blist_rem(a, buddy);
- kmem_cache_free(buddy_cache, buddy);
- }
- }
-
- return -ENOMEM;
-}
-
-/*
- * Initialize a buddy allocator. Returns 0 on success. This allocator does
- * not necessarily manage bytes. It manages distinct ranges of resources. This
- * allows the allocator to work for things like comp_tags, semaphores, etc.
- *
- * @allocator: Ptr to an allocator struct to init.
- * @vm: GPU VM to associate this allocator with. Can be NULL. Will be used to
- * get PTE size for GVA spaces.
- * @name: Name of the allocator. Doesn't have to be static storage.
- * @base: The base address of the resource pool being managed.
- * @size: Number of resources in the pool.
- * @blk_size: Minimum number of resources to allocate at once. For things like
- * semaphores this is 1. For GVA this might be as much as 64k. This
- * corresponds to order 0. Must be power of 2.
- * @max_order: Pick a maximum order. If you leave this as 0, the buddy allocator
- * will try and pick a reasonable max order.
- * @flags: Extra flags necessary. See GPU_BALLOC_*.
- */
-int __gk20a_allocator_init(struct gk20a_allocator *a,
- struct vm_gk20a *vm, const char *name,
- u64 base, u64 size, u64 blk_size, u64 max_order,
- u64 flags)
-{
- int err;
-
- memset(a, 0, sizeof(struct gk20a_allocator));
- strncpy(a->name, name, 32);
-
- a->base = base;
- a->length = size;
- a->blk_size = blk_size;
- a->blk_shift = __ffs(blk_size);
-
- /* blk_size must be greater than 0 and a power of 2. */
- if (blk_size == 0)
- return -EINVAL;
- if (blk_size & (blk_size - 1))
- return -EINVAL;
-
- if (max_order > GPU_BALLOC_MAX_ORDER)
- return -EINVAL;
-
- /* If this is to manage a GVA space we need a VM. */
- if (flags & GPU_BALLOC_GVA_SPACE && !vm)
- return -EINVAL;
-
- a->vm = vm;
- if (flags & GPU_BALLOC_GVA_SPACE)
- a->pte_blk_order = balloc_get_order(a, vm->big_page_size << 10);
-
- a->flags = flags;
- a->max_order = max_order;
-
- balloc_allocator_align(a);
- balloc_compute_max_order(a);
-
- /* Shared buddy kmem_cache for all allocators. */
- if (!buddy_cache)
- buddy_cache = KMEM_CACHE(gk20a_buddy, 0);
- if (!buddy_cache)
+ allocator->bitmap = vzalloc(BITS_TO_LONGS(len) * sizeof(long));
+ if (!allocator->bitmap)
return -ENOMEM;
- a->alloced_buddies = RB_ROOT;
- err = balloc_init_lists(a);
- if (err)
- return err;
+ allocator_dbg(allocator, "%s : base %d, limit %d",
+ allocator->name, allocator->base);
- mutex_init(&a->lock);
+ init_rwsem(&allocator->rw_sema);
- a->init = 1;
-
- balloc_init_alloc_debug(a);
- balloc_dbg(a, "New allocator: base 0x%llx\n", a->base);
- balloc_dbg(a, " size 0x%llx\n", a->length);
- balloc_dbg(a, " blk_size 0x%llx\n", a->blk_size);
- balloc_dbg(a, " max_order %llu\n", a->max_order);
- balloc_dbg(a, " flags 0x%llx\n", a->flags);
+ allocator->alloc = gk20a_allocator_block_alloc;
+ allocator->free = gk20a_allocator_block_free;
return 0;
}
-int gk20a_allocator_init(struct gk20a_allocator *a, const char *name,
- u64 base, u64 size, u64 blk_size)
+/* destroy allocator: lock rw_sema (never unlocked), free bitmap, zero the struct */
+void gk20a_allocator_destroy(struct gk20a_allocator *allocator)
{
- return __gk20a_allocator_init(a, NULL, name,
- base, size, blk_size, 0, 0);
+ down_write(&allocator->rw_sema);
+
+ vfree(allocator->bitmap);
+
+ memset(allocator, 0, sizeof(struct gk20a_allocator));
}
/*
- * Clean up and destroy the passed allocator.
- */
-void gk20a_allocator_destroy(struct gk20a_allocator *a)
-{
- struct rb_node *node;
- struct gk20a_buddy *bud;
- struct gk20a_fixed_alloc *falloc;
- int i;
-
- balloc_lock(a);
-
- if (!IS_ERR_OR_NULL(a->debugfs_entry))
- debugfs_remove(a->debugfs_entry);
-
- /*
- * Free the fixed allocs first.
- */
- while ((node = rb_first(&a->fixed_allocs)) != NULL) {
- falloc = container_of(node,
- struct gk20a_fixed_alloc, alloced_entry);
-
- __balloc_do_free_fixed(a, falloc);
- rb_erase(node, &a->fixed_allocs);
- }
-
- /*
- * And now free all outstanding allocations.
- */
- while ((node = rb_first(&a->alloced_buddies)) != NULL) {
- bud = container_of(node, struct gk20a_buddy, alloced_entry);
- balloc_free_buddy(a, bud->start);
- balloc_blist_add(a, bud);
- balloc_coalesce(a, bud);
- }
-
- /*
- * Now clean up the unallocated buddies.
- */
- for (i = 0; i < GPU_BALLOC_ORDER_LIST_LEN; i++) {
- BUG_ON(a->buddy_list_alloced[i] != 0);
-
- while (!list_empty(balloc_get_order_list(a, i))) {
- bud = list_first_entry(balloc_get_order_list(a, i),
- struct gk20a_buddy, buddy_entry);
- balloc_blist_rem(a, bud);
- kmem_cache_free(buddy_cache, bud);
- }
-
- if (a->buddy_list_len[i] != 0) {
- pr_info("Excess buddies!!! (%d: %llu)\n",
- i, a->buddy_list_len[i]);
- BUG();
- }
- if (a->buddy_list_split[i] != 0) {
- pr_info("Excess split nodes!!! (%d: %llu)\n",
- i, a->buddy_list_split[i]);
- BUG();
- }
- if (a->buddy_list_alloced[i] != 0) {
- pr_info("Excess alloced nodes!!! (%d: %llu)\n",
- i, a->buddy_list_alloced[i]);
- BUG();
- }
- }
-
- a->init = 0;
-
- balloc_unlock(a);
-
- /*
- * We cant unlock an allocator after memsetting it. That wipes the
- * state of the mutex. Hopefully no one uses the allocator after
- * destroying it...
- */
- memset(a, 0, sizeof(struct gk20a_allocator));
-}
-
-/*
- * Combine the passed buddy if possible. The pointer in @b may not be valid
- * after this as the buddy may be freed.
+ * *addr != 0 for fixed address allocation. if *addr == 0, the allocated
+ * address is returned to caller in *addr.
*
- * @a must be locked.
- */
-static void balloc_coalesce(struct gk20a_allocator *a, struct gk20a_buddy *b)
+ * contiguous allocation, which allocates one block of
+ * contiguous address.
+*/
+int gk20a_allocator_block_alloc(struct gk20a_allocator *allocator,
+ u32 *addr, u32 len, u32 align)
{
- struct gk20a_buddy *parent;
+ unsigned long _addr;
- if (buddy_is_alloced(b) || buddy_is_split(b))
- return;
+ allocator_dbg(allocator, "[in] addr %d, len %d", *addr, len);
- /*
- * If both our buddy and I are both not allocated and not split then
- * we can coalesce ourselves.
- */
- if (!b->buddy)
- return;
- if (buddy_is_alloced(b->buddy) || buddy_is_split(b->buddy))
- return;
+ if ((*addr != 0 && *addr < allocator->base) || /* check addr range */
+ *addr + len > allocator->limit || /* check addr range */
+ *addr & (align - 1) || /* check addr alignment */
+ len == 0) /* check len */
+ return -EINVAL;
- parent = b->parent;
-
- balloc_blist_rem(a, b);
- balloc_blist_rem(a, b->buddy);
-
- buddy_clr_split(parent);
- a->buddy_list_split[parent->order]--;
- balloc_blist_add(a, parent);
-
- /*
- * Recursively coalesce as far as we can go.
- */
- balloc_coalesce(a, parent);
-
- /* Clean up the remains. */
- kmem_cache_free(buddy_cache, b->buddy);
- kmem_cache_free(buddy_cache, b);
-}
-
-/*
- * Split a buddy into two new buddies who are 1/2 the size of the parent buddy.
- *
- * @a must be locked.
- */
-static int balloc_split_buddy(struct gk20a_allocator *a, struct gk20a_buddy *b,
- int pte_size)
-{
- struct gk20a_buddy *left, *right;
- u64 half;
-
- left = balloc_new_buddy(a, b, b->start, b->order - 1);
- if (!left)
+ len = ALIGN(len, align);
+ if (!len)
return -ENOMEM;
- half = (b->end - b->start) / 2;
+ down_write(&allocator->rw_sema);
- right = balloc_new_buddy(a, b, b->start + half, b->order - 1);
- if (!right) {
- kmem_cache_free(buddy_cache, left);
+ _addr = bitmap_find_next_zero_area(allocator->bitmap,
+ allocator->limit - allocator->base + 1,
+ *addr ? (*addr - allocator->base) : 0,
+ len,
+ align - 1);
+ if ((_addr > allocator->limit - allocator->base + 1) ||
+ (*addr && *addr != (_addr + allocator->base))) {
+ up_write(&allocator->rw_sema);
return -ENOMEM;
}
- buddy_set_split(b);
- a->buddy_list_split[b->order]++;
+ bitmap_set(allocator->bitmap, _addr, len);
+ *addr = allocator->base + _addr;
- b->left = left;
- b->right = right;
- left->buddy = right;
- right->buddy = left;
- left->parent = b;
- right->parent = b;
+ up_write(&allocator->rw_sema);
- /* PTE considerations. */
- if (a->flags & GPU_BALLOC_GVA_SPACE &&
- left->order <= a->pte_blk_order) {
- left->pte_size = pte_size;
- right->pte_size = pte_size;
- }
-
- balloc_blist_rem(a, b);
- balloc_blist_add(a, left);
- balloc_blist_add(a, right);
+ allocator_dbg(allocator, "[out] addr %d, len %d", *addr, len);
return 0;
}
-/*
- * Place the passed buddy into the RB tree for allocated buddies. Never fails
- * unless the passed entry is a duplicate which is a bug.
- *
- * @a must be locked.
- */
-void balloc_alloc_buddy(struct gk20a_allocator *a, struct gk20a_buddy *b)
+/* free len blocks (len rounded up to align) starting at addr */
+int gk20a_allocator_block_free(struct gk20a_allocator *allocator,
+ u32 addr, u32 len, u32 align)
{
- struct rb_node **new = &(a->alloced_buddies.rb_node);
- struct rb_node *parent = NULL;
+ allocator_dbg(allocator, "[in] addr %d, len %d", addr, len);
- while (*new) {
- struct gk20a_buddy *bud = container_of(*new, struct gk20a_buddy,
- alloced_entry);
+ if (addr + len > allocator->limit || /* check addr range */
+ addr < allocator->base ||
+ addr & (align - 1)) /* check addr alignment */
+ return -EINVAL;
- parent = *new;
- if (b->start < bud->start)
- new = &((*new)->rb_left);
- else if (b->start > bud->start)
- new = &((*new)->rb_right);
- else
- BUG_ON("Duplicate entries in allocated list!\n");
- }
+ len = ALIGN(len, align);
+ if (!len)
+ return -EINVAL;
- rb_link_node(&b->alloced_entry, parent, new);
- rb_insert_color(&b->alloced_entry, &a->alloced_buddies);
+ down_write(&allocator->rw_sema);
+ bitmap_clear(allocator->bitmap, addr - allocator->base, len);
+ up_write(&allocator->rw_sema);
- buddy_set_alloced(b);
- a->buddy_list_alloced[b->order]++;
-}
-
-/*
- * Remove the passed buddy from the allocated buddy RB tree. Returns the
- * deallocated buddy for further processing.
- *
- * @a must be locked.
- */
-static struct gk20a_buddy *balloc_free_buddy(struct gk20a_allocator *a,
- u64 addr)
-{
- struct rb_node *node = a->alloced_buddies.rb_node;
- struct gk20a_buddy *bud;
-
- while (node) {
- bud = container_of(node, struct gk20a_buddy, alloced_entry);
-
- if (addr < bud->start)
- node = node->rb_left;
- else if (addr > bud->start)
- node = node->rb_right;
- else
- break;
- }
-
- if (!node)
- return NULL;
-
- rb_erase(node, &a->alloced_buddies);
- buddy_clr_alloced(bud);
- a->buddy_list_alloced[bud->order]--;
-
- return bud;
-}
-
-/*
- * Find a suitable buddy for the given order and PTE type (big or little).
- */
-static struct gk20a_buddy *__balloc_find_buddy(struct gk20a_allocator *a,
- u64 order, int pte_size)
-{
- struct gk20a_buddy *bud;
-
- if (list_empty(balloc_get_order_list(a, order)))
- return NULL;
-
- if (a->flags & GPU_BALLOC_GVA_SPACE &&
- pte_size == BALLOC_PTE_SIZE_BIG)
- bud = list_last_entry(balloc_get_order_list(a, order),
- struct gk20a_buddy, buddy_entry);
- else
- bud = list_first_entry(balloc_get_order_list(a, order),
- struct gk20a_buddy, buddy_entry);
-
- if (bud->pte_size != BALLOC_PTE_SIZE_ANY &&
- bud->pte_size != pte_size)
- return NULL;
-
- return bud;
-}
-
-/*
- * Allocate a suitably sized buddy. If no suitable buddy exists split higher
- * order buddies until we have a suitable buddy to allocate.
- *
- * For PDE grouping add an extra check to see if a buddy is suitable: that the
- * buddy exists in a PDE who's PTE size is reasonable
- *
- * @a must be locked.
- */
-static u64 __balloc_do_alloc(struct gk20a_allocator *a, u64 order, int pte_size)
-{
- u64 split_order;
- struct gk20a_buddy *bud;
-
- split_order = order;
- while (!(bud = __balloc_find_buddy(a, split_order, pte_size)))
- split_order++;
-
- while (bud->order != order) {
- if (balloc_split_buddy(a, bud, pte_size))
- return 0; /* No mem... */
- bud = bud->left;
- }
-
- balloc_blist_rem(a, bud);
- balloc_alloc_buddy(a, bud);
-
- return bud->start;
-}
-
-/*
- * Allocate memory from the passed allocator.
- */
-u64 gk20a_balloc(struct gk20a_allocator *a, u64 len)
-{
- u64 order, addr;
- int pte_size;
-
- balloc_trace_func();
-
- balloc_lock(a);
-
- order = balloc_get_order(a, len);
-
- if (order > a->max_order) {
- balloc_unlock(a);
- balloc_dbg(a, "Alloc fail\n");
- balloc_trace_func_done();
- return 0;
- }
-
- /*
- * For now pass the base address of the allocator's region to
- * __get_pte_size(). This ensures we get the right page size for
- * the alloc but we don't have to know what the real address is
- * going to be quite yet.
- *
- * TODO: once userspace supports a unified address space pass 0 for
- * the base. This will make only 'len' affect the PTE size.
- */
- if (a->flags & GPU_BALLOC_GVA_SPACE)
- pte_size = __get_pte_size(a->vm, a->base, len);
- else
- pte_size = BALLOC_PTE_SIZE_ANY;
-
- addr = __balloc_do_alloc(a, order, pte_size);
-
- a->bytes_alloced += len;
- a->bytes_alloced_real += balloc_order_to_len(a, order);
-
- balloc_unlock(a);
- balloc_dbg(a, "Alloc 0x%-10llx %3lld:0x%-10llx pte_size=%s\n",
- addr, order, len,
- pte_size == gmmu_page_size_big ? "big" :
- pte_size == gmmu_page_size_small ? "small" :
- "NA/any");
-
- balloc_trace_func_done();
- return addr;
-}
-
-/*
- * See if the passed range is actually available for allocation. If so, then
- * return 1, otherwise return 0.
- *
- * TODO: Right now this uses the unoptimal approach of going through all
- * outstanding allocations and checking their base/ends. This could be better.
- */
-static int balloc_is_range_free(struct gk20a_allocator *a, u64 base, u64 end)
-{
- struct rb_node *node;
- struct gk20a_buddy *bud;
-
- node = rb_first(&a->alloced_buddies);
- if (!node)
- return 1; /* No allocs yet. */
-
- bud = container_of(node, struct gk20a_buddy, alloced_entry);
-
- while (bud->start < end) {
- if ((bud->start > base && bud->start < end) ||
- (bud->end > base && bud->end < end))
- return 0;
-
- node = rb_next(node);
- if (!node)
- break;
- bud = container_of(node, struct gk20a_buddy, alloced_entry);
- }
-
- return 1;
-}
-
-static void balloc_alloc_fixed(struct gk20a_allocator *a,
- struct gk20a_fixed_alloc *f)
-{
- struct rb_node **new = &(a->fixed_allocs.rb_node);
- struct rb_node *parent = NULL;
-
- while (*new) {
- struct gk20a_fixed_alloc *falloc =
- container_of(*new, struct gk20a_fixed_alloc,
- alloced_entry);
-
- parent = *new;
- if (f->start < falloc->start)
- new = &((*new)->rb_left);
- else if (f->start > falloc->start)
- new = &((*new)->rb_right);
- else
- BUG_ON("Duplicate entries in allocated list!\n");
- }
-
- rb_link_node(&f->alloced_entry, parent, new);
- rb_insert_color(&f->alloced_entry, &a->fixed_allocs);
-}
-
-/*
- * Remove the passed buddy from the allocated buddy RB tree. Returns the
- * deallocated buddy for further processing.
- *
- * @a must be locked.
- */
-static struct gk20a_fixed_alloc *balloc_free_fixed(struct gk20a_allocator *a,
- u64 addr)
-{
- struct rb_node *node = a->fixed_allocs.rb_node;
- struct gk20a_fixed_alloc *falloc;
-
- while (node) {
- falloc = container_of(node,
- struct gk20a_fixed_alloc, alloced_entry);
-
- if (addr < falloc->start)
- node = node->rb_left;
- else if (addr > falloc->start)
- node = node->rb_right;
- else
- break;
- }
-
- if (!node)
- return NULL;
-
- rb_erase(node, &a->fixed_allocs);
-
- return falloc;
-}
-
-/*
- * Find the parent range - doesn't necessarily need the parent to actually exist
- * as a buddy. Finding an existing parent comes later...
- */
-static void __balloc_get_parent_range(struct gk20a_allocator *a,
- u64 base, u64 order,
- u64 *pbase, u64 *porder)
-{
- u64 base_mask;
- u64 shifted_base = balloc_base_shift(a, base);
-
- order++;
- base_mask = ~((a->blk_size << order) - 1);
-
- shifted_base &= base_mask;
-
- *pbase = balloc_base_unshift(a, shifted_base);
- *porder = order;
-}
-
-/*
- * Makes a buddy at the passed address. This will make all parent buddies
- * necessary for this buddy to exist as well.
- */
-static struct gk20a_buddy *__balloc_make_fixed_buddy(struct gk20a_allocator *a,
- u64 base, u64 order)
-{
- struct gk20a_buddy *bud = NULL;
- struct list_head *order_list;
- u64 cur_order = order, cur_base = base;
-
- /*
- * Algo:
- * 1. Keep jumping up a buddy order until we find the real buddy that
- * this buddy exists in.
- * 2. Then work our way down through the buddy tree until we hit a dead
- * end.
- * 3. Start splitting buddies until we split to the one we need to
- * make.
- */
- while (cur_order <= a->max_order) {
- int found = 0;
-
- order_list = balloc_get_order_list(a, cur_order);
- list_for_each_entry(bud, order_list, buddy_entry) {
- if (bud->start == cur_base) {
- found = 1;
- break;
- }
- }
-
- if (found)
- break;
-
- __balloc_get_parent_range(a, cur_base, cur_order,
- &cur_base, &cur_order);
- }
-
- if (cur_order > a->max_order) {
- balloc_dbg(a, "No buddy for range ???\n");
- return NULL;
- }
-
- /* Split this buddy as necessary until we get the target buddy. */
- while (bud->start != base || bud->order != order) {
- if (balloc_split_buddy(a, bud, BALLOC_PTE_SIZE_ANY)) {
- balloc_coalesce(a, bud);
- return NULL;
- }
-
- if (base < bud->right->start)
- bud = bud->left;
- else
- bud = bud->right;
-
- }
-
- return bud;
-}
-
-static u64 __balloc_do_alloc_fixed(struct gk20a_allocator *a,
- struct gk20a_fixed_alloc *falloc,
- u64 base, u64 len)
-{
- u64 shifted_base, inc_base;
- u64 align_order;
-
- shifted_base = balloc_base_shift(a, base);
- if (shifted_base == 0)
- align_order = __fls(len >> a->blk_shift);
- else
- align_order = min_t(u64,
- __ffs(shifted_base >> a->blk_shift),
- __fls(len >> a->blk_shift));
-
- if (align_order > a->max_order) {
- balloc_dbg(a, "Align order too big: %llu > %llu\n",
- align_order, a->max_order);
- return 0;
- }
-
- /*
- * Generate a list of buddies that satisfy this allocation.
- */
- inc_base = shifted_base;
- while (inc_base < (shifted_base + len)) {
- u64 order_len = balloc_order_to_len(a, align_order);
- u64 remaining;
- struct gk20a_buddy *bud;
-
- bud = __balloc_make_fixed_buddy(a,
- balloc_base_unshift(a, inc_base),
- align_order);
- if (!bud) {
- balloc_dbg(a, "Fixed buddy failed: {0x%llx, %llu}!\n",
- balloc_base_unshift(a, inc_base),
- align_order);
- goto err_and_cleanup;
- }
-
- balloc_blist_rem(a, bud);
- balloc_alloc_buddy(a, bud);
- __balloc_buddy_list_add(a, bud, &falloc->buddies);
-
- /* Book keeping. */
- inc_base += order_len;
- remaining = (shifted_base + len) - inc_base;
- align_order = __ffs(inc_base >> a->blk_shift);
-
- /* If we don't have much left - trim down align_order. */
- if (balloc_order_to_len(a, align_order) > remaining)
- align_order = __balloc_max_order_in(a, inc_base,
- inc_base + remaining);
- }
-
- return base;
-
-err_and_cleanup:
- while (!list_empty(&falloc->buddies)) {
- struct gk20a_buddy *bud = list_first_entry(&falloc->buddies,
- struct gk20a_buddy,
- buddy_entry);
-
- __balloc_buddy_list_rem(a, bud);
- balloc_free_buddy(a, bud->start);
- kmem_cache_free(buddy_cache, bud);
- }
+ allocator_dbg(allocator, "[out] addr %d, len %d", addr, len);
return 0;
}
-
-/*
- * Allocate a fixed address allocation. The address of the allocation is @base
- * and the length is @len. This is not a typical buddy allocator operation and
- * as such has a high posibility of failure if the address space is heavily in
- * use.
- *
- * Please do not use this function unless _absolutely_ necessary.
- */
-u64 gk20a_balloc_fixed(struct gk20a_allocator *a, u64 base, u64 len)
-{
- struct gk20a_fixed_alloc *falloc = NULL;
- struct gk20a_buddy *bud;
- u64 ret, real_bytes = 0;
-
- balloc_trace_func();
-
- /* If base isn't aligned to an order 0 block, fail. */
- if (base & (a->blk_size - 1))
- goto fail;
-
- if (len == 0)
- goto fail;
-
- falloc = kmalloc(sizeof(*falloc), GFP_KERNEL);
- if (!falloc)
- goto fail;
-
- INIT_LIST_HEAD(&falloc->buddies);
- falloc->start = base;
- falloc->end = base + len;
-
- balloc_lock(a);
- if (!balloc_is_range_free(a, base, base + len)) {
- balloc_dbg(a, "Range not free: 0x%llx -> 0x%llx\n",
- base, base + len);
- goto fail_unlock;
- }
-
- ret = __balloc_do_alloc_fixed(a, falloc, base, len);
- if (!ret) {
- balloc_dbg(a, "Alloc-fixed failed ?? 0x%llx -> 0x%llx\n",
- base, base + len);
- goto fail_unlock;
- }
-
- balloc_alloc_fixed(a, falloc);
-
- list_for_each_entry(bud, &falloc->buddies, buddy_entry)
- real_bytes += (bud->end - bud->start);
-
- a->bytes_alloced += len;
- a->bytes_alloced_real += real_bytes;
-
- balloc_unlock(a);
- balloc_dbg(a, "Alloc (fixed) 0x%llx\n", base);
-
- balloc_trace_func_done();
- return base;
-
-fail_unlock:
- balloc_unlock(a);
-fail:
- kfree(falloc);
- balloc_trace_func_done();
- return 0;
-}
-
-static void __balloc_do_free_fixed(struct gk20a_allocator *a,
- struct gk20a_fixed_alloc *falloc)
-{
- struct gk20a_buddy *bud;
-
- while (!list_empty(&falloc->buddies)) {
- bud = list_first_entry(&falloc->buddies,
- struct gk20a_buddy,
- buddy_entry);
- __balloc_buddy_list_rem(a, bud);
-
- balloc_free_buddy(a, bud->start);
- balloc_blist_add(a, bud);
- a->bytes_freed += balloc_order_to_len(a, bud->order);
-
- /*
- * Attemp to defrag the allocation.
- */
- balloc_coalesce(a, bud);
- }
-
- kfree(falloc);
-}
-
-/*
- * Free the passed allocation.
- */
-void gk20a_bfree(struct gk20a_allocator *a, u64 addr)
-{
- struct gk20a_buddy *bud;
- struct gk20a_fixed_alloc *falloc;
-
- balloc_trace_func();
-
- if (!addr) {
- balloc_trace_func_done();
- return;
- }
-
- balloc_lock(a);
-
- /*
- * First see if this is a fixed alloc. If not fall back to a regular
- * buddy.
- */
- falloc = balloc_free_fixed(a, addr);
- if (falloc) {
- __balloc_do_free_fixed(a, falloc);
- goto done;
- }
-
- bud = balloc_free_buddy(a, addr);
- if (!bud)
- goto done;
-
- balloc_blist_add(a, bud);
- a->bytes_freed += balloc_order_to_len(a, bud->order);
-
- /*
- * Attemp to defrag the allocation.
- */
- balloc_coalesce(a, bud);
-
-done:
- balloc_unlock(a);
- balloc_dbg(a, "Free 0x%llx\n", addr);
- balloc_trace_func_done();
- return;
-}
-
-/*
- * Print the buddy allocator top level stats. If you pass @s as NULL then the
- * stats are printed to the kernel log. This lets this code be used for
- * debugging purposes internal to the allocator.
- */
-static void balloc_print_stats(struct gk20a_allocator *a, struct seq_file *s,
- int lock)
-{
-#define __balloc_pstat(s, fmt, arg...) \
- do { \
- if (s) \
- seq_printf(s, fmt, ##arg); \
- else \
- balloc_dbg(a, fmt, ##arg); \
- } while (0)
-
- int i;
- struct rb_node *node;
- struct gk20a_fixed_alloc *falloc;
-
- __balloc_pstat(s, "base = %llu, limit = %llu, blk_size = %llu\n",
- a->base, a->length, a->blk_size);
- __balloc_pstat(s, "Internal params:\n");
- __balloc_pstat(s, " start = %llu\n", a->start);
- __balloc_pstat(s, " end = %llu\n", a->end);
- __balloc_pstat(s, " count = %llu\n", a->count);
- __balloc_pstat(s, " blks = %llu\n", a->blks);
- __balloc_pstat(s, " max_order = %llu\n", a->max_order);
-
- __balloc_pstat(s, "Buddy blocks:\n");
- __balloc_pstat(s, " Order Free Alloced Split\n");
- __balloc_pstat(s, " ----- ---- ------- -----\n");
-
- if (lock)
- balloc_lock(a);
- for (i = a->max_order; i >= 0; i--) {
- if (a->buddy_list_len[i] == 0 &&
- a->buddy_list_alloced[i] == 0 &&
- a->buddy_list_split[i] == 0)
- continue;
-
- __balloc_pstat(s, " %3d %-7llu %-9llu %llu\n", i,
- a->buddy_list_len[i],
- a->buddy_list_alloced[i],
- a->buddy_list_split[i]);
- }
-
- __balloc_pstat(s, "\n");
-
- for (node = rb_first(&a->fixed_allocs), i = 1;
- node != NULL;
- node = rb_next(node)) {
- falloc = container_of(node,
- struct gk20a_fixed_alloc, alloced_entry);
-
- __balloc_pstat(s, "Fixed alloc (%d): [0x%llx -> 0x%llx]\n",
- i, falloc->start, falloc->end);
- }
-
- __balloc_pstat(s, "\n");
- __balloc_pstat(s, "Bytes allocated: %llu\n", a->bytes_alloced);
- __balloc_pstat(s, "Bytes allocated (real): %llu\n",
- a->bytes_alloced_real);
- __balloc_pstat(s, "Bytes freed: %llu\n", a->bytes_freed);
-
- if (lock)
- balloc_unlock(a);
-
-#undef __balloc_pstats
-}
-
-static int __alloc_show(struct seq_file *s, void *unused)
-{
- struct gk20a_allocator *a = s->private;
-
- balloc_print_stats(a, s, 1);
-
- return 0;
-}
-
-static int __alloc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, __alloc_show, inode->i_private);
-}
-
-static const struct file_operations __alloc_fops = {
- .open = __alloc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
-static void balloc_init_alloc_debug(struct gk20a_allocator *a)
-{
- if (!balloc_debugfs_root)
- return;
-
- a->debugfs_entry = debugfs_create_file(a->name, S_IRUGO,
- balloc_debugfs_root,
- a, &__alloc_fops);
-}
-
-void gk20a_alloc_debugfs_init(struct platform_device *pdev)
-{
- struct gk20a_platform *platform = platform_get_drvdata(pdev);
- struct dentry *gpu_root = platform->debugfs;
-
- balloc_debugfs_root = debugfs_create_dir("allocators", gpu_root);
- if (IS_ERR_OR_NULL(balloc_debugfs_root))
- return;
-
- debugfs_create_u32("tracing", 0664, balloc_debugfs_root,
- &balloc_tracing_on);
-}
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h
index e86e053b0..69a227bde 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -17,190 +17,75 @@
#ifndef GK20A_ALLOCATOR_H
#define GK20A_ALLOCATOR_H
-#include
#include
-#include
-#include
+#include
+#include
/* #define ALLOCATOR_DEBUG */
-/*
- * Each buddy is an element in a binary tree.
- */
-struct gk20a_buddy {
- struct gk20a_buddy *parent; /* Parent node. */
- struct gk20a_buddy *buddy; /* This node's buddy. */
- struct gk20a_buddy *left; /* Lower address sub-node. */
- struct gk20a_buddy *right; /* Higher address sub-node. */
-
- struct list_head buddy_entry; /* List entry for various lists. */
- struct rb_node alloced_entry; /* RB tree of allocations. */
-
- u64 start; /* Start address of this buddy. */
- u64 end; /* End address of this buddy. */
- u64 order; /* Buddy order. */
-
-#define BALLOC_BUDDY_ALLOCED 0x1
-#define BALLOC_BUDDY_SPLIT 0x2
-#define BALLOC_BUDDY_IN_LIST 0x4
- int flags; /* List of associated flags. */
-
- /*
- * Size of the PDE this buddy is using. This allows for grouping like
- * sized allocations into the same PDE.
- */
-#define BALLOC_PTE_SIZE_ANY 0x0
-#define BALLOC_PTE_SIZE_SMALL 0x1
-#define BALLOC_PTE_SIZE_BIG 0x2
- int pte_size;
-};
-
-#define __buddy_flag_ops(flag, flag_up) \
- static inline int buddy_is_ ## flag(struct gk20a_buddy *b) \
- { \
- return b->flags & BALLOC_BUDDY_ ## flag_up; \
- } \
- static inline void buddy_set_ ## flag(struct gk20a_buddy *b) \
- { \
- b->flags |= BALLOC_BUDDY_ ## flag_up; \
- } \
- static inline void buddy_clr_ ## flag(struct gk20a_buddy *b) \
- { \
- b->flags &= ~BALLOC_BUDDY_ ## flag_up; \
- }
-
-/*
- * int buddy_is_alloced(struct gk20a_buddy *b);
- * void buddy_set_alloced(struct gk20a_buddy *b);
- * void buddy_clr_alloced(struct gk20a_buddy *b);
- *
- * int buddy_is_split(struct gk20a_buddy *b);
- * void buddy_set_split(struct gk20a_buddy *b);
- * void buddy_clr_split(struct gk20a_buddy *b);
- *
- * int buddy_is_in_list(struct gk20a_buddy *b);
- * void buddy_set_in_list(struct gk20a_buddy *b);
- * void buddy_clr_in_list(struct gk20a_buddy *b);
- */
-__buddy_flag_ops(alloced, ALLOCED);
-__buddy_flag_ops(split, SPLIT);
-__buddy_flag_ops(in_list, IN_LIST);
-
-/*
- * Keeps info for a fixed allocation.
- */
-struct gk20a_fixed_alloc {
- struct list_head buddies; /* List of buddies. */
- struct rb_node alloced_entry; /* RB tree of fixed allocations. */
-
- u64 start; /* Start of fixed block. */
- u64 end; /* End address. */
-};
-
-struct vm_gk20a;
-
-/*
- * GPU buddy allocator for the various GPU address spaces. Each addressable unit
- * doesn't have to correspond to a byte. In some cases each unit is a more
- * complex object such as a comp_tag line or the like.
- *
- * The max order is computed based on the size of the minimum order and the size
- * of the address space.
- *
- * order_size is the size of an order 0 buddy.
- */
+/* main struct */
struct gk20a_allocator {
- struct vm_gk20a *vm; /* Parent VM - can be NULL. */
+ char name[32]; /* name for allocator */
+ struct rb_root rb_root; /* rb tree root for blocks */
- char name[32]; /* Name of allocator. */
+ u32 base; /* min value of this linear space */
+ u32 limit; /* max value = limit - 1 */
- u64 base; /* Base address of the space. */
- u64 length; /* Length of the space. */
- u64 blk_size; /* Size of order 0 allocation. */
- u64 blk_shift; /* Shift to divide by blk_size. */
+ unsigned long *bitmap; /* bitmap */
- int init; /* Non-zero if initialized. */
+ struct gk20a_alloc_block *block_first; /* first block in list */
+ struct gk20a_alloc_block *block_recent; /* last visited block */
- /* Internal stuff. */
- u64 start; /* Real start (aligned to blk_size). */
- u64 end; /* Real end, trimmed if needed. */
- u64 count; /* Count of objects in space. */
- u64 blks; /* Count of blks in the space. */
- u64 max_order; /* Specific maximum order. */
+ u32 first_free_addr; /* first free addr, non-contiguous
+ allocation preferred start,
+ in order to pick up small holes */
+ u32 last_free_addr; /* last free addr, contiguous
+ allocation preferred start */
+ u32 cached_hole_size; /* max free hole size up to
+ last_free_addr */
+ u32 block_count; /* number of blocks */
- struct rb_root alloced_buddies; /* Outstanding allocations. */
- struct rb_root fixed_allocs; /* Outstanding fixed allocations. */
+ struct rw_semaphore rw_sema; /* lock */
+ struct kmem_cache *block_cache; /* slab cache */
- struct mutex lock; /* Protects buddy access. */
+ /* if enabled, constrain to [base, limit) */
+ struct {
+ bool enable;
+ u32 base;
+ u32 limit;
+ } constraint;
-#define GPU_BALLOC_GVA_SPACE 0x1
- u64 flags;
+ int (*alloc)(struct gk20a_allocator *allocator,
+ u32 *addr, u32 len, u32 align);
+ int (*free)(struct gk20a_allocator *allocator,
+ u32 addr, u32 len, u32 align);
- /*
- * Impose an upper bound on the maximum order.
- */
-#define GPU_BALLOC_MAX_ORDER 31
-#define GPU_BALLOC_ORDER_LIST_LEN (GPU_BALLOC_MAX_ORDER + 1)
-
- struct list_head buddy_list[GPU_BALLOC_ORDER_LIST_LEN];
- u64 buddy_list_len[GPU_BALLOC_ORDER_LIST_LEN];
- u64 buddy_list_split[GPU_BALLOC_ORDER_LIST_LEN];
- u64 buddy_list_alloced[GPU_BALLOC_ORDER_LIST_LEN];
-
- /*
- * This is for when the allocator is managing a GVA space (the
- * GPU_BALLOC_GVA_SPACE bit is set in @flags). This requires
- * that we group like sized allocations into PDE blocks.
- */
- u64 pte_blk_order;
-
- struct dentry *debugfs_entry;
-
- u64 bytes_alloced;
- u64 bytes_alloced_real;
- u64 bytes_freed;
};
-#define balloc_lock(a) mutex_lock(&(a)->lock)
-#define balloc_unlock(a) mutex_unlock(&(a)->lock)
-
-#define balloc_get_order_list(a, order) (&(a)->buddy_list[(order)])
-#define balloc_order_to_len(a, order) ((1 << order) * (a)->blk_size)
-#define balloc_base_shift(a, base) ((base) - (a)->start)
-#define balloc_base_unshift(a, base) ((base) + (a)->start)
-
-int gk20a_allocator_init(struct gk20a_allocator *allocator,
- const char *name, u64 base, u64 size, u64 order0);
-int __gk20a_allocator_init(struct gk20a_allocator *allocator,
- struct vm_gk20a *vm, const char *name,
- u64 base, u64 size, u64 order0,
- u64 max_order, u64 flags);
+int gk20a_allocator_init(struct gk20a_allocator *allocator,
+ const char *name, u32 base, u32 size);
void gk20a_allocator_destroy(struct gk20a_allocator *allocator);
-/*
- * Normal alloc/free operations for the buddy allocator.
- */
-u64 gk20a_balloc(struct gk20a_allocator *allocator, u64 len);
-void gk20a_bfree(struct gk20a_allocator *allocator, u64 addr);
+int gk20a_allocator_block_alloc(struct gk20a_allocator *allocator,
+ u32 *addr, u32 len, u32 align);
-/*
- * Special interface to allocate a memory regions with a specific starting
- * address. Yikes.
- */
-u64 gk20a_balloc_fixed(struct gk20a_allocator *allocator, u64 base, u64 len);
-
-/*
- * Debugfs init.
- */
-void gk20a_alloc_debugfs_init(struct platform_device *pdev);
+int gk20a_allocator_block_free(struct gk20a_allocator *allocator,
+ u32 addr, u32 len, u32 align);
#if defined(ALLOCATOR_DEBUG)
-#define balloc_dbg(alloctor, format, arg...) \
- pr_info("%-25s %25s() " format, \
- alloctor->name, __func__, ##arg)
-#else
-#define balloc_dbg(allocator, format, arg...)
-#endif
+
+#define allocator_dbg(alloctor, format, arg...) \
+do { \
+ if (1) \
+ pr_debug("gk20a_allocator (%s) %s: " format "\n",\
+ alloctor->name, __func__, ##arg);\
+} while (0)
+
+#else /* ALLOCATOR_DEBUG */
+
+#define allocator_dbg(format, arg...)
+
+#endif /* ALLOCATOR_DEBUG */
#endif /* GK20A_ALLOCATOR_H */
diff --git a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c
index bcadde936..c5d0f0c46 100644
--- a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c
@@ -89,8 +89,9 @@ static int gk20a_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
if (err)
return err;
- __gk20a_allocator_init(&gr->comp_tags, NULL, "comptag",
- 1, max_comptag_lines - 1, 1, 10, 0);
+ gk20a_allocator_init(&gr->comp_tags, "comptag",
+ 1, /* start */
+ max_comptag_lines - 1); /* length*/
gr->comptags_per_cacheline = comptags_per_cacheline;
gr->slices_per_ltc = slices_per_fbp / g->ltc_count;
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index c11414b57..5d1ff5630 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -132,8 +132,10 @@ static void gk20a_mm_delete_priv(void *_priv)
if (priv->comptags.lines) {
BUG_ON(!priv->comptag_allocator);
- gk20a_bfree(priv->comptag_allocator,
- priv->comptags.real_offset);
+ priv->comptag_allocator->free(priv->comptag_allocator,
+ priv->comptags.offset,
+ priv->comptags.allocated_lines,
+ 1);
}
/* Free buffer states */
@@ -222,9 +224,10 @@ static int gk20a_alloc_comptags(struct gk20a *g,
u32 lines, bool user_mappable)
{
struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
+ u32 offset = 0;
+ int err;
u32 ctaglines_to_allocate;
- u32 ctagline_align = 1;
- u32 offset;
+ u32 ctagline_align;
const u32 aggregate_cacheline_sz =
g->gr.cacheline_size * g->gr.slices_per_ltc *
g->ltc_count;
@@ -238,6 +241,7 @@ static int gk20a_alloc_comptags(struct gk20a *g,
if (!user_mappable) {
ctaglines_to_allocate = lines;
+ ctagline_align = 1;
} else {
/* Unfortunately, we cannot use allocation alignment
* here, since compbits per cacheline is not always a
@@ -269,26 +273,72 @@ static int gk20a_alloc_comptags(struct gk20a *g,
if (ctaglines_to_allocate < lines)
return -EINVAL; /* integer overflow */
- pr_info("user-mapped CTAGS: %u\n", ctaglines_to_allocate);
}
/* store the allocator so we can use it when we free the ctags */
priv->comptag_allocator = allocator;
- offset = gk20a_balloc(allocator, ctaglines_to_allocate);
- if (!offset)
- return -ENOMEM;
+ err = allocator->alloc(allocator, &offset,
+ ctaglines_to_allocate, 1);
+ if (!err) {
+ const u32 alignment_lines =
+ DIV_ROUND_UP(offset, ctagline_align) * ctagline_align -
+ offset;
- priv->comptags.lines = lines;
- priv->comptags.real_offset = offset;
+ /* prune the preceding ctaglines that were allocated
+ for alignment */
+ if (alignment_lines) {
+ /* free alignment lines */
+ int tmp=
+ allocator->free(allocator, offset,
+ alignment_lines,
+ 1);
+ WARN_ON(tmp);
- if (user_mappable)
- offset = DIV_ROUND_UP(offset, ctagline_align) * ctagline_align;
+ offset += alignment_lines;
+ ctaglines_to_allocate -= alignment_lines;
+ }
- priv->comptags.offset = offset;
+ /* check if we can prune the trailing, too */
+ if (user_mappable)
+ {
+ u32 needed_cachelines =
+ DIV_ROUND_UP(lines, g->gr.comptags_per_cacheline);
- return 0;
+ u32 first_unneeded_cacheline =
+ DIV_ROUND_UP(round_up(needed_cachelines *
+ aggregate_cacheline_sz,
+ small_pgsz),
+ aggregate_cacheline_sz);
+ u32 needed_ctaglines =
+ first_unneeded_cacheline *
+ g->gr.comptags_per_cacheline;
+
+ if (needed_ctaglines < ctaglines_to_allocate) {
+ /* free the unneeded trailing lines */
+ int tmp=
+ allocator->free(
+ allocator,
+ offset + needed_ctaglines,
+ (ctaglines_to_allocate -
+ needed_ctaglines),
+ 1);
+ WARN_ON(tmp);
+
+ ctaglines_to_allocate = needed_ctaglines;
+ }
+ }
+
+ priv->comptags.offset = offset;
+ priv->comptags.lines = lines;
+ priv->comptags.allocated_lines = ctaglines_to_allocate;
+ priv->comptags.user_mappable = user_mappable;
+ }
+ return err;
}
+
+
+
static int gk20a_init_mm_reset_enable_hw(struct gk20a *g)
{
gk20a_dbg_fn("");
@@ -839,12 +889,14 @@ static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset)
}
u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
- u64 size,
- enum gmmu_pgsz_gk20a gmmu_pgsz_idx)
+ u64 size,
+ enum gmmu_pgsz_gk20a gmmu_pgsz_idx)
{
struct gk20a_allocator *vma = &vm->vma[gmmu_pgsz_idx];
+ int err;
u64 offset;
+ u32 start_page_nr = 0, num_pages;
u64 gmmu_page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx];
if (gmmu_pgsz_idx >= gmmu_nr_page_sizes) {
@@ -860,19 +912,28 @@ u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
}
- /* Be certain we round up to gmmu_page_size if needed */
+ /* be certain we round up to gmmu_page_size if needed */
+ /* TBD: DIV_ROUND_UP -> undefined reference to __aeabi_uldivmod */
size = (size + ((u64)gmmu_page_size - 1)) & ~((u64)gmmu_page_size - 1);
+
gk20a_dbg_info("size=0x%llx @ pgsz=%dKB", size,
vm->gmmu_page_sizes[gmmu_pgsz_idx]>>10);
- offset = gk20a_balloc(vma, size);
- if (!offset) {
+ /* The vma allocator represents page accounting. */
+ num_pages = size >> ilog2(vm->gmmu_page_sizes[gmmu_pgsz_idx]);
+
+ err = vma->alloc(vma, &start_page_nr, num_pages, 1);
+
+ if (err) {
gk20a_err(dev_from_vm(vm),
- "%s oom: sz=0x%llx", vma->name, size);
+ "%s oom: sz=0x%llx", vma->name, size);
return 0;
}
+ offset = (u64)start_page_nr <<
+ ilog2(vm->gmmu_page_sizes[gmmu_pgsz_idx]);
gk20a_dbg_fn("%s found addr: 0x%llx", vma->name, offset);
+
return offset;
}
@@ -881,12 +942,25 @@ int gk20a_vm_free_va(struct vm_gk20a *vm,
enum gmmu_pgsz_gk20a pgsz_idx)
{
struct gk20a_allocator *vma = &vm->vma[pgsz_idx];
+ u32 page_size = vm->gmmu_page_sizes[pgsz_idx];
+ u32 page_shift = ilog2(page_size);
+ u32 start_page_nr, num_pages;
+ int err;
gk20a_dbg_info("%s free addr=0x%llx, size=0x%llx",
vma->name, offset, size);
- gk20a_bfree(vma, offset);
- return 0;
+ start_page_nr = (u32)(offset >> page_shift);
+ num_pages = (u32)((size + page_size - 1) >> page_shift);
+
+ err = vma->free(vma, start_page_nr, num_pages, 1);
+ if (err) {
+ gk20a_err(dev_from_vm(vm),
+ "not found: offset=0x%llx, sz=0x%llx",
+ offset, size);
+ }
+
+ return err;
}
static int insert_mapped_buffer(struct rb_root *root,
@@ -1062,7 +1136,7 @@ static int validate_fixed_buffer(struct vm_gk20a *vm,
if (map_offset & (vm->gmmu_page_sizes[bfr->pgsz_idx] - 1)) {
gk20a_err(dev, "map offset must be buffer page size aligned 0x%llx",
- map_offset);
+ map_offset);
return -EINVAL;
}
@@ -2359,6 +2433,7 @@ int gk20a_init_vm(struct mm_gk20a *mm,
char *name)
{
int err, i;
+ u32 num_small_pages, num_large_pages, low_hole_pages;
char alloc_name[32];
u64 small_vma_size, large_vma_size;
u32 pde_lo, pde_hi;
@@ -2419,31 +2494,34 @@ int gk20a_init_vm(struct mm_gk20a *mm,
large_vma_size = vm->va_limit - small_vma_size;
}
+ num_small_pages = (u32)(small_vma_size >>
+ ilog2(vm->gmmu_page_sizes[gmmu_page_size_small]));
+
+ /* num_pages above is without regard to the low-side hole. */
+ low_hole_pages = (vm->va_start >>
+ ilog2(vm->gmmu_page_sizes[gmmu_page_size_small]));
+
snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name,
vm->gmmu_page_sizes[gmmu_page_size_small]>>10);
- err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_small],
- vm, alloc_name,
- vm->va_start,
- small_vma_size - vm->va_start,
- SZ_4K,
- GPU_BALLOC_MAX_ORDER,
- GPU_BALLOC_GVA_SPACE);
+ err = gk20a_allocator_init(&vm->vma[gmmu_page_size_small],
+ alloc_name,
+ low_hole_pages, /*start*/
+ num_small_pages - low_hole_pages);/* length*/
if (err)
goto clean_up_ptes;
if (big_pages) {
+ u32 start = (u32)(small_vma_size >>
+ ilog2(vm->gmmu_page_sizes[gmmu_page_size_big]));
+ num_large_pages = (u32)(large_vma_size >>
+ ilog2(vm->gmmu_page_sizes[gmmu_page_size_big]));
+
snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB",
name, vm->gmmu_page_sizes[gmmu_page_size_big]>>10);
- /*
- * Big page VMA starts at the end of the small page VMA.
- */
- err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_big],
- vm, alloc_name,
- small_vma_size,
- large_vma_size,
- big_page_size,
- GPU_BALLOC_MAX_ORDER,
- GPU_BALLOC_GVA_SPACE);
+ err = gk20a_allocator_init(&vm->vma[gmmu_page_size_big],
+ alloc_name,
+ start, /* start */
+ num_large_pages); /* length */
if (err)
goto clean_up_small_allocator;
}
@@ -2524,9 +2602,9 @@ int gk20a_vm_release_share(struct gk20a_as_share *as_share)
int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
struct nvgpu_as_alloc_space_args *args)
-{
- int err = -ENOMEM;
+{ int err = -ENOMEM;
int pgsz_idx = gmmu_page_size_small;
+ u32 start_page_nr;
struct gk20a_allocator *vma;
struct vm_gk20a *vm = as_share->vm;
struct gk20a *g = vm->mm->g;
@@ -2557,19 +2635,21 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
goto clean_up;
}
- vma = &vm->vma[pgsz_idx];
+ start_page_nr = 0;
if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET)
- vaddr_start = gk20a_balloc_fixed(vma, args->o_a.offset,
- (u64)args->pages *
- (u64)args->page_size);
- else
- vaddr_start = gk20a_balloc(vma, args->pages * args->page_size);
+ start_page_nr = (u32)(args->o_a.offset >>
+ ilog2(vm->gmmu_page_sizes[pgsz_idx]));
- if (!vaddr_start) {
+ vma = &vm->vma[pgsz_idx];
+ err = vma->alloc(vma, &start_page_nr, args->pages, 1);
+ if (err) {
kfree(va_node);
goto clean_up;
}
+ vaddr_start = (u64)start_page_nr <<
+ ilog2(vm->gmmu_page_sizes[pgsz_idx]);
+
va_node->vaddr_start = vaddr_start;
va_node->size = (u64)args->page_size * (u64)args->pages;
va_node->pgsz_idx = pgsz_idx;
@@ -2593,7 +2673,7 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
true);
if (!map_offset) {
mutex_unlock(&vm->update_gmmu_lock);
- gk20a_bfree(vma, vaddr_start);
+ vma->free(vma, start_page_nr, args->pages, 1);
kfree(va_node);
goto clean_up;
}
@@ -2605,7 +2685,6 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
mutex_unlock(&vm->update_gmmu_lock);
args->o_a.offset = vaddr_start;
- err = 0;
clean_up:
return err;
@@ -2616,6 +2695,7 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
{
int err = -ENOMEM;
int pgsz_idx;
+ u32 start_page_nr;
struct gk20a_allocator *vma;
struct vm_gk20a *vm = as_share->vm;
struct vm_reserved_va_node *va_node;
@@ -2628,8 +2708,14 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
pgsz_idx = __nv_gmmu_va_is_upper(vm, args->offset) ?
gmmu_page_size_big : gmmu_page_size_small;
+ start_page_nr = (u32)(args->offset >>
+ ilog2(vm->gmmu_page_sizes[pgsz_idx]));
+
vma = &vm->vma[pgsz_idx];
- gk20a_bfree(vma, args->offset);
+ err = vma->free(vma, start_page_nr, args->pages, 1);
+
+ if (err)
+ goto clean_up;
mutex_lock(&vm->update_gmmu_lock);
va_node = addr_to_reservation(vm, args->offset);
@@ -2659,8 +2745,8 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
kfree(va_node);
}
mutex_unlock(&vm->update_gmmu_lock);
- err = 0;
+clean_up:
return err;
}
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index cf246744f..57f7a3733 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -131,7 +131,6 @@ enum gmmu_pgsz_gk20a {
};
struct gk20a_comptags {
- u32 real_offset;
u32 offset;
u32 lines;
u32 allocated_lines;
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
index fc8d130c8..275fbd4e4 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
@@ -2438,6 +2438,7 @@ static int pmu_init_perfmon(struct pmu_gk20a *pmu)
struct pmu_payload payload;
u32 seq;
u32 data;
+ int err = 0;
gk20a_dbg_fn("");
@@ -2488,11 +2489,12 @@ static int pmu_init_perfmon(struct pmu_gk20a *pmu)
gk20a_writel(g, pwr_pmu_idle_ctrl_r(2), data);
if (!pmu->sample_buffer)
- pmu->sample_buffer = gk20a_balloc(&pmu->dmem,
- 2 * sizeof(u16));
- if (!pmu->sample_buffer) {
+ err = pmu->dmem.alloc(&pmu->dmem,
+ &pmu->sample_buffer, 2 * sizeof(u16),
+ PMU_DMEM_ALLOC_ALIGNMENT);
+ if (err) {
gk20a_err(dev_from_gk20a(g),
- "failed to allocate perfmon sample buffer");
+ "failed to allocate perfmon sample buffer");
return -ENOMEM;
}
@@ -2590,17 +2592,15 @@ static int pmu_process_init_msg(struct pmu_gk20a *pmu,
for (i = 0; i < PMU_QUEUE_COUNT; i++)
pmu_queue_init(pmu, i, init);
- if (!pmu->dmem.init) {
- /* Align start and end addresses */
+ if (!pmu->dmem.alloc) {
+ /*Align start and end addresses*/
u32 start = ALIGN(pv->get_pmu_init_msg_pmu_sw_mg_off(init),
- PMU_DMEM_ALLOC_ALIGNMENT);
+ PMU_DMEM_ALLOC_ALIGNMENT);
u32 end = (pv->get_pmu_init_msg_pmu_sw_mg_off(init) +
- pv->get_pmu_init_msg_pmu_sw_mg_size(init)) &
+ pv->get_pmu_init_msg_pmu_sw_mg_size(init)) &
~(PMU_DMEM_ALLOC_ALIGNMENT - 1);
u32 size = end - start;
- __gk20a_allocator_init(&pmu->dmem, NULL, "gk20a_pmu_dmem",
- start, size,
- PMU_DMEM_ALLOC_ALIGNMENT, 4, 0);
+ gk20a_allocator_init(&pmu->dmem, "gk20a_pmu_dmem", start, size);
}
pmu->pmu_ready = true;
@@ -2737,14 +2737,20 @@ static int pmu_response_handle(struct pmu_gk20a *pmu,
seq->callback = NULL;
if (pv->pmu_allocation_get_dmem_size(pmu,
pv->get_pmu_seq_in_a_ptr(seq)) != 0)
- gk20a_bfree(&pmu->dmem,
+ pmu->dmem.free(&pmu->dmem,
pv->pmu_allocation_get_dmem_offset(pmu,
- pv->get_pmu_seq_in_a_ptr(seq)));
+ pv->get_pmu_seq_in_a_ptr(seq)),
+ pv->pmu_allocation_get_dmem_size(pmu,
+ pv->get_pmu_seq_in_a_ptr(seq)),
+ PMU_DMEM_ALLOC_ALIGNMENT);
if (pv->pmu_allocation_get_dmem_size(pmu,
pv->get_pmu_seq_out_a_ptr(seq)) != 0)
- gk20a_bfree(&pmu->dmem,
+ pmu->dmem.free(&pmu->dmem,
pv->pmu_allocation_get_dmem_offset(pmu,
- pv->get_pmu_seq_out_a_ptr(seq)));
+ pv->get_pmu_seq_out_a_ptr(seq)),
+ pv->pmu_allocation_get_dmem_size(pmu,
+ pv->get_pmu_seq_out_a_ptr(seq)),
+ PMU_DMEM_ALLOC_ALIGNMENT);
if (seq->callback)
seq->callback(g, msg, seq->cb_params, seq->desc, ret);
@@ -3381,10 +3387,11 @@ int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd,
pv->pmu_allocation_set_dmem_size(pmu, in,
(u16)max(payload->in.size, payload->out.size));
- *(pv->pmu_allocation_get_dmem_offset_addr(pmu, in)) =
- gk20a_balloc(&pmu->dmem,
- pv->pmu_allocation_get_dmem_size(pmu, in));
- if (!*(pv->pmu_allocation_get_dmem_offset_addr(pmu, in)))
+ err = pmu->dmem.alloc(&pmu->dmem,
+ pv->pmu_allocation_get_dmem_offset_addr(pmu, in),
+ pv->pmu_allocation_get_dmem_size(pmu, in),
+ PMU_DMEM_ALLOC_ALIGNMENT);
+ if (err)
goto clean_up;
pmu_copy_to_dmem(pmu, (pv->pmu_allocation_get_dmem_offset(pmu,
@@ -3405,12 +3412,11 @@ int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd,
(u16)payload->out.size);
if (payload->out.buf != payload->in.buf) {
-
- *(pv->pmu_allocation_get_dmem_offset_addr(pmu, out)) =
- gk20a_balloc(&pmu->dmem,
- pv->pmu_allocation_get_dmem_size(pmu, out));
- if (!*(pv->pmu_allocation_get_dmem_offset_addr(pmu,
- out)))
+ err = pmu->dmem.alloc(&pmu->dmem,
+ pv->pmu_allocation_get_dmem_offset_addr(pmu, out),
+ pv->pmu_allocation_get_dmem_size(pmu, out),
+ PMU_DMEM_ALLOC_ALIGNMENT);
+ if (err)
goto clean_up;
} else {
BUG_ON(in == NULL);
@@ -3438,11 +3444,15 @@ int gk20a_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd,
clean_up:
gk20a_dbg_fn("fail");
if (in)
- gk20a_bfree(&pmu->dmem,
- pv->pmu_allocation_get_dmem_offset(pmu, in));
+ pmu->dmem.free(&pmu->dmem,
+ pv->pmu_allocation_get_dmem_offset(pmu, in),
+ pv->pmu_allocation_get_dmem_size(pmu, in),
+ PMU_DMEM_ALLOC_ALIGNMENT);
if (out)
- gk20a_bfree(&pmu->dmem,
- pv->pmu_allocation_get_dmem_offset(pmu, out));
+ pmu->dmem.free(&pmu->dmem,
+ pv->pmu_allocation_get_dmem_offset(pmu, out),
+ pv->pmu_allocation_get_dmem_size(pmu, out),
+ PMU_DMEM_ALLOC_ALIGNMENT);
pmu_seq_release(pmu, seq);
return err;
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
index e54805a62..6cd173e8f 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.h
@@ -3,7 +3,7 @@
*
* GK20A PMU (aka. gPMU outside gk20a context)
*
- * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -437,7 +437,7 @@ struct pmu_ucode_desc {
#define PMU_UNIT_ID_IS_VALID(id) \
(((id) < PMU_UNIT_END) || ((id) >= PMU_UNIT_TEST_START))
-#define PMU_DMEM_ALLOC_ALIGNMENT (4)
+#define PMU_DMEM_ALLOC_ALIGNMENT (32)
#define PMU_DMEM_ALIGNMENT (4)
#define PMU_CMD_FLAGS_PMU_MASK (0xF0)
diff --git a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c
index 053550f68..04f61c58a 100644
--- a/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/semaphore_gk20a.c
@@ -3,7 +3,7 @@
*
* GK20A Semaphores
*
- * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -44,10 +44,8 @@ struct gk20a_semaphore_pool *gk20a_semaphore_pool_alloc(struct device *d,
if (gk20a_get_sgtable(d, &p->sgt, p->cpu_va, p->iova, p->size))
goto clean_up;
- /* Sacrifice one semaphore in the name of returning error codes. */
- if (gk20a_allocator_init(&p->alloc, unique_name,
- SEMAPHORE_SIZE, p->size - SEMAPHORE_SIZE,
- SEMAPHORE_SIZE))
+ if (gk20a_allocator_init(&p->alloc, unique_name, 0,
+ p->size))
goto clean_up;
gk20a_dbg_info("cpuva=%p iova=%llx phys=%llx", p->cpu_va,
@@ -165,8 +163,8 @@ struct gk20a_semaphore *gk20a_semaphore_alloc(struct gk20a_semaphore_pool *pool)
if (!s)
return NULL;
- s->offset = gk20a_balloc(&pool->alloc, SEMAPHORE_SIZE);
- if (!s->offset) {
+ if (pool->alloc.alloc(&pool->alloc, &s->offset, SEMAPHORE_SIZE,
+ SEMAPHORE_SIZE)) {
gk20a_err(pool->dev, "failed to allocate semaphore");
kfree(s);
return NULL;
@@ -188,7 +186,8 @@ static void gk20a_semaphore_free(struct kref *ref)
struct gk20a_semaphore *s =
container_of(ref, struct gk20a_semaphore, ref);
- gk20a_bfree(&s->pool->alloc, s->offset);
+ s->pool->alloc.free(&s->pool->alloc, s->offset, SEMAPHORE_SIZE,
+ SEMAPHORE_SIZE);
gk20a_semaphore_pool_put(s->pool);
kfree(s);
}
diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
index bfb1b5abd..9090be230 100644
--- a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
@@ -90,8 +90,9 @@ static int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
if (err)
return err;
- __gk20a_allocator_init(&gr->comp_tags, NULL, "comptag",
- 1, max_comptag_lines - 1, 1, 10, 0);
+ gk20a_allocator_init(&gr->comp_tags, "comptag",
+ 1, /* start */
+ max_comptag_lines - 1); /* length*/
gr->comptags_per_cacheline = comptags_per_cacheline;
gr->slices_per_ltc = slices_per_ltc;
diff --git a/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c b/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c
index 211e34b54..1beac2160 100644
--- a/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/ltc_vgpu.c
@@ -41,8 +41,9 @@ static int vgpu_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
if (max_comptag_lines < 2)
return -ENXIO;
- __gk20a_allocator_init(&gr->comp_tags, NULL, "comptag",
- 1, max_comptag_lines - 1, 1, 10, 0); /* length*/
+ gk20a_allocator_init(&gr->comp_tags, "comptag",
+ 1, /* start */
+ max_comptag_lines - 1); /* length*/
return 0;
}
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
index 855aac0d7..94e4602f2 100644
--- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
@@ -243,9 +243,11 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
struct tegra_vgpu_as_share_params *p = &msg.params.as_share;
struct mm_gk20a *mm = &g->mm;
struct vm_gk20a *vm;
+ u32 num_small_pages, num_large_pages, low_hole_pages;
u64 small_vma_size, large_vma_size;
char name[32];
int err, i;
+ u32 start;
/* note: keep the page sizes sorted lowest to highest here */
u32 gmmu_page_sizes[gmmu_nr_page_sizes] = {
@@ -292,27 +294,33 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
small_vma_size = (u64)16 << 30;
large_vma_size = vm->va_limit - small_vma_size;
+ num_small_pages = (u32)(small_vma_size >>
+ ilog2(vm->gmmu_page_sizes[gmmu_page_size_small]));
+
+ /* num_pages above is without regard to the low-side hole. */
+ low_hole_pages = (vm->va_start >>
+ ilog2(vm->gmmu_page_sizes[gmmu_page_size_small]));
+
snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
gmmu_page_sizes[gmmu_page_size_small]>>10);
- err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_small],
- vm, name,
- vm->va_start,
- small_vma_size - vm->va_start,
- SZ_4K,
- GPU_BALLOC_MAX_ORDER,
- GPU_BALLOC_GVA_SPACE);
+ err = gk20a_allocator_init(&vm->vma[gmmu_page_size_small],
+ name,
+ low_hole_pages, /*start*/
+ num_small_pages - low_hole_pages);/* length*/
if (err)
goto clean_up_share;
+ start = (u32)(small_vma_size >>
+ ilog2(vm->gmmu_page_sizes[gmmu_page_size_big]));
+ num_large_pages = (u32)(large_vma_size >>
+ ilog2(vm->gmmu_page_sizes[gmmu_page_size_big]));
+
snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
gmmu_page_sizes[gmmu_page_size_big]>>10);
- err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_big],
- vm, name,
- small_vma_size,
- large_vma_size,
- big_page_size,
- GPU_BALLOC_MAX_ORDER,
- GPU_BALLOC_GVA_SPACE);
+ err = gk20a_allocator_init(&vm->vma[gmmu_page_size_big],
+ name,
+ start, /* start */
+ num_large_pages); /* length */
if (err)
goto clean_up_small_allocator;