linux-nvgpu/drivers/gpu/nvgpu/common/mm/page_allocator.c
Alex Waterman d630f1d99f gpu: nvgpu: Unify the small and large page address spaces
The basic structure of this patch is to turn the small page allocator
and the large page allocator into pointers (where they used to be
structs embedded directly). Each of those pointers is then assigned to
the same underlying allocator, since the buddy allocator has supported
mixed page sizes since its inception.
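
As a rough sketch of the unification (an illustration only; these
struct and field names are invented, not the driver's actual ones):

   /* Before: two embedded allocators, one per page size. */
   struct vm_split {
           struct nvgpu_allocator small_pages;   /* low GVA  */
           struct nvgpu_allocator large_pages;   /* high GVA */
   };

   /* After: two pointers aliasing one buddy-backed allocator. */
   struct vm_unified {
           struct nvgpu_allocator *small_pages;
           struct nvgpu_allocator *large_pages;
           struct nvgpu_allocator unified;       /* buddy allocator */
   };

   /* ... vm->small_pages = &vm->unified;
    *     vm->large_pages = &vm->unified; */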

Across the rest of the driver, some changes had to be made in order to
actually support mixed page sizes in a single address space.

1. Unifying the allocation page size determination

   Since the allocation and map operations happen at distinct
   times, both mapping and allocation of GVA space must agree on
   the page size. This is because the allocator has to place
   allocations of different page sizes in separate PDEs, to avoid
   having to support PDEs with mixed page sizes.

   To this end a function, __get_pte_size(), was introduced; it is
   used both by the balloc code and by the core GPU MM code. It
   determines page size based solely on the length of the mapping/
   allocation.
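
   As an illustration, a length-based page size helper could look
   like the following (the enum, names, and threshold rule are
   invented for this sketch; this is not the actual __get_pte_size()
   implementation):

      enum pgsz { PGSZ_SMALL, PGSZ_BIG };

      /* Choose a PTE size purely from the mapping/allocation length. */
      static enum pgsz pte_size_from_len(u64 len, u64 big_page_size)
      {
              if (big_page_size && len >= big_page_size)
                      return PGSZ_BIG;
              return PGSZ_SMALL;
      }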

2. Fixed address allocation + page size

   Similar to regular mappings/GVA allocations, page size
   determination for fixed address mappings had to be modified. In
   the past the address of the mapping determined the page size,
   since the address space was split by address (low addresses were
   small pages, high addresses large pages). Since that is no
   longer the case, the page size field in the reserve memory ioctl
   is now honored by the mapping code. When, for instance, CUDA
   makes a memory reservation, it specifies small or large pages.
   When CUDA then requests mappings within that address range, the
   page size is looked up in the reserved memory struct.

   Fixed address reservations were also modified to always allocate
   at PDE granularity (64M or 128M, depending on the large page
   size), as illustrated below. This prevents non-fixed allocations
   from ending up in the same PDE and causing kernel panics or GMMU
   faults.
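
   For example, rounding a reservation out to PDE boundaries might
   look like this (a sketch under the assumption that pde_size is a
   power of two, e.g. 64M or 128M; this helper is invented for
   illustration):

      /* Round [*base, *base + *size) out to PDE granularity. */
      static void pde_align(u64 *base, u64 *size, u64 pde_size)
      {
              u64 end = (*base + *size + pde_size - 1) & ~(pde_size - 1);

              *base &= ~(pde_size - 1);       /* round base down */
              *size = end - *base;            /* round length up */
      }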

3. The rest...

   The rest of the changes are just byproducts of the above. Lots
   of places required minor updates to use a pointer to the GVA
   allocator struct instead of the struct itself.

Lastly, this change is not truly complete. More work remains to be
done in order to fully remove the notion that there were ever separate
address spaces for different page sizes. After this patch, what
remains is essentially cleanup and proper documentation.

Bug 1396644
Bug 1729947

Change-Id: If51ab396a37ba16c69e434adb47edeef083dce57
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1265300
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
2017-01-31 16:23:07 -08:00


/*
 * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/bitops.h>
#include <linux/mm.h>

#include <nvgpu/allocator.h>
#include <nvgpu/page_allocator.h>

#include "buddy_allocator_priv.h"

#define palloc_dbg(a, fmt, arg...) \
        alloc_dbg(palloc_owner(a), fmt, ##arg)

static struct kmem_cache *page_alloc_cache;
static struct kmem_cache *page_alloc_chunk_cache;
static struct kmem_cache *page_alloc_slab_page_cache;
static DEFINE_MUTEX(meta_data_cache_lock);

/*
 * Handle the book-keeping for slab pages moving between the empty,
 * partial and full lists.
 */
static inline void add_slab_page_to_empty(struct page_alloc_slab *slab,
                                          struct page_alloc_slab_page *page)
{
        BUG_ON(page->state != SP_NONE);
        list_add(&page->list_entry, &slab->empty);
        slab->nr_empty++;
        page->state = SP_EMPTY;
}

static inline void add_slab_page_to_partial(struct page_alloc_slab *slab,
                                            struct page_alloc_slab_page *page)
{
        BUG_ON(page->state != SP_NONE);
        list_add(&page->list_entry, &slab->partial);
        slab->nr_partial++;
        page->state = SP_PARTIAL;
}

static inline void add_slab_page_to_full(struct page_alloc_slab *slab,
                                         struct page_alloc_slab_page *page)
{
        BUG_ON(page->state != SP_NONE);
        list_add(&page->list_entry, &slab->full);
        slab->nr_full++;
        page->state = SP_FULL;
}

static inline void del_slab_page_from_empty(struct page_alloc_slab *slab,
                                            struct page_alloc_slab_page *page)
{
        list_del_init(&page->list_entry);
        slab->nr_empty--;
        page->state = SP_NONE;
}

static inline void del_slab_page_from_partial(struct page_alloc_slab *slab,
                                              struct page_alloc_slab_page *page)
{
        list_del_init(&page->list_entry);
        slab->nr_partial--;
        page->state = SP_NONE;
}

static inline void del_slab_page_from_full(struct page_alloc_slab *slab,
                                           struct page_alloc_slab_page *page)
{
        list_del_init(&page->list_entry);
        slab->nr_full--;
        page->state = SP_NONE;
}

static u64 nvgpu_page_alloc_length(struct nvgpu_allocator *a)
{
        struct nvgpu_page_allocator *va = a->priv;

        return nvgpu_alloc_length(&va->source_allocator);
}

static u64 nvgpu_page_alloc_base(struct nvgpu_allocator *a)
{
        struct nvgpu_page_allocator *va = a->priv;

        return nvgpu_alloc_base(&va->source_allocator);
}

static int nvgpu_page_alloc_inited(struct nvgpu_allocator *a)
{
        struct nvgpu_page_allocator *va = a->priv;

        return nvgpu_alloc_initialized(&va->source_allocator);
}

static u64 nvgpu_page_alloc_end(struct nvgpu_allocator *a)
{
        struct nvgpu_page_allocator *va = a->priv;

        return nvgpu_alloc_end(&va->source_allocator);
}

static u64 nvgpu_page_alloc_space(struct nvgpu_allocator *a)
{
        struct nvgpu_page_allocator *va = a->priv;

        return nvgpu_alloc_space(&va->source_allocator);
}

static int nvgpu_page_reserve_co(struct nvgpu_allocator *a,
                                 struct nvgpu_alloc_carveout *co)
{
        struct nvgpu_page_allocator *va = a->priv;

        return nvgpu_alloc_reserve_carveout(&va->source_allocator, co);
}

static void nvgpu_page_release_co(struct nvgpu_allocator *a,
                                  struct nvgpu_alloc_carveout *co)
{
        struct nvgpu_page_allocator *va = a->priv;

        nvgpu_alloc_release_carveout(&va->source_allocator, co);
}

static void __nvgpu_free_pages(struct nvgpu_page_allocator *a,
                               struct nvgpu_page_alloc *alloc,
                               bool free_buddy_alloc)
{
        struct page_alloc_chunk *chunk;

        while (!list_empty(&alloc->alloc_chunks)) {
                chunk = list_first_entry(&alloc->alloc_chunks,
                                         struct page_alloc_chunk,
                                         list_entry);
                list_del(&chunk->list_entry);

                if (free_buddy_alloc)
                        nvgpu_free(&a->source_allocator, chunk->base);
                kfree(chunk);
        }

        kfree(alloc);
}
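
/*
 * Track an outstanding allocation in the RB tree, keyed by base address,
 * so that a later free can find it again.
 */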
static int __insert_page_alloc(struct nvgpu_page_allocator *a,
                               struct nvgpu_page_alloc *alloc)
{
        struct rb_node **new = &a->allocs.rb_node;
        struct rb_node *parent = NULL;

        while (*new) {
                struct nvgpu_page_alloc *tmp =
                        container_of(*new, struct nvgpu_page_alloc,
                                     tree_entry);

                parent = *new;
                if (alloc->base < tmp->base) {
                        new = &((*new)->rb_left);
                } else if (alloc->base > tmp->base) {
                        new = &((*new)->rb_right);
                } else {
                        WARN(1, "Duplicate entries in allocated list!\n");
                        return 0;
                }
        }

        rb_link_node(&alloc->tree_entry, parent, new);
        rb_insert_color(&alloc->tree_entry, &a->allocs);

        return 0;
}
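
/*
 * Look up an outstanding allocation by its exact base address. On success
 * the node is also erased from the tree; the caller owns the returned
 * struct.
 */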
static struct nvgpu_page_alloc *__find_page_alloc(
        struct nvgpu_page_allocator *a,
        u64 addr)
{
        struct rb_node *node = a->allocs.rb_node;
        struct nvgpu_page_alloc *alloc;

        while (node) {
                alloc = container_of(node, struct nvgpu_page_alloc,
                                     tree_entry);

                if (addr < alloc->base)
                        node = node->rb_left;
                else if (addr > alloc->base)
                        node = node->rb_right;
                else
                        break;
        }

        if (!node)
                return NULL;

        rb_erase(node, &a->allocs);

        return alloc;
}

static struct page_alloc_slab_page *alloc_slab_page(
        struct nvgpu_page_allocator *a,
        struct page_alloc_slab *slab)
{
        struct page_alloc_slab_page *slab_page;

        slab_page = kmem_cache_alloc(page_alloc_slab_page_cache, GFP_KERNEL);
        if (!slab_page) {
                palloc_dbg(a, "OOM: unable to alloc slab_page struct!\n");
                return ERR_PTR(-ENOMEM);
        }

        memset(slab_page, 0, sizeof(*slab_page));

        slab_page->page_addr = nvgpu_alloc(&a->source_allocator,
                                           a->page_size);
        if (!slab_page->page_addr) {
                kfree(slab_page);
                palloc_dbg(a, "OOM: vidmem is full!\n");
                return ERR_PTR(-ENOMEM);
        }

        INIT_LIST_HEAD(&slab_page->list_entry);
        slab_page->slab_size = slab->slab_size;
        slab_page->nr_objects = (u32)a->page_size / slab->slab_size;
        slab_page->nr_objects_alloced = 0;
        slab_page->owner = slab;
        slab_page->state = SP_NONE;

        a->pages_alloced++;

        palloc_dbg(a, "Allocated new slab page @ 0x%012llx size=%u\n",
                   slab_page->page_addr, slab_page->slab_size);

        return slab_page;
}

static void free_slab_page(struct nvgpu_page_allocator *a,
                           struct page_alloc_slab_page *slab_page)
{
        palloc_dbg(a, "Freeing slab page @ 0x%012llx\n",
                   slab_page->page_addr);

        BUG_ON((slab_page->state != SP_NONE &&
                slab_page->state != SP_EMPTY) ||
               slab_page->nr_objects_alloced != 0 ||
               slab_page->bitmap != 0);

        nvgpu_free(&a->source_allocator, slab_page->page_addr);
        a->pages_freed++;

        kmem_cache_free(page_alloc_slab_page_cache, slab_page);
}

/*
 * This expects @alloc to have 1 empty page_alloc_chunk already added to the
 * alloc_chunks list.
 */
static int __do_slab_alloc(struct nvgpu_page_allocator *a,
                           struct page_alloc_slab *slab,
                           struct nvgpu_page_alloc *alloc)
{
        struct page_alloc_slab_page *slab_page = NULL;
        struct page_alloc_chunk *chunk;
        unsigned long offs;

        /*
         * Check the partial and empty lists to see if we have some space
         * readily available. Take the slab_page out of whatever list it
         * was in since it may be put back into a different list later.
         */
        if (!list_empty(&slab->partial)) {
                slab_page = list_first_entry(&slab->partial,
                                             struct page_alloc_slab_page,
                                             list_entry);
                del_slab_page_from_partial(slab, slab_page);
        } else if (!list_empty(&slab->empty)) {
                slab_page = list_first_entry(&slab->empty,
                                             struct page_alloc_slab_page,
                                             list_entry);
                del_slab_page_from_empty(slab, slab_page);
        }

        if (!slab_page) {
                slab_page = alloc_slab_page(a, slab);
                if (IS_ERR(slab_page))
                        return PTR_ERR(slab_page);
        }

        /*
         * We now have a slab_page. Do the alloc.
         */
        offs = bitmap_find_next_zero_area(&slab_page->bitmap,
                                          slab_page->nr_objects,
                                          0, 1, 0);
        if (offs >= slab_page->nr_objects) {
                WARN(1, "Empty/partial slab with no free objects?");

                /* Add the buggy page to the full list... This isn't ideal. */
                add_slab_page_to_full(slab, slab_page);
                return -ENOMEM;
        }

        bitmap_set(&slab_page->bitmap, offs, 1);
        slab_page->nr_objects_alloced++;

        if (slab_page->nr_objects_alloced < slab_page->nr_objects)
                add_slab_page_to_partial(slab, slab_page);
        else if (slab_page->nr_objects_alloced == slab_page->nr_objects)
                add_slab_page_to_full(slab, slab_page);
        else
                BUG(); /* Should be impossible to hit this. */

        /*
         * Handle building the nvgpu_page_alloc struct. We expect one
         * page_alloc_chunk to be present.
         */
        alloc->slab_page = slab_page;
        alloc->nr_chunks = 1;
        alloc->length = slab_page->slab_size;
        alloc->base = slab_page->page_addr + (offs * slab_page->slab_size);

        chunk = list_first_entry(&alloc->alloc_chunks,
                                 struct page_alloc_chunk, list_entry);
        chunk->base = alloc->base;
        chunk->length = alloc->length;

        return 0;
}

/*
 * Allocate from a slab instead of directly from the page allocator.
 */
static struct nvgpu_page_alloc *__nvgpu_alloc_slab(
        struct nvgpu_page_allocator *a, u64 len)
{
        int err, slab_nr;
        struct page_alloc_slab *slab;
        struct nvgpu_page_alloc *alloc = NULL;
        struct page_alloc_chunk *chunk = NULL;

        /*
         * Align the length to a page and then divide by the page size (4k
         * for this code). ilog2() of that then gets us the correct slab to
         * use.
         */
        slab_nr = (int)ilog2(PAGE_ALIGN(len) >> 12);
        slab = &a->slabs[slab_nr];

        alloc = kmem_cache_alloc(page_alloc_cache, GFP_KERNEL);
        if (!alloc) {
                palloc_dbg(a, "OOM: could not alloc page_alloc struct!\n");
                goto fail;
        }

        chunk = kmem_cache_alloc(page_alloc_chunk_cache, GFP_KERNEL);
        if (!chunk) {
                palloc_dbg(a, "OOM: could not alloc alloc_chunk struct!\n");
                goto fail;
        }

        INIT_LIST_HEAD(&alloc->alloc_chunks);
        list_add(&chunk->list_entry, &alloc->alloc_chunks);

        err = __do_slab_alloc(a, slab, alloc);
        if (err)
                goto fail;

        palloc_dbg(a, "Alloc 0x%04llx sr=%d id=0x%010llx [slab]\n",
                   len, slab_nr, alloc->base);
        a->nr_slab_allocs++;

        return alloc;

fail:
        kfree(alloc);
        kfree(chunk);
        return NULL;
}

static void __nvgpu_free_slab(struct nvgpu_page_allocator *a,
                              struct nvgpu_page_alloc *alloc)
{
        struct page_alloc_slab_page *slab_page = alloc->slab_page;
        struct page_alloc_slab *slab = slab_page->owner;
        enum slab_page_state new_state;
        int offs;

        offs = (u32)(alloc->base - slab_page->page_addr) /
                slab_page->slab_size;
        bitmap_clear(&slab_page->bitmap, offs, 1);

        slab_page->nr_objects_alloced--;

        if (slab_page->nr_objects_alloced == 0)
                new_state = SP_EMPTY;
        else
                new_state = SP_PARTIAL;

        /*
         * Need to migrate the page to a different list.
         */
        if (new_state != slab_page->state) {
                /* Delete - can't be in empty. */
                if (slab_page->state == SP_PARTIAL)
                        del_slab_page_from_partial(slab, slab_page);
                else
                        del_slab_page_from_full(slab, slab_page);

                /* And add. */
                if (new_state == SP_EMPTY) {
                        if (list_empty(&slab->empty))
                                add_slab_page_to_empty(slab, slab_page);
                        else
                                free_slab_page(a, slab_page);
                } else {
                        add_slab_page_to_partial(slab, slab_page);
                }
        }

        /*
         * Now handle the page_alloc.
         */
        __nvgpu_free_pages(a, alloc, false);
        a->nr_slab_frees++;
}

/*
 * Allocate physical pages. Since the underlying allocator is a buddy
 * allocator, any single chunk it hands back is contiguous. However, if the
 * space is fragmented this allocator will stitch the requested size together
 * from several smaller, non-contiguous chunks when necessary.
 */
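/*
 * Strategy: repeatedly carve off the largest power-of-two chunk that fits
 * in the remaining page count, halving the attempted chunk size on each
 * buddy allocation failure until it drops below the allocator's page size
 * (at which point the allocator is out of memory).
 */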
static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages(
        struct nvgpu_page_allocator *a, u64 pages)
{
        struct nvgpu_page_alloc *alloc;
        struct page_alloc_chunk *c;
        u64 max_chunk_len = pages << a->page_shift;
        int i = 0;

        alloc = kmem_cache_alloc(page_alloc_cache, GFP_KERNEL);
        if (!alloc)
                goto fail;

        memset(alloc, 0, sizeof(*alloc));

        INIT_LIST_HEAD(&alloc->alloc_chunks);
        alloc->length = pages << a->page_shift;

        while (pages) {
                u64 chunk_addr = 0;
                u64 chunk_pages = (u64)1 << __fls(pages);
                u64 chunk_len = chunk_pages << a->page_shift;

                /*
                 * Take care of the possibility that the allocation must be
                 * contiguous. If this is not the first iteration then that
                 * means the first iteration failed to alloc the entire
                 * requested size. The buddy allocator guarantees any given
                 * single alloc is contiguous.
                 */
                if (a->flags & GPU_ALLOC_FORCE_CONTIG && i != 0)
                        goto fail_cleanup;

                if (chunk_len > max_chunk_len)
                        chunk_len = max_chunk_len;

                /*
                 * Keep attempting to allocate in smaller chunks until the
                 * alloc either succeeds or is smaller than the page_size of
                 * the allocator (i.e. the allocator is OOM).
                 */
                do {
                        chunk_addr = nvgpu_alloc(&a->source_allocator,
                                                 chunk_len);

                        /* Divide by 2 and try again */
                        if (!chunk_addr) {
                                palloc_dbg(a, "balloc failed: 0x%llx\n",
                                           chunk_len);
                                chunk_len >>= 1;
                                max_chunk_len = chunk_len;
                        }
                } while (!chunk_addr && chunk_len >= a->page_size);

                chunk_pages = chunk_len >> a->page_shift;

                if (!chunk_addr) {
                        palloc_dbg(a, "bailing @ 0x%llx\n", chunk_len);
                        goto fail_cleanup;
                }

                c = kmem_cache_alloc(page_alloc_chunk_cache, GFP_KERNEL);
                if (!c) {
                        nvgpu_free(&a->source_allocator, chunk_addr);
                        goto fail_cleanup;
                }

                pages -= chunk_pages;

                c->base = chunk_addr;
                c->length = chunk_len;
                list_add(&c->list_entry, &alloc->alloc_chunks);

                i++;
        }

        alloc->nr_chunks = i;
        c = list_first_entry(&alloc->alloc_chunks,
                             struct page_alloc_chunk, list_entry);
        alloc->base = c->base;

        return alloc;

fail_cleanup:
        while (!list_empty(&alloc->alloc_chunks)) {
                c = list_first_entry(&alloc->alloc_chunks,
                                     struct page_alloc_chunk, list_entry);
                list_del(&c->list_entry);
                nvgpu_free(&a->source_allocator, c->base);
                kfree(c);
        }
        kfree(alloc);
fail:
        return ERR_PTR(-ENOMEM);
}

static struct nvgpu_page_alloc *__nvgpu_alloc_pages(
        struct nvgpu_page_allocator *a, u64 len)
{
        struct nvgpu_page_alloc *alloc = NULL;
        struct page_alloc_chunk *c;
        u64 pages;
        int i = 0;

        pages = ALIGN(len, a->page_size) >> a->page_shift;

        alloc = __do_nvgpu_alloc_pages(a, pages);
        if (IS_ERR(alloc)) {
                palloc_dbg(a, "Alloc 0x%llx (%llu) (failed)\n",
                           pages << a->page_shift, pages);
                return NULL;
        }

        palloc_dbg(a, "Alloc 0x%llx (%llu) id=0x%010llx\n",
                   pages << a->page_shift, pages, alloc->base);
        list_for_each_entry(c, &alloc->alloc_chunks, list_entry) {
                palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n",
                           i++, c->base, c->length);
        }

        return alloc;
}

/*
 * Allocate enough pages to satisfy @len. Page size is determined at
 * initialization of the allocator.
 *
 * The returned u64 is actually a pointer to a struct nvgpu_page_alloc. This
 * is because it doesn't make a lot of sense to return the address of the
 * first page in the list of pages (since they could be discontiguous). This
 * has precedent in the dma_alloc APIs, though; it's really just an annoying
 * artifact of the fact that the nvgpu_alloc() API requires a u64 return
 * type.
 */
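/*
 * Illustrative caller-side usage (an editorial sketch, not part of this
 * file): when GPU_ALLOC_NO_SCATTER_GATHER is clear, the returned u64 is
 * really a struct nvgpu_page_alloc pointer:
 *
 *      u64 handle = nvgpu_alloc(allocator, size);
 *      struct nvgpu_page_alloc *p =
 *              (struct nvgpu_page_alloc *)(uintptr_t)handle;
 *
 * The caller then walks p->alloc_chunks to program the GMMU.
 */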
static u64 nvgpu_page_alloc(struct nvgpu_allocator *__a, u64 len)
{
        struct nvgpu_page_allocator *a = page_allocator(__a);
        struct nvgpu_page_alloc *alloc = NULL;
        u64 real_len;

        /*
         * If we want contig pages we have to round up to a power of two.
         * It's easier to do that here than in the buddy allocator.
         */
        real_len = a->flags & GPU_ALLOC_FORCE_CONTIG ?
                roundup_pow_of_two(len) : len;

        alloc_lock(__a);
        if (a->flags & GPU_ALLOC_4K_VIDMEM_PAGES &&
            real_len <= (a->page_size / 2))
                alloc = __nvgpu_alloc_slab(a, real_len);
        else
                alloc = __nvgpu_alloc_pages(a, real_len);

        if (!alloc) {
                alloc_unlock(__a);
                return 0;
        }

        __insert_page_alloc(a, alloc);

        a->nr_allocs++;
        if (real_len > a->page_size / 2)
                a->pages_alloced += alloc->length >> a->page_shift;
        alloc_unlock(__a);

        if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER)
                return alloc->base;
        else
                return (u64) (uintptr_t) alloc;
}

/*
 * Note: this will remove the nvgpu_page_alloc struct from the RB tree
 * if it's found.
 */
static void nvgpu_page_free(struct nvgpu_allocator *__a, u64 base)
{
        struct nvgpu_page_allocator *a = page_allocator(__a);
        struct nvgpu_page_alloc *alloc;

        alloc_lock(__a);

        if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER)
                alloc = __find_page_alloc(a, base);
        else
                alloc = __find_page_alloc(a,
                        ((struct nvgpu_page_alloc *)(uintptr_t)base)->base);

        if (!alloc) {
                palloc_dbg(a, "Hrm, found no alloc?\n");
                goto done;
        }

        a->nr_frees++;

        palloc_dbg(a, "Free 0x%llx id=0x%010llx\n",
                   alloc->length, alloc->base);

        /*
         * Frees *alloc.
         */
        if (alloc->slab_page) {
                __nvgpu_free_slab(a, alloc);
        } else {
                a->pages_freed += (alloc->length >> a->page_shift);
                __nvgpu_free_pages(a, alloc, true);
        }

done:
        alloc_unlock(__a);
}

static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed(
        struct nvgpu_page_allocator *a, u64 base, u64 length, u32 unused)
{
        struct nvgpu_page_alloc *alloc;
        struct page_alloc_chunk *c;

        alloc = kmem_cache_alloc(page_alloc_cache, GFP_KERNEL);
        c = kmem_cache_alloc(page_alloc_chunk_cache, GFP_KERNEL);
        if (!alloc || !c)
                goto fail;

        alloc->base = nvgpu_alloc_fixed(&a->source_allocator, base,
                                        length, 0);
        if (!alloc->base) {
                WARN(1, "nvgpu: failed to fixed alloc pages @ 0x%010llx",
                     base);
                goto fail;
        }

        alloc->nr_chunks = 1;
        alloc->length = length;
        INIT_LIST_HEAD(&alloc->alloc_chunks);

        c->base = alloc->base;
        c->length = length;
        list_add(&c->list_entry, &alloc->alloc_chunks);

        return alloc;

fail:
        kfree(c);
        kfree(alloc);
        return ERR_PTR(-ENOMEM);
}

/*
 * @page_size is ignored.
 */
static u64 nvgpu_page_alloc_fixed(struct nvgpu_allocator *__a,
                                  u64 base, u64 len, u32 page_size)
{
        struct nvgpu_page_allocator *a = page_allocator(__a);
        struct nvgpu_page_alloc *alloc = NULL;
        struct page_alloc_chunk *c;
        u64 aligned_len, pages;
        int i = 0;

        aligned_len = ALIGN(len, a->page_size);
        pages = aligned_len >> a->page_shift;

        alloc_lock(__a);

        alloc = __nvgpu_alloc_pages_fixed(a, base, aligned_len, 0);
        if (IS_ERR(alloc)) {
                alloc_unlock(__a);
                return 0;
        }

        __insert_page_alloc(a, alloc);
        alloc_unlock(__a);

        palloc_dbg(a, "Alloc [fixed] @ 0x%010llx + 0x%llx (%llu)\n",
                   alloc->base, aligned_len, pages);
        list_for_each_entry(c, &alloc->alloc_chunks, list_entry) {
                palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n",
                           i++, c->base, c->length);
        }

        a->nr_fixed_allocs++;
        a->pages_alloced += pages;

        if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER)
                return alloc->base;
        else
                return (u64) (uintptr_t) alloc;
}

static void nvgpu_page_free_fixed(struct nvgpu_allocator *__a,
                                  u64 base, u64 len)
{
        struct nvgpu_page_allocator *a = page_allocator(__a);
        struct nvgpu_page_alloc *alloc;

        alloc_lock(__a);

        if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER) {
                alloc = __find_page_alloc(a, base);
                if (!alloc)
                        goto done;
        } else {
                alloc = (struct nvgpu_page_alloc *) (uintptr_t) base;
        }

        palloc_dbg(a, "Free [fixed] 0x%010llx + 0x%llx\n",
                   alloc->base, alloc->length);

        a->nr_fixed_frees++;
        a->pages_freed += (alloc->length >> a->page_shift);

        /*
         * This works for the time being since the buddy allocator
         * uses the same free function for both fixed and regular
         * allocs. This would have to be updated if the underlying
         * allocator were to change.
         */
        __nvgpu_free_pages(a, alloc, true);

done:
        alloc_unlock(__a);
}

static void nvgpu_page_allocator_destroy(struct nvgpu_allocator *__a)
{
        struct nvgpu_page_allocator *a = page_allocator(__a);

        alloc_lock(__a);
        kfree(a);
        __a->priv = NULL;
        alloc_unlock(__a);
}

static void nvgpu_page_print_stats(struct nvgpu_allocator *__a,
                                   struct seq_file *s, int lock)
{
        struct nvgpu_page_allocator *a = page_allocator(__a);
        int i;

        if (lock)
                alloc_lock(__a);

        __alloc_pstat(s, __a, "Page allocator:\n");
        __alloc_pstat(s, __a, " allocs         %lld\n", a->nr_allocs);
        __alloc_pstat(s, __a, " frees          %lld\n", a->nr_frees);
        __alloc_pstat(s, __a, " fixed_allocs   %lld\n", a->nr_fixed_allocs);
        __alloc_pstat(s, __a, " fixed_frees    %lld\n", a->nr_fixed_frees);
        __alloc_pstat(s, __a, " slab_allocs    %lld\n", a->nr_slab_allocs);
        __alloc_pstat(s, __a, " slab_frees     %lld\n", a->nr_slab_frees);
        __alloc_pstat(s, __a, " pages alloced  %lld\n", a->pages_alloced);
        __alloc_pstat(s, __a, " pages freed    %lld\n", a->pages_freed);
        __alloc_pstat(s, __a, "\n");

        /*
         * Slab info.
         */
        if (a->flags & GPU_ALLOC_4K_VIDMEM_PAGES) {
                __alloc_pstat(s, __a, "Slabs:\n");
                __alloc_pstat(s, __a, " size      empty     partial   full\n");
                __alloc_pstat(s, __a, " ----      -----     -------   ----\n");

                for (i = 0; i < a->nr_slabs; i++) {
                        struct page_alloc_slab *slab = &a->slabs[i];

                        __alloc_pstat(s, __a, " %-9u %-9d %-9u %u\n",
                                      slab->slab_size,
                                      slab->nr_empty, slab->nr_partial,
                                      slab->nr_full);
                }
                __alloc_pstat(s, __a, "\n");
        }

        __alloc_pstat(s, __a, "Source alloc: %s\n",
                      a->source_allocator.name);
        nvgpu_alloc_print_stats(&a->source_allocator, s, lock);

        if (lock)
                alloc_unlock(__a);
}

static const struct nvgpu_allocator_ops page_ops = {
        .alloc            = nvgpu_page_alloc,
        .free             = nvgpu_page_free,

        .alloc_fixed      = nvgpu_page_alloc_fixed,
        .free_fixed       = nvgpu_page_free_fixed,

        .reserve_carveout = nvgpu_page_reserve_co,
        .release_carveout = nvgpu_page_release_co,

        .base             = nvgpu_page_alloc_base,
        .length           = nvgpu_page_alloc_length,
        .end              = nvgpu_page_alloc_end,
        .inited           = nvgpu_page_alloc_inited,
        .space            = nvgpu_page_alloc_space,

        .fini             = nvgpu_page_allocator_destroy,

        .print_stats      = nvgpu_page_print_stats,
};

/*
 * nr_slabs is computed as follows: divide page_size by 4096 to get the
 * number of 4k pages in page_size. Then take the base 2 log of that to get
 * the number of slabs. For a 64k page_size that works out like this:
 *
 *   1024*64 / 1024*4 = 16
 *   ilog2(16) = 4
 *
 * That gives buckets of 1, 2, 4, and 8 pages (i.e. 4k, 8k, 16k, 32k).
 */
static int nvgpu_page_alloc_init_slabs(struct nvgpu_page_allocator *a)
{
        size_t nr_slabs = ilog2(a->page_size >> 12);
        unsigned int i;

        a->slabs = kcalloc(nr_slabs,
                           sizeof(struct page_alloc_slab),
                           GFP_KERNEL);
        if (!a->slabs)
                return -ENOMEM;
        a->nr_slabs = nr_slabs;

        for (i = 0; i < nr_slabs; i++) {
                struct page_alloc_slab *slab = &a->slabs[i];

                slab->slab_size = SZ_4K * (1 << i);
                INIT_LIST_HEAD(&slab->empty);
                INIT_LIST_HEAD(&slab->partial);
                INIT_LIST_HEAD(&slab->full);
                slab->nr_empty = 0;
                slab->nr_partial = 0;
                slab->nr_full = 0;
        }

        return 0;
}

int nvgpu_page_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
                              const char *name, u64 base, u64 length,
                              u64 blk_size, u64 flags)
{
        struct nvgpu_page_allocator *a;
        char buddy_name[sizeof(__a->name)];
        int err;

        mutex_lock(&meta_data_cache_lock);
        if (!page_alloc_cache)
                page_alloc_cache = KMEM_CACHE(nvgpu_page_alloc, 0);
        if (!page_alloc_chunk_cache)
                page_alloc_chunk_cache = KMEM_CACHE(page_alloc_chunk, 0);
        if (!page_alloc_slab_page_cache)
                page_alloc_slab_page_cache =
                        KMEM_CACHE(page_alloc_slab_page, 0);
        mutex_unlock(&meta_data_cache_lock);

        /* Check all three caches; alloc_slab_page() relies on the third. */
        if (!page_alloc_cache || !page_alloc_chunk_cache ||
            !page_alloc_slab_page_cache)
                return -ENOMEM;

        if (blk_size < SZ_4K)
                return -EINVAL;

        a = kzalloc(sizeof(struct nvgpu_page_allocator), GFP_KERNEL);
        if (!a)
                return -ENOMEM;

        err = __nvgpu_alloc_common_init(__a, name, a, false, &page_ops);
        if (err)
                goto fail;

        a->base = base;
        a->length = length;
        a->page_size = blk_size;
        a->page_shift = __ffs(blk_size);
        a->allocs = RB_ROOT;
        a->owner = __a;
        a->flags = flags;

        if (flags & GPU_ALLOC_4K_VIDMEM_PAGES && blk_size > SZ_4K) {
                err = nvgpu_page_alloc_init_slabs(a);
                if (err)
                        goto fail;
        }

        snprintf(buddy_name, sizeof(buddy_name), "%s-src", name);

        err = nvgpu_buddy_allocator_init(g, &a->source_allocator, buddy_name,
                                         base, length, blk_size, 0);
        if (err)
                goto fail;

        nvgpu_init_alloc_debug(g, __a);
        palloc_dbg(a, "New allocator: type      page\n");
        palloc_dbg(a, "               base      0x%llx\n", a->base);
        palloc_dbg(a, "               size      0x%llx\n", a->length);
        palloc_dbg(a, "               page_size 0x%llx\n", a->page_size);
        palloc_dbg(a, "               flags     0x%llx\n", a->flags);
        palloc_dbg(a, "               slabs:    %d\n", a->nr_slabs);

        return 0;

fail:
        kfree(a);
        return err;
}