mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 17:36:20 +03:00

MISRA rules 10.6, 10.7, and 10.8 prevent mixing of types in composite
expressions. Resolve these violations by casting variables/constants to the
appropriate types.

Jira NVGPU-850
Jira NVGPU-853
Jira NVGPU-851

Change-Id: If6db312187211bc428cf465929082118565dacf4
Signed-off-by: Adeel Raza <araza@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1931156
GVS: Gerrit_Virtual_Submit
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>

1094 lines · 27 KiB · C
/*
 * Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <nvgpu/bitops.h>
#include <nvgpu/allocator.h>
#include <nvgpu/page_allocator.h>
#include <nvgpu/kmem.h>
#include <nvgpu/bug.h>
#include <nvgpu/log2.h>
#include <nvgpu/sizes.h>

#include "buddy_allocator_priv.h"

#define palloc_dbg(a, fmt, arg...)			\
	alloc_dbg(palloc_owner(a), fmt, ##arg)

/*
 * Since some Linux headers are still leaked into common code, this is
 * necessary for some builds.
 */
#ifdef PAGE_SIZE
#undef PAGE_SIZE
#endif

#ifdef PAGE_ALIGN
#undef PAGE_ALIGN
#endif

/*
 * VIDMEM page size is 4k.
 */
#define PAGE_SIZE	0x1000
/* Parenthesize the argument so expressions like PAGE_ALIGN(x + y) expand
 * safely. */
#define PAGE_ALIGN(addr)	(((addr) + (PAGE_SIZE - 1)) &	\
				 ((typeof(addr)) ~(PAGE_SIZE - 1)))
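
/*
 * Worked example of the masking above (illustrative values):
 * PAGE_ALIGN(0x1001) == 0x2000, while PAGE_ALIGN(0x1000) stays 0x1000 -
 * addresses are rounded up to the next 4k boundary only when they are not
 * already aligned.
 */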

/*
 * Handle the list book-keeping as slab pages move between the empty, partial,
 * and full lists.
 */
static inline void add_slab_page_to_empty(struct page_alloc_slab *slab,
					  struct page_alloc_slab_page *page)
{
	BUG_ON(page->state != SP_NONE);
	nvgpu_list_add(&page->list_entry, &slab->empty);
	slab->nr_empty++;
	page->state = SP_EMPTY;
}
static inline void add_slab_page_to_partial(struct page_alloc_slab *slab,
					    struct page_alloc_slab_page *page)
{
	BUG_ON(page->state != SP_NONE);
	nvgpu_list_add(&page->list_entry, &slab->partial);
	slab->nr_partial++;
	page->state = SP_PARTIAL;
}
static inline void add_slab_page_to_full(struct page_alloc_slab *slab,
					 struct page_alloc_slab_page *page)
{
	BUG_ON(page->state != SP_NONE);
	nvgpu_list_add(&page->list_entry, &slab->full);
	slab->nr_full++;
	page->state = SP_FULL;
}

static inline void del_slab_page_from_empty(struct page_alloc_slab *slab,
					    struct page_alloc_slab_page *page)
{
	nvgpu_list_del(&page->list_entry);
	slab->nr_empty--;
	page->state = SP_NONE;
}
static inline void del_slab_page_from_partial(struct page_alloc_slab *slab,
					      struct page_alloc_slab_page *page)
{
	nvgpu_list_del(&page->list_entry);
	slab->nr_partial--;
	page->state = SP_NONE;
}
static inline void del_slab_page_from_full(struct page_alloc_slab *slab,
					   struct page_alloc_slab_page *page)
{
	nvgpu_list_del(&page->list_entry);
	slab->nr_full--;
	page->state = SP_NONE;
}
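
/*
 * For reference, the state machine implemented above: a slab page always
 * passes through SP_NONE while moving between lists, i.e. every transition is
 * SP_NONE -> { SP_EMPTY, SP_PARTIAL, SP_FULL } on add and back to SP_NONE on
 * delete (the BUG_ON()s in the add helpers enforce this).
 */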

static u64 nvgpu_page_alloc_length(struct nvgpu_allocator *a)
{
	struct nvgpu_page_allocator *va = a->priv;

	return nvgpu_alloc_length(&va->source_allocator);
}

static u64 nvgpu_page_alloc_base(struct nvgpu_allocator *a)
{
	struct nvgpu_page_allocator *va = a->priv;

	return nvgpu_alloc_base(&va->source_allocator);
}

static bool nvgpu_page_alloc_inited(struct nvgpu_allocator *a)
{
	struct nvgpu_page_allocator *va = a->priv;

	return nvgpu_alloc_initialized(&va->source_allocator);
}

static u64 nvgpu_page_alloc_end(struct nvgpu_allocator *a)
{
	struct nvgpu_page_allocator *va = a->priv;

	return nvgpu_alloc_end(&va->source_allocator);
}

static u64 nvgpu_page_alloc_space(struct nvgpu_allocator *a)
{
	struct nvgpu_page_allocator *va = a->priv;

	return nvgpu_alloc_space(&va->source_allocator);
}

static int nvgpu_page_reserve_co(struct nvgpu_allocator *a,
				 struct nvgpu_alloc_carveout *co)
{
	struct nvgpu_page_allocator *va = a->priv;

	return nvgpu_alloc_reserve_carveout(&va->source_allocator, co);
}

static void nvgpu_page_release_co(struct nvgpu_allocator *a,
				  struct nvgpu_alloc_carveout *co)
{
	struct nvgpu_page_allocator *va = a->priv;

	nvgpu_alloc_release_carveout(&va->source_allocator, co);
}

static struct nvgpu_sgl *nvgpu_page_alloc_sgl_next(struct nvgpu_sgl *sgl)
{
	struct nvgpu_mem_sgl *sgl_impl = (struct nvgpu_mem_sgl *)sgl;

	return (struct nvgpu_sgl *)sgl_impl->next;
}

static u64 nvgpu_page_alloc_sgl_phys(struct gk20a *g, struct nvgpu_sgl *sgl)
{
	struct nvgpu_mem_sgl *sgl_impl = (struct nvgpu_mem_sgl *)sgl;

	return sgl_impl->phys;
}

static u64 nvgpu_page_alloc_sgl_ipa_to_pa(struct gk20a *g,
		struct nvgpu_sgl *sgl, u64 ipa, u64 *pa_len)
{
	return ipa;
}

static u64 nvgpu_page_alloc_sgl_dma(struct nvgpu_sgl *sgl)
{
	struct nvgpu_mem_sgl *sgl_impl = (struct nvgpu_mem_sgl *)sgl;

	return sgl_impl->dma;
}

static u64 nvgpu_page_alloc_sgl_length(struct nvgpu_sgl *sgl)
{
	struct nvgpu_mem_sgl *sgl_impl = (struct nvgpu_mem_sgl *)sgl;

	return sgl_impl->length;
}

static u64 nvgpu_page_alloc_sgl_gpu_addr(struct gk20a *g,
					 struct nvgpu_sgl *sgl,
					 struct nvgpu_gmmu_attrs *attrs)
{
	struct nvgpu_mem_sgl *sgl_impl = (struct nvgpu_mem_sgl *)sgl;

	return sgl_impl->phys;
}

static void nvgpu_page_alloc_sgt_free(struct gk20a *g, struct nvgpu_sgt *sgt)
{
	/*
	 * No-op here. The free is handled by the page_alloc free() functions.
	 */
}

/*
 * These implement the generic scatter gather ops for pages allocated
 * by the page allocator. However, the primary aim for this is, of course,
 * vidmem.
 */
static const struct nvgpu_sgt_ops page_alloc_sgl_ops = {
	.sgl_next	= nvgpu_page_alloc_sgl_next,
	.sgl_phys	= nvgpu_page_alloc_sgl_phys,
	.sgl_ipa	= nvgpu_page_alloc_sgl_phys,
	.sgl_ipa_to_pa	= nvgpu_page_alloc_sgl_ipa_to_pa,
	.sgl_dma	= nvgpu_page_alloc_sgl_dma,
	.sgl_length	= nvgpu_page_alloc_sgl_length,
	.sgl_gpu_addr	= nvgpu_page_alloc_sgl_gpu_addr,
	.sgt_free	= nvgpu_page_alloc_sgt_free,
};
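
/*
 * Note that .sgl_ipa reuses nvgpu_page_alloc_sgl_phys and .sgl_ipa_to_pa is
 * an identity mapping: for these vidmem pages the intermediate physical
 * address and the physical address are one and the same.
 */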

/*
 * This actually frees the sgl memory. Used by the page_alloc free() functions.
 */
static void nvgpu_page_alloc_sgl_proper_free(struct gk20a *g,
					     struct nvgpu_mem_sgl *sgl)
{
	struct nvgpu_mem_sgl *next;

	while (sgl) {
		next = sgl->next;
		nvgpu_kfree(g, sgl);
		sgl = next;
	}
}

static void nvgpu_page_alloc_free_pages(struct nvgpu_page_allocator *a,
					struct nvgpu_page_alloc *alloc,
					bool free_buddy_alloc)
{
	struct nvgpu_sgl *sgl = alloc->sgt.sgl;
	struct gk20a *g = a->owner->g;

	if (free_buddy_alloc) {
		while (sgl) {
			nvgpu_free(&a->source_allocator,
				   nvgpu_sgt_get_phys(g, &alloc->sgt, sgl));
			sgl = nvgpu_sgt_get_next(&alloc->sgt, sgl);
		}
	}

	/*
	 * Free the sgl list from its head. Note: the walk above leaves @sgl
	 * at NULL, so free from alloc->sgt.sgl; passing @sgl here would leak
	 * the sgl structs whenever free_buddy_alloc is set.
	 */
	nvgpu_page_alloc_sgl_proper_free(a->owner->g,
			(struct nvgpu_mem_sgl *)alloc->sgt.sgl);
	nvgpu_kmem_cache_free(a->alloc_cache, alloc);
}

static void insert_page_alloc(struct nvgpu_page_allocator *a,
			      struct nvgpu_page_alloc *alloc)
{
	alloc->tree_entry.key_start = alloc->base;
	alloc->tree_entry.key_end = alloc->base + alloc->length;

	nvgpu_rbtree_insert(&alloc->tree_entry, &a->allocs);
}
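
/*
 * Note: this lookup is destructive. On a hit the allocation is also unlinked
 * from the outstanding-allocations tree, so the caller takes ownership of the
 * returned struct.
 */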
static struct nvgpu_page_alloc *find_page_alloc(
	struct nvgpu_page_allocator *a,
	u64 addr)
{
	struct nvgpu_page_alloc *alloc;
	struct nvgpu_rbtree_node *node = NULL;

	nvgpu_rbtree_search(addr, &node, a->allocs);
	if (node == NULL) {
		return NULL;
	}

	alloc = nvgpu_page_alloc_from_rbtree_node(node);

	nvgpu_rbtree_unlink(node, &a->allocs);

	return alloc;
}

static struct page_alloc_slab_page *alloc_slab_page(
	struct nvgpu_page_allocator *a,
	struct page_alloc_slab *slab)
{
	struct page_alloc_slab_page *slab_page;

	slab_page = nvgpu_kmem_cache_alloc(a->slab_page_cache);
	if (slab_page == NULL) {
		palloc_dbg(a, "OOM: unable to alloc slab_page struct!");
		return NULL;
	}

	memset(slab_page, 0, sizeof(*slab_page));

	slab_page->page_addr = nvgpu_alloc(&a->source_allocator, a->page_size);
	if (slab_page->page_addr == 0ULL) {
		nvgpu_kmem_cache_free(a->slab_page_cache, slab_page);
		palloc_dbg(a, "OOM: vidmem is full!");
		return NULL;
	}

	nvgpu_init_list_node(&slab_page->list_entry);
	slab_page->slab_size = slab->slab_size;
	slab_page->nr_objects = (u32)a->page_size / slab->slab_size;
	slab_page->nr_objects_alloced = 0;
	slab_page->owner = slab;
	slab_page->state = SP_NONE;

	a->pages_alloced++;

	palloc_dbg(a, "Allocated new slab page @ 0x%012llx size=%u",
		   slab_page->page_addr, slab_page->slab_size);

	return slab_page;
}
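
/*
 * Illustrative numbers for the nr_objects computation above: with a 64k
 * page_size and the 4k slab, each slab page holds 64k / 4k = 16 objects,
 * tracked by the slab page's bitmap.
 */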

static void free_slab_page(struct nvgpu_page_allocator *a,
			   struct page_alloc_slab_page *slab_page)
{
	palloc_dbg(a, "Freeing slab page @ 0x%012llx", slab_page->page_addr);

	BUG_ON((slab_page->state != SP_NONE && slab_page->state != SP_EMPTY) ||
	       slab_page->nr_objects_alloced != 0U ||
	       slab_page->bitmap != 0U);

	nvgpu_free(&a->source_allocator, slab_page->page_addr);
	a->pages_freed++;

	nvgpu_kmem_cache_free(a->slab_page_cache, slab_page);
}

/*
 * This expects @alloc to have one empty sgl_entry ready for use.
 */
static int do_slab_alloc(struct nvgpu_page_allocator *a,
			 struct page_alloc_slab *slab,
			 struct nvgpu_page_alloc *alloc)
{
	struct page_alloc_slab_page *slab_page = NULL;
	struct nvgpu_mem_sgl *sgl;
	unsigned long offs;

	/*
	 * Check the partial and empty lists to see if we have some space
	 * readily available. Take the slab_page out of whatever list it
	 * was in since it may be put back into a different list later.
	 */
	if (!nvgpu_list_empty(&slab->partial)) {
		slab_page = nvgpu_list_first_entry(&slab->partial,
						   page_alloc_slab_page,
						   list_entry);
		del_slab_page_from_partial(slab, slab_page);
	} else if (!nvgpu_list_empty(&slab->empty)) {
		slab_page = nvgpu_list_first_entry(&slab->empty,
						   page_alloc_slab_page,
						   list_entry);
		del_slab_page_from_empty(slab, slab_page);
	}

	if (slab_page == NULL) {
		slab_page = alloc_slab_page(a, slab);
		if (slab_page == NULL) {
			return -ENOMEM;
		}
	}

	/*
	 * We now have a slab_page. Do the alloc.
	 */
	offs = bitmap_find_next_zero_area(&slab_page->bitmap,
					  slab_page->nr_objects,
					  0, 1, 0);
	if (offs >= slab_page->nr_objects) {
		(void) WARN(1, "Empty/partial slab with no free objects?");

		/* Add the buggy page to the full list... This isn't ideal. */
		add_slab_page_to_full(slab, slab_page);
		return -ENOMEM;
	}

	bitmap_set(&slab_page->bitmap, offs, 1);
	slab_page->nr_objects_alloced++;

	if (slab_page->nr_objects_alloced < slab_page->nr_objects) {
		add_slab_page_to_partial(slab, slab_page);
	} else if (slab_page->nr_objects_alloced == slab_page->nr_objects) {
		add_slab_page_to_full(slab, slab_page);
	} else {
		BUG(); /* Should be impossible to hit this. */
	}

	/*
	 * Handle building the nvgpu_page_alloc struct. We expect one sgl
	 * to be present.
	 */
	alloc->slab_page = slab_page;
	alloc->nr_chunks = 1;
	alloc->length = slab_page->slab_size;
	alloc->base = slab_page->page_addr + (offs * slab_page->slab_size);

	sgl = (struct nvgpu_mem_sgl *)alloc->sgt.sgl;
	sgl->phys = alloc->base;
	sgl->dma = alloc->base;
	sgl->length = alloc->length;
	sgl->next = NULL;

	return 0;
}

/*
 * Allocate from a slab instead of directly from the page allocator.
 */
static struct nvgpu_page_alloc *nvgpu_alloc_slab(
	struct nvgpu_page_allocator *a, u64 len)
{
	int err, slab_nr;
	struct page_alloc_slab *slab;
	struct nvgpu_page_alloc *alloc = NULL;
	struct nvgpu_mem_sgl *sgl = NULL;

	/*
	 * Align the length to a page and then divide by the page size (4k for
	 * this code). ilog2() of that then gets us the correct slab to use.
	 */
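	/*
	 * For example (values chosen for illustration): len = SZ_4K maps to
	 * slab_nr 0 (the 4k slab) and len = SZ_8K maps to slab_nr 1 (the 8k
	 * slab), since ilog2(1) == 0 and ilog2(2) == 1.
	 */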
	slab_nr = (int)ilog2(PAGE_ALIGN(len) >> 12);
	slab = &a->slabs[slab_nr];

	alloc = nvgpu_kmem_cache_alloc(a->alloc_cache);
	if (alloc == NULL) {
		palloc_dbg(a, "OOM: could not alloc page_alloc struct!");
		goto fail;
	}

	alloc->sgt.ops = &page_alloc_sgl_ops;

	sgl = nvgpu_kzalloc(a->owner->g, sizeof(*sgl));
	if (sgl == NULL) {
		palloc_dbg(a, "OOM: could not alloc sgl struct!");
		goto fail;
	}

	alloc->sgt.sgl = (struct nvgpu_sgl *)sgl;
	err = do_slab_alloc(a, slab, alloc);
	if (err != 0) {
		goto fail;
	}

	palloc_dbg(a, "Alloc 0x%04llx sr=%d id=0x%010llx [slab]",
		   len, slab_nr, alloc->base);
	a->nr_slab_allocs++;

	return alloc;

fail:
	if (alloc) {
		nvgpu_kmem_cache_free(a->alloc_cache, alloc);
	}
	if (sgl) {
		nvgpu_kfree(a->owner->g, sgl);
	}
	return NULL;
}

static void nvgpu_free_slab(struct nvgpu_page_allocator *a,
			    struct nvgpu_page_alloc *alloc)
{
	struct page_alloc_slab_page *slab_page = alloc->slab_page;
	struct page_alloc_slab *slab = slab_page->owner;
	enum slab_page_state new_state;
	u32 offs;

	offs = (u32)(alloc->base - slab_page->page_addr) / slab_page->slab_size;
	bitmap_clear(&slab_page->bitmap, offs, 1);

	slab_page->nr_objects_alloced--;

	if (slab_page->nr_objects_alloced == 0U) {
		new_state = SP_EMPTY;
	} else {
		new_state = SP_PARTIAL;
	}

	/*
	 * Need to migrate the page to a different list.
	 */
	if (new_state != slab_page->state) {
		/* Delete - can't be in empty. */
		if (slab_page->state == SP_PARTIAL) {
			del_slab_page_from_partial(slab, slab_page);
		} else {
			del_slab_page_from_full(slab, slab_page);
		}

		/* And add. */
		if (new_state == SP_EMPTY) {
			if (nvgpu_list_empty(&slab->empty)) {
				add_slab_page_to_empty(slab, slab_page);
			} else {
				free_slab_page(a, slab_page);
			}
		} else {
			add_slab_page_to_partial(slab, slab_page);
		}
	}

	/*
	 * Now handle the page_alloc.
	 */
	nvgpu_page_alloc_free_pages(a, alloc, false);
	a->nr_slab_frees++;

	return;
}
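
/*
 * Design note on nvgpu_free_slab(): at most one fully empty slab page is kept
 * cached per slab; any additional page that drains to empty is handed back to
 * the source allocator right away via free_slab_page().
 */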

/*
 * Allocate physical pages. Since the underlying allocator is a buddy
 * allocator, the returned pages are always contiguous. However, since there
 * could be fragmentation in the space, this allocator will collate smaller
 * non-contiguous allocations together if necessary.
 */
static struct nvgpu_page_alloc *do_nvgpu_alloc_pages(
	struct nvgpu_page_allocator *a, u64 pages)
{
	struct nvgpu_page_alloc *alloc;
	struct nvgpu_mem_sgl *sgl, *prev_sgl = NULL;
	u64 max_chunk_len = pages << a->page_shift;
	int i = 0;

	alloc = nvgpu_kmem_cache_alloc(a->alloc_cache);
	if (alloc == NULL) {
		goto fail;
	}

	memset(alloc, 0, sizeof(*alloc));

	alloc->length = pages << a->page_shift;
	alloc->sgt.ops = &page_alloc_sgl_ops;

	while (pages) {
		u64 chunk_addr = 0;
		u64 chunk_pages = (u64)1 << __fls(pages);
		u64 chunk_len = chunk_pages << a->page_shift;

		/*
		 * Take care of the possibility that the allocation must be
		 * contiguous. If this is not the first iteration then that
		 * means the first iteration failed to alloc the entire
		 * requested size. The buddy allocator guarantees any given
		 * single alloc is contiguous.
		 */
		if ((a->flags & GPU_ALLOC_FORCE_CONTIG) != 0ULL && i != 0) {
			goto fail_cleanup;
		}

		if (chunk_len > max_chunk_len) {
			chunk_len = max_chunk_len;
		}

		/*
		 * Keep attempting to allocate in smaller chunks until the
		 * alloc either succeeds or is smaller than the page_size of
		 * the allocator (i.e. the allocator is OOM).
		 */
		do {
			chunk_addr = nvgpu_alloc(&a->source_allocator,
						 chunk_len);

			/* Divide by 2 and try again */
			if (chunk_addr == 0ULL) {
				palloc_dbg(a, "balloc failed: 0x%llx",
					   chunk_len);
				chunk_len >>= 1;
				max_chunk_len = chunk_len;
			}
		} while (chunk_addr == 0ULL && chunk_len >= a->page_size);

		chunk_pages = chunk_len >> a->page_shift;

		if (chunk_addr == 0ULL) {
			palloc_dbg(a, "bailing @ 0x%llx", chunk_len);
			goto fail_cleanup;
		}

		sgl = nvgpu_kzalloc(a->owner->g, sizeof(*sgl));
		if (sgl == NULL) {
			nvgpu_free(&a->source_allocator, chunk_addr);
			goto fail_cleanup;
		}

		pages -= chunk_pages;

		sgl->phys = chunk_addr;
		sgl->dma = chunk_addr;
		sgl->length = chunk_len;

		/*
		 * Build the singly linked list with a head node that is part
		 * of the list.
		 */
		if (prev_sgl) {
			prev_sgl->next = sgl;
		} else {
			alloc->sgt.sgl = (struct nvgpu_sgl *)sgl;
		}

		prev_sgl = sgl;

		i++;
	}

	alloc->nr_chunks = i;
	alloc->base = ((struct nvgpu_mem_sgl *)alloc->sgt.sgl)->phys;

	return alloc;

fail_cleanup:
	sgl = (struct nvgpu_mem_sgl *)alloc->sgt.sgl;
	while (sgl) {
		struct nvgpu_mem_sgl *next = sgl->next;

		nvgpu_free(&a->source_allocator, sgl->phys);
		nvgpu_kfree(a->owner->g, sgl);

		sgl = next;
	}

	nvgpu_kmem_cache_free(a->alloc_cache, alloc);
fail:
	return NULL;
}
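
/*
 * Worked example of the chunking loop above (illustrative numbers): a request
 * for 9 pages first tries one chunk of 8 pages (1 << __fls(9)) and then one
 * chunk of 1 page, so with 4k pages an uncontended allocator would hand back
 * two sgl entries of 32k and 4k. With GPU_ALLOC_FORCE_CONTIG set, any
 * iteration past the first fails the allocation instead.
 */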

static struct nvgpu_page_alloc *nvgpu_alloc_pages(
	struct nvgpu_page_allocator *a, u64 len)
{
	struct gk20a *g = a->owner->g;
	struct nvgpu_page_alloc *alloc = NULL;
	struct nvgpu_sgl *sgl;
	u64 pages;
	int i = 0;

	pages = ALIGN(len, a->page_size) >> a->page_shift;

	alloc = do_nvgpu_alloc_pages(a, pages);
	if (alloc == NULL) {
		palloc_dbg(a, "Alloc 0x%llx (%llu) (failed)",
			   pages << a->page_shift, pages);
		return NULL;
	}

	palloc_dbg(a, "Alloc 0x%llx (%llu) id=0x%010llx",
		   pages << a->page_shift, pages, alloc->base);
	sgl = alloc->sgt.sgl;
	while (sgl) {
		palloc_dbg(a, "  Chunk %2d: 0x%010llx + 0x%llx",
			   i++,
			   nvgpu_sgt_get_phys(g, &alloc->sgt, sgl),
			   nvgpu_sgt_get_length(&alloc->sgt, sgl));
		sgl = nvgpu_sgt_get_next(&alloc->sgt, sgl);
	}
	palloc_dbg(a, "Alloc done");

	return alloc;
}

/*
 * Allocate enough pages to satisfy @len. Page size is determined at
 * initialization of the allocator.
 *
 * The return is actually a pointer to a struct nvgpu_page_alloc, cast to a
 * u64. This is because it doesn't make a lot of sense to return the address
 * of the first page in the list of pages (since they could be discontiguous).
 * This has precedent in the dma_alloc APIs, though it's really just an
 * annoying artifact of the fact that the nvgpu_alloc() API requires a u64
 * return type.
 */
static u64 nvgpu_page_alloc(struct nvgpu_allocator *na, u64 len)
{
	struct nvgpu_page_allocator *a = page_allocator(na);
	struct nvgpu_page_alloc *alloc = NULL;
	u64 real_len;

	/*
	 * If we want contig pages we have to round up to a power of two. It's
	 * easier to do that here than in the buddy allocator.
	 */
	real_len = ((a->flags & GPU_ALLOC_FORCE_CONTIG) != 0ULL) ?
		roundup_pow_of_two(len) : len;

	alloc_lock(na);
	if ((a->flags & GPU_ALLOC_4K_VIDMEM_PAGES) != 0ULL &&
	    real_len <= (a->page_size / 2U)) {
		alloc = nvgpu_alloc_slab(a, real_len);
	} else {
		alloc = nvgpu_alloc_pages(a, real_len);
	}

	if (alloc == NULL) {
		alloc_unlock(na);
		return 0;
	}

	insert_page_alloc(a, alloc);

	a->nr_allocs++;
	if (real_len > a->page_size / 2U) {
		a->pages_alloced += alloc->length >> a->page_shift;
	}
	alloc_unlock(na);

	if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER) {
		return alloc->base;
	} else {
		return (u64) (uintptr_t) alloc;
	}
}
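
/*
 * Caller-side usage sketch (hypothetical, not part of this file): without
 * GPU_ALLOC_NO_SCATTER_GATHER the returned handle must be cast back before
 * the sgt can be walked, e.g.
 *
 *	u64 handle = nvgpu_alloc(na, SZ_64K);
 *	struct nvgpu_page_alloc *p =
 *			(struct nvgpu_page_alloc *)(uintptr_t)handle;
 */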

/*
 * Note: this will remove the nvgpu_page_alloc struct from the RB tree
 * if it's found.
 */
static void nvgpu_page_free(struct nvgpu_allocator *na, u64 base)
{
	struct nvgpu_page_allocator *a = page_allocator(na);
	struct nvgpu_page_alloc *alloc;

	alloc_lock(na);

	if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER) {
		alloc = find_page_alloc(a, base);
	} else {
		alloc = find_page_alloc(a,
			((struct nvgpu_page_alloc *)(uintptr_t)base)->base);
	}

	if (alloc == NULL) {
		palloc_dbg(a, "Hrm, found no alloc?");
		goto done;
	}

	a->nr_frees++;

	palloc_dbg(a, "Free 0x%llx id=0x%010llx",
		   alloc->length, alloc->base);

	/*
	 * Frees *alloc.
	 */
	if (alloc->slab_page) {
		nvgpu_free_slab(a, alloc);
	} else {
		a->pages_freed += (alloc->length >> a->page_shift);
		nvgpu_page_alloc_free_pages(a, alloc, true);
	}

done:
	alloc_unlock(na);
}
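
/*
 * The branch in nvgpu_page_free() mirrors the return convention of
 * nvgpu_page_alloc(): with GPU_ALLOC_NO_SCATTER_GATHER the caller hands back
 * a real base address, otherwise it hands back the nvgpu_page_alloc pointer
 * it was given (cast through uintptr_t).
 */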

static struct nvgpu_page_alloc *nvgpu_alloc_pages_fixed(
	struct nvgpu_page_allocator *a, u64 base, u64 length, u32 unused)
{
	struct nvgpu_page_alloc *alloc;
	struct nvgpu_mem_sgl *sgl;

	alloc = nvgpu_kmem_cache_alloc(a->alloc_cache);
	sgl = nvgpu_kzalloc(a->owner->g, sizeof(*sgl));
	if (alloc == NULL || sgl == NULL) {
		goto fail;
	}

	alloc->sgt.ops = &page_alloc_sgl_ops;
	alloc->base = nvgpu_alloc_fixed(&a->source_allocator, base, length, 0);
	if (alloc->base == 0ULL) {
		(void) WARN(1, "nvgpu: failed to fixed alloc pages @ 0x%010llx",
			    base);
		goto fail;
	}

	alloc->nr_chunks = 1;
	alloc->length = length;
	alloc->sgt.sgl = (struct nvgpu_sgl *)sgl;

	sgl->phys = alloc->base;
	sgl->dma = alloc->base;
	sgl->length = length;
	sgl->next = NULL;

	return alloc;

fail:
	if (sgl) {
		nvgpu_kfree(a->owner->g, sgl);
	}
	if (alloc) {
		nvgpu_kmem_cache_free(a->alloc_cache, alloc);
	}
	return NULL;
}
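
/*
 * A fixed alloc is always a single contiguous chunk (nr_chunks == 1): the
 * caller pins both base and length, and nvgpu_alloc_fixed() either satisfies
 * exactly that range or returns 0.
 */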

/*
 * @page_size is ignored.
 */
static u64 nvgpu_page_alloc_fixed(struct nvgpu_allocator *na,
				  u64 base, u64 len, u32 page_size)
{
	struct nvgpu_page_allocator *a = page_allocator(na);
	struct nvgpu_page_alloc *alloc = NULL;
	struct nvgpu_sgl *sgl;
	struct gk20a *g = a->owner->g;
	u64 aligned_len, pages;
	int i = 0;

	aligned_len = ALIGN(len, a->page_size);
	pages = aligned_len >> a->page_shift;

	alloc_lock(na);

	alloc = nvgpu_alloc_pages_fixed(a, base, aligned_len, 0);
	if (alloc == NULL) {
		alloc_unlock(na);
		return 0;
	}

	insert_page_alloc(a, alloc);
	alloc_unlock(na);

	palloc_dbg(a, "Alloc [fixed] @ 0x%010llx + 0x%llx (%llu)",
		   alloc->base, aligned_len, pages);
	sgl = alloc->sgt.sgl;
	while (sgl) {
		palloc_dbg(a, "  Chunk %2d: 0x%010llx + 0x%llx",
			   i++,
			   nvgpu_sgt_get_phys(g, &alloc->sgt, sgl),
			   nvgpu_sgt_get_length(&alloc->sgt, sgl));
		sgl = nvgpu_sgt_get_next(&alloc->sgt, sgl);
	}

	a->nr_fixed_allocs++;
	a->pages_alloced += pages;

	if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER) {
		return alloc->base;
	} else {
		return (u64) (uintptr_t) alloc;
	}
}

static void nvgpu_page_free_fixed(struct nvgpu_allocator *na,
				  u64 base, u64 len)
{
	struct nvgpu_page_allocator *a = page_allocator(na);
	struct nvgpu_page_alloc *alloc;

	alloc_lock(na);

	if (a->flags & GPU_ALLOC_NO_SCATTER_GATHER) {
		alloc = find_page_alloc(a, base);
		if (alloc == NULL) {
			goto done;
		}
	} else {
		alloc = (struct nvgpu_page_alloc *) (uintptr_t) base;
	}

	palloc_dbg(a, "Free [fixed] 0x%010llx + 0x%llx",
		   alloc->base, alloc->length);

	a->nr_fixed_frees++;
	a->pages_freed += (alloc->length >> a->page_shift);

	/*
	 * This works for the time being since the buddy allocator
	 * uses the same free function for both fixed and regular
	 * allocs. This would have to be updated if the underlying
	 * allocator were to change.
	 */
	nvgpu_page_alloc_free_pages(a, alloc, true);

done:
	alloc_unlock(na);
}

static void nvgpu_page_allocator_destroy(struct nvgpu_allocator *na)
{
	struct nvgpu_page_allocator *a = page_allocator(na);

	alloc_lock(na);
	nvgpu_kfree(nvgpu_alloc_to_gpu(na), a);
	na->priv = NULL;
	alloc_unlock(na);
}

#ifdef __KERNEL__
static void nvgpu_page_print_stats(struct nvgpu_allocator *na,
				   struct seq_file *s, int lock)
{
	struct nvgpu_page_allocator *a = page_allocator(na);
	int i;

	if (lock)
		alloc_lock(na);

	__alloc_pstat(s, na, "Page allocator:");
	__alloc_pstat(s, na, "  allocs         %lld", a->nr_allocs);
	__alloc_pstat(s, na, "  frees          %lld", a->nr_frees);
	__alloc_pstat(s, na, "  fixed_allocs   %lld", a->nr_fixed_allocs);
	__alloc_pstat(s, na, "  fixed_frees    %lld", a->nr_fixed_frees);
	__alloc_pstat(s, na, "  slab_allocs    %lld", a->nr_slab_allocs);
	__alloc_pstat(s, na, "  slab_frees     %lld", a->nr_slab_frees);
	__alloc_pstat(s, na, "  pages alloced  %lld", a->pages_alloced);
	__alloc_pstat(s, na, "  pages freed    %lld", a->pages_freed);
	__alloc_pstat(s, na, "");

	__alloc_pstat(s, na, "Page size:       %lld KB",
		      a->page_size >> 10);
	__alloc_pstat(s, na, "Total pages:     %lld (%lld MB)",
		      a->length / a->page_size,
		      a->length >> 20);
	__alloc_pstat(s, na, "Available pages: %lld (%lld MB)",
		      nvgpu_alloc_space(&a->source_allocator) / a->page_size,
		      nvgpu_alloc_space(&a->source_allocator) >> 20);
	__alloc_pstat(s, na, "");

	/*
	 * Slab info.
	 */
	if (a->flags & GPU_ALLOC_4K_VIDMEM_PAGES) {
		__alloc_pstat(s, na, "Slabs:");
		__alloc_pstat(s, na, "  size      empty     partial   full");
		__alloc_pstat(s, na, "  ----      -----     -------   ----");

		for (i = 0; i < a->nr_slabs; i++) {
			struct page_alloc_slab *slab = &a->slabs[i];

			__alloc_pstat(s, na, "  %-9u %-9d %-9u %u",
				      slab->slab_size,
				      slab->nr_empty, slab->nr_partial,
				      slab->nr_full);
		}
		__alloc_pstat(s, na, "");
	}

	__alloc_pstat(s, na, "Source alloc: %s",
		      a->source_allocator.name);
	nvgpu_alloc_print_stats(&a->source_allocator, s, lock);

	if (lock)
		alloc_unlock(na);
}
#endif

static const struct nvgpu_allocator_ops page_ops = {
	.alloc		= nvgpu_page_alloc,
	.free		= nvgpu_page_free,

	.alloc_fixed	= nvgpu_page_alloc_fixed,
	.free_fixed	= nvgpu_page_free_fixed,

	.reserve_carveout	= nvgpu_page_reserve_co,
	.release_carveout	= nvgpu_page_release_co,

	.base		= nvgpu_page_alloc_base,
	.length		= nvgpu_page_alloc_length,
	.end		= nvgpu_page_alloc_end,
	.inited		= nvgpu_page_alloc_inited,
	.space		= nvgpu_page_alloc_space,

	.fini		= nvgpu_page_allocator_destroy,

#ifdef __KERNEL__
	.print_stats	= nvgpu_page_print_stats,
#endif
};

/*
 * nr_slabs is computed as follows: divide page_size by 4096 to get the number
 * of 4k pages in page_size. Then take the base 2 log of that to get the
 * number of slabs. For a 64k page_size that works out like:
 *
 *   (64 * 1024) / (4 * 1024) = 16
 *   ilog2(16) = 4
 *
 * That gives buckets of 1, 2, 4, and 8 pages (i.e. 4k, 8k, 16k, 32k).
 */
static int nvgpu_page_alloc_init_slabs(struct nvgpu_page_allocator *a)
{
	size_t nr_slabs = ilog2(a->page_size >> 12);
	unsigned int i;

	a->slabs = nvgpu_kcalloc(nvgpu_alloc_to_gpu(a->owner),
				 nr_slabs,
				 sizeof(struct page_alloc_slab));
	if (a->slabs == NULL) {
		return -ENOMEM;
	}
	a->nr_slabs = nr_slabs;

	for (i = 0; i < nr_slabs; i++) {
		struct page_alloc_slab *slab = &a->slabs[i];

		slab->slab_size = U32(SZ_4K) * BIT32(i);
		nvgpu_init_list_node(&slab->empty);
		nvgpu_init_list_node(&slab->partial);
		nvgpu_init_list_node(&slab->full);
		slab->nr_empty = 0;
		slab->nr_partial = 0;
		slab->nr_full = 0;
	}

	return 0;
}

int nvgpu_page_allocator_init(struct gk20a *g, struct nvgpu_allocator *na,
			      const char *name, u64 base, u64 length,
			      u64 blk_size, u64 flags)
{
	struct nvgpu_page_allocator *a;
	char buddy_name[sizeof(na->name)];
	int err;

	if (blk_size < SZ_4K) {
		return -EINVAL;
	}

	a = nvgpu_kzalloc(g, sizeof(struct nvgpu_page_allocator));
	if (a == NULL) {
		return -ENOMEM;
	}

	err = nvgpu_alloc_common_init(na, g, name, a, false, &page_ops);
	if (err != 0) {
		goto fail;
	}

	a->alloc_cache = nvgpu_kmem_cache_create(g,
				sizeof(struct nvgpu_page_alloc));
	a->slab_page_cache = nvgpu_kmem_cache_create(g,
				sizeof(struct page_alloc_slab_page));
	if (a->alloc_cache == NULL || a->slab_page_cache == NULL) {
		err = -ENOMEM;
		goto fail;
	}

	a->base = base;
	a->length = length;
	a->page_size = blk_size;
	a->page_shift = __ffs(blk_size);
	a->allocs = NULL;
	a->owner = na;
	a->flags = flags;

	if ((flags & GPU_ALLOC_4K_VIDMEM_PAGES) != 0ULL &&
	    blk_size > SZ_4K) {
		err = nvgpu_page_alloc_init_slabs(a);
		if (err != 0) {
			goto fail;
		}
	}

	snprintf(buddy_name, sizeof(buddy_name), "%s-src", name);

	err = nvgpu_buddy_allocator_init(g, &a->source_allocator, NULL,
					 buddy_name, base, length, blk_size,
					 0ULL, 0ULL);
	if (err != 0) {
		goto fail;
	}

#ifdef CONFIG_DEBUG_FS
	nvgpu_init_alloc_debug(g, na);
#endif
	palloc_dbg(a, "New allocator: type      page");
	palloc_dbg(a, "               base      0x%llx", a->base);
	palloc_dbg(a, "               size      0x%llx", a->length);
	palloc_dbg(a, "               page_size 0x%llx", a->page_size);
	palloc_dbg(a, "               flags     0x%llx", a->flags);
	palloc_dbg(a, "               slabs:    %d", a->nr_slabs);

	return 0;

fail:
	if (a->alloc_cache) {
		nvgpu_kmem_cache_destroy(a->alloc_cache);
	}
	if (a->slab_page_cache) {
		nvgpu_kmem_cache_destroy(a->slab_page_cache);
	}
	nvgpu_kfree(g, a);
	return err;
}
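
/*
 * Hedged usage sketch (values assumed for illustration, not from this file):
 * create a VIDMEM page allocator with 64k pages and 4k slab support over a
 * hypothetical 256 MB carveout:
 *
 *	err = nvgpu_page_allocator_init(g, &na, "vidmem", vidmem_base,
 *					SZ_256M, SZ_64K,
 *					GPU_ALLOC_4K_VIDMEM_PAGES);
 */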