diff --git a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c index a9f900695..b29045baf 100644 --- a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c +++ b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c @@ -64,6 +64,25 @@ static void __balloc_do_free_fixed(struct nvgpu_buddy_allocator *a, * easily PDE aligned so this hasn't been a problem. */ +static u32 nvgpu_balloc_page_size_to_pte_size(struct nvgpu_buddy_allocator *a, + u32 page_size) +{ + if ((a->flags & GPU_ALLOC_GVA_SPACE) == 0ULL) { + return BALLOC_PTE_SIZE_ANY; + } + + /* + * Make sure the page size is actually valid! + */ + if (page_size == a->vm->big_page_size) { + return BALLOC_PTE_SIZE_BIG; + } else if (page_size == SZ_4K) { + return BALLOC_PTE_SIZE_SMALL; + } else { + return BALLOC_PTE_SIZE_INVALID; + } +} + /* * Pick a suitable maximum order for this allocator. * @@ -142,7 +161,7 @@ static void __balloc_buddy_list_add(struct nvgpu_buddy_allocator *a, * without cycling through the entire list. */ if (a->flags & GPU_ALLOC_GVA_SPACE && - b->pte_size == GMMU_PAGE_SIZE_BIG) { + b->pte_size == BALLOC_PTE_SIZE_BIG) { nvgpu_list_add_tail(&b->buddy_entry, list); } else { nvgpu_list_add(&b->buddy_entry, list); @@ -387,7 +406,7 @@ static void balloc_coalesce(struct nvgpu_buddy_allocator *a, * @a must be locked. */ static int balloc_split_buddy(struct nvgpu_buddy_allocator *a, - struct nvgpu_buddy *b, int pte_size) + struct nvgpu_buddy *b, u32 pte_size) { struct nvgpu_buddy *left, *right; u64 half; @@ -415,9 +434,22 @@ static int balloc_split_buddy(struct nvgpu_buddy_allocator *a, left->parent = b; right->parent = b; - /* PTE considerations. */ + /* + * Potentially assign a PTE size to the new buddies. The obvious case is + * when we don't have a GPU VA space; just leave it alone. When we do + * have a GVA space we need to assign the passed PTE size to the buddy + * only if the buddy is less than the PDE block size. This is because if + * the buddy is less than the PDE block size then the buddy's parent + * may already have a PTE size. Thus we can only allocate this buddy to + * mappings with that PTE size (due to the large/small PTE separation + * requirement). + * + * When the buddy size is greater than or equal to the block size then + * we can leave the buddies PTE field alone since the PDE block has yet + * to be assigned a PTE size. + */ if (a->flags & GPU_ALLOC_GVA_SPACE && - left->order <= a->pte_blk_order) { + left->order < a->pte_blk_order) { left->pte_size = pte_size; right->pte_size = pte_size; } @@ -477,7 +509,7 @@ static struct nvgpu_buddy *balloc_free_buddy(struct nvgpu_buddy_allocator *a, * Find a suitable buddy for the given order and PTE type (big or little). */ static struct nvgpu_buddy *__balloc_find_buddy(struct nvgpu_buddy_allocator *a, - u64 order, int pte_size) + u64 order, u32 pte_size) { struct nvgpu_buddy *bud; @@ -487,7 +519,7 @@ static struct nvgpu_buddy *__balloc_find_buddy(struct nvgpu_buddy_allocator *a, } if (a->flags & GPU_ALLOC_GVA_SPACE && - pte_size == GMMU_PAGE_SIZE_BIG) { + pte_size == BALLOC_PTE_SIZE_BIG) { bud = nvgpu_list_last_entry(balloc_get_order_list(a, order), nvgpu_buddy, buddy_entry); } else { @@ -514,7 +546,7 @@ static struct nvgpu_buddy *__balloc_find_buddy(struct nvgpu_buddy_allocator *a, * @a must be locked. */ static u64 __balloc_do_alloc(struct nvgpu_buddy_allocator *a, - u64 order, int pte_size) + u64 order, u32 pte_size) { u64 split_order; struct nvgpu_buddy *bud = NULL; @@ -637,7 +669,7 @@ static void __balloc_get_parent_range(struct nvgpu_buddy_allocator *a, * necessary for this buddy to exist as well. */ static struct nvgpu_buddy *__balloc_make_fixed_buddy( - struct nvgpu_buddy_allocator *a, u64 base, u64 order, int pte_size) + struct nvgpu_buddy_allocator *a, u64 base, u64 order, u32 pte_size) { struct nvgpu_buddy *bud = NULL; struct nvgpu_list_node *order_list; @@ -714,11 +746,19 @@ static struct nvgpu_buddy *__balloc_make_fixed_buddy( static u64 __balloc_do_alloc_fixed(struct nvgpu_buddy_allocator *a, struct nvgpu_fixed_alloc *falloc, - u64 base, u64 len, int pte_size) + u64 base, u64 len, u32 pte_size) { u64 shifted_base, inc_base; u64 align_order; + /* + * Ensure that we have a valid PTE size here (ANY is a valid size). If + * this is INVALID then we are going to experience imminent corruption + * in the lists that hold buddies. This leads to some very strange + * crashes. + */ + BUG_ON(pte_size == BALLOC_PTE_SIZE_INVALID); + shifted_base = balloc_base_shift(a, base); if (shifted_base == 0U) { align_order = __fls(len >> a->blk_shift); @@ -814,10 +854,11 @@ static void __balloc_do_free_fixed(struct nvgpu_buddy_allocator *a, /* * Allocate memory from the passed allocator. */ -static u64 nvgpu_buddy_balloc(struct nvgpu_allocator *na, u64 len) +static u64 nvgpu_buddy_balloc_pte(struct nvgpu_allocator *na, u64 len, + u32 page_size) { u64 order, addr; - int pte_size; + u32 pte_size; struct nvgpu_buddy_allocator *a = na->priv; alloc_lock(na); @@ -830,22 +871,21 @@ static u64 nvgpu_buddy_balloc(struct nvgpu_allocator *na, u64 len) return 0; } - if (a->flags & GPU_ALLOC_GVA_SPACE) { - pte_size = __get_pte_size(a->vm, 0, len); - } else { - pte_size = BALLOC_PTE_SIZE_ANY; + pte_size = nvgpu_balloc_page_size_to_pte_size(a, page_size); + if (pte_size == BALLOC_PTE_SIZE_INVALID) { + return 0ULL; } addr = __balloc_do_alloc(a, order, pte_size); - if (addr) { + if (addr != 0ULL) { a->bytes_alloced += len; a->bytes_alloced_real += balloc_order_to_len(a, order); alloc_dbg(balloc_owner(a), "Alloc 0x%-10llx %3lld:0x%-10llx pte_size=%s", addr, order, len, - pte_size == GMMU_PAGE_SIZE_BIG ? "big" : - pte_size == GMMU_PAGE_SIZE_SMALL ? "small" : + pte_size == BALLOC_PTE_SIZE_BIG ? "big" : + pte_size == BALLOC_PTE_SIZE_SMALL ? "small" : "NA/any"); } else { alloc_dbg(balloc_owner(a), "Alloc failed: no mem!"); @@ -858,13 +898,15 @@ static u64 nvgpu_buddy_balloc(struct nvgpu_allocator *na, u64 len) return addr; } -/* - * Requires @na to be locked. - */ +static u64 nvgpu_buddy_balloc(struct nvgpu_allocator *na, u64 len) +{ + return nvgpu_buddy_balloc_pte(na, len, BALLOC_PTE_SIZE_ANY); +} + static u64 __nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *na, u64 base, u64 len, u32 page_size) { - int pte_size = BALLOC_PTE_SIZE_ANY; + u32 pte_size; u64 ret, real_bytes = 0; struct nvgpu_buddy *bud; struct nvgpu_fixed_alloc *falloc = NULL; @@ -879,15 +921,9 @@ static u64 __nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *na, goto fail; } - /* Check that the page size is valid. */ - if (a->flags & GPU_ALLOC_GVA_SPACE && a->vm->big_pages) { - if (page_size == a->vm->big_page_size) { - pte_size = GMMU_PAGE_SIZE_BIG; - } else if (page_size == SZ_4K) { - pte_size = GMMU_PAGE_SIZE_SMALL; - } else { - goto fail; - } + pte_size = nvgpu_balloc_page_size_to_pte_size(a, page_size); + if (pte_size == BALLOC_PTE_SIZE_INVALID) { + goto fail; } falloc = nvgpu_kmalloc(nvgpu_alloc_to_gpu(na), sizeof(*falloc)); @@ -903,7 +939,7 @@ static u64 __nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *na, alloc_dbg(balloc_owner(a), "Range not free: 0x%llx -> 0x%llx", base, base + len); - goto fail_unlock; + goto fail; } ret = __balloc_do_alloc_fixed(a, falloc, base, len, pte_size); @@ -911,7 +947,7 @@ static u64 __nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *na, alloc_dbg(balloc_owner(a), "Alloc-fixed failed ?? 0x%llx -> 0x%llx", base, base + len); - goto fail_unlock; + goto fail; } balloc_alloc_fixed(a, falloc); @@ -928,8 +964,6 @@ static u64 __nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *na, return base; -fail_unlock: - alloc_unlock(na); fail: nvgpu_kfree(nvgpu_alloc_to_gpu(na), falloc); return 0; @@ -1051,7 +1085,8 @@ static int nvgpu_buddy_reserve_co(struct nvgpu_allocator *na, } /* Should not be possible to fail... */ - addr = __nvgpu_balloc_fixed_buddy(na, co->base, co->length, 0); + addr = __nvgpu_balloc_fixed_buddy(na, co->base, co->length, + BALLOC_PTE_SIZE_ANY); if (!addr) { err = -ENOMEM; nvgpu_warn(na->g, @@ -1206,6 +1241,7 @@ static void nvgpu_buddy_print_stats(struct nvgpu_allocator *na, static const struct nvgpu_allocator_ops buddy_ops = { .alloc = nvgpu_buddy_balloc, + .alloc_pte = nvgpu_buddy_balloc_pte, .free = nvgpu_buddy_bfree, .alloc_fixed = nvgpu_balloc_fixed_buddy, diff --git a/drivers/gpu/nvgpu/common/mm/buddy_allocator_priv.h b/drivers/gpu/nvgpu/common/mm/buddy_allocator_priv.h index fe3926b99..a90530b6f 100644 --- a/drivers/gpu/nvgpu/common/mm/buddy_allocator_priv.h +++ b/drivers/gpu/nvgpu/common/mm/buddy_allocator_priv.h @@ -46,18 +46,20 @@ struct nvgpu_buddy { u64 end; /* End address of this buddy. */ u64 order; /* Buddy order. */ -#define BALLOC_BUDDY_ALLOCED 0x1 -#define BALLOC_BUDDY_SPLIT 0x2 -#define BALLOC_BUDDY_IN_LIST 0x4 - int flags; /* List of associated flags. */ +#define BALLOC_BUDDY_ALLOCED 0x1U +#define BALLOC_BUDDY_SPLIT 0x2U +#define BALLOC_BUDDY_IN_LIST 0x4U + u32 flags; /* List of associated flags. */ /* * Size of the PDE this buddy is using. This allows for grouping like - * sized allocations into the same PDE. This uses the gmmu_pgsz_gk20a - * enum except for the BALLOC_PTE_SIZE_ANY specifier. + * sized allocations into the same PDE. */ -#define BALLOC_PTE_SIZE_ANY -1 - int pte_size; +#define BALLOC_PTE_SIZE_ANY (~0U) +#define BALLOC_PTE_SIZE_INVALID 0U +#define BALLOC_PTE_SIZE_SMALL 1U +#define BALLOC_PTE_SIZE_BIG 2U + u32 pte_size; }; static inline struct nvgpu_buddy * diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c b/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c index 4057a5991..ec0aa888f 100644 --- a/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c +++ b/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c @@ -77,6 +77,11 @@ u64 nvgpu_alloc(struct nvgpu_allocator *a, u64 len) return a->ops->alloc(a, len); } +u64 nvgpu_alloc_pte(struct nvgpu_allocator *a, u64 len, u32 page_size) +{ + return a->ops->alloc_pte(a, len, page_size); +} + void nvgpu_free(struct nvgpu_allocator *a, u64 addr) { a->ops->free(a, addr); diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c index 3cb8ed60a..57d9afb5d 100644 --- a/drivers/gpu/nvgpu/common/mm/vm.c +++ b/drivers/gpu/nvgpu/common/mm/vm.c @@ -152,7 +152,7 @@ u64 __nvgpu_vm_alloc_va(struct vm_gk20a *vm, u64 size, u32 pgsz_idx) /* Be certain we round up to page_size if needed */ size = (size + ((u64)page_size - 1U)) & ~((u64)page_size - 1U); - addr = nvgpu_alloc(vma, size); + addr = nvgpu_alloc_pte(vma, size, page_size); if (!addr) { nvgpu_err(g, "(%s) oom: sz=0x%llx", vma->name, size); return 0; diff --git a/drivers/gpu/nvgpu/common/mm/vm_area.c b/drivers/gpu/nvgpu/common/mm/vm_area.c index c2c0d569b..d096de5d2 100644 --- a/drivers/gpu/nvgpu/common/mm/vm_area.c +++ b/drivers/gpu/nvgpu/common/mm/vm_area.c @@ -99,11 +99,22 @@ int nvgpu_vm_area_alloc(struct vm_gk20a *vm, u32 pages, u32 page_size, struct nvgpu_allocator *vma; struct nvgpu_vm_area *vm_area; u64 vaddr_start = 0; + u64 our_addr = *addr; u32 pgsz_idx = GMMU_PAGE_SIZE_SMALL; + /* + * If we have a fixed address then use the passed address in *addr. This + * corresponds to the o_a field in the IOCTL. But since we do not + * support specific alignments in the buddy allocator we ignore the + * field if it isn't a fixed offset. + */ + if ((flags & NVGPU_VM_AREA_ALLOC_FIXED_OFFSET) != 0U) { + our_addr = *addr; + } + nvgpu_log(g, gpu_dbg_map, - "ADD vm_area: pgsz=%#-8x pages=%-9u addr=%#-14llx flags=0x%x", - page_size, pages, *addr, flags); + "ADD vm_area: pgsz=%#-8x pages=%-9u a/o=%#-14llx flags=0x%x", + page_size, pages, our_addr, flags); for (; pgsz_idx < GMMU_NR_PAGE_SIZES; pgsz_idx++) { if (vm->gmmu_page_sizes[pgsz_idx] == page_size) { @@ -133,14 +144,15 @@ int nvgpu_vm_area_alloc(struct vm_gk20a *vm, u32 pages, u32 page_size, vma = vm->vma[pgsz_idx]; if (flags & NVGPU_VM_AREA_ALLOC_FIXED_OFFSET) { - vaddr_start = nvgpu_alloc_fixed(vma, *addr, + vaddr_start = nvgpu_alloc_fixed(vma, our_addr, (u64)pages * (u64)page_size, page_size); } else { - vaddr_start = nvgpu_alloc(vma, - (u64)pages * - (u64)page_size); + vaddr_start = nvgpu_alloc_pte(vma, + (u64)pages * + (u64)page_size, + page_size); } if (!vaddr_start) { diff --git a/drivers/gpu/nvgpu/include/nvgpu/allocator.h b/drivers/gpu/nvgpu/include/nvgpu/allocator.h index 698aafb39..2d9b3d324 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/allocator.h +++ b/drivers/gpu/nvgpu/include/nvgpu/allocator.h @@ -49,6 +49,8 @@ struct gk20a; */ struct nvgpu_allocator_ops { u64 (*alloc)(struct nvgpu_allocator *allocator, u64 len); + u64 (*alloc_pte)(struct nvgpu_allocator *allocator, u64 len, + u32 page_size); void (*free)(struct nvgpu_allocator *allocator, u64 addr); /* @@ -242,6 +244,7 @@ int nvgpu_lockless_allocator_init(struct gk20a *g, struct nvgpu_allocator *na, * Allocator APIs. */ u64 nvgpu_alloc(struct nvgpu_allocator *allocator, u64 len); +u64 nvgpu_alloc_pte(struct nvgpu_allocator *a, u64 len, u32 page_size); void nvgpu_free(struct nvgpu_allocator *allocator, u64 addr); u64 nvgpu_alloc_fixed(struct nvgpu_allocator *allocator, u64 base, u64 len,