diff --git a/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c b/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c
index 6f267c852..5042980f2 100644
--- a/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/bitmap_allocator.c
@@ -55,8 +55,11 @@ static u64 nvgpu_bitmap_alloc_end(struct nvgpu_allocator *a)
 	return ba->base + ba->length;
 }
 
+/*
+ * @page_size is ignored.
+ */
 static u64 nvgpu_bitmap_alloc_fixed(struct nvgpu_allocator *__a,
-				    u64 base, u64 len)
+				    u64 base, u64 len, u32 page_size)
 {
 	struct nvgpu_bitmap_allocator *a = bitmap_allocator(__a);
 	u64 blks, offs, ret;
diff --git a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
index 39a53801f..eee0b6344 100644
--- a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
@@ -484,8 +484,9 @@ static struct nvgpu_buddy *__balloc_find_buddy(struct nvgpu_buddy_allocator *a,
 	bud = list_first_entry(balloc_get_order_list(a, order),
 			       struct nvgpu_buddy, buddy_entry);
 
-	if (bud->pte_size != BALLOC_PTE_SIZE_ANY &&
-	    bud->pte_size != pte_size)
+	if (pte_size != BALLOC_PTE_SIZE_ANY &&
+	    pte_size != bud->pte_size &&
+	    bud->pte_size != BALLOC_PTE_SIZE_ANY)
 		return NULL;
 
 	return bud;
@@ -643,7 +644,7 @@ static void __balloc_get_parent_range(struct nvgpu_buddy_allocator *a,
  * necessary for this buddy to exist as well.
  */
 static struct nvgpu_buddy *__balloc_make_fixed_buddy(
-	struct nvgpu_buddy_allocator *a, u64 base, u64 order)
+	struct nvgpu_buddy_allocator *a, u64 base, u64 order, int pte_size)
 {
 	struct nvgpu_buddy *bud = NULL;
 	struct list_head *order_list;
@@ -664,6 +665,20 @@ static struct nvgpu_buddy *__balloc_make_fixed_buddy(
 		order_list = balloc_get_order_list(a, cur_order);
 		list_for_each_entry(bud, order_list, buddy_entry) {
 			if (bud->start == cur_base) {
+				/*
+				 * Make sure page size matches if it's smaller
+				 * than a PDE sized buddy.
+				 */
+				if (bud->order <= a->pte_blk_order &&
+				    bud->pte_size != BALLOC_PTE_SIZE_ANY &&
+				    bud->pte_size != pte_size) {
+					/* Welp, that's the end of that. */
+					alloc_dbg(balloc_owner(a),
+						  "Fixed buddy PTE "
+						  "size mismatch!\n");
+					return NULL;
+				}
+
 				found = 1;
 				break;
 			}
@@ -683,7 +698,10 @@ static struct nvgpu_buddy *__balloc_make_fixed_buddy(
 
 	/* Split this buddy as necessary until we get the target buddy. */
 	while (bud->start != base || bud->order != order) {
-		if (balloc_split_buddy(a, bud, BALLOC_PTE_SIZE_ANY)) {
+		if (balloc_split_buddy(a, bud, pte_size)) {
+			alloc_dbg(balloc_owner(a),
+				  "split buddy failed? {0x%llx, %llu}\n",
+				  bud->start, bud->order);
 			balloc_coalesce(a, bud);
 			return NULL;
 		}
@@ -700,7 +718,7 @@ static struct nvgpu_buddy *__balloc_make_fixed_buddy(
 
 static u64 __balloc_do_alloc_fixed(struct nvgpu_buddy_allocator *a,
 				   struct nvgpu_fixed_alloc *falloc,
-				   u64 base, u64 len)
+				   u64 base, u64 len, int pte_size)
 {
 	u64 shifted_base, inc_base;
 	u64 align_order;
@@ -731,7 +749,7 @@ static u64 __balloc_do_alloc_fixed(struct nvgpu_buddy_allocator *a,
 
 		bud = __balloc_make_fixed_buddy(a,
 					balloc_base_unshift(a, inc_base),
-					align_order);
+					align_order, pte_size);
 		if (!bud) {
 			alloc_dbg(balloc_owner(a),
 				  "Fixed buddy failed: {0x%llx, %llu}!\n",
@@ -817,17 +835,8 @@ static u64 nvgpu_buddy_balloc(struct nvgpu_allocator *__a, u64 len)
 		return 0;
 	}
 
-	/*
-	 * For now pass the base address of the allocator's region to
-	 * __get_pte_size(). This ensures we get the right page size for
-	 * the alloc but we don't have to know what the real address is
-	 * going to be quite yet.
-	 *
-	 * TODO: once userspace supports a unified address space pass 0 for
-	 * the base. This will make only 'len' affect the PTE size.
-	 */
 	if (a->flags & GPU_ALLOC_GVA_SPACE)
-		pte_size = __get_pte_size(a->vm, a->base, len);
+		pte_size = __get_pte_size(a->vm, 0, len);
 	else
 		pte_size = BALLOC_PTE_SIZE_ANY;
 
@@ -858,8 +867,9 @@ static u64 nvgpu_buddy_balloc(struct nvgpu_allocator *__a, u64 len)
  * Requires @__a to be locked.
  */
 static u64 __nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *__a,
-				      u64 base, u64 len)
+				      u64 base, u64 len, u32 page_size)
 {
+	int pte_size = BALLOC_PTE_SIZE_ANY;
 	u64 ret, real_bytes = 0;
 	struct nvgpu_buddy *bud;
 	struct nvgpu_fixed_alloc *falloc = NULL;
@@ -874,6 +884,16 @@ static u64 __nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *__a,
 	if (len == 0)
 		goto fail;
 
+	/* Check that the page size is valid. */
+	if (a->flags & GPU_ALLOC_GVA_SPACE && a->vm->big_pages) {
+		if (page_size == a->vm->big_page_size)
+			pte_size = gmmu_page_size_big;
+		else if (page_size == SZ_4K)
+			pte_size = gmmu_page_size_small;
+		else
+			goto fail;
+	}
+
 	falloc = kmalloc(sizeof(*falloc), GFP_KERNEL);
 	if (!falloc)
 		goto fail;
@@ -889,7 +909,7 @@ static u64 __nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *__a,
 		goto fail_unlock;
 	}
 
-	ret = __balloc_do_alloc_fixed(a, falloc, base, len);
+	ret = __balloc_do_alloc_fixed(a, falloc, base, len, pte_size);
 	if (!ret) {
 		alloc_dbg(balloc_owner(a),
 			  "Alloc-fixed failed ?? 0x%llx -> 0x%llx\n",
@@ -927,13 +947,13 @@ fail:
  * Please do not use this function unless _absolutely_ necessary.
  */
 static u64 nvgpu_balloc_fixed_buddy(struct nvgpu_allocator *__a,
-				    u64 base, u64 len)
+				    u64 base, u64 len, u32 page_size)
 {
 	u64 alloc;
 	struct nvgpu_buddy_allocator *a = __a->priv;
 
 	alloc_lock(__a);
-	alloc = __nvgpu_balloc_fixed_buddy(__a, base, len);
+	alloc = __nvgpu_balloc_fixed_buddy(__a, base, len, page_size);
 	a->alloc_made = 1;
 	alloc_unlock(__a);
 
@@ -1034,7 +1054,7 @@ static int nvgpu_buddy_reserve_co(struct nvgpu_allocator *__a,
 	}
 
 	/* Should not be possible to fail... */
-	addr = __nvgpu_balloc_fixed_buddy(__a, co->base, co->length);
+	addr = __nvgpu_balloc_fixed_buddy(__a, co->base, co->length, 0);
 	if (!addr) {
 		err = -ENOMEM;
 		pr_warn("%s: Failed to reserve a valid carveout!\n", __func__);
@@ -1310,6 +1330,10 @@ int __nvgpu_buddy_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a,
 	alloc_dbg(__a, " base 0x%llx\n", a->base);
 	alloc_dbg(__a, " size 0x%llx\n", a->length);
 	alloc_dbg(__a, " blk_size 0x%llx\n", a->blk_size);
+	if (flags & GPU_ALLOC_GVA_SPACE)
+		alloc_dbg(balloc_owner(a),
+			  " pde_size 0x%llx\n",
+			  balloc_order_to_len(a, a->pte_blk_order));
 	alloc_dbg(__a, " max_order %llu\n", a->max_order);
 	alloc_dbg(__a, " flags 0x%llx\n", a->flags);
diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c b/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c
index ebd779c06..cf8c45691 100644
--- a/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/nvgpu_allocator.c
@@ -77,10 +77,11 @@ void nvgpu_free(struct nvgpu_allocator *a, u64 addr)
 		a->ops->free(a, addr);
 }
 
-u64 nvgpu_alloc_fixed(struct nvgpu_allocator *a, u64 base, u64 len)
+u64 nvgpu_alloc_fixed(struct nvgpu_allocator *a, u64 base, u64 len,
+		      u32 page_size)
 {
 	if (a->ops->alloc_fixed)
-		return a->ops->alloc_fixed(a, base, len);
+		return a->ops->alloc_fixed(a, base, len, page_size);
 
 	return 0;
 }
diff --git a/drivers/gpu/nvgpu/common/mm/page_allocator.c b/drivers/gpu/nvgpu/common/mm/page_allocator.c
index c61b22380..96f8f2427 100644
--- a/drivers/gpu/nvgpu/common/mm/page_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/page_allocator.c
@@ -648,7 +648,7 @@ done:
 }
 
 static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed(
-	struct nvgpu_page_allocator *a, u64 base, u64 length)
+	struct nvgpu_page_allocator *a, u64 base, u64 length, u32 unused)
 {
 	struct nvgpu_page_alloc *alloc;
 	struct page_alloc_chunk *c;
@@ -658,7 +658,7 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed(
 	if (!alloc || !c)
 		goto fail;
 
-	alloc->base = nvgpu_alloc_fixed(&a->source_allocator, base, length);
+	alloc->base = nvgpu_alloc_fixed(&a->source_allocator, base, length, 0);
 	if (!alloc->base) {
 		WARN(1, "nvgpu: failed to fixed alloc pages @ 0x%010llx", base);
 		goto fail;
@@ -680,8 +680,11 @@ fail:
 	return ERR_PTR(-ENOMEM);
 }
 
+/*
+ * @page_size is ignored.
+ */
 static u64 nvgpu_page_alloc_fixed(struct nvgpu_allocator *__a,
-				  u64 base, u64 len)
+				  u64 base, u64 len, u32 page_size)
 {
 	struct nvgpu_page_allocator *a = page_allocator(__a);
 	struct nvgpu_page_alloc *alloc = NULL;
@@ -694,7 +697,7 @@ static u64 nvgpu_page_alloc_fixed(struct nvgpu_allocator *__a,
 
 	alloc_lock(__a);
 
-	alloc = __nvgpu_alloc_pages_fixed(a, base, aligned_len);
+	alloc = __nvgpu_alloc_pages_fixed(a, base, aligned_len, 0);
 	if (IS_ERR(alloc)) {
 		alloc_unlock(__a);
 		return 0;
diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.c b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
index 07601d424..adf0297bc 100644
--- a/drivers/gpu/nvgpu/gk20a/as_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
@@ -281,7 +281,7 @@ static int gk20a_as_ioctl_get_va_regions(
 		struct nvgpu_as_va_region region;
 		struct nvgpu_allocator *vma = nvgpu_alloc_initialized(&vm->fixed) ?
-			&vm->fixed : &vm->vma[i];
+			&vm->fixed : vm->vma[i];
 
 		memset(&region, 0, sizeof(struct nvgpu_as_va_region));
 
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index cdbaef79f..83bbcb547 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1001,7 +1001,8 @@ int gk20a_init_mm_setup_sw(struct gk20a *g)
 	mutex_init(&mm->l2_op_lock);
 
 	/*TBD: make channel vm size configurable */
-	mm->channel.user_size = NV_MM_DEFAULT_USER_SIZE;
+	mm->channel.user_size = NV_MM_DEFAULT_USER_SIZE -
+		NV_MM_DEFAULT_KERNEL_SIZE;
 	mm->channel.kernel_size = NV_MM_DEFAULT_KERNEL_SIZE;
 
 	gk20a_dbg_info("channel vm size: user %dMB kernel %dMB",
@@ -1626,7 +1627,7 @@ u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
 		      enum gmmu_pgsz_gk20a gmmu_pgsz_idx)
 
 {
-	struct nvgpu_allocator *vma = &vm->vma[gmmu_pgsz_idx];
+	struct nvgpu_allocator *vma = vm->vma[gmmu_pgsz_idx];
 	u64 offset;
 	u64 gmmu_page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx];
 
@@ -1663,7 +1664,7 @@ int gk20a_vm_free_va(struct vm_gk20a *vm, u64 offset, u64 size,
 		     enum gmmu_pgsz_gk20a pgsz_idx)
 {
-	struct nvgpu_allocator *vma = &vm->vma[pgsz_idx];
+	struct nvgpu_allocator *vma = vm->vma[pgsz_idx];
 
 	gk20a_dbg_info("%s free addr=0x%llx, size=0x%llx",
 		       vma->name, offset, size);
@@ -1790,13 +1791,7 @@ struct buffer_attrs {
 static void gmmu_select_page_size(struct vm_gk20a *vm,
 				  struct buffer_attrs *bfr)
 {
-	int i;
-	/* choose the biggest first (top->bottom) */
-	for (i = gmmu_page_size_kernel - 1; i >= 0; i--)
-		if (!((vm->gmmu_page_sizes[i] - 1) & bfr->align)) {
-			bfr->pgsz_idx = i;
-			break;
-		}
+	bfr->pgsz_idx = __get_pte_size(vm, 0, bfr->size);
 }
 
 static int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
@@ -2497,9 +2492,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 	 * the alignment determined by gmmu_select_page_size().
 	 */
 	if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET) {
-		int pgsz_idx =
-			__nv_gmmu_va_is_big_page_region(vm, offset_align) ?
-			gmmu_page_size_big : gmmu_page_size_small;
+		int pgsz_idx = __get_pte_size(vm, offset_align, mapping_size);
 		if (pgsz_idx > bfr.pgsz_idx) {
 			gk20a_err(d, "%llx buffer pgsz %d, VA pgsz %d",
 				  offset_align, bfr.pgsz_idx, pgsz_idx);
@@ -3149,7 +3142,7 @@ static u64 __gk20a_gmmu_alloc(struct nvgpu_allocator *allocator, dma_addr_t at,
 	u64 addr = 0;
 
 	if (at)
-		addr = nvgpu_alloc_fixed(allocator, at, size);
+		addr = nvgpu_alloc_fixed(allocator, at, size, 0);
 	else
 		addr = nvgpu_alloc(allocator, size);
 
@@ -4260,12 +4253,13 @@ static int gk20a_init_sema_pool(struct vm_gk20a *vm)
 	 *
 	 * !!! TODO: cleanup.
 	 */
-	sema_sea->gpu_va = nvgpu_alloc_fixed(&vm->vma[gmmu_page_size_kernel],
+	sema_sea->gpu_va = nvgpu_alloc_fixed(&vm->kernel,
					     vm->va_limit -
					     mm->channel.kernel_size,
-					     512 * PAGE_SIZE);
+					     512 * PAGE_SIZE,
+					     SZ_4K);
 	if (!sema_sea->gpu_va) {
-		nvgpu_free(&vm->vma[gmmu_page_size_small], sema_sea->gpu_va);
+		nvgpu_free(&vm->kernel, sema_sea->gpu_va);
 		gk20a_vm_put(vm);
 		return -ENOMEM;
 	}
@@ -4273,14 +4267,78 @@ static int gk20a_init_sema_pool(struct vm_gk20a *vm)
 	err = gk20a_semaphore_pool_map(vm->sema_pool, vm);
 	if (err) {
 		gk20a_semaphore_pool_unmap(vm->sema_pool, vm);
-		nvgpu_free(&vm->vma[gmmu_page_size_small],
-			   vm->sema_pool->gpu_va);
+		nvgpu_free(vm->vma[gmmu_page_size_small],
+			   vm->sema_pool->gpu_va);
 		gk20a_vm_put(vm);
 	}
 
 	return 0;
 }
 
+/*
+ * Determine if the passed address space can support big pages or not.
+ */
+int gk20a_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size)
+{
+	u64 mask = ((u64)vm->big_page_size << 10) - 1;
+
+	if (base & mask || size & mask)
+		return 0;
+	return 1;
+}
+
+/*
+ * Attempt to find a reserved memory area to determine PTE size for the passed
+ * mapping. If no reserved area can be found, fall back to small pages.
+ */
+enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm,
+					      u64 base, u64 size)
+{
+	struct vm_reserved_va_node *node;
+
+	node = addr_to_reservation(vm, base);
+	if (!node)
+		return gmmu_page_size_small;
+
+	return node->pgsz_idx;
+}
+
+/**
+ * gk20a_init_vm() - Initialize an address space.
+ *
+ * @mm - Parent MM.
+ * @vm - The VM to init.
+ * @big_page_size - Size of big pages associated with this VM.
+ * @low_hole - The size of the low hole (unaddressable memory at the bottom of
+ *	       the address space).
+ * @kernel_reserved - Space reserved for kernel only allocations.
+ * @aperture_size - Total size of the aperture.
+ * @big_pages - Ignored. Will be set based on other passed params.
+ * @name - Name of the address space.
+ *
+ * This function initializes an address space according to the following map:
+ *
+ *     +--+ 0x0
+ *     |  |
+ *     +--+ @low_hole
+ *     |  |
+ *     ~  ~   This is the "user" section.
+ *     |  |
+ *     +--+ @aperture_size - @kernel_reserved
+ *     |  |
+ *     ~  ~   This is the "kernel" section.
+ *     |  |
+ *     +--+ @aperture_size
+ *
+ * The user section is therefore whatever is left over after the @low_hole and
+ * @kernel_reserved memory have been portioned out. The @kernel_reserved is
+ * always present at the top of the memory space and the @low_hole is always at
+ * the bottom.
+ *
+ * For certain address spaces a "user" section makes no sense (bar1, etc.) so in
+ * such cases the @kernel_reserved and @low_hole should sum to exactly
+ * @aperture_size.
+ */
 int gk20a_init_vm(struct mm_gk20a *mm,
 		  struct vm_gk20a *vm,
 		  u32 big_page_size,
@@ -4293,20 +4351,23 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 {
 	int err, i;
 	char alloc_name[32];
-	u64 small_vma_start, small_vma_limit, large_vma_start, large_vma_limit,
-		kernel_vma_start, kernel_vma_limit;
+	u64 user_vma_start, user_vma_limit, kernel_vma_start, kernel_vma_limit;
 	u32 pde_lo, pde_hi;
 	struct gk20a *g = mm->g;
 
 	/* note: this must match gmmu_pgsz_gk20a enum */
 	u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, big_page_size, SZ_4K };
 
-	WARN_ON(kernel_reserved + low_hole > aperture_size);
-	if (kernel_reserved > aperture_size)
+	if (WARN_ON(kernel_reserved + low_hole > aperture_size))
 		return -ENOMEM;
 
 	vm->mm = mm;
 
+	/* Set up vma pointers. */
+	vm->vma[0] = &vm->user;
+	vm->vma[1] = &vm->user;
+	vm->vma[2] = &vm->kernel;
+
 	vm->va_start = low_hole;
 	vm->va_limit = aperture_size;
 	vm->big_pages = big_pages;
@@ -4321,10 +4382,8 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 	gk20a_dbg_info("small page-size (%dKB)",
 		       vm->gmmu_page_sizes[gmmu_page_size_small] >> 10);
-
-	gk20a_dbg_info("big page-size (%dKB)",
-		       vm->gmmu_page_sizes[gmmu_page_size_big] >> 10);
-
+	gk20a_dbg_info("big page-size (%dKB) (%s)\n",
+		       vm->gmmu_page_sizes[gmmu_page_size_big] >> 10, name);
 	gk20a_dbg_info("kernel page-size (%dKB)",
 		       vm->gmmu_page_sizes[gmmu_page_size_kernel] >> 10);
 
@@ -4348,38 +4407,27 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 		goto clean_up_pdes;
 
 	/* setup vma limits */
-	small_vma_start = low_hole;
-
-	if (big_pages) {
-		/* First 16GB of the address space goes towards small
-		 * pages. What ever remains is allocated to large
-		 * pages.
-		 */
-		small_vma_limit = __nv_gmmu_va_small_page_limit();
-		large_vma_start = small_vma_limit;
-		large_vma_limit = vm->va_limit - kernel_reserved;
-	} else {
-		small_vma_limit = vm->va_limit - kernel_reserved;
-		large_vma_start = 0;
-		large_vma_limit = 0;
-	}
+	user_vma_start = low_hole;
+	user_vma_limit = vm->va_limit - kernel_reserved;
 
 	kernel_vma_start = vm->va_limit - kernel_reserved;
 	kernel_vma_limit = vm->va_limit;
 
 	gk20a_dbg_info(
-		"small_vma=[0x%llx,0x%llx) large_vma=[0x%llx,0x%llx) kernel_vma=[0x%llx,0x%llx)\n",
-		small_vma_start, small_vma_limit,
-		large_vma_start, large_vma_limit,
+		"user_vma=[0x%llx,0x%llx) kernel_vma=[0x%llx,0x%llx)\n",
+		user_vma_start, user_vma_limit,
 		kernel_vma_start, kernel_vma_limit);
 
-	/* check that starts do not exceed limits */
-	WARN_ON(small_vma_start > small_vma_limit);
-	WARN_ON(large_vma_start > large_vma_limit);
-	/* kernel_vma must also be non-zero */
+	WARN_ON(user_vma_start > user_vma_limit);
 	WARN_ON(kernel_vma_start >= kernel_vma_limit);
 
-	if (small_vma_start > small_vma_limit ||
-	    large_vma_start > large_vma_limit ||
+	/*
+	 * A "user" area only makes sense for the GVA spaces. For VMs where
+	 * there is no "user" area user_vma_start will be equal to
+	 * user_vma_limit (i.e. a 0 sized space). In such a situation the kernel
+	 * area must be non-zero in length.
+	 */
+	if (user_vma_start > user_vma_limit ||
 	    kernel_vma_start >= kernel_vma_limit) {
 		err = -EINVAL;
 		goto clean_up_pdes;
@@ -4389,8 +4437,8 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 	 * Attempt to make a separate VM for fixed allocations.
 	 */
 	if (g->separate_fixed_allocs &&
-	    small_vma_start < small_vma_limit) {
-		if (g->separate_fixed_allocs >= small_vma_limit)
+	    user_vma_start < user_vma_limit) {
+		if (g->separate_fixed_allocs >= user_vma_limit)
 			goto clean_up_pdes;
 
 		snprintf(alloc_name, sizeof(alloc_name),
 		err = __nvgpu_buddy_allocator_init(g, &vm->fixed,
 						   vm, alloc_name,
-						   small_vma_start,
+						   user_vma_start,
 						   g->separate_fixed_allocs,
 						   SZ_4K,
 						   GPU_BALLOC_MAX_ORDER,
 			goto clean_up_ptes;
 
 		/* Make sure to update the user vma size. */
-		small_vma_start = g->separate_fixed_allocs;
+		user_vma_start = g->separate_fixed_allocs;
 	}
 
-	if (small_vma_start < small_vma_limit) {
-		snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB", name,
-			 vm->gmmu_page_sizes[gmmu_page_size_small] >> 10);
-		err = __nvgpu_buddy_allocator_init(
-			g,
-			&vm->vma[gmmu_page_size_small],
-			vm, alloc_name,
-			small_vma_start,
-			small_vma_limit - small_vma_start,
-			SZ_4K,
-			GPU_BALLOC_MAX_ORDER,
-			GPU_ALLOC_GVA_SPACE);
+	if (user_vma_start < user_vma_limit) {
+		snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s", name);
+		if (!gk20a_big_pages_possible(vm, user_vma_start,
+					      user_vma_limit - user_vma_start))
+			vm->big_pages = false;
+
+		err = __nvgpu_buddy_allocator_init(g, &vm->user,
+						   vm, alloc_name,
+						   user_vma_start,
+						   user_vma_limit -
+						   user_vma_start,
+						   SZ_4K,
+						   GPU_BALLOC_MAX_ORDER,
+						   GPU_ALLOC_GVA_SPACE);
 		if (err)
 			goto clean_up_ptes;
+	} else {
+		/*
+		 * Make these allocator pointers point to the kernel allocator
+		 * since we still use the legacy notion of page size to choose
+		 * the allocator.
+		 */
+		vm->vma[0] = &vm->kernel;
+		vm->vma[1] = &vm->kernel;
 	}
 
-	if (large_vma_start < large_vma_limit) {
-		snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB",
-			 name, vm->gmmu_page_sizes[gmmu_page_size_big] >> 10);
-		err = __nvgpu_buddy_allocator_init(
-			g,
-			&vm->vma[gmmu_page_size_big],
-			vm, alloc_name,
-			large_vma_start,
-			large_vma_limit - large_vma_start,
-			big_page_size,
-			GPU_BALLOC_MAX_ORDER,
-			GPU_ALLOC_GVA_SPACE);
-		if (err)
-			goto clean_up_small_allocator;
-	}
+	snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-sys", name);
+	if (!gk20a_big_pages_possible(vm, kernel_vma_start,
+				      kernel_vma_limit - kernel_vma_start))
+		vm->big_pages = false;
 
-	snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB-sys",
-		 name, vm->gmmu_page_sizes[gmmu_page_size_kernel] >> 10);
-	/*
-	 * kernel reserved VMA is at the end of the aperture
-	 */
-	err = __nvgpu_buddy_allocator_init(g, &vm->vma[gmmu_page_size_kernel],
+	err = __nvgpu_buddy_allocator_init(g, &vm->kernel,
 					   vm, alloc_name,
 					   kernel_vma_start,
 					   kernel_vma_limit - kernel_vma_start,
@@ -4455,7 +4497,7 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 					   GPU_BALLOC_MAX_ORDER,
 					   GPU_ALLOC_GVA_SPACE);
 	if (err)
-		goto clean_up_big_allocator;
+		goto clean_up_user_allocator;
 
 	vm->mapped_buffers = RB_ROOT;
 
@@ -4471,17 +4513,14 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 	if (vm->va_limit > SZ_4G) {
 		err = gk20a_init_sema_pool(vm);
 		if (err)
-			goto clean_up_big_allocator;
+			goto clean_up_user_allocator;
 	}
 
 	return 0;
 
-clean_up_big_allocator:
-	if (large_vma_start < large_vma_limit)
-		nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_big]);
-clean_up_small_allocator:
-	if (small_vma_start < small_vma_limit)
-		nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_small]);
+clean_up_user_allocator:
+	if (user_vma_start < user_vma_limit)
+		nvgpu_alloc_destroy(&vm->user);
 clean_up_ptes:
 	free_gmmu_pages(vm, &vm->pdb);
 clean_up_pdes:
@@ -4523,9 +4562,10 @@ int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size,
 	vm->as_share = as_share;
 	vm->enable_ctag = true;
 
-	snprintf(name, sizeof(name), "gk20a_as_%d", as_share->id);
+	snprintf(name, sizeof(name), "as_%d", as_share->id);
 
-	err = gk20a_init_vm(mm, vm, big_page_size, big_page_size << 10,
+	err = gk20a_init_vm(mm, vm, big_page_size,
+			    big_page_size << 10,
 			    mm->channel.kernel_size,
 			    mm->channel.user_size + mm->channel.kernel_size,
 			    !mm->disable_bigpage, userspace_managed, name);
@@ -4586,13 +4626,14 @@ int gk20a_vm_alloc_space(struct gk20a_as_share *as_share,
 		goto clean_up;
 	}
 
-	vma = &vm->vma[pgsz_idx];
+	vma = vm->vma[pgsz_idx];
 	if (args->flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET) {
 		if (nvgpu_alloc_initialized(&vm->fixed))
 			vma = &vm->fixed;
 		vaddr_start = nvgpu_alloc_fixed(vma, args->o_a.offset,
 						(u64)args->pages *
-						(u64)args->page_size);
+						(u64)args->page_size,
+						args->page_size);
 	} else {
 		vaddr_start = nvgpu_alloc(vma,
 					  (u64)args->pages *
@@ -4662,13 +4703,13 @@ int gk20a_vm_free_space(struct gk20a_as_share *as_share,
 		   args->pages, args->offset);
 
 	/* determine pagesz idx */
-	pgsz_idx = __nv_gmmu_va_is_big_page_region(vm, args->offset) ?
-		   gmmu_page_size_big : gmmu_page_size_small;
+	pgsz_idx = __get_pte_size(vm, args->offset,
+				  args->page_size * args->pages);
 
 	if (nvgpu_alloc_initialized(&vm->fixed))
 		vma = &vm->fixed;
 	else
-		vma = &vm->vma[pgsz_idx];
+		vma = vm->vma[pgsz_idx];
 	nvgpu_free(vma, args->offset);
 
 	mutex_lock(&vm->update_gmmu_lock);
@@ -4853,11 +4894,10 @@ int gk20a_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset,
 
 void gk20a_deinit_vm(struct vm_gk20a *vm)
 {
-	nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_kernel]);
-	if (nvgpu_alloc_initialized(&vm->vma[gmmu_page_size_big]))
-		nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_big]);
-	if (nvgpu_alloc_initialized(&vm->vma[gmmu_page_size_small]))
-		nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_small]);
+	if (nvgpu_alloc_initialized(&vm->kernel))
+		nvgpu_alloc_destroy(&vm->kernel);
+	if (nvgpu_alloc_initialized(&vm->user))
+		nvgpu_alloc_destroy(&vm->user);
 	if (nvgpu_alloc_initialized(&vm->fixed))
 		nvgpu_alloc_destroy(&vm->fixed);
 
@@ -4908,9 +4948,13 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
 	mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20;
 	gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size);
 
-	gk20a_init_vm(mm, vm, big_page_size, SZ_4K,
-		      mm->bar1.aperture_size - SZ_4K,
-		      mm->bar1.aperture_size, false, false, "bar1");
+	gk20a_init_vm(mm, vm,
+		      big_page_size,
+		      SZ_4K,				/* Low hole */
+		      mm->bar1.aperture_size - SZ_4K,	/* Kernel reserved. */
+		      mm->bar1.aperture_size,
+		      true, false,
+		      "bar1");
 
 	err = gk20a_alloc_inst_block(g, inst_block);
 	if (err)
@@ -4932,13 +4976,23 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm)
 	struct gk20a *g = gk20a_from_mm(mm);
 	struct mem_desc *inst_block = &mm->pmu.inst_block;
 	u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size;
+	u32 low_hole, aperture_size;
+
+	/*
+	 * No user region, so it is passed as zero sized.
+	 */
+	low_hole = SZ_4K * 16;
+	aperture_size = GK20A_PMU_VA_SIZE * 2;
 
 	mm->pmu.aperture_size = GK20A_PMU_VA_SIZE;
 	gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size);
 
 	gk20a_init_vm(mm, vm, big_page_size,
-		      SZ_4K * 16, GK20A_PMU_VA_SIZE,
-		      GK20A_PMU_VA_SIZE * 2, false, false,
+		      low_hole,
+		      aperture_size - low_hole,
+		      aperture_size,
+		      true,
+		      false,
 		      "system");
 
 	err = gk20a_alloc_inst_block(g, inst_block);
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 5ef8ae254..394d1d25b 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -270,11 +270,13 @@ struct vm_gk20a {
 
 	struct gk20a_mm_entry pdb;
-	struct nvgpu_allocator vma[gmmu_nr_page_sizes];
-
 	/* If necessary, split fixed from non-fixed. */
 	struct nvgpu_allocator fixed;
 
+	struct nvgpu_allocator *vma[gmmu_nr_page_sizes];
+	struct nvgpu_allocator kernel;
+	struct nvgpu_allocator user;
+
 	struct rb_root mapped_buffers;
 
 	struct list_head reserved_va_list;
@@ -425,7 +427,7 @@ static inline int bar1_aperture_size_mb_gk20a(void)
 	return 16; /* 16MB is more than enough atm. */
 }
 
-/*The maximum GPU VA range supported */
+/* The maximum GPU VA range supported */
 #define NV_GMMU_VA_RANGE	38
 
 /* The default userspace-visible GPU VA size */
@@ -434,43 +436,39 @@ static inline int bar1_aperture_size_mb_gk20a(void)
 /* The default kernel-reserved GPU VA size */
 #define NV_MM_DEFAULT_KERNEL_SIZE	(1ULL << 32)
 
-/*
- * The bottom 16GB of the space are used for small pages, the remaining high
- * memory is for large pages.
- */
-static inline u64 __nv_gmmu_va_small_page_limit(void)
-{
-	return ((u64)SZ_1G * 16);
-}
-
-static inline int __nv_gmmu_va_is_big_page_region(struct vm_gk20a *vm, u64 addr)
-{
-	struct nvgpu_allocator *a = &vm->vma[gmmu_page_size_big];
-
-	if (!vm->big_pages)
-		return 0;
-
-	return addr >= nvgpu_alloc_base(a) &&
-		addr < nvgpu_alloc_base(a) + nvgpu_alloc_length(a);
-}
+enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm,
+					      u64 base, u64 size);
 
 /*
  * This determines the PTE size for a given alloc. Used by both the GVA space
  * allocator and the mm core code so that agreement can be reached on how to
  * map allocations.
+ *
+ * The page size of a buffer is determined as follows:
+ *
+ *   o  If the VM doesn't support large pages then small pages
+ *      must be used.
+ *   o  If the base address is non-zero (fixed address map):
+ *      - Attempt to find a reserved memory area and use the page size
+ *        based on that.
+ *      - If no reserved page size is available, default to small pages.
+ *   o  If the base is zero:
+ *      - If the size is greater than or equal to the big page size, use big
+ *        pages.
+ *      - Otherwise use small pages.
  */
 static inline enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm,
 						  u64 base, u64 size)
 {
-	/*
-	 * Currently userspace is not ready for a true unified address space.
-	 * As a result, even though the allocator supports mixed address spaces
-	 * the address spaces must be treated as separate for now.
-	 */
-	if (__nv_gmmu_va_is_big_page_region(vm, base))
-		return gmmu_page_size_big;
-	else
+	if (!vm->big_pages)
 		return gmmu_page_size_small;
+
+	if (base)
+		return __get_pte_size_fixed_map(vm, base, size);
+
+	if (size >= vm->gmmu_page_sizes[gmmu_page_size_big])
+		return gmmu_page_size_big;
+	return gmmu_page_size_small;
 }
 
 /*
@@ -797,6 +795,8 @@ void gk20a_mm_init_pdb(struct gk20a *g, struct mem_desc *mem,
 void gk20a_remove_vm(struct vm_gk20a *vm, struct mem_desc *inst_block);
 
+int gk20a_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size);
+
 extern const struct gk20a_mmu_level gk20a_mm_levels_64k[];
 extern const struct gk20a_mmu_level gk20a_mm_levels_128k[];
 
diff --git a/drivers/gpu/nvgpu/include/nvgpu/allocator.h b/drivers/gpu/nvgpu/include/nvgpu/allocator.h
index dee9b5625..d5a90c87a 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/allocator.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/allocator.h
@@ -41,11 +41,15 @@ struct nvgpu_allocator_ops {
 	 * regular and fixed allocations then free_fixed() does not need to
 	 * be implemented. This behavior exists for legacy reasons and should
	 * not be propagated to new allocators.
+	 *
+	 * For allocators where the @page_size field is not applicable it can
+	 * be left as 0. Otherwise a valid page size should be passed (either
+	 * 4K or the VM's large page size).
	 */
	u64 (*alloc_fixed)(struct nvgpu_allocator *allocator,
-			   u64 base, u64 len);
+			   u64 base, u64 len, u32 page_size);
	void (*free_fixed)(struct nvgpu_allocator *allocator,
			   u64 base, u64 len);

	/*
	 * Allow allocators to reserve space for carveouts.
@@ -213,7 +217,8 @@ int nvgpu_lockless_allocator_init(struct gk20a *g, struct nvgpu_allocator *a,
 u64 nvgpu_alloc(struct nvgpu_allocator *allocator, u64 len);
 void nvgpu_free(struct nvgpu_allocator *allocator, u64 addr);
 
-u64 nvgpu_alloc_fixed(struct nvgpu_allocator *allocator, u64 base, u64 len);
+u64 nvgpu_alloc_fixed(struct nvgpu_allocator *allocator, u64 base, u64 len,
+		      u32 page_size);
 void nvgpu_free_fixed(struct nvgpu_allocator *allocator, u64 base, u64 len);
 
 int nvgpu_alloc_reserve_carveout(struct nvgpu_allocator *a,
@@ -298,5 +303,8 @@ void nvgpu_alloc_debugfs_init(struct device *dev);
 	} while (0)
 #endif
 
+#define balloc_pr(allocator, format, arg...)		\
+	pr_info("%-25s %25s() " format,			\
+		allocator->name, __func__, ##arg)
 
 #endif /* NVGPU_ALLOCATOR_H */
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
index 66c9344b6..a21a020d2 100644
--- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
@@ -227,11 +227,12 @@ static void vgpu_vm_remove_support(struct vm_gk20a *vm)
 	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
 	WARN_ON(err || msg.ret);
 
-	nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_kernel]);
-	if (nvgpu_alloc_initialized(&vm->vma[gmmu_page_size_small]))
-		nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_small]);
-	if (nvgpu_alloc_initialized(&vm->vma[gmmu_page_size_big]))
-		nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_big]);
+	if (nvgpu_alloc_initialized(&vm->kernel))
+		nvgpu_alloc_destroy(&vm->kernel);
+	if (nvgpu_alloc_initialized(&vm->user))
+		nvgpu_alloc_destroy(&vm->user);
+	if (nvgpu_alloc_initialized(&vm->fixed))
+		nvgpu_alloc_destroy(&vm->fixed);
 
 	mutex_unlock(&vm->update_gmmu_lock);
 
@@ -273,8 +274,7 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
 	struct tegra_vgpu_as_share_params *p = &msg.params.as_share;
 	struct mm_gk20a *mm = &g->mm;
 	struct vm_gk20a *vm;
-	u64 small_vma_start, small_vma_limit, large_vma_start, large_vma_limit,
-		kernel_vma_start, kernel_vma_limit;
+	u64 user_vma_start, user_vma_limit, kernel_vma_start, kernel_vma_limit;
 	char name[32];
 	int err, i;
 	const bool userspace_managed =
@@ -306,6 +306,11 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
 	vm->mm = mm;
 	vm->as_share = as_share;
 
+	/* Set up vma pointers. */
+	vm->vma[0] = &vm->user;
+	vm->vma[1] = &vm->user;
+	vm->vma[2] = &vm->kernel;
+
 	for (i = 0; i < gmmu_nr_page_sizes; i++)
 		vm->gmmu_page_sizes[i] = gmmu_page_sizes[i];
 
@@ -328,93 +333,74 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
 	vm->handle = p->handle;
 
 	/* setup vma limits */
-	small_vma_start = vm->va_start;
-
-	if (vm->big_pages) {
-		/* First 16GB of the address space goes towards small
-		 * pages. The kernel reserved pages are at the end.
-		 * What ever remains is allocated to large pages.
-		 */
-		small_vma_limit = __nv_gmmu_va_small_page_limit();
-		large_vma_start = small_vma_limit;
-		large_vma_limit = vm->va_limit - mm->channel.kernel_size;
-	} else {
-		small_vma_limit = vm->va_limit - mm->channel.kernel_size;
-		large_vma_start = 0;
-		large_vma_limit = 0;
-	}
+	user_vma_start = vm->va_start;
+	user_vma_limit = vm->va_limit - mm->channel.kernel_size;
 
 	kernel_vma_start = vm->va_limit - mm->channel.kernel_size;
 	kernel_vma_limit = vm->va_limit;
 
 	gk20a_dbg_info(
-		"small_vma=[0x%llx,0x%llx) large_vma=[0x%llx,0x%llx) kernel_vma=[0x%llx,0x%llx)\n",
-		small_vma_start, small_vma_limit,
-		large_vma_start, large_vma_limit,
+		"user_vma=[0x%llx,0x%llx) kernel_vma=[0x%llx,0x%llx)\n",
+		user_vma_start, user_vma_limit,
 		kernel_vma_start, kernel_vma_limit);
 
-	/* check that starts do not exceed limits */
-	WARN_ON(small_vma_start > small_vma_limit);
-	WARN_ON(large_vma_start > large_vma_limit);
-	/* kernel_vma must also be non-zero */
+	WARN_ON(user_vma_start > user_vma_limit);
 	WARN_ON(kernel_vma_start >= kernel_vma_limit);
 
-	if (small_vma_start > small_vma_limit ||
-	    large_vma_start > large_vma_limit ||
+	if (user_vma_start > user_vma_limit ||
 	    kernel_vma_start >= kernel_vma_limit) {
 		err = -EINVAL;
 		goto clean_up_share;
 	}
 
-	if (small_vma_start < small_vma_limit) {
+	if (user_vma_start < user_vma_limit) {
 		snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
 			 gmmu_page_sizes[gmmu_page_size_small] >> 10);
+		if (!gk20a_big_pages_possible(vm, user_vma_start,
+					      user_vma_limit - user_vma_start))
+			vm->big_pages = false;
 		err = __nvgpu_buddy_allocator_init(
 			g,
-			&vm->vma[gmmu_page_size_small],
+			vm->vma[gmmu_page_size_small],
 			vm, name,
-			small_vma_start,
-			small_vma_limit - small_vma_start,
+			user_vma_start,
+			user_vma_limit - user_vma_start,
 			SZ_4K,
 			GPU_BALLOC_MAX_ORDER,
 			GPU_ALLOC_GVA_SPACE);
 		if (err)
 			goto clean_up_share;
-	}
-
-	if (large_vma_start < large_vma_limit) {
-		snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
-			 gmmu_page_sizes[gmmu_page_size_big] >> 10);
-		err = __nvgpu_buddy_allocator_init(
-			g,
-			&vm->vma[gmmu_page_size_big],
-			vm, name,
-			large_vma_start,
-			large_vma_limit - large_vma_start,
-			big_page_size,
-			GPU_BALLOC_MAX_ORDER,
-			GPU_ALLOC_GVA_SPACE);
-		if (err)
-			goto clean_up_small_allocator;
+	} else {
+		/*
+		 * Make these allocator pointers point to the kernel allocator
+		 * since we still use the legacy notion of page size to choose
+		 * the allocator.
+		 */
+		vm->vma[0] = &vm->kernel;
+		vm->vma[1] = &vm->kernel;
 	}
 
 	snprintf(name, sizeof(name), "gk20a_as_%dKB-sys",
 		 gmmu_page_sizes[gmmu_page_size_kernel] >> 10);
+	if (!gk20a_big_pages_possible(vm, kernel_vma_start,
+				      kernel_vma_limit - kernel_vma_start))
+		vm->big_pages = false;
+
 	/*
 	 * kernel reserved VMA is at the end of the aperture
 	 */
 	err = __nvgpu_buddy_allocator_init(
-		g,
-		&vm->vma[gmmu_page_size_kernel],
-		vm, name,
-		kernel_vma_start,
-		kernel_vma_limit - kernel_vma_start,
-		SZ_4K,
-		GPU_BALLOC_MAX_ORDER,
-		GPU_ALLOC_GVA_SPACE);
+			g,
+			vm->vma[gmmu_page_size_kernel],
+			vm, name,
+			kernel_vma_start,
+			kernel_vma_limit - kernel_vma_start,
+			SZ_4K,
+			GPU_BALLOC_MAX_ORDER,
+			GPU_ALLOC_GVA_SPACE);
 	if (err)
-		goto clean_up_big_allocator;
+		goto clean_up_user_allocator;
 
 	vm->mapped_buffers = RB_ROOT;
 
@@ -426,12 +412,9 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
 
 	return 0;
 
-clean_up_big_allocator:
-	if (large_vma_start < large_vma_limit)
-		nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_big]);
-clean_up_small_allocator:
-	if (small_vma_start < small_vma_limit)
-		nvgpu_alloc_destroy(&vm->vma[gmmu_page_size_small]);
+clean_up_user_allocator:
+	if (user_vma_start < user_vma_limit)
+		nvgpu_alloc_destroy(&vm->user);
 clean_up_share:
 	msg.cmd = TEGRA_VGPU_CMD_AS_FREE_SHARE;
 	msg.handle = vgpu_get_handle(g);
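
Illustrative usage, not part of the patch: a minimal sketch of how a caller is expected to combine the reworked __get_pte_size() with the new nvgpu_alloc_fixed() signature, assuming an already-initialized struct vm_gk20a *vm. The helper name example_fixed_alloc() is hypothetical.

	/*
	 * Pick the PTE size from the mapping length alone (base == 0), then
	 * forward the matching page size to nvgpu_alloc_fixed(). Note that
	 * vm->vma[] now aliases either &vm->user or &vm->kernel.
	 */
	static u64 example_fixed_alloc(struct vm_gk20a *vm, u64 base, u64 len)
	{
		enum gmmu_pgsz_gk20a pgsz_idx = __get_pte_size(vm, 0, len);

		return nvgpu_alloc_fixed(vm->vma[pgsz_idx], base, len,
					 vm->gmmu_page_sizes[pgsz_idx]);
	}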