From a252cc244a3729eb7f71243c0087f1c4dca52748 Mon Sep 17 00:00:00 2001
From: Vedashree Vidwans
Date: Thu, 13 Aug 2020 10:31:19 -0700
Subject: [PATCH] gpu: nvgpu: modify alloc_as ioctl to accept mem size

- Modify NVGPU_GPU_IOCTL_ALLOC_AS and struct nvgpu_alloc_as_args to
  accept the start address and size of the user VA range. This makes
  the address space layout configurable from userspace.
- Modify gk20a_as_alloc_share() and gk20a_vm_alloc_share() to receive
  the va_range_start, va_range_end and va_range_split values.
- gk20a_vm_alloc_share() initializes the vm with
  low_hole = va_range_start and
  user vma size = (va_range_end - va_range_start).
- Modify nvgpu_as_alloc_space_args and nvgpu_as_free_space_args to
  accept a 64-bit number of pages.

(An illustrative userspace sketch of the updated ioctl is appended
after the diff.)

Bug 2043269
JIRA NVGPU-5302

Change-Id: I243995adf5b7e0e84d6b36abe3b35a5ccabd7a37
Signed-off-by: Vedashree Vidwans
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2385496
Reviewed-by: automaticguardword
Reviewed-by: svc-mobile-coverity
Reviewed-by: Sami Kiminki
Reviewed-by: Deepak Nibade
Reviewed-by: mobile promotions
GVS: Gerrit_Virtual_Submit
Tested-by: Sami Kiminki
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/common/mm/as.c          | 72 +++++++++++++++++++----
 drivers/gpu/nvgpu/common/mm/vm_area.c     | 12 ++--
 drivers/gpu/nvgpu/include/nvgpu/as.h      | 25 +++++---
 drivers/gpu/nvgpu/include/nvgpu/vm_area.h |  2 +-
 drivers/gpu/nvgpu/os/linux/ioctl_as.c     | 12 +++-
 drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c   |  3 +
 include/uapi/linux/nvgpu.h                | 17 ++++--
 userspace/units/gr/setup/nvgpu-gr-setup.c |  5 +-
 userspace/units/mm/as/as.c                | 18 +++++-
 userspace/units/mm/as/as.h                |  5 ++
 10 files changed, 134 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/nvgpu/common/mm/as.c b/drivers/gpu/nvgpu/common/mm/as.c
index fb7e9d72f..8824e6392 100644
--- a/drivers/gpu/nvgpu/common/mm/as.c
+++ b/drivers/gpu/nvgpu/common/mm/as.c
@@ -50,7 +50,9 @@ static void release_as_share_id(struct gk20a_as_share *as_share)
 
 /* address space interfaces for the gk20a module */
 static int gk20a_vm_alloc_share(struct gk20a_as_share *as_share,
-			u32 big_page_size, u32 flags)
+			u32 big_page_size, u32 flags,
+			u64 va_range_start, u64 va_range_end,
+			u64 va_range_split)
 {
 	struct gk20a_as *as = as_share->as;
 	struct gk20a *g = gk20a_from_as(as);
@@ -58,6 +60,10 @@ static int gk20a_vm_alloc_share(struct gk20a_as_share *as_share,
 	struct vm_gk20a *vm;
 	char name[NVGPU_VM_NAME_LEN];
 	char *p;
+	u64 user_size;
+	u64 kernel_size = mm->channel.kernel_size;
+	u64 pde_size, pde_size_mask;
+	bool big_pages;
 	const bool userspace_managed =
 		(flags & NVGPU_AS_ALLOC_USERSPACE_MANAGED) != 0U;
 	const bool unified_va =
@@ -67,6 +73,7 @@ static int gk20a_vm_alloc_share(struct gk20a_as_share *as_share,
 	nvgpu_log_fn(g, " ");
 
 	if (big_page_size == 0U) {
+		big_pages = false;
 		big_page_size = g->ops.mm.gmmu.get_default_big_page_size();
 	} else {
 		if (!is_power_of_2(big_page_size)) {
@@ -77,21 +84,60 @@ static int gk20a_vm_alloc_share(struct gk20a_as_share *as_share,
 			nvgpu_mm_get_available_big_page_sizes(g)) == 0U) {
 			return -EINVAL;
 		}
+		big_pages = true;
 	}
 
+	pde_size = BIT64(nvgpu_vm_pde_coverage_bit_count(g, big_page_size));
+	pde_size_mask = nvgpu_safe_sub_u64(pde_size, U64(1));
+
+	if ((va_range_start == 0ULL) ||
+		((va_range_start & pde_size_mask) != 0ULL)) {
+		return -EINVAL;
+	}
+
+	if ((va_range_end == 0ULL) ||
+		((va_range_end & pde_size_mask) != 0ULL)) {
+		return -EINVAL;
+	}
+
+	if (va_range_start >= va_range_end) {
+		return -EINVAL;
+	}
+
+	user_size = nvgpu_safe_sub_u64(va_range_end, va_range_start);
+
+	if (unified_va || !big_pages) {
+		if (va_range_split != 0ULL) {
+			return -EINVAL;
+		}
+	} else {
+		/* non-unified VA: split required */
+		if ((va_range_split == 0ULL) ||
+			((va_range_split & pde_size_mask) != 0ULL)) {
+			return -EINVAL;
+		}
+
+		/* non-unified VA: split range checks */
+		if ((va_range_split <= va_range_start) ||
+			(va_range_split >= va_range_end)) {
+			return -EINVAL;
+		}
+	}
+
+	nvgpu_log_info(g,
+		"vm: low_hole=0x%llx, user_size=0x%llx, kernel_size=0x%llx",
+		va_range_start, user_size, kernel_size);
+
 	p = strncpy(name, "as_", sizeof("as_"));
 	(void) nvgpu_strnadd_u32(p, nvgpu_safe_cast_s32_to_u32(as_share->id),
 		sizeof(name) - sizeof("as_"), 10U);
 
 	vm = nvgpu_vm_init(g, big_page_size,
-			U64(big_page_size) << U64(10),
-			nvgpu_safe_sub_u64(mm->channel.user_size,
-				nvgpu_safe_sub_u64(mm->channel.kernel_size,
-					U64(big_page_size) << U64(10))),
-			mm->channel.kernel_size,
-			nvgpu_gmmu_va_small_page_limit(),
-			!mm->disable_bigpage,
-			userspace_managed, unified_va, name);
+			va_range_start,
+			user_size,
+			kernel_size,
+			va_range_split,
+			big_pages, userspace_managed, unified_va, name);
 	if (vm == NULL) {
 		return -ENOMEM;
 	}
@@ -104,8 +150,9 @@ static int gk20a_vm_alloc_share(struct gk20a_as_share *as_share,
 }
 
 int gk20a_as_alloc_share(struct gk20a *g,
-		u32 big_page_size, u32 flags,
-		struct gk20a_as_share **out)
+		u32 big_page_size, u32 flags, u64 va_range_start,
+		u64 va_range_end, u64 va_range_split,
+		struct gk20a_as_share **out)
 {
 	struct gk20a_as_share *as_share;
 	int err = 0;
@@ -130,7 +177,8 @@ int gk20a_as_alloc_share(struct gk20a *g,
 	if (err != 0) {
 		goto failed;
 	}
-	err = gk20a_vm_alloc_share(as_share, big_page_size, flags);
+	err = gk20a_vm_alloc_share(as_share, big_page_size, flags,
+			va_range_start, va_range_end, va_range_split);
 	gk20a_idle(g);
 
 	if (err != 0) {
diff --git a/drivers/gpu/nvgpu/common/mm/vm_area.c b/drivers/gpu/nvgpu/common/mm/vm_area.c
index 35a1b4ec0..af8822f88 100644
--- a/drivers/gpu/nvgpu/common/mm/vm_area.c
+++ b/drivers/gpu/nvgpu/common/mm/vm_area.c
@@ -133,19 +133,19 @@ static int nvgpu_vm_area_alloc_get_pagesize_index(struct vm_gk20a *vm,
 }
 
 static int nvgpu_vm_area_alloc_memory(struct nvgpu_allocator *vma, u64 our_addr,
-		u32 pages, u32 page_size, u32 flags,
+		u64 pages, u32 page_size, u32 flags,
 		u64 *vaddr_start_ptr)
 {
 	u64 vaddr_start = 0;
 
 	if ((flags & NVGPU_VM_AREA_ALLOC_FIXED_OFFSET) != 0U) {
 		vaddr_start = nvgpu_alloc_fixed(vma, our_addr,
-				(u64)pages *
+				pages *
 				(u64)page_size,
 				page_size);
 	} else {
 		vaddr_start = nvgpu_alloc_pte(vma,
-				(u64)pages *
+				pages *
 				(u64)page_size,
 				page_size);
 	}
@@ -190,7 +190,7 @@ static int nvgpu_vm_area_alloc_gmmu_map(struct vm_gk20a *vm,
 	return 0;
 }
 
-int nvgpu_vm_area_alloc(struct vm_gk20a *vm, u32 pages, u32 page_size,
+int nvgpu_vm_area_alloc(struct vm_gk20a *vm, u64 pages, u32 page_size,
 			u64 *addr, u32 flags)
 {
 	struct gk20a *g = vm->mm->g;
@@ -211,7 +211,7 @@ int nvgpu_vm_area_alloc(struct vm_gk20a *vm, u32 pages, u32 page_size,
 	}
 
 	nvgpu_log(g, gpu_dbg_map,
-		"ADD vm_area: pgsz=%#-8x pages=%-9u a/o=%#-14llx flags=0x%x",
+		"ADD vm_area: pgsz=%#-8x pages=%-9llu a/o=%#-14llx flags=0x%x",
 		page_size, pages, our_addr, flags);
 
 	if (nvgpu_vm_area_alloc_get_pagesize_index(vm, &pgsz_idx,
@@ -232,7 +232,7 @@ int nvgpu_vm_area_alloc(struct vm_gk20a *vm, u32 pages, u32 page_size,
 	vm_area->flags = flags;
 	vm_area->addr = vaddr_start;
-	vm_area->size = (u64)page_size * (u64)pages;
+	vm_area->size = (u64)page_size * pages;
 	vm_area->pgsz_idx = pgsz_idx;
 	nvgpu_init_list_node(&vm_area->buffer_list_head);
 	nvgpu_init_list_node(&vm_area->vm_area_list);
diff --git a/drivers/gpu/nvgpu/include/nvgpu/as.h b/drivers/gpu/nvgpu/include/nvgpu/as.h
index 28dc39fcf..17580f1ab 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/as.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/as.h
@@ -1,7 +1,7 @@
 /*
  * GK20A Address Spaces
  *
- * Copyright (c) 2011-2019, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -96,11 +96,20 @@ int gk20a_vm_release_share(struct gk20a_as_share *as_share);
 /**
  * @brief Allocate an AS share.
  *
- * @param g [in]		The GPU
- * @param big_page_size [in]	Big page size to use for the VM
- * @param flags [in]		NVGPU_AS_ALLOC_* flags
- * @params out [out]		The resulting, allocated, gk20a_as_share
- *				structure
+ * @param g [in]		The GPU
+ * @param big_page_size [in]	Big page size to use for the VM;
+ *				set to 0 for no big pages
+ * @param flags [in]		NVGPU_AS_ALLOC_* flags
+ * @param va_range_start [in]	Requested start address of the user-managed
+ *				VA range, used to map buffers and save data;
+ *				must be PDE-aligned
+ * @param va_range_end [in]	Requested end address of the user-managed
+ *				VA range; must be PDE-aligned
+ * @param va_range_split [in]	Requested small/big page split address;
+ *				must be PDE-aligned, and is ignored if
+ *				UNIFIED_VA is set
+ * @param out [out]		The resulting, allocated, gk20a_as_share
+ *				structure
  *
  * Allocate the gk20a_as_share structure and the VM associated with it, based
  * on the provided \a big_page_size and NVGPU_AS_ALLOC_* \a flags.
@@ -110,7 +119,9 @@ int gk20a_vm_release_share(struct gk20a_as_share *as_share);
  * @return 0 in case of success, < 0 in case of failure.
  */
 int gk20a_as_alloc_share(struct gk20a *g, u32 big_page_size,
-			u32 flags, struct gk20a_as_share **out);
+			u32 flags, u64 va_range_start,
+			u64 va_range_end, u64 va_range_split,
+			struct gk20a_as_share **out);
 
 /**
  * @brief Retrieve the instance of gk20a from a gk20a_as instance.
diff --git a/drivers/gpu/nvgpu/include/nvgpu/vm_area.h b/drivers/gpu/nvgpu/include/nvgpu/vm_area.h
index 2301177f9..f5f7bd694 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/vm_area.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/vm_area.h
@@ -117,7 +117,7 @@ nvgpu_vm_area_from_vm_area_list(struct nvgpu_list_node *node)
 * @return Zero, for successful allocation.
 *         Suitable error code for failures.
 */
-int nvgpu_vm_area_alloc(struct vm_gk20a *vm, u32 pages, u32 page_size,
+int nvgpu_vm_area_alloc(struct vm_gk20a *vm, u64 pages, u32 page_size,
			u64 *addr, u32 flags);
 
 /**
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_as.c b/drivers/gpu/nvgpu/os/linux/ioctl_as.c
index 88c51861c..5f5e1047d 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_as.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_as.c
@@ -21,6 +21,7 @@
 #include
 #include
+#include
 #include
 #include
 #include
@@ -305,15 +306,24 @@ int gk20a_as_dev_open(struct inode *inode, struct file *filp)
 {
	struct gk20a_as_share *as_share;
	struct gk20a *g;
+	struct mm_gk20a *mm;
	int err;
	struct nvgpu_cdev *cdev;
+	u32 big_page_size;
 
	cdev = container_of(inode->i_cdev, struct nvgpu_cdev, cdev);
	g = nvgpu_get_gk20a_from_cdev(cdev);
+	mm = &g->mm;
+	big_page_size = g->ops.mm.gmmu.get_default_big_page_size();
 
	nvgpu_log_fn(g, " ");
 
-	err = gk20a_as_alloc_share(g, 0, 0, &as_share);
+	err = gk20a_as_alloc_share(g,
+			big_page_size,
+			NVGPU_AS_ALLOC_UNIFIED_VA,
+			U64(big_page_size) << U64(10),
+			mm->channel.user_size,
+			0ULL, &as_share);
	if (err) {
		nvgpu_log_fn(g, "failed to alloc share");
		return err;
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c
index 8b6bdb9db..72b3311fd 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c
@@ -594,6 +594,9 @@ static int gk20a_ctrl_alloc_as(
	err = gk20a_as_alloc_share(g, args->big_page_size,
			gk20a_as_translate_as_alloc_flags(g,
				args->flags),
+			args->va_range_start,
+			args->va_range_end,
+			args->va_range_split,
			&as_share);
	if (err)
		goto clean_up_file;
diff --git a/include/uapi/linux/nvgpu.h b/include/uapi/linux/nvgpu.h
index 33850bb53..0fb34ab39 100644
--- a/include/uapi/linux/nvgpu.h
+++ b/include/uapi/linux/nvgpu.h
@@ -392,7 +392,7 @@ struct nvgpu_gpu_mark_compressible_write_args {
 };
 
 struct nvgpu_alloc_as_args {
-	__u32 big_page_size;
+	__u32 big_page_size;	/* zero for no big pages for this VA */
	__s32 as_fd;
 
	/*
@@ -409,8 +409,12 @@ struct nvgpu_alloc_as_args {
 #define NVGPU_GPU_IOCTL_ALLOC_AS_FLAGS_USERSPACE_MANAGED	(1 << 0)
 #define NVGPU_GPU_IOCTL_ALLOC_AS_FLAGS_UNIFIED_VA		(1 << 1)
	__u32 flags;
-
-	__u32 reserved;		/* must be zero */
+	__u32 reserved;		/* must be zero */
+	__u64 va_range_start;	/* in: starting VA (aligned by PDE) */
+	__u64 va_range_end;	/* in: ending VA (aligned by PDE) */
+	__u64 va_range_split;	/* in: small/big page split (aligned by PDE,
+				 * must be zero if UNIFIED_VA is set) */
+	__u32 padding[6];
 };
 
 struct nvgpu_gpu_open_tsg_args {
@@ -2037,14 +2041,14 @@ struct nvgpu32_as_alloc_space_args {
 };
 
 struct nvgpu_as_alloc_space_args {
-	__u32 pages;		/* in, pages */
+	__u64 pages;		/* in, pages */
	__u32 page_size;	/* in, bytes */
	__u32 flags;		/* in */
-	__u32 padding;		/* in */
	union {
		__u64 offset;	/* inout, byte address valid iff _FIXED_OFFSET */
		__u64 align;	/* in, alignment multiple (0:={1 or n/a}) */
	} o_a;
+	__u32 padding[2];	/* in */
 };
 
 /*
@@ -2055,8 +2059,9 @@ struct nvgpu_as_alloc_space_args {
 */
 struct nvgpu_as_free_space_args {
	__u64 offset;		/* in, byte address */
-	__u32 pages;		/* in, pages */
+	__u64 pages;		/* in, pages */
	__u32 page_size;	/* in, bytes */
+	__u32 padding[3];
 };
 
 /*
diff --git a/userspace/units/gr/setup/nvgpu-gr-setup.c b/userspace/units/gr/setup/nvgpu-gr-setup.c
index cc3a1a54a..649d1e224 100644
--- a/userspace/units/gr/setup/nvgpu-gr-setup.c
+++ b/userspace/units/gr/setup/nvgpu-gr-setup.c
@@ -172,7 +172,10 @@ static int gr_test_setup_allocate_ch_tsg(struct unit_module *m,
		goto ch_cleanup;
	}
 
-	err = gk20a_as_alloc_share(g, 0, 0, &as_share);
+	err = gk20a_as_alloc_share(g,
+			0U, NVGPU_AS_ALLOC_UNIFIED_VA,
+			U64(SZ_4K) << U64(10),
+			(1ULL << 37), 0ULL, &as_share);
	if (err != 0) {
		unit_err(m, "failed vm memory alloc\n");
		goto tsg_unbind;
diff --git a/userspace/units/mm/as/as.c b/userspace/units/mm/as/as.c
index 8cd7239de..2d3dbb948 100644
--- a/userspace/units/mm/as/as.c
+++ b/userspace/units/mm/as/as.c
@@ -58,24 +58,28 @@ static int global_id_count;
 /* Parameters to test standard cases of allocation */
 static struct test_parameters test_64k_user_managed = {
	.big_page_size = SZ_64K,
+	.small_big_split = (SZ_1G * 56ULL),
	.flags = NVGPU_AS_ALLOC_USERSPACE_MANAGED,
	.expected_error = 0
 };
 
 static struct test_parameters test_0k_user_managed = {
	.big_page_size = 0,
+	.small_big_split = 0,
	.flags = NVGPU_AS_ALLOC_USERSPACE_MANAGED,
	.expected_error = 0
 };
 
 static struct test_parameters test_64k_unified_va = {
	.big_page_size = SZ_64K,
+	.small_big_split = 0,
	.flags = NVGPU_AS_ALLOC_UNIFIED_VA,
	.expected_error = 0
 };
 
 static struct test_parameters test_64k_unified_va_enabled = {
	.big_page_size = SZ_64K,
+	.small_big_split = 0,
	.flags = 0,
	.expected_error = 0,
	.unify_address_spaces_flag = true
@@ -83,12 +87,14 @@ static struct test_parameters test_64k_unified_va_enabled = {
 
 static struct test_parameters test_einval_user_managed = {
	.big_page_size = 1,
+	.small_big_split = (SZ_1G * 56ULL),
	.flags = NVGPU_AS_ALLOC_USERSPACE_MANAGED,
	.expected_error = -EINVAL
 };
 
 static struct test_parameters test_notp2_user_managed = {
	.big_page_size = SZ_64K-1,
+	.small_big_split = (SZ_1G * 56ULL),
	.flags = NVGPU_AS_ALLOC_USERSPACE_MANAGED,
	.expected_error = -EINVAL
 };
@@ -96,6 +102,7 @@ static struct test_parameters test_notp2_user_managed = {
 /* Parameters to test corner cases and error handling */
 static struct test_parameters test_64k_user_managed_as_fail = {
	.big_page_size = SZ_64K,
+	.small_big_split = (SZ_1G * 56ULL),
	.flags = 0,
	.expected_error = -ENOMEM,
	.special_case = SPECIAL_CASE_AS_MALLOC_FAIL
@@ -103,6 +110,7 @@ static struct test_parameters test_64k_user_managed_as_fail = {
 
 static struct test_parameters test_64k_user_managed_vm_fail = {
	.big_page_size = SZ_64K,
+	.small_big_split = (SZ_1G * 56ULL),
	.flags = 0,
	.expected_error = -ENOMEM,
	.special_case = SPECIAL_CASE_VM_INIT_FAIL
@@ -110,6 +118,7 @@ static struct test_parameters test_64k_user_managed_vm_fail = {
 
 static struct test_parameters test_64k_user_managed_busy_fail_1 = {
	.big_page_size = SZ_64K,
+	.small_big_split = (SZ_1G * 56ULL),
	.flags = 0,
	.expected_error = -ENODEV,
	.special_case = SPECIAL_CASE_GK20A_BUSY_ALLOC
@@ -117,6 +126,7 @@ static struct test_parameters test_64k_user_managed_busy_fail_1 = {
 
 static struct test_parameters test_64k_user_managed_busy_fail_2 = {
	.big_page_size = SZ_64K,
+	.small_big_split = (SZ_1G * 56ULL),
	.flags = 0,
	.expected_error = 0,
	.special_case = SPECIAL_CASE_GK20A_BUSY_RELEASE
@@ -173,7 +183,7 @@ int test_init_mm(struct unit_module *m, struct gk20a *g, void *args)
	 * Before ref_init, calls to gk20a_as_alloc_share should immediately
	 * fail.
	 */
-	err = gk20a_as_alloc_share(g, 0, 0, NULL);
+	err = gk20a_as_alloc_share(g, 0, 0, 0, 0, 0, NULL);
	if (err != -ENODEV) {
		unit_return_fail(m,
			"gk20a_as_alloc_share did not fail as expected err=%d\n", err);
@@ -213,7 +223,8 @@ int test_as_alloc_share(struct unit_module *m, struct gk20a *g, void *args)
	}
 
	err = gk20a_as_alloc_share(g, params->big_page_size,
-			params->flags, &out);
+			params->flags, (SZ_64K << 10), (1ULL << 37),
+			params->small_big_split, &out);
 
	if (params->unify_address_spaces_flag) {
		nvgpu_set_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES, false);
@@ -264,7 +275,8 @@ int test_gk20a_from_as(struct unit_module *m, struct gk20a *g, void *args)
	int err;
 
	err = gk20a_as_alloc_share(g, SZ_64K, NVGPU_AS_ALLOC_USERSPACE_MANAGED,
-			&out);
+			(SZ_64K << 10), (1ULL << 37),
+			nvgpu_gmmu_va_small_page_limit(), &out);
	if (err != 0) {
		unit_return_fail(m, "gk20a_as_alloc_share failed err=%d\n",
			err);
diff --git a/userspace/units/mm/as/as.h b/userspace/units/mm/as/as.h
index a934012a9..f7a82451e 100644
--- a/userspace/units/mm/as/as.h
+++ b/userspace/units/mm/as/as.h
@@ -55,6 +55,11 @@ struct test_parameters {
	 */
	int big_page_size;
 
+	/**
+	 * Address of the small/big page VMA split.
+	 */
+	unsigned long long small_big_split;
+
	/**
	 * Flags to use when calling gk20a_as_alloc_share. Should be one of the
	 * NVGPU_AS_ALLOC_* flags defined in as.h.
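
Illustrative usage (not part of the patch): a minimal userspace sketch
of the updated NVGPU_GPU_IOCTL_ALLOC_AS call, assuming the uapi header
above is installed as <linux/nvgpu.h>. The ctrl-node path and the VA
range values below are assumptions for illustration; real values
depend on the platform and on the GMMU's PDE coverage. The start/end
values mirror what this patch itself uses in gk20a_as_dev_open() (a
big_page_size << 10 low hole) and in the unit tests (1ULL << 37).

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

#include <linux/nvgpu.h>

int main(void)
{
	struct nvgpu_alloc_as_args args;
	/* Assumed ctrl-node path; it differs across chips/platforms. */
	int fd = open("/dev/nvhost-ctrl-gpu", O_RDWR);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	memset(&args, 0, sizeof(args));
	args.big_page_size = 0;	/* zero: no big pages for this VA */
	args.flags = NVGPU_GPU_IOCTL_ALLOC_AS_FLAGS_UNIFIED_VA;
	/* Both bounds must be PDE-aligned; these values are examples. */
	args.va_range_start = 64ULL << 20;	/* 64 MB low hole */
	args.va_range_end = 1ULL << 37;		/* 128 GB user VA end */
	args.va_range_split = 0ULL;	/* must be zero with UNIFIED_VA */

	if (ioctl(fd, NVGPU_GPU_IOCTL_ALLOC_AS, &args) != 0) {
		perror("NVGPU_GPU_IOCTL_ALLOC_AS");
		close(fd);
		return 1;
	}

	printf("as_fd=%d user VA [0x%llx, 0x%llx)\n", args.as_fd,
		(unsigned long long)args.va_range_start,
		(unsigned long long)args.va_range_end);

	close(args.as_fd);
	close(fd);
	return 0;
}

With UNIFIED_VA set, va_range_split must be zero, matching the checks
added in gk20a_vm_alloc_share(); on a non-unified VM with big pages, a
PDE-aligned split strictly inside (va_range_start, va_range_end) would
be required instead.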