Mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git, synced 2025-12-22 09:12:24 +03:00
gpu: nvgpu: make user vma start,end pde aligned
Each PDE maps memory with a single page size, so allocations with 4K and 64K page sizes must be served by different PDEs whose page size (PTE size) is 4K and 64K respectively. To accomplish this, the user vma must be PDE aligned. Currently the user vma is aligned to (big_page_size << 10), a value carried over from when the PDE size was equal to (big_page_size << 10). Modify the user vma alignment check to use the PDE size instead.

JIRA NVGPU-5302

Change-Id: I2c6599fe50ce9fb081dd1f5a8cd6aa48b17b33b4
Signed-off-by: Vedashree Vidwans <vvidwans@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2428327
Reviewed-by: automaticguardword <automaticguardword@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
Tested-by: Sami Kiminki <skiminki@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
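To illustrate the alignment rule this change switches to, the following is a minimal standalone sketch (not nvgpu code): it checks whether a VMA base and size are aligned to the last-level PDE coverage rather than to (big_page_size << 10). The function name vma_is_pde_aligned, the 21-bit PDE coverage, the 64K big page size, and the sample addresses are all illustrative assumptions, not values taken from the driver.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Example values only: a gp10b-like last-level PDE covering 21 bits (2MB),
 * and a 64K big page size, for which the old mask was 64K << 10 = 64MB. */
#define EXAMPLE_PDE_COVERAGE_BITS 21U
#define EXAMPLE_BIG_PAGE_SIZE     (64ULL * 1024ULL)

/* New-style check: base and size must both be PDE-size aligned. */
static bool vma_is_pde_aligned(uint64_t base, uint64_t size, uint32_t pde_bits)
{
        uint64_t pde_size = 1ULL << pde_bits;
        uint64_t mask = pde_size - 1ULL;

        return ((base & mask) == 0ULL) && ((size & mask) == 0ULL);
}

int main(void)
{
        /* 2MB-aligned base and size: passes the PDE-size check, but would
         * have failed the old (big_page_size << 10) = 64MB alignment rule. */
        uint64_t base = 0x0000000004200000ULL; /* 66MB, 2MB aligned */
        uint64_t size = 0x0000000000600000ULL; /*  6MB, 2MB aligned */

        uint64_t old_mask = (EXAMPLE_BIG_PAGE_SIZE << 10) - 1ULL;

        printf("PDE aligned:      %s\n",
               vma_is_pde_aligned(base, size, EXAMPLE_PDE_COVERAGE_BITS) ?
               "yes" : "no");
        printf("old 64MB aligned: %s\n",
               (((base | size) & old_mask) == 0ULL) ? "yes" : "no");
        return 0;
}

With these assumed values the sketch prints "yes" for the PDE-size check and "no" for the old coarser mask, which is the practical effect of the commit: user vma ranges only need to be aligned to what a last-level PDE actually covers.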
commit 69948919b7
parent 00d1e10ff2
committed by Alex Waterman
@@ -1389,8 +1389,6 @@ static int nvgpu_buddy_set_attributes(struct nvgpu_buddy_allocator *a,
                        u64 base, u64 size, u64 blk_size, u64 max_order,
                        u64 flags)
 {
-       u64 pde_size;
-       u64 base_big_page, size_big_page;
        bool is_gva_space = (flags & GPU_ALLOC_GVA_SPACE) != 0ULL;

        a->base = base;
@@ -1410,24 +1408,23 @@ static int nvgpu_buddy_set_attributes(struct nvgpu_buddy_allocator *a,
        a->vm = vm;
        if (is_gva_space) {
-               pde_size = BIT64(nvgpu_vm_pde_coverage_bit_count(vm));
+               u64 pde_size_mask, base_pde_align, size_pde_align;
+               u64 pde_size = BIT64(nvgpu_vm_pde_coverage_bit_count(
+                               gk20a_from_vm(vm), vm->big_page_size));
                a->pte_blk_order = balloc_get_order(a, pde_size);
-       }

-       /*
-        * When we have a GVA space with big_pages enabled the size and base
-        * must be PDE aligned. If big_pages are not enabled then this
-        * requirement is not necessary.
-        */
-       if (is_gva_space) {
-               u64 big_page_mask;
+               /*
+                * When we have a GVA space with big_pages enabled the size and
+                * base must be PDE aligned. If big_pages are not enabled then
+                * this requirement is not necessary.
+                */

-               big_page_mask = (U64(vm->big_page_size) << U64(10));
-               big_page_mask = nvgpu_safe_sub_u64(big_page_mask, U64(1));
-               base_big_page = a->base & big_page_mask;
-               size_big_page = a->length & big_page_mask;
+               pde_size_mask = nvgpu_safe_sub_u64(pde_size, U64(1));
+               base_pde_align = a->base & pde_size_mask;
+               size_pde_align = a->length & pde_size_mask;
                if (vm->big_pages &&
-                       ((base_big_page != 0ULL) || (size_big_page != 0ULL))) {
+                       ((base_pde_align != 0ULL) ||
+                       (size_pde_align != 0ULL))) {
                        return -EINVAL;
                }
        }

@@ -170,20 +170,22 @@ int nvgpu_vm_bind_channel(struct vm_gk20a *vm, struct nvgpu_channel *ch)
  * example, for gp10b, with a last level address bit PDE range of 28 to 21 the
  * amount of memory each last level PDE addresses is 21 bits - i.e 2MB.
  */
-u32 nvgpu_vm_pde_coverage_bit_count(struct vm_gk20a *vm)
+u32 nvgpu_vm_pde_coverage_bit_count(struct gk20a *g, u64 big_page_size)
 {
        int final_pde_level = 0;
+       const struct gk20a_mmu_level *mmu_levels =
+               g->ops.mm.gmmu.get_mmu_levels(g, big_page_size);

        /*
         * Find the second to last level of the page table programming
         * heirarchy: the last level is PTEs so we really want the level
         * before that which is the last level of PDEs.
         */
-       while (vm->mmu_levels[final_pde_level + 2].update_entry != NULL) {
+       while (mmu_levels[final_pde_level + 2].update_entry != NULL) {
                final_pde_level++;
        }

-       return vm->mmu_levels[final_pde_level].lo_bit[0];
+       return mmu_levels[final_pde_level].lo_bit[0];
 }

 NVGPU_COV_WHITELIST_BLOCK_BEGIN(deviate, 1, NVGPU_MISRA(Rule, 17_2), "TID-278")
@@ -320,7 +322,9 @@ void nvgpu_vm_mapping_batch_finish(struct vm_gk20a *vm,
  */
 bool nvgpu_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size)
 {
-       u64 mask = nvgpu_safe_sub_u64((u64)vm->big_page_size << 10ULL, 1ULL);
+       u64 pde_size = BIT64(nvgpu_vm_pde_coverage_bit_count(
+                       gk20a_from_vm(vm), vm->big_page_size));
+       u64 mask = nvgpu_safe_sub_u64(pde_size, 1ULL);
        u64 base_big_page = base & mask;
        u64 size_big_page = size & mask;

@@ -414,14 +414,15 @@ bool nvgpu_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size);
  * @brief Determine how many bits of the address space is covered by
  * last level PDE.
  *
- * @param vm [in] Pointer to virtual memory context.
+ * @param g [in] The GPU.
+ * @param big_page_size [in] Big page size supported by GMMU.
  *
  * - Go to the last level before page table entry level and return
  * the mmu_levels[x].lo_bit.
  *
  * @return number of bits with last level of entry.
  */
-u32 nvgpu_vm_pde_coverage_bit_count(struct vm_gk20a *vm);
+u32 nvgpu_vm_pde_coverage_bit_count(struct gk20a *g, u64 big_page_size);

 /**
  * @brief Eliminates redundant cache flushes and invalidates.
@@ -2058,7 +2058,7 @@ int test_vm_pde_coverage_bit_count(struct unit_module *m, struct gk20a *g,
        int ret = UNIT_FAIL;
        struct vm_gk20a *vm = create_test_vm(m, g);

-       bit_count = nvgpu_vm_pde_coverage_bit_count(vm);
+       bit_count = nvgpu_vm_pde_coverage_bit_count(g, vm->big_page_size);

        if (bit_count != GP10B_PDE_BIT_COUNT) {
                unit_err(m, "invalid PDE bit count\n");