From 69948919b7e7c3dc6185bcd3fa3b3752b4e2b7c8 Mon Sep 17 00:00:00 2001
From: Vedashree Vidwans
Date: Wed, 14 Oct 2020 21:59:16 -0700
Subject: [PATCH] gpu: nvgpu: make user vma start,end pde aligned

Any PDE can allocate memory with a specific page size. That means
memory allocations with 4K and 64K page sizes are realized by
different PDEs whose page size (or PTE size) is 4K and 64K
respectively. To accomplish this, the user vma must be PDE aligned.

Currently, the user vma is aligned to (big_page_size << 10), a value
carried over from when the PDE size was equal to (big_page_size << 10).
Modify the user vma alignment check to use the PDE size instead.

JIRA NVGPU-5302

Change-Id: I2c6599fe50ce9fb081dd1f5a8cd6aa48b17b33b4
Signed-off-by: Vedashree Vidwans
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2428327
Reviewed-by: automaticguardword
Reviewed-by: svc-mobile-coverity
Reviewed-by: svc-mobile-cert
Reviewed-by: Sami Kiminki
Reviewed-by: Alex Waterman
Reviewed-by: mobile promotions
GVS: Gerrit_Virtual_Submit
Tested-by: Sami Kiminki
Tested-by: mobile promotions
---
 .../common/mm/allocators/buddy_allocator.c | 29 +++++++++----------
 drivers/gpu/nvgpu/common/mm/vm.c           | 12 +++++---
 drivers/gpu/nvgpu/include/nvgpu/vm.h       |  5 ++--
 userspace/units/mm/vm/vm.c                 |  2 +-
 4 files changed, 25 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/nvgpu/common/mm/allocators/buddy_allocator.c b/drivers/gpu/nvgpu/common/mm/allocators/buddy_allocator.c
index 25d2a6782..e0233af70 100644
--- a/drivers/gpu/nvgpu/common/mm/allocators/buddy_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/allocators/buddy_allocator.c
@@ -1389,8 +1389,6 @@ static int nvgpu_buddy_set_attributes(struct nvgpu_buddy_allocator *a,
 			u64 base, u64 size, u64 blk_size, u64 max_order,
 			u64 flags)
 {
-	u64 pde_size;
-	u64 base_big_page, size_big_page;
 	bool is_gva_space = (flags & GPU_ALLOC_GVA_SPACE) != 0ULL;
 
 	a->base = base;
@@ -1410,24 +1408,23 @@ static int nvgpu_buddy_set_attributes(struct nvgpu_buddy_allocator *a,
 	a->vm = vm;
 
 	if (is_gva_space) {
-		pde_size = BIT64(nvgpu_vm_pde_coverage_bit_count(vm));
+		u64 pde_size_mask, base_pde_align, size_pde_align;
+		u64 pde_size = BIT64(nvgpu_vm_pde_coverage_bit_count(
+				gk20a_from_vm(vm), vm->big_page_size));
 		a->pte_blk_order = balloc_get_order(a, pde_size);
-	}
 
-	/*
-	 * When we have a GVA space with big_pages enabled the size and base
-	 * must be PDE aligned. If big_pages are not enabled then this
-	 * requirement is not necessary.
-	 */
-	if (is_gva_space) {
-		u64 big_page_mask;
+		/*
+		 * When we have a GVA space with big_pages enabled the size and
+		 * base must be PDE aligned. If big_pages are not enabled then
+		 * this requirement is not necessary.
+		 */
 
-		big_page_mask = (U64(vm->big_page_size) << U64(10));
-		big_page_mask = nvgpu_safe_sub_u64(big_page_mask, U64(1));
-		base_big_page = a->base & big_page_mask;
-		size_big_page = a->length & big_page_mask;
+		pde_size_mask = nvgpu_safe_sub_u64(pde_size, U64(1));
+		base_pde_align = a->base & pde_size_mask;
+		size_pde_align = a->length & pde_size_mask;
 		if (vm->big_pages &&
-		    ((base_big_page != 0ULL) || (size_big_page != 0ULL))) {
+		    ((base_pde_align != 0ULL) ||
+		     (size_pde_align != 0ULL))) {
 			return -EINVAL;
 		}
 	}
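To make the effect of the new check concrete, here is a stand-alone
sketch (not part of the patch) contrasting the legacy
(big_page_size << 10) mask with the PDE-size mask used above. The
gp10b-style numbers (64K big pages, 21-bit last-level PDE coverage)
are assumptions for illustration only.

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t big_page_size = 64ULL * 1024ULL; /* assumed 64K big pages */
        uint64_t pde_bits = 21ULL;                /* assumed PDE coverage: 2MB */

        /* Legacy mask: (big_page_size << 10) - 1, i.e. 64MB - 1 here. */
        uint64_t legacy_mask = (big_page_size << 10) - 1ULL;
        /* New mask: PDE size - 1, i.e. 2MB - 1 here. */
        uint64_t pde_mask = (1ULL << pde_bits) - 1ULL;

        /* A 2MB-aligned range: PDE aligned, but not 64MB aligned. */
        uint64_t base = 0x10200000ULL;
        uint64_t size = 0x00200000ULL;

        printf("legacy check rejects: %s\n",
               (((base & legacy_mask) != 0ULL) ||
                ((size & legacy_mask) != 0ULL)) ? "yes" : "no");
        printf("pde-size check rejects: %s\n",
               (((base & pde_mask) != 0ULL) ||
                ((size & pde_mask) != 0ULL)) ? "yes" : "no");
        return 0;
    }

With these assumed values the legacy mask rejects a perfectly usable
2MB-aligned vma, while the PDE-size mask accepts it, which is exactly
the over-alignment the patch removes.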
diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c
index 4b2c96efd..4f7f8a2ea 100644
--- a/drivers/gpu/nvgpu/common/mm/vm.c
+++ b/drivers/gpu/nvgpu/common/mm/vm.c
@@ -170,20 +170,22 @@ int nvgpu_vm_bind_channel(struct vm_gk20a *vm, struct nvgpu_channel *ch)
  * example, for gp10b, with a last level address bit PDE range of 28 to 21 the
  * amount of memory each last level PDE addresses is 21 bits - i.e 2MB.
  */
-u32 nvgpu_vm_pde_coverage_bit_count(struct vm_gk20a *vm)
+u32 nvgpu_vm_pde_coverage_bit_count(struct gk20a *g, u64 big_page_size)
 {
 	int final_pde_level = 0;
+	const struct gk20a_mmu_level *mmu_levels =
+		g->ops.mm.gmmu.get_mmu_levels(g, big_page_size);
 
 	/*
 	 * Find the second to last level of the page table programming
 	 * heirarchy: the last level is PTEs so we really want the level
 	 * before that which is the last level of PDEs.
 	 */
-	while (vm->mmu_levels[final_pde_level + 2].update_entry != NULL) {
+	while (mmu_levels[final_pde_level + 2].update_entry != NULL) {
 		final_pde_level++;
 	}
 
-	return vm->mmu_levels[final_pde_level].lo_bit[0];
+	return mmu_levels[final_pde_level].lo_bit[0];
 }
 
 NVGPU_COV_WHITELIST_BLOCK_BEGIN(deviate, 1, NVGPU_MISRA(Rule, 17_2), "TID-278")
@@ -320,7 +322,9 @@ void nvgpu_vm_mapping_batch_finish(struct vm_gk20a *vm,
  */
 bool nvgpu_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size)
 {
-	u64 mask = nvgpu_safe_sub_u64((u64)vm->big_page_size << 10ULL, 1ULL);
+	u64 pde_size = BIT64(nvgpu_vm_pde_coverage_bit_count(
+			gk20a_from_vm(vm), vm->big_page_size));
+	u64 mask = nvgpu_safe_sub_u64(pde_size, 1ULL);
 	u64 base_big_page = base & mask;
 	u64 size_big_page = size & mask;
 
diff --git a/drivers/gpu/nvgpu/include/nvgpu/vm.h b/drivers/gpu/nvgpu/include/nvgpu/vm.h
index e1253e6c0..68d07be72 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/vm.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/vm.h
@@ -414,14 +414,15 @@ bool nvgpu_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size);
  * @brief Determine how many bits of the address space is covered by
  * last level PDE.
  *
- * @param vm [in] Pointer to virtual memory context.
+ * @param g [in] The GPU.
+ * @param big_page_size [in] Big page size supported by GMMU.
  *
  * - Go to the last level before page table entry level and return
  *   the mmu_levels[x].lo_bit.
 *
 * @return number of bits with last level of entry.
 */
-u32 nvgpu_vm_pde_coverage_bit_count(struct vm_gk20a *vm);
+u32 nvgpu_vm_pde_coverage_bit_count(struct gk20a *g, u64 big_page_size);
 
 /**
  * @brief Eliminates redundant cache flushes and invalidates.
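The reworked helper no longer reaches through the VM for its MMU level
table; it asks the GPU ops for the table matching a big page size and
walks it. The stand-alone sketch below mirrors that two-past-the-end
walk; the struct and the lo_bit values (modeled on the gp10b comment
above) are stand-ins, not the nvgpu definitions.

    #include <stddef.h>
    #include <stdio.h>

    /* Stand-in for nvgpu's gk20a_mmu_level; only what the walk needs. */
    struct mmu_level {
        void (*update_entry)(void); /* NULL terminates the table */
        unsigned int lo_bit;        /* lowest address bit the level covers */
    };

    static void update_stub(void) { }

    int main(void)
    {
        /* Illustrative table: three PDE levels, one PTE level, terminator. */
        struct mmu_level levels[] = {
            { update_stub, 38U },
            { update_stub, 29U },
            { update_stub, 21U }, /* last PDE level: bits 28..21, 2MB */
            { update_stub, 12U }, /* PTE level */
            { NULL, 0U },
        };
        int final_pde_level = 0;

        /*
         * Same walk as the patched function: stop when level + 2 is the
         * terminator, so level + 1 is the PTE level and final_pde_level
         * names the last level of PDEs.
         */
        while (levels[final_pde_level + 2].update_entry != NULL) {
            final_pde_level++;
        }

        printf("PDE coverage bit count: %u\n",
               levels[final_pde_level].lo_bit);
        return 0;
    }

With this table the walk stops at the 21-bit level, matching the 2MB
last-level PDE coverage the vm.c comment describes for gp10b.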
diff --git a/userspace/units/mm/vm/vm.c b/userspace/units/mm/vm/vm.c
index a5d558b18..2b834ac15 100644
--- a/userspace/units/mm/vm/vm.c
+++ b/userspace/units/mm/vm/vm.c
@@ -2058,7 +2058,7 @@ int test_vm_pde_coverage_bit_count(struct unit_module *m, struct gk20a *g,
 	int ret = UNIT_FAIL;
 	struct vm_gk20a *vm = create_test_vm(m, g);
 
-	bit_count = nvgpu_vm_pde_coverage_bit_count(vm);
+	bit_count = nvgpu_vm_pde_coverage_bit_count(g, vm->big_page_size);
 
 	if (bit_count != GP10B_PDE_BIT_COUNT) {
 		unit_err(m, "invalid PDE bit count\n");
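For callers, the only visible change is the signature: the GPU pointer
and a big page size replace the VM pointer, so PDE coverage can be
queried without a fully built VM. A minimal stand-alone sketch of the
migrated call site follows; struct gk20a is stubbed, and
GP10B_PDE_BIT_COUNT is assumed to be 21 (the 2MB last-level coverage
from the vm.c comment), since the real constant lives in the unit test
headers.

    #include <assert.h>
    #include <stdint.h>

    struct gk20a { int unused; }; /* stub; the real struct is in nvgpu */

    /* Stand-in with the new signature; the real body walks MMU levels. */
    static uint32_t nvgpu_vm_pde_coverage_bit_count(struct gk20a *g,
                                                    uint64_t big_page_size)
    {
        (void)g;
        (void)big_page_size;
        return 21U; /* gp10b-style: last-level PDE covers 2MB */
    }

    int main(void)
    {
        struct gk20a g = { 0 };
        uint64_t big_page_size = 64ULL * 1024ULL;

        /* Old call site: nvgpu_vm_pde_coverage_bit_count(vm);      */
        /* New call site: pass the GPU and a big page size, no VM.  */
        uint32_t bit_count =
            nvgpu_vm_pde_coverage_bit_count(&g, big_page_size);

        assert(bit_count == 21U); /* assumed GP10B_PDE_BIT_COUNT value */
        return 0;
    }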