diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 949237b1b..09948a25a 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1811,6 +1811,8 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 			goto clean_up;
 		}
 
+		BUG_ON(!pte_kv_cur);
+
 		gk20a_dbg(gpu_dbg_pte, "pte_lo=%d, pte_hi=%d", pte_lo, pte_hi);
 		for (pte_cur = pte_lo; pte_cur <= pte_hi; pte_cur++) {
 			if (likely(sgt)) {
@@ -2128,9 +2130,10 @@ static int gk20a_init_vm(struct mm_gk20a *mm,
 		char *name)
 {
 	int err, i;
-	u32 num_pages, low_hole_pages;
+	u32 num_small_pages, num_large_pages, low_hole_pages;
 	char alloc_name[32];
-	u64 vma_size;
+	u64 small_vma_size, large_vma_size;
+	u32 pde_pages;
 
 	/* note: keep the page sizes sorted lowest to highest here */
 	u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, big_page_size };
@@ -2206,7 +2209,10 @@ static int gk20a_init_vm(struct mm_gk20a *mm,
 		   name, vm->va_limit, vm->pdes.num_pdes);
 
 	/* allocate the page table directory */
-	err = alloc_gmmu_pages(vm, 0, &vm->pdes.ref,
+	pde_pages = ilog2((vm->pdes.num_pdes + 511) / 512);
+
+	gk20a_dbg(gpu_dbg_pte, "Allocating %d ** 2 PDE pages\n", pde_pages);
+	err = alloc_gmmu_pages(vm, pde_pages, &vm->pdes.ref,
 			       &vm->pdes.sgt, &vm->pdes.size);
 	if (err)
 		goto clean_up_pdes;
@@ -2220,13 +2226,15 @@ static int gk20a_init_vm(struct mm_gk20a *mm,
 		  vm->pdes.kv, gk20a_mm_iova_addr(vm->mm->g, vm->pdes.sgt->sgl));
 	/* we could release vm->pdes.kv but it's only one page... */
 
-	/* low-half: alloc small pages */
-	/* high-half: alloc big pages */
-	vma_size = vm->va_limit;
-	if (big_pages)
-		vma_size /= 2;
+	/* First 16GB of the address space goes towards small pages. What ever
+	 * remains is allocated to large pages. */
+	small_vma_size = vm->va_limit;
+	if (big_pages) {
+		small_vma_size = (u64)16 << 30;
+		large_vma_size = vm->va_limit - small_vma_size;
+	}
 
-	num_pages = (u32)(vma_size >>
+	num_small_pages = (u32)(small_vma_size >>
 		    ilog2(vm->gmmu_page_sizes[gmmu_page_size_small]));
 
 	/* num_pages above is without regard to the low-side hole. */
@@ -2238,20 +2246,22 @@ static int gk20a_init_vm(struct mm_gk20a *mm,
 	err = gk20a_allocator_init(&vm->vma[gmmu_page_size_small],
 			     alloc_name,
 			     low_hole_pages,		 /*start*/
-			     num_pages - low_hole_pages);/* length*/
+			     num_small_pages - low_hole_pages);/* length*/
 	if (err)
 		goto clean_up_map_pde;
 
 	if (big_pages) {
-		num_pages = (u32)((vm->va_limit / 2) >>
+		u32 start = (u32)(small_vma_size >>
+			    ilog2(vm->gmmu_page_sizes[gmmu_page_size_big]));
+		num_large_pages = (u32)(large_vma_size >>
 			    ilog2(vm->gmmu_page_sizes[gmmu_page_size_big]));
 
 		snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-%dKB",
 			 name, vm->gmmu_page_sizes[gmmu_page_size_big]>>10);
 		err = gk20a_allocator_init(&vm->vma[gmmu_page_size_big],
 				      alloc_name,
-				      num_pages,		/* start */
-				      num_pages);		/* length */
+				      start,			/* start */
+				      num_large_pages);		/* length */
 		if (err)
 			goto clean_up_small_allocator;
 	}
@@ -2269,7 +2279,7 @@ clean_up_small_allocator:
 clean_up_map_pde:
 	unmap_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, vm->pdes.kv);
 clean_up_ptes:
-	free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0,
+	free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, pde_pages,
 			vm->pdes.size);
 clean_up_pdes:
 	kfree(vm->pdes.ptes[gmmu_page_size_small]);
@@ -2647,10 +2657,15 @@ int gk20a_vm_unmap_buffer(struct gk20a_as_share *as_share, u64 offset)
 
 static void gk20a_deinit_vm(struct vm_gk20a *vm)
 {
+	u32 pde_pages;
+
 	gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]);
 	gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]);
+
 	unmap_gmmu_pages(vm->pdes.ref, vm->pdes.sgt, vm->pdes.kv);
-	free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, 0,
+
+	pde_pages = ilog2((vm->pdes.num_pdes + 511) / 512);
+	free_gmmu_pages(vm, vm->pdes.ref, vm->pdes.sgt, pde_pages,
 			vm->pdes.size);
 	kfree(vm->pdes.ptes[gmmu_page_size_small]);
 	kfree(vm->pdes.ptes[gmmu_page_size_big]);
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 04f9446bc..d39dcff0c 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -25,10 +25,13 @@
 #include <asm/cacheflush.h>
 #include "gk20a_allocator.h"
 
-/* For now keep the size relatively small-ish compared to the full
- * 40b va.  32GB for now. It consists of two 16GB spaces. */
-#define NV_GMMU_VA_RANGE	35ULL
-#define NV_GMMU_VA_IS_UPPER(x)	((x) >= ((u64)0x1 << (NV_GMMU_VA_RANGE-1)))
+/*
+ * Amount of the GVA space we actually use is smaller than the available space.
+ * The bottom 16GB of the space are used for small pages, the remaining high
+ * memory is for large pages.
+ */
+#define NV_GMMU_VA_RANGE	37ULL
+#define NV_GMMU_VA_IS_UPPER(x)	((x) >= ((u64)SZ_1G * 16))
 
 #ifdef CONFIG_ARM64
 #define outer_flush_range(a, b)
diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
index 5b1a9a043..1adff5abe 100644
--- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
@@ -111,7 +111,7 @@ static int gm20b_vm_put_sparse(struct vm_gk20a *vm, u64 vaddr,
 
 	gk20a_dbg_fn("");
 
-	vaddr_hi = vaddr + pgsz * num_pages - 1;
+	vaddr_hi = vaddr + pgsz * (u64)num_pages - 1;
 	pde_range_from_vaddr_range(vm,
 				   vaddr,
 				   vaddr_hi,