From efbe371fd50c3bfa0d0cbd6d4762b4d5a01bcfa8 Mon Sep 17 00:00:00 2001 From: Alex Waterman Date: Mon, 15 Apr 2019 14:07:22 -0700 Subject: [PATCH] gpu: nvgpu: Create hal/mm/gmmu and move gk20a GMMU code Make a hal/mm/gmmu sub-unit for the GMMU HAL code. Also move the gk20a-specific HAL code there; the gp10b code will move in the next patch. This change also updates all the GMMU-related HAL usage, of which there is quite a bit. Generally the only change is that a .gmmu needs to be inserted into the HAL path. Each HAL init was also updated. JIRA NVGPU-2042 Change-Id: I6c46bdfddb8e021f56103d9457fb3e2a226f8947 Signed-off-by: Alex Waterman Reviewed-on: https://git-master.nvidia.com/r/2099693 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/Makefile | 1 + drivers/gpu/nvgpu/Makefile.sources | 1 + drivers/gpu/nvgpu/common/mm/as.c | 2 +- drivers/gpu/nvgpu/common/mm/gmmu/page_table.c | 9 +- drivers/gpu/nvgpu/common/mm/mm.c | 24 +- drivers/gpu/nvgpu/common/mm/nvgpu_mem.c | 4 +- drivers/gpu/nvgpu/common/mm/vm.c | 9 +- drivers/gpu/nvgpu/common/mm/vm_area.c | 4 +- .../nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c | 17 +- .../nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c | 17 +- drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 239 ---------------- drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 10 - drivers/gpu/nvgpu/gp10b/mm_gp10b.c | 4 +- drivers/gpu/nvgpu/hal/init/hal_gm20b.c | 18 +- drivers/gpu/nvgpu/hal/init/hal_gp10b.c | 18 +- drivers/gpu/nvgpu/hal/init/hal_gv100.c | 16 +- drivers/gpu/nvgpu/hal/init/hal_gv11b.c | 18 +- drivers/gpu/nvgpu/hal/init/hal_tu104.c | 16 +- drivers/gpu/nvgpu/hal/mm/gmmu/gmmu_gk20a.c | 267 ++++++++++++++++++ drivers/gpu/nvgpu/hal/mm/gmmu/gmmu_gk20a.h | 47 +++ drivers/gpu/nvgpu/include/nvgpu/gk20a.h | 64 +++-- drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c | 3 +- drivers/gpu/nvgpu/os/linux/ioctl_dbg.c | 2 +- drivers/gpu/nvgpu/os/linux/nvgpu_mem.c | 6 +- drivers/gpu/nvgpu/os/posix/posix-nvgpu_mem.c | 2 +- .../buddy_allocator/buddy_allocator.c | 6 +- .../units/mm/gmmu/page_table/page_table.c | 27 +- userspace/units/mm/nvgpu_mem/nvgpu_mem.c | 2 +- .../mm/page_table_faults/page_table_faults.c | 20 +- userspace/units/mm/vm/vm.c | 14 +- 30 files changed, 496 insertions(+), 391 deletions(-) create mode 100644 drivers/gpu/nvgpu/hal/mm/gmmu/gmmu_gk20a.c create mode 100644 drivers/gpu/nvgpu/hal/mm/gmmu/gmmu_gk20a.h diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index 7f4a73e4c..01716e76d 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile @@ -151,6 +151,7 @@ nvgpu-y += \ common/nvlink/nvlink_tu104.o \ hal/mm/cache/flush_gk20a.o \ hal/mm/cache/flush_gv11b.o \ + hal/mm/gmmu/gmmu_gk20a.o \ hal/mc/mc_gm20b.o \ hal/mc/mc_gp10b.o \ hal/mc/mc_gv11b.o \ diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources index 490e43d3d..fbe39502b 100644 --- a/drivers/gpu/nvgpu/Makefile.sources +++ b/drivers/gpu/nvgpu/Makefile.sources @@ -260,6 +260,7 @@ srcs += common/sim/sim.c \ tu104/func_tu104.c \ hal/mm/cache/flush_gk20a.c \ hal/mm/cache/flush_gv11b.c \ + hal/mm/gmmu/gmmu_gk20a.c \ hal/mc/mc_gm20b.c \ hal/mc/mc_gp10b.c \ hal/mc/mc_gv11b.c \ diff --git a/drivers/gpu/nvgpu/common/mm/as.c b/drivers/gpu/nvgpu/common/mm/as.c index 1c07dfdd8..2f202569a 100644 --- a/drivers/gpu/nvgpu/common/mm/as.c +++ b/drivers/gpu/nvgpu/common/mm/as.c @@ -64,7 +64,7 @@ static int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, nvgpu_log_fn(g, " "); if (big_page_size == 0U) { - big_page_size = g->ops.mm.get_default_big_page_size(); + big_page_size = 
g->ops.mm.gmmu.get_default_big_page_size(); } else { if (!is_power_of_2(big_page_size)) { return -EINVAL; diff --git a/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c b/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c index ff3dcf2d7..6b809eb83 100644 --- a/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c +++ b/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c @@ -99,7 +99,7 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm, } nvgpu_mutex_acquire(&vm->update_gmmu_lock); - vaddr = g->ops.mm.gmmu_map(vm, addr, + vaddr = g->ops.mm.gmmu.map(vm, addr, sgt, /* sg list */ 0, /* sg offset */ size, @@ -160,7 +160,7 @@ void nvgpu_gmmu_unmap(struct vm_gk20a *vm, struct nvgpu_mem *mem, u64 gpu_va) struct gk20a *g = gk20a_from_vm(vm); nvgpu_mutex_acquire(&vm->update_gmmu_lock); - g->ops.mm.gmmu_unmap(vm, + g->ops.mm.gmmu.unmap(vm, gpu_va, mem->size, GMMU_PAGE_SIZE_KERNEL, @@ -618,7 +618,7 @@ static int __nvgpu_gmmu_do_update_page_table(struct vm_gk20a *vm, */ phys_addr = nvgpu_sgt_ipa_to_pa(g, sgt, sgl, ipa_addr, &phys_length); - phys_addr = g->ops.mm.gpu_phys_addr(g, attrs, phys_addr) + phys_addr = g->ops.mm.gmmu.gpu_phys_addr(g, attrs, phys_addr) + space_to_skip; /* @@ -902,7 +902,8 @@ void nvgpu_gmmu_unmap_locked(struct vm_gk20a *vm, u32 __nvgpu_pte_words(struct gk20a *g) { - const struct gk20a_mmu_level *l = g->ops.mm.get_mmu_levels(g, SZ_64K); + const struct gk20a_mmu_level *l = + g->ops.mm.gmmu.get_mmu_levels(g, SZ_64K); const struct gk20a_mmu_level *next_l; /* diff --git a/drivers/gpu/nvgpu/common/mm/mm.c b/drivers/gpu/nvgpu/common/mm/mm.c index 5da4d9677..383e7b559 100644 --- a/drivers/gpu/nvgpu/common/mm/mm.c +++ b/drivers/gpu/nvgpu/common/mm/mm.c @@ -237,7 +237,7 @@ static int nvgpu_init_system_vm(struct mm_gk20a *mm) int err; struct gk20a *g = gk20a_from_mm(mm); struct nvgpu_mem *inst_block = &mm->pmu.inst_block; - u32 big_page_size = g->ops.mm.get_default_big_page_size(); + u32 big_page_size = g->ops.mm.gmmu.get_default_big_page_size(); u64 low_hole, aperture_size; /* @@ -300,7 +300,7 @@ static int nvgpu_init_hwpm(struct mm_gk20a *mm) static int nvgpu_init_cde_vm(struct mm_gk20a *mm) { struct gk20a *g = gk20a_from_mm(mm); - u32 big_page_size = g->ops.mm.get_default_big_page_size(); + u32 big_page_size = g->ops.mm.gmmu.get_default_big_page_size(); mm->cde.vm = nvgpu_vm_init(g, big_page_size, U64(big_page_size) << U64(10), @@ -316,7 +316,7 @@ static int nvgpu_init_cde_vm(struct mm_gk20a *mm) static int nvgpu_init_ce_vm(struct mm_gk20a *mm) { struct gk20a *g = gk20a_from_mm(mm); - u32 big_page_size = g->ops.mm.get_default_big_page_size(); + u32 big_page_size = g->ops.mm.gmmu.get_default_big_page_size(); mm->ce.vm = nvgpu_vm_init(g, big_page_size, U64(big_page_size) << U64(10), @@ -396,7 +396,7 @@ static int nvgpu_init_bar1_vm(struct mm_gk20a *mm) int err; struct gk20a *g = gk20a_from_mm(mm); struct nvgpu_mem *inst_block = &mm->bar1.inst_block; - u32 big_page_size = g->ops.mm.get_default_big_page_size(); + u32 big_page_size = g->ops.mm.gmmu.get_default_big_page_size(); mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20; nvgpu_log_info(g, "bar1 vm size = 0x%x", mm->bar1.aperture_size); @@ -429,7 +429,7 @@ static int nvgpu_init_engine_ucode_vm(struct gk20a *g, { int err; struct nvgpu_mem *inst_block = &ucode->inst_block; - u32 big_page_size = g->ops.mm.get_default_big_page_size(); + u32 big_page_size = g->ops.mm.gmmu.get_default_big_page_size(); /* ucode aperture size is 32MB */ ucode->aperture_size = U32(32) << 20U; @@ -621,7 +621,7 @@ u32 nvgpu_mm_get_default_big_page_size(struct gk20a *g) { u32 
big_page_size; - big_page_size = g->ops.mm.get_default_big_page_size(); + big_page_size = g->ops.mm.gmmu.get_default_big_page_size(); if (g->mm.disable_bigpage) { big_page_size = 0; @@ -634,12 +634,12 @@ u32 nvgpu_mm_get_available_big_page_sizes(struct gk20a *g) { u32 available_big_page_sizes = 0; - if (!g->mm.disable_bigpage) { - available_big_page_sizes = - g->ops.mm.get_default_big_page_size(); - if (g->ops.mm.get_big_page_sizes != NULL) { - available_big_page_sizes |= g->ops.mm.get_big_page_sizes(); - } + if (g->mm.disable_bigpage) + return available_big_page_sizes; + + available_big_page_sizes = g->ops.mm.gmmu.get_default_big_page_size(); + if (g->ops.mm.gmmu.get_big_page_sizes != NULL) { + available_big_page_sizes |= g->ops.mm.gmmu.get_big_page_sizes(); } return available_big_page_sizes; diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c index 798875481..1491a38e5 100644 --- a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c +++ b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c @@ -92,8 +92,8 @@ u64 nvgpu_mem_iommu_translate(struct gk20a *g, u64 phys) /* ensure it is not vidmem allocation */ WARN_ON(nvgpu_addr_is_vidmem_page_alloc(phys)); - if (nvgpu_iommuable(g) && g->ops.mm.get_iommu_bit != NULL) { - return phys | 1ULL << g->ops.mm.get_iommu_bit(g); + if (nvgpu_iommuable(g) && g->ops.mm.gmmu.get_iommu_bit != NULL) { + return phys | 1ULL << g->ops.mm.gmmu.get_iommu_bit(g); } return phys; diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c index 71d76896e..af721f126 100644 --- a/drivers/gpu/nvgpu/common/mm/vm.c +++ b/drivers/gpu/nvgpu/common/mm/vm.c @@ -336,7 +336,8 @@ int nvgpu_vm_do_init(struct mm_gk20a *mm, vm->big_page_size = vm->gmmu_page_sizes[GMMU_PAGE_SIZE_BIG]; vm->userspace_managed = userspace_managed; vm->unified_va = unified_va; - vm->mmu_levels = g->ops.mm.get_mmu_levels(g, vm->big_page_size); + vm->mmu_levels = + g->ops.mm.gmmu.get_mmu_levels(g, vm->big_page_size); #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION if (g->is_virtual && userspace_managed) { @@ -1068,7 +1069,7 @@ struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm, clear_ctags = gk20a_comptags_start_clear(os_buf); } - map_addr = g->ops.mm.gmmu_map(vm, + map_addr = g->ops.mm.gmmu.map(vm, map_addr, sgt, phys_offset, @@ -1123,7 +1124,7 @@ struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm, clean_up: if (mapped_buffer->addr != 0ULL) { - g->ops.mm.gmmu_unmap(vm, + g->ops.mm.gmmu.unmap(vm, mapped_buffer->addr, mapped_buffer->size, mapped_buffer->pgsz_idx, @@ -1150,7 +1151,7 @@ static void nvgpu_vm_do_unmap(struct nvgpu_mapped_buf *mapped_buffer, struct vm_gk20a *vm = mapped_buffer->vm; struct gk20a *g = vm->mm->g; - g->ops.mm.gmmu_unmap(vm, + g->ops.mm.gmmu.unmap(vm, mapped_buffer->addr, mapped_buffer->size, mapped_buffer->pgsz_idx, diff --git a/drivers/gpu/nvgpu/common/mm/vm_area.c b/drivers/gpu/nvgpu/common/mm/vm_area.c index 7127abe8d..f251e3761 100644 --- a/drivers/gpu/nvgpu/common/mm/vm_area.c +++ b/drivers/gpu/nvgpu/common/mm/vm_area.c @@ -169,7 +169,7 @@ int nvgpu_vm_area_alloc(struct vm_gk20a *vm, u32 pages, u32 page_size, nvgpu_mutex_acquire(&vm->update_gmmu_lock); if ((flags & NVGPU_VM_AREA_ALLOC_SPARSE) != 0U) { - u64 map_addr = g->ops.mm.gmmu_map(vm, vaddr_start, + u64 map_addr = g->ops.mm.gmmu.map(vm, vaddr_start, NULL, 0, vm_area->size, @@ -241,7 +241,7 @@ int nvgpu_vm_area_free(struct vm_gk20a *vm, u64 addr) /* if this was a sparse mapping, free the va */ if (vm_area->sparse) { - g->ops.mm.gmmu_unmap(vm, + g->ops.mm.gmmu.unmap(vm, 
vm_area->addr, vm_area->size, vm_area->pgsz_idx, diff --git a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c index b5e12e551..2f3e4c8d4 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c +++ b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c @@ -567,14 +567,7 @@ static const struct gpu_ops vgpu_gp10b_ops = { .is_fw_defined = gp10b_netlist_is_firmware_defined, }, .mm = { - .gmmu_map = vgpu_locked_gmmu_map, - .gmmu_unmap = vgpu_locked_gmmu_unmap, .vm_bind_channel = vgpu_vm_bind_channel, - .get_big_page_sizes = gm20b_mm_get_big_page_sizes, - .get_default_big_page_size = gp10b_mm_get_default_big_page_size, - .gpu_phys_addr = gm20b_gpu_phys_addr, - .get_iommu_bit = gk20a_mm_get_iommu_bit, - .get_mmu_levels = gp10b_mm_get_mmu_levels, .init_mm_setup_hw = NULL, .is_bar1_supported = gm20b_mm_is_bar1_supported, .init_inst_block = gk20a_init_inst_block, @@ -591,6 +584,16 @@ static const struct gpu_ops vgpu_gp10b_ops = { .l2_flush = vgpu_mm_l2_flush, .cbc_clean = NULL, }, + .gmmu = { + .map = vgpu_locked_gmmu_map, + .unmap = vgpu_locked_gmmu_unmap, + .get_big_page_sizes = gm20b_mm_get_big_page_sizes, + .get_default_big_page_size = + gp10b_mm_get_default_big_page_size, + .gpu_phys_addr = gm20b_gpu_phys_addr, + .get_iommu_bit = gk20a_mm_get_iommu_bit, + .get_mmu_levels = gp10b_mm_get_mmu_levels, + }, }, .pramin = { .data032_r = NULL, diff --git a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c index 5e5134aa1..db7f724b8 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c +++ b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c @@ -661,14 +661,7 @@ static const struct gpu_ops vgpu_gv11b_ops = { .is_fw_defined = gv11b_netlist_is_firmware_defined, }, .mm = { - .gmmu_map = vgpu_locked_gmmu_map, - .gmmu_unmap = vgpu_locked_gmmu_unmap, .vm_bind_channel = vgpu_vm_bind_channel, - .get_big_page_sizes = gm20b_mm_get_big_page_sizes, - .get_default_big_page_size = gp10b_mm_get_default_big_page_size, - .gpu_phys_addr = gm20b_gpu_phys_addr, - .get_iommu_bit = gk20a_mm_get_iommu_bit, - .get_mmu_levels = gp10b_mm_get_mmu_levels, .init_mm_setup_hw = NULL, .is_bar1_supported = gv11b_mm_is_bar1_supported, .init_inst_block = gv11b_init_inst_block, @@ -686,6 +679,16 @@ static const struct gpu_ops vgpu_gv11b_ops = { .l2_flush = vgpu_mm_l2_flush, .cbc_clean = NULL, }, + .gmmu = { + .map = vgpu_locked_gmmu_map, + .unmap = vgpu_locked_gmmu_unmap, + .get_big_page_sizes = gm20b_mm_get_big_page_sizes, + .get_default_big_page_size = + gp10b_mm_get_default_big_page_size, + .gpu_phys_addr = gm20b_gpu_phys_addr, + .get_iommu_bit = gk20a_mm_get_iommu_bit, + .get_mmu_levels = gp10b_mm_get_mmu_levels, + }, }, .therm = { .init_therm_setup_hw = NULL, diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index f6bc19255..6e2ee77da 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c @@ -49,7 +49,6 @@ #include "mm_gk20a.h" -#include #include /* @@ -122,237 +121,6 @@ int gk20a_init_mm_setup_hw(struct gk20a *g) return 0; } -/* for gk20a the "video memory" apertures here are misnomers. 
*/ -static inline u32 big_valid_pde0_bits(struct gk20a *g, - struct nvgpu_gmmu_pd *pd, u64 addr) -{ - u32 pde0_bits = - nvgpu_aperture_mask(g, pd->mem, - gmmu_pde_aperture_big_sys_mem_ncoh_f(), - gmmu_pde_aperture_big_sys_mem_coh_f(), - gmmu_pde_aperture_big_video_memory_f()) | - gmmu_pde_address_big_sys_f( - (u32)(addr >> gmmu_pde_address_shift_v())); - - return pde0_bits; -} - -static inline u32 small_valid_pde1_bits(struct gk20a *g, - struct nvgpu_gmmu_pd *pd, u64 addr) -{ - u32 pde1_bits = - nvgpu_aperture_mask(g, pd->mem, - gmmu_pde_aperture_small_sys_mem_ncoh_f(), - gmmu_pde_aperture_small_sys_mem_coh_f(), - gmmu_pde_aperture_small_video_memory_f()) | - gmmu_pde_vol_small_true_f() | /* tbd: why? */ - gmmu_pde_address_small_sys_f( - (u32)(addr >> gmmu_pde_address_shift_v())); - - return pde1_bits; -} - -static void update_gmmu_pde_locked(struct vm_gk20a *vm, - const struct gk20a_mmu_level *l, - struct nvgpu_gmmu_pd *pd, - u32 pd_idx, - u64 virt_addr, - u64 phys_addr, - struct nvgpu_gmmu_attrs *attrs) -{ - struct gk20a *g = gk20a_from_vm(vm); - bool small_valid, big_valid; - u32 pd_offset = nvgpu_pd_offset_from_index(l, pd_idx); - u32 pde_v[2] = {0, 0}; - - small_valid = attrs->pgsz == GMMU_PAGE_SIZE_SMALL; - big_valid = attrs->pgsz == GMMU_PAGE_SIZE_BIG; - - pde_v[0] = gmmu_pde_size_full_f(); - pde_v[0] |= big_valid ? - big_valid_pde0_bits(g, pd, phys_addr) : - gmmu_pde_aperture_big_invalid_f(); - - pde_v[1] |= (small_valid ? small_valid_pde1_bits(g, pd, phys_addr) : - (gmmu_pde_aperture_small_invalid_f() | - gmmu_pde_vol_small_false_f())) - | - (big_valid ? (gmmu_pde_vol_big_true_f()) : - gmmu_pde_vol_big_false_f()); - - pte_dbg(g, attrs, - "PDE: i=%-4u size=%-2u offs=%-4u pgsz: %c%c | " - "GPU %#-12llx phys %#-12llx " - "[0x%08x, 0x%08x]", - pd_idx, l->entry_size, pd_offset, - small_valid ? 'S' : '-', - big_valid ? 'B' : '-', - virt_addr, phys_addr, - pde_v[1], pde_v[0]); - - nvgpu_pd_write(g, &vm->pdb, (size_t)pd_offset + (size_t)0, pde_v[0]); - nvgpu_pd_write(g, &vm->pdb, (size_t)pd_offset + (size_t)1, pde_v[1]); -} - -static void __update_pte_sparse(u32 *pte_w) -{ - pte_w[0] = gmmu_pte_valid_false_f(); - pte_w[1] |= gmmu_pte_vol_true_f(); -} - -static void __update_pte(struct vm_gk20a *vm, - u32 *pte_w, - u64 phys_addr, - struct nvgpu_gmmu_attrs *attrs) -{ - struct gk20a *g = gk20a_from_vm(vm); - u32 page_size = vm->gmmu_page_sizes[attrs->pgsz]; - u32 pte_valid = attrs->valid ? - gmmu_pte_valid_true_f() : - gmmu_pte_valid_false_f(); - u32 phys_shifted = phys_addr >> gmmu_pte_address_shift_v(); - u32 addr = attrs->aperture == APERTURE_SYSMEM ? 
- gmmu_pte_address_sys_f(phys_shifted) : - gmmu_pte_address_vid_f(phys_shifted); - int ctag_shift = 0; - int shamt = ilog2(g->ops.fb.compression_page_size(g)); - if (shamt < 0) { - nvgpu_err(g, "shift amount 'shamt' is negative"); - } else { - ctag_shift = shamt; - } - - pte_w[0] = pte_valid | addr; - - if (attrs->priv) { - pte_w[0] |= gmmu_pte_privilege_true_f(); - } - - pte_w[1] = nvgpu_aperture_mask_raw(g, attrs->aperture, - gmmu_pte_aperture_sys_mem_ncoh_f(), - gmmu_pte_aperture_sys_mem_coh_f(), - gmmu_pte_aperture_video_memory_f()) | - gmmu_pte_kind_f(attrs->kind_v) | - gmmu_pte_comptagline_f((U32(attrs->ctag) >> U32(ctag_shift))); - - if ((attrs->ctag != 0ULL) && - vm->mm->use_full_comp_tag_line && - ((phys_addr & 0x10000ULL) != 0ULL)) { - pte_w[1] |= gmmu_pte_comptagline_f( - BIT32(gmmu_pte_comptagline_s() - 1U)); - } - - if (attrs->rw_flag == gk20a_mem_flag_read_only) { - pte_w[0] |= gmmu_pte_read_only_true_f(); - pte_w[1] |= gmmu_pte_write_disable_true_f(); - } else if (attrs->rw_flag == gk20a_mem_flag_write_only) { - pte_w[1] |= gmmu_pte_read_disable_true_f(); - } - - if (!attrs->cacheable) { - pte_w[1] |= gmmu_pte_vol_true_f(); - } - - if (attrs->ctag != 0ULL) { - attrs->ctag += page_size; - } -} - -static void update_gmmu_pte_locked(struct vm_gk20a *vm, - const struct gk20a_mmu_level *l, - struct nvgpu_gmmu_pd *pd, - u32 pd_idx, - u64 virt_addr, - u64 phys_addr, - struct nvgpu_gmmu_attrs *attrs) -{ - struct gk20a *g = gk20a_from_vm(vm); - u32 page_size = vm->gmmu_page_sizes[attrs->pgsz]; - u32 pd_offset = nvgpu_pd_offset_from_index(l, pd_idx); - u32 pte_w[2] = {0, 0}; - int ctag_shift = 0; - int shamt = ilog2(g->ops.fb.compression_page_size(g)); - if (shamt < 0) { - nvgpu_err(g, "shift amount 'shamt' is negative"); - } else { - ctag_shift = shamt; - } - - if (phys_addr != 0ULL) { - __update_pte(vm, pte_w, phys_addr, attrs); - } else if (attrs->sparse) { - __update_pte_sparse(pte_w); - } - - pte_dbg(g, attrs, - "PTE: i=%-4u size=%-2u offs=%-4u | " - "GPU %#-12llx phys %#-12llx " - "pgsz: %3dkb perm=%-2s kind=%#02x APT=%-6s %c%c%c%c " - "ctag=0x%08x " - "[0x%08x, 0x%08x]", - pd_idx, l->entry_size, pd_offset, - virt_addr, phys_addr, - page_size >> 10, - nvgpu_gmmu_perm_str(attrs->rw_flag), - attrs->kind_v, - nvgpu_aperture_str(g, attrs->aperture), - attrs->cacheable ? 'C' : '-', - attrs->sparse ? 'S' : '-', - attrs->priv ? 'P' : '-', - attrs->valid ? 
'V' : '-', - U32(attrs->ctag) >> U32(ctag_shift), - pte_w[1], pte_w[0]); - - nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)0, pte_w[0]); - nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)1, pte_w[1]); -} - -u32 gk20a_get_pde_pgsz(struct gk20a *g, const struct gk20a_mmu_level *l, - struct nvgpu_gmmu_pd *pd, u32 pd_idx) -{ - /* - * big and small page sizes are the same - */ - return GMMU_PAGE_SIZE_SMALL; -} - -u32 gk20a_get_pte_pgsz(struct gk20a *g, const struct gk20a_mmu_level *l, - struct nvgpu_gmmu_pd *pd, u32 pd_idx) -{ - /* - * return invalid - */ - return GMMU_NR_PAGE_SIZES; -} - -const struct gk20a_mmu_level gk20a_mm_levels_64k[] = { - {.hi_bit = {NV_GMMU_VA_RANGE-1, NV_GMMU_VA_RANGE-1}, - .lo_bit = {26, 26}, - .update_entry = update_gmmu_pde_locked, - .entry_size = 8, - .get_pgsz = gk20a_get_pde_pgsz}, - {.hi_bit = {25, 25}, - .lo_bit = {12, 16}, - .update_entry = update_gmmu_pte_locked, - .entry_size = 8, - .get_pgsz = gk20a_get_pte_pgsz}, - {.update_entry = NULL} -}; - -const struct gk20a_mmu_level gk20a_mm_levels_128k[] = { - {.hi_bit = {NV_GMMU_VA_RANGE-1, NV_GMMU_VA_RANGE-1}, - .lo_bit = {27, 27}, - .update_entry = update_gmmu_pde_locked, - .entry_size = 8, - .get_pgsz = gk20a_get_pde_pgsz}, - {.hi_bit = {26, 26}, - .lo_bit = {12, 17}, - .update_entry = update_gmmu_pte_locked, - .entry_size = 8, - .get_pgsz = gk20a_get_pte_pgsz}, - {.update_entry = NULL} -}; - int gk20a_vm_bind_channel(struct vm_gk20a *vm, struct channel_gk20a *ch) { int err = 0; @@ -411,13 +179,6 @@ u32 gk20a_mm_get_iommu_bit(struct gk20a *g) return 34; } -const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g, - u32 big_page_size) -{ - return (big_page_size == SZ_64K) ? - gk20a_mm_levels_64k : gk20a_mm_levels_128k; -} - u64 gk20a_mm_bar1_map_userd(struct gk20a *g, struct nvgpu_mem *mem, u32 offset) { struct fifo_gk20a *f = &g->fifo; diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 439f3fc80..56026b2fb 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h @@ -91,15 +91,5 @@ void pde_range_from_vaddr_range(struct vm_gk20a *vm, u64 addr_lo, u64 addr_hi, u32 *pde_lo, u32 *pde_hi); u32 gk20a_mm_get_iommu_bit(struct gk20a *g); - -const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g, - u32 big_page_size); -extern const struct gk20a_mmu_level gk20a_mm_levels_64k[]; -extern const struct gk20a_mmu_level gk20a_mm_levels_128k[]; - -u32 gk20a_get_pde_pgsz(struct gk20a *g, const struct gk20a_mmu_level *l, - struct nvgpu_gmmu_pd *pd, u32 pd_idx); -u32 gk20a_get_pte_pgsz(struct gk20a *g, const struct gk20a_mmu_level *l, - struct nvgpu_gmmu_pd *pd, u32 pd_idx); u64 gk20a_mm_bar1_map_userd(struct gk20a *g, struct nvgpu_mem *mem, u32 offset); #endif /* MM_GK20A_H */ diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c index 5f982fade..8c489d096 100644 --- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c @@ -34,6 +34,8 @@ #include "gm20b/mm_gm20b.h" #include "mm_gp10b.h" +#include "hal/mm/gmmu/gmmu_gk20a.h" + #include u32 gp10b_mm_get_default_big_page_size(void) @@ -51,7 +53,7 @@ int gp10b_init_bar2_vm(struct gk20a *g) int err; struct mm_gk20a *mm = &g->mm; struct nvgpu_mem *inst_block = &mm->bar2.inst_block; - u32 big_page_size = g->ops.mm.get_default_big_page_size(); + u32 big_page_size = g->ops.mm.gmmu.get_default_big_page_size(); /* BAR2 aperture size is 32MB */ mm->bar2.aperture_size = U32(32) << 20U; diff --git a/drivers/gpu/nvgpu/hal/init/hal_gm20b.c 
b/drivers/gpu/nvgpu/hal/init/hal_gm20b.c index d2e494a94..fec13b14a 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gm20b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gm20b.c @@ -42,6 +42,7 @@ #include #include "hal/mm/cache/flush_gk20a.h" +#include "hal/mm/gmmu/gmmu_gk20a.h" #include "hal/mc/mc_gm20b.h" #include "hal/bus/bus_gm20b.h" #include "hal/bus/bus_gk20a.h" @@ -831,14 +832,7 @@ static const struct gpu_ops gm20b_ops = { .is_fw_defined = gm20b_netlist_is_firmware_defined, }, .mm = { - .gmmu_map = nvgpu_gmmu_map_locked, - .gmmu_unmap = nvgpu_gmmu_unmap_locked, .vm_bind_channel = gk20a_vm_bind_channel, - .get_big_page_sizes = gm20b_mm_get_big_page_sizes, - .get_default_big_page_size = gm20b_mm_get_default_big_page_size, - .gpu_phys_addr = gm20b_gpu_phys_addr, - .get_iommu_bit = gk20a_mm_get_iommu_bit, - .get_mmu_levels = gk20a_mm_get_mmu_levels, .init_mm_setup_hw = gk20a_init_mm_setup_hw, .is_bar1_supported = gm20b_mm_is_bar1_supported, .alloc_inst_block = gk20a_alloc_inst_block, @@ -852,6 +846,16 @@ static const struct gpu_ops gm20b_ops = { .l2_flush = gk20a_mm_l2_flush, .cbc_clean = gk20a_mm_cbc_clean, }, + .gmmu = { + .get_mmu_levels = gk20a_mm_get_mmu_levels, + .map = nvgpu_gmmu_map_locked, + .unmap = nvgpu_gmmu_unmap_locked, + .get_big_page_sizes = gm20b_mm_get_big_page_sizes, + .get_default_big_page_size = + gm20b_mm_get_default_big_page_size, + .get_iommu_bit = gk20a_mm_get_iommu_bit, + .gpu_phys_addr = gm20b_gpu_phys_addr, + } }, .therm = { .init_therm_setup_hw = gm20b_init_therm_setup_hw, diff --git a/drivers/gpu/nvgpu/hal/init/hal_gp10b.c b/drivers/gpu/nvgpu/hal/init/hal_gp10b.c index d379c7328..f2843617a 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gp10b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gp10b.c @@ -42,6 +42,7 @@ #include #include "hal/mm/cache/flush_gk20a.h" +#include "hal/mm/gmmu/gmmu_gk20a.h" #include "hal/mc/mc_gm20b.h" #include "hal/mc/mc_gp10b.h" #include "hal/bus/bus_gk20a.h" @@ -901,14 +902,7 @@ static const struct gpu_ops gp10b_ops = { .is_fw_defined = gp10b_netlist_is_firmware_defined, }, .mm = { - .gmmu_map = nvgpu_gmmu_map_locked, - .gmmu_unmap = nvgpu_gmmu_unmap_locked, .vm_bind_channel = gk20a_vm_bind_channel, - .get_big_page_sizes = gm20b_mm_get_big_page_sizes, - .get_default_big_page_size = gp10b_mm_get_default_big_page_size, - .gpu_phys_addr = gm20b_gpu_phys_addr, - .get_iommu_bit = gp10b_mm_get_iommu_bit, - .get_mmu_levels = gp10b_mm_get_mmu_levels, .init_mm_setup_hw = gk20a_init_mm_setup_hw, .is_bar1_supported = gm20b_mm_is_bar1_supported, .alloc_inst_block = gk20a_alloc_inst_block, @@ -924,6 +918,16 @@ static const struct gpu_ops gp10b_ops = { .l2_flush = gk20a_mm_l2_flush, .cbc_clean = gk20a_mm_cbc_clean, }, + .gmmu = { + .get_mmu_levels = gp10b_mm_get_mmu_levels, + .map = nvgpu_gmmu_map_locked, + .unmap = nvgpu_gmmu_unmap_locked, + .get_big_page_sizes = gm20b_mm_get_big_page_sizes, + .get_default_big_page_size = + gp10b_mm_get_default_big_page_size, + .get_iommu_bit = gp10b_mm_get_iommu_bit, + .gpu_phys_addr = gm20b_gpu_phys_addr, + } }, .pramin = { .data032_r = pram_data032_r, diff --git a/drivers/gpu/nvgpu/hal/init/hal_gv100.c b/drivers/gpu/nvgpu/hal/init/hal_gv100.c index 1b3037dc2..a07fd2da0 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gv100.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gv100.c @@ -24,6 +24,7 @@ #include "hal/mm/cache/flush_gk20a.h" #include "hal/mm/cache/flush_gv11b.h" +#include "hal/mm/gmmu/gmmu_gk20a.h" #include "hal/mc/mc_gm20b.h" #include "hal/mc/mc_gp10b.h" #include "hal/mc/mc_gv11b.h" @@ -1089,13 +1090,7 @@ static const struct gpu_ops 
gv100_ops = { .is_fw_defined = gv100_netlist_is_firmware_defined, }, .mm = { - .gmmu_map = nvgpu_gmmu_map_locked, - .gmmu_unmap = nvgpu_gmmu_unmap_locked, .vm_bind_channel = gk20a_vm_bind_channel, - .get_big_page_sizes = gm20b_mm_get_big_page_sizes, - .get_default_big_page_size = gp10b_mm_get_default_big_page_size, - .gpu_phys_addr = gv11b_gpu_phys_addr, - .get_mmu_levels = gp10b_mm_get_mmu_levels, .init_mm_setup_hw = gv11b_init_mm_setup_hw, .is_bar1_supported = gv11b_mm_is_bar1_supported, .alloc_inst_block = gk20a_alloc_inst_block, @@ -1114,6 +1109,15 @@ static const struct gpu_ops gv100_ops = { .l2_flush = gv11b_mm_l2_flush, .cbc_clean = gk20a_mm_cbc_clean, }, + .gmmu = { + .get_mmu_levels = gp10b_mm_get_mmu_levels, + .map = nvgpu_gmmu_map_locked, + .unmap = nvgpu_gmmu_unmap_locked, + .get_big_page_sizes = gm20b_mm_get_big_page_sizes, + .get_default_big_page_size = + gp10b_mm_get_default_big_page_size, + .gpu_phys_addr = gv11b_gpu_phys_addr, + } }, .pramin = { .data032_r = pram_data032_r, diff --git a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c index cec14bccf..2ec8e52f9 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c @@ -32,6 +32,7 @@ #include "hal/mm/cache/flush_gk20a.h" #include "hal/mm/cache/flush_gv11b.h" +#include "hal/mm/gmmu/gmmu_gk20a.h" #include "hal/mc/mc_gm20b.h" #include "hal/mc/mc_gp10b.h" #include "hal/mc/mc_gv11b.h" @@ -1064,14 +1065,7 @@ static const struct gpu_ops gv11b_ops = { .is_fw_defined = gv11b_netlist_is_firmware_defined, }, .mm = { - .gmmu_map = nvgpu_gmmu_map_locked, - .gmmu_unmap = nvgpu_gmmu_unmap_locked, .vm_bind_channel = gk20a_vm_bind_channel, - .get_big_page_sizes = gm20b_mm_get_big_page_sizes, - .get_default_big_page_size = gp10b_mm_get_default_big_page_size, - .gpu_phys_addr = gv11b_gpu_phys_addr, - .get_iommu_bit = gp10b_mm_get_iommu_bit, - .get_mmu_levels = gp10b_mm_get_mmu_levels, .init_mm_setup_hw = gv11b_init_mm_setup_hw, .is_bar1_supported = gv11b_mm_is_bar1_supported, .alloc_inst_block = gk20a_alloc_inst_block, @@ -1089,6 +1083,16 @@ static const struct gpu_ops gv11b_ops = { .l2_flush = gv11b_mm_l2_flush, .cbc_clean = gk20a_mm_cbc_clean, }, + .gmmu = { + .get_mmu_levels = gp10b_mm_get_mmu_levels, + .map = nvgpu_gmmu_map_locked, + .unmap = nvgpu_gmmu_unmap_locked, + .get_big_page_sizes = gm20b_mm_get_big_page_sizes, + .get_default_big_page_size = + gp10b_mm_get_default_big_page_size, + .get_iommu_bit = gp10b_mm_get_iommu_bit, + .gpu_phys_addr = gv11b_gpu_phys_addr, + } }, .therm = { .init_therm_setup_hw = gv11b_init_therm_setup_hw, diff --git a/drivers/gpu/nvgpu/hal/init/hal_tu104.c b/drivers/gpu/nvgpu/hal/init/hal_tu104.c index 24a520603..c6d46dc98 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_tu104.c +++ b/drivers/gpu/nvgpu/hal/init/hal_tu104.c @@ -24,6 +24,7 @@ #include "hal/mm/cache/flush_gk20a.h" #include "hal/mm/cache/flush_gv11b.h" +#include "hal/mm/gmmu/gmmu_gk20a.h" #include "hal/mc/mc_gm20b.h" #include "hal/mc/mc_gp10b.h" #include "hal/mc/mc_gv11b.h" @@ -1124,13 +1125,7 @@ static const struct gpu_ops tu104_ops = { .is_fw_defined = tu104_netlist_is_firmware_defined, }, .mm = { - .gmmu_map = nvgpu_gmmu_map_locked, - .gmmu_unmap = nvgpu_gmmu_unmap_locked, .vm_bind_channel = gk20a_vm_bind_channel, - .get_big_page_sizes = gm20b_mm_get_big_page_sizes, - .get_default_big_page_size = gp10b_mm_get_default_big_page_size, - .gpu_phys_addr = gv11b_gpu_phys_addr, - .get_mmu_levels = gp10b_mm_get_mmu_levels, .init_mm_setup_hw = gv11b_init_mm_setup_hw, .is_bar1_supported 
= gv11b_mm_is_bar1_supported, .alloc_inst_block = gk20a_alloc_inst_block, @@ -1149,6 +1144,15 @@ static const struct gpu_ops tu104_ops = { .l2_flush = gv11b_mm_l2_flush, .cbc_clean = gk20a_mm_cbc_clean, }, + .gmmu = { + .get_mmu_levels = gp10b_mm_get_mmu_levels, + .map = nvgpu_gmmu_map_locked, + .unmap = nvgpu_gmmu_unmap_locked, + .get_big_page_sizes = gm20b_mm_get_big_page_sizes, + .get_default_big_page_size = + gp10b_mm_get_default_big_page_size, + .gpu_phys_addr = gv11b_gpu_phys_addr, + } }, .pramin = { .data032_r = pram_data032_r, diff --git a/drivers/gpu/nvgpu/hal/mm/gmmu/gmmu_gk20a.c b/drivers/gpu/nvgpu/hal/mm/gmmu/gmmu_gk20a.c new file mode 100644 index 000000000..eeb9fce05 --- /dev/null +++ b/drivers/gpu/nvgpu/hal/mm/gmmu/gmmu_gk20a.c @@ -0,0 +1,267 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include + +#include + +#include "gmmu_gk20a.h" + +/* for gk20a the "video memory" apertures here are misnomers. */ +static inline u32 big_valid_pde0_bits(struct gk20a *g, + struct nvgpu_gmmu_pd *pd, u64 addr) +{ + u32 pde0_bits = + nvgpu_aperture_mask(g, pd->mem, + gmmu_pde_aperture_big_sys_mem_ncoh_f(), + gmmu_pde_aperture_big_sys_mem_coh_f(), + gmmu_pde_aperture_big_video_memory_f()) | + gmmu_pde_address_big_sys_f( + (u32)(addr >> gmmu_pde_address_shift_v())); + + return pde0_bits; +} + +static inline u32 small_valid_pde1_bits(struct gk20a *g, + struct nvgpu_gmmu_pd *pd, u64 addr) +{ + u32 pde1_bits = + nvgpu_aperture_mask(g, pd->mem, + gmmu_pde_aperture_small_sys_mem_ncoh_f(), + gmmu_pde_aperture_small_sys_mem_coh_f(), + gmmu_pde_aperture_small_video_memory_f()) | + gmmu_pde_vol_small_true_f() | /* tbd: why? */ + gmmu_pde_address_small_sys_f( + (u32)(addr >> gmmu_pde_address_shift_v())); + + return pde1_bits; +} + +static void update_gmmu_pde_locked(struct vm_gk20a *vm, + const struct gk20a_mmu_level *l, + struct nvgpu_gmmu_pd *pd, + u32 pd_idx, + u64 virt_addr, + u64 phys_addr, + struct nvgpu_gmmu_attrs *attrs) +{ + struct gk20a *g = gk20a_from_vm(vm); + bool small_valid, big_valid; + u32 pd_offset = nvgpu_pd_offset_from_index(l, pd_idx); + u32 pde_v[2] = {0, 0}; + + small_valid = attrs->pgsz == GMMU_PAGE_SIZE_SMALL; + big_valid = attrs->pgsz == GMMU_PAGE_SIZE_BIG; + + pde_v[0] = gmmu_pde_size_full_f(); + pde_v[0] |= big_valid ? + big_valid_pde0_bits(g, pd, phys_addr) : + gmmu_pde_aperture_big_invalid_f(); + + pde_v[1] |= (small_valid ? 
small_valid_pde1_bits(g, pd, phys_addr) : + (gmmu_pde_aperture_small_invalid_f() | + gmmu_pde_vol_small_false_f())) + | + (big_valid ? (gmmu_pde_vol_big_true_f()) : + gmmu_pde_vol_big_false_f()); + + pte_dbg(g, attrs, + "PDE: i=%-4u size=%-2u offs=%-4u pgsz: %c%c | " + "GPU %#-12llx phys %#-12llx " + "[0x%08x, 0x%08x]", + pd_idx, l->entry_size, pd_offset, + small_valid ? 'S' : '-', + big_valid ? 'B' : '-', + virt_addr, phys_addr, + pde_v[1], pde_v[0]); + + nvgpu_pd_write(g, &vm->pdb, (size_t)pd_offset + (size_t)0, pde_v[0]); + nvgpu_pd_write(g, &vm->pdb, (size_t)pd_offset + (size_t)1, pde_v[1]); +} + +static void __update_pte_sparse(u32 *pte_w) +{ + pte_w[0] = gmmu_pte_valid_false_f(); + pte_w[1] |= gmmu_pte_vol_true_f(); +} + +static void __update_pte(struct vm_gk20a *vm, + u32 *pte_w, + u64 phys_addr, + struct nvgpu_gmmu_attrs *attrs) +{ + struct gk20a *g = gk20a_from_vm(vm); + u32 page_size = vm->gmmu_page_sizes[attrs->pgsz]; + u32 pte_valid = attrs->valid ? + gmmu_pte_valid_true_f() : + gmmu_pte_valid_false_f(); + u32 phys_shifted = phys_addr >> gmmu_pte_address_shift_v(); + u32 addr = attrs->aperture == APERTURE_SYSMEM ? + gmmu_pte_address_sys_f(phys_shifted) : + gmmu_pte_address_vid_f(phys_shifted); + int ctag_shift = 0; + int shamt = ilog2(g->ops.fb.compression_page_size(g)); + if (shamt < 0) { + nvgpu_err(g, "shift amount 'shamt' is negative"); + } else { + ctag_shift = shamt; + } + + pte_w[0] = pte_valid | addr; + + if (attrs->priv) { + pte_w[0] |= gmmu_pte_privilege_true_f(); + } + + pte_w[1] = nvgpu_aperture_mask_raw(g, attrs->aperture, + gmmu_pte_aperture_sys_mem_ncoh_f(), + gmmu_pte_aperture_sys_mem_coh_f(), + gmmu_pte_aperture_video_memory_f()) | + gmmu_pte_kind_f(attrs->kind_v) | + gmmu_pte_comptagline_f((U32(attrs->ctag) >> U32(ctag_shift))); + + if ((attrs->ctag != 0ULL) && + vm->mm->use_full_comp_tag_line && + ((phys_addr & 0x10000ULL) != 0ULL)) { + pte_w[1] |= gmmu_pte_comptagline_f( + BIT32(gmmu_pte_comptagline_s() - 1U)); + } + + if (attrs->rw_flag == gk20a_mem_flag_read_only) { + pte_w[0] |= gmmu_pte_read_only_true_f(); + pte_w[1] |= gmmu_pte_write_disable_true_f(); + } else if (attrs->rw_flag == gk20a_mem_flag_write_only) { + pte_w[1] |= gmmu_pte_read_disable_true_f(); + } + + if (!attrs->cacheable) { + pte_w[1] |= gmmu_pte_vol_true_f(); + } + + if (attrs->ctag != 0ULL) { + attrs->ctag += page_size; + } +} + +static void update_gmmu_pte_locked(struct vm_gk20a *vm, + const struct gk20a_mmu_level *l, + struct nvgpu_gmmu_pd *pd, + u32 pd_idx, + u64 virt_addr, + u64 phys_addr, + struct nvgpu_gmmu_attrs *attrs) +{ + struct gk20a *g = gk20a_from_vm(vm); + u32 page_size = vm->gmmu_page_sizes[attrs->pgsz]; + u32 pd_offset = nvgpu_pd_offset_from_index(l, pd_idx); + u32 pte_w[2] = {0, 0}; + int ctag_shift = 0; + int shamt = ilog2(g->ops.fb.compression_page_size(g)); + if (shamt < 0) { + nvgpu_err(g, "shift amount 'shamt' is negative"); + } else { + ctag_shift = shamt; + } + + if (phys_addr != 0ULL) { + __update_pte(vm, pte_w, phys_addr, attrs); + } else if (attrs->sparse) { + __update_pte_sparse(pte_w); + } + + pte_dbg(g, attrs, + "PTE: i=%-4u size=%-2u offs=%-4u | " + "GPU %#-12llx phys %#-12llx " + "pgsz: %3dkb perm=%-2s kind=%#02x APT=%-6s %c%c%c%c " + "ctag=0x%08x " + "[0x%08x, 0x%08x]", + pd_idx, l->entry_size, pd_offset, + virt_addr, phys_addr, + page_size >> 10, + nvgpu_gmmu_perm_str(attrs->rw_flag), + attrs->kind_v, + nvgpu_aperture_str(g, attrs->aperture), + attrs->cacheable ? 'C' : '-', + attrs->sparse ? 'S' : '-', + attrs->priv ? 'P' : '-', + attrs->valid ? 
'V' : '-', + U32(attrs->ctag) >> U32(ctag_shift), + pte_w[1], pte_w[0]); + + nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)0, pte_w[0]); + nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)1, pte_w[1]); +} + +u32 gk20a_get_pde_pgsz(struct gk20a *g, const struct gk20a_mmu_level *l, + struct nvgpu_gmmu_pd *pd, u32 pd_idx) +{ + /* + * big and small page sizes are the same + */ + return GMMU_PAGE_SIZE_SMALL; +} + +u32 gk20a_get_pte_pgsz(struct gk20a *g, const struct gk20a_mmu_level *l, + struct nvgpu_gmmu_pd *pd, u32 pd_idx) +{ + /* + * return invalid + */ + return GMMU_NR_PAGE_SIZES; +} + +const struct gk20a_mmu_level gk20a_mm_levels_64k[] = { + {.hi_bit = {NV_GMMU_VA_RANGE-1, NV_GMMU_VA_RANGE-1}, + .lo_bit = {26, 26}, + .update_entry = update_gmmu_pde_locked, + .entry_size = 8, + .get_pgsz = gk20a_get_pde_pgsz}, + {.hi_bit = {25, 25}, + .lo_bit = {12, 16}, + .update_entry = update_gmmu_pte_locked, + .entry_size = 8, + .get_pgsz = gk20a_get_pte_pgsz}, + {.update_entry = NULL} +}; + +const struct gk20a_mmu_level gk20a_mm_levels_128k[] = { + {.hi_bit = {NV_GMMU_VA_RANGE-1, NV_GMMU_VA_RANGE-1}, + .lo_bit = {27, 27}, + .update_entry = update_gmmu_pde_locked, + .entry_size = 8, + .get_pgsz = gk20a_get_pde_pgsz}, + {.hi_bit = {26, 26}, + .lo_bit = {12, 17}, + .update_entry = update_gmmu_pte_locked, + .entry_size = 8, + .get_pgsz = gk20a_get_pte_pgsz}, + {.update_entry = NULL} +}; + +const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g, + u32 big_page_size) +{ + return (big_page_size == SZ_64K) ? + gk20a_mm_levels_64k : gk20a_mm_levels_128k; +} diff --git a/drivers/gpu/nvgpu/hal/mm/gmmu/gmmu_gk20a.h b/drivers/gpu/nvgpu/hal/mm/gmmu/gmmu_gk20a.h new file mode 100644 index 000000000..e3515ca16 --- /dev/null +++ b/drivers/gpu/nvgpu/hal/mm/gmmu/gmmu_gk20a.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef HAL_MM_GMMU_GMMU_GK20A_H +#define HAL_MM_GMMU_GMMU_GK20A_H + +#include +#include + +struct gk20a; + +/* + * Must include the header directly since the compiler needs to + * know the actual type before it declares an array (even if the size is not + * technically known here). 
+ */ +extern const struct gk20a_mmu_level gk20a_mm_levels_64k[]; +extern const struct gk20a_mmu_level gk20a_mm_levels_128k[]; + +const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g, + u32 big_page_size); + +u32 gk20a_get_pde_pgsz(struct gk20a *g, const struct gk20a_mmu_level *l, + struct nvgpu_gmmu_pd *pd, u32 pd_idx); +u32 gk20a_get_pte_pgsz(struct gk20a *g, const struct gk20a_mmu_level *l, + struct nvgpu_gmmu_pd *pd, u32 pd_idx); + +#endif diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index 1e9d290ea..9f8cebd89 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h @@ -1348,42 +1348,12 @@ struct gpu_ops { bool (*is_fw_defined)(void); } netlist; struct { - u64 (*gmmu_map)(struct vm_gk20a *vm, - u64 map_offset, - struct nvgpu_sgt *sgt, - u64 buffer_offset, - u64 size, - u32 pgsz_idx, - u8 kind_v, - u32 ctag_offset, - u32 flags, - enum gk20a_mem_rw_flag rw_flag, - bool clear_ctags, - bool sparse, - bool priv, - struct vm_gk20a_mapping_batch *batch, - enum nvgpu_aperture aperture); - void (*gmmu_unmap)(struct vm_gk20a *vm, - u64 vaddr, - u64 size, - u32 pgsz_idx, - bool va_allocated, - enum gk20a_mem_rw_flag rw_flag, - bool sparse, - struct vm_gk20a_mapping_batch *batch); int (*vm_bind_channel)(struct vm_gk20a *vm, struct channel_gk20a *ch); - u32 (*get_big_page_sizes)(void); - u32 (*get_default_big_page_size)(void); - u32 (*get_iommu_bit)(struct gk20a *g); int (*init_mm_setup_hw)(struct gk20a *g); bool (*is_bar1_supported)(struct gk20a *g); int (*init_bar2_vm)(struct gk20a *g); void (*remove_bar2_vm)(struct gk20a *g); - const struct gk20a_mmu_level * - (*get_mmu_levels)(struct gk20a *g, u32 big_page_size); - u64 (*gpu_phys_addr)(struct gk20a *g, - struct nvgpu_gmmu_attrs *attrs, u64 phys); int (*alloc_inst_block)(struct gk20a *g, struct nvgpu_mem *inst_block); void (*init_inst_block)(struct nvgpu_mem *inst_block, @@ -1403,6 +1373,40 @@ struct gpu_ops { int (*l2_flush)(struct gk20a *g, bool invalidate); void (*cbc_clean)(struct gk20a *g); } cache; + struct { + const struct gk20a_mmu_level * + (*get_mmu_levels)(struct gk20a *g, + u32 big_page_size); + u64 (*map)(struct vm_gk20a *vm, + u64 map_offset, + struct nvgpu_sgt *sgt, + u64 buffer_offset, + u64 size, + u32 pgsz_idx, + u8 kind_v, + u32 ctag_offset, + u32 flags, + enum gk20a_mem_rw_flag rw_flag, + bool clear_ctags, + bool sparse, + bool priv, + struct vm_gk20a_mapping_batch *batch, + enum nvgpu_aperture aperture); + void (*unmap)(struct vm_gk20a *vm, + u64 vaddr, + u64 size, + u32 pgsz_idx, + bool va_allocated, + enum gk20a_mem_rw_flag rw_flag, + bool sparse, + struct vm_gk20a_mapping_batch *batch); + u32 (*get_big_page_sizes)(void); + u32 (*get_default_big_page_size)(void); + u32 (*get_iommu_bit)(struct gk20a *g); + u64 (*gpu_phys_addr)(struct gk20a *g, + struct nvgpu_gmmu_attrs *attrs, + u64 phys); + } gmmu; } mm; /* * This function is called to allocate secure memory (memory diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c index 326c31e74..2c973f635 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c @@ -327,7 +327,8 @@ gk20a_ctrl_ioctl_gpu_characteristics( gpu.big_page_size = nvgpu_mm_get_default_big_page_size(g); gpu.pde_coverage_bit_count = - g->ops.mm.get_mmu_levels(g, gpu.big_page_size)[0].lo_bit[0]; + g->ops.mm.gmmu.get_mmu_levels(g, + gpu.big_page_size)[0].lo_bit[0]; gpu.available_big_page_sizes = 
nvgpu_mm_get_available_big_page_sizes(g); gpu.sm_arch_sm_version = g->params.sm_arch_sm_version; diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c index af51a5445..5d801d7d5 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c @@ -1355,7 +1355,7 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s, struct mm_gk20a *mm = &g->mm; int err; u32 virt_size; - u32 big_page_size = g->ops.mm.get_default_big_page_size(); + u32 big_page_size = g->ops.mm.gmmu.get_default_big_page_size(); nvgpu_mutex_acquire(&g->dbg_sessions_lock); diff --git a/drivers/gpu/nvgpu/os/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/os/linux/nvgpu_mem.c index 3d0f8a6d0..b8a99c68e 100644 --- a/drivers/gpu/nvgpu/os/linux/nvgpu_mem.c +++ b/drivers/gpu/nvgpu/os/linux/nvgpu_mem.c @@ -64,11 +64,11 @@ u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl) { if (nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG) || !nvgpu_iommuable(g)) - return g->ops.mm.gpu_phys_addr(g, NULL, + return g->ops.mm.gmmu.gpu_phys_addr(g, NULL, __nvgpu_sgl_phys(g, (struct nvgpu_sgl *)sgl)); if (sg_dma_address(sgl) == 0) - return g->ops.mm.gpu_phys_addr(g, NULL, + return g->ops.mm.gmmu.gpu_phys_addr(g, NULL, __nvgpu_sgl_phys(g, (struct nvgpu_sgl *)sgl)); if (sg_dma_address(sgl) == DMA_ERROR_CODE) @@ -230,7 +230,7 @@ static u64 nvgpu_mem_linux_sgl_gpu_addr(struct gk20a *g, struct nvgpu_gmmu_attrs *attrs) { if (sg_dma_address((struct scatterlist *)sgl) == 0) - return g->ops.mm.gpu_phys_addr(g, attrs, + return g->ops.mm.gmmu.gpu_phys_addr(g, attrs, __nvgpu_sgl_phys(g, sgl)); if (sg_dma_address((struct scatterlist *)sgl) == DMA_ERROR_CODE) diff --git a/drivers/gpu/nvgpu/os/posix/posix-nvgpu_mem.c b/drivers/gpu/nvgpu/os/posix/posix-nvgpu_mem.c index 531b040e2..8101f58d1 100644 --- a/drivers/gpu/nvgpu/os/posix/posix-nvgpu_mem.c +++ b/drivers/gpu/nvgpu/os/posix/posix-nvgpu_mem.c @@ -85,7 +85,7 @@ u64 nvgpu_mem_sgl_gpu_addr(struct gk20a *g, struct nvgpu_sgl *sgl, struct nvgpu_mem_sgl *mem = (struct nvgpu_mem_sgl *)sgl; if (mem->dma == 0U) { - return g->ops.mm.gpu_phys_addr(g, attrs, mem->phys); + return g->ops.mm.gmmu.gpu_phys_addr(g, attrs, mem->phys); } if (mem->dma == DMA_ERROR_CODE) { diff --git a/userspace/units/mm/allocators/buddy_allocator/buddy_allocator.c b/userspace/units/mm/allocators/buddy_allocator/buddy_allocator.c index 3d939b4a1..62b5be1d8 100644 --- a/userspace/units/mm/allocators/buddy_allocator/buddy_allocator.c +++ b/userspace/units/mm/allocators/buddy_allocator/buddy_allocator.c @@ -68,9 +68,9 @@ static struct vm_gk20a *init_vm_env(struct unit_module *m, struct gk20a *g, /* Initialize vm */ /* Minimum HALs for vm_init */ - g->ops.mm.get_default_big_page_size = + g->ops.mm.gmmu.get_default_big_page_size = gp10b_mm_get_default_big_page_size; - g->ops.mm.get_mmu_levels = gp10b_mm_get_mmu_levels; + g->ops.mm.gmmu.get_mmu_levels = gp10b_mm_get_mmu_levels; /* Minimum HAL init for PRAMIN */ g->ops.bus.set_bar0_window = gk20a_bus_set_bar0_window; @@ -90,7 +90,7 @@ static struct vm_gk20a *init_vm_env(struct unit_module *m, struct gk20a *g, flags |= GPU_ALLOC_GVA_SPACE; /* Init vm with big_pages disabled */ - test_vm = nvgpu_vm_init(g, g->ops.mm.get_default_big_page_size(), + test_vm = nvgpu_vm_init(g, g->ops.mm.gmmu.get_default_big_page_size(), low_hole, aperture_size - low_hole, aperture_size, diff --git a/userspace/units/mm/gmmu/page_table/page_table.c b/userspace/units/mm/gmmu/page_table/page_table.c index da6049a93..e398182f4 100644 --- 
a/userspace/units/mm/gmmu/page_table/page_table.c +++ b/userspace/units/mm/gmmu/page_table/page_table.c @@ -296,14 +296,14 @@ static int init_mm(struct unit_module *m, struct gk20a *g) p->mm_is_iommuable = true; - g->ops.mm.get_default_big_page_size = + g->ops.mm.gmmu.get_default_big_page_size = gp10b_mm_get_default_big_page_size; - g->ops.mm.get_mmu_levels = gp10b_mm_get_mmu_levels; + g->ops.mm.gmmu.get_mmu_levels = gp10b_mm_get_mmu_levels; g->ops.mm.alloc_inst_block = gk20a_alloc_inst_block; g->ops.mm.init_inst_block = gv11b_init_inst_block; - g->ops.mm.gmmu_map = nvgpu_gmmu_map_locked; - g->ops.mm.gmmu_unmap = nvgpu_gmmu_unmap_locked; - g->ops.mm.gpu_phys_addr = gv11b_gpu_phys_addr; + g->ops.mm.gmmu.map = nvgpu_gmmu_map_locked; + g->ops.mm.gmmu.unmap = nvgpu_gmmu_unmap_locked; + g->ops.mm.gmmu.gpu_phys_addr = gv11b_gpu_phys_addr; g->ops.mm.is_bar1_supported = gv11b_mm_is_bar1_supported; g->ops.mm.cache.l2_flush = gv11b_mm_l2_flush; g->ops.mm.cache.fb_flush = gk20a_mm_fb_flush; @@ -329,7 +329,8 @@ static int init_mm(struct unit_module *m, struct gk20a *g) mm->channel.kernel_size = NV_MM_DEFAULT_KERNEL_SIZE; - mm->pmu.vm = nvgpu_vm_init(g, g->ops.mm.get_default_big_page_size(), + mm->pmu.vm = nvgpu_vm_init(g, + g->ops.mm.gmmu.get_default_big_page_size(), low_hole, aperture_size - low_hole, aperture_size, @@ -729,7 +730,7 @@ static struct nvgpu_sgt *custom_sgt_create(struct unit_module *m, } /* - * Helper function to wrap calls to g->ops.mm.gmmu_map and thus giving + * Helper function to wrap calls to g->ops.mm.gmmu.map and thus giving * access to more parameters */ static u64 gmmu_map_advanced(struct unit_module *m, struct gk20a *g, @@ -755,7 +756,7 @@ static u64 gmmu_map_advanced(struct unit_module *m, struct gk20a *g, nvgpu_mutex_acquire(&vm->update_gmmu_lock); - vaddr = g->ops.mm.gmmu_map(vm, (u64) mem->cpu_va, + vaddr = g->ops.mm.gmmu.map(vm, (u64) mem->cpu_va, sgt, offset, mem->size, @@ -775,7 +776,7 @@ static u64 gmmu_map_advanced(struct unit_module *m, struct gk20a *g, } /* - * Helper function to wrap calls to g->ops.mm.gmmu_unmap and thus giving + * Helper function to wrap calls to g->ops.mm.gmmu.unmap and thus giving * access to more parameters */ static void gmmu_unmap_advanced(struct vm_gk20a *vm, struct nvgpu_mem *mem, @@ -786,7 +787,7 @@ static void gmmu_unmap_advanced(struct vm_gk20a *vm, struct nvgpu_mem *mem, nvgpu_mutex_acquire(&vm->update_gmmu_lock); - g->ops.mm.gmmu_unmap(vm, + g->ops.mm.gmmu.unmap(vm, gpu_va, mem->size, params->page_size, @@ -973,9 +974,9 @@ static struct vm_gk20a *init_test_req_vm(struct gk20a *g) /* 1.4. 
Have a 4GB kernel reserved space */ kernel_reserved = 4 * SZ_1G; - return nvgpu_vm_init(g, g->ops.mm.get_default_big_page_size(), low_hole, - kernel_reserved - low_hole, aperture_size, big_pages, - true, true, "testmem"); + return nvgpu_vm_init(g, g->ops.mm.gmmu.get_default_big_page_size(), + low_hole, kernel_reserved - low_hole, + aperture_size, big_pages, true, true, "testmem"); } /* Test case to cover NVGPU-RQCD-45 C1 */ diff --git a/userspace/units/mm/nvgpu_mem/nvgpu_mem.c b/userspace/units/mm/nvgpu_mem/nvgpu_mem.c index c26615aa8..52a05560f 100644 --- a/userspace/units/mm/nvgpu_mem/nvgpu_mem.c +++ b/userspace/units/mm/nvgpu_mem/nvgpu_mem.c @@ -344,7 +344,7 @@ static int test_nvgpu_mem_iommu_translate(struct unit_module *m, * Case: mm is iommuable * Set HAL to enable iommu_translate */ - g->ops.mm.get_iommu_bit = gp10b_mm_get_iommu_bit; + g->ops.mm.gmmu.get_iommu_bit = gp10b_mm_get_iommu_bit; temp_phys = nvgpu_mem_iommu_translate(g, test_sgl->phys); if (temp_phys == test_sgl->phys) { diff --git a/userspace/units/mm/page_table_faults/page_table_faults.c b/userspace/units/mm/page_table_faults/page_table_faults.c index f7a4293d7..caeeb7713 100644 --- a/userspace/units/mm/page_table_faults/page_table_faults.c +++ b/userspace/units/mm/page_table_faults/page_table_faults.c @@ -117,14 +117,14 @@ static int init_mm(struct unit_module *m, struct gk20a *g) p->mm_is_iommuable = true; /* Minimum HALs for page_table */ - g->ops.mm.get_default_big_page_size = + g->ops.mm.gmmu.get_default_big_page_size = gp10b_mm_get_default_big_page_size; - g->ops.mm.get_mmu_levels = gp10b_mm_get_mmu_levels; + g->ops.mm.gmmu.get_mmu_levels = gp10b_mm_get_mmu_levels; g->ops.mm.alloc_inst_block = gk20a_alloc_inst_block; g->ops.mm.init_inst_block = gv11b_init_inst_block; - g->ops.mm.gmmu_map = nvgpu_gmmu_map_locked; - g->ops.mm.gmmu_unmap = nvgpu_gmmu_unmap_locked; - g->ops.mm.gpu_phys_addr = gv11b_gpu_phys_addr; + g->ops.mm.gmmu.map = nvgpu_gmmu_map_locked; + g->ops.mm.gmmu.unmap = nvgpu_gmmu_unmap_locked; + g->ops.mm.gmmu.gpu_phys_addr = gv11b_gpu_phys_addr; g->ops.fb.compression_page_size = gp10b_fb_compression_page_size; g->ops.fb.tlb_invalidate = gm20b_fb_tlb_invalidate; g->ops.ramin.init_pdb = gp10b_ramin_init_pdb; @@ -179,7 +179,8 @@ static int init_mm(struct unit_module *m, struct gk20a *g) aperture_size = GK20A_PMU_VA_SIZE; mm->pmu.aperture_size = GK20A_PMU_VA_SIZE; - mm->pmu.vm = nvgpu_vm_init(g, g->ops.mm.get_default_big_page_size(), + mm->pmu.vm = nvgpu_vm_init(g, + g->ops.mm.gmmu.get_default_big_page_size(), low_hole, aperture_size - low_hole, aperture_size, @@ -193,7 +194,8 @@ static int init_mm(struct unit_module *m, struct gk20a *g) /* BAR2 memory space */ mm->bar2.aperture_size = U32(32) << 20U; - mm->bar2.vm = nvgpu_vm_init(g, g->ops.mm.get_default_big_page_size(), + mm->bar2.vm = nvgpu_vm_init(g, + g->ops.mm.gmmu.get_default_big_page_size(), SZ_4K, mm->bar2.aperture_size - SZ_4K, mm->bar2.aperture_size, false, false, false, "bar2"); if (mm->bar2.vm == NULL) { @@ -323,10 +325,10 @@ static int test_page_faults_inst_block(struct unit_module *m, struct gk20a *g, /* Handle some corner cases */ if (scenario == 1) { /* Init inst_block with large page size */ - big_page_size = g->ops.mm.get_default_big_page_size(); + big_page_size = g->ops.mm.gmmu.get_default_big_page_size(); } else if (scenario == 2) { /* Handle branch case in gv11b_init_inst_block() */ - big_page_size = g->ops.mm.get_default_big_page_size(); + big_page_size = g->ops.mm.gmmu.get_default_big_page_size(); g->ops.ramin.set_big_page_size = 
NULL; } diff --git a/userspace/units/mm/vm/vm.c b/userspace/units/mm/vm/vm.c index 881728faf..5b01facad 100644 --- a/userspace/units/mm/vm/vm.c +++ b/userspace/units/mm/vm/vm.c @@ -133,12 +133,12 @@ static int init_test_env(struct unit_module *m, struct gk20a *g) g->ops.fb.compression_page_size = gp10b_fb_compression_page_size; g->ops.fb.tlb_invalidate = gm20b_fb_tlb_invalidate; - g->ops.mm.get_default_big_page_size = + g->ops.mm.gmmu.get_default_big_page_size = gp10b_mm_get_default_big_page_size; - g->ops.mm.get_mmu_levels = gp10b_mm_get_mmu_levels; - g->ops.mm.gmmu_map = nvgpu_gmmu_map_locked; - g->ops.mm.gmmu_unmap = nvgpu_gmmu_unmap_locked; - g->ops.mm.gpu_phys_addr = gv11b_gpu_phys_addr; + g->ops.mm.gmmu.get_mmu_levels = gp10b_mm_get_mmu_levels; + g->ops.mm.gmmu.map = nvgpu_gmmu_map_locked; + g->ops.mm.gmmu.unmap = nvgpu_gmmu_unmap_locked; + g->ops.mm.gmmu.gpu_phys_addr = gv11b_gpu_phys_addr; g->ops.mm.cache.l2_flush = gv11b_mm_l2_flush; g->ops.mm.cache.fb_flush = gk20a_mm_fb_flush; @@ -388,7 +388,7 @@ static int test_map_buf(struct unit_module *m, struct gk20a *g, void *__args) unit_info(m, " - Kernel Reserved Size = 0x%llx\n", kernel_reserved); unit_info(m, " - Total Aperture Size = 0x%llx\n", aperture_size); vm = nvgpu_vm_init(g, - g->ops.mm.get_default_big_page_size(), + g->ops.mm.gmmu.get_default_big_page_size(), low_hole, kernel_reserved, aperture_size, @@ -524,7 +524,7 @@ static int test_map_buf_gpu_va(struct unit_module *m, unit_info(m, " - Kernel Reserved Size = 0x%llx\n", kernel_reserved); unit_info(m, " - Total Aperture Size = 0x%llx\n", aperture_size); vm = nvgpu_vm_init(g, - g->ops.mm.get_default_big_page_size(), + g->ops.mm.gmmu.get_default_big_page_size(), low_hole, kernel_reserved, aperture_size,
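The reorganization above is one mechanical pattern applied everywhere: GMMU operations move from the top level of gpu_ops.mm into the nested gpu_ops.mm.gmmu sub-unit, so each caller inserts ".gmmu" into the HAL path (and gmmu_map/gmmu_unmap drop their prefix, becoming map/unmap). A minimal, self-contained sketch of that pattern follows; the struct layout and the stub function are simplified stand-ins for illustration, not the real nvgpu declarations.

#include <stdio.h>

/* Simplified stand-in for the nvgpu gpu_ops HAL table. */
struct gpu_ops {
	struct {
		/* Non-GMMU mm ops stay at this level. */
		int (*init_mm_setup_hw)(void);

		/* GMMU ops now live one level down, in a sub-unit. */
		struct {
			unsigned int (*get_default_big_page_size)(void);
		} gmmu;
	} mm;
};

/* Stand-in for a chip-specific HAL implementation such as
 * gp10b_mm_get_default_big_page_size(). */
static unsigned int stub_get_default_big_page_size(void)
{
	return 64U * 1024U; /* 64K big pages */
}

int main(void)
{
	struct gpu_ops ops = {
		.mm.gmmu.get_default_big_page_size =
			stub_get_default_big_page_size,
	};

	/*
	 * Old HAL path: ops.mm.get_default_big_page_size()
	 * New HAL path: ops.mm.gmmu.get_default_big_page_size()
	 */
	printf("big page size: %u\n",
	       ops.mm.gmmu.get_default_big_page_size());
	return 0;
}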