diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
index 7f4a73e4c..01716e76d 100644
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -151,6 +151,7 @@ nvgpu-y += \
 	common/nvlink/nvlink_tu104.o \
 	hal/mm/cache/flush_gk20a.o \
 	hal/mm/cache/flush_gv11b.o \
+	hal/mm/gmmu/gmmu_gk20a.o \
 	hal/mc/mc_gm20b.o \
 	hal/mc/mc_gp10b.o \
 	hal/mc/mc_gv11b.o \
diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources
index 490e43d3d..fbe39502b 100644
--- a/drivers/gpu/nvgpu/Makefile.sources
+++ b/drivers/gpu/nvgpu/Makefile.sources
@@ -260,6 +260,7 @@ srcs += common/sim/sim.c \
 	tu104/func_tu104.c \
 	hal/mm/cache/flush_gk20a.c \
 	hal/mm/cache/flush_gv11b.c \
+	hal/mm/gmmu/gmmu_gk20a.c \
 	hal/mc/mc_gm20b.c \
 	hal/mc/mc_gp10b.c \
 	hal/mc/mc_gv11b.c \
diff --git a/drivers/gpu/nvgpu/common/mm/as.c b/drivers/gpu/nvgpu/common/mm/as.c
index 1c07dfdd8..2f202569a 100644
--- a/drivers/gpu/nvgpu/common/mm/as.c
+++ b/drivers/gpu/nvgpu/common/mm/as.c
@@ -64,7 +64,7 @@ static int gk20a_vm_alloc_share(struct gk20a_as_share *as_share,
 	nvgpu_log_fn(g, " ");
 
 	if (big_page_size == 0U) {
-		big_page_size = g->ops.mm.get_default_big_page_size();
+		big_page_size = g->ops.mm.gmmu.get_default_big_page_size();
 	} else {
 		if (!is_power_of_2(big_page_size)) {
 			return -EINVAL;
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c b/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c
index ff3dcf2d7..6b809eb83 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c
@@ -99,7 +99,7 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm,
 	}
 
 	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
-	vaddr = g->ops.mm.gmmu_map(vm, addr,
+	vaddr = g->ops.mm.gmmu.map(vm, addr,
			sgt,	/* sg list */
			0,	/* sg offset */
			size,
@@ -160,7 +160,7 @@ void nvgpu_gmmu_unmap(struct vm_gk20a *vm, struct nvgpu_mem *mem, u64 gpu_va)
 	struct gk20a *g = gk20a_from_vm(vm);
 
 	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
-	g->ops.mm.gmmu_unmap(vm,
+	g->ops.mm.gmmu.unmap(vm,
			gpu_va,
			mem->size,
			GMMU_PAGE_SIZE_KERNEL,
@@ -618,7 +618,7 @@ static int __nvgpu_gmmu_do_update_page_table(struct vm_gk20a *vm,
 		 */
 		phys_addr = nvgpu_sgt_ipa_to_pa(g, sgt, sgl,
				ipa_addr, &phys_length);
-		phys_addr = g->ops.mm.gpu_phys_addr(g, attrs, phys_addr)
+		phys_addr = g->ops.mm.gmmu.gpu_phys_addr(g, attrs, phys_addr)
				+ space_to_skip;
 
 		/*
@@ -902,7 +902,8 @@ void nvgpu_gmmu_unmap_locked(struct vm_gk20a *vm,
 
 u32 __nvgpu_pte_words(struct gk20a *g)
 {
-	const struct gk20a_mmu_level *l = g->ops.mm.get_mmu_levels(g, SZ_64K);
+	const struct gk20a_mmu_level *l =
+		g->ops.mm.gmmu.get_mmu_levels(g, SZ_64K);
 	const struct gk20a_mmu_level *next_l;
 
 	/*
diff --git a/drivers/gpu/nvgpu/common/mm/mm.c b/drivers/gpu/nvgpu/common/mm/mm.c
index 5da4d9677..383e7b559 100644
--- a/drivers/gpu/nvgpu/common/mm/mm.c
+++ b/drivers/gpu/nvgpu/common/mm/mm.c
@@ -237,7 +237,7 @@ static int nvgpu_init_system_vm(struct mm_gk20a *mm)
 	int err;
 	struct gk20a *g = gk20a_from_mm(mm);
 	struct nvgpu_mem *inst_block = &mm->pmu.inst_block;
-	u32 big_page_size = g->ops.mm.get_default_big_page_size();
+	u32 big_page_size = g->ops.mm.gmmu.get_default_big_page_size();
 	u64 low_hole, aperture_size;
 
 	/*
@@ -300,7 +300,7 @@ static int nvgpu_init_hwpm(struct mm_gk20a *mm)
 static int nvgpu_init_cde_vm(struct mm_gk20a *mm)
 {
 	struct gk20a *g = gk20a_from_mm(mm);
-	u32 big_page_size = g->ops.mm.get_default_big_page_size();
+	u32 big_page_size = g->ops.mm.gmmu.get_default_big_page_size();
 
 	mm->cde.vm = nvgpu_vm_init(g, big_page_size,
			U64(big_page_size) << U64(10),
@@ -316,7 +316,7 @@ static int nvgpu_init_cde_vm(struct mm_gk20a *mm)
 static int nvgpu_init_ce_vm(struct mm_gk20a *mm)
 {
 	struct gk20a *g = gk20a_from_mm(mm);
-	u32 big_page_size = g->ops.mm.get_default_big_page_size();
+	u32 big_page_size = g->ops.mm.gmmu.get_default_big_page_size();
 
 	mm->ce.vm = nvgpu_vm_init(g, big_page_size,
			U64(big_page_size) << U64(10),
@@ -396,7 +396,7 @@ static int nvgpu_init_bar1_vm(struct mm_gk20a *mm)
 	int err;
 	struct gk20a *g = gk20a_from_mm(mm);
 	struct nvgpu_mem *inst_block = &mm->bar1.inst_block;
-	u32 big_page_size = g->ops.mm.get_default_big_page_size();
+	u32 big_page_size = g->ops.mm.gmmu.get_default_big_page_size();
 
 	mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20;
 	nvgpu_log_info(g, "bar1 vm size = 0x%x", mm->bar1.aperture_size);
@@ -429,7 +429,7 @@ static int nvgpu_init_engine_ucode_vm(struct gk20a *g,
 {
 	int err;
 	struct nvgpu_mem *inst_block = &ucode->inst_block;
-	u32 big_page_size = g->ops.mm.get_default_big_page_size();
+	u32 big_page_size = g->ops.mm.gmmu.get_default_big_page_size();
 
 	/* ucode aperture size is 32MB */
 	ucode->aperture_size = U32(32) << 20U;
@@ -621,7 +621,7 @@ u32 nvgpu_mm_get_default_big_page_size(struct gk20a *g)
 {
 	u32 big_page_size;
 
-	big_page_size = g->ops.mm.get_default_big_page_size();
+	big_page_size = g->ops.mm.gmmu.get_default_big_page_size();
 
 	if (g->mm.disable_bigpage) {
 		big_page_size = 0;
@@ -634,12 +634,13 @@ u32 nvgpu_mm_get_available_big_page_sizes(struct gk20a *g)
 {
 	u32 available_big_page_sizes = 0;
 
-	if (!g->mm.disable_bigpage) {
-		available_big_page_sizes =
-			g->ops.mm.get_default_big_page_size();
-		if (g->ops.mm.get_big_page_sizes != NULL) {
-			available_big_page_sizes |= g->ops.mm.get_big_page_sizes();
-		}
+	if (g->mm.disable_bigpage) {
+		return available_big_page_sizes;
+	}
+
+	available_big_page_sizes = g->ops.mm.gmmu.get_default_big_page_size();
+	if (g->ops.mm.gmmu.get_big_page_sizes != NULL) {
+		available_big_page_sizes |= g->ops.mm.gmmu.get_big_page_sizes();
 	}
 
 	return available_big_page_sizes;
diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
index 798875481..1491a38e5 100644
--- a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
@@ -92,8 +92,8 @@ u64 nvgpu_mem_iommu_translate(struct gk20a *g, u64 phys)
 	/* ensure it is not vidmem allocation */
 	WARN_ON(nvgpu_addr_is_vidmem_page_alloc(phys));
 
-	if (nvgpu_iommuable(g) && g->ops.mm.get_iommu_bit != NULL) {
-		return phys | 1ULL << g->ops.mm.get_iommu_bit(g);
+	if (nvgpu_iommuable(g) && g->ops.mm.gmmu.get_iommu_bit != NULL) {
+		return phys | 1ULL << g->ops.mm.gmmu.get_iommu_bit(g);
 	}
 
 	return phys;
diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c
index 71d76896e..af721f126 100644
--- a/drivers/gpu/nvgpu/common/mm/vm.c
+++ b/drivers/gpu/nvgpu/common/mm/vm.c
@@ -336,7 +336,8 @@ int nvgpu_vm_do_init(struct mm_gk20a *mm,
 	vm->big_page_size = vm->gmmu_page_sizes[GMMU_PAGE_SIZE_BIG];
 	vm->userspace_managed = userspace_managed;
 	vm->unified_va = unified_va;
-	vm->mmu_levels = g->ops.mm.get_mmu_levels(g, vm->big_page_size);
+	vm->mmu_levels =
+		g->ops.mm.gmmu.get_mmu_levels(g, vm->big_page_size);
 
 #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
 	if (g->is_virtual && userspace_managed) {
@@ -1068,7 +1069,7 @@ struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm,
 		clear_ctags = gk20a_comptags_start_clear(os_buf);
 	}
 
-	map_addr = g->ops.mm.gmmu_map(vm,
+	map_addr = g->ops.mm.gmmu.map(vm,
			map_addr,
			sgt,
			phys_offset,
@@ -1123,7 +1124,7 @@ struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm,
 
 clean_up:
 	if (mapped_buffer->addr != 0ULL) {
-		g->ops.mm.gmmu_unmap(vm,
+		g->ops.mm.gmmu.unmap(vm,
				mapped_buffer->addr,
				mapped_buffer->size,
				mapped_buffer->pgsz_idx,
@@ -1150,7 +1151,7 @@ static void nvgpu_vm_do_unmap(struct nvgpu_mapped_buf *mapped_buffer,
 	struct vm_gk20a *vm = mapped_buffer->vm;
 	struct gk20a *g = vm->mm->g;
 
-	g->ops.mm.gmmu_unmap(vm,
+	g->ops.mm.gmmu.unmap(vm,
			mapped_buffer->addr,
			mapped_buffer->size,
			mapped_buffer->pgsz_idx,
diff --git a/drivers/gpu/nvgpu/common/mm/vm_area.c b/drivers/gpu/nvgpu/common/mm/vm_area.c
index 7127abe8d..f251e3761 100644
--- a/drivers/gpu/nvgpu/common/mm/vm_area.c
+++ b/drivers/gpu/nvgpu/common/mm/vm_area.c
@@ -169,7 +169,7 @@ int nvgpu_vm_area_alloc(struct vm_gk20a *vm, u32 pages, u32 page_size,
 	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
 
 	if ((flags & NVGPU_VM_AREA_ALLOC_SPARSE) != 0U) {
-		u64 map_addr = g->ops.mm.gmmu_map(vm, vaddr_start,
+		u64 map_addr = g->ops.mm.gmmu.map(vm, vaddr_start,
				NULL,
				0,
				vm_area->size,
@@ -241,7 +241,7 @@ int nvgpu_vm_area_free(struct vm_gk20a *vm, u64 addr)
 
 	/* if this was a sparse mapping, free the va */
 	if (vm_area->sparse) {
-		g->ops.mm.gmmu_unmap(vm,
+		g->ops.mm.gmmu.unmap(vm,
				vm_area->addr,
				vm_area->size,
				vm_area->pgsz_idx,
diff --git a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c
index b5e12e551..2f3e4c8d4 100644
--- a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c
+++ b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c
@@ -567,14 +567,7 @@ static const struct gpu_ops vgpu_gp10b_ops = {
 		.is_fw_defined = gp10b_netlist_is_firmware_defined,
 	},
 	.mm = {
-		.gmmu_map = vgpu_locked_gmmu_map,
-		.gmmu_unmap = vgpu_locked_gmmu_unmap,
 		.vm_bind_channel = vgpu_vm_bind_channel,
-		.get_big_page_sizes = gm20b_mm_get_big_page_sizes,
-		.get_default_big_page_size = gp10b_mm_get_default_big_page_size,
-		.gpu_phys_addr = gm20b_gpu_phys_addr,
-		.get_iommu_bit = gk20a_mm_get_iommu_bit,
-		.get_mmu_levels = gp10b_mm_get_mmu_levels,
 		.init_mm_setup_hw = NULL,
 		.is_bar1_supported = gm20b_mm_is_bar1_supported,
 		.init_inst_block = gk20a_init_inst_block,
@@ -591,6 +584,16 @@ static const struct gpu_ops vgpu_gp10b_ops = {
 			.l2_flush = vgpu_mm_l2_flush,
 			.cbc_clean = NULL,
 		},
+		.gmmu = {
+			.map = vgpu_locked_gmmu_map,
+			.unmap = vgpu_locked_gmmu_unmap,
+			.get_big_page_sizes = gm20b_mm_get_big_page_sizes,
+			.get_default_big_page_size =
+				gp10b_mm_get_default_big_page_size,
+			.gpu_phys_addr = gm20b_gpu_phys_addr,
+			.get_iommu_bit = gk20a_mm_get_iommu_bit,
+			.get_mmu_levels = gp10b_mm_get_mmu_levels,
+		},
 	},
 	.pramin = {
 		.data032_r = NULL,
diff --git a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c
index 5e5134aa1..db7f724b8 100644
--- a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c
+++ b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c
@@ -661,14 +661,7 @@ static const struct gpu_ops vgpu_gv11b_ops = {
 		.is_fw_defined = gv11b_netlist_is_firmware_defined,
 	},
 	.mm = {
-		.gmmu_map = vgpu_locked_gmmu_map,
-		.gmmu_unmap = vgpu_locked_gmmu_unmap,
 		.vm_bind_channel = vgpu_vm_bind_channel,
-		.get_big_page_sizes = gm20b_mm_get_big_page_sizes,
-		.get_default_big_page_size = gp10b_mm_get_default_big_page_size,
-		.gpu_phys_addr = gm20b_gpu_phys_addr,
-		.get_iommu_bit = gk20a_mm_get_iommu_bit,
-		.get_mmu_levels = gp10b_mm_get_mmu_levels,
 		.init_mm_setup_hw = NULL,
 		.is_bar1_supported = gv11b_mm_is_bar1_supported,
 		.init_inst_block = gv11b_init_inst_block,
@@ -686,6 +679,16 @@ static const struct gpu_ops vgpu_gv11b_ops = {
 			.l2_flush = vgpu_mm_l2_flush,
 			.cbc_clean = NULL,
 		},
+		.gmmu = {
+			.map = vgpu_locked_gmmu_map,
+			.unmap = vgpu_locked_gmmu_unmap,
+			.get_big_page_sizes = gm20b_mm_get_big_page_sizes,
+			.get_default_big_page_size =
+				gp10b_mm_get_default_big_page_size,
+			.gpu_phys_addr = gm20b_gpu_phys_addr,
+			.get_iommu_bit = gk20a_mm_get_iommu_bit,
+			.get_mmu_levels = gp10b_mm_get_mmu_levels,
+		},
 	},
 	.therm = {
 		.init_therm_setup_hw = NULL,
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index f6bc19255..6e2ee77da 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -49,7 +49,6 @@
 
 #include "mm_gk20a.h"
 
-#include
 #include
 
 /*
@@ -122,237 +121,6 @@ int gk20a_init_mm_setup_hw(struct gk20a *g)
 	return 0;
 }
 
-/* for gk20a the "video memory" apertures here are misnomers. */
-static inline u32 big_valid_pde0_bits(struct gk20a *g,
-		struct nvgpu_gmmu_pd *pd, u64 addr)
-{
-	u32 pde0_bits =
-		nvgpu_aperture_mask(g, pd->mem,
-			gmmu_pde_aperture_big_sys_mem_ncoh_f(),
-			gmmu_pde_aperture_big_sys_mem_coh_f(),
-			gmmu_pde_aperture_big_video_memory_f()) |
-		gmmu_pde_address_big_sys_f(
-			(u32)(addr >> gmmu_pde_address_shift_v()));
-
-	return pde0_bits;
-}
-
-static inline u32 small_valid_pde1_bits(struct gk20a *g,
-		struct nvgpu_gmmu_pd *pd, u64 addr)
-{
-	u32 pde1_bits =
-		nvgpu_aperture_mask(g, pd->mem,
-			gmmu_pde_aperture_small_sys_mem_ncoh_f(),
-			gmmu_pde_aperture_small_sys_mem_coh_f(),
-			gmmu_pde_aperture_small_video_memory_f()) |
-		gmmu_pde_vol_small_true_f() | /* tbd: why? */
-		gmmu_pde_address_small_sys_f(
-			(u32)(addr >> gmmu_pde_address_shift_v()));
-
-	return pde1_bits;
-}
-
-static void update_gmmu_pde_locked(struct vm_gk20a *vm,
-		const struct gk20a_mmu_level *l,
-		struct nvgpu_gmmu_pd *pd,
-		u32 pd_idx,
-		u64 virt_addr,
-		u64 phys_addr,
-		struct nvgpu_gmmu_attrs *attrs)
-{
-	struct gk20a *g = gk20a_from_vm(vm);
-	bool small_valid, big_valid;
-	u32 pd_offset = nvgpu_pd_offset_from_index(l, pd_idx);
-	u32 pde_v[2] = {0, 0};
-
-	small_valid = attrs->pgsz == GMMU_PAGE_SIZE_SMALL;
-	big_valid = attrs->pgsz == GMMU_PAGE_SIZE_BIG;
-
-	pde_v[0] = gmmu_pde_size_full_f();
-	pde_v[0] |= big_valid ?
-		big_valid_pde0_bits(g, pd, phys_addr) :
-		gmmu_pde_aperture_big_invalid_f();
-
-	pde_v[1] |= (small_valid ? small_valid_pde1_bits(g, pd, phys_addr) :
-		(gmmu_pde_aperture_small_invalid_f() |
-		gmmu_pde_vol_small_false_f()))
-		|
-		(big_valid ? (gmmu_pde_vol_big_true_f()) :
-		gmmu_pde_vol_big_false_f());
-
-	pte_dbg(g, attrs,
-		"PDE: i=%-4u size=%-2u offs=%-4u pgsz: %c%c | "
-		"GPU %#-12llx phys %#-12llx "
-		"[0x%08x, 0x%08x]",
-		pd_idx, l->entry_size, pd_offset,
-		small_valid ? 'S' : '-',
-		big_valid ? 'B' : '-',
-		virt_addr, phys_addr,
-		pde_v[1], pde_v[0]);
-
-	nvgpu_pd_write(g, &vm->pdb, (size_t)pd_offset + (size_t)0, pde_v[0]);
-	nvgpu_pd_write(g, &vm->pdb, (size_t)pd_offset + (size_t)1, pde_v[1]);
-}
-
-static void __update_pte_sparse(u32 *pte_w)
-{
-	pte_w[0] = gmmu_pte_valid_false_f();
-	pte_w[1] |= gmmu_pte_vol_true_f();
-}
-
-static void __update_pte(struct vm_gk20a *vm,
-		u32 *pte_w,
-		u64 phys_addr,
-		struct nvgpu_gmmu_attrs *attrs)
-{
-	struct gk20a *g = gk20a_from_vm(vm);
-	u32 page_size = vm->gmmu_page_sizes[attrs->pgsz];
-	u32 pte_valid = attrs->valid ?
-		gmmu_pte_valid_true_f() :
-		gmmu_pte_valid_false_f();
-	u32 phys_shifted = phys_addr >> gmmu_pte_address_shift_v();
-	u32 addr = attrs->aperture == APERTURE_SYSMEM ?
-		gmmu_pte_address_sys_f(phys_shifted) :
-		gmmu_pte_address_vid_f(phys_shifted);
-	int ctag_shift = 0;
-	int shamt = ilog2(g->ops.fb.compression_page_size(g));
-	if (shamt < 0) {
-		nvgpu_err(g, "shift amount 'shamt' is negative");
-	} else {
-		ctag_shift = shamt;
-	}
-
-	pte_w[0] = pte_valid | addr;
-
-	if (attrs->priv) {
-		pte_w[0] |= gmmu_pte_privilege_true_f();
-	}
-
-	pte_w[1] = nvgpu_aperture_mask_raw(g, attrs->aperture,
-			gmmu_pte_aperture_sys_mem_ncoh_f(),
-			gmmu_pte_aperture_sys_mem_coh_f(),
-			gmmu_pte_aperture_video_memory_f()) |
-		gmmu_pte_kind_f(attrs->kind_v) |
-		gmmu_pte_comptagline_f((U32(attrs->ctag) >> U32(ctag_shift)));
-
-	if ((attrs->ctag != 0ULL) &&
-	     vm->mm->use_full_comp_tag_line &&
-	    ((phys_addr & 0x10000ULL) != 0ULL)) {
-		pte_w[1] |= gmmu_pte_comptagline_f(
-				BIT32(gmmu_pte_comptagline_s() - 1U));
-	}
-
-	if (attrs->rw_flag == gk20a_mem_flag_read_only) {
-		pte_w[0] |= gmmu_pte_read_only_true_f();
-		pte_w[1] |= gmmu_pte_write_disable_true_f();
-	} else if (attrs->rw_flag == gk20a_mem_flag_write_only) {
-		pte_w[1] |= gmmu_pte_read_disable_true_f();
-	}
-
-	if (!attrs->cacheable) {
-		pte_w[1] |= gmmu_pte_vol_true_f();
-	}
-
-	if (attrs->ctag != 0ULL) {
-		attrs->ctag += page_size;
-	}
-}
-
-static void update_gmmu_pte_locked(struct vm_gk20a *vm,
-		const struct gk20a_mmu_level *l,
-		struct nvgpu_gmmu_pd *pd,
-		u32 pd_idx,
-		u64 virt_addr,
-		u64 phys_addr,
-		struct nvgpu_gmmu_attrs *attrs)
-{
-	struct gk20a *g = gk20a_from_vm(vm);
-	u32 page_size = vm->gmmu_page_sizes[attrs->pgsz];
-	u32 pd_offset = nvgpu_pd_offset_from_index(l, pd_idx);
-	u32 pte_w[2] = {0, 0};
-	int ctag_shift = 0;
-	int shamt = ilog2(g->ops.fb.compression_page_size(g));
-	if (shamt < 0) {
-		nvgpu_err(g, "shift amount 'shamt' is negative");
-	} else {
-		ctag_shift = shamt;
-	}
-
-	if (phys_addr != 0ULL) {
-		__update_pte(vm, pte_w, phys_addr, attrs);
-	} else if (attrs->sparse) {
-		__update_pte_sparse(pte_w);
-	}
-
-	pte_dbg(g, attrs,
-		"PTE: i=%-4u size=%-2u offs=%-4u | "
-		"GPU %#-12llx phys %#-12llx "
-		"pgsz: %3dkb perm=%-2s kind=%#02x APT=%-6s %c%c%c%c "
-		"ctag=0x%08x "
-		"[0x%08x, 0x%08x]",
-		pd_idx, l->entry_size, pd_offset,
-		virt_addr, phys_addr,
-		page_size >> 10,
-		nvgpu_gmmu_perm_str(attrs->rw_flag),
-		attrs->kind_v,
-		nvgpu_aperture_str(g, attrs->aperture),
-		attrs->cacheable ? 'C' : '-',
-		attrs->sparse ? 'S' : '-',
-		attrs->priv ? 'P' : '-',
-		attrs->valid ? 'V' : '-',
-		U32(attrs->ctag) >> U32(ctag_shift),
-		pte_w[1], pte_w[0]);
-
-	nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)0, pte_w[0]);
-	nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)1, pte_w[1]);
-}
-
-u32 gk20a_get_pde_pgsz(struct gk20a *g, const struct gk20a_mmu_level *l,
-	struct nvgpu_gmmu_pd *pd, u32 pd_idx)
-{
-	/*
-	 * big and small page sizes are the same
-	 */
-	return GMMU_PAGE_SIZE_SMALL;
-}
-
-u32 gk20a_get_pte_pgsz(struct gk20a *g, const struct gk20a_mmu_level *l,
-	struct nvgpu_gmmu_pd *pd, u32 pd_idx)
-{
-	/*
-	 * return invalid
-	 */
-	return GMMU_NR_PAGE_SIZES;
-}
-
-const struct gk20a_mmu_level gk20a_mm_levels_64k[] = {
-	{.hi_bit = {NV_GMMU_VA_RANGE-1, NV_GMMU_VA_RANGE-1},
-	 .lo_bit = {26, 26},
-	 .update_entry = update_gmmu_pde_locked,
-	 .entry_size = 8,
-	 .get_pgsz = gk20a_get_pde_pgsz},
-	{.hi_bit = {25, 25},
-	 .lo_bit = {12, 16},
-	 .update_entry = update_gmmu_pte_locked,
-	 .entry_size = 8,
-	 .get_pgsz = gk20a_get_pte_pgsz},
-	{.update_entry = NULL}
-};
-
-const struct gk20a_mmu_level gk20a_mm_levels_128k[] = {
-	{.hi_bit = {NV_GMMU_VA_RANGE-1, NV_GMMU_VA_RANGE-1},
-	 .lo_bit = {27, 27},
-	 .update_entry = update_gmmu_pde_locked,
-	 .entry_size = 8,
-	 .get_pgsz = gk20a_get_pde_pgsz},
-	{.hi_bit = {26, 26},
-	 .lo_bit = {12, 17},
-	 .update_entry = update_gmmu_pte_locked,
-	 .entry_size = 8,
-	 .get_pgsz = gk20a_get_pte_pgsz},
-	{.update_entry = NULL}
-};
-
 int gk20a_vm_bind_channel(struct vm_gk20a *vm, struct channel_gk20a *ch)
 {
 	int err = 0;
@@ -411,13 +179,6 @@ u32 gk20a_mm_get_iommu_bit(struct gk20a *g)
 	return 34;
 }
 
-const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g,
-	u32 big_page_size)
-{
-	return (big_page_size == SZ_64K) ?
-		gk20a_mm_levels_64k : gk20a_mm_levels_128k;
-}
-
 u64 gk20a_mm_bar1_map_userd(struct gk20a *g, struct nvgpu_mem *mem, u32 offset)
 {
 	struct fifo_gk20a *f = &g->fifo;
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 439f3fc80..56026b2fb 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -91,15 +91,5 @@ void pde_range_from_vaddr_range(struct vm_gk20a *vm,
			u64 addr_lo, u64 addr_hi,
			u32 *pde_lo, u32 *pde_hi);
 u32 gk20a_mm_get_iommu_bit(struct gk20a *g);
-
-const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g,
-	u32 big_page_size);
-extern const struct gk20a_mmu_level gk20a_mm_levels_64k[];
-extern const struct gk20a_mmu_level gk20a_mm_levels_128k[];
-
-u32 gk20a_get_pde_pgsz(struct gk20a *g, const struct gk20a_mmu_level *l,
-	struct nvgpu_gmmu_pd *pd, u32 pd_idx);
-u32 gk20a_get_pte_pgsz(struct gk20a *g, const struct gk20a_mmu_level *l,
-	struct nvgpu_gmmu_pd *pd, u32 pd_idx);
 u64 gk20a_mm_bar1_map_userd(struct gk20a *g, struct nvgpu_mem *mem, u32 offset);
 #endif /* MM_GK20A_H */
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index 5f982fade..8c489d096 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -34,6 +34,8 @@
 #include "gm20b/mm_gm20b.h"
 #include "mm_gp10b.h"
+#include "hal/mm/gmmu/gmmu_gk20a.h"
+
 #include
 
 u32 gp10b_mm_get_default_big_page_size(void)
@@ -51,7 +53,7 @@ int gp10b_init_bar2_vm(struct gk20a *g)
 	int err;
 	struct mm_gk20a *mm = &g->mm;
 	struct nvgpu_mem *inst_block = &mm->bar2.inst_block;
-	u32 big_page_size = g->ops.mm.get_default_big_page_size();
+	u32 big_page_size = g->ops.mm.gmmu.get_default_big_page_size();
 
 	/* BAR2 aperture size is 32MB */
 	mm->bar2.aperture_size = U32(32) << 20U;
diff --git a/drivers/gpu/nvgpu/hal/init/hal_gm20b.c b/drivers/gpu/nvgpu/hal/init/hal_gm20b.c
index d2e494a94..fec13b14a 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_gm20b.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_gm20b.c
@@ -42,6 +42,7 @@
 #include
 
 #include "hal/mm/cache/flush_gk20a.h"
+#include "hal/mm/gmmu/gmmu_gk20a.h"
 #include "hal/mc/mc_gm20b.h"
 #include "hal/bus/bus_gm20b.h"
 #include "hal/bus/bus_gk20a.h"
@@ -831,14 +832,7 @@ static const struct gpu_ops gm20b_ops = {
 		.is_fw_defined = gm20b_netlist_is_firmware_defined,
 	},
 	.mm = {
-		.gmmu_map = nvgpu_gmmu_map_locked,
-		.gmmu_unmap = nvgpu_gmmu_unmap_locked,
 		.vm_bind_channel = gk20a_vm_bind_channel,
-		.get_big_page_sizes = gm20b_mm_get_big_page_sizes,
-		.get_default_big_page_size = gm20b_mm_get_default_big_page_size,
-		.gpu_phys_addr = gm20b_gpu_phys_addr,
-		.get_iommu_bit = gk20a_mm_get_iommu_bit,
-		.get_mmu_levels = gk20a_mm_get_mmu_levels,
 		.init_mm_setup_hw = gk20a_init_mm_setup_hw,
 		.is_bar1_supported = gm20b_mm_is_bar1_supported,
 		.alloc_inst_block = gk20a_alloc_inst_block,
@@ -852,6 +846,16 @@ static const struct gpu_ops gm20b_ops = {
 			.l2_flush = gk20a_mm_l2_flush,
 			.cbc_clean = gk20a_mm_cbc_clean,
 		},
+		.gmmu = {
+			.get_mmu_levels = gk20a_mm_get_mmu_levels,
+			.map = nvgpu_gmmu_map_locked,
+			.unmap = nvgpu_gmmu_unmap_locked,
+			.get_big_page_sizes = gm20b_mm_get_big_page_sizes,
+			.get_default_big_page_size =
+				gm20b_mm_get_default_big_page_size,
+			.get_iommu_bit = gk20a_mm_get_iommu_bit,
+			.gpu_phys_addr = gm20b_gpu_phys_addr,
+		}
 	},
 	.therm = {
 		.init_therm_setup_hw = gm20b_init_therm_setup_hw,
diff --git a/drivers/gpu/nvgpu/hal/init/hal_gp10b.c b/drivers/gpu/nvgpu/hal/init/hal_gp10b.c
index d379c7328..f2843617a 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_gp10b.c
@@ -42,6 +42,7 @@
 #include
 
 #include "hal/mm/cache/flush_gk20a.h"
+#include "hal/mm/gmmu/gmmu_gk20a.h"
 #include "hal/mc/mc_gm20b.h"
 #include "hal/mc/mc_gp10b.h"
 #include "hal/bus/bus_gk20a.h"
@@ -901,14 +902,7 @@ static const struct gpu_ops gp10b_ops = {
 		.is_fw_defined = gp10b_netlist_is_firmware_defined,
 	},
 	.mm = {
-		.gmmu_map = nvgpu_gmmu_map_locked,
-		.gmmu_unmap = nvgpu_gmmu_unmap_locked,
 		.vm_bind_channel = gk20a_vm_bind_channel,
-		.get_big_page_sizes = gm20b_mm_get_big_page_sizes,
-		.get_default_big_page_size = gp10b_mm_get_default_big_page_size,
-		.gpu_phys_addr = gm20b_gpu_phys_addr,
-		.get_iommu_bit = gp10b_mm_get_iommu_bit,
-		.get_mmu_levels = gp10b_mm_get_mmu_levels,
 		.init_mm_setup_hw = gk20a_init_mm_setup_hw,
 		.is_bar1_supported = gm20b_mm_is_bar1_supported,
 		.alloc_inst_block = gk20a_alloc_inst_block,
@@ -924,6 +918,16 @@ static const struct gpu_ops gp10b_ops = {
 			.l2_flush = gk20a_mm_l2_flush,
 			.cbc_clean = gk20a_mm_cbc_clean,
 		},
+		.gmmu = {
+			.get_mmu_levels = gp10b_mm_get_mmu_levels,
+			.map = nvgpu_gmmu_map_locked,
+			.unmap = nvgpu_gmmu_unmap_locked,
+			.get_big_page_sizes = gm20b_mm_get_big_page_sizes,
+			.get_default_big_page_size =
+				gp10b_mm_get_default_big_page_size,
+			.get_iommu_bit = gp10b_mm_get_iommu_bit,
+			.gpu_phys_addr = gm20b_gpu_phys_addr,
+		}
 	},
 	.pramin = {
 		.data032_r = pram_data032_r,
diff --git a/drivers/gpu/nvgpu/hal/init/hal_gv100.c b/drivers/gpu/nvgpu/hal/init/hal_gv100.c
index 1b3037dc2..a07fd2da0 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_gv100.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_gv100.c
@@ -24,6 +24,7 @@
 
 #include "hal/mm/cache/flush_gk20a.h"
 #include "hal/mm/cache/flush_gv11b.h"
+#include "hal/mm/gmmu/gmmu_gk20a.h"
 #include "hal/mc/mc_gm20b.h"
 #include "hal/mc/mc_gp10b.h"
 #include "hal/mc/mc_gv11b.h"
@@ -1089,13 +1090,7 @@ static const struct gpu_ops gv100_ops = {
 		.is_fw_defined = gv100_netlist_is_firmware_defined,
 	},
 	.mm = {
-		.gmmu_map = nvgpu_gmmu_map_locked,
-		.gmmu_unmap = nvgpu_gmmu_unmap_locked,
 		.vm_bind_channel = gk20a_vm_bind_channel,
-		.get_big_page_sizes = gm20b_mm_get_big_page_sizes,
-		.get_default_big_page_size = gp10b_mm_get_default_big_page_size,
-		.gpu_phys_addr = gv11b_gpu_phys_addr,
-		.get_mmu_levels = gp10b_mm_get_mmu_levels,
 		.init_mm_setup_hw = gv11b_init_mm_setup_hw,
 		.is_bar1_supported = gv11b_mm_is_bar1_supported,
 		.alloc_inst_block = gk20a_alloc_inst_block,
@@ -1114,6 +1109,15 @@ static const struct gpu_ops gv100_ops = {
 			.l2_flush = gv11b_mm_l2_flush,
 			.cbc_clean = gk20a_mm_cbc_clean,
 		},
+		.gmmu = {
+			.get_mmu_levels = gp10b_mm_get_mmu_levels,
+			.map = nvgpu_gmmu_map_locked,
+			.unmap = nvgpu_gmmu_unmap_locked,
+			.get_big_page_sizes = gm20b_mm_get_big_page_sizes,
+			.get_default_big_page_size =
+				gp10b_mm_get_default_big_page_size,
+			.gpu_phys_addr = gv11b_gpu_phys_addr,
+		}
 	},
 	.pramin = {
 		.data032_r = pram_data032_r,
diff --git a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c
index cec14bccf..2ec8e52f9 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c
@@ -32,6 +32,7 @@
 
 #include "hal/mm/cache/flush_gk20a.h"
 #include "hal/mm/cache/flush_gv11b.h"
+#include "hal/mm/gmmu/gmmu_gk20a.h"
 #include "hal/mc/mc_gm20b.h"
 #include "hal/mc/mc_gp10b.h"
 #include "hal/mc/mc_gv11b.h"
@@ -1064,14 +1065,7 @@ static const struct gpu_ops gv11b_ops = {
 		.is_fw_defined = gv11b_netlist_is_firmware_defined,
 	},
 	.mm = {
-		.gmmu_map = nvgpu_gmmu_map_locked,
-		.gmmu_unmap = nvgpu_gmmu_unmap_locked,
 		.vm_bind_channel = gk20a_vm_bind_channel,
-		.get_big_page_sizes = gm20b_mm_get_big_page_sizes,
-		.get_default_big_page_size = gp10b_mm_get_default_big_page_size,
-		.gpu_phys_addr = gv11b_gpu_phys_addr,
-		.get_iommu_bit = gp10b_mm_get_iommu_bit,
-		.get_mmu_levels = gp10b_mm_get_mmu_levels,
 		.init_mm_setup_hw = gv11b_init_mm_setup_hw,
 		.is_bar1_supported = gv11b_mm_is_bar1_supported,
 		.alloc_inst_block = gk20a_alloc_inst_block,
@@ -1089,6 +1083,16 @@ static const struct gpu_ops gv11b_ops = {
 			.l2_flush = gv11b_mm_l2_flush,
 			.cbc_clean = gk20a_mm_cbc_clean,
 		},
+		.gmmu = {
+			.get_mmu_levels = gp10b_mm_get_mmu_levels,
+			.map = nvgpu_gmmu_map_locked,
+			.unmap = nvgpu_gmmu_unmap_locked,
+			.get_big_page_sizes = gm20b_mm_get_big_page_sizes,
+			.get_default_big_page_size =
+				gp10b_mm_get_default_big_page_size,
+			.get_iommu_bit = gp10b_mm_get_iommu_bit,
+			.gpu_phys_addr = gv11b_gpu_phys_addr,
+		}
 	},
 	.therm = {
 		.init_therm_setup_hw = gv11b_init_therm_setup_hw,
diff --git a/drivers/gpu/nvgpu/hal/init/hal_tu104.c b/drivers/gpu/nvgpu/hal/init/hal_tu104.c
index 24a520603..c6d46dc98 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_tu104.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_tu104.c
@@ -24,6 +24,7 @@
 
 #include "hal/mm/cache/flush_gk20a.h"
 #include "hal/mm/cache/flush_gv11b.h"
+#include "hal/mm/gmmu/gmmu_gk20a.h"
 #include "hal/mc/mc_gm20b.h"
 #include "hal/mc/mc_gp10b.h"
 #include "hal/mc/mc_gv11b.h"
@@ -1124,13 +1125,7 @@ static const struct gpu_ops tu104_ops = {
 		.is_fw_defined = tu104_netlist_is_firmware_defined,
 	},
 	.mm = {
-		.gmmu_map = nvgpu_gmmu_map_locked,
-		.gmmu_unmap = nvgpu_gmmu_unmap_locked,
 		.vm_bind_channel = gk20a_vm_bind_channel,
-		.get_big_page_sizes = gm20b_mm_get_big_page_sizes,
-		.get_default_big_page_size = gp10b_mm_get_default_big_page_size,
-		.gpu_phys_addr = gv11b_gpu_phys_addr,
-		.get_mmu_levels = gp10b_mm_get_mmu_levels,
 		.init_mm_setup_hw = gv11b_init_mm_setup_hw,
 		.is_bar1_supported = gv11b_mm_is_bar1_supported,
 		.alloc_inst_block = gk20a_alloc_inst_block,
@@ -1149,6 +1144,15 @@ static const struct gpu_ops tu104_ops = {
 			.l2_flush = gv11b_mm_l2_flush,
 			.cbc_clean = gk20a_mm_cbc_clean,
 		},
+		.gmmu = {
+			.get_mmu_levels = gp10b_mm_get_mmu_levels,
+			.map = nvgpu_gmmu_map_locked,
+			.unmap = nvgpu_gmmu_unmap_locked,
+			.get_big_page_sizes = gm20b_mm_get_big_page_sizes,
+			.get_default_big_page_size =
+				gp10b_mm_get_default_big_page_size,
+			.gpu_phys_addr = gv11b_gpu_phys_addr,
+		}
 	},
 	.pramin = {
 		.data032_r = pram_data032_r,
diff --git a/drivers/gpu/nvgpu/hal/mm/gmmu/gmmu_gk20a.c b/drivers/gpu/nvgpu/hal/mm/gmmu/gmmu_gk20a.c
new file mode 100644
index 000000000..eeb9fce05
--- /dev/null
+++ b/drivers/gpu/nvgpu/hal/mm/gmmu/gmmu_gk20a.c
@@ -0,0 +1,267 @@
+/*
+ * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include
+#include
+#include
+
+#include
+
+#include "gmmu_gk20a.h"
+
+/* for gk20a the "video memory" apertures here are misnomers. */
+static inline u32 big_valid_pde0_bits(struct gk20a *g,
+		struct nvgpu_gmmu_pd *pd, u64 addr)
+{
+	u32 pde0_bits =
+		nvgpu_aperture_mask(g, pd->mem,
+			gmmu_pde_aperture_big_sys_mem_ncoh_f(),
+			gmmu_pde_aperture_big_sys_mem_coh_f(),
+			gmmu_pde_aperture_big_video_memory_f()) |
+		gmmu_pde_address_big_sys_f(
+			(u32)(addr >> gmmu_pde_address_shift_v()));
+
+	return pde0_bits;
+}
+
+static inline u32 small_valid_pde1_bits(struct gk20a *g,
+		struct nvgpu_gmmu_pd *pd, u64 addr)
+{
+	u32 pde1_bits =
+		nvgpu_aperture_mask(g, pd->mem,
+			gmmu_pde_aperture_small_sys_mem_ncoh_f(),
+			gmmu_pde_aperture_small_sys_mem_coh_f(),
+			gmmu_pde_aperture_small_video_memory_f()) |
+		gmmu_pde_vol_small_true_f() | /* tbd: why? */
+		gmmu_pde_address_small_sys_f(
+			(u32)(addr >> gmmu_pde_address_shift_v()));
+
+	return pde1_bits;
+}
+
+static void update_gmmu_pde_locked(struct vm_gk20a *vm,
+		const struct gk20a_mmu_level *l,
+		struct nvgpu_gmmu_pd *pd,
+		u32 pd_idx,
+		u64 virt_addr,
+		u64 phys_addr,
+		struct nvgpu_gmmu_attrs *attrs)
+{
+	struct gk20a *g = gk20a_from_vm(vm);
+	bool small_valid, big_valid;
+	u32 pd_offset = nvgpu_pd_offset_from_index(l, pd_idx);
+	u32 pde_v[2] = {0, 0};
+
+	small_valid = attrs->pgsz == GMMU_PAGE_SIZE_SMALL;
+	big_valid = attrs->pgsz == GMMU_PAGE_SIZE_BIG;
+
+	pde_v[0] = gmmu_pde_size_full_f();
+	pde_v[0] |= big_valid ?
+		big_valid_pde0_bits(g, pd, phys_addr) :
+		gmmu_pde_aperture_big_invalid_f();
+
+	pde_v[1] |= (small_valid ? small_valid_pde1_bits(g, pd, phys_addr) :
+		(gmmu_pde_aperture_small_invalid_f() |
+		gmmu_pde_vol_small_false_f()))
+		|
+		(big_valid ? (gmmu_pde_vol_big_true_f()) :
+		gmmu_pde_vol_big_false_f());
+
+	pte_dbg(g, attrs,
+		"PDE: i=%-4u size=%-2u offs=%-4u pgsz: %c%c | "
+		"GPU %#-12llx phys %#-12llx "
+		"[0x%08x, 0x%08x]",
+		pd_idx, l->entry_size, pd_offset,
+		small_valid ? 'S' : '-',
+		big_valid ? 'B' : '-',
+		virt_addr, phys_addr,
+		pde_v[1], pde_v[0]);
+
+	nvgpu_pd_write(g, &vm->pdb, (size_t)pd_offset + (size_t)0, pde_v[0]);
+	nvgpu_pd_write(g, &vm->pdb, (size_t)pd_offset + (size_t)1, pde_v[1]);
+}
+
+static void __update_pte_sparse(u32 *pte_w)
+{
+	pte_w[0] = gmmu_pte_valid_false_f();
+	pte_w[1] |= gmmu_pte_vol_true_f();
+}
+
+static void __update_pte(struct vm_gk20a *vm,
+		u32 *pte_w,
+		u64 phys_addr,
+		struct nvgpu_gmmu_attrs *attrs)
+{
+	struct gk20a *g = gk20a_from_vm(vm);
+	u32 page_size = vm->gmmu_page_sizes[attrs->pgsz];
+	u32 pte_valid = attrs->valid ?
+		gmmu_pte_valid_true_f() :
+		gmmu_pte_valid_false_f();
+	u32 phys_shifted = phys_addr >> gmmu_pte_address_shift_v();
+	u32 addr = attrs->aperture == APERTURE_SYSMEM ?
+		gmmu_pte_address_sys_f(phys_shifted) :
+		gmmu_pte_address_vid_f(phys_shifted);
+	int ctag_shift = 0;
+	int shamt = ilog2(g->ops.fb.compression_page_size(g));
+	if (shamt < 0) {
+		nvgpu_err(g, "shift amount 'shamt' is negative");
+	} else {
+		ctag_shift = shamt;
+	}
+
+	pte_w[0] = pte_valid | addr;
+
+	if (attrs->priv) {
+		pte_w[0] |= gmmu_pte_privilege_true_f();
+	}
+
+	pte_w[1] = nvgpu_aperture_mask_raw(g, attrs->aperture,
+			gmmu_pte_aperture_sys_mem_ncoh_f(),
+			gmmu_pte_aperture_sys_mem_coh_f(),
+			gmmu_pte_aperture_video_memory_f()) |
+		gmmu_pte_kind_f(attrs->kind_v) |
+		gmmu_pte_comptagline_f((U32(attrs->ctag) >> U32(ctag_shift)));
+
+	if ((attrs->ctag != 0ULL) &&
+	     vm->mm->use_full_comp_tag_line &&
+	    ((phys_addr & 0x10000ULL) != 0ULL)) {
+		pte_w[1] |= gmmu_pte_comptagline_f(
+				BIT32(gmmu_pte_comptagline_s() - 1U));
+	}
+
+	if (attrs->rw_flag == gk20a_mem_flag_read_only) {
+		pte_w[0] |= gmmu_pte_read_only_true_f();
+		pte_w[1] |= gmmu_pte_write_disable_true_f();
+	} else if (attrs->rw_flag == gk20a_mem_flag_write_only) {
+		pte_w[1] |= gmmu_pte_read_disable_true_f();
+	}
+
+	if (!attrs->cacheable) {
+		pte_w[1] |= gmmu_pte_vol_true_f();
+	}
+
+	if (attrs->ctag != 0ULL) {
+		attrs->ctag += page_size;
+	}
+}
+
+static void update_gmmu_pte_locked(struct vm_gk20a *vm,
+		const struct gk20a_mmu_level *l,
+		struct nvgpu_gmmu_pd *pd,
+		u32 pd_idx,
+		u64 virt_addr,
+		u64 phys_addr,
+		struct nvgpu_gmmu_attrs *attrs)
+{
+	struct gk20a *g = gk20a_from_vm(vm);
+	u32 page_size = vm->gmmu_page_sizes[attrs->pgsz];
+	u32 pd_offset = nvgpu_pd_offset_from_index(l, pd_idx);
+	u32 pte_w[2] = {0, 0};
+	int ctag_shift = 0;
+	int shamt = ilog2(g->ops.fb.compression_page_size(g));
+	if (shamt < 0) {
+		nvgpu_err(g, "shift amount 'shamt' is negative");
+	} else {
+		ctag_shift = shamt;
+	}
+
+	if (phys_addr != 0ULL) {
+		__update_pte(vm, pte_w, phys_addr, attrs);
+	} else if (attrs->sparse) {
+		__update_pte_sparse(pte_w);
+	}
+
+	pte_dbg(g, attrs,
+		"PTE: i=%-4u size=%-2u offs=%-4u | "
+		"GPU %#-12llx phys %#-12llx "
+		"pgsz: %3dkb perm=%-2s kind=%#02x APT=%-6s %c%c%c%c "
+		"ctag=0x%08x "
+		"[0x%08x, 0x%08x]",
+		pd_idx, l->entry_size, pd_offset,
+		virt_addr, phys_addr,
+		page_size >> 10,
+		nvgpu_gmmu_perm_str(attrs->rw_flag),
+		attrs->kind_v,
+		nvgpu_aperture_str(g, attrs->aperture),
+		attrs->cacheable ? 'C' : '-',
+		attrs->sparse ? 'S' : '-',
+		attrs->priv ? 'P' : '-',
+		attrs->valid ? 'V' : '-',
+		U32(attrs->ctag) >> U32(ctag_shift),
+		pte_w[1], pte_w[0]);
+
+	nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)0, pte_w[0]);
+	nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)1, pte_w[1]);
+}
+
+u32 gk20a_get_pde_pgsz(struct gk20a *g, const struct gk20a_mmu_level *l,
+	struct nvgpu_gmmu_pd *pd, u32 pd_idx)
+{
+	/*
+	 * big and small page sizes are the same
+	 */
+	return GMMU_PAGE_SIZE_SMALL;
+}
+
+u32 gk20a_get_pte_pgsz(struct gk20a *g, const struct gk20a_mmu_level *l,
+	struct nvgpu_gmmu_pd *pd, u32 pd_idx)
+{
+	/*
+	 * return invalid
+	 */
+	return GMMU_NR_PAGE_SIZES;
+}
+
+const struct gk20a_mmu_level gk20a_mm_levels_64k[] = {
+	{.hi_bit = {NV_GMMU_VA_RANGE-1, NV_GMMU_VA_RANGE-1},
+	 .lo_bit = {26, 26},
+	 .update_entry = update_gmmu_pde_locked,
+	 .entry_size = 8,
+	 .get_pgsz = gk20a_get_pde_pgsz},
+	{.hi_bit = {25, 25},
+	 .lo_bit = {12, 16},
+	 .update_entry = update_gmmu_pte_locked,
+	 .entry_size = 8,
+	 .get_pgsz = gk20a_get_pte_pgsz},
+	{.update_entry = NULL}
+};
+
+const struct gk20a_mmu_level gk20a_mm_levels_128k[] = {
+	{.hi_bit = {NV_GMMU_VA_RANGE-1, NV_GMMU_VA_RANGE-1},
+	 .lo_bit = {27, 27},
+	 .update_entry = update_gmmu_pde_locked,
+	 .entry_size = 8,
+	 .get_pgsz = gk20a_get_pde_pgsz},
+	{.hi_bit = {26, 26},
+	 .lo_bit = {12, 17},
+	 .update_entry = update_gmmu_pte_locked,
+	 .entry_size = 8,
+	 .get_pgsz = gk20a_get_pte_pgsz},
+	{.update_entry = NULL}
+};
+
+const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g,
+	u32 big_page_size)
+{
+	return (big_page_size == SZ_64K) ?
+		gk20a_mm_levels_64k : gk20a_mm_levels_128k;
+}
diff --git a/drivers/gpu/nvgpu/hal/mm/gmmu/gmmu_gk20a.h b/drivers/gpu/nvgpu/hal/mm/gmmu/gmmu_gk20a.h
new file mode 100644
index 000000000..e3515ca16
--- /dev/null
+++ b/drivers/gpu/nvgpu/hal/mm/gmmu/gmmu_gk20a.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef HAL_MM_GMMU_GMMU_GK20A_H
+#define HAL_MM_GMMU_GMMU_GK20A_H
+
+#include
+#include
+
+struct gk20a;
+
+/*
+ * Must include the header directly since the compiler needs to
+ * know the actual type before it declares an array (even if the size is not
+ * technically known here).
+ */
+extern const struct gk20a_mmu_level gk20a_mm_levels_64k[];
+extern const struct gk20a_mmu_level gk20a_mm_levels_128k[];
+
+const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g,
+	u32 big_page_size);
+
+u32 gk20a_get_pde_pgsz(struct gk20a *g, const struct gk20a_mmu_level *l,
+	struct nvgpu_gmmu_pd *pd, u32 pd_idx);
+u32 gk20a_get_pte_pgsz(struct gk20a *g, const struct gk20a_mmu_level *l,
+	struct nvgpu_gmmu_pd *pd, u32 pd_idx);
+
+#endif
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
index 1e9d290ea..9f8cebd89 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
@@ -1348,42 +1348,12 @@ struct gpu_ops {
 		bool (*is_fw_defined)(void);
 	} netlist;
 	struct {
-		u64 (*gmmu_map)(struct vm_gk20a *vm,
-				u64 map_offset,
-				struct nvgpu_sgt *sgt,
-				u64 buffer_offset,
-				u64 size,
-				u32 pgsz_idx,
-				u8 kind_v,
-				u32 ctag_offset,
-				u32 flags,
-				enum gk20a_mem_rw_flag rw_flag,
-				bool clear_ctags,
-				bool sparse,
-				bool priv,
-				struct vm_gk20a_mapping_batch *batch,
-				enum nvgpu_aperture aperture);
-		void (*gmmu_unmap)(struct vm_gk20a *vm,
-				u64 vaddr,
-				u64 size,
-				u32 pgsz_idx,
-				bool va_allocated,
-				enum gk20a_mem_rw_flag rw_flag,
-				bool sparse,
-				struct vm_gk20a_mapping_batch *batch);
 		int (*vm_bind_channel)(struct vm_gk20a *vm,
				struct channel_gk20a *ch);
-		u32 (*get_big_page_sizes)(void);
-		u32 (*get_default_big_page_size)(void);
-		u32 (*get_iommu_bit)(struct gk20a *g);
 		int (*init_mm_setup_hw)(struct gk20a *g);
 		bool (*is_bar1_supported)(struct gk20a *g);
 		int (*init_bar2_vm)(struct gk20a *g);
 		void (*remove_bar2_vm)(struct gk20a *g);
-		const struct gk20a_mmu_level *
-			(*get_mmu_levels)(struct gk20a *g, u32 big_page_size);
-		u64 (*gpu_phys_addr)(struct gk20a *g,
-				struct nvgpu_gmmu_attrs *attrs, u64 phys);
 		int (*alloc_inst_block)(struct gk20a *g,
				struct nvgpu_mem *inst_block);
 		void (*init_inst_block)(struct nvgpu_mem *inst_block,
@@ -1403,6 +1373,40 @@ struct gpu_ops {
 			int (*l2_flush)(struct gk20a *g, bool invalidate);
 			void (*cbc_clean)(struct gk20a *g);
 		} cache;
+		struct {
+			const struct gk20a_mmu_level *
+				(*get_mmu_levels)(struct gk20a *g,
+						u32 big_page_size);
+			u64 (*map)(struct vm_gk20a *vm,
+					u64 map_offset,
+					struct nvgpu_sgt *sgt,
+					u64 buffer_offset,
+					u64 size,
+					u32 pgsz_idx,
+					u8 kind_v,
+					u32 ctag_offset,
+					u32 flags,
+					enum gk20a_mem_rw_flag rw_flag,
+					bool clear_ctags,
+					bool sparse,
+					bool priv,
+					struct vm_gk20a_mapping_batch *batch,
+					enum nvgpu_aperture aperture);
+			void (*unmap)(struct vm_gk20a *vm,
+					u64 vaddr,
+					u64 size,
+					u32 pgsz_idx,
+					bool va_allocated,
+					enum gk20a_mem_rw_flag rw_flag,
+					bool sparse,
+					struct vm_gk20a_mapping_batch *batch);
+			u32 (*get_big_page_sizes)(void);
+			u32 (*get_default_big_page_size)(void);
+			u32 (*get_iommu_bit)(struct gk20a *g);
+			u64 (*gpu_phys_addr)(struct gk20a *g,
+					struct nvgpu_gmmu_attrs *attrs,
+					u64 phys);
+		} gmmu;
 	} mm;
 	/*
	 * This function is called to allocate secure memory
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c
index 326c31e74..2c973f635 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c
@@ -327,7 +327,8 @@ gk20a_ctrl_ioctl_gpu_characteristics(
 	gpu.big_page_size = nvgpu_mm_get_default_big_page_size(g);
 	gpu.pde_coverage_bit_count =
-		g->ops.mm.get_mmu_levels(g, gpu.big_page_size)[0].lo_bit[0];
+		g->ops.mm.gmmu.get_mmu_levels(g,
+			gpu.big_page_size)[0].lo_bit[0];
 
 	gpu.available_big_page_sizes =
		nvgpu_mm_get_available_big_page_sizes(g);
 
 	gpu.sm_arch_sm_version = g->params.sm_arch_sm_version;
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
index af51a5445..5d801d7d5 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
@@ -1355,7 +1355,7 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 	struct mm_gk20a *mm = &g->mm;
 	int err;
 	u32 virt_size;
-	u32 big_page_size = g->ops.mm.get_default_big_page_size();
+	u32 big_page_size = g->ops.mm.gmmu.get_default_big_page_size();
 
 	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
diff --git a/drivers/gpu/nvgpu/os/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/os/linux/nvgpu_mem.c
index 3d0f8a6d0..b8a99c68e 100644
--- a/drivers/gpu/nvgpu/os/linux/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/os/linux/nvgpu_mem.c
@@ -64,11 +64,11 @@ u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl)
 {
 	if (nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG) ||
	    !nvgpu_iommuable(g))
-		return g->ops.mm.gpu_phys_addr(g, NULL,
+		return g->ops.mm.gmmu.gpu_phys_addr(g, NULL,
				__nvgpu_sgl_phys(g, (struct nvgpu_sgl *)sgl));
 
 	if (sg_dma_address(sgl) == 0)
-		return g->ops.mm.gpu_phys_addr(g, NULL,
+		return g->ops.mm.gmmu.gpu_phys_addr(g, NULL,
				__nvgpu_sgl_phys(g, (struct nvgpu_sgl *)sgl));
 
 	if (sg_dma_address(sgl) == DMA_ERROR_CODE)
@@ -230,7 +230,7 @@ static u64 nvgpu_mem_linux_sgl_gpu_addr(struct gk20a *g,
					struct nvgpu_gmmu_attrs *attrs)
 {
 	if (sg_dma_address((struct scatterlist *)sgl) == 0)
-		return g->ops.mm.gpu_phys_addr(g, attrs,
+		return g->ops.mm.gmmu.gpu_phys_addr(g, attrs,
				__nvgpu_sgl_phys(g, sgl));
 
 	if (sg_dma_address((struct scatterlist *)sgl) == DMA_ERROR_CODE)
diff --git a/drivers/gpu/nvgpu/os/posix/posix-nvgpu_mem.c b/drivers/gpu/nvgpu/os/posix/posix-nvgpu_mem.c
index 531b040e2..8101f58d1 100644
--- a/drivers/gpu/nvgpu/os/posix/posix-nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/os/posix/posix-nvgpu_mem.c
@@ -85,7 +85,7 @@ u64 nvgpu_mem_sgl_gpu_addr(struct gk20a *g, struct nvgpu_sgl *sgl,
 	struct nvgpu_mem_sgl *mem = (struct nvgpu_mem_sgl *)sgl;
 
 	if (mem->dma == 0U) {
-		return g->ops.mm.gpu_phys_addr(g, attrs, mem->phys);
+		return g->ops.mm.gmmu.gpu_phys_addr(g, attrs, mem->phys);
 	}
 
 	if (mem->dma == DMA_ERROR_CODE) {
diff --git a/userspace/units/mm/allocators/buddy_allocator/buddy_allocator.c b/userspace/units/mm/allocators/buddy_allocator/buddy_allocator.c
index 3d939b4a1..62b5be1d8 100644
--- a/userspace/units/mm/allocators/buddy_allocator/buddy_allocator.c
+++ b/userspace/units/mm/allocators/buddy_allocator/buddy_allocator.c
@@ -68,9 +68,9 @@ static struct vm_gk20a *init_vm_env(struct unit_module *m, struct gk20a *g,
 	/* Initialize vm */
 
 	/* Minimum HALs for vm_init */
-	g->ops.mm.get_default_big_page_size =
+	g->ops.mm.gmmu.get_default_big_page_size =
 		gp10b_mm_get_default_big_page_size;
-	g->ops.mm.get_mmu_levels = gp10b_mm_get_mmu_levels;
+	g->ops.mm.gmmu.get_mmu_levels = gp10b_mm_get_mmu_levels;
 
 	/* Minimum HAL init for PRAMIN */
 	g->ops.bus.set_bar0_window = gk20a_bus_set_bar0_window;
@@ -90,7 +90,7 @@ static struct vm_gk20a *init_vm_env(struct unit_module *m, struct gk20a *g,
 		flags |= GPU_ALLOC_GVA_SPACE;
 
 	/* Init vm with big_pages disabled */
-	test_vm = nvgpu_vm_init(g, g->ops.mm.get_default_big_page_size(),
+	test_vm = nvgpu_vm_init(g, g->ops.mm.gmmu.get_default_big_page_size(),
			low_hole,
			aperture_size - low_hole,
			aperture_size,
diff --git a/userspace/units/mm/gmmu/page_table/page_table.c b/userspace/units/mm/gmmu/page_table/page_table.c
index da6049a93..e398182f4 100644
--- a/userspace/units/mm/gmmu/page_table/page_table.c
+++ b/userspace/units/mm/gmmu/page_table/page_table.c
@@ -296,14 +296,14 @@ static int init_mm(struct unit_module *m, struct gk20a *g)
 
 	p->mm_is_iommuable = true;
 
-	g->ops.mm.get_default_big_page_size =
+	g->ops.mm.gmmu.get_default_big_page_size =
 		gp10b_mm_get_default_big_page_size;
-	g->ops.mm.get_mmu_levels = gp10b_mm_get_mmu_levels;
+	g->ops.mm.gmmu.get_mmu_levels = gp10b_mm_get_mmu_levels;
 	g->ops.mm.alloc_inst_block = gk20a_alloc_inst_block;
 	g->ops.mm.init_inst_block = gv11b_init_inst_block;
-	g->ops.mm.gmmu_map = nvgpu_gmmu_map_locked;
-	g->ops.mm.gmmu_unmap = nvgpu_gmmu_unmap_locked;
-	g->ops.mm.gpu_phys_addr = gv11b_gpu_phys_addr;
+	g->ops.mm.gmmu.map = nvgpu_gmmu_map_locked;
+	g->ops.mm.gmmu.unmap = nvgpu_gmmu_unmap_locked;
+	g->ops.mm.gmmu.gpu_phys_addr = gv11b_gpu_phys_addr;
 	g->ops.mm.is_bar1_supported = gv11b_mm_is_bar1_supported;
 	g->ops.mm.cache.l2_flush = gv11b_mm_l2_flush;
 	g->ops.mm.cache.fb_flush = gk20a_mm_fb_flush;
@@ -329,7 +329,8 @@ static int init_mm(struct unit_module *m, struct gk20a *g)
 
 	mm->channel.kernel_size = NV_MM_DEFAULT_KERNEL_SIZE;
 
-	mm->pmu.vm = nvgpu_vm_init(g, g->ops.mm.get_default_big_page_size(),
+	mm->pmu.vm = nvgpu_vm_init(g,
+			g->ops.mm.gmmu.get_default_big_page_size(),
			low_hole,
			aperture_size - low_hole,
			aperture_size,
@@ -729,7 +730,7 @@ static struct nvgpu_sgt *custom_sgt_create(struct unit_module *m,
 }
 
 /*
- * Helper function to wrap calls to g->ops.mm.gmmu_map and thus giving
+ * Helper function to wrap calls to g->ops.mm.gmmu.map and thus giving
  * access to more parameters
  */
 static u64 gmmu_map_advanced(struct unit_module *m, struct gk20a *g,
@@ -755,7 +756,7 @@ static u64 gmmu_map_advanced(struct unit_module *m, struct gk20a *g,
 
 	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
 
-	vaddr = g->ops.mm.gmmu_map(vm, (u64) mem->cpu_va,
+	vaddr = g->ops.mm.gmmu.map(vm, (u64) mem->cpu_va,
			sgt,
			offset,
			mem->size,
@@ -775,7 +776,7 @@ static u64 gmmu_map_advanced(struct unit_module *m, struct gk20a *g,
 }
 
 /*
- * Helper function to wrap calls to g->ops.mm.gmmu_unmap and thus giving
+ * Helper function to wrap calls to g->ops.mm.gmmu.unmap and thus giving
  * access to more parameters
  */
 static void gmmu_unmap_advanced(struct vm_gk20a *vm, struct nvgpu_mem *mem,
@@ -786,7 +787,7 @@ static void gmmu_unmap_advanced(struct vm_gk20a *vm, struct nvgpu_mem *mem,
 
 	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
 
-	g->ops.mm.gmmu_unmap(vm,
+	g->ops.mm.gmmu.unmap(vm,
			gpu_va,
			mem->size,
			params->page_size,
@@ -973,9 +974,9 @@ static struct vm_gk20a *init_test_req_vm(struct gk20a *g)
 	/* 1.4. Have a 4GB kernel reserved space */
 	kernel_reserved = 4 * SZ_1G;
 
-	return nvgpu_vm_init(g, g->ops.mm.get_default_big_page_size(), low_hole,
-		kernel_reserved - low_hole, aperture_size, big_pages,
-		true, true, "testmem");
+	return nvgpu_vm_init(g, g->ops.mm.gmmu.get_default_big_page_size(),
+		low_hole, kernel_reserved - low_hole,
+		aperture_size, big_pages, true, true, "testmem");
 }
 
 /* Test case to cover NVGPU-RQCD-45 C1 */
diff --git a/userspace/units/mm/nvgpu_mem/nvgpu_mem.c b/userspace/units/mm/nvgpu_mem/nvgpu_mem.c
index c26615aa8..52a05560f 100644
--- a/userspace/units/mm/nvgpu_mem/nvgpu_mem.c
+++ b/userspace/units/mm/nvgpu_mem/nvgpu_mem.c
@@ -344,7 +344,7 @@ static int test_nvgpu_mem_iommu_translate(struct unit_module *m,
	 * Case: mm is iommuable
	 * Set HAL to enable iommu_translate
	 */
-	g->ops.mm.get_iommu_bit = gp10b_mm_get_iommu_bit;
+	g->ops.mm.gmmu.get_iommu_bit = gp10b_mm_get_iommu_bit;
 	temp_phys = nvgpu_mem_iommu_translate(g, test_sgl->phys);
 
 	if (temp_phys == test_sgl->phys) {
diff --git a/userspace/units/mm/page_table_faults/page_table_faults.c b/userspace/units/mm/page_table_faults/page_table_faults.c
index f7a4293d7..caeeb7713 100644
--- a/userspace/units/mm/page_table_faults/page_table_faults.c
+++ b/userspace/units/mm/page_table_faults/page_table_faults.c
@@ -117,14 +117,14 @@ static int init_mm(struct unit_module *m, struct gk20a *g)
 	p->mm_is_iommuable = true;
 
 	/* Minimum HALs for page_table */
-	g->ops.mm.get_default_big_page_size =
+	g->ops.mm.gmmu.get_default_big_page_size =
 		gp10b_mm_get_default_big_page_size;
-	g->ops.mm.get_mmu_levels = gp10b_mm_get_mmu_levels;
+	g->ops.mm.gmmu.get_mmu_levels = gp10b_mm_get_mmu_levels;
 	g->ops.mm.alloc_inst_block = gk20a_alloc_inst_block;
 	g->ops.mm.init_inst_block = gv11b_init_inst_block;
-	g->ops.mm.gmmu_map = nvgpu_gmmu_map_locked;
-	g->ops.mm.gmmu_unmap = nvgpu_gmmu_unmap_locked;
-	g->ops.mm.gpu_phys_addr = gv11b_gpu_phys_addr;
+	g->ops.mm.gmmu.map = nvgpu_gmmu_map_locked;
+	g->ops.mm.gmmu.unmap = nvgpu_gmmu_unmap_locked;
+	g->ops.mm.gmmu.gpu_phys_addr = gv11b_gpu_phys_addr;
 	g->ops.fb.compression_page_size = gp10b_fb_compression_page_size;
 	g->ops.fb.tlb_invalidate = gm20b_fb_tlb_invalidate;
 	g->ops.ramin.init_pdb = gp10b_ramin_init_pdb;
@@ -179,7 +179,8 @@ static int init_mm(struct unit_module *m, struct gk20a *g)
 	aperture_size = GK20A_PMU_VA_SIZE;
 	mm->pmu.aperture_size = GK20A_PMU_VA_SIZE;
 
-	mm->pmu.vm = nvgpu_vm_init(g, g->ops.mm.get_default_big_page_size(),
+	mm->pmu.vm = nvgpu_vm_init(g,
+			g->ops.mm.gmmu.get_default_big_page_size(),
			low_hole,
			aperture_size - low_hole,
			aperture_size,
@@ -193,7 +194,8 @@ static int init_mm(struct unit_module *m, struct gk20a *g)
 
 	/* BAR2 memory space */
 	mm->bar2.aperture_size = U32(32) << 20U;
-	mm->bar2.vm = nvgpu_vm_init(g, g->ops.mm.get_default_big_page_size(),
+	mm->bar2.vm = nvgpu_vm_init(g,
+			g->ops.mm.gmmu.get_default_big_page_size(),
		SZ_4K, mm->bar2.aperture_size - SZ_4K,
		mm->bar2.aperture_size, false, false, false, "bar2");
 	if (mm->bar2.vm == NULL) {
@@ -323,10 +325,10 @@ static int test_page_faults_inst_block(struct unit_module *m, struct gk20a *g,
 	/* Handle some corner cases */
 	if (scenario == 1) {
		/* Init inst_block with large page size */
-		big_page_size = g->ops.mm.get_default_big_page_size();
+		big_page_size = g->ops.mm.gmmu.get_default_big_page_size();
 	} else if (scenario == 2) {
		/* Handle branch case in gv11b_init_inst_block() */
-		big_page_size = g->ops.mm.get_default_big_page_size();
+		big_page_size = g->ops.mm.gmmu.get_default_big_page_size();
 		g->ops.ramin.set_big_page_size = NULL;
 	}
 }
diff --git a/userspace/units/mm/vm/vm.c b/userspace/units/mm/vm/vm.c
index 881728faf..5b01facad 100644
--- a/userspace/units/mm/vm/vm.c
+++ b/userspace/units/mm/vm/vm.c
@@ -133,12 +133,12 @@ static int init_test_env(struct unit_module *m, struct gk20a *g)
 	g->ops.fb.compression_page_size = gp10b_fb_compression_page_size;
 	g->ops.fb.tlb_invalidate = gm20b_fb_tlb_invalidate;
 
-	g->ops.mm.get_default_big_page_size =
+	g->ops.mm.gmmu.get_default_big_page_size =
 		gp10b_mm_get_default_big_page_size;
-	g->ops.mm.get_mmu_levels = gp10b_mm_get_mmu_levels;
-	g->ops.mm.gmmu_map = nvgpu_gmmu_map_locked;
-	g->ops.mm.gmmu_unmap = nvgpu_gmmu_unmap_locked;
-	g->ops.mm.gpu_phys_addr = gv11b_gpu_phys_addr;
+	g->ops.mm.gmmu.get_mmu_levels = gp10b_mm_get_mmu_levels;
+	g->ops.mm.gmmu.map = nvgpu_gmmu_map_locked;
+	g->ops.mm.gmmu.unmap = nvgpu_gmmu_unmap_locked;
+	g->ops.mm.gmmu.gpu_phys_addr = gv11b_gpu_phys_addr;
 	g->ops.mm.cache.l2_flush = gv11b_mm_l2_flush;
 	g->ops.mm.cache.fb_flush = gk20a_mm_fb_flush;
 
@@ -388,7 +388,7 @@ static int test_map_buf(struct unit_module *m, struct gk20a *g, void *__args)
 	unit_info(m, " - Kernel Reserved Size = 0x%llx\n", kernel_reserved);
 	unit_info(m, " - Total Aperture Size = 0x%llx\n", aperture_size);
 	vm = nvgpu_vm_init(g,
-		g->ops.mm.get_default_big_page_size(),
+		g->ops.mm.gmmu.get_default_big_page_size(),
		low_hole,
		kernel_reserved,
		aperture_size,
@@ -524,7 +524,7 @@ static int test_map_buf_gpu_va(struct unit_module *m,
 	unit_info(m, " - Kernel Reserved Size = 0x%llx\n", kernel_reserved);
 	unit_info(m, " - Total Aperture Size = 0x%llx\n", aperture_size);
 	vm = nvgpu_vm_init(g,
-		g->ops.mm.get_default_big_page_size(),
+		g->ops.mm.gmmu.get_default_big_page_size(),
		low_hole,
		kernel_reserved,
		aperture_size,