diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu
index ce4f67b05..e689aa7f0 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu
@@ -65,6 +65,7 @@ nvgpu-y := \
 	common/mm/vm_area.o \
 	common/mm/nvgpu_mem.o \
 	common/mm/comptags.o \
+	common/mm/mm.o \
 	common/bus.o \
 	common/enabled.o \
 	common/pramin.o \
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_dbg.c b/drivers/gpu/nvgpu/common/linux/ioctl_dbg.c
index 56edc11b5..c8831a975 100644
--- a/drivers/gpu/nvgpu/common/linux/ioctl_dbg.c
+++ b/drivers/gpu/nvgpu/common/linux/ioctl_dbg.c
@@ -1372,7 +1372,7 @@ static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset)
 	err = g->ops.dbg_session_ops.perfbuffer_disable(g);
 
 	nvgpu_vm_unmap_buffer(vm, offset, NULL);
-	gk20a_free_inst_block(g, &mm->perfbuf.inst_block);
+	nvgpu_free_inst_block(g, &mm->perfbuf.inst_block);
 	nvgpu_vm_put(vm);
 
 	g->perfbuf.owner = NULL;
diff --git a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
index c6f10a692..a2546e9d4 100644
--- a/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
+++ b/drivers/gpu/nvgpu/common/mm/buddy_allocator.c
@@ -25,8 +25,8 @@
 #include <nvgpu/bug.h>
 #include <nvgpu/log2.h>
 #include <nvgpu/barrier.h>
-
-#include "gk20a/mm_gk20a.h"
+#include <nvgpu/mm.h>
+#include <nvgpu/vm.h>
 
 #include "buddy_allocator_priv.h"
 
diff --git a/drivers/gpu/nvgpu/common/mm/mm.c b/drivers/gpu/nvgpu/common/mm/mm.c
new file mode 100644
index 000000000..1027ed283
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/mm.c
@@ -0,0 +1,426 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <nvgpu/mm.h>
+#include <nvgpu/vm.h>
+#include <nvgpu/dma.h>
+#include <nvgpu/vm_area.h>
+#include <nvgpu/gmmu.h>
+#include <nvgpu/vidmem.h>
+#include <nvgpu/semaphore.h>
+#include <nvgpu/pramin.h>
+#include <nvgpu/enabled.h>
+
+#include "gk20a/gk20a.h"
+
+/*
+ * Pick the PTE size for a fixed-address mapping from the reserved memory area
+ * that nvgpu_vm_area_find() returns for @base. If no reserved area can be
+ * found use small pages. @size is not used here.
+ */
+enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm,
+					      u64 base, u64 size)
+{
+	struct nvgpu_vm_area *area = nvgpu_vm_area_find(vm, base);
+
+	/* The area, when there is one, dictates the page size. */
+	if (area)
+		return area->pgsz_idx;
+	return gmmu_page_size_small;
+}
+
+/*
+ * PTE size selection for when unified address spaces are not supported;
+ * __get_pte_size() uses this if NVGPU_MM_UNIFY_ADDRESS_SPACES is not enabled.
+ */
+static enum gmmu_pgsz_gk20a __get_pte_size_split_addr(struct vm_gk20a *vm,
+					       u64 base, u64 size)
+{
+	/* Non-zero base: its side of the small page limit decides. */
+	if (base)
+		return base < __nv_gmmu_va_small_page_limit() ?
+			gmmu_page_size_small : gmmu_page_size_big;
+
+	/* Zero base: big pages only when size reaches the big page size. */
+	if (size < vm->gmmu_page_sizes[gmmu_page_size_big])
+		return gmmu_page_size_small;
+	return gmmu_page_size_big;
+}
+
+/*
+ * This determines the PTE size for a given alloc. Used by both the GVA space
+ * allocator and the mm core code so that agreement can be reached on how to
+ * map allocations.
+ *
+ * The page size of a buffer is decided as follows, in this order:
+ *
+ *   o  If the VM doesn't support big pages, small pages must be used.
+ *   o  If NVGPU_MM_UNIFY_ADDRESS_SPACES is not enabled, the decision is
+ *      delegated to __get_pte_size_split_addr().
+ *   o  If the base address is non-zero (fixed address map):
+ *      - Attempt to find a reserved memory area and use its page size.
+ *      - If no reserved area is found, default to small pages.
+ *   o  If the base is zero:
+ *      - If the size is larger than or equal to the big page size, use big
+ *        pages.
+ *      - Otherwise use small pages.
+ */
+enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm, u64 base, u64 size)
+{
+	struct gk20a *g = gk20a_from_vm(vm);
+
+	if (!vm->big_pages)
+		return gmmu_page_size_small;
+
+	if (!nvgpu_is_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES))
+		return __get_pte_size_split_addr(vm, base, size);
+
+	if (base)
+		return __get_pte_size_fixed_map(vm, base, size);
+
+	/* Unified address space and a zero base: the size decides. */
+	return size >= vm->gmmu_page_sizes[gmmu_page_size_big] ?
+		gmmu_page_size_big : gmmu_page_size_small;
+}
+
+/* Suspend MM: pause the vidmem thread, then run the cbc_clean and l2_flush ops. */
+int nvgpu_mm_suspend(struct gk20a *g)
+{
+	nvgpu_info(g, "MM suspend running...");
+
+	nvgpu_vidmem_thread_pause_sync(&g->mm);
+
+	g->ops.mm.cbc_clean(g);
+	g->ops.mm.l2_flush(g, false);
+
+	nvgpu_info(g, "MM suspend done!");
+	return 0;
+}
+
+/* Instance block address; the physical one if mm.has_physical_mode is set. */
+u64 nvgpu_inst_block_addr(struct gk20a *g, struct nvgpu_mem *inst_block)
+{
+	if (!g->mm.has_physical_mode)
+		return nvgpu_mem_get_addr(g, inst_block);
+	return nvgpu_mem_get_phys_addr(g, inst_block);
+}
+
+void nvgpu_free_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block)
+{
+	if (nvgpu_mem_is_valid(inst_block)) /* no-op for a non-valid mem */
+		nvgpu_dma_free(g, inst_block);
+}
+
+static int nvgpu_alloc_sysmem_flush(struct gk20a *g) /* into mm.sysmem_flush */
+{
+	return nvgpu_dma_alloc_sys(g, SZ_4K, &g->mm.sysmem_flush);
+}
+
+/* CE teardown; set as mm->remove_ce_support in nvgpu_init_mm_setup_sw(). */
+static void nvgpu_remove_mm_ce_support(struct mm_gk20a *mm)
+{
+	struct gk20a *g = gk20a_from_mm(mm);
+
+	if (mm->vidmem.ce_ctx_id != (u32)~0)
+		gk20a_ce_delete_context_priv(g, mm->vidmem.ce_ctx_id);
+	mm->vidmem.ce_ctx_id = (u32)~0;
+
+	nvgpu_vm_put(mm->ce.vm);
+}
+
+/* MM teardown; installed as mm->remove_support by nvgpu_init_mm_setup_sw(). */
+static void nvgpu_remove_mm_support(struct mm_gk20a *mm)
+{
+	struct gk20a *g = gk20a_from_mm(mm);
+
+	if (g->ops.mm.fault_info_mem_destroy)
+		g->ops.mm.fault_info_mem_destroy(g);
+	if (g->ops.mm.remove_bar2_vm)
+		g->ops.mm.remove_bar2_vm(g);
+
+	if (g->ops.mm.is_bar1_supported(g)) {
+		nvgpu_free_inst_block(g, &mm->bar1.inst_block);
+		nvgpu_vm_put(mm->bar1.vm);
+	}
+
+	nvgpu_free_inst_block(g, &mm->pmu.inst_block);
+	nvgpu_free_inst_block(g, &mm->hwpm.inst_block);
+	nvgpu_vm_put(mm->pmu.vm);
+	nvgpu_vm_put(mm->cde.vm);
+
+	nvgpu_semaphore_sea_destroy(g);
+	nvgpu_vidmem_destroy(g);
+	nvgpu_pd_cache_fini(g);
+}
+
+/*
+ * pmu vm, share channel_vm interfaces. Creates the "system" VM in
+ * mm->pmu.vm and initialises the PMU instance block for it.
+ */
+static int nvgpu_init_system_vm(struct mm_gk20a *mm)
+{
+	struct gk20a *g = gk20a_from_mm(mm);
+	struct nvgpu_mem *pmu_inst = &mm->pmu.inst_block;
+	u32 big_page_size = g->ops.mm.get_default_big_page_size();
+	u32 low_hole = SZ_4K * 16;
+	u32 aperture_size = GK20A_PMU_VA_SIZE * 2;
+	int err;
+
+	mm->pmu.aperture_size = GK20A_PMU_VA_SIZE;
+	nvgpu_info(g, "pmu vm size = 0x%x", mm->pmu.aperture_size);
+
+	/*
+	 * No user region - so we will pass that as zero sized.
+	 */
+	mm->pmu.vm = nvgpu_vm_init(g, big_page_size,
+				   low_hole,
+				   aperture_size - low_hole,
+				   aperture_size,
+				   true, false,
+				   "system");
+	if (!mm->pmu.vm)
+		return -ENOMEM;
+
+	err = g->ops.mm.alloc_inst_block(g, pmu_inst);
+	if (err) {
+		/* Undo the nvgpu_vm_init() above. */
+		nvgpu_vm_put(mm->pmu.vm);
+		return err;
+	}
+
+	g->ops.mm.init_inst_block(pmu_inst, mm->pmu.vm, big_page_size);
+
+	return 0;
+}
+
+/* Set up the HWPM instance block on mm->pmu.vm (needs the system VM first). */
+static int nvgpu_init_hwpm(struct mm_gk20a *mm)
+{
+	struct gk20a *g = gk20a_from_mm(mm);
+	struct nvgpu_mem *hwpm_inst = &mm->hwpm.inst_block;
+	int err = g->ops.mm.alloc_inst_block(g, hwpm_inst);
+
+	if (err)
+		return err;
+
+	g->ops.mm.init_inst_block(hwpm_inst, mm->pmu.vm, 0);
+	return 0;
+}
+
+/* Create the "cde" VM in mm->cde.vm; -ENOMEM if nvgpu_vm_init() fails. */
+static int nvgpu_init_cde_vm(struct mm_gk20a *mm)
+{
+	struct gk20a *g = gk20a_from_mm(mm);
+	u32 big_page_size = g->ops.mm.get_default_big_page_size();
+
+	mm->cde.vm = nvgpu_vm_init(g, big_page_size, big_page_size << 10,
+				   NV_MM_DEFAULT_KERNEL_SIZE,
+				   NV_MM_DEFAULT_KERNEL_SIZE +
+				   NV_MM_DEFAULT_USER_SIZE,
+				   false, false, "cde");
+
+	return mm->cde.vm ? 0 : -ENOMEM;
+}
+
+/* Create the "ce" VM in mm->ce.vm; -ENOMEM if nvgpu_vm_init() fails. */
+static int nvgpu_init_ce_vm(struct mm_gk20a *mm)
+{
+	struct gk20a *g = gk20a_from_mm(mm);
+	u32 big_page_size = g->ops.mm.get_default_big_page_size();
+
+	mm->ce.vm = nvgpu_vm_init(g, big_page_size, big_page_size << 10,
+				  NV_MM_DEFAULT_KERNEL_SIZE,
+				  NV_MM_DEFAULT_KERNEL_SIZE +
+				  NV_MM_DEFAULT_USER_SIZE,
+				  false, false, "ce");
+
+	return mm->ce.vm ? 0 : -ENOMEM;
+}
+
+/* Create the CE context for vidmem page clearing if it does not exist yet. */
+void nvgpu_init_mm_ce_context(struct gk20a *g)
+{
+#if defined(CONFIG_GK20A_VIDMEM)
+	struct mm_gk20a *mm = &g->mm;
+
+	/* Nothing to do without vidmem or when a context already exists. */
+	if (!mm->vidmem.size || mm->vidmem.ce_ctx_id != (u32)~0)
+		return;
+
+	mm->vidmem.ce_ctx_id = gk20a_ce_create_context_with_cb(g,
+			gk20a_fifo_get_fast_ce_runlist_id(g),
+			-1, -1, -1, NULL);
+	if (mm->vidmem.ce_ctx_id == (u32)~0)
+		nvgpu_err(g,
+			"Failed to allocate CE context for vidmem page clearing support");
+#endif
+}
+
+/* Run the optional FB reset, clock gating and FB fs-state hooks; returns 0. */
+static int nvgpu_init_mm_reset_enable_hw(struct gk20a *g)
+{
+	if (g->ops.fb.reset)
+		g->ops.fb.reset(g);
+
+	if (g->ops.clock_gating.slcg_fb_load_gating_prod)
+		g->ops.clock_gating.slcg_fb_load_gating_prod(g,
+				g->slcg_enabled);
+	if (g->ops.clock_gating.slcg_ltc_load_gating_prod)
+		g->ops.clock_gating.slcg_ltc_load_gating_prod(g,
+				g->slcg_enabled);
+	if (g->ops.clock_gating.blcg_fb_load_gating_prod)
+		g->ops.clock_gating.blcg_fb_load_gating_prod(g,
+				g->blcg_enabled);
+	if (g->ops.clock_gating.blcg_ltc_load_gating_prod)
+		g->ops.clock_gating.blcg_ltc_load_gating_prod(g,
+				g->blcg_enabled);
+
+	if (g->ops.fb.init_fs_state)
+		g->ops.fb.init_fs_state(g);
+	return 0;
+}
+
+/*
+ * Create the "bar1" VM and initialise the BAR1 instance block for it.
+ */
+static int nvgpu_init_bar1_vm(struct mm_gk20a *mm)
+{
+	struct gk20a *g = gk20a_from_mm(mm);
+	struct nvgpu_mem *bar1_inst = &mm->bar1.inst_block;
+	u32 big_page_size = g->ops.mm.get_default_big_page_size();
+	int err;
+
+	mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20;
+	nvgpu_info(g, "bar1 vm size = 0x%x", mm->bar1.aperture_size);
+
+	mm->bar1.vm = nvgpu_vm_init(g, big_page_size, SZ_4K,
+				    mm->bar1.aperture_size - SZ_4K,
+				    mm->bar1.aperture_size,
+				    true, false, "bar1");
+	if (!mm->bar1.vm)
+		return -ENOMEM;
+
+	err = g->ops.mm.alloc_inst_block(g, bar1_inst);
+	if (err) {
+		/* Undo the nvgpu_vm_init() above. */
+		nvgpu_vm_put(mm->bar1.vm);
+		return err;
+	}
+
+	g->ops.mm.init_inst_block(bar1_inst, mm->bar1.vm, big_page_size);
+	return 0;
+}
+
+/*
+ * One-time MM software setup; returns early once mm->sw_ready is set.
+ * NOTE(review): failure paths return without undoing earlier steps - confirm.
+ */
+static int nvgpu_init_mm_setup_sw(struct gk20a *g)
+{
+	struct mm_gk20a *mm = &g->mm;
+	int err;
+
+	if (mm->sw_ready) {
+		nvgpu_info(g, "skip init");
+		return 0;
+	}
+
+	mm->g = g;
+	nvgpu_mutex_init(&mm->l2_op_lock);
+	/* TBD: make channel vm size configurable */
+	mm->channel.user_size = NV_MM_DEFAULT_USER_SIZE -
+		NV_MM_DEFAULT_KERNEL_SIZE;
+	mm->channel.kernel_size = NV_MM_DEFAULT_KERNEL_SIZE;
+
+	nvgpu_info(g, "channel vm size: user %dMB  kernel %dMB",
+		   (int)(mm->channel.user_size >> 20),
+		   (int)(mm->channel.kernel_size >> 20));
+
+	nvgpu_init_pramin(mm);
+	mm->vidmem.ce_ctx_id = (u32)~0;
+
+	err = nvgpu_vidmem_init(mm);
+	if (err)
+		return err;
+
+	/*
+	 * this requires fixed allocations in vidmem which must be
+	 * allocated before all other buffers
+	 */
+	if (g->ops.pmu.alloc_blob_space
+			&& !nvgpu_is_enabled(g, NVGPU_MM_UNIFIED_MEMORY)) {
+		err = g->ops.pmu.alloc_blob_space(g, 0, &g->acr.ucode_blob);
+		if (err)
+			return err;
+	}
+
+	err = nvgpu_alloc_sysmem_flush(g);
+	if (err)
+		return err;
+
+	if (g->ops.mm.is_bar1_supported(g)) {
+		err = nvgpu_init_bar1_vm(mm);
+		if (err)
+			return err;
+	}
+	if (g->ops.mm.init_bar2_vm) {
+		err = g->ops.mm.init_bar2_vm(g);
+		if (err)
+			return err;
+	}
+	err = nvgpu_init_system_vm(mm);
+	if (err)
+		return err;
+
+	err = nvgpu_init_hwpm(mm);
+	if (err)
+		return err;
+
+	err = nvgpu_init_cde_vm(mm);
+	if (err)
+		return err;
+
+	err = nvgpu_init_ce_vm(mm);
+	if (err)
+		return err;
+
+	mm->remove_support = nvgpu_remove_mm_support;
+	mm->remove_ce_support = nvgpu_remove_mm_ce_support;
+	mm->sw_ready = true;
+	return 0;
+}
+
+/* Bring up MM: HW reset/enable, SW setup, then the optional HW setup hook. */
+int nvgpu_init_mm_support(struct gk20a *g)
+{
+	int err;	/* signed: carries negative error codes such as -ENOMEM */
+
+	err = nvgpu_init_mm_reset_enable_hw(g);
+	if (err)
+		return err;
+
+	err = nvgpu_init_mm_setup_sw(g);
+	if (err)
+		return err;
+
+	if (g->ops.mm.init_mm_setup_hw)
+		err = g->ops.mm.init_mm_setup_hw(g);
+	return err;
+}
diff --git a/drivers/gpu/nvgpu/gk20a/bus_gk20a.c b/drivers/gpu/nvgpu/gk20a/bus_gk20a.c
index 938c4b007..9b031bbf4 100644
--- a/drivers/gpu/nvgpu/gk20a/bus_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/bus_gk20a.c
@@ -24,6 +24,7 @@
 #include <nvgpu/log.h>
 #include <nvgpu/soc.h>
 #include <nvgpu/bus.h>
+#include <nvgpu/mm.h>
 
 #include "gk20a.h"
 #include "bus_gk20a.h"
@@ -137,8 +138,8 @@ int gk20a_read_ptimer(struct gk20a *g, u64 *value)
 
 int gk20a_bus_bar1_bind(struct gk20a *g, struct nvgpu_mem *bar1_inst)
 {
-	u64 iova = gk20a_mm_inst_block_addr(g, bar1_inst);
-	u32 ptr_v = (u32)(iova >> bar1_instance_block_shift_gk20a());
+	u64 iova = nvgpu_inst_block_addr(g, bar1_inst);
+	u32 ptr_v = (u32)(iova >> bus_bar1_block_ptr_shift_v());
 
 	gk20a_dbg_info("bar1 inst block ptr: 0x%08x", ptr_v);
 
diff --git a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
index 725ae2782..e3896981e 100644
--- a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
@@ -29,6 +29,7 @@
 #include <nvgpu/kmem.h>
 #include <nvgpu/lock.h>
 #include <nvgpu/dma.h>
+#include <nvgpu/mm.h>
 
 #include "gk20a.h"
 #include "css_gr_gk20a.h"
@@ -183,7 +184,7 @@ int css_hw_enable_snapshot(struct channel_gk20a *ch,
 	gk20a_writel(g, perf_pmasys_outsize_r(), snapshot_size);
 
 	/* this field is aligned to 4K */
-	inst_pa_page = gk20a_mm_inst_block_addr(g, &g->mm.hwpm.inst_block) >> 12;
+	inst_pa_page = nvgpu_inst_block_addr(g, &g->mm.hwpm.inst_block) >> 12;
 
 	/* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK
 	 * should be written last */
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index 8c39ecb76..802ccd766 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -26,6 +26,7 @@
 #include <nvgpu/log.h>
 #include <nvgpu/vm.h>
 #include <nvgpu/atomic.h>
+#include <nvgpu/mm.h>
 
 #include "gk20a.h"
 #include "gk20a/platform_gk20a.h"
@@ -305,7 +306,7 @@ int gk20a_perfbuf_enable_locked(struct gk20a *g, u64 offset, u32 size)
 		return err;
 	}
 
-	err = gk20a_alloc_inst_block(g, &mm->perfbuf.inst_block);
+	err = g->ops.mm.alloc_inst_block(g, &mm->perfbuf.inst_block);
 	if (err)
 		return err;
 
@@ -322,8 +323,7 @@ int gk20a_perfbuf_enable_locked(struct gk20a *g, u64 offset, u32 size)
 	gk20a_writel(g, perf_pmasys_outsize_r(), size);
 
 	/* this field is aligned to 4K */
-	inst_pa_page = gk20a_mm_inst_block_addr(g,
-						&mm->perfbuf.inst_block) >> 12;
+	inst_pa_page = nvgpu_inst_block_addr(g, &mm->perfbuf.inst_block) >> 12;
 
 	/* A write to MEM_BLOCK triggers the block bind operation. MEM_BLOCK
 	 * should be written last */
diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
index 7fd1793ca..12d7dcb9b 100644
--- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c
@@ -32,6 +32,7 @@
 #include <nvgpu/circ_buf.h>
 #include <nvgpu/thread.h>
 #include <nvgpu/barrier.h>
+#include <nvgpu/mm.h>
 
 #include "ctxsw_trace_gk20a.h"
 #include "fecs_trace_gk20a.h"
@@ -93,7 +94,7 @@ static inline u64 gk20a_fecs_trace_record_ts_timestamp_v(u64 ts)
 
 static u32 gk20a_fecs_trace_fecs_context_ptr(struct gk20a *g, struct channel_gk20a *ch)
 {
-	return (u32) (gk20a_mm_inst_block_addr(g, &ch->inst_block) >> 12LL);
+	return (u32) (nvgpu_inst_block_addr(g, &ch->inst_block) >> 12LL);
 }
 
 static inline int gk20a_fecs_trace_num_ts(void)
@@ -633,12 +634,12 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g,
 	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw,
 			"chid=%d context_ptr=%x inst_block=%llx",
 			ch->chid, context_ptr,
-			gk20a_mm_inst_block_addr(g, &ch->inst_block));
+			nvgpu_inst_block_addr(g, &ch->inst_block));
 
 	if (!trace)
 		return -ENOMEM;
 
-	pa = gk20a_mm_inst_block_addr(g, &trace->trace_buf);
+	pa = nvgpu_inst_block_addr(g, &trace->trace_buf);
 	if (!pa)
 		return -ENOMEM;
 	aperture = nvgpu_aperture_mask(g, &trace->trace_buf,
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 03ca69846..fc71c358e 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -24,6 +24,7 @@
 
 #include <trace/events/gk20a.h>
 
+#include <nvgpu/mm.h>
 #include <nvgpu/dma.h>
 #include <nvgpu/timers.h>
 #include <nvgpu/semaphore.h>
@@ -1058,7 +1059,7 @@ gk20a_refch_from_inst_ptr(struct gk20a *g, u64 inst_ptr)
 		if (!ch)
 			continue;
 
-		ch_inst_ptr = gk20a_mm_inst_block_addr(g, &ch->inst_block);
+		ch_inst_ptr = nvgpu_inst_block_addr(g, &ch->inst_block);
 		if (inst_ptr == ch_inst_ptr)
 			return ch;
 
@@ -1659,10 +1660,10 @@ static bool gk20a_fifo_handle_mmu_fault(
 						ch->chid);
 			}
 		} else if (mmfault_info.inst_ptr ==
-				gk20a_mm_inst_block_addr(g, &g->mm.bar1.inst_block)) {
+				nvgpu_inst_block_addr(g, &g->mm.bar1.inst_block)) {
 			nvgpu_err(g, "mmu fault from bar1");
 		} else if (mmfault_info.inst_ptr ==
-				gk20a_mm_inst_block_addr(g, &g->mm.pmu.inst_block)) {
+				nvgpu_inst_block_addr(g, &g->mm.pmu.inst_block)) {
 			nvgpu_err(g, "mmu fault from pmu");
 		} else
 			nvgpu_err(g, "couldn't locate channel for mmu fault");
@@ -3973,12 +3974,12 @@ int gk20a_fifo_alloc_inst(struct gk20a *g, struct channel_gk20a *ch)
 
 	gk20a_dbg_fn("");
 
-	err = gk20a_alloc_inst_block(g, &ch->inst_block);
+	err = g->ops.mm.alloc_inst_block(g, &ch->inst_block);
 	if (err)
 		return err;
 
 	gk20a_dbg_info("channel %d inst block physical addr: 0x%16llx",
-		ch->chid, gk20a_mm_inst_block_addr(g, &ch->inst_block));
+		ch->chid, nvgpu_inst_block_addr(g, &ch->inst_block));
 
 	gk20a_dbg_fn("done");
 	return 0;
@@ -3986,7 +3987,7 @@ int gk20a_fifo_alloc_inst(struct gk20a *g, struct channel_gk20a *ch)
 
 void gk20a_fifo_free_inst(struct gk20a *g, struct channel_gk20a *ch)
 {
-	gk20a_free_inst_block(g, &ch->inst_block);
+	nvgpu_free_inst_block(g, &ch->inst_block);
 }
 
 u32 gk20a_fifo_userd_gp_get(struct gk20a *g, struct channel_gk20a *c)
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
index 2bc7d9a8b..ea5d55a4a 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
@@ -36,6 +36,7 @@
 #include <nvgpu/kref.h>
 
 struct gk20a_debug_output;
+struct mmu_fault_info;
 
 #define MAX_RUNLIST_BUFFERS		2
 
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 2d09c0bbf..e3c2397c8 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -34,6 +34,7 @@
 #include <nvgpu/gmmu.h>
 #include <nvgpu/ltc.h>
 #include <nvgpu/vidmem.h>
+#include <nvgpu/mm.h>
 
 #include <trace/events/gk20a.h>
 
@@ -107,7 +108,7 @@ int gk20a_prepare_poweroff(struct gk20a *g)
 		ret |= nvgpu_pmu_destroy(g);
 
 	ret |= gk20a_gr_suspend(g);
-	ret |= gk20a_mm_suspend(g);
+	ret |= nvgpu_mm_suspend(g);
 	ret |= gk20a_fifo_suspend(g);
 
 	gk20a_ce_suspend(g);
@@ -213,7 +214,7 @@ int gk20a_finalize_poweron(struct gk20a *g)
 		goto done;
 	}
 
-	err = gk20a_init_mm_support(g);
+	err = nvgpu_init_mm_support(g);
 	if (err) {
 		nvgpu_err(g, "failed to init gk20a mm");
 		goto done;
@@ -314,7 +315,7 @@ int gk20a_finalize_poweron(struct gk20a *g)
 
 	gk20a_init_ce_support(g);
 
-	gk20a_init_mm_ce_context(g);
+	nvgpu_init_mm_ce_context(g);
 
 	if (g->ops.xve.available_speeds) {
 		u32 speed;
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 92bcb6182..9c09e85ff 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -47,6 +47,7 @@ struct nvgpu_warpstate;
 #include <nvgpu/thread.h>
 #include <nvgpu/io.h>
 
+#include <nvgpu/mm.h>
 #include <nvgpu/as.h>
 #include <nvgpu/log.h>
 #include <nvgpu/pramin.h>
@@ -756,6 +757,8 @@ struct gpu_ops {
 		u64 (*gpu_phys_addr)(struct gk20a *g,
 				     struct nvgpu_gmmu_attrs *attrs, u64 phys);
 		size_t (*get_vidmem_size)(struct gk20a *g);
+		int (*alloc_inst_block)(struct gk20a *g,
+					struct nvgpu_mem *inst_block);
 		void (*init_inst_block)(struct nvgpu_mem *inst_block,
 				struct vm_gk20a *vm, u32 big_page_size);
 		bool (*mmu_fault_pending)(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index d67324537..6d3702508 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -37,6 +37,7 @@
 #include <nvgpu/enabled.h>
 #include <nvgpu/debug.h>
 #include <nvgpu/barrier.h>
+#include <nvgpu/mm.h>
 
 #include "gk20a.h"
 #include "kind_gk20a.h"
@@ -731,7 +732,7 @@ void gr_gk20a_ctx_patch_write(struct gk20a *g,
 
 static u32 fecs_current_ctx_data(struct gk20a *g, struct nvgpu_mem *inst_block)
 {
-	u32 ptr = u64_lo32(gk20a_mm_inst_block_addr(g, inst_block)
+	u32 ptr = u64_lo32(nvgpu_inst_block_addr(g, inst_block)
 			>> ram_in_base_shift_v());
 	u32 aperture = nvgpu_aperture_mask(g, inst_block,
 			gr_fecs_current_ctx_target_sys_mem_ncoh_f(),
@@ -744,7 +745,7 @@ static u32 fecs_current_ctx_data(struct gk20a *g, struct nvgpu_mem *inst_block)
 static int gr_gk20a_fecs_ctx_bind_channel(struct gk20a *g,
 					struct channel_gk20a *c)
 {
-	u32 inst_base_ptr = u64_lo32(gk20a_mm_inst_block_addr(g, &c->inst_block)
+	u32 inst_base_ptr = u64_lo32(nvgpu_inst_block_addr(g, &c->inst_block)
 				     >> ram_in_base_shift_v());
 	u32 data = fecs_current_ctx_data(g, &c->inst_block);
 	u32 ret;
@@ -1980,7 +1981,7 @@ static int gr_gk20a_init_ctxsw_ucode_vaspace(struct gk20a *g)
 	struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
 	int err;
 
-	err = gk20a_alloc_inst_block(g, &ucode_info->inst_blk_desc);
+	err = g->ops.mm.alloc_inst_block(g, &ucode_info->inst_blk_desc);
 	if (err)
 		return err;
 
@@ -2154,7 +2155,7 @@ void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g)
 
 	gk20a_writel(g, gr_fecs_arb_ctx_adr_r(), 0x0);
 
-	inst_ptr = gk20a_mm_inst_block_addr(g, &ucode_info->inst_blk_desc);
+	inst_ptr = nvgpu_inst_block_addr(g, &ucode_info->inst_blk_desc);
 	gk20a_writel(g, gr_fecs_new_ctx_r(),
 			gr_fecs_new_ctx_ptr_f(inst_ptr >> 12) |
 			nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc,
@@ -5455,7 +5456,7 @@ static struct channel_gk20a *gk20a_gr_get_channel_from_ctx(
 		if (!gk20a_channel_get(ch))
 			continue;
 
-		if ((u32)(gk20a_mm_inst_block_addr(g, &ch->inst_block) >>
+		if ((u32)(nvgpu_inst_block_addr(g, &ch->inst_block) >>
 					ram_in_base_shift_v()) ==
 				gr_fecs_current_ctx_ptr_v(curr_ctx)) {
 			tsgid = ch->tsgid;
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index d96fa4e1f..a17d6bb68 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1,6 +1,4 @@
 /*
- * GK20A memory management
- *
  * Copyright (c) 2011-2017, NVIDIA CORPORATION.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -24,6 +22,7 @@
 
 #include <trace/events/gk20a.h>
 
+#include <nvgpu/mm.h>
 #include <nvgpu/vm.h>
 #include <nvgpu/vm_area.h>
 #include <nvgpu/dma.h>
@@ -88,161 +87,6 @@
  *
  */
 
-static int __must_check gk20a_init_system_vm(struct mm_gk20a *mm);
-static int __must_check gk20a_init_bar1_vm(struct mm_gk20a *mm);
-static int __must_check gk20a_init_hwpm(struct mm_gk20a *mm);
-static int __must_check gk20a_init_cde_vm(struct mm_gk20a *mm);
-static int __must_check gk20a_init_ce_vm(struct mm_gk20a *mm);
-
-static int gk20a_init_mm_reset_enable_hw(struct gk20a *g)
-{
-	gk20a_dbg_fn("");
-	if (g->ops.fb.reset)
-		g->ops.fb.reset(g);
-
-	if (g->ops.clock_gating.slcg_fb_load_gating_prod)
-		g->ops.clock_gating.slcg_fb_load_gating_prod(g,
-				g->slcg_enabled);
-	if (g->ops.clock_gating.slcg_ltc_load_gating_prod)
-		g->ops.clock_gating.slcg_ltc_load_gating_prod(g,
-				g->slcg_enabled);
-	if (g->ops.clock_gating.blcg_fb_load_gating_prod)
-		g->ops.clock_gating.blcg_fb_load_gating_prod(g,
-				g->blcg_enabled);
-	if (g->ops.clock_gating.blcg_ltc_load_gating_prod)
-		g->ops.clock_gating.blcg_ltc_load_gating_prod(g,
-				g->blcg_enabled);
-
-	if (g->ops.fb.init_fs_state)
-		g->ops.fb.init_fs_state(g);
-
-	return 0;
-}
-
-static void gk20a_remove_mm_ce_support(struct mm_gk20a *mm)
-{
-	struct gk20a *g = gk20a_from_mm(mm);
-
-	if (mm->vidmem.ce_ctx_id != (u32)~0)
-		gk20a_ce_delete_context_priv(g, mm->vidmem.ce_ctx_id);
-
-	mm->vidmem.ce_ctx_id = (u32)~0;
-
-	nvgpu_vm_put(mm->ce.vm);
-}
-
-static void gk20a_remove_mm_support(struct mm_gk20a *mm)
-{
-	struct gk20a *g = gk20a_from_mm(mm);
-
-	if (g->ops.mm.fault_info_mem_destroy)
-		g->ops.mm.fault_info_mem_destroy(g);
-
-	if (g->ops.mm.remove_bar2_vm)
-		g->ops.mm.remove_bar2_vm(g);
-
-	if (g->ops.mm.is_bar1_supported(g)) {
-		gk20a_free_inst_block(g, &mm->bar1.inst_block);
-		nvgpu_vm_put(mm->bar1.vm);
-	}
-
-	gk20a_free_inst_block(g, &mm->pmu.inst_block);
-	gk20a_free_inst_block(g, &mm->hwpm.inst_block);
-	nvgpu_vm_put(mm->pmu.vm);
-	nvgpu_vm_put(mm->cde.vm);
-
-	nvgpu_semaphore_sea_destroy(g);
-	nvgpu_vidmem_destroy(g);
-	nvgpu_pd_cache_fini(g);
-}
-
-static int gk20a_alloc_sysmem_flush(struct gk20a *g)
-{
-	return nvgpu_dma_alloc_sys(g, SZ_4K, &g->mm.sysmem_flush);
-}
-
-int gk20a_init_mm_setup_sw(struct gk20a *g)
-{
-	struct mm_gk20a *mm = &g->mm;
-	int err;
-
-	gk20a_dbg_fn("");
-
-	if (mm->sw_ready) {
-		gk20a_dbg_fn("skip init");
-		return 0;
-	}
-
-	mm->g = g;
-	nvgpu_mutex_init(&mm->l2_op_lock);
-
-	/*TBD: make channel vm size configurable */
-	mm->channel.user_size = NV_MM_DEFAULT_USER_SIZE -
-		NV_MM_DEFAULT_KERNEL_SIZE;
-	mm->channel.kernel_size = NV_MM_DEFAULT_KERNEL_SIZE;
-
-	gk20a_dbg_info("channel vm size: user %dMB  kernel %dMB",
-		       (int)(mm->channel.user_size >> 20),
-		       (int)(mm->channel.kernel_size >> 20));
-
-	nvgpu_init_pramin(mm);
-
-	mm->vidmem.ce_ctx_id = (u32)~0;
-
-	err = nvgpu_vidmem_init(mm);
-	if (err)
-		return err;
-
-	/*
-	 * this requires fixed allocations in vidmem which must be
-	 * allocated before all other buffers
-	 */
-	if (g->ops.pmu.alloc_blob_space
-			&& !nvgpu_is_enabled(g, NVGPU_MM_UNIFIED_MEMORY)) {
-		err = g->ops.pmu.alloc_blob_space(g, 0, &g->acr.ucode_blob);
-		if (err)
-			return err;
-	}
-
-	err = gk20a_alloc_sysmem_flush(g);
-	if (err)
-		return err;
-
-	if (g->ops.mm.is_bar1_supported(g)) {
-		err = gk20a_init_bar1_vm(mm);
-		if (err)
-			return err;
-	}
-	if (g->ops.mm.init_bar2_vm) {
-		err = g->ops.mm.init_bar2_vm(g);
-		if (err)
-			return err;
-	}
-	err = gk20a_init_system_vm(mm);
-	if (err)
-		return err;
-
-	err = gk20a_init_hwpm(mm);
-	if (err)
-		return err;
-
-	err = gk20a_init_cde_vm(mm);
-	if (err)
-		return err;
-
-	err = gk20a_init_ce_vm(mm);
-	if (err)
-		return err;
-
-	mm->remove_support = gk20a_remove_mm_support;
-	mm->remove_ce_support = gk20a_remove_mm_ce_support;
-
-	mm->sw_ready = true;
-
-	gk20a_dbg_fn("done");
-	return 0;
-}
-
 /* make sure gk20a_init_mm_support is called before */
 int gk20a_init_mm_setup_hw(struct gk20a *g)
 {
@@ -274,43 +118,6 @@ int gk20a_init_mm_setup_hw(struct gk20a *g)
 	return 0;
 }
 
-int gk20a_init_mm_support(struct gk20a *g)
-{
-	u32 err;
-
-	err = gk20a_init_mm_reset_enable_hw(g);
-	if (err)
-		return err;
-
-	err = gk20a_init_mm_setup_sw(g);
-	if (err)
-		return err;
-
-	if (g->ops.mm.init_mm_setup_hw)
-		err = g->ops.mm.init_mm_setup_hw(g);
-
-	return err;
-}
-
-void gk20a_init_mm_ce_context(struct gk20a *g)
-{
-#if defined(CONFIG_GK20A_VIDMEM)
-	if (g->mm.vidmem.size && (g->mm.vidmem.ce_ctx_id == (u32)~0)) {
-		g->mm.vidmem.ce_ctx_id =
-			gk20a_ce_create_context_with_cb(g,
-				gk20a_fifo_get_fast_ce_runlist_id(g),
-				-1,
-				-1,
-				-1,
-				NULL);
-
-		if (g->mm.vidmem.ce_ctx_id == (u32)~0)
-			nvgpu_err(g,
-				"Failed to allocate CE context for vidmem page clearing support");
-	}
-#endif
-}
-
 int gk20a_mm_pde_coverage_bit_count(struct vm_gk20a *vm)
 {
 	return vm->mmu_levels[0].lo_bit[0];
@@ -505,76 +312,6 @@ const struct gk20a_mmu_level gk20a_mm_levels_128k[] = {
 	{.update_entry = NULL}
 };
 
-/*
- * Attempt to find a reserved memory area to determine PTE size for the passed
- * mapping. If no reserved area can be found use small pages.
- */
-enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm,
-					      u64 base, u64 size)
-{
-	struct nvgpu_vm_area *vm_area;
-
-	vm_area = nvgpu_vm_area_find(vm, base);
-	if (!vm_area)
-		return gmmu_page_size_small;
-
-	return vm_area->pgsz_idx;
-}
-
-/*
- * This is for when the address space does not support unified address spaces.
- */
-static enum gmmu_pgsz_gk20a __get_pte_size_split_addr(struct vm_gk20a *vm,
-					       u64 base, u64 size)
-{
-	if (!base) {
-		if (size >= vm->gmmu_page_sizes[gmmu_page_size_big])
-			return gmmu_page_size_big;
-		return gmmu_page_size_small;
-	} else {
-		if (base < __nv_gmmu_va_small_page_limit())
-			return gmmu_page_size_small;
-		else
-			return gmmu_page_size_big;
-	}
-}
-
-/*
- * This determines the PTE size for a given alloc. Used by both the GVA space
- * allocator and the mm core code so that agreement can be reached on how to
- * map allocations.
- *
- * The page size of a buffer is this:
- *
- *   o  If the VM doesn't support large pages then obviously small pages
- *      must be used.
- *   o  If the base address is non-zero (fixed address map):
- *      - Attempt to find a reserved memory area and use the page size
- *        based on that.
- *      - If no reserved page size is available, default to small pages.
- *   o  If the base is zero:
- *      - If the size is larger than or equal to the big page size, use big
- *        pages.
- *      - Otherwise use small pages.
- */
-enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm, u64 base, u64 size)
-{
-	struct gk20a *g = gk20a_from_vm(vm);
-
-	if (!vm->big_pages)
-		return gmmu_page_size_small;
-
-	if (!nvgpu_is_enabled(g, NVGPU_MM_UNIFY_ADDRESS_SPACES))
-		return __get_pte_size_split_addr(vm, base, size);
-
-	if (base)
-		return __get_pte_size_fixed_map(vm, base, size);
-
-	if (size >= vm->gmmu_page_sizes[gmmu_page_size_big])
-		return gmmu_page_size_big;
-	return gmmu_page_size_small;
-}
-
 int __gk20a_vm_bind_channel(struct vm_gk20a *vm, struct channel_gk20a *ch)
 {
 	int err = 0;
@@ -599,151 +336,6 @@ int gk20a_vm_bind_channel(struct gk20a_as_share *as_share,
 	return __gk20a_vm_bind_channel(as_share->vm, ch);
 }
 
-int gk20a_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block)
-{
-	int err;
-
-	gk20a_dbg_fn("");
-
-	err = nvgpu_dma_alloc(g, ram_in_alloc_size_v(), inst_block);
-	if (err) {
-		nvgpu_err(g, "%s: memory allocation failed", __func__);
-		return err;
-	}
-
-	gk20a_dbg_fn("done");
-	return 0;
-}
-
-void gk20a_free_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block)
-{
-	if (inst_block->size)
-		nvgpu_dma_free(g, inst_block);
-}
-
-u64 gk20a_mm_inst_block_addr(struct gk20a *g, struct nvgpu_mem *inst_block)
-{
-	if (g->mm.has_physical_mode)
-		return nvgpu_mem_get_phys_addr(g, inst_block);
-	else
-		return nvgpu_mem_get_addr(g, inst_block);
-}
-
-static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
-{
-	int err;
-	struct gk20a *g = gk20a_from_mm(mm);
-	struct nvgpu_mem *inst_block = &mm->bar1.inst_block;
-	u32 big_page_size = g->ops.mm.get_default_big_page_size();
-
-	mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20;
-	gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size);
-	mm->bar1.vm = nvgpu_vm_init(g,
-				    big_page_size,
-				    SZ_4K,
-				    mm->bar1.aperture_size - SZ_4K,
-				    mm->bar1.aperture_size,
-				    true, false,
-				    "bar1");
-	if (!mm->bar1.vm)
-		return -ENOMEM;
-
-	err = gk20a_alloc_inst_block(g, inst_block);
-	if (err)
-		goto clean_up_vm;
-	g->ops.mm.init_inst_block(inst_block, mm->bar1.vm, big_page_size);
-
-	return 0;
-
-clean_up_vm:
-	nvgpu_vm_put(mm->bar1.vm);
-	return err;
-}
-
-/* pmu vm, share channel_vm interfaces */
-static int gk20a_init_system_vm(struct mm_gk20a *mm)
-{
-	int err;
-	struct gk20a *g = gk20a_from_mm(mm);
-	struct nvgpu_mem *inst_block = &mm->pmu.inst_block;
-	u32 big_page_size = g->ops.mm.get_default_big_page_size();
-	u32 low_hole, aperture_size;
-
-	/*
-	 * No user region - so we will pass that as zero sized.
-	 */
-	low_hole = SZ_4K * 16;
-	aperture_size = GK20A_PMU_VA_SIZE * 2;
-
-	mm->pmu.aperture_size = GK20A_PMU_VA_SIZE;
-	gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size);
-
-	mm->pmu.vm = nvgpu_vm_init(g, big_page_size,
-				   low_hole,
-				   aperture_size - low_hole,
-				   aperture_size,
-				   true,
-				   false,
-				   "system");
-	if (!mm->pmu.vm)
-		return -ENOMEM;
-
-	err = gk20a_alloc_inst_block(g, inst_block);
-	if (err)
-		goto clean_up_vm;
-	g->ops.mm.init_inst_block(inst_block, mm->pmu.vm, big_page_size);
-
-	return 0;
-
-clean_up_vm:
-	nvgpu_vm_put(mm->pmu.vm);
-	return err;
-}
-
-static int gk20a_init_hwpm(struct mm_gk20a *mm)
-{
-	int err;
-	struct gk20a *g = gk20a_from_mm(mm);
-	struct nvgpu_mem *inst_block = &mm->hwpm.inst_block;
-
-	err = gk20a_alloc_inst_block(g, inst_block);
-	if (err)
-		return err;
-	g->ops.mm.init_inst_block(inst_block, mm->pmu.vm, 0);
-
-	return 0;
-}
-
-static int gk20a_init_cde_vm(struct mm_gk20a *mm)
-{
-	struct gk20a *g = gk20a_from_mm(mm);
-	u32 big_page_size = g->ops.mm.get_default_big_page_size();
-
-	mm->cde.vm = nvgpu_vm_init(g, big_page_size,
-				   big_page_size << 10,
-				   NV_MM_DEFAULT_KERNEL_SIZE,
-				   NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
-				   false, false, "cde");
-	if (!mm->cde.vm)
-		return -ENOMEM;
-	return 0;
-}
-
-static int gk20a_init_ce_vm(struct mm_gk20a *mm)
-{
-	struct gk20a *g = gk20a_from_mm(mm);
-	u32 big_page_size = g->ops.mm.get_default_big_page_size();
-
-	mm->ce.vm = nvgpu_vm_init(g, big_page_size,
-				  big_page_size << 10,
-				  NV_MM_DEFAULT_KERNEL_SIZE,
-				  NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
-				  false, false, "ce");
-	if (!mm->ce.vm)
-		return -ENOMEM;
-	return 0;
-}
-
 void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block,
 		struct vm_gk20a *vm)
 {
@@ -770,7 +362,7 @@ void gk20a_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm,
 	struct gk20a *g = gk20a_from_vm(vm);
 
 	gk20a_dbg_info("inst block phys = 0x%llx, kv = 0x%p",
-		gk20a_mm_inst_block_addr(g, inst_block), inst_block->cpu_va);
+		nvgpu_inst_block_addr(g, inst_block), inst_block->cpu_va);
 
 	g->ops.mm.init_pdb(g, inst_block, vm);
 
@@ -784,6 +376,22 @@ void gk20a_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm,
 		g->ops.mm.set_big_page_size(g, inst_block, big_page_size);
 }
 
+int gk20a_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block)
+{
+	int err;
+
+	gk20a_dbg_fn("");
+
+	err = nvgpu_dma_alloc(g, ram_in_alloc_size_v(), inst_block);
+	if (err) {
+		nvgpu_err(g, "%s: memory allocation failed", __func__);
+		return err;
+	}
+
+	gk20a_dbg_fn("done");
+	return 0;
+}
+
 int gk20a_mm_fb_flush(struct gk20a *g)
 {
 	struct mm_gk20a *mm = &g->mm;
@@ -992,19 +600,6 @@ hw_was_off:
 	gk20a_idle_nosuspend(g);
 }
 
-int gk20a_mm_suspend(struct gk20a *g)
-{
-	gk20a_dbg_fn("");
-
-	nvgpu_vidmem_thread_pause_sync(&g->mm);
-
-	g->ops.mm.cbc_clean(g);
-	g->ops.mm.l2_flush(g, false);
-
-	gk20a_dbg_fn("done");
-	return 0;
-}
-
 u32 gk20a_mm_get_iommu_bit(struct gk20a *g)
 {
 	return 34;
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 15876b106..434fc4228 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -35,11 +35,6 @@
 #include <nvgpu/list.h>
 #include <nvgpu/rbtree.h>
 #include <nvgpu/kref.h>
-#include <nvgpu/atomic.h>
-#include <nvgpu/cond.h>
-#include <nvgpu/thread.h>
-
-struct nvgpu_pd_cache;
 
 #ifdef CONFIG_ARM64
 #define outer_flush_range(a, b)
@@ -138,219 +133,24 @@ struct priv_cmd_entry {
 struct gk20a;
 struct channel_gk20a;
 
-int gk20a_init_mm_support(struct gk20a *g);
-int gk20a_init_mm_setup_sw(struct gk20a *g);
-int gk20a_init_mm_setup_hw(struct gk20a *g);
-void gk20a_init_mm_ce_context(struct gk20a *g);
-
 int gk20a_mm_fb_flush(struct gk20a *g);
 void gk20a_mm_l2_flush(struct gk20a *g, bool invalidate);
 void gk20a_mm_cbc_clean(struct gk20a *g);
 void gk20a_mm_l2_invalidate(struct gk20a *g);
 
-#define FAULT_TYPE_NUM		2	/* replay and nonreplay faults */
-
-struct mmu_fault_info {
-	u64	inst_ptr;
-	u32	inst_aperture;
-	u64	fault_addr;
-	u32	fault_addr_aperture;
-	u32	timestamp_lo;
-	u32	timestamp_hi;
-	u32	mmu_engine_id;
-	u32	gpc_id;
-	u32	client_type;
-	u32	client_id;
-	u32	fault_type;
-	u32	access_type;
-	u32	protected_mode;
-	u32	replayable_fault;
-	u32	replay_fault_en;
-	u32	valid;
-	u32	faulted_pbdma;
-	u32	faulted_engine;
-	u32	faulted_subid;
-	u32	chid;
-	struct channel_gk20a *refch;
-	const char *client_type_desc;
-	const char *fault_type_desc;
-	const char *client_id_desc;
-};
-
-struct mm_gk20a {
-	struct gk20a *g;
-
-	/* GPU VA default sizes address spaces for channels */
-	struct {
-		u64 user_size;   /* userspace-visible GPU VA region */
-		u64 kernel_size; /* kernel-only GPU VA region */
-	} channel;
-
-	struct {
-		u32 aperture_size;
-		struct vm_gk20a *vm;
-		struct nvgpu_mem inst_block;
-	} bar1;
-
-	struct {
-		u32 aperture_size;
-		struct vm_gk20a *vm;
-		struct nvgpu_mem inst_block;
-	} bar2;
-
-	struct {
-		u32 aperture_size;
-		struct vm_gk20a *vm;
-		struct nvgpu_mem inst_block;
-	} pmu;
-
-	struct {
-		/* using pmu vm currently */
-		struct nvgpu_mem inst_block;
-	} hwpm;
-
-	struct {
-		struct vm_gk20a *vm;
-		struct nvgpu_mem inst_block;
-	} perfbuf;
-
-	struct {
-		struct vm_gk20a *vm;
-	} cde;
-
-	struct {
-		struct vm_gk20a *vm;
-	} ce;
-
-	struct nvgpu_pd_cache *pd_cache;
-
-	struct nvgpu_mutex l2_op_lock;
-	struct nvgpu_mutex tlb_lock;
-	struct nvgpu_mutex priv_lock;
-
-	struct nvgpu_mem bar2_desc;
-
-#ifdef CONFIG_TEGRA_19x_GPU
-	struct nvgpu_mem hw_fault_buf[FAULT_TYPE_NUM];
-	unsigned int hw_fault_buf_status[FAULT_TYPE_NUM];
-	struct mmu_fault_info *fault_info[FAULT_TYPE_NUM];
-	struct nvgpu_mutex hub_isr_mutex;
-	u32    hub_intr_types;
-#endif
-	/*
-	 * Separate function to cleanup the CE since it requires a channel to
-	 * be closed which must happen before fifo cleanup.
-	 */
-	void (*remove_ce_support)(struct mm_gk20a *mm);
-	void (*remove_support)(struct mm_gk20a *mm);
-	bool sw_ready;
-	int physical_bits;
-	bool use_full_comp_tag_line;
-	bool ltc_enabled_current;
-	bool ltc_enabled_target;
-	bool bypass_smmu;
-	bool disable_bigpage;
-	bool has_physical_mode;
-
-	struct nvgpu_mem sysmem_flush;
-
-	u32 pramin_window;
-	struct nvgpu_spinlock pramin_window_lock;
-	bool force_pramin; /* via debugfs */
-
-	struct {
-		size_t size;
-		u64 base;
-		size_t bootstrap_size;
-		u64 bootstrap_base;
-
-		struct nvgpu_allocator allocator;
-		struct nvgpu_allocator bootstrap_allocator;
-
-		u32 ce_ctx_id;
-		volatile bool cleared;
-		struct nvgpu_mutex first_clear_mutex;
-
-		struct nvgpu_list_node clear_list_head;
-		struct nvgpu_mutex clear_list_mutex;
-
-		struct nvgpu_cond clearing_thread_cond;
-		struct nvgpu_thread clearing_thread;
-		struct nvgpu_mutex clearing_thread_lock;
-		nvgpu_atomic_t pause_count;
-
-		nvgpu_atomic64_t bytes_pending;
-	} vidmem;
-};
-
-int gk20a_mm_init(struct mm_gk20a *mm);
-
-#define gk20a_from_mm(mm) ((mm)->g)
-#define gk20a_from_vm(vm) ((vm)->mm->g)
-
 #define dev_from_vm(vm) dev_from_gk20a(vm->mm->g)
 
-#define DEFAULT_ALLOC_ALIGNMENT (4*1024)
-
-static inline int bar1_aperture_size_mb_gk20a(void)
-{
-	return 16; /* 16MB is more than enough atm. */
-}
-
-/* The maximum GPU VA range supported */
-#define NV_GMMU_VA_RANGE          38
-
-/* The default userspace-visible GPU VA size */
-#define NV_MM_DEFAULT_USER_SIZE   (1ULL << 37)
-
-/* The default kernel-reserved GPU VA size */
-#define NV_MM_DEFAULT_KERNEL_SIZE (1ULL << 32)
-
-/*
- * When not using unified address spaces, the bottom 56GB of the space are used
- * for small pages, and the remaining high memory is used for large pages.
- */
-static inline u64 __nv_gmmu_va_small_page_limit(void)
-{
-	return ((u64)SZ_1G * 56);
-}
-
-enum nvgpu_flush_op {
-	NVGPU_FLUSH_DEFAULT,
-	NVGPU_FLUSH_FB,
-	NVGPU_FLUSH_L2_INV,
-	NVGPU_FLUSH_L2_FLUSH,
-	NVGPU_FLUSH_CBC_CLEAN,
-};
-
-enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm,
-					      u64 base, u64 size);
-enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm, u64 base, u64 size);
-
-#if 0 /*related to addr bits above, concern below TBD on which is accurate */
-#define bar1_instance_block_shift_gk20a() (max_physaddr_bits_gk20a() -\
-					   bus_bar1_block_ptr_s())
-#else
-#define bar1_instance_block_shift_gk20a() bus_bar1_block_ptr_shift_v()
-#endif
-
-int gk20a_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block);
-void gk20a_free_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block);
-void gk20a_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm,
-		u32 big_page_size);
-u64 gk20a_mm_inst_block_addr(struct gk20a *g, struct nvgpu_mem *mem);
-
-void gk20a_mm_dump_vm(struct vm_gk20a *vm,
-		u64 va_begin, u64 va_end, char *label);
-
-int gk20a_mm_suspend(struct gk20a *g);
-
 void gk20a_mm_ltc_isr(struct gk20a *g);
 
 bool gk20a_mm_mmu_debug_mode_enabled(struct gk20a *g);
 
 int gk20a_mm_mmu_vpr_info_fetch(struct gk20a *g);
 
+int gk20a_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block);
+void gk20a_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm,
+		u32 big_page_size);
+int gk20a_init_mm_setup_hw(struct gk20a *g);
+
 u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 			  u64 map_offset,
 			  struct nvgpu_sgt *sgt,
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
index e4dd6a598..2b954e1ad 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
@@ -30,6 +30,7 @@
 #include <nvgpu/bug.h>
 #include <nvgpu/firmware.h>
 #include <nvgpu/falcon.h>
+#include <nvgpu/mm.h>
 
 #include "gk20a.h"
 #include "gr_gk20a.h"
@@ -181,7 +182,7 @@ int pmu_bootstrap(struct nvgpu_pmu *pmu)
 		pwr_falcon_itfen_ctxen_enable_f());
 	gk20a_writel(g, pwr_pmu_new_instblk_r(),
 		pwr_pmu_new_instblk_ptr_f(
-			gk20a_mm_inst_block_addr(g, &mm->pmu.inst_block) >> 12) |
+			nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) |
 		pwr_pmu_new_instblk_valid_f(1) |
 		pwr_pmu_new_instblk_target_sys_coh_f());
 
diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
index 7029b4772..557948e17 100644
--- a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
@@ -21,7 +21,6 @@
  */
 
 #include <nvgpu/types.h>
-
 #include <nvgpu/dma.h>
 #include <nvgpu/gmmu.h>
 #include <nvgpu/timers.h>
@@ -33,6 +32,7 @@
 #include <nvgpu/pmu.h>
 #include <nvgpu/falcon.h>
 #include <nvgpu/enabled.h>
+#include <nvgpu/mm.h>
 
 #include "gk20a/gk20a.h"
 #include "gk20a/pmu_gk20a.h"
@@ -1170,7 +1170,7 @@ static int bl_bootstrap(struct nvgpu_pmu *pmu,
 			pwr_falcon_itfen_ctxen_enable_f());
 	gk20a_writel(g, pwr_pmu_new_instblk_r(),
 			pwr_pmu_new_instblk_ptr_f(
-				gk20a_mm_inst_block_addr(g, &mm->pmu.inst_block) >> 12) |
+				nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) |
 			pwr_pmu_new_instblk_valid_f(1) |
 			pwr_pmu_new_instblk_target_sys_coh_f());
 
diff --git a/drivers/gpu/nvgpu/gm20b/bus_gm20b.c b/drivers/gpu/nvgpu/gm20b/bus_gm20b.c
index b8d42f7af..34c8d4b73 100644
--- a/drivers/gpu/nvgpu/gm20b/bus_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/bus_gm20b.c
@@ -24,6 +24,7 @@
 
 #include <nvgpu/timers.h>
 #include <nvgpu/bus.h>
+#include <nvgpu/mm.h>
 
 #include "bus_gm20b.h"
 #include "gk20a/gk20a.h"
@@ -35,8 +36,8 @@ int gm20b_bus_bar1_bind(struct gk20a *g, struct nvgpu_mem *bar1_inst)
 {
 	struct nvgpu_timeout timeout;
 	int err = 0;
-	u64 iova = gk20a_mm_inst_block_addr(g, bar1_inst);
-	u32 ptr_v = (u32)(iova >> bar1_instance_block_shift_gk20a());
+	u64 iova = nvgpu_inst_block_addr(g, bar1_inst);
+	u32 ptr_v = (u32)(iova >> bus_bar1_block_ptr_shift_v());
 
 	gk20a_dbg_info("bar1 inst block ptr: 0x%08x", ptr_v);
 
diff --git a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
index f4ddd92fa..0762e8bdd 100644
--- a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c
@@ -31,6 +31,7 @@
 #include <nvgpu/log.h>
 #include <nvgpu/atomic.h>
 #include <nvgpu/barrier.h>
+#include <nvgpu/mm.h>
 
 #include <nvgpu/hw/gm20b/hw_ccsr_gm20b.h>
 #include <nvgpu/hw/gm20b/hw_ram_gm20b.h>
@@ -42,7 +43,7 @@ void channel_gm20b_bind(struct channel_gk20a *c)
 {
 	struct gk20a *g = c->g;
 
-	u32 inst_ptr = gk20a_mm_inst_block_addr(g, &c->inst_block)
+	u32 inst_ptr = nvgpu_inst_block_addr(g, &c->inst_block)
 		>> ram_in_base_shift_v();
 
 	gk20a_dbg_info("bind channel %d inst ptr 0x%08x",
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
index 269fd7f14..d081fb245 100644
--- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
@@ -442,6 +442,7 @@ static const struct gpu_ops gm20b_ops = {
 		.init_pdb = gk20a_mm_init_pdb,
 		.init_mm_setup_hw = gk20a_init_mm_setup_hw,
 		.is_bar1_supported = gm20b_mm_is_bar1_supported,
+		.alloc_inst_block = gk20a_alloc_inst_block,
 		.init_inst_block = gk20a_init_inst_block,
 		.mmu_fault_pending = gk20a_fifo_mmu_fault_pending,
 		.get_kind_invalid = gm20b_get_kind_invalid,
diff --git a/drivers/gpu/nvgpu/gp106/hal_gp106.c b/drivers/gpu/nvgpu/gp106/hal_gp106.c
index 1246ee7f1..59f72e130 100644
--- a/drivers/gpu/nvgpu/gp106/hal_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/hal_gp106.c
@@ -524,6 +524,7 @@ static const struct gpu_ops gp106_ops = {
 		.init_pdb = gp10b_mm_init_pdb,
 		.init_mm_setup_hw = gp10b_init_mm_setup_hw,
 		.is_bar1_supported = gm20b_mm_is_bar1_supported,
+		.alloc_inst_block = gk20a_alloc_inst_block,
 		.init_inst_block = gk20a_init_inst_block,
 		.mmu_fault_pending = gk20a_fifo_mmu_fault_pending,
 		.init_bar2_vm = gb10b_init_bar2_vm,
diff --git a/drivers/gpu/nvgpu/gp106/sec2_gp106.c b/drivers/gpu/nvgpu/gp106/sec2_gp106.c
index 9f0fe375d..26ded39e6 100644
--- a/drivers/gpu/nvgpu/gp106/sec2_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/sec2_gp106.c
@@ -22,6 +22,7 @@
 
 #include <nvgpu/pmu.h>
 #include <nvgpu/falcon.h>
+#include <nvgpu/mm.h>
 
 #include "gk20a/gk20a.h"
 #include "sec2_gp106.h"
@@ -88,7 +89,7 @@ int bl_bootstrap_sec2(struct nvgpu_pmu *pmu,
 
 	gk20a_writel(g, psec_falcon_nxtctx_r(),
 			pwr_pmu_new_instblk_ptr_f(
-			gk20a_mm_inst_block_addr(g, &mm->pmu.inst_block) >> 12) |
+			nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) |
 			pwr_pmu_new_instblk_valid_f(1) |
 			nvgpu_aperture_mask(g, &mm->pmu.inst_block,
 				pwr_pmu_new_instblk_target_sys_coh_f(),
@@ -154,7 +155,7 @@ void init_pmu_setup_hw1(struct gk20a *g)
 				pwr_falcon_itfen_ctxen_enable_f());
 	gk20a_writel(g, pwr_pmu_new_instblk_r(),
 				pwr_pmu_new_instblk_ptr_f(
-					gk20a_mm_inst_block_addr(g, &mm->pmu.inst_block) >> 12) |
+					nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) |
 				pwr_pmu_new_instblk_valid_f(1) |
 				nvgpu_aperture_mask(g, &mm->pmu.inst_block,
 					pwr_pmu_new_instblk_target_sys_coh_f(),
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index b80722b87..a10df7406 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -484,6 +484,7 @@ static const struct gpu_ops gp10b_ops = {
 		.init_pdb = gp10b_mm_init_pdb,
 		.init_mm_setup_hw = gp10b_init_mm_setup_hw,
 		.is_bar1_supported = gm20b_mm_is_bar1_supported,
+		.alloc_inst_block = gk20a_alloc_inst_block,
 		.init_inst_block = gk20a_init_inst_block,
 		.mmu_fault_pending = gk20a_fifo_mmu_fault_pending,
 		.init_bar2_vm = gb10b_init_bar2_vm,
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index 06a9b9291..dc7461539 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -22,6 +22,7 @@
  * DEALINGS IN THE SOFTWARE.
  */
 
+#include <nvgpu/mm.h>
 #include <nvgpu/dma.h>
 #include <nvgpu/gmmu.h>
 
@@ -95,7 +96,7 @@ int gb10b_init_bar2_vm(struct gk20a *g)
 		return -ENOMEM;
 
 	/* allocate instance mem for bar2 */
-	err = gk20a_alloc_inst_block(g, inst_block);
+	err = g->ops.mm.alloc_inst_block(g, inst_block);
 	if (err)
 		goto clean_up_va;
 
@@ -112,7 +113,7 @@ int gb10b_init_bar2_mm_hw_setup(struct gk20a *g)
 {
 	struct mm_gk20a *mm = &g->mm;
 	struct nvgpu_mem *inst_block = &mm->bar2.inst_block;
-	u64 inst_pa = gk20a_mm_inst_block_addr(g, inst_block);
+	u64 inst_pa = nvgpu_inst_block_addr(g, inst_block);
 
 	gk20a_dbg_fn("");
 
@@ -374,6 +375,6 @@ void gp10b_remove_bar2_vm(struct gk20a *g)
 	struct mm_gk20a *mm = &g->mm;
 
 	gp10b_replayable_pagefault_buffer_deinit(g);
-	gk20a_free_inst_block(g, &mm->bar2.inst_block);
+	nvgpu_free_inst_block(g, &mm->bar2.inst_block);
 	nvgpu_vm_put(mm->bar2.vm);
 }
diff --git a/drivers/gpu/nvgpu/include/nvgpu/mm.h b/drivers/gpu/nvgpu/include/nvgpu/mm.h
new file mode 100644
index 000000000..13b33d9f5
--- /dev/null
+++ b/drivers/gpu/nvgpu/include/nvgpu/mm.h
@@ -0,0 +1,220 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __NVGPU_MM_H__
+#define __NVGPU_MM_H__
+
+#include <nvgpu/types.h>
+#include <nvgpu/cond.h>
+#include <nvgpu/thread.h>
+#include <nvgpu/lock.h>
+#include <nvgpu/atomic.h>
+#include <nvgpu/nvgpu_mem.h>
+#include <nvgpu/allocator.h>
+#include <nvgpu/list.h>
+
+struct gk20a;
+struct vm_gk20a;
+struct nvgpu_mem;
+struct nvgpu_pd_cache;
+
+#define FAULT_TYPE_NUM		2	/* replay and nonreplay faults */
+
+struct mmu_fault_info {
+	u64	inst_ptr;
+	u32	inst_aperture;
+	u64	fault_addr;
+	u32	fault_addr_aperture;
+	u32	timestamp_lo;
+	u32	timestamp_hi;
+	u32	mmu_engine_id;
+	u32	gpc_id;
+	u32	client_type;
+	u32	client_id;
+	u32	fault_type;
+	u32	access_type;
+	u32	protected_mode;
+	u32	replayable_fault;
+	u32	replay_fault_en;
+	u32	valid;
+	u32	faulted_pbdma;
+	u32	faulted_engine;
+	u32	faulted_subid;
+	u32	chid;
+	struct channel_gk20a *refch;
+	const char *client_type_desc;
+	const char *fault_type_desc;
+	const char *client_id_desc;
+};
+
+enum nvgpu_flush_op {
+	NVGPU_FLUSH_DEFAULT,
+	NVGPU_FLUSH_FB,
+	NVGPU_FLUSH_L2_INV,
+	NVGPU_FLUSH_L2_FLUSH,
+	NVGPU_FLUSH_CBC_CLEAN,
+};
+
+struct mm_gk20a {
+	struct gk20a *g;
+
+	/* Default GPU VA address space sizes for channels */
+	struct {
+		u64 user_size;   /* userspace-visible GPU VA region */
+		u64 kernel_size; /* kernel-only GPU VA region */
+	} channel;
+
+	struct {
+		u32 aperture_size;
+		struct vm_gk20a *vm;
+		struct nvgpu_mem inst_block;
+	} bar1;
+
+	struct {
+		u32 aperture_size;
+		struct vm_gk20a *vm;
+		struct nvgpu_mem inst_block;
+	} bar2;
+
+	struct {
+		u32 aperture_size;
+		struct vm_gk20a *vm;
+		struct nvgpu_mem inst_block;
+	} pmu;
+
+	struct {
+		/* using pmu vm currently */
+		struct nvgpu_mem inst_block;
+	} hwpm;
+
+	struct {
+		struct vm_gk20a *vm;
+		struct nvgpu_mem inst_block;
+	} perfbuf;
+
+	struct {
+		struct vm_gk20a *vm;
+	} cde;
+
+	struct {
+		struct vm_gk20a *vm;
+	} ce;
+
+	struct nvgpu_pd_cache *pd_cache;
+
+	struct nvgpu_mutex l2_op_lock;
+	struct nvgpu_mutex tlb_lock;
+	struct nvgpu_mutex priv_lock;
+
+	struct nvgpu_mem bar2_desc;
+
+#ifdef CONFIG_TEGRA_19x_GPU
+	struct nvgpu_mem hw_fault_buf[FAULT_TYPE_NUM];
+	unsigned int hw_fault_buf_status[FAULT_TYPE_NUM];
+	struct mmu_fault_info *fault_info[FAULT_TYPE_NUM];
+	struct nvgpu_mutex hub_isr_mutex;
+	u32    hub_intr_types;
+#endif
+	/*
+	 * Separate function to clean up the CE since it requires a channel to
+	 * be closed which must happen before fifo cleanup.
+	 */
+	void (*remove_ce_support)(struct mm_gk20a *mm);
+	void (*remove_support)(struct mm_gk20a *mm);
+	bool sw_ready;
+	int physical_bits;
+	bool use_full_comp_tag_line;
+	bool ltc_enabled_current;
+	bool ltc_enabled_target;
+	bool bypass_smmu;
+	bool disable_bigpage;
+	bool has_physical_mode;
+
+	struct nvgpu_mem sysmem_flush;
+
+	u32 pramin_window;
+	struct nvgpu_spinlock pramin_window_lock;
+	bool force_pramin; /* via debugfs */
+
+	struct {
+		size_t size;
+		u64 base;
+		size_t bootstrap_size;
+		u64 bootstrap_base;
+
+		struct nvgpu_allocator allocator;
+		struct nvgpu_allocator bootstrap_allocator;
+
+		u32 ce_ctx_id;
+		volatile bool cleared;
+		struct nvgpu_mutex first_clear_mutex;
+
+		struct nvgpu_list_node clear_list_head;
+		struct nvgpu_mutex clear_list_mutex;
+
+		struct nvgpu_cond clearing_thread_cond;
+		struct nvgpu_thread clearing_thread;
+		struct nvgpu_mutex clearing_thread_lock;
+		nvgpu_atomic_t pause_count;
+
+		nvgpu_atomic64_t bytes_pending;
+	} vidmem;
+};
+
+#define gk20a_from_mm(mm) ((mm)->g)
+#define gk20a_from_vm(vm) ((vm)->mm->g)
+
+static inline int bar1_aperture_size_mb_gk20a(void)
+{
+	return 16; /* 16MB is more than enough atm. */
+}
+
+/* The maximum GPU VA range supported */
+#define NV_GMMU_VA_RANGE          38
+
+/* The default userspace-visible GPU VA size */
+#define NV_MM_DEFAULT_USER_SIZE   (1ULL << 37)
+
+/* The default kernel-reserved GPU VA size */
+#define NV_MM_DEFAULT_KERNEL_SIZE (1ULL << 32)
+
+/*
+ * When not using unified address spaces, the bottom 56GB of the space is used
+ * for small pages, and the remaining high memory is used for large pages.
+ */
+static inline u64 __nv_gmmu_va_small_page_limit(void)
+{
+	return ((u64)SZ_1G * 56);
+}
+
+enum gmmu_pgsz_gk20a __get_pte_size_fixed_map(struct vm_gk20a *vm,
+					      u64 base, u64 size);
+enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm, u64 base, u64 size);
+
+void nvgpu_init_mm_ce_context(struct gk20a *g);
+int nvgpu_init_mm_support(struct gk20a *g);
+int nvgpu_init_mm_setup_hw(struct gk20a *g);
+
+u64 nvgpu_inst_block_addr(struct gk20a *g, struct nvgpu_mem *mem);
+void nvgpu_free_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block);
+
+int nvgpu_mm_suspend(struct gk20a *g);
+
+#endif