diff --git a/drivers/gpu/nvgpu/common/as.c b/drivers/gpu/nvgpu/common/as.c
index 481fb807a..99d181958 100644
--- a/drivers/gpu/nvgpu/common/as.c
+++ b/drivers/gpu/nvgpu/common/as.c
@@ -43,7 +43,6 @@ static int gk20a_vm_alloc_share(struct gk20a_as_share *as_share,
 	struct mm_gk20a *mm = &g->mm;
 	struct vm_gk20a *vm;
 	char name[32];
-	int err;
 	const bool userspace_managed =
 		(flags & NVGPU_GPU_IOCTL_ALLOC_AS_FLAGS_USERSPACE_MANAGED) != 0;
 
@@ -60,7 +59,13 @@ static int gk20a_vm_alloc_share(struct gk20a_as_share *as_share,
 		return -EINVAL;
 	}
 
-	vm = nvgpu_kzalloc(g, sizeof(*vm));
+	snprintf(name, sizeof(name), "as_%d", as_share->id);
+
+	vm = nvgpu_vm_init(g, big_page_size,
+			   big_page_size << 10,
+			   mm->channel.kernel_size,
+			   mm->channel.user_size + mm->channel.kernel_size,
+			   !mm->disable_bigpage, userspace_managed, name);
 	if (!vm)
 		return -ENOMEM;
 
@@ -68,15 +73,7 @@ static int gk20a_vm_alloc_share(struct gk20a_as_share *as_share,
 	vm->as_share = as_share;
 	vm->enable_ctag = true;
 
-	snprintf(name, sizeof(name), "as_%d", as_share->id);
-
-	err = nvgpu_init_vm(mm, vm, big_page_size,
-			    big_page_size << 10,
-			    mm->channel.kernel_size,
-			    mm->channel.user_size + mm->channel.kernel_size,
-			    !mm->disable_bigpage, userspace_managed, name);
-
-	return err;
+	return 0;
 }
 
 int gk20a_as_alloc_share(struct gk20a *g,
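The arguments passed to nvgpu_vm_init() above carve the aperture into the three
sections documented in the function's comment in common/mm/vm.c below: a low
hole of big_page_size << 10, a kernel section of mm->channel.kernel_size at the
top, and a user section covering whatever remains. A minimal standalone sketch
of that arithmetic, with made-up sizes standing in for the mm->channel values:

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative only: prints the map documented at nvgpu_vm_init(). */
    static void show_vm_map(uint64_t low_hole, uint64_t kernel_reserved,
                            uint64_t aperture_size)
    {
            /* The user section is whatever is left between the low hole
             * and the kernel section at the top of the aperture. */
            printf("low hole: [0x0, 0x%llx)\n",
                   (unsigned long long)low_hole);
            printf("user:     [0x%llx, 0x%llx)\n",
                   (unsigned long long)low_hole,
                   (unsigned long long)(aperture_size - kernel_reserved));
            printf("kernel:   [0x%llx, 0x%llx)\n",
                   (unsigned long long)(aperture_size - kernel_reserved),
                   (unsigned long long)aperture_size);
    }

    int main(void)
    {
            uint64_t big_page_size = 64 << 10;  /* 64K big pages */
            uint64_t kernel_size = 1ULL << 32;  /* assumed kernel size */
            uint64_t user_size = 1ULL << 37;    /* assumed user size */

            /* A 64K big page size yields a 64MB low hole (64K << 10). */
            show_vm_map(big_page_size << 10, kernel_size,
                        user_size + kernel_size);
            return 0;
    }
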
diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c
index 171a67ca4..e24d40bfc 100644
--- a/drivers/gpu/nvgpu/common/mm/vm.c
+++ b/drivers/gpu/nvgpu/common/mm/vm.c
@@ -204,52 +204,15 @@ static int nvgpu_init_sema_pool(struct vm_gk20a *vm)
 	return 0;
 }
 
-/**
- * nvgpu_init_vm() - Initialize an address space.
- *
- * @mm - Parent MM.
- * @vm - The VM to init.
- * @big_page_size - Size of big pages associated with this VM.
- * @low_hole - The size of the low hole (unaddressable memory at the bottom of
- *	       the address space).
- * @kernel_reserved - Space reserved for kernel only allocations.
- * @aperture_size - Total size of the aperture.
- * @big_pages - If true then big pages are possible in the VM. Note this does
- *		not guarantee that big pages will be possible.
- * @name - Name of the address space.
- *
- * This function initializes an address space according to the following map:
- *
- *     +--+ 0x0
- *     |  |
- *     +--+ @low_hole
- *     |  |
- *     ~  ~   This is the "user" section.
- *     |  |
- *     +--+ @aperture_size - @kernel_reserved
- *     |  |
- *     ~  ~   This is the "kernel" section.
- *     |  |
- *     +--+ @aperture_size
- *
- * The user section is therefor what ever is left over after the @low_hole and
- * @kernel_reserved memory have been portioned out. The @kernel_reserved is
- * always persent at the top of the memory space and the @low_hole is always at
- * the bottom.
- *
- * For certain address spaces a "user" section makes no sense (bar1, etc) so in
- * such cases the @kernel_reserved and @low_hole should sum to exactly
- * @aperture_size.
- */
-int nvgpu_init_vm(struct mm_gk20a *mm,
-		  struct vm_gk20a *vm,
-		  u32 big_page_size,
-		  u64 low_hole,
-		  u64 kernel_reserved,
-		  u64 aperture_size,
-		  bool big_pages,
-		  bool userspace_managed,
-		  char *name)
+static int __nvgpu_vm_init(struct mm_gk20a *mm,
+			   struct vm_gk20a *vm,
+			   u32 big_page_size,
+			   u64 low_hole,
+			   u64 kernel_reserved,
+			   u64 aperture_size,
+			   bool big_pages,
+			   bool userspace_managed,
+			   char *name)
 {
 	int err;
 	char alloc_name[32];
@@ -257,7 +220,7 @@ int nvgpu_init_vm(struct mm_gk20a *mm,
 	u64 user_vma_start, user_vma_limit;
 	u64 user_lp_vma_start, user_lp_vma_limit;
 	u64 kernel_vma_start, kernel_vma_limit;
-	struct gk20a *g = mm->g;
+	struct gk20a *g = gk20a_from_mm(mm);
 
 	if (WARN_ON(kernel_reserved + low_hole > aperture_size))
 		return -ENOMEM;
@@ -467,22 +430,71 @@ clean_up_vgpu_vm:
 	return err;
 }
 
-void nvgpu_deinit_vm(struct vm_gk20a *vm)
+/**
+ * nvgpu_vm_init() - Initialize an address space.
+ *
+ * @g - The GPU.
+ * @big_page_size - Size of big pages associated with this VM.
+ * @low_hole - The size of the low hole (unaddressable memory at the bottom of
+ *	       the address space).
+ * @kernel_reserved - Space reserved for kernel only allocations.
+ * @aperture_size - Total size of the aperture.
+ * @big_pages - If true then big pages are possible in the VM. Note this does
+ *		not guarantee that big pages will be possible.
+ * @userspace_managed - Whether the VM is managed from userspace.
+ * @name - Name of the address space.
+ *
+ * This function initializes an address space according to the following map:
+ *
+ *     +--+ 0x0
+ *     |  |
+ *     +--+ @low_hole
+ *     |  |
+ *     ~  ~   This is the "user" section.
+ *     |  |
+ *     +--+ @aperture_size - @kernel_reserved
+ *     |  |
+ *     ~  ~   This is the "kernel" section.
+ *     |  |
+ *     +--+ @aperture_size
+ *
+ * The user section is therefore whatever is left over after the @low_hole and
+ * @kernel_reserved memory have been portioned out. The @kernel_reserved is
+ * always present at the top of the memory space and the @low_hole is always at
+ * the bottom.
+ *
+ * For certain address spaces a "user" section makes no sense (bar1, etc) so in
+ * such cases the @kernel_reserved and @low_hole should sum to exactly
+ * @aperture_size.
+ */
+struct vm_gk20a *nvgpu_vm_init(struct gk20a *g,
+			       u32 big_page_size,
+			       u64 low_hole,
+			       u64 kernel_reserved,
+			       u64 aperture_size,
+			       bool big_pages,
+			       bool userspace_managed,
+			       char *name)
 {
-	if (nvgpu_alloc_initialized(&vm->kernel))
-		nvgpu_alloc_destroy(&vm->kernel);
-	if (nvgpu_alloc_initialized(&vm->user))
-		nvgpu_alloc_destroy(&vm->user);
-	if (nvgpu_alloc_initialized(&vm->user_lp))
-		nvgpu_alloc_destroy(&vm->user_lp);
+	struct vm_gk20a *vm = nvgpu_kzalloc(g, sizeof(*vm));
 
-	gk20a_vm_free_entries(vm, &vm->pdb, 0);
+	if (!vm)
+		return NULL;
+
+	if (__nvgpu_vm_init(&g->mm, vm, big_page_size, low_hole,
+			    kernel_reserved, aperture_size, big_pages,
+			    userspace_managed, name)) {
+		nvgpu_kfree(g, vm);
+		return NULL;
+	}
+
+	return vm;
 }
 
 /*
- * Cleanup the VM but don't nvgpu_kfree() on the vm pointer.
+ * Clean up the VM and free it. This runs when the last reference is dropped.
  */
-void __nvgpu_vm_remove(struct vm_gk20a *vm)
+static void __nvgpu_vm_remove(struct vm_gk20a *vm)
{
 	struct nvgpu_mapped_buf *mapped_buffer;
 	struct nvgpu_vm_area *vm_area, *vm_area_tmp;
@@ -518,7 +530,14 @@ void __nvgpu_vm_remove(struct vm_gk20a *vm)
 		nvgpu_kfree(vm->mm->g, vm_area);
 	}
 
-	nvgpu_deinit_vm(vm);
+	if (nvgpu_alloc_initialized(&vm->kernel))
+		nvgpu_alloc_destroy(&vm->kernel);
+	if (nvgpu_alloc_initialized(&vm->user))
+		nvgpu_alloc_destroy(&vm->user);
+	if (nvgpu_alloc_initialized(&vm->user_lp))
+		nvgpu_alloc_destroy(&vm->user_lp);
+
+	gk20a_vm_free_entries(vm, &vm->pdb, 0);
 
 #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
 	if (g->is_virtual)
@@ -526,34 +545,15 @@ void __nvgpu_vm_remove(struct vm_gk20a *vm)
 #endif
 
 	nvgpu_mutex_release(&vm->update_gmmu_lock);
-}
 
-/*
- * Remove and nvgpu_kfree() the VM struct.
- */
-void nvgpu_vm_remove(struct vm_gk20a *vm)
-{
-	__nvgpu_vm_remove(vm);
-
-	nvgpu_kfree(vm->mm->g, vm);
-}
-
-/*
- * Note: this does not nvgpu_kfree() the vm. This might be a bug.
- */
-void nvgpu_vm_remove_inst(struct vm_gk20a *vm, struct nvgpu_mem *inst_block)
-{
-	struct gk20a *g = vm->mm->g;
-
-	gk20a_free_inst_block(g, inst_block);
-	__nvgpu_vm_remove(vm);
+	nvgpu_kfree(g, vm);
 }
 
 static void __nvgpu_vm_remove_kref(struct kref *ref)
 {
 	struct vm_gk20a *vm = container_of(ref, struct vm_gk20a, ref);
 
-	nvgpu_vm_remove(vm);
+	__nvgpu_vm_remove(vm);
 }
 
 void nvgpu_vm_get(struct vm_gk20a *vm)
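The vm.c changes make teardown purely refcount-driven: the final
nvgpu_vm_put() fires the kref release callback, and __nvgpu_vm_remove() now
also frees the struct, so there is no longer a remove variant that leaves the
allocation behind. A minimal self-contained sketch of the same kref pattern,
assuming the standard kref_get()/kref_put() bodies behind
nvgpu_vm_get()/nvgpu_vm_put() (the real release path also unmaps buffers,
destroys the allocators, and frees the page tables):

    #include <linux/kref.h>
    #include <linux/slab.h>

    struct vm_gk20a {
            struct kref ref;
            /* ... allocators, mapped buffers, page tables ... */
    };

    /* Release callback: runs exactly once, when the last ref drops. */
    static void __nvgpu_vm_remove_kref(struct kref *ref)
    {
            struct vm_gk20a *vm = container_of(ref, struct vm_gk20a, ref);

            kfree(vm);      /* stands in for __nvgpu_vm_remove(vm) */
    }

    struct vm_gk20a *vm_create_sketch(void)
    {
            struct vm_gk20a *vm = kzalloc(sizeof(*vm), GFP_KERNEL);

            if (!vm)
                    return NULL;
            kref_init(&vm->ref);    /* count starts at 1: creator's ref */
            return vm;
    }

    void nvgpu_vm_get(struct vm_gk20a *vm)
    {
            kref_get(&vm->ref);
    }

    void nvgpu_vm_put(struct vm_gk20a *vm)
    {
            kref_put(&vm->ref, __nvgpu_vm_remove_kref);
    }
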
diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
index 084f17934..730ffe5ce 100644
--- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c
@@ -1230,7 +1230,7 @@ static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx)
 	}
 
 	/* bind the channel to the vm */
-	err = __gk20a_vm_bind_channel(&g->mm.cde.vm, ch);
+	err = __gk20a_vm_bind_channel(g->mm.cde.vm, ch);
 	if (err) {
 		nvgpu_warn(g, "cde: could not bind vm");
 		goto err_commit_va;
diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
index c905bedb2..8e600c18e 100644
--- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
@@ -451,7 +451,7 @@ u32 gk20a_ce_create_context_with_cb(struct gk20a *g,
 	ce_ctx->submitted_seq_number = 0;
 	ce_ctx->completed_seq_number = 0;
 
-	ce_ctx->vm = &g->mm.ce.vm;
+	ce_ctx->vm = g->mm.ce.vm;
 
 	/* always kernel client needs privileged channel */
 	ce_ctx->ch = gk20a_open_new_channel_with_cb(g, gk20a_ce_finished_ctx_cb,
@@ -465,7 +465,7 @@ u32 gk20a_ce_create_context_with_cb(struct gk20a *g,
 	ce_ctx->ch->wdt_enabled = false;
 
 	/* bind the channel to the vm */
-	err = __gk20a_vm_bind_channel(&g->mm.ce.vm, ce_ctx->ch);
+	err = __gk20a_vm_bind_channel(g->mm.ce.vm, ce_ctx->ch);
 	if (err) {
 		nvgpu_err(g, "ce: could not bind vm");
 		goto end;
diff --git a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
index f5176e984..452bcd114 100644
--- a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c
@@ -146,7 +146,7 @@ static int css_hw_enable_snapshot(struct channel_gk20a *ch,
 	if (snapshot_size < CSS_MIN_HW_SNAPSHOT_SIZE)
 		snapshot_size = CSS_MIN_HW_SNAPSHOT_SIZE;
 
-	ret = nvgpu_dma_alloc_map_sys(&g->mm.pmu.vm, snapshot_size,
+	ret = nvgpu_dma_alloc_map_sys(g->mm.pmu.vm, snapshot_size,
 			&data->hw_memdesc);
 	if (ret)
 		return ret;
@@ -195,7 +195,7 @@ static int css_hw_enable_snapshot(struct channel_gk20a *ch,
 
 failed_allocation:
 	if (data->hw_memdesc.size) {
-		nvgpu_dma_unmap_free(&g->mm.pmu.vm, &data->hw_memdesc);
+		nvgpu_dma_unmap_free(g->mm.pmu.vm, &data->hw_memdesc);
		memset(&data->hw_memdesc, 0, sizeof(data->hw_memdesc));
 	}
 	data->hw_snapshot = NULL;
@@ -223,7 +223,7 @@ static void css_hw_disable_snapshot(struct gr_gk20a *gr)
 			perf_pmasys_mem_block_valid_false_f() |
 			perf_pmasys_mem_block_target_f(0));
 
-	nvgpu_dma_unmap_free(&g->mm.pmu.vm, &data->hw_memdesc);
+	nvgpu_dma_unmap_free(g->mm.pmu.vm, &data->hw_memdesc);
 	memset(&data->hw_memdesc, 0, sizeof(data->hw_memdesc));
 	data->hw_snapshot = NULL;
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index 09268b6b9..4bfa041ee 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -1838,7 +1838,6 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 {
 	struct gk20a *g = dbg_s->g;
 	struct mm_gk20a *mm = &g->mm;
-	struct vm_gk20a *vm = &mm->perfbuf.vm;
 	int err;
 	u32 virt_size;
 	u32 virt_addr_lo;
@@ -1853,23 +1852,23 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 		return -EBUSY;
 	}
 
-	err = nvgpu_init_vm(mm, vm, big_page_size,
+	mm->perfbuf.vm = nvgpu_vm_init(g, big_page_size,
 			big_page_size << 10,
 			NV_MM_DEFAULT_KERNEL_SIZE,
 			NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
 			false, false, "perfbuf");
-	if (err) {
+	if (!mm->perfbuf.vm) {
 		nvgpu_mutex_release(&g->dbg_sessions_lock);
-		return err;
+		return -ENOMEM;
 	}
 
 	err = gk20a_alloc_inst_block(g, &mm->perfbuf.inst_block);
 	if (err)
 		goto err_remove_vm;
 
-	g->ops.mm.init_inst_block(&mm->perfbuf.inst_block, vm, 0);
+	g->ops.mm.init_inst_block(&mm->perfbuf.inst_block, mm->perfbuf.vm, 0);
 
-	err = nvgpu_vm_map_buffer(vm,
+	err = nvgpu_vm_map_buffer(mm->perfbuf.vm,
 			args->dmabuf_fd,
 			&args->offset,
 			0,
@@ -1922,9 +1921,10 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 	return 0;
 
 err_unmap:
-	nvgpu_vm_unmap_buffer(vm, args->offset, NULL);
+	nvgpu_vm_unmap_buffer(mm->perfbuf.vm, args->offset, NULL);
 err_remove_vm:
-	nvgpu_vm_remove_inst(vm, &mm->perfbuf.inst_block);
+	gk20a_free_inst_block(g, &mm->perfbuf.inst_block);
+	nvgpu_vm_put(mm->perfbuf.vm);
 	nvgpu_mutex_release(&g->dbg_sessions_lock);
 	return err;
 }
@@ -1956,13 +1956,14 @@ static int gk20a_perfbuf_disable_locked(struct gk20a *g)
 static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset)
 {
 	struct mm_gk20a *mm = &g->mm;
-	struct vm_gk20a *vm = &mm->perfbuf.vm;
+	struct vm_gk20a *vm = mm->perfbuf.vm;
 	int err;
 
 	err = gk20a_perfbuf_disable_locked(g);
 
 	nvgpu_vm_unmap_buffer(vm, offset, NULL);
-	nvgpu_vm_remove_inst(vm, &mm->perfbuf.inst_block);
+	gk20a_free_inst_block(g, &mm->perfbuf.inst_block);
+	nvgpu_vm_put(vm);
 
 	g->perfbuf.owner = NULL;
 	g->perfbuf.offset = 0;
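The perfbuf path above is the template every caller of the new API follows:
create the VM, allocate an instance block, point the instance block at the VM,
and tear down in reverse — instance block first, then the VM reference. A
condensed sketch of that ordering, built from the calls in the hunks above:

    static int perfbuf_setup_sketch(struct gk20a *g, u32 big_page_size)
    {
            struct mm_gk20a *mm = &g->mm;
            int err;

            /* 1. Create the VM; failure is now signalled by NULL. */
            mm->perfbuf.vm = nvgpu_vm_init(g, big_page_size,
                            big_page_size << 10,
                            NV_MM_DEFAULT_KERNEL_SIZE,
                            NV_MM_DEFAULT_KERNEL_SIZE +
                                    NV_MM_DEFAULT_USER_SIZE,
                            false, false, "perfbuf");
            if (!mm->perfbuf.vm)
                    return -ENOMEM;

            /* 2. The instance block is bound to the VM afterwards. */
            err = gk20a_alloc_inst_block(g, &mm->perfbuf.inst_block);
            if (err) {
                    nvgpu_vm_put(mm->perfbuf.vm);   /* undo step 1 */
                    return err;
            }
            g->ops.mm.init_inst_block(&mm->perfbuf.inst_block,
                            mm->perfbuf.vm, 0);
            return 0;
    }

    static void perfbuf_teardown_sketch(struct gk20a *g)
    {
            struct mm_gk20a *mm = &g->mm;

            /* 3. Reverse order: inst block first, then the VM ref. */
            gk20a_free_inst_block(g, &mm->perfbuf.inst_block);
            nvgpu_vm_put(mm->perfbuf.vm);
    }
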
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 63896228e..7eb28f2f2 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -541,7 +541,7 @@ static void gk20a_remove_fifo_support(struct fifo_gk20a *f)
 	nvgpu_vfree(g, f->channel);
 	nvgpu_vfree(g, f->tsg);
 	if (g->ops.mm.is_bar1_supported(g))
-		nvgpu_dma_unmap_free(&g->mm.bar1.vm, &f->userd);
+		nvgpu_dma_unmap_free(g->mm.bar1.vm, &f->userd);
 	else
 		nvgpu_dma_free(g, &f->userd);
 
@@ -923,7 +923,7 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g)
 	nvgpu_mutex_init(&f->free_chs_mutex);
 
 	if (g->ops.mm.is_bar1_supported(g))
-		err = nvgpu_dma_alloc_map_sys(&g->mm.bar1.vm,
+		err = nvgpu_dma_alloc_map_sys(g->mm.bar1.vm,
 				f->userd_entry_size * f->num_channels,
 				&f->userd);
 
@@ -963,7 +963,7 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g)
 clean_up:
 	gk20a_dbg_fn("fail");
 	if (g->ops.mm.is_bar1_supported(g))
-		nvgpu_dma_unmap_free(&g->mm.bar1.vm, &f->userd);
+		nvgpu_dma_unmap_free(g->mm.bar1.vm, &f->userd);
 	else
 		nvgpu_dma_free(g, &f->userd);
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 31b0a771b..e4d454fee 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -436,10 +436,10 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
 
 	gpu->bus_type = NVGPU_GPU_BUS_TYPE_AXI; /* always AXI for now */
 
-	gpu->big_page_size = g->mm.pmu.vm.big_page_size;
 	gpu->compression_page_size = g->ops.fb.compression_page_size(g);
+	gpu->big_page_size = platform->default_big_page_size;
 	gpu->pde_coverage_bit_count =
-		gk20a_mm_pde_coverage_bit_count(&g->mm.pmu.vm);
+		g->ops.mm.get_mmu_levels(g, gpu->big_page_size)[0].lo_bit[0];
 
 	if (g->mm.disable_bigpage) {
 		gpu->big_page_size = 0;
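gk20a_init_gpu_characteristics() no longer reaches into the pmu VM, which may
not exist as an object at this point; both values now come from per-chip data.
The dropped helper presumably read the first MMU level's low bit out of the
VM's level table — a sketch of the assumed equivalence (the helper body shown
here is an assumption, not quoted from the driver):

    /* Assumed shape of the dropped helper, for illustration: */
    static u32 gk20a_mm_pde_coverage_bit_count(struct vm_gk20a *vm)
    {
            /* Address bits covered by one top-level PDE. */
            return vm->mmu_levels[0].lo_bit[0];
    }

    /* The replacement asks the per-chip ops table instead, so no VM
     * instance is needed at query time: */
    static u32 pde_coverage_bit_count(struct gk20a *g, u32 big_page_size)
    {
            return g->ops.mm.get_mmu_levels(g, big_page_size)[0].lo_bit[0];
    }
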
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index f8e058185..b2ae77c31 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -2191,7 +2191,7 @@ static void gr_gk20a_start_falcon_ucode(struct gk20a *g)
 static int gr_gk20a_init_ctxsw_ucode_vaspace(struct gk20a *g)
 {
 	struct mm_gk20a *mm = &g->mm;
-	struct vm_gk20a *vm = &mm->pmu.vm;
+	struct vm_gk20a *vm = mm->pmu.vm;
 	struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
 	int err;
 
@@ -2265,7 +2265,7 @@ static int gr_gk20a_copy_ctxsw_ucode_segments(
 int gr_gk20a_init_ctxsw_ucode(struct gk20a *g)
 {
 	struct mm_gk20a *mm = &g->mm;
-	struct vm_gk20a *vm = &mm->pmu.vm;
+	struct vm_gk20a *vm = mm->pmu.vm;
 	struct gk20a_ctxsw_bootloader_desc *fecs_boot_desc;
 	struct gk20a_ctxsw_bootloader_desc *gpccs_boot_desc;
 	struct nvgpu_firmware *fecs_fw;
@@ -5195,7 +5195,7 @@ static int gk20a_init_gr_bind_fecs_elpg(struct gk20a *g)
 {
 	struct nvgpu_pmu *pmu = &g->pmu;
 	struct mm_gk20a *mm = &g->mm;
-	struct vm_gk20a *vm = &mm->pmu.vm;
+	struct vm_gk20a *vm = mm->pmu.vm;
 	int err = 0;
 	u32 size;
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 87e6f30c5..a1873a303 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -465,8 +465,7 @@ static void gk20a_remove_mm_ce_support(struct mm_gk20a *mm)
 
 	mm->vidmem.ce_ctx_id = (u32)~0;
 
-	__nvgpu_vm_remove(&mm->ce.vm);
-
+	nvgpu_vm_put(mm->ce.vm);
 }
 
 static void gk20a_remove_mm_support(struct mm_gk20a *mm)
@@ -476,12 +475,15 @@ static void gk20a_remove_mm_support(struct mm_gk20a *mm)
 	if (g->ops.mm.remove_bar2_vm)
 		g->ops.mm.remove_bar2_vm(g);
 
-	if (g->ops.mm.is_bar1_supported(g))
-		nvgpu_vm_remove_inst(&mm->bar1.vm, &mm->bar1.inst_block);
+	if (g->ops.mm.is_bar1_supported(g)) {
+		gk20a_free_inst_block(g, &mm->bar1.inst_block);
+		nvgpu_vm_put(mm->bar1.vm);
+	}
 
-	nvgpu_vm_remove_inst(&mm->pmu.vm, &mm->pmu.inst_block);
-	gk20a_free_inst_block(gk20a_from_mm(mm), &mm->hwpm.inst_block);
-	__nvgpu_vm_remove(&mm->cde.vm);
+	gk20a_free_inst_block(g, &mm->pmu.inst_block);
+	gk20a_free_inst_block(g, &mm->hwpm.inst_block);
+	nvgpu_vm_put(mm->pmu.vm);
+	nvgpu_vm_put(mm->cde.vm);
 
 	gk20a_semaphore_sea_destroy(g);
 	gk20a_vidmem_destroy(g);
@@ -2641,30 +2643,31 @@ u64 gk20a_mm_inst_block_addr(struct gk20a *g, struct nvgpu_mem *inst_block)
 static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
 {
 	int err;
-	struct vm_gk20a *vm = &mm->bar1.vm;
 	struct gk20a *g = gk20a_from_mm(mm);
 	struct nvgpu_mem *inst_block = &mm->bar1.inst_block;
 	u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size;
 
 	mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20;
 	gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size);
 
-	nvgpu_init_vm(mm, vm,
-		      big_page_size,
-		      SZ_4K,				/* Low hole */
-		      mm->bar1.aperture_size - SZ_4K,	/* Kernel reserved. */
-		      mm->bar1.aperture_size,
-		      true, false,
-		      "bar1");
+	mm->bar1.vm = nvgpu_vm_init(g,
+				    big_page_size,
+				    SZ_4K,
+				    mm->bar1.aperture_size - SZ_4K,
+				    mm->bar1.aperture_size,
+				    true, false,
+				    "bar1");
+	if (!mm->bar1.vm)
+		return -ENOMEM;
 
 	err = gk20a_alloc_inst_block(g, inst_block);
 	if (err)
-		goto clean_up_va;
-	g->ops.mm.init_inst_block(inst_block, vm, big_page_size);
+		goto clean_up_vm;
+	g->ops.mm.init_inst_block(inst_block, mm->bar1.vm, big_page_size);
 
 	return 0;
 
-clean_up_va:
-	nvgpu_deinit_vm(vm);
+clean_up_vm:
+	nvgpu_vm_put(mm->bar1.vm);
 	return err;
 }
 
@@ -2672,7 +2675,6 @@ clean_up_va:
 static int gk20a_init_system_vm(struct mm_gk20a *mm)
 {
 	int err;
-	struct vm_gk20a *vm = &mm->pmu.vm;
 	struct gk20a *g = gk20a_from_mm(mm);
 	struct nvgpu_mem *inst_block = &mm->pmu.inst_block;
 	u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size;
@@ -2687,65 +2689,70 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm)
 	mm->pmu.aperture_size = GK20A_PMU_VA_SIZE;
 	gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size);
 
-	nvgpu_init_vm(mm, vm, big_page_size,
-		      low_hole,
-		      aperture_size - low_hole,
-		      aperture_size,
-		      true,
-		      false,
-		      "system");
+	mm->pmu.vm = nvgpu_vm_init(g, big_page_size,
+				   low_hole,
+				   aperture_size - low_hole,
+				   aperture_size,
+				   true,
+				   false,
+				   "system");
+	if (!mm->pmu.vm)
+		return -ENOMEM;
 
 	err = gk20a_alloc_inst_block(g, inst_block);
 	if (err)
-		goto clean_up_va;
-	g->ops.mm.init_inst_block(inst_block, vm, big_page_size);
+		goto clean_up_vm;
+	g->ops.mm.init_inst_block(inst_block, mm->pmu.vm, big_page_size);
 
 	return 0;
 
-clean_up_va:
-	nvgpu_deinit_vm(vm);
+clean_up_vm:
+	nvgpu_vm_put(mm->pmu.vm);
 	return err;
 }
 
 static int gk20a_init_hwpm(struct mm_gk20a *mm)
 {
 	int err;
-	struct vm_gk20a *vm = &mm->pmu.vm;
 	struct gk20a *g = gk20a_from_mm(mm);
 	struct nvgpu_mem *inst_block = &mm->hwpm.inst_block;
 
 	err = gk20a_alloc_inst_block(g, inst_block);
 	if (err)
 		return err;
-	g->ops.mm.init_inst_block(inst_block, vm, 0);
+	g->ops.mm.init_inst_block(inst_block, mm->pmu.vm, 0);
 
 	return 0;
 }
 
 static int gk20a_init_cde_vm(struct mm_gk20a *mm)
 {
-	struct vm_gk20a *vm = &mm->cde.vm;
 	struct gk20a *g = gk20a_from_mm(mm);
 	u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size;
 
-	return nvgpu_init_vm(mm, vm, big_page_size,
-			     big_page_size << 10,
-			     NV_MM_DEFAULT_KERNEL_SIZE,
-			     NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
-			     false, false, "cde");
+	mm->cde.vm = nvgpu_vm_init(g, big_page_size,
+			big_page_size << 10,
+			NV_MM_DEFAULT_KERNEL_SIZE,
+			NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
+			false, false, "cde");
+	if (!mm->cde.vm)
+		return -ENOMEM;
+
+	return 0;
 }
 
 static int gk20a_init_ce_vm(struct mm_gk20a *mm)
 {
-	struct vm_gk20a *vm = &mm->ce.vm;
 	struct gk20a *g = gk20a_from_mm(mm);
 	u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size;
 
-	return nvgpu_init_vm(mm, vm, big_page_size,
-			     big_page_size << 10,
-			     NV_MM_DEFAULT_KERNEL_SIZE,
-			     NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
-			     false, false, "ce");
+	mm->ce.vm = nvgpu_vm_init(g, big_page_size,
+			big_page_size << 10,
+			NV_MM_DEFAULT_KERNEL_SIZE,
+			NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
+			false, false, "ce");
+	if (!mm->ce.vm)
+		return -ENOMEM;
+
+	return 0;
 }
 
 void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block,
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 16c35d34a..7e2ba051e 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -78,8 +78,6 @@ struct pm_ctx_desc {
 	u32 pm_mode;
 };
 
-struct gk20a;
-
 struct compbit_store_desc {
 	struct nvgpu_mem mem;
 
@@ -191,19 +189,19 @@ struct mm_gk20a {
 
 	struct {
 		u32 aperture_size;
-		struct vm_gk20a vm;
+		struct vm_gk20a *vm;
 		struct nvgpu_mem inst_block;
 	} bar1;
 
 	struct {
 		u32 aperture_size;
-		struct vm_gk20a vm;
+		struct vm_gk20a *vm;
 		struct nvgpu_mem inst_block;
 	} bar2;
 
 	struct {
 		u32 aperture_size;
-		struct vm_gk20a vm;
+		struct vm_gk20a *vm;
 		struct nvgpu_mem inst_block;
 	} pmu;
 
@@ -213,16 +211,16 @@ struct mm_gk20a {
 	} hwpm;
 
 	struct {
-		struct vm_gk20a vm;
+		struct vm_gk20a *vm;
 		struct nvgpu_mem inst_block;
 	} perfbuf;
 
 	struct {
-		struct vm_gk20a vm;
+		struct vm_gk20a *vm;
 	} cde;
 
 	struct {
-		struct vm_gk20a vm;
+		struct vm_gk20a *vm;
 	} ce;
 
 	struct nvgpu_mutex l2_op_lock;
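With pointer members each of these VMs gets an independent, refcounted
lifetime, but there is a new ordering constraint: mm->pmu.vm is NULL until
gk20a_init_system_vm() has run, whereas &mm->pmu.vm was previously always a
valid address. A hypothetical defensive check illustrating the constraint (the
wrapper below is illustrative, not part of the patch):

    /* Hypothetical: gk20a_init_hwpm() points its inst block at the pmu
     * VM, so the system VM must be created first. With an embedded
     * struct this could not be detected; with a pointer it can. */
    static int gk20a_init_hwpm_checked(struct mm_gk20a *mm)
    {
            if (WARN_ON(!mm->pmu.vm))
                    return -EINVAL; /* gk20a_init_system_vm() not run */

            return gk20a_init_hwpm(mm);
    }
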
diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
index e74a52645..f296b57c5 100644
--- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c
@@ -3132,7 +3132,7 @@ static int gk20a_prepare_ucode(struct gk20a *g)
 	struct nvgpu_pmu *pmu = &g->pmu;
 	int err = 0;
 	struct mm_gk20a *mm = &g->mm;
-	struct vm_gk20a *vm = &mm->pmu.vm;
+	struct vm_gk20a *vm = mm->pmu.vm;
 
 	if (pmu->fw)
 		return gk20a_init_pmu(pmu);
@@ -3170,7 +3170,7 @@ static int gk20a_init_pmu_setup_sw(struct gk20a *g)
 {
 	struct nvgpu_pmu *pmu = &g->pmu;
 	struct mm_gk20a *mm = &g->mm;
-	struct vm_gk20a *vm = &mm->pmu.vm;
+	struct vm_gk20a *vm = mm->pmu.vm;
 	unsigned int i;
 	int err = 0;
 	u8 *ptr;
@@ -4793,7 +4793,7 @@ int gk20a_pmu_vidmem_surface_alloc(struct gk20a *g, struct nvgpu_mem *mem,
 		u32 size)
 {
 	struct mm_gk20a *mm = &g->mm;
-	struct vm_gk20a *vm = &mm->pmu.vm;
+	struct vm_gk20a *vm = mm->pmu.vm;
 	int err;
 
 	err = nvgpu_dma_alloc_map_vid(vm, size, mem);
@@ -4809,7 +4809,7 @@ int gk20a_pmu_sysmem_surface_alloc(struct gk20a *g, struct nvgpu_mem *mem,
 		u32 size)
 {
 	struct mm_gk20a *mm = &g->mm;
-	struct vm_gk20a *vm = &mm->pmu.vm;
+	struct vm_gk20a *vm = mm->pmu.vm;
 	int err;
 
 	err = nvgpu_dma_alloc_map_sys(vm, size, mem);
diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
index 666f629e6..580ba5e5b 100644
--- a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c
@@ -388,7 +388,7 @@ int prepare_ucode_blob(struct gk20a *g)
 	u32 wprsize;
 	int i;
 	struct mm_gk20a *mm = &g->mm;
-	struct vm_gk20a *vm = &mm->pmu.vm;
+	struct vm_gk20a *vm = mm->pmu.vm;
 	struct wpr_carveout_info wpr_inf;
 	struct page **pages;
 
@@ -1067,7 +1067,7 @@ static int lsf_gen_wpr_requirements(struct gk20a *g, struct ls_flcn_mgr *plsfm)
 static int gm20b_bootstrap_hs_flcn(struct gk20a *g)
 {
 	struct mm_gk20a *mm = &g->mm;
-	struct vm_gk20a *vm = &mm->pmu.vm;
+	struct vm_gk20a *vm = mm->pmu.vm;
 	int err = 0;
 	u64 *acr_dmem;
 	u32 img_size_in_bytes = 0;
@@ -1385,7 +1385,7 @@ static int gm20b_init_pmu_setup_hw1(struct gk20a *g,
 int pmu_exec_gen_bl(struct gk20a *g, void *desc, u8 b_wait_for_halt)
 {
 	struct mm_gk20a *mm = &g->mm;
-	struct vm_gk20a *vm = &mm->pmu.vm;
+	struct vm_gk20a *vm = mm->pmu.vm;
 	int err = 0;
 	u32 bl_sz;
 	struct acr_desc *acr = &g->acr;
diff --git a/drivers/gpu/nvgpu/gp106/acr_gp106.c b/drivers/gpu/nvgpu/gp106/acr_gp106.c
index c7d71ab05..46bc50551 100644
--- a/drivers/gpu/nvgpu/gp106/acr_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/acr_gp106.c
@@ -1045,7 +1045,7 @@ static int lsf_gen_wpr_requirements(struct gk20a *g,
 static int gp106_bootstrap_hs_flcn(struct gk20a *g)
 {
 	struct mm_gk20a *mm = &g->mm;
-	struct vm_gk20a *vm = &mm->pmu.vm;
+	struct vm_gk20a *vm = mm->pmu.vm;
 	int err = 0;
 	u64 *acr_dmem;
 	u32 img_size_in_bytes = 0;
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index bc4aee3aa..3cd3eb504 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -67,32 +67,33 @@ static int gb10b_init_bar2_vm(struct gk20a *g)
 {
 	int err;
 	struct mm_gk20a *mm = &g->mm;
-	struct vm_gk20a *vm = &mm->bar2.vm;
 	struct nvgpu_mem *inst_block = &mm->bar2.inst_block;
 	u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size;
 
 	/* BAR2 aperture size is 32MB */
 	mm->bar2.aperture_size = 32 << 20;
 	gk20a_dbg_info("bar2 vm size = 0x%x", mm->bar2.aperture_size);
-	nvgpu_init_vm(mm, vm, big_page_size, SZ_4K,
+
+	mm->bar2.vm = nvgpu_vm_init(g, big_page_size, SZ_4K,
 		mm->bar2.aperture_size - SZ_4K,
 		mm->bar2.aperture_size, false, false, "bar2");
+	if (!mm->bar2.vm)
+		return -ENOMEM;
 
 	/* allocate instance mem for bar2 */
 	err = gk20a_alloc_inst_block(g, inst_block);
 	if (err)
 		goto clean_up_va;
-	g->ops.mm.init_inst_block(inst_block, vm, big_page_size);
+	g->ops.mm.init_inst_block(inst_block, mm->bar2.vm, big_page_size);
 
 	return 0;
 
clean_up_va:
-	nvgpu_deinit_vm(vm);
+	nvgpu_vm_put(mm->bar2.vm);
 	return err;
 }
 
-
 static int gb10b_init_bar2_mm_hw_setup(struct gk20a *g)
 {
 	struct mm_gk20a *mm = &g->mm;
@@ -401,7 +402,8 @@ static void gp10b_remove_bar2_vm(struct gk20a *g)
 	struct mm_gk20a *mm = &g->mm;
 
 	gp10b_replayable_pagefault_buffer_deinit(g);
-	nvgpu_vm_remove_inst(&mm->bar2.vm, &mm->bar2.inst_block);
+	gk20a_free_inst_block(g, &mm->bar2.inst_block);
+	nvgpu_vm_put(mm->bar2.vm);
 }
diff --git a/drivers/gpu/nvgpu/gp10b/rpfb_gp10b.c b/drivers/gpu/nvgpu/gp10b/rpfb_gp10b.c
index 0fb4248cf..567869295 100644
--- a/drivers/gpu/nvgpu/gp10b/rpfb_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/rpfb_gp10b.c
@@ -28,7 +28,7 @@ int gp10b_replayable_pagefault_buffer_init(struct gk20a *g)
 {
 	u32 addr_lo;
 	u32 addr_hi;
-	struct vm_gk20a *vm = &g->mm.bar2.vm;
+	struct vm_gk20a *vm = g->mm.bar2.vm;
 	int err;
 	size_t rbfb_size = NV_UVM_FAULT_BUF_SIZE *
 		fifo_replay_fault_buffer_size_hw_entries_v();
@@ -57,7 +57,7 @@ int gp10b_replayable_pagefault_buffer_init(struct gk20a *g)
 
 void gp10b_replayable_pagefault_buffer_deinit(struct gk20a *g)
 {
-	struct vm_gk20a *vm = &g->mm.bar2.vm;
+	struct vm_gk20a *vm = g->mm.bar2.vm;
 
 	nvgpu_dma_unmap_free(vm, &g->mm.bar2_desc);
 }
diff --git a/drivers/gpu/nvgpu/include/nvgpu/vm.h b/drivers/gpu/nvgpu/include/nvgpu/vm.h
index 403f3b187..f6d88cc3b 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/vm.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/vm.h
@@ -225,20 +225,14 @@ int nvgpu_insert_mapped_buf(struct vm_gk20a *vm,
 void nvgpu_remove_mapped_buf(struct vm_gk20a *vm,
 			     struct nvgpu_mapped_buf *mapped_buffer);
 
-void nvgpu_deinit_vm(struct vm_gk20a *vm);
-void __nvgpu_vm_remove(struct vm_gk20a *vm);
-void nvgpu_vm_remove(struct vm_gk20a *vm);
-void nvgpu_vm_remove_inst(struct vm_gk20a *vm, struct nvgpu_mem *inst_block);
-
-int nvgpu_init_vm(struct mm_gk20a *mm,
-		  struct vm_gk20a *vm,
-		  u32 big_page_size,
-		  u64 low_hole,
-		  u64 kernel_reserved,
-		  u64 aperture_size,
-		  bool big_pages,
-		  bool userspace_managed,
-		  char *name);
+struct vm_gk20a *nvgpu_vm_init(struct gk20a *g,
+			       u32 big_page_size,
+			       u64 low_hole,
+			       u64 kernel_reserved,
+			       u64 aperture_size,
+			       bool big_pages,
+			       bool userspace_managed,
+			       char *name);
 
 /*
  * These are private to the VM code but are unfortunately used by the vgpu code.
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
index b2bc6f0af..a4ffc7e80 100644
--- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
@@ -31,8 +31,6 @@ static int vgpu_init_mm_setup_sw(struct gk20a *g)
 {
 	struct mm_gk20a *mm = &g->mm;
-	struct vm_gk20a *vm = &mm->pmu.vm;
-	u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size;
 
 	gk20a_dbg_fn("");
 
@@ -54,11 +52,6 @@ static int vgpu_init_mm_setup_sw(struct gk20a *g)
 		(int)(mm->channel.user_size >> 20),
 		(int)(mm->channel.kernel_size >> 20));
 
-	/* gk20a_init_gpu_characteristics expects this to be populated */
-	vm->big_page_size = big_page_size;
-	vm->mmu_levels = (vm->big_page_size == SZ_64K) ?
-		gk20a_mm_levels_64k : gk20a_mm_levels_128k;
-
 	mm->sw_ready = true;
 
 	return 0;