mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
gpu: nvgpu: Remove extraneous VM init/deinit APIs
Support only VM pointers and ref-counting for maintaining VMs. This dramatically reduces the complexity of the APIs, avoids the API abuse that has existed, and ensures that future VM usage is consistent with current usage.

Also remove the combined VM free/instance block deletion. Any place where this was done is now replaced with an explicit free of the instance block and an nvgpu_vm_put().

JIRA NVGPU-12
JIRA NVGPU-30

Change-Id: Ib73e8d574ecc9abf6dad0b40a2c5795d6396cc8c
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1480227
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
parent c2b63150cd
commit c21f5bca9a
committed by mobile promotions
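In practice the change swaps embedded struct vm_gk20a instances for pointers whose lifetime is governed by nvgpu_vm_init() and nvgpu_vm_put(). A minimal sketch of the new lifecycle, condensed from gk20a_init_system_vm() in the diff below (the size values here are illustrative placeholders, not the patch's exact numbers):

	/*
	 * Illustrative sketch only, condensed from gk20a_init_system_vm();
	 * big_page_size and low_hole values are placeholders.
	 */
	static int example_init_system_vm(struct gk20a *g)
	{
		struct mm_gk20a *mm = &g->mm;
		u32 big_page_size = SZ_64K;		/* placeholder */
		u64 low_hole = SZ_4K * 16;		/* placeholder */
		u64 aperture_size = GK20A_PMU_VA_SIZE;	/* from the diff */
		int err;

		/* Creation now allocates and returns a ref-counted VM pointer. */
		mm->pmu.vm = nvgpu_vm_init(g, big_page_size,
					   low_hole,
					   aperture_size - low_hole,
					   aperture_size,
					   true, false, "system");
		if (!mm->pmu.vm)
			return -ENOMEM;

		err = gk20a_alloc_inst_block(g, &mm->pmu.inst_block);
		if (err)
			goto clean_up_vm;
		g->ops.mm.init_inst_block(&mm->pmu.inst_block, mm->pmu.vm,
					  big_page_size);

		return 0;

	clean_up_vm:
		/* Teardown is an explicit inst block free plus a reference drop. */
		nvgpu_vm_put(mm->pmu.vm);
		return err;
	}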
@@ -43,7 +43,6 @@ static int gk20a_vm_alloc_share(struct gk20a_as_share *as_share,
 	struct mm_gk20a *mm = &g->mm;
 	struct vm_gk20a *vm;
 	char name[32];
-	int err;
 	const bool userspace_managed =
 		(flags & NVGPU_GPU_IOCTL_ALLOC_AS_FLAGS_USERSPACE_MANAGED) != 0;
 
@@ -60,7 +59,13 @@ static int gk20a_vm_alloc_share(struct gk20a_as_share *as_share,
 		return -EINVAL;
 	}
 
-	vm = nvgpu_kzalloc(g, sizeof(*vm));
+	snprintf(name, sizeof(name), "as_%d", as_share->id);
+
+	vm = nvgpu_vm_init(g, big_page_size,
+			   big_page_size << 10,
+			   mm->channel.kernel_size,
+			   mm->channel.user_size + mm->channel.kernel_size,
+			   !mm->disable_bigpage, userspace_managed, name);
 	if (!vm)
 		return -ENOMEM;
 
@@ -68,15 +73,7 @@ static int gk20a_vm_alloc_share(struct gk20a_as_share *as_share,
 	vm->as_share = as_share;
 	vm->enable_ctag = true;
 
-	snprintf(name, sizeof(name), "as_%d", as_share->id);
-
-	err = nvgpu_init_vm(mm, vm, big_page_size,
-			    big_page_size << 10,
-			    mm->channel.kernel_size,
-			    mm->channel.user_size + mm->channel.kernel_size,
-			    !mm->disable_bigpage, userspace_managed, name);
-
-	return err;
+	return 0;
 }
 
 int gk20a_as_alloc_share(struct gk20a *g,
@@ -204,52 +204,15 @@ static int nvgpu_init_sema_pool(struct vm_gk20a *vm)
 	return 0;
 }
 
-/**
- * nvgpu_init_vm() - Initialize an address space.
- *
- * @mm - Parent MM.
- * @vm - The VM to init.
- * @big_page_size - Size of big pages associated with this VM.
- * @low_hole - The size of the low hole (unaddressable memory at the bottom of
- *             the address space).
- * @kernel_reserved - Space reserved for kernel only allocations.
- * @aperture_size - Total size of the aperture.
- * @big_pages - If true then big pages are possible in the VM. Note this does
- *              not guarantee that big pages will be possible.
- * @name - Name of the address space.
- *
- * This function initializes an address space according to the following map:
- *
- *     +--+ 0x0
- *     |  |
- *     +--+ @low_hole
- *     |  |
- *     ~  ~   This is the "user" section.
- *     |  |
- *     +--+ @aperture_size - @kernel_reserved
- *     |  |
- *     ~  ~   This is the "kernel" section.
- *     |  |
- *     +--+ @aperture_size
- *
- * The user section is therefor what ever is left over after the @low_hole and
- * @kernel_reserved memory have been portioned out. The @kernel_reserved is
- * always persent at the top of the memory space and the @low_hole is always at
- * the bottom.
- *
- * For certain address spaces a "user" section makes no sense (bar1, etc) so in
- * such cases the @kernel_reserved and @low_hole should sum to exactly
- * @aperture_size.
- */
-int nvgpu_init_vm(struct mm_gk20a *mm,
-		  struct vm_gk20a *vm,
-		  u32 big_page_size,
-		  u64 low_hole,
-		  u64 kernel_reserved,
-		  u64 aperture_size,
-		  bool big_pages,
-		  bool userspace_managed,
-		  char *name)
+static int __nvgpu_vm_init(struct mm_gk20a *mm,
+			   struct vm_gk20a *vm,
+			   u32 big_page_size,
+			   u64 low_hole,
+			   u64 kernel_reserved,
+			   u64 aperture_size,
+			   bool big_pages,
+			   bool userspace_managed,
+			   char *name)
 {
 	int err;
 	char alloc_name[32];
@@ -257,7 +220,7 @@ int nvgpu_init_vm(struct mm_gk20a *mm,
 	u64 user_vma_start, user_vma_limit;
 	u64 user_lp_vma_start, user_lp_vma_limit;
 	u64 kernel_vma_start, kernel_vma_limit;
-	struct gk20a *g = mm->g;
+	struct gk20a *g = gk20a_from_mm(mm);
 
 	if (WARN_ON(kernel_reserved + low_hole > aperture_size))
 		return -ENOMEM;
@@ -467,22 +430,71 @@ clean_up_vgpu_vm:
 	return err;
 }
 
-void nvgpu_deinit_vm(struct vm_gk20a *vm)
+/**
+ * nvgpu_init_vm() - Initialize an address space.
+ *
+ * @mm - Parent MM.
+ * @vm - The VM to init.
+ * @big_page_size - Size of big pages associated with this VM.
+ * @low_hole - The size of the low hole (unaddressable memory at the bottom of
+ *             the address space).
+ * @kernel_reserved - Space reserved for kernel only allocations.
+ * @aperture_size - Total size of the aperture.
+ * @big_pages - If true then big pages are possible in the VM. Note this does
+ *              not guarantee that big pages will be possible.
+ * @name - Name of the address space.
+ *
+ * This function initializes an address space according to the following map:
+ *
+ *     +--+ 0x0
+ *     |  |
+ *     +--+ @low_hole
+ *     |  |
+ *     ~  ~   This is the "user" section.
+ *     |  |
+ *     +--+ @aperture_size - @kernel_reserved
+ *     |  |
+ *     ~  ~   This is the "kernel" section.
+ *     |  |
+ *     +--+ @aperture_size
+ *
+ * The user section is therefor what ever is left over after the @low_hole and
+ * @kernel_reserved memory have been portioned out. The @kernel_reserved is
+ * always persent at the top of the memory space and the @low_hole is always at
+ * the bottom.
+ *
+ * For certain address spaces a "user" section makes no sense (bar1, etc) so in
+ * such cases the @kernel_reserved and @low_hole should sum to exactly
+ * @aperture_size.
+ */
+struct vm_gk20a *nvgpu_vm_init(struct gk20a *g,
+			       u32 big_page_size,
+			       u64 low_hole,
+			       u64 kernel_reserved,
+			       u64 aperture_size,
+			       bool big_pages,
+			       bool userspace_managed,
+			       char *name)
 {
-	if (nvgpu_alloc_initialized(&vm->kernel))
-		nvgpu_alloc_destroy(&vm->kernel);
-	if (nvgpu_alloc_initialized(&vm->user))
-		nvgpu_alloc_destroy(&vm->user);
-	if (nvgpu_alloc_initialized(&vm->user_lp))
-		nvgpu_alloc_destroy(&vm->user_lp);
-
-	gk20a_vm_free_entries(vm, &vm->pdb, 0);
+	struct vm_gk20a *vm = nvgpu_kzalloc(g, sizeof(*vm));
+
+	if (!vm)
+		return NULL;
+
+	if (__nvgpu_vm_init(&g->mm, vm, big_page_size, low_hole,
+			    kernel_reserved, aperture_size, big_pages,
+			    userspace_managed, name)) {
+		nvgpu_kfree(g, vm);
+		return NULL;
+	}
+
+	return vm;
 }
 
 /*
- * Cleanup the VM but don't nvgpu_kfree() on the vm pointer.
+ * Cleanup the VM!
  */
-void __nvgpu_vm_remove(struct vm_gk20a *vm)
+static void __nvgpu_vm_remove(struct vm_gk20a *vm)
 {
 	struct nvgpu_mapped_buf *mapped_buffer;
 	struct nvgpu_vm_area *vm_area, *vm_area_tmp;
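As a concrete instance of the degenerate case called out in the address-space map comment above: gk20a_init_bar1_vm() later in this patch passes low_hole = SZ_4K and kernel_reserved = mm->bar1.aperture_size - SZ_4K, so @low_hole + @kernel_reserved equals @aperture_size and the user section collapses to nothing, exactly as the comment prescribes for bar1.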
@@ -518,7 +530,14 @@ void __nvgpu_vm_remove(struct vm_gk20a *vm)
 		nvgpu_kfree(vm->mm->g, vm_area);
 	}
 
-	nvgpu_deinit_vm(vm);
+	if (nvgpu_alloc_initialized(&vm->kernel))
+		nvgpu_alloc_destroy(&vm->kernel);
+	if (nvgpu_alloc_initialized(&vm->user))
+		nvgpu_alloc_destroy(&vm->user);
+	if (nvgpu_alloc_initialized(&vm->user_lp))
+		nvgpu_alloc_destroy(&vm->user_lp);
+
+	gk20a_vm_free_entries(vm, &vm->pdb, 0);
 
 #ifdef CONFIG_TEGRA_GR_VIRTUALIZATION
 	if (g->is_virtual)
@@ -526,34 +545,15 @@ void __nvgpu_vm_remove(struct vm_gk20a *vm)
 #endif
 
 	nvgpu_mutex_release(&vm->update_gmmu_lock);
 }
 
-/*
- * Remove and nvgpu_kfree() the VM struct.
- */
-void nvgpu_vm_remove(struct vm_gk20a *vm)
-{
-	__nvgpu_vm_remove(vm);
-
-	nvgpu_kfree(vm->mm->g, vm);
-}
-
-/*
- * Note: this does not nvgpu_kfree() the vm. This might be a bug.
- */
-void nvgpu_vm_remove_inst(struct vm_gk20a *vm, struct nvgpu_mem *inst_block)
-{
-	struct gk20a *g = vm->mm->g;
-
-	gk20a_free_inst_block(g, inst_block);
-	__nvgpu_vm_remove(vm);
-	nvgpu_kfree(g, vm);
-}
-
 static void __nvgpu_vm_remove_kref(struct kref *ref)
 {
 	struct vm_gk20a *vm = container_of(ref, struct vm_gk20a, ref);
 
-	nvgpu_vm_remove(vm);
+	__nvgpu_vm_remove(vm);
 }
 
 void nvgpu_vm_get(struct vm_gk20a *vm)
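With the nvgpu_vm_remove() and nvgpu_vm_remove_inst() wrappers gone, the kref release callback above is the only teardown path. A plausible sketch of the get/put pair, assuming vm->ref is the struct kref resolved by the container_of() above (the bodies of nvgpu_vm_get()/nvgpu_vm_put() are not part of this diff):

	void nvgpu_vm_get(struct vm_gk20a *vm)
	{
		kref_get(&vm->ref);
	}

	void nvgpu_vm_put(struct vm_gk20a *vm)
	{
		/* The final put invokes __nvgpu_vm_remove_kref() above. */
		kref_put(&vm->ref, __nvgpu_vm_remove_kref);
	}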
@@ -1230,7 +1230,7 @@ static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx)
 	}
 
 	/* bind the channel to the vm */
-	err = __gk20a_vm_bind_channel(&g->mm.cde.vm, ch);
+	err = __gk20a_vm_bind_channel(g->mm.cde.vm, ch);
 	if (err) {
 		nvgpu_warn(g, "cde: could not bind vm");
 		goto err_commit_va;
@@ -451,7 +451,7 @@ u32 gk20a_ce_create_context_with_cb(struct gk20a *g,
 	ce_ctx->submitted_seq_number = 0;
 	ce_ctx->completed_seq_number = 0;
 
-	ce_ctx->vm = &g->mm.ce.vm;
+	ce_ctx->vm = g->mm.ce.vm;
 
 	/* always kernel client needs privileged channel */
 	ce_ctx->ch = gk20a_open_new_channel_with_cb(g, gk20a_ce_finished_ctx_cb,
@@ -465,7 +465,7 @@ u32 gk20a_ce_create_context_with_cb(struct gk20a *g,
 	ce_ctx->ch->wdt_enabled = false;
 
 	/* bind the channel to the vm */
-	err = __gk20a_vm_bind_channel(&g->mm.ce.vm, ce_ctx->ch);
+	err = __gk20a_vm_bind_channel(g->mm.ce.vm, ce_ctx->ch);
 	if (err) {
 		nvgpu_err(g, "ce: could not bind vm");
 		goto end;
@@ -146,7 +146,7 @@ static int css_hw_enable_snapshot(struct channel_gk20a *ch,
 	if (snapshot_size < CSS_MIN_HW_SNAPSHOT_SIZE)
 		snapshot_size = CSS_MIN_HW_SNAPSHOT_SIZE;
 
-	ret = nvgpu_dma_alloc_map_sys(&g->mm.pmu.vm, snapshot_size,
+	ret = nvgpu_dma_alloc_map_sys(g->mm.pmu.vm, snapshot_size,
 				      &data->hw_memdesc);
 	if (ret)
 		return ret;
@@ -195,7 +195,7 @@ static int css_hw_enable_snapshot(struct channel_gk20a *ch,
 
 failed_allocation:
 	if (data->hw_memdesc.size) {
-		nvgpu_dma_unmap_free(&g->mm.pmu.vm, &data->hw_memdesc);
+		nvgpu_dma_unmap_free(g->mm.pmu.vm, &data->hw_memdesc);
 		memset(&data->hw_memdesc, 0, sizeof(data->hw_memdesc));
 	}
 	data->hw_snapshot = NULL;
@@ -223,7 +223,7 @@ static void css_hw_disable_snapshot(struct gr_gk20a *gr)
 			perf_pmasys_mem_block_valid_false_f() |
 			perf_pmasys_mem_block_target_f(0));
 
-	nvgpu_dma_unmap_free(&g->mm.pmu.vm, &data->hw_memdesc);
+	nvgpu_dma_unmap_free(g->mm.pmu.vm, &data->hw_memdesc);
 	memset(&data->hw_memdesc, 0, sizeof(data->hw_memdesc));
 	data->hw_snapshot = NULL;
@@ -1838,7 +1838,6 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 {
 	struct gk20a *g = dbg_s->g;
 	struct mm_gk20a *mm = &g->mm;
-	struct vm_gk20a *vm = &mm->perfbuf.vm;
 	int err;
 	u32 virt_size;
 	u32 virt_addr_lo;
@@ -1853,23 +1852,23 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 		return -EBUSY;
 	}
 
-	err = nvgpu_init_vm(mm, vm, big_page_size,
+	mm->perfbuf.vm = nvgpu_vm_init(g, big_page_size,
 			big_page_size << 10,
 			NV_MM_DEFAULT_KERNEL_SIZE,
 			NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
 			false, false, "perfbuf");
-	if (err) {
+	if (!mm->perfbuf.vm) {
 		nvgpu_mutex_release(&g->dbg_sessions_lock);
-		return err;
+		return -ENOMEM;
 	}
 
 	err = gk20a_alloc_inst_block(g, &mm->perfbuf.inst_block);
 	if (err)
 		goto err_remove_vm;
 
-	g->ops.mm.init_inst_block(&mm->perfbuf.inst_block, vm, 0);
+	g->ops.mm.init_inst_block(&mm->perfbuf.inst_block, mm->perfbuf.vm, 0);
 
-	err = nvgpu_vm_map_buffer(vm,
+	err = nvgpu_vm_map_buffer(mm->perfbuf.vm,
 			args->dmabuf_fd,
 			&args->offset,
 			0,
@@ -1922,9 +1921,10 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 	return 0;
 
 err_unmap:
-	nvgpu_vm_unmap_buffer(vm, args->offset, NULL);
+	nvgpu_vm_unmap_buffer(mm->perfbuf.vm, args->offset, NULL);
 err_remove_vm:
-	nvgpu_vm_remove_inst(vm, &mm->perfbuf.inst_block);
+	gk20a_free_inst_block(g, &mm->perfbuf.inst_block);
+	nvgpu_vm_put(mm->perfbuf.vm);
 	nvgpu_mutex_release(&g->dbg_sessions_lock);
 	return err;
 }
@@ -1956,13 +1956,14 @@ static int gk20a_perfbuf_disable_locked(struct gk20a *g)
 static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset)
 {
 	struct mm_gk20a *mm = &g->mm;
-	struct vm_gk20a *vm = &mm->perfbuf.vm;
+	struct vm_gk20a *vm = mm->perfbuf.vm;
 	int err;
 
 	err = gk20a_perfbuf_disable_locked(g);
 
 	nvgpu_vm_unmap_buffer(vm, offset, NULL);
-	nvgpu_vm_remove_inst(vm, &mm->perfbuf.inst_block);
+	gk20a_free_inst_block(g, &mm->perfbuf.inst_block);
+	nvgpu_vm_put(vm);
 
 	g->perfbuf.owner = NULL;
 	g->perfbuf.offset = 0;
@@ -541,7 +541,7 @@ static void gk20a_remove_fifo_support(struct fifo_gk20a *f)
 	nvgpu_vfree(g, f->channel);
 	nvgpu_vfree(g, f->tsg);
 	if (g->ops.mm.is_bar1_supported(g))
-		nvgpu_dma_unmap_free(&g->mm.bar1.vm, &f->userd);
+		nvgpu_dma_unmap_free(g->mm.bar1.vm, &f->userd);
 	else
 		nvgpu_dma_free(g, &f->userd);
 
@@ -923,7 +923,7 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g)
 	nvgpu_mutex_init(&f->free_chs_mutex);
 
 	if (g->ops.mm.is_bar1_supported(g))
-		err = nvgpu_dma_alloc_map_sys(&g->mm.bar1.vm,
+		err = nvgpu_dma_alloc_map_sys(g->mm.bar1.vm,
 					      f->userd_entry_size * f->num_channels,
 					      &f->userd);
 
@@ -963,7 +963,7 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g)
 clean_up:
 	gk20a_dbg_fn("fail");
 	if (g->ops.mm.is_bar1_supported(g))
-		nvgpu_dma_unmap_free(&g->mm.bar1.vm, &f->userd);
+		nvgpu_dma_unmap_free(g->mm.bar1.vm, &f->userd);
 	else
 		nvgpu_dma_free(g, &f->userd);
@@ -436,10 +436,10 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
 
 	gpu->bus_type = NVGPU_GPU_BUS_TYPE_AXI; /* always AXI for now */
 
-	gpu->big_page_size = g->mm.pmu.vm.big_page_size;
 	gpu->compression_page_size = g->ops.fb.compression_page_size(g);
+	gpu->big_page_size = platform->default_big_page_size;
 	gpu->pde_coverage_bit_count =
-		gk20a_mm_pde_coverage_bit_count(&g->mm.pmu.vm);
+		g->ops.mm.get_mmu_levels(g, gpu->big_page_size)[0].lo_bit[0];
 
 	if (g->mm.disable_bigpage) {
 		gpu->big_page_size = 0;
@@ -2191,7 +2191,7 @@ static void gr_gk20a_start_falcon_ucode(struct gk20a *g)
 static int gr_gk20a_init_ctxsw_ucode_vaspace(struct gk20a *g)
 {
 	struct mm_gk20a *mm = &g->mm;
-	struct vm_gk20a *vm = &mm->pmu.vm;
+	struct vm_gk20a *vm = mm->pmu.vm;
 	struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
 	int err;
 
@@ -2265,7 +2265,7 @@ static int gr_gk20a_copy_ctxsw_ucode_segments(
 int gr_gk20a_init_ctxsw_ucode(struct gk20a *g)
 {
 	struct mm_gk20a *mm = &g->mm;
-	struct vm_gk20a *vm = &mm->pmu.vm;
+	struct vm_gk20a *vm = mm->pmu.vm;
 	struct gk20a_ctxsw_bootloader_desc *fecs_boot_desc;
 	struct gk20a_ctxsw_bootloader_desc *gpccs_boot_desc;
 	struct nvgpu_firmware *fecs_fw;
@@ -5195,7 +5195,7 @@ static int gk20a_init_gr_bind_fecs_elpg(struct gk20a *g)
 {
 	struct nvgpu_pmu *pmu = &g->pmu;
 	struct mm_gk20a *mm = &g->mm;
-	struct vm_gk20a *vm = &mm->pmu.vm;
+	struct vm_gk20a *vm = mm->pmu.vm;
 	int err = 0;
 
 	u32 size;
@@ -465,8 +465,7 @@ static void gk20a_remove_mm_ce_support(struct mm_gk20a *mm)
 
 	mm->vidmem.ce_ctx_id = (u32)~0;
 
-	__nvgpu_vm_remove(&mm->ce.vm);
-
+	nvgpu_vm_put(mm->ce.vm);
 }
 
 static void gk20a_remove_mm_support(struct mm_gk20a *mm)
@@ -476,12 +475,15 @@ static void gk20a_remove_mm_support(struct mm_gk20a *mm)
 	if (g->ops.mm.remove_bar2_vm)
 		g->ops.mm.remove_bar2_vm(g);
 
-	if (g->ops.mm.is_bar1_supported(g))
-		nvgpu_vm_remove_inst(&mm->bar1.vm, &mm->bar1.inst_block);
+	if (g->ops.mm.is_bar1_supported(g)) {
+		gk20a_free_inst_block(g, &mm->bar1.inst_block);
+		nvgpu_vm_put(mm->bar1.vm);
+	}
 
-	nvgpu_vm_remove_inst(&mm->pmu.vm, &mm->pmu.inst_block);
-	gk20a_free_inst_block(gk20a_from_mm(mm), &mm->hwpm.inst_block);
-	__nvgpu_vm_remove(&mm->cde.vm);
+	gk20a_free_inst_block(g, &mm->pmu.inst_block);
+	gk20a_free_inst_block(g, &mm->hwpm.inst_block);
+	nvgpu_vm_put(mm->pmu.vm);
+	nvgpu_vm_put(mm->cde.vm);
 
 	gk20a_semaphore_sea_destroy(g);
 	gk20a_vidmem_destroy(g);
@@ -2641,30 +2643,31 @@ u64 gk20a_mm_inst_block_addr(struct gk20a *g, struct nvgpu_mem *inst_block)
 static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
 {
 	int err;
-	struct vm_gk20a *vm = &mm->bar1.vm;
 	struct gk20a *g = gk20a_from_mm(mm);
 	struct nvgpu_mem *inst_block = &mm->bar1.inst_block;
 	u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size;
 
 	mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20;
 	gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size);
-	nvgpu_init_vm(mm, vm,
-		      big_page_size,
-		      SZ_4K,				/* Low hole */
-		      mm->bar1.aperture_size - SZ_4K,	/* Kernel reserved. */
-		      mm->bar1.aperture_size,
-		      true, false,
-		      "bar1");
+	mm->bar1.vm = nvgpu_vm_init(g,
+				    big_page_size,
+				    SZ_4K,
+				    mm->bar1.aperture_size - SZ_4K,
+				    mm->bar1.aperture_size,
+				    true, false,
+				    "bar1");
+	if (!mm->bar1.vm)
+		return -ENOMEM;
 
 	err = gk20a_alloc_inst_block(g, inst_block);
 	if (err)
-		goto clean_up_va;
-	g->ops.mm.init_inst_block(inst_block, vm, big_page_size);
+		goto clean_up_vm;
+	g->ops.mm.init_inst_block(inst_block, mm->bar1.vm, big_page_size);
 
 	return 0;
 
-clean_up_va:
-	nvgpu_deinit_vm(vm);
+clean_up_vm:
+	nvgpu_vm_put(mm->bar1.vm);
 	return err;
 }
@@ -2672,7 +2675,6 @@ clean_up_va:
 static int gk20a_init_system_vm(struct mm_gk20a *mm)
 {
 	int err;
-	struct vm_gk20a *vm = &mm->pmu.vm;
 	struct gk20a *g = gk20a_from_mm(mm);
 	struct nvgpu_mem *inst_block = &mm->pmu.inst_block;
 	u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size;
@@ -2687,65 +2689,70 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm)
 	mm->pmu.aperture_size = GK20A_PMU_VA_SIZE;
 	gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size);
 
-	nvgpu_init_vm(mm, vm, big_page_size,
-		      low_hole,
-		      aperture_size - low_hole,
-		      aperture_size,
-		      true,
-		      false,
-		      "system");
+	mm->pmu.vm = nvgpu_vm_init(g, big_page_size,
+				   low_hole,
+				   aperture_size - low_hole,
+				   aperture_size,
+				   true,
+				   false,
+				   "system");
+	if (!mm->pmu.vm)
+		return -ENOMEM;
 
 	err = gk20a_alloc_inst_block(g, inst_block);
 	if (err)
-		goto clean_up_va;
-	g->ops.mm.init_inst_block(inst_block, vm, big_page_size);
+		goto clean_up_vm;
+	g->ops.mm.init_inst_block(inst_block, mm->pmu.vm, big_page_size);
 
 	return 0;
 
-clean_up_va:
-	nvgpu_deinit_vm(vm);
+clean_up_vm:
+	nvgpu_vm_put(mm->pmu.vm);
 	return err;
 }
 
 static int gk20a_init_hwpm(struct mm_gk20a *mm)
 {
 	int err;
-	struct vm_gk20a *vm = &mm->pmu.vm;
 	struct gk20a *g = gk20a_from_mm(mm);
 	struct nvgpu_mem *inst_block = &mm->hwpm.inst_block;
 
 	err = gk20a_alloc_inst_block(g, inst_block);
 	if (err)
 		return err;
-	g->ops.mm.init_inst_block(inst_block, vm, 0);
+	g->ops.mm.init_inst_block(inst_block, mm->pmu.vm, 0);
 
 	return 0;
 }
 
 static int gk20a_init_cde_vm(struct mm_gk20a *mm)
 {
-	struct vm_gk20a *vm = &mm->cde.vm;
 	struct gk20a *g = gk20a_from_mm(mm);
 	u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size;
 
-	return nvgpu_init_vm(mm, vm, big_page_size,
-			big_page_size << 10,
-			NV_MM_DEFAULT_KERNEL_SIZE,
-			NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
-			false, false, "cde");
+	mm->cde.vm = nvgpu_vm_init(g, big_page_size,
+			big_page_size << 10,
+			NV_MM_DEFAULT_KERNEL_SIZE,
+			NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
+			false, false, "cde");
+	if (!mm->cde.vm)
+		return -ENOMEM;
+	return 0;
 }
 
 static int gk20a_init_ce_vm(struct mm_gk20a *mm)
 {
-	struct vm_gk20a *vm = &mm->ce.vm;
 	struct gk20a *g = gk20a_from_mm(mm);
 	u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size;
 
-	return nvgpu_init_vm(mm, vm, big_page_size,
-			big_page_size << 10,
-			NV_MM_DEFAULT_KERNEL_SIZE,
-			NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
-			false, false, "ce");
+	mm->ce.vm = nvgpu_vm_init(g, big_page_size,
+			big_page_size << 10,
+			NV_MM_DEFAULT_KERNEL_SIZE,
+			NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
+			false, false, "ce");
+	if (!mm->ce.vm)
+		return -ENOMEM;
+	return 0;
 }
 
 void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block,
@@ -78,8 +78,6 @@ struct pm_ctx_desc {
 	u32 pm_mode;
 };
 
-struct gk20a;
-
 struct compbit_store_desc {
 	struct nvgpu_mem mem;
 
@@ -191,19 +189,19 @@ struct mm_gk20a {
 
 	struct {
 		u32 aperture_size;
-		struct vm_gk20a vm;
+		struct vm_gk20a *vm;
 		struct nvgpu_mem inst_block;
 	} bar1;
 
 	struct {
 		u32 aperture_size;
-		struct vm_gk20a vm;
+		struct vm_gk20a *vm;
 		struct nvgpu_mem inst_block;
 	} bar2;
 
 	struct {
 		u32 aperture_size;
-		struct vm_gk20a vm;
+		struct vm_gk20a *vm;
 		struct nvgpu_mem inst_block;
 	} pmu;
 
@@ -213,16 +211,16 @@ struct mm_gk20a {
 	} hwpm;
 
 	struct {
-		struct vm_gk20a vm;
+		struct vm_gk20a *vm;
 		struct nvgpu_mem inst_block;
 	} perfbuf;
 
 	struct {
-		struct vm_gk20a vm;
+		struct vm_gk20a *vm;
 	} cde;
 
 	struct {
-		struct vm_gk20a vm;
+		struct vm_gk20a *vm;
 	} ce;
 
 	struct nvgpu_mutex l2_op_lock;
@@ -3132,7 +3132,7 @@ static int gk20a_prepare_ucode(struct gk20a *g)
 	struct nvgpu_pmu *pmu = &g->pmu;
 	int err = 0;
 	struct mm_gk20a *mm = &g->mm;
-	struct vm_gk20a *vm = &mm->pmu.vm;
+	struct vm_gk20a *vm = mm->pmu.vm;
 
 	if (pmu->fw)
 		return gk20a_init_pmu(pmu);
@@ -3170,7 +3170,7 @@ static int gk20a_init_pmu_setup_sw(struct gk20a *g)
 {
 	struct nvgpu_pmu *pmu = &g->pmu;
 	struct mm_gk20a *mm = &g->mm;
-	struct vm_gk20a *vm = &mm->pmu.vm;
+	struct vm_gk20a *vm = mm->pmu.vm;
 	unsigned int i;
 	int err = 0;
 	u8 *ptr;
@@ -4793,7 +4793,7 @@ int gk20a_pmu_vidmem_surface_alloc(struct gk20a *g, struct nvgpu_mem *mem,
 				   u32 size)
 {
 	struct mm_gk20a *mm = &g->mm;
-	struct vm_gk20a *vm = &mm->pmu.vm;
+	struct vm_gk20a *vm = mm->pmu.vm;
 	int err;
 
 	err = nvgpu_dma_alloc_map_vid(vm, size, mem);
@@ -4809,7 +4809,7 @@ int gk20a_pmu_sysmem_surface_alloc(struct gk20a *g, struct nvgpu_mem *mem,
 				   u32 size)
 {
 	struct mm_gk20a *mm = &g->mm;
-	struct vm_gk20a *vm = &mm->pmu.vm;
+	struct vm_gk20a *vm = mm->pmu.vm;
 	int err;
 
 	err = nvgpu_dma_alloc_map_sys(vm, size, mem);
@@ -388,7 +388,7 @@ int prepare_ucode_blob(struct gk20a *g)
 	u32 wprsize;
 	int i;
 	struct mm_gk20a *mm = &g->mm;
-	struct vm_gk20a *vm = &mm->pmu.vm;
+	struct vm_gk20a *vm = mm->pmu.vm;
 	struct wpr_carveout_info wpr_inf;
 	struct page **pages;
@@ -1067,7 +1067,7 @@ static int lsf_gen_wpr_requirements(struct gk20a *g, struct ls_flcn_mgr *plsfm)
 static int gm20b_bootstrap_hs_flcn(struct gk20a *g)
 {
 	struct mm_gk20a *mm = &g->mm;
-	struct vm_gk20a *vm = &mm->pmu.vm;
+	struct vm_gk20a *vm = mm->pmu.vm;
 	int err = 0;
 	u64 *acr_dmem;
 	u32 img_size_in_bytes = 0;
@@ -1385,7 +1385,7 @@ static int gm20b_init_pmu_setup_hw1(struct gk20a *g,
 int pmu_exec_gen_bl(struct gk20a *g, void *desc, u8 b_wait_for_halt)
 {
 	struct mm_gk20a *mm = &g->mm;
-	struct vm_gk20a *vm = &mm->pmu.vm;
+	struct vm_gk20a *vm = mm->pmu.vm;
 	int err = 0;
 	u32 bl_sz;
 	struct acr_desc *acr = &g->acr;
@@ -1045,7 +1045,7 @@ static int lsf_gen_wpr_requirements(struct gk20a *g,
 static int gp106_bootstrap_hs_flcn(struct gk20a *g)
 {
 	struct mm_gk20a *mm = &g->mm;
-	struct vm_gk20a *vm = &mm->pmu.vm;
+	struct vm_gk20a *vm = mm->pmu.vm;
 	int err = 0;
 	u64 *acr_dmem;
 	u32 img_size_in_bytes = 0;
@@ -67,32 +67,33 @@ static int gb10b_init_bar2_vm(struct gk20a *g)
 {
 	int err;
 	struct mm_gk20a *mm = &g->mm;
-	struct vm_gk20a *vm = &mm->bar2.vm;
 	struct nvgpu_mem *inst_block = &mm->bar2.inst_block;
 	u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size;
 
 	/* BAR2 aperture size is 32MB */
 	mm->bar2.aperture_size = 32 << 20;
 	gk20a_dbg_info("bar2 vm size = 0x%x", mm->bar2.aperture_size);
-	nvgpu_init_vm(mm, vm, big_page_size, SZ_4K,
+
+	mm->bar2.vm = nvgpu_vm_init(g, big_page_size, SZ_4K,
 		mm->bar2.aperture_size - SZ_4K,
 		mm->bar2.aperture_size, false, false, "bar2");
+	if (!mm->bar2.vm)
+		return -ENOMEM;
 
 	/* allocate instance mem for bar2 */
 	err = gk20a_alloc_inst_block(g, inst_block);
 	if (err)
 		goto clean_up_va;
 
-	g->ops.mm.init_inst_block(inst_block, vm, big_page_size);
+	g->ops.mm.init_inst_block(inst_block, mm->bar2.vm, big_page_size);
 
 	return 0;
 
 clean_up_va:
-	nvgpu_deinit_vm(vm);
+	nvgpu_vm_put(mm->bar2.vm);
 	return err;
 }
 
 
 static int gb10b_init_bar2_mm_hw_setup(struct gk20a *g)
 {
 	struct mm_gk20a *mm = &g->mm;
@@ -401,7 +402,8 @@ static void gp10b_remove_bar2_vm(struct gk20a *g)
 	struct mm_gk20a *mm = &g->mm;
 
 	gp10b_replayable_pagefault_buffer_deinit(g);
-	nvgpu_vm_remove_inst(&mm->bar2.vm, &mm->bar2.inst_block);
+	gk20a_free_inst_block(g, &mm->bar2.inst_block);
+	nvgpu_vm_put(mm->bar2.vm);
 }
@@ -28,7 +28,7 @@ int gp10b_replayable_pagefault_buffer_init(struct gk20a *g)
 {
 	u32 addr_lo;
 	u32 addr_hi;
-	struct vm_gk20a *vm = &g->mm.bar2.vm;
+	struct vm_gk20a *vm = g->mm.bar2.vm;
 	int err;
 	size_t rbfb_size = NV_UVM_FAULT_BUF_SIZE *
 		fifo_replay_fault_buffer_size_hw_entries_v();
@@ -57,7 +57,7 @@ int gp10b_replayable_pagefault_buffer_init(struct gk20a *g)
 
 void gp10b_replayable_pagefault_buffer_deinit(struct gk20a *g)
 {
-	struct vm_gk20a *vm = &g->mm.bar2.vm;
+	struct vm_gk20a *vm = g->mm.bar2.vm;
 
 	nvgpu_dma_unmap_free(vm, &g->mm.bar2_desc);
 }
@@ -225,20 +225,14 @@ int nvgpu_insert_mapped_buf(struct vm_gk20a *vm,
 void nvgpu_remove_mapped_buf(struct vm_gk20a *vm,
 			     struct nvgpu_mapped_buf *mapped_buffer);
 
-void nvgpu_deinit_vm(struct vm_gk20a *vm);
-void __nvgpu_vm_remove(struct vm_gk20a *vm);
-void nvgpu_vm_remove(struct vm_gk20a *vm);
-void nvgpu_vm_remove_inst(struct vm_gk20a *vm, struct nvgpu_mem *inst_block);
-
-int nvgpu_init_vm(struct mm_gk20a *mm,
-		  struct vm_gk20a *vm,
-		  u32 big_page_size,
-		  u64 low_hole,
-		  u64 kernel_reserved,
-		  u64 aperture_size,
-		  bool big_pages,
-		  bool userspace_managed,
-		  char *name);
+struct vm_gk20a *nvgpu_vm_init(struct gk20a *g,
+			       u32 big_page_size,
+			       u64 low_hole,
+			       u64 kernel_reserved,
+			       u64 aperture_size,
+			       bool big_pages,
+			       bool userspace_managed,
+			       char *name);
 
 /*
  * These are private to the VM code but are unfortunately used by the vgpu code.
@@ -31,8 +31,6 @@
 static int vgpu_init_mm_setup_sw(struct gk20a *g)
 {
 	struct mm_gk20a *mm = &g->mm;
-	struct vm_gk20a *vm = &mm->pmu.vm;
-	u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size;
 
 	gk20a_dbg_fn("");
 
@@ -54,11 +52,6 @@ static int vgpu_init_mm_setup_sw(struct gk20a *g)
 		(int)(mm->channel.user_size >> 20),
 		(int)(mm->channel.kernel_size >> 20));
 
-	/* gk20a_init_gpu_characteristics expects this to be populated */
-	vm->big_page_size = big_page_size;
-	vm->mmu_levels = (vm->big_page_size == SZ_64K) ?
-		gk20a_mm_levels_64k : gk20a_mm_levels_128k;
-
 	mm->sw_ready = true;
 
 	return 0;