From abee92ab928e5c5a5c5763db74f0cf96bc7f7f85 Mon Sep 17 00:00:00 2001
From: David Nieto <dmartineznie@nvidia.com>
Date: Mon, 13 Mar 2017 20:23:03 -0700
Subject: [PATCH] gpu: nvgpu: refactor teardown to support unbind

This change refactors the teardown in remove to ensure that it is
possible to unload the driver while leaving fds open. This is achieved
by making sure that the SW state is kept alive till all fds are closed
and by checking that subsequent calls to ioctls after the teardown fail.

Normally, this would be achieved by calls into gk20a_busy(), but in
kickoff we don't call into that to reduce latency, so we need to check
the driver status directly. The same check is also needed in some other
functions, as we need to make sure the ioctl does not dereference the
device or platform struct.

bug 200277762
JIRA: EVLR-1023

Change-Id: I163e47a08c29d4d5b3ab79f0eb531ef234f40bde
Signed-off-by: David Nieto <dmartineznie@nvidia.com>
Reviewed-on: http://git-master/r/1320219
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
Reviewed-by: Shreshtha Sahu <ssahu@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
(cherry picked from commit e0f2afe5eb43fb32490ccabd504879c3e3e54623)
Reviewed-on: http://git-master/r/1327755
GVS: Gerrit_Virtual_Submit
Reviewed-by: Sumeet Gupta <sumeetg@nvidia.com>
---
 drivers/gpu/nvgpu/gk20a/ce2_gk20a.c     |  6 ++++
 drivers/gpu/nvgpu/gk20a/ce2_gk20a.h     |  3 ++
 drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 22 ++++++++++---
 drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c    |  3 +-
 drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | 24 +++++++-------
 drivers/gpu/nvgpu/gk20a/gk20a.c         | 44 +++++++++++++++----------
 drivers/gpu/nvgpu/gk20a/gk20a.h         |  7 ++--
 drivers/gpu/nvgpu/gk20a/mm_gk20a.c      | 21 +++++-------
 drivers/gpu/nvgpu/pci.c                 | 11 ++++---
 drivers/gpu/nvgpu/vgpu/vgpu.c           | 11 ++++---
 10 files changed, 91 insertions(+), 61 deletions(-)

diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
index 14dbf7836..f9b3c7eb9 100644
--- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
@@ -681,6 +681,12 @@ void gk20a_ce_delete_context(struct device *dev,
 		u32 ce_ctx_id)
 {
 	struct gk20a *g = gk20a_from_dev(dev);
+	gk20a_ce_delete_context_priv(g, ce_ctx_id);
+}
+
+void gk20a_ce_delete_context_priv(struct gk20a *g,
+		u32 ce_ctx_id)
+{
 	struct gk20a_ce_app *ce_app = &g->ce_app;
 	struct gk20a_gpu_ctx *ce_ctx, *ce_ctx_save;
 
diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h
index 5cdd233e6..7ecf130f8 100644
--- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h
@@ -144,9 +144,12 @@ int gk20a_ce_execute_ops(struct device *dev,
 		struct gk20a_fence *gk20a_fence_in,
 		u32 submit_flags,
 		struct gk20a_fence **gk20a_fence_out);
+void gk20a_ce_delete_context_priv(struct gk20a *g,
+		u32 ce_ctx_id);
 void gk20a_ce_delete_context(struct device *dev,
 		u32 ce_ctx_id);
 
+
 #ifdef CONFIG_DEBUG_FS
 /* CE app debugfs api */
 void gk20a_ce_debugfs_init(struct device *dev);
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index d29228c19..c95e83584 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -123,7 +123,8 @@ static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f)
 static void free_channel(struct fifo_gk20a *f,
 		struct channel_gk20a *ch)
 {
-	struct gk20a_platform *platform = gk20a_get_platform(f->g->dev);
+	struct gk20a_platform *platform;
+	struct gk20a *g = f->g;
 
 	trace_gk20a_release_used_channel(ch->hw_chid);
 	/* refcount is zero here and channel is in a freed/dead state */
@@ -133,10 +134,18 @@ static void free_channel(struct fifo_gk20a *f,
 	f->used_channels--;
 	nvgpu_mutex_release(&f->free_chs_mutex);
 
-	if (platform->aggressive_sync_destroy_thresh &&
+	/*
+	 * On teardown it is not possible to dereference platform, but ignoring
+	 * this is fine then because no new channels would be created.
+	 */
+	if (!g->driver_is_dying) {
+		platform = gk20a_get_platform(g->dev);
+
+		if (platform->aggressive_sync_destroy_thresh &&
 			(f->used_channels <
 			 platform->aggressive_sync_destroy_thresh))
-		platform->aggressive_sync_destroy = false;
+			platform->aggressive_sync_destroy = false;
+	}
 }
 
 int channel_gk20a_commit_va(struct channel_gk20a *c)
@@ -3022,7 +3031,12 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 	bool need_deferred_cleanup = false;
 	struct nvgpu_gpfifo __user *user_gpfifo = args ?
 		(struct nvgpu_gpfifo __user *)(uintptr_t)args->gpfifo : NULL;
-	struct gk20a_platform *platform = gk20a_get_platform(d);
+	struct gk20a_platform *platform;
+
+	if (g->driver_is_dying)
+		return -ENODEV;
+
+	platform = gk20a_get_platform(d);
 
 	if (c->has_timedout)
 		return -ETIMEDOUT;
diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
index 37077c7d6..afeb37a48 100644
--- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
@@ -1343,8 +1343,7 @@ static int nvgpu_gpu_set_therm_alert_limit(struct gk20a *g,
 long gk20a_ctrl_dev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	struct gk20a_ctrl_priv *priv = filp->private_data;
-	struct device *dev = priv->dev;
-	struct gk20a *g = get_gk20a(dev);
+	struct gk20a *g = priv->g;
 	struct nvgpu_gpu_zcull_get_ctx_size_args *get_ctx_size_args;
 	struct nvgpu_gpu_zcull_get_info_args *get_info_args;
 	struct nvgpu_gpu_zbc_set_table_args *set_table_args;
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index 507abb47d..1b96d73de 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -654,7 +654,7 @@ static int nvgpu_dbg_gpu_ioctl_timeout(struct dbg_session_gk20a *dbg_s,
 			 struct nvgpu_dbg_gpu_timeout_args *args)
 {
 	int err;
-	struct gk20a *g = get_gk20a(dbg_s->dev);
+	struct gk20a *g = dbg_s->g;
 
 	gk20a_dbg_fn("powergate mode = %d", args->enable);
 
@@ -669,7 +669,7 @@ static void nvgpu_dbg_gpu_ioctl_get_timeout(struct dbg_session_gk20a *dbg_s,
 			 struct nvgpu_dbg_gpu_timeout_args *args)
 {
 	int status;
-	struct gk20a *g = get_gk20a(dbg_s->dev);
+	struct gk20a *g = dbg_s->g;
 
 	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
 	status = g->timeouts_enabled;
@@ -700,7 +700,7 @@ static int nvgpu_dbg_gpu_ioctl_read_single_sm_error_state(
 		struct dbg_session_gk20a *dbg_s,
 		struct nvgpu_dbg_gpu_read_single_sm_error_state_args *args)
 {
-	struct gk20a *g = get_gk20a(dbg_s->dev);
+	struct gk20a *g = dbg_s->g;
 	struct gr_gk20a *gr = &g->gr;
 	struct nvgpu_dbg_gpu_sm_error_state_record *sm_error_state;
 	u32 sm_id;
@@ -739,7 +739,7 @@ static int nvgpu_dbg_gpu_ioctl_clear_single_sm_error_state(
 		struct dbg_session_gk20a *dbg_s,
 		struct nvgpu_dbg_gpu_clear_single_sm_error_state_args *args)
 {
-	struct gk20a *g = get_gk20a(dbg_s->dev);
+	struct gk20a *g = dbg_s->g;
 	struct gr_gk20a *gr = &g->gr;
 	u32 sm_id;
 	struct channel_gk20a *ch;
@@ -770,7 +770,7 @@ static int nvgpu_dbg_gpu_ioctl_write_single_sm_error_state(
 		struct dbg_session_gk20a *dbg_s,
 		struct nvgpu_dbg_gpu_write_single_sm_error_state_args *args)
 {
-	struct gk20a *g = get_gk20a(dbg_s->dev);
+	struct gk20a *g = dbg_s->g;
 	struct gr_gk20a *gr = &g->gr;
 	u32 sm_id;
 	struct channel_gk20a *ch;
@@ -941,7 +941,7 @@ long gk20a_dbg_gpu_dev_ioctl(struct file *filp, unsigned int cmd,
 			     unsigned long arg)
 {
 	struct dbg_session_gk20a *dbg_s = filp->private_data;
-	struct gk20a *g = get_gk20a(dbg_s->dev);
+	struct gk20a *g = dbg_s->g;
 	u8 buf[NVGPU_DBG_GPU_IOCTL_MAX_ARG_SIZE];
 	int err = 0;
 
@@ -1130,7 +1130,7 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
 	bool is_pg_disabled = false;
 
 	struct device *dev = dbg_s->dev;
-	struct gk20a *g = get_gk20a(dbg_s->dev);
+	struct gk20a *g = dbg_s->g;
 	struct channel_gk20a *ch;
 
 	gk20a_dbg_fn("%d ops, max fragment %d", args->num_ops, g->dbg_regops_tmp_buf_ops);
@@ -1246,7 +1246,7 @@ static int nvgpu_ioctl_channel_reg_ops(struct dbg_session_gk20a *dbg_s,
 static int dbg_set_powergate(struct dbg_session_gk20a *dbg_s, u32  powermode)
 {
 	int err = 0;
-	struct gk20a *g = get_gk20a(dbg_s->dev);
+	struct gk20a *g = dbg_s->g;
 
 	 /* This function must be called with g->dbg_sessions_lock held */
 
@@ -1349,7 +1349,7 @@ static int nvgpu_ioctl_powergate_gk20a(struct dbg_session_gk20a *dbg_s,
 				struct nvgpu_dbg_gpu_powergate_args *args)
 {
 	int err;
-	struct gk20a *g = get_gk20a(dbg_s->dev);
+	struct gk20a *g = dbg_s->g;
 	gk20a_dbg_fn("%s  powergate mode = %d",
 		      dev_name(dbg_s->dev), args->mode);
 
@@ -1363,7 +1363,7 @@ static int nvgpu_dbg_gpu_ioctl_smpc_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
 			       struct nvgpu_dbg_gpu_smpc_ctxsw_mode_args *args)
 {
 	int err;
-	struct gk20a *g = get_gk20a(dbg_s->dev);
+	struct gk20a *g = dbg_s->g;
 	struct channel_gk20a *ch_gk20a;
 
 	gk20a_dbg_fn("%s smpc ctxsw mode = %d",
@@ -1405,7 +1405,7 @@ static int nvgpu_dbg_gpu_ioctl_hwpm_ctxsw_mode(struct dbg_session_gk20a *dbg_s,
 			       struct nvgpu_dbg_gpu_hwpm_ctxsw_mode_args *args)
 {
 	int err;
-	struct gk20a *g = get_gk20a(dbg_s->dev);
+	struct gk20a *g = dbg_s->g;
 	struct channel_gk20a *ch_gk20a;
 
 	gk20a_dbg_fn("%s pm ctxsw mode = %d",
@@ -1457,7 +1457,7 @@ static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm(
 		struct dbg_session_gk20a *dbg_s,
 		struct nvgpu_dbg_gpu_suspend_resume_all_sms_args *args)
 {
-	struct gk20a *g = get_gk20a(dbg_s->dev);
+	struct gk20a *g = dbg_s->g;
 	struct channel_gk20a *ch;
 	int err = 0, action = args->mode;
 
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 03b4f1a93..861eda5d3 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -697,26 +697,14 @@ static irqreturn_t gk20a_intr_thread_stall(int irq, void *dev_id)
 	return g->ops.mc.isr_thread_stall(g);
 }
 
-void gk20a_remove_support(struct device *dev)
+void gk20a_remove_support(struct gk20a *g)
 {
-	struct gk20a *g = get_gk20a(dev);
-
 #ifdef CONFIG_TEGRA_COMMON
 	tegra_unregister_idle_unidle();
 #endif
 	if (g->dbg_regops_tmp_buf)
 		kfree(g->dbg_regops_tmp_buf);
 
-	nvgpu_wait_for_deferred_interrupts(g);
-
-	gk20a_channel_cancel_pending_sema_waits(g);
-
-	if (g->nonstall_work_queue) {
-		cancel_work_sync(&g->nonstall_fn_work);
-		destroy_workqueue(g->nonstall_work_queue);
-		g->nonstall_work_queue = NULL;
-	}
-
 	if (g->pmu.remove_support)
 		g->pmu.remove_support(&g->pmu);
 
@@ -1636,6 +1624,11 @@ static int gk20a_probe(struct platform_device *dev)
 	if (gk20a->irq_stall != gk20a->irq_nonstall)
 		disable_irq(gk20a->irq_nonstall);
 
+	/*
+	 * is_fmodel needs to be in gk20a struct for deferred teardown
+	 */
+	gk20a->is_fmodel = platform->is_fmodel;
+
 	err = gk20a_init_support(dev);
 	if (err)
 		return err;
@@ -1682,11 +1675,6 @@ static int __exit gk20a_remove(struct platform_device *pdev)
 	if (IS_ENABLED(CONFIG_GK20A_DEVFREQ))
 		gk20a_scale_exit(dev);
 
-	if (g->remove_support)
-		g->remove_support(dev);
-
-	gk20a_ce_destroy(g);
-
 #ifdef CONFIG_ARCH_TEGRA_18x_SOC
 	nvgpu_clk_arb_cleanup_arbiter(g);
 #endif
@@ -1775,7 +1763,21 @@ void gk20a_busy_noresume(struct device *dev)
 void gk20a_driver_start_unload(struct gk20a *g)
 {
 	gk20a_dbg(gpu_dbg_shutdown, "Driver is now going down!\n");
+
+	down_write(&g->busy_lock);
 	g->driver_is_dying = 1;
+	up_write(&g->busy_lock);
+
+	gk20a_wait_for_idle(g->dev);
+
+	nvgpu_wait_for_deferred_interrupts(g);
+	gk20a_channel_cancel_pending_sema_waits(g);
+
+	if (g->nonstall_work_queue) {
+		cancel_work_sync(&g->nonstall_fn_work);
+		destroy_workqueue(g->nonstall_work_queue);
+		g->nonstall_work_queue = NULL;
+	}
 }
 
 int gk20a_wait_for_idle(struct device *dev)
@@ -2283,6 +2285,12 @@ static void gk20a_free_cb(struct kref *refcount)
 		struct gk20a, refcount);
 
 	gk20a_dbg(gpu_dbg_shutdown, "Freeing GK20A struct!");
+
+	gk20a_ce_destroy(g);
+
+	if (g->remove_support)
+		g->remove_support(g);
+
 	kfree(g);
 }
 
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 0cf1f24f6..e969e53a5 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -862,6 +862,9 @@ struct gk20a {
 	atomic_t nonstall_ops;
 	struct work_struct nonstall_fn_work;
 	struct workqueue_struct *nonstall_work_queue;
+
+	bool is_fmodel;
+
 	struct kref refcount;
 
 	struct resource *reg_mem;
@@ -972,7 +975,7 @@ struct gk20a {
 	bool global_profiler_reservation_held;
 	int profiler_reservation_count;
 
-	void (*remove_support)(struct device *);
+	void (*remove_support)(struct gk20a *);
 
 	u64 pg_ingating_time_us;
 	u64 pg_ungating_time_us;
@@ -1443,7 +1446,7 @@ extern struct class nvgpu_class;
 
 int gk20a_pm_init(struct device *dev);
 int gk20a_pm_finalize_poweron(struct device *dev);
-void gk20a_remove_support(struct device *dev);
+void gk20a_remove_support(struct gk20a *g);
 
 static inline struct tsg_gk20a *tsg_gk20a_from_ch(struct channel_gk20a *ch)
 {
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index fdb452988..98f19165a 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -808,7 +808,7 @@ static void gk20a_remove_mm_ce_support(struct mm_gk20a *mm)
 	struct gk20a *g = gk20a_from_mm(mm);
 
 	if (mm->vidmem.ce_ctx_id != (u32)~0)
-		gk20a_ce_delete_context(g->dev, mm->vidmem.ce_ctx_id);
+		gk20a_ce_delete_context_priv(g, mm->vidmem.ce_ctx_id);
 
 	mm->vidmem.ce_ctx_id = (u32)~0;
 
@@ -1237,11 +1237,10 @@ static int alloc_gmmu_pages(struct vm_gk20a *vm, u32 order,
 	u32 num_pages = 1 << order;
 	u32 len = num_pages * PAGE_SIZE;
 	int err;
-	struct gk20a_platform *platform = dev_get_drvdata(g->dev);
 
 	gk20a_dbg_fn("");
 
-	if (platform->is_fmodel)
+	if (g->is_fmodel)
 		return alloc_gmmu_phys_pages(vm, order, entry);
 
 	/*
@@ -1267,7 +1266,6 @@ void free_gmmu_pages(struct vm_gk20a *vm,
 		     struct gk20a_mm_entry *entry)
 {
 	struct gk20a *g = gk20a_from_vm(vm);
-	struct gk20a_platform *platform = dev_get_drvdata(g->dev);
 
 	gk20a_dbg_fn("");
 
@@ -1277,7 +1275,7 @@ void free_gmmu_pages(struct vm_gk20a *vm,
 	if (entry->woffset) /* fake shadow mem */
 		return;
 
-	if (platform->is_fmodel) {
+	if (g->is_fmodel) {
 		free_gmmu_phys_pages(vm, entry);
 		return;
 	}
@@ -1295,11 +1293,9 @@ void free_gmmu_pages(struct vm_gk20a *vm,
 
 int map_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry)
 {
-	struct gk20a_platform *platform = dev_get_drvdata(g->dev);
-
 	gk20a_dbg_fn("");
 
-	if (platform->is_fmodel)
+	if (g->is_fmodel)
 		return map_gmmu_phys_pages(entry);
 
 	if (IS_ENABLED(CONFIG_ARM64)) {
@@ -1321,11 +1317,9 @@ int map_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry)
 
 void unmap_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry)
 {
-	struct gk20a_platform *platform = dev_get_drvdata(g->dev);
-
 	gk20a_dbg_fn("");
 
-	if (platform->is_fmodel) {
+	if (g->is_fmodel) {
 		unmap_gmmu_phys_pages(entry);
 		return;
 	}
@@ -4089,6 +4083,7 @@ static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm)
 	struct mapped_buffer_node *mapped_buffer;
 	struct vm_reserved_va_node *va_node, *va_node_tmp;
 	struct rb_node *node;
+	struct gk20a *g = vm->mm->g;
 
 	gk20a_dbg_fn("");
 
@@ -4097,7 +4092,7 @@ static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm)
 	 * pool involves unmapping a GMMU mapping which means aquiring the
 	 * update_gmmu_lock.
 	 */
-	if (!gk20a_platform_has_syncpoints(gk20a_from_vm(vm)->dev)) {
+	if (!(g->gpu_characteristics.flags & NVGPU_GPU_FLAGS_HAS_SYNCPOINTS)) {
 		if (vm->sema_pool) {
 			nvgpu_semaphore_pool_unmap(vm->sema_pool, vm);
 			nvgpu_semaphore_pool_put(vm->sema_pool);
@@ -4191,7 +4186,7 @@ static int gk20a_init_sema_pool(struct vm_gk20a *vm)
 	/*
 	 * Don't waste the memory on semaphores if we don't need them.
 	 */
-	if (gk20a_platform_has_syncpoints(g->dev))
+	if (g->gpu_characteristics.flags & NVGPU_GPU_FLAGS_HAS_SYNCPOINTS)
 		return 0;
 
 	if (vm->sema_pool)
diff --git a/drivers/gpu/nvgpu/pci.c b/drivers/gpu/nvgpu/pci.c
index da3fb986a..33625526e 100644
--- a/drivers/gpu/nvgpu/pci.c
+++ b/drivers/gpu/nvgpu/pci.c
@@ -393,6 +393,11 @@ static int nvgpu_pci_probe(struct pci_dev *pdev,
 	}
 	disable_irq(g->irq_stall);
 
+	/*
+	 * is_fmodel needs to be in gk20a struct for deferred teardown
+	 */
+	g->is_fmodel = platform->is_fmodel;
+
 	err = nvgpu_pci_init_support(pdev);
 	if (err)
 		return err;
@@ -430,7 +435,6 @@ static void nvgpu_pci_remove(struct pci_dev *pdev)
 	struct gk20a *g = get_gk20a(&pdev->dev);
 
 	gk20a_dbg(gpu_dbg_shutdown, "Removing nvgpu driver!\n");
-	gk20a_driver_start_unload(g);
 
 	if (g->irqs_enabled)
 		disable_irq(g->irq_stall);
@@ -449,7 +453,7 @@ static void nvgpu_pci_remove(struct pci_dev *pdev)
 	 * Wait for the driver to finish up all the IOCTLs it's working on
 	 * before cleaning up the driver's data structures.
 	 */
-	gk20a_wait_for_idle(&pdev->dev);
+	gk20a_driver_start_unload(g);
 	gk20a_dbg(gpu_dbg_shutdown, "Driver idle.\n");
 
 #ifdef CONFIG_ARCH_TEGRA_18x_SOC
@@ -459,9 +463,6 @@ static void nvgpu_pci_remove(struct pci_dev *pdev)
 	gk20a_user_deinit(g->dev, &nvgpu_pci_class);
 	gk20a_dbg(gpu_dbg_shutdown, "User de-init done.\b");
 
-	if (g->remove_support)
-		g->remove_support(g->dev);
-
 	debugfs_remove_recursive(platform->debugfs);
 	debugfs_remove_recursive(platform->debugfs_alias);
 
diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.c b/drivers/gpu/nvgpu/vgpu/vgpu.c
index 44c55361c..db3082fb3 100644
--- a/drivers/gpu/nvgpu/vgpu/vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/vgpu.c
@@ -192,10 +192,9 @@ static int vgpu_intr_thread(void *dev_id)
 	return 0;
 }
 
-static void vgpu_remove_support(struct device *dev)
+static void vgpu_remove_support(struct gk20a *g)
 {
-	struct gk20a *g = get_gk20a(dev);
-	struct vgpu_priv_data *priv = vgpu_get_priv_data_from_dev(dev);
+	struct vgpu_priv_data *priv = vgpu_get_priv_data_from_dev(g->dev);
 	struct tegra_vgpu_intr_msg msg;
 	int err;
 
@@ -266,7 +265,7 @@ static int vgpu_init_support(struct platform_device *pdev)
 	return 0;
 
  fail:
-	vgpu_remove_support(&pdev->dev);
+	vgpu_remove_support(g);
 	return err;
 }
 
@@ -581,6 +580,8 @@ int vgpu_probe(struct platform_device *pdev)
 	platform->vgpu_priv = priv;
 	gk20a->dev = dev;
 
+	gk20a->is_fmodel = platform->is_fmodel;
+
 	nvgpu_kmem_init(gk20a);
 
 	err = gk20a_user_init(dev, INTERFACE_NAME, &nvgpu_class);
@@ -663,7 +664,7 @@ int vgpu_remove(struct platform_device *pdev)
 
 	vgpu_pm_qos_remove(dev);
 	if (g->remove_support)
-		g->remove_support(dev);
+		g->remove_support(g);
 
 	vgpu_comm_deinit();
 	gk20a_sched_ctrl_cleanup(g);