diff --git a/drivers/gpu/nvgpu/common/nvgpu_common.c b/drivers/gpu/nvgpu/common/nvgpu_common.c
index e963ca6ae..d7ff48417 100644
--- a/drivers/gpu/nvgpu/common/nvgpu_common.c
+++ b/drivers/gpu/nvgpu/common/nvgpu_common.c
@@ -174,6 +174,8 @@ int nvgpu_probe(struct gk20a *g,
 
 	g->remove_support = gk20a_remove_support;
 
+	kref_init(&g->refcount);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/nvgpu/gk20a/as_gk20a.c b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
index e4bd8b738..a3c8c1ecb 100644
--- a/drivers/gpu/nvgpu/gk20a/as_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/as_gk20a.c
@@ -1,7 +1,7 @@
 /*
  * GK20A Address Spaces
  *
- * Copyright (c) 2011-2015, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2011-2017, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -46,6 +46,9 @@ int gk20a_as_alloc_share(struct gk20a_as *as,
 	int err = 0;
 
 	gk20a_dbg_fn("");
+	g = gk20a_get(g);
+	if (!g)
+		return -ENODEV;
 
 	*out = NULL;
 	as_share = kzalloc(sizeof(*as_share), GFP_KERNEL);
@@ -85,15 +88,19 @@ int gk20a_as_release_share(struct gk20a_as_share *as_share)
 	gk20a_dbg_fn("");
 
 	err = gk20a_busy(g->dev);
+
 	if (err)
-		return err;
+		goto release_fail;
 
 	err = gk20a_vm_release_share(as_share);
 
 	gk20a_idle(g->dev);
 
+release_fail:
 	release_as_share_id(as_share->as, as_share->id);
+	gk20a_put(g);
 	kfree(as_share);
+
 	return err;
 }
 
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 12f912183..c56e13c8f 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -1234,7 +1234,7 @@ int gk20a_channel_release(struct inode *inode, struct file *filp)
 	err = gk20a_busy(g->dev);
 	if (err) {
 		gk20a_err(dev_from_gk20a(g), "failed to release a channel!");
-		return err;
+		goto channel_release;
 	}
 
 	trace_gk20a_channel_release(dev_name(g->dev));
@@ -1242,6 +1242,8 @@ int gk20a_channel_release(struct inode *inode, struct file *filp)
 	gk20a_channel_close(ch);
 	gk20a_idle(g->dev);
 
+channel_release:
+	gk20a_put(g);
 	kfree(filp->private_data);
 	filp->private_data = NULL;
 	return 0;
@@ -1382,11 +1384,17 @@ static int __gk20a_channel_open(struct gk20a *g, struct file *filp, s32 runlist_
 
 	gk20a_dbg_fn("");
 
+	g = gk20a_get(g);
+	if (!g)
+		return -ENODEV;
+
 	trace_gk20a_channel_open(dev_name(g->dev));
 
 	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
-	if (!priv)
-		return -ENOMEM;
+	if (!priv) {
+		err = -ENOMEM;
+		goto free_ref;
+	}
 
 	err = gk20a_busy(g->dev);
 	if (err) {
@@ -1414,6 +1422,8 @@ static int __gk20a_channel_open(struct gk20a *g, struct file *filp, s32 runlist_
 
 fail_busy:
 	kfree(priv);
+free_ref:
+	gk20a_put(g);
 	return err;
 }
 
@@ -3465,6 +3475,7 @@ static int gk20a_event_id_release(struct inode *inode, struct file *filp)
 		nvgpu_mutex_release(&ch->event_id_list_lock);
 	}
 
+	gk20a_put(g);
 	kfree(event_id_data);
 	filp->private_data = NULL;
 
@@ -3529,20 +3540,28 @@ static int gk20a_channel_event_id_enable(struct channel_gk20a *ch,
 					 int event_id,
 					 int *fd)
 {
+	struct gk20a *g;
 	int err = 0;
 	int local_fd;
 	struct file *file;
 	char *name;
 	struct gk20a_event_id_data *event_id_data;
 
+	g = gk20a_get(ch->g);
+	if (!g)
+		return -ENODEV;
+
 	err = gk20a_channel_get_event_data_from_id(ch,
 				event_id, &event_id_data);
-	if (err == 0) /* We already have event enabled */
-		return -EINVAL;
+	if (err == 0) {
+		/* We already have event enabled */
+		err = -EINVAL;
+		goto free_ref;
+	}
 
 	err = get_unused_fd_flags(O_RDWR);
 	if (err < 0)
-		return err;
+		goto free_ref;
 	local_fd = err;
 
 	name = kasprintf(GFP_KERNEL, "nvgpu-event%d-fd%d",
@@ -3561,7 +3580,7 @@ static int gk20a_channel_event_id_enable(struct channel_gk20a *ch,
 		err = -ENOMEM;
 		goto clean_up_file;
 	}
-	event_id_data->g = ch->g;
+	event_id_data->g = g;
 	event_id_data->id = ch->hw_chid;
 	event_id_data->is_tsg = false;
 	event_id_data->event_id = event_id;
@@ -3585,6 +3604,8 @@ clean_up_file:
 	fput(file);
 clean_up:
 	put_unused_fd(local_fd);
+free_ref:
+	gk20a_put(g);
 	return err;
 }
 
diff --git a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
index f72fc769d..7db10e702 100644
--- a/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctrl_gk20a.c
@@ -55,10 +55,15 @@ int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp)
 
 	g = container_of(inode->i_cdev,
 			 struct gk20a, ctrl.cdev);
+	g = gk20a_get(g);
+	if (!g)
+		return -ENODEV;
 
 	priv = kzalloc(sizeof(struct gk20a_ctrl_priv), GFP_KERNEL);
-	if (!priv)
-		return -ENOMEM;
+	if (!priv) {
+		err = -ENOMEM;
+		goto free_ref;
+	}
 	filp->private_data = priv;
 	priv->dev = g->dev;
 	/*
@@ -71,29 +76,30 @@ int gk20a_ctrl_dev_open(struct inode *inode, struct file *filp)
 	if (!g->gr.sw_ready) {
 		err = gk20a_busy(g->dev);
 		if (err)
-			return err;
+			goto free_ref;
 		gk20a_idle(g->dev);
 	}
 
 #ifdef CONFIG_ARCH_TEGRA_18x_SOC
 	err = nvgpu_clk_arb_init_session(g, &priv->clk_session);
-	if (err)
-		return err;
 #endif
-
+free_ref:
+	if (err)
+		gk20a_put(g);
 	return err;
 }
 int gk20a_ctrl_dev_release(struct inode *inode, struct file *filp)
 {
 	struct gk20a_ctrl_priv *priv = filp->private_data;
+	struct gk20a *g = priv->g;
 	gk20a_dbg_fn("");
 
 #ifdef CONFIG_ARCH_TEGRA_18x_SOC
 	if (priv->clk_session)
-		nvgpu_clk_arb_release_session(gk20a_from_dev(priv->dev),
-				priv->clk_session);
+		nvgpu_clk_arb_release_session(g, priv->clk_session);
 #endif
 
+	gk20a_put(g);
 	kfree(priv);
 
 	return 0;
diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
index 4e265b81c..00beb257c 100644
--- a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c
@@ -276,14 +276,20 @@ int gk20a_ctxsw_dev_open(struct inode *inode, struct file *filp)
 	const int vmid = 0;
 
 	g = container_of(inode->i_cdev, struct gk20a, ctxsw.cdev);
+	g = gk20a_get(g);
+	if (!g)
+		return -ENODEV;
+
 	gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "g=%p", g);
 
-	if (!capable(CAP_SYS_ADMIN))
-		return -EPERM;
+	if (!capable(CAP_SYS_ADMIN)) {
+		err = -EPERM;
+		goto free_ref;
+	}
 
 	err = gk20a_busy(g->dev);
 	if (err)
-		return err;
+		goto free_ref;
 
 	trace = g->ctxsw_trace;
 	if (!trace) {
@@ -325,7 +331,9 @@ done:
 
 idle:
 	gk20a_idle(g->dev);
-
+free_ref:
+	if (err)
+		gk20a_put(g);
 	return err;
 }
 
@@ -346,7 +354,7 @@ int gk20a_ctxsw_dev_release(struct inode *inode, struct file *filp)
 		dev->g->ops.fecs_trace.free_user_buffer(dev->g);
 		dev->hdr = NULL;
 	}
-
+	gk20a_put(g);
 	return 0;
 }
 
diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
index 6a695cab2..d53442004 100644
--- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c
@@ -118,13 +118,17 @@ static int gk20a_dbg_gpu_do_dev_open(struct inode *inode,
 	else
 		g = container_of(inode->i_cdev,
 				 struct gk20a, prof.cdev);
+	g = gk20a_get(g);
+	if (!g)
+		return -ENODEV;
+
 	dev = g->dev;
 
 	gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, "dbg session: %s", dev_name(dev));
 
 	err  = alloc_session(&dbg_session);
 	if (err)
-		return err;
+		goto free_ref;
 
 	filp->private_data = dbg_session;
 	dbg_session->dev   = dev;
@@ -141,6 +145,9 @@ static int gk20a_dbg_gpu_do_dev_open(struct inode *inode,
 	dbg_session->dbg_events.num_pending_events = 0;
 
 	return 0;
+free_ref:
+	gk20a_put(g);
+	return err;
 }
 
 /* used in scenarios where the debugger session can take just the inter-session
@@ -529,6 +536,8 @@ int gk20a_dbg_gpu_dev_release(struct inode *inode, struct file *filp)
 	nvgpu_mutex_release(&g->dbg_sessions_lock);
 
 	kfree(dbg_s);
+	gk20a_put(g);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index e995dcbfb..38e4b5adc 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -1712,7 +1712,7 @@ static int __exit gk20a_remove(struct platform_device *pdev)
 		platform->remove(dev);
 
 	set_gk20a(pdev, NULL);
-	kfree(g);
+	gk20a_put(g);
 
 	gk20a_dbg_fn("removed");
 
@@ -2274,6 +2274,68 @@ int gk20a_read_ptimer(struct gk20a *g, u64 *value)
 	return -EBUSY;
 }
 
+/*
+ * kref release callback: free the gk20a struct that embeds @refcount.
+ */
+static void gk20a_free_cb(struct kref *refcount)
+{
+	struct gk20a *g = container_of(refcount,
+		struct gk20a, refcount);
+
+	gk20a_dbg(gpu_dbg_shutdown, "Freeing GK20A struct!");
+	kfree(g);
+}
+
+/**
+ * gk20a_get() - Increment ref count on driver
+ *
+ * @g - The driver to increment
+ * Fails if the ref count has already reached zero, i.e. the driver is being
+ * released; NULL is returned in that case. Otherwise @g is returned and the
+ * reference should later be dropped with gk20a_put().
+ */
+struct gk20a * __must_check gk20a_get(struct gk20a *g)
+{
+	int success;
+
+	/*
+	 * Handle the possibility we are still freeing the gk20a struct while
+	 * gk20a_get() is called. Unlikely but plausible race condition. Ideally
+	 * the code will never be in such a situation that this race is
+	 * possible.
+	 */
+	success = kref_get_unless_zero(&g->refcount);
+
+	gk20a_dbg(gpu_dbg_shutdown, "GET: refs currently %d %s",
+		atomic_read(&g->refcount.refcount), success ? "" : "(FAILED)");
+
+	return success ? g : NULL;
+}
+
+/**
+ * gk20a_put() - Decrement ref count on driver
+ *
+ * @g - The driver to decrement
+ *
+ * Decrement the driver ref-count. If necessary also free the underlying driver
+ * memory (via gk20a_free_cb()).
+ */
+void gk20a_put(struct gk20a *g)
+{
+	/*
+	 * Note - this print is racy: two instances of it could run before the
+	 * actual kref_put() runs, so you could see something like:
+	 *
+	 *  ... PUT: refs currently 2
+	 *  ... PUT: refs currently 2
+	 *  ... Freeing GK20A struct!
+	 */
+	gk20a_dbg(gpu_dbg_shutdown, "PUT: refs currently %d",
+		atomic_read(&g->refcount.refcount));
+
+	kref_put(&g->refcount, gk20a_free_cb);
+}
+
 MODULE_LICENSE("GPL v2");
 module_init(gk20a_init);
 module_exit(gk20a_exit);
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 3d5609b21..555660906 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -852,6 +852,7 @@ struct gk20a {
 	atomic_t nonstall_ops;
 	struct work_struct nonstall_fn_work;
 	struct workqueue_struct *nonstall_work_queue;
+	struct kref refcount;
 
 	struct resource *reg_mem;
 	void __iomem *regs;
@@ -1468,6 +1469,9 @@ static inline void gk20a_channel_trace_sched_param(
 
 void nvgpu_wait_for_deferred_interrupts(struct gk20a *g);
 
+struct gk20a * __must_check gk20a_get(struct gk20a *g);
+void gk20a_put(struct gk20a *g);
+
 #ifdef CONFIG_DEBUG_FS
 int gk20a_railgating_debugfs_init(struct device *dev);
 #endif
diff --git a/drivers/gpu/nvgpu/gk20a/sched_gk20a.c b/drivers/gpu/nvgpu/gk20a/sched_gk20a.c
index 6fdc27746..a73e79931 100644
--- a/drivers/gpu/nvgpu/gk20a/sched_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/sched_gk20a.c
@@ -377,21 +377,28 @@ int gk20a_sched_dev_open(struct inode *inode, struct file *filp)
 {
 	struct gk20a *g = container_of(inode->i_cdev,
 				struct gk20a, sched.cdev);
-	struct gk20a_sched_ctrl *sched = &g->sched_ctrl;
-	int err;
+	struct gk20a_sched_ctrl *sched;
+	int err = 0;
+
+	g = gk20a_get(g);
+	if (!g)
+		return -ENODEV;
+	sched = &g->sched_ctrl;
 
 	gk20a_dbg(gpu_dbg_fn | gpu_dbg_sched, "g=%p", g);
 
 	if (!sched->sw_ready) {
 		err = gk20a_busy(g->dev);
 		if (err)
-			return err;
+			goto free_ref;
 
 		gk20a_idle(g->dev);
 	}
 
-	if (!nvgpu_mutex_tryacquire(&sched->busy_lock))
-		return -EBUSY;
+	if (!nvgpu_mutex_tryacquire(&sched->busy_lock)) {
+		err = -EBUSY;
+		goto free_ref;
+	}
 
 	memcpy(sched->recent_tsg_bitmap, sched->active_tsg_bitmap,
 			sched->bitmap_size);
@@ -400,7 +407,10 @@ int gk20a_sched_dev_open(struct inode *inode, struct file *filp)
 	filp->private_data = sched;
 	gk20a_dbg(gpu_dbg_sched, "filp=%p sched=%p", filp, sched);
 
-	return 0;
+free_ref:
+	if (err)
+		gk20a_put(g);
+	return err;
 }
 
 long gk20a_sched_dev_ioctl(struct file *filp, unsigned int cmd,
@@ -511,6 +521,7 @@ int gk20a_sched_dev_release(struct inode *inode, struct file *filp)
 	nvgpu_mutex_release(&sched->control_lock);
 
 	nvgpu_mutex_release(&sched->busy_lock);
+	gk20a_put(g);
 	return 0;
 }
 
diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
index aadf54631..009536be1 100644
--- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
@@ -254,15 +254,23 @@ static int gk20a_tsg_event_id_enable(struct tsg_gk20a *tsg,
 	struct file *file;
 	char *name;
 	struct gk20a_event_id_data *event_id_data;
+	struct gk20a *g;
+
+	g = gk20a_get(tsg->g);
+	if (!g)
+		return -ENODEV;
 
 	err = gk20a_tsg_get_event_data_from_id(tsg,
 				event_id, &event_id_data);
-	if (err == 0) /* We already have event enabled */
-		return -EINVAL;
+	if (err == 0) {
+		/* We already have event enabled */
+		err = -EINVAL;
+		goto free_ref;
+	}
 
 	err = get_unused_fd_flags(O_RDWR);
 	if (err < 0)
-		return err;
+		goto free_ref;
 	local_fd = err;
 
 	name = kasprintf(GFP_KERNEL, "nvgpu-event%d-fd%d",
@@ -281,7 +289,7 @@ static int gk20a_tsg_event_id_enable(struct tsg_gk20a *tsg,
 		err = -ENOMEM;
 		goto clean_up_file;
 	}
-	event_id_data->g = tsg->g;
+	event_id_data->g = g;
 	event_id_data->id = tsg->tsgid;
 	event_id_data->is_tsg = true;
 	event_id_data->event_id = event_id;
@@ -305,6 +313,8 @@ clean_up_file:
 	fput(file);
 clean_up:
 	put_unused_fd(local_fd);
+free_ref:
+	gk20a_put(g);
 	return err;
 }
 
@@ -400,18 +410,25 @@ int gk20a_tsg_open(struct gk20a *g, struct file *filp)
 	struct device *dev;
 	int err;
 
+	g = gk20a_get(g);
+	if (!g)
+		return -ENODEV;
+
 	dev  = dev_from_gk20a(g);
 
 	gk20a_dbg(gpu_dbg_fn, "tsg: %s", dev_name(dev));
 
 	priv = kmalloc(sizeof(*priv), GFP_KERNEL);
-	if (!priv)
-		return -ENOMEM;
+	if (!priv) {
+		err = -ENOMEM;
+		goto free_ref;
+	}
 
 	tsg = acquire_unused_tsg(&g->fifo);
 	if (!tsg) {
 		kfree(priv);
-		return -ENOMEM;
+		err = -ENOMEM;
+		goto free_ref;
 	}
 
 	tsg->g = g;
@@ -448,6 +465,8 @@ int gk20a_tsg_open(struct gk20a *g, struct file *filp)
 
 clean_up:
 	kref_put(&tsg->refcount, gk20a_tsg_release);
+free_ref:
+	gk20a_put(g);
 	return err;
 }
 
@@ -495,16 +514,13 @@ void gk20a_tsg_release(struct kref *ref)
 	tsg->runlist_id = ~0;
 
 	gk20a_dbg(gpu_dbg_fn, "tsg released %d\n", tsg->tsgid);
+	gk20a_put(g);
 }
 
 int gk20a_tsg_dev_release(struct inode *inode, struct file *filp)
 {
 	struct tsg_private *priv = filp->private_data;
 	struct tsg_gk20a *tsg = priv->tsg;
-	struct gk20a *g = priv->g;
-
-	if (g->driver_is_dying)
-		return -ENODEV;
 
 	kref_put(&tsg->refcount, gk20a_tsg_release);
 	kfree(priv);
diff --git a/drivers/gpu/nvgpu/pci.c b/drivers/gpu/nvgpu/pci.c
index 7ef626c23..0ed621ce4 100644
--- a/drivers/gpu/nvgpu/pci.c
+++ b/drivers/gpu/nvgpu/pci.c
@@ -481,7 +481,8 @@ static void nvgpu_pci_remove(struct pci_dev *pdev)
 
 	enable_irq(g->irq_stall);
 
-	kfree(g);
+	gk20a_get_platform(&pdev->dev)->g = NULL;
+	gk20a_put(g);
 }
 
 static struct pci_driver nvgpu_pci_driver = {
diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.c b/drivers/gpu/nvgpu/vgpu/vgpu.c
index 9f381c5b4..44c55361c 100644
--- a/drivers/gpu/nvgpu/vgpu/vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/vgpu.c
@@ -650,6 +650,8 @@ int vgpu_probe(struct platform_device *pdev)
 	vgpu_create_sysfs(dev);
 	gk20a_init_gr(gk20a);
 
+	kref_init(&gk20a->refcount);
+
 	return 0;
 }
 
@@ -668,6 +670,7 @@ int vgpu_remove(struct platform_device *pdev)
 	gk20a_user_deinit(dev, &nvgpu_class);
 	vgpu_remove_sysfs(dev);
 	gk20a_get_platform(dev)->g = NULL;
-	kfree(g);
+	gk20a_put(g);
+
 	return 0;
 }