gpu: nvgpu: Replace kref for refcounting in nvgpu

- added wrapper struct nvgpu_ref over nvgpu_atomic_t
- added nvgpu_ref_* APIs to access the above struct

JIRA NVGPU-140

Change-Id: Id47f897995dd4721751f7610b6d4d4fbfe4d6b9a
Signed-off-by: Debarshi Dutta <ddutta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1540899
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
2025-12-23 18:16:01 +03:00 · 2017-08-08 12:08:03 +05:30
parent 8662fae334
commit 3fa47b877d
25 changed files with 176 additions and 98 deletions
--- a/drivers/gpu/nvgpu/clk/clk_arb.c
+++ b/drivers/gpu/nvgpu/clk/clk_arb.c
@@ -61,8 +61,8 @@ static long nvgpu_clk_arb_ioctl_event_dev(struct file *filp, unsigned int cmd,
 static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work);
 static void nvgpu_clk_arb_run_vf_table_cb(struct work_struct *work);
 static int nvgpu_clk_arb_update_vf_table(struct nvgpu_clk_arb *arb);
-static void nvgpu_clk_arb_free_fd(struct kref *refcount);
-static void nvgpu_clk_arb_free_session(struct kref *refcount);
+static void nvgpu_clk_arb_free_fd(struct nvgpu_ref *refcount);
+static void nvgpu_clk_arb_free_session(struct nvgpu_ref *refcount);
 static int nvgpu_clk_arb_change_vf_point(struct gk20a *g, u16 gpc2clk_target,
 	u16 sys2clk_target, u16 xbar2clk_target, u16 mclk_target, u32 voltuv,
 	u32 voltuv_sram);
@@ -214,13 +214,13 @@ struct nvgpu_clk_dev {
 	nvgpu_atomic_t enabled_mask;
 	struct nvgpu_clk_notification_queue queue;
 	u32 arb_queue_head;
-	struct kref refcount;
+	struct nvgpu_ref refcount;
 };

 struct nvgpu_clk_session {
 	bool zombie;
 	struct gk20a *g;
-	struct kref refcount;
+	struct nvgpu_ref refcount;
 	struct list_head link;
 	struct llist_head targets;

@@ -541,9 +541,9 @@ static int nvgpu_clk_arb_install_fd(struct gk20a *g,
 	nvgpu_atomic_set(&dev->poll_mask, 0);

 	dev->session = session;
-	kref_init(&dev->refcount);
+	nvgpu_ref_init(&dev->refcount);

-	kref_get(&session->refcount);
+	nvgpu_ref_get(&session->refcount);

 	*_dev = dev;

@@ -573,7 +573,7 @@ int nvgpu_clk_arb_init_session(struct gk20a *g,
 		return -ENOMEM;
 	session->g = g;

-	kref_init(&session->refcount);
+	nvgpu_ref_init(&session->refcount);

 	session->zombie = false;
 	session->target_pool[0].pstate = CTRL_PERF_PSTATE_P8;
@@ -593,7 +593,7 @@ int nvgpu_clk_arb_init_session(struct gk20a *g,
 	return 0;
 }

-static void nvgpu_clk_arb_free_fd(struct kref *refcount)
+static void nvgpu_clk_arb_free_fd(struct nvgpu_ref *refcount)
 {
 	struct nvgpu_clk_dev *dev = container_of(refcount,
 			struct nvgpu_clk_dev, refcount);
@@ -602,7 +602,7 @@ static void nvgpu_clk_arb_free_fd(struct kref *refcount)
 	nvgpu_kfree(session->g, dev);
 }

-static void nvgpu_clk_arb_free_session(struct kref *refcount)
+static void nvgpu_clk_arb_free_session(struct nvgpu_ref *refcount)
 {
 	struct nvgpu_clk_session *session = container_of(refcount,
 			struct nvgpu_clk_session, refcount);
@@ -621,7 +621,7 @@ static void nvgpu_clk_arb_free_session(struct kref *refcount)

 	head = llist_del_all(&session->targets);
 	llist_for_each_entry_safe(dev, tmp, head, node) {
-		kref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
+		nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
 	}
 	synchronize_rcu();
 	nvgpu_kfree(g, session);
@@ -635,7 +635,7 @@ void nvgpu_clk_arb_release_session(struct gk20a *g,
 	gk20a_dbg_fn("");

 	session->zombie = true;
-	kref_put(&session->refcount, nvgpu_clk_arb_free_session);
+	nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session);
 	if (arb && arb->update_work_queue)
 		queue_work(arb->update_work_queue, &arb->update_fn_work);
 }
@@ -1099,7 +1099,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
 							dev->gpc2clk_target_mhz;
 						gpc2clk_set = true;
 					}
-					kref_get(&dev->refcount);
+					nvgpu_ref_get(&dev->refcount);
 					llist_add(&dev->node, &arb->requests);
 				}
 				/* Ensure target is updated before ptr sawp */
@@ -1305,7 +1305,7 @@ exit_arb:
 	llist_for_each_entry_safe(dev, tmp, head, node) {
 		nvgpu_atomic_set(&dev->poll_mask, POLLIN | POLLRDNORM);
 		wake_up_interruptible(&dev->readout_wq);
-		kref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
+		nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
 	}

 	nvgpu_atomic_set(&arb->notification_queue.head,
@@ -1523,7 +1523,7 @@ int nvgpu_clk_arb_commit_request_fd(struct gk20a *g,
 		err = -EINVAL;
 		goto fdput_fd;
 	}
-	kref_get(&dev->refcount);
+	nvgpu_ref_get(&dev->refcount);
 	llist_add(&dev->node, &session->targets);
 	if (arb->update_work_queue)
 		queue_work(arb->update_work_queue, &arb->update_fn_work);
@@ -1607,8 +1607,8 @@ static int nvgpu_clk_arb_release_completion_dev(struct inode *inode,

 	gk20a_dbg_fn("");

-	kref_put(&session->refcount, nvgpu_clk_arb_free_session);
-	kref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
+	nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session);
+	nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
 	return 0;
 }

@@ -1631,8 +1631,8 @@ static int nvgpu_clk_arb_release_event_dev(struct inode *inode,
 	}

 	synchronize_rcu();
-	kref_put(&session->refcount, nvgpu_clk_arb_free_session);
-	kref_put(&dev->refcount, nvgpu_clk_arb_free_fd);
+	nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session);
+	nvgpu_ref_put(&dev->refcount, nvgpu_clk_arb_free_fd);

 	return 0;
 }
--- a/drivers/gpu/nvgpu/common/linux/debug_fifo.c
+++ b/drivers/gpu/nvgpu/common/linux/debug_fifo.c
@@ -20,7 +20,7 @@

 #include <nvgpu/sort.h>

-void __gk20a_fifo_profile_free(struct kref *ref);
+void __gk20a_fifo_profile_free(struct nvgpu_ref *ref);

 static void *gk20a_fifo_sched_debugfs_seq_start(
 		struct seq_file *s, loff_t *pos)
@@ -145,14 +145,15 @@ static int gk20a_fifo_profile_enable(void *data, u64 val)
 	if (val == 0) {
 		if (f->profile.enabled) {
 			f->profile.enabled = false;
-			kref_put(&f->profile.ref, __gk20a_fifo_profile_free);
+			nvgpu_ref_put(&f->profile.ref,
+				__gk20a_fifo_profile_free);
 		}
 	} else {
 		if (!f->profile.enabled) {
 			/* not kref init as it can have a running condition if
 			 * we enable/disable/enable while kickoff is happening
 			 */
-			if (!kref_get_unless_zero(&f->profile.ref)) {
+			if (!nvgpu_ref_get_unless_zero(&f->profile.ref)) {
 				f->profile.data = vzalloc(
 							FIFO_PROFILING_ENTRIES *
 					sizeof(struct fifo_profile_gk20a));
@@ -165,7 +166,7 @@ static int gk20a_fifo_profile_enable(void *data, u64 val)
 					nvgpu_mutex_release(&f->profile.lock);
 					return -ENOMEM;
 				}
-				kref_init(&f->profile.ref);
+				nvgpu_ref_init(&f->profile.ref);
 			}
 			atomic_set(&f->profile.get.atomic_var, 0);
 			f->profile.enabled = true;
@@ -241,7 +242,7 @@ static int gk20a_fifo_profile_stats(struct seq_file *s, void *unused)
 	u64 percentiles_append[PERCENTILE_RANGES];
 	u64 percentiles_userd[PERCENTILE_RANGES];

-	if (!kref_get_unless_zero(&g->fifo.profile.ref)) {
+	if (!nvgpu_ref_get_unless_zero(&g->fifo.profile.ref)) {
 		seq_printf(s, "Profiling disabled\n");
 		return 0;
 	}
@@ -271,7 +272,7 @@ static int gk20a_fifo_profile_stats(struct seq_file *s, void *unused)
 			percentiles_jobtracking[index],
 			percentiles_userd[index]);

-	kref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free);
+	nvgpu_ref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free);

 	return 0;
 }
@@ -312,7 +313,7 @@ void gk20a_fifo_debugfs_init(struct gk20a *g)
 	nvgpu_mutex_init(&g->fifo.profile.lock);
 	g->fifo.profile.enabled = false;
 	atomic_set(&g->fifo.profile.get.atomic_var, 0);
-	atomic_set(&g->fifo.profile.ref.refcount, 0);
+	atomic_set(&g->fifo.profile.ref.refcount.atomic_var, 0);

 	debugfs_create_file("enable", 0600, profile_root, g,
 		&gk20a_fifo_profile_enable_debugfs_fops);
@@ -322,7 +323,7 @@ void gk20a_fifo_debugfs_init(struct gk20a *g)

 }

-void __gk20a_fifo_profile_free(struct kref *ref)
+void __gk20a_fifo_profile_free(struct nvgpu_ref *ref)
 {
 	struct fifo_gk20a *f = container_of(ref, struct fifo_gk20a,
 						profile.ref);
@@ -340,7 +341,7 @@ struct fifo_profile_gk20a *gk20a_fifo_profile_acquire(struct gk20a *g)
 	unsigned int index;

 	/* If kref is zero, profiling is not enabled */
-	if (!kref_get_unless_zero(&f->profile.ref))
+	if (!nvgpu_ref_get_unless_zero(&f->profile.ref))
 		return NULL;
 	index = atomic_inc_return(&f->profile.get.atomic_var);
 	profile = &f->profile.data[index % FIFO_PROFILING_ENTRIES];
@@ -352,7 +353,7 @@ struct fifo_profile_gk20a *gk20a_fifo_profile_acquire(struct gk20a *g)
 void gk20a_fifo_profile_release(struct gk20a *g,
 					struct fifo_profile_gk20a *profile)
 {
-	kref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free);
+	nvgpu_ref_put(&g->fifo.profile.ref, __gk20a_fifo_profile_free);
 }

 void gk20a_fifo_debugfs_deinit(struct gk20a *g)
@@ -362,7 +363,7 @@ void gk20a_fifo_debugfs_deinit(struct gk20a *g)
 	nvgpu_mutex_acquire(&f->profile.lock);
 	if (f->profile.enabled) {
 		f->profile.enabled = false;
-		kref_put(&f->profile.ref, __gk20a_fifo_profile_free);
+		nvgpu_ref_put(&f->profile.ref, __gk20a_fifo_profile_free);
 	}
 	nvgpu_mutex_release(&f->profile.lock);
 }
--- a/drivers/gpu/nvgpu/common/linux/driver_common.c
+++ b/drivers/gpu/nvgpu/common/linux/driver_common.c
@@ -217,7 +217,7 @@ int nvgpu_probe(struct gk20a *g,

 	g->remove_support = gk20a_remove_support;

-	kref_init(&g->refcount);
+	nvgpu_ref_init(&g->refcount);

 	return 0;
 }
--- a/drivers/gpu/nvgpu/common/linux/ioctl_tsg.c
+++ b/drivers/gpu/nvgpu/common/linux/ioctl_tsg.c
@@ -260,7 +260,7 @@ int nvgpu_ioctl_tsg_dev_release(struct inode *inode, struct file *filp)
 	struct tsg_private *priv = filp->private_data;
 	struct tsg_gk20a *tsg = priv->tsg;

-	kref_put(&tsg->refcount, gk20a_tsg_release);
+	nvgpu_ref_put(&tsg->refcount, gk20a_tsg_release);
 	nvgpu_kfree(tsg->g, priv);
 	return 0;
 }
--- a/drivers/gpu/nvgpu/common/linux/vm.c
+++ b/drivers/gpu/nvgpu/common/linux/vm.c
@@ -155,7 +155,7 @@ static u64 __nvgpu_vm_find_mapping(struct vm_gk20a *vm,
 		else
 			mapped_buffer->own_mem_ref = true;
 	}
-	kref_get(&mapped_buffer->ref);
+	nvgpu_ref_get(&mapped_buffer->ref);

 	nvgpu_log(g, gpu_dbg_map,
 		  "gv: 0x%04x_%08x + 0x%-7zu "
@@ -380,7 +380,7 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm,
 	mapped_buffer->user_mapped = user_mapped ? 1 : 0;
 	mapped_buffer->own_mem_ref = user_mapped;
 	nvgpu_init_list_node(&mapped_buffer->buffer_list);
-	kref_init(&mapped_buffer->ref);
+	nvgpu_ref_init(&mapped_buffer->ref);

 	err = nvgpu_insert_mapped_buf(vm, mapped_buffer);
 	if (err) {
@@ -425,6 +425,6 @@ void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset)
 		return;
 	}

-	kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref);
+	nvgpu_ref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_ref);
 	nvgpu_mutex_release(&vm->update_gmmu_lock);
 }
--- a/drivers/gpu/nvgpu/common/mm/vm.c
+++ b/drivers/gpu/nvgpu/common/mm/vm.c
@@ -404,7 +404,7 @@ static int __nvgpu_vm_init(struct mm_gk20a *mm,
 	vm->mapped_buffers = NULL;

 	nvgpu_mutex_init(&vm->update_gmmu_lock);
-	kref_init(&vm->ref);
+	nvgpu_ref_init(&vm->ref);
 	nvgpu_init_list_node(&vm->vm_area_list);

 	/*
@@ -557,7 +557,7 @@ static void __nvgpu_vm_remove(struct vm_gk20a *vm)
 	nvgpu_kfree(g, vm);
 }

-static void __nvgpu_vm_remove_kref(struct kref *ref)
+static void __nvgpu_vm_remove_ref(struct nvgpu_ref *ref)
 {
 	struct vm_gk20a *vm = container_of(ref, struct vm_gk20a, ref);

@@ -566,12 +566,12 @@ static void __nvgpu_vm_remove_kref(struct kref *ref)

 void nvgpu_vm_get(struct vm_gk20a *vm)
 {
-	kref_get(&vm->ref);
+	nvgpu_ref_get(&vm->ref);
 }

 void nvgpu_vm_put(struct vm_gk20a *vm)
 {
-	kref_put(&vm->ref, __nvgpu_vm_remove_kref);
+	nvgpu_ref_put(&vm->ref, __nvgpu_vm_remove_ref);
 }

 int nvgpu_insert_mapped_buf(struct vm_gk20a *vm,
--- a/drivers/gpu/nvgpu/common/mm/vm_area.c
+++ b/drivers/gpu/nvgpu/common/mm/vm_area.c
@@ -202,7 +202,7 @@ int nvgpu_vm_area_free(struct vm_gk20a *vm, u64 addr)
 				       &vm_area->buffer_list_head,
 				       nvgpu_mapped_buf, buffer_list) {
 		nvgpu_list_del(&buffer->buffer_list);
-		kref_put(&buffer->ref, gk20a_vm_unmap_locked_kref);
+		nvgpu_ref_put(&buffer->ref, gk20a_vm_unmap_locked_ref);
 	}

 	/* if this was a sparse mapping, free the va */
--- a/drivers/gpu/nvgpu/common/semaphore.c
+++ b/drivers/gpu/nvgpu/common/semaphore.c
@@ -156,7 +156,7 @@ struct nvgpu_semaphore_pool *nvgpu_semaphore_pool_alloc(
 	p->sema_sea = sea;
 	nvgpu_init_list_node(&p->hw_semas);
 	nvgpu_init_list_node(&p->pool_list_entry);
-	kref_init(&p->ref);
+	nvgpu_ref_init(&p->ref);

 	sea->page_count++;
 	nvgpu_list_add(&p->pool_list_entry, &sea->pool_list);
@@ -285,7 +285,7 @@ void nvgpu_semaphore_pool_unmap(struct nvgpu_semaphore_pool *p,
 * Completely free a semaphore_pool. You should make sure this pool is not
 * mapped otherwise there's going to be a memory leak.
 */
-static void nvgpu_semaphore_pool_free(struct kref *ref)
+static void nvgpu_semaphore_pool_free(struct nvgpu_ref *ref)
 {
 	struct nvgpu_semaphore_pool *p =
 		container_of(ref, struct nvgpu_semaphore_pool, ref);
@@ -314,12 +314,12 @@ static void nvgpu_semaphore_pool_free(struct kref *ref)

 void nvgpu_semaphore_pool_get(struct nvgpu_semaphore_pool *p)
 {
-	kref_get(&p->ref);
+	nvgpu_ref_get(&p->ref);
 }

 void nvgpu_semaphore_pool_put(struct nvgpu_semaphore_pool *p)
 {
-	kref_put(&p->ref, nvgpu_semaphore_pool_free);
+	nvgpu_ref_put(&p->ref, nvgpu_semaphore_pool_free);
 }

 /*
@@ -423,7 +423,7 @@ struct nvgpu_semaphore *nvgpu_semaphore_alloc(struct channel_gk20a *ch)
 	if (!s)
 		return NULL;

-	kref_init(&s->ref);
+	nvgpu_ref_init(&s->ref);
 	s->hw_sema = ch->hw_sema;
 	nvgpu_atomic_set(&s->value, 0);

@@ -438,7 +438,7 @@ struct nvgpu_semaphore *nvgpu_semaphore_alloc(struct channel_gk20a *ch)
 	return s;
 }

-static void nvgpu_semaphore_free(struct kref *ref)
+static void nvgpu_semaphore_free(struct nvgpu_ref *ref)
 {
 	struct nvgpu_semaphore *s =
 		container_of(ref, struct nvgpu_semaphore, ref);
@@ -450,10 +450,10 @@ static void nvgpu_semaphore_free(struct kref *ref)

 void nvgpu_semaphore_put(struct nvgpu_semaphore *s)
 {
-	kref_put(&s->ref, nvgpu_semaphore_free);
+	nvgpu_ref_put(&s->ref, nvgpu_semaphore_free);
 }

 void nvgpu_semaphore_get(struct nvgpu_semaphore *s)
 {
-	kref_get(&s->ref);
+	nvgpu_ref_get(&s->ref);
 }
--- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
@@ -33,10 +33,10 @@
 struct gk20a_fence_ops {
 	int (*wait)(struct gk20a_fence *, long timeout);
 	bool (*is_expired)(struct gk20a_fence *);
-	void *(*free)(struct kref *);
+	void *(*free)(struct nvgpu_ref *);
 };

-static void gk20a_fence_free(struct kref *ref)
+static void gk20a_fence_free(struct nvgpu_ref *ref)
 {
 	struct gk20a_fence *f =
 		container_of(ref, struct gk20a_fence, ref);
@@ -59,13 +59,13 @@ static void gk20a_fence_free(struct kref *ref)
 void gk20a_fence_put(struct gk20a_fence *f)
 {
 	if (f)
-		kref_put(&f->ref, gk20a_fence_free);
+		nvgpu_ref_put(&f->ref, gk20a_fence_free);
 }

 struct gk20a_fence *gk20a_fence_get(struct gk20a_fence *f)
 {
 	if (f)
-		kref_get(&f->ref);
+		nvgpu_ref_get(&f->ref);
 	return f;
 }

@@ -175,7 +175,7 @@ struct gk20a_fence *gk20a_alloc_fence(struct channel_gk20a *c)
 		fence = nvgpu_kzalloc(c->g, sizeof(struct gk20a_fence));

 	if (fence) {
-		kref_init(&fence->ref);
+		nvgpu_ref_init(&fence->ref);
 		fence->g = c->g;
 	}

--- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.h
@@ -33,7 +33,7 @@ struct gk20a_fence {

 	/* Valid for all fence types: */
 	bool valid;
-	struct kref ref;
+	struct nvgpu_ref ref;
 	bool wfi;
 	struct sync_fence *sync_fence;
 	const struct gk20a_fence_ops *ops;
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h
@@ -159,7 +159,7 @@ struct fifo_gk20a {
 		nvgpu_atomic_t get;
 		bool enabled;
 		u64 *sorted;
-		struct kref ref;
+		struct nvgpu_ref ref;
 		struct nvgpu_mutex lock;
 	} profile;
 #endif
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -511,7 +511,7 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
 /*
 * Free the gk20a struct.
 */
-static void gk20a_free_cb(struct kref *refcount)
+static void gk20a_free_cb(struct nvgpu_ref *refcount)
 {
 	struct gk20a *g = container_of(refcount,
 		struct gk20a, refcount);
@@ -544,10 +544,11 @@ struct gk20a * __must_check gk20a_get(struct gk20a *g)
 	 * the code will never be in such a situation that this race is
 	 * possible.
 	 */
-	success = kref_get_unless_zero(&g->refcount);
+	success = nvgpu_ref_get_unless_zero(&g->refcount);

 	gk20a_dbg(gpu_dbg_shutdown, "GET: refs currently %d %s",
-		atomic_read(&g->refcount.refcount), success ? "" : "(FAILED)");
+		nvgpu_atomic_read(&g->refcount.refcount),
+			success ? "" : "(FAILED)");

 	return success ? g : NULL;
 }
@@ -571,7 +572,7 @@ void gk20a_put(struct gk20a *g)
 	 *  ... Freeing GK20A struct!
 	 */
 	gk20a_dbg(gpu_dbg_shutdown, "PUT: refs currently %d",
-		atomic_read(&g->refcount.refcount));
+		nvgpu_atomic_read(&g->refcount.refcount));

-	kref_put(&g->refcount, gk20a_free_cb);
+	nvgpu_ref_put(&g->refcount, gk20a_free_cb);
 }
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -1036,7 +1036,7 @@ struct gk20a {

 	nvgpu_atomic_t usage_count;

-	struct kref refcount;
+	struct nvgpu_ref refcount;

 	struct resource *reg_mem;
 	void __iomem *regs;
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -811,7 +811,7 @@ int nvgpu_vm_get_buffers(struct vm_gk20a *vm,
 		mapped_buffer = mapped_buffer_from_rbtree_node(node);
 		if (mapped_buffer->user_mapped) {
 			buffer_list[i] = mapped_buffer;
-			kref_get(&mapped_buffer->ref);
+			nvgpu_ref_get(&mapped_buffer->ref);
 			i++;
 		}
 		nvgpu_rbtree_enum_next(&node, node);
@@ -827,7 +827,7 @@ int nvgpu_vm_get_buffers(struct vm_gk20a *vm,
 	return 0;
 }

-void gk20a_vm_unmap_locked_kref(struct kref *ref)
+void gk20a_vm_unmap_locked_ref(struct nvgpu_ref *ref)
 {
 	struct nvgpu_mapped_buf *mapped_buffer =
 		container_of(ref, struct nvgpu_mapped_buf, ref);
@@ -849,8 +849,8 @@ void nvgpu_vm_put_buffers(struct vm_gk20a *vm,
 	vm->kref_put_batch = &batch;

 	for (i = 0; i < num_buffers; ++i)
-		kref_put(&mapped_buffers[i]->ref,
-			 gk20a_vm_unmap_locked_kref);
+		nvgpu_ref_put(&mapped_buffers[i]->ref,
+			 gk20a_vm_unmap_locked_ref);

 	vm->kref_put_batch = NULL;
 	nvgpu_vm_mapping_batch_finish_locked(vm, &batch);
@@ -882,8 +882,9 @@ static void nvgpu_vm_unmap_user(struct vm_gk20a *vm, u64 offset,
 		nvgpu_timeout_init(vm->mm->g, &timeout, 10000,
 				   NVGPU_TIMER_RETRY_TIMER);
 		do {
-			if (atomic_read(&mapped_buffer->ref.refcount) == 1)
-				break;
+			if (nvgpu_atomic_read(
+				&mapped_buffer->ref.refcount) == 1)
+					break;
 			nvgpu_udelay(5);
 		} while (!nvgpu_timeout_expired_msg(&timeout,
 					    "sync-unmap failed on 0x%llx"));
@@ -902,7 +903,7 @@ static void nvgpu_vm_unmap_user(struct vm_gk20a *vm, u64 offset,
 		vm->num_user_mapped_buffers--;

 	vm->kref_put_batch = batch;
-	kref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_kref);
+	nvgpu_ref_put(&mapped_buffer->ref, gk20a_vm_unmap_locked_ref);
 	vm->kref_put_batch = NULL;

 	nvgpu_mutex_release(&vm->update_gmmu_lock);
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -434,6 +434,6 @@ extern const struct gk20a_mmu_level gk20a_mm_levels_128k[];

 int gk20a_mm_get_buffer_info(struct device *dev, int dmabuf_fd,
 			     u64 *buffer_id, u64 *buffer_len);
-void gk20a_vm_unmap_locked_kref(struct kref *ref);
+void gk20a_vm_unmap_locked_ref(struct nvgpu_ref *ref);

 #endif /* MM_GK20A_H */
--- a/drivers/gpu/nvgpu/gk20a/sched_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/sched_gk20a.c
@@ -189,7 +189,7 @@ static int gk20a_sched_dev_ioctl_get_params(struct gk20a_sched_ctrl *sched,
 		return -EINVAL;

 	tsg = &f->tsg[tsgid];
-	if (!kref_get_unless_zero(&tsg->refcount))
+	if (!nvgpu_ref_get_unless_zero(&tsg->refcount))
 		return -ENXIO;

 	arg->pid = tsg->tgid;	/* kernel tgid corresponds to user pid */
@@ -206,7 +206,7 @@ static int gk20a_sched_dev_ioctl_get_params(struct gk20a_sched_ctrl *sched,
 		arg->compute_preempt_mode = 0;
 	}

-	kref_put(&tsg->refcount, gk20a_tsg_release);
+	nvgpu_ref_put(&tsg->refcount, gk20a_tsg_release);

 	return 0;
 }
@@ -227,7 +227,7 @@ static int gk20a_sched_dev_ioctl_tsg_set_timeslice(
 		return -EINVAL;

 	tsg = &f->tsg[tsgid];
-	if (!kref_get_unless_zero(&tsg->refcount))
+	if (!nvgpu_ref_get_unless_zero(&tsg->refcount))
 		return -ENXIO;

 	err = gk20a_busy(g);
@@ -239,7 +239,7 @@ static int gk20a_sched_dev_ioctl_tsg_set_timeslice(
 	gk20a_idle(g);

 done:
-	kref_put(&tsg->refcount, gk20a_tsg_release);
+	nvgpu_ref_put(&tsg->refcount, gk20a_tsg_release);

 	return err;
 }
@@ -260,7 +260,7 @@ static int gk20a_sched_dev_ioctl_tsg_set_runlist_interleave(
 		return -EINVAL;

 	tsg = &f->tsg[tsgid];
-	if (!kref_get_unless_zero(&tsg->refcount))
+	if (!nvgpu_ref_get_unless_zero(&tsg->refcount))
 		return -ENXIO;

 	err = gk20a_busy(g);
@@ -272,7 +272,7 @@ static int gk20a_sched_dev_ioctl_tsg_set_runlist_interleave(
 	gk20a_idle(g);

 done:
-	kref_put(&tsg->refcount, gk20a_tsg_release);
+	nvgpu_ref_put(&tsg->refcount, gk20a_tsg_release);

 	return err;
 }
@@ -320,7 +320,7 @@ static int gk20a_sched_dev_ioctl_get_tsg(struct gk20a_sched_ctrl *sched,
 		return -EINVAL;

 	tsg = &f->tsg[tsgid];
-	if (!kref_get_unless_zero(&tsg->refcount))
+	if (!nvgpu_ref_get_unless_zero(&tsg->refcount))
 		return -ENXIO;

 	nvgpu_mutex_acquire(&sched->status_lock);
@@ -328,7 +328,7 @@ static int gk20a_sched_dev_ioctl_get_tsg(struct gk20a_sched_ctrl *sched,
 		nvgpu_warn(g, "tsgid=%d already referenced", tsgid);
 		/* unlock status_lock as gk20a_tsg_release locks it */
 		nvgpu_mutex_release(&sched->status_lock);
-		kref_put(&tsg->refcount, gk20a_tsg_release);
+		nvgpu_ref_put(&tsg->refcount, gk20a_tsg_release);
 		return -ENXIO;
 	}

@@ -364,7 +364,7 @@ static int gk20a_sched_dev_ioctl_put_tsg(struct gk20a_sched_ctrl *sched,
 	nvgpu_mutex_release(&sched->status_lock);

 	tsg = &f->tsg[tsgid];
-	kref_put(&tsg->refcount, gk20a_tsg_release);
+	nvgpu_ref_put(&tsg->refcount, gk20a_tsg_release);

 	return 0;
 }
@@ -507,7 +507,7 @@ int gk20a_sched_dev_release(struct inode *inode, struct file *filp)
 	for (tsgid = 0; tsgid < f->num_channels; tsgid++) {
 		if (NVGPU_SCHED_ISSET(tsgid, sched->ref_tsg_bitmap)) {
 			tsg = &f->tsg[tsgid];
-			kref_put(&tsg->refcount, gk20a_tsg_release);
+			nvgpu_ref_put(&tsg->refcount, gk20a_tsg_release);
 		}
 	}

--- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
@@ -43,7 +43,7 @@ struct gk20a_sync_timeline {
 */
 struct gk20a_sync_pt {
 	struct gk20a			*g;
-	struct kref			refcount;
+	struct nvgpu_ref			refcount;
 	u32				thresh;
 	struct nvgpu_semaphore		*sema;
 	struct gk20a_sync_timeline	*obj;
@@ -170,7 +170,7 @@ static struct gk20a_sync_timeline *to_gk20a_timeline(struct sync_timeline *obj)
 	return (struct gk20a_sync_timeline *)obj;
 }

-static void gk20a_sync_pt_free_shared(struct kref *ref)
+static void gk20a_sync_pt_free_shared(struct nvgpu_ref *ref)
 {
 	struct gk20a_sync_pt *pt =
 		container_of(ref, struct gk20a_sync_pt, refcount);
@@ -192,7 +192,7 @@ static struct gk20a_sync_pt *gk20a_sync_pt_create_shared(
 	if (!shared)
 		return NULL;

-	kref_init(&shared->refcount);
+	nvgpu_ref_init(&shared->refcount);
 	shared->g = g;
 	shared->obj = obj;
 	shared->sema = sema;
@@ -229,7 +229,7 @@ static void gk20a_sync_pt_free_inst(struct sync_pt *sync_pt)
 {
 	struct gk20a_sync_pt *pt = to_gk20a_sync_pt(sync_pt);
 	if (pt)
-		kref_put(&pt->refcount, gk20a_sync_pt_free_shared);
+		nvgpu_ref_put(&pt->refcount, gk20a_sync_pt_free_shared);
 }

 static struct sync_pt *gk20a_sync_pt_dup_inst(struct sync_pt *sync_pt)
@@ -242,7 +242,7 @@ static struct sync_pt *gk20a_sync_pt_dup_inst(struct sync_pt *sync_pt)
 	if (!pti)
 		return NULL;
 	pti->shared = pt;
-	kref_get(&pt->refcount);
+	nvgpu_ref_get(&pt->refcount);
 	return &pti->pt;
 }

--- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c
@@ -104,7 +104,7 @@ int gk20a_tsg_bind_channel(struct tsg_gk20a *tsg,
 	nvgpu_list_add_tail(&ch->ch_entry, &tsg->ch_list);
 	up_write(&tsg->ch_list_lock);

-	kref_get(&tsg->refcount);
+	nvgpu_ref_get(&tsg->refcount);

 	gk20a_dbg(gpu_dbg_fn, "BIND tsg:%d channel:%d\n",
 					tsg->tsgid, ch->chid);
@@ -122,7 +122,7 @@ int gk20a_tsg_unbind_channel(struct channel_gk20a *ch)
 	nvgpu_list_del(&ch->ch_entry);
 	up_write(&tsg->ch_list_lock);

-	kref_put(&tsg->refcount, gk20a_tsg_release);
+	nvgpu_ref_put(&tsg->refcount, gk20a_tsg_release);

 	ch->tsgid = NVGPU_INVALID_TSG_ID;

@@ -257,7 +257,7 @@ struct tsg_gk20a *gk20a_tsg_open(struct gk20a *g)

 	tsg->g = g;
 	tsg->num_active_channels = 0;
-	kref_init(&tsg->refcount);
+	nvgpu_ref_init(&tsg->refcount);

 	tsg->tsg_gr_ctx = NULL;
 	tsg->vm = NULL;
@@ -287,11 +287,11 @@ struct tsg_gk20a *gk20a_tsg_open(struct gk20a *g)
 	return tsg;

 clean_up:
-	kref_put(&tsg->refcount, gk20a_tsg_release);
+	nvgpu_ref_put(&tsg->refcount, gk20a_tsg_release);
 	return NULL;
 }

-void gk20a_tsg_release(struct kref *ref)
+void gk20a_tsg_release(struct nvgpu_ref *ref)
 {
 	struct tsg_gk20a *tsg = container_of(ref, struct tsg_gk20a, refcount);
 	struct gk20a *g = tsg->g;
--- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.h
@@ -25,7 +25,7 @@ struct channel_gk20a;

 bool gk20a_is_channel_marked_as_tsg(struct channel_gk20a *ch);
 struct tsg_gk20a *gk20a_tsg_open(struct gk20a *g);
-void gk20a_tsg_release(struct kref *ref);
+void gk20a_tsg_release(struct nvgpu_ref *ref);

 int gk20a_init_tsg_support(struct gk20a *g, u32 tsgid);
 struct tsg_gk20a *tsg_gk20a_from_ch(struct channel_gk20a *ch);
@@ -36,7 +36,7 @@ struct tsg_gk20a {
 	bool in_use;
 	int tsgid;

-	struct kref refcount;
+	struct nvgpu_ref refcount;

 	struct nvgpu_list_node ch_list;
 	int num_active_channels;
--- a/drivers/gpu/nvgpu/include/nvgpu/atomic.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/atomic.h
@@ -61,10 +61,18 @@ static inline bool nvgpu_atomic_dec_and_test(nvgpu_atomic_t *v)
 {
 	return __nvgpu_atomic_dec_and_test(v);
 }
+static inline bool nvgpu_atomic_sub_and_test(int i, nvgpu_atomic_t *v)
+{
+	return __nvgpu_atomic_sub_and_test(i, v);
+}
 static inline int nvgpu_atomic_add_return(int i, nvgpu_atomic_t *v)
 {
 	return __nvgpu_atomic_add_return(i, v);
 }
+static inline int nvgpu_atomic_add_unless(nvgpu_atomic_t *v, int a, int u)
+{
+	return __nvgpu_atomic_add_unless(v, a, u);
+}
 static inline void nvgpu_atomic64_set(nvgpu_atomic64_t *v, long i)
 {
 	return  __nvgpu_atomic64_set(v, i);
--- a/drivers/gpu/nvgpu/include/nvgpu/kref.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/kref.h
@@ -10,11 +10,68 @@
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */
+
+/*
+ * The following structure is used for reference counting of objects in nvgpu.
+ */
 #ifndef __NVGPU_KREF_H__
 #define __NVGPU_KREF_H__

-#ifdef __KERNEL__
-#include <linux/kref.h>
-#endif
+#include <nvgpu/atomic.h>
+
+struct nvgpu_ref {
+	nvgpu_atomic_t refcount;
+};
+
+/*
+ * Initialize the object with a reference count of 1.
+ * @ref: the nvgpu_ref object to initialize
+ */
+static inline void nvgpu_ref_init(struct nvgpu_ref *ref)
+{
+	nvgpu_atomic_set(&ref->refcount, 1);
+}
+
+/*
+ * Increment reference count for the object
+ * @ref: the nvgpu_ref object
+ */
+static inline void nvgpu_ref_get(struct nvgpu_ref *ref)
+{
+	nvgpu_atomic_inc(&ref->refcount);
+}
+
+/*
+ * Decrement reference count for the object and call release() if it becomes
+ * zero.
+ * @ref: the nvgpu_ref object
+ * @release: pointer to the function that would be invoked to clean up the
+ *	object when the reference count becomes zero, i.e. the last
+ *	reference corresponding to this object is removed.
+ * Return 1 if this call dropped the last reference (release() is invoked if
+ * it is non-NULL), otherwise return 0. A return value of 0 does not mean the
+ * object is still in memory, as other holders may drop their references
+ * later; only a return value of 1 tells the caller no references remain.
+ */
+static inline int nvgpu_ref_put(struct nvgpu_ref *ref,
+		void (*release)(struct nvgpu_ref *r))
+{
+	if (nvgpu_atomic_sub_and_test(1, &ref->refcount)) {
+		if (release != NULL)
+			release(ref);
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * Increment reference count for the object unless it is zero.
+ * @ref: the nvgpu_ref object
+ * Return non-zero if the increment succeeds, otherwise return 0.
+ */
+static inline int __must_check nvgpu_ref_get_unless_zero(struct nvgpu_ref *ref)
+{
+	return nvgpu_atomic_add_unless(&ref->refcount, 1, 0);
+}

 #endif /* __NVGPU_KREF_H__ */
--- a/drivers/gpu/nvgpu/include/nvgpu/linux/atomic.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/linux/atomic.h
@@ -81,11 +81,21 @@ static inline bool __nvgpu_atomic_dec_and_test(nvgpu_atomic_t *v)
 	return atomic_dec_and_test(&v->atomic_var);
 }

+static inline bool __nvgpu_atomic_sub_and_test(int i, nvgpu_atomic_t *v)
+{
+	return atomic_sub_and_test(i, &v->atomic_var);
+}
+
 static inline int __nvgpu_atomic_add_return(int i, nvgpu_atomic_t *v)
 {
 	return atomic_add_return(i, &v->atomic_var);
 }

+static inline int __nvgpu_atomic_add_unless(nvgpu_atomic_t *v, int a, int u)
+{
+	return atomic_add_unless(&v->atomic_var, a, u);
+}
+
 static inline void __nvgpu_atomic64_set(nvgpu_atomic64_t *v, long i)
 {
 	atomic64_set(&v->atomic_var, i);
--- a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
@@ -73,7 +73,7 @@ struct nvgpu_semaphore {
 	nvgpu_atomic_t value;
 	int incremented;

-	struct kref ref;
+	struct nvgpu_ref ref;
 };

 /*
@@ -106,7 +106,7 @@ struct nvgpu_semaphore_pool {
 	 * done waiting on it. This ref count ensures that the pool doesn't
 	 * go away until all semaphores using this pool are cleaned up first.
 	 */
-	struct kref ref;
+	struct nvgpu_ref ref;
 };

 static inline struct nvgpu_semaphore_pool *
--- a/drivers/gpu/nvgpu/include/nvgpu/vm.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/vm.h
@@ -88,7 +88,7 @@ struct nvgpu_mapped_buf {
 	u64 size;
 	struct dma_buf *dmabuf;
 	struct sg_table *sgt;
-	struct kref ref;
+	struct nvgpu_ref ref;
 	u32 user_mapped;
 	bool own_mem_ref;
 	u32 pgsz_idx;
@@ -142,7 +142,7 @@ struct vm_gk20a {

 	const struct gk20a_mmu_level *mmu_levels;

-	struct kref ref;
+	struct nvgpu_ref ref;

 	struct nvgpu_mutex update_gmmu_lock;

--- a/drivers/gpu/nvgpu/vgpu/vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/vgpu.c
@@ -767,7 +767,7 @@ int vgpu_probe(struct platform_device *pdev)
 	vgpu_create_sysfs(dev);
 	gk20a_init_gr(gk20a);

-	kref_init(&gk20a->refcount);
+	nvgpu_ref_init(&gk20a->refcount);

 	return 0;
 }