From 98186ec2c2127c2af65a34f9e697e04f518a79ab Mon Sep 17 00:00:00 2001 From: Debarshi Dutta Date: Thu, 3 Aug 2017 15:34:44 +0530 Subject: [PATCH] gpu: nvgpu: Add wrapper over atomic_t and atomic64_t - added wrapper structs nvgpu_atomic_t and nvgpu_atomic64_t over atomic_t and atomic64_t - added nvgpu_atomic_* and nvgpu_atomic64_* APIs to access the above wrappers. JIRA NVGPU-121 Change-Id: I61667bb0a84c2fc475365abb79bffb42b8b4786a Signed-off-by: Debarshi Dutta Reviewed-on: https://git-master.nvidia.com/r/1533044 Reviewed-by: svccoveritychecker Reviewed-by: svc-mobile-coverity Reviewed-by: Terje Bergstrom GVS: Gerrit_Virtual_Submit --- drivers/gpu/nvgpu/clk/clk_arb.c | 80 +++++----- drivers/gpu/nvgpu/common/linux/debug_fifo.c | 8 +- drivers/gpu/nvgpu/common/linux/dma.c | 4 +- drivers/gpu/nvgpu/common/linux/module.c | 10 +- .../gpu/nvgpu/common/mm/lockless_allocator.c | 13 +- .../nvgpu/common/mm/lockless_allocator_priv.h | 4 +- drivers/gpu/nvgpu/common/semaphore.c | 4 +- drivers/gpu/nvgpu/gk20a/channel_gk20a.c | 39 ++--- drivers/gpu/nvgpu/gk20a/channel_gk20a.h | 5 +- drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c | 6 +- drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h | 2 +- drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c | 14 +- drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c | 5 +- drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 6 +- drivers/gpu/nvgpu/gk20a/fifo_gk20a.h | 2 +- drivers/gpu/nvgpu/gk20a/gk20a.c | 4 +- drivers/gpu/nvgpu/gk20a/gk20a.h | 5 +- drivers/gpu/nvgpu/gk20a/mm_gk20a.c | 6 +- drivers/gpu/nvgpu/gk20a/mm_gk20a.h | 3 +- drivers/gpu/nvgpu/gm20b/fifo_gm20b.c | 2 +- drivers/gpu/nvgpu/include/nvgpu/atomic.h | 98 ++++++++++++- .../gpu/nvgpu/include/nvgpu/linux/atomic.h | 137 ++++++++++++++++++ drivers/gpu/nvgpu/include/nvgpu/semaphore.h | 12 +- drivers/gpu/nvgpu/vgpu/fifo_vgpu.c | 6 +- 24 files changed, 355 insertions(+), 120 deletions(-) create mode 100644 drivers/gpu/nvgpu/include/nvgpu/linux/atomic.h diff --git a/drivers/gpu/nvgpu/clk/clk_arb.c b/drivers/gpu/nvgpu/clk/clk_arb.c index 3caa5409e..b00ecd31b 100644 --- a/drivers/gpu/nvgpu/clk/clk_arb.c +++ b/drivers/gpu/nvgpu/clk/clk_arb.c @@ -108,8 +108,8 @@ struct nvgpu_clk_notification { struct nvgpu_clk_notification_queue { u32 size; - atomic_t head; - atomic_t tail; + nvgpu_atomic_t head; + nvgpu_atomic_t tail; struct nvgpu_clk_notification *notifications; }; @@ -183,13 +183,13 @@ struct nvgpu_clk_arb { u32 vf_table_index; u16 *mclk_f_points; - atomic_t req_nr; + nvgpu_atomic_t req_nr; u32 mclk_f_numpoints; u16 *gpc2clk_f_points; u32 gpc2clk_f_numpoints; - atomic64_t alarm_mask; + nvgpu_atomic64_t alarm_mask; struct nvgpu_clk_notification_queue notification_queue; #ifdef CONFIG_DEBUG_FS @@ -206,11 +206,11 @@ struct nvgpu_clk_dev { struct llist_node node; }; wait_queue_head_t readout_wq; - atomic_t poll_mask; + nvgpu_atomic_t poll_mask; u16 gpc2clk_target_mhz; u16 mclk_target_mhz; u32 alarms_reported; - atomic_t enabled_mask; + nvgpu_atomic_t enabled_mask; struct nvgpu_clk_notification_queue queue; u32 arb_queue_head; struct kref refcount; @@ -253,8 +253,8 @@ static int nvgpu_clk_notification_queue_alloc(struct gk20a *g, return -ENOMEM; queue->size = events_number; - atomic_set(&queue->head, 0); - atomic_set(&queue->tail, 0); + nvgpu_atomic_set(&queue->head, 0); + nvgpu_atomic_set(&queue->tail, 0); return 0; } @@ -263,8 +263,8 @@ static void nvgpu_clk_notification_queue_free(struct gk20a *g, struct nvgpu_clk_notification_queue *queue) { nvgpu_kfree(g, queue->notifications); queue->size = 0; - atomic_set(&queue->head, 0); - atomic_set(&queue->tail, 0); 
+ nvgpu_atomic_set(&queue->head, 0); + nvgpu_atomic_set(&queue->tail, 0); } int nvgpu_clk_arb_init_arbiter(struct gk20a *g) @@ -346,9 +346,9 @@ int nvgpu_clk_arb_init_arbiter(struct gk20a *g) arb->actual = &arb->actual_pool[0]; - atomic_set(&arb->req_nr, 0); + nvgpu_atomic_set(&arb->req_nr, 0); - atomic64_set(&arb->alarm_mask, 0); + nvgpu_atomic64_set(&arb->alarm_mask, 0); err = nvgpu_clk_notification_queue_alloc(g, &arb->notification_queue, DEFAULT_EVENT_NUMBER); if (err < 0) @@ -388,8 +388,8 @@ int nvgpu_clk_arb_init_arbiter(struct gk20a *g) /* Check that first run is completed */ smp_mb(); wait_event_interruptible(arb->request_wq, - atomic_read(&arb->req_nr)); - } while (!atomic_read(&arb->req_nr)); + nvgpu_atomic_read(&arb->req_nr)); + } while (!nvgpu_atomic_read(&arb->req_nr)); return arb->status; @@ -430,7 +430,7 @@ static void nvgpu_clk_arb_clear_global_alarm(struct gk20a *g, u32 alarm) u64 new_mask; do { - current_mask = atomic64_read(&arb->alarm_mask); + current_mask = nvgpu_atomic64_read(&arb->alarm_mask); /* atomic operations are strong so they do not need masks */ refcnt = ((u32) (current_mask >> 32)) + 1; @@ -438,7 +438,7 @@ static void nvgpu_clk_arb_clear_global_alarm(struct gk20a *g, u32 alarm) new_mask = ((u64) refcnt << 32) | alarm_mask; } while (unlikely(current_mask != - (u64)atomic64_cmpxchg(&arb->alarm_mask, + (u64)nvgpu_atomic64_cmpxchg(&arb->alarm_mask, current_mask, new_mask))); } @@ -452,7 +452,7 @@ static void nvgpu_clk_arb_set_global_alarm(struct gk20a *g, u32 alarm) u64 new_mask; do { - current_mask = atomic64_read(&arb->alarm_mask); + current_mask = nvgpu_atomic64_read(&arb->alarm_mask); /* atomic operations are strong so they do not need masks */ refcnt = ((u32) (current_mask >> 32)) + 1; @@ -460,7 +460,7 @@ static void nvgpu_clk_arb_set_global_alarm(struct gk20a *g, u32 alarm) new_mask = ((u64) refcnt << 32) | alarm_mask; } while (unlikely(current_mask != - (u64)atomic64_cmpxchg(&arb->alarm_mask, + (u64)nvgpu_atomic64_cmpxchg(&arb->alarm_mask, current_mask, new_mask))); nvgpu_clk_arb_queue_notification(g, &arb->notification_queue, alarm); @@ -537,7 +537,7 @@ static int nvgpu_clk_arb_install_fd(struct gk20a *g, init_waitqueue_head(&dev->readout_wq); - atomic_set(&dev->poll_mask, 0); + nvgpu_atomic_set(&dev->poll_mask, 0); dev->session = session; kref_init(&dev->refcount); @@ -657,11 +657,11 @@ int nvgpu_clk_arb_install_event_fd(struct gk20a *g, * updated */ if (alarm_mask) - atomic_set(&dev->enabled_mask, alarm_mask); + nvgpu_atomic_set(&dev->enabled_mask, alarm_mask); else - atomic_set(&dev->enabled_mask, EVENT(VF_UPDATE)); + nvgpu_atomic_set(&dev->enabled_mask, EVENT(VF_UPDATE)); - dev->arb_queue_head = atomic_read(&arb->notification_queue.head); + dev->arb_queue_head = nvgpu_atomic_read(&arb->notification_queue.head); nvgpu_spinlock_acquire(&arb->users_lock); list_add_tail_rcu(&dev->link, &arb->users); @@ -1056,7 +1056,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) gk20a_dbg_fn(""); /* bail out if gpu is down */ - if (atomic_read(&arb->alarm_mask) & EVENT(ALARM_GPU_LOST)) + if (nvgpu_atomic64_read(&arb->alarm_mask) & EVENT(ALARM_GPU_LOST)) goto exit_arb; #ifdef CONFIG_DEBUG_FS @@ -1247,7 +1247,7 @@ static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work) /* status must be visible before atomic inc */ smp_wmb(); - atomic_inc(&arb->req_nr); + nvgpu_atomic_inc(&arb->req_nr); /* Unlock pstate change for PG */ nvgpu_mutex_release(&arb->pstate_lock); @@ -1298,17 +1298,17 @@ exit_arb: EVENT(ALARM_CLOCK_ARBITER_FAILED)); } - 
current_alarm = (u32) atomic64_read(&arb->alarm_mask); + current_alarm = (u32) nvgpu_atomic64_read(&arb->alarm_mask); /* notify completion for all requests */ head = llist_del_all(&arb->requests); llist_for_each_entry_safe(dev, tmp, head, node) { - atomic_set(&dev->poll_mask, POLLIN | POLLRDNORM); + nvgpu_atomic_set(&dev->poll_mask, POLLIN | POLLRDNORM); wake_up_interruptible(&dev->readout_wq); kref_put(&dev->refcount, nvgpu_clk_arb_free_fd); } - atomic_set(&arb->notification_queue.head, - atomic_read(&arb->notification_queue.tail)); + nvgpu_atomic_set(&arb->notification_queue.head, + nvgpu_atomic_read(&arb->notification_queue.tail)); /* notify event for all users */ rcu_read_lock(); list_for_each_entry_rcu(dev, &arb->users, link) { @@ -1329,7 +1329,7 @@ static void nvgpu_clk_arb_queue_notification(struct gk20a *g, u32 queue_index; u64 timestamp; - queue_index = (atomic_inc_return(&queue->tail)) % queue->size; + queue_index = (nvgpu_atomic_inc_return(&queue->tail)) % queue->size; /* get current timestamp */ timestamp = (u64) sched_clock(); @@ -1355,14 +1355,14 @@ static u32 nvgpu_clk_arb_notify(struct nvgpu_clk_dev *dev, size_t size; int index; - enabled_mask = atomic_read(&dev->enabled_mask); + enabled_mask = nvgpu_atomic_read(&dev->enabled_mask); size = arb->notification_queue.size; /* queue global arbiter notifications in buffer */ do { - tail = atomic_read(&arb->notification_queue.tail); + tail = nvgpu_atomic_read(&arb->notification_queue.tail); /* copy items to the queue */ - queue_index = atomic_read(&dev->queue.tail); + queue_index = nvgpu_atomic_read(&dev->queue.tail); head = dev->arb_queue_head; head = (tail - head) < arb->notification_queue.size ? head : tail - arb->notification_queue.size; @@ -1389,10 +1389,10 @@ static u32 nvgpu_clk_arb_notify(struct nvgpu_clk_dev *dev, queue_alarm_mask |= alarm_detected; } - } while (unlikely(atomic_read(&arb->notification_queue.tail) != + } while (unlikely(nvgpu_atomic_read(&arb->notification_queue.tail) != (int)tail)); - atomic_set(&dev->queue.tail, queue_index); + nvgpu_atomic_set(&dev->queue.tail, queue_index); /* update the last notification we processed from global queue */ dev->arb_queue_head = tail; @@ -1429,7 +1429,7 @@ static u32 nvgpu_clk_arb_notify(struct nvgpu_clk_dev *dev, } if (poll_mask) { - atomic_set(&dev->poll_mask, poll_mask); + nvgpu_atomic_set(&dev->poll_mask, poll_mask); wake_up_interruptible_all(&dev->readout_wq); } @@ -1454,7 +1454,7 @@ static int nvgpu_clk_arb_set_event_filter(struct nvgpu_clk_dev *dev, return -EFAULT; /* update alarm mask */ - atomic_set(&dev->enabled_mask, mask); + nvgpu_atomic_set(&dev->enabled_mask, mask); return 0; } @@ -1539,8 +1539,8 @@ static inline u32 __pending_event(struct nvgpu_clk_dev *dev, u32 events = 0; struct nvgpu_clk_notification *p_notif; - tail = atomic_read(&dev->queue.tail); - head = atomic_read(&dev->queue.head); + tail = nvgpu_atomic_read(&dev->queue.tail); + head = nvgpu_atomic_read(&dev->queue.head); head = (tail - head) < dev->queue.size ? 
head : tail - dev->queue.size; @@ -1550,7 +1550,7 @@ static inline u32 __pending_event(struct nvgpu_clk_dev *dev, events |= p_notif->notification; info->event_id = ffs(events) - 1; info->timestamp = p_notif->timestamp; - atomic_set(&dev->queue.head, head); + nvgpu_atomic_set(&dev->queue.head, head); } return events; @@ -1594,7 +1594,7 @@ static unsigned int nvgpu_clk_arb_poll_dev(struct file *filp, poll_table *wait) gk20a_dbg_fn(""); poll_wait(filp, &dev->readout_wq, wait); - return atomic_xchg(&dev->poll_mask, 0); + return nvgpu_atomic_xchg(&dev->poll_mask, 0); } static int nvgpu_clk_arb_release_completion_dev(struct inode *inode, diff --git a/drivers/gpu/nvgpu/common/linux/debug_fifo.c b/drivers/gpu/nvgpu/common/linux/debug_fifo.c index a240a1385..1763eb7ea 100644 --- a/drivers/gpu/nvgpu/common/linux/debug_fifo.c +++ b/drivers/gpu/nvgpu/common/linux/debug_fifo.c @@ -167,7 +167,7 @@ static int gk20a_fifo_profile_enable(void *data, u64 val) } kref_init(&f->profile.ref); } - atomic_set(&f->profile.get, 0); + atomic_set(&f->profile.get.atomic_var, 0); f->profile.enabled = true; } } @@ -246,7 +246,7 @@ static int gk20a_fifo_profile_stats(struct seq_file *s, void *unused) return 0; } - get = atomic_read(&g->fifo.profile.get); + get = atomic_read(&g->fifo.profile.get.atomic_var); __gk20a_fifo_create_stats(g, percentiles_ioctl, PROFILE_IOCTL_EXIT, PROFILE_IOCTL_ENTRY); @@ -311,7 +311,7 @@ void gk20a_fifo_debugfs_init(struct gk20a *g) nvgpu_mutex_init(&g->fifo.profile.lock); g->fifo.profile.enabled = false; - atomic_set(&g->fifo.profile.get, 0); + atomic_set(&g->fifo.profile.get.atomic_var, 0); atomic_set(&g->fifo.profile.ref.refcount, 0); debugfs_create_file("enable", 0600, profile_root, g, @@ -342,7 +342,7 @@ struct fifo_profile_gk20a *gk20a_fifo_profile_acquire(struct gk20a *g) /* If kref is zero, profiling is not enabled */ if (!kref_get_unless_zero(&f->profile.ref)) return NULL; - index = atomic_inc_return(&f->profile.get); + index = atomic_inc_return(&f->profile.get.atomic_var); profile = &f->profile.data[index % FIFO_PROFILING_ENTRIES]; return profile; diff --git a/drivers/gpu/nvgpu/common/linux/dma.c b/drivers/gpu/nvgpu/common/linux/dma.c index ea5b28374..2116053d8 100644 --- a/drivers/gpu/nvgpu/common/linux/dma.c +++ b/drivers/gpu/nvgpu/common/linux/dma.c @@ -197,7 +197,7 @@ int nvgpu_dma_alloc_flags_vid_at(struct gk20a *g, unsigned long flags, WARN_ON(flags != NVGPU_DMA_NO_KERNEL_MAPPING); nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex); - before_pending = atomic64_read(&g->mm.vidmem.bytes_pending); + before_pending = atomic64_read(&g->mm.vidmem.bytes_pending.atomic_var); addr = __nvgpu_dma_alloc(vidmem_alloc, at, size); nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex); if (!addr) { @@ -394,7 +394,7 @@ static void nvgpu_dma_free_vid(struct gk20a *g, struct nvgpu_mem *mem) was_empty = nvgpu_list_empty(&g->mm.vidmem.clear_list_head); nvgpu_list_add_tail(&mem->clear_list_entry, &g->mm.vidmem.clear_list_head); - atomic64_add(mem->size, &g->mm.vidmem.bytes_pending); + atomic64_add(mem->size, &g->mm.vidmem.bytes_pending.atomic_var); nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex); if (was_empty) { diff --git a/drivers/gpu/nvgpu/common/linux/module.c b/drivers/gpu/nvgpu/common/linux/module.c index bfbe7a58f..f5c6ca1f6 100644 --- a/drivers/gpu/nvgpu/common/linux/module.c +++ b/drivers/gpu/nvgpu/common/linux/module.c @@ -68,13 +68,13 @@ int gk20a_busy(struct gk20a *g) if (!g) return -ENODEV; - atomic_inc(&g->usage_count); + atomic_inc(&g->usage_count.atomic_var); 
down_read(&g->busy_lock); if (!gk20a_can_busy(g)) { ret = -ENODEV; - atomic_dec(&g->usage_count); + atomic_dec(&g->usage_count.atomic_var); goto fail; } @@ -87,7 +87,7 @@ int gk20a_busy(struct gk20a *g) /* Mark suspended so runtime pm will retry later */ pm_runtime_set_suspended(dev); pm_runtime_put_noidle(dev); - atomic_dec(&g->usage_count); + atomic_dec(&g->usage_count.atomic_var); goto fail; } } else { @@ -97,7 +97,7 @@ int gk20a_busy(struct gk20a *g) vgpu_pm_finalize_poweron(dev) : gk20a_pm_finalize_poweron(dev); if (ret) { - atomic_dec(&g->usage_count); + atomic_dec(&g->usage_count.atomic_var); nvgpu_mutex_release(&g->poweron_lock); goto fail; } @@ -120,7 +120,7 @@ void gk20a_idle(struct gk20a *g) { struct device *dev; - atomic_dec(&g->usage_count); + atomic_dec(&g->usage_count.atomic_var); dev = dev_from_gk20a(g); diff --git a/drivers/gpu/nvgpu/common/mm/lockless_allocator.c b/drivers/gpu/nvgpu/common/mm/lockless_allocator.c index 2a569efd6..eeb86095d 100644 --- a/drivers/gpu/nvgpu/common/mm/lockless_allocator.c +++ b/drivers/gpu/nvgpu/common/mm/lockless_allocator.c @@ -65,7 +65,9 @@ static u64 nvgpu_lockless_alloc(struct nvgpu_allocator *a, u64 len) ret = cmpxchg(&pa->head, head, new_head); if (ret == head) { addr = pa->base + head * pa->blk_size; - atomic_inc(&pa->nr_allocs); + nvgpu_atomic_inc(&pa->nr_allocs); + alloc_dbg(a, "Alloc node # %d @ addr 0x%llx\n", head, + addr); break; } head = ACCESS_ONCE(pa->head); @@ -94,7 +96,8 @@ static void nvgpu_lockless_free(struct nvgpu_allocator *a, u64 addr) ACCESS_ONCE(pa->next[cur_idx]) = head; ret = cmpxchg(&pa->head, head, cur_idx); if (ret == head) { - atomic_dec(&pa->nr_allocs); + nvgpu_atomic_dec(&pa->nr_allocs); + alloc_dbg(a, "Free node # %llu\n", cur_idx); break; } } @@ -125,9 +128,9 @@ static void nvgpu_lockless_print_stats(struct nvgpu_allocator *a, /* Actual stats. */ __alloc_pstat(s, a, "Stats:\n"); __alloc_pstat(s, a, " Number allocs = %d\n", - atomic_read(&pa->nr_allocs)); + nvgpu_atomic_read(&pa->nr_allocs)); __alloc_pstat(s, a, " Number free = %d\n", - pa->nr_nodes - atomic_read(&pa->nr_allocs)); + pa->nr_nodes - nvgpu_atomic_read(&pa->nr_allocs)); } #endif @@ -193,7 +196,7 @@ int nvgpu_lockless_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a, a->blk_size = blk_size; a->nr_nodes = nr_nodes; a->flags = flags; - atomic_set(&a->nr_allocs, 0); + nvgpu_atomic_set(&a->nr_allocs, 0); wmb(); a->inited = true; diff --git a/drivers/gpu/nvgpu/common/mm/lockless_allocator_priv.h b/drivers/gpu/nvgpu/common/mm/lockless_allocator_priv.h index 32421ac1c..c527bff9e 100644 --- a/drivers/gpu/nvgpu/common/mm/lockless_allocator_priv.h +++ b/drivers/gpu/nvgpu/common/mm/lockless_allocator_priv.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2016 - 2017, NVIDIA CORPORATION. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -109,7 +109,7 @@ struct nvgpu_lockless_allocator { bool inited; /* Statistics */ - atomic_t nr_allocs; + nvgpu_atomic_t nr_allocs; }; static inline struct nvgpu_lockless_allocator *lockless_allocator( diff --git a/drivers/gpu/nvgpu/common/semaphore.c b/drivers/gpu/nvgpu/common/semaphore.c index 3e916b9dd..ac45aaaa9 100644 --- a/drivers/gpu/nvgpu/common/semaphore.c +++ b/drivers/gpu/nvgpu/common/semaphore.c @@ -364,7 +364,7 @@ static int __nvgpu_init_hw_sema(struct channel_gk20a *ch) hw_sema->p = p; hw_sema->idx = hw_sema_idx; hw_sema->offset = SEMAPHORE_SIZE * hw_sema_idx; - atomic_set(&hw_sema->next_value, 0); + nvgpu_atomic_set(&hw_sema->next_value, 0); nvgpu_init_list_node(&hw_sema->hw_sema_list); nvgpu_mem_wr(ch->g, &p->rw_mem, hw_sema->offset, 0); @@ -425,7 +425,7 @@ struct nvgpu_semaphore *nvgpu_semaphore_alloc(struct channel_gk20a *ch) kref_init(&s->ref); s->hw_sema = ch->hw_sema; - atomic_set(&s->value, 0); + nvgpu_atomic_set(&s->value, 0); /* * Take a ref on the pool so that we can keep this pool alive for diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 62b312b27..d96872f3f 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -100,7 +100,7 @@ static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f) ch = nvgpu_list_first_entry(&f->free_chs, channel_gk20a, free_chs); nvgpu_list_del(&ch->free_chs); - WARN_ON(atomic_read(&ch->ref_count)); + WARN_ON(nvgpu_atomic_read(&ch->ref_count)); WARN_ON(ch->referenceable); f->used_channels++; } @@ -394,20 +394,20 @@ void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error) } static void gk20a_wait_until_counter_is_N( - struct channel_gk20a *ch, atomic_t *counter, int wait_value, + struct channel_gk20a *ch, nvgpu_atomic_t *counter, int wait_value, struct nvgpu_cond *c, const char *caller, const char *counter_name) { while (true) { if (NVGPU_COND_WAIT( c, - atomic_read(counter) == wait_value, + nvgpu_atomic_read(counter) == wait_value, 5000) == 0) break; nvgpu_warn(ch->g, "%s: channel %d, still waiting, %s left: %d, waiting for: %d", caller, ch->chid, counter_name, - atomic_read(counter), wait_value); + nvgpu_atomic_read(counter), wait_value); gk20a_channel_dump_ref_actions(ch); } @@ -491,7 +491,7 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force) nvgpu_spinlock_release(&ch->ref_obtain_lock); /* matches with the initial reference in gk20a_open_new_channel() */ - atomic_dec(&ch->ref_count); + nvgpu_atomic_dec(&ch->ref_count); /* wait until no more refs to the channel */ if (!force) @@ -635,7 +635,7 @@ static void gk20a_channel_dump_ref_actions(struct channel_gk20a *ch) nvgpu_spinlock_acquire(&ch->ref_actions_lock); dev_info(dev, "ch %d: refs %d. Actions, most recent last:\n", - ch->chid, atomic_read(&ch->ref_count)); + ch->chid, nvgpu_atomic_read(&ch->ref_count)); /* start at the oldest possible entry. 
put is next insertion point */ get = ch->ref_actions_put; @@ -709,7 +709,7 @@ struct channel_gk20a *_gk20a_channel_get(struct channel_gk20a *ch, if (likely(ch->referenceable)) { gk20a_channel_save_ref_source(ch, channel_gk20a_ref_action_get); - atomic_inc(&ch->ref_count); + nvgpu_atomic_inc(&ch->ref_count); ret = ch; } else ret = NULL; @@ -726,17 +726,17 @@ void _gk20a_channel_put(struct channel_gk20a *ch, const char *caller) { gk20a_channel_save_ref_source(ch, channel_gk20a_ref_action_put); trace_gk20a_channel_put(ch->chid, caller); - atomic_dec(&ch->ref_count); + nvgpu_atomic_dec(&ch->ref_count); nvgpu_cond_broadcast(&ch->ref_count_dec_wq); /* More puts than gets. Channel is probably going to get * stuck. */ - WARN_ON(atomic_read(&ch->ref_count) < 0); + WARN_ON(nvgpu_atomic_read(&ch->ref_count) < 0); /* Also, more puts than gets. ref_count can go to 0 only if * the channel is closing. Channel is probably going to get * stuck. */ - WARN_ON(atomic_read(&ch->ref_count) == 0 && ch->referenceable); + WARN_ON(nvgpu_atomic_read(&ch->ref_count) == 0 && ch->referenceable); } void gk20a_channel_close(struct channel_gk20a *ch) @@ -879,7 +879,7 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g, * references. The initial reference will be decreased in * gk20a_free_channel() */ ch->referenceable = true; - atomic_set(&ch->ref_count, 1); + nvgpu_atomic_set(&ch->ref_count, 1); wmb(); return ch; @@ -1745,7 +1745,7 @@ static int __gk20a_channel_worker_wakeup(struct gk20a *g) * pair. */ - put = atomic_inc_return(&g->channel_worker.put); + put = nvgpu_atomic_inc_return(&g->channel_worker.put); nvgpu_cond_signal(&g->channel_worker.wq); return put; @@ -1761,7 +1761,7 @@ static int __gk20a_channel_worker_wakeup(struct gk20a *g) */ static bool __gk20a_channel_worker_pending(struct gk20a *g, int get) { - bool pending = atomic_read(&g->channel_worker.put) != get; + bool pending = nvgpu_atomic_read(&g->channel_worker.put) != get; /* * This would be the place for a rmb() pairing a wmb() for a wakeup @@ -1864,7 +1864,7 @@ int nvgpu_channel_worker_init(struct gk20a *g) int err; char thread_name[64]; - atomic_set(&g->channel_worker.put, 0); + nvgpu_atomic_set(&g->channel_worker.put, 0); nvgpu_cond_init(&g->channel_worker.wq); nvgpu_init_list_node(&g->channel_worker.items); nvgpu_spinlock_init(&g->channel_worker.items_lock); @@ -2086,7 +2086,8 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c, if (g->aggressive_sync_destroy_thresh) { nvgpu_mutex_acquire(&c->sync_lock); - if (atomic_dec_and_test(&c->sync->refcount) && + if (nvgpu_atomic_dec_and_test( + &c->sync->refcount) && g->aggressive_sync_destroy) { gk20a_channel_sync_destroy(c->sync); c->sync = NULL; @@ -2321,7 +2322,7 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c, } new_sync_created = true; } - atomic_inc(&c->sync->refcount); + nvgpu_atomic_inc(&c->sync->refcount); nvgpu_mutex_release(&c->sync_lock); } @@ -2774,9 +2775,9 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid) c->g = NULL; c->chid = chid; - atomic_set(&c->bound, false); + nvgpu_atomic_set(&c->bound, false); nvgpu_spinlock_init(&c->ref_obtain_lock); - atomic_set(&c->ref_count, 0); + nvgpu_atomic_set(&c->ref_count, 0); c->referenceable = false; nvgpu_cond_init(&c->ref_count_dec_wq); @@ -2935,7 +2936,7 @@ void gk20a_channel_semaphore_wakeup(struct gk20a *g, bool post_events) for (chid = 0; chid < f->num_channels; chid++) { struct channel_gk20a *c = g->fifo.channel+chid; if (gk20a_channel_get(c)) { - if (atomic_read(&c->bound)) { + if 
(nvgpu_atomic_read(&c->bound)) { nvgpu_cond_broadcast_interruptible( &c->semaphore_wq); if (post_events) { diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h index a9ccd93f9..f022e6306 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h @@ -24,6 +24,7 @@ #include #include #include +#include <nvgpu/atomic.h> struct gk20a; struct gr_gk20a; @@ -173,7 +174,7 @@ struct channel_gk20a { struct nvgpu_spinlock ref_obtain_lock; bool referenceable; - atomic_t ref_count; + nvgpu_atomic_t ref_count; struct nvgpu_cond ref_count_dec_wq; #if GK20A_CHANNEL_REFCOUNT_TRACKING /* @@ -191,7 +192,7 @@ struct channel_gk20a { int chid; bool wdt_enabled; - atomic_t bound; + nvgpu_atomic_t bound; bool first_init; bool vpr; bool deterministic; diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c index c9c03d374..aa340ba6a 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c @@ -346,7 +346,7 @@ gk20a_channel_syncpt_create(struct channel_gk20a *c) nvgpu_nvhost_syncpt_set_min_eq_max_ext(sp->nvhost_dev, sp->id); - atomic_set(&sp->ops.refcount, 0); + nvgpu_atomic_set(&sp->ops.refcount, 0); sp->ops.wait_syncpt = gk20a_channel_syncpt_wait_syncpt; sp->ops.wait_fd = gk20a_channel_syncpt_wait_fd; sp->ops.incr = gk20a_channel_syncpt_incr; @@ -619,7 +619,7 @@ static int __semaphore_wait_fd_fast_path(struct channel_gk20a *c, return err; nvgpu_semaphore_get(sema); - BUG_ON(!atomic_read(&sema->value)); + BUG_ON(!nvgpu_atomic_read(&sema->value)); add_sema_cmd(c->g, c, sema, wait_cmd, 8, true, false); /* @@ -922,7 +922,7 @@ gk20a_channel_semaphore_create(struct channel_gk20a *c) return NULL; } #endif - atomic_set(&sema->ops.refcount, 0); + nvgpu_atomic_set(&sema->ops.refcount, 0); sema->ops.wait_syncpt = gk20a_channel_semaphore_wait_syncpt; sema->ops.wait_fd = gk20a_channel_semaphore_wait_fd; sema->ops.incr = gk20a_channel_semaphore_incr; diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h index 4efd1b764..9bdc5d12a 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h @@ -25,7 +25,7 @@ struct gk20a_fence; struct gk20a; struct gk20a_channel_sync { - atomic_t refcount; + nvgpu_atomic_t refcount; /* Generate a gpu wait cmdbuf from syncpoint.
* Returns diff --git a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c index cc05cefff..546917f17 100644 --- a/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ctxsw_trace_gk20a.c @@ -47,7 +47,7 @@ struct gk20a_ctxsw_dev { size_t size; u32 num_ents; - atomic_t vma_ref; + nvgpu_atomic_t vma_ref; struct nvgpu_mutex write_lock; }; @@ -152,7 +152,7 @@ static int gk20a_ctxsw_dev_alloc_buffer(struct gk20a_ctxsw_dev *dev, void *buf; int err; - if ((dev->write_enabled) || (atomic_read(&dev->vma_ref))) + if ((dev->write_enabled) || (nvgpu_atomic_read(&dev->vma_ref))) return -EBUSY; err = g->ops.fecs_trace.alloc_user_buffer(g, &buf, &size); @@ -438,18 +438,18 @@ static void gk20a_ctxsw_dev_vma_open(struct vm_area_struct *vma) { struct gk20a_ctxsw_dev *dev = vma->vm_private_data; - atomic_inc(&dev->vma_ref); + nvgpu_atomic_inc(&dev->vma_ref); gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d", - atomic_read(&dev->vma_ref)); + nvgpu_atomic_read(&dev->vma_ref)); } static void gk20a_ctxsw_dev_vma_close(struct vm_area_struct *vma) { struct gk20a_ctxsw_dev *dev = vma->vm_private_data; - atomic_dec(&dev->vma_ref); + nvgpu_atomic_dec(&dev->vma_ref); gk20a_dbg(gpu_dbg_fn|gpu_dbg_ctxsw, "vma_ref=%d", - atomic_read(&dev->vma_ref)); + nvgpu_atomic_read(&dev->vma_ref)); } static struct vm_operations_struct gk20a_ctxsw_dev_vma_ops = { @@ -497,7 +497,7 @@ static int gk20a_ctxsw_init_devs(struct gk20a *g) err = nvgpu_mutex_init(&dev->write_lock); if (err) return err; - atomic_set(&dev->vma_ref, 0); + nvgpu_atomic_set(&dev->vma_ref, 0); dev++; } return 0; diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c index 1572ff48b..000508501 100644 --- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c @@ -26,6 +26,7 @@ #include #include #include +#include <nvgpu/atomic.h> #include "gk20a.h" #include "gk20a/platform_gk20a.h" @@ -74,10 +75,10 @@ nvgpu_dbg_gpu_get_session_channel(struct dbg_session_gk20a *dbg_s) } /* silly allocator - just increment id */ -static atomic_t unique_id = ATOMIC_INIT(0); +static nvgpu_atomic_t unique_id = NVGPU_ATOMIC_INIT(0); static int generate_unique_id(void) { - return atomic_add_return(1, &unique_id); + return nvgpu_atomic_add_return(1, &unique_id); } static int alloc_session(struct gk20a *g, struct dbg_session_gk20a **_dbg_s) diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index abd455d76..47e7d82ee 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c @@ -3439,7 +3439,7 @@ void gk20a_dump_channel_status_ramfc(struct gk20a *g, gk20a_debug_output(o, "SEMA STATE: value: 0x%08x " "next_val: 0x%08x addr: 0x%010llx\n", __nvgpu_semaphore_read(hw_sema), - atomic_read(&hw_sema->next_value), + nvgpu_atomic_read(&hw_sema->next_value), nvgpu_hw_sema_addr(hw_sema)); #ifdef CONFIG_TEGRA_GK20A_NVHOST @@ -3489,7 +3489,7 @@ void gk20a_debug_dump_all_channel_status_ramfc(struct gk20a *g, continue; ch_state[chid]->pid = ch->pid; - ch_state[chid]->refs = atomic_read(&ch->ref_count); + ch_state[chid]->refs = nvgpu_atomic_read(&ch->ref_count); ch_state[chid]->deterministic = ch->deterministic; nvgpu_mem_rd_n(g, &ch->inst_block, 0, &ch_state[chid]->inst_block[0], @@ -3591,7 +3591,7 @@ void gk20a_fifo_channel_unbind(struct channel_gk20a *ch_gk20a) gk20a_dbg_fn(""); - if (atomic_cmpxchg(&ch_gk20a->bound, true, false)) { + if (nvgpu_atomic_cmpxchg(&ch_gk20a->bound, true, false)) { gk20a_writel(g,
ccsr_channel_inst_r(ch_gk20a->chid), ccsr_channel_inst_ptr_f(0) | ccsr_channel_inst_bind_false_f()); diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h index b19a7b680..a6eae8ca7 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h @@ -156,7 +156,7 @@ struct fifo_gk20a { #ifdef CONFIG_DEBUG_FS struct { struct fifo_profile_gk20a *data; - atomic_t get; + nvgpu_atomic_t get; bool enabled; u64 *sorted; struct kref ref; diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index c50d800fb..550b22c01 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c @@ -373,13 +373,13 @@ int gk20a_wait_for_idle(struct gk20a *g) if (g->user_railgate_disabled) target_usage_count = 1; - while ((atomic_read(&g->usage_count) != target_usage_count) + while ((nvgpu_atomic_read(&g->usage_count) != target_usage_count) && (wait_length-- >= 0)) nvgpu_msleep(20); if (wait_length < 0) { pr_warn("%s: Timed out waiting for idle (%d)!\n", - __func__, atomic_read(&g->usage_count)); + __func__, nvgpu_atomic_read(&g->usage_count)); return -ETIMEDOUT; } diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 4878fdd68..47fd3aef9 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -48,6 +48,7 @@ struct nvgpu_cpu_time_correlation_sample; #include #include #include +#include <nvgpu/atomic.h> #include "clk_gk20a.h" #include "ce2_gk20a.h" @@ -1038,7 +1039,7 @@ struct gk20a { */ unsigned long *enabled_flags; - atomic_t usage_count; + nvgpu_atomic_t usage_count; struct kref refcount; @@ -1205,7 +1206,7 @@ struct gk20a { struct gk20a_channel_worker { struct nvgpu_thread poll_task; - atomic_t put; + nvgpu_atomic_t put; struct nvgpu_cond wq; struct nvgpu_list_node items; struct nvgpu_spinlock items_lock; diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 16fe71492..e21be1e52 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c @@ -617,7 +617,7 @@ static int gk20a_init_vidmem(struct mm_gk20a *mm) nvgpu_mutex_init(&mm->vidmem.first_clear_mutex); INIT_WORK(&mm->vidmem.clear_mem_worker, gk20a_vidmem_clear_mem_worker); - atomic64_set(&mm->vidmem.bytes_pending, 0); + nvgpu_atomic64_set(&mm->vidmem.bytes_pending, 0); nvgpu_init_list_node(&mm->vidmem.clear_list_head); nvgpu_mutex_init(&mm->vidmem.clear_list_mutex); @@ -1165,7 +1165,7 @@ int gk20a_vidmem_get_space(struct gk20a *g, u64 *space) nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex); *space = nvgpu_alloc_space(allocator) + - atomic64_read(&g->mm.vidmem.bytes_pending); + nvgpu_atomic64_read(&g->mm.vidmem.bytes_pending); nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex); return 0; #else @@ -1483,7 +1483,7 @@ static void gk20a_vidmem_clear_mem_worker(struct work_struct *work) (u64)get_vidmem_page_alloc(mem->priv.sgt->sgl)); nvgpu_free_sgtable(g, &mem->priv.sgt); - WARN_ON(atomic64_sub_return(mem->size, + WARN_ON(nvgpu_atomic64_sub_return(mem->size, &g->mm.vidmem.bytes_pending) < 0); mem->size = 0; mem->aperture = APERTURE_INVALID; diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 7b2c0dfc6..af176a739 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h @@ -30,6 +30,7 @@ #include #include #include +#include <nvgpu/atomic.h> struct nvgpu_pd_cache; @@ -283,7 +284,7 @@ struct mm_gk20a { struct nvgpu_mutex clear_list_mutex; struct work_struct clear_mem_worker; - atomic64_t bytes_pending;
+ nvgpu_atomic64_t bytes_pending; } vidmem; }; diff --git a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c index afdfba916..e688c863a 100644 --- a/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/fifo_gm20b.c @@ -51,7 +51,7 @@ void channel_gm20b_bind(struct channel_gk20a *c) ~ccsr_channel_enable_set_f(~0)) | ccsr_channel_enable_set_true_f()); wmb(); - atomic_set(&c->bound, true); + nvgpu_atomic_set(&c->bound, true); } static inline u32 gm20b_engine_id_to_mmu_id(struct gk20a *g, u32 engine_id) diff --git a/drivers/gpu/nvgpu/include/nvgpu/atomic.h b/drivers/gpu/nvgpu/include/nvgpu/atomic.h index 700e29fe7..c7a5fcd93 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/atomic.h +++ b/drivers/gpu/nvgpu/include/nvgpu/atomic.h @@ -9,12 +9,102 @@ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. */ #ifndef __NVGPU_ATOMIC_H__ #define __NVGPU_ATOMIC_H__ -#ifdef __KERNEL__ -#include <linux/atomic.h> -#endif +#include <nvgpu/linux/atomic.h> -#endif +#define NVGPU_ATOMIC_INIT(i) __nvgpu_atomic_init(i) +#define NVGPU_ATOMIC64_INIT(i) __nvgpu_atomic64_init(i) + +static inline void nvgpu_atomic_set(nvgpu_atomic_t *v, int i) +{ + __nvgpu_atomic_set(v, i); +} +static inline int nvgpu_atomic_read(nvgpu_atomic_t *v) +{ + return __nvgpu_atomic_read(v); +} +static inline void nvgpu_atomic_inc(nvgpu_atomic_t *v) +{ + __nvgpu_atomic_inc(v); +} +static inline int nvgpu_atomic_inc_return(nvgpu_atomic_t *v) +{ + return __nvgpu_atomic_inc_return(v); +} +static inline void nvgpu_atomic_dec(nvgpu_atomic_t *v) +{ + __nvgpu_atomic_dec(v); +} +static inline int nvgpu_atomic_dec_return(nvgpu_atomic_t *v) +{ + return __nvgpu_atomic_dec_return(v); +} +static inline int nvgpu_atomic_cmpxchg(nvgpu_atomic_t *v, int old, int new) +{ + return __nvgpu_atomic_cmpxchg(v, old, new); +} +static inline int nvgpu_atomic_xchg(nvgpu_atomic_t *v, int new) +{ + return __nvgpu_atomic_xchg(v, new); +} +static inline bool nvgpu_atomic_inc_and_test(nvgpu_atomic_t *v) +{ + return __nvgpu_atomic_inc_and_test(v); +} +static inline bool nvgpu_atomic_dec_and_test(nvgpu_atomic_t *v) +{ + return __nvgpu_atomic_dec_and_test(v); +} +static inline int nvgpu_atomic_add_return(int i, nvgpu_atomic_t *v) +{ + return __nvgpu_atomic_add_return(i, v); +} +static inline void nvgpu_atomic64_set(nvgpu_atomic64_t *v, long i) +{ + __nvgpu_atomic64_set(v, i); +} +static inline long nvgpu_atomic64_read(nvgpu_atomic64_t *v) +{ + return __nvgpu_atomic64_read(v); +} +static inline void nvgpu_atomic64_add(long x, nvgpu_atomic64_t *v) +{ + __nvgpu_atomic64_add(x, v); +} +static inline void nvgpu_atomic64_inc(nvgpu_atomic64_t *v) +{ + __nvgpu_atomic64_inc(v); +} +static inline long nvgpu_atomic64_inc_return(nvgpu_atomic64_t *v) +{ + return __nvgpu_atomic64_inc_return(v); +} +static inline void nvgpu_atomic64_dec(nvgpu_atomic64_t *v) +{ + __nvgpu_atomic64_dec(v); +} +static inline long nvgpu_atomic64_dec_return(nvgpu_atomic64_t *v) +{ + return __nvgpu_atomic64_dec_return(v); +} +static inline long nvgpu_atomic64_cmpxchg(nvgpu_atomic64_t *v, long old, + long new) +{ + return __nvgpu_atomic64_cmpxchg(v, old, new); +} +static inline void nvgpu_atomic64_sub(long x, nvgpu_atomic64_t *v) +{ + __nvgpu_atomic64_sub(x, v); +} +static inline long nvgpu_atomic64_sub_return(long x, nvgpu_atomic64_t *v) +{ + return __nvgpu_atomic64_sub_return(x, v);
+} + +#endif /* __NVGPU_ATOMIC_H__ */ diff --git a/drivers/gpu/nvgpu/include/nvgpu/linux/atomic.h b/drivers/gpu/nvgpu/include/nvgpu/linux/atomic.h new file mode 100644 index 000000000..c6dd46501 --- /dev/null +++ b/drivers/gpu/nvgpu/include/nvgpu/linux/atomic.h @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __NVGPU_ATOMIC_LINUX_H__ +#define __NVGPU_ATOMIC_LINUX_H__ + +#include <linux/atomic.h> + +typedef struct nvgpu_atomic { + atomic_t atomic_var; +} nvgpu_atomic_t; + +typedef struct nvgpu_atomic64 { + atomic64_t atomic_var; +} nvgpu_atomic64_t; + +#define __nvgpu_atomic_init(i) { ATOMIC_INIT(i) } +#define __nvgpu_atomic64_init(i) { ATOMIC64_INIT(i) } + +static inline void __nvgpu_atomic_set(nvgpu_atomic_t *v, int i) +{ + atomic_set(&v->atomic_var, i); +} + +static inline int __nvgpu_atomic_read(nvgpu_atomic_t *v) +{ + return atomic_read(&v->atomic_var); +} + +static inline void __nvgpu_atomic_inc(nvgpu_atomic_t *v) +{ + atomic_inc(&v->atomic_var); +} + +static inline int __nvgpu_atomic_inc_return(nvgpu_atomic_t *v) +{ + return atomic_inc_return(&v->atomic_var); +} + +static inline void __nvgpu_atomic_dec(nvgpu_atomic_t *v) +{ + atomic_dec(&v->atomic_var); +} + +static inline int __nvgpu_atomic_dec_return(nvgpu_atomic_t *v) +{ + return atomic_dec_return(&v->atomic_var); +} + +static inline int __nvgpu_atomic_cmpxchg(nvgpu_atomic_t *v, int old, int new) +{ + return atomic_cmpxchg(&v->atomic_var, old, new); +} + +static inline int __nvgpu_atomic_xchg(nvgpu_atomic_t *v, int new) +{ + return atomic_xchg(&v->atomic_var, new); +} + +static inline bool __nvgpu_atomic_inc_and_test(nvgpu_atomic_t *v) +{ + return atomic_inc_and_test(&v->atomic_var); +} + +static inline bool __nvgpu_atomic_dec_and_test(nvgpu_atomic_t *v) +{ + return atomic_dec_and_test(&v->atomic_var); +} + +static inline int __nvgpu_atomic_add_return(int i, nvgpu_atomic_t *v) +{ + return atomic_add_return(i, &v->atomic_var); +} + +static inline void __nvgpu_atomic64_set(nvgpu_atomic64_t *v, long i) +{ + atomic64_set(&v->atomic_var, i); +} + +static inline long __nvgpu_atomic64_read(nvgpu_atomic64_t *v) +{ + return atomic64_read(&v->atomic_var); +} + +static inline void __nvgpu_atomic64_add(long x, nvgpu_atomic64_t *v) +{ + atomic64_add(x, &v->atomic_var); +} + +static inline void __nvgpu_atomic64_inc(nvgpu_atomic64_t *v) +{ + atomic64_inc(&v->atomic_var); +} + +static inline long __nvgpu_atomic64_inc_return(nvgpu_atomic64_t *v) +{ + return atomic64_inc_return(&v->atomic_var); +} + +static inline void __nvgpu_atomic64_dec(nvgpu_atomic64_t *v) +{ + atomic64_dec(&v->atomic_var); +} + +static inline long __nvgpu_atomic64_dec_return(nvgpu_atomic64_t *v) +{ + return atomic64_dec_return(&v->atomic_var); +} + +static inline long __nvgpu_atomic64_cmpxchg(nvgpu_atomic64_t *v, + long old, long new) +{ + return atomic64_cmpxchg(&v->atomic_var, old, new); +} + +static inline void __nvgpu_atomic64_sub(long x, nvgpu_atomic64_t *v) +{
+ atomic64_sub(x, &v->atomic_var); +} + +static inline long __nvgpu_atomic64_sub_return(long x, nvgpu_atomic64_t *v) +{ + return atomic64_sub_return(x, &v->atomic_var); +} +#endif /* __NVGPU_ATOMIC_LINUX_H__ */ diff --git a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h index faa8d945d..90261d815 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h +++ b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h @@ -48,7 +48,7 @@ struct nvgpu_semaphore_sea; struct nvgpu_semaphore_int { int idx; /* Semaphore index. */ u32 offset; /* Offset into the pool. */ - atomic_t next_value; /* Next available value. */ + nvgpu_atomic_t next_value; /* Next available value. */ u32 nr_incrs; /* Number of increments programmed. */ struct nvgpu_semaphore_pool *p; /* Pool that owns this sema. */ struct channel_gk20a *ch; /* Channel that owns this sema. */ @@ -70,7 +70,7 @@ nvgpu_semaphore_int_from_hw_sema_list(struct nvgpu_list_node *node) struct nvgpu_semaphore { struct nvgpu_semaphore_int *hw_sema; - atomic_t value; + nvgpu_atomic_t value; int incremented; struct kref ref; @@ -242,7 +242,7 @@ static inline bool nvgpu_semaphore_is_released(struct nvgpu_semaphore *s) * the value of the semaphore then the semaphore has been signaled * (a.k.a. released). */ - return (int)sema_val >= atomic_read(&s->value); + return (int)sema_val >= nvgpu_atomic_read(&s->value); } static inline bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s) @@ -252,12 +252,12 @@ static inline u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s) { - return (u32)atomic_read(&s->value); + return (u32)nvgpu_atomic_read(&s->value); } static inline u32 nvgpu_semaphore_next_value(struct nvgpu_semaphore *s) { - return (u32)atomic_read(&s->hw_sema->next_value); + return (u32)nvgpu_atomic_read(&s->hw_sema->next_value); } /* @@ -320,7 +320,7 @@ static inline void nvgpu_semaphore_incr(struct nvgpu_semaphore *s) { BUG_ON(s->incremented); - atomic_set(&s->value, atomic_add_return(1, &s->hw_sema->next_value)); + nvgpu_atomic_set(&s->value, nvgpu_atomic_add_return(1, &s->hw_sema->next_value)); s->incremented = 1; gpu_sema_verbose_dbg(s->hw_sema->p->sema_sea->gk20a, diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c index f1ae2f1f2..c85199058 100644 --- a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c @@ -43,7 +43,7 @@ static void vgpu_channel_bind(struct channel_gk20a *ch) WARN_ON(err || msg.ret); wmb(); - atomic_set(&ch->bound, true); + nvgpu_atomic_set(&ch->bound, true); } static void vgpu_channel_unbind(struct channel_gk20a *ch) @@ -51,7 +51,7 @@ gk20a_dbg_fn(""); - if (atomic_cmpxchg(&ch->bound, true, false)) { + if (nvgpu_atomic_cmpxchg(&ch->bound, true, false)) { struct tegra_vgpu_cmd_msg msg; struct tegra_vgpu_channel_config_params *p = &msg.params.channel_config; @@ -425,7 +425,7 @@ static int vgpu_fifo_preempt_channel(struct gk20a *g, u32 chid) gk20a_dbg_fn(""); - if (!atomic_read(&ch->bound)) + if (!nvgpu_atomic_read(&ch->bound)) return 0; msg.cmd = TEGRA_VGPU_CMD_CHANNEL_PREEMPT;
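
A minimal usage sketch of the new wrapper API, for illustration only and not part of the change itself: the nvgpu_atomic_t type, the NVGPU_ATOMIC_INIT() initializer and the nvgpu_atomic_*() calls come from this patch, while the example_refs counter and the example_get()/example_put() helpers are hypothetical names.

#include <nvgpu/atomic.h>

/* Hypothetical refcount built on the new wrapper type. */
static nvgpu_atomic_t example_refs = NVGPU_ATOMIC_INIT(0);

static void example_get(void)
{
	/* Resolves to atomic_inc() through the Linux backend in
	 * include/nvgpu/linux/atomic.h. */
	nvgpu_atomic_inc(&example_refs);
}

/* Returns true when the last reference is dropped. */
static bool example_put(void)
{
	return nvgpu_atomic_dec_and_test(&example_refs);
}

Because the wrappers are static inline functions around a single struct member, they compile to the same code as the raw atomic_t operations on Linux, while letting non-Linux builds supply their own backend behind the same nvgpu_atomic_* names.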