diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu
index b8e389199..33d158bb3 100644
--- a/drivers/gpu/nvgpu/Makefile.nvgpu
+++ b/drivers/gpu/nvgpu/Makefile.nvgpu
@@ -57,6 +57,7 @@ nvgpu-y := \
gk20a/gk20a_allocator_bitmap.o \
gk20a/gk20a_allocator_buddy.o \
gk20a/gk20a_allocator_page.o \
+ gk20a/gk20a_allocator_lockless.o \
gk20a/cde_gk20a.o \
gk20a/platform_gk20a_generic.o \
gk20a/tsg_gk20a.o \
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h
index 938651907..0d611ba32 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator.h
@@ -165,6 +165,15 @@ int gk20a_page_allocator_init(struct gk20a_allocator *__a,
const char *name, u64 base, u64 length,
u64 blk_size, u64 flags);
+/*
+ * Lockless allocatior initializers.
+ * Note: This allocator can only allocate fixed-size structures of a
+ * pre-defined size.
+ */
+int gk20a_lockless_allocator_init(struct gk20a_allocator *__a,
+ const char *name, u64 base, u64 length,
+ u64 struct_size, u64 flags);
+
#define GPU_BALLOC_MAX_ORDER 31
/*
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a_allocator_lockless.c b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_lockless.c
new file mode 100644
index 000000000..32455c982
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/gk20a_allocator_lockless.c
@@ -0,0 +1,204 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ */
+
+#include
+#include
+#include
+#include
+
+#include "gk20a_allocator.h"
+#include "lockless_allocator_priv.h"
+
+static u64 gk20a_lockless_alloc_length(struct gk20a_allocator *a)
+{
+ struct gk20a_lockless_allocator *pa = a->priv;
+
+ return pa->length;
+}
+
+static u64 gk20a_lockless_alloc_base(struct gk20a_allocator *a)
+{
+ struct gk20a_lockless_allocator *pa = a->priv;
+
+ return pa->base;
+}
+
+static int gk20a_lockless_alloc_inited(struct gk20a_allocator *a)
+{
+ struct gk20a_lockless_allocator *pa = a->priv;
+ int inited = pa->inited;
+
+ rmb();
+ return inited;
+}
+
+static u64 gk20a_lockless_alloc_end(struct gk20a_allocator *a)
+{
+ struct gk20a_lockless_allocator *pa = a->priv;
+
+ return pa->base + pa->length;
+}
+
+static u64 gk20a_lockless_alloc(struct gk20a_allocator *a, u64 len)
+{
+ struct gk20a_lockless_allocator *pa = a->priv;
+ int head, new_head, ret;
+ u64 addr = 0;
+
+ if (len != pa->blk_size)
+ return 0;
+
+ head = ACCESS_ONCE(pa->head);
+ while (head >= 0) {
+ new_head = ACCESS_ONCE(pa->next[head]);
+ ret = cmpxchg(&pa->head, head, new_head);
+ if (ret == head) {
+ addr = pa->base + head * pa->blk_size;
+ atomic_inc(&pa->nr_allocs);
+ alloc_dbg(a, "Alloc node # %d @ addr 0x%llx\n", head,
+ addr);
+ break;
+ }
+ head = ACCESS_ONCE(pa->head);
+ }
+ return addr;
+}
+
+static void gk20a_lockless_free(struct gk20a_allocator *a, u64 addr)
+{
+ struct gk20a_lockless_allocator *pa = a->priv;
+ int head, ret;
+ u64 cur_idx, rem;
+
+ cur_idx = addr - pa->base;
+ rem = do_div(cur_idx, pa->blk_size);
+
+ while (1) {
+ head = ACCESS_ONCE(pa->head);
+ ACCESS_ONCE(pa->next[cur_idx]) = head;
+ ret = cmpxchg(&pa->head, head, cur_idx);
+ if (ret == head) {
+ atomic_dec(&pa->nr_allocs);
+ alloc_dbg(a, "Free node # %llu\n", cur_idx);
+ break;
+ }
+ }
+}
+
+static void gk20a_lockless_alloc_destroy(struct gk20a_allocator *a)
+{
+ struct gk20a_lockless_allocator *pa = a->priv;
+
+ vfree(pa->next);
+ kfree(pa);
+}
+
+static void gk20a_lockless_print_stats(struct gk20a_allocator *a,
+ struct seq_file *s, int lock)
+{
+ struct gk20a_lockless_allocator *pa = a->priv;
+
+ __alloc_pstat(s, a, "Lockless allocator params:\n");
+ __alloc_pstat(s, a, " start = 0x%llx\n", pa->base);
+ __alloc_pstat(s, a, " end = 0x%llx\n", pa->base + pa->length);
+
+ /* Actual stats. */
+ __alloc_pstat(s, a, "Stats:\n");
+ __alloc_pstat(s, a, " Number allocs = %d\n",
+ atomic_read(&pa->nr_allocs));
+ __alloc_pstat(s, a, " Number free = %d\n",
+ pa->nr_nodes - atomic_read(&pa->nr_allocs));
+}
+
+static const struct gk20a_allocator_ops pool_ops = {
+ .alloc = gk20a_lockless_alloc,
+ .free = gk20a_lockless_free,
+
+ .base = gk20a_lockless_alloc_base,
+ .length = gk20a_lockless_alloc_length,
+ .end = gk20a_lockless_alloc_end,
+ .inited = gk20a_lockless_alloc_inited,
+
+ .fini = gk20a_lockless_alloc_destroy,
+
+ .print_stats = gk20a_lockless_print_stats,
+};
+
+int gk20a_lockless_allocator_init(struct gk20a_allocator *__a,
+ const char *name, u64 base, u64 length,
+ u64 blk_size, u64 flags)
+{
+ int i;
+ int err;
+ int nr_nodes;
+ u64 count, rem;
+ struct gk20a_lockless_allocator *a;
+
+ if (!blk_size)
+ return -EINVAL;
+
+ /*
+ * Ensure we have space for atleast one node & there's no overflow.
+ * In order to control memory footprint, we require count < INT_MAX
+ */
+ count = length;
+ rem = do_div(count, blk_size);
+ if (!base || !count || count > INT_MAX)
+ return -EINVAL;
+
+ a = kzalloc(sizeof(struct gk20a_lockless_allocator), GFP_KERNEL);
+ if (!a)
+ return -ENOMEM;
+
+ err = __gk20a_alloc_common_init(__a, name, a, false, &pool_ops);
+ if (err)
+ goto fail;
+
+ a->next = vzalloc(sizeof(*a->next) * count);
+ if (!a->next) {
+ err = -ENOMEM;
+ goto fail;
+ }
+
+ /* chain the elements together to form the initial free list */
+ nr_nodes = (int)count;
+ for (i = 0; i < nr_nodes; i++)
+ a->next[i] = i + 1;
+ a->next[nr_nodes - 1] = -1;
+
+ a->base = base;
+ a->length = length;
+ a->blk_size = blk_size;
+ a->nr_nodes = nr_nodes;
+ a->flags = flags;
+ atomic_set(&a->nr_allocs, 0);
+
+ wmb();
+ a->inited = true;
+
+ gk20a_init_alloc_debug(__a);
+ alloc_dbg(__a, "New allocator: type lockless\n");
+ alloc_dbg(__a, " base 0x%llx\n", a->base);
+ alloc_dbg(__a, " nodes %d\n", a->nr_nodes);
+ alloc_dbg(__a, " blk_size 0x%llx\n", a->blk_size);
+ alloc_dbg(__a, " flags 0x%llx\n", a->flags);
+
+ return 0;
+
+fail:
+ kfree(a);
+ return err;
+}
diff --git a/drivers/gpu/nvgpu/gk20a/lockless_allocator_priv.h b/drivers/gpu/nvgpu/gk20a/lockless_allocator_priv.h
new file mode 100644
index 000000000..f9b03e0e8
--- /dev/null
+++ b/drivers/gpu/nvgpu/gk20a/lockless_allocator_priv.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ */
+
+/*
+ * Basics:
+ *
+ * - Lockless memory allocator for fixed-size structures, whose
+ * size is defined up front at init time.
+ * - Memory footprint scales linearly w/ the number of structures in
+ * the pool. It is ~= sizeof(int) * N.
+ * - Memory is pre-allocated by the client. The allocator itself
+ * only computes the addresses for allocations.
+ * - Limit of MAX_INT nodes that the allocator can be responsible for.
+ *
+ * Implementation details:
+ *
+ * The allocator maintains a single list of free nodes. We allocate &
+ * free nodes from the head of the list. We rely on the cmpxchg() operator
+ * to maintain atomicity on the head.
+ *
+ * So, both allocs & frees are O(1)!!
+ *
+ * -- Definitions --
+ * Block Size - size of a single structure that this allocator will
+ * allocate.
+ * Node - one of the elements of size blk_size in the
+ * client-allocated buffer.
+ * Node Index - zero-based index of a node in the client-allocated
+ * contiguous buffer.
+ *
+ * -- Initial State --
+ * We maintain the following to track the state of the free list:
+ *
+ * 1) A "head" index to track the index of the first free node in the list
+ * 2) A "next" array to track the index of the next free node in the list
+ * for every node. So next[head], will give the index to the 2nd free
+ * element in the list.
+ *
+ * So, to begin with, the free list consists of all node indices, and each
+ * position in the next array contains index N + 1:
+ *
+ * head = 0
+ * next = [1, 2, 3, 4, -1] : Example for a user-allocated buffer of 5 nodes
+ * free_list = 0->1->2->3->4->-1
+ *
+ * -- Allocations --
+ * 1) Read the current head (aka acq_head)
+ * 2) Read next[acq_head], to get the 2nd free element (aka new_head)
+ * 3) cmp_xchg(&head, acq_head, new_head)
+ * 4) If it succeeds, compute the address of the node, based on
+ * base address, blk_size, & acq_head.
+ *
+ * head = 1;
+ * next = [1, 2, 3, 4, -1] : Example after allocating Node #0
+ * free_list = 1->2->3->4->-1
+ *
+ * head = 2;
+ * next = [1, 2, 3, 4, -1] : Example after allocating Node #1
+ * free_list = 2->3->4->-1
+ *
+ * -- Frees --
+ * 1) Based on the address to be freed, calculate the index of the node
+ * being freed (cur_idx)
+ * 2) Read the current head (old_head)
+ * 3) So the freed node is going to go at the head of the list, and we
+ * want to put the old_head after it. So next[cur_idx] = old_head
+ * 4) cmpxchg(head, old_head, cur_idx)
+ *
+ * head = 0
+ * next = [2, 2, 3, 4, -1]
+ * free_list = 0->2->3->4->-1 : Example after freeing Node #0
+ *
+ * head = 1
+ * next = [2, 0, 3, 4, -1]
+ * free_list = 1->0->2->3->4->-1 : Example after freeing Node #1
+ */
+
+#ifndef LOCKLESS_ALLOCATOR_PRIV_H
+#define LOCKLESS_ALLOCATOR_PRIV_H
+
+struct gk20a_allocator;
+
+struct gk20a_lockless_allocator {
+ struct gk20a_allocator *owner;
+
+ u64 base; /* Base address of the space. */
+ u64 length; /* Length of the space. */
+ u64 blk_size; /* Size of the structure being allocated */
+ int nr_nodes; /* Number of nodes available for allocation */
+
+ int *next; /* An array holding the next indices per node */
+ int head; /* Current node at the top of the stack */
+
+ u64 flags;
+
+ bool inited;
+
+ /* Statistics */
+ atomic_t nr_allocs;
+};
+
+static inline struct gk20a_lockless_allocator *lockless_allocator(
+ struct gk20a_allocator *a)
+{
+ return (struct gk20a_lockless_allocator *)(a)->priv;
+}
+
+#endif