diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index 7b419e508..2b410ab03 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile @@ -62,6 +62,10 @@ nvgpu-y += \ common/sync/channel_sync_semaphore.o \ common/sync/sema_cmdbuf_gk20a.o \ common/sync/sema_cmdbuf_gv11b.o \ + common/semaphore/semaphore_sea.o \ + common/semaphore/semaphore_pool.o \ + common/semaphore/semaphore_hw.o \ + common/semaphore/semaphore.o \ common/boardobj/boardobj.o \ common/boardobj/boardobjgrp.o \ common/boardobj/boardobjgrpmask.o \ @@ -319,7 +323,6 @@ nvgpu-y += \ common/enabled.o \ common/string.o \ common/pramin.o \ - common/semaphore.o \ common/rbtree.o \ common/vbios/bios.o \ common/falcon/falcon.o \ diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources index 6bb28ec64..ba04d661b 100644 --- a/drivers/gpu/nvgpu/Makefile.sources +++ b/drivers/gpu/nvgpu/Makefile.sources @@ -103,7 +103,6 @@ srcs += common/sim.c \ common/top/top_gv100.c \ common/enabled.c \ common/pramin.c \ - common/semaphore.c \ common/string.c \ common/rbtree.c \ common/ltc/ltc.c \ @@ -176,6 +175,10 @@ srcs += common/sim.c \ common/sync/syncpt_cmdbuf_gv11b.c \ common/sync/sema_cmdbuf_gk20a.c \ common/sync/sema_cmdbuf_gv11b.c \ + common/semaphore/semaphore_sea.c \ + common/semaphore/semaphore_pool.c \ + common/semaphore/semaphore_hw.c \ + common/semaphore/semaphore.c \ common/clock_gating/gm20b_gating_reglist.c \ common/clock_gating/gp10b_gating_reglist.c \ common/clock_gating/gv11b_gating_reglist.c \ diff --git a/drivers/gpu/nvgpu/common/fifo/channel.c b/drivers/gpu/nvgpu/common/fifo/channel.c index e3bf6806a..2d6f7386c 100644 --- a/drivers/gpu/nvgpu/common/fifo/channel.c +++ b/drivers/gpu/nvgpu/common/fifo/channel.c @@ -453,7 +453,7 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force) * as the semaphore pool might get freed after that point. */ if (ch->hw_sema != NULL) { - nvgpu_semaphore_free_hw_sema(ch); + nvgpu_hw_semaphore_free(ch); } /* diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c index ee5221510..b6ec67381 100644 --- a/drivers/gpu/nvgpu/common/mm/vm.c +++ b/drivers/gpu/nvgpu/common/mm/vm.c @@ -248,13 +248,14 @@ static int nvgpu_init_sema_pool(struct vm_gk20a *vm) * * !!! TODO: cleanup. */ - sema_sea->gpu_va = nvgpu_alloc_fixed(&vm->kernel, + nvgpu_semaphore_sea_allocate_gpu_va(sema_sea, &vm->kernel, vm->va_limit - mm->channel.kernel_size, 512U * PAGE_SIZE, (u32)SZ_4K); - if (sema_sea->gpu_va == 0ULL) { - nvgpu_free(&vm->kernel, sema_sea->gpu_va); + if (nvgpu_semaphore_sea_get_gpu_va(sema_sea) == 0ULL) { + nvgpu_free(&vm->kernel, + nvgpu_semaphore_sea_get_gpu_va(sema_sea)); nvgpu_vm_put(vm); return -ENOMEM; } @@ -263,7 +264,7 @@ static int nvgpu_init_sema_pool(struct vm_gk20a *vm) if (err != 0) { nvgpu_semaphore_pool_unmap(vm->sema_pool, vm); nvgpu_free(vm->vma[GMMU_PAGE_SIZE_SMALL], - vm->sema_pool->gpu_va); + nvgpu_semaphore_pool_gpu_va(vm->sema_pool, false)); return err; } diff --git a/drivers/gpu/nvgpu/common/semaphore.c b/drivers/gpu/nvgpu/common/semaphore.c deleted file mode 100644 index 656e005e4..000000000 --- a/drivers/gpu/nvgpu/common/semaphore.c +++ /dev/null @@ -1,641 +0,0 @@ -/* - * Nvgpu Semaphores - * - * Copyright (c) 2014-2019, NVIDIA CORPORATION. All rights reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "gk20a/mm_gk20a.h" - -#define pool_to_gk20a(p) ((p)->sema_sea->gk20a) - -#define __lock_sema_sea(s) \ - do { \ - gpu_sema_verbose_dbg((s)->gk20a, "Acquiring sema lock..."); \ - nvgpu_mutex_acquire(&(s)->sea_lock); \ - gpu_sema_verbose_dbg((s)->gk20a, "Sema lock aquried!"); \ - } while (false) - -#define __unlock_sema_sea(s) \ - do { \ - nvgpu_mutex_release(&(s)->sea_lock); \ - gpu_sema_verbose_dbg((s)->gk20a, "Released sema lock"); \ - } while (false) - -/* - * Return the sema_sea pointer. - */ -struct nvgpu_semaphore_sea *nvgpu_semaphore_get_sea(struct gk20a *g) -{ - return g->sema_sea; -} - -static int __nvgpu_semaphore_sea_grow(struct nvgpu_semaphore_sea *sea) -{ - int ret = 0; - struct gk20a *gk20a = sea->gk20a; - u32 i; - - __lock_sema_sea(sea); - - ret = nvgpu_dma_alloc_sys(gk20a, - PAGE_SIZE * SEMAPHORE_POOL_COUNT, - &sea->sea_mem); - if (ret != 0) { - goto out; - } - - sea->size = SEMAPHORE_POOL_COUNT; - sea->map_size = SEMAPHORE_POOL_COUNT * PAGE_SIZE; - - /* - * Start the semaphores at values that will soon overflow the 32-bit - * integer range. This way any buggy comparisons would start to fail - * sooner rather than later. - */ - for (i = 0U; i < PAGE_SIZE * SEMAPHORE_POOL_COUNT; i += 4U) { - nvgpu_mem_wr(gk20a, &sea->sea_mem, i, 0xfffffff0U); - } - -out: - __unlock_sema_sea(sea); - return ret; -} - -void nvgpu_semaphore_sea_destroy(struct gk20a *g) -{ - if (g->sema_sea == NULL) { - return; - } - - nvgpu_dma_free(g, &g->sema_sea->sea_mem); - nvgpu_mutex_destroy(&g->sema_sea->sea_lock); - nvgpu_kfree(g, g->sema_sea); - g->sema_sea = NULL; -} - -/* - * Create the semaphore sea. Only create it once - subsequent calls to this will - * return the originally created sea pointer. 
- */ -struct nvgpu_semaphore_sea *nvgpu_semaphore_sea_create(struct gk20a *g) -{ - if (g->sema_sea != NULL) { - return g->sema_sea; - } - - g->sema_sea = nvgpu_kzalloc(g, sizeof(*g->sema_sea)); - if (g->sema_sea == NULL) { - return NULL; - } - - g->sema_sea->size = 0; - g->sema_sea->page_count = 0; - g->sema_sea->gk20a = g; - nvgpu_init_list_node(&g->sema_sea->pool_list); - if (nvgpu_mutex_init(&g->sema_sea->sea_lock) != 0) { - goto cleanup_free; - } - - if (__nvgpu_semaphore_sea_grow(g->sema_sea) != 0) { - goto cleanup_destroy; - } - - gpu_sema_dbg(g, "Created semaphore sea!"); - return g->sema_sea; - -cleanup_destroy: - nvgpu_mutex_destroy(&g->sema_sea->sea_lock); -cleanup_free: - nvgpu_kfree(g, g->sema_sea); - g->sema_sea = NULL; - gpu_sema_dbg(g, "Failed to creat semaphore sea!"); - return NULL; -} - -static int __semaphore_bitmap_alloc(unsigned long *bitmap, unsigned long len) -{ - unsigned long idx = find_first_zero_bit(bitmap, len); - - if (idx == len) { - return -ENOSPC; - } - - set_bit(idx, bitmap); - - return (int)idx; -} - -/* - * Allocate a pool from the sea. - */ -int nvgpu_semaphore_pool_alloc(struct nvgpu_semaphore_sea *sea, - struct nvgpu_semaphore_pool **pool) -{ - struct nvgpu_semaphore_pool *p; - unsigned long page_idx; - int ret; - - p = nvgpu_kzalloc(sea->gk20a, sizeof(*p)); - if (p == NULL) { - return -ENOMEM; - } - - __lock_sema_sea(sea); - - ret = nvgpu_mutex_init(&p->pool_lock); - if (ret != 0) { - goto fail; - } - - ret = __semaphore_bitmap_alloc(sea->pools_alloced, - SEMAPHORE_POOL_COUNT); - if (ret < 0) { - goto fail_alloc; - } - - page_idx = (unsigned long)ret; - - p->page_idx = page_idx; - p->sema_sea = sea; - nvgpu_init_list_node(&p->pool_list_entry); - nvgpu_ref_init(&p->ref); - - sea->page_count++; - nvgpu_list_add(&p->pool_list_entry, &sea->pool_list); - __unlock_sema_sea(sea); - - gpu_sema_dbg(sea->gk20a, - "Allocated semaphore pool: page-idx=%llu", p->page_idx); - - *pool = p; - return 0; - -fail_alloc: - nvgpu_mutex_destroy(&p->pool_lock); -fail: - __unlock_sema_sea(sea); - nvgpu_kfree(sea->gk20a, p); - gpu_sema_dbg(sea->gk20a, "Failed to allocate semaphore pool!"); - return ret; -} - -/* - * Map a pool into the passed vm's address space. This handles both the fixed - * global RO mapping and the non-fixed private RW mapping. - */ -int nvgpu_semaphore_pool_map(struct nvgpu_semaphore_pool *p, - struct vm_gk20a *vm) -{ - int err = 0; - u64 addr; - - if (p->mapped) { - return -EBUSY; - } - - gpu_sema_dbg(pool_to_gk20a(p), - "Mapping semaphore pool! (idx=%llu)", p->page_idx); - - /* - * Take the sea lock so that we don't race with a possible change to the - * nvgpu_mem in the sema sea. - */ - __lock_sema_sea(p->sema_sea); - - addr = nvgpu_gmmu_map_fixed(vm, &p->sema_sea->sea_mem, - p->sema_sea->gpu_va, - p->sema_sea->map_size, - 0, gk20a_mem_flag_read_only, 0, - p->sema_sea->sea_mem.aperture); - if (addr == 0ULL) { - err = -ENOMEM; - goto fail_unlock; - } - - p->gpu_va_ro = addr; - p->mapped = true; - - gpu_sema_dbg(pool_to_gk20a(p), - " %llu: GPU read-only VA = 0x%llx", - p->page_idx, p->gpu_va_ro); - - /* - * Now the RW mapping. This is a bit more complicated. We make a - * nvgpu_mem describing a page of the bigger RO space and then map - * that. Unlike above this does not need to be a fixed address. 
- */ - err = nvgpu_mem_create_from_mem(vm->mm->g, - &p->rw_mem, &p->sema_sea->sea_mem, - p->page_idx, 1); - if (err != 0) { - goto fail_unmap; - } - - addr = nvgpu_gmmu_map(vm, &p->rw_mem, SZ_4K, 0, - gk20a_mem_flag_none, 0, - p->rw_mem.aperture); - - if (addr == 0ULL) { - err = -ENOMEM; - goto fail_free_submem; - } - - p->gpu_va = addr; - - __unlock_sema_sea(p->sema_sea); - - gpu_sema_dbg(pool_to_gk20a(p), - " %llu: GPU read-write VA = 0x%llx", - p->page_idx, p->gpu_va); - gpu_sema_dbg(pool_to_gk20a(p), - " %llu: CPU VA = 0x%p", - p->page_idx, p->rw_mem.cpu_va); - - return 0; - -fail_free_submem: - nvgpu_dma_free(pool_to_gk20a(p), &p->rw_mem); -fail_unmap: - nvgpu_gmmu_unmap(vm, &p->sema_sea->sea_mem, p->gpu_va_ro); - gpu_sema_dbg(pool_to_gk20a(p), - " %llu: Failed to map semaphore pool!", p->page_idx); -fail_unlock: - __unlock_sema_sea(p->sema_sea); - return err; -} - -/* - * Unmap a semaphore_pool. - */ -void nvgpu_semaphore_pool_unmap(struct nvgpu_semaphore_pool *p, - struct vm_gk20a *vm) -{ - __lock_sema_sea(p->sema_sea); - - nvgpu_gmmu_unmap(vm, &p->sema_sea->sea_mem, p->gpu_va_ro); - nvgpu_gmmu_unmap(vm, &p->rw_mem, p->gpu_va); - nvgpu_dma_free(pool_to_gk20a(p), &p->rw_mem); - - p->gpu_va = 0; - p->gpu_va_ro = 0; - p->mapped = false; - - __unlock_sema_sea(p->sema_sea); - - gpu_sema_dbg(pool_to_gk20a(p), - "Unmapped semaphore pool! (idx=%llu)", p->page_idx); -} - -static struct nvgpu_semaphore_pool * -nvgpu_semaphore_pool_from_ref(struct nvgpu_ref *ref) -{ - return (struct nvgpu_semaphore_pool *) - ((uintptr_t)ref - offsetof(struct nvgpu_semaphore_pool, ref)); -} - -/* - * Completely free a semaphore_pool. You should make sure this pool is not - * mapped otherwise there's going to be a memory leak. - */ -static void nvgpu_semaphore_pool_free(struct nvgpu_ref *ref) -{ - struct nvgpu_semaphore_pool *p = nvgpu_semaphore_pool_from_ref(ref); - struct nvgpu_semaphore_sea *s = p->sema_sea; - - /* Freeing a mapped pool is a bad idea. */ - WARN_ON((p->mapped) || - (p->gpu_va != 0ULL) || - (p->gpu_va_ro != 0ULL)); - - __lock_sema_sea(s); - nvgpu_list_del(&p->pool_list_entry); - clear_bit((int)p->page_idx, s->pools_alloced); - s->page_count--; - __unlock_sema_sea(s); - - nvgpu_mutex_destroy(&p->pool_lock); - - gpu_sema_dbg(pool_to_gk20a(p), - "Freed semaphore pool! (idx=%llu)", p->page_idx); - nvgpu_kfree(p->sema_sea->gk20a, p); -} - -void nvgpu_semaphore_pool_get(struct nvgpu_semaphore_pool *p) -{ - nvgpu_ref_get(&p->ref); -} - -void nvgpu_semaphore_pool_put(struct nvgpu_semaphore_pool *p) -{ - nvgpu_ref_put(&p->ref, nvgpu_semaphore_pool_free); -} - -/* - * Get the address for a semaphore_pool - if global is true then return the - * global RO address instead of the RW address owned by the semaphore's VM. - */ -u64 __nvgpu_semaphore_pool_gpu_va(struct nvgpu_semaphore_pool *p, bool global) -{ - if (!global) { - return p->gpu_va; - } - - return p->gpu_va_ro + (PAGE_SIZE * p->page_idx); -} - -static int __nvgpu_init_hw_sema(struct channel_gk20a *ch) -{ - int hw_sema_idx; - int ret = 0; - struct nvgpu_semaphore_int *hw_sema; - struct nvgpu_semaphore_pool *p = ch->vm->sema_pool; - int current_value; - - BUG_ON(p == NULL); - - nvgpu_mutex_acquire(&p->pool_lock); - - /* Find an available HW semaphore. 
*/ - hw_sema_idx = __semaphore_bitmap_alloc(p->semas_alloced, - PAGE_SIZE / SEMAPHORE_SIZE); - if (hw_sema_idx < 0) { - ret = hw_sema_idx; - goto fail; - } - - hw_sema = nvgpu_kzalloc(ch->g, sizeof(struct nvgpu_semaphore_int)); - if (hw_sema == NULL) { - ret = -ENOMEM; - goto fail_free_idx; - } - - ch->hw_sema = hw_sema; - hw_sema->ch = ch; - hw_sema->location.pool = p; - hw_sema->location.offset = SEMAPHORE_SIZE * (u32)hw_sema_idx; - current_value = nvgpu_mem_rd(ch->g, &p->rw_mem, - hw_sema->location.offset); - nvgpu_atomic_set(&hw_sema->next_value, current_value); - - nvgpu_mutex_release(&p->pool_lock); - - return 0; - -fail_free_idx: - clear_bit(hw_sema_idx, p->semas_alloced); -fail: - nvgpu_mutex_release(&p->pool_lock); - return ret; -} - -/* - * Free the channel used semaphore index - */ -void nvgpu_semaphore_free_hw_sema(struct channel_gk20a *ch) -{ - struct nvgpu_semaphore_pool *p = ch->vm->sema_pool; - struct nvgpu_semaphore_int *hw_sema = ch->hw_sema; - int idx = hw_sema->location.offset / SEMAPHORE_SIZE; - - BUG_ON(p == NULL); - - nvgpu_mutex_acquire(&p->pool_lock); - - clear_bit(idx, p->semas_alloced); - - nvgpu_kfree(ch->g, hw_sema); - ch->hw_sema = NULL; - - nvgpu_mutex_release(&p->pool_lock); -} - -/* - * Allocate a semaphore from the passed pool. - * - * Since semaphores are ref-counted there's no explicit free for external code - * to use. When the ref-count hits 0 the internal free will happen. - */ -struct nvgpu_semaphore *nvgpu_semaphore_alloc(struct channel_gk20a *ch) -{ - struct nvgpu_semaphore *s; - int ret; - - if (ch->hw_sema == NULL) { - ret = __nvgpu_init_hw_sema(ch); - if (ret != 0) { - return NULL; - } - } - - s = nvgpu_kzalloc(ch->g, sizeof(*s)); - if (s == NULL) { - return NULL; - } - - nvgpu_ref_init(&s->ref); - s->g = ch->g; - s->location = ch->hw_sema->location; - nvgpu_atomic_set(&s->value, 0); - - /* - * Take a ref on the pool so that we can keep this pool alive for - * as long as this semaphore is alive. - */ - nvgpu_semaphore_pool_get(s->location.pool); - - gpu_sema_dbg(ch->g, "Allocated semaphore (c=%d)", ch->chid); - - return s; -} - -static struct nvgpu_semaphore *nvgpu_semaphore_from_ref(struct nvgpu_ref *ref) -{ - return (struct nvgpu_semaphore *) - ((uintptr_t)ref - offsetof(struct nvgpu_semaphore, ref)); -} - -static void nvgpu_semaphore_free(struct nvgpu_ref *ref) -{ - struct nvgpu_semaphore *s = nvgpu_semaphore_from_ref(ref); - - nvgpu_semaphore_pool_put(s->location.pool); - - nvgpu_kfree(s->g, s); -} - -void nvgpu_semaphore_put(struct nvgpu_semaphore *s) -{ - nvgpu_ref_put(&s->ref, nvgpu_semaphore_free); -} - -void nvgpu_semaphore_get(struct nvgpu_semaphore *s) -{ - nvgpu_ref_get(&s->ref); -} - -/* - * Return the address of a specific semaphore. - * - * Don't call this on a semaphore you don't own - the VA returned will make no - * sense in your specific channel's VM. - */ -u64 nvgpu_semaphore_gpu_rw_va(struct nvgpu_semaphore *s) -{ - return __nvgpu_semaphore_pool_gpu_va(s->location.pool, false) + - s->location.offset; -} - -/* - * Get the global RO address for the semaphore. Can be called on any semaphore - * regardless of whether you own it. 
- */ -u64 nvgpu_semaphore_gpu_ro_va(struct nvgpu_semaphore *s) -{ - return __nvgpu_semaphore_pool_gpu_va(s->location.pool, true) + - s->location.offset; -} - -u64 nvgpu_hw_sema_addr(struct nvgpu_semaphore_int *hw_sema) -{ - return __nvgpu_semaphore_pool_gpu_va(hw_sema->location.pool, true) + - hw_sema->location.offset; -} - -u32 __nvgpu_semaphore_read(struct nvgpu_semaphore_int *hw_sema) -{ - return nvgpu_mem_rd(hw_sema->ch->g, &hw_sema->location.pool->rw_mem, - hw_sema->location.offset); -} - -/* - * Read the underlying value from a semaphore. - */ -u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s) -{ - return nvgpu_mem_rd(s->g, &s->location.pool->rw_mem, - s->location.offset); -} - -/* - * Check if "racer" is over "goal" with wraparound handling. - */ -static bool __nvgpu_semaphore_value_released(u32 goal, u32 racer) -{ - /* - * Handle wraparound with the same heuristic as the hardware does: - * although the integer will eventually wrap around, consider a sema - * released against a threshold if its value has passed that threshold - * but has not wrapped over half of the u32 range over that threshold; - * such wrapping is unlikely to happen during a sema lifetime. - * - * Values for [goal, goal + 0x7fffffff] are considered signaled; that's - * precisely half of the 32-bit space. If racer == goal + 0x80000000, - * then it needs 0x80000000 increments to wrap again and signal. - * - * Unsigned arithmetic is used because it's well-defined. This is - * effectively the same as: signed_racer - signed_goal > 0. - */ - - return racer - goal < 0x80000000U; -} - -u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s) -{ - return (u32)nvgpu_atomic_read(&s->value); -} - -bool nvgpu_semaphore_is_released(struct nvgpu_semaphore *s) -{ - u32 sema_val = nvgpu_semaphore_read(s); - u32 wait_payload = nvgpu_semaphore_get_value(s); - - return __nvgpu_semaphore_value_released(wait_payload, sema_val); -} - -bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s) -{ - return !nvgpu_semaphore_is_released(s); -} - -/* - * Fast-forward the hw sema to its tracked max value. - * - * Return true if the sema wasn't at the max value and needed updating, false - * otherwise. - */ -bool nvgpu_semaphore_reset(struct nvgpu_semaphore_int *hw_sema) -{ - u32 threshold = (u32)nvgpu_atomic_read(&hw_sema->next_value); - u32 current_val = __nvgpu_semaphore_read(hw_sema); - - /* - * If the semaphore has already reached the value we would write then - * this is really just a NO-OP. However, the sema value shouldn't be - * more than what we expect to be the max. - */ - - if (WARN_ON(__nvgpu_semaphore_value_released(threshold + 1U, - current_val))) { - return false; - } - - if (current_val == threshold) { - return false; - } - - nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->location.pool->rw_mem, - hw_sema->location.offset, threshold); - - gpu_sema_verbose_dbg(hw_sema->ch->g, "(c=%d) RESET %u -> %u", - hw_sema->ch->chid, current_val, threshold); - - return true; -} - -/* - * Update nvgpu-tracked shadow of the value in "hw_sema" and mark the threshold - * value to "s" which represents the increment that the caller must write in a - * pushbuf. The same nvgpu_semaphore will also represent an output fence; when - * nvgpu_semaphore_is_released(s) == true, the gpu is done with this increment. - */ -void nvgpu_semaphore_prepare(struct nvgpu_semaphore *s, - struct nvgpu_semaphore_int *hw_sema) -{ - int next = nvgpu_atomic_add_return(1, &hw_sema->next_value); - - /* "s" should be an uninitialized sema. 
*/ - WARN_ON(s->incremented); - - nvgpu_atomic_set(&s->value, next); - s->incremented = true; - - gpu_sema_verbose_dbg(s->g, "INCR sema for c=%d (%u)", - hw_sema->ch->chid, next); -} diff --git a/drivers/gpu/nvgpu/common/semaphore/semaphore.c b/drivers/gpu/nvgpu/common/semaphore/semaphore.c new file mode 100644 index 000000000..b0acc58bb --- /dev/null +++ b/drivers/gpu/nvgpu/common/semaphore/semaphore.c @@ -0,0 +1,181 @@ +/* + * Nvgpu Semaphores + * + * Copyright (c) 2014-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "semaphore_priv.h" + +#include "gk20a/mm_gk20a.h" + +/* + * Allocate a semaphore from the passed pool. + * + * Since semaphores are ref-counted there's no explicit free for external code + * to use. When the ref-count hits 0 the internal free will happen. + */ +struct nvgpu_semaphore *nvgpu_semaphore_alloc(struct channel_gk20a *ch) +{ + struct nvgpu_semaphore *s; + int ret; + + if (ch->hw_sema == NULL) { + ret = nvgpu_hw_semaphore_init(ch); + if (ret != 0) { + return NULL; + } + } + + s = nvgpu_kzalloc(ch->g, sizeof(*s)); + if (s == NULL) { + return NULL; + } + + nvgpu_ref_init(&s->ref); + s->g = ch->g; + s->location = ch->hw_sema->location; + nvgpu_atomic_set(&s->value, 0); + + /* + * Take a ref on the pool so that we can keep this pool alive for + * as long as this semaphore is alive. + */ + nvgpu_semaphore_pool_get(s->location.pool); + + gpu_sema_dbg(ch->g, "Allocated semaphore (c=%d)", ch->chid); + + return s; +} + +static struct nvgpu_semaphore *nvgpu_semaphore_from_ref(struct nvgpu_ref *ref) +{ + return (struct nvgpu_semaphore *) + ((uintptr_t)ref - offsetof(struct nvgpu_semaphore, ref)); +} + +static void nvgpu_semaphore_free(struct nvgpu_ref *ref) +{ + struct nvgpu_semaphore *s = nvgpu_semaphore_from_ref(ref); + + nvgpu_semaphore_pool_put(s->location.pool); + + nvgpu_kfree(s->g, s); +} + +void nvgpu_semaphore_put(struct nvgpu_semaphore *s) +{ + nvgpu_ref_put(&s->ref, nvgpu_semaphore_free); +} + +void nvgpu_semaphore_get(struct nvgpu_semaphore *s) +{ + nvgpu_ref_get(&s->ref); +} + +/* + * Return the address of a specific semaphore. + * + * Don't call this on a semaphore you don't own - the VA returned will make no + * sense in your specific channel's VM. 
+ */ +u64 nvgpu_semaphore_gpu_rw_va(struct nvgpu_semaphore *s) +{ + return nvgpu_semaphore_pool_gpu_va(s->location.pool, false) + + s->location.offset; +} + +/* + * Get the global RO address for the semaphore. Can be called on any semaphore + * regardless of whether you own it. + */ +u64 nvgpu_semaphore_gpu_ro_va(struct nvgpu_semaphore *s) +{ + return nvgpu_semaphore_pool_gpu_va(s->location.pool, true) + + s->location.offset; +} + +/* + * Read the underlying value from a semaphore. + */ +u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s) +{ + return nvgpu_mem_rd(s->g, &s->location.pool->rw_mem, + s->location.offset); +} + +u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s) +{ + return (u32)nvgpu_atomic_read(&s->value); +} + +bool nvgpu_semaphore_is_released(struct nvgpu_semaphore *s) +{ + u32 sema_val = nvgpu_semaphore_read(s); + u32 wait_payload = nvgpu_semaphore_get_value(s); + + return nvgpu_semaphore_value_released(wait_payload, sema_val); +} + +bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s) +{ + return !nvgpu_semaphore_is_released(s); +} + +bool nvgpu_semaphore_can_wait(struct nvgpu_semaphore *s) +{ + return s->ready_to_wait; +} + +/* + * Update nvgpu-tracked shadow of the value in "hw_sema" and mark the threshold + * value to "s" which represents the increment that the caller must write in a + * pushbuf. The same nvgpu_semaphore will also represent an output fence; when + * nvgpu_semaphore_is_released(s) == true, the gpu is done with this increment. + */ +void nvgpu_semaphore_prepare(struct nvgpu_semaphore *s, + struct nvgpu_hw_semaphore *hw_sema) +{ + int next = nvgpu_hw_semaphore_update_next(hw_sema); + + /* "s" should be an uninitialized sema. */ + WARN_ON(s->ready_to_wait); + + nvgpu_atomic_set(&s->value, next); + s->ready_to_wait = true; + + gpu_sema_verbose_dbg(s->g, "INCR sema for c=%d (%u)", + hw_sema->ch->chid, next); +} + +u64 nvgpu_semaphore_get_hw_pool_page_idx(struct nvgpu_semaphore *s) +{ + return nvgpu_semaphore_pool_get_page_idx(s->location.pool); +} + diff --git a/drivers/gpu/nvgpu/common/semaphore/semaphore_hw.c b/drivers/gpu/nvgpu/common/semaphore/semaphore_hw.c new file mode 100644 index 000000000..11772db6a --- /dev/null +++ b/drivers/gpu/nvgpu/common/semaphore/semaphore_hw.c @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2014-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
*/ + +#include +#include +#include +#include +#include +#include +#include + +#include "semaphore_priv.h" + +int nvgpu_hw_semaphore_init(struct channel_gk20a *ch) +{ + int hw_sema_idx; + int ret = 0; + struct nvgpu_hw_semaphore *hw_sema; + struct nvgpu_semaphore_pool *p = ch->vm->sema_pool; + int current_value; + + nvgpu_assert(p != NULL); + + nvgpu_mutex_acquire(&p->pool_lock); + + /* Find an available HW semaphore. */ + hw_sema_idx = semaphore_bitmap_alloc(p->semas_alloced, + PAGE_SIZE / SEMAPHORE_SIZE); + if (hw_sema_idx < 0) { + ret = hw_sema_idx; + goto fail; + } + + hw_sema = nvgpu_kzalloc(ch->g, sizeof(struct nvgpu_hw_semaphore)); + if (hw_sema == NULL) { + ret = -ENOMEM; + goto fail_free_idx; + } + + ch->hw_sema = hw_sema; + hw_sema->ch = ch; + hw_sema->location.pool = p; + hw_sema->location.offset = SEMAPHORE_SIZE * (u32)hw_sema_idx; + current_value = (int)nvgpu_mem_rd(ch->g, &p->rw_mem, + hw_sema->location.offset); + nvgpu_atomic_set(&hw_sema->next_value, current_value); + + nvgpu_mutex_release(&p->pool_lock); + + return 0; + +fail_free_idx: + clear_bit(hw_sema_idx, p->semas_alloced); +fail: + nvgpu_mutex_release(&p->pool_lock); + return ret; +} + +/* + * Free the semaphore index used by the channel. + */ +void nvgpu_hw_semaphore_free(struct channel_gk20a *ch) +{ + struct nvgpu_semaphore_pool *p = ch->vm->sema_pool; + struct nvgpu_hw_semaphore *hw_sema = ch->hw_sema; + int idx = (int)(hw_sema->location.offset / SEMAPHORE_SIZE); + + nvgpu_assert(p != NULL); + + nvgpu_mutex_acquire(&p->pool_lock); + + clear_bit(idx, p->semas_alloced); + + nvgpu_kfree(ch->g, hw_sema); + ch->hw_sema = NULL; + + nvgpu_mutex_release(&p->pool_lock); +} + +u64 nvgpu_hw_semaphore_addr(struct nvgpu_hw_semaphore *hw_sema) +{ + return nvgpu_semaphore_pool_gpu_va(hw_sema->location.pool, true) + + hw_sema->location.offset; +} + +u32 nvgpu_hw_semaphore_read(struct nvgpu_hw_semaphore *hw_sema) +{ + return nvgpu_mem_rd(hw_sema->ch->g, &hw_sema->location.pool->rw_mem, + hw_sema->location.offset); +} + +/* + * Fast-forward the hw sema to its tracked max value. + * + * Return true if the sema wasn't at the max value and needed updating, false + * otherwise. + */ +bool nvgpu_hw_semaphore_reset(struct nvgpu_hw_semaphore *hw_sema) +{ + u32 threshold = (u32)nvgpu_atomic_read(&hw_sema->next_value); + u32 current_val = nvgpu_hw_semaphore_read(hw_sema); + + /* + * If the semaphore has already reached the value we would write then + * this is really just a NO-OP. However, the sema value shouldn't be + * more than what we expect to be the max.
+ */ + + bool is_released = nvgpu_semaphore_value_released(threshold + 1U, + current_val); + + nvgpu_assert(!is_released); + + if (is_released) { + return false; + } + + if (current_val == threshold) { + return false; + } + + nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->location.pool->rw_mem, + hw_sema->location.offset, threshold); + + gpu_sema_verbose_dbg(hw_sema->ch->g, "(c=%d) RESET %u -> %u", + hw_sema->ch->chid, current_val, threshold); + + return true; +} + +int nvgpu_hw_semaphore_read_next(struct nvgpu_hw_semaphore *hw_sema) +{ + return nvgpu_atomic_read(&hw_sema->next_value); +} + +int nvgpu_hw_semaphore_update_next(struct nvgpu_hw_semaphore *hw_sema) +{ + return nvgpu_atomic_add_return(1, &hw_sema->next_value); +} diff --git a/drivers/gpu/nvgpu/common/semaphore/semaphore_pool.c b/drivers/gpu/nvgpu/common/semaphore/semaphore_pool.c new file mode 100644 index 000000000..7d27ebaee --- /dev/null +++ b/drivers/gpu/nvgpu/common/semaphore/semaphore_pool.c @@ -0,0 +1,260 @@ +/* + * Copyright (c) 2014-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "semaphore_priv.h" + +#define pool_to_gk20a(p) ((p)->sema_sea->gk20a) + +/* + * Allocate a pool from the sea. + */ +int nvgpu_semaphore_pool_alloc(struct nvgpu_semaphore_sea *sea, + struct nvgpu_semaphore_pool **pool) +{ + struct nvgpu_semaphore_pool *p; + unsigned long page_idx; + int ret; + + p = nvgpu_kzalloc(sea->gk20a, sizeof(*p)); + if (p == NULL) { + return -ENOMEM; + } + + nvgpu_semaphore_sea_lock(sea); + + ret = nvgpu_mutex_init(&p->pool_lock); + if (ret != 0) { + goto fail; + } + + ret = semaphore_bitmap_alloc(sea->pools_alloced, + SEMAPHORE_POOL_COUNT); + if (ret < 0) { + goto fail_alloc; + } + + page_idx = (unsigned long)ret; + + p->page_idx = page_idx; + p->sema_sea = sea; + nvgpu_init_list_node(&p->pool_list_entry); + nvgpu_ref_init(&p->ref); + + sea->page_count++; + nvgpu_list_add(&p->pool_list_entry, &sea->pool_list); + nvgpu_semaphore_sea_unlock(sea); + + gpu_sema_dbg(sea->gk20a, + "Allocated semaphore pool: page-idx=%llu", p->page_idx); + + *pool = p; + return 0; + +fail_alloc: + nvgpu_mutex_destroy(&p->pool_lock); +fail: + nvgpu_semaphore_sea_unlock(sea); + nvgpu_kfree(sea->gk20a, p); + gpu_sema_dbg(sea->gk20a, "Failed to allocate semaphore pool!"); + return ret; +} + +/* + * Map a pool into the passed vm's address space. 
This handles both the fixed + * global RO mapping and the non-fixed private RW mapping. + */ +int nvgpu_semaphore_pool_map(struct nvgpu_semaphore_pool *p, + struct vm_gk20a *vm) +{ + int err = 0; + u64 addr; + + if (p->mapped) { + return -EBUSY; + } + + gpu_sema_dbg(pool_to_gk20a(p), + "Mapping semaphore pool! (idx=%llu)", p->page_idx); + + /* + * Take the sea lock so that we don't race with a possible change to the + * nvgpu_mem in the sema sea. + */ + nvgpu_semaphore_sea_lock(p->sema_sea); + + addr = nvgpu_gmmu_map_fixed(vm, &p->sema_sea->sea_mem, + p->sema_sea->gpu_va, + p->sema_sea->map_size, + 0, gk20a_mem_flag_read_only, 0, + p->sema_sea->sea_mem.aperture); + if (addr == 0ULL) { + err = -ENOMEM; + goto fail_unlock; + } + + p->gpu_va_ro = addr; + p->mapped = true; + + gpu_sema_dbg(pool_to_gk20a(p), + " %llu: GPU read-only VA = 0x%llx", + p->page_idx, p->gpu_va_ro); + + /* + * Now the RW mapping. This is a bit more complicated. We make a + * nvgpu_mem describing a page of the bigger RO space and then map + * that. Unlike above this does not need to be a fixed address. + */ + err = nvgpu_mem_create_from_mem(vm->mm->g, + &p->rw_mem, &p->sema_sea->sea_mem, + p->page_idx, 1); + if (err != 0) { + goto fail_unmap; + } + + addr = nvgpu_gmmu_map(vm, &p->rw_mem, SZ_4K, 0, + gk20a_mem_flag_none, 0, + p->rw_mem.aperture); + + if (addr == 0ULL) { + err = -ENOMEM; + goto fail_free_submem; + } + + p->gpu_va = addr; + + nvgpu_semaphore_sea_unlock(p->sema_sea); + + gpu_sema_dbg(pool_to_gk20a(p), + " %llu: GPU read-write VA = 0x%llx", + p->page_idx, p->gpu_va); + gpu_sema_dbg(pool_to_gk20a(p), + " %llu: CPU VA = 0x%p", + p->page_idx, p->rw_mem.cpu_va); + + return 0; + +fail_free_submem: + nvgpu_dma_free(pool_to_gk20a(p), &p->rw_mem); +fail_unmap: + nvgpu_gmmu_unmap(vm, &p->sema_sea->sea_mem, p->gpu_va_ro); + gpu_sema_dbg(pool_to_gk20a(p), + " %llu: Failed to map semaphore pool!", p->page_idx); +fail_unlock: + nvgpu_semaphore_sea_unlock(p->sema_sea); + return err; +} + +/* + * Unmap a semaphore_pool. + */ +void nvgpu_semaphore_pool_unmap(struct nvgpu_semaphore_pool *p, + struct vm_gk20a *vm) +{ + nvgpu_semaphore_sea_lock(p->sema_sea); + + nvgpu_gmmu_unmap(vm, &p->sema_sea->sea_mem, p->gpu_va_ro); + nvgpu_gmmu_unmap(vm, &p->rw_mem, p->gpu_va); + nvgpu_dma_free(pool_to_gk20a(p), &p->rw_mem); + + p->gpu_va = 0; + p->gpu_va_ro = 0; + p->mapped = false; + + nvgpu_semaphore_sea_unlock(p->sema_sea); + + gpu_sema_dbg(pool_to_gk20a(p), + "Unmapped semaphore pool! (idx=%llu)", p->page_idx); +} + +static struct nvgpu_semaphore_pool * +nvgpu_semaphore_pool_from_ref(struct nvgpu_ref *ref) +{ + return (struct nvgpu_semaphore_pool *) + ((uintptr_t)ref - offsetof(struct nvgpu_semaphore_pool, ref)); +} + +/* + * Completely free a semaphore_pool. You should make sure this pool is not + * mapped otherwise there's going to be a memory leak. + */ +static void nvgpu_semaphore_pool_free(struct nvgpu_ref *ref) +{ + struct nvgpu_semaphore_pool *p = nvgpu_semaphore_pool_from_ref(ref); + struct nvgpu_semaphore_sea *s = p->sema_sea; + + /* Freeing a mapped pool is a bad idea. */ + WARN_ON((p->mapped) || + (p->gpu_va != 0ULL) || + (p->gpu_va_ro != 0ULL)); + + nvgpu_semaphore_sea_lock(s); + nvgpu_list_del(&p->pool_list_entry); + clear_bit((int)p->page_idx, s->pools_alloced); + s->page_count--; + nvgpu_semaphore_sea_unlock(s); + + nvgpu_mutex_destroy(&p->pool_lock); + + gpu_sema_dbg(pool_to_gk20a(p), + "Freed semaphore pool! 
(idx=%llu)", p->page_idx); + nvgpu_kfree(p->sema_sea->gk20a, p); +} + +void nvgpu_semaphore_pool_get(struct nvgpu_semaphore_pool *p) +{ + nvgpu_ref_get(&p->ref); +} + +void nvgpu_semaphore_pool_put(struct nvgpu_semaphore_pool *p) +{ + nvgpu_ref_put(&p->ref, nvgpu_semaphore_pool_free); +} + +/* + * Get the address for a semaphore_pool - if global is true then return the + * global RO address instead of the RW address owned by the semaphore's VM. + */ +u64 nvgpu_semaphore_pool_gpu_va(struct nvgpu_semaphore_pool *p, bool global) +{ + if (!global) { + return p->gpu_va; + } + + return p->gpu_va_ro + (PAGE_SIZE * p->page_idx); +} + +/* + * Return the index into the sea bitmap + */ +u64 nvgpu_semaphore_pool_get_page_idx(struct nvgpu_semaphore_pool *p) +{ + return p->page_idx; +} \ No newline at end of file diff --git a/drivers/gpu/nvgpu/common/semaphore/semaphore_priv.h b/drivers/gpu/nvgpu/common/semaphore/semaphore_priv.h new file mode 100644 index 000000000..4a0dc7bb2 --- /dev/null +++ b/drivers/gpu/nvgpu/common/semaphore/semaphore_priv.h @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2014-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + + +#ifndef NVGPU_SEMAPHORE_PRIV_H +#define NVGPU_SEMAPHORE_PRIV_H + +#include +#include +#include +#include +#include +#include +#include + +struct gk20a; + +#define SEMAPHORE_SIZE 16U +/* + * Max number of channels that can be used is 512. This of course needs to be + * fixed to be dynamic but still fast. + */ +#define SEMAPHORE_POOL_COUNT 512U +#define SEMAPHORE_SEA_GROWTH_RATE 32U + +/* + * A sea of semaphores pools. Each pool is owned by a single VM. Since multiple + * channels can share a VM each channel gets it's own HW semaphore from the + * pool. Channels then allocate regular semaphores - basically just a value that + * signifies when a particular job is done. + */ +struct nvgpu_semaphore_sea { + struct nvgpu_list_node pool_list; /* List of pools in this sea. */ + struct gk20a *gk20a; + + size_t size; /* Number of pages available. */ + u64 gpu_va; /* GPU virtual address of sema sea. */ + u64 map_size; /* Size of the mapping. */ + + /* + * TODO: + * List of pages that we use to back the pools. The number of pages + * can grow dynamically since allocating 512 pages for all channels at + * once would be a tremendous waste. + */ + int page_count; /* Pages allocated to pools. */ + + /* + * The read-only memory for the entire semaphore sea. 
Each semaphore + * pool needs a sub-nvgpu_mem that will be mapped as RW in its address + * space. This sea_mem cannot be freed until all semaphore_pools have + * been freed. + */ + struct nvgpu_mem sea_mem; + + /* + * Can't use a regular allocator here since the full range of pools is + * not always allocated. Instead just use a bitmap. + */ + DECLARE_BITMAP(pools_alloced, SEMAPHORE_POOL_COUNT); + + struct nvgpu_mutex sea_lock; /* Lock alloc/free calls. */ +}; + +/* + * A semaphore pool. Each address space will own exactly one of these. + */ +struct nvgpu_semaphore_pool { + struct nvgpu_list_node pool_list_entry; /* Node for list of pools. */ + u64 gpu_va; /* GPU access to the pool. */ + u64 gpu_va_ro; /* GPU access to the pool. */ + u64 page_idx; /* Index into sea bitmap. */ + + DECLARE_BITMAP(semas_alloced, PAGE_SIZE / SEMAPHORE_SIZE); + + struct nvgpu_semaphore_sea *sema_sea; /* Sea that owns this pool. */ + + struct nvgpu_mutex pool_lock; + + /* + * This is the address space's personal RW table. Other channels will + * ultimately map this page as RO. This is a sub-nvgpu_mem from the + * sea's mem. + */ + struct nvgpu_mem rw_mem; + + bool mapped; + + /* + * Sometimes a channel can be released before other channels are + * done waiting on it. This ref count ensures that the pool doesn't + * go away until all semaphores using this pool are cleaned up first. + */ + struct nvgpu_ref ref; +}; + +struct nvgpu_semaphore_loc { + struct nvgpu_semaphore_pool *pool; /* Pool that owns this sema. */ + u32 offset; /* Byte offset into the pool. */ +}; + +/* + * Underlying semaphore data structure. This semaphore can be shared amongst + * other semaphore instances. + */ +struct nvgpu_hw_semaphore { + struct nvgpu_semaphore_loc location; + nvgpu_atomic_t next_value; /* Next available value. */ + struct channel_gk20a *ch; /* Channel that owns this sema. */ +}; + +/* + * A semaphore which the rest of the driver actually uses. This consists of a + * pointer to a real semaphore and a value to wait for. This allows one physical + * semaphore to be shared among an essentially infinite number of submits. + */ +struct nvgpu_semaphore { + struct gk20a *g; + struct nvgpu_semaphore_loc location; + + nvgpu_atomic_t value; + bool ready_to_wait; + + struct nvgpu_ref ref; +}; + + +static inline int semaphore_bitmap_alloc(unsigned long *bitmap, + unsigned long len) +{ + unsigned long idx = find_first_zero_bit(bitmap, len); + + if (idx == len) { + return -ENOSPC; + } + + set_bit(idx, bitmap); + + return (int)idx; +} + +/* + * Check if "racer" is over "goal" with wraparound handling. + */ +static inline bool nvgpu_semaphore_value_released(u32 goal, u32 racer) +{ + /* + * Handle wraparound with the same heuristic as the hardware does: + * although the integer will eventually wrap around, consider a sema + * released against a threshold if its value has passed that threshold + * but has not wrapped over half of the u32 range over that threshold; + * such wrapping is unlikely to happen during a sema lifetime. + * + * Values for [goal, goal + 0x7fffffff] are considered signaled; that's + * precisely half of the 32-bit space. If racer == goal + 0x80000000, + * then it needs 0x80000000 increments to wrap again and signal. + * + * Unsigned arithmetic is used because it's well-defined. This is + * effectively the same as: signed_racer - signed_goal >= 0.
*/ + + return racer - goal < 0x80000000U; +} + +#endif /* NVGPU_SEMAPHORE_PRIV_H */ \ No newline at end of file diff --git a/drivers/gpu/nvgpu/common/semaphore/semaphore_sea.c b/drivers/gpu/nvgpu/common/semaphore/semaphore_sea.c new file mode 100644 index 000000000..cc1d70525 --- /dev/null +++ b/drivers/gpu/nvgpu/common/semaphore/semaphore_sea.c @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2014-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "semaphore_priv.h" + +void nvgpu_semaphore_sea_lock(struct nvgpu_semaphore_sea *s) +{ + gpu_sema_verbose_dbg(s->gk20a, "Acquiring sema lock..."); + nvgpu_mutex_acquire(&s->sea_lock); + gpu_sema_verbose_dbg(s->gk20a, "Sema lock acquired!"); +} + +void nvgpu_semaphore_sea_unlock(struct nvgpu_semaphore_sea *s) +{ + nvgpu_mutex_release(&s->sea_lock); + gpu_sema_verbose_dbg(s->gk20a, "Released sema lock"); +} + +static int semaphore_sea_grow(struct nvgpu_semaphore_sea *sea) +{ + int ret = 0; + struct gk20a *g = sea->gk20a; + u32 i; + + nvgpu_semaphore_sea_lock(sea); + + ret = nvgpu_dma_alloc_sys(g, + PAGE_SIZE * SEMAPHORE_POOL_COUNT, + &sea->sea_mem); + if (ret != 0) { + goto out; + } + + sea->size = SEMAPHORE_POOL_COUNT; + sea->map_size = SEMAPHORE_POOL_COUNT * PAGE_SIZE; + + /* + * Start the semaphores at values that will soon overflow the 32-bit + * integer range. This way any buggy comparisons would start to fail + * sooner rather than later. + */ + for (i = 0U; i < PAGE_SIZE * SEMAPHORE_POOL_COUNT; i += 4U) { + nvgpu_mem_wr(g, &sea->sea_mem, i, 0xfffffff0U); + } + +out: + nvgpu_semaphore_sea_unlock(sea); + return ret; +} + + +/* + * Return the sema_sea pointer. + */ +struct nvgpu_semaphore_sea *nvgpu_semaphore_get_sea(struct gk20a *g) +{ + return g->sema_sea; +} + +void nvgpu_semaphore_sea_allocate_gpu_va(struct nvgpu_semaphore_sea *s, + struct nvgpu_allocator *a, u64 base, u64 len, u32 page_size) +{ + s->gpu_va = nvgpu_alloc_fixed(a, base, len, page_size); +} + +u64 nvgpu_semaphore_sea_get_gpu_va(struct nvgpu_semaphore_sea *s) +{ + return s->gpu_va; +} + +/* + * Create the semaphore sea. Only create it once - subsequent calls to this will + * return the originally created sea pointer.
*/ +struct nvgpu_semaphore_sea *nvgpu_semaphore_sea_create(struct gk20a *g) +{ + if (g->sema_sea != NULL) { + return g->sema_sea; + } + + g->sema_sea = nvgpu_kzalloc(g, sizeof(*g->sema_sea)); + if (g->sema_sea == NULL) { + return NULL; + } + + g->sema_sea->size = 0; + g->sema_sea->page_count = 0; + g->sema_sea->gk20a = g; + nvgpu_init_list_node(&g->sema_sea->pool_list); + if (nvgpu_mutex_init(&g->sema_sea->sea_lock) != 0) { + goto cleanup_free; + } + + if (semaphore_sea_grow(g->sema_sea) != 0) { + goto cleanup_destroy; + } + + gpu_sema_dbg(g, "Created semaphore sea!"); + return g->sema_sea; + +cleanup_destroy: + nvgpu_mutex_destroy(&g->sema_sea->sea_lock); +cleanup_free: + nvgpu_kfree(g, g->sema_sea); + g->sema_sea = NULL; + gpu_sema_dbg(g, "Failed to create semaphore sea!"); + return NULL; +} + +void nvgpu_semaphore_sea_destroy(struct gk20a *g) +{ + if (g->sema_sea == NULL) { + return; + } + + nvgpu_dma_free(g, &g->sema_sea->sea_mem); + nvgpu_mutex_destroy(&g->sema_sea->sea_lock); + nvgpu_kfree(g, g->sema_sea); + g->sema_sea = NULL; +} diff --git a/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c b/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c index 8820203a5..f7dad2e2b 100644 --- a/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c +++ b/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c @@ -22,7 +22,6 @@ * DEALINGS IN THE SOFTWARE. */ -#include #include #include #include @@ -30,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -87,14 +87,14 @@ static void add_sema_cmd(struct gk20a *g, struct channel_gk20a *c, gpu_sema_verbose_dbg(g, "(A) c=%d ACQ_GE %-4u pool=%-3llu" "va=0x%llx cmd_mem=0x%llx b=0x%llx off=%u", ch, nvgpu_semaphore_get_value(s), - s->location.pool->page_idx, va, cmd->gva, - cmd->mem->gpu_va, ob); + nvgpu_semaphore_get_hw_pool_page_idx(s), + va, cmd->gva, cmd->mem->gpu_va, ob); } else { gpu_sema_verbose_dbg(g, "(R) c=%d INCR %u (%u) pool=%-3llu" "va=0x%llx cmd_mem=0x%llx b=0x%llx off=%u", ch, nvgpu_semaphore_get_value(s), nvgpu_semaphore_read(s), - s->location.pool->page_idx, + nvgpu_semaphore_get_hw_pool_page_idx(s), va, cmd->gva, cmd->mem->gpu_va, ob); } } @@ -103,13 +103,16 @@ static void channel_sync_semaphore_gen_wait_cmd(struct channel_gk20a *c, struct nvgpu_semaphore *sema, struct priv_cmd_entry *wait_cmd, u32 wait_cmd_size, u32 pos) { + bool has_incremented; + if (sema == NULL) { /* expired */ nvgpu_memset(c->g, wait_cmd->mem, (wait_cmd->off + pos * wait_cmd_size) * (u32)sizeof(u32), 0, wait_cmd_size * (u32)sizeof(u32)); } else { - WARN_ON(!sema->incremented); + has_incremented = nvgpu_semaphore_can_wait(sema); + nvgpu_assert(has_incremented); add_sema_cmd(c->g, c, sema, wait_cmd, pos * wait_cmd_size, true, false); nvgpu_semaphore_put(sema); @@ -284,7 +287,7 @@ static void channel_sync_semaphore_set_min_eq_max(struct nvgpu_channel_sync *s) return; } - updated = nvgpu_semaphore_reset(c->hw_sema); + updated = nvgpu_hw_semaphore_reset(c->hw_sema); if (updated) { nvgpu_cond_broadcast_interruptible(&c->semaphore_wq); diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c index ad7f3f25f..f94b45bd9 100644 --- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fence_gk20a.c @@ -22,7 +22,6 @@ #include "fence_gk20a.h" -#include #include #include #include @@ -30,6 +29,7 @@ #include #include #include +#include struct gk20a_fence_ops { int (*wait)(struct gk20a_fence *f, u32 timeout); diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 78b9bcd90..fe3a679ab 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c @@ -27,8 +27,8 @@ #include #include #include -#include #include +#include #include #include #include @@ -2828,7 +2828,7 @@ void gk20a_debug_dump_all_channel_status_ramfc(struct gk20a *g, for (chid = 0; chid < f->num_channels; chid++) { struct channel_gk20a *ch = &f->channel[chid]; struct nvgpu_channel_dump_info *info = infos[chid]; - struct nvgpu_semaphore_int *hw_sema = ch->hw_sema; + struct nvgpu_hw_semaphore *hw_sema = ch->hw_sema; /* if this info exists, the above loop took a channel ref */ if (info == NULL) { @@ -2842,10 +2842,10 @@ void gk20a_debug_dump_all_channel_status_ramfc(struct gk20a *g, info->deterministic = ch->deterministic; if (hw_sema != NULL) { - info->sema.value = __nvgpu_semaphore_read(hw_sema); + info->sema.value = nvgpu_hw_semaphore_read(hw_sema); info->sema.next = - (u32)nvgpu_atomic_read(&hw_sema->next_value); - info->sema.addr = nvgpu_hw_sema_addr(hw_sema); + (u32)nvgpu_hw_semaphore_read_next(hw_sema); + info->sema.addr = nvgpu_hw_semaphore_addr(hw_sema); } g->ops.fifo.capture_channel_ram_dump(g, ch, info); diff --git a/drivers/gpu/nvgpu/include/nvgpu/channel.h b/drivers/gpu/nvgpu/include/nvgpu/channel.h index 64dc07469..78cce2cd3 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/channel.h +++ b/drivers/gpu/nvgpu/include/nvgpu/channel.h @@ -241,7 +241,7 @@ struct channel_gk20a { struct nvgpu_spinlock ref_actions_lock; #endif - struct nvgpu_semaphore_int *hw_sema; + struct nvgpu_hw_semaphore *hw_sema; nvgpu_atomic_t bound; diff --git a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h index b95dc76b9..05a275a51 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h +++ b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2019, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -20,8 +20,8 @@ * DEALINGS IN THE SOFTWARE. */ -#ifndef SEMAPHORE_GK20A_H -#define SEMAPHORE_GK20A_H +#ifndef NVGPU_SEMAPHORE_H +#define NVGPU_SEMAPHORE_H #include #include @@ -29,144 +29,30 @@ #include #include -#include "gk20a/mm_gk20a.h" - struct gk20a; +struct channel_gk20a; +struct nvgpu_semaphore_pool; +struct nvgpu_hw_semaphore; +struct nvgpu_semaphore; +struct vm_gk20a; +struct nvgpu_allocator; #define gpu_sema_dbg(g, fmt, args...) \ nvgpu_log(g, gpu_dbg_sema, fmt, ##args) #define gpu_sema_verbose_dbg(g, fmt, args...) \ nvgpu_log(g, gpu_dbg_sema_v, fmt, ##args) -/* - * Max number of channels that can be used is 512. This of course needs to be - * fixed to be dynamic but still fast. - */ -#define SEMAPHORE_POOL_COUNT 512U -#define SEMAPHORE_SIZE 16U -#define SEMAPHORE_SEA_GROWTH_RATE 32U - -struct nvgpu_semaphore_sea; - -struct nvgpu_semaphore_loc { - struct nvgpu_semaphore_pool *pool; /* Pool that owns this sema. */ - u32 offset; /* Byte offset into the pool. */ -}; - -/* - * Underlying semaphore data structure. This semaphore can be shared amongst - * other semaphore instances. - */ -struct nvgpu_semaphore_int { - struct nvgpu_semaphore_loc location; - nvgpu_atomic_t next_value; /* Next available value. */ - struct channel_gk20a *ch; /* Channel that owns this sema. */ -}; - -/* - * A semaphore which the rest of the driver actually uses. 
This consists of a - * pointer to a real semaphore and a value to wait for. This allows one physical - * semaphore to be shared among an essentially infinite number of submits. - */ -struct nvgpu_semaphore { - struct gk20a *g; - struct nvgpu_semaphore_loc location; - - nvgpu_atomic_t value; - bool incremented; - - struct nvgpu_ref ref; -}; - -/* - * A semaphore pool. Each address space will own exactly one of these. - */ -struct nvgpu_semaphore_pool { - struct nvgpu_list_node pool_list_entry; /* Node for list of pools. */ - u64 gpu_va; /* GPU access to the pool. */ - u64 gpu_va_ro; /* GPU access to the pool. */ - u64 page_idx; /* Index into sea bitmap. */ - - DECLARE_BITMAP(semas_alloced, PAGE_SIZE / SEMAPHORE_SIZE); - - struct nvgpu_semaphore_sea *sema_sea; /* Sea that owns this pool. */ - - struct nvgpu_mutex pool_lock; - - /* - * This is the address spaces's personal RW table. Other channels will - * ultimately map this page as RO. This is a sub-nvgpu_mem from the - * sea's mem. - */ - struct nvgpu_mem rw_mem; - - bool mapped; - - /* - * Sometimes a channel can be released before other channels are - * done waiting on it. This ref count ensures that the pool doesn't - * go away until all semaphores using this pool are cleaned up first. - */ - struct nvgpu_ref ref; -}; - -static inline struct nvgpu_semaphore_pool * -nvgpu_semaphore_pool_from_pool_list_entry(struct nvgpu_list_node *node) -{ - return (struct nvgpu_semaphore_pool *) - ((uintptr_t)node - - offsetof(struct nvgpu_semaphore_pool, pool_list_entry)); -}; - -/* - * A sea of semaphores pools. Each pool is owned by a single VM. Since multiple - * channels can share a VM each channel gets it's own HW semaphore from the - * pool. Channels then allocate regular semaphores - basically just a value that - * signifies when a particular job is done. - */ -struct nvgpu_semaphore_sea { - struct nvgpu_list_node pool_list; /* List of pools in this sea. */ - struct gk20a *gk20a; - - size_t size; /* Number of pages available. */ - u64 gpu_va; /* GPU virtual address of sema sea. */ - u64 map_size; /* Size of the mapping. */ - - /* - * TODO: - * List of pages that we use to back the pools. The number of pages - * can grow dynamically since allocating 512 pages for all channels at - * once would be a tremendous waste. - */ - int page_count; /* Pages allocated to pools. */ - - /* - * The read-only memory for the entire semaphore sea. Each semaphore - * pool needs a sub-nvgpu_mem that will be mapped as RW in its address - * space. This sea_mem cannot be freed until all semaphore_pools have - * been freed. - */ - struct nvgpu_mem sea_mem; - - /* - * Can't use a regular allocator here since the full range of pools are - * not always allocated. Instead just use a bitmap. - */ - DECLARE_BITMAP(pools_alloced, SEMAPHORE_POOL_COUNT); - - struct nvgpu_mutex sea_lock; /* Lock alloc/free calls. */ -}; - /* * Semaphore sea functions. 
*/ struct nvgpu_semaphore_sea *nvgpu_semaphore_sea_create(struct gk20a *g); -void nvgpu_semaphore_sea_destroy(struct gk20a *g); -int nvgpu_semaphore_sea_map(struct nvgpu_semaphore_pool *sea, - struct vm_gk20a *vm); -void nvgpu_semaphore_sea_unmap(struct nvgpu_semaphore_pool *sea, - struct vm_gk20a *vm); +void nvgpu_semaphore_sea_lock(struct nvgpu_semaphore_sea *s); +void nvgpu_semaphore_sea_unlock(struct nvgpu_semaphore_sea *s); struct nvgpu_semaphore_sea *nvgpu_semaphore_get_sea(struct gk20a *g); +void nvgpu_semaphore_sea_destroy(struct gk20a *g); +void nvgpu_semaphore_sea_allocate_gpu_va(struct nvgpu_semaphore_sea *s, + struct nvgpu_allocator *a, u64 base, u64 len, u32 page_size); +u64 nvgpu_semaphore_sea_get_gpu_va(struct nvgpu_semaphore_sea *s); /* * Semaphore pool functions. @@ -177,9 +63,21 @@ int nvgpu_semaphore_pool_map(struct nvgpu_semaphore_pool *p, struct vm_gk20a *vm); void nvgpu_semaphore_pool_unmap(struct nvgpu_semaphore_pool *p, struct vm_gk20a *vm); -u64 __nvgpu_semaphore_pool_gpu_va(struct nvgpu_semaphore_pool *p, bool global); +u64 nvgpu_semaphore_pool_gpu_va(struct nvgpu_semaphore_pool *p, bool global); void nvgpu_semaphore_pool_get(struct nvgpu_semaphore_pool *p); void nvgpu_semaphore_pool_put(struct nvgpu_semaphore_pool *p); +u64 nvgpu_semaphore_pool_get_page_idx(struct nvgpu_semaphore_pool *p); + +/* + * Hw semaphore functions + */ +int nvgpu_hw_semaphore_init(struct channel_gk20a *ch); +void nvgpu_hw_semaphore_free(struct channel_gk20a *ch); +u64 nvgpu_hw_semaphore_addr(struct nvgpu_hw_semaphore *hw_sema); +u32 nvgpu_hw_semaphore_read(struct nvgpu_hw_semaphore *hw_sema); +bool nvgpu_hw_semaphore_reset(struct nvgpu_hw_semaphore *hw_sema); +int nvgpu_hw_semaphore_read_next(struct nvgpu_hw_semaphore *hw_sema); +int nvgpu_hw_semaphore_update_next(struct nvgpu_hw_semaphore *hw_sema); /* * Semaphore functions. 
@@ -187,20 +85,18 @@ void nvgpu_semaphore_pool_put(struct nvgpu_semaphore_pool *p); struct nvgpu_semaphore *nvgpu_semaphore_alloc(struct channel_gk20a *ch); void nvgpu_semaphore_put(struct nvgpu_semaphore *s); void nvgpu_semaphore_get(struct nvgpu_semaphore *s); -void nvgpu_semaphore_free_hw_sema(struct channel_gk20a *ch); u64 nvgpu_semaphore_gpu_rw_va(struct nvgpu_semaphore *s); u64 nvgpu_semaphore_gpu_ro_va(struct nvgpu_semaphore *s); -u64 nvgpu_hw_sema_addr(struct nvgpu_semaphore_int *hw_sema); -u32 __nvgpu_semaphore_read(struct nvgpu_semaphore_int *hw_sema); u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s); u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s); bool nvgpu_semaphore_is_released(struct nvgpu_semaphore *s); bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s); +bool nvgpu_semaphore_can_wait(struct nvgpu_semaphore *s); -bool nvgpu_semaphore_reset(struct nvgpu_semaphore_int *hw_sema); void nvgpu_semaphore_prepare(struct nvgpu_semaphore *s, - struct nvgpu_semaphore_int *hw_sema); + struct nvgpu_hw_semaphore *hw_sema); +u64 nvgpu_semaphore_get_hw_pool_page_idx(struct nvgpu_semaphore *s); -#endif +#endif /* NVGPU_SEMAPHORE_H */ diff --git a/drivers/gpu/nvgpu/os/linux/sync_sema_android.c b/drivers/gpu/nvgpu/os/linux/sync_sema_android.c index 1e98e644a..f4f2c0957 100644 --- a/drivers/gpu/nvgpu/os/linux/sync_sema_android.c +++ b/drivers/gpu/nvgpu/os/linux/sync_sema_android.c @@ -22,10 +22,10 @@ #include #include -#include #include #include #include +#include #include "../linux/channel.h" #include "../drivers/staging/android/sync.h" @@ -285,7 +285,7 @@ static void gk20a_sync_pt_value_str_for_sema(struct gk20a_sync_pt *pt, struct nvgpu_semaphore *s = pt->sema; (void) snprintf(str, size, "S: pool=%llu [v=%u,r_v=%u]", - s->location.pool->page_idx, + nvgpu_semaphore_get_hw_pool_page_idx(s), nvgpu_semaphore_get_value(s), nvgpu_semaphore_read(s)); }