diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
index 2ea8369e4..77d1a178d 100644
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -480,6 +480,7 @@ nvgpu-y += \
 	common/fifo/engines.o \
 	common/fifo/pbdma_status.o \
 	common/fifo/userd.o \
+	common/fence/fence.o \
 	common/ecc.o \
 	common/ce2.o \
 	common/debugger.o \
@@ -487,7 +488,6 @@ nvgpu-y += \
 	gk20a/fifo_gk20a.o \
 	gk20a/gr_gk20a.o \
 	gk20a/mm_gk20a.o \
-	gk20a/fence_gk20a.o \
 	gm20b/hal_gm20b.o \
 	gm20b/gr_gm20b.o \
 	gm20b/clk_gm20b.o \
diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources
index dd8967152..888b9c814 100644
--- a/drivers/gpu/nvgpu/Makefile.sources
+++ b/drivers/gpu/nvgpu/Makefile.sources
@@ -260,11 +260,11 @@ srcs += common/sim.c \
 	common/nvlink/nvlink_gv100.c \
 	common/nvlink/nvlink_tu104.c	\
 	common/nvlink/nvlink.c \
+	common/fence/fence.c \
 	gk20a/ce2_gk20a.c \
 	gk20a/fifo_gk20a.c \
 	gk20a/gr_gk20a.c \
 	gk20a/mm_gk20a.c \
-	gk20a/fence_gk20a.c \
 	gm20b/hal_gm20b.c \
 	gm20b/gr_gm20b.c \
 	gm20b/clk_gm20b.c \
diff --git a/drivers/gpu/nvgpu/common/ce2.c b/drivers/gpu/nvgpu/common/ce2.c
index c534c900d..00ae7f404 100644
--- a/drivers/gpu/nvgpu/common/ce2.c
+++ b/drivers/gpu/nvgpu/common/ce2.c
@@ -24,11 +24,11 @@
 #include <nvgpu/gk20a.h>
 #include <nvgpu/channel.h>
 #include <nvgpu/utils.h>
+#include <nvgpu/fence.h>
 
 #include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
 
 #include "gk20a/ce2_gk20a.h"
-#include "gk20a/fence_gk20a.h"
 
 static inline u32 gk20a_get_valid_launch_flags(struct gk20a *g, u32 launch_flags)
 {
@@ -50,7 +50,7 @@ int gk20a_ce_execute_ops(struct gk20a *g,
 		u32 launch_flags,
 		u32 request_operation,
 		u32 submit_flags,
-		struct gk20a_fence **gk20a_fence_out)
+		struct nvgpu_fence_type **fence_out)
 {
 	int ret = -EPERM;
 	struct gk20a_ce_app *ce_app = g->ce_app;
@@ -63,7 +63,7 @@ int gk20a_ce_execute_ops(struct gk20a *g,
 	u32 dma_copy_class;
 	struct nvgpu_gpfifo_entry gpfifo;
 	struct nvgpu_channel_fence fence = {0, 0};
-	struct gk20a_fence *ce_cmd_buf_fence_out = NULL;
+	struct nvgpu_fence_type *ce_cmd_buf_fence_out = NULL;
 
 	if (!ce_app->initialised || ce_app->app_state != NVGPU_CE_ACTIVE) {
 		goto end;
@@ -102,13 +102,13 @@ int gk20a_ce_execute_ops(struct gk20a *g,
 	cmd_buf_cpu_va = (u32 *)ce_ctx->cmd_buf_mem.cpu_va;
 
 	if (ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset] != NULL) {
-		struct gk20a_fence **prev_post_fence =
+		struct nvgpu_fence_type **prev_post_fence =
 			&ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset];
 
-		ret = gk20a_fence_wait(g, *prev_post_fence,
+		ret = nvgpu_fence_wait(g, *prev_post_fence,
 				       nvgpu_get_poll_timeout(g));
 
-		gk20a_fence_put(*prev_post_fence);
+		nvgpu_fence_put(*prev_post_fence);
 		*prev_post_fence = NULL;
 		if (ret != 0) {
 			goto noop;
@@ -147,9 +147,9 @@ int gk20a_ce_execute_ops(struct gk20a *g,
 		if (ret == 0) {
 			ce_ctx->postfences[ce_ctx->cmd_buf_read_queue_offset] =
 				ce_cmd_buf_fence_out;
-			if (gk20a_fence_out != NULL) {
-				gk20a_fence_get(ce_cmd_buf_fence_out);
-				*gk20a_fence_out = ce_cmd_buf_fence_out;
+			if (fence_out != NULL) {
+				nvgpu_fence_get(ce_cmd_buf_fence_out);
+				*fence_out = ce_cmd_buf_fence_out;
 			}
 
 			/* Next available command buffer queue Index */
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c b/drivers/gpu/nvgpu/common/fence/fence.c
similarity index 61%
rename from drivers/gpu/nvgpu/gk20a/fence_gk20a.c
rename to drivers/gpu/nvgpu/common/fence/fence.c
index f94b45bd9..36bea080b 100644
--- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.c
+++ b/drivers/gpu/nvgpu/common/fence/fence.c
@@ -20,8 +20,6 @@
  * DEALINGS IN THE SOFTWARE.
  */
 
-#include "fence_gk20a.h"
-
 #include <nvgpu/kmem.h>
 #include <nvgpu/soc.h>
 #include <nvgpu/nvhost.h>
@@ -30,23 +28,17 @@
 #include <nvgpu/gk20a.h>
 #include <nvgpu/channel.h>
 #include <nvgpu/semaphore.h>
+#include <nvgpu/fence.h>
 
-struct gk20a_fence_ops {
-	int (*wait)(struct gk20a_fence *f, u32 timeout);
-	bool (*is_expired)(struct gk20a_fence *f);
-	void *(*free)(struct nvgpu_ref *ref);
-};
-
-
-static struct gk20a_fence *gk20a_fence_from_ref(struct nvgpu_ref *ref)
+static struct nvgpu_fence_type *nvgpu_fence_from_ref(struct nvgpu_ref *ref)
 {
-	return (struct gk20a_fence *)((uintptr_t)ref -
-				offsetof(struct gk20a_fence, ref));
+	return (struct nvgpu_fence_type *)((uintptr_t)ref -
+				offsetof(struct nvgpu_fence_type, ref));
 }
 
-static void gk20a_fence_free(struct nvgpu_ref *ref)
+static void nvgpu_fence_free(struct nvgpu_ref *ref)
 {
-	struct gk20a_fence *f = gk20a_fence_from_ref(ref);
+	struct nvgpu_fence_type *f = nvgpu_fence_from_ref(ref);
 	struct gk20a *g = f->g;
 
 	if (nvgpu_os_fence_is_initialized(&f->os_fence)) {
@@ -66,14 +58,14 @@ static void gk20a_fence_free(struct nvgpu_ref *ref)
 	}
 }
 
-void gk20a_fence_put(struct gk20a_fence *f)
+void nvgpu_fence_put(struct nvgpu_fence_type *f)
 {
 	if (f != NULL) {
-		nvgpu_ref_put(&f->ref, gk20a_fence_free);
+		nvgpu_ref_put(&f->ref, nvgpu_fence_free);
 	}
 }
 
-struct gk20a_fence *gk20a_fence_get(struct gk20a_fence *f)
+struct nvgpu_fence_type *nvgpu_fence_get(struct nvgpu_fence_type *f)
 {
 	if (f != NULL) {
 		nvgpu_ref_get(&f->ref);
@@ -81,7 +73,7 @@ struct gk20a_fence *gk20a_fence_get(struct gk20a_fence *f)
 	return f;
 }
 
-static bool gk20a_fence_is_valid(struct gk20a_fence *f)
+static bool nvgpu_fence_is_valid(struct nvgpu_fence_type *f)
 {
 	bool valid = f->valid;
 
@@ -89,11 +81,11 @@ static bool gk20a_fence_is_valid(struct gk20a_fence *f)
 	return valid;
 }
 
-int gk20a_fence_install_fd(struct gk20a_fence *f, int fd)
+int nvgpu_fence_install_fd(struct nvgpu_fence_type *f, int fd)
 {
-	if ((f == NULL) || !gk20a_fence_is_valid(f) ||
-		!nvgpu_os_fence_is_initialized(&f->os_fence)) {
-			return -EINVAL;
+	if ((f == NULL) || !nvgpu_fence_is_valid(f) ||
+			!nvgpu_os_fence_is_initialized(&f->os_fence)) {
+		return -EINVAL;
 	}
 
 	f->os_fence.ops->install_fence(&f->os_fence, fd);
@@ -101,10 +93,10 @@ int gk20a_fence_install_fd(struct gk20a_fence *f, int fd)
 	return 0;
 }
 
-int gk20a_fence_wait(struct gk20a *g, struct gk20a_fence *f,
+int nvgpu_fence_wait(struct gk20a *g, struct nvgpu_fence_type *f,
 							u32 timeout)
 {
-	if ((f != NULL) && gk20a_fence_is_valid(f)) {
+	if ((f != NULL) && nvgpu_fence_is_valid(f)) {
 		if (!nvgpu_platform_is_silicon(g)) {
 			timeout = U32_MAX;
 		}
@@ -113,34 +105,34 @@ int gk20a_fence_wait(struct gk20a *g, struct gk20a_fence *f,
 	return 0;
 }
 
-bool gk20a_fence_is_expired(struct gk20a_fence *f)
+bool nvgpu_fence_is_expired(struct nvgpu_fence_type *f)
 {
-	if ((f != NULL) && gk20a_fence_is_valid(f) && (f->ops != NULL)) {
+	if ((f != NULL) && nvgpu_fence_is_valid(f) && (f->ops != NULL)) {
 		return f->ops->is_expired(f);
 	} else {
 		return true;
 	}
 }
 
-int gk20a_alloc_fence_pool(struct channel_gk20a *c, unsigned int count)
+int nvgpu_fence_pool_alloc(struct channel_gk20a *ch, unsigned int count)
 {
 	int err;
 	size_t size;
-	struct gk20a_fence *fence_pool = NULL;
+	struct nvgpu_fence_type *fence_pool = NULL;
 
-	size = sizeof(struct gk20a_fence);
+	size = sizeof(struct nvgpu_fence_type);
 	if (count <= UINT_MAX / size) {
 		size = count * size;
-		fence_pool = nvgpu_vzalloc(c->g, size);
+		fence_pool = nvgpu_vzalloc(ch->g, size);
 	}
 
 	if (fence_pool == NULL) {
 		return -ENOMEM;
 	}
 
-	err = nvgpu_lockless_allocator_init(c->g, &c->fence_allocator,
+	err = nvgpu_lockless_allocator_init(ch->g, &ch->fence_allocator,
 				"fence_pool", (size_t)fence_pool, size,
-				sizeof(struct gk20a_fence), 0);
+				sizeof(struct nvgpu_fence_type), 0);
 	if (err != 0) {
 		goto fail;
 	}
@@ -148,51 +140,51 @@ int gk20a_alloc_fence_pool(struct channel_gk20a *c, unsigned int count)
 	return 0;
 
 fail:
-	nvgpu_vfree(c->g, fence_pool);
+	nvgpu_vfree(ch->g, fence_pool);
 	return err;
 }
 
-void gk20a_free_fence_pool(struct channel_gk20a *c)
+void nvgpu_fence_pool_free(struct channel_gk20a *ch)
 {
-	if (nvgpu_alloc_initialized(&c->fence_allocator)) {
-		struct gk20a_fence *fence_pool;
-			fence_pool = (struct gk20a_fence *)(uintptr_t)
-				nvgpu_alloc_base(&c->fence_allocator);
-		nvgpu_alloc_destroy(&c->fence_allocator);
-		nvgpu_vfree(c->g, fence_pool);
+	if (nvgpu_alloc_initialized(&ch->fence_allocator)) {
+		struct nvgpu_fence_type *fence_pool;
+		fence_pool = (struct nvgpu_fence_type *)(uintptr_t)
+			nvgpu_alloc_base(&ch->fence_allocator);
+		nvgpu_alloc_destroy(&ch->fence_allocator);
+		nvgpu_vfree(ch->g, fence_pool);
 	}
 }
 
-struct gk20a_fence *gk20a_alloc_fence(struct channel_gk20a *c)
+struct nvgpu_fence_type *nvgpu_fence_alloc(struct channel_gk20a *ch)
 {
-	struct gk20a_fence *fence = NULL;
+	struct nvgpu_fence_type *fence = NULL;
 
-	if (channel_gk20a_is_prealloc_enabled(c)) {
-		if (nvgpu_alloc_initialized(&c->fence_allocator)) {
-			fence = (struct gk20a_fence *)(uintptr_t)
-				nvgpu_alloc(&c->fence_allocator,
-					sizeof(struct gk20a_fence));
+	if (channel_gk20a_is_prealloc_enabled(ch)) {
+		if (nvgpu_alloc_initialized(&ch->fence_allocator)) {
+			fence = (struct nvgpu_fence_type *)(uintptr_t)
+				nvgpu_alloc(&ch->fence_allocator,
+					sizeof(struct nvgpu_fence_type));
 
 			/* clear the node and reset the allocator pointer */
 			if (fence != NULL) {
 				(void) memset(fence, 0, sizeof(*fence));
-				fence->allocator = &c->fence_allocator;
+				fence->allocator = &ch->fence_allocator;
 			}
 		}
 	} else {
-		fence = nvgpu_kzalloc(c->g, sizeof(struct gk20a_fence));
+		fence = nvgpu_kzalloc(ch->g, sizeof(struct nvgpu_fence_type));
 	}
 
 	if (fence != NULL) {
 		nvgpu_ref_init(&fence->ref);
-		fence->g = c->g;
+		fence->g = ch->g;
 	}
 
 	return fence;
 }
 
-void gk20a_init_fence(struct gk20a_fence *f,
-		const struct gk20a_fence_ops *ops,
+void nvgpu_fence_init(struct nvgpu_fence_type *f,
+		const struct nvgpu_fence_ops *ops,
 		struct nvgpu_os_fence os_fence)
 {
 	if (f == NULL) {
@@ -206,7 +198,7 @@ void gk20a_init_fence(struct gk20a_fence *f,
 
 /* Fences that are backed by GPU semaphores: */
 
-static int nvgpu_semaphore_fence_wait(struct gk20a_fence *f, u32 timeout)
+static int nvgpu_semaphore_fence_wait(struct nvgpu_fence_type *f, u32 timeout)
 {
 	if (!nvgpu_semaphore_is_acquired(f->semaphore)) {
 		return 0;
@@ -218,26 +210,26 @@ static int nvgpu_semaphore_fence_wait(struct gk20a_fence *f, u32 timeout)
 		timeout);
 }
 
-static bool nvgpu_semaphore_fence_is_expired(struct gk20a_fence *f)
+static bool nvgpu_semaphore_fence_is_expired(struct nvgpu_fence_type *f)
 {
 	return !nvgpu_semaphore_is_acquired(f->semaphore);
 }
 
-static const struct gk20a_fence_ops nvgpu_semaphore_fence_ops = {
+static const struct nvgpu_fence_ops nvgpu_semaphore_fence_ops = {
 	.wait = &nvgpu_semaphore_fence_wait,
 	.is_expired = &nvgpu_semaphore_fence_is_expired,
 };
 
 /* This function takes ownership of the semaphore as well as the os_fence */
-int gk20a_fence_from_semaphore(
-		struct gk20a_fence *fence_out,
+int nvgpu_fence_from_semaphore(
+		struct nvgpu_fence_type *fence_out,
 		struct nvgpu_semaphore *semaphore,
 		struct nvgpu_cond *semaphore_wq,
 		struct nvgpu_os_fence os_fence)
 {
-	struct gk20a_fence *f = fence_out;
+	struct nvgpu_fence_type *f = fence_out;
 
-	gk20a_init_fence(f, &nvgpu_semaphore_fence_ops, os_fence);
+	nvgpu_fence_init(f, &nvgpu_semaphore_fence_ops, os_fence);
 	if (f == NULL) {
 		return -EINVAL;
 	}
@@ -256,14 +248,14 @@ int gk20a_fence_from_semaphore(
 #ifdef CONFIG_TEGRA_GK20A_NVHOST
 /* Fences that are backed by host1x syncpoints: */
 
-static int gk20a_syncpt_fence_wait(struct gk20a_fence *f, u32 timeout)
+static int nvgpu_fence_syncpt_wait(struct nvgpu_fence_type *f, u32 timeout)
 {
 	return nvgpu_nvhost_syncpt_wait_timeout_ext(
 			f->nvhost_dev, f->syncpt_id, f->syncpt_value,
 			timeout, NULL, NULL);
 }
 
-static bool gk20a_syncpt_fence_is_expired(struct gk20a_fence *f)
+static bool nvgpu_fence_syncpt_is_expired(struct nvgpu_fence_type *f)
 {
 
 	/*
@@ -286,20 +278,20 @@ static bool gk20a_syncpt_fence_is_expired(struct gk20a_fence *f)
 	return true;
 }
 
-static const struct gk20a_fence_ops gk20a_syncpt_fence_ops = {
-	.wait = &gk20a_syncpt_fence_wait,
-	.is_expired = &gk20a_syncpt_fence_is_expired,
+static const struct nvgpu_fence_ops nvgpu_fence_syncpt_ops = {
+	.wait = &nvgpu_fence_syncpt_wait,
+	.is_expired = &nvgpu_fence_syncpt_is_expired,
 };
 
 /* This function takes the ownership of the os_fence */
-int gk20a_fence_from_syncpt(
-		struct gk20a_fence *fence_out,
+int nvgpu_fence_from_syncpt(
+		struct nvgpu_fence_type *fence_out,
 		struct nvgpu_nvhost_dev *nvhost_dev,
 		u32 id, u32 value, struct nvgpu_os_fence os_fence)
 {
-	struct gk20a_fence *f = fence_out;
+	struct nvgpu_fence_type *f = fence_out;
 
-	gk20a_init_fence(f, &gk20a_syncpt_fence_ops, os_fence);
+	nvgpu_fence_init(f, &nvgpu_fence_syncpt_ops, os_fence);
 	if (!f) {
 		return -EINVAL;
 	}
@@ -315,8 +307,8 @@ int gk20a_fence_from_syncpt(
 	return 0;
 }
 #else
-int gk20a_fence_from_syncpt(
-		struct gk20a_fence *fence_out,
+int nvgpu_fence_from_syncpt(
+		struct nvgpu_fence_type *fence_out,
 		struct nvgpu_nvhost_dev *nvhost_dev,
 		u32 id, u32 value, struct nvgpu_os_fence os_fence)
 {
diff --git a/drivers/gpu/nvgpu/common/fifo/channel.c b/drivers/gpu/nvgpu/common/fifo/channel.c
index 328b34b84..8cfd6b168 100644
--- a/drivers/gpu/nvgpu/common/fifo/channel.c
+++ b/drivers/gpu/nvgpu/common/fifo/channel.c
@@ -50,14 +50,14 @@
 #include <nvgpu/channel_sync_syncpt.h>
 #include <nvgpu/runlist.h>
 #include <nvgpu/fifo/userd.h>
+#include <nvgpu/fence.h>
 
-#include "gk20a/fence_gk20a.h"
 #include "gk20a/gr_gk20a.h"
 
 static void free_channel(struct fifo_gk20a *f, struct channel_gk20a *ch);
 static void gk20a_channel_dump_ref_actions(struct channel_gk20a *ch);
 
-static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c);
+static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *ch);
 
 static void channel_gk20a_free_prealloc_resources(struct channel_gk20a *c);
 
@@ -767,18 +767,18 @@ clean_up:
 
 /* allocate private cmd buffer.
    used for inserting commands before/after user submitted buffers. */
-static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c,
+static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *ch,
 	u32 num_in_flight)
 {
-	struct gk20a *g = c->g;
-	struct vm_gk20a *ch_vm = c->vm;
-	struct priv_cmd_queue *q = &c->priv_cmd_q;
+	struct gk20a *g = ch->g;
+	struct vm_gk20a *ch_vm = ch->vm;
+	struct priv_cmd_queue *q = &ch->priv_cmd_q;
 	u64 size, tmp_size;
 	int err = 0;
 	bool gpfifo_based = false;
 
 	if (num_in_flight == 0U) {
-		num_in_flight = c->gpfifo.entry_num;
+		num_in_flight = ch->gpfifo.entry_num;
 		gpfifo_based = true;
 	}
 
@@ -828,14 +828,14 @@ static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c,
 	return 0;
 
 clean_up:
-	channel_gk20a_free_priv_cmdbuf(c);
+	channel_gk20a_free_priv_cmdbuf(ch);
 	return err;
 }
 
-static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c)
+static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *ch)
 {
-	struct vm_gk20a *ch_vm = c->vm;
-	struct priv_cmd_queue *q = &c->priv_cmd_q;
+	struct vm_gk20a *ch_vm = ch->vm;
+	struct priv_cmd_queue *q = &ch->priv_cmd_q;
 
 	if (q->size == 0U) {
 		return;
@@ -1053,7 +1053,7 @@ bool channel_gk20a_is_prealloc_enabled(struct channel_gk20a *c)
 	return pre_alloc_enabled;
 }
 
-static int channel_gk20a_prealloc_resources(struct channel_gk20a *c,
+static int channel_gk20a_prealloc_resources(struct channel_gk20a *ch,
 	       unsigned int num_jobs)
 {
 	unsigned int i;
@@ -1061,7 +1061,7 @@ static int channel_gk20a_prealloc_resources(struct channel_gk20a *c,
 	size_t size;
 	struct priv_cmd_entry *entries = NULL;
 
-	if ((channel_gk20a_is_prealloc_enabled(c)) || (num_jobs == 0U)) {
+	if ((channel_gk20a_is_prealloc_enabled(ch)) || (num_jobs == 0U)) {
 		return -EINVAL;
 	}
 
@@ -1072,10 +1072,10 @@ static int channel_gk20a_prealloc_resources(struct channel_gk20a *c,
 	 */
 	size = sizeof(struct channel_gk20a_job);
 	if (num_jobs <= ULONG_MAX / size) {
-		c->joblist.pre_alloc.jobs = nvgpu_vzalloc(c->g,
+		ch->joblist.pre_alloc.jobs = nvgpu_vzalloc(ch->g,
 							  num_jobs * size);
 	}
-	if (c->joblist.pre_alloc.jobs == NULL) {
+	if (ch->joblist.pre_alloc.jobs == NULL) {
 		err = -ENOMEM;
 		goto clean_up;
 	}
@@ -1087,7 +1087,7 @@ static int channel_gk20a_prealloc_resources(struct channel_gk20a *c,
 	 */
 	size = sizeof(struct priv_cmd_entry);
 	if (num_jobs <= ULONG_MAX / (size << 1)) {
-		entries = nvgpu_vzalloc(c->g,
+		entries = nvgpu_vzalloc(ch->g,
 					((unsigned long)num_jobs << 1UL) *
 					(unsigned long)size);
 	}
@@ -1097,20 +1097,20 @@ static int channel_gk20a_prealloc_resources(struct channel_gk20a *c,
 	}
 
 	for (i = 0; i < num_jobs; i++) {
-		c->joblist.pre_alloc.jobs[i].wait_cmd = &entries[i];
-		c->joblist.pre_alloc.jobs[i].incr_cmd =
+		ch->joblist.pre_alloc.jobs[i].wait_cmd = &entries[i];
+		ch->joblist.pre_alloc.jobs[i].incr_cmd =
 			&entries[i + num_jobs];
 	}
 
 	/* pre-allocate a fence pool */
-	err = gk20a_alloc_fence_pool(c, num_jobs);
+	err = nvgpu_fence_pool_alloc(ch, num_jobs);
 	if (err != 0) {
 		goto clean_up_priv_cmd;
 	}
 
-	c->joblist.pre_alloc.length = num_jobs;
-	c->joblist.pre_alloc.put = 0;
-	c->joblist.pre_alloc.get = 0;
+	ch->joblist.pre_alloc.length = num_jobs;
+	ch->joblist.pre_alloc.put = 0;
+	ch->joblist.pre_alloc.get = 0;
 
 	/*
 	 * commit the previous writes before setting the flag.
@@ -1118,16 +1118,16 @@ static int channel_gk20a_prealloc_resources(struct channel_gk20a *c,
 	 * channel_gk20a_is_prealloc_enabled()
 	 */
 	nvgpu_smp_wmb();
-	c->joblist.pre_alloc.enabled = true;
+	ch->joblist.pre_alloc.enabled = true;
 
 	return 0;
 
 clean_up_priv_cmd:
-	nvgpu_vfree(c->g, entries);
+	nvgpu_vfree(ch->g, entries);
 clean_up_joblist:
-	nvgpu_vfree(c->g, c->joblist.pre_alloc.jobs);
+	nvgpu_vfree(ch->g, ch->joblist.pre_alloc.jobs);
 clean_up:
-	(void) memset(&c->joblist.pre_alloc, 0, sizeof(c->joblist.pre_alloc));
+	(void) memset(&ch->joblist.pre_alloc, 0, sizeof(ch->joblist.pre_alloc));
 	return err;
 }
 
@@ -1135,7 +1135,7 @@ static void channel_gk20a_free_prealloc_resources(struct channel_gk20a *c)
 {
 	nvgpu_vfree(c->g, c->joblist.pre_alloc.jobs[0].wait_cmd);
 	nvgpu_vfree(c->g, c->joblist.pre_alloc.jobs);
-	gk20a_free_fence_pool(c);
+	nvgpu_fence_pool_free(c);
 
 	/*
 	 * commit the previous writes before disabling the flag.
@@ -2159,7 +2159,7 @@ void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
 		job = channel_gk20a_joblist_peek(c);
 		channel_gk20a_joblist_unlock(c);
 
-		completed = gk20a_fence_is_expired(job->post_fence);
+		completed = nvgpu_fence_is_expired(job->post_fence);
 		if (!completed) {
 			/*
 			 * The watchdog eventually sees an updated gp_get if
@@ -2209,7 +2209,7 @@ void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
 
 		/* Close the fence (this will unref the semaphore and release
 		 * it to the pool). */
-		gk20a_fence_put(job->post_fence);
+		nvgpu_fence_put(job->post_fence);
 
 		/* Free the private command buffers (wait_cmd first and
 		 * then incr_cmd i.e. order of allocation) */
diff --git a/drivers/gpu/nvgpu/common/fifo/submit.c b/drivers/gpu/nvgpu/common/fifo/submit.c
index afa7ab46e..f9aea83cd 100644
--- a/drivers/gpu/nvgpu/common/fifo/submit.c
+++ b/drivers/gpu/nvgpu/common/fifo/submit.c
@@ -28,13 +28,12 @@
 #include <nvgpu/channel_sync.h>
 #include <nvgpu/channel_sync_syncpt.h>
 #include <nvgpu/bug.h>
-
-#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
-
-#include "gk20a/fence_gk20a.h"
+#include <nvgpu/fence.h>
 
 #include <trace/events/gk20a.h>
 
+#include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
+
 /*
  * Handle the submit synchronization - pre-fences and post-fences.
  */
@@ -43,7 +42,7 @@ static int nvgpu_submit_prepare_syncs(struct channel_gk20a *c,
 				      struct channel_gk20a_job *job,
 				      struct priv_cmd_entry **wait_cmd,
 				      struct priv_cmd_entry **incr_cmd,
-				      struct gk20a_fence **post_fence,
+				      struct nvgpu_fence_type **post_fence,
 				      bool register_irq,
 				      u32 flags)
 {
@@ -132,7 +131,7 @@ static int nvgpu_submit_prepare_syncs(struct channel_gk20a *c,
 	 * is used to keep track of method completion for idle railgating. The
 	 * sync_pt/semaphore PB is added to the GPFIFO later on in submit.
 	 */
-	job->post_fence = gk20a_alloc_fence(c);
+	job->post_fence = nvgpu_fence_alloc(c);
 	if (job->post_fence == NULL) {
 		err = -ENOMEM;
 		goto clean_up_wait_cmd;
@@ -171,7 +170,7 @@ clean_up_incr_cmd:
 		job->incr_cmd = NULL;
 	}
 clean_up_post_fence:
-	gk20a_fence_put(job->post_fence);
+	nvgpu_fence_put(job->post_fence);
 	job->post_fence = NULL;
 clean_up_wait_cmd:
 	if (job->wait_cmd != NULL) {
@@ -328,13 +327,13 @@ static int nvgpu_submit_channel_gpfifo(struct channel_gk20a *c,
 				u32 num_entries,
 				u32 flags,
 				struct nvgpu_channel_fence *fence,
-				struct gk20a_fence **fence_out,
+				struct nvgpu_fence_type **fence_out,
 				struct fifo_profile_gk20a *profile)
 {
 	struct gk20a *g = c->g;
 	struct priv_cmd_entry *wait_cmd = NULL;
 	struct priv_cmd_entry *incr_cmd = NULL;
-	struct gk20a_fence *post_fence = NULL;
+	struct nvgpu_fence_type *post_fence = NULL;
 	struct channel_gk20a_job *job = NULL;
 	/* we might need two extra gpfifo entries - one for pre fence
 	 * and one for post fence. */
@@ -558,7 +557,7 @@ static int nvgpu_submit_channel_gpfifo(struct channel_gk20a *c,
 	}
 
 	if (fence_out != NULL) {
-		*fence_out = gk20a_fence_get(post_fence);
+		*fence_out = nvgpu_fence_get(post_fence);
 	}
 
 	if (need_job_tracking) {
@@ -593,7 +592,7 @@ clean_up_job:
 	channel_gk20a_free_job(c, job);
 clean_up:
 	nvgpu_log_fn(g, "fail");
-	gk20a_fence_put(post_fence);
+	nvgpu_fence_put(post_fence);
 	if (c->deterministic) {
 		nvgpu_rwsem_up_read(&g->deterministic_busy);
 	} else if (need_deferred_cleanup) {
@@ -608,7 +607,7 @@ int nvgpu_submit_channel_gpfifo_user(struct channel_gk20a *c,
 				u32 num_entries,
 				u32 flags,
 				struct nvgpu_channel_fence *fence,
-				struct gk20a_fence **fence_out,
+				struct nvgpu_fence_type **fence_out,
 				struct fifo_profile_gk20a *profile)
 {
 	return nvgpu_submit_channel_gpfifo(c, NULL, userdata, num_entries,
@@ -620,7 +619,7 @@ int nvgpu_submit_channel_gpfifo_kernel(struct channel_gk20a *c,
 				u32 num_entries,
 				u32 flags,
 				struct nvgpu_channel_fence *fence,
-				struct gk20a_fence **fence_out)
+				struct nvgpu_fence_type **fence_out)
 {
 	struct nvgpu_gpfifo_userdata userdata = { NULL, NULL };
 
diff --git a/drivers/gpu/nvgpu/common/mm/vidmem.c b/drivers/gpu/nvgpu/common/mm/vidmem.c
index 860505106..01d3913a9 100644
--- a/drivers/gpu/nvgpu/common/mm/vidmem.c
+++ b/drivers/gpu/nvgpu/common/mm/vidmem.c
@@ -29,9 +29,9 @@
 #include <nvgpu/sizes.h>
 #include <nvgpu/gk20a.h>
 #include <nvgpu/nvgpu_sgt.h>
+#include <nvgpu/fence.h>
 
 #include "gk20a/mm_gk20a.h"
-#include "gk20a/fence_gk20a.h"
 #include "gk20a/ce2_gk20a.h"
 
 /*
@@ -95,7 +95,7 @@ void nvgpu_vidmem_destroy(struct gk20a *g)
 static int __nvgpu_vidmem_do_clear_all(struct gk20a *g)
 {
 	struct mm_gk20a *mm = &g->mm;
-	struct gk20a_fence *gk20a_fence_out = NULL;
+	struct nvgpu_fence_type *fence_out = NULL;
 	int err = 0;
 
 	if (mm->vidmem.ce_ctx_id == NVGPU_CE_INVAL_CTX_ID) {
@@ -113,14 +113,14 @@ static int __nvgpu_vidmem_do_clear_all(struct gk20a *g)
 			NVGPU_CE_DST_LOCATION_LOCAL_FB,
 			NVGPU_CE_MEMSET,
 			0,
-			&gk20a_fence_out);
+			&fence_out);
 	if (err != 0) {
 		nvgpu_err(g,
 			"Failed to clear vidmem : %d", err);
 		return err;
 	}
 
-	if (gk20a_fence_out) {
+	if (fence_out) {
 		struct nvgpu_timeout timeout;
 
 		err = nvgpu_timeout_init(g, &timeout,
@@ -132,12 +132,12 @@ static int __nvgpu_vidmem_do_clear_all(struct gk20a *g)
 		}
 
 		do {
-			err = gk20a_fence_wait(g, gk20a_fence_out,
+			err = nvgpu_fence_wait(g, fence_out,
 					       nvgpu_get_poll_timeout(g));
 		} while (err == -ERESTARTSYS &&
 			 !nvgpu_timeout_expired(&timeout));
 
-		gk20a_fence_put(gk20a_fence_out);
+		nvgpu_fence_put(fence_out);
 		if (err != 0) {
 			nvgpu_err(g,
 				"fence wait failed for CE execute ops");
@@ -455,8 +455,8 @@ int nvgpu_vidmem_get_space(struct gk20a *g, u64 *space)
 
 int nvgpu_vidmem_clear(struct gk20a *g, struct nvgpu_mem *mem)
 {
-	struct gk20a_fence *gk20a_fence_out = NULL;
-	struct gk20a_fence *gk20a_last_fence = NULL;
+	struct nvgpu_fence_type *fence_out = NULL;
+	struct nvgpu_fence_type *last_fence = NULL;
 	struct nvgpu_page_alloc *alloc = NULL;
 	struct nvgpu_sgl *sgl = NULL;
 	int err = 0;
@@ -468,8 +468,8 @@ int nvgpu_vidmem_clear(struct gk20a *g, struct nvgpu_mem *mem)
 	alloc = mem->vidmem_alloc;
 
 	nvgpu_sgt_for_each_sgl(sgl, &alloc->sgt) {
-		if (gk20a_last_fence) {
-			gk20a_fence_put(gk20a_last_fence);
+		if (last_fence) {
+			nvgpu_fence_put(last_fence);
 		}
 
 		err = gk20a_ce_execute_ops(g,
@@ -481,7 +481,7 @@ int nvgpu_vidmem_clear(struct gk20a *g, struct nvgpu_mem *mem)
 			NVGPU_CE_DST_LOCATION_LOCAL_FB,
 			NVGPU_CE_MEMSET,
 			0,
-			&gk20a_fence_out);
+			&fence_out);
 
 		if (err != 0) {
 			nvgpu_err(g,
@@ -493,10 +493,10 @@ int nvgpu_vidmem_clear(struct gk20a *g, struct nvgpu_mem *mem)
 			   nvgpu_sgt_get_phys(g, &alloc->sgt, sgl),
 			   nvgpu_sgt_get_length(&alloc->sgt, sgl));
 
-		gk20a_last_fence = gk20a_fence_out;
+		last_fence = fence_out;
 	}
 
-	if (gk20a_last_fence) {
+	if (last_fence) {
 		struct nvgpu_timeout timeout;
 
 		err = nvgpu_timeout_init(g, &timeout,
@@ -508,12 +508,12 @@ int nvgpu_vidmem_clear(struct gk20a *g, struct nvgpu_mem *mem)
 		}
 
 		do {
-			err = gk20a_fence_wait(g, gk20a_last_fence,
+			err = nvgpu_fence_wait(g, last_fence,
 					       nvgpu_get_poll_timeout(g));
 		} while (err == -ERESTARTSYS &&
 			 !nvgpu_timeout_expired(&timeout));
 
-		gk20a_fence_put(gk20a_last_fence);
+		nvgpu_fence_put(last_fence);
 		if (err != 0) {
 			nvgpu_err(g,
 				"fence wait failed for CE execute ops");
diff --git a/drivers/gpu/nvgpu/common/sync/channel_sync.c b/drivers/gpu/nvgpu/common/sync/channel_sync.c
index 660c2bfb6..c2139ce3a 100644
--- a/drivers/gpu/nvgpu/common/sync/channel_sync.c
+++ b/drivers/gpu/nvgpu/common/sync/channel_sync.c
@@ -1,7 +1,7 @@
 /*
  * GK20A Channel Synchronization Abstraction
  *
- * Copyright (c) 2014-2018, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2019, NVIDIA CORPORATION.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -35,9 +35,9 @@
 #include <nvgpu/channel_sync.h>
 #include <nvgpu/channel_sync_syncpt.h>
 #include <nvgpu/channel_sync_semaphore.h>
+#include <nvgpu/fence.h>
 
 #include "channel_sync_priv.h"
-#include "gk20a/fence_gk20a.h"
 #include "gk20a/mm_gk20a.h"
 
 struct nvgpu_channel_sync *nvgpu_channel_sync_create(struct channel_gk20a *c,
@@ -72,7 +72,7 @@ int nvgpu_channel_sync_wait_fence_fd(struct nvgpu_channel_sync *s, int fd,
 }
 
 int nvgpu_channel_sync_incr(struct nvgpu_channel_sync *s,
-	struct priv_cmd_entry *entry, struct gk20a_fence *fence,
+	struct priv_cmd_entry *entry, struct nvgpu_fence_type *fence,
 	bool need_sync_fence, bool register_irq)
 {
 	return s->incr(s, entry, fence, need_sync_fence, register_irq);
@@ -80,7 +80,7 @@ int nvgpu_channel_sync_incr(struct nvgpu_channel_sync *s,
 
 int nvgpu_channel_sync_incr_user(struct nvgpu_channel_sync *s,
 	int wait_fence_fd, struct priv_cmd_entry *entry,
-	struct gk20a_fence *fence, bool wfi, bool need_sync_fence,
+	struct nvgpu_fence_type *fence, bool wfi, bool need_sync_fence,
 	bool register_irq)
 {
 	return s->incr_user(s, wait_fence_fd, entry, fence, wfi,
diff --git a/drivers/gpu/nvgpu/common/sync/channel_sync_priv.h b/drivers/gpu/nvgpu/common/sync/channel_sync_priv.h
index a76e18566..c92f70bb0 100644
--- a/drivers/gpu/nvgpu/common/sync/channel_sync_priv.h
+++ b/drivers/gpu/nvgpu/common/sync/channel_sync_priv.h
@@ -2,7 +2,7 @@
  *
  * Nvgpu Channel Synchronization Abstraction
  *
- * Copyright (c) 2014-2018, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2019, NVIDIA CORPORATION.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -30,7 +30,7 @@
 #include <nvgpu/types.h>
 
 struct priv_cmd_entry;
-struct gk20a_fence;
+struct nvgpu_fence_type;
 
 /*
  * This struct is private and should not be used directly. Users should
@@ -47,14 +47,14 @@ struct nvgpu_channel_sync {
 
 	int (*incr)(struct nvgpu_channel_sync *s,
 		    struct priv_cmd_entry *entry,
-		    struct gk20a_fence *fence,
+		    struct nvgpu_fence_type *fence,
 		    bool need_sync_fence,
 		    bool register_irq);
 
 	int (*incr_user)(struct nvgpu_channel_sync *s,
 			 int wait_fence_fd,
 			 struct priv_cmd_entry *entry,
-			 struct gk20a_fence *fence,
+			 struct nvgpu_fence_type *fence,
 			 bool wfi,
 			 bool need_sync_fence,
 			 bool register_irq);
@@ -66,4 +66,4 @@ struct nvgpu_channel_sync {
 	void (*destroy)(struct nvgpu_channel_sync *s);
 };
 
-#endif /* NVGPU_CHANNEL_SYNC_PRIV_H */
\ No newline at end of file
+#endif /* NVGPU_CHANNEL_SYNC_PRIV_H */
diff --git a/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c b/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c
index 74988d73a..c19517d50 100644
--- a/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c
+++ b/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c
@@ -35,9 +35,9 @@
 #include <nvgpu/channel.h>
 #include <nvgpu/channel_sync.h>
 #include <nvgpu/channel_sync_semaphore.h>
+#include <nvgpu/fence.h>
 
 #include "channel_sync_priv.h"
-#include "gk20a/fence_gk20a.h"
 #include "gk20a/mm_gk20a.h"
 
 struct nvgpu_channel_sync_semaphore {
@@ -177,7 +177,7 @@ cleanup:
 static int channel_sync_semaphore_incr_common(
 		struct nvgpu_channel_sync *s, bool wfi_cmd,
 		struct priv_cmd_entry *incr_cmd,
-		struct gk20a_fence *fence,
+		struct nvgpu_fence_type *fence,
 		bool need_sync_fence)
 {
 	u32 incr_cmd_size;
@@ -215,7 +215,7 @@ static int channel_sync_semaphore_incr_common(
 		}
 	}
 
-	err = gk20a_fence_from_semaphore(fence,
+	err = nvgpu_fence_from_semaphore(fence,
 		semaphore,
 		&c->semaphore_wq,
 		os_fence);
@@ -237,7 +237,7 @@ clean_up_sema:
 static int channel_sync_semaphore_incr(
 		struct nvgpu_channel_sync *s,
 		struct priv_cmd_entry *entry,
-		struct gk20a_fence *fence,
+		struct nvgpu_fence_type *fence,
 		bool need_sync_fence,
 		bool register_irq)
 {
@@ -252,7 +252,7 @@ static int channel_sync_semaphore_incr_user(
 		struct nvgpu_channel_sync *s,
 		int wait_fence_fd,
 		struct priv_cmd_entry *entry,
-		struct gk20a_fence *fence,
+		struct nvgpu_fence_type *fence,
 		bool wfi,
 		bool need_sync_fence,
 		bool register_irq)
diff --git a/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c b/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c
index d015d631b..cedccde60 100644
--- a/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c
+++ b/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c
@@ -34,9 +34,9 @@
 #include <nvgpu/channel.h>
 #include <nvgpu/channel_sync.h>
 #include <nvgpu/channel_sync_syncpt.h>
+#include <nvgpu/fence.h>
 
 #include "channel_sync_priv.h"
-#include "gk20a/fence_gk20a.h"
 #include "gk20a/mm_gk20a.h"
 
 struct nvgpu_channel_sync_syncpt {
@@ -185,7 +185,7 @@ static int channel_sync_syncpt_incr_common(struct nvgpu_channel_sync *s,
 				       bool wfi_cmd,
 				       bool register_irq,
 				       struct priv_cmd_entry *incr_cmd,
-				       struct gk20a_fence *fence,
+				       struct nvgpu_fence_type *fence,
 				       bool need_sync_fence)
 {
 	u32 thresh;
@@ -246,7 +246,7 @@ static int channel_sync_syncpt_incr_common(struct nvgpu_channel_sync *s,
 		}
 	}
 
-	err = gk20a_fence_from_syncpt(fence, sp->nvhost_dev,
+	err = nvgpu_fence_from_syncpt(fence, sp->nvhost_dev,
 	 sp->id, thresh, os_fence);
 
 	if (err != 0) {
@@ -265,7 +265,7 @@ clean_up_priv_cmd:
 
 static int channel_sync_syncpt_incr(struct nvgpu_channel_sync *s,
 			      struct priv_cmd_entry *entry,
-			      struct gk20a_fence *fence,
+			      struct nvgpu_fence_type *fence,
 			      bool need_sync_fence,
 			      bool register_irq)
 {
@@ -280,7 +280,7 @@ static int channel_sync_syncpt_incr(struct nvgpu_channel_sync *s,
 static int channel_sync_syncpt_incr_user(struct nvgpu_channel_sync *s,
 				   int wait_fence_fd,
 				   struct priv_cmd_entry *entry,
-				   struct gk20a_fence *fence,
+				   struct nvgpu_fence_type *fence,
 				   bool wfi,
 				   bool need_sync_fence,
 				   bool register_irq)
diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
index ee0a87fde..1de7faf7d 100644
--- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c
@@ -33,15 +33,15 @@
 #include <nvgpu/channel.h>
 #include <nvgpu/engines.h>
 #include <nvgpu/power_features/cg.h>
+#include <nvgpu/fence.h>
+#include <nvgpu/barrier.h>
 
-#include "gk20a/fence_gk20a.h"
 #include "gk20a/ce2_gk20a.h"
 
 #include <nvgpu/hw/gk20a/hw_ce2_gk20a.h>
 #include <nvgpu/hw/gk20a/hw_pbdma_gk20a.h>
 #include <nvgpu/hw/gk20a/hw_top_gk20a.h>
 #include <nvgpu/hw/gk20a/hw_gr_gk20a.h>
-#include <nvgpu/barrier.h>
 
 /*
  * Copy engine defines line size in pixels
@@ -114,9 +114,9 @@ static void gk20a_ce_put_fences(struct gk20a_gpu_ctx *ce_ctx)
 	u32 i;
 
 	for (i = 0; i < NVGPU_CE_MAX_INFLIGHT_JOBS; i++) {
-		struct gk20a_fence **fence = &ce_ctx->postfences[i];
+		struct nvgpu_fence_type **fence = &ce_ctx->postfences[i];
 		if (*fence != NULL) {
-			gk20a_fence_put(*fence);
+			nvgpu_fence_put(*fence);
 		}
 		*fence = NULL;
 	}
diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h
index 1020d15cb..32c8868f1 100644
--- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.h
@@ -102,7 +102,7 @@ struct gk20a_gpu_ctx {
 
 	/* cmd buf mem_desc */
 	struct nvgpu_mem cmd_buf_mem;
-	struct gk20a_fence *postfences[NVGPU_CE_MAX_INFLIGHT_JOBS];
+	struct nvgpu_fence_type *postfences[NVGPU_CE_MAX_INFLIGHT_JOBS];
 
 	struct nvgpu_list_node list;
 
@@ -135,7 +135,7 @@ int gk20a_ce_execute_ops(struct gk20a *g,
 		u32 launch_flags,
 		u32 request_operation,
 		u32 submit_flags,
-		struct gk20a_fence **gk20a_fence_out);
+		struct nvgpu_fence_type **fence_out);
 void gk20a_ce_delete_context_priv(struct gk20a *g,
 		u32 ce_ctx_id);
 void gk20a_ce_delete_context(struct gk20a *g,
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 3d6cd3d27..af6f36e18 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -45,9 +45,9 @@
 #include <nvgpu/gk20a.h>
 #include <nvgpu/channel.h>
 #include <nvgpu/pd_cache.h>
+#include <nvgpu/fence.h>
 
 #include "mm_gk20a.h"
-#include "fence_gk20a.h"
 
 #include <nvgpu/hw/gk20a/hw_gmmu_gk20a.h>
 #include <nvgpu/hw/gk20a/hw_pram_gk20a.h>
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index b49955d38..b7749a897 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -59,7 +59,7 @@ struct gk20a_buffer_state {
 
 	/* This struct reflects the state of the buffer when this
 	 * fence signals. */
-	struct gk20a_fence *fence;
+	struct nvgpu_fence_type *fence;
 };
 
 static inline struct gk20a_buffer_state *
diff --git a/drivers/gpu/nvgpu/include/nvgpu/channel.h b/drivers/gpu/nvgpu/include/nvgpu/channel.h
index 07adf3244..5ef1d1e86 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/channel.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/channel.h
@@ -34,7 +34,7 @@
 
 struct gk20a;
 struct dbg_session_gk20a;
-struct gk20a_fence;
+struct nvgpu_fence_type;
 struct fifo_profile_gk20a;
 struct nvgpu_channel_sync;
 struct nvgpu_gpfifo_userdata;
@@ -175,7 +175,7 @@ struct priv_cmd_entry {
 struct channel_gk20a_job {
 	struct nvgpu_mapped_buf **mapped_buffers;
 	int num_mapped_buffers;
-	struct gk20a_fence *post_fence;
+	struct nvgpu_fence_type *post_fence;
 	struct priv_cmd_entry *wait_cmd;
 	struct priv_cmd_entry *incr_cmd;
 	struct nvgpu_list_node list;
@@ -507,7 +507,7 @@ int nvgpu_submit_channel_gpfifo_user(struct channel_gk20a *c,
 				u32 num_entries,
 				u32 flags,
 				struct nvgpu_channel_fence *fence,
-				struct gk20a_fence **fence_out,
+				struct nvgpu_fence_type **fence_out,
 				struct fifo_profile_gk20a *profile);
 
 int nvgpu_submit_channel_gpfifo_kernel(struct channel_gk20a *c,
@@ -515,7 +515,7 @@ int nvgpu_submit_channel_gpfifo_kernel(struct channel_gk20a *c,
 				u32 num_entries,
 				u32 flags,
 				struct nvgpu_channel_fence *fence,
-				struct gk20a_fence **fence_out);
+				struct nvgpu_fence_type **fence_out);
 
 #ifdef CONFIG_DEBUG_FS
 void trace_write_pushbuffers(struct channel_gk20a *c, u32 count);
diff --git a/drivers/gpu/nvgpu/include/nvgpu/channel_sync.h b/drivers/gpu/nvgpu/include/nvgpu/channel_sync.h
index 34223558f..ee79408eb 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/channel_sync.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/channel_sync.h
@@ -2,7 +2,7 @@
  *
  * Nvgpu Channel Synchronization Abstraction
  *
- * Copyright (c) 2014-2018, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2019, NVIDIA CORPORATION.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -31,7 +31,7 @@
 struct nvgpu_channel_sync;
 struct priv_cmd_entry;
 struct channel_gk20a;
-struct gk20a_fence;
+struct nvgpu_fence_type;
 struct gk20a;
 
 /* Public APIS for channel_sync below */
@@ -50,7 +50,7 @@ int nvgpu_channel_sync_wait_fence_fd(struct nvgpu_channel_sync *s, int fd,
  *  - a fence that can be passed to wait_cpu() and is_expired().
  */
 int nvgpu_channel_sync_incr(struct nvgpu_channel_sync *s,
-	struct priv_cmd_entry *entry, struct gk20a_fence *fence,
+	struct priv_cmd_entry *entry, struct nvgpu_fence_type *fence,
 	bool need_sync_fence, bool register_irq);
 
 /*
@@ -59,11 +59,11 @@ int nvgpu_channel_sync_incr(struct nvgpu_channel_sync *s,
  * Returns
  *  - a gpu cmdbuf that performs the increment when executed,
  *  - a fence that can be passed to wait_cpu() and is_expired(),
- *  - a gk20a_fence that signals when the incr has happened.
+ *  - an nvgpu_fence_type that signals when the incr has happened.
  */
 int nvgpu_channel_sync_incr_user(struct nvgpu_channel_sync *s,
 	int wait_fence_fd, struct priv_cmd_entry *entry,
-	struct gk20a_fence *fence, bool wfi, bool need_sync_fence,
+	struct nvgpu_fence_type *fence, bool wfi, bool need_sync_fence,
 	bool register_irq);
 /*
  * Reset the channel syncpoint/semaphore. Syncpoint increments generally
diff --git a/drivers/gpu/nvgpu/gk20a/fence_gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/fence.h
similarity index 65%
rename from drivers/gpu/nvgpu/gk20a/fence_gk20a.h
rename to drivers/gpu/nvgpu/include/nvgpu/fence.h
index 9caa4f2de..9451c6af6 100644
--- a/drivers/gpu/nvgpu/gk20a/fence_gk20a.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/fence.h
@@ -1,9 +1,7 @@
 /*
- * drivers/video/tegra/host/gk20a/fence_gk20a.h
+ * Fences
  *
- * GK20A Fences
- *
- * Copyright (c) 2014-2018, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2019, NVIDIA CORPORATION.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -23,28 +21,26 @@
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  * DEALINGS IN THE SOFTWARE.
  */
-#ifndef NVGPU_GK20A_FENCE_GK20A_H
-#define NVGPU_GK20A_FENCE_GK20A_H
+#ifndef NVGPU_FENCE_H
+#define NVGPU_FENCE_H
 
 #include <nvgpu/types.h>
 #include <nvgpu/kref.h>
 #include <nvgpu/os_fence.h>
 
+struct gk20a;
+struct channel_gk20a;
 struct platform_device;
 struct nvgpu_semaphore;
-struct channel_gk20a;
-struct gk20a;
 struct nvgpu_os_fence;
 
-struct gk20a_fence_ops;
-
-struct gk20a_fence {
+struct nvgpu_fence_type {
 	struct gk20a *g;
 
 	/* Valid for all fence types: */
 	bool valid;
 	struct nvgpu_ref ref;
-	const struct gk20a_fence_ops *ops;
+	const struct nvgpu_fence_ops *ops;
 
 	struct nvgpu_os_fence os_fence;
 
@@ -61,39 +57,40 @@ struct gk20a_fence {
 	struct nvgpu_allocator *allocator;
 };
 
+struct nvgpu_fence_ops {
+	int (*wait)(struct nvgpu_fence_type *f, u32 timeout);
+	bool (*is_expired)(struct nvgpu_fence_type *f);
+	void *(*free)(struct nvgpu_ref *ref);
+};
+
 /* Fences can be created from semaphores or syncpoint (id, value) pairs */
-int gk20a_fence_from_semaphore(
-		struct gk20a_fence *fence_out,
+int nvgpu_fence_from_semaphore(
+		struct nvgpu_fence_type *fence_out,
 		struct nvgpu_semaphore *semaphore,
 		struct nvgpu_cond *semaphore_wq,
 		struct nvgpu_os_fence os_fence);
 
-int gk20a_fence_from_syncpt(
-		struct gk20a_fence *fence_out,
+int nvgpu_fence_from_syncpt(
+		struct nvgpu_fence_type *fence_out,
 		struct nvgpu_nvhost_dev *nvhost_dev,
 		u32 id, u32 value,
 		struct nvgpu_os_fence os_fence);
 
-int gk20a_alloc_fence_pool(
-		struct channel_gk20a *c,
-		unsigned int count);
+int nvgpu_fence_pool_alloc(struct channel_gk20a *ch, unsigned int count);
 
-void gk20a_free_fence_pool(
-		struct channel_gk20a *c);
+void nvgpu_fence_pool_free(struct channel_gk20a *ch);
 
-struct gk20a_fence *gk20a_alloc_fence(
-		struct channel_gk20a *c);
+struct nvgpu_fence_type *nvgpu_fence_alloc(struct channel_gk20a *ch);
 
-void gk20a_init_fence(struct gk20a_fence *f,
-		const struct gk20a_fence_ops *ops,
+void nvgpu_fence_init(struct nvgpu_fence_type *f,
+		const struct nvgpu_fence_ops *ops,
 		struct nvgpu_os_fence os_fence);
 
 /* Fence operations */
-void gk20a_fence_put(struct gk20a_fence *f);
-struct gk20a_fence *gk20a_fence_get(struct gk20a_fence *f);
-int gk20a_fence_wait(struct gk20a *g, struct gk20a_fence *f,
-							u32 timeout);
-bool gk20a_fence_is_expired(struct gk20a_fence *f);
-int gk20a_fence_install_fd(struct gk20a_fence *f, int fd);
+void nvgpu_fence_put(struct nvgpu_fence_type *f);
+struct nvgpu_fence_type *nvgpu_fence_get(struct nvgpu_fence_type *f);
+int  nvgpu_fence_wait(struct gk20a *g, struct nvgpu_fence_type *f, u32 timeout);
+bool nvgpu_fence_is_expired(struct nvgpu_fence_type *f);
+int  nvgpu_fence_install_fd(struct nvgpu_fence_type *f, int fd);
 
-#endif /* NVGPU_GK20A_FENCE_GK20A_H */
+#endif /* NVGPU_FENCE_H */
diff --git a/drivers/gpu/nvgpu/os/linux/cde.c b/drivers/gpu/nvgpu/os/linux/cde.c
index bc9e506dd..c9f4dc928 100644
--- a/drivers/gpu/nvgpu/os/linux/cde.c
+++ b/drivers/gpu/nvgpu/os/linux/cde.c
@@ -39,11 +39,11 @@
 #include <nvgpu/utils.h>
 #include <nvgpu/gk20a.h>
 #include <nvgpu/string.h>
+#include <nvgpu/fence.h>
 
 #include <nvgpu/linux/vm.h>
 
 #include "gk20a/mm_gk20a.h"
-#include "gk20a/fence_gk20a.h"
 #include "gk20a/gr_gk20a.h"
 
 #include "cde.h"
@@ -761,8 +761,8 @@ deinit_image:
 }
 
 static int gk20a_cde_execute_buffer(struct gk20a_cde_ctx *cde_ctx,
-				    u32 op, struct nvgpu_channel_fence *fence,
-				    u32 flags, struct gk20a_fence **fence_out)
+				u32 op, struct nvgpu_channel_fence *fence,
+				u32 flags, struct nvgpu_fence_type **fence_out)
 {
 	struct nvgpu_os_linux *l = cde_ctx->l;
 	struct gk20a *g = &l->g;
@@ -1014,7 +1014,7 @@ int gk20a_cde_convert(struct nvgpu_os_linux *l,
 		      u64 scatterbuffer_byte_offset,
 		      struct nvgpu_channel_fence *fence,
 		      u32 __flags, struct gk20a_cde_param *params,
-		      int num_params, struct gk20a_fence **fence_out)
+		      int num_params, struct nvgpu_fence_type **fence_out)
 __acquires(&l->cde_app->mutex)
 __releases(&l->cde_app->mutex)
 {
@@ -1535,7 +1535,7 @@ static int gk20a_buffer_convert_gpu_to_cde_v1(
 	struct gk20a_cde_param params[MAX_CDE_LAUNCH_PATCHES];
 	int param = 0;
 	int err = 0;
-	struct gk20a_fence *new_fence = NULL;
+	struct nvgpu_fence_type *new_fence = NULL;
 	const int wgx = 8;
 	const int wgy = 8;
 	const int compbits_per_byte = 4; /* one byte stores 4 compbit pairs */
@@ -1657,7 +1657,7 @@ static int gk20a_buffer_convert_gpu_to_cde_v1(
 		goto out;
 
 	/* compbits generated, update state & fence */
-	gk20a_fence_put(state->fence);
+	nvgpu_fence_put(state->fence);
 	state->fence = new_fence;
 	state->valid_compbits |= consumer &
 		(NVGPU_GPU_COMPBITS_CDEH | NVGPU_GPU_COMPBITS_CDEV);
@@ -1704,7 +1704,7 @@ int gk20a_prepare_compressible_read(
 		u32 width, u32 height, u32 block_height_log2,
 		u32 submit_flags, struct nvgpu_channel_fence *fence,
 		u32 *valid_compbits, u32 *zbc_color,
-		struct gk20a_fence **fence_out)
+		struct nvgpu_fence_type **fence_out)
 {
 	struct gk20a *g = &l->g;
 	int err = 0;
@@ -1728,7 +1728,7 @@ int gk20a_prepare_compressible_read(
 
 	if (state->valid_compbits && request == NVGPU_GPU_COMPBITS_NONE) {
 
-		gk20a_fence_put(state->fence);
+		nvgpu_fence_put(state->fence);
 		state->fence = NULL;
 		/* state->fence = decompress();
 		state->valid_compbits = 0; */
@@ -1753,7 +1753,7 @@ int gk20a_prepare_compressible_read(
 	}
 
 	if (state->fence && fence_out)
-		*fence_out = gk20a_fence_get(state->fence);
+		*fence_out = nvgpu_fence_get(state->fence);
 
 	if (valid_compbits)
 		*valid_compbits = state->valid_compbits;
@@ -1794,7 +1794,7 @@ int gk20a_mark_compressible_write(struct gk20a *g, u32 buffer_fd,
 	state->zbc_color = zbc_color;
 
 	/* Discard previous compbit job fence. */
-	gk20a_fence_put(state->fence);
+	nvgpu_fence_put(state->fence);
 	state->fence = NULL;
 
 	nvgpu_mutex_release(&state->lock);
diff --git a/drivers/gpu/nvgpu/os/linux/cde.h b/drivers/gpu/nvgpu/os/linux/cde.h
index 5928b6249..8688d98da 100644
--- a/drivers/gpu/nvgpu/os/linux/cde.h
+++ b/drivers/gpu/nvgpu/os/linux/cde.h
@@ -1,7 +1,7 @@
 /*
  * GK20A color decompression engine support
  *
- * Copyright (c) 2014-2017, NVIDIA Corporation.  All rights reserved.
+ * Copyright (c) 2014-2019, NVIDIA Corporation.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -43,7 +43,7 @@ struct dma_buf;
 struct device;
 struct nvgpu_os_linux;
 struct gk20a;
-struct gk20a_fence;
+struct nvgpu_fence_type;
 struct nvgpu_channel_fence;
 struct channel_gk20a;
 struct vm_gk20a;
@@ -308,7 +308,7 @@ int gk20a_cde_convert(struct nvgpu_os_linux *l,
 		u64 scatterbuffer_byte_offset,
 		struct nvgpu_channel_fence *fence,
 		u32 __flags, struct gk20a_cde_param *params,
-		int num_params, struct gk20a_fence **fence_out);
+		int num_params, struct nvgpu_fence_type **fence_out);
 
 int gk20a_prepare_compressible_read(
 		struct nvgpu_os_linux *l, u32 buffer_fd, u32 request, u64 offset,
@@ -317,7 +317,7 @@ int gk20a_prepare_compressible_read(
 		u32 width, u32 height, u32 block_height_log2,
 		u32 submit_flags, struct nvgpu_channel_fence *fence,
 		u32 *valid_compbits, u32 *zbc_color,
-		struct gk20a_fence **fence_out);
+		struct nvgpu_fence_type **fence_out);
 int gk20a_mark_compressible_write(
 		struct gk20a *g, u32 buffer_fd, u32 valid_compbits, u64 offset,
 		u32 zbc_color);
diff --git a/drivers/gpu/nvgpu/os/linux/channel.h b/drivers/gpu/nvgpu/os/linux/channel.h
index 78230375f..b88a2deec 100644
--- a/drivers/gpu/nvgpu/os/linux/channel.h
+++ b/drivers/gpu/nvgpu/os/linux/channel.h
@@ -25,7 +25,7 @@ struct channel_gk20a;
 struct nvgpu_gpfifo;
 struct nvgpu_submit_gpfifo_args;
 struct nvgpu_channel_fence;
-struct gk20a_fence;
+struct nvgpu_fence_type;
 struct fifo_profile_gk20a;
 struct nvgpu_os_linux;
 
diff --git a/drivers/gpu/nvgpu/os/linux/dmabuf.c b/drivers/gpu/nvgpu/os/linux/dmabuf.c
index c7acd8873..c83be18db 100644
--- a/drivers/gpu/nvgpu/os/linux/dmabuf.c
+++ b/drivers/gpu/nvgpu/os/linux/dmabuf.c
@@ -21,11 +21,9 @@
 #include <nvgpu/comptags.h>
 #include <nvgpu/enabled.h>
 #include <nvgpu/gk20a.h>
-
 #include <nvgpu/linux/vm.h>
 #include <nvgpu/bug.h>
-
-#include "gk20a/fence_gk20a.h"
+#include <nvgpu/fence.h>
 
 #include "platform_gk20a.h"
 #include "dmabuf.h"
@@ -53,7 +51,7 @@ static void gk20a_mm_delete_priv(void *_priv)
 	/* Free buffer states */
 	nvgpu_list_for_each_entry_safe(s, s_tmp, &priv->states,
 				gk20a_buffer_state, list) {
-		gk20a_fence_put(s->fence);
+		nvgpu_fence_put(s->fence);
 		nvgpu_list_del(&s->list);
 		nvgpu_kfree(g, s);
 	}
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c
index 31357baa0..e64c75fe6 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c
@@ -40,8 +40,7 @@
 #include <nvgpu/channel_sync_syncpt.h>
 #include <nvgpu/runlist.h>
 #include <nvgpu/gr/ctx.h>
-
-#include "gk20a/fence_gk20a.h"
+#include <nvgpu/fence.h>
 
 #include "platform_gk20a.h"
 #include "ioctl_channel.h"
@@ -770,7 +769,7 @@ static int gk20a_ioctl_channel_submit_gpfifo(
 	struct nvgpu_submit_gpfifo_args *args)
 {
 	struct nvgpu_channel_fence fence;
-	struct gk20a_fence *fence_out;
+	struct nvgpu_fence_type *fence_out;
 	struct fifo_profile_gk20a *profile = NULL;
 	u32 submit_flags = 0;
 	int fd = -1;
@@ -816,7 +815,7 @@ static int gk20a_ioctl_channel_submit_gpfifo(
 	/* Convert fence_out to something we can pass back to user space. */
 	if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) {
 		if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
-			ret = gk20a_fence_install_fd(fence_out, fd);
+			ret = nvgpu_fence_install_fd(fence_out, fd);
 			if (ret)
 				put_unused_fd(fd);
 			else
@@ -826,7 +825,7 @@ static int gk20a_ioctl_channel_submit_gpfifo(
 			args->fence.value = fence_out->syncpt_value;
 		}
 	}
-	gk20a_fence_put(fence_out);
+	nvgpu_fence_put(fence_out);
 
 	gk20a_fifo_profile_snapshot(profile, PROFILE_IOCTL_EXIT);
 	if (profile)
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c
index fc5f68d79..248b201ba 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c
@@ -38,13 +38,13 @@
 #include <nvgpu/channel.h>
 #include <nvgpu/pmu/pmgr.h>
 #include <nvgpu/power_features/pg.h>
+#include <nvgpu/fence.h>
 
 #include "ioctl_ctrl.h"
 #include "ioctl_dbg.h"
 #include "ioctl_as.h"
 #include "ioctl_tsg.h"
 #include "ioctl_channel.h"
-#include "gk20a/fence_gk20a.h"
 
 #include "platform_gk20a.h"
 #include "os_linux.h"
@@ -392,7 +392,7 @@ static int gk20a_ctrl_prepare_compressible_read(
 #ifdef CONFIG_NVGPU_SUPPORT_CDE
 	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
 	struct nvgpu_channel_fence fence;
-	struct gk20a_fence *fence_out = NULL;
+	struct nvgpu_fence_type *fence_out = NULL;
 	int submit_flags = nvgpu_submit_gpfifo_user_flags_to_common_flags(
 		args->submit_flags);
 	int fd = -1;
@@ -426,7 +426,7 @@ static int gk20a_ctrl_prepare_compressible_read(
 	if (submit_flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) {
 		if (submit_flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE) {
 			if (fence_out) {
-				ret = gk20a_fence_install_fd(fence_out, fd);
+				ret = nvgpu_fence_install_fd(fence_out, fd);
 				if (ret)
 					put_unused_fd(fd);
 				else
@@ -446,7 +446,7 @@ static int gk20a_ctrl_prepare_compressible_read(
 			}
 		}
 	}
-	gk20a_fence_put(fence_out);
+	nvgpu_fence_put(fence_out);
 #endif
 
 	return ret;