diff --git a/arch/nvgpu-common.yaml b/arch/nvgpu-common.yaml
index 3f57422bc..eacdc2639 100644
--- a/arch/nvgpu-common.yaml
+++ b/arch/nvgpu-common.yaml
@@ -84,7 +84,8 @@ fence:
   safe: no
   owner: Seema K
   sources: [ common/fence/fence.c,
-             include/nvgpu/fence.h ]
+             include/nvgpu/fence.h,
+             include/nvgpu/user_fence.h ]
 
 io:
   safe: yes
diff --git a/drivers/gpu/nvgpu/common/fence/fence.c b/drivers/gpu/nvgpu/common/fence/fence.c
index 3762b5ab4..76f35ca06 100644
--- a/drivers/gpu/nvgpu/common/fence/fence.c
+++ b/drivers/gpu/nvgpu/common/fence/fence.c
@@ -30,6 +30,7 @@
 #include <nvgpu/semaphore.h>
 #include <nvgpu/fence.h>
 #include <nvgpu/channel_sync_syncpt.h>
+#include <nvgpu/user_fence.h>
 
 static struct nvgpu_fence_type *nvgpu_fence_from_ref(struct nvgpu_ref *ref)
 {
@@ -75,9 +76,23 @@ struct nvgpu_fence_type *nvgpu_fence_get(struct nvgpu_fence_type *f)
 	return f;
 }
 
-int nvgpu_fence_install_fd(struct nvgpu_fence_type *f, int fd)
+struct nvgpu_user_fence nvgpu_fence_extract_user(struct nvgpu_fence_type *f)
 {
-	return f->os_fence.ops->install_fence(&f->os_fence, fd);
+	struct nvgpu_user_fence uf = (struct nvgpu_user_fence) {
+		.syncpt_id = f->syncpt_id,
+		.syncpt_value = f->syncpt_value,
+		.os_fence = f->os_fence,
+	};
+
+	/*
+	 * uf holds a copy of our os fence, so dup it: f keeps its own ref and
+	 * a user fence can be extracted multiple times (for cde).
+	 */
+	if (nvgpu_os_fence_is_initialized(&f->os_fence)) {
+		f->os_fence.ops->dup(&f->os_fence);
+	}
+
+	return uf;
 }
 
 int nvgpu_fence_wait(struct gk20a *g, struct nvgpu_fence_type *f,
diff --git a/drivers/gpu/nvgpu/common/fifo/submit.c b/drivers/gpu/nvgpu/common/fifo/submit.c
index fe315df29..17be7de00 100644
--- a/drivers/gpu/nvgpu/common/fifo/submit.c
+++ b/drivers/gpu/nvgpu/common/fifo/submit.c
@@ -38,6 +38,7 @@
 #include <nvgpu/vpr.h>
 #include <nvgpu/trace.h>
 #include <nvgpu/nvhost.h>
+#include <nvgpu/user_fence.h>
 
 #include <nvgpu/fifo/swprofile.h>
 
@@ -806,11 +807,19 @@ int nvgpu_submit_channel_gpfifo_user(struct nvgpu_channel *c,
 				u32 num_entries,
 				u32 flags,
 				struct nvgpu_channel_fence *fence,
-				struct nvgpu_fence_type **fence_out,
+				struct nvgpu_user_fence *fence_out,
 				struct nvgpu_swprofiler *profiler)
 {
-	return nvgpu_submit_channel_gpfifo(c, NULL, userdata, num_entries,
-			flags, fence, fence_out, profiler);
+	struct nvgpu_fence_type *fence_internal = NULL;
+	int err;
+
+	err = nvgpu_submit_channel_gpfifo(c, NULL, userdata, num_entries,
+			flags, fence, &fence_internal, profiler);
+	if (err == 0 && fence_internal != NULL) {
+		*fence_out = nvgpu_fence_extract_user(fence_internal);
+		nvgpu_fence_put(fence_internal);
+	}
+	return err;
 }
 
 int nvgpu_submit_channel_gpfifo_kernel(struct nvgpu_channel *c,
diff --git a/drivers/gpu/nvgpu/include/nvgpu/channel.h b/drivers/gpu/nvgpu/include/nvgpu/channel.h
index fca50b5ff..0186e4898 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/channel.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/channel.h
@@ -49,6 +49,7 @@ struct nvgpu_debug_context;
 struct priv_cmd_queue;
 struct priv_cmd_entry;
 struct nvgpu_channel_wdt;
+struct nvgpu_user_fence;
 
 /**
  * S/W defined invalid channel identifier.
@@ -575,7 +576,7 @@ int nvgpu_submit_channel_gpfifo_user(struct nvgpu_channel *c,
 				u32 num_entries,
 				u32 flags,
 				struct nvgpu_channel_fence *fence,
-				struct nvgpu_fence_type **fence_out,
+				struct nvgpu_user_fence *fence_out,
 				struct nvgpu_swprofiler *profiler);
 
 int nvgpu_submit_channel_gpfifo_kernel(struct nvgpu_channel *c,
diff --git a/drivers/gpu/nvgpu/include/nvgpu/fence.h b/drivers/gpu/nvgpu/include/nvgpu/fence.h
index a58488364..c71d2a321 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/fence.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/fence.h
@@ -1,7 +1,5 @@
 /*
- * Fences
- *
- * Copyright (c) 2014-2019, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2014-2020, NVIDIA CORPORATION.  All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -35,6 +33,7 @@ struct platform_device;
 struct nvgpu_semaphore;
 #endif
 struct nvgpu_os_fence;
+struct nvgpu_user_fence;
 
 struct nvgpu_fence_type {
 	struct gk20a *g;
@@ -98,6 +97,6 @@ void nvgpu_fence_put(struct nvgpu_fence_type *f);
 struct nvgpu_fence_type *nvgpu_fence_get(struct nvgpu_fence_type *f);
 int  nvgpu_fence_wait(struct gk20a *g, struct nvgpu_fence_type *f, u32 timeout);
 bool nvgpu_fence_is_expired(struct nvgpu_fence_type *f);
-int  nvgpu_fence_install_fd(struct nvgpu_fence_type *f, int fd);
+struct nvgpu_user_fence nvgpu_fence_extract_user(struct nvgpu_fence_type *f);
 
 #endif /* NVGPU_FENCE_H */
diff --git a/drivers/gpu/nvgpu/include/nvgpu/user_fence.h b/drivers/gpu/nvgpu/include/nvgpu/user_fence.h
new file mode 100644
index 000000000..beed0f74d
--- /dev/null
+++ b/drivers/gpu/nvgpu/include/nvgpu/user_fence.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2020, NVIDIA Corporation.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef NVGPU_USER_FENCE_H
+#define NVGPU_USER_FENCE_H
+
+#include <nvgpu/nvhost.h>
+#include <nvgpu/os_fence.h>
+
+/*
+ * A post-submit fence to be given to userspace. Either the syncpt id and value
+ * pair is valid or the os fence is valid; this depends on the flags that were
+ * used: NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE implies os fence.
+ */
+struct nvgpu_user_fence {
+	u32 syncpt_id, syncpt_value; /* id: NVGPU_INVALID_SYNCPT_ID at init */
+	struct nvgpu_os_fence os_fence; /* see nvgpu_user_fence_release() */
+};
+
+static inline struct nvgpu_user_fence nvgpu_user_fence_init(void)
+{
+	return (struct nvgpu_user_fence) {
+		.syncpt_id = NVGPU_INVALID_SYNCPT_ID,
+	}; /* members not named here are zero-initialized */
+}
+
+static inline void nvgpu_user_fence_release(struct nvgpu_user_fence *fence)
+{
+	if (nvgpu_os_fence_is_initialized(&fence->os_fence)) {
+		fence->os_fence.ops->drop_ref(&fence->os_fence);
+	} /* else: no os fence attached, nothing to drop */
+}
+
+#endif /* NVGPU_USER_FENCE_H */
diff --git a/drivers/gpu/nvgpu/os/linux/cde.c b/drivers/gpu/nvgpu/os/linux/cde.c
index 4b1f5bba9..7214fa4cc 100644
--- a/drivers/gpu/nvgpu/os/linux/cde.c
+++ b/drivers/gpu/nvgpu/os/linux/cde.c
@@ -40,6 +40,7 @@
 #include <nvgpu/nvgpu_init.h>
 #include <nvgpu/string.h>
 #include <nvgpu/fence.h>
+#include <nvgpu/user_fence.h>
 
 #include <nvgpu/linux/vm.h>
 
@@ -1695,7 +1696,7 @@ int gk20a_prepare_compressible_read(
 		u32 width, u32 height, u32 block_height_log2,
 		u32 submit_flags, struct nvgpu_channel_fence *fence,
 		u32 *valid_compbits, u32 *zbc_color,
-		struct nvgpu_fence_type **fence_out)
+		struct nvgpu_user_fence *fence_out)
 {
 	struct gk20a *g = &l->g;
 	int err = 0;
@@ -1743,14 +1744,12 @@ int gk20a_prepare_compressible_read(
 		}
 	}
 
-	if (state->fence && fence_out)
-		*fence_out = nvgpu_fence_get(state->fence);
+	if (submit_flags & NVGPU_SUBMIT_FLAGS_FENCE_GET && state->fence != NULL) {
+		*fence_out = nvgpu_fence_extract_user(state->fence);
+	}
 
-	if (valid_compbits)
-		*valid_compbits = state->valid_compbits;
-
-	if (zbc_color)
-		*zbc_color = state->zbc_color;
+	*valid_compbits = state->valid_compbits;
+	*zbc_color = state->zbc_color;
 
 out:
 	nvgpu_mutex_release(&state->lock);
diff --git a/drivers/gpu/nvgpu/os/linux/cde.h b/drivers/gpu/nvgpu/os/linux/cde.h
index 2d7259d3b..2645526c0 100644
--- a/drivers/gpu/nvgpu/os/linux/cde.h
+++ b/drivers/gpu/nvgpu/os/linux/cde.h
@@ -48,6 +48,7 @@ struct nvgpu_channel_fence;
 struct nvgpu_channel;
 struct vm_gk20a;
 struct nvgpu_gpfifo_entry;
+struct nvgpu_user_fence;
 
 /*
  * this element defines a buffer that is allocated and mapped into gpu address
@@ -317,7 +318,7 @@ int gk20a_prepare_compressible_read(
 		u32 width, u32 height, u32 block_height_log2,
 		u32 submit_flags, struct nvgpu_channel_fence *fence,
 		u32 *valid_compbits, u32 *zbc_color,
-		struct nvgpu_fence_type **fence_out);
+		struct nvgpu_user_fence *fence_out);
 int gk20a_mark_compressible_write(
 		struct gk20a *g, u32 buffer_fd, u32 valid_compbits, u64 offset,
 		u32 zbc_color);
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c
index c8288fb47..e83b53b05 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c
@@ -48,6 +48,7 @@
 #include <nvgpu/preempt.h>
 #include <nvgpu/swprofile.h>
 #include <nvgpu/nvgpu_init.h>
+#include <nvgpu/user_fence.h>
 
 #include <nvgpu/fifo/swprofile.h>
 
@@ -793,13 +794,13 @@ static int gk20a_ioctl_channel_submit_gpfifo(
 	struct nvgpu_submit_gpfifo_args *args)
 {
 	struct nvgpu_channel_fence fence;
-	struct nvgpu_fence_type *fence_out;
+	struct nvgpu_user_fence fence_out = nvgpu_user_fence_init();
 	u32 submit_flags = 0;
 	int fd = -1;
 	struct gk20a *g = ch->g;
 	struct nvgpu_fifo *f = &g->fifo;
 	struct nvgpu_swprofiler *kickoff_profiler = &f->kickoff_profiler;
-	struct nvgpu_gpfifo_userdata userdata;
+	struct nvgpu_gpfifo_userdata userdata = { NULL, NULL };
 	bool flag_fence_wait = (args->flags &
 			NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) != 0U;
 	bool flag_fence_get = (args->flags &
@@ -860,17 +861,18 @@ static int gk20a_ioctl_channel_submit_gpfifo(
 	/* Convert fence_out to something we can pass back to user space. */
 	if (flag_fence_get) {
 		if (flag_sync_fence) {
-			ret = nvgpu_fence_install_fd(fence_out, fd);
+			ret = fence_out.os_fence.ops->install_fence(
+					&fence_out.os_fence, fd);
 			if (ret)
 				put_unused_fd(fd);
 			else
 				args->fence.id = fd;
 		} else {
-			args->fence.id = fence_out->syncpt_id;
-			args->fence.value = fence_out->syncpt_value;
+			args->fence.id = fence_out.syncpt_id;
+			args->fence.value = fence_out.syncpt_value;
 		}
+		nvgpu_user_fence_release(&fence_out);
 	}
-	nvgpu_fence_put(fence_out);
 
 	nvgpu_swprofile_snapshot(kickoff_profiler, PROF_KICKOFF_IOCTL_EXIT);
 
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c
index f1071a2b4..b0890bef8 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c
@@ -52,6 +52,7 @@
 #include <nvgpu/channel_sync_syncpt.h>
 #include <nvgpu/soc.h>
 #include <nvgpu/nvgpu_init.h>
+#include <nvgpu/user_fence.h>
 
 #include "ioctl_ctrl.h"
 #include "ioctl_dbg.h"
@@ -442,7 +443,7 @@ static int gk20a_ctrl_prepare_compressible_read(
 #ifdef CONFIG_NVGPU_SUPPORT_CDE
 	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
 	struct nvgpu_channel_fence fence;
-	struct nvgpu_fence_type *fence_out = NULL;
+	struct nvgpu_user_fence fence_out = nvgpu_user_fence_init();
 	int submit_flags = nvgpu_submit_gpfifo_user_flags_to_common_flags(
 		args->submit_flags);
 	int fd = -1;
@@ -472,31 +473,31 @@ static int gk20a_ctrl_prepare_compressible_read(
 		return ret;
 	}
 
-	/* Convert fence_out to something we can pass back to user space. */
+	/*
+	 * Convert fence_out, if any, to something we can pass back to user
+	 * space. Even if successful, the fence may not exist if there was
+	 * nothing to be done (no compbits requested); that's not an error.
+	 */
 	if (submit_flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) {
 		if (submit_flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE) {
-			if (fence_out) {
-				ret = nvgpu_fence_install_fd(fence_out, fd);
-				if (ret)
+			if (nvgpu_os_fence_is_initialized(&fence_out.os_fence)) {
+				ret = fence_out.os_fence.ops->install_fence(
+						&fence_out.os_fence, fd);
+				if (ret) {
 					put_unused_fd(fd);
-				else
-					args->fence.fd = fd;
+					fd = -1;
+				}
 			} else {
-				args->fence.fd = -1;
 				put_unused_fd(fd);
+				fd = -1;
 			}
+			args->fence.fd = fd;
 		} else {
-			if (fence_out) {
-				args->fence.syncpt_id = fence_out->syncpt_id;
-				args->fence.syncpt_value =
-						fence_out->syncpt_value;
-			} else {
-				args->fence.syncpt_id = NVGPU_INVALID_SYNCPT_ID;
-				args->fence.syncpt_value = 0;
-			}
+			args->fence.syncpt_id = fence_out.syncpt_id;
+			args->fence.syncpt_value = fence_out.syncpt_value;
 		}
+		nvgpu_user_fence_release(&fence_out);
 	}
-	nvgpu_fence_put(fence_out);
 #endif
 
 	return ret;