diff --git a/drivers/gpu/nvgpu/common/linux/cde.c b/drivers/gpu/nvgpu/common/linux/cde.c
index dcccafb47..4911afa19 100644
--- a/drivers/gpu/nvgpu/common/linux/cde.c
+++ b/drivers/gpu/nvgpu/common/linux/cde.c
@@ -1171,7 +1171,7 @@ __releases(&l->cde_app->mutex)
 
 	/* take always the postfence as it is needed for protecting the
 	 * cde context */
-	flags = __flags | NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET;
+	flags = __flags | NVGPU_SUBMIT_FLAGS_FENCE_GET;
 
 	/* gk20a_cde_execute_buffer() will grab a power reference of it's own */
 	gk20a_idle(g);
diff --git a/drivers/gpu/nvgpu/common/linux/ce2.c b/drivers/gpu/nvgpu/common/linux/ce2.c
index 6ffc05323..5339f1300 100644
--- a/drivers/gpu/nvgpu/common/linux/ce2.c
+++ b/drivers/gpu/nvgpu/common/linux/ce2.c
@@ -128,7 +128,7 @@ int gk20a_ce_execute_ops(struct gk20a *g,
 			pbdma_gp_entry1_length_f(methodSize));
 
 		/* take always the postfence as it is needed for protecting the ce context */
-		submit_flags |= NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET;
+		submit_flags |= NVGPU_SUBMIT_FLAGS_FENCE_GET;
 
 		nvgpu_smp_wmb();
 
diff --git a/drivers/gpu/nvgpu/common/linux/channel.c b/drivers/gpu/nvgpu/common/linux/channel.c
index db85f66a4..70baeb2d8 100644
--- a/drivers/gpu/nvgpu/common/linux/channel.c
+++ b/drivers/gpu/nvgpu/common/linux/channel.c
@@ -39,6 +39,34 @@
 #include <trace/events/gk20a.h>
 #include <uapi/linux/nvgpu.h>
 
+u32 nvgpu_submit_gpfifo_user_flags_to_common_flags(u32 user_flags)
+{ /* Translate NVGPU_SUBMIT_GPFIFO_FLAGS_* bits into NVGPU_SUBMIT_FLAGS_* bits */
+	u32 flags = 0; /* any bit of user_flags not tested below is dropped */
+
+	if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT)
+		flags |= NVGPU_SUBMIT_FLAGS_FENCE_WAIT;
+
+	if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
+		flags |= NVGPU_SUBMIT_FLAGS_FENCE_GET;
+
+	if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_HW_FORMAT)
+		flags |= NVGPU_SUBMIT_FLAGS_HW_FORMAT;
+
+	if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)
+		flags |= NVGPU_SUBMIT_FLAGS_SYNC_FENCE;
+
+	if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI)
+		flags |= NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI;
+
+	if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING)
+		flags |= NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING;
+
+	if (user_flags & NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST)
+		flags |= NVGPU_SUBMIT_FLAGS_RESCHEDULE_RUNLIST;
+
+	return flags; /* mask in the form gk20a_submit_channel_gpfifo() tests */
+}
+
 /*
  * API to convert error_notifiers in common code and of the form
  * NVGPU_ERR_NOTIFIER_* into Linux specific error_notifiers exposed to user
@@ -430,7 +458,7 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
 	bool new_sync_created = false;
 	int wait_fence_fd = -1;
 	int err = 0;
-	bool need_wfi = !(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI);
+	bool need_wfi = !(flags & NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI);
 	bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(c);
 
 	/*
@@ -465,7 +493,7 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
 	 * Optionally insert syncpt/semaphore wait in the beginning of gpfifo
 	 * submission when user requested and the wait hasn't expired.
 	 */
-	if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) {
+	if (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) {
 		int max_wait_cmds = c->deterministic ? 1 : 0;
 
 		if (!pre_alloc_enabled)
@@ -477,7 +505,7 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
 			goto fail;
 		}
 
-		if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
+		if (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE) {
 			wait_fence_fd = fence->id;
 			err = c->sync->wait_fd(c->sync, wait_fence_fd,
 					       job->wait_cmd, max_wait_cmds);
@@ -494,8 +522,8 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
 			*wait_cmd = job->wait_cmd;
 	}
 
-	if ((flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) &&
-	    (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE))
+	if ((flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) &&
+	    (flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE))
 		need_sync_fence = true;
 
 	/*
@@ -516,7 +544,7 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
 		goto clean_up_post_fence;
 	}
 
-	if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
+	if (flags & NVGPU_SUBMIT_FLAGS_FENCE_GET)
 		err = c->sync->incr_user(c->sync, wait_fence_fd, job->incr_cmd,
 				 job->post_fence, need_wfi, need_sync_fence,
 				 register_irq);
@@ -678,7 +706,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 	 * and one for post fence. */
 	const int extra_entries = 2;
 	bool skip_buffer_refcounting = (flags &
-			NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING);
+			NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING);
 	int err = 0;
 	bool need_job_tracking;
 	bool need_deferred_cleanup = false;
@@ -706,8 +734,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 	if (!gpfifo && !args)
 		return -EINVAL;
 
-	if ((flags & (NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT |
-		      NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)) &&
+	if ((flags & (NVGPU_SUBMIT_FLAGS_FENCE_WAIT |
+		      NVGPU_SUBMIT_FLAGS_FENCE_GET)) &&
 	    !fence)
 		return -EINVAL;
 
@@ -738,8 +766,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 	 * required and a fast submit can be done (ie. only need to write
 	 * out userspace GPFIFO entries and update GP_PUT).
 	 */
-	need_job_tracking = (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) ||
-			(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) ||
+	need_job_tracking = (flags & NVGPU_SUBMIT_FLAGS_FENCE_WAIT) ||
+			(flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) ||
 			c->timeout.enabled ||
 			(g->can_railgate && !c->deterministic) ||
 			!skip_buffer_refcounting;
@@ -757,8 +785,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 
 		need_sync_framework = force_need_sync_fence ||
 			gk20a_channel_sync_needs_sync_framework(g) ||
-			(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE &&
-			 flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET);
+			(flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE &&
+			 flags & NVGPU_SUBMIT_FLAGS_FENCE_GET);
 
 		/*
 		 * Deferred clean-up is necessary for any of the following
@@ -899,7 +927,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 
 	g->ops.fifo.userd_gp_put(g, c);
 
-	if ((NVGPU_SUBMIT_GPFIFO_FLAGS_RESCHEDULE_RUNLIST & flags) &&
+	if ((NVGPU_SUBMIT_FLAGS_RESCHEDULE_RUNLIST & flags) &&
 		g->ops.fifo.reschedule_runlist)
 		g->ops.fifo.reschedule_runlist(g, c->runlist_id);
 
diff --git a/drivers/gpu/nvgpu/common/linux/channel.h b/drivers/gpu/nvgpu/common/linux/channel.h
index 6026e2598..d4cb6d558 100644
--- a/drivers/gpu/nvgpu/common/linux/channel.h
+++ b/drivers/gpu/nvgpu/common/linux/channel.h
@@ -61,6 +61,7 @@ struct nvgpu_channel_linux {
 	struct dma_buf *cyclestate_buffer_handler;
 };
 
+u32 nvgpu_submit_gpfifo_user_flags_to_common_flags(u32 user_flags);
 int nvgpu_init_channel_support_linux(struct nvgpu_os_linux *l);
 void nvgpu_remove_channel_support_linux(struct nvgpu_os_linux *l);
 
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
index c75c92448..5b4dda6ce 100644
--- a/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
+++ b/drivers/gpu/nvgpu/common/linux/ioctl_channel.c
@@ -771,6 +771,7 @@ static int gk20a_ioctl_channel_submit_gpfifo(
 	struct nvgpu_channel_fence fence;
 	struct gk20a_fence *fence_out;
 	struct fifo_profile_gk20a *profile = NULL;
+	u32 submit_flags = 0;
 
 	int ret = 0;
 	gk20a_dbg_fn("");
@@ -789,8 +790,10 @@ static int gk20a_ioctl_channel_submit_gpfifo(
 		return -EPERM;
 
 	nvgpu_get_fence_args(&args->fence, &fence);
+	submit_flags =
+		nvgpu_submit_gpfifo_user_flags_to_common_flags(args->flags);
 	ret = gk20a_submit_channel_gpfifo(ch, NULL, args, args->num_entries,
-					  args->flags, &fence,
+					  submit_flags, &fence,
 					  &fence_out, false, profile);
 
 	if (ret)
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c
index 3392b6faf..e4b66460f 100644
--- a/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c
+++ b/drivers/gpu/nvgpu/common/linux/ioctl_ctrl.c
@@ -43,6 +43,7 @@
 #include "platform_gk20a.h"
 #include "os_linux.h"
 #include "dmabuf.h"
+#include "channel.h"
 
 #define HZ_TO_MHZ(a) ((a > 0xF414F9CD7ULL) ? 0xffff : (a >> 32) ? \
 	(u32) ((a * 0x10C8ULL) >> 32) : (u16) ((u32) a/MHZ))
@@ -341,7 +342,8 @@ static int gk20a_ctrl_prepare_compressible_read(
 	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
 	struct nvgpu_channel_fence fence;
 	struct gk20a_fence *fence_out = NULL;
-	int flags = args->submit_flags;
+	int submit_flags = nvgpu_submit_gpfifo_user_flags_to_common_flags(
+		args->submit_flags);
 
 	fence.id = args->fence.syncpt_id;
 	fence.value = args->fence.syncpt_value;
@@ -351,15 +353,15 @@ static int gk20a_ctrl_prepare_compressible_read(
 			args->compbits_hoffset, args->compbits_voffset,
 			args->scatterbuffer_offset,
 			args->width, args->height, args->block_height_log2,
-			flags, &fence, &args->valid_compbits,
+			submit_flags, &fence, &args->valid_compbits,
 			&args->zbc_color, &fence_out);
 
 	if (ret)
 		return ret;
 
 	/* Convert fence_out to something we can pass back to user space. */
-	if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) {
-		if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
+	if (submit_flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) {
+		if (submit_flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE) {
 			if (fence_out) {
 				int fd = gk20a_fence_install_fd(fence_out);
 				if (fd < 0)
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index 2008c4710..7fcc54cc7 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -47,6 +47,15 @@ struct fifo_profile_gk20a;
 #define NVGPU_GPFIFO_FLAGS_REPLAYABLE_FAULTS_ENABLE	(1 << 2)
 #define NVGPU_GPFIFO_FLAGS_USERMODE_SUPPORT		(1 << 3)
 
+/* Flags to be passed to gk20a_submit_channel_gpfifo() */
+#define NVGPU_SUBMIT_FLAGS_FENCE_WAIT	(1 << 0) /* insert a fence wait before the gpfifo entries */
+#define NVGPU_SUBMIT_FLAGS_FENCE_GET	(1 << 1) /* request a post-fence for the submit */
+#define NVGPU_SUBMIT_FLAGS_HW_FORMAT	(1 << 2) /* NOTE(review): no consumer visible in this patch - confirm meaning */
+#define NVGPU_SUBMIT_FLAGS_SYNC_FENCE	(1 << 3) /* fence->id is used as a sync-fence fd */
+#define NVGPU_SUBMIT_FLAGS_SUPPRESS_WFI	(1 << 4) /* makes need_wfi false in gk20a_submit_prepare_syncs() */
+#define NVGPU_SUBMIT_FLAGS_SKIP_BUFFER_REFCOUNTING	(1 << 5) /* sets skip_buffer_refcounting */
+#define NVGPU_SUBMIT_FLAGS_RESCHEDULE_RUNLIST	(1 << 6) /* call reschedule_runlist after GP_PUT update */
+
 /*
  * The binary format of 'struct nvgpu_channel_fence' introduced here
  * should match that of 'struct nvgpu_fence' defined in uapi header, since