diff --git a/arch/nvgpu-common.yaml b/arch/nvgpu-common.yaml
index 1e22ff38e..5fdbacb4d 100644
--- a/arch/nvgpu-common.yaml
+++ b/arch/nvgpu-common.yaml
@@ -348,6 +348,8 @@ fifo:
     submit:
       safe: yes
       sources: [ common/fifo/submit.c,
+                 common/fifo/priv_cmdbuf.c,
+                 include/nvgpu/priv_cmdbuf.h,
                  include/nvgpu/profile.h ]
       deps: [ ]
     runlist:
diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
index 7bc58db9f..fdca1590d 100644
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -518,6 +518,7 @@ nvgpu-y += \
 	common/fifo/channel.o \
 	common/fifo/pbdma.o \
 	common/fifo/submit.o \
+	common/fifo/priv_cmdbuf.o \
 	common/fifo/tsg.o \
 	common/fifo/runlist.o \
 	common/fifo/engine_status.o \
diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources
index c4d6586ba..0970a9816 100644
--- a/drivers/gpu/nvgpu/Makefile.sources
+++ b/drivers/gpu/nvgpu/Makefile.sources
@@ -387,6 +387,7 @@ endif
 
 ifeq ($(CONFIG_NVGPU_KERNEL_MODE_SUBMIT),1)
 srcs += common/fifo/submit.c \
+	common/fifo/priv_cmdbuf.c \
 	common/sync/channel_sync.c \
 	common/sync/channel_sync_syncpt.c
 endif
diff --git a/drivers/gpu/nvgpu/common/fifo/channel.c b/drivers/gpu/nvgpu/common/fifo/channel.c
index aec1426dd..b2cb0fa85 100644
--- a/drivers/gpu/nvgpu/common/fifo/channel.c
+++ b/drivers/gpu/nvgpu/common/fifo/channel.c
@@ -60,12 +60,12 @@
 #ifdef CONFIG_NVGPU_DEBUGGER
 #include <nvgpu/gr/gr.h>
 #endif
+#include <nvgpu/priv_cmdbuf.h>
 
 static void free_channel(struct nvgpu_fifo *f, struct nvgpu_channel *ch);
 static void channel_dump_ref_actions(struct nvgpu_channel *ch);
 
 #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
-static void channel_free_priv_cmd_q(struct nvgpu_channel *ch);
 static void channel_free_prealloc_resources(struct nvgpu_channel *c);
 static void channel_joblist_add(struct nvgpu_channel *c,
 		struct nvgpu_channel_job *job);
@@ -238,164 +238,6 @@ static void channel_kernelmode_deinit(struct nvgpu_channel *ch)
 	nvgpu_mutex_release(&ch->sync_lock);
 }
 
-/* allocate private cmd buffer.
-   used for inserting commands before/after user submitted buffers. */
-static int channel_alloc_priv_cmdbuf(struct nvgpu_channel *ch,
-	u32 num_in_flight)
-{
-	struct gk20a *g = ch->g;
-	struct vm_gk20a *ch_vm = ch->vm;
-	struct priv_cmd_queue *q = &ch->priv_cmd_q;
-	u64 size, tmp_size;
-	int err = 0;
-	bool gpfifo_based = false;
-
-	if (num_in_flight == 0U) {
-		num_in_flight = ch->gpfifo.entry_num;
-		gpfifo_based = true;
-	}
-
-	/*
-	 * Compute the amount of priv_cmdbuf space we need. In general the worst
-	 * case is the kernel inserts both a semaphore pre-fence and post-fence.
-	 * Any sync-pt fences will take less memory so we can ignore them for
-	 * now.
-	 *
-	 * A semaphore ACQ (fence-wait) is 8 words: semaphore_a, semaphore_b,
-	 * semaphore_c, and semaphore_d. A semaphore INCR (fence-get) will be 10
-	 * words: all the same as an ACQ plus a non-stalling intr which is
-	 * another 2 words.
-	 *
-	 * We have two cases to consider: the first is we base the size of the
-	 * priv_cmd_buf on the gpfifo count. Here we multiply by a factor of
-	 * 2/3rds because only at most 2/3rds of the GPFIFO can be used for
-	 * sync commands:
-	 *
-	 *   nr_gpfifos * (2 / 3) * (8 + 10) * 4 bytes
-	 *
-	 * If instead num_in_flight is specified then we will use that to size
-	 * the priv_cmd_buf. The worst case is two sync commands (one ACQ and
-	 * one INCR) per submit so we have a priv_cmd_buf size of:
-	 *
-	 *   num_in_flight * (8 + 10) * 4 bytes
-	 */
-	size = num_in_flight * 18UL * sizeof(u32);
-	if (gpfifo_based) {
-		size = 2U * size / 3U;
-	}
-
-	tmp_size = PAGE_ALIGN(roundup_pow_of_two(size));
-	nvgpu_assert(tmp_size <= U32_MAX);
-	size = (u32)tmp_size;
-
-	err = nvgpu_dma_alloc_map_sys(ch_vm, size, &q->mem);
-	if (err != 0) {
-		nvgpu_err(g, "%s: memory allocation failed", __func__);
-		goto clean_up;
-	}
-
-	tmp_size = q->mem.size / sizeof(u32);
-	nvgpu_assert(tmp_size <= U32_MAX);
-	q->size = (u32)tmp_size;
-
-	return 0;
-
-clean_up:
-	channel_free_priv_cmd_q(ch);
-	return err;
-}
-
-static void channel_free_priv_cmd_q(struct nvgpu_channel *ch)
-{
-	struct vm_gk20a *ch_vm = ch->vm;
-	struct priv_cmd_queue *q = &ch->priv_cmd_q;
-
-	if (q->size == 0U) {
-		return;
-	}
-
-	nvgpu_dma_unmap_free(ch_vm, &q->mem);
-
-	(void) memset(q, 0, sizeof(struct priv_cmd_queue));
-}
-
-/* allocate a cmd buffer with given size. size is number of u32 entries */
-int nvgpu_channel_alloc_priv_cmdbuf(struct nvgpu_channel *c, u32 orig_size,
-			     struct priv_cmd_entry *e)
-{
-	struct priv_cmd_queue *q = &c->priv_cmd_q;
-	u32 free_count;
-	u32 size = orig_size;
-
-	nvgpu_log_fn(c->g, "size %d", orig_size);
-
-	if (e == NULL) {
-		nvgpu_err(c->g,
-			"ch %d: priv cmd entry is null",
-			c->chid);
-		return -EINVAL;
-	}
-
-	/* if free space in the end is less than requested, increase the size
-	 * to make the real allocated space start from beginning. */
-	if (q->put + size > q->size) {
-		size = orig_size + (q->size - q->put);
-	}
-
-	nvgpu_log_info(c->g, "ch %d: priv cmd queue get:put %d:%d",
-			c->chid, q->get, q->put);
-
-	free_count = (q->size - (q->put - q->get) - 1U) % q->size;
-
-	if (size > free_count) {
-		return -EAGAIN;
-	}
-
-	e->size = orig_size;
-	e->mem = &q->mem;
-
-	/* if we have increased size to skip free space in the end, set put
-	   to beginning of cmd buffer (0) + size */
-	if (size != orig_size) {
-		e->off = 0;
-		e->gva = q->mem.gpu_va;
-		q->put = orig_size;
-	} else {
-		e->off = q->put;
-		e->gva = q->mem.gpu_va + q->put * sizeof(u32);
-		q->put = (q->put + orig_size) & (q->size - 1U);
-	}
-
-	/* we already handled q->put + size > q->size so BUG_ON this */
-	BUG_ON(q->put > q->size);
-
-	/*
-	 * commit the previous writes before making the entry valid.
-	 * see the corresponding nvgpu_smp_rmb() in
-	 * nvgpu_channel_update_priv_cmd_q_and_free_entry().
-	 */
-	nvgpu_smp_wmb();
-
-	e->valid = true;
-	nvgpu_log_fn(c->g, "done");
-
-	return 0;
-}
-
-/*
- * Don't call this to free an explicit cmd entry.
- * It doesn't update priv_cmd_queue get/put.
- */
-void nvgpu_channel_free_priv_cmd_entry(struct nvgpu_channel *c,
-			     struct priv_cmd_entry *e)
-{
-	if (nvgpu_channel_is_prealloc_enabled(c)) {
-		(void) memset(e, 0, sizeof(struct priv_cmd_entry));
-	} else {
-		nvgpu_kfree(c->g, e);
-	}
-}
-
 int nvgpu_channel_alloc_job(struct nvgpu_channel *c,
 		struct nvgpu_channel_job **job_out)
 {
@@ -1213,29 +1055,6 @@ static void channel_worker_enqueue(struct nvgpu_channel *ch)
 	}
 }
 
-void nvgpu_channel_update_priv_cmd_q_and_free_entry(
-		struct nvgpu_channel *ch, struct priv_cmd_entry *e)
-{
-	struct priv_cmd_queue *q = &ch->priv_cmd_q;
-	struct gk20a *g = ch->g;
-
-	if (e == NULL) {
-		return;
-	}
-
-	if (e->valid) {
-		/* read the entry's valid flag before reading its contents */
-		nvgpu_smp_rmb();
-		if ((q->get != e->off) && e->off != 0U) {
-			nvgpu_err(g, "requests out-of-order, ch=%d",
-				  ch->chid);
-		}
-		q->get = e->off + e->size;
-	}
-
-	nvgpu_channel_free_priv_cmd_entry(ch, e);
-}
-
 int nvgpu_channel_add_job(struct nvgpu_channel *c,
 				 struct nvgpu_channel_job *job,
 				 bool skip_buffer_refcounting)
diff --git a/drivers/gpu/nvgpu/common/fifo/priv_cmdbuf.c b/drivers/gpu/nvgpu/common/fifo/priv_cmdbuf.c
new file mode 100644
index 000000000..cdcf2ef55
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/fifo/priv_cmdbuf.c
@@ -0,0 +1,212 @@
+/*
+ * Copyright (c) 2011-2020, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <nvgpu/log.h>
+#include <nvgpu/utils.h>
+#include <nvgpu/log2.h>
+#include <nvgpu/barrier.h>
+#include <nvgpu/dma.h>
+#include <nvgpu/nvgpu_mem.h>
+#include <nvgpu/kmem.h>
+#include <nvgpu/channel.h>
+#include <nvgpu/priv_cmdbuf.h>
+
+/* Allocate the channel's private cmd queue, used for inserting commands
+   before/after user submitted buffers. Returns 0 or the DMA alloc error. */
+int channel_alloc_priv_cmdbuf(struct nvgpu_channel *ch,
+	u32 num_in_flight)
+{
+	struct gk20a *g = ch->g;
+	struct vm_gk20a *ch_vm = ch->vm;
+	struct priv_cmd_queue *q = &ch->priv_cmd_q;
+	u64 size, tmp_size;
+	int err = 0;
+	bool gpfifo_based = false;
+
+	if (num_in_flight == 0U) {
+		num_in_flight = ch->gpfifo.entry_num;
+		gpfifo_based = true;
+	}
+
+	/*
+	 * Compute the amount of priv_cmdbuf space we need. In general the worst
+	 * case is that the kernel inserts both a semaphore pre-fence and a
+	 * post-fence. Any sync-pt fences will take less memory so we can
+	 * ignore them for now.
+	 *
+	 * A semaphore ACQ (fence-wait) is 8 words: semaphore_a, semaphore_b,
+	 * semaphore_c, and semaphore_d. A semaphore INCR (fence-get) will be 10
+	 * words: all the same as an ACQ plus a non-stalling intr which is
+	 * another 2 words.
+	 *
+	 * We have two cases to consider. If num_in_flight is 0 we base the size
+	 * of the priv_cmd_buf on the gpfifo entry count. Here we multiply by a
+	 * factor of 2/3rds because at most 2/3rds of the GPFIFO can be used for
+	 * sync commands:
+	 *
+	 *   nr_gpfifos * (2 / 3) * (8 + 10) * 4 bytes
+	 *
+	 * If instead num_in_flight is specified then we will use that to size
+	 * the priv_cmd_buf. The worst case is two sync commands (one ACQ and
+	 * one INCR) per submit so we have a priv_cmd_buf size of:
+	 *
+	 *   num_in_flight * (8 + 10) * 4 bytes
+	 */
+	size = num_in_flight * 18UL * sizeof(u32);
+	if (gpfifo_based) {
+		size = 2U * size / 3U;
+	}
+	/* power of two, which the put wrap mask in the entry allocator needs */
+	tmp_size = PAGE_ALIGN(roundup_pow_of_two(size));
+	nvgpu_assert(tmp_size <= U32_MAX);
+	size = (u32)tmp_size;
+
+	err = nvgpu_dma_alloc_map_sys(ch_vm, size, &q->mem);
+	if (err != 0) {
+		nvgpu_err(g, "%s: memory allocation failed", __func__);
+		goto clean_up;
+	}
+	/* q->size counts u32 words, not bytes */
+	tmp_size = q->mem.size / sizeof(u32);
+	nvgpu_assert(tmp_size <= U32_MAX);
+	q->size = (u32)tmp_size;
+
+	return 0;
+
+clean_up:
+	channel_free_priv_cmd_q(ch);
+	return err;
+}
+
+void channel_free_priv_cmd_q(struct nvgpu_channel *ch)
+{
+	struct vm_gk20a *ch_vm = ch->vm;
+	struct priv_cmd_queue *q = &ch->priv_cmd_q;
+
+	if (q->size == 0U) {	/* zero size: nothing to release */
+		return;
+	}
+
+	nvgpu_dma_unmap_free(ch_vm, &q->mem);
+	/* zero get/put/size too, so a repeated call hits the early return */
+	(void) memset(q, 0, sizeof(struct priv_cmd_queue));
+}
+
+/* Reserve orig_size u32 words of the priv cmd queue for e; 0 or -errno. */
+int nvgpu_channel_alloc_priv_cmdbuf(struct nvgpu_channel *c, u32 orig_size,
+			     struct priv_cmd_entry *e)
+{
+	struct priv_cmd_queue *q = &c->priv_cmd_q;
+	u32 free_count;
+	u32 size = orig_size;
+
+	nvgpu_log_fn(c->g, "size %d", orig_size);
+
+	if (e == NULL) {
+		nvgpu_err(c->g,
+			"ch %d: priv cmd entry is null",
+			c->chid);
+		return -EINVAL;
+	}
+
+	/* If the tail of the queue is too short for the request, also charge
+	 * the unused tail words so that the entry can start at offset 0. */
+	if (q->put + size > q->size) {
+		size = orig_size + (q->size - q->put);
+	}
+
+	nvgpu_log_info(c->g, "ch %d: priv cmd queue get:put %d:%d",
+			c->chid, q->get, q->put);
+	/* one word is always held back: free = size - used - 1 */
+	free_count = (q->size - (q->put - q->get) - 1U) % q->size;
+
+	if (size > free_count) {
+		return -EAGAIN;
+	}
+
+	e->size = orig_size;
+	e->mem = &q->mem;
+
+	/* Wrapped: the entry starts at the beginning of the cmd buffer (0),
+	   so the new put is 0 + orig_size. */
+	if (size != orig_size) {
+		e->off = 0;
+		e->gva = q->mem.gpu_va;
+		q->put = orig_size;
+	} else {
+		e->off = q->put;
+		e->gva = q->mem.gpu_va + q->put * sizeof(u32); /* in bytes */
+		q->put = (q->put + orig_size) & (q->size - 1U);
+	}
+
+	/* both branches keep put within the queue, so this must not fire */
+	BUG_ON(q->put > q->size);
+
+	/*
+	 * Commit the previous writes before making the entry valid.
+	 * See the corresponding nvgpu_smp_rmb() in
+	 * nvgpu_channel_update_priv_cmd_q_and_free_entry().
+	 */
+	nvgpu_smp_wmb();
+
+	e->valid = true;
+	nvgpu_log_fn(c->g, "done");
+
+	return 0;
+}
+
+/*
+ * Frees only the entry itself (zeroed when prealloc is enabled, nvgpu_kfree()
+ * otherwise). Don't use it to retire a queued entry: get/put are not updated.
+ */
+void nvgpu_channel_free_priv_cmd_entry(struct nvgpu_channel *c,
+			     struct priv_cmd_entry *e)
+{
+	if (nvgpu_channel_is_prealloc_enabled(c)) {
+		(void) memset(e, 0, sizeof(struct priv_cmd_entry));
+	} else {
+		nvgpu_kfree(c->g, e);
+	}
+}
+
+void nvgpu_channel_update_priv_cmd_q_and_free_entry(
+		struct nvgpu_channel *ch, struct priv_cmd_entry *e)
+{
+	struct priv_cmd_queue *q = &ch->priv_cmd_q;
+	struct gk20a *g = ch->g;
+
+	if (e == NULL) {
+		return;
+	}
+	/* only an entry marked valid by the allocator updates q->get */
+	if (e->valid) {
+		/* order the valid read before reads of the entry's fields */
+		nvgpu_smp_rmb();
+		if ((q->get != e->off) && e->off != 0U) { /* 0: maybe wrapped */
+			nvgpu_err(g, "requests out-of-order, ch=%d",
+				  ch->chid);
+		}
+		q->get = e->off + e->size;	/* first word past the entry */
+	}
+
+	nvgpu_channel_free_priv_cmd_entry(ch, e);
+}
diff --git a/drivers/gpu/nvgpu/common/fifo/submit.c b/drivers/gpu/nvgpu/common/fifo/submit.c
index 1c47aa5f6..cdaa0af20 100644
--- a/drivers/gpu/nvgpu/common/fifo/submit.c
+++ b/drivers/gpu/nvgpu/common/fifo/submit.c
@@ -26,8 +26,10 @@
 #include <nvgpu/ltc.h>
 #include <nvgpu/os_sched.h>
 #include <nvgpu/utils.h>
+#include <nvgpu/channel.h>
 #include <nvgpu/channel_sync.h>
 #include <nvgpu/channel_sync_syncpt.h>
+#include <nvgpu/priv_cmdbuf.h>
 #include <nvgpu/bug.h>
 #include <nvgpu/fence.h>
 #include <nvgpu/profile.h>
diff --git a/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c b/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c
index 64510767f..ee2314844 100644
--- a/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c
+++ b/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c
@@ -35,6 +35,7 @@
 #include <nvgpu/channel.h>
 #include <nvgpu/channel_sync.h>
 #include <nvgpu/channel_sync_semaphore.h>
+#include <nvgpu/priv_cmdbuf.h>
 #include <nvgpu/fence.h>
 
 #include "channel_sync_priv.h"
diff --git a/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c b/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c
index adcdd6026..5d0e68222 100644
--- a/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c
+++ b/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c
@@ -34,6 +34,7 @@
 #include <nvgpu/channel.h>
 #include <nvgpu/channel_sync.h>
 #include <nvgpu/channel_sync_syncpt.h>
+#include <nvgpu/priv_cmdbuf.h>
 #include <nvgpu/fence.h>
 #include <nvgpu/string.h>
 
diff --git a/drivers/gpu/nvgpu/hal/sync/sema_cmdbuf_gk20a.c b/drivers/gpu/nvgpu/hal/sync/sema_cmdbuf_gk20a.c
index 7670bf119..5c3da8e4e 100644
--- a/drivers/gpu/nvgpu/hal/sync/sema_cmdbuf_gk20a.c
+++ b/drivers/gpu/nvgpu/hal/sync/sema_cmdbuf_gk20a.c
@@ -26,6 +26,7 @@
 #include <nvgpu/semaphore.h>
 #include <nvgpu/gk20a.h>
 #include <nvgpu/channel.h>
+#include <nvgpu/priv_cmdbuf.h>
 
 #include "sema_cmdbuf_gk20a.h"
 
diff --git a/drivers/gpu/nvgpu/hal/sync/sema_cmdbuf_gv11b.c b/drivers/gpu/nvgpu/hal/sync/sema_cmdbuf_gv11b.c
index cd6393f62..f6cc5b1a0 100644
--- a/drivers/gpu/nvgpu/hal/sync/sema_cmdbuf_gv11b.c
+++ b/drivers/gpu/nvgpu/hal/sync/sema_cmdbuf_gv11b.c
@@ -26,6 +26,7 @@
 #include <nvgpu/semaphore.h>
 #include <nvgpu/gk20a.h>
 #include <nvgpu/channel.h>
+#include <nvgpu/priv_cmdbuf.h>
 
 #include "sema_cmdbuf_gv11b.h"
 
diff --git a/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gk20a.c b/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gk20a.c
index 90e9e8633..34f65fc6d 100644
--- a/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gk20a.c
+++ b/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gk20a.c
@@ -25,6 +25,7 @@
 #include <nvgpu/nvgpu_mem.h>
 #include <nvgpu/gk20a.h>
 #include <nvgpu/channel.h>
+#include <nvgpu/priv_cmdbuf.h>
 
 #include "syncpt_cmdbuf_gk20a.h"
 
diff --git a/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gv11b.c b/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gv11b.c
index 0fe1b70a4..69cfc0198 100644
--- a/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gv11b.c
+++ b/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gv11b.c
@@ -29,6 +29,7 @@
 #include <nvgpu/lock.h>
 #include <nvgpu/gk20a.h>
 #include <nvgpu/channel.h>
+#include <nvgpu/priv_cmdbuf.h>
 #include <nvgpu/nvhost.h>
 #include <nvgpu/static_analysis.h>
 
diff --git a/drivers/gpu/nvgpu/include/nvgpu/channel.h b/drivers/gpu/nvgpu/include/nvgpu/channel.h
index 0840bcdb7..cdef0544a 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/channel.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/channel.h
@@ -46,6 +46,7 @@ struct nvgpu_gpfifo_userdata;
 struct nvgpu_gr_subctx;
 struct nvgpu_gr_ctx;
 struct nvgpu_debug_context;
+struct priv_cmd_entry;
 
 /**
  * S/W defined invalid channel identifier.
@@ -257,15 +258,6 @@ struct priv_cmd_queue {
 	u32 get;	/* get for priv cmd queue */
 };
 
-struct priv_cmd_entry {
-	bool valid;
-	struct nvgpu_mem *mem;
-	u32 off;	/* offset in mem, in u32 entries */
-	u64 gva;
-	u32 get;	/* start of entry in queue */
-	u32 size;	/* in words */
-};
-
 struct nvgpu_channel_job {
 	struct nvgpu_mapped_buf **mapped_buffers;
 	u32 num_mapped_buffers;
@@ -619,10 +611,6 @@ nvgpu_channel_from_worker_item(struct nvgpu_list_node *node)
 	return (struct nvgpu_channel *)
 	   ((uintptr_t)node - offsetof(struct nvgpu_channel, worker_item));
 };
-int nvgpu_channel_alloc_priv_cmdbuf(struct nvgpu_channel *c, u32 orig_size,
-			     struct priv_cmd_entry *e);
-void nvgpu_channel_update_priv_cmd_q_and_free_entry(
-	struct nvgpu_channel *ch, struct priv_cmd_entry *e);
 int nvgpu_channel_worker_init(struct gk20a *g);
 void nvgpu_channel_worker_deinit(struct gk20a *g);
 void nvgpu_channel_update(struct nvgpu_channel *c);
@@ -636,8 +624,6 @@ u32 nvgpu_channel_get_gpfifo_free_count(struct nvgpu_channel *ch);
 int nvgpu_channel_add_job(struct nvgpu_channel *c,
 				 struct nvgpu_channel_job *job,
 				 bool skip_buffer_refcounting);
-void nvgpu_channel_free_priv_cmd_entry(struct nvgpu_channel *c,
-			     struct priv_cmd_entry *e);
 void nvgpu_channel_clean_up_jobs(struct nvgpu_channel *c,
 					bool clean_all);
 int nvgpu_submit_channel_gpfifo_user(struct nvgpu_channel *c,
diff --git a/drivers/gpu/nvgpu/include/nvgpu/priv_cmdbuf.h b/drivers/gpu/nvgpu/include/nvgpu/priv_cmdbuf.h
new file mode 100644
index 000000000..2c98c8482
--- /dev/null
+++ b/drivers/gpu/nvgpu/include/nvgpu/priv_cmdbuf.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2018-2020, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef NVGPU_PRIV_CMDBUF_H
+#define NVGPU_PRIV_CMDBUF_H
+
+#include <nvgpu/types.h>
+
+struct gk20a;
+struct nvgpu_mem;
+struct nvgpu_channel;
+
+struct priv_cmd_entry {
+	bool valid;	/* set last by the allocator, after a wmb */
+	struct nvgpu_mem *mem;	/* backing buffer of the owning queue */
+	u32 off;	/* offset in mem, in u32 entries */
+	u64 gva;	/* GPU VA: mem->gpu_va + off * sizeof(u32) */
+	u32 get;	/* start of entry in queue (not set in priv_cmdbuf.c) */
+	u32 size;	/* in words */
+};
+
+int channel_alloc_priv_cmdbuf(struct nvgpu_channel *ch, u32 num_in_flight);
+void channel_free_priv_cmd_q(struct nvgpu_channel *ch);
+/* per-entry helpers; orig_size is a count of u32 words */
+int nvgpu_channel_alloc_priv_cmdbuf(struct nvgpu_channel *c, u32 orig_size,
+		struct priv_cmd_entry *e);
+void nvgpu_channel_free_priv_cmd_entry(struct nvgpu_channel *c,
+		struct priv_cmd_entry *e);
+void nvgpu_channel_update_priv_cmd_q_and_free_entry(struct nvgpu_channel *ch,
+		struct priv_cmd_entry *e);
+
+#endif /* NVGPU_PRIV_CMDBUF_H */