diff --git a/drivers/gpu/nvgpu/common/fifo/channel.c b/drivers/gpu/nvgpu/common/fifo/channel.c
index 0c1cdd59f..eea371d0b 100644
--- a/drivers/gpu/nvgpu/common/fifo/channel.c
+++ b/drivers/gpu/nvgpu/common/fifo/channel.c
@@ -753,16 +753,23 @@ static int channel_gk20a_alloc_priv_cmdbuf(struct channel_gk20a *c,
 	 * Any sync-pt fences will take less memory so we can ignore them for
 	 * now.
 	 *
-	 * A semaphore ACQ (fence-wait) is 8 dwords: semaphore_a, semaphore_b,
+	 * A semaphore ACQ (fence-wait) is 8 words: semaphore_a, semaphore_b,
 	 * semaphore_c, and semaphore_d. A semaphore INCR (fence-get) will be 10
-	 * dwords: all the same as an ACQ plus a non-stalling intr which is
-	 * another 2 dwords.
+	 * words: all the same as an ACQ plus a non-stalling intr which is
+	 * another 2 words.
 	 *
-	 * Lastly the number of gpfifo entries per channel is fixed so at most
-	 * we can use 2/3rds of the gpfifo entries (1 pre-fence entry, one
-	 * userspace entry, and one post-fence entry). Thus the computation is:
+	 * There are two cases to consider. In the first, the priv_cmd_buf is
+	 * sized from the gpfifo count. The worst case is one pre-fence, one
+	 * userspace and one post-fence entry per submit, so at most 2/3rds
+	 * of the GPFIFO entries can hold sync commands:
 	 *
-	 *   (gpfifo entry number * (2 / 3) * (8 + 10) * 4 bytes.
+	 *   nr_gpfifos * (2 / 3) * (8 + 10) * 4 bytes
+	 *
+	 * If num_in_flight is specified instead, that value is used to size
+	 * the priv_cmd_buf. The worst case is two sync commands (one ACQ and
+	 * one INCR) per submit, so the priv_cmd_buf size is:
+	 *
+	 *   num_in_flight * (8 + 10) * 4 bytes
 	 */
 	size = num_in_flight * 18U * (u32)sizeof(u32);
 	if (gpfifo_based) {