gpu: nvgpu: separate fb queue management

FB queue handling differs from DMEM/EMEM queue handling in several
respects: no rewind is required, extra queue struct fields and queue
operations are needed only for FB queues, and the push/pop semantics
are different (a standalone sketch of these ring semantics follows the
change list below). Hence, prepare a separate structure and APIs for
FB queues; the PMU will have to deal with whichever queue
implementation is chosen. This patch does the following:

1. Rename functions/structures to falcon_fb_queue_<op/name>.
2. Export the nvgpu_falcon_fb_queue structure and functions.
3. Remove the rewind function pointer and use direct functions for
   push, pop and has_room.
4. Define a PMU wrapper, nvgpu_pmu_queue_is_empty, that selects the
   appropriate queue for the empty check.
5. Update the PMU side to handle the work buffer, and the SEC2 side
   to use the public queue functions.
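
As context for the semantics change above: an FB queue is a ring of
fixed-size elements, so the driver never needs the DMEM-style rewind;
it advances positions modulo the element count and tracks which
elements are still owned by in-flight commands in a bitmask. Below is
a minimal standalone sketch of those ring semantics; all names here
are illustrative models, not the nvgpu API.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define FBQ_NUM_ELEMENTS 16U  /* queue->size: a count of elements, not bytes */

struct fbq_model {
	uint32_t head;            /* next element to write (producer side) */
	uint32_t tail;            /* oldest element not yet freed */
	uint64_t element_in_use;  /* one bit per element */
};

/* Mirrors falcon_fb_queue_get_next(): plain modulo advance, no rewind. */
static uint32_t fbq_get_next(uint32_t pos)
{
	return (pos + 1U) % FBQ_NUM_ELEMENTS;
}

/* Mirrors falcon_fb_queue_has_room(): full when advancing head hits tail. */
static bool fbq_has_room(const struct fbq_model *q)
{
	return fbq_get_next(q->head) != q->tail;
}

static bool fbq_push(struct fbq_model *q)
{
	if (!fbq_has_room(q))
		return false;
	q->element_in_use |= 1ULL << q->head; /* busy until response arrives */
	q->head = fbq_get_next(q->head);
	return true;
}

int main(void)
{
	struct fbq_model q = {0};
	uint32_t pushed = 0;

	while (fbq_push(&q))
		pushed++;
	/* One slot is sacrificed to distinguish full from empty: prints 15. */
	printf("pushed %u of %u elements\n", pushed, FBQ_NUM_ELEMENTS);
	return 0;
}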

JIRA NVGPU-1994

Change-Id: Ia5e40384e6e3f9e81d5dbc3d8138eb091337c086
Signed-off-by: Sagar Kamble <skamble@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2016285
GVS: Gerrit_Virtual_Submit
Reviewed-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Commit: 05eaa33548 (parent 868853c66c)
Author: Sagar Kamble, 2019-02-11 09:16:45 +05:30
Committed by: mobile promotions
11 changed files with 659 additions and 375 deletions


@@ -26,16 +26,30 @@
#include <nvgpu/pmuif/gpmuif_cmn.h>
#include <nvgpu/flcnif_cmn.h>
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/pmu.h>
#include <nvgpu/string.h>
#include <nvgpu/kmem.h>
#include <nvgpu/falcon_fb_queue.h>
#include "falcon_queue_priv.h"
#include "falcon_priv.h"
#include "falcon_fb_queue.h"
#include "falcon_fb_queue_priv.h"
/* FB-Q ops */
static int falcon_queue_tail_fb(struct nvgpu_falcon *flcn,
struct nvgpu_falcon_queue *queue, u32 *tail, bool set)
static int falcon_fb_queue_head(struct nvgpu_falcon *flcn,
struct nvgpu_falcon_fb_queue *queue, u32 *head, bool set)
{
int err = -EINVAL;
if (flcn->flcn_engine_dep_ops.queue_head != NULL) {
err = flcn->flcn_engine_dep_ops.queue_head(flcn->g, queue->id,
queue->index, head, set);
}
return err;
}
static int falcon_fb_queue_tail(struct nvgpu_falcon *flcn,
struct nvgpu_falcon_fb_queue *queue, u32 *tail, bool set)
{
struct gk20a *g = flcn->g;
int err = -EINVAL;
@@ -53,15 +67,15 @@ static int falcon_queue_tail_fb(struct nvgpu_falcon *flcn,
return err;
}
static inline u32 falcon_queue_get_next_fb(struct nvgpu_falcon *flcn,
struct nvgpu_falcon_queue *queue, u32 head)
static inline u32 falcon_fb_queue_get_next(struct nvgpu_falcon *flcn,
struct nvgpu_falcon_fb_queue *queue, u32 head)
{
return (head + 1U) % queue->size;
}
static bool falcon_queue_has_room_fb(struct nvgpu_falcon *flcn,
struct nvgpu_falcon_queue *queue,
u32 size, bool *need_rewind)
static bool falcon_fb_queue_has_room(struct nvgpu_falcon *flcn,
struct nvgpu_falcon_fb_queue *queue,
u32 size)
{
u32 head = 0;
u32 tail = 0;
@@ -80,14 +94,14 @@ static bool falcon_queue_has_room_fb(struct nvgpu_falcon *flcn,
goto exit;
}
next_head = falcon_queue_get_next_fb(flcn, queue, head);
next_head = falcon_fb_queue_get_next(flcn, queue, head);
exit:
return next_head != tail;
}
static int falcon_queue_write_fb(struct nvgpu_falcon *flcn,
struct nvgpu_falcon_queue *queue, u32 offset,
static int falcon_fb_queue_write(struct nvgpu_falcon *flcn,
struct nvgpu_falcon_fb_queue *queue, u32 offset,
u8 *src, u32 size)
{
struct gk20a *g = flcn->g;
@@ -123,8 +137,8 @@ exit:
return err;
}
static int falcon_queue_element_set_use_state_fb(struct nvgpu_falcon *flcn,
struct nvgpu_falcon_queue *queue, u32 queue_pos, bool set)
static int falcon_fb_queue_set_element_use_state(struct nvgpu_falcon *flcn,
struct nvgpu_falcon_fb_queue *queue, u32 queue_pos, bool set)
{
int err = 0;
@@ -152,38 +166,261 @@ exit:
return err;
}
static int falcon_queue_push_fb(struct nvgpu_falcon *flcn,
struct nvgpu_falcon_queue *queue, void *data, u32 size)
static int falcon_fb_queue_is_element_in_use(struct nvgpu_falcon *flcn,
struct nvgpu_falcon_fb_queue *queue,
u32 queue_pos, bool *in_use)
{
struct gk20a *g = flcn->g;
int err = 0;
if (queue_pos >= queue->size) {
err = -EINVAL;
goto exit;
}
*in_use = test_bit((int)queue_pos, (void *)&queue->fbq.element_in_use);
exit:
return err;
}
static int falcon_fb_queue_sweep(struct nvgpu_falcon *flcn,
struct nvgpu_falcon_fb_queue *queue)
{
u32 head;
u32 tail;
bool in_use = false;
int err = 0;
tail = queue->fbq.tail;
err = queue->head(flcn, queue, &head, QUEUE_GET);
if (err != 0) {
nvgpu_err(flcn->g, "flcn-%d queue-%d, position GET failed",
flcn->flcn_id, queue->id);
goto exit;
}
/*
* Step from tail forward in the queue,
* to see how many consecutive entries
* can be made available.
*/
while (tail != head) {
if (falcon_fb_queue_is_element_in_use(flcn, queue,
tail, &in_use) != 0) {
break;
}
if (in_use) {
break;
}
tail = falcon_fb_queue_get_next(flcn, queue, tail);
}
/* Update tail */
queue->fbq.tail = tail;
exit:
return err;
}
u32 nvgpu_falcon_fb_queue_get_id(struct nvgpu_falcon_fb_queue *queue)
{
return queue->id;
}
u32 nvgpu_falcon_fb_queue_get_index(struct nvgpu_falcon_fb_queue *queue)
{
return queue->index;
}
u32 nvgpu_falcon_fb_queue_get_size(struct nvgpu_falcon_fb_queue *queue)
{
return queue->size;
}
u32 nvgpu_falcon_fb_queue_get_position(struct nvgpu_falcon_fb_queue *queue)
{
return queue->position;
}
/* return the queue element size */
u32 nvgpu_falcon_fb_queue_get_element_size(struct nvgpu_falcon_fb_queue *queue)
{
return queue->fbq.element_size;
}
/* return the queue offset from super surface FBQ's */
u32 nvgpu_falcon_fb_queue_get_offset(struct nvgpu_falcon_fb_queue *queue)
{
return queue->fbq.fb_offset;
}
/* lock work buffer of queue */
void nvgpu_falcon_fb_queue_lock_work_buffer(struct nvgpu_falcon_fb_queue *queue)
{
/* acquire work buffer mutex */
nvgpu_mutex_acquire(&queue->fbq.work_buffer_mutex);
}
/* unlock work buffer of queue */
void nvgpu_falcon_fb_queue_unlock_work_buffer(
struct nvgpu_falcon_fb_queue *queue)
{
/* release work buffer mutex */
nvgpu_mutex_release(&queue->fbq.work_buffer_mutex);
}
/* return a pointer of queue work buffer */
u8 *nvgpu_falcon_fb_queue_get_work_buffer(struct nvgpu_falcon_fb_queue *queue)
{
return queue->fbq.work_buffer;
}
int nvgpu_falcon_fb_queue_free_element(struct nvgpu_falcon *flcn,
struct nvgpu_falcon_fb_queue *queue, u32 queue_pos)
{
int err = 0;
err = falcon_fb_queue_set_element_use_state(flcn, queue,
queue_pos, false);
if (err != 0) {
nvgpu_err(flcn->g, "fb queue elelment %d free failed",
queue_pos);
goto exit;
}
err = falcon_fb_queue_sweep(flcn, queue);
exit:
return err;
}
/* queue is_empty check with lock */
bool nvgpu_falcon_fb_queue_is_empty(struct nvgpu_falcon *flcn,
struct nvgpu_falcon_fb_queue *queue)
{
u32 q_head = 0;
u32 q_tail = 0;
int err = 0;
if ((flcn == NULL) || (queue == NULL)) {
return true;
}
/* acquire mutex */
nvgpu_mutex_acquire(&queue->mutex);
err = queue->head(flcn, queue, &q_head, QUEUE_GET);
if (err != 0) {
nvgpu_err(flcn->g, "flcn-%d queue-%d, head GET failed",
flcn->flcn_id, queue->id);
goto exit;
}
err = queue->tail(flcn, queue, &q_tail, QUEUE_GET);
if (err != 0) {
nvgpu_err(flcn->g, "flcn-%d queue-%d, tail GET failed",
flcn->flcn_id, queue->id);
goto exit;
}
exit:
/* release mutex */
nvgpu_mutex_release(&queue->mutex);
return q_head == q_tail;
}
static int falcon_fb_queue_prepare_write(struct nvgpu_falcon *flcn,
struct nvgpu_falcon_fb_queue *queue, u32 size)
{
int err = 0;
/* make sure there's enough free space for the write */
if (!falcon_fb_queue_has_room(flcn, queue, size)) {
nvgpu_pmu_dbg(flcn->g, "queue full: queue-id %d: index %d",
queue->id, queue->index);
err = -EAGAIN;
goto exit;
}
err = queue->head(flcn, queue, &queue->position, QUEUE_GET);
if (err != 0) {
nvgpu_err(flcn->g, "flcn-%d queue-%d, position GET failed",
flcn->flcn_id, queue->id);
goto exit;
}
exit:
return err;
}
/* queue push operation with lock */
int nvgpu_falcon_fb_queue_push(struct nvgpu_falcon *flcn,
struct nvgpu_falcon_fb_queue *queue, void *data, u32 size)
{
struct gk20a *g;
int err = 0;
if ((flcn == NULL) || (queue == NULL)) {
return -EINVAL;
}
g = queue->g;
nvgpu_log_fn(g, " ");
if (queue->oflag != OFLAG_WRITE) {
nvgpu_err(flcn->g, "flcn-%d, queue-%d not opened for write",
flcn->flcn_id, queue->id);
err = -EINVAL;
goto exit;
}
/* acquire mutex */
nvgpu_mutex_acquire(&queue->mutex);
err = falcon_fb_queue_prepare_write(flcn, queue, size);
if (err != 0) {
goto unlock_mutex;
}
/* Bounds check size */
if (size > queue->fbq.element_size) {
nvgpu_err(g, "size too large size=0x%x", size);
goto exit;
goto unlock_mutex;
}
/* Set queue element in use */
if (falcon_queue_element_set_use_state_fb(flcn, queue,
if (falcon_fb_queue_set_element_use_state(flcn, queue,
queue->position, true) != 0) {
nvgpu_err(g,
"fb-queue element in use map is in invalid state");
goto exit;
err = -EINVAL;
goto unlock_mutex;
}
/* write data to FB */
err = falcon_queue_write_fb(flcn, queue, queue->position, data, size);
err = falcon_fb_queue_write(flcn, queue, queue->position, data, size);
if (err != 0) {
nvgpu_err(g, "write to fb-queue failed");
goto exit;
goto unlock_mutex;
}
queue->position = falcon_queue_get_next_fb(flcn, queue,
queue->position = falcon_fb_queue_get_next(flcn, queue,
queue->position);
err = queue->head(flcn, queue, &queue->position, QUEUE_SET);
if (err != 0) {
nvgpu_err(flcn->g, "flcn-%d queue-%d, position SET failed",
flcn->flcn_id, queue->id);
goto unlock_mutex;
}
unlock_mutex:
/* release mutex */
nvgpu_mutex_release(&queue->mutex);
exit:
if (err != 0) {
nvgpu_err(flcn->g, "falcon id-%d, queue id-%d, failed",
@@ -193,28 +430,51 @@ exit:
return err;
}
static int falcon_queue_pop_fb(struct nvgpu_falcon *flcn,
struct nvgpu_falcon_queue *queue, void *data, u32 size,
/* queue pop operation with lock */
int nvgpu_falcon_fb_queue_pop(struct nvgpu_falcon *flcn,
struct nvgpu_falcon_fb_queue *queue, void *data, u32 size,
u32 *bytes_read)
{
struct gk20a *g = flcn->g;
struct pmu_hdr *hdr = (struct pmu_hdr *)
(void *)queue->fbq.work_buffer;
struct gk20a *g;
struct pmu_hdr *hdr;
u32 entry_offset = 0U;
int err = 0;
if ((flcn == NULL) || (queue == NULL)) {
return -EINVAL;
}
g = flcn->g;
hdr = (struct pmu_hdr *) (void *) queue->fbq.work_buffer;
nvgpu_log_fn(g, " ");
if (queue->oflag != OFLAG_READ) {
nvgpu_err(flcn->g, "flcn-%d, queue-%d, not opened for read",
flcn->flcn_id, queue->id);
err = -EINVAL;
goto exit;
}
/* acquire mutex */
nvgpu_mutex_acquire(&queue->mutex);
err = queue->tail(flcn, queue, &queue->position, QUEUE_GET);
if (err != 0) {
nvgpu_err(flcn->g, "flcn-%d queue-%d, position GET failed",
flcn->flcn_id, queue->id);
goto unlock_mutex;
}
*bytes_read = 0U;
/* Check size */
if ((size + queue->fbq.read_position) >=
queue->fbq.element_size) {
if ((size + queue->fbq.read_position) >= queue->fbq.element_size) {
nvgpu_err(g,
"Attempt to read > than queue element size for queue id-%d",
queue->id);
"Attempt to read > than queue element size "
"for queue id-%d", queue->id);
err = -EINVAL;
goto exit;
goto unlock_mutex;
}
entry_offset = queue->position * queue->fbq.element_size;
@@ -236,7 +496,7 @@ static int falcon_queue_pop_fb(struct nvgpu_falcon *flcn,
if (hdr->size >= queue->fbq.element_size) {
nvgpu_err(g, "Super Surface read failed");
err = -ERANGE;
goto exit;
goto unlock_mutex;
}
}
@@ -250,12 +510,22 @@ static int falcon_queue_pop_fb(struct nvgpu_falcon *flcn,
if (queue->fbq.read_position >= hdr->size) {
queue->fbq.read_position = 0U;
/* Increment queue index. */
queue->position = falcon_queue_get_next_fb(flcn, queue,
queue->position = falcon_fb_queue_get_next(flcn, queue,
queue->position);
}
*bytes_read = size;
err = queue->tail(flcn, queue, &queue->position, QUEUE_SET);
if (err != 0) {
nvgpu_err(flcn->g, "flcn-%d queue-%d, position SET failed",
flcn->flcn_id, queue->id);
goto unlock_mutex;
}
unlock_mutex:
/* release mutex */
nvgpu_mutex_release(&queue->mutex);
exit:
if (err != 0) {
nvgpu_err(flcn->g, "falcon id-%d, queue id-%d, failed",
@@ -265,122 +535,73 @@ exit:
return err;
}
static int falcon_queue_element_is_in_use_fb(struct nvgpu_falcon *flcn,
struct nvgpu_falcon_queue *queue, u32 queue_pos, bool *in_use)
void nvgpu_falcon_fb_queue_free(struct nvgpu_falcon *flcn,
struct nvgpu_falcon_fb_queue **queue_p)
{
int err = 0;
struct nvgpu_falcon_fb_queue *queue = NULL;
struct gk20a *g = flcn->g;
if (queue_pos >= queue->size) {
err = -EINVAL;
goto exit;
if ((queue_p == NULL) || (*queue_p == NULL)) {
return;
}
*in_use = test_bit((int)queue_pos, (void *)&queue->fbq.element_in_use);
queue = *queue_p;
exit:
return err;
nvgpu_pmu_dbg(g, "flcn id-%d q-id %d: index %d ",
flcn->flcn_id, queue->id, queue->index);
nvgpu_kfree(g, queue->fbq.work_buffer);
nvgpu_mutex_destroy(&queue->fbq.work_buffer_mutex);
/* destroy mutex */
nvgpu_mutex_destroy(&queue->mutex);
nvgpu_kfree(g, queue);
*queue_p = NULL;
}
static int falcon_queue_sweep_fb(struct nvgpu_falcon *flcn,
struct nvgpu_falcon_queue *queue)
{
u32 head;
u32 tail;
bool in_use = false;
int err = 0;
tail = queue->fbq.tail;
err = queue->head(flcn, queue, &head, QUEUE_GET);
if (err != 0) {
nvgpu_err(flcn->g, "flcn-%d queue-%d, position GET failed",
flcn->flcn_id, queue->id);
goto exit;
}
/*
* Step from tail forward in the queue,
* to see how many consecutive entries
* can be made available.
*/
while (tail != head) {
if (falcon_queue_element_is_in_use_fb(flcn, queue,
tail, &in_use) != 0) {
break;
}
if (in_use) {
break;
}
tail = falcon_queue_get_next_fb(flcn, queue, tail);
}
/* Update tail */
queue->fbq.tail = tail;
exit:
return err;
}
/* return the queue element size */
u32 falcon_queue_get_element_size_fb(struct nvgpu_falcon_queue *queue)
{
return queue->fbq.element_size;
}
/* return the queue offset from super surface FBQ's */
u32 falcon_queue_get_offset_fb(struct nvgpu_falcon_queue *queue)
{
return queue->fbq.fb_offset;
}
/* lock work buffer of queue */
void falcon_queue_lock_work_buffer_fb(struct nvgpu_falcon_queue *queue)
{
/* acquire work buffer mutex */
nvgpu_mutex_acquire(&queue->fbq.work_buffer_mutex);
}
/* unlock work buffer of queue */
void falcon_queue_unlock_work_buffer_fb(struct nvgpu_falcon_queue *queue)
{
/* release work buffer mutex */
nvgpu_mutex_release(&queue->fbq.work_buffer_mutex);
}
/* return a pointer of queue work buffer */
u8 *falcon_queue_get_work_buffer_fb(struct nvgpu_falcon_queue *queue)
{
return queue->fbq.work_buffer;
}
int falcon_queue_free_element_fb(struct nvgpu_falcon *flcn,
struct nvgpu_falcon_queue *queue, u32 queue_pos)
{
int err = 0;
err = falcon_queue_element_set_use_state_fb(flcn, queue,
queue_pos, false);
if (err != 0) {
nvgpu_err(flcn->g, "fb queue elelment %d free failed",
queue_pos);
goto exit;
}
err = falcon_queue_sweep_fb(flcn, queue);
exit:
return err;
}
/* assign FB queue type specific ops */
int falcon_fb_queue_init(struct nvgpu_falcon *flcn,
struct nvgpu_falcon_queue *queue)
int nvgpu_falcon_fb_queue_init(struct nvgpu_falcon *flcn,
struct nvgpu_falcon_fb_queue **queue_p,
struct nvgpu_falcon_fb_queue_params params)
{
struct nvgpu_falcon_fb_queue *queue = NULL;
struct gk20a *g = flcn->g;
int err = 0;
nvgpu_log_fn(flcn->g, " ");
if (queue_p == NULL) {
return -EINVAL;
}
queue = (struct nvgpu_falcon_fb_queue *)
nvgpu_kmalloc(g, sizeof(struct nvgpu_falcon_fb_queue));
if (queue == NULL) {
return -ENOMEM;
}
queue->g = g;
queue->id = params.id;
queue->index = params.index;
queue->size = params.size;
queue->oflag = params.oflag;
queue->fbq.tail = 0U;
queue->fbq.element_in_use = 0U;
queue->fbq.read_position = 0U;
queue->fbq.super_surface_mem = params.super_surface_mem;
queue->fbq.element_size = params.fbq_element_size;
queue->fbq.fb_offset = params.fbq_offset;
queue->position = 0U;
queue->head = falcon_fb_queue_head;
queue->tail = falcon_fb_queue_tail;
/* init mutex */
err = nvgpu_mutex_init(&queue->mutex);
if (err != 0) {
goto exit;
}
/* init mutex */
err = nvgpu_mutex_init(&queue->fbq.work_buffer_mutex);
@@ -394,18 +615,12 @@ int falcon_fb_queue_init(struct nvgpu_falcon *flcn,
goto exit;
}
queue->offset = 0U;
queue->position = 0U;
queue->fbq.tail = 0U;
queue->fbq.element_in_use = 0U;
queue->fbq.read_position = 0U;
queue->tail = falcon_queue_tail_fb;
queue->has_room = falcon_queue_has_room_fb;
queue->push = falcon_queue_push_fb;
queue->pop = falcon_queue_pop_fb;
queue->rewind = NULL; /* Not required for FB-Q */
nvgpu_log(g, gpu_dbg_pmu,
"flcn id-%d q-id %d: index %d, size 0x%08x",
flcn->flcn_id, queue->id, queue->index,
queue->size);
*queue_p = queue;
exit:
return err;
}
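
A note on the reclamation logic implemented above: freeing an element
only clears its in-use bit, and falcon_fb_queue_sweep() then advances
tail past every consecutive freed element up to head, so space is
reclaimed lazily and in order. Here is a standalone model of that
sweep, with hypothetical names.

#include <stdint.h>
#include <stdio.h>

#define FBQ_NUM_ELEMENTS 8U

/* Mirrors falcon_fb_queue_sweep(): walk tail toward head, stopping at
 * the first element whose in-use bit is still set. */
static uint32_t fbq_sweep(uint32_t tail, uint32_t head, uint64_t in_use)
{
	while (tail != head) {
		if ((in_use >> tail) & 1ULL)
			break;               /* still owned by a pending cmd */
		tail = (tail + 1U) % FBQ_NUM_ELEMENTS;
	}
	return tail;
}

int main(void)
{
	/* Elements 0 and 1 were freed; element 2 is still in use. */
	uint64_t in_use = 1ULL << 2;
	uint32_t tail = fbq_sweep(0U, 5U, in_use);

	printf("tail advanced to %u\n", tail);   /* prints 2 */
	return 0;
}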


@@ -1,36 +0,0 @@
/*
* Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef NVGPU_FALCON_FB_QUEUE_H
#define NVGPU_FALCON_FB_QUEUE_H
u32 falcon_queue_get_element_size_fb(struct nvgpu_falcon_queue *queue);
u32 falcon_queue_get_offset_fb(struct nvgpu_falcon_queue *queue);
void falcon_queue_lock_work_buffer_fb(struct nvgpu_falcon_queue *queue);
void falcon_queue_unlock_work_buffer_fb(struct nvgpu_falcon_queue *queue);
u8 *falcon_queue_get_work_buffer_fb(struct nvgpu_falcon_queue *queue);
int falcon_queue_free_element_fb(struct nvgpu_falcon *flcn,
struct nvgpu_falcon_queue *queue, u32 queue_pos);
int falcon_fb_queue_init(struct nvgpu_falcon *flcn,
struct nvgpu_falcon_queue *queue);
#endif /* NVGPU_FALCON_FB_QUEUE_H */


@@ -0,0 +1,101 @@
/*
* Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef NVGPU_FALCON_FB_QUEUE_PRIV_H
#define NVGPU_FALCON_FB_QUEUE_PRIV_H
struct nvgpu_falcon_fb_queue {
struct gk20a *g;
/* used by nvgpu, for command LPQ/HPQ */
struct nvgpu_mutex mutex;
/* current write position */
u32 position;
/* logical queue identifier */
u32 id;
/* physical queue index */
u32 index;
/* in bytes */
u32 size;
/* open-flag */
u32 oflag;
/* members unique to the FB version of the falcon queues */
struct {
/* Holds super surface base address */
struct nvgpu_mem *super_surface_mem;
/*
* Holds the offset of queue data (0th element).
* This is used for FB Queues to hold an offset into
* the Super Surface for this queue.
*/
u32 fb_offset;
/*
* Define the size of a single queue element.
* The size field above holds the number of
* queue elements.
*/
u32 element_size;
/* To keep track of elements in use */
u64 element_in_use;
/*
* Define a pointer to a local (SYSMEM) allocated
* buffer to hold a single queue element
* while it is being assembled.
*/
u8 *work_buffer;
struct nvgpu_mutex work_buffer_mutex;
/*
* Tracks how much of the current FB Queue MSG queue
* entry has been read. This is needed as functions read
* the MSG queue as a byte stream, rather
* than reading a whole MSG at a time.
*/
u32 read_position;
/*
* Tail as tracked on the nvgpu "side". Because a queue
* element and its associated payload (which is also moved
* PMU->nvgpu through the FB CMD Queue) can't be freed until
* the command is complete, the response is received and any
* "out" payload is delivered to the client, nvgpu must track
* its own version of "tail". It is incremented as completed
* command entries are found following tail.
*/
u32 tail;
} fbq;
/* queue ops */
int (*tail)(struct nvgpu_falcon *flcn,
struct nvgpu_falcon_fb_queue *queue, u32 *tail, bool set);
int (*head)(struct nvgpu_falcon *flcn,
struct nvgpu_falcon_fb_queue *queue, u32 *head, bool set);
};
#endif /* NVGPU_FALCON_FB_QUEUE_PRIV_H */
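
One detail worth calling out from the struct above: FB queue messages
are consumed as a byte stream out of a fixed-size element, with
fbq.read_position tracking progress and resetting once a whole message
(hdr->size bytes) has been read. Below is a standalone sketch of that
bookkeeping, using simplified stand-in types rather than the real
pmu_hdr.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define ELEMENT_SIZE 64U

struct msg_hdr { uint8_t unit_id; uint8_t size; }; /* simplified pmu_hdr */

struct fbq_reader {
	uint8_t element[ELEMENT_SIZE]; /* stands in for the work buffer */
	uint32_t read_position;        /* bytes consumed of current message */
};

/* Mirrors the nvgpu_falcon_fb_queue_pop() bookkeeping: copy out 'size'
 * bytes, and once the whole message (hdr->size) is consumed, reset
 * read_position so the next pop starts on a fresh element. */
static int fbq_pop(struct fbq_reader *r, void *dst, uint32_t size)
{
	const struct msg_hdr *hdr = (const struct msg_hdr *)r->element;

	if (size + r->read_position >= ELEMENT_SIZE)
		return -1;  /* -EINVAL in the kernel code */

	memcpy(dst, r->element + r->read_position, size);
	r->read_position += size;

	if (r->read_position >= hdr->size)
		r->read_position = 0U;  /* message done; move to next element */
	return 0;
}

int main(void)
{
	struct fbq_reader r = { .read_position = 0U };
	struct msg_hdr hdr;
	uint8_t body[8];

	r.element[0] = 0x07; /* unit_id */
	r.element[1] = 10;   /* total message size: header + 8-byte body */

	fbq_pop(&r, &hdr, sizeof(hdr));            /* header first */
	fbq_pop(&r, body, hdr.size - sizeof(hdr)); /* then the body */
	printf("read msg from unit 0x%02x, read_position=%u\n",
	       hdr.unit_id, r.read_position);      /* back to 0 */
	return 0;
}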


@@ -27,7 +27,6 @@
#include "falcon_priv.h"
#include "falcon_dmem_queue.h"
#include "falcon_emem_queue.h"
#include "falcon_fb_queue.h"
/* common falcon queue ops */
static int falcon_queue_head(struct nvgpu_falcon *flcn,
@@ -336,11 +335,6 @@ void nvgpu_falcon_queue_free(struct nvgpu_falcon *flcn,
nvgpu_pmu_dbg(g, "flcn id-%d q-id %d: index %d ",
flcn->flcn_id, queue->id, queue->index);
if (queue->queue_type == QUEUE_TYPE_FB) {
nvgpu_kfree(g, queue->fbq.work_buffer);
nvgpu_mutex_destroy(&queue->fbq.work_buffer_mutex);
}
/* destroy mutex */
nvgpu_mutex_destroy(&queue->mutex);
@@ -353,11 +347,6 @@ u32 nvgpu_falcon_queue_get_id(struct nvgpu_falcon_queue *queue)
return queue->id;
}
u32 nvgpu_falcon_queue_get_position(struct nvgpu_falcon_queue *queue)
{
return queue->position;
}
u32 nvgpu_falcon_queue_get_index(struct nvgpu_falcon_queue *queue)
{
return queue->index;
@@ -368,37 +357,6 @@ u32 nvgpu_falcon_queue_get_size(struct nvgpu_falcon_queue *queue)
return queue->size;
}
u32 nvgpu_falcon_fbq_get_element_size(struct nvgpu_falcon_queue *queue)
{
return falcon_queue_get_element_size_fb(queue);
}
u32 nvgpu_falcon_queue_get_fbq_offset(struct nvgpu_falcon_queue *queue)
{
return falcon_queue_get_offset_fb(queue);
}
void nvgpu_falcon_queue_lock_fbq_work_buffer(struct nvgpu_falcon_queue *queue)
{
falcon_queue_lock_work_buffer_fb(queue);
}
void nvgpu_falcon_queue_unlock_fbq_work_buffer(struct nvgpu_falcon_queue *queue)
{
falcon_queue_unlock_work_buffer_fb(queue);
}
u8* nvgpu_falcon_queue_get_fbq_work_buffer(struct nvgpu_falcon_queue *queue)
{
return falcon_queue_get_work_buffer_fb(queue);
}
int nvgpu_falcon_queue_free_fbq_element(struct nvgpu_falcon *flcn,
struct nvgpu_falcon_queue *queue, u32 queue_pos)
{
return falcon_queue_free_element_fb(flcn, queue, queue_pos);
}
int nvgpu_falcon_queue_init(struct nvgpu_falcon *flcn,
struct nvgpu_falcon_queue **queue_p,
struct nvgpu_falcon_queue_params params)
@@ -444,16 +402,6 @@ int nvgpu_falcon_queue_init(struct nvgpu_falcon *flcn,
case QUEUE_TYPE_EMEM:
falcon_emem_queue_init(flcn, queue);
break;
case QUEUE_TYPE_FB:
queue->fbq.super_surface_mem = params.super_surface_mem;
queue->fbq.element_size = params.fbq_element_size;
queue->fbq.fb_offset = params.fbq_offset;
err = falcon_fb_queue_init(flcn, queue);
if (err != 0x0) {
goto exit;
}
break;
default:
err = -EINVAL;
break;


@@ -50,57 +50,6 @@ struct nvgpu_falcon_queue {
/* open-flag */
u32 oflag;
/* members unique to the FB version of the falcon queues */
struct {
/* Holds super surface base address */
struct nvgpu_mem *super_surface_mem;
/*
* Holds the offset of queue data (0th element).
* This is used for FB Queues to hold an offset into
* the Super Surface for this queue.
*/
u32 fb_offset;
/*
* Define the size of a single queue element.
* The size field above holds the number of
* queue elements.
*/
u32 element_size;
/* To keep track of elements in use */
u64 element_in_use;
/*
* Define a pointer to a local (SYSMEM) allocated
* buffer to hold a single queue element
* while it is being assembled.
*/
u8 *work_buffer;
struct nvgpu_mutex work_buffer_mutex;
/*
* Tracks how much of the current FB Queue MSG queue
* entry has been read. This is needed as functions read
* the MSG queue as a byte stream, rather
* than reading a whole MSG at a time.
*/
u32 read_position;
/*
* Tail as tracked on the nvgpu "side". Because a queue
* element and its associated payload (which is also moved
* PMU->nvgpu through the FB CMD Queue) can't be freed until
* the command is complete, the response is received and any
* "out" payload is delivered to the client, nvgpu must track
* its own version of "tail". It is incremented as completed
* command entries are found following tail.
*/
u32 tail;
} fbq;
/* queue type(DMEM-Q/FB-Q) specific ops */
int (*rewind)(struct nvgpu_falcon *flcn,
struct nvgpu_falcon_queue *queue);


@@ -686,7 +686,6 @@ bool gk20a_pmu_is_interrupted(struct nvgpu_pmu *pmu)
void gk20a_pmu_isr(struct gk20a *g)
{
struct nvgpu_pmu *pmu = &g->pmu;
struct nvgpu_falcon_queue *queue;
u32 intr, mask;
bool recheck = false;
@@ -745,8 +744,7 @@ void gk20a_pmu_isr(struct gk20a *g)
gk20a_writel(g, pwr_falcon_irqsclr_r(), intr);
if (recheck) {
queue = pmu->queue[PMU_MESSAGE_QUEUE];
if (!nvgpu_falcon_queue_is_empty(pmu->flcn, queue)) {
if (!nvgpu_pmu_queue_is_empty(pmu, PMU_MESSAGE_QUEUE)) {
gk20a_writel(g, pwr_falcon_irqsset_r(),
pwr_falcon_irqsset_swgen0_set_f());
}


@@ -29,6 +29,7 @@
#include <nvgpu/pmuif/gpmu_super_surf_if.h>
#include <nvgpu/falcon.h>
#include <nvgpu/falcon_queue.h>
#include <nvgpu/falcon_fb_queue.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/string.h>
@@ -110,7 +111,7 @@ int nvgpu_pmu_queue_init_fb(struct nvgpu_pmu *pmu,
u32 id, union pmu_init_msg_pmu *init)
{
struct gk20a *g = gk20a_from_pmu(pmu);
struct nvgpu_falcon_queue_params params = {0};
struct nvgpu_falcon_fb_queue_params params = {0};
u32 oflag = 0;
int err = 0;
u32 tmp_id = id;
@@ -162,7 +163,6 @@ int nvgpu_pmu_queue_init_fb(struct nvgpu_pmu *pmu,
params.id = id;
params.oflag = oflag;
params.queue_type = QUEUE_TYPE_FB;
if (tmp_id == PMU_COMMAND_QUEUE_HPQ) {
tmp_id = PMU_QUEUE_HPQ_IDX_FOR_V3;
@@ -176,9 +176,7 @@ int nvgpu_pmu_queue_init_fb(struct nvgpu_pmu *pmu,
}
params.index = init->v5.queue_index[tmp_id];
params.offset = init->v5.queue_offset;
err = nvgpu_falcon_queue_init(pmu->flcn, &pmu->queue[id], params);
err = nvgpu_falcon_fb_queue_init(pmu->flcn, &pmu->fb_queue[id], params);
if (err != 0) {
nvgpu_err(g, "queue-%d init failed", id);
}
@@ -242,11 +240,20 @@ void nvgpu_pmu_queue_free(struct nvgpu_pmu *pmu, u32 id)
goto exit;
}
if (pmu->queue[id] == NULL) {
goto exit;
if (pmu->queue_type == QUEUE_TYPE_FB) {
if (pmu->fb_queue[id] == NULL) {
goto exit;
}
nvgpu_falcon_fb_queue_free(pmu->flcn, &pmu->fb_queue[id]);
} else {
if (pmu->queue[id] == NULL) {
goto exit;
}
nvgpu_falcon_queue_free(pmu->flcn, &pmu->queue[id]);
}
nvgpu_falcon_queue_free(pmu->flcn, &pmu->queue[id]);
exit:
return;
}
@@ -256,7 +263,8 @@ static bool pmu_validate_cmd(struct nvgpu_pmu *pmu, struct pmu_cmd *cmd,
u32 queue_id)
{
struct gk20a *g = gk20a_from_pmu(pmu);
struct nvgpu_falcon_queue *queue;
struct nvgpu_falcon_fb_queue *fb_queue = NULL;
struct nvgpu_falcon_queue *queue = NULL;
u32 queue_size;
u32 in_size, out_size;
@@ -264,11 +272,11 @@ static bool pmu_validate_cmd(struct nvgpu_pmu *pmu, struct pmu_cmd *cmd,
goto invalid_cmd;
}
queue = pmu->queue[queue_id];
if (pmu->queue_type == QUEUE_TYPE_FB) {
queue_size = nvgpu_falcon_fbq_get_element_size(queue);
fb_queue = pmu->fb_queue[queue_id];
queue_size = nvgpu_falcon_fb_queue_get_element_size(fb_queue);
} else {
queue = pmu->queue[queue_id];
queue_size = nvgpu_falcon_queue_get_size(queue);
}
@@ -345,17 +353,26 @@ static int pmu_write_cmd(struct nvgpu_pmu *pmu, struct pmu_cmd *cmd,
u32 queue_id)
{
struct gk20a *g = gk20a_from_pmu(pmu);
struct nvgpu_falcon_queue *queue;
struct nvgpu_falcon_fb_queue *fb_queue = NULL;
struct nvgpu_falcon_queue *queue = NULL;
struct nvgpu_timeout timeout;
int err;
nvgpu_log_fn(g, " ");
queue = pmu->queue[queue_id];
nvgpu_timeout_init(g, &timeout, U32_MAX, NVGPU_TIMER_CPU_TIMER);
do {
err = nvgpu_falcon_queue_push(pmu->flcn, queue, cmd, cmd->hdr.size);
if (pmu->queue_type == QUEUE_TYPE_FB) {
fb_queue = pmu->fb_queue[queue_id];
err = nvgpu_falcon_fb_queue_push(pmu->flcn, fb_queue,
cmd, cmd->hdr.size);
} else {
queue = pmu->queue[queue_id];
err = nvgpu_falcon_queue_push(pmu->flcn, queue,
cmd, cmd->hdr.size);
}
if (err == -EAGAIN && nvgpu_timeout_expired(&timeout) == 0) {
nvgpu_usleep_range(1000, 2000);
} else {
@@ -414,7 +431,7 @@ static int pmu_cmd_payload_setup_rpc(struct gk20a *g, struct pmu_cmd *cmd,
{
struct nvgpu_pmu *pmu = &g->pmu;
struct pmu_v *pv = &g->ops.pmu_ver;
struct nvgpu_falcon_queue *queue = seq->cmd_queue;
struct nvgpu_falcon_fb_queue *queue = seq->cmd_queue;
struct falcon_payload_alloc alloc;
int err = 0;
@@ -435,7 +452,7 @@ static int pmu_cmd_payload_setup_rpc(struct gk20a *g, struct pmu_cmd *cmd,
if (pmu->queue_type == QUEUE_TYPE_FB) {
/* copy payload to FBQ work buffer */
nvgpu_memcpy((u8 *)
nvgpu_falcon_queue_get_fbq_work_buffer(queue) +
nvgpu_falcon_fb_queue_get_work_buffer(queue) +
alloc.dmem_offset,
(u8 *)payload->rpc.prpc, payload->rpc.size_rpc);
@@ -530,7 +547,8 @@ static int pmu_cmd_payload_setup(struct gk20a *g, struct pmu_cmd *cmd,
if (pmu->queue_type == QUEUE_TYPE_FB) {
/* copy payload to FBQ work buffer */
nvgpu_memcpy((u8 *)
nvgpu_falcon_queue_get_fbq_work_buffer(seq->cmd_queue) +
nvgpu_falcon_fb_queue_get_work_buffer(
seq->cmd_queue) +
alloc.dmem_offset,
(u8 *)payload->in.buf,
payload->in.size);
@@ -625,7 +643,7 @@ clean_up:
}
static int pmu_fbq_cmd_setup(struct gk20a *g, struct pmu_cmd *cmd,
struct nvgpu_falcon_queue *queue, struct pmu_payload *payload,
struct nvgpu_falcon_fb_queue *queue, struct pmu_payload *payload,
struct pmu_sequence *seq)
{
struct nvgpu_pmu *pmu = &g->pmu;
@@ -637,10 +655,10 @@ static int pmu_fbq_cmd_setup(struct gk20a *g, struct pmu_cmd *cmd,
int err = 0;
fbq_hdr = (struct nv_falcon_fbq_hdr *)
nvgpu_falcon_queue_get_fbq_work_buffer(queue);
nvgpu_falcon_fb_queue_get_work_buffer(queue);
flcn_cmd = (struct pmu_cmd *)
(nvgpu_falcon_queue_get_fbq_work_buffer(queue) +
(nvgpu_falcon_fb_queue_get_work_buffer(queue) +
sizeof(struct nv_falcon_fbq_hdr));
if (cmd->cmd.rpc.cmd_type == NV_PMU_RPC_CMD_ID) {
@@ -685,8 +703,8 @@ static int pmu_fbq_cmd_setup(struct gk20a *g, struct pmu_cmd *cmd,
seq->out_payload_fb_queue = false;
/* clear work queue buffer */
memset(nvgpu_falcon_queue_get_fbq_work_buffer(queue), 0,
nvgpu_falcon_fbq_get_element_size(queue));
memset(nvgpu_falcon_fb_queue_get_work_buffer(queue), 0,
nvgpu_falcon_fb_queue_get_element_size(queue));
/* Need to save room for both FBQ hdr, and the CMD */
seq->buffer_size_used = sizeof(struct nv_falcon_fbq_hdr) +
@@ -704,7 +722,7 @@ static int pmu_fbq_cmd_setup(struct gk20a *g, struct pmu_cmd *cmd,
* save queue index in seq structure
* so can free queue element when response is received
*/
seq->fbq_element_index = nvgpu_falcon_queue_get_position(queue);
seq->fbq_element_index = nvgpu_falcon_fb_queue_get_position(queue);
exit:
return err;
@@ -717,7 +735,7 @@ int nvgpu_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd,
{
struct nvgpu_pmu *pmu = &g->pmu;
struct pmu_sequence *seq = NULL;
struct nvgpu_falcon_queue *queue = pmu->queue[queue_id];
struct nvgpu_falcon_fb_queue *fb_queue = NULL;
int err;
nvgpu_log_fn(g, " ");
@@ -750,9 +768,6 @@ int nvgpu_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd,
cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_STATUS;
cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_INTR;
/* Save the queue in the seq structure. */
seq->cmd_queue = queue;
seq->callback = callback;
seq->cb_params = cb_param;
seq->msg = msg;
@@ -762,11 +777,15 @@ int nvgpu_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd,
*seq_desc = seq->desc;
if (pmu->queue_type == QUEUE_TYPE_FB) {
fb_queue = pmu->fb_queue[queue_id];
/* Save the queue in the seq structure. */
seq->cmd_queue = fb_queue;
/* Lock the FBQ work buffer */
nvgpu_falcon_queue_lock_fbq_work_buffer(queue);
nvgpu_falcon_fb_queue_lock_work_buffer(fb_queue);
/* Create FBQ work buffer & copy cmd to FBQ work buffer */
err = pmu_fbq_cmd_setup(g, cmd, queue, payload, seq);
err = pmu_fbq_cmd_setup(g, cmd, fb_queue, payload, seq);
if (err != 0) {
nvgpu_err(g, "FBQ cmd setup failed");
pmu_seq_release(pmu, seq);
@@ -779,7 +798,7 @@ int nvgpu_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd,
* in the call to pmu_fbq_cmd_setup()
*/
cmd = (struct pmu_cmd *)
(nvgpu_falcon_queue_get_fbq_work_buffer(queue) +
(nvgpu_falcon_fb_queue_get_work_buffer(fb_queue) +
sizeof(struct nv_falcon_fbq_hdr));
}
@@ -805,7 +824,7 @@ int nvgpu_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd,
exit:
if (pmu->queue_type == QUEUE_TYPE_FB) {
/* Unlock the FBQ work buffer */
nvgpu_falcon_queue_unlock_fbq_work_buffer(queue);
nvgpu_falcon_fb_queue_unlock_work_buffer(fb_queue);
}
nvgpu_log_fn(g, "Done, err %x", err);
@@ -824,9 +843,10 @@ static int pmu_payload_extract(struct nvgpu_pmu *pmu,
if (seq->out_payload_fb_queue) {
fbq_payload_offset =
nvgpu_falcon_queue_get_fbq_offset(seq->cmd_queue) +
seq->fbq_out_offset_in_queue_element + (seq->fbq_element_index *
nvgpu_falcon_fbq_get_element_size(seq->cmd_queue));
nvgpu_falcon_fb_queue_get_offset(seq->cmd_queue) +
seq->fbq_out_offset_in_queue_element +
(seq->fbq_element_index *
nvgpu_falcon_fb_queue_get_element_size(seq->cmd_queue));
nvgpu_mem_rd_n(g, &pmu->super_surface_buf, fbq_payload_offset,
seq->out_payload,
@@ -868,7 +888,7 @@ static void pmu_payload_fbq_free(struct nvgpu_pmu *pmu,
* set FBQ element work buffer to NULL
* Clear the in use bit for the queue entry this CMD used.
*/
nvgpu_falcon_queue_free_fbq_element(pmu->flcn, seq->cmd_queue,
nvgpu_falcon_fb_queue_free_element(pmu->flcn, seq->cmd_queue,
seq->fbq_element_index);
}
@@ -1025,15 +1045,25 @@ static int pmu_handle_event(struct nvgpu_pmu *pmu, struct pmu_msg *msg)
}
static bool pmu_falcon_queue_read(struct nvgpu_pmu *pmu,
struct nvgpu_falcon_queue *queue, void *data,
u32 queue_id, void *data,
u32 bytes_to_read, int *status)
{
struct gk20a *g = gk20a_from_pmu(pmu);
struct nvgpu_falcon_fb_queue *fb_queue = NULL;
struct nvgpu_falcon_queue *queue = NULL;
u32 bytes_read;
int err;
err = nvgpu_falcon_queue_pop(pmu->flcn, queue, data,
bytes_to_read, &bytes_read);
if (pmu->queue_type == QUEUE_TYPE_FB) {
fb_queue = pmu->fb_queue[queue_id];
err = nvgpu_falcon_fb_queue_pop(pmu->flcn, fb_queue, data,
bytes_to_read, &bytes_read);
} else {
queue = pmu->queue[queue_id];
err = nvgpu_falcon_queue_pop(pmu->flcn, queue, data,
bytes_to_read, &bytes_read);
}
if (err != 0) {
nvgpu_err(g, "fail to read msg: err %d", err);
*status = err;
@@ -1049,37 +1079,57 @@ static bool pmu_falcon_queue_read(struct nvgpu_pmu *pmu,
return true;
}
static bool pmu_read_message(struct nvgpu_pmu *pmu,
struct nvgpu_falcon_queue *queue, struct pmu_msg *msg, int *status)
bool nvgpu_pmu_queue_is_empty(struct nvgpu_pmu *pmu, u32 queue_id)
{
struct nvgpu_falcon_queue *queue = NULL;
struct nvgpu_falcon_fb_queue *fb_queue = NULL;
bool empty = true;
if (pmu->queue_type == QUEUE_TYPE_FB) {
fb_queue = pmu->fb_queue[queue_id];
empty = nvgpu_falcon_fb_queue_is_empty(pmu->flcn, fb_queue);
} else {
queue = pmu->queue[queue_id];
empty = nvgpu_falcon_queue_is_empty(pmu->flcn, queue);
}
return empty;
}
static bool pmu_read_message(struct nvgpu_pmu *pmu, u32 queue_id,
struct pmu_msg *msg, int *status)
{
struct gk20a *g = gk20a_from_pmu(pmu);
struct nvgpu_falcon_queue *queue = NULL;
u32 read_size;
u32 queue_id;
int err;
*status = 0;
if (nvgpu_falcon_queue_is_empty(pmu->flcn, queue)) {
if (nvgpu_pmu_queue_is_empty(pmu, queue_id)) {
return false;
}
queue_id = nvgpu_falcon_queue_get_id(queue);
if (!pmu_falcon_queue_read(pmu, queue, &msg->hdr, PMU_MSG_HDR_SIZE,
if (!pmu_falcon_queue_read(pmu, queue_id, &msg->hdr, PMU_MSG_HDR_SIZE,
status)) {
nvgpu_err(g, "fail to read msg from queue %d", queue_id);
goto clean_up;
}
if (msg->hdr.unit_id == PMU_UNIT_REWIND) {
err = nvgpu_falcon_queue_rewind(pmu->flcn, queue);
if (err != 0) {
nvgpu_err(g, "fail to rewind queue %d", queue_id);
*status = err;
goto clean_up;
if (pmu->queue_type != QUEUE_TYPE_FB) {
queue = pmu->queue[queue_id];
err = nvgpu_falcon_queue_rewind(pmu->flcn, queue);
if (err != 0) {
nvgpu_err(g, "fail to rewind queue %d",
queue_id);
*status = err;
goto clean_up;
}
}
/* read again after rewind */
if (!pmu_falcon_queue_read(pmu, queue, &msg->hdr,
if (!pmu_falcon_queue_read(pmu, queue_id, &msg->hdr,
PMU_MSG_HDR_SIZE, status)) {
nvgpu_err(g, "fail to read msg from queue %d",
queue_id);
@@ -1096,7 +1146,7 @@ static bool pmu_read_message(struct nvgpu_pmu *pmu,
if (msg->hdr.size > PMU_MSG_HDR_SIZE) {
read_size = msg->hdr.size - PMU_MSG_HDR_SIZE;
if (!pmu_falcon_queue_read(pmu, queue, &msg->msg, read_size,
if (!pmu_falcon_queue_read(pmu, queue_id, &msg->msg, read_size,
status)) {
nvgpu_err(g, "fail to read msg from queue %d",
queue_id);
@@ -1129,8 +1179,7 @@ int nvgpu_pmu_process_message(struct nvgpu_pmu *pmu)
return 0;
}
while (pmu_read_message(pmu,
pmu->queue[PMU_MESSAGE_QUEUE], &msg, &status)) {
while (pmu_read_message(pmu, PMU_MESSAGE_QUEUE, &msg, &status)) {
nvgpu_pmu_dbg(g, "read msg hdr: ");
nvgpu_pmu_dbg(g, "unit_id = 0x%08x, size = 0x%08x",


@@ -267,12 +267,11 @@ static bool sec2_falcon_queue_read(struct nvgpu_sec2 *sec2,
}
static bool sec2_read_message(struct nvgpu_sec2 *sec2,
struct nvgpu_falcon_queue *queue,
struct nv_flcn_msg_sec2 *msg, int *status)
u32 queue_id, struct nv_flcn_msg_sec2 *msg, int *status)
{
struct nvgpu_falcon_queue *queue = sec2->queue[queue_id];
struct gk20a *g = sec2->g;
u32 read_size;
u32 queue_id;
int err;
*status = 0U;
@@ -281,8 +280,6 @@ static bool sec2_read_message(struct nvgpu_sec2 *sec2,
return false;
}
queue_id = nvgpu_falcon_queue_get_id(queue);
if (!sec2_falcon_queue_read(sec2, queue, &msg->hdr, PMU_MSG_HDR_SIZE,
status)) {
nvgpu_err(g, "fail to read msg from queue %d", queue_id);
@@ -411,7 +408,7 @@ int nvgpu_sec2_process_message(struct nvgpu_sec2 *sec2)
}
while (sec2_read_message(sec2,
sec2->queue[SEC2_NV_MSGQ_LOG_ID], &msg, &status)) {
SEC2_NV_MSGQ_LOG_ID, &msg, &status)) {
nvgpu_sec2_dbg(g, "read msg hdr: ");
nvgpu_sec2_dbg(g, "unit_id = 0x%08x, size = 0x%08x",


@@ -0,0 +1,80 @@
/*
* Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef NVGPU_FALCON_FB_QUEUE_H
#define NVGPU_FALCON_FB_QUEUE_H
#include <nvgpu/types.h>
struct gk20a;
struct nvgpu_falcon;
struct nvgpu_falcon_fb_queue;
struct nvgpu_falcon_fb_queue_params {
/* logical queue identifier */
u32 id;
/* physical queue index */
u32 index;
/* in bytes */
u32 size;
/* open-flag */
u32 oflag;
/* fb queue params */
/* Holds the offset of queue data (0th element) */
u32 fbq_offset;
/* fb queue element size */
u32 fbq_element_size;
/* Holds super surface base address */
struct nvgpu_mem *super_surface_mem;
};
/* queue public functions */
int nvgpu_falcon_fb_queue_init(struct nvgpu_falcon *flcn,
struct nvgpu_falcon_fb_queue **queue_p,
struct nvgpu_falcon_fb_queue_params params);
bool nvgpu_falcon_fb_queue_is_empty(struct nvgpu_falcon *flcn,
struct nvgpu_falcon_fb_queue *queue);
int nvgpu_falcon_fb_queue_pop(struct nvgpu_falcon *flcn,
struct nvgpu_falcon_fb_queue *queue, void *data, u32 size,
u32 *bytes_read);
int nvgpu_falcon_fb_queue_push(struct nvgpu_falcon *flcn,
struct nvgpu_falcon_fb_queue *queue, void *data, u32 size);
void nvgpu_falcon_fb_queue_free(struct nvgpu_falcon *flcn,
struct nvgpu_falcon_fb_queue **queue_p);
u32 nvgpu_falcon_fb_queue_get_id(struct nvgpu_falcon_fb_queue *queue);
u32 nvgpu_falcon_fb_queue_get_index(struct nvgpu_falcon_fb_queue *queue);
u32 nvgpu_falcon_fb_queue_get_size(struct nvgpu_falcon_fb_queue *queue);
u32 nvgpu_falcon_fb_queue_get_position(struct nvgpu_falcon_fb_queue *queue);
u32 nvgpu_falcon_fb_queue_get_element_size(struct nvgpu_falcon_fb_queue *queue);
u32 nvgpu_falcon_fb_queue_get_offset(struct nvgpu_falcon_fb_queue *queue);
u8 *nvgpu_falcon_fb_queue_get_work_buffer(struct nvgpu_falcon_fb_queue *queue);
int nvgpu_falcon_fb_queue_free_element(struct nvgpu_falcon *flcn,
struct nvgpu_falcon_fb_queue *queue, u32 queue_pos);
void nvgpu_falcon_fb_queue_lock_work_buffer(
struct nvgpu_falcon_fb_queue *queue);
void nvgpu_falcon_fb_queue_unlock_work_buffer(
struct nvgpu_falcon_fb_queue *queue);
#endif /* NVGPU_FALCON_FB_QUEUE_H */
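
The header above keeps nvgpu_falcon_fb_queue opaque: callers hold only
a pointer, init allocates the object from a params struct and returns
it through a double pointer, and free tears it down and NULLs the
caller's handle. A standalone sketch of that handle pattern, with
illustrative names:

#include <stdio.h>
#include <stdlib.h>

/* Opaque to callers; the full definition lives in a _priv header. */
struct fbq_handle { unsigned id, index, size; };

struct fbq_params { unsigned id, index, size; };

static int fbq_init(struct fbq_handle **queue_p, struct fbq_params params)
{
	struct fbq_handle *q;

	if (queue_p == NULL)
		return -1;  /* -EINVAL */
	q = malloc(sizeof(*q));
	if (q == NULL)
		return -2;  /* -ENOMEM */
	q->id = params.id;
	q->index = params.index;
	q->size = params.size;
	*queue_p = q;   /* hand the opaque handle back to the caller */
	return 0;
}

/* Mirrors nvgpu_falcon_fb_queue_free(): takes a double pointer so the
 * caller's handle is reset to NULL after teardown. */
static void fbq_free(struct fbq_handle **queue_p)
{
	if (queue_p == NULL || *queue_p == NULL)
		return;
	free(*queue_p);
	*queue_p = NULL;
}

int main(void)
{
	struct fbq_handle *q = NULL;
	struct fbq_params params = { .id = 4, .index = 0, .size = 16 };

	if (fbq_init(&q, params) == 0) {
		printf("queue %u ready\n", q->id);
		fbq_free(&q);   /* q is NULL again here */
	}
	return 0;
}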


@@ -30,7 +30,6 @@
#define QUEUE_TYPE_EMEM 0x1U
#define QUEUE_TYPE_FB 0x2U
struct gk20a;
struct nvgpu_falcon;
struct nvgpu_falcon_queue;
@@ -49,16 +48,6 @@ struct nvgpu_falcon_queue_params {
u32 size;
/* open-flag */
u32 oflag;
/* fb queue params */
/* Holds the offset of queue data (0th element) */
u32 fbq_offset;
/* fb queue element size */
u32 fbq_element_size;
/* Holds super surface base address */
struct nvgpu_mem *super_surface_mem;
};
/* queue public functions */
@@ -77,16 +66,7 @@ int nvgpu_falcon_queue_push(struct nvgpu_falcon *flcn,
void nvgpu_falcon_queue_free(struct nvgpu_falcon *flcn,
struct nvgpu_falcon_queue **queue_p);
u32 nvgpu_falcon_queue_get_id(struct nvgpu_falcon_queue *queue);
u32 nvgpu_falcon_queue_get_position(struct nvgpu_falcon_queue *queue);
u32 nvgpu_falcon_queue_get_index(struct nvgpu_falcon_queue *queue);
u32 nvgpu_falcon_queue_get_size(struct nvgpu_falcon_queue *queue);
u32 nvgpu_falcon_fbq_get_element_size(struct nvgpu_falcon_queue *queue);
u32 nvgpu_falcon_queue_get_fbq_offset(struct nvgpu_falcon_queue *queue);
u8 *nvgpu_falcon_queue_get_fbq_work_buffer(struct nvgpu_falcon_queue *queue);
int nvgpu_falcon_queue_free_fbq_element(struct nvgpu_falcon *flcn,
struct nvgpu_falcon_queue *queue, u32 queue_pos);
void nvgpu_falcon_queue_lock_fbq_work_buffer(struct nvgpu_falcon_queue *queue);
void nvgpu_falcon_queue_unlock_fbq_work_buffer(
struct nvgpu_falcon_queue *queue);
#endif /* NVGPU_FALCON_QUEUE_H */


@@ -299,7 +299,7 @@ struct pmu_sequence {
void *cb_params;
/* fb queue that is associated with this seq */
struct nvgpu_falcon_queue *cmd_queue;
struct nvgpu_falcon_fb_queue *cmd_queue;
/* fbq element that is associated with this seq */
u8 *fbq_work_buffer;
u32 fbq_element_index;
@@ -355,6 +355,8 @@ struct nvgpu_pmu {
struct nvgpu_falcon_queue *queue[PMU_QUEUE_COUNT];
u32 queue_type;
struct nvgpu_falcon_fb_queue *fb_queue[PMU_QUEUE_COUNT];
struct pmu_sequence *seq;
unsigned long pmu_seq_tbl[PMU_SEQ_TBL_SIZE];
u32 next_seq_desc;
@@ -464,6 +466,7 @@ int nvgpu_pmu_cmd_post(struct gk20a *g, struct pmu_cmd *cmd,
u32 queue_id, pmu_callback callback, void *cb_param,
u32 *seq_desc);
bool nvgpu_pmu_queue_is_empty(struct nvgpu_pmu *pmu, u32 queue_id);
int nvgpu_pmu_process_message(struct nvgpu_pmu *pmu);
/* perfmon */