From 87984ea34469e5b0a821d896d6da9621177f9079 Mon Sep 17 00:00:00 2001
From: mkumbar <mkumbar@nvidia.com>
Date: Wed, 30 Jun 2021 11:23:39 +0530
Subject: [PATCH] gpu: nvgpu: support nvriscv debug feature

Enable the nvriscv debug buffer feature in NVGPU.
The debug buffer prints the ucode debug log onto the console in real time.
It uses DMEM, a queue and the SWGEN1 interrupt to share ucode debug data
with NVGPU: ucode writes a debug message to DMEM and updates the offset in
the queue, which triggers an interrupt to NVGPU.
NVGPU then copies the debug message from DMEM to a local buffer, processes
it and prints it onto the console.

The debug buffer feature is added under the falcon unit; any engine that
needs it can use it by passing the required parameters to the public
functions.

Currently the GA10B NVRISCV NS/LS PMU ucode supports this feature, and this
change adds the matching support on the NVGPU side. With the feature
enabled, it is now possible to see ucode prints in real time.

JIRA NVGPU-6959

Change-Id: I9d46020470285b490b6bc876204f62698055b1ec
Signed-off-by: mkumbar <mkumbar@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2548951
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: svc-mobile-misra <svc-mobile-misra@nvidia.com>
Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 arch/nvgpu-common.yaml                        |   4 +-
 drivers/gpu/nvgpu/Makefile                    |   2 +-
 drivers/gpu/nvgpu/Makefile.linux.configs      |   6 +
 drivers/gpu/nvgpu/Makefile.shared.configs     |   6 +
 drivers/gpu/nvgpu/Makefile.sources            |   4 +
 .../gpu/nvgpu/common/falcon/falcon_debug.c    | 377 ++++++++++++++++++
 .../gpu/nvgpu/common/falcon/falcon_debug.h    |  41 ++
 .../gpu/nvgpu/common/pmu/fw/fw_ns_bootstrap.c |  15 +
 drivers/gpu/nvgpu/common/pmu/pmu_rtos_init.c  |  20 +
 drivers/gpu/nvgpu/hal/init/hal_ga100.c        |   1 +
 drivers/gpu/nvgpu/hal/init/hal_ga10b.c        |   1 +
 drivers/gpu/nvgpu/hal/init/hal_gm20b.c        |   1 +
 drivers/gpu/nvgpu/hal/init/hal_gp10b.c        |   1 +
 drivers/gpu/nvgpu/hal/init/hal_gv11b.c        |   1 +
 drivers/gpu/nvgpu/hal/init/hal_tu104.c        |   1 +
 drivers/gpu/nvgpu/hal/pmu/pmu_ga10b.c         |  16 +
 drivers/gpu/nvgpu/hal/pmu/pmu_ga10b.h         |   1 +
 drivers/gpu/nvgpu/hal/pmu/pmu_gk20a.c         |   4 +
 drivers/gpu/nvgpu/include/nvgpu/falcon.h      |  74 ++++
 drivers/gpu/nvgpu/include/nvgpu/gops/pmu.h    |   1 +
 .../include/nvgpu/hw/ga10b/hw_pwr_ga10b.h     |   1 +
 21 files changed, 576 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/common/falcon/falcon_debug.c
 create mode 100644 drivers/gpu/nvgpu/common/falcon/falcon_debug.h

diff --git a/arch/nvgpu-common.yaml b/arch/nvgpu-common.yaml
index de1caa354..3a9543c92 100644
--- a/arch/nvgpu-common.yaml
+++ b/arch/nvgpu-common.yaml
@@ -364,11 +364,13 @@ falcon_fusa:
 falcon:
   owner: Sagar K
   safe: no
-  gpu: dgpu
+  gpu: both
   sources: [ common/falcon/falcon_sw_tu104.c,
              common/falcon/falcon_sw_tu104.h,
              common/falcon/falcon_sw_ga10b.c,
              common/falcon/falcon_sw_ga10b.h,
+             common/falcon/falcon_debug.c,
+             common/falcon/falcon_debug.h,
              include/nvgpu/gops/gsp.h,
              include/nvgpu/gops/nvdec.h ]
   deps: [ ]
diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
index a4aaa8857..ed38166c7 100644
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -573,6 +573,7 @@ nvgpu-y += \
 	common/mm/dma.o \
 	common/vbios/bios.o \
 	common/falcon/falcon.o \
+	common/falcon/falcon_debug.o \
 	common/falcon/falcon_sw_gk20a.o \
 	common/engine_queues/engine_mem_queue.o \
 	common/engine_queues/engine_dmem_queue.o \
@@ -804,7 +805,6 @@ nvgpu-$(CONFIG_NVGPU_HAL_NON_FUSA) += \
 	hal/vgpu/init/vgpu_hal_gv11b.o
 endif
 
-
 ifeq ($(CONFIG_NVGPU_HAL_NON_FUSA),y)
 nvgpu-y += \
 	common/falcon/falcon_sw_ga10b.o \
diff --git a/drivers/gpu/nvgpu/Makefile.linux.configs b/drivers/gpu/nvgpu/Makefile.linux.configs
index b45940975..63359c801 100644
--- a/drivers/gpu/nvgpu/Makefile.linux.configs
+++ b/drivers/gpu/nvgpu/Makefile.linux.configs
@@ -16,6 +16,9 @@ CONFIG_TEGRA_ACR := y
 # Support for debugger APIs
 CONFIG_NVGPU_DEBUGGER := y
 
+# Support for the Falcon/NVRISCV ucode debug buffer (real-time debug prints)
+CONFIG_NVGPU_FALCON_DEBUG := y
+
 # Support for iGPU LS PMU enable/disable
 CONFIG_NVGPU_LS_PMU := y
 
@@ -163,6 +166,9 @@ endif
 ifeq ($(CONFIG_NVGPU_DEBUGGER),y)
 ccflags-y += -DCONFIG_NVGPU_DEBUGGER
 endif
+ifeq ($(CONFIG_NVGPU_FALCON_DEBUG),y)
+ccflags-y += -DCONFIG_NVGPU_FALCON_DEBUG
+endif
 ifeq ($(CONFIG_NVGPU_LS_PMU),y)
 ccflags-y += -DCONFIG_NVGPU_LS_PMU
 endif
diff --git a/drivers/gpu/nvgpu/Makefile.shared.configs b/drivers/gpu/nvgpu/Makefile.shared.configs
index ccc3f0d34..1e462cd75 100644
--- a/drivers/gpu/nvgpu/Makefile.shared.configs
+++ b/drivers/gpu/nvgpu/Makefile.shared.configs
@@ -112,6 +112,9 @@ NVGPU_COMMON_CFLAGS             += -DCONFIG_NVGPU_KERNEL_MODE_SUBMIT
 CONFIG_NVGPU_FALCON_NON_FUSA	:= 1
 NVGPU_COMMON_CFLAGS             += -DCONFIG_NVGPU_FALCON_NON_FUSA
 
+CONFIG_NVGPU_FALCON_DEBUG   := 1
+NVGPU_COMMON_CFLAGS             += -DCONFIG_NVGPU_FALCON_DEBUG
+
 CONFIG_NVGPU_GR_FALCON_NON_SECURE_BOOT := 1
 NVGPU_COMMON_CFLAGS             += -DCONFIG_NVGPU_GR_FALCON_NON_SECURE_BOOT
 
@@ -210,6 +213,9 @@ NVGPU_COMMON_CFLAGS             += -DCONFIG_NVGPU_VPR
 CONFIG_NVGPU_REPLAYABLE_FAULT   := 1
 NVGPU_COMMON_CFLAGS             += -DCONFIG_NVGPU_REPLAYABLE_FAULT
 
+CONFIG_NVGPU_FALCON_DEBUG       := 1
+NVGPU_COMMON_CFLAGS             += -DCONFIG_NVGPU_FALCON_DEBUG
+
 # Enable LS PMU support for normal build
 CONFIG_NVGPU_LS_PMU             := 1
 NVGPU_COMMON_CFLAGS             += -DCONFIG_NVGPU_LS_PMU
diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources
index 006f3a346..07121679d 100644
--- a/drivers/gpu/nvgpu/Makefile.sources
+++ b/drivers/gpu/nvgpu/Makefile.sources
@@ -489,6 +489,10 @@ ifdef NVGPU_FAULT_INJECTION_ENABLEMENT
 srcs += os/posix/posix-fault-injection.c
 endif
 
+ifeq ($(CONFIG_NVGPU_FALCON_DEBUG),1)
+srcs += common/falcon/falcon_debug.c
+endif
+
 ifeq ($(CONFIG_NVGPU_LS_PMU),1)
 # Add LS PMU files which are required for normal build
 srcs += \
diff --git a/drivers/gpu/nvgpu/common/falcon/falcon_debug.c b/drivers/gpu/nvgpu/common/falcon/falcon_debug.c
new file mode 100644
index 000000000..fc3f4754c
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/falcon/falcon_debug.c
@@ -0,0 +1,377 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <nvgpu/gk20a.h>
+#include <nvgpu/timers.h>
+#include <nvgpu/falcon.h>
+#include <nvgpu/io.h>
+#include <nvgpu/static_analysis.h>
+#include <nvgpu/string.h>
+
+#include "falcon_debug.h"
+
+#define NV_NVRISCV_DEBUG_BUFFER_MAGIC   0xf007ba11U
+/* DMEM is read in 32-bit words, so offsets and sizes must be 4-byte aligned. */
+#define FLCN_DMEM_ACCESS_ALIGNMENT    (4U)
+/* Round v down to a multiple of g; g must be a power of two. */
+#define NV_ALIGN_DOWN(v, g) ((v) & ~((g) - 1U))
+/* True when addr is a multiple of align; align must be a power of two. */
+#define NV_IS_ALIGNED(addr, align)	(((addr) & ((align) - 1U)) == 0U)
+
+/* Free the local debug buffer copy (if any) and reset the tracking state. */
+void nvgpu_falcon_dbg_buf_destroy(struct nvgpu_falcon *flcn)
+{
+	struct nvgpu_falcon_dbg_buf *dbg = &flcn->debug_buffer;
+	struct gk20a *g = flcn->g;
+
+	if (dbg->local_buf != NULL) {
+		nvgpu_kfree(g, dbg->local_buf);
+		dbg->local_buf = NULL;
+	}
+	dbg->buffer_size = 0;
+	dbg->read_offset = 0;
+	dbg->first_msg_received = false;
+	dbg->dmem_offset = g->ops.falcon.get_mem_size(flcn, MEM_DMEM);
+}
+
+/* Set up the local debug buffer copy and locate the metadata in DMEM. */
+int nvgpu_falcon_dbg_buf_init(struct nvgpu_falcon *flcn,
+	u32 debug_buffer_max_size, u32 write_reg_addr, u32 read_reg_addr)
+{
+	struct gk20a *g = flcn->g;
+	struct nvgpu_falcon_dbg_buf *debug_buffer = &flcn->debug_buffer;
+	int status = 0;
+
+	/*
+	 * Set buffer_size to its initial value of max size.
+	 * We will refine it later once ucode informs us of the size it wants
+	 * the debug buffer to be.
+	 */
+	debug_buffer->buffer_size = debug_buffer_max_size;
+	debug_buffer->first_msg_received = false;
+	debug_buffer->read_offset = 0;
+
+	if (debug_buffer->local_buf == NULL) {
+		/*
+		 * Allocate memory for nvgpu-side debug buffer, used for copies
+		 * from nvriscv dmem. We make it 1 byte larger than the actual debug
+		 * buffer to keep a null character at the end for ease of printing.
+		 */
+		debug_buffer->local_buf = nvgpu_kzalloc(g, debug_buffer_max_size + 1);
+		if (debug_buffer->local_buf == NULL) {
+			status = -ENOMEM;
+			nvgpu_err(g, "Failed to alloc memory for flcn debug buffer");
+			nvgpu_err(g, "status=0x%08x", status);
+			goto exit;
+		}
+	}
+
+	/* Zero out memory in the local debug buffer. */
+	memset(debug_buffer->local_buf, 0, debug_buffer_max_size + 1);
+
+	/*
+	 * Debug buffer is located at the very end of available DMEM.
+	 * NVGPU doesn't know the exact size until the ucode informs us of
+	 * the size it wants, so only make it as large as the metadata
+	 * at the end of the buffer.
+	 */
+	debug_buffer->dmem_offset = g->ops.falcon.get_mem_size(flcn, MEM_DMEM) -
+		sizeof(struct nvgpu_falcon_dbg_buf_metadata);
+
+	/* The DMEM offset must be 4-byte aligned */
+	if (!NV_IS_ALIGNED(debug_buffer->dmem_offset, FLCN_DMEM_ACCESS_ALIGNMENT)) {
+		nvgpu_err(g, "metadata DMEM offset is not 4-byte aligned.");
+		nvgpu_err(g, "dmem_offset=0x%08x", debug_buffer->dmem_offset);
+		status = -EINVAL;
+		goto exit;
+	}
+
+	/* The metadata size must be 4-byte aligned as well */
+	if (!NV_IS_ALIGNED(sizeof(struct nvgpu_falcon_dbg_buf_metadata),
+			FLCN_DMEM_ACCESS_ALIGNMENT)) {
+		nvgpu_err(g, "The debug buffer metadata size is not 4-byte aligned");
+		status = -EINVAL;
+		goto exit;
+	}
+
+	debug_buffer->read_offset_address  = read_reg_addr;
+	debug_buffer->write_offset_address = write_reg_addr;
+
+exit:
+	if (status != 0) {
+		nvgpu_falcon_dbg_buf_destroy(flcn);
+	}
+	return status;
+}
+
+/*
+ * Copy new data from the nvriscv DMEM debug buffer to the local buffer:
+ * everything from the last read offset (aligned down to 4 bytes) up to the
+ * current write offset, in two copies when the write offset has wrapped.
+ * @return '0' if data fetched successfully, -EINVAL otherwise.
+ */
+static int falcon_update_debug_buffer_from_dmem(struct nvgpu_falcon *flcn,
+	u32 write_offset)
+{
+	struct gk20a *g = flcn->g;
+	struct nvgpu_falcon_dbg_buf *debug_buffer = &flcn->debug_buffer;
+	u32 first_read_size     = 0;
+	u32 second_read_size    = 0;
+
+	/*
+	 * Align read offset, since reading DMEM only works with 32-bit words.
+	 * We only need to align the offset since dmem_offset is already aligned.
+	 * We don't need to align the write offset since nvgpu_falcon_copy_from_dmem
+	 * handles unaligned-size reads.
+	 */
+	u32 read_offset_aligned = NV_ALIGN_DOWN(debug_buffer->read_offset,
+			FLCN_DMEM_ACCESS_ALIGNMENT);
+
+	if (write_offset >= debug_buffer->read_offset) {
+		first_read_size = write_offset - read_offset_aligned;
+		second_read_size = 0;
+	} else {
+		/* Write offset wrapped: read up to buffer end, then from offset 0 */
+		first_read_size = debug_buffer->buffer_size - read_offset_aligned;
+		second_read_size = write_offset;
+	}
+
+	if (first_read_size > 0) {
+		if (read_offset_aligned + first_read_size >
+			debug_buffer->buffer_size) {
+			nvgpu_err(g,
+				"Invalid read (first read) from print buffer attempted!");
+			return -EINVAL;
+		}
+
+		if (nvgpu_falcon_copy_from_dmem(flcn,
+			debug_buffer->dmem_offset + read_offset_aligned,
+			debug_buffer->local_buf + read_offset_aligned,
+			first_read_size,
+			0) != 0) {
+			nvgpu_err(g, "Failed to copy debug buffer contents from DMEM");
+			return -EINVAL;
+		}
+	}
+
+	if (second_read_size > 0) {
+		if (second_read_size > debug_buffer->buffer_size) {
+			nvgpu_err(g,
+				"Invalid read (second read) from print buffer attempted!");
+			return -EINVAL;
+		}
+
+		/*
+		 * Wrap around: read the second part from the start of the buffer.
+		 * Assume dmem_offset is always aligned.
+		 */
+		if (nvgpu_falcon_copy_from_dmem(flcn, debug_buffer->dmem_offset,
+			debug_buffer->local_buf, second_read_size,
+			0) != 0) {
+			nvgpu_err(g,
+				"Failed to copy debug buffer contents from nvriscv DMEM");
+			return -EINVAL;
+		}
+	}
+
+	if (first_read_size == 0 && second_read_size == 0) {
+		nvgpu_err(g, "Debug buffer empty, can't read any data!");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Read and validate the metadata that ucode keeps at the end of its DMEM
+ * debug buffer, then adopt the ucode buffer size and move dmem_offset to the
+ * buffer start. debug_buffer is only modified once every check has passed.
+ */
+static int falcon_fetch_debug_buffer_metadata(struct nvgpu_falcon *flcn)
+{
+	struct gk20a *g = flcn->g;
+	struct nvgpu_falcon_dbg_buf *debug_buffer = &flcn->debug_buffer;
+	struct nvgpu_falcon_dbg_buf_metadata meta;
+	u32 dmem_offset;
+
+	/* DMEM offset will point to metadata initially */
+	if (nvgpu_falcon_copy_from_dmem(flcn, debug_buffer->dmem_offset,
+		(u8 *)&meta, sizeof(meta), 0) != 0) {
+		nvgpu_err(g, "Failed to copy debug buffer metadata from nvriscv DMEM");
+		return -EINVAL;
+	}
+
+	nvgpu_info(g, "metadata magic        - 0x%x", meta.magic);
+	nvgpu_info(g, "metadata buffer size  - 0x%x", meta.buffer_size);
+	nvgpu_info(g, "metadata write offset - 0x%x", meta.write_offset);
+	nvgpu_info(g, "metadata read offset  - 0x%x", meta.read_offset);
+
+	if (meta.magic != NV_NVRISCV_DEBUG_BUFFER_MAGIC) {
+		nvgpu_err(g, "Failed to verify magic number in debug buffer");
+		nvgpu_err(g, " metadata copied from nvriscv DMEM");
+		return -EINVAL;
+	}
+
+	if ((meta.buffer_size >= debug_buffer->buffer_size) ||
+	    (meta.buffer_size > debug_buffer->dmem_offset)) {
+		nvgpu_err(g, "Debug buffer size requested by ucode too big!");
+		return -EINVAL;
+	}
+
+	/* The DMEM buffer size must be 4-byte aligned */
+	if (!NV_IS_ALIGNED(meta.buffer_size, FLCN_DMEM_ACCESS_ALIGNMENT)) {
+		nvgpu_err(g, "The debug buffer size is not 4-byte aligned");
+		nvgpu_err(g, "buffer_size=0x%08x", meta.buffer_size);
+		return -EINVAL;
+	}
+
+	/*
+	 * NVGPU doesn't want to overwrite the metadata since NVGPU might want to
+	 * use it to pass read and write offsets if no registers are available.
+	 */
+	dmem_offset = debug_buffer->dmem_offset - meta.buffer_size;
+
+	/* The DMEM offset must be 4-byte aligned */
+	if (!NV_IS_ALIGNED(dmem_offset, FLCN_DMEM_ACCESS_ALIGNMENT)) {
+		nvgpu_err(g, "The debug buffer DMEM offset is not 4-byte aligned.");
+		nvgpu_err(g, " dmem_offset=0x%08x", dmem_offset);
+		return -EINVAL;
+	}
+
+	/* All checks passed: commit the refined size and the buffer start */
+	debug_buffer->buffer_size = meta.buffer_size;
+	debug_buffer->dmem_offset = dmem_offset;
+
+	return 0;
+}
+
+/*
+ * Fetch any new ucode debug output from DMEM and print it line by line.
+ *
+ * Until a first message has been processed, the DMEM metadata is fetched to
+ * learn the buffer size. The write offset is read from the write offset
+ * register, the new bytes are copied into the local buffer, and every complete
+ * line (terminated by a NUL byte) is printed. A line that wraps past the end
+ * of the circular buffer is stitched together in a temporary buffer first.
+ * The read offset advances per printed line and is written back at the end.
+ *
+ * @return '0' on success, -EINVAL on invalid state or fetch failure,
+ * -ENOMEM when the temporary buffer for a wrapped line cannot be allocated.
+ */
+int nvgpu_falcon_dbg_buf_display(struct nvgpu_falcon *flcn)
+{
+	struct gk20a *g = flcn->g;
+	struct nvgpu_falcon_dbg_buf *debug_buffer = &flcn->debug_buffer;
+	u8 *buffer_data = debug_buffer->local_buf;
+	u32 itr_offset = debug_buffer->read_offset;
+	bool is_line_split = false;
+	u32 write_offset;
+
+	if (debug_buffer->local_buf == NULL) {
+		nvgpu_err(g, "Local Debug Buffer not allocated!");
+		return -EINVAL;
+	}
+
+	/* Read the register only after the buffer is known to be set up */
+	write_offset = nvgpu_readl(g, debug_buffer->write_offset_address);
+
+	if (!debug_buffer->first_msg_received) {
+		if (falcon_fetch_debug_buffer_metadata(flcn) != 0) {
+			nvgpu_err(g, "Failed to process debug buffer metadata!");
+			return -EINVAL;
+		}
+
+		debug_buffer->first_msg_received = true;
+	}
+
+	if (write_offset >= debug_buffer->buffer_size) {
+		nvgpu_err(g, "Invalid write offset (%u >= %u)",
+				  write_offset, debug_buffer->buffer_size);
+		nvgpu_err(g, "abort Debug buffer display");
+		return -EINVAL;
+	}
+
+	if (falcon_update_debug_buffer_from_dmem(flcn, write_offset) != 0) {
+		nvgpu_err(g, "Failed to fetch debug buffer contents");
+		return -EINVAL;
+	}
+
+	/* Buffer is empty when read_offset == write_offset */
+	while (itr_offset != write_offset) {
+		/* Null character is the newline marker in falcon firmware logs */
+		if (buffer_data[itr_offset] != '\0') {
+			itr_offset = (itr_offset + 1) % debug_buffer->buffer_size;
+			if (itr_offset == 0) {
+				is_line_split = true;
+			}
+		} else {
+			/* The line ending at itr_offset starts at read_offset */
+			u8 *line_start = &buffer_data[debug_buffer->read_offset];
+			u8 *curr_data = line_start;
+			u8 *tmp_buf = NULL;
+
+			if (is_line_split) {
+				/*
+				 * Concat the split line into a temp buffer: the tail
+				 * of the buffer from read_offset, then the part from
+				 * offset 0 up to the terminator just found.
+				 */
+				u32 first_chunk_len = strlen((char *)line_start);
+				u32 second_chunk_len = strlen((char *)&buffer_data[0]);
+
+				tmp_buf = nvgpu_kzalloc(g,
+						first_chunk_len + second_chunk_len + 1);
+				if (tmp_buf == NULL) {
+					nvgpu_err(g,
+						"Failed to alloc tmp buf for line-split print %d",
+						-ENOMEM);
+					return -ENOMEM;
+				}
+
+				nvgpu_memcpy(tmp_buf, line_start, first_chunk_len + 1);
+				strcat((char *)tmp_buf, (char *)&buffer_data[0]);
+
+				/* Set the byte array that gets printed as a string */
+				curr_data = tmp_buf;
+
+				/* Reset line-split flag */
+				is_line_split = false;
+			}
+
+			nvgpu_info(g, "Flcn-%d Async: %s", flcn->flcn_id, curr_data);
+
+			/* Cleanup in case we had to allocate a temp buffer */
+			if (tmp_buf != NULL) {
+				nvgpu_kfree(g, tmp_buf);
+			}
+
+			itr_offset = (itr_offset + 1) % debug_buffer->buffer_size;
+			debug_buffer->read_offset = itr_offset;
+		}
+	}
+
+	/* Publish the consumed position through the read offset register */
+	nvgpu_writel(g, debug_buffer->read_offset_address,
+			debug_buffer->read_offset);
+
+	return 0;
+}
diff --git a/drivers/gpu/nvgpu/common/falcon/falcon_debug.h b/drivers/gpu/nvgpu/common/falcon/falcon_debug.h
new file mode 100644
index 000000000..b6ae2660c
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/falcon/falcon_debug.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#ifndef NVGPU_FALCON_DEBUG_H
+#define NVGPU_FALCON_DEBUG_H
+
+struct nvgpu_falcon;
+
+struct nvgpu_falcon_dbg_buf_metadata {
+	/* Read offset updated by NVGPU */
+	u32 read_offset;
+
+	/* Write offset updated by firmware */
+	u32 write_offset;
+
+	/* Buffer size requested by firmware; NVGPU reads it from DMEM */
+	u32 buffer_size;
+
+	/* Magic number for header validation */
+	u32 magic;
+};
+
+#endif /* NVGPU_FALCON_DEBUG_H */
diff --git a/drivers/gpu/nvgpu/common/pmu/fw/fw_ns_bootstrap.c b/drivers/gpu/nvgpu/common/pmu/fw/fw_ns_bootstrap.c
index 80a164e3b..710a9b30e 100644
--- a/drivers/gpu/nvgpu/common/pmu/fw/fw_ns_bootstrap.c
+++ b/drivers/gpu/nvgpu/common/pmu/fw/fw_ns_bootstrap.c
@@ -27,6 +27,7 @@
 #include <nvgpu/pmu/fw.h>
 #include <nvgpu/pmu/clk/clk.h>
 #include <nvgpu/string.h>
+#include <nvgpu/falcon.h>
 
 static void pmu_free_ns_ucode_blob(struct gk20a *g)
 {
@@ -74,6 +75,20 @@ int nvgpu_pmu_ns_fw_bootstrap(struct gk20a *g, struct nvgpu_pmu *pmu)
 #if defined(CONFIG_NVGPU_NON_FUSA)
 	if (nvgpu_is_enabled(g, NVGPU_PMU_NEXT_CORE_ENABLED)) {
 		nvgpu_pmu_next_core_rtos_args_setup(g, pmu);
+
+#ifdef CONFIG_NVGPU_FALCON_DEBUG
+		err = nvgpu_falcon_dbg_buf_init(g->pmu->flcn,
+				NV_RISCV_DMESG_BUFFER_SIZE,
+				g->ops.pmu.pmu_get_queue_head(NV_RISCV_DEBUG_BUFFER_QUEUE),
+				g->ops.pmu.pmu_get_queue_tail(NV_RISCV_DEBUG_BUFFER_QUEUE));
+		if (err != 0) {
+			nvgpu_err(g,
+				"Failed to allocate NVRISCV PMU debug buffer status=0x%x)",
+				err);
+			return err;
+		}
+#endif
+
 	} else
 #endif
 	{
diff --git a/drivers/gpu/nvgpu/common/pmu/pmu_rtos_init.c b/drivers/gpu/nvgpu/common/pmu/pmu_rtos_init.c
index 725d5b454..c33b80782 100644
--- a/drivers/gpu/nvgpu/common/pmu/pmu_rtos_init.c
+++ b/drivers/gpu/nvgpu/common/pmu/pmu_rtos_init.c
@@ -34,6 +34,7 @@
 #include <nvgpu/pmu.h>
 #include <nvgpu/string.h>
 #include <nvgpu/pmu/clk/clk.h>
+#include <nvgpu/falcon.h>
 
 #include <nvgpu/pmu/mutex.h>
 #include <nvgpu/pmu/seq.h>
@@ -159,6 +160,13 @@ static void remove_pmu_support(struct nvgpu_pmu *pmu)
 		nvgpu_pmu_pstate_deinit(g);
 	}
 
+#ifdef CONFIG_NVGPU_FALCON_DEBUG
+	if (nvgpu_is_enabled(g, NVGPU_PMU_NEXT_CORE_ENABLED)) {
+		nvgpu_falcon_dbg_buf_display(pmu->flcn);
+		nvgpu_falcon_dbg_buf_destroy(pmu->flcn);
+	}
+#endif
+
 	nvgpu_pmu_debug_deinit(g, pmu);
 	nvgpu_pmu_lsfm_deinit(g, pmu, pmu->lsfm);
 #ifdef CONFIG_PMU_POWER_PG
@@ -418,6 +426,18 @@ int nvgpu_pmu_rtos_init(struct gk20a *g)
 
 #if defined(CONFIG_NVGPU_NON_FUSA)
 		if (nvgpu_is_enabled(g, NVGPU_PMU_NEXT_CORE_ENABLED)) {
+#ifdef CONFIG_NVGPU_FALCON_DEBUG
+			err = nvgpu_falcon_dbg_buf_init(g->pmu->flcn,
+					NV_RISCV_DMESG_BUFFER_SIZE,
+					g->ops.pmu.pmu_get_queue_head(NV_RISCV_DEBUG_BUFFER_QUEUE),
+					g->ops.pmu.pmu_get_queue_tail(NV_RISCV_DEBUG_BUFFER_QUEUE));
+			if (err != 0) {
+				nvgpu_err(g,
+					"Failed to allocate RISCV PMU debug buffer status=0x%x)",
+					err);
+				goto exit;
+			}
+#endif
 			g->ops.falcon.bootstrap(g->pmu->flcn, 0U);
 			err = nvgpu_pmu_wait_for_priv_lockdown_release(g,
 					g->pmu->flcn, U32_MAX);
diff --git a/drivers/gpu/nvgpu/hal/init/hal_ga100.c b/drivers/gpu/nvgpu/hal/init/hal_ga100.c
index 6a4b5235d..d446f6ebb 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_ga100.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_ga100.c
@@ -1297,6 +1297,7 @@ static const struct gops_pmu ga100_ops_pmu = {
 	.pmu_get_queue_tail = tu104_pmu_queue_tail_r,
 	.get_irqdest = gk20a_pmu_get_irqdest,
 	.handle_ext_irq = NULL,
+	.handle_swgen1_irq = NULL,
 	.is_debug_mode_enabled = gm20b_pmu_is_debug_mode_en,
 	.setup_apertures = tu104_pmu_setup_apertures,
 	.secured_pmu_start = gm20b_secured_pmu_start,
diff --git a/drivers/gpu/nvgpu/hal/init/hal_ga10b.c b/drivers/gpu/nvgpu/hal/init/hal_ga10b.c
index cd4939382..3cac00fa8 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_ga10b.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_ga10b.c
@@ -1289,6 +1289,7 @@ static const struct gops_pmu ga10b_ops_pmu = {
 	.pmu_destroy = nvgpu_pmu_destroy,
 	/* ISR */
 	.pmu_is_interrupted = gk20a_pmu_is_interrupted,
+	.handle_swgen1_irq = ga10b_pmu_handle_swgen1_irq,
 	/* queue */
 	.pmu_get_queue_head = gv11b_pmu_queue_head_r,
 	.pmu_get_queue_head_size = gv11b_pmu_queue_head__size_1_v,
diff --git a/drivers/gpu/nvgpu/hal/init/hal_gm20b.c b/drivers/gpu/nvgpu/hal/init/hal_gm20b.c
index f2229d30b..a985c240a 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_gm20b.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_gm20b.c
@@ -850,6 +850,7 @@ static const struct gops_pmu gm20b_ops_pmu = {
 	.pmu_ns_bootstrap = gk20a_pmu_ns_bootstrap,
 	.setup_apertures = gm20b_pmu_setup_apertures,
 	.secured_pmu_start = gm20b_secured_pmu_start,
+	.handle_swgen1_irq = NULL,
 };
 #endif
 
diff --git a/drivers/gpu/nvgpu/hal/init/hal_gp10b.c b/drivers/gpu/nvgpu/hal/init/hal_gp10b.c
index e41ef765a..02f6c1418 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_gp10b.c
@@ -940,6 +940,7 @@ static const struct gops_pmu gp10b_ops_pmu = {
 	.bar0_error_status = gk20a_pmu_bar0_error_status,
 	.flcn_setup_boot_config = gm20b_pmu_flcn_setup_boot_config,
 	.pmu_ns_bootstrap = gk20a_pmu_ns_bootstrap,
+	.handle_swgen1_irq = NULL,
 };
 #endif
 
diff --git a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c
index b5d807241..ff81447c6 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c
@@ -1126,6 +1126,7 @@ static const struct gops_pmu gv11b_ops_pmu = {
 	.pmu_destroy = nvgpu_pmu_destroy,
 	/* ISR */
 	.pmu_is_interrupted = gk20a_pmu_is_interrupted,
+	.handle_swgen1_irq = NULL,
 	/* queue */
 	.pmu_get_queue_head = gv11b_pmu_queue_head_r,
 	.pmu_get_queue_head_size = gv11b_pmu_queue_head__size_1_v,
diff --git a/drivers/gpu/nvgpu/hal/init/hal_tu104.c b/drivers/gpu/nvgpu/hal/init/hal_tu104.c
index 227d94aa5..870bfc925 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_tu104.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_tu104.c
@@ -1189,6 +1189,7 @@ static const struct gops_pmu tu104_ops_pmu = {
 	.pmu_get_queue_tail = tu104_pmu_queue_tail_r,
 	.get_irqdest = gk20a_pmu_get_irqdest,
 	.handle_ext_irq = gv11b_pmu_handle_ext_irq,
+	.handle_swgen1_irq = NULL,
 	.is_debug_mode_enabled = gm20b_pmu_is_debug_mode_en,
 	.setup_apertures = tu104_pmu_setup_apertures,
 	.secured_pmu_start = gm20b_secured_pmu_start,
diff --git a/drivers/gpu/nvgpu/hal/pmu/pmu_ga10b.c b/drivers/gpu/nvgpu/hal/pmu/pmu_ga10b.c
index 15e0b1298..134d9eee4 100644
--- a/drivers/gpu/nvgpu/hal/pmu/pmu_ga10b.c
+++ b/drivers/gpu/nvgpu/hal/pmu/pmu_ga10b.c
@@ -342,3 +342,19 @@ bool ga10b_pmu_is_debug_mode_en(struct gk20a *g)
 		return false;
 	}
 }
+
+/* On SWGEN1, print the PMU ucode debug buffer when the feature is built in. */
+void ga10b_pmu_handle_swgen1_irq(struct gk20a *g, u32 intr)
+{
+	if ((intr & pwr_falcon_irqstat_swgen1_true_f()) != 0U) {
+#ifdef CONFIG_NVGPU_FALCON_DEBUG
+		int err = nvgpu_falcon_dbg_buf_display(g->pmu->flcn);
+		if (err != 0) {
+			nvgpu_err(g, "nvgpu_falcon_dbg_buf_display failed err=%d",
+				err);
+		}
+#else
+		(void)g;
+#endif
+	}
+}
diff --git a/drivers/gpu/nvgpu/hal/pmu/pmu_ga10b.h b/drivers/gpu/nvgpu/hal/pmu/pmu_ga10b.h
index d87220d3a..4faeea65d 100644
--- a/drivers/gpu/nvgpu/hal/pmu/pmu_ga10b.h
+++ b/drivers/gpu/nvgpu/hal/pmu/pmu_ga10b.h
@@ -52,5 +52,6 @@ u32 ga10b_pmu_read_idle_counter(struct gk20a *g, u32 counter_id);
 void ga10b_pmu_reset_idle_counter(struct gk20a *g, u32 counter_id);
 u32 ga10b_pmu_get_irqmask(struct gk20a *g);
 bool ga10b_pmu_is_debug_mode_en(struct gk20a *g);
+void ga10b_pmu_handle_swgen1_irq(struct gk20a *g, u32 intr);
 
 #endif /* NVGPU_PMU_GA10B_H */
diff --git a/drivers/gpu/nvgpu/hal/pmu/pmu_gk20a.c b/drivers/gpu/nvgpu/hal/pmu/pmu_gk20a.c
index d55ccf472..a79220f0a 100644
--- a/drivers/gpu/nvgpu/hal/pmu/pmu_gk20a.c
+++ b/drivers/gpu/nvgpu/hal/pmu/pmu_gk20a.c
@@ -517,6 +517,10 @@ void gk20a_pmu_handle_interrupts(struct gk20a *g, u32 intr)
 				~pwr_falcon_exterrstat_valid_m());
 	}
 
+	if (g->ops.pmu.handle_swgen1_irq != NULL) {
+		g->ops.pmu.handle_swgen1_irq(g, intr);
+	}
+
 	if ((intr & pwr_falcon_irqstat_swgen0_true_f()) != 0U) {
 		err = nvgpu_pmu_process_message(pmu);
 		if (err != 0) {
diff --git a/drivers/gpu/nvgpu/include/nvgpu/falcon.h b/drivers/gpu/nvgpu/include/nvgpu/falcon.h
index 95405e333..14353bc20 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/falcon.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/falcon.h
@@ -207,6 +207,35 @@ enum falcon_mem_type {
 	MEM_IMEM
 };
 
+#ifdef CONFIG_NVGPU_FALCON_DEBUG
+/*
+ * Structure tracking information relevant to the firmware debug buffer.
+ */
+struct nvgpu_falcon_dbg_buf {
+	/* Offset in NVRISCV DMEM: the metadata at first, then the debug buffer */
+	u32 dmem_offset;
+
+	/*
+	 * Pointer to the local debug buffer copy in system memory,
+	 * into which nvgpu copies the data from NVRISCV DMEM.
+	 */
+	u8 *local_buf;
+
+	/* Last read offset for the circular debug buffer */
+	u32 read_offset;
+
+	/* Read/Write offset register addresses */
+	u32 read_offset_address;
+	u32 write_offset_address;
+
+	/* Flcn debug buffer size: the max size until the metadata is fetched */
+	u32 buffer_size;
+
+	/* Set once nvgpu gets the first message from FLCN */
+	bool first_msg_received;
+};
+#endif
+
 /**
  * This struct holds the falcon ops which are falcon engine specific.
  */
@@ -259,6 +288,9 @@ struct nvgpu_falcon {
 #endif
 	/** Functions for engine specific reset and memory access. */
 	struct nvgpu_falcon_engine_dependency_ops flcn_engine_dep_ops;
+#ifdef CONFIG_NVGPU_FALCON_DEBUG
+	struct nvgpu_falcon_dbg_buf debug_buffer;
+#endif
 };
 
 /**
@@ -726,4 +758,46 @@ void nvgpu_falcon_print_imem(struct nvgpu_falcon *flcn, u32 src, u32 size);
 void nvgpu_falcon_get_ctls(struct nvgpu_falcon *flcn, u32 *sctl, u32 *cpuctl);
 #endif
 
+#ifdef CONFIG_NVGPU_FALCON_DEBUG
+#define NV_RISCV_DEBUG_BUFFER_QUEUE   7U
+#define NV_RISCV_DMESG_BUFFER_SIZE    0x1000U
+
+/**
+ * @brief Allocate the local buffer that receives flcn firmware debug prints.
+ *
+ * @param flcn [in] The falcon.
+ * @param debug_buffer_max_size [in] Max debug buffer size, in bytes.
+ * @param write_reg_addr [in] Address of the write offset register.
+ * @param read_reg_addr [in] Address of the read offset register.
+ *
+ * @return '0' if initialization is successful, error otherwise.
+ */
+int nvgpu_falcon_dbg_buf_init(struct nvgpu_falcon *flcn,
+	u32 debug_buffer_max_size, u32 write_reg_addr, u32 read_reg_addr);
+
+/**
+ * @brief falcon debug buffer deinitialization.
+ *
+ * @param flcn [in] The falcon.
+ *
+ * Frees the local debug buffer, if allocated, and resets the debug buffer
+ * tracking state.
+ */
+void nvgpu_falcon_dbg_buf_destroy(struct nvgpu_falcon *flcn);
+
+/**
+ * @brief Display falcon firmware logs
+ *
+ * @param flcn [in] The falcon.
+ *
+ * This function reads the contents of flcn debug buffer filled by firmware.
+ * Logs are displayed line-by-line with label 'Flcn-<id> Async' signifying that
+ * these logs might be delayed and should be assumed as out-of-order when read
+ * alongside other client nvgpu logs.
+ *
+ * @return '0' if contents logged successfully, error otherwise.
+ */
+int nvgpu_falcon_dbg_buf_display(struct nvgpu_falcon *flcn);
+#endif
+
 #endif /* NVGPU_FALCON_H */
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gops/pmu.h b/drivers/gpu/nvgpu/include/nvgpu/gops/pmu.h
index dee0e38d4..29e82cc0c 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gops/pmu.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gops/pmu.h
@@ -312,6 +312,7 @@ struct gops_pmu {
 
 	/** @cond DOXYGEN_SHOULD_SKIP_THIS */
 	void (*handle_ext_irq)(struct gk20a *g, u32 intr);
+	void (*handle_swgen1_irq)(struct gk20a *g, u32 intr);
 
 	void (*pmu_enable_irq)(struct nvgpu_pmu *pmu, bool enable);
 	u32 (*get_irqdest)(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/ga10b/hw_pwr_ga10b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/ga10b/hw_pwr_ga10b.h
index 8e7b7e260..ff573664a 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/ga10b/hw_pwr_ga10b.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/ga10b/hw_pwr_ga10b.h
@@ -68,6 +68,7 @@
 #define pwr_falcon_irqstat_exterr_true_f()                               (0x20U)
 #define pwr_falcon_irqstat_swgen0_true_f()                               (0x40U)
 #define pwr_falcon_irqstat_ext_ecc_parity_true_f()                      (0x400U)
+#define pwr_falcon_irqstat_swgen1_true_f()                               (0x80U)
 #define pwr_pmu_ecc_intr_status_r()                                (0x0010abfcU)
 #define pwr_pmu_ecc_intr_status_corrected_m()                  (U32(0x1U) << 0U)
 #define pwr_pmu_ecc_intr_status_uncorrected_m()                (U32(0x1U) << 1U)