diff --git a/arch/nvgpu-common.yaml b/arch/nvgpu-common.yaml
index de1caa354..3a9543c92 100644
--- a/arch/nvgpu-common.yaml
+++ b/arch/nvgpu-common.yaml
@@ -364,11 +364,13 @@ falcon_fusa:
 falcon:
   owner: Sagar K
   safe: no
-  gpu: dgpu
+  gpu: both
   sources: [ common/falcon/falcon_sw_tu104.c,
              common/falcon/falcon_sw_tu104.h,
              common/falcon/falcon_sw_ga10b.c,
              common/falcon/falcon_sw_ga10b.h,
+             common/falcon/falcon_debug.c,
+             common/falcon/falcon_debug.h,
              include/nvgpu/gops/gsp.h,
              include/nvgpu/gops/nvdec.h ]
   deps: [ ]
diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
index a4aaa8857..ed38166c7 100644
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -573,6 +573,7 @@ nvgpu-y += \
 	common/mm/dma.o \
 	common/vbios/bios.o \
 	common/falcon/falcon.o \
+	common/falcon/falcon_debug.o \
 	common/falcon/falcon_sw_gk20a.o \
 	common/engine_queues/engine_mem_queue.o \
 	common/engine_queues/engine_dmem_queue.o \
@@ -804,7 +805,6 @@ nvgpu-$(CONFIG_NVGPU_HAL_NON_FUSA) += \
 	hal/vgpu/init/vgpu_hal_gv11b.o
 endif
 
-
 ifeq ($(CONFIG_NVGPU_HAL_NON_FUSA),y)
 nvgpu-y += \
 	common/falcon/falcon_sw_ga10b.o \
diff --git a/drivers/gpu/nvgpu/Makefile.linux.configs b/drivers/gpu/nvgpu/Makefile.linux.configs
index b45940975..63359c801 100644
--- a/drivers/gpu/nvgpu/Makefile.linux.configs
+++ b/drivers/gpu/nvgpu/Makefile.linux.configs
@@ -16,6 +16,9 @@ CONFIG_TEGRA_ACR := y
 # Support for debugger APIs
 CONFIG_NVGPU_DEBUGGER := y
 
+# Support for Falcon debugger APIs
+CONFIG_NVGPU_FALCON_DEBUG := y
+
 # Support for iGPU LS PMU enable/disable
 CONFIG_NVGPU_LS_PMU := y
 
@@ -163,6 +166,9 @@ endif
 ifeq ($(CONFIG_NVGPU_DEBUGGER),y)
 ccflags-y += -DCONFIG_NVGPU_DEBUGGER
 endif
+ifeq ($(CONFIG_NVGPU_FALCON_DEBUG),y)
+ccflags-y += -DCONFIG_NVGPU_FALCON_DEBUG
+endif
 ifeq ($(CONFIG_NVGPU_LS_PMU),y)
 ccflags-y += -DCONFIG_NVGPU_LS_PMU
 endif
diff --git a/drivers/gpu/nvgpu/Makefile.shared.configs b/drivers/gpu/nvgpu/Makefile.shared.configs
index ccc3f0d34..1e462cd75 100644
--- a/drivers/gpu/nvgpu/Makefile.shared.configs
+++ b/drivers/gpu/nvgpu/Makefile.shared.configs
@@ -112,6 +112,9 @@ NVGPU_COMMON_CFLAGS             += -DCONFIG_NVGPU_KERNEL_MODE_SUBMIT
 CONFIG_NVGPU_FALCON_NON_FUSA	:= 1
 NVGPU_COMMON_CFLAGS             += -DCONFIG_NVGPU_FALCON_NON_FUSA
 
+CONFIG_NVGPU_FALCON_DEBUG   := 1
+NVGPU_COMMON_CFLAGS             += -DCONFIG_NVGPU_FALCON_DEBUG
+
 CONFIG_NVGPU_GR_FALCON_NON_SECURE_BOOT := 1
 NVGPU_COMMON_CFLAGS             += -DCONFIG_NVGPU_GR_FALCON_NON_SECURE_BOOT
 
@@ -210,6 +213,9 @@ NVGPU_COMMON_CFLAGS             += -DCONFIG_NVGPU_VPR
 CONFIG_NVGPU_REPLAYABLE_FAULT   := 1
 NVGPU_COMMON_CFLAGS             += -DCONFIG_NVGPU_REPLAYABLE_FAULT
 
+CONFIG_NVGPU_FALCON_DEBUG       := 1
+NVGPU_COMMON_CFLAGS             += -DCONFIG_NVGPU_FALCON_DEBUG
+
 # Enable LS PMU support for normal build
 CONFIG_NVGPU_LS_PMU             := 1
 NVGPU_COMMON_CFLAGS             += -DCONFIG_NVGPU_LS_PMU
diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources
index 006f3a346..07121679d 100644
--- a/drivers/gpu/nvgpu/Makefile.sources
+++ b/drivers/gpu/nvgpu/Makefile.sources
@@ -489,6 +489,10 @@ ifdef NVGPU_FAULT_INJECTION_ENABLEMENT
 srcs += os/posix/posix-fault-injection.c
 endif
 
+ifeq ($(CONFIG_NVGPU_FALCON_DEBUG),1)
+srcs += common/falcon/falcon_debug.c
+endif
+
 ifeq ($(CONFIG_NVGPU_LS_PMU),1)
 # Add LS PMU files which are required for normal build
 srcs += \
diff --git a/drivers/gpu/nvgpu/common/falcon/falcon_debug.c b/drivers/gpu/nvgpu/common/falcon/falcon_debug.c
new file mode 100644
index 000000000..fc3f4754c
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/falcon/falcon_debug.c
@@ -0,0 +1,377 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <nvgpu/gk20a.h>
+#include <nvgpu/timers.h>
+#include <nvgpu/falcon.h>
+#include <nvgpu/io.h>
+#include <nvgpu/static_analysis.h>
+#include <nvgpu/string.h>
+
+#include "falcon_debug.h"
+
+#define NV_NVRISCV_DEBUG_BUFFER_MAGIC   0xf007ba11
+
+#define FLCN_DMEM_ACCESS_ALIGNMENT    (4)
+
+/* Both helpers below assume the alignment is a power of two. */
+#define NV_ALIGN_DOWN(v, g) ((v) & ~((g) - 1))
+#define NV_IS_ALIGNED(addr, align)	(((addr) & ((align) - 1U)) == 0U)
+
+/* Free the local buffer copy; reset size, offsets and first-message flag. */
+void nvgpu_falcon_dbg_buf_destroy(struct nvgpu_falcon *flcn)
+{
+	struct nvgpu_falcon_dbg_buf *dbg = &flcn->debug_buffer;
+	struct gk20a *g = flcn->g;
+
+	if (dbg->local_buf != NULL) {
+		nvgpu_kfree(g, dbg->local_buf);
+		dbg->local_buf = NULL;
+	}
+	dbg->buffer_size = 0;
+	dbg->read_offset = 0;
+	dbg->first_msg_received = false;
+	dbg->dmem_offset = g->ops.falcon.get_mem_size(flcn, MEM_DMEM);
+}
+
+/*
+ * Allocate (or reuse) the local copy buffer, aim dmem_offset at the metadata
+ * and record the offset registers. On error the state is destroyed again.
+ */
+int nvgpu_falcon_dbg_buf_init(struct nvgpu_falcon *flcn,
+	u32 debug_buffer_max_size, u32 write_reg_addr, u32 read_reg_addr)
+{
+	struct gk20a *g = flcn->g;
+	struct nvgpu_falcon_dbg_buf *debug_buffer = &flcn->debug_buffer;
+	int status = 0;
+
+	/*
+	 * Set the buffer size to its initial value of max size. It is refined
+	 * later once ucode informs us of the size it wants the buffer to be.
+	 */
+	debug_buffer->buffer_size = debug_buffer_max_size;
+	debug_buffer->first_msg_received = false;
+	debug_buffer->read_offset = 0;
+
+	if (debug_buffer->local_buf == NULL) {
+		/*
+		 * nvgpu-side copy of the nvriscv DMEM debug buffer. It is 1 byte
+		 * larger than the buffer to keep a null character at the end.
+		 */
+		debug_buffer->local_buf = nvgpu_kzalloc(g, debug_buffer_max_size + 1);
+		if (debug_buffer->local_buf == NULL) {
+			status = -ENOMEM;
+			nvgpu_err(g, "Failed to alloc memory for flcn debug buffer");
+			nvgpu_err(g, "status=0x%08x", status);
+			goto exit;
+		}
+	}
+
+	/* Clear the local buffer; a reused one may still hold old prints. */
+	memset(debug_buffer->local_buf, 0, debug_buffer_max_size + 1);
+
+	/*
+	 * The debug buffer sits at the very end of the available DMEM. Its
+	 * exact size is unknown until the ucode reports it, so for now only
+	 * cover the metadata at the end of the buffer.
+	 */
+	debug_buffer->dmem_offset = g->ops.falcon.get_mem_size(flcn, MEM_DMEM) -
+		sizeof(struct nvgpu_falcon_dbg_buf_metadata);
+
+	/* The DMEM offset must be 4-byte aligned */
+	if (!NV_IS_ALIGNED(debug_buffer->dmem_offset, FLCN_DMEM_ACCESS_ALIGNMENT)) {
+		nvgpu_err(g, "metadata DMEM offset is not 4-byte aligned.");
+		nvgpu_err(g, "dmem_offset=0x%08x", debug_buffer->dmem_offset);
+		status = -EINVAL;
+		goto exit;
+	}
+
+	/* The metadata size must be 4-byte aligned as well */
+	if (!NV_IS_ALIGNED(sizeof(struct nvgpu_falcon_dbg_buf_metadata),
+			FLCN_DMEM_ACCESS_ALIGNMENT)) {
+		nvgpu_err(g, "The debug buffer metadata size is not 4-byte aligned");
+		status =  -EINVAL;
+		goto exit;
+	}
+
+	debug_buffer->read_offset_address  = read_reg_addr;
+	debug_buffer->write_offset_address = write_reg_addr;
+
+exit:
+	if (status != 0) {
+		nvgpu_falcon_dbg_buf_destroy(flcn);
+	}
+	return status;
+}
+
+/*
+ * Pull everything the firmware wrote between the last read offset and
+ * write_offset from the nvriscv debug buffer in DMEM into the local buffer.
+ *
+ * @return '0' if data fetched successfully, error otherwise.
+ */
+static int falcon_update_debug_buffer_from_dmem(struct nvgpu_falcon *flcn,
+	u32 write_offset)
+{
+	struct nvgpu_falcon_dbg_buf *dbg = &flcn->debug_buffer;
+	struct gk20a *g = flcn->g;
+	u32 linear_len;
+	u32 wrapped_len;
+
+	/*
+	 * DMEM can only be read in 32-bit words, so round the read offset down.
+	 * dmem_offset itself is already aligned, and the write offset can stay
+	 * as it is because nvgpu_falcon_copy_from_dmem handles unaligned-size
+	 * reads.
+	 */
+	u32 start = NV_ALIGN_DOWN(dbg->read_offset,
+			FLCN_DMEM_ACCESS_ALIGNMENT);
+
+	if (write_offset < dbg->read_offset) {
+		/* Write offset has wrapped around, need two reads */
+		linear_len = dbg->buffer_size - start;
+		wrapped_len = write_offset;
+	} else {
+		linear_len = write_offset - start;
+		wrapped_len = 0;
+	}
+
+	if ((linear_len == 0) && (wrapped_len == 0)) {
+		nvgpu_err(g, "Debug buffer empty, can't read any data!");
+		return -EINVAL;
+	}
+
+	if (linear_len != 0) {
+		u32 src;
+		u8 *dst;
+
+		if (start + linear_len > dbg->buffer_size) {
+			nvgpu_err(g,
+				"Invalid read (first read) from print buffer attempted!");
+			return -EINVAL;
+		}
+
+		src = dbg->dmem_offset + start;
+		dst = dbg->local_buf + start;
+		if (nvgpu_falcon_copy_from_dmem(flcn, src, dst,
+			linear_len, 0) != 0) {
+			nvgpu_err(g, "Failed to copy debug buffer contents from DMEM");
+			return -EINVAL;
+		}
+	}
+
+	if (wrapped_len != 0) {
+		if (wrapped_len > dbg->buffer_size) {
+			nvgpu_err(g,
+				"Invalid read (second read) from print buffer attempted!");
+			return -EINVAL;
+		}
+
+		/*
+		 * The wrapped part starts at the beginning of the buffer;
+		 * dmem_offset is assumed to be aligned already.
+		 */
+		if (nvgpu_falcon_copy_from_dmem(flcn, dbg->dmem_offset,
+			dbg->local_buf, wrapped_len, 0) != 0) {
+			nvgpu_err(g,
+				"Failed to copy debug buffer contents from nvriscv DMEM");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Fetch the metadata at the end of the nvriscv DMEM debug buffer and check
+ * its magic, size and alignment. buffer_size and dmem_offset are updated
+ * only after all checks pass, so a failed fetch can be retried as-is.
+ */
+static int falcon_fetch_debug_buffer_metadata(struct nvgpu_falcon *flcn)
+{
+	struct gk20a *g = flcn->g;
+	struct nvgpu_falcon_dbg_buf *debug_buffer = &flcn->debug_buffer;
+	struct nvgpu_falcon_dbg_buf_metadata meta;
+	u32 new_dmem_offset;
+
+	/* DMEM offset will point to metadata initially */
+	if (nvgpu_falcon_copy_from_dmem(flcn, debug_buffer->dmem_offset,
+		(u8 *)&meta, sizeof(meta), 0) != 0) {
+		nvgpu_err(g, "Failed to copy debug buffer metadata from nvriscv DMEM");
+		return -EINVAL;
+	}
+
+	nvgpu_info(g, "metadata magic        - 0x%x", meta.magic);
+	nvgpu_info(g, "metadata buffer size  - 0x%x", meta.buffer_size);
+	nvgpu_info(g, "metadata write offset - 0x%x", meta.write_offset);
+	nvgpu_info(g, "metadata read offset  - 0x%x", meta.read_offset);
+
+	if (meta.magic != NV_NVRISCV_DEBUG_BUFFER_MAGIC) {
+		nvgpu_err(g, "Failed to verify magic number in debug buffer");
+		nvgpu_err(g, " metadata copied from nvriscv DMEM");
+		return -EINVAL;
+	}
+
+	/* Must fit the local copy and the DMEM space below the metadata. */
+	if (meta.buffer_size >= debug_buffer->buffer_size ||
+		meta.buffer_size > debug_buffer->dmem_offset) {
+		nvgpu_err(g, "Debug buffer size requested by ucode too big!");
+		return -EINVAL;
+	}
+
+	/* The DMEM buffer size must be 4-byte aligned */
+	if (!NV_IS_ALIGNED(meta.buffer_size, FLCN_DMEM_ACCESS_ALIGNMENT)) {
+		nvgpu_err(g, "The debug buffer size is not 4-byte aligned");
+		nvgpu_err(g, "buffer_size=0x%08x", meta.buffer_size);
+		return -EINVAL;
+	}
+
+	/*
+	 * The buffer ends where the metadata starts. The metadata is not
+	 * overwritten since NVGPU might want to use it to pass read and write
+	 * offsets if no registers are available.
+	 */
+	new_dmem_offset = debug_buffer->dmem_offset - meta.buffer_size;
+	if (!NV_IS_ALIGNED(new_dmem_offset, FLCN_DMEM_ACCESS_ALIGNMENT)) {
+		nvgpu_err(g, "The debug buffer DMEM offset is not 4-byte aligned.");
+		nvgpu_err(g, " dmem_offset=0x%08x", new_dmem_offset);
+		return -EINVAL;
+	}
+
+	/* All checks passed: commit the refined buffer geometry. */
+	debug_buffer->buffer_size = meta.buffer_size;
+	debug_buffer->dmem_offset = new_dmem_offset;
+
+	return 0;
+}
+
+/*
+ * Print the firmware log lines that arrived in the falcon debug buffer since
+ * the previous call.
+ *
+ * Until it has succeeded once, the buffer metadata is fetched from DMEM first
+ * to learn the buffer size and location. The new data is then copied into
+ * local_buf, each null-terminated line is printed with nvgpu_info() and the
+ * updated read offset is written to the read offset register.
+ *
+ * @return '0' if contents logged successfully, -EINVAL or -ENOMEM otherwise.
+ */
+int nvgpu_falcon_dbg_buf_display(struct nvgpu_falcon *flcn)
+{
+	struct gk20a *g = flcn->g;
+	struct nvgpu_falcon_dbg_buf *debug_buffer = &flcn->debug_buffer;
+	u8  *buffer_data  = debug_buffer->local_buf;
+	u32 itr_offset    = debug_buffer->read_offset;
+	u32 write_offset;
+	bool is_line_split = false;
+
+	if (debug_buffer->local_buf == NULL) {
+		nvgpu_err(g, "Local Debug Buffer not allocated!");
+		return -EINVAL;
+	}
+
+	/*
+	 * Read the write offset only after the buffer check above: the offset
+	 * register addresses are recorded at the end of a successful init,
+	 * while a failed init frees local_buf.
+	 */
+	write_offset = nvgpu_readl(g, debug_buffer->write_offset_address);
+
+	if (!debug_buffer->first_msg_received) {
+		if (falcon_fetch_debug_buffer_metadata(flcn) != 0) {
+			nvgpu_err(g, "Failed to process debug buffer metadata!");
+			return -EINVAL;
+		}
+
+		debug_buffer->first_msg_received = true;
+	}
+
+	if (write_offset >= debug_buffer->buffer_size) {
+		nvgpu_err(g, "Invalid write offset (%u >= %u)",
+				  write_offset, debug_buffer->buffer_size);
+		nvgpu_err(g, "abort Debug buffer display");
+		return -EINVAL;
+	}
+
+	if (falcon_update_debug_buffer_from_dmem(flcn, write_offset) != 0) {
+		nvgpu_err(g, "Failed to fetch debug buffer contents");
+		return -EINVAL;
+	}
+
+	/* Buffer is empty when read_offset == write_offset */
+	while (itr_offset != write_offset) {
+		u8 *curr_data = &buffer_data[debug_buffer->read_offset];
+		u8 *tmp_buf   = NULL;
+
+		/* Null character is the newline marker in falcon firmware logs */
+		if (buffer_data[itr_offset] != '\0') {
+			itr_offset = (itr_offset + 1) % debug_buffer->buffer_size;
+			if (itr_offset == 0) {
+				is_line_split = true;
+			}
+			continue;
+		}
+
+		if (is_line_split) {
+			/*
+			 * Concat the split line into a temp buffer: first the
+			 * chunk at the buffer end, then the one at its start.
+			 */
+			u32 first_chunk_len  = strlen((char *)curr_data);
+			u32 second_chunk_len = strlen((char *)&buffer_data[0]);
+
+			tmp_buf = nvgpu_kzalloc(g,
+					first_chunk_len + second_chunk_len + 1);
+			if (tmp_buf == NULL) {
+				nvgpu_err(g,
+					"Failed to alloc tmp buf for line-split print %d",
+					-ENOMEM);
+				return -ENOMEM;
+			}
+
+			nvgpu_memcpy(tmp_buf, curr_data, first_chunk_len + 1);
+			strcat((char *)tmp_buf, (char *)&buffer_data[0]);
+
+			/* Print the joined copy instead of the raw buffer */
+			curr_data = tmp_buf;
+
+			/* Reset line-split flag */
+			is_line_split = false;
+		}
+
+		nvgpu_info(g, "Flcn-%d Async: %s", flcn->flcn_id, curr_data);
+
+		/* Cleanup in case we had to allocate a temp buffer */
+		if (tmp_buf != NULL) {
+			nvgpu_kfree(g, tmp_buf);
+		}
+
+		/* Step over the terminator; the next line starts here */
+		itr_offset = (itr_offset + 1) % debug_buffer->buffer_size;
+		debug_buffer->read_offset = itr_offset;
+	}
+
+	nvgpu_writel(g, debug_buffer->read_offset_address,
+			debug_buffer->read_offset);
+
+	return 0;
+}
diff --git a/drivers/gpu/nvgpu/common/falcon/falcon_debug.h b/drivers/gpu/nvgpu/common/falcon/falcon_debug.h
new file mode 100644
index 000000000..b6ae2660c
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/falcon/falcon_debug.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#ifndef NVGPU_FALCON_DEBUG_H
+#define NVGPU_FALCON_DEBUG_H
+
+struct nvgpu_falcon;
+
+struct nvgpu_falcon_dbg_buf_metadata {
+	/* Read offset updated by NVGPU */
+	u32 read_offset;
+
+	/* Write offset updated by firmware */
+	u32 write_offset;
+
+	/* Buffer size requested by firmware; NVGPU reads it on first display */
+	u32 buffer_size;
+
+	/* Magic number for header validation */
+	u32 magic;
+};
+
+#endif /* NVGPU_FALCON_DEBUG_H */
diff --git a/drivers/gpu/nvgpu/common/pmu/fw/fw_ns_bootstrap.c b/drivers/gpu/nvgpu/common/pmu/fw/fw_ns_bootstrap.c
index 80a164e3b..710a9b30e 100644
--- a/drivers/gpu/nvgpu/common/pmu/fw/fw_ns_bootstrap.c
+++ b/drivers/gpu/nvgpu/common/pmu/fw/fw_ns_bootstrap.c
@@ -27,6 +27,7 @@
 #include <nvgpu/pmu/fw.h>
 #include <nvgpu/pmu/clk/clk.h>
 #include <nvgpu/string.h>
+#include <nvgpu/falcon.h>
 
 static void pmu_free_ns_ucode_blob(struct gk20a *g)
 {
@@ -74,6 +75,20 @@ int nvgpu_pmu_ns_fw_bootstrap(struct gk20a *g, struct nvgpu_pmu *pmu)
 #if defined(CONFIG_NVGPU_NON_FUSA)
 	if (nvgpu_is_enabled(g, NVGPU_PMU_NEXT_CORE_ENABLED)) {
 		nvgpu_pmu_next_core_rtos_args_setup(g, pmu);
+
+#ifdef CONFIG_NVGPU_FALCON_DEBUG
+		err = nvgpu_falcon_dbg_buf_init(g->pmu->flcn,
+				NV_RISCV_DMESG_BUFFER_SIZE,
+				g->ops.pmu.pmu_get_queue_head(NV_RISCV_DEBUG_BUFFER_QUEUE),
+				g->ops.pmu.pmu_get_queue_tail(NV_RISCV_DEBUG_BUFFER_QUEUE));
+		if (err != 0) {
+			nvgpu_err(g,
+				"Failed to allocate NVRISCV PMU debug buffer status=0x%x)",
+				err);
+			return err;
+		}
+#endif
+
 	} else
 #endif
 	{
diff --git a/drivers/gpu/nvgpu/common/pmu/pmu_rtos_init.c b/drivers/gpu/nvgpu/common/pmu/pmu_rtos_init.c
index 725d5b454..c33b80782 100644
--- a/drivers/gpu/nvgpu/common/pmu/pmu_rtos_init.c
+++ b/drivers/gpu/nvgpu/common/pmu/pmu_rtos_init.c
@@ -34,6 +34,7 @@
 #include <nvgpu/pmu.h>
 #include <nvgpu/string.h>
 #include <nvgpu/pmu/clk/clk.h>
+#include <nvgpu/falcon.h>
 
 #include <nvgpu/pmu/mutex.h>
 #include <nvgpu/pmu/seq.h>
@@ -159,6 +160,13 @@ static void remove_pmu_support(struct nvgpu_pmu *pmu)
 		nvgpu_pmu_pstate_deinit(g);
 	}
 
+#ifdef CONFIG_NVGPU_FALCON_DEBUG
+	if (nvgpu_is_enabled(g, NVGPU_PMU_NEXT_CORE_ENABLED)) {
+		nvgpu_falcon_dbg_buf_display(pmu->flcn);
+		nvgpu_falcon_dbg_buf_destroy(pmu->flcn);
+	}
+#endif
+
 	nvgpu_pmu_debug_deinit(g, pmu);
 	nvgpu_pmu_lsfm_deinit(g, pmu, pmu->lsfm);
 #ifdef CONFIG_PMU_POWER_PG
@@ -418,6 +426,18 @@ int nvgpu_pmu_rtos_init(struct gk20a *g)
 
 #if defined(CONFIG_NVGPU_NON_FUSA)
 		if (nvgpu_is_enabled(g, NVGPU_PMU_NEXT_CORE_ENABLED)) {
+#ifdef CONFIG_NVGPU_FALCON_DEBUG
+			err = nvgpu_falcon_dbg_buf_init(g->pmu->flcn,
+					NV_RISCV_DMESG_BUFFER_SIZE,
+					g->ops.pmu.pmu_get_queue_head(NV_RISCV_DEBUG_BUFFER_QUEUE),
+					g->ops.pmu.pmu_get_queue_tail(NV_RISCV_DEBUG_BUFFER_QUEUE));
+			if (err != 0) {
+				nvgpu_err(g,
+					"Failed to allocate RISCV PMU debug buffer status=0x%x)",
+					err);
+				goto exit;
+			}
+#endif
 			g->ops.falcon.bootstrap(g->pmu->flcn, 0U);
 			err = nvgpu_pmu_wait_for_priv_lockdown_release(g,
 					g->pmu->flcn, U32_MAX);
diff --git a/drivers/gpu/nvgpu/hal/init/hal_ga100.c b/drivers/gpu/nvgpu/hal/init/hal_ga100.c
index 6a4b5235d..d446f6ebb 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_ga100.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_ga100.c
@@ -1297,6 +1297,7 @@ static const struct gops_pmu ga100_ops_pmu = {
 	.pmu_get_queue_tail = tu104_pmu_queue_tail_r,
 	.get_irqdest = gk20a_pmu_get_irqdest,
 	.handle_ext_irq = NULL,
+	.handle_swgen1_irq = NULL,
 	.is_debug_mode_enabled = gm20b_pmu_is_debug_mode_en,
 	.setup_apertures = tu104_pmu_setup_apertures,
 	.secured_pmu_start = gm20b_secured_pmu_start,
diff --git a/drivers/gpu/nvgpu/hal/init/hal_ga10b.c b/drivers/gpu/nvgpu/hal/init/hal_ga10b.c
index cd4939382..3cac00fa8 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_ga10b.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_ga10b.c
@@ -1289,6 +1289,7 @@ static const struct gops_pmu ga10b_ops_pmu = {
 	.pmu_destroy = nvgpu_pmu_destroy,
 	/* ISR */
 	.pmu_is_interrupted = gk20a_pmu_is_interrupted,
+	.handle_swgen1_irq = ga10b_pmu_handle_swgen1_irq,
 	/* queue */
 	.pmu_get_queue_head = gv11b_pmu_queue_head_r,
 	.pmu_get_queue_head_size = gv11b_pmu_queue_head__size_1_v,
diff --git a/drivers/gpu/nvgpu/hal/init/hal_gm20b.c b/drivers/gpu/nvgpu/hal/init/hal_gm20b.c
index f2229d30b..a985c240a 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_gm20b.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_gm20b.c
@@ -850,6 +850,7 @@ static const struct gops_pmu gm20b_ops_pmu = {
 	.pmu_ns_bootstrap = gk20a_pmu_ns_bootstrap,
 	.setup_apertures = gm20b_pmu_setup_apertures,
 	.secured_pmu_start = gm20b_secured_pmu_start,
+	.handle_swgen1_irq = NULL,
 };
 #endif
 
diff --git a/drivers/gpu/nvgpu/hal/init/hal_gp10b.c b/drivers/gpu/nvgpu/hal/init/hal_gp10b.c
index e41ef765a..02f6c1418 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_gp10b.c
@@ -940,6 +940,7 @@ static const struct gops_pmu gp10b_ops_pmu = {
 	.bar0_error_status = gk20a_pmu_bar0_error_status,
 	.flcn_setup_boot_config = gm20b_pmu_flcn_setup_boot_config,
 	.pmu_ns_bootstrap = gk20a_pmu_ns_bootstrap,
+	.handle_swgen1_irq = NULL,
 };
 #endif
 
diff --git a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c
index b5d807241..ff81447c6 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c
@@ -1126,6 +1126,7 @@ static const struct gops_pmu gv11b_ops_pmu = {
 	.pmu_destroy = nvgpu_pmu_destroy,
 	/* ISR */
 	.pmu_is_interrupted = gk20a_pmu_is_interrupted,
+	.handle_swgen1_irq = NULL,
 	/* queue */
 	.pmu_get_queue_head = gv11b_pmu_queue_head_r,
 	.pmu_get_queue_head_size = gv11b_pmu_queue_head__size_1_v,
diff --git a/drivers/gpu/nvgpu/hal/init/hal_tu104.c b/drivers/gpu/nvgpu/hal/init/hal_tu104.c
index 227d94aa5..870bfc925 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_tu104.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_tu104.c
@@ -1189,6 +1189,7 @@ static const struct gops_pmu tu104_ops_pmu = {
 	.pmu_get_queue_tail = tu104_pmu_queue_tail_r,
 	.get_irqdest = gk20a_pmu_get_irqdest,
 	.handle_ext_irq = gv11b_pmu_handle_ext_irq,
+	.handle_swgen1_irq = NULL,
 	.is_debug_mode_enabled = gm20b_pmu_is_debug_mode_en,
 	.setup_apertures = tu104_pmu_setup_apertures,
 	.secured_pmu_start = gm20b_secured_pmu_start,
diff --git a/drivers/gpu/nvgpu/hal/pmu/pmu_ga10b.c b/drivers/gpu/nvgpu/hal/pmu/pmu_ga10b.c
index 15e0b1298..134d9eee4 100644
--- a/drivers/gpu/nvgpu/hal/pmu/pmu_ga10b.c
+++ b/drivers/gpu/nvgpu/hal/pmu/pmu_ga10b.c
@@ -342,3 +342,19 @@ bool ga10b_pmu_is_debug_mode_en(struct gk20a *g)
 		return false;
 	}
 }
+
+/* On a SWGEN1 interrupt, print the pending falcon firmware debug buffer. */
+void ga10b_pmu_handle_swgen1_irq(struct gk20a *g, u32 intr)
+{
+	int err = 0;
+
+	if ((intr & pwr_falcon_irqstat_swgen1_true_f()) != 0U) {
+#ifdef CONFIG_NVGPU_FALCON_DEBUG
+		err = nvgpu_falcon_dbg_buf_display(g->pmu->flcn);
+#endif
+	}
+
+	if (err != 0) {
+		nvgpu_err(g, "nvgpu_falcon_dbg_buf_display failed err=%d", err);
+	}
+}
diff --git a/drivers/gpu/nvgpu/hal/pmu/pmu_ga10b.h b/drivers/gpu/nvgpu/hal/pmu/pmu_ga10b.h
index d87220d3a..4faeea65d 100644
--- a/drivers/gpu/nvgpu/hal/pmu/pmu_ga10b.h
+++ b/drivers/gpu/nvgpu/hal/pmu/pmu_ga10b.h
@@ -52,5 +52,6 @@ u32 ga10b_pmu_read_idle_counter(struct gk20a *g, u32 counter_id);
 void ga10b_pmu_reset_idle_counter(struct gk20a *g, u32 counter_id);
 u32 ga10b_pmu_get_irqmask(struct gk20a *g);
 bool ga10b_pmu_is_debug_mode_en(struct gk20a *g);
+void ga10b_pmu_handle_swgen1_irq(struct gk20a *g, u32 intr);
 
 #endif /* NVGPU_PMU_GA10B_H */
diff --git a/drivers/gpu/nvgpu/hal/pmu/pmu_gk20a.c b/drivers/gpu/nvgpu/hal/pmu/pmu_gk20a.c
index d55ccf472..a79220f0a 100644
--- a/drivers/gpu/nvgpu/hal/pmu/pmu_gk20a.c
+++ b/drivers/gpu/nvgpu/hal/pmu/pmu_gk20a.c
@@ -517,6 +517,10 @@ void gk20a_pmu_handle_interrupts(struct gk20a *g, u32 intr)
 				~pwr_falcon_exterrstat_valid_m());
 	}
 
+	if (g->ops.pmu.handle_swgen1_irq != NULL) {
+		g->ops.pmu.handle_swgen1_irq(g, intr);
+	}
+
 	if ((intr & pwr_falcon_irqstat_swgen0_true_f()) != 0U) {
 		err = nvgpu_pmu_process_message(pmu);
 		if (err != 0) {
diff --git a/drivers/gpu/nvgpu/include/nvgpu/falcon.h b/drivers/gpu/nvgpu/include/nvgpu/falcon.h
index 95405e333..14353bc20 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/falcon.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/falcon.h
@@ -207,6 +207,35 @@ enum falcon_mem_type {
 	MEM_IMEM
 };
 
+#ifdef CONFIG_NVGPU_FALCON_DEBUG
+/*
+ * Structure tracking information relevant to firmware debug buffer.
+ */
+struct nvgpu_falcon_dbg_buf {
+	/* Offset to debug buffer in NVRISCV DMEM */
+	u32 dmem_offset;
+
+	/*
+	 * Pointer to the local debug buffer copy in system memory
+	 * into which nvgpu copies the data from NVRISCV DMEM.
+	 */
+	u8 *local_buf;
+
+	/* Last read offset for the circular debug buffer */
+	u32 read_offset;
+
+	/* Read/Write offset register addresses */
+	u32 read_offset_address;
+	u32 write_offset_address;
+
+	/* Flcn debug buffer size */
+	u32 buffer_size;
+
+	/* Set once nvgpu has processed the first message from FLCN */
+	bool first_msg_received;
+};
+#endif
+
 /**
  * This struct holds the falcon ops which are falcon engine specific.
  */
@@ -259,6 +288,9 @@ struct nvgpu_falcon {
 #endif
 	/** Functions for engine specific reset and memory access. */
 	struct nvgpu_falcon_engine_dependency_ops flcn_engine_dep_ops;
+#ifdef CONFIG_NVGPU_FALCON_DEBUG
+	struct nvgpu_falcon_dbg_buf debug_buffer;
+#endif
 };
 
 /**
@@ -726,4 +758,46 @@ void nvgpu_falcon_print_imem(struct nvgpu_falcon *flcn, u32 src, u32 size);
 void nvgpu_falcon_get_ctls(struct nvgpu_falcon *flcn, u32 *sctl, u32 *cpuctl);
 #endif
 
+#ifdef CONFIG_NVGPU_FALCON_DEBUG
+#define NV_RISCV_DEBUG_BUFFER_QUEUE   7U
+#define NV_RISCV_DMESG_BUFFER_SIZE    0x1000U
+
+/**
+ * @brief Allocate the local buffer receiving falcon firmware debug prints.
+ *
+ * @param flcn [in] The falcon.
+ * @param debug_buffer_max_size [in] Upper bound of the debug buffer size.
+ * @param write_reg_addr [in] Address of the write offset register.
+ * @param read_reg_addr [in] Address of the read offset register.
+ *
+ * @return '0' if initialization is successful, error otherwise.
+ */
+int nvgpu_falcon_dbg_buf_init(struct nvgpu_falcon *flcn,
+	u32 debug_buffer_max_size, u32 write_reg_addr, u32 read_reg_addr);
+
+/**
+ * @brief falcon debug buffer deinitialization.
+ *
+ * @param flcn [in] The falcon.
+ *
+ * Frees the local falcon debug buffer and resets its tracking state.
+ *
+ */
+void nvgpu_falcon_dbg_buf_destroy(struct nvgpu_falcon *flcn);
+
+/**
+ * @brief Display falcon firmware logs
+ *
+ * @param flcn [in] The falcon.
+ *
+ * This function reads the contents of flcn debug buffer filled by firmware.
+ * Logs are displayed line-by-line with label 'Flcn-<id> Async' signifying that
+ * these logs might be delayed and should be assumed as out-of-order when read
+ * alongside other client nvgpu logs.
+ *
+ * @return '0' if contents logged successfully, error otherwise.
+ */
+int nvgpu_falcon_dbg_buf_display(struct nvgpu_falcon *flcn);
+#endif
+
 #endif /* NVGPU_FALCON_H */
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gops/pmu.h b/drivers/gpu/nvgpu/include/nvgpu/gops/pmu.h
index dee0e38d4..29e82cc0c 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gops/pmu.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gops/pmu.h
@@ -312,6 +312,7 @@ struct gops_pmu {
 
 	/** @cond DOXYGEN_SHOULD_SKIP_THIS */
 	void (*handle_ext_irq)(struct gk20a *g, u32 intr);
+	void (*handle_swgen1_irq)(struct gk20a *g, u32 intr);
 
 	void (*pmu_enable_irq)(struct nvgpu_pmu *pmu, bool enable);
 	u32 (*get_irqdest)(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/include/nvgpu/hw/ga10b/hw_pwr_ga10b.h b/drivers/gpu/nvgpu/include/nvgpu/hw/ga10b/hw_pwr_ga10b.h
index 8e7b7e260..ff573664a 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/hw/ga10b/hw_pwr_ga10b.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/hw/ga10b/hw_pwr_ga10b.h
@@ -68,6 +68,7 @@
 #define pwr_falcon_irqstat_exterr_true_f()                               (0x20U)
 #define pwr_falcon_irqstat_swgen0_true_f()                               (0x40U)
 #define pwr_falcon_irqstat_ext_ecc_parity_true_f()                      (0x400U)
+#define pwr_falcon_irqstat_swgen1_true_f()                               (0x80U)
 #define pwr_pmu_ecc_intr_status_r()                                (0x0010abfcU)
 #define pwr_pmu_ecc_intr_status_corrected_m()                  (U32(0x1U) << 0U)
 #define pwr_pmu_ecc_intr_status_uncorrected_m()                (U32(0x1U) << 1U)