gpu: nvgpu: support nvriscv debug feature

Enable the nvriscv debug buffer feature in NVGPU.

The debug buffer feature prints debug logs from the ucode onto the
console in real time. It uses DMEM, a queue and the SWGEN1 interrupt to
share ucode debug data with NVGPU: the ucode writes debug messages into
DMEM and updates the offset in the queue to raise an interrupt to NVGPU,
which then copies the messages from DMEM into a local buffer, processes
them and prints them onto the console.
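
For illustration, the flow can be restated as a toy, self-contained
model (this is not driver code: a plain array stands in for the NVRISCV
DMEM debug buffer, two variables stand in for the queue head/tail
registers, a direct call replaces the SWGEN1 interrupt, and wraparound
handling is omitted):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define BUF_SIZE 64U

static char dmem_buf[BUF_SIZE]; /* stands in for the DMEM debug buffer */
static uint32_t write_offset;   /* "queue head", advanced by the ucode */
static uint32_t read_offset;    /* "queue tail", advanced by NVGPU     */

/*
 * Ucode side: append a NUL-terminated message, advance the write offset
 * and (on real hardware) raise SWGEN1 to notify the driver.
 */
static void ucode_log(const char *msg)
{
	size_t len = strlen(msg) + 1U;

	memcpy(&dmem_buf[write_offset], msg, len);
	write_offset += (uint32_t)len;
}

/*
 * NVGPU side (SWGEN1 handler): drain everything between the read and
 * write offsets, printing one NUL-terminated line at a time.
 */
static void nvgpu_drain(void)
{
	while (read_offset != write_offset) {
		printf("Flcn Async: %s\n", &dmem_buf[read_offset]);
		read_offset += (uint32_t)strlen(&dmem_buf[read_offset]) + 1U;
	}
}

int main(void)
{
	ucode_log("pmu boot done");
	ucode_log("queues ready");
	nvgpu_drain();
	return 0;
}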

The debug buffer feature is added under the falcon unit; any engine that
needs it can use the feature by passing the required parameters through
the public functions.

The GA10B NVRISCV NS/LS PMU ucode already supports this feature, and
this change adds the required support on the NVGPU side. With the
feature enabled, ucode prints can now be seen in real time.

JIRA NVGPU-6959

Change-Id: I9d46020470285b490b6bc876204f62698055b1ec
Signed-off-by: mkumbar <mkumbar@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2548951
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: svc-mobile-misra <svc-mobile-misra@nvidia.com>
Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
mkumbar authored on 2021-06-30 11:23:39 +05:30; committed by mobile promotions
parent 7ce01d3d1d
commit 87984ea344
21 changed files with 576 additions and 2 deletions

@@ -364,11 +364,13 @@ falcon_fusa:
 falcon:
   owner: Sagar K
   safe: no
-  gpu: dgpu
+  gpu: both
   sources: [ common/falcon/falcon_sw_tu104.c,
              common/falcon/falcon_sw_tu104.h,
              common/falcon/falcon_sw_ga10b.c,
              common/falcon/falcon_sw_ga10b.h,
+             common/falcon/falcon_debug.c,
+             common/falcon/falcon_debug.h,
              include/nvgpu/gops/gsp.h,
              include/nvgpu/gops/nvdec.h ]
   deps: [ ]

@@ -573,6 +573,7 @@ nvgpu-y += \
 	common/mm/dma.o \
 	common/vbios/bios.o \
 	common/falcon/falcon.o \
+	common/falcon/falcon_debug.o \
 	common/falcon/falcon_sw_gk20a.o \
 	common/engine_queues/engine_mem_queue.o \
 	common/engine_queues/engine_dmem_queue.o \
@@ -804,7 +805,6 @@ nvgpu-$(CONFIG_NVGPU_HAL_NON_FUSA) += \
 	hal/vgpu/init/vgpu_hal_gv11b.o
 endif
 ifeq ($(CONFIG_NVGPU_HAL_NON_FUSA),y)
 nvgpu-y += \
 	common/falcon/falcon_sw_ga10b.o \

@@ -16,6 +16,9 @@ CONFIG_TEGRA_ACR := y
 # Support for debugger APIs
 CONFIG_NVGPU_DEBUGGER := y
 
+# Support for Falcon debugger APIs
+CONFIG_NVGPU_FALCON_DEBUG := y
+
 # Support for iGPU LS PMU enable/disable
 CONFIG_NVGPU_LS_PMU := y
@@ -163,6 +166,9 @@ endif
 ifeq ($(CONFIG_NVGPU_DEBUGGER),y)
 ccflags-y += -DCONFIG_NVGPU_DEBUGGER
 endif
+ifeq ($(CONFIG_NVGPU_FALCON_DEBUG),y)
+ccflags-y += -DCONFIG_NVGPU_FALCON_DEBUG
+endif
 ifeq ($(CONFIG_NVGPU_LS_PMU),y)
 ccflags-y += -DCONFIG_NVGPU_LS_PMU
 endif

@@ -112,6 +112,9 @@ NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_KERNEL_MODE_SUBMIT
 CONFIG_NVGPU_FALCON_NON_FUSA := 1
 NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_FALCON_NON_FUSA
 
+CONFIG_NVGPU_FALCON_DEBUG := 1
+NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_FALCON_DEBUG
+
 CONFIG_NVGPU_GR_FALCON_NON_SECURE_BOOT := 1
 NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_GR_FALCON_NON_SECURE_BOOT
@@ -210,6 +213,9 @@ NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_VPR
 CONFIG_NVGPU_REPLAYABLE_FAULT := 1
 NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_REPLAYABLE_FAULT
 
+CONFIG_NVGPU_FALCON_DEBUG := 1
+NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_FALCON_DEBUG
+
 # Enable LS PMU support for normal build
 CONFIG_NVGPU_LS_PMU := 1
 NVGPU_COMMON_CFLAGS += -DCONFIG_NVGPU_LS_PMU

@@ -489,6 +489,10 @@ ifdef NVGPU_FAULT_INJECTION_ENABLEMENT
 srcs += os/posix/posix-fault-injection.c
 endif
 
+ifeq ($(CONFIG_NVGPU_FALCON_DEBUG),1)
+srcs += common/falcon/falcon_debug.c
+endif
+
 ifeq ($(CONFIG_NVGPU_LS_PMU),1)
 # Add LS PMU files which are required for normal build
 srcs += \

@@ -0,0 +1,377 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <nvgpu/gk20a.h>
#include <nvgpu/timers.h>
#include <nvgpu/falcon.h>
#include <nvgpu/io.h>
#include <nvgpu/static_analysis.h>
#include <nvgpu/string.h>
#include "falcon_debug.h"
#define NV_NVRISCV_DEBUG_BUFFER_MAGIC 0xf007ba11
#define FLCN_DMEM_ACCESS_ALIGNMENT (4)
#define NV_ALIGN_DOWN(v, g) ((v) & ~((g) - 1))
#define NV_IS_ALIGNED(addr, align) ((addr & (align - 1U)) == 0U)
void nvgpu_falcon_dbg_buf_destroy(struct nvgpu_falcon *flcn)
{
struct gk20a *g = flcn->g;
struct nvgpu_falcon_dbg_buf *debug_buffer = &flcn->debug_buffer;
if (debug_buffer->local_buf != NULL) {
nvgpu_kfree(g, debug_buffer->local_buf);
debug_buffer->local_buf = NULL;
}
debug_buffer->first_msg_received = false;
debug_buffer->read_offset = 0;
debug_buffer->buffer_size = 0;
debug_buffer->dmem_offset = g->ops.falcon.get_mem_size(flcn, MEM_DMEM);
}
int nvgpu_falcon_dbg_buf_init(struct nvgpu_falcon *flcn,
u32 debug_buffer_max_size, u32 write_reg_addr, u32 read_reg_addr)
{
struct gk20a *g = flcn->g;
struct nvgpu_falcon_dbg_buf *debug_buffer = &flcn->debug_buffer;
int status = 0;
/*
* Set the debugBufferSize to its initial value of max size.
* We will refine it later once ucode informs us of the size it wants
* the debug buffer to be.
*/
debug_buffer->buffer_size = debug_buffer_max_size;
debug_buffer->first_msg_received = false;
debug_buffer->read_offset = 0;
if (debug_buffer->local_buf == NULL) {
/*
* Allocate memory for nvgpu-side debug buffer, used for copies
* from nvriscv dmem. We make it 1 byte larger than the actual debug
* buffer to keep a null character at the end for ease of printing.
*/
debug_buffer->local_buf = nvgpu_kzalloc(g, debug_buffer_max_size + 1);
if (debug_buffer->local_buf == NULL) {
nvgpu_err(g, "Failed to alloc memory for flcn debug buffer");
nvgpu_err(g, "status=0x%08x", status);
status = -ENOMEM;
goto exit;
}
}
/* Zero out memory in the local debug buffer. */
memset(debug_buffer->local_buf, 0, debug_buffer_max_size + 1);
/*
* Debug buffer is located at the very end of available DMEM.
* NVGPU doesn't know the exact size until the ucode informs us of
* the size it wants, so only make it as large as the metadata
* at the end of the buffer.
*/
debug_buffer->dmem_offset = g->ops.falcon.get_mem_size(flcn, MEM_DMEM) -
sizeof(struct nvgpu_falcon_dbg_buf_metadata);
/* The DMEM offset must be 4-byte aligned */
if (!NV_IS_ALIGNED(debug_buffer->dmem_offset, FLCN_DMEM_ACCESS_ALIGNMENT)) {
nvgpu_err(g, "metadata DMEM offset is not 4-byte aligned.");
nvgpu_err(g, "dmem_offset=0x%08x", debug_buffer->dmem_offset);
status = -EINVAL;
goto exit;
}
/* The DMEM buffer size must be 4-byte aligned */
if (!NV_IS_ALIGNED(sizeof(struct nvgpu_falcon_dbg_buf_metadata),
FLCN_DMEM_ACCESS_ALIGNMENT)) {
nvgpu_err(g, "The debug buffer metadata size is not 4-byte aligned");
status = -EINVAL;
goto exit;
}
debug_buffer->read_offset_address = read_reg_addr;
debug_buffer->write_offset_address = write_reg_addr;
exit:
if (status != 0) {
nvgpu_falcon_dbg_buf_destroy(flcn);
}
return status;
}
/*
* Copy new data from the nvriscv debug buffer to the local buffer.
* Get all data from the last read offset to the current write offset.
*
* @return '0' if data fetched successfully, error otherwise.
*/
static int falcon_update_debug_buffer_from_dmem(struct nvgpu_falcon *flcn,
u32 write_offset)
{
struct gk20a *g = flcn->g;
struct nvgpu_falcon_dbg_buf *debug_buffer = &flcn->debug_buffer;
u32 first_read_size = 0;
u32 second_read_size = 0;
/*
* Align read offset, since reading DMEM only works with 32-bit words.
* We only need to align the offset since dmem_offset is already aligned.
* We don't need to align the write offset since nvgpu_falcon_copy_from_dmem
* handles unaligned-size reads.
*/
u32 read_offset_aligned = NV_ALIGN_DOWN(debug_buffer->read_offset,
FLCN_DMEM_ACCESS_ALIGNMENT);
if (write_offset >= debug_buffer->read_offset) {
first_read_size = write_offset - read_offset_aligned;
second_read_size = 0;
} else {
/* Write offset has wrapped around, need two reads */
first_read_size = debug_buffer->buffer_size - read_offset_aligned;
second_read_size = write_offset;
}
if (first_read_size > 0) {
if (read_offset_aligned + first_read_size >
debug_buffer->buffer_size) {
nvgpu_err(g,
"Invalid read (first read) from print buffer attempted!");
return -EINVAL;
}
if (nvgpu_falcon_copy_from_dmem(flcn,
debug_buffer->dmem_offset + read_offset_aligned,
debug_buffer->local_buf + read_offset_aligned,
first_read_size,
0) != 0) {
nvgpu_err(g, "Failed to copy debug buffer contents from DMEM");
return -EINVAL;
}
}
if (second_read_size > 0) {
if (second_read_size > debug_buffer->buffer_size) {
nvgpu_err(g,
"Invalid read (second read) from print buffer attempted!");
return -EINVAL;
}
/*
* Wrap around, read from start
* Assume dmem_offset is always aligned.
*/
if (nvgpu_falcon_copy_from_dmem(flcn, debug_buffer->dmem_offset,
debug_buffer->local_buf, second_read_size,
0) != 0) {
nvgpu_err(g,
"Failed to copy debug buffer contents from nvriscv DMEM");
return -EINVAL;
}
}
if (first_read_size == 0 && second_read_size == 0) {
nvgpu_err(g, "Debug buffer empty, can't read any data!");
return -EINVAL;
}
return 0;
}
/*
* There is a metadata buffer at the end of the DMEM buffer in nvriscv.
* It holds the buffer size, the magic number for identification, etc.
*
*/
static int falcon_fetch_debug_buffer_metadata(struct nvgpu_falcon *flcn)
{
struct gk20a *g = flcn->g;
struct nvgpu_falcon_dbg_buf *debug_buffer = &flcn->debug_buffer;
struct nvgpu_falcon_dbg_buf_metadata buffer_metadata_copy;
/* DMEM offset will point to metadata initially */
if (nvgpu_falcon_copy_from_dmem(flcn, debug_buffer->dmem_offset,
(u8 *)&buffer_metadata_copy, sizeof(buffer_metadata_copy),
0) != 0) {
nvgpu_err(g, "Failed to copy debug buffer metadata from nvriscv DMEM");
return -EINVAL;
}
nvgpu_info(g, "metadata magic - 0x%x", buffer_metadata_copy.magic);
nvgpu_info(g, "metadata buffer size - 0x%x",
buffer_metadata_copy.buffer_size);
nvgpu_info(g, "metadata write offset - 0x%x",
buffer_metadata_copy.write_offset);
nvgpu_info(g, "metadata read offset - 0x%x",
buffer_metadata_copy.read_offset);
if (buffer_metadata_copy.magic != NV_NVRISCV_DEBUG_BUFFER_MAGIC) {
nvgpu_err(g, "Failed to verify magic number in debug buffer");
nvgpu_err(g, " metadata copied from nvriscv DMEM");
return -EINVAL;
}
if (buffer_metadata_copy.buffer_size >= debug_buffer->buffer_size) {
nvgpu_err(g, "Debug buffer size requested by ucode too big!");
return -EINVAL;
}
debug_buffer->buffer_size = buffer_metadata_copy.buffer_size;
/* The DMEM buffer size must be 4-byte aligned */
if (!NV_IS_ALIGNED(debug_buffer->buffer_size, FLCN_DMEM_ACCESS_ALIGNMENT)) {
nvgpu_err(g, "The debug buffer size is not 4-byte aligned");
nvgpu_err(g, "buffer_size=0x%08x", debug_buffer->buffer_size);
return -EINVAL;
}
/*
* NVGPU doesn't want to overwrite the metadata since NVGPU might want to use
* it to pass read and write offsets if no registers are available.
*/
debug_buffer->dmem_offset -= buffer_metadata_copy.buffer_size;
/* The DMEM offset must be 4-byte aligned */
if (!NV_IS_ALIGNED(debug_buffer->dmem_offset, FLCN_DMEM_ACCESS_ALIGNMENT)) {
nvgpu_err(g, "The debug buffer DMEM offset is not 4-byte aligned.");
nvgpu_err(g, " dmem_offset=0x%08x", debug_buffer->dmem_offset);
return -EINVAL;
}
return 0;
}
int nvgpu_falcon_dbg_buf_display(struct nvgpu_falcon *flcn)
{
struct gk20a *g = flcn->g;
struct nvgpu_falcon_dbg_buf *debug_buffer = &flcn->debug_buffer;
u8 *buffer_data = debug_buffer->local_buf;
u32 write_offset = nvgpu_readl(g, debug_buffer->write_offset_address);
u32 itr_Offset = debug_buffer->read_offset;
bool is_line_split = false;
if (debug_buffer->local_buf == NULL) {
nvgpu_err(g, "Local Debug Buffer not allocated!");
return -EINVAL;
}
if (!debug_buffer->first_msg_received) {
if (falcon_fetch_debug_buffer_metadata(flcn) != 0) {
nvgpu_err(g, "Failed to process debug buffer metadata!");
return -EINVAL;
}
debug_buffer->first_msg_received = true;
}
if (write_offset >= debug_buffer->buffer_size) {
nvgpu_err(g, "Invalid write offset (%u >= %u)",
write_offset, debug_buffer->buffer_size);
nvgpu_err(g, "abort Debug buffer display");
return -EINVAL;
}
if (falcon_update_debug_buffer_from_dmem(flcn, write_offset) != 0) {
nvgpu_err(g, "Failed to fetch debug buffer contents");
return -EINVAL;
}
/* Buffer is empty when read_offset == write_offset */
while (itr_Offset != write_offset) {
/* Null character is the newline marker in falcon firmware logs */
if (buffer_data[itr_Offset] != '\0') {
itr_Offset = (itr_Offset + 1) % debug_buffer->buffer_size;
if (itr_Offset == 0) {
is_line_split = true;
}
} else {
int status = 0;
u8 *tmp_buf = NULL;
u8 *curr_data = NULL;
u32 buf_size = 0;
if (is_line_split) {
/* Logic to concat the split line into a temp buffer */
u32 first_chunk_len =
strlen((char *)&buffer_data[debug_buffer->read_offset]);
u32 second_chunk_len = strlen((char *)&buffer_data[0]);
buf_size = first_chunk_len + second_chunk_len + 1;
tmp_buf = nvgpu_kzalloc(g, buf_size);
if (tmp_buf == NULL) {
status = -ENOMEM;
nvgpu_err(g,
"Failed to alloc tmp buf for line-split print %d",
status);
return status;
}
nvgpu_memcpy(tmp_buf, &buffer_data[debug_buffer->read_offset],
first_chunk_len + 1);
strcat((char *)tmp_buf, (char *)&buffer_data[0]);
/* Set the byte array that gets printed as a string */
curr_data = tmp_buf;
/* Reset line-split flag */
is_line_split = false;
} else {
buf_size =
strlen((char *)&buffer_data[debug_buffer->read_offset]) + 1;
/* Set the byte array that gets printed as a string */
curr_data = &buffer_data[debug_buffer->read_offset];
}
if (curr_data == NULL) {
status = -EINVAL;
nvgpu_err(g, "Debug buffer - no data to print %d", status);
if (tmp_buf != NULL) {
nvgpu_kfree(g, tmp_buf);
}
return status;
}
nvgpu_info(g, "Flcn-%d Async: %s", flcn->flcn_id, curr_data);
/* Cleanup in case we had to allocate a temp buffer */
if (tmp_buf != NULL) {
nvgpu_kfree(g, tmp_buf);
}
itr_Offset = (itr_Offset + 1) % debug_buffer->buffer_size;
debug_buffer->read_offset = itr_Offset;
}
}
nvgpu_writel(g, debug_buffer->read_offset_address,
debug_buffer->read_offset);
return 0;
}
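
For reference, a small standalone sketch of the read-size split
implemented in falcon_update_debug_buffer_from_dmem() above; the offsets
and buffer size are illustrative, and only the size arithmetic is
modelled, not the DMEM copies:

#include <stdint.h>
#include <stdio.h>

#define FLCN_DMEM_ACCESS_ALIGNMENT 4U
#define NV_ALIGN_DOWN(v, g) ((v) & ~((g) - 1U))

/*
 * When the ucode's write offset has wrapped past the end of the circular
 * buffer, the copy from DMEM is split into a tail read and a head read.
 */
static void split_reads(uint32_t read_offset, uint32_t write_offset,
			uint32_t buffer_size)
{
	uint32_t read_aligned = NV_ALIGN_DOWN(read_offset,
					      FLCN_DMEM_ACCESS_ALIGNMENT);
	uint32_t first, second;

	if (write_offset >= read_offset) {
		first = write_offset - read_aligned;
		second = 0U;
	} else {
		first = buffer_size - read_aligned;  /* up to buffer end */
		second = write_offset;               /* wrapped head     */
	}
	printf("first=%u bytes, second=%u bytes\n", first, second);
}

int main(void)
{
	split_reads(0x10U, 0x40U, 0x800U);   /* no wrap: 48, 0  */
	split_reads(0x7f2U, 0x20U, 0x800U);  /* wrapped: 16, 32 */
	return 0;
}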

@@ -0,0 +1,41 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef NVGPU_FALCON_DEBUG_H
#define NVGPU_FALCON_DEBUG_H
struct nvgpu_falcon;
struct nvgpu_falcon_dbg_buf_metadata {
/* Read offset updated by NVGPU */
u32 read_offset;
/* Write offset updated by firmware */
u32 write_offset;
/* Buffer size configured by NVGPU */
u32 buffer_size;
/* Magic number for header validation */
u32 magic;
};
#endif /* NVGPU_FALCON_DEBUG_H */
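
For orientation, a standalone sketch of the DMEM layout arithmetic
implied by this metadata struct and by falcon_debug.c above; the DMEM
size and the ucode-requested buffer size below are illustrative values:

#include <stdint.h>
#include <stdio.h>

/* Same four u32 fields as nvgpu_falcon_dbg_buf_metadata above. */
struct dbg_buf_metadata {
	uint32_t read_offset;
	uint32_t write_offset;
	uint32_t buffer_size;
	uint32_t magic;
};

int main(void)
{
	/*
	 * Illustrative sizes; the real DMEM size comes from
	 * g->ops.falcon.get_mem_size(flcn, MEM_DMEM).
	 */
	uint32_t dmem_size = 0x10000U;     /* assumed 64 KiB of DMEM    */
	uint32_t ucode_buf_size = 0x800U;  /* size reported in metadata */

	/* The metadata block sits at the very end of DMEM ... */
	uint32_t metadata_offset =
		dmem_size - (uint32_t)sizeof(struct dbg_buf_metadata);
	/*
	 * ... and the circular debug buffer sits immediately below it, so
	 * dmem_offset is lowered by buffer_size once the metadata is read.
	 */
	uint32_t buffer_offset = metadata_offset - ucode_buf_size;

	printf("metadata at 0x%x, debug buffer at 0x%x\n",
	       metadata_offset, buffer_offset);
	return 0;
}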

@@ -27,6 +27,7 @@
 #include <nvgpu/pmu/fw.h>
 #include <nvgpu/pmu/clk/clk.h>
 #include <nvgpu/string.h>
+#include <nvgpu/falcon.h>
 
 static void pmu_free_ns_ucode_blob(struct gk20a *g)
 {
@@ -74,6 +75,20 @@ int nvgpu_pmu_ns_fw_bootstrap(struct gk20a *g, struct nvgpu_pmu *pmu)
 #if defined(CONFIG_NVGPU_NON_FUSA)
 	if (nvgpu_is_enabled(g, NVGPU_PMU_NEXT_CORE_ENABLED)) {
 		nvgpu_pmu_next_core_rtos_args_setup(g, pmu);
+#ifdef CONFIG_NVGPU_FALCON_DEBUG
+		err = nvgpu_falcon_dbg_buf_init(g->pmu->flcn,
+				NV_RISCV_DMESG_BUFFER_SIZE,
+				g->ops.pmu.pmu_get_queue_head(NV_RISCV_DEBUG_BUFFER_QUEUE),
+				g->ops.pmu.pmu_get_queue_tail(NV_RISCV_DEBUG_BUFFER_QUEUE));
+		if (err != 0) {
+			nvgpu_err(g,
+				"Failed to allocate NVRISCV PMU debug buffer status=0x%x)",
+				err);
+			return err;
+		}
+#endif
 	} else
 #endif
 	{

@@ -34,6 +34,7 @@
 #include <nvgpu/pmu.h>
 #include <nvgpu/string.h>
 #include <nvgpu/pmu/clk/clk.h>
+#include <nvgpu/falcon.h>
 #include <nvgpu/pmu/mutex.h>
 #include <nvgpu/pmu/seq.h>
@@ -159,6 +160,13 @@ static void remove_pmu_support(struct nvgpu_pmu *pmu)
 		nvgpu_pmu_pstate_deinit(g);
 	}
+#ifdef CONFIG_NVGPU_FALCON_DEBUG
+	if (nvgpu_is_enabled(g, NVGPU_PMU_NEXT_CORE_ENABLED)) {
+		nvgpu_falcon_dbg_buf_display(pmu->flcn);
+		nvgpu_falcon_dbg_buf_destroy(pmu->flcn);
+	}
+#endif
 	nvgpu_pmu_debug_deinit(g, pmu);
 	nvgpu_pmu_lsfm_deinit(g, pmu, pmu->lsfm);
 #ifdef CONFIG_PMU_POWER_PG
@@ -418,6 +426,18 @@ int nvgpu_pmu_rtos_init(struct gk20a *g)
 #if defined(CONFIG_NVGPU_NON_FUSA)
 	if (nvgpu_is_enabled(g, NVGPU_PMU_NEXT_CORE_ENABLED)) {
+#ifdef CONFIG_NVGPU_FALCON_DEBUG
+		err = nvgpu_falcon_dbg_buf_init(g->pmu->flcn,
+				NV_RISCV_DMESG_BUFFER_SIZE,
+				g->ops.pmu.pmu_get_queue_head(NV_RISCV_DEBUG_BUFFER_QUEUE),
+				g->ops.pmu.pmu_get_queue_tail(NV_RISCV_DEBUG_BUFFER_QUEUE));
+		if (err != 0) {
+			nvgpu_err(g,
+				"Failed to allocate RISCV PMU debug buffer status=0x%x)",
+				err);
+			goto exit;
+		}
+#endif
 		g->ops.falcon.bootstrap(g->pmu->flcn, 0U);
 		err = nvgpu_pmu_wait_for_priv_lockdown_release(g,
 				g->pmu->flcn, U32_MAX);

@@ -1297,6 +1297,7 @@ static const struct gops_pmu ga100_ops_pmu = {
 	.pmu_get_queue_tail = tu104_pmu_queue_tail_r,
 	.get_irqdest = gk20a_pmu_get_irqdest,
 	.handle_ext_irq = NULL,
+	.handle_swgen1_irq = NULL,
 	.is_debug_mode_enabled = gm20b_pmu_is_debug_mode_en,
 	.setup_apertures = tu104_pmu_setup_apertures,
 	.secured_pmu_start = gm20b_secured_pmu_start,

@@ -1289,6 +1289,7 @@ static const struct gops_pmu ga10b_ops_pmu = {
 	.pmu_destroy = nvgpu_pmu_destroy,
 	/* ISR */
 	.pmu_is_interrupted = gk20a_pmu_is_interrupted,
+	.handle_swgen1_irq = ga10b_pmu_handle_swgen1_irq,
 	/* queue */
 	.pmu_get_queue_head = gv11b_pmu_queue_head_r,
 	.pmu_get_queue_head_size = gv11b_pmu_queue_head__size_1_v,

@@ -850,6 +850,7 @@ static const struct gops_pmu gm20b_ops_pmu = {
 	.pmu_ns_bootstrap = gk20a_pmu_ns_bootstrap,
 	.setup_apertures = gm20b_pmu_setup_apertures,
 	.secured_pmu_start = gm20b_secured_pmu_start,
+	.handle_swgen1_irq = NULL,
 };
 #endif

@@ -940,6 +940,7 @@ static const struct gops_pmu gp10b_ops_pmu = {
 	.bar0_error_status = gk20a_pmu_bar0_error_status,
 	.flcn_setup_boot_config = gm20b_pmu_flcn_setup_boot_config,
 	.pmu_ns_bootstrap = gk20a_pmu_ns_bootstrap,
+	.handle_swgen1_irq = NULL,
 };
 #endif

@@ -1126,6 +1126,7 @@ static const struct gops_pmu gv11b_ops_pmu = {
 	.pmu_destroy = nvgpu_pmu_destroy,
 	/* ISR */
 	.pmu_is_interrupted = gk20a_pmu_is_interrupted,
+	.handle_swgen1_irq = NULL,
 	/* queue */
 	.pmu_get_queue_head = gv11b_pmu_queue_head_r,
 	.pmu_get_queue_head_size = gv11b_pmu_queue_head__size_1_v,

@@ -1189,6 +1189,7 @@ static const struct gops_pmu tu104_ops_pmu = {
 	.pmu_get_queue_tail = tu104_pmu_queue_tail_r,
 	.get_irqdest = gk20a_pmu_get_irqdest,
 	.handle_ext_irq = gv11b_pmu_handle_ext_irq,
+	.handle_swgen1_irq = NULL,
 	.is_debug_mode_enabled = gm20b_pmu_is_debug_mode_en,
 	.setup_apertures = tu104_pmu_setup_apertures,
 	.secured_pmu_start = gm20b_secured_pmu_start,

@@ -342,3 +342,19 @@ bool ga10b_pmu_is_debug_mode_en(struct gk20a *g)
 		return false;
 	}
 }
+
+void ga10b_pmu_handle_swgen1_irq(struct gk20a *g, u32 intr)
+{
+	struct nvgpu_pmu *pmu = g->pmu;
+	int err = 0;
+
+	if ((intr & pwr_falcon_irqstat_swgen1_true_f()) != 0U) {
+#ifdef CONFIG_NVGPU_FALCON_DEBUG
+		err = nvgpu_falcon_dbg_buf_display(pmu->flcn);
+		if (err != 0) {
+			nvgpu_err(g, "nvgpu_falcon_dbg_buf_display failed err=%d",
+				err);
+		}
+#endif
+	}
+}

@@ -52,5 +52,6 @@ u32 ga10b_pmu_read_idle_counter(struct gk20a *g, u32 counter_id);
 void ga10b_pmu_reset_idle_counter(struct gk20a *g, u32 counter_id);
 u32 ga10b_pmu_get_irqmask(struct gk20a *g);
 bool ga10b_pmu_is_debug_mode_en(struct gk20a *g);
+void ga10b_pmu_handle_swgen1_irq(struct gk20a *g, u32 intr);
 
 #endif /* NVGPU_PMU_GA10B_H */

@@ -517,6 +517,10 @@ void gk20a_pmu_handle_interrupts(struct gk20a *g, u32 intr)
 				~pwr_falcon_exterrstat_valid_m());
 	}
 
+	if (g->ops.pmu.handle_swgen1_irq != NULL) {
+		g->ops.pmu.handle_swgen1_irq(g, intr);
+	}
+
 	if ((intr & pwr_falcon_irqstat_swgen0_true_f()) != 0U) {
 		err = nvgpu_pmu_process_message(pmu);
 		if (err != 0) {

@@ -207,6 +207,35 @@ enum falcon_mem_type {
 	MEM_IMEM
 };
 
+#ifdef CONFIG_NVGPU_FALCON_DEBUG
+/*
+ * Structure tracking information relevant to the firmware debug buffer.
+ */
+struct nvgpu_falcon_dbg_buf {
+	/* Offset to debug buffer in NVRISCV DMEM */
+	u32 dmem_offset;
+
+	/*
+	 * Pointer to the local debug buffer copy in system memory
+	 * into which nvgpu copies the data from NVRISCV DMEM.
+	 */
+	u8 *local_buf;
+
+	/* Last read offset for the circular debug buffer */
+	u32 read_offset;
+
+	/* Read/Write offset register addresses */
+	u32 read_offset_address;
+	u32 write_offset_address;
+
+	/* Flcn debug buffer size */
+	u32 buffer_size;
+
+	/* Set once nvgpu gets the first message from FLCN */
+	bool first_msg_received;
+};
+#endif
+
 /**
  * This struct holds the falcon ops which are falcon engine specific.
  */
@@ -259,6 +288,9 @@ struct nvgpu_falcon {
 #endif
 	/** Functions for engine specific reset and memory access. */
 	struct nvgpu_falcon_engine_dependency_ops flcn_engine_dep_ops;
+#ifdef CONFIG_NVGPU_FALCON_DEBUG
+	struct nvgpu_falcon_dbg_buf debug_buffer;
+#endif
 };
 
 /**
@@ -726,4 +758,46 @@ void nvgpu_falcon_print_imem(struct nvgpu_falcon *flcn, u32 src, u32 size);
 void nvgpu_falcon_get_ctls(struct nvgpu_falcon *flcn, u32 *sctl, u32 *cpuctl);
 #endif
 
+#ifdef CONFIG_NVGPU_FALCON_DEBUG
+
+#define NV_RISCV_DEBUG_BUFFER_QUEUE	7U
+#define NV_RISCV_DMESG_BUFFER_SIZE	0x1000U
+
+/**
+ * @brief Falcon debug buffer initialization.
+ *
+ * @param flcn [in] The falcon.
+ *
+ * Allocates and maps a buffer in system memory for sharing flcn firmware
+ * debug prints with client nvgpu.
+ *
+ * @return '0' if initialization is successful, error otherwise.
+ */
+int nvgpu_falcon_dbg_buf_init(struct nvgpu_falcon *flcn,
+	u32 debug_buffer_max_size, u32 write_reg_addr, u32 read_reg_addr);
+
+/**
+ * @brief Falcon debug buffer deinitialization.
+ *
+ * @param flcn [in] The falcon.
+ *
+ * Frees the falcon debug buffer.
+ */
+void nvgpu_falcon_dbg_buf_destroy(struct nvgpu_falcon *flcn);
+
+/**
+ * @brief Display falcon firmware logs.
+ *
+ * @param flcn [in] The falcon.
+ *
+ * Reads the contents of the flcn debug buffer filled by firmware.
+ * Logs are displayed line by line with the label '<FLCN> Async',
+ * signifying that they may be delayed and should be treated as
+ * out-of-order when read alongside other client nvgpu logs.
+ *
+ * @return '0' if contents logged successfully, error otherwise.
+ */
+int nvgpu_falcon_dbg_buf_display(struct nvgpu_falcon *flcn);
+#endif
+
 #endif /* NVGPU_FALCON_H */
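
As a usage sketch, the sequence below mirrors how the GA10B PMU wires
these functions up elsewhere in this change; the engine_* helper names
are illustrative, the snippet assumes the usual nvgpu driver context
(struct gk20a, nvgpu_err, the PMU queue-register accessors), and the PMU
queue head/tail registers of queue 7 are reused as the write/read offset
registers exactly as the PMU code above does:

/* Boot-time setup: point the debug buffer at the two offset registers. */
static int engine_dbg_buf_setup(struct gk20a *g, struct nvgpu_falcon *flcn)
{
	return nvgpu_falcon_dbg_buf_init(flcn,
		NV_RISCV_DMESG_BUFFER_SIZE,
		g->ops.pmu.pmu_get_queue_head(NV_RISCV_DEBUG_BUFFER_QUEUE),
		g->ops.pmu.pmu_get_queue_tail(NV_RISCV_DEBUG_BUFFER_QUEUE));
}

/* ISR path: called when the SWGEN1 bit is set in the falcon irqstat. */
static void engine_dbg_buf_isr(struct gk20a *g, struct nvgpu_falcon *flcn)
{
	if (nvgpu_falcon_dbg_buf_display(flcn) != 0) {
		nvgpu_err(g, "failed to drain falcon debug buffer");
	}
}

/* Teardown: drain any remaining prints, then free the local buffer. */
static void engine_dbg_buf_teardown(struct nvgpu_falcon *flcn)
{
	(void)nvgpu_falcon_dbg_buf_display(flcn);
	nvgpu_falcon_dbg_buf_destroy(flcn);
}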

@@ -312,6 +312,7 @@ struct gops_pmu {
 	/** @cond DOXYGEN_SHOULD_SKIP_THIS */
 	void (*handle_ext_irq)(struct gk20a *g, u32 intr);
+	void (*handle_swgen1_irq)(struct gk20a *g, u32 intr);
 	void (*pmu_enable_irq)(struct nvgpu_pmu *pmu, bool enable);
 	u32 (*get_irqdest)(struct gk20a *g);

@@ -68,6 +68,7 @@
 #define pwr_falcon_irqstat_exterr_true_f() (0x20U)
 #define pwr_falcon_irqstat_swgen0_true_f() (0x40U)
 #define pwr_falcon_irqstat_ext_ecc_parity_true_f() (0x400U)
+#define pwr_falcon_irqstat_swgen1_true_f() (0x80U)
 #define pwr_pmu_ecc_intr_status_r() (0x0010abfcU)
 #define pwr_pmu_ecc_intr_status_corrected_m() (U32(0x1U) << 0U)
 #define pwr_pmu_ecc_intr_status_uncorrected_m() (U32(0x1U) << 1U)