From 65111b64f2dc943df49eb91d0f94c6f04572fbcd Mon Sep 17 00:00:00 2001 From: mkumbar Date: Tue, 8 Dec 2020 00:03:53 +0530 Subject: [PATCH] gpu: nvgpu: Add RISCV LS PMU support -Add RISCV LS PMU support by adding RISCV LS PMU ucode to the blob. -Modify the PMU RTOS sequence based on NEXT CORE enable flag. JIRA NVGPU-6303 Change-Id: I4e2b989f9903b72a6327c931eb3c02f8cef2aa75 Signed-off-by: mkumbar Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2447388 Reviewed-by: svc-mobile-coverity Reviewed-by: Vijayakumar Subbu Reviewed-by: mobile promotions GVS: Gerrit_Virtual_Submit Tested-by: mobile promotions --- .../gpu/nvgpu/common/acr/acr_blob_construct.c | 152 ++++++++++++------ .../gpu/nvgpu/common/acr/acr_blob_construct.h | 8 + drivers/gpu/nvgpu/common/acr/acr_bootstrap.h | 1 + .../nvgpu/common/acr/nvgpu_acr_interface.h | 2 + drivers/gpu/nvgpu/common/gr/gr_falcon.c | 3 +- .../gpu/nvgpu/common/pmu/fw/fw_ns_bootstrap.c | 67 +------- drivers/gpu/nvgpu/common/pmu/fw/fw_ver_ops.c | 24 +-- drivers/gpu/nvgpu/common/pmu/lsfm/lsfm.c | 6 + drivers/gpu/nvgpu/common/pmu/pmu_rtos_init.c | 137 +++++++++++++++- drivers/gpu/nvgpu/include/nvgpu/falcon.h | 1 + drivers/gpu/nvgpu/include/nvgpu/pmu.h | 8 + 11 files changed, 271 insertions(+), 138 deletions(-) diff --git a/drivers/gpu/nvgpu/common/acr/acr_blob_construct.c b/drivers/gpu/nvgpu/common/acr/acr_blob_construct.c index 777ccd273..68e0a6066 100644 --- a/drivers/gpu/nvgpu/common/acr/acr_blob_construct.c +++ b/drivers/gpu/nvgpu/common/acr/acr_blob_construct.c @@ -102,6 +102,44 @@ int nvgpu_acr_lsf_pmu_ucode_details(struct gk20a *g, void *lsf_ucode_img) exit: return err; } +#if defined(CONFIG_NVGPU_NEXT) +s32 nvgpu_acr_lsf_pmu_ncore_ucode_details(struct gk20a *g, void *lsf_ucode_img) +{ + struct lsf_ucode_desc *lsf_desc; + struct nvgpu_firmware *fw_sig; + struct nvgpu_firmware *fw_desc; + struct nvgpu_firmware *fw_image; + struct flcn_ucode_img *p_img = + (struct flcn_ucode_img *)lsf_ucode_img; + s32 err = 0; + + lsf_desc = nvgpu_kzalloc(g, sizeof(struct lsf_ucode_desc)); + if (lsf_desc == NULL) { + err = -ENOMEM; + goto exit; + } + + fw_sig = nvgpu_pmu_fw_sig_desc(g, g->pmu); + fw_desc = nvgpu_pmu_fw_desc_desc(g, g->pmu); + fw_image = nvgpu_pmu_fw_image_desc(g, g->pmu); + + nvgpu_memcpy((u8 *)lsf_desc, (u8 *)fw_sig->data, + min_t(size_t, sizeof(*lsf_desc), fw_sig->size)); + + lsf_desc->falcon_id = FALCON_ID_PMU_NEXT_CORE; + + p_img->ndesc = (struct falcon_next_core_ucode_desc *)(void *)fw_desc->data; + + p_img->data = (u32 *)(void *)fw_image->data; + p_img->data_size = U32(fw_image->size); + p_img->lsf_desc = (struct lsf_ucode_desc *)lsf_desc; + + p_img->is_next_core_img = true; + +exit: + return err; +} +#endif #endif int nvgpu_acr_lsf_fecs_ucode_details(struct gk20a *g, void *lsf_ucode_img) @@ -431,51 +469,60 @@ static void lsfm_fill_static_lsb_hdr_info(struct gk20a *g, sizeof(struct lsf_ucode_desc)); } - pnode->lsb_header.ucode_size = pnode->ucode_img.data_size; + if (!pnode->ucode_img.is_next_core_img) { + pnode->lsb_header.ucode_size = pnode->ucode_img.data_size; - /* Uses a loader. that is has a desc */ - pnode->lsb_header.data_size = LSB_HDR_DATA_SIZE; + /* Uses a loader. that is has a desc */ + pnode->lsb_header.data_size = LSB_HDR_DATA_SIZE; - /* - * The loader code size is already aligned (padded) such that - * the code following it is aligned, but the size in the image - * desc is not, bloat it up to be on a 256 byte alignment. 
- */ - pnode->lsb_header.bl_code_size = ALIGN( - pnode->ucode_img.desc->bootloader_size, - LSF_BL_CODE_SIZE_ALIGNMENT); - full_app_size = nvgpu_safe_add_u32( - ALIGN(pnode->ucode_img.desc->app_size, - LSF_BL_CODE_SIZE_ALIGNMENT), - pnode->lsb_header.bl_code_size); - - pnode->lsb_header.ucode_size = nvgpu_safe_add_u32(ALIGN( - pnode->ucode_img.desc->app_resident_data_offset, - LSF_BL_CODE_SIZE_ALIGNMENT), + /* + * The loader code size is already aligned (padded) such that + * the code following it is aligned, but the size in the image + * desc is not, bloat it up to be on a 256 byte alignment. + */ + pnode->lsb_header.bl_code_size = ALIGN( + pnode->ucode_img.desc->bootloader_size, + LSF_BL_CODE_SIZE_ALIGNMENT); + full_app_size = nvgpu_safe_add_u32( + ALIGN(pnode->ucode_img.desc->app_size, + LSF_BL_CODE_SIZE_ALIGNMENT), pnode->lsb_header.bl_code_size); - pnode->lsb_header.data_size = nvgpu_safe_sub_u32(full_app_size, - pnode->lsb_header.ucode_size); - /* - * Though the BL is located at 0th offset of the image, the VA - * is different to make sure that it doesn't collide the actual OS - * VA range - */ - pnode->lsb_header.bl_imem_off = - pnode->ucode_img.desc->bootloader_imem_offset; + pnode->lsb_header.ucode_size = nvgpu_safe_add_u32(ALIGN( + pnode->ucode_img.desc->app_resident_data_offset, + LSF_BL_CODE_SIZE_ALIGNMENT), + pnode->lsb_header.bl_code_size); - pnode->lsb_header.flags = NV_FLCN_ACR_LSF_FLAG_FORCE_PRIV_LOAD_FALSE; + pnode->lsb_header.data_size = nvgpu_safe_sub_u32(full_app_size, + pnode->lsb_header.ucode_size); + /* + * Though the BL is located at 0th offset of the image, the VA + * is different to make sure that it doesn't collide the actual OS + * VA range + */ + pnode->lsb_header.bl_imem_off = + pnode->ucode_img.desc->bootloader_imem_offset; - if (falcon_id == FALCON_ID_PMU) { - data = NV_FLCN_ACR_LSF_FLAG_DMACTL_REQ_CTX_TRUE; - pnode->lsb_header.flags = data; + pnode->lsb_header.flags = NV_FLCN_ACR_LSF_FLAG_FORCE_PRIV_LOAD_FALSE; + + if (falcon_id == FALCON_ID_PMU) { + data = NV_FLCN_ACR_LSF_FLAG_DMACTL_REQ_CTX_TRUE; + pnode->lsb_header.flags = data; + } + + if (g->acr->lsf[falcon_id].is_priv_load) { + pnode->lsb_header.flags |= + NV_FLCN_ACR_LSF_FLAG_FORCE_PRIV_LOAD_TRUE; + } + + } else { + pnode->lsb_header.ucode_size = 0; + pnode->lsb_header.data_size = 0; + pnode->lsb_header.bl_code_size = 0; + pnode->lsb_header.bl_imem_off = 0; + pnode->lsb_header.bl_data_size = 0; + pnode->lsb_header.bl_data_off = 0; } - - if (g->acr->lsf[falcon_id].is_priv_load) { - pnode->lsb_header.flags |= - NV_FLCN_ACR_LSF_FLAG_FORCE_PRIV_LOAD_TRUE; - } - } /* Adds a ucode image to the list of managed ucode images managed. */ @@ -710,7 +757,8 @@ static int lsf_gen_wpr_requirements(struct gk20a *g, /* Finally, update ucode surface size to include updates */ pnode->full_ucode_size = wpr_offset - pnode->lsb_header.ucode_off; - if (pnode->wpr_header.falcon_id != FALCON_ID_PMU) { + if (pnode->wpr_header.falcon_id != FALCON_ID_PMU && + pnode->wpr_header.falcon_id != FALCON_ID_PMU_NEXT_CORE) { pnode->lsb_header.app_code_off = pnode->lsb_header.bl_code_size; pnode->lsb_header.app_code_size = @@ -721,6 +769,7 @@ static int lsf_gen_wpr_requirements(struct gk20a *g, pnode->lsb_header.app_data_size = pnode->lsb_header.data_size; } + pnode = pnode->next; } @@ -942,22 +991,25 @@ static int lsfm_init_wpr_contents(struct gk20a *g, nvgpu_acr_dbg(g, "flags :%x", pnode->lsb_header.flags); - /* - * If this falcon has a boot loader and related args, - * flush them. 
- */ - /* Populate gen bl and flush to memory */ - err = lsfm_fill_flcn_bl_gen_desc(g, pnode); - if (err != 0) { - nvgpu_err(g, "bl_gen_desc failed err=%d", err); - return err; + if (!pnode->ucode_img.is_next_core_img) { + /* + * If this falcon has a boot loader and related args, + * flush them. + */ + /* Populate gen bl and flush to memory */ + err = lsfm_fill_flcn_bl_gen_desc(g, pnode); + if (err != 0) { + nvgpu_err(g, "bl_gen_desc failed err=%d", err); + return err; + } + nvgpu_mem_wr_n(g, ucode, pnode->lsb_header.bl_data_off, + &pnode->bl_gen_desc, pnode->bl_gen_desc_size); } - nvgpu_mem_wr_n(g, ucode, pnode->lsb_header.bl_data_off, - &pnode->bl_gen_desc, pnode->bl_gen_desc_size); /* Copying of ucode */ nvgpu_mem_wr_n(g, ucode, pnode->lsb_header.ucode_off, pnode->ucode_img.data, pnode->ucode_img.data_size); + pnode = pnode->next; i = nvgpu_safe_add_u32(i, 1U); } diff --git a/drivers/gpu/nvgpu/common/acr/acr_blob_construct.h b/drivers/gpu/nvgpu/common/acr/acr_blob_construct.h index 1407a023b..29c5e4254 100644 --- a/drivers/gpu/nvgpu/common/acr/acr_blob_construct.h +++ b/drivers/gpu/nvgpu/common/acr/acr_blob_construct.h @@ -25,6 +25,7 @@ #include #include +#include #include "nvgpu_acr_interface.h" @@ -85,6 +86,10 @@ struct flcn_ucode_img { struct ls_falcon_ucode_desc *desc; u32 data_size; struct lsf_ucode_desc *lsf_desc; + bool is_next_core_img; +#if defined(CONFIG_NVGPU_NEXT) + struct falcon_next_core_ucode_desc *ndesc; +#endif }; struct lsfm_managed_ucode_img { @@ -135,6 +140,9 @@ struct ls_flcn_mgr { int nvgpu_acr_prepare_ucode_blob(struct gk20a *g); #ifdef CONFIG_NVGPU_LS_PMU int nvgpu_acr_lsf_pmu_ucode_details(struct gk20a *g, void *lsf_ucode_img); +#if defined(CONFIG_NVGPU_NEXT) +s32 nvgpu_acr_lsf_pmu_ncore_ucode_details(struct gk20a *g, void *lsf_ucode_img); +#endif #endif int nvgpu_acr_lsf_fecs_ucode_details(struct gk20a *g, void *lsf_ucode_img); int nvgpu_acr_lsf_gpccs_ucode_details(struct gk20a *g, void *lsf_ucode_img); diff --git a/drivers/gpu/nvgpu/common/acr/acr_bootstrap.h b/drivers/gpu/nvgpu/common/acr/acr_bootstrap.h index 7e4abae6a..4b116cf2e 100644 --- a/drivers/gpu/nvgpu/common/acr/acr_bootstrap.h +++ b/drivers/gpu/nvgpu/common/acr/acr_bootstrap.h @@ -118,6 +118,7 @@ struct hs_acr { #if defined(CONFIG_NVGPU_NEXT) struct nvgpu_mem acr_falcon2_sysmem_desc; struct flcn2_acr_desc acr_sysmem_desc; + struct nvgpu_mem ls_pmu_desc; #endif /* Falcon used to execute ACR ucode */ diff --git a/drivers/gpu/nvgpu/common/acr/nvgpu_acr_interface.h b/drivers/gpu/nvgpu/common/acr/nvgpu_acr_interface.h index 6368d54a2..a5f622814 100644 --- a/drivers/gpu/nvgpu/common/acr/nvgpu_acr_interface.h +++ b/drivers/gpu/nvgpu/common/acr/nvgpu_acr_interface.h @@ -597,6 +597,8 @@ struct flcn2_acr_desc { * kernel stores ucode blob */ u64 nonwpr_ucode_blob_start; + + u64 ls_pmu_desc; }; /** @} */ diff --git a/drivers/gpu/nvgpu/common/gr/gr_falcon.c b/drivers/gpu/nvgpu/common/gr/gr_falcon.c index 1693804ad..f5c8bbf24 100644 --- a/drivers/gpu/nvgpu/common/gr/gr_falcon.c +++ b/drivers/gpu/nvgpu/common/gr/gr_falcon.c @@ -573,7 +573,8 @@ static int gr_falcon_sec2_or_ls_pmu_bootstrap(struct gk20a *g, } else #endif #ifdef CONFIG_NVGPU_LS_PMU - if (g->support_ls_pmu) { + if (g->support_ls_pmu && + !nvgpu_is_enabled(g, NVGPU_PMU_NEXT_CORE_ENABLED)) { bootstrap_set = true; nvgpu_log(g, gpu_dbg_gr, "bootstrap by LS PMU"); diff --git a/drivers/gpu/nvgpu/common/pmu/fw/fw_ns_bootstrap.c b/drivers/gpu/nvgpu/common/pmu/fw/fw_ns_bootstrap.c index d0788b96d..4946eae55 100644 --- 
a/drivers/gpu/nvgpu/common/pmu/fw/fw_ns_bootstrap.c +++ b/drivers/gpu/nvgpu/common/pmu/fw/fw_ns_bootstrap.c @@ -42,52 +42,6 @@ static void pmu_free_ns_ucode_blob(struct gk20a *g) } } -static void pmu_next_core_ucode_setup(struct gk20a *g, struct nvgpu_pmu *pmu) -{ - struct nv_pmu_boot_params boot_params; - struct nv_next_core_bootldr_params *btldr_params; - struct nv_next_core_rtos_params *rtos_params; - struct pmu_cmdline_args_v7 *cmd_line_args; - u64 phyadr = 0; - - nvgpu_log_fn(g, " "); - - btldr_params = &boot_params.boot_params.bl; - rtos_params = &boot_params.boot_params.rtos; - cmd_line_args = &boot_params.cmd_line_args; - - /* setup core dump */ - rtos_params->core_dump_size = NV_REG_STR_NEXT_CORE_DUMP_SIZE_DEFAULT; - rtos_params->core_dump_phys = nvgpu_mem_get_addr(g, - &pmu->fw->ucode_core_dump); - - /* copy cmd line args to pmu->boot_params.cmd_line_args */ - nvgpu_memcpy((u8 *)cmd_line_args, - (u8 *) (pmu->fw->ops.get_cmd_line_args_ptr(pmu)), - pmu->fw->ops.get_cmd_line_args_size(pmu)); - - cmd_line_args->ctx_bind_addr = g->ops.pmu.get_inst_block_config(g); - - /* setup boot loader args */ - btldr_params->boot_type = NV_NEXT_CORE_BOOTLDR_BOOT_TYPE_RM; - btldr_params->size = sizeof(struct nv_pmu_boot_params); - btldr_params->version = NV_NEXT_CORE_BOOTLDR_VERSION; - - /* copy to boot_args phyadr */ - nvgpu_mem_wr_n(g, &pmu->fw->ucode_boot_args, 0, - (u32 *)&boot_params.boot_params.bl, - sizeof(struct nv_pmu_boot_params)); - - /* copy boot args phyadr to mailbox 0/1 */ - phyadr = NV_NEXT_CORE_AMAP_EXTMEM2_START + - nvgpu_mem_get_addr(g, &pmu->fw->ucode_boot_args); - - nvgpu_falcon_mailbox_write(g->pmu->flcn, FALCON_MAILBOX_0, - u64_lo32(phyadr)); - nvgpu_falcon_mailbox_write(g->pmu->flcn, FALCON_MAILBOX_1, - u64_hi32(phyadr)); -} - int nvgpu_pmu_ns_fw_bootstrap(struct gk20a *g, struct nvgpu_pmu *pmu) { int err; @@ -117,22 +71,13 @@ int nvgpu_pmu_ns_fw_bootstrap(struct gk20a *g, struct nvgpu_pmu *pmu) g->ops.pmu.setup_apertures(g); - pmu->fw->ops.set_cmd_line_args_trace_size( - pmu, PMU_RTOS_TRACE_BUFSIZE); - pmu->fw->ops.set_cmd_line_args_trace_dma_base(pmu); - pmu->fw->ops.set_cmd_line_args_trace_dma_idx( - pmu, GK20A_PMU_DMAIDX_VIRT); - - pmu->fw->ops.set_cmd_line_args_cpu_freq(pmu, - g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_PWRCLK)); - - if (pmu->fw->ops.config_cmd_line_args_super_surface != NULL) { - pmu->fw->ops.config_cmd_line_args_super_surface(pmu); - } - +#if defined(CONFIG_NVGPU_NEXT) if (nvgpu_is_enabled(g, NVGPU_PMU_NEXT_CORE_ENABLED)) { - pmu_next_core_ucode_setup(g, pmu); - } else { + nvgpu_pmu_next_core_rtos_args_setup(g, pmu); + } else +#endif + { + nvgpu_pmu_rtos_cmdline_args_init(g, pmu); nvgpu_pmu_fw_get_cmd_line_args_offset(g, &args_offset); err = nvgpu_falcon_copy_to_dmem(pmu->flcn, args_offset, diff --git a/drivers/gpu/nvgpu/common/pmu/fw/fw_ver_ops.c b/drivers/gpu/nvgpu/common/pmu/fw/fw_ver_ops.c index 0cb95f57a..c93deccb6 100644 --- a/drivers/gpu/nvgpu/common/pmu/fw/fw_ver_ops.c +++ b/drivers/gpu/nvgpu/common/pmu/fw/fw_ver_ops.c @@ -1170,27 +1170,13 @@ static int pmu_prepare_ns_ucode_blob_v1(struct gk20a *g) nvgpu_mem_wr_n(g, &pmu->fw->ucode, 0, ucode_image, rtos_fw->fw_image->size); +#if defined(CONFIG_NVGPU_NEXT) /* alloc boot args */ - if (!nvgpu_mem_is_valid(&rtos_fw->ucode_boot_args)) { - err = nvgpu_dma_alloc_flags_sys(g, - NVGPU_DMA_PHYSICALLY_ADDRESSED, - sizeof(struct nv_pmu_boot_params), - &rtos_fw->ucode_boot_args); - if (err != 0) { - goto exit; - } - } - - /* alloc core dump */ - if (!nvgpu_mem_is_valid(&rtos_fw->ucode_core_dump)) { - err = 
nvgpu_dma_alloc_flags_sys(g, - NVGPU_DMA_PHYSICALLY_ADDRESSED, - NV_REG_STR_NEXT_CORE_DUMP_SIZE_DEFAULT, - &rtos_fw->ucode_core_dump); - if (err != 0) { - goto exit; - } + err = nvgpu_pmu_next_core_rtos_args_allocate(g, pmu); + if (err != 0) { + goto exit; } +#endif } else { desc = (struct pmu_ucode_desc_v1 *)(void *) rtos_fw->fw_desc->data; diff --git a/drivers/gpu/nvgpu/common/pmu/lsfm/lsfm.c b/drivers/gpu/nvgpu/common/pmu/lsfm/lsfm.c index 39467f3db..7722d3c10 100644 --- a/drivers/gpu/nvgpu/common/pmu/lsfm/lsfm.c +++ b/drivers/gpu/nvgpu/common/pmu/lsfm/lsfm.c @@ -41,6 +41,12 @@ static bool is_lsfm_supported(struct gk20a *g, struct nvgpu_pmu *pmu, struct nvgpu_pmu_lsfm *lsfm) { +#if defined(CONFIG_NVGPU_NEXT) + if (nvgpu_is_enabled(g, NVGPU_PMU_NEXT_CORE_ENABLED)) { + return false; + } +#endif + if (nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY) && (lsfm != NULL)) { return true; diff --git a/drivers/gpu/nvgpu/common/pmu/pmu_rtos_init.c b/drivers/gpu/nvgpu/common/pmu/pmu_rtos_init.c index b271e5b76..df82400a6 100644 --- a/drivers/gpu/nvgpu/common/pmu/pmu_rtos_init.c +++ b/drivers/gpu/nvgpu/common/pmu/pmu_rtos_init.c @@ -21,6 +21,7 @@ */ #include +#include #include #include #include @@ -29,6 +30,8 @@ #include #include #include +#include +#include #include #include @@ -211,6 +214,107 @@ exit: return err; } +void nvgpu_pmu_rtos_cmdline_args_init(struct gk20a *g, struct nvgpu_pmu *pmu) +{ + nvgpu_log_fn(g, " "); + + pmu->fw->ops.set_cmd_line_args_trace_size( + pmu, PMU_RTOS_TRACE_BUFSIZE); + pmu->fw->ops.set_cmd_line_args_trace_dma_base(pmu); + pmu->fw->ops.set_cmd_line_args_trace_dma_idx( + pmu, GK20A_PMU_DMAIDX_VIRT); + + pmu->fw->ops.set_cmd_line_args_cpu_freq(pmu, + g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_PWRCLK)); + + if (pmu->fw->ops.config_cmd_line_args_super_surface != NULL) { + pmu->fw->ops.config_cmd_line_args_super_surface(pmu); + } +} + +#if defined(CONFIG_NVGPU_NEXT) +void nvgpu_pmu_next_core_rtos_args_setup(struct gk20a *g, + struct nvgpu_pmu *pmu) +{ + struct nv_pmu_boot_params boot_params; + struct nv_next_core_bootldr_params *btldr_params; + struct nv_next_core_rtos_params *rtos_params; + struct pmu_cmdline_args_v7 *cmd_line_args; + u64 phyadr = 0; + + nvgpu_pmu_rtos_cmdline_args_init(g, pmu); + + btldr_params = &boot_params.boot_params.bl; + rtos_params = &boot_params.boot_params.rtos; + cmd_line_args = &boot_params.cmd_line_args; + + /* setup core dump */ + rtos_params->core_dump_size = NV_REG_STR_NEXT_CORE_DUMP_SIZE_DEFAULT; + rtos_params->core_dump_phys = nvgpu_mem_get_addr(g, + &pmu->fw->ucode_core_dump); + + /* copy cmd line args to pmu->boot_params.cmd_line_args */ + nvgpu_memcpy((u8 *)cmd_line_args, + (u8 *) (pmu->fw->ops.get_cmd_line_args_ptr(pmu)), + pmu->fw->ops.get_cmd_line_args_size(pmu)); + + cmd_line_args->ctx_bind_addr = g->ops.pmu.get_inst_block_config(g); + + /* setup boot loader args */ + btldr_params->boot_type = NV_NEXT_CORE_BOOTLDR_BOOT_TYPE_RM; + btldr_params->size = U16(sizeof(struct nv_pmu_boot_params)); + btldr_params->version = NV_NEXT_CORE_BOOTLDR_VERSION; + + /* copy to boot_args phyadr */ + nvgpu_mem_wr_n(g, &pmu->fw->ucode_boot_args, 0, + &boot_params.boot_params.bl, + sizeof(struct nv_pmu_boot_params)); + + /* copy boot args phyadr to mailbox 0/1 */ + phyadr = nvgpu_safe_add_u64(NV_NEXT_CORE_AMAP_EXTMEM2_START, + nvgpu_mem_get_addr(g, &pmu->fw->ucode_boot_args)); + + nvgpu_falcon_mailbox_write(g->pmu->flcn, FALCON_MAILBOX_0, + u64_lo32(phyadr)); + nvgpu_falcon_mailbox_write(g->pmu->flcn, FALCON_MAILBOX_1, + u64_hi32(phyadr)); +} + +s32 
nvgpu_pmu_next_core_rtos_args_allocate(struct gk20a *g, + struct nvgpu_pmu *pmu) +{ + struct pmu_rtos_fw *rtos_fw = pmu->fw; + s32 err =0; + + nvgpu_log_fn(g, " "); + + /* alloc boot args */ + if (!nvgpu_mem_is_valid(&rtos_fw->ucode_boot_args)) { + err = nvgpu_dma_alloc_flags_sys(g, + NVGPU_DMA_PHYSICALLY_ADDRESSED, + sizeof(struct nv_pmu_boot_params), + &rtos_fw->ucode_boot_args); + if (err != 0) { + goto exit; + } + } + + /* alloc core dump */ + if (!nvgpu_mem_is_valid(&rtos_fw->ucode_core_dump)) { + err = nvgpu_dma_alloc_flags_sys(g, + NVGPU_DMA_PHYSICALLY_ADDRESSED, + NV_REG_STR_NEXT_CORE_DUMP_SIZE_DEFAULT, + &rtos_fw->ucode_core_dump); + if (err != 0) { + goto exit; + } + } + +exit: + return err; +} +#endif + int nvgpu_pmu_rtos_init(struct gk20a *g) { int err = 0; @@ -254,17 +358,36 @@ int nvgpu_pmu_rtos_init(struct gk20a *g) g->ops.pmu.setup_apertures(g); } - err = nvgpu_pmu_lsfm_ls_pmu_cmdline_args_copy(g, g->pmu, - g->pmu->lsfm); - if (err != 0) { - goto exit; +#if defined(CONFIG_NVGPU_NEXT) + if (nvgpu_is_enabled(g, NVGPU_PMU_NEXT_CORE_ENABLED)) { + err = nvgpu_pmu_next_core_rtos_args_allocate(g, g->pmu); + if (err != 0) { + goto exit; + } + + nvgpu_pmu_next_core_rtos_args_setup(g, g->pmu); + } else +#endif + { + err = nvgpu_pmu_lsfm_ls_pmu_cmdline_args_copy(g, g->pmu, + g->pmu->lsfm); + if (err != 0) { + goto exit; + } } nvgpu_pmu_enable_irq(g, true); - /*Once in LS mode, cpuctl_alias is only accessible*/ - if (g->ops.pmu.secured_pmu_start != NULL) { - g->ops.pmu.secured_pmu_start(g); +#if defined(CONFIG_NVGPU_NEXT) + if (nvgpu_is_enabled(g, NVGPU_PMU_NEXT_CORE_ENABLED)) { + g->ops.falcon.bootstrap(g->pmu->flcn, 0U); + } else +#endif + { + /*Once in LS mode, cpuctl_alias is only accessible*/ + if (g->ops.pmu.secured_pmu_start != NULL) { + g->ops.pmu.secured_pmu_start(g); + } } } else { /* non-secure boot */ diff --git a/drivers/gpu/nvgpu/include/nvgpu/falcon.h b/drivers/gpu/nvgpu/include/nvgpu/falcon.h index 9cdc21fc9..ed1d5378e 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/falcon.h +++ b/drivers/gpu/nvgpu/include/nvgpu/falcon.h @@ -130,6 +130,7 @@ #define FALCON_ID_SEC2 (7U) /** Falcon ID for MINION engine */ #define FALCON_ID_MINION (10U) +#define FALCON_ID_PMU_NEXT_CORE (13U) #define FALCON_ID_END (15U) #define FALCON_ID_INVALID 0xFFFFFFFFU diff --git a/drivers/gpu/nvgpu/include/nvgpu/pmu.h b/drivers/gpu/nvgpu/include/nvgpu/pmu.h index bcd2d30e9..602253c15 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/pmu.h +++ b/drivers/gpu/nvgpu/include/nvgpu/pmu.h @@ -445,6 +445,14 @@ int nvgpu_pmu_lock_release(struct gk20a *g, struct nvgpu_pmu *pmu, int nvgpu_pmu_rtos_early_init(struct gk20a *g, struct nvgpu_pmu *pmu); int nvgpu_pmu_rtos_init(struct gk20a *g); int nvgpu_pmu_destroy(struct gk20a *g, struct nvgpu_pmu *pmu); + +void nvgpu_pmu_rtos_cmdline_args_init(struct gk20a *g, struct nvgpu_pmu *pmu); +#if defined(CONFIG_NVGPU_NEXT) +void nvgpu_pmu_next_core_rtos_args_setup(struct gk20a *g, + struct nvgpu_pmu *pmu); +s32 nvgpu_pmu_next_core_rtos_args_allocate(struct gk20a *g, + struct nvgpu_pmu *pmu); +#endif #endif /**
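
Sketch of the new boot flow: the pmu_rtos_init.c hunks above gate the secure-boot sequence on NVGPU_PMU_NEXT_CORE_ENABLED. The condensed sketch below restates that branch for illustration only; the function name pmu_rtos_secure_start_sketch is hypothetical, the body reuses only symbols touched by this change, it omits the non-secure boot branch and error cleanup, and it assumes the headers already included by common/pmu/pmu_rtos_init.c.

/* Illustrative sketch only -- condensed from the nvgpu_pmu_rtos_init() hunks
 * above; not part of this change. Relies on pmu_rtos_init.c's includes.
 */
static int pmu_rtos_secure_start_sketch(struct gk20a *g)
{
	int err = 0;

#if defined(CONFIG_NVGPU_NEXT)
	if (nvgpu_is_enabled(g, NVGPU_PMU_NEXT_CORE_ENABLED)) {
		/* RISC-V ("next core") path: allocate the boot-args and
		 * core-dump buffers, fill them and publish their physical
		 * address via mailbox 0/1, then bootstrap the core through
		 * the falcon HAL.
		 */
		err = nvgpu_pmu_next_core_rtos_args_allocate(g, g->pmu);
		if (err != 0) {
			return err;
		}
		nvgpu_pmu_next_core_rtos_args_setup(g, g->pmu);
		nvgpu_pmu_enable_irq(g, true);
		g->ops.falcon.bootstrap(g->pmu->flcn, 0U);
		return 0;
	}
#endif
	/* Legacy Falcon LS PMU path: command-line args still go through
	 * LSFM, and the secured-start HAL releases cpuctl_alias.
	 */
	err = nvgpu_pmu_lsfm_ls_pmu_cmdline_args_copy(g, g->pmu, g->pmu->lsfm);
	if (err != 0) {
		return err;
	}
	nvgpu_pmu_enable_irq(g, true);
	if (g->ops.pmu.secured_pmu_start != NULL) {
		g->ops.pmu.secured_pmu_start(g);
	}
	return 0;
}

On the next-core path the LSFM manager is bypassed entirely (is_lsfm_supported() now returns false when NVGPU_PMU_NEXT_CORE_ENABLED is set), which is why nvgpu_pmu_next_core_rtos_args_setup() copies the command-line args into the boot-args buffer itself instead of relying on nvgpu_pmu_lsfm_ls_pmu_cmdline_args_copy().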