diff --git a/drivers/gpu/nvgpu/common/pmu/pmu.c b/drivers/gpu/nvgpu/common/pmu/pmu.c index 25dab0261..f243f1e4a 100644 --- a/drivers/gpu/nvgpu/common/pmu/pmu.c +++ b/drivers/gpu/nvgpu/common/pmu/pmu.c @@ -286,14 +286,11 @@ int nvgpu_init_pmu_support(struct gk20a *g) g->ops.pmu.secured_pmu_start(g); } } else { - /* prepare blob for non-secure PMU boot */ - err = nvgpu_pmu_prepare_ns_ucode_blob(g); - - /* Do non-secure PMU boot */ - err = g->ops.pmu.pmu_setup_hw_and_bootstrap(g, pmu); - if (err != 0) { - goto exit; - } + /* non-secure boot */ + err = nvgpu_pmu_ns_fw_bootstrap(g, pmu); + if (err != 0) { + goto exit; + } } nvgpu_pmu_state_change(g, PMU_STATE_STARTING, false); diff --git a/drivers/gpu/nvgpu/common/pmu/pmu_fw.c b/drivers/gpu/nvgpu/common/pmu/pmu_fw.c index bc5e45a6f..dada419af 100644 --- a/drivers/gpu/nvgpu/common/pmu/pmu_fw.c +++ b/drivers/gpu/nvgpu/common/pmu/pmu_fw.c @@ -1819,7 +1819,7 @@ exit: return err; } -int nvgpu_pmu_prepare_ns_ucode_blob(struct gk20a *g) +static int pmu_prepare_ns_ucode_blob(struct gk20a *g) { struct nvgpu_pmu *pmu = &g->pmu; struct mm_gk20a *mm = &g->mm; @@ -1845,3 +1845,41 @@ int nvgpu_pmu_prepare_ns_ucode_blob(struct gk20a *g) exit: return err; } + +int nvgpu_pmu_ns_fw_bootstrap(struct gk20a *g, struct nvgpu_pmu *pmu) +{ + int err; + u32 args_offset = 0; + + /* prepare blob for non-secure PMU boot */ + err = pmu_prepare_ns_ucode_blob(g); + if (err != 0) { + nvgpu_err(g, "non secure ucode blob construct failed"); + return err; + } + + /* Do non-secure PMU boot */ + nvgpu_mutex_acquire(&pmu->isr_mutex); + nvgpu_falcon_reset(&pmu->flcn); + pmu->isr_enabled = true; + nvgpu_mutex_release(&pmu->isr_mutex); + + g->ops.pmu.setup_apertures(g); + + g->ops.pmu_ver.set_pmu_cmdline_args_trace_size( + pmu, GK20A_PMU_TRACE_BUFSIZE); + g->ops.pmu_ver.set_pmu_cmdline_args_trace_dma_base(pmu); + g->ops.pmu_ver.set_pmu_cmdline_args_trace_dma_idx( + pmu, GK20A_PMU_DMAIDX_VIRT); + + g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq(pmu, + g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_PWRCLK)); + + 
nvgpu_pmu_get_cmd_line_args_offset(g, &args_offset); + + nvgpu_falcon_copy_to_dmem(&pmu->flcn, args_offset, + (u8 *)(g->ops.pmu_ver.get_pmu_cmdline_args_ptr(pmu)), + g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu), 0); + + return g->ops.pmu.pmu_ns_bootstrap(g, pmu, args_offset); +} diff --git a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c index 3650583b4..855668578 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c +++ b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c @@ -729,7 +729,7 @@ static const struct gpu_ops vgpu_gv11b_ops = { .dump_secure_fuses = NULL, .reset_engine = NULL, .is_engine_in_reset = NULL, - .pmu_nsbootstrap = NULL, + .pmu_ns_bootstrap = NULL, .pmu_pg_set_sub_feature_mask = NULL, .is_pmu_supported = NULL, .save_zbc = NULL, diff --git a/drivers/gpu/nvgpu/hal/init/hal_gm20b.c b/drivers/gpu/nvgpu/hal/init/hal_gm20b.c index 3dd472f41..0f3155273 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gm20b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gm20b.c @@ -1168,9 +1168,9 @@ int gm20b_init_hal(struct gk20a *g) nvgpu_gr_falcon_load_secure_ctxsw_ucode; } else { /* Inherit from gk20a */ - gops->pmu.pmu_setup_hw_and_bootstrap = - gm20b_ns_pmu_setup_hw_and_bootstrap; - gops->pmu.pmu_nsbootstrap = pmu_bootstrap; + gops->pmu.setup_apertures = + gm20b_pmu_ns_setup_apertures; + gops->pmu.pmu_ns_bootstrap = gk20a_pmu_ns_bootstrap; } nvgpu_set_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL, false); diff --git a/drivers/gpu/nvgpu/hal/init/hal_gp10b.c b/drivers/gpu/nvgpu/hal/init/hal_gp10b.c index 518f119fd..71336d3ca 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gp10b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gp10b.c @@ -1232,9 +1232,9 @@ int gp10b_init_hal(struct gk20a *g) nvgpu_gr_falcon_load_secure_ctxsw_ucode; } else { /* Inherit from gk20a */ - gops->pmu.pmu_setup_hw_and_bootstrap = - gm20b_ns_pmu_setup_hw_and_bootstrap; - gops->pmu.pmu_nsbootstrap = pmu_bootstrap; + gops->pmu.setup_apertures = + 
gm20b_pmu_ns_setup_apertures; + gops->pmu.pmu_ns_bootstrap = gk20a_pmu_ns_bootstrap; } nvgpu_set_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL, false); diff --git a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c index c98ff35d8..4f5bf904d 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c @@ -1413,9 +1413,9 @@ int gv11b_init_hal(struct gk20a *g) nvgpu_gr_falcon_load_secure_ctxsw_ucode; } else { /* non-secure boot */ - gops->pmu.pmu_nsbootstrap = gv11b_pmu_bootstrap; - gops->pmu.pmu_setup_hw_and_bootstrap = - gm20b_ns_pmu_setup_hw_and_bootstrap; + gops->pmu.pmu_ns_bootstrap = gv11b_pmu_bootstrap; + gops->pmu.setup_apertures = + gm20b_pmu_ns_setup_apertures; } nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false); diff --git a/drivers/gpu/nvgpu/hal/pmu/pmu_gk20a.c b/drivers/gpu/nvgpu/hal/pmu/pmu_gk20a.c index da142668a..9257400b3 100644 --- a/drivers/gpu/nvgpu/hal/pmu/pmu_gk20a.c +++ b/drivers/gpu/nvgpu/hal/pmu/pmu_gk20a.c @@ -615,13 +615,14 @@ int gk20a_pmu_bar0_error_status(struct gk20a *g, u32 *bar0_status, } /* non-secure boot */ -int pmu_bootstrap(struct gk20a *g, struct nvgpu_pmu *pmu) +int gk20a_pmu_ns_bootstrap(struct gk20a *g, struct nvgpu_pmu *pmu, + u32 args_offset) { struct mm_gk20a *mm = &g->mm; struct pmu_ucode_desc *desc = (struct pmu_ucode_desc *)(void *)pmu->fw_image->data; u32 addr_code, addr_data, addr_load; - u32 i, blocks, addr_args; + u32 i, blocks; int err; u64 tmp_addr; @@ -637,25 +638,6 @@ int pmu_bootstrap(struct gk20a *g, struct nvgpu_pmu *pmu) pwr_pmu_new_instblk_valid_f(1) | pwr_pmu_new_instblk_target_sys_coh_f()); - /* TBD: load all other surfaces */ - g->ops.pmu_ver.set_pmu_cmdline_args_trace_size( - pmu, GK20A_PMU_TRACE_BUFSIZE); - g->ops.pmu_ver.set_pmu_cmdline_args_trace_dma_base(pmu); - g->ops.pmu_ver.set_pmu_cmdline_args_trace_dma_idx( - pmu, GK20A_PMU_DMAIDX_VIRT); - - g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq(pmu, - g->ops.clk.get_rate(g, 
CTRL_CLK_DOMAIN_PWRCLK)); - - addr_args = (pwr_falcon_hwcfg_dmem_size_v( - gk20a_readl(g, pwr_falcon_hwcfg_r())) - << GK20A_PMU_DMEM_BLKSIZE2) - - g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu); - - nvgpu_falcon_copy_to_dmem(&pmu->flcn, addr_args, - (u8 *)(g->ops.pmu_ver.get_pmu_cmdline_args_ptr(pmu)), - g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu), 0); - gk20a_writel(g, pwr_falcon_dmemc_r(0), pwr_falcon_dmemc_offs_f(0) | pwr_falcon_dmemc_blk_f(0) | @@ -679,7 +661,7 @@ int pmu_bootstrap(struct gk20a *g, struct nvgpu_pmu *pmu) gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_resident_data_size); gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_code); gk20a_writel(g, pwr_falcon_dmemd_r(0), 0x1); - gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_args); + gk20a_writel(g, pwr_falcon_dmemd_r(0), args_offset); g->ops.pmu.write_dmatrfbase(g, addr_load - (desc->bootloader_imem_offset >> U32(8))); diff --git a/drivers/gpu/nvgpu/hal/pmu/pmu_gk20a.h b/drivers/gpu/nvgpu/hal/pmu/pmu_gk20a.h index 93448eab3..3fd524c66 100644 --- a/drivers/gpu/nvgpu/hal/pmu/pmu_gk20a.h +++ b/drivers/gpu/nvgpu/hal/pmu/pmu_gk20a.h @@ -54,7 +54,8 @@ bool gk20a_pmu_is_interrupted(struct nvgpu_pmu *pmu); void gk20a_pmu_isr(struct gk20a *g); int gk20a_pmu_bar0_error_status(struct gk20a *g, u32 *bar0_status, u32 *etype); -int pmu_bootstrap(struct gk20a *g, struct nvgpu_pmu *pmu); +int gk20a_pmu_ns_bootstrap(struct gk20a *g, struct nvgpu_pmu *pmu, + u32 args_offset); bool gk20a_pmu_is_engine_in_reset(struct gk20a *g); int gk20a_pmu_engine_reset(struct gk20a *g, bool do_reset); void gk20a_write_dmatrfbase(struct gk20a *g, u32 addr); diff --git a/drivers/gpu/nvgpu/hal/pmu/pmu_gm20b.c b/drivers/gpu/nvgpu/hal/pmu/pmu_gm20b.c index d17fb685a..96f94dc0a 100644 --- a/drivers/gpu/nvgpu/hal/pmu/pmu_gm20b.c +++ b/drivers/gpu/nvgpu/hal/pmu/pmu_gm20b.c @@ -140,16 +140,10 @@ bool gm20b_pmu_is_debug_mode_en(struct gk20a *g) return pwr_pmu_scpctl_stat_debug_mode_v(ctl_stat) != 0U; } -int 
gm20b_ns_pmu_setup_hw_and_bootstrap(struct gk20a *g, - struct nvgpu_pmu *pmu) +void gm20b_pmu_ns_setup_apertures(struct gk20a *g) { nvgpu_log_fn(g, " "); - nvgpu_mutex_acquire(&pmu->isr_mutex); - nvgpu_falcon_reset(&pmu->flcn); - pmu->isr_enabled = true; - nvgpu_mutex_release(&pmu->isr_mutex); - /* setup apertures - virtual */ gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_UCODE), pwr_fbif_transcfg_mem_type_virtual_f()); @@ -165,8 +159,6 @@ int gm20b_ns_pmu_setup_hw_and_bootstrap(struct gk20a *g, gk20a_writel(g, pwr_fbif_transcfg_r(GK20A_PMU_DMAIDX_PHYS_SYS_NCOH), pwr_fbif_transcfg_mem_type_physical_f() | pwr_fbif_transcfg_target_noncoherent_sysmem_f()); - - return g->ops.pmu.pmu_nsbootstrap(g, pmu); } void gm20b_pmu_setup_apertures(struct gk20a *g) diff --git a/drivers/gpu/nvgpu/hal/pmu/pmu_gm20b.h b/drivers/gpu/nvgpu/hal/pmu/pmu_gm20b.h index 04da7252a..bf0cf3694 100644 --- a/drivers/gpu/nvgpu/hal/pmu/pmu_gm20b.h +++ b/drivers/gpu/nvgpu/hal/pmu/pmu_gm20b.h @@ -31,8 +31,7 @@ void gm20b_pmu_setup_elpg(struct gk20a *g); void pmu_dump_security_fuses_gm20b(struct gk20a *g); void gm20b_write_dmatrfbase(struct gk20a *g, u32 addr); bool gm20b_pmu_is_debug_mode_en(struct gk20a *g); -int gm20b_ns_pmu_setup_hw_and_bootstrap(struct gk20a *g, - struct nvgpu_pmu *pmu); +void gm20b_pmu_ns_setup_apertures(struct gk20a *g); void gm20b_pmu_setup_apertures(struct gk20a *g); void gm20b_pmu_flcn_setup_boot_config(struct gk20a *g); void gm20b_secured_pmu_start(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/hal/pmu/pmu_gv11b.c b/drivers/gpu/nvgpu/hal/pmu/pmu_gv11b.c index a9702d088..4a4727b4c 100644 --- a/drivers/gpu/nvgpu/hal/pmu/pmu_gv11b.c +++ b/drivers/gpu/nvgpu/hal/pmu/pmu_gv11b.c @@ -139,14 +139,15 @@ bool gv11b_is_pmu_supported(struct gk20a *g) #endif } -int gv11b_pmu_bootstrap(struct gk20a *g, struct nvgpu_pmu *pmu) +int gv11b_pmu_bootstrap(struct gk20a *g, struct nvgpu_pmu *pmu, + u32 args_offset) { struct mm_gk20a *mm = &g->mm; struct pmu_ucode_desc *desc = (struct 
pmu_ucode_desc *)(void *)pmu->fw_image->data; u32 addr_code_lo, addr_data_lo, addr_load_lo; u32 addr_code_hi, addr_data_hi; - u32 i, blocks, addr_args; + u32 i, blocks; int err; u32 inst_block_ptr; @@ -159,28 +160,10 @@ int gv11b_pmu_bootstrap(struct gk20a *g, struct nvgpu_pmu *pmu) inst_block_ptr = nvgpu_inst_block_ptr(g, &mm->pmu.inst_block); gk20a_writel(g, pwr_pmu_new_instblk_r(), pwr_pmu_new_instblk_ptr_f(inst_block_ptr) | - pwr_pmu_new_instblk_valid_f(1) | - (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ? - pwr_pmu_new_instblk_target_sys_coh_f() : - pwr_pmu_new_instblk_target_sys_ncoh_f())); - - g->ops.pmu_ver.set_pmu_cmdline_args_trace_size( - pmu, GK20A_PMU_TRACE_BUFSIZE); - g->ops.pmu_ver.set_pmu_cmdline_args_trace_dma_base(pmu); - g->ops.pmu_ver.set_pmu_cmdline_args_trace_dma_idx( - pmu, GK20A_PMU_DMAIDX_VIRT); - - g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq(pmu, - g->ops.clk.get_rate(g, CTRL_CLK_DOMAIN_PWRCLK)); - - addr_args = (pwr_falcon_hwcfg_dmem_size_v( - gk20a_readl(g, pwr_falcon_hwcfg_r())) - << GK20A_PMU_DMEM_BLKSIZE2) - - g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu); - - nvgpu_falcon_copy_to_dmem(&pmu->flcn, addr_args, - (u8 *)(g->ops.pmu_ver.get_pmu_cmdline_args_ptr(pmu)), - g->ops.pmu_ver.get_pmu_cmdline_args_size(pmu), 0); + pwr_pmu_new_instblk_valid_f(1) | + (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ? 
+ pwr_pmu_new_instblk_target_sys_coh_f() : + pwr_pmu_new_instblk_target_sys_ncoh_f())); gk20a_writel(g, pwr_falcon_dmemc_r(0), pwr_falcon_dmemc_offs_f(0) | @@ -223,7 +206,7 @@ int gv11b_pmu_bootstrap(struct gk20a *g, struct nvgpu_pmu *pmu) gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_data_hi); gk20a_writel(g, pwr_falcon_dmemd_r(0), desc->app_resident_data_size); gk20a_writel(g, pwr_falcon_dmemd_r(0), 0x1U); - gk20a_writel(g, pwr_falcon_dmemd_r(0), addr_args); + gk20a_writel(g, pwr_falcon_dmemd_r(0), args_offset); g->ops.pmu.write_dmatrfbase(g, addr_load_lo - diff --git a/drivers/gpu/nvgpu/hal/pmu/pmu_gv11b.h b/drivers/gpu/nvgpu/hal/pmu/pmu_gv11b.h index 372602216..b938fa2e5 100644 --- a/drivers/gpu/nvgpu/hal/pmu/pmu_gv11b.h +++ b/drivers/gpu/nvgpu/hal/pmu/pmu_gv11b.h @@ -28,7 +28,8 @@ struct gk20a; bool gv11b_is_pmu_supported(struct gk20a *g); -int gv11b_pmu_bootstrap(struct gk20a *g, struct nvgpu_pmu *pmu); +int gv11b_pmu_bootstrap(struct gk20a *g, struct nvgpu_pmu *pmu, + u32 args_offset); void gv11b_pmu_setup_elpg(struct gk20a *g); u32 gv11b_pmu_get_irqdest(struct gk20a *g); void gv11b_pmu_handle_ext_irq(struct gk20a *g, u32 intr0); diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index 0db357a34..2579e1676 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h @@ -1468,9 +1468,8 @@ struct gpu_ops { void (*secured_pmu_start)(struct gk20a *g); void (*flcn_setup_boot_config)(struct gk20a *g); /* non-secure */ - int (*pmu_setup_hw_and_bootstrap)(struct gk20a *g, - struct nvgpu_pmu *pmu); - int (*pmu_nsbootstrap)(struct gk20a *g, struct nvgpu_pmu *pmu); + int (*pmu_ns_bootstrap)(struct gk20a *g, struct nvgpu_pmu *pmu, + u32 args_offset); /* queue */ u32 (*pmu_get_queue_head)(u32 i); u32 (*pmu_get_queue_head_size)(void); diff --git a/drivers/gpu/nvgpu/include/nvgpu/pmu.h b/drivers/gpu/nvgpu/include/nvgpu/pmu.h index 518746f3c..489635ae6 100644 --- 
a/drivers/gpu/nvgpu/include/nvgpu/pmu.h +++ b/drivers/gpu/nvgpu/include/nvgpu/pmu.h @@ -299,7 +299,7 @@ int nvgpu_pmu_sysmem_surface_alloc(struct gk20a *g, struct nvgpu_mem *mem, /* PMU F/W support */ int nvgpu_early_init_pmu_sw(struct gk20a *g, struct nvgpu_pmu *pmu); -int nvgpu_pmu_prepare_ns_ucode_blob(struct gk20a *g); +int nvgpu_pmu_ns_fw_bootstrap(struct gk20a *g, struct nvgpu_pmu *pmu); /* PMU reset */ int nvgpu_pmu_reset(struct gk20a *g);