nvgpu: Host side changes to support HS mode

GM20B changes to the PMU boot sequence to support booting in
both HS mode and LS mode

Bug 1509680

Change-Id: I2832eda0efe17dd5e3a8f11dd06e7d4da267be70
Signed-off-by: Supriya <ssharatkumar@nvidia.com>
Reviewed-on: http://git-master/r/423140
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Reviewed-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Tested-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Shridhar Rasal <srasal@nvidia.com>
Reviewed-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-by: Bharat Nihalani <bnihalani@nvidia.com>
Supriya authored 2014-06-13 12:44:27 +05:30, committed by Dan Willemsen
parent c32ac10b0b
commit b7793a493a
17 changed files with 2001 additions and 17 deletions

View File

@@ -70,3 +70,12 @@ config TEGRA_GK20A
Enable support for the GK20A graphics engine on Tegra
by adding a Tegra platform interface to the GK20A driver.
The Tegra platform interface requires TEGRA_GRHOST (host1x).
config TEGRA_ACR
bool "Enable HS bin support on GM20B GPU on Tegra"
depends on GK20A_PMU
default n
help
Enable support for loading the High Secure binary, using
Write Protected Regions (WPR) to store ucode, and bootstrapping the
PMU, FECS and GPCCS in Low Secure mode.
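For reference, a board defconfig opting into the new path would simply carry the following (assuming GK20A_PMU is already set, as the dependency above requires):

CONFIG_TEGRA_ACR=y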

View File

@@ -28,6 +28,7 @@ struct channel_gk20a;
struct gr_gk20a;
struct sim_gk20a;
struct gk20a_ctxsw_ucode_segments;
struct acr_gm20b;
#include <linux/sched.h>
#include <linux/spinlock.h>
@@ -45,6 +46,7 @@ struct gk20a_ctxsw_ucode_segments;
#include "priv_ring_gk20a.h"
#include "therm_gk20a.h"
#include "platform_gk20a.h"
#include "gm20b/acr_gm20b.h"
extern struct platform_device tegra_gk20a_device;
@@ -205,6 +207,8 @@ struct gpu_ops {
struct pmu_sequence *seq);
void *(*get_pmu_seq_out_a_ptr)(
struct pmu_sequence *seq);
void (*set_pmu_cmdline_args_secure_mode)(struct pmu_gk20a *pmu,
u32 val);
} pmu_ver;
struct {
int (*get_netlist_name)(int index, char *name);
@@ -214,6 +218,10 @@ struct gpu_ops {
int (*set_sparse)(struct vm_gk20a *vm, u64 vaddr,
u32 num_pages, u32 pgsz_idx);
} mm;
struct {
int (*pmu_setup_sw)(struct gk20a *g);
int (*pmu_setup_hw_and_bootstrap)(struct gk20a *g);
} pmu;
};
struct gk20a {
@@ -236,6 +244,7 @@ struct gk20a {
struct sim_gk20a sim;
struct mm_gk20a mm;
struct pmu_gk20a pmu;
struct acr_gm20b acr;
struct cooling_device_gk20a gk20a_cdev;
/* Save pmu fw here so that it lives cross suspend/resume.

View File

@@ -23,6 +23,7 @@
#include "channel_gk20a.h"
#include "gr_ctx_gk20a.h"
#include "mm_gk20a.h"
#include "pmu_gk20a.h"
struct gpu_ops gk20a_ops = {
.clock_gating = {
@@ -48,6 +49,7 @@ int gk20a_init_hal(struct gpu_ops *gops)
gk20a_init_fifo(gops);
gk20a_init_gr_ctx(gops);
gk20a_init_mm(gops);
gk20a_init_pmu_ops(gops);
gops->name = "gk20a";
return 0;

View File

@@ -38,10 +38,8 @@
#define gk20a_dbg_pmu(fmt, arg...) \
gk20a_dbg(gpu_dbg_pmu, fmt, ##arg)
static void pmu_dump_falcon_stats(struct pmu_gk20a *pmu);
static int gk20a_pmu_get_elpg_residency_gating(struct gk20a *g,
u32 *ingating_time, u32 *ungating_time, u32 *gating_cnt);
static void pmu_setup_hw(struct work_struct *work);
static void ap_callback_init_and_enable_ctrl(
struct gk20a *g, struct pmu_msg *msg,
void *param, u32 seq_desc, u32 status);
@@ -62,6 +60,10 @@ static void set_pmu_cmdline_args_cpufreq_v1(struct pmu_gk20a *pmu, u32 freq)
{
pmu->args_v1.cpu_freq_hz = freq;
}
static void set_pmu_cmdline_args_secure_mode_v1(struct pmu_gk20a *pmu, u32 val)
{
pmu->args_v1.secure_mode = val;
}
static void set_pmu_cmdline_args_cpufreq_v0(struct pmu_gk20a *pmu, u32 freq)
{
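Since the v0 interface deliberately leaves the new hook NULL (see the second hunk below), callers have to guard it. A minimal hedged sketch of a call site — the caller and the choice of val=1 are illustrative, not part of this diff:

/* Request secure mode only where the pmu_ver interface provides the hook. */
if (g->ops.pmu_ver.set_pmu_cmdline_args_secure_mode)
	g->ops.pmu_ver.set_pmu_cmdline_args_secure_mode(&g->pmu, 1);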
@@ -482,10 +484,12 @@ static void *get_pmu_sequence_out_alloc_ptr_v0(struct pmu_sequence *seq)
return (void *)(&seq->out_v0);
}
static int gk20a_init_pmu(struct pmu_gk20a *pmu)
int gk20a_init_pmu(struct pmu_gk20a *pmu)
{
struct gk20a *g = pmu->g;
switch (pmu->desc->app_version) {
case APP_VERSION_GM20B_1:
case APP_VERSION_GM20B:
case APP_VERSION_1:
case APP_VERSION_2:
g->ops.pmu_ver.cmd_id_zbc_table_update = 16;
@@ -493,6 +497,8 @@ static int gk20a_init_pmu(struct pmu_gk20a *pmu)
pmu_cmdline_size_v1;
g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq =
set_pmu_cmdline_args_cpufreq_v1;
g->ops.pmu_ver.set_pmu_cmdline_args_secure_mode =
set_pmu_cmdline_args_secure_mode_v1;
g->ops.pmu_ver.get_pmu_cmdline_args_ptr =
get_pmu_cmdline_args_ptr_v1;
g->ops.pmu_ver.get_pmu_allocation_struct_size =
@@ -558,6 +564,8 @@ static int gk20a_init_pmu(struct pmu_gk20a *pmu)
pmu_cmdline_size_v0;
g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq =
set_pmu_cmdline_args_cpufreq_v0;
g->ops.pmu_ver.set_pmu_cmdline_args_secure_mode =
NULL;
g->ops.pmu_ver.get_pmu_cmdline_args_ptr =
get_pmu_cmdline_args_ptr_v0;
g->ops.pmu_ver.get_pmu_allocation_struct_size =
@@ -627,7 +635,7 @@ static int gk20a_init_pmu(struct pmu_gk20a *pmu)
return 0;
}
static void pmu_copy_from_dmem(struct pmu_gk20a *pmu,
void pmu_copy_from_dmem(struct pmu_gk20a *pmu,
u32 src, u8 *dst, u32 size, u8 port)
{
struct gk20a *g = pmu->g;
@@ -673,7 +681,7 @@ static void pmu_copy_from_dmem(struct pmu_gk20a *pmu,
return;
}
static void pmu_copy_to_dmem(struct pmu_gk20a *pmu,
void pmu_copy_to_dmem(struct pmu_gk20a *pmu,
u32 dst, u8 *src, u32 size, u8 port)
{
struct gk20a *g = pmu->g;
@@ -887,7 +895,7 @@ static int pmu_enable(struct pmu_gk20a *pmu, bool enable)
return 0;
}
static int pmu_reset(struct pmu_gk20a *pmu)
int pmu_reset(struct pmu_gk20a *pmu)
{
int err;
@@ -999,7 +1007,7 @@ static int pmu_bootstrap(struct pmu_gk20a *pmu)
return 0;
}
static void pmu_seq_init(struct pmu_gk20a *pmu)
void pmu_seq_init(struct pmu_gk20a *pmu)
{
u32 i;
@@ -1784,7 +1792,7 @@ static int gk20a_aelpg_init_and_enable(struct gk20a *g, u8 ctrl_id);
static void pmu_setup_hw_load_zbc(struct gk20a *g);
static void pmu_setup_hw_enable_elpg(struct gk20a *g);
static void pmu_setup_hw(struct work_struct *work)
void pmu_setup_hw(struct work_struct *work)
{
struct pmu_gk20a *pmu = container_of(work, struct pmu_gk20a, pg_init);
struct gk20a *g = pmu->g;
@@ -1967,6 +1975,12 @@ static void pmu_setup_hw_enable_elpg(struct gk20a *g)
}
}
void gk20a_init_pmu_ops(struct gpu_ops *gops)
{
gops->pmu.pmu_setup_sw = gk20a_init_pmu_setup_sw;
gops->pmu.pmu_setup_hw_and_bootstrap = gk20a_init_pmu_setup_hw1;
}
int gk20a_init_pmu_support(struct gk20a *g)
{
struct pmu_gk20a *pmu = &g->pmu;
@@ -1984,11 +1998,10 @@ int gk20a_init_pmu_support(struct gk20a *g)
return err;
if (support_gk20a_pmu()) {
err = gk20a_init_pmu_setup_sw(g);
err = g->ops.pmu.pmu_setup_sw(g);
if (err)
return err;
err = gk20a_init_pmu_setup_hw1(g);
err = g->ops.pmu.pmu_setup_hw_and_bootstrap(g);
if (err)
return err;
}
@@ -2724,7 +2737,7 @@ static void pmu_dump_elpg_stats(struct pmu_gk20a *pmu)
*/
}
static void pmu_dump_falcon_stats(struct pmu_gk20a *pmu)
void pmu_dump_falcon_stats(struct pmu_gk20a *pmu)
{
struct gk20a *g = pmu->g;
int i;

View File

@@ -51,6 +51,8 @@
/* Mapping between AP_CTRLs and Idle counters */
#define PMU_AP_IDLE_MASK_GRAPHICS (PMU_AP_IDLE_MASK_HIST_IDX_1)
#define APP_VERSION_GM20B_1 18547257
#define APP_VERSION_GM20B 17615280
#define APP_VERSION_2 18542378
#define APP_VERSION_1 17997577
#define APP_VERSION_0 16856675
@@ -1058,6 +1060,8 @@ struct pmu_gk20a {
};
unsigned long perfmon_events_cnt;
bool perfmon_sampling_enabled;
u8 pmu_mode; /* Added for GM20B ACR support */
u32 falcon_id;
};
int gk20a_init_pmu_support(struct gk20a *g);
@@ -1086,5 +1090,16 @@ int gk20a_pmu_debugfs_init(struct platform_device *dev);
void gk20a_pmu_reset_load_counters(struct gk20a *g);
void gk20a_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles,
u32 *total_cycles);
void gk20a_init_pmu_ops(struct gpu_ops *gops);
void pmu_copy_to_dmem(struct pmu_gk20a *pmu,
u32 dst, u8 *src, u32 size, u8 port);
void pmu_copy_from_dmem(struct pmu_gk20a *pmu,
u32 src, u8 *dst, u32 size, u8 port);
int pmu_reset(struct pmu_gk20a *pmu);
int gk20a_init_pmu(struct pmu_gk20a *pmu);
void pmu_dump_falcon_stats(struct pmu_gk20a *pmu);
void gk20a_remove_pmu_support(struct pmu_gk20a *pmu);
void pmu_setup_hw(struct work_struct *work);
void pmu_seq_init(struct pmu_gk20a *pmu);
#endif /*__PMU_GK20A_H__*/

View File

@@ -11,4 +11,6 @@ obj-$(CONFIG_GK20A) = \
fifo_gm20b.o \
gr_ctx_gm20b.o \
gm20b_gating_reglist.o \
acr_gm20b.o \
pmu_gm20b.o \
mm_gm20b.o

View File

File diff suppressed because it is too large

View File

@@ -0,0 +1,377 @@
/*
* GM20B ACR
*
* Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#ifndef __ACR_GM20B_H_
#define __ACR_GM20B_H_
#include "gk20a/gk20a.h"
#include "mm_gm20b.h"
/*Defines*/
/*chip specific defines*/
#define MAX_SUPPORTED_LSFM 1 /*PMU, FECS, GPCCS*/
#define LSF_UCODE_DATA_ALIGNMENT 4096
#define GM20B_PMU_UCODE_IMAGE "gpmu_ucode.bin"
#define GM20B_HSBIN_PMU_UCODE_IMAGE "acr_ucode.bin"
#define GM20B_HSBIN_PMU_BL_UCODE_IMAGE "pmu_bl.bin"
#define LSFM_DISABLE_MASK_NONE (0x00000000) /*Disable no LS falcons*/
#define LSFM_DISABLE_MASK_ALL (0xFFFFFFFF) /*Disable all LS falcons*/
#define PMU_SECURE_MODE (0x1)
#define PMU_LSFM_MANAGED (0x2)
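These flags pair with the pmu_mode field added to struct pmu_gk20a further down; a hedged sketch of how the ACR boot path presumably marks the PMU — the real call site lives in the suppressed acr_gm20b.c:

pmu->pmu_mode |= PMU_SECURE_MODE;  /* PMU was booted through the HS ACR path */
pmu->pmu_mode |= PMU_LSFM_MANAGED; /* PMU ucode is managed by the LSF manager */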
/*ACR load related*/
/*!
* Supporting a maximum of 2 regions.
* This is needed to pre-allocate space in DMEM
*/
#define T210_FLCN_ACR_MAX_REGIONS (2)
#define LSF_BOOTSTRAP_OWNER_RESERVED_DMEM_SIZE (0x200)
/*!
* Falcon Id Defines
* Defines a common Light Secure Falcon identifier.
*/
#define LSF_FALCON_ID_PMU (0)
#define LSF_FALCON_ID_FECS (2)
#define LSF_FALCON_ID_GPCCS (3)
#define LSF_FALCON_ID_INVALID (0xFFFFFFFF)
/*!
* Bootstrap Owner Defines
*/
#define LSF_BOOTSTRAP_OWNER_DEFAULT (LSF_FALCON_ID_PMU)
/*!
* Image Status Defines
*/
#define LSF_IMAGE_STATUS_NONE (0)
#define LSF_IMAGE_STATUS_COPY (1)
#define LSF_IMAGE_STATUS_VALIDATION (2)
#define LSF_IMAGE_STATUS_BOOTSTRAP_READY (3)
/*LSB header related defines*/
#define NV_FLCN_ACR_LSF_FLAG_LOAD_CODE_AT_0_FALSE 0
#define NV_FLCN_ACR_LSF_FLAG_LOAD_CODE_AT_0_TRUE 1
#define NV_FLCN_ACR_LSF_FLAG_DMACTL_REQ_CTX_FALSE 0
#define NV_FLCN_ACR_LSF_FLAG_DMACTL_REQ_CTX_TRUE 4
/*!
* Light Secure WPR Content Alignments
*/
#define LSF_LSB_HEADER_ALIGNMENT 256
#define LSF_BL_DATA_ALIGNMENT 256
#define LSF_BL_DATA_SIZE_ALIGNMENT 256
#define LSF_BL_CODE_SIZE_ALIGNMENT 256
/*!
* Falcon UCODE header index.
*/
#define FLCN_NL_UCODE_HDR_OS_CODE_OFF_IND (0)
#define FLCN_NL_UCODE_HDR_OS_CODE_SIZE_IND (1)
#define FLCN_NL_UCODE_HDR_OS_DATA_OFF_IND (2)
#define FLCN_NL_UCODE_HDR_OS_DATA_SIZE_IND (3)
#define FLCN_NL_UCODE_HDR_NUM_APPS_IND (4)
/*!
* There are a total of N applications, each with code and data offsets defined
* in the UCODE header. These macros provide the CODE and DATA offset and size
* of the A-th application.
*/
#define FLCN_NL_UCODE_HDR_APP_CODE_START_IND (5)
#define FLCN_NL_UCODE_HDR_APP_CODE_OFF_IND(N, A) \
(FLCN_NL_UCODE_HDR_APP_CODE_START_IND + (A*2))
#define FLCN_NL_UCODE_HDR_APP_CODE_SIZE_IND(N, A) \
(FLCN_NL_UCODE_HDR_APP_CODE_START_IND + (A*2) + 1)
#define FLCN_NL_UCODE_HDR_APP_CODE_END_IND(N) \
(FLCN_NL_UCODE_HDR_APP_CODE_START_IND + (N*2) - 1)
#define FLCN_NL_UCODE_HDR_APP_DATA_START_IND(N) \
(FLCN_NL_UCODE_HDR_APP_CODE_END_IND(N) + 1)
#define FLCN_NL_UCODE_HDR_APP_DATA_OFF_IND(N, A) \
(FLCN_NL_UCODE_HDR_APP_DATA_START_IND(N) + (A*2))
#define FLCN_NL_UCODE_HDR_APP_DATA_SIZE_IND(N, A) \
(FLCN_NL_UCODE_HDR_APP_DATA_START_IND(N) + (A*2) + 1)
#define FLCN_NL_UCODE_HDR_APP_DATA_END_IND(N) \
(FLCN_NL_UCODE_HDR_APP_DATA_START_IND(N) + (N*2) - 1)
#define FLCN_NL_UCODE_HDR_OS_OVL_OFF_IND(N) \
(FLCN_NL_UCODE_HDR_APP_DATA_END_IND(N) + 1)
#define FLCN_NL_UCODE_HDR_OS_OVL_SIZE_IND(N) \
(FLCN_NL_UCODE_HDR_APP_DATA_END_IND(N) + 2)
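A minimal sketch of walking such a header, assuming hdr points at the u32 words of a non-legacy falcon ucode image (with N=1, the code offset/size land at indices 5/6, data at 7/8, and the OS overlay at 9/10):

u32 n = hdr[FLCN_NL_UCODE_HDR_NUM_APPS_IND];
u32 a;
for (a = 0; a < n; a++) {
	u32 code_off  = hdr[FLCN_NL_UCODE_HDR_APP_CODE_OFF_IND(n, a)];
	u32 code_size = hdr[FLCN_NL_UCODE_HDR_APP_CODE_SIZE_IND(n, a)];
	u32 data_off  = hdr[FLCN_NL_UCODE_HDR_APP_DATA_OFF_IND(n, a)];
	u32 data_size = hdr[FLCN_NL_UCODE_HDR_APP_DATA_SIZE_IND(n, a)];
	/* stage the A-th app's code/data into the ucode blob using these offsets */
}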
/*Externs*/
/*Structs*/
/*!
* Light Secure Falcon Ucode Description Defines
* This structure is preliminary and may change as the ucode signing flow evolves.
*/
struct lsf_ucode_desc {
u8 prd_keys[2][16];
u8 dbg_keys[2][16];
u32 b_prd_present;
u32 b_dbg_present;
u32 falcon_id;
};
/*!
* Light Secure WPR Header
* Defines state allowing Light Secure Falcon bootstrapping.
*
* falcon_id - LS falcon ID
* lsb_offset - Offset into WPR region holding LSB header
* bootstrap_owner - Bootstrap OWNER (either PMU or SEC2)
* lazy_bootstrap - Skip bootstrapping by ACR
* status - Bootstrapping status
*/
struct lsf_wpr_header {
u32 falcon_id;
u32 lsb_offset;
u32 bootstrap_owner;
u32 lazy_bootstrap;
u32 status;
};
struct lsf_lsb_header {
struct lsf_ucode_desc signature;
u32 ucode_off;
u32 ucode_size;
u32 data_size;
u32 bl_code_size;
u32 bl_imem_off;
u32 bl_data_off;
u32 bl_data_size;
u32 flags;
};
/*!
* Structure used by the boot-loader to load the rest of the code. This has
* to be filled by the host and copied into DMEM at the offset provided in
* hsflcn_bl_desc.bl_desc_dmem_load_off.
*
* signature - 16B signature for secure code. 0s if no secure code
* ctx_dma - CtxDma to be used by BL while loading code/data
* code_dma_base - 256B aligned Physical FB Address where code is located
* non_sec_code_off - Offset from code_dma_base where the nonSecure code is
* located. The offset must be a multiple of 256 to help perf
* non_sec_code_size - The size of the nonSecure code part.
* sec_code_off - Offset from code_dma_base where the secure code is
* located. The offset must be a multiple of 256 to help perf
* sec_code_size - The size of the secure code part.
* code_entry_point - Code entry point which will be invoked by BL after
* code is loaded.
* data_dma_base - 256B aligned Physical FB Address where data is located.
* data_size - Size of data block. Should be multiple of 256B
*/
struct flcn_bl_dmem_desc {
u32 signature[4]; /*Should be the first element..*/
u32 ctx_dma;
u32 code_dma_base;
u32 non_sec_code_off;
u32 non_sec_code_size;
u32 sec_code_off;
u32 sec_code_size;
u32 code_entry_point;
u32 data_dma_base;
u32 data_size;
};
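Tying this to the newly exported pmu_copy_to_dmem(): a hedged sketch of the host-side hand-off. Here code_fb_addr, data_fb_addr and the sizes are placeholders, the >>8 packing of 256B-aligned addresses and DMEM port 0 are assumptions, and the real fill lives in the suppressed acr_gm20b.c:

struct pmu_gk20a *pmu = &g->pmu;
struct flcn_bl_dmem_desc desc;
memset(&desc, 0, sizeof(desc));                /* zeroed signature: no secure code */
desc.ctx_dma = 0;                              /* placeholder CtxDma index */
desc.code_dma_base = (u32)(code_fb_addr >> 8); /* 256B-aligned FB address, packed */
desc.non_sec_code_off = 0;
desc.non_sec_code_size = code_size;
desc.code_entry_point = 0;                     /* BL jumps here once code is loaded */
desc.data_dma_base = (u32)(data_fb_addr >> 8);
desc.data_size = data_size;
/* Copy into PMU DMEM at the offset the HS bootloader advertises. */
pmu_copy_to_dmem(pmu, g->acr.pmu_hsbl_desc->bl_desc_dmem_load_off,
		(u8 *)&desc, sizeof(desc), 0);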
/*!
* Legacy structure used by the current PMU/DPU bootloader.
*/
struct loader_config {
u32 dma_idx;
u32 code_dma_base; /*<! upper 32-bits of 40-bit dma address*/
u32 code_size_total;
u32 code_size_to_load;
u32 code_entry_point;
u32 data_dma_base; /*<! upper 32-bits of 40-bit dma address*/
u32 data_size; /*<! initialized data of the application */
u32 overlay_dma_base; /*<! upper 32-bits of the 40-bit dma address*/
u32 argc;
u32 argv;
};
/*!
* Union of all supported structures used by bootloaders.
*/
union flcn_bl_generic_desc {
struct flcn_bl_dmem_desc bl_dmem_desc;
struct loader_config loader_cfg;
};
struct flcn_ucode_img {
u32 *header; /*only some falcons have header*/
u32 *data;
struct pmu_ucode_desc *desc; /*only some falcons have descriptor*/
u32 data_size;
void *fw_ver; /*NV2080_CTRL_GPU_GET_FIRMWARE_VERSION_PARAMS struct*/
u8 load_entire_os_data; /* load the whole osData section at boot time.*/
struct lsf_ucode_desc *lsf_desc; /* NULL if not a light secure falcon.*/
u8 free_res_allocs; /*True if there are resources to be freed by the client.*/
u32 flcn_inst;
};
/*!
* LSFM Managed Ucode Image
* next : Next image in the list, NULL if last.
* wpr_header : WPR header for this ucode image
* lsb_header : LSB header for this ucode image
* bl_gen_desc : Bootloader generic desc structure for this ucode image
* bl_gen_desc_size : Size of the bootloader desc structure for this ucode image
* full_ucode_size : Surface size required for final ucode image
* ucode_img : Ucode image info
*/
struct lsfm_managed_ucode_img {
struct lsfm_managed_ucode_img *next;
struct lsf_wpr_header wpr_header;
struct lsf_lsb_header lsb_header;
union flcn_bl_generic_desc bl_gen_desc;
u32 bl_gen_desc_size;
u32 full_ucode_size;
struct flcn_ucode_img ucode_img;
};
struct ls_flcn_mgr {
u16 managed_flcn_cnt;
u32 wpr_size;
u32 disable_mask;
struct lsfm_managed_ucode_img *ucode_img_list;
void *wpr_client_req_state;/*PACR_CLIENT_REQUEST_STATE originally*/
};
/*ACR related structs*/
/*!
* start_addr - Starting address of region
* end_addr - Ending address of region
* region_id - Region ID
* read_mask - Read Mask
* write_mask - Write Mask
* client_mask - Bit map of all clients currently using this region
*/
struct flcn_acr_region_prop {
u32 start_addr;
u32 end_addr;
u32 region_id;
u32 read_mask;
u32 write_mask;
u32 client_mask;
};
/*!
* no_regions - Number of regions used.
* region_props - Region properties
*/
struct flcn_acr_regions {
u32 no_regions;
struct flcn_acr_region_prop region_props[T210_FLCN_ACR_MAX_REGIONS];
};
/*!
* reserved_dmem - When the bootstrap owner has finished bootstrapping other
* falcons and needs to switch into LS mode, it needs its own
* actual DMEM image copied into DMEM as part of LS setup. If
* the ACR desc is at location 0, it will definitely get
* overwritten, causing data corruption. Hence we reserve 0x200
* bytes to leave room for any loading data. NOTE: This has to
* be the first member, always.
* signature - Signature of ACR ucode.
* wpr_region_id - Region ID holding the WPR header and its details
* wpr_offset - Offset from the WPR region holding the wpr header
* regions - Region descriptors
* nonwpr_ucode_blob_start - Non-WPR start address where the kernel stores the ucode blob
* nonwpr_ucode_blob_size - Size of the non-WPR region where the kernel stores the ucode blob
*/
struct flcn_acr_desc {
u32 reserved_dmem[(LSF_BOOTSTRAP_OWNER_RESERVED_DMEM_SIZE/4)];
/*Always 1st*/
u32 wpr_region_id;
u32 wpr_offset;
struct flcn_acr_regions regions;
u32 nonwpr_ucode_blob_start;
u32 nonwpr_ucode_blob_size;
};
/*!
* The header used by RM to figure out the code and data sections of the bootloader.
*
* bl_code_off - Offset of code section in the image
* bl_code_size - Size of code section in the image
* bl_data_off - Offset of data section in the image
* bl_data_size - Size of data section in the image
*/
struct hsflcn_bl_img_hdr {
u32 bl_code_off;
u32 bl_code_size;
u32 bl_data_off;
u32 bl_data_size;
};
/*!
* The descriptor used by RM to figure out the requirements of the boot loader.
*
* bl_start_tag - Starting tag of bootloader
* bl_desc_dmem_load_off - DMEM offset where _def_rm_flcn_bl_dmem_desc
* is to be loaded
* bl_img_hdr - Description of the image
*/
struct hsflcn_bl_desc {
u32 bl_start_tag;
u32 bl_desc_dmem_load_off;
struct hsflcn_bl_img_hdr bl_img_hdr;
};
struct bin_hdr {
u32 bin_magic; /* 0x10de */
u32 bin_ver; /* versioning of bin format */
u32 bin_size; /* entire image size including this header */
u32 header_offset; /* Header offset of executable binary metadata,
start @ offset- 0x100 */
u32 data_offset; /* Start of executable binary data, start @
offset- 0x200 */
u32 data_size; /* Size of executable binary */
};
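The magic and size fields make a cheap sanity check possible before trusting the rest of an image; a hedged helper sketch (the name and its use are illustrative, not from this diff):

static bool bin_hdr_looks_valid(const struct bin_hdr *hdr, size_t fw_size)
{
	/* 0x10de magic per the comment above; bin_size covers the whole image. */
	return hdr->bin_magic == 0x10de && hdr->bin_size <= fw_size;
}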
struct acr_fw_header {
u32 sig_dbg_offset;
u32 sig_dbg_size;
u32 sig_prod_offset;
u32 sig_prod_size;
u32 patch_loc;
u32 patch_sig;
u32 hdr_offset; /*this header points to acr_ucode_header_t210_load*/
u32 hdr_size; /*size of above header*/
};
struct acr_gm20b {
u64 ucode_blob_start;
u32 ucode_blob_size;
struct bin_hdr *bl_bin_hdr;
struct hsflcn_bl_desc *pmu_hsbl_desc;
struct bin_hdr *hsbin_hdr;
struct acr_fw_header *fw_hdr;
};
void gm20b_init_secure_pmu(struct gpu_ops *gops);
int prepare_ucode_blob(struct gk20a *g);
int pmu_ucode_details(struct gk20a *g, struct flcn_ucode_img *p_img);
int fecs_ucode_details(struct gk20a *g, struct flcn_ucode_img *p_img);
int gpccs_ucode_details(struct gk20a *g, struct flcn_ucode_img *p_img);
int gm20b_bootstrap_hs_flcn(struct gk20a *g);
int gm20b_pmu_setup_sw(struct gk20a *g);
int pmu_exec_gen_bl(struct gk20a *g, void *desc, u8 b_wait_for_halt);
int pmu_wait_for_halt(struct gk20a *g, unsigned int timeout_us);
#endif /*__ACR_GM20B_H_*/

View File

@@ -25,6 +25,7 @@
#include "fifo_gm20b.h"
#include "gr_ctx_gm20b.h"
#include "mm_gm20b.h"
#include "pmu_gm20b.h"
struct gpu_ops gm20b_ops = {
.clock_gating = {
@@ -51,6 +52,7 @@ int gm20b_init_hal(struct gpu_ops *gops)
gm20b_init_fifo(gops);
gm20b_init_gr_ctx(gops);
gm20b_init_mm(gops);
gm20b_init_pmu_ops(gops);
gops->name = "gm20b";
return 0;

View File

@@ -202,4 +202,24 @@ static inline u32 fb_mmu_debug_ctrl_debug_enabled_v(void)
{
return 0x00000001;
}
static inline u32 fb_mmu_vpr_info_r(void)
{
return 0x00100cd0;
}
static inline u32 fb_mmu_vpr_info_fetch_f(u32 v)
{
return (v & 0x1) << 2;
}
static inline u32 fb_mmu_vpr_info_fetch_v(u32 r)
{
return (r >> 2) & 0x1;
}
static inline u32 fb_mmu_vpr_info_fetch_false_v(void)
{
return 0x00000000;
}
static inline u32 fb_mmu_vpr_info_fetch_true_v(void)
{
return 0x00000001;
}
#endif

View File

@@ -322,6 +322,14 @@ static inline u32 gr_fecs_cpuctl_startcpu_f(u32 v)
{
return (v & 0x1) << 1;
}
static inline u32 gr_fecs_cpuctl_alias_r(void)
{
return 0x00409130;
}
static inline u32 gr_fecs_cpuctl_alias_startcpu_f(u32 v)
{
return (v & 0x1) << 1;
}
static inline u32 gr_fecs_dmactl_r(void)
{
return 0x0040910c;

View File

@@ -290,6 +290,86 @@ static inline u32 pwr_falcon_cpuctl_startcpu_f(u32 v)
{
return (v & 0x1) << 1;
}
static inline u32 pwr_falcon_cpuctl_halt_intr_f(u32 v)
{
return (v & 0x1) << 4;
}
static inline u32 pwr_falcon_cpuctl_halt_intr_m(void)
{
return 0x1 << 4;
}
static inline u32 pwr_falcon_cpuctl_halt_intr_v(u32 r)
{
return (r >> 4) & 0x1;
}
static inline u32 pwr_falcon_cpuctl_cpuctl_alias_en_f(u32 v)
{
return (v & 0x1) << 6;
}
static inline u32 pwr_falcon_cpuctl_cpuctl_alias_en_m(void)
{
return 0x1 << 6;
}
static inline u32 pwr_falcon_cpuctl_cpuctl_alias_en_v(u32 r)
{
return (r >> 6) & 0x1;
}
static inline u32 pwr_falcon_cpuctl_alias_r(void)
{
return 0x0010a130;
}
static inline u32 pwr_falcon_cpuctl_alias_startcpu_f(u32 v)
{
return (v & 0x1) << 1;
}
static inline u32 pwr_pmu_scpctl_stat_r(void)
{
return 0x0010ac08;
}
static inline u32 pwr_pmu_scpctl_stat_debug_mode_f(u32 v)
{
return (v & 0x1) << 20;
}
static inline u32 pwr_pmu_scpctl_stat_debug_mode_m(void)
{
return 0x1 << 20;
}
static inline u32 pwr_pmu_scpctl_stat_debug_mode_v(u32 r)
{
return (r >> 20) & 0x1;
}
static inline u32 pwr_falcon_imemc_r(u32 i)
{
return 0x0010a180 + i*16;
}
static inline u32 pwr_falcon_imemc_offs_f(u32 v)
{
return (v & 0x3f) << 2;
}
static inline u32 pwr_falcon_imemc_blk_f(u32 v)
{
return (v & 0xff) << 8;
}
static inline u32 pwr_falcon_imemc_aincw_f(u32 v)
{
return (v & 0x1) << 24;
}
static inline u32 pwr_falcon_imemd_r(u32 i)
{
return 0x0010a184 + i*16;
}
static inline u32 pwr_falcon_imemt_r(u32 i)
{
return 0x0010a188 + i*16;
}
static inline u32 pwr_falcon_sctl_r(void)
{
return 0x0010a240;
}
static inline u32 pwr_falcon_mmu_phys_sec_r(void)
{
return 0x00100ce4;
}
static inline u32 pwr_falcon_bootvec_r(void)
{
return 0x0010a104;

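The imemc/imemd/imemt trio above exists for the usual auto-incrementing IMEM copy; a hedged sketch modeled on gk20a's existing pmu_bootstrap flow (addr_code, size_dwords and src are placeholders; the 64-words-per-256B-tag policy follows the falcon convention):

u32 i, tag = addr_code >> 8;                 /* IMEM tags name 256B blocks */
gk20a_writel(g, pwr_falcon_imemc_r(0),
	pwr_falcon_imemc_offs_f(addr_code >> 2) |
	pwr_falcon_imemc_blk_f(addr_code >> 8) |
	pwr_falcon_imemc_aincw_f(1));        /* auto-increment on each write */
for (i = 0; i < size_dwords; i++) {
	if ((i % 64) == 0)                   /* new 256B block: set its tag */
		gk20a_writel(g, pwr_falcon_imemt_r(0), tag++);
	gk20a_writel(g, pwr_falcon_imemd_r(0), src[i]);
}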
View File

@@ -0,0 +1,22 @@
/*
* GM20B MC registers used by ACR
*
* Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#ifndef _MC_CARVEOUT_REG_H_
#define _MC_CARVEOUT_REG_H_
#define MC_SECURITY_CARVEOUT2_BOM_0 0xc5c
#define MC_SECURITY_CARVEOUT3_BOM_0 0xcac
#define MC_ERR_GENERALIZED_CARVEOUT_STATUS_0 0xc00
#endif /*_MC_CARVEOUT_REG_H_*/

View File

@@ -13,9 +13,11 @@
* more details.
*/
#include <linux/pm_runtime.h>
#include "gk20a/gk20a.h"
#include "mm_gm20b.h"
#include "hw_gmmu_gm20b.h"
#include "hw_fb_gm20b.h"
static const u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, SZ_128K };
static const u32 gmmu_page_shifts[gmmu_nr_page_sizes] = { 12, 17 };
@@ -24,8 +26,8 @@ static const u64 gmmu_page_offset_masks[gmmu_nr_page_sizes] = { 0xfffLL,
static const u64 gmmu_page_masks[gmmu_nr_page_sizes] = { ~0xfffLL, ~0x1ffffLL };
static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm,
enum gmmu_pgsz_gk20a pgsz_idx,
u64 first_vaddr, u64 last_vaddr)
enum gmmu_pgsz_gk20a pgsz_idx,
u64 first_vaddr, u64 last_vaddr)
{
int err;
u32 pte_lo, pte_hi;
@@ -39,10 +41,10 @@ static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm,
gk20a_dbg_fn("");
pde_range_from_vaddr_range(vm, first_vaddr, last_vaddr,
&pde_lo, &pde_hi);
&pde_lo, &pde_hi);
gk20a_dbg(gpu_dbg_pte, "size_idx=%d, pde_lo=%d, pde_hi=%d",
pgsz_idx, pde_lo, pde_hi);
pgsz_idx, pde_lo, pde_hi);
/* Expect ptes of the same pde */
BUG_ON(pde_lo != pde_hi);
@@ -185,7 +187,8 @@ static int gm20b_vm_put_sparse(struct vm_gk20a *vm, u64 vaddr,
vaddr_pde_start = (u64)i << pde_shift;
allocate_gmmu_ptes_sparse(vm, pgsz_idx,
vaddr_pde_start,
PDE_ADDR_END(vaddr_pde_start, pde_shift));
PDE_ADDR_END(vaddr_pde_start,
pde_shift));
} else {
/* Check leading and trailing spaces which don't fit
* into an entire pde. */
@@ -212,6 +215,56 @@ fail:
return err;
}
static int gm20b_mm_mmu_vpr_info_fetch_wait(struct gk20a *g,
const unsigned int msec)
{
unsigned long timeout;
if (tegra_platform_is_silicon())
timeout = jiffies + msecs_to_jiffies(msec);
else
timeout = msecs_to_jiffies(msec);
while (1) {
u32 val;
val = gk20a_readl(g, fb_mmu_vpr_info_r());
if (fb_mmu_vpr_info_fetch_v(val) ==
fb_mmu_vpr_info_fetch_false_v())
break;
if (tegra_platform_is_silicon()) {
if (WARN_ON(time_after(jiffies, timeout)))
return -ETIME;
} else if (--timeout == 0)
return -ETIME;
}
return 0;
}
int gm20b_mm_mmu_vpr_info_fetch(struct gk20a *g)
{
int ret = 0;
gk20a_busy_noresume(g->dev);
#ifdef CONFIG_PM_RUNTIME
if (!pm_runtime_active(&g->dev->dev))
goto fail;
#endif
if (gm20b_mm_mmu_vpr_info_fetch_wait(g, VPR_INFO_FETCH_WAIT)) {
ret = -ETIME;
goto fail;
}
gk20a_writel(g, fb_mmu_vpr_info_r(),
fb_mmu_vpr_info_fetch_true_v());
ret = gm20b_mm_mmu_vpr_info_fetch_wait(g, VPR_INFO_FETCH_WAIT);
fail:
gk20a_idle(g->dev);
return ret;
}
void gm20b_init_mm(struct gpu_ops *gops)
{
gops->mm.set_sparse = gm20b_vm_put_sparse;

View File

@@ -19,6 +19,8 @@ struct gk20a;
#define PDE_ADDR_START(x, y) ((x) & ~((0x1UL << (y)) - 1))
#define PDE_ADDR_END(x, y) ((x) | ((0x1UL << (y)) - 1))
#define VPR_INFO_FETCH_WAIT (5)
void gm20b_init_mm(struct gpu_ops *gops);
int gm20b_mm_mmu_vpr_info_fetch(struct gk20a *g);
#endif

View File

@@ -0,0 +1,26 @@
/*
* GM20B PMU
*
* Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#include "gk20a/gk20a.h"
#include "acr_gm20b.h"
void gm20b_init_pmu_ops(struct gpu_ops *gops)
{
#ifdef CONFIG_TEGRA_ACR
gm20b_init_secure_pmu(gops);
#else
gk20a_init_pmu_ops(gops);
#endif
}
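The acr_gm20b.c diff is suppressed above, but its header declares the pieces gm20b_init_secure_pmu has to wire up; a hedged reconstruction of that wiring, inferred from the declarations in acr_gm20b.h rather than taken from the suppressed diff itself:

void gm20b_init_secure_pmu(struct gpu_ops *gops)
{
	/* Route the generic pmu ops to the HS-capable GM20B variants. */
	gops->pmu.pmu_setup_sw = gm20b_pmu_setup_sw;
	gops->pmu.pmu_setup_hw_and_bootstrap = gm20b_bootstrap_hs_flcn;
}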

View File

@@ -0,0 +1,19 @@
/*
* GM20B PMU
*
* Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#ifndef __PMU_GM20B_H_
#define __PMU_GM20B_H_
void gm20b_init_pmu_ops(struct gpu_ops *gops);
#endif /*__PMU_GM20B_H_*/