nvgpu: Host side changes to support HS mode

GM20B changes to the PMU boot sequence to support booting in
both HS mode and LS mode

Bug 1509680

Change-Id: I2832eda0efe17dd5e3a8f11dd06e7d4da267be70
Signed-off-by: Supriya <ssharatkumar@nvidia.com>
Reviewed-on: http://git-master/r/423140
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Reviewed-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Tested-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Shridhar Rasal <srasal@nvidia.com>
Reviewed-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-by: Bharat Nihalani <bnihalani@nvidia.com>
Supriya authored 2014-06-13 12:44:27 +05:30, committed by Dan Willemsen
parent c32ac10b0b
commit b7793a493a
17 changed files with 2001 additions and 17 deletions

View File

@@ -70,3 +70,12 @@ config TEGRA_GK20A
Enable support for the GK20A graphics engine on Tegra
by adding a Tegra platform interface to the GK20A driver.
The Tegra platform interface requires TEGRA_GRHOST (host1x).
config TEGRA_ACR
bool "Enable HS bin support on GM20B GPU on Tegra"
depends on GK20A_PMU
default n
help
Enable support for loading the High Secure binary, using
Write Protected Regions (WPR) to store ucode, and bootstrapping the
PMU, FECS and GPCCS in Low Secure mode.
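For reference, a board defconfig opting into the new path would simply carry the following (assuming GK20A_PMU is already set, as the dependency above requires):

CONFIG_TEGRA_ACR=y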

View File

@@ -28,6 +28,7 @@ struct channel_gk20a;
struct gr_gk20a;
struct sim_gk20a;
struct gk20a_ctxsw_ucode_segments;
struct acr_gm20b;
#include <linux/sched.h>
#include <linux/spinlock.h>
@@ -45,6 +46,7 @@ struct gk20a_ctxsw_ucode_segments;
#include "priv_ring_gk20a.h"
#include "therm_gk20a.h"
#include "platform_gk20a.h"
#include "gm20b/acr_gm20b.h"
extern struct platform_device tegra_gk20a_device;
@@ -205,6 +207,8 @@ struct gpu_ops {
struct pmu_sequence *seq);
void *(*get_pmu_seq_out_a_ptr)(
struct pmu_sequence *seq);
void (*set_pmu_cmdline_args_secure_mode)(struct pmu_gk20a *pmu,
u32 val);
} pmu_ver;
struct {
int (*get_netlist_name)(int index, char *name);
@@ -214,6 +218,10 @@ struct gpu_ops {
int (*set_sparse)(struct vm_gk20a *vm, u64 vaddr,
u32 num_pages, u32 pgsz_idx);
} mm;
struct {
int (*pmu_setup_sw)(struct gk20a *g);
int (*pmu_setup_hw_and_bootstrap)(struct gk20a *g);
} pmu;
};
struct gk20a {
@@ -236,6 +244,7 @@ struct gk20a {
struct sim_gk20a sim;
struct mm_gk20a mm;
struct pmu_gk20a pmu;
struct acr_gm20b acr;
struct cooling_device_gk20a gk20a_cdev;
/* Save pmu fw here so that it lives cross suspend/resume.

View File

@@ -23,6 +23,7 @@
#include "channel_gk20a.h"
#include "gr_ctx_gk20a.h"
#include "mm_gk20a.h"
#include "pmu_gk20a.h"
struct gpu_ops gk20a_ops = {
.clock_gating = {
@@ -48,6 +49,7 @@ int gk20a_init_hal(struct gpu_ops *gops)
gk20a_init_fifo(gops);
gk20a_init_gr_ctx(gops);
gk20a_init_mm(gops);
gk20a_init_pmu_ops(gops);
gops->name = "gk20a";
return 0;

View File

@@ -38,10 +38,8 @@
#define gk20a_dbg_pmu(fmt, arg...) \
gk20a_dbg(gpu_dbg_pmu, fmt, ##arg)
static void pmu_dump_falcon_stats(struct pmu_gk20a *pmu);
static int gk20a_pmu_get_elpg_residency_gating(struct gk20a *g,
u32 *ingating_time, u32 *ungating_time, u32 *gating_cnt);
static void pmu_setup_hw(struct work_struct *work);
static void ap_callback_init_and_enable_ctrl(
struct gk20a *g, struct pmu_msg *msg,
void *param, u32 seq_desc, u32 status);
@@ -62,6 +60,10 @@ static void set_pmu_cmdline_args_cpufreq_v1(struct pmu_gk20a *pmu, u32 freq)
{
pmu->args_v1.cpu_freq_hz = freq;
}
static void set_pmu_cmdline_args_secure_mode_v1(struct pmu_gk20a *pmu, u32 val)
{
pmu->args_v1.secure_mode = val;
}
static void set_pmu_cmdline_args_cpufreq_v0(struct pmu_gk20a *pmu, u32 freq)
{
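Since the v0 interface deliberately leaves the new hook NULL (see the second hunk below), callers have to guard it. A minimal hedged sketch of a call site — the caller and the choice of val=1 are illustrative, not part of this diff:

/* Request secure mode only where the pmu_ver interface provides the hook. */
if (g->ops.pmu_ver.set_pmu_cmdline_args_secure_mode)
	g->ops.pmu_ver.set_pmu_cmdline_args_secure_mode(&g->pmu, 1);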
@@ -482,10 +484,12 @@ static void *get_pmu_sequence_out_alloc_ptr_v0(struct pmu_sequence *seq)
return (void *)(&seq->out_v0);
}
static int gk20a_init_pmu(struct pmu_gk20a *pmu)
int gk20a_init_pmu(struct pmu_gk20a *pmu)
{
struct gk20a *g = pmu->g;
switch (pmu->desc->app_version) {
case APP_VERSION_GM20B_1:
case APP_VERSION_GM20B:
case APP_VERSION_1:
case APP_VERSION_2:
g->ops.pmu_ver.cmd_id_zbc_table_update = 16;
@@ -493,6 +497,8 @@ static int gk20a_init_pmu(struct pmu_gk20a *pmu)
pmu_cmdline_size_v1;
g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq =
set_pmu_cmdline_args_cpufreq_v1;
g->ops.pmu_ver.set_pmu_cmdline_args_secure_mode =
set_pmu_cmdline_args_secure_mode_v1;
g->ops.pmu_ver.get_pmu_cmdline_args_ptr =
get_pmu_cmdline_args_ptr_v1;
g->ops.pmu_ver.get_pmu_allocation_struct_size =
@@ -558,6 +564,8 @@ static int gk20a_init_pmu(struct pmu_gk20a *pmu)
pmu_cmdline_size_v0;
g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq =
set_pmu_cmdline_args_cpufreq_v0;
g->ops.pmu_ver.set_pmu_cmdline_args_secure_mode =
NULL;
g->ops.pmu_ver.get_pmu_cmdline_args_ptr =
get_pmu_cmdline_args_ptr_v0;
g->ops.pmu_ver.get_pmu_allocation_struct_size =
@@ -627,7 +635,7 @@ static int gk20a_init_pmu(struct pmu_gk20a *pmu)
return 0;
}
static void pmu_copy_from_dmem(struct pmu_gk20a *pmu,
void pmu_copy_from_dmem(struct pmu_gk20a *pmu,
u32 src, u8 *dst, u32 size, u8 port)
{
struct gk20a *g = pmu->g;
@@ -673,7 +681,7 @@ static void pmu_copy_from_dmem(struct pmu_gk20a *pmu,
return;
}
static void pmu_copy_to_dmem(struct pmu_gk20a *pmu,
void pmu_copy_to_dmem(struct pmu_gk20a *pmu,
u32 dst, u8 *src, u32 size, u8 port)
{
struct gk20a *g = pmu->g;
@@ -887,7 +895,7 @@ static int pmu_enable(struct pmu_gk20a *pmu, bool enable)
return 0;
}
static int pmu_reset(struct pmu_gk20a *pmu)
int pmu_reset(struct pmu_gk20a *pmu)
{
int err;
@@ -999,7 +1007,7 @@ static int pmu_bootstrap(struct pmu_gk20a *pmu)
return 0;
}
static void pmu_seq_init(struct pmu_gk20a *pmu)
void pmu_seq_init(struct pmu_gk20a *pmu)
{
u32 i;
@@ -1784,7 +1792,7 @@ static int gk20a_aelpg_init_and_enable(struct gk20a *g, u8 ctrl_id);
static void pmu_setup_hw_load_zbc(struct gk20a *g);
static void pmu_setup_hw_enable_elpg(struct gk20a *g);
static void pmu_setup_hw(struct work_struct *work)
void pmu_setup_hw(struct work_struct *work)
{
struct pmu_gk20a *pmu = container_of(work, struct pmu_gk20a, pg_init);
struct gk20a *g = pmu->g;
@@ -1967,6 +1975,12 @@ static void pmu_setup_hw_enable_elpg(struct gk20a *g)
}
}
void gk20a_init_pmu_ops(struct gpu_ops *gops)
{
gops->pmu.pmu_setup_sw = gk20a_init_pmu_setup_sw;
gops->pmu.pmu_setup_hw_and_bootstrap = gk20a_init_pmu_setup_hw1;
}
int gk20a_init_pmu_support(struct gk20a *g)
{
struct pmu_gk20a *pmu = &g->pmu;
@@ -1984,11 +1998,10 @@ int gk20a_init_pmu_support(struct gk20a *g)
return err;
if (support_gk20a_pmu()) {
err = gk20a_init_pmu_setup_sw(g);
err = g->ops.pmu.pmu_setup_sw(g);
if (err)
return err;
err = gk20a_init_pmu_setup_hw1(g);
err = g->ops.pmu.pmu_setup_hw_and_bootstrap(g);
if (err)
return err;
}
@@ -2724,7 +2737,7 @@ static void pmu_dump_elpg_stats(struct pmu_gk20a *pmu)
*/
}
static void pmu_dump_falcon_stats(struct pmu_gk20a *pmu)
void pmu_dump_falcon_stats(struct pmu_gk20a *pmu)
{
struct gk20a *g = pmu->g;
int i;

View File

@@ -51,6 +51,8 @@
/* Mapping between AP_CTRLs and Idle counters */
#define PMU_AP_IDLE_MASK_GRAPHICS (PMU_AP_IDLE_MASK_HIST_IDX_1)
#define APP_VERSION_GM20B_1 18547257
#define APP_VERSION_GM20B 17615280
#define APP_VERSION_2 18542378
#define APP_VERSION_1 17997577
#define APP_VERSION_0 16856675
@@ -1058,6 +1060,8 @@ struct pmu_gk20a {
};
unsigned long perfmon_events_cnt;
bool perfmon_sampling_enabled;
u8 pmu_mode; /* Added for GM20B ACR support */
u32 falcon_id;
};
int gk20a_init_pmu_support(struct gk20a *g);
@@ -1086,5 +1090,16 @@ int gk20a_pmu_debugfs_init(struct platform_device *dev);
void gk20a_pmu_reset_load_counters(struct gk20a *g);
void gk20a_pmu_get_load_counters(struct gk20a *g, u32 *busy_cycles,
u32 *total_cycles);
void gk20a_init_pmu_ops(struct gpu_ops *gops);
void pmu_copy_to_dmem(struct pmu_gk20a *pmu,
u32 dst, u8 *src, u32 size, u8 port);
void pmu_copy_from_dmem(struct pmu_gk20a *pmu,
u32 src, u8 *dst, u32 size, u8 port);
int pmu_reset(struct pmu_gk20a *pmu);
int gk20a_init_pmu(struct pmu_gk20a *pmu);
void pmu_dump_falcon_stats(struct pmu_gk20a *pmu);
void gk20a_remove_pmu_support(struct pmu_gk20a *pmu);
void pmu_setup_hw(struct work_struct *work);
void pmu_seq_init(struct pmu_gk20a *pmu);
#endif /*__PMU_GK20A_H__*/

View File

@@ -11,4 +11,6 @@ obj-$(CONFIG_GK20A) = \
fifo_gm20b.o \
gr_ctx_gm20b.o \
gm20b_gating_reglist.o \
acr_gm20b.o \
pmu_gm20b.o \
mm_gm20b.o

View File

File diff suppressed because it is too large

View File

@@ -0,0 +1,377 @@
/*
* GM20B ACR
*
* Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#ifndef __ACR_GM20B_H_
#define __ACR_GM20B_H_
#include "gk20a/gk20a.h"
#include "mm_gm20b.h"
/*Defines*/
/*chip specific defines*/
#define MAX_SUPPORTED_LSFM 1 /*PMU, FECS, GPCCS*/
#define LSF_UCODE_DATA_ALIGNMENT 4096
#define GM20B_PMU_UCODE_IMAGE "gpmu_ucode.bin"
#define GM20B_HSBIN_PMU_UCODE_IMAGE "acr_ucode.bin"
#define GM20B_HSBIN_PMU_BL_UCODE_IMAGE "pmu_bl.bin"
#define LSFM_DISABLE_MASK_NONE (0x00000000) /*Disable no LS falcons*/
#define LSFM_DISABLE_MASK_ALL (0xFFFFFFFF) /*Disable all LS falcons*/
#define PMU_SECURE_MODE (0x1)
#define PMU_LSFM_MANAGED (0x2)
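These flags pair with the pmu_mode field added to struct pmu_gk20a further down; a hedged sketch of how the ACR boot path presumably marks the PMU — the real call site lives in the suppressed acr_gm20b.c:

pmu->pmu_mode |= PMU_SECURE_MODE;  /* PMU was booted through the HS ACR path */
pmu->pmu_mode |= PMU_LSFM_MANAGED; /* PMU ucode is managed by the LSF manager */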
/*ACR load related*/
/*!
* Supporting a maximum of 2 regions.
* This is needed to pre-allocate space in DMEM
*/
#define T210_FLCN_ACR_MAX_REGIONS (2)
#define LSF_BOOTSTRAP_OWNER_RESERVED_DMEM_SIZE (0x200)
/*!
* Falcon Id Defines
* Defines a common Light Secure Falcon identifier.
*/
#define LSF_FALCON_ID_PMU (0)
#define LSF_FALCON_ID_FECS (2)
#define LSF_FALCON_ID_GPCCS (3)
#define LSF_FALCON_ID_INVALID (0xFFFFFFFF)
/*!
* Bootstrap Owner Defines
*/
#define LSF_BOOTSTRAP_OWNER_DEFAULT (LSF_FALCON_ID_PMU)
/*!
* Image Status Defines
*/
#define LSF_IMAGE_STATUS_NONE (0)
#define LSF_IMAGE_STATUS_COPY (1)
#define LSF_IMAGE_STATUS_VALIDATION (2)
#define LSF_IMAGE_STATUS_BOOTSTRAP_READY (3)
/*LSB header related defines*/
#define NV_FLCN_ACR_LSF_FLAG_LOAD_CODE_AT_0_FALSE 0
#define NV_FLCN_ACR_LSF_FLAG_LOAD_CODE_AT_0_TRUE 1
#define NV_FLCN_ACR_LSF_FLAG_DMACTL_REQ_CTX_FALSE 0
#define NV_FLCN_ACR_LSF_FLAG_DMACTL_REQ_CTX_TRUE 4
/*!
* Light Secure WPR Content Alignments
*/
#define LSF_LSB_HEADER_ALIGNMENT 256
#define LSF_BL_DATA_ALIGNMENT 256
#define LSF_BL_DATA_SIZE_ALIGNMENT 256
#define LSF_BL_CODE_SIZE_ALIGNMENT 256
/*!
* Falcon UCODE header index.
*/
#define FLCN_NL_UCODE_HDR_OS_CODE_OFF_IND (0)
#define FLCN_NL_UCODE_HDR_OS_CODE_SIZE_IND (1)
#define FLCN_NL_UCODE_HDR_OS_DATA_OFF_IND (2)
#define FLCN_NL_UCODE_HDR_OS_DATA_SIZE_IND (3)
#define FLCN_NL_UCODE_HDR_NUM_APPS_IND (4)
/*!
* There are a total of N applications, each with code and data offsets defined
* in the UCODE header. These macros provide the CODE and DATA offset and size
* of the A-th application.
*/
#define FLCN_NL_UCODE_HDR_APP_CODE_START_IND (5)
#define FLCN_NL_UCODE_HDR_APP_CODE_OFF_IND(N, A) \
(FLCN_NL_UCODE_HDR_APP_CODE_START_IND + (A*2))
#define FLCN_NL_UCODE_HDR_APP_CODE_SIZE_IND(N, A) \
(FLCN_NL_UCODE_HDR_APP_CODE_START_IND + (A*2) + 1)
#define FLCN_NL_UCODE_HDR_APP_CODE_END_IND(N) \
(FLCN_NL_UCODE_HDR_APP_CODE_START_IND + (N*2) - 1)
#define FLCN_NL_UCODE_HDR_APP_DATA_START_IND(N) \
(FLCN_NL_UCODE_HDR_APP_CODE_END_IND(N) + 1)
#define FLCN_NL_UCODE_HDR_APP_DATA_OFF_IND(N, A) \
(FLCN_NL_UCODE_HDR_APP_DATA_START_IND(N) + (A*2))
#define FLCN_NL_UCODE_HDR_APP_DATA_SIZE_IND(N, A) \
(FLCN_NL_UCODE_HDR_APP_DATA_START_IND(N) + (A*2) + 1)
#define FLCN_NL_UCODE_HDR_APP_DATA_END_IND(N) \
(FLCN_NL_UCODE_HDR_APP_DATA_START_IND(N) + (N*2) - 1)
#define FLCN_NL_UCODE_HDR_OS_OVL_OFF_IND(N) \
(FLCN_NL_UCODE_HDR_APP_DATA_END_IND(N) + 1)
#define FLCN_NL_UCODE_HDR_OS_OVL_SIZE_IND(N) \
(FLCN_NL_UCODE_HDR_APP_DATA_END_IND(N) + 2)
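A minimal sketch of walking such a header, assuming hdr points at the u32 words of a non-legacy falcon ucode image (with N=1, the code offset/size land at indices 5/6, data at 7/8, and the OS overlay at 9/10):

u32 n = hdr[FLCN_NL_UCODE_HDR_NUM_APPS_IND];
u32 a;
for (a = 0; a < n; a++) {
	u32 code_off  = hdr[FLCN_NL_UCODE_HDR_APP_CODE_OFF_IND(n, a)];
	u32 code_size = hdr[FLCN_NL_UCODE_HDR_APP_CODE_SIZE_IND(n, a)];
	u32 data_off  = hdr[FLCN_NL_UCODE_HDR_APP_DATA_OFF_IND(n, a)];
	u32 data_size = hdr[FLCN_NL_UCODE_HDR_APP_DATA_SIZE_IND(n, a)];
	/* stage the A-th app's code/data into the ucode blob using these offsets */
}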
/*Externs*/
/*Structs*/
/*!
* Light Secure Falcon Ucode Description Defines
* This structure is preliminary and may change as the ucode signing flow evolves.
*/
struct lsf_ucode_desc {
u8 prd_keys[2][16];
u8 dbg_keys[2][16];
u32 b_prd_present;
u32 b_dbg_present;
u32 falcon_id;
};
/*!
* Light Secure WPR Header
* Defines state allowing Light Secure Falcon bootstrapping.
*
* falcon_id - LS falcon ID
* lsb_offset - Offset into WPR region holding LSB header
* bootstrap_owner - Bootstrap OWNER (either PMU or SEC2)
* lazy_bootstrap - Skip bootstrapping by ACR
* status - Bootstrapping status
*/
struct lsf_wpr_header {
u32 falcon_id;
u32 lsb_offset;
u32 bootstrap_owner;
u32 lazy_bootstrap;
u32 status;
};
struct lsf_lsb_header {
struct lsf_ucode_desc signature;
u32 ucode_off;
u32 ucode_size;
u32 data_size;
u32 bl_code_size;
u32 bl_imem_off;
u32 bl_data_off;
u32 bl_data_size;
u32 flags;
};
/*!
* Structure used by the boot-loader to load the rest of the code. This has
* to be filled by the host and copied into DMEM at the offset provided in
* hsflcn_bl_desc.bl_desc_dmem_load_off.
*
* signature - 16B signature for secure code. 0s if no secure code
* ctx_dma - CtxDma to be used by BL while loading code/data
* code_dma_base - 256B aligned Physical FB Address where code is located
* non_sec_code_off - Offset from code_dma_base where the nonSecure code is
* located. The offset must be a multiple of 256 to help perf
* non_sec_code_size - The size of the nonSecure code part.
* sec_code_off - Offset from code_dma_base where the secure code is
* located. The offset must be a multiple of 256 to help perf
* sec_code_size - The size of the secure code part.
* code_entry_point - Code entry point which will be invoked by BL after
* code is loaded.
* data_dma_base - 256B aligned Physical FB Address where data is located.
* data_size - Size of data block. Should be multiple of 256B
*/
struct flcn_bl_dmem_desc {
u32 signature[4]; /*Should be the first element..*/
u32 ctx_dma;
u32 code_dma_base;
u32 non_sec_code_off;
u32 non_sec_code_size;
u32 sec_code_off;
u32 sec_code_size;
u32 code_entry_point;
u32 data_dma_base;
u32 data_size;
};
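Tying this to the newly exported pmu_copy_to_dmem(): a hedged sketch of the host-side hand-off. Here code_fb_addr, data_fb_addr and the sizes are placeholders, the >>8 packing of 256B-aligned addresses and DMEM port 0 are assumptions, and the real fill lives in the suppressed acr_gm20b.c:

struct pmu_gk20a *pmu = &g->pmu;
struct flcn_bl_dmem_desc desc;
memset(&desc, 0, sizeof(desc));                /* zeroed signature: no secure code */
desc.ctx_dma = 0;                              /* placeholder CtxDma index */
desc.code_dma_base = (u32)(code_fb_addr >> 8); /* 256B-aligned FB address, packed */
desc.non_sec_code_off = 0;
desc.non_sec_code_size = code_size;
desc.code_entry_point = 0;                     /* BL jumps here once code is loaded */
desc.data_dma_base = (u32)(data_fb_addr >> 8);
desc.data_size = data_size;
/* Copy into PMU DMEM at the offset the HS bootloader advertises. */
pmu_copy_to_dmem(pmu, g->acr.pmu_hsbl_desc->bl_desc_dmem_load_off,
		(u8 *)&desc, sizeof(desc), 0);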
/*!
* Legacy structure used by the current PMU/DPU bootloader.
*/
struct loader_config {
u32 dma_idx;
u32 code_dma_base; /*<! upper 32-bits of 40-bit dma address*/
u32 code_size_total;
u32 code_size_to_load;
u32 code_entry_point;
u32 data_dma_base; /*<! upper 32-bits of 40-bit dma address*/
u32 data_size; /*<! initialized data of the application */
u32 overlay_dma_base; /*<! upper 32-bits of the 40-bit dma address*/
u32 argc;
u32 argv;
};
/*!
* Union of all supported structures used by bootloaders.
*/
union flcn_bl_generic_desc {
struct flcn_bl_dmem_desc bl_dmem_desc;
struct loader_config loader_cfg;
};
struct flcn_ucode_img {
u32 *header; /*only some falcons have header*/
u32 *data;
struct pmu_ucode_desc *desc; /*only some falcons have descriptor*/
u32 data_size;
void *fw_ver; /*NV2080_CTRL_GPU_GET_FIRMWARE_VERSION_PARAMS struct*/
u8 load_entire_os_data; /* load the whole osData section at boot time.*/
struct lsf_ucode_desc *lsf_desc; /* NULL if not a light secure falcon.*/
u8 free_res_allocs; /*True if there are resources to be freed by the client.*/
u32 flcn_inst;
};
/*!
* LSFM Managed Ucode Image
* next : Next image in the list, NULL if last.
* wpr_header : WPR header for this ucode image
* lsb_header : LSB header for this ucode image
* bl_gen_desc : Bootloader generic desc structure for this ucode image
* bl_gen_desc_size : Size of the bootloader desc structure for this ucode image
* full_ucode_size : Surface size required for final ucode image
* ucode_img : Ucode image info
*/
struct lsfm_managed_ucode_img {
struct lsfm_managed_ucode_img *next;
struct lsf_wpr_header wpr_header;
struct lsf_lsb_header lsb_header;
union flcn_bl_generic_desc bl_gen_desc;
u32 bl_gen_desc_size;
u32 full_ucode_size;
struct flcn_ucode_img ucode_img;
};
struct ls_flcn_mgr {
u16 managed_flcn_cnt;
u32 wpr_size;
u32 disable_mask;
struct lsfm_managed_ucode_img *ucode_img_list;
void *wpr_client_req_state;/*PACR_CLIENT_REQUEST_STATE originally*/
};
/*ACR related structs*/
/*!
* start_addr - Starting address of region
* end_addr - Ending address of region
* region_id - Region ID
* read_mask - Read Mask
* write_mask - Write Mask
* client_mask - Bit map of all clients currently using this region
*/
struct flcn_acr_region_prop {
u32 start_addr;
u32 end_addr;
u32 region_id;
u32 read_mask;
u32 write_mask;
u32 client_mask;
};
/*!
* no_regions - Number of regions used.
* region_props - Region properties
*/
struct flcn_acr_regions {
u32 no_regions;
struct flcn_acr_region_prop region_props[T210_FLCN_ACR_MAX_REGIONS];
};
/*!
* reserved_dmem - When the bootstrap owner has finished bootstrapping other
* falcons and needs to switch into LS mode, it needs its own
* actual DMEM image copied into DMEM as part of LS setup. If
* the ACR desc is at location 0, it will definitely get
* overwritten, causing data corruption. Hence we reserve 0x200
* bytes to leave room for any loading data. NOTE: This has to
* be the first member, always.
* signature - Signature of ACR ucode.
* wpr_region_id - Region ID holding the WPR header and its details
* wpr_offset - Offset from the WPR region holding the wpr header
* regions - Region descriptors
* nonwpr_ucode_blob_start - Non-WPR start address where the kernel stores the ucode blob
* nonwpr_ucode_blob_size - Size of the non-WPR region where the kernel stores the ucode blob
*/
struct flcn_acr_desc {
u32 reserved_dmem[(LSF_BOOTSTRAP_OWNER_RESERVED_DMEM_SIZE/4)];
/*Always 1st*/
u32 wpr_region_id;
u32 wpr_offset;
struct flcn_acr_regions regions;
u32 nonwpr_ucode_blob_start;
u32 nonwpr_ucode_blob_size;
};
/*!
* The header used by RM to figure out the code and data sections of the bootloader.
*
* bl_code_off - Offset of code section in the image
* bl_code_size - Size of code section in the image
* bl_data_off - Offset of data section in the image
* bl_data_size - Size of data section in the image
*/
struct hsflcn_bl_img_hdr {
u32 bl_code_off;
u32 bl_code_size;
u32 bl_data_off;
u32 bl_data_size;
};
/*!
* The descriptor used by RM to figure out the requirements of the boot loader.
*
* bl_start_tag - Starting tag of bootloader
* bl_desc_dmem_load_off - DMEM offset where _def_rm_flcn_bl_dmem_desc
* is to be loaded
* bl_img_hdr - Description of the image
*/
struct hsflcn_bl_desc {
u32 bl_start_tag;
u32 bl_desc_dmem_load_off;
struct hsflcn_bl_img_hdr bl_img_hdr;
};
struct bin_hdr {
u32 bin_magic; /* 0x10de */
u32 bin_ver; /* versioning of bin format */
u32 bin_size; /* entire image size including this header */
u32 header_offset; /* Header offset of executable binary metadata,
start @ offset- 0x100 */
u32 data_offset; /* Start of executable binary data, start @
offset- 0x200 */
u32 data_size; /* Size of executable binary */
};
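The magic and size fields make a cheap sanity check possible before trusting the rest of an image; a hedged helper sketch (the name and its use are illustrative, not from this diff):

static bool bin_hdr_looks_valid(const struct bin_hdr *hdr, size_t fw_size)
{
	/* 0x10de magic per the comment above; bin_size covers the whole image. */
	return hdr->bin_magic == 0x10de && hdr->bin_size <= fw_size;
}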
struct acr_fw_header {
u32 sig_dbg_offset;
u32 sig_dbg_size;
u32 sig_prod_offset;
u32 sig_prod_size;
u32 patch_loc;
u32 patch_sig;
u32 hdr_offset; /*this header points to acr_ucode_header_t210_load*/
u32 hdr_size; /*size of above header*/
};
struct acr_gm20b {
u64 ucode_blob_start;
u32 ucode_blob_size;
struct bin_hdr *bl_bin_hdr;
struct hsflcn_bl_desc *pmu_hsbl_desc;
struct bin_hdr *hsbin_hdr;
struct acr_fw_header *fw_hdr;
};
void gm20b_init_secure_pmu(struct gpu_ops *gops);
int prepare_ucode_blob(struct gk20a *g);
int pmu_ucode_details(struct gk20a *g, struct flcn_ucode_img *p_img);
int fecs_ucode_details(struct gk20a *g, struct flcn_ucode_img *p_img);
int gpccs_ucode_details(struct gk20a *g, struct flcn_ucode_img *p_img);
int gm20b_bootstrap_hs_flcn(struct gk20a *g);
int gm20b_pmu_setup_sw(struct gk20a *g);
int pmu_exec_gen_bl(struct gk20a *g, void *desc, u8 b_wait_for_halt);
int pmu_wait_for_halt(struct gk20a *g, unsigned int timeout_us);
#endif /*__ACR_GM20B_H_*/

View File

@@ -25,6 +25,7 @@
#include "fifo_gm20b.h"
#include "gr_ctx_gm20b.h"
#include "mm_gm20b.h"
#include "pmu_gm20b.h"
struct gpu_ops gm20b_ops = {
.clock_gating = {
@@ -51,6 +52,7 @@ int gm20b_init_hal(struct gpu_ops *gops)
gm20b_init_fifo(gops);
gm20b_init_gr_ctx(gops);
gm20b_init_mm(gops);
gm20b_init_pmu_ops(gops);
gops->name = "gm20b";
return 0;

View File

@@ -202,4 +202,24 @@ static inline u32 fb_mmu_debug_ctrl_debug_enabled_v(void)
{
return 0x00000001;
}
static inline u32 fb_mmu_vpr_info_r(void)
{
return 0x00100cd0;
}
static inline u32 fb_mmu_vpr_info_fetch_f(u32 v)
{
return (v & 0x1) << 2;
}
static inline u32 fb_mmu_vpr_info_fetch_v(u32 r)
{
return (r >> 2) & 0x1;
}
static inline u32 fb_mmu_vpr_info_fetch_false_v(void)
{
return 0x00000000;
}
static inline u32 fb_mmu_vpr_info_fetch_true_v(void)
{
return 0x00000001;
}
#endif

View File

@@ -322,6 +322,14 @@ static inline u32 gr_fecs_cpuctl_startcpu_f(u32 v)
{
return (v & 0x1) << 1;
}
static inline u32 gr_fecs_cpuctl_alias_r(void)
{
return 0x00409130;
}
static inline u32 gr_fecs_cpuctl_alias_startcpu_f(u32 v)
{
return (v & 0x1) << 1;
}
static inline u32 gr_fecs_dmactl_r(void)
{
return 0x0040910c;

View File

@@ -290,6 +290,86 @@ static inline u32 pwr_falcon_cpuctl_startcpu_f(u32 v)
{
return (v & 0x1) << 1;
}
static inline u32 pwr_falcon_cpuctl_halt_intr_f(u32 v)
{
return (v & 0x1) << 4;
}
static inline u32 pwr_falcon_cpuctl_halt_intr_m(void)
{
return 0x1 << 4;
}
static inline u32 pwr_falcon_cpuctl_halt_intr_v(u32 r)
{
return (r >> 4) & 0x1;
}
static inline u32 pwr_falcon_cpuctl_cpuctl_alias_en_f(u32 v)
{
return (v & 0x1) << 6;
}
static inline u32 pwr_falcon_cpuctl_cpuctl_alias_en_m(void)
{
return 0x1 << 6;
}
static inline u32 pwr_falcon_cpuctl_cpuctl_alias_en_v(u32 r)
{
return (r >> 6) & 0x1;
}
static inline u32 pwr_falcon_cpuctl_alias_r(void)
{
return 0x0010a130;
}
static inline u32 pwr_falcon_cpuctl_alias_startcpu_f(u32 v)
{
return (v & 0x1) << 1;
}
static inline u32 pwr_pmu_scpctl_stat_r(void)
{
return 0x0010ac08;
}
static inline u32 pwr_pmu_scpctl_stat_debug_mode_f(u32 v)
{
return (v & 0x1) << 20;
}
static inline u32 pwr_pmu_scpctl_stat_debug_mode_m(void)
{
return 0x1 << 20;
}
static inline u32 pwr_pmu_scpctl_stat_debug_mode_v(u32 r)
{
return (r >> 20) & 0x1;
}
static inline u32 pwr_falcon_imemc_r(u32 i)
{
return 0x0010a180 + i*16;
}
static inline u32 pwr_falcon_imemc_offs_f(u32 v)
{
return (v & 0x3f) << 2;
}
static inline u32 pwr_falcon_imemc_blk_f(u32 v)
{
return (v & 0xff) << 8;
}
static inline u32 pwr_falcon_imemc_aincw_f(u32 v)
{
return (v & 0x1) << 24;
}
static inline u32 pwr_falcon_imemd_r(u32 i)
{
return 0x0010a184 + i*16;
}
static inline u32 pwr_falcon_imemt_r(u32 i)
{
return 0x0010a188 + i*16;
}
static inline u32 pwr_falcon_sctl_r(void)
{
return 0x0010a240;
}
static inline u32 pwr_falcon_mmu_phys_sec_r(void)
{
return 0x00100ce4;
}
static inline u32 pwr_falcon_bootvec_r(void)
{
return 0x0010a104;

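The imemc/imemd/imemt trio above exists for the usual auto-incrementing IMEM copy; a hedged sketch modeled on gk20a's existing pmu_bootstrap flow (addr_code, size_dwords and src are placeholders; the 64-words-per-256B-tag policy follows the falcon convention):

u32 i, tag = addr_code >> 8;                 /* IMEM tags name 256B blocks */
gk20a_writel(g, pwr_falcon_imemc_r(0),
	pwr_falcon_imemc_offs_f(addr_code >> 2) |
	pwr_falcon_imemc_blk_f(addr_code >> 8) |
	pwr_falcon_imemc_aincw_f(1));        /* auto-increment on each write */
for (i = 0; i < size_dwords; i++) {
	if ((i % 64) == 0)                   /* new 256B block: set its tag */
		gk20a_writel(g, pwr_falcon_imemt_r(0), tag++);
	gk20a_writel(g, pwr_falcon_imemd_r(0), src[i]);
}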
View File

@@ -0,0 +1,22 @@
/*
* GM20B MC registers used by ACR
*
* Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#ifndef _MC_CARVEOUT_REG_H_
#define _MC_CARVEOUT_REG_H_
#define MC_SECURITY_CARVEOUT2_BOM_0 0xc5c
#define MC_SECURITY_CARVEOUT3_BOM_0 0xcac
#define MC_ERR_GENERALIZED_CARVEOUT_STATUS_0 0xc00
#endif /*_MC_CARVEOUT_REG_H_*/

View File

@@ -13,9 +13,11 @@
* more details.
*/
#include <linux/pm_runtime.h>
#include "gk20a/gk20a.h"
#include "mm_gm20b.h"
#include "hw_gmmu_gm20b.h"
#include "hw_fb_gm20b.h"
static const u32 gmmu_page_sizes[gmmu_nr_page_sizes] = { SZ_4K, SZ_128K };
static const u32 gmmu_page_shifts[gmmu_nr_page_sizes] = { 12, 17 };
@@ -24,8 +26,8 @@ static const u64 gmmu_page_offset_masks[gmmu_nr_page_sizes] = { 0xfffLL,
static const u64 gmmu_page_masks[gmmu_nr_page_sizes] = { ~0xfffLL, ~0x1ffffLL };
static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm,
enum gmmu_pgsz_gk20a pgsz_idx,
u64 first_vaddr, u64 last_vaddr)
enum gmmu_pgsz_gk20a pgsz_idx,
u64 first_vaddr, u64 last_vaddr)
{
int err;
u32 pte_lo, pte_hi;
@@ -39,10 +41,10 @@ static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm,
gk20a_dbg_fn("");
pde_range_from_vaddr_range(vm, first_vaddr, last_vaddr,
&pde_lo, &pde_hi);
&pde_lo, &pde_hi);
gk20a_dbg(gpu_dbg_pte, "size_idx=%d, pde_lo=%d, pde_hi=%d",
pgsz_idx, pde_lo, pde_hi);
pgsz_idx, pde_lo, pde_hi);
/* Expect ptes of the same pde */
BUG_ON(pde_lo != pde_hi);
@@ -185,7 +187,8 @@ static int gm20b_vm_put_sparse(struct vm_gk20a *vm, u64 vaddr,
vaddr_pde_start = (u64)i << pde_shift;
allocate_gmmu_ptes_sparse(vm, pgsz_idx,
vaddr_pde_start,
PDE_ADDR_END(vaddr_pde_start, pde_shift));
PDE_ADDR_END(vaddr_pde_start,
pde_shift));
} else {
/* Check leading and trailing spaces which don't fit
* into an entire pde. */
@@ -212,6 +215,56 @@ fail:
return err;
}
static int gm20b_mm_mmu_vpr_info_fetch_wait(struct gk20a *g,
const unsigned int msec)
{
unsigned long timeout;
if (tegra_platform_is_silicon())
timeout = jiffies + msecs_to_jiffies(msec);
else
timeout = msecs_to_jiffies(msec);
while (1) {
u32 val;
val = gk20a_readl(g, fb_mmu_vpr_info_r());
if (fb_mmu_vpr_info_fetch_v(val) ==
fb_mmu_vpr_info_fetch_false_v())
break;
if (tegra_platform_is_silicon()) {
if (WARN_ON(time_after(jiffies, timeout)))
return -ETIME;
} else if (--timeout == 0)
return -ETIME;
}
return 0;
}
int gm20b_mm_mmu_vpr_info_fetch(struct gk20a *g)
{
int ret = 0;
gk20a_busy_noresume(g->dev);
#ifdef CONFIG_PM_RUNTIME
if (!pm_runtime_active(&g->dev->dev))
goto fail;
#endif
if (gm20b_mm_mmu_vpr_info_fetch_wait(g, VPR_INFO_FETCH_WAIT)) {
ret = -ETIME;
goto fail;
}
gk20a_writel(g, fb_mmu_vpr_info_r(),
fb_mmu_vpr_info_fetch_true_v());
ret = gm20b_mm_mmu_vpr_info_fetch_wait(g, VPR_INFO_FETCH_WAIT);
fail:
gk20a_idle(g->dev);
return ret;
}
void gm20b_init_mm(struct gpu_ops *gops)
{
gops->mm.set_sparse = gm20b_vm_put_sparse;

View File

@@ -19,6 +19,8 @@ struct gk20a;
#define PDE_ADDR_START(x, y) ((x) & ~((0x1UL << (y)) - 1))
#define PDE_ADDR_END(x, y) ((x) | ((0x1UL << (y)) - 1))
#define VPR_INFO_FETCH_WAIT (5)
void gm20b_init_mm(struct gpu_ops *gops);
int gm20b_mm_mmu_vpr_info_fetch(struct gk20a *g);
#endif

View File

@@ -0,0 +1,26 @@
/*
* GM20B PMU
*
* Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#include "gk20a/gk20a.h"
#include "acr_gm20b.h"
void gm20b_init_pmu_ops(struct gpu_ops *gops)
{
#ifdef CONFIG_TEGRA_ACR
gm20b_init_secure_pmu(gops);
#else
gk20a_init_pmu_ops(gops);
#endif
}
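The acr_gm20b.c diff is suppressed above, but its header declares the pieces gm20b_init_secure_pmu has to wire up; a hedged reconstruction of that wiring, inferred from the declarations in acr_gm20b.h rather than taken from the suppressed diff itself:

void gm20b_init_secure_pmu(struct gpu_ops *gops)
{
	/* Route the generic pmu ops to the HS-capable GM20B variants. */
	gops->pmu.pmu_setup_sw = gm20b_pmu_setup_sw;
	gops->pmu.pmu_setup_hw_and_bootstrap = gm20b_bootstrap_hs_flcn;
}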

View File

@@ -0,0 +1,19 @@
/*
* GM20B PMU
*
* Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#ifndef __PMU_GM20B_H_
#define __PMU_GM20B_H_
void gm20b_init_pmu_ops(struct gpu_ops *gops);
#endif /*__PMU_GM20B_H_*/