gpu: nvgpu: Changes to support LS sig

Support added for sending the PMU and FECS light-secure (LS)
signatures to the ACR ucode

Bug 200046413

Change-Id: Ie1babb640be20a697ad4d6dd18bd11161edb263c
Signed-off-by: Vijayakumar <vsubbu@nvidia.com>
Signed-off-by: Supriya <ssharatkumar@nvidia.com>
Signed-off-by: Vijayakumar <vsubbu@nvidia.com>
This commit is contained in:
Supriya
2014-10-27 17:31:04 +05:30
committed by Dan Willemsen
parent 8c6a9fd115
commit eb690cb391
6 changed files with 224 additions and 25 deletions

View File

@@ -1998,6 +1998,8 @@ void gr_gk20a_load_ctxsw_ucode_header(struct gk20a *g, u64 addr_base,
/* Write out the actual data */
switch (segments->boot_signature) {
case FALCON_UCODE_SIG_T21X_FECS_WITH_RESERVED:
case FALCON_UCODE_SIG_T21X_GPCCS_WITH_RESERVED:
case FALCON_UCODE_SIG_T12X_FECS_WITH_RESERVED:
case FALCON_UCODE_SIG_T12X_GPCCS_WITH_RESERVED:
gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0);

View File

@@ -304,10 +304,12 @@ struct gk20a_ctxsw_ucode_segments {
/* Known falcon boot-loader checksums, computed as sums over each ucode file
 * viewed as a sequence of u32 words; these are matched against the
 * boot_signature field in the structure above to identify which ucode
 * layout (with/without reserved words, or older format) was loaded.
 * T12X / T21X denote the chip family the ucode targets. */
#define FALCON_UCODE_SIG_T21X_FECS_WITH_RESERVED 0x9125ab5c
#define FALCON_UCODE_SIG_T12X_FECS_WITH_RESERVED 0x8a621f78
#define FALCON_UCODE_SIG_T12X_FECS_WITHOUT_RESERVED 0x67e5344b
#define FALCON_UCODE_SIG_T12X_FECS_OLDER 0x56da09f
#define FALCON_UCODE_SIG_T21X_GPCCS_WITH_RESERVED 0x3d3d65e2
#define FALCON_UCODE_SIG_T12X_GPCCS_WITH_RESERVED 0x303465d5
#define FALCON_UCODE_SIG_T12X_GPCCS_WITHOUT_RESERVED 0x3fdd33d3
#define FALCON_UCODE_SIG_T12X_GPCCS_OLDER 0x53d7877

View File

@@ -155,6 +155,37 @@ static void set_pmu_cmdline_args_falctracedmaidx_v2(
pmu->args_v2.falc_trace_dma_idx = idx;
}
static u32 pmu_cmdline_size_v3(struct pmu_gk20a *pmu)
{
return sizeof(struct pmu_cmdline_args_v3);
}
/* Record the PMU clock frequency (Hz) in the v3 cmdline args. */
static void set_pmu_cmdline_args_cpufreq_v3(struct pmu_gk20a *pmu, u32 freq_hz)
{
	pmu->args_v3.cpu_freq_hz = freq_hz;
}
/* Set the secure-mode flag in the v3 cmdline args (stored as u8). */
static void set_pmu_cmdline_args_secure_mode_v3(struct pmu_gk20a *pmu, u32 mode)
{
	pmu->args_v3.secure_mode = mode;
}
/* Set the falcon trace buffer size (bytes) in the v3 cmdline args. */
static void set_pmu_cmdline_args_falctracesize_v3(
		struct pmu_gk20a *pmu, u32 buf_size)
{
	pmu->args_v3.falc_trace_size = buf_size;
}
static void set_pmu_cmdline_args_falctracedmabase_v3(struct pmu_gk20a *pmu)
{
pmu->args_v3.falc_trace_dma_base = ((u32)pmu->trace_buf.pmu_va)/0x100;
}
/* Set the DMA index used for falcon trace transfers in the v3 args. */
static void set_pmu_cmdline_args_falctracedmaidx_v3(
		struct pmu_gk20a *pmu, u32 dma_idx)
{
	pmu->args_v3.falc_trace_dma_idx = dma_idx;
}
static void set_pmu_cmdline_args_cpufreq_v1(struct pmu_gk20a *pmu, u32 freq)
{
pmu->args_v1.cpu_freq_hz = freq;
@@ -229,6 +260,11 @@ static void set_pmu_cmdline_args_cpufreq_v0(struct pmu_gk20a *pmu, u32 freq)
pmu->args_v0.cpu_freq_hz = freq;
}
/* Return a pointer to the v3 cmdline argument block inside the PMU state. */
static void *get_pmu_cmdline_args_ptr_v3(struct pmu_gk20a *pmu)
{
	return &pmu->args_v3;
}
static void *get_pmu_cmdline_args_ptr_v2(struct pmu_gk20a *pmu)
{
return (void *)(&pmu->args_v2);
@@ -661,6 +697,89 @@ int gk20a_init_pmu(struct pmu_gk20a *pmu)
pmu->remove_support = gk20a_remove_pmu_support;
switch (pmu->desc->app_version) {
case APP_VERSION_GM20B_4:
g->ops.pmu_ver.get_perfmon_cntr_ptr = get_perfmon_cntr_ptr_v2;
g->ops.pmu_ver.set_perfmon_cntr_ut = set_perfmon_cntr_ut_v2;
g->ops.pmu_ver.set_perfmon_cntr_lt = set_perfmon_cntr_lt_v2;
g->ops.pmu_ver.set_perfmon_cntr_valid =
set_perfmon_cntr_valid_v2;
g->ops.pmu_ver.set_perfmon_cntr_index =
set_perfmon_cntr_index_v2;
g->ops.pmu_ver.set_perfmon_cntr_group_id =
set_perfmon_cntr_group_id_v2;
g->ops.pmu_ver.get_perfmon_cntr_sz = pmu_perfmon_cntr_sz_v2;
g->ops.pmu_ver.cmd_id_zbc_table_update = 16;
g->ops.pmu_ver.get_pmu_cmdline_args_size =
pmu_cmdline_size_v3;
g->ops.pmu_ver.set_pmu_cmdline_args_cpu_freq =
set_pmu_cmdline_args_cpufreq_v3;
g->ops.pmu_ver.set_pmu_cmdline_args_secure_mode =
set_pmu_cmdline_args_secure_mode_v3;
g->ops.pmu_ver.set_pmu_cmdline_args_trace_size =
set_pmu_cmdline_args_falctracesize_v3;
g->ops.pmu_ver.set_pmu_cmdline_args_trace_dma_base =
set_pmu_cmdline_args_falctracedmabase_v3;
g->ops.pmu_ver.set_pmu_cmdline_args_trace_dma_idx =
set_pmu_cmdline_args_falctracedmaidx_v3;
g->ops.pmu_ver.get_pmu_cmdline_args_ptr =
get_pmu_cmdline_args_ptr_v3;
g->ops.pmu_ver.get_pmu_allocation_struct_size =
get_pmu_allocation_size_v1;
g->ops.pmu_ver.set_pmu_allocation_ptr =
set_pmu_allocation_ptr_v1;
g->ops.pmu_ver.pmu_allocation_set_dmem_size =
pmu_allocation_set_dmem_size_v1;
g->ops.pmu_ver.pmu_allocation_get_dmem_size =
pmu_allocation_get_dmem_size_v1;
g->ops.pmu_ver.pmu_allocation_get_dmem_offset =
pmu_allocation_get_dmem_offset_v1;
g->ops.pmu_ver.pmu_allocation_get_dmem_offset_addr =
pmu_allocation_get_dmem_offset_addr_v1;
g->ops.pmu_ver.pmu_allocation_set_dmem_offset =
pmu_allocation_set_dmem_offset_v1;
g->ops.pmu_ver.get_pmu_init_msg_pmu_queue_params =
get_pmu_init_msg_pmu_queue_params_v1;
g->ops.pmu_ver.get_pmu_msg_pmu_init_msg_ptr =
get_pmu_msg_pmu_init_msg_ptr_v1;
g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_off =
get_pmu_init_msg_pmu_sw_mg_off_v1;
g->ops.pmu_ver.get_pmu_init_msg_pmu_sw_mg_size =
get_pmu_init_msg_pmu_sw_mg_size_v1;
g->ops.pmu_ver.get_pmu_perfmon_cmd_start_size =
get_pmu_perfmon_cmd_start_size_v1;
g->ops.pmu_ver.get_perfmon_cmd_start_offsetofvar =
get_perfmon_cmd_start_offsetofvar_v1;
g->ops.pmu_ver.perfmon_start_set_cmd_type =
perfmon_start_set_cmd_type_v1;
g->ops.pmu_ver.perfmon_start_set_group_id =
perfmon_start_set_group_id_v1;
g->ops.pmu_ver.perfmon_start_set_state_id =
perfmon_start_set_state_id_v1;
g->ops.pmu_ver.perfmon_start_set_flags =
perfmon_start_set_flags_v1;
g->ops.pmu_ver.perfmon_start_get_flags =
perfmon_start_get_flags_v1;
g->ops.pmu_ver.get_pmu_perfmon_cmd_init_size =
get_pmu_perfmon_cmd_init_size_v1;
g->ops.pmu_ver.get_perfmon_cmd_init_offsetofvar =
get_perfmon_cmd_init_offsetofvar_v1;
g->ops.pmu_ver.perfmon_cmd_init_set_sample_buffer =
perfmon_cmd_init_set_sample_buffer_v1;
g->ops.pmu_ver.perfmon_cmd_init_set_dec_cnt =
perfmon_cmd_init_set_dec_cnt_v1;
g->ops.pmu_ver.perfmon_cmd_init_set_base_cnt_id =
perfmon_cmd_init_set_base_cnt_id_v1;
g->ops.pmu_ver.perfmon_cmd_init_set_samp_period_us =
perfmon_cmd_init_set_samp_period_us_v1;
g->ops.pmu_ver.perfmon_cmd_init_set_num_cnt =
perfmon_cmd_init_set_num_cnt_v1;
g->ops.pmu_ver.perfmon_cmd_init_set_mov_avg =
perfmon_cmd_init_set_mov_avg_v1;
g->ops.pmu_ver.get_pmu_seq_in_a_ptr =
get_pmu_sequence_in_alloc_ptr_v1;
g->ops.pmu_ver.get_pmu_seq_out_a_ptr =
get_pmu_sequence_out_alloc_ptr_v1;
break;
case APP_VERSION_GM20B_3:
case APP_VERSION_GM20B_2:
g->ops.pmu_ver.get_perfmon_cntr_ptr = get_perfmon_cntr_ptr_v2;

View File

@@ -49,6 +49,7 @@
/* Mapping between AP_CTRLs and Idle counters */
#define PMU_AP_IDLE_MASK_GRAPHICS (PMU_AP_IDLE_MASK_HIST_IDX_1)
#define APP_VERSION_GM20B_4 19008461
#define APP_VERSION_GM20B_3 18935575
#define APP_VERSION_GM20B_2 18694072
#define APP_VERSION_GM20B_1 18547257
@@ -349,6 +350,18 @@ struct pmu_cmdline_args_v2 {
struct pmu_mem_v1 gc6_ctx; /* dmem offset of gc6 context */
};
/* Command line arguments handed to v3 PMU ucode at boot.
 * Layout must match the PMU firmware's expectation exactly. */
struct pmu_cmdline_args_v3 {
	u32 reserved;			/* reserved word; firmware-defined, left 0 */
	u32 cpu_freq_hz;		/* Frequency of the clock driving PMU */
	u32 falc_trace_size;		/* falctrace buffer size (bytes) */
	u32 falc_trace_dma_base;	/* 256-byte block address */
	u32 falc_trace_dma_idx;		/* dmaIdx for DMA operations */
	u8 secure_mode;			/* nonzero when PMU runs in secure mode */
	u8 raise_priv_sec;		/* raise priv level required for desired
					 * registers */
	struct pmu_mem_v1 gc6_ctx;	/* dmem offset of gc6 context */
};
#define GK20A_PMU_TRACE_BUFSIZE 0x4000 /* 4K */
#define GK20A_PMU_DMEM_BLKSIZE2 8
@@ -1174,6 +1187,7 @@ struct pmu_gk20a {
struct pmu_cmdline_args_v0 args_v0;
struct pmu_cmdline_args_v1 args_v1;
struct pmu_cmdline_args_v2 args_v2;
struct pmu_cmdline_args_v3 args_v3;
};
unsigned long perfmon_events_cnt;
bool perfmon_sampling_enabled;

View File

@@ -81,7 +81,7 @@ void gm20b_init_secure_pmu(struct gpu_ops *gops)
int pmu_ucode_details(struct gk20a *g, struct flcn_ucode_img *p_img)
{
const struct firmware *pmu_fw, *pmu_desc;
const struct firmware *pmu_fw, *pmu_desc, *pmu_sig;
struct pmu_gk20a *pmu = &g->pmu;
struct lsf_ucode_desc *lsf_desc;
int err;
@@ -89,7 +89,6 @@ int pmu_ucode_details(struct gk20a *g, struct flcn_ucode_img *p_img)
pmu_fw = gk20a_request_firmware(g, GM20B_PMU_UCODE_IMAGE);
if (!pmu_fw) {
gk20a_err(dev_from_gk20a(g), "failed to load pmu ucode!!");
gm20b_dbg_pmu("requesting PMU ucode in GM20B failed\n");
return -ENOENT;
}
g->acr.pmu_fw = pmu_fw;
@@ -99,10 +98,15 @@ int pmu_ucode_details(struct gk20a *g, struct flcn_ucode_img *p_img)
pmu_desc = gk20a_request_firmware(g, GM20B_PMU_UCODE_DESC);
if (!pmu_desc) {
gk20a_err(dev_from_gk20a(g), "failed to load pmu ucode desc!!");
gm20b_dbg_pmu("requesting PMU ucode in GM20B failed\n");
err = -ENOENT;
goto release_img_fw;
}
pmu_sig = gk20a_request_firmware(g, GM20B_PMU_UCODE_SIG);
if (!pmu_sig) {
gk20a_err(dev_from_gk20a(g), "failed to load pmu sig!!");
err = -ENOENT;
goto release_desc;
}
pmu->desc = (struct pmu_ucode_desc *)pmu_desc->data;
pmu->ucode_image = (u32 *)pmu_fw->data;
g->acr.pmu_desc = pmu_desc;
@@ -116,8 +120,9 @@ int pmu_ucode_details(struct gk20a *g, struct flcn_ucode_img *p_img)
lsf_desc = kzalloc(sizeof(struct lsf_ucode_desc), GFP_KERNEL);
if (!lsf_desc) {
err = -ENOMEM;
goto release_desc;
goto release_sig;
}
memcpy(lsf_desc, (void *)pmu_sig->data, sizeof(struct lsf_ucode_desc));
lsf_desc->falcon_id = LSF_FALCON_ID_PMU;
p_img->desc = pmu->desc;
@@ -127,7 +132,10 @@ int pmu_ucode_details(struct gk20a *g, struct flcn_ucode_img *p_img)
p_img->header = NULL;
p_img->lsf_desc = (struct lsf_ucode_desc *)lsf_desc;
gm20b_dbg_pmu("requesting PMU ucode in GM20B exit\n");
release_firmware(pmu_sig);
return 0;
release_sig:
release_firmware(pmu_sig);
release_desc:
release_firmware(pmu_desc);
release_img_fw:
@@ -138,41 +146,54 @@ release_img_fw:
int fecs_ucode_details(struct gk20a *g, struct flcn_ucode_img *p_img)
{
struct lsf_ucode_desc *lsf_desc;
const struct firmware *fecs_sig;
int err;
fecs_sig = gk20a_request_firmware(g, GM20B_FECS_UCODE_SIG);
if (!fecs_sig) {
gk20a_err(dev_from_gk20a(g), "failed to load fecs sig");
return -ENOENT;
}
lsf_desc = kzalloc(sizeof(struct lsf_ucode_desc), GFP_KERNEL);
if (!lsf_desc)
return -ENOMEM;
if (!lsf_desc) {
err = -ENOMEM;
goto rel_sig;
}
memcpy(lsf_desc, (void *)fecs_sig->data, sizeof(struct lsf_ucode_desc));
lsf_desc->falcon_id = LSF_FALCON_ID_FECS;
p_img->desc = kzalloc(sizeof(struct pmu_ucode_desc), GFP_KERNEL);
if (p_img->desc == NULL) {
kfree(lsf_desc);
return -ENOMEM;
err = -ENOMEM;
goto free_lsf_desc;
}
p_img->desc->bootloader_start_offset =
g->ctxsw_ucode_info.fecs.boot.offset;
p_img->desc->bootloader_size =
g->ctxsw_ucode_info.fecs.boot.size;
ALIGN(g->ctxsw_ucode_info.fecs.boot.size, 256);
p_img->desc->bootloader_imem_offset =
g->ctxsw_ucode_info.fecs.boot_imem_offset;
p_img->desc->bootloader_entry_point =
g->ctxsw_ucode_info.fecs.boot_entry;
p_img->desc->image_size = g->ctxsw_ucode_info.fecs.boot.size +
g->ctxsw_ucode_info.fecs.code.size +
g->ctxsw_ucode_info.fecs.data.size;
p_img->desc->app_size = 0;
p_img->desc->app_start_offset = 0;
p_img->desc->image_size =
ALIGN(g->ctxsw_ucode_info.fecs.boot.size, 256) +
ALIGN(g->ctxsw_ucode_info.fecs.code.size, 256) +
ALIGN(g->ctxsw_ucode_info.fecs.data.size, 256);
p_img->desc->app_size = ALIGN(g->ctxsw_ucode_info.fecs.code.size, 256) +
ALIGN(g->ctxsw_ucode_info.fecs.data.size, 256);
p_img->desc->app_start_offset = g->ctxsw_ucode_info.fecs.code.offset;
p_img->desc->app_imem_offset = 0;
p_img->desc->app_imem_entry = 0;
p_img->desc->app_dmem_offset = 0;
p_img->desc->app_resident_code_offset =
g->ctxsw_ucode_info.fecs.code.offset;
p_img->desc->app_resident_code_offset = 0;
p_img->desc->app_resident_code_size =
g->ctxsw_ucode_info.fecs.code.size;
p_img->desc->app_resident_data_offset =
g->ctxsw_ucode_info.fecs.data.offset;
g->ctxsw_ucode_info.fecs.data.offset -
g->ctxsw_ucode_info.fecs.code.offset;
p_img->desc->app_resident_data_size =
g->ctxsw_ucode_info.fecs.data.size;
p_img->data = g->ctxsw_ucode_info.surface_desc.cpuva;
@@ -181,8 +202,14 @@ int fecs_ucode_details(struct gk20a *g, struct flcn_ucode_img *p_img)
p_img->fw_ver = NULL;
p_img->header = NULL;
p_img->lsf_desc = (struct lsf_ucode_desc *)lsf_desc;
gm20b_dbg_pmu("fecs fw loaded 2\n");
gm20b_dbg_pmu("fecs fw loaded\n");
release_firmware(fecs_sig);
return 0;
free_lsf_desc:
kfree(lsf_desc);
rel_sig:
release_firmware(fecs_sig);
return err;
}
int prepare_ucode_blob(struct gk20a *g)
@@ -411,7 +438,7 @@ int flcn_populate_bl_dmem_desc(struct gk20a *g,
struct flcn_ucode_img *p_img = &(lsfm->ucode_img);
struct flcn_bl_dmem_desc *ldr_cfg =
(struct flcn_bl_dmem_desc *)(&p_bl_gen_desc->loader_cfg);
(struct flcn_bl_dmem_desc *)(&p_bl_gen_desc->bl_dmem_desc);
u64 addr_base;
struct pmu_ucode_desc *desc;
u64 addr_code, addr_data;
@@ -580,7 +607,7 @@ static int lsfm_init_wpr_contents(struct gk20a *g, struct ls_flcn_mgr *plsfm,
/* Tag the terminator WPR header with an invalid falcon ID. */
gk20a_mem_wr32(&wpr_hdr[plsfm->managed_flcn_cnt].falcon_id,
1, LSF_FALCON_ID_INVALID);
0, LSF_FALCON_ID_INVALID);
}
return status;
}
@@ -635,6 +662,7 @@ static void lsfm_fill_static_lsb_hdr_info(struct gk20a *g,
{
struct pmu_gk20a *pmu = &g->pmu;
u32 full_app_size = 0;
u32 data = 0;
if (pnode->ucode_img.lsf_desc)
@@ -669,11 +697,30 @@ static void lsfm_fill_static_lsb_hdr_info(struct gk20a *g,
pnode->lsb_header.bl_code_size = ALIGN(
pnode->ucode_img.desc->bootloader_size,
LSF_BL_CODE_SIZE_ALIGNMENT);
full_app_size = ALIGN(pnode->ucode_img.desc->app_size,
LSF_BL_CODE_SIZE_ALIGNMENT) +
pnode->lsb_header.bl_code_size;
pnode->lsb_header.ucode_size = ALIGN(
pnode->ucode_img.desc->app_resident_data_offset,
LSF_BL_CODE_SIZE_ALIGNMENT) +
pnode->lsb_header.bl_code_size;
pnode->lsb_header.data_size = full_app_size -
pnode->lsb_header.ucode_size;
/* Though the BL is located at 0th offset of the image, the VA
is different to make sure that it doesnt collide the actual OS
VA range */
pnode->lsb_header.bl_imem_off =
pnode->ucode_img.desc->bootloader_imem_offset;
pnode->lsb_header.app_code_off =
pnode->ucode_img.desc->app_start_offset +
pnode->ucode_img.desc->app_resident_code_offset;
pnode->lsb_header.app_code_size =
pnode->ucode_img.desc->app_resident_code_size;
pnode->lsb_header.app_data_off =
pnode->ucode_img.desc->app_start_offset +
pnode->ucode_img.desc->app_resident_data_offset;
pnode->lsb_header.app_data_size =
pnode->ucode_img.desc->app_resident_data_size;
/* TODO: OBJFLCN should export properties using which the below
flags should be populated.*/
@@ -974,7 +1021,8 @@ err_release_acr_fw:
u8 pmu_is_debug_mode_en(struct gk20a *g)
{
return 1;
u32 ctl_stat = gk20a_readl(g, pwr_pmu_scpctl_stat_r());
return pwr_pmu_scpctl_stat_debug_mode_v(ctl_stat);
}
/*

View File

@@ -28,6 +28,8 @@
#define GM20B_PMU_UCODE_DESC "gpmu_ucode_desc.bin"
#define GM20B_HSBIN_PMU_UCODE_IMAGE "acr_ucode.bin"
#define GM20B_HSBIN_PMU_BL_UCODE_IMAGE "pmu_bl.bin"
#define GM20B_PMU_UCODE_SIG "pmu_sig.bin"
#define GM20B_FECS_UCODE_SIG "fecs_sig.bin"
#define LSFM_DISABLE_MASK_NONE (0x00000000) /*Disable all LS falcons*/
#define LSFM_DISABLE_MASK_ALL (0xFFFFFFFF) /*Enable all LS falcons*/
@@ -60,10 +62,13 @@
/*!
* Image Status Defines
*/
#define LSF_IMAGE_STATUS_NONE (0)
#define LSF_IMAGE_STATUS_COPY (1)
#define LSF_IMAGE_STATUS_VALIDATION (2)
#define LSF_IMAGE_STATUS_BOOTSTRAP_READY (3)
#define LSF_IMAGE_STATUS_NONE (0)
#define LSF_IMAGE_STATUS_COPY (1)
#define LSF_IMAGE_STATUS_VALIDATION_CODE_FAILED (2)
#define LSF_IMAGE_STATUS_VALIDATION_DATA_FAILED (3)
#define LSF_IMAGE_STATUS_VALIDATION_DONE (4)
#define LSF_IMAGE_STATUS_VALIDATION_SKIPPED (5)
#define LSF_IMAGE_STATUS_BOOTSTRAP_READY (6)
/*LSB header related defines*/
#define NV_FLCN_ACR_LSF_FLAG_LOAD_CODE_AT_0_FALSE 0
@@ -156,6 +161,10 @@ struct lsf_lsb_header {
u32 bl_imem_off;
u32 bl_data_off;
u32 bl_data_size;
u32 app_code_off;
u32 app_code_size;
u32 app_data_off;
u32 app_data_size;
u32 flags;
};
@@ -178,6 +187,7 @@ struct lsf_lsb_header {
* data_size - Size of data block. Should be multiple of 256B
*/
struct flcn_bl_dmem_desc {
u32 reserved[4]; /*Should be the first element..*/
u32 signature[4]; /*Should be the first element..*/
u32 ctx_dma;
u32 code_dma_base;
@@ -297,10 +307,14 @@ struct flcn_acr_regions {
* nonwpr_ucode_blob_end -stores non-WPR end where kernel stores ucode blob
*/
struct flcn_acr_desc {
u32 reserved_dmem[(LSF_BOOTSTRAP_OWNER_RESERVED_DMEM_SIZE/4)];
union {
u32 reserved_dmem[(LSF_BOOTSTRAP_OWNER_RESERVED_DMEM_SIZE/4)];
u32 signatures[4];
} ucode_reserved_space;
/*Always 1st*/
u32 wpr_region_id;
u32 wpr_offset;
u32 mmu_mem_range;
struct flcn_acr_regions regions;
u32 nonwpr_ucode_blob_size;
u64 nonwpr_ucode_blob_start;