diff --git a/arch/nvgpu-common.yaml b/arch/nvgpu-common.yaml index 213837602..af0480b2f 100644 --- a/arch/nvgpu-common.yaml +++ b/arch/nvgpu-common.yaml @@ -279,6 +279,8 @@ acr_fusa: common/acr/acr_blob_alloc.h, common/acr/acr_blob_construct.c, common/acr/acr_blob_construct.h, + common/acr/acr_blob_construct_v2.c, + common/acr/acr_blob_construct_v2.h, common/acr/acr_bootstrap.c, common/acr/acr_bootstrap.h, common/acr/acr_priv.h, @@ -287,6 +289,7 @@ acr_fusa: common/acr/acr_sw_gv11b.c, common/acr/acr_sw_gv11b.h, common/acr/nvgpu_acr_interface.h, + common/acr/nvgpu_acr_interface_v2.h, include/nvgpu/gops/acr.h, include/nvgpu/acr.h, include/nvgpu/riscv.h, diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index 8be69021f..5120415e1 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile @@ -292,6 +292,7 @@ nvgpu-y += \ common/acr/acr_blob_alloc.o \ common/acr/acr_blob_construct_v0.o \ common/acr/acr_blob_construct.o \ + common/acr/acr_blob_construct_v2.o \ common/acr/acr_bootstrap.o \ common/acr/acr_sw_gm20b.o \ common/acr/acr_sw_gp10b.o \ diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources index 36e7c4a27..a62b12402 100644 --- a/drivers/gpu/nvgpu/Makefile.sources +++ b/drivers/gpu/nvgpu/Makefile.sources @@ -145,6 +145,7 @@ srcs += common/device.c \ common/acr/acr_wpr.c \ common/acr/acr_blob_alloc.c \ common/acr/acr_blob_construct.c \ + common/acr/acr_blob_construct_v2.c \ common/acr/acr_bootstrap.c \ common/acr/acr_sw_gv11b.c \ common/ptimer/ptimer.c \ diff --git a/drivers/gpu/nvgpu/common/acr/acr_blob_construct.c b/drivers/gpu/nvgpu/common/acr/acr_blob_construct.c index 15ab53825..f3210d8ce 100644 --- a/drivers/gpu/nvgpu/common/acr/acr_blob_construct.c +++ b/drivers/gpu/nvgpu/common/acr/acr_blob_construct.c @@ -30,6 +30,7 @@ #include #include "nvgpu_acr_interface.h" +#include "nvgpu_acr_interface_v2.h" #include "acr_blob_construct.h" #include "acr_wpr.h" #include "acr_priv.h" @@ -38,26 +39,7 @@ #include #endif -#define APP_IMEM_OFFSET (0) -#define APP_IMEM_ENTRY (0) -#define APP_DMEM_OFFSET (0) -#define APP_RESIDENT_CODE_OFFSET (0) -#define MEMSET_VALUE (0) -#define LSB_HDR_DATA_SIZE (0) -#define BL_START_OFFSET (0) - -#if defined(CONFIG_NVGPU_DGPU) || defined(CONFIG_NVGPU_LS_PMU) -#define UCODE_PARAMS (1) -#define UCODE_DESC_TOOL_VERSION 0x4U -#else -#define UCODE_PARAMS (0) -#endif - #ifdef CONFIG_NVGPU_LS_PMU -#if defined(CONFIG_NVGPU_NON_FUSA) -#define PMU_NVRISCV_WPR_RSVD_BYTES (0x8000) -#endif - int nvgpu_acr_lsf_pmu_ucode_details(struct gk20a *g, void *lsf_ucode_img) { struct lsf_ucode_desc *lsf_desc; @@ -628,36 +610,36 @@ static void lsfm_fill_static_lsb_hdr_info_pkc(struct gk20a *g, u32 full_app_size = 0; if (pnode->ucode_img.lsf_desc_wrapper != NULL) { - nvgpu_memcpy((u8 *)&pnode->lsb_header_v2.signature, + nvgpu_memcpy((u8 *)&pnode->lsb_header_v1.signature, (u8 *)pnode->ucode_img.lsf_desc_wrapper, sizeof(struct lsf_ucode_desc_wrapper)); } - pnode->lsb_header_v2.ucode_size = pnode->ucode_img.data_size; - pnode->lsb_header_v2.data_size = LSB_HDR_DATA_SIZE; + pnode->lsb_header_v1.ucode_size = pnode->ucode_img.data_size; + pnode->lsb_header_v1.data_size = LSB_HDR_DATA_SIZE; - pnode->lsb_header_v2.bl_code_size = NVGPU_ALIGN( + pnode->lsb_header_v1.bl_code_size = NVGPU_ALIGN( pnode->ucode_img.desc->bootloader_size, LSF_BL_CODE_SIZE_ALIGNMENT); full_app_size = nvgpu_safe_add_u32( NVGPU_ALIGN(pnode->ucode_img.desc->app_size, LSF_BL_CODE_SIZE_ALIGNMENT), - pnode->lsb_header_v2.bl_code_size); + 
pnode->lsb_header_v1.bl_code_size); - pnode->lsb_header_v2.ucode_size = nvgpu_safe_add_u32(NVGPU_ALIGN( + pnode->lsb_header_v1.ucode_size = nvgpu_safe_add_u32(NVGPU_ALIGN( pnode->ucode_img.desc->app_resident_data_offset, LSF_BL_CODE_SIZE_ALIGNMENT), - pnode->lsb_header_v2.bl_code_size); + pnode->lsb_header_v1.bl_code_size); - pnode->lsb_header_v2.data_size = nvgpu_safe_sub_u32(full_app_size, - pnode->lsb_header_v2.ucode_size); + pnode->lsb_header_v1.data_size = nvgpu_safe_sub_u32(full_app_size, + pnode->lsb_header_v1.ucode_size); - pnode->lsb_header_v2.bl_imem_off = + pnode->lsb_header_v1.bl_imem_off = pnode->ucode_img.desc->bootloader_imem_offset; - pnode->lsb_header_v2.flags = NV_FLCN_ACR_LSF_FLAG_FORCE_PRIV_LOAD_FALSE; + pnode->lsb_header_v1.flags = NV_FLCN_ACR_LSF_FLAG_FORCE_PRIV_LOAD_FALSE; if (g->acr->lsf[falcon_id].is_priv_load) { - pnode->lsb_header_v2.flags |= + pnode->lsb_header_v1.flags |= NV_FLCN_ACR_LSF_FLAG_FORCE_PRIV_LOAD_TRUE; } @@ -690,23 +672,23 @@ static void lsfm_fill_static_lsb_hdr_info(struct gk20a *g, } else { #ifdef CONFIG_NVGPU_LS_PMU if (pnode->ucode_img.lsf_desc_wrapper != NULL) { - nvgpu_memcpy((u8 *)&pnode->lsb_header_v2.signature, + nvgpu_memcpy((u8 *)&pnode->lsb_header_v1.signature, (u8 *)pnode->ucode_img.lsf_desc_wrapper, sizeof(struct lsf_ucode_desc_wrapper)); } - pnode->lsb_header_v2.ucode_size = ndesc->bootloader_offset + + pnode->lsb_header_v1.ucode_size = ndesc->bootloader_offset + ndesc->bootloader_size + ndesc->bootloader_param_size; - base_size = pnode->lsb_header_v2.ucode_size + - ndesc->next_core_elf_size; + base_size = nvgpu_safe_add_u32(pnode->lsb_header_v1.ucode_size, + ndesc->next_core_elf_size); image_padding_size = NVGPU_ALIGN(base_size, LSF_UCODE_DATA_ALIGNMENT) - base_size; - pnode->lsb_header_v2.data_size = ndesc->next_core_elf_size + + pnode->lsb_header_v1.data_size = ndesc->next_core_elf_size + image_padding_size; - pnode->lsb_header_v2.bl_code_size = 0; - pnode->lsb_header_v2.bl_imem_off = 0; - pnode->lsb_header_v2.bl_data_size = 0; - pnode->lsb_header_v2.bl_data_off = 0; + pnode->lsb_header_v1.bl_code_size = 0; + pnode->lsb_header_v1.bl_imem_off = 0; + pnode->lsb_header_v1.bl_data_size = 0; + pnode->lsb_header_v1.bl_data_off = 0; #endif } } @@ -742,7 +724,7 @@ static int lsfm_add_ucode_img(struct gk20a *g, struct ls_flcn_mgr *plsfm, pnode->lsb_header.signature.version; } else { pnode->wpr_header.bin_version = - pnode->lsb_header_v2.signature.lsf_ucode_desc_v2.ls_ucode_version; + pnode->lsb_header_v1.signature.lsf_ucode_desc_v2.ls_ucode_version; } pnode->next = plsfm->ucode_img_list; plsfm->ucode_img_list = pnode; @@ -943,36 +925,36 @@ static void lsf_calc_wpr_size_pkc(struct lsfm_managed_ucode_img *pnode, wpr_offset = NVGPU_ALIGN(wpr_offset, LSF_LSB_HEADER_ALIGNMENT); pnode->wpr_header.lsb_offset = wpr_offset; wpr_offset = nvgpu_safe_add_u32(wpr_offset, - (u32)sizeof(struct lsf_lsb_header_v2)); + (u32)sizeof(struct lsf_lsb_header_v1)); wpr_offset = NVGPU_ALIGN(wpr_offset, LSF_UCODE_DATA_ALIGNMENT); - pnode->lsb_header_v2.ucode_off = wpr_offset; + pnode->lsb_header_v1.ucode_off = wpr_offset; wpr_offset = nvgpu_safe_add_u32(wpr_offset, pnode->ucode_img.data_size); - pnode->lsb_header_v2.bl_data_size = NVGPU_ALIGN( + pnode->lsb_header_v1.bl_data_size = NVGPU_ALIGN( nvgpu_safe_cast_u64_to_u32( sizeof(pnode->bl_gen_desc)), LSF_BL_DATA_SIZE_ALIGNMENT); wpr_offset = NVGPU_ALIGN(wpr_offset, LSF_BL_DATA_ALIGNMENT); - pnode->lsb_header_v2.bl_data_off = wpr_offset; + pnode->lsb_header_v1.bl_data_off = wpr_offset; wpr_offset = 
nvgpu_safe_add_u32(wpr_offset, - pnode->lsb_header_v2.bl_data_size); + pnode->lsb_header_v1.bl_data_size); pnode->full_ucode_size = wpr_offset - - pnode->lsb_header_v2.ucode_off; + pnode->lsb_header_v1.ucode_off; if (pnode->wpr_header.falcon_id != FALCON_ID_PMU && pnode->wpr_header.falcon_id != FALCON_ID_PMU_NEXT_CORE) { - pnode->lsb_header_v2.app_code_off = - pnode->lsb_header_v2.bl_code_size; - pnode->lsb_header_v2.app_code_size = - pnode->lsb_header_v2.ucode_size - - pnode->lsb_header_v2.bl_code_size; - pnode->lsb_header_v2.app_data_off = - pnode->lsb_header_v2.ucode_size; - pnode->lsb_header_v2.app_data_size = - pnode->lsb_header_v2.data_size; + pnode->lsb_header_v1.app_code_off = + pnode->lsb_header_v1.bl_code_size; + pnode->lsb_header_v1.app_code_size = + pnode->lsb_header_v1.ucode_size - + pnode->lsb_header_v1.bl_code_size; + pnode->lsb_header_v1.app_data_off = + pnode->lsb_header_v1.ucode_size; + pnode->lsb_header_v1.app_data_size = + pnode->lsb_header_v1.data_size; } *wpr_off = wpr_offset; @@ -1099,7 +1081,7 @@ static int lsfm_populate_flcn_bl_dmem_desc(struct gk20a *g, if (!nvgpu_is_enabled(g, NVGPU_PKC_LS_SIG_ENABLED)) { addr_base = p_lsfm->lsb_header.ucode_off; } else { - addr_base = p_lsfm->lsb_header_v2.ucode_off; + addr_base = p_lsfm->lsb_header_v1.ucode_off; } g->acr->get_wpr_info(g, &wpr_inf); addr_base = nvgpu_safe_add_u64(addr_base, wpr_inf.wpr_base); @@ -1239,9 +1221,9 @@ static int lsfm_init_wpr_contents(struct gk20a *g, sizeof(pnode->lsb_header))); } else { nvgpu_mem_wr_n(g, ucode, pnode->wpr_header.lsb_offset, - &pnode->lsb_header_v2, + &pnode->lsb_header_v1, nvgpu_safe_cast_u64_to_u32( - sizeof(pnode->lsb_header_v2))); + sizeof(pnode->lsb_header_v1))); } nvgpu_acr_dbg(g, "lsb header"); @@ -1272,29 +1254,29 @@ static int lsfm_init_wpr_contents(struct gk20a *g, pnode->lsb_header.flags); } else { nvgpu_acr_dbg(g, "ucode_off :%x", - pnode->lsb_header_v2.ucode_off); + pnode->lsb_header_v1.ucode_off); nvgpu_acr_dbg(g, "ucode_size :%x", - pnode->lsb_header_v2.ucode_size); + pnode->lsb_header_v1.ucode_size); nvgpu_acr_dbg(g, "data_size :%x", - pnode->lsb_header_v2.data_size); + pnode->lsb_header_v1.data_size); nvgpu_acr_dbg(g, "bl_code_size :%x", - pnode->lsb_header_v2.bl_code_size); + pnode->lsb_header_v1.bl_code_size); nvgpu_acr_dbg(g, "bl_imem_off :%x", - pnode->lsb_header_v2.bl_imem_off); + pnode->lsb_header_v1.bl_imem_off); nvgpu_acr_dbg(g, "bl_data_off :%x", - pnode->lsb_header_v2.bl_data_off); + pnode->lsb_header_v1.bl_data_off); nvgpu_acr_dbg(g, "bl_data_size :%x", - pnode->lsb_header_v2.bl_data_size); + pnode->lsb_header_v1.bl_data_size); nvgpu_acr_dbg(g, "app_code_off :%x", - pnode->lsb_header_v2.app_code_off); + pnode->lsb_header_v1.app_code_off); nvgpu_acr_dbg(g, "app_code_size :%x", - pnode->lsb_header_v2.app_code_size); + pnode->lsb_header_v1.app_code_size); nvgpu_acr_dbg(g, "app_data_off :%x", - pnode->lsb_header_v2.app_data_off); + pnode->lsb_header_v1.app_data_off); nvgpu_acr_dbg(g, "app_data_size :%x", - pnode->lsb_header_v2.app_data_size); + pnode->lsb_header_v1.app_data_size); nvgpu_acr_dbg(g, "flags :%x", - pnode->lsb_header_v2.flags); + pnode->lsb_header_v1.flags); } if (!pnode->ucode_img.is_next_core_img) { @@ -1315,7 +1297,7 @@ static int lsfm_init_wpr_contents(struct gk20a *g, pnode->bl_gen_desc_size); } else { nvgpu_mem_wr_n(g, ucode, - pnode->lsb_header_v2.bl_data_off, + pnode->lsb_header_v1.bl_data_off, &pnode->bl_gen_desc, pnode->bl_gen_desc_size); } @@ -1327,7 +1309,7 @@ static int lsfm_init_wpr_contents(struct gk20a *g, pnode->ucode_img.data, 
pnode->ucode_img.data_size); } else { - nvgpu_mem_wr_n(g, ucode, pnode->lsb_header_v2.ucode_off, + nvgpu_mem_wr_n(g, ucode, pnode->lsb_header_v1.ucode_off, pnode->ucode_img.data, pnode->ucode_img.data_size); } @@ -1389,7 +1371,7 @@ static void lsfm_free_sec2_ucode_img_res(struct gk20a *g, p_img->desc = NULL; } -static void free_acr_resources(struct gk20a *g, struct ls_flcn_mgr *plsfm) +void nvgpu_acr_free_resources(struct gk20a *g, struct ls_flcn_mgr *plsfm) { u32 cnt = plsfm->managed_flcn_cnt; struct lsfm_managed_ucode_img *mg_ucode_img; @@ -1485,6 +1467,6 @@ int nvgpu_acr_prepare_ucode_blob(struct gk20a *g) nvgpu_acr_dbg(g, "prepare ucode blob return 0\n"); cleanup_exit: - free_acr_resources(g, plsfm); + nvgpu_acr_free_resources(g, plsfm); return err; } diff --git a/drivers/gpu/nvgpu/common/acr/acr_blob_construct.h b/drivers/gpu/nvgpu/common/acr/acr_blob_construct.h index 5249941db..9ed691df9 100644 --- a/drivers/gpu/nvgpu/common/acr/acr_blob_construct.h +++ b/drivers/gpu/nvgpu/common/acr/acr_blob_construct.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -27,9 +27,31 @@ #include #include -#include "nvgpu_acr_interface.h" +#include "nvgpu_acr_interface_v2.h" + +#define APP_IMEM_OFFSET (0) +#define APP_IMEM_ENTRY (0) +#define APP_DMEM_OFFSET (0) +#define APP_RESIDENT_CODE_OFFSET (0) +#define MEMSET_VALUE (0) +#define LSB_HDR_DATA_SIZE (0) +#define BL_START_OFFSET (0) + +#if defined(CONFIG_NVGPU_DGPU) || defined(CONFIG_NVGPU_LS_PMU) +#define UCODE_PARAMS (1) +#define UCODE_DESC_TOOL_VERSION 0x4U +#else +#define UCODE_PARAMS (0) +#endif + +#ifdef CONFIG_NVGPU_LS_PMU +#if defined(CONFIG_NVGPU_NON_FUSA) +#define PMU_NVRISCV_WPR_RSVD_BYTES (0x8000) +#endif +#endif #define UCODE_NB_MAX_DATE_LENGTH 64U + struct ls_falcon_ucode_desc { u32 descriptor_size; u32 image_size; @@ -96,8 +118,10 @@ struct flcn_ucode_img { struct lsfm_managed_ucode_img { struct lsfm_managed_ucode_img *next; struct lsf_wpr_header wpr_header; + LSF_WPR_HEADER_WRAPPER wpr_header_wrapper; struct lsf_lsb_header lsb_header; - struct lsf_lsb_header_v2 lsb_header_v2; + struct lsf_lsb_header_v1 lsb_header_v1; + LSF_LSB_HEADER_WRAPPER lsb_header_v2; struct flcn_bl_dmem_desc bl_gen_desc; u32 bl_gen_desc_size; u32 full_ucode_size; @@ -140,6 +164,7 @@ struct ls_flcn_mgr { }; int nvgpu_acr_prepare_ucode_blob(struct gk20a *g); +void nvgpu_acr_free_resources(struct gk20a *g, struct ls_flcn_mgr *plsfm); #ifdef CONFIG_NVGPU_LS_PMU int nvgpu_acr_lsf_pmu_ucode_details(struct gk20a *g, void *lsf_ucode_img); s32 nvgpu_acr_lsf_pmu_ncore_ucode_details(struct gk20a *g, void *lsf_ucode_img); diff --git a/drivers/gpu/nvgpu/common/acr/acr_blob_construct_v2.c b/drivers/gpu/nvgpu/common/acr/acr_blob_construct_v2.c new file mode 100644 index 000000000..36fe40b58 --- /dev/null +++ b/drivers/gpu/nvgpu/common/acr/acr_blob_construct_v2.c @@ -0,0 +1,554 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include + +#include "nvgpu_acr_interface_v2.h" +#include "acr_blob_construct_v2.h" +#include "acr_wpr.h" +#include "acr_priv.h" + +#if defined(CONFIG_NVGPU_NON_FUSA) && defined(CONFIG_NVGPU_NEXT) +#include +#endif + +static void lsfm_fill_static_lsb_hdr_info_pkc(struct gk20a *g, + u32 falcon_id, struct lsfm_managed_ucode_img *pnode) +{ + u32 full_app_size = 0; + + if (pnode->ucode_img.lsf_desc_wrapper != NULL) { + nvgpu_memcpy((u8 *)&pnode->lsb_header_v2.u.lsfLsbHdrV2.signature, + (u8 *)pnode->ucode_img.lsf_desc_wrapper, + sizeof(struct lsf_ucode_desc_wrapper)); + } + pnode->lsb_header_v2.u.lsfLsbHdrV2.ucode_size = pnode->ucode_img.data_size; + pnode->lsb_header_v2.u.lsfLsbHdrV2.data_size = LSB_HDR_DATA_SIZE; + + pnode->lsb_header_v2.u.lsfLsbHdrV2.bl_code_size = NVGPU_ALIGN( + pnode->ucode_img.desc->bootloader_size, + LSF_BL_CODE_SIZE_ALIGNMENT); + full_app_size = nvgpu_safe_add_u32( + NVGPU_ALIGN(pnode->ucode_img.desc->app_size, + LSF_BL_CODE_SIZE_ALIGNMENT), + pnode->lsb_header_v2.u.lsfLsbHdrV2.bl_code_size); + + pnode->lsb_header_v2.u.lsfLsbHdrV2.ucode_size = nvgpu_safe_add_u32(NVGPU_ALIGN( + pnode->ucode_img.desc->app_resident_data_offset, + LSF_BL_CODE_SIZE_ALIGNMENT), + pnode->lsb_header_v2.u.lsfLsbHdrV2.bl_code_size); + + pnode->lsb_header_v2.u.lsfLsbHdrV2.data_size = nvgpu_safe_sub_u32(full_app_size, + pnode->lsb_header_v2.u.lsfLsbHdrV2.ucode_size); + + pnode->lsb_header_v2.u.lsfLsbHdrV2.bl_imem_off = + pnode->ucode_img.desc->bootloader_imem_offset; + + pnode->lsb_header_v2.u.lsfLsbHdrV2.flags = NV_FLCN_ACR_LSF_FLAG_FORCE_PRIV_LOAD_FALSE; + + if (g->acr->lsf[falcon_id].is_priv_load) { + pnode->lsb_header_v2.u.lsfLsbHdrV2.flags |= + NV_FLCN_ACR_LSF_FLAG_FORCE_PRIV_LOAD_TRUE; + } + +} + +/* Populate static LSB header information using the provided ucode image */ +static void lsfm_fill_static_lsb_hdr_info(struct gk20a *g, + u32 falcon_id, struct lsfm_managed_ucode_img *pnode) +{ + pnode->lsb_header_v2.genericHdr.size = LSF_LSB_HEADER_WRAPPER_V2_SIZE_BYTE; + pnode->lsb_header_v2.genericHdr.version = LSF_LSB_HEADER_VERSION_2; + pnode->lsb_header_v2.genericHdr.identifier = WPR_GENERIC_HEADER_ID_LSF_LSB_HEADER; + + if (nvgpu_is_enabled(g, NVGPU_PKC_LS_SIG_ENABLED) + && (!pnode->ucode_img.is_next_core_img)) { + lsfm_fill_static_lsb_hdr_info_pkc(g, falcon_id, pnode); + } +} + +/* Adds a ucode image to the list of managed ucode images managed. 
*/ +static int lsfm_add_ucode_img(struct gk20a *g, struct ls_flcn_mgr *plsfm, + struct flcn_ucode_img *ucode_image, u32 falcon_id) +{ + struct lsfm_managed_ucode_img *pnode; + + pnode = nvgpu_kzalloc(g, sizeof(struct lsfm_managed_ucode_img)); + if (pnode == NULL) { + return -ENOMEM; + } + + /* Keep a copy of the ucode image info locally */ + nvgpu_memcpy((u8 *)&pnode->ucode_img, (u8 *)ucode_image, + sizeof(struct flcn_ucode_img)); + + /* Fill in static WPR header info*/ + pnode->wpr_header_wrapper.genericHdr.size = LSF_WPR_HEADER_WRAPPER_SIZE_BYTE; + pnode->wpr_header_wrapper.genericHdr.version = LSF_WPR_HEADER_VERSION; + pnode->wpr_header_wrapper.genericHdr.identifier = WPR_GENERIC_HEADER_ID_LSF_WPR_HEADER; + pnode->wpr_header_wrapper.u.lsfWprHdr.falcon_id = falcon_id; + pnode->wpr_header_wrapper.u.lsfWprHdr.bootstrap_owner = g->acr->bootstrap_owner; + pnode->wpr_header_wrapper.u.lsfWprHdr.status = LSF_IMAGE_STATUS_COPY; + + pnode->wpr_header_wrapper.u.lsfWprHdr.lazy_bootstrap = + nvgpu_safe_cast_bool_to_u32( + g->acr->lsf[falcon_id].is_lazy_bootstrap); + + /* Fill in static LSB header info elsewhere */ + lsfm_fill_static_lsb_hdr_info(g, falcon_id, pnode); + if (!nvgpu_is_enabled(g, NVGPU_PKC_LS_SIG_ENABLED)) { + pnode->wpr_header_wrapper.u.lsfWprHdr.bin_version = + pnode->lsb_header.signature.version; + } else { + pnode->wpr_header_wrapper.u.lsfWprHdr.bin_version = + pnode->lsb_header_v2.u.lsfLsbHdrV2.signature.lsf_ucode_desc_v2.ls_ucode_version; + } + pnode->next = plsfm->ucode_img_list; + plsfm->ucode_img_list = pnode; + + return 0; +} + +static int lsfm_check_and_add_ucode_image(struct gk20a *g, + struct ls_flcn_mgr *plsfm, u32 lsf_index) +{ + struct flcn_ucode_img ucode_img; + struct nvgpu_acr *acr = g->acr; + u32 falcon_id = 0U; + int err = 0; + + if (!nvgpu_test_bit(lsf_index, (void *)&acr->lsf_enable_mask)) { + return err; + } + + if (acr->lsf[lsf_index].get_lsf_ucode_details == NULL) { + nvgpu_err(g, "LS falcon-%d ucode fetch details not initialized", + lsf_index); + return -ENOENT; + } + + (void) memset(&ucode_img, MEMSET_VALUE, sizeof(ucode_img)); + + err = acr->lsf[lsf_index].get_lsf_ucode_details(g, + (void *)&ucode_img); + if (err != 0) { + nvgpu_err(g, "LS falcon-%d ucode get failed", lsf_index); + return err; + } + + if (!nvgpu_is_enabled(g, NVGPU_PKC_LS_SIG_ENABLED)) { + falcon_id = ucode_img.lsf_desc->falcon_id; + } else { + falcon_id = ucode_img.lsf_desc_wrapper->lsf_ucode_desc_v2.falcon_id; + } + + err = lsfm_add_ucode_img(g, plsfm, &ucode_img, falcon_id); + if (err != 0) { + nvgpu_err(g, " Failed to add falcon-%d to LSFM ", falcon_id); + return err; + } + + plsfm->managed_flcn_cnt++; + + return err; +} + +/* Discover all managed falcon ucode images */ +static int lsfm_discover_ucode_images(struct gk20a *g, + struct ls_flcn_mgr *plsfm) +{ + u32 i; + int err = 0; + + /* + * Enumerate all constructed falcon objects, as we need the ucode + * image info and total falcon count + */ + for (i = 0U; i < FALCON_ID_END; i++) { + err = lsfm_check_and_add_ucode_image(g, plsfm, i); + if (err != 0) { + return err; + } + } + + return err; +} + +static void lsf_calc_wpr_size_pkc(struct lsfm_managed_ucode_img *pnode, + u32 *wpr_off) +{ + u32 wpr_offset = *wpr_off; + + /* Align, save off, and include an LSB header size */ + wpr_offset = NVGPU_ALIGN(wpr_offset, LSF_LSB_HEADER_ALIGNMENT); + pnode->wpr_header_wrapper.u.lsfWprHdr.lsb_offset = wpr_offset; + wpr_offset = nvgpu_safe_add_u32(wpr_offset, + (u32)sizeof(LSF_LSB_HEADER_WRAPPER)); + + wpr_offset = NVGPU_ALIGN(wpr_offset, 
LSF_UCODE_DATA_ALIGNMENT); + pnode->lsb_header_v2.u.lsfLsbHdrV2.ucode_off = wpr_offset; + wpr_offset = nvgpu_safe_add_u32(wpr_offset, + pnode->ucode_img.data_size); + + pnode->lsb_header_v2.u.lsfLsbHdrV2.bl_data_size = NVGPU_ALIGN( + nvgpu_safe_cast_u64_to_u32( + sizeof(pnode->bl_gen_desc)), + LSF_BL_DATA_SIZE_ALIGNMENT); + + wpr_offset = NVGPU_ALIGN(wpr_offset, LSF_BL_DATA_ALIGNMENT); + pnode->lsb_header_v2.u.lsfLsbHdrV2.bl_data_off = wpr_offset; + wpr_offset = nvgpu_safe_add_u32(wpr_offset, + pnode->lsb_header_v2.u.lsfLsbHdrV2.bl_data_size); + + pnode->full_ucode_size = wpr_offset - + pnode->lsb_header_v2.u.lsfLsbHdrV2.ucode_off; + if (pnode->wpr_header_wrapper.u.lsfWprHdr.falcon_id != FALCON_ID_PMU && + pnode->wpr_header_wrapper.u.lsfWprHdr.falcon_id != FALCON_ID_PMU_NEXT_CORE) { + pnode->lsb_header_v2.u.lsfLsbHdrV2.app_code_off = + pnode->lsb_header_v2.u.lsfLsbHdrV2.bl_code_size; + pnode->lsb_header_v2.u.lsfLsbHdrV2.app_code_size = + pnode->lsb_header_v2.u.lsfLsbHdrV2.ucode_size - + pnode->lsb_header_v2.u.lsfLsbHdrV2.bl_code_size; + pnode->lsb_header_v2.u.lsfLsbHdrV2.app_data_off = + pnode->lsb_header_v2.u.lsfLsbHdrV2.ucode_size; + pnode->lsb_header_v2.u.lsfLsbHdrV2.app_data_size = + pnode->lsb_header_v2.u.lsfLsbHdrV2.data_size; + } + + *wpr_off = wpr_offset; +} + +/* Generate WPR requirements for ACR allocation request */ +static int lsf_gen_wpr_requirements(struct gk20a *g, + struct ls_flcn_mgr *plsfm) +{ + struct lsfm_managed_ucode_img *pnode = plsfm->ucode_img_list; + u32 wpr_offset; + u32 flcn_cnt; + + (void)g; + + /* + * Start with an array of WPR headers at the base of the WPR. + * The expectation here is that the secure falcon will do a single DMA + * read of this array and cache it internally so it's OK to pack these. + * Also, we add 1 to the falcon count to indicate the end of the array. + */ + flcn_cnt = U32(plsfm->managed_flcn_cnt); + wpr_offset = nvgpu_safe_mult_u32(U32(sizeof(LSF_WPR_HEADER_WRAPPER)), + nvgpu_safe_add_u32(flcn_cnt, U32(1))); + + /* + * Walk the managed falcons, accounting for the LSB structs + * as well as the ucode images. + */ + while (pnode != NULL) { + if (nvgpu_is_enabled(g, NVGPU_PKC_LS_SIG_ENABLED)) { + lsf_calc_wpr_size_pkc(pnode, &wpr_offset); + } + + pnode = pnode->next; + } + + plsfm->wpr_size = wpr_offset; + return 0; +} + +/* Initialize WPR contents */ +static int lsfm_populate_flcn_bl_dmem_desc(struct gk20a *g, + void *lsfm, u32 *p_bl_gen_desc_size, u32 falconid) +{ + struct wpr_carveout_info wpr_inf; + struct lsfm_managed_ucode_img *p_lsfm = + (struct lsfm_managed_ucode_img *)lsfm; + struct flcn_ucode_img *p_img = &(p_lsfm->ucode_img); + struct flcn_bl_dmem_desc *ldr_cfg = + &(p_lsfm->bl_gen_desc); + u64 addr_base = 0; + struct ls_falcon_ucode_desc *desc; + u64 addr_code, addr_data; + + if (p_img->desc == NULL) { + /* + * This means its a header based ucode, + * and so we do not fill BL gen desc structure + */ + return -EINVAL; + } + desc = p_img->desc; + + /* + * Calculate physical and virtual addresses for various portions of + * the PMU ucode image + * Calculate the 32-bit addresses for the application code, application + * data, and bootloader code. These values are all based on IM_BASE. + * The 32-bit addresses will be the upper 32-bits of the virtual or + * physical addresses of each respective segment. 
+ */ + if (!nvgpu_is_enabled(g, NVGPU_PKC_LS_SIG_ENABLED)) { + addr_base = p_lsfm->lsb_header.ucode_off; + } else { + addr_base = p_lsfm->lsb_header_v2.u.lsfLsbHdrV2.ucode_off; + } + g->acr->get_wpr_info(g, &wpr_inf); + addr_base = nvgpu_safe_add_u64(addr_base, wpr_inf.wpr_base); + + nvgpu_acr_dbg(g, "falcon ID %x", p_lsfm->wpr_header_wrapper.u.lsfWprHdr.falcon_id); + nvgpu_acr_dbg(g, "gen loader cfg addrbase %llx ", addr_base); + addr_code = nvgpu_safe_add_u64(addr_base, desc->app_start_offset); + addr_data = nvgpu_safe_add_u64(addr_code, + desc->app_resident_data_offset); + + nvgpu_acr_dbg(g, "gen cfg addrcode %llx data %llx load offset %x", + addr_code, addr_data, desc->bootloader_start_offset); + + /* Populate the LOADER_CONFIG state */ + (void) memset((void *) ldr_cfg, MEMSET_VALUE, + sizeof(struct flcn_bl_dmem_desc)); + + ldr_cfg->ctx_dma = g->acr->lsf[falconid].falcon_dma_idx; + flcn64_set_dma(&ldr_cfg->code_dma_base, addr_code); + ldr_cfg->non_sec_code_off = desc->app_resident_code_offset; + ldr_cfg->non_sec_code_size = desc->app_resident_code_size; + flcn64_set_dma(&ldr_cfg->data_dma_base, addr_data); + ldr_cfg->data_size = desc->app_resident_data_size; + ldr_cfg->code_entry_point = desc->app_imem_entry; + + /* Update the argc/argv members*/ + ldr_cfg->argc = UCODE_PARAMS; + + *p_bl_gen_desc_size = (u32)sizeof(struct flcn_bl_dmem_desc); + return 0; +} + +/* Populate falcon boot loader generic desc.*/ +static int lsfm_fill_flcn_bl_gen_desc(struct gk20a *g, + struct lsfm_managed_ucode_img *pnode) +{ + return lsfm_populate_flcn_bl_dmem_desc(g, pnode, + &pnode->bl_gen_desc_size, + pnode->wpr_header_wrapper.u.lsfWprHdr.falcon_id); +} + +static int lsfm_init_wpr_contents(struct gk20a *g, + struct ls_flcn_mgr *plsfm, struct nvgpu_mem *ucode) +{ + struct lsfm_managed_ucode_img *pnode = plsfm->ucode_img_list; + LSF_WPR_HEADER_WRAPPER last_wpr_hdr; + u32 i = 0; + u64 tmp; + int err = 0; + + /* The WPR array is at the base of the WPR */ + pnode = plsfm->ucode_img_list; + (void) memset(&last_wpr_hdr, MEMSET_VALUE, sizeof(LSF_WPR_HEADER_WRAPPER)); + + /* + * Walk the managed falcons, flush WPR and LSB headers to FB. + * flush any bl args to the storage area relative to the + * ucode image (appended on the end as a DMEM area). 
+ */ + while (pnode != NULL) { + /* Flush WPR header to memory*/ + nvgpu_mem_wr_n(g, ucode, + nvgpu_safe_mult_u32(i, + nvgpu_safe_cast_u64_to_u32(sizeof( + pnode->wpr_header_wrapper))), &pnode->wpr_header_wrapper, + nvgpu_safe_cast_u64_to_u32(sizeof(pnode->wpr_header_wrapper))); + + nvgpu_acr_dbg(g, "wpr header"); + nvgpu_acr_dbg(g, "falconid :%d", + pnode->wpr_header_wrapper.u.lsfWprHdr.falcon_id); + nvgpu_acr_dbg(g, "lsb_offset :%x", + pnode->wpr_header_wrapper.u.lsfWprHdr.lsb_offset); + nvgpu_acr_dbg(g, "bootstrap_owner :%d", + pnode->wpr_header_wrapper.u.lsfWprHdr.bootstrap_owner); + nvgpu_acr_dbg(g, "lazy_bootstrap :%d", + pnode->wpr_header_wrapper.u.lsfWprHdr.lazy_bootstrap); + nvgpu_acr_dbg(g, "status :%d", + pnode->wpr_header_wrapper.u.lsfWprHdr.status); + + /*Flush LSB header to memory*/ + if (!nvgpu_is_enabled(g, NVGPU_PKC_LS_SIG_ENABLED)) { + nvgpu_mem_wr_n(g, ucode, pnode->wpr_header_wrapper.u.lsfWprHdr.lsb_offset, + &pnode->lsb_header, + nvgpu_safe_cast_u64_to_u32( + sizeof(pnode->lsb_header))); + } else { + nvgpu_mem_wr_n(g, ucode, pnode->wpr_header_wrapper.u.lsfWprHdr.lsb_offset, + &pnode->lsb_header_v2, + nvgpu_safe_cast_u64_to_u32( + sizeof(pnode->lsb_header_v2))); + } + + nvgpu_acr_dbg(g, "lsb header"); + if (nvgpu_is_enabled(g, NVGPU_PKC_LS_SIG_ENABLED)) { + nvgpu_acr_dbg(g, "ucode_off :%x", + pnode->lsb_header_v2.u.lsfLsbHdrV2.ucode_off); + nvgpu_acr_dbg(g, "ucode_size :%x", + pnode->lsb_header_v2.u.lsfLsbHdrV2.ucode_size); + nvgpu_acr_dbg(g, "data_size :%x", + pnode->lsb_header_v2.u.lsfLsbHdrV2.data_size); + nvgpu_acr_dbg(g, "bl_code_size :%x", + pnode->lsb_header_v2.u.lsfLsbHdrV2.bl_code_size); + nvgpu_acr_dbg(g, "bl_imem_off :%x", + pnode->lsb_header_v2.u.lsfLsbHdrV2.bl_imem_off); + nvgpu_acr_dbg(g, "bl_data_off :%x", + pnode->lsb_header_v2.u.lsfLsbHdrV2.bl_data_off); + nvgpu_acr_dbg(g, "bl_data_size :%x", + pnode->lsb_header_v2.u.lsfLsbHdrV2.bl_data_size); + nvgpu_acr_dbg(g, "app_code_off :%x", + pnode->lsb_header_v2.u.lsfLsbHdrV2.app_code_off); + nvgpu_acr_dbg(g, "app_code_size :%x", + pnode->lsb_header_v2.u.lsfLsbHdrV2.app_code_size); + nvgpu_acr_dbg(g, "app_data_off :%x", + pnode->lsb_header_v2.u.lsfLsbHdrV2.app_data_off); + nvgpu_acr_dbg(g, "app_data_size :%x", + pnode->lsb_header_v2.u.lsfLsbHdrV2.app_data_size); + nvgpu_acr_dbg(g, "flags :%x", + pnode->lsb_header_v2.u.lsfLsbHdrV2.flags); + } + + if (!pnode->ucode_img.is_next_core_img) { + /* + * If this falcon has a boot loader and related args, + * flush them. + */ + /* Populate gen bl and flush to memory */ + err = lsfm_fill_flcn_bl_gen_desc(g, pnode); + if (err != 0) { + nvgpu_err(g, "bl_gen_desc failed err=%d", err); + return err; + } + if (!nvgpu_is_enabled(g, NVGPU_PKC_LS_SIG_ENABLED)) { + nvgpu_mem_wr_n(g, ucode, + pnode->lsb_header.bl_data_off, + &pnode->bl_gen_desc, + pnode->bl_gen_desc_size); + } else { + nvgpu_mem_wr_n(g, ucode, + pnode->lsb_header_v2.u.lsfLsbHdrV2.bl_data_off, + &pnode->bl_gen_desc, + pnode->bl_gen_desc_size); + } + } + + /* Copying of ucode */ + if (!nvgpu_is_enabled(g, NVGPU_PKC_LS_SIG_ENABLED)) { + nvgpu_mem_wr_n(g, ucode, pnode->lsb_header.ucode_off, + pnode->ucode_img.data, + pnode->ucode_img.data_size); + } else { + nvgpu_mem_wr_n(g, ucode, pnode->lsb_header_v2.u.lsfLsbHdrV2.ucode_off, + pnode->ucode_img.data, + pnode->ucode_img.data_size); + } + + pnode = pnode->next; + i = nvgpu_safe_add_u32(i, 1U); + } + + /* Tag the terminator WPR header with an invalid falcon ID. 
*/ + last_wpr_hdr.u.lsfWprHdr.falcon_id = FALCON_ID_INVALID; + /* Fill in static WPR header info*/ + last_wpr_hdr.genericHdr.size = LSF_WPR_HEADER_WRAPPER_SIZE_BYTE; + last_wpr_hdr.genericHdr.version = LSF_WPR_HEADER_VERSION; + last_wpr_hdr.genericHdr.identifier = WPR_GENERIC_HEADER_ID_LSF_WPR_HEADER; + tmp = nvgpu_safe_mult_u32(plsfm->managed_flcn_cnt, + (u32)sizeof(LSF_WPR_HEADER_WRAPPER)); + nvgpu_assert(tmp <= U32_MAX); + nvgpu_mem_wr_n(g, ucode, (u32)tmp, &last_wpr_hdr, + (u32)sizeof(LSF_WPR_HEADER_WRAPPER)); + + return err; +} + +int nvgpu_acr_prepare_ucode_blob_v2(struct gk20a *g) +{ + int err = 0; + struct ls_flcn_mgr lsfm_l, *plsfm; + + struct wpr_carveout_info wpr_inf; + struct nvgpu_gr_falcon *gr_falcon = nvgpu_gr_get_falcon_ptr(g); + + /* Recovery case, we do not need to form non WPR blob of ucodes */ + if (g->acr->ucode_blob.cpu_va != NULL) { + return err; + } + + + plsfm = &lsfm_l; + (void) memset((void *)plsfm, MEMSET_VALUE, sizeof(struct ls_flcn_mgr)); + err = nvgpu_gr_falcon_init_ctxsw_ucode(g, gr_falcon); + if (err != 0) { + nvgpu_err(g, "gr_falcon_init_ctxsw_ucode failed err=%d", err); + return err; + } + + g->acr->get_wpr_info(g, &wpr_inf); + nvgpu_acr_dbg(g, "wpr carveout base:%llx\n", (wpr_inf.wpr_base)); + nvgpu_acr_dbg(g, "wpr carveout size :%llx\n", wpr_inf.size); + + /* Discover all managed falcons */ + err = lsfm_discover_ucode_images(g, plsfm); + nvgpu_acr_dbg(g, " Managed Falcon cnt %d\n", plsfm->managed_flcn_cnt); + if (err != 0) { + goto cleanup_exit; + } + + if ((plsfm->managed_flcn_cnt != 0U) && + (g->acr->ucode_blob.cpu_va == NULL)) { + /* Generate WPR requirements */ + err = lsf_gen_wpr_requirements(g, plsfm); + if (err != 0) { + goto cleanup_exit; + } + + /* Alloc memory to hold ucode blob contents */ + err = g->acr->alloc_blob_space(g, plsfm->wpr_size, + &g->acr->ucode_blob); + if (err != 0) { + goto cleanup_exit; + } + + nvgpu_acr_dbg(g, "managed LS falcon %d, WPR size %d bytes.\n", + plsfm->managed_flcn_cnt, plsfm->wpr_size); + + err = lsfm_init_wpr_contents(g, plsfm, &g->acr->ucode_blob); + if (err != 0) { + nvgpu_kfree(g, &g->acr->ucode_blob); + goto cleanup_exit; + } + } else { + nvgpu_acr_dbg(g, "LSFM is managing no falcons.\n"); + } + nvgpu_acr_dbg(g, "prepare ucode blob return 0\n"); + +cleanup_exit: + nvgpu_acr_free_resources(g, plsfm); + return err; +} diff --git a/drivers/gpu/nvgpu/common/acr/acr_blob_construct_v2.h b/drivers/gpu/nvgpu/common/acr/acr_blob_construct_v2.h new file mode 100644 index 000000000..0401958ef --- /dev/null +++ b/drivers/gpu/nvgpu/common/acr/acr_blob_construct_v2.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef ACR_BLOB_CONSTRUCT_V2_H +#define ACR_BLOB_CONSTRUCT_V2_H + +#include +#include + +#include "nvgpu_acr_interface_v2.h" +#include "acr_blob_construct.h" + +int nvgpu_acr_prepare_ucode_blob_v2(struct gk20a *g); + +#endif /* ACR_BLOB_CONSTRUCT_V2_H */ diff --git a/drivers/gpu/nvgpu/common/acr/acr_bootstrap.h b/drivers/gpu/nvgpu/common/acr/acr_bootstrap.h index 0ebd2f0eb..97c37b2a4 100644 --- a/drivers/gpu/nvgpu/common/acr/acr_bootstrap.h +++ b/drivers/gpu/nvgpu/common/acr/acr_bootstrap.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -23,7 +23,7 @@ #ifndef ACR_BOOTSTRAP_H #define ACR_BOOTSTRAP_H -#include "nvgpu_acr_interface.h" +#include "nvgpu_acr_interface_v2.h" struct gk20a; struct nvgpu_acr; @@ -115,6 +115,7 @@ struct hs_acr { struct nvgpu_mem acr_falcon2_sysmem_desc; struct flcn2_acr_desc acr_sysmem_desc; + RM_RISCV_ACR_DESC_WRAPPER acr_sysmem_desc_v1; struct nvgpu_mem ls_pmu_desc; /* Falcon used to execute ACR ucode */ diff --git a/drivers/gpu/nvgpu/common/acr/acr_sw_ga10b.c b/drivers/gpu/nvgpu/common/acr/acr_sw_ga10b.c index 0d0b6a17c..62b40b20f 100644 --- a/drivers/gpu/nvgpu/common/acr/acr_sw_ga10b.c +++ b/drivers/gpu/nvgpu/common/acr/acr_sw_ga10b.c @@ -35,7 +35,7 @@ #include "common/acr/acr_wpr.h" #include "common/acr/acr_priv.h" #include "common/acr/acr_blob_alloc.h" -#include "common/acr/acr_blob_construct.h" +#include "common/acr/acr_blob_construct_v2.h" #include "common/acr/acr_bootstrap.h" #include "common/acr/acr_sw_gv11b.h" #include "acr_sw_ga10b.h" @@ -75,6 +75,133 @@ static int ga10b_bootstrap_hs_acr(struct gk20a *g, struct nvgpu_acr *acr) return err; } +#ifndef CONFIG_NVGPU_NON_FUSA +static int ga10b_safety_acr_patch_wpr_info_to_ucode(struct gk20a *g, + struct nvgpu_acr *acr, struct hs_acr *acr_desc, bool is_recovery) +{ + int err = 0; + struct nvgpu_mem *acr_falcon2_sysmem_desc = + &acr_desc->acr_falcon2_sysmem_desc; + RM_RISCV_ACR_DESC_WRAPPER *acr_sysmem_desc = &acr_desc->acr_sysmem_desc_v1; + + (void)acr; + (void)is_recovery; + + nvgpu_log_fn(g, " "); + +#ifdef CONFIG_NVGPU_NON_FUSA + if (is_recovery) { + /* + * In case of recovery ucode blob size is 0 as it has already + * been authenticated during cold boot. + */ + if (!nvgpu_mem_is_valid(&acr_desc->acr_falcon2_sysmem_desc)) { + nvgpu_err(g, "invalid mem acr_falcon2_sysmem_desc"); + return -EINVAL; + } + /* + * In T234 the CTXSW LS ucodes are encrypted. ACR will perform the + * decryption and the decrypted content will be written back + * into the same WPR location. So on recovery with LSPMU absence + * and on warm boot case, to perform the authentication , the ucode + * blob needs to be copied into the WPR from sysmem always. 
+ * Below are the LS ucode authentication types
+ * LSPMU - Only Signed (Only for non-safety build, for safety LSPMU is NA)
+ * CTXSW FECS/GPCCS - Encrypted and Signed (on both safety
+ * and non-safety build)
+ */
+ if (!acr->is_lsf_encrypt_support) {
+ acr_sysmem_desc->acrDesc.riscvAcrDescV1.regionDesc[0].nonWprBlobSize =
+ RECOVERY_UCODE_BLOB_SIZE;
+ }
+ } else
+#endif
+ {
+ /*
+ * Alloc sysmem space to which the interface struct is
+ * copied.
+ */
+ if (!nvgpu_mem_is_valid(acr_falcon2_sysmem_desc)) {
+ err = nvgpu_dma_alloc_flags_sys(g,
+ NVGPU_DMA_PHYSICALLY_ADDRESSED,
+ sizeof(RM_RISCV_ACR_DESC_WRAPPER),
+ acr_falcon2_sysmem_desc);
+ if (err != 0) {
+ nvgpu_err(g, "alloc for sysmem desc failed");
+ goto end;
+ }
+ } else {
+ /*
+ * In T234 the CTXSW LS ucodes are encrypted. ACR will perform the
+ * decryption and the decrypted content will be written back
+ * into the same WPR location. So on recovery with LSPMU absence
+ * and on warm boot case, to perform the authentication, the ucode
+ * blob needs to be copied into the WPR from sysmem always.
+ * Below are the LS ucode authentication types
+ * LSPMU - Only Signed (Only for non-safety build, for safety LSPMU is NA)
+ * CTXSW FECS/GPCCS - Encrypted and Signed (on both safety
+ * and non-safety build)
+ */
+ if (!acr->is_lsf_encrypt_support) {
+ acr_sysmem_desc->acrDesc.riscvAcrDescV1.regionDesc[0].nonWprBlobSize =
+ RECOVERY_UCODE_BLOB_SIZE;
+ }
+ goto load;
+ }
+
+ /*
+ * Generic header info for ACR descriptor.
+ */
+ acr_sysmem_desc->genericHdr.size = RM_RISCV_ACR_DESC_V1_WRAPPER_SIZE_BYTE;
+ acr_sysmem_desc->genericHdr.version = ACR_DESC_GENERIC_HEADER_VERSION_1;
+ acr_sysmem_desc->genericHdr.identifier = WPR_GENERIC_HEADER_ID_ACR_DESC_HEADER;
+
+ /*
+ * Start address of the non-WPR sysmem region holding the ucode blob.
+ */
+ acr_sysmem_desc->acrDesc.riscvAcrDescV1.regionDesc[0].nonWprBlobStart =
+ nvgpu_mem_get_addr(g, &g->acr->ucode_blob);
+ /*
+ * LS ucode blob size.
+ */
+ nvgpu_assert(g->acr->ucode_blob.size <= U32_MAX);
+ acr_sysmem_desc->acrDesc.riscvAcrDescV1.regionDesc[0].nonWprBlobSize =
+ (u32)g->acr->ucode_blob.size;
+
+ if (nvgpu_is_enabled(g, NVGPU_SUPPORT_EMULATE_MODE) &&
+ (g->emulate_mode < EMULATE_MODE_MAX_CONFIG)) {
+ acr_sysmem_desc->acrDesc.riscvAcrDescV1.mode &= (~EMULATE_MODE_MASK);
+ acr_sysmem_desc->acrDesc.riscvAcrDescV1.mode |= g->emulate_mode;
+ } else {
+ acr_sysmem_desc->acrDesc.riscvAcrDescV1.mode &= (~EMULATE_MODE_MASK);
+ }
+
+ if (nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
+ acr_sysmem_desc->acrDesc.riscvAcrDescV1.mode |= MIG_MODE;
+ } else {
+ acr_sysmem_desc->acrDesc.riscvAcrDescV1.mode &= (u32)(~MIG_MODE);
+ }
+
+ if (nvgpu_platform_is_simulation(g)) {
+ acr_sysmem_desc->acrDesc.riscvAcrDescV1.mode |= ACR_SIMULATION_MODE;
+ } else {
+ acr_sysmem_desc->acrDesc.riscvAcrDescV1.mode &= (u32)(~ACR_SIMULATION_MODE);
+ }
+ }
+
+load:
+ /*
+ * Push the acr descriptor data to sysmem.
+ */ + nvgpu_mem_wr_n(g, acr_falcon2_sysmem_desc, 0U, + acr_sysmem_desc, + sizeof(RM_RISCV_ACR_DESC_WRAPPER )); + +end: + return err; +} + +#else static int ga10b_acr_patch_wpr_info_to_ucode(struct gk20a *g, struct nvgpu_acr *acr, struct hs_acr *acr_desc, bool is_recovery) { @@ -116,7 +243,7 @@ static int ga10b_acr_patch_wpr_info_to_ucode(struct gk20a *g, */ if (!acr->is_lsf_encrypt_support) { acr_sysmem_desc->nonwpr_ucode_blob_size = - RECOVERY_UCODE_BLOB_SIZE; + RECOVERY_UCODE_BLOB_SIZE; } } else #endif @@ -148,7 +275,7 @@ static int ga10b_acr_patch_wpr_info_to_ucode(struct gk20a *g, */ if (!acr->is_lsf_encrypt_support) { acr_sysmem_desc->nonwpr_ucode_blob_size = - RECOVERY_UCODE_BLOB_SIZE; + RECOVERY_UCODE_BLOB_SIZE; } goto load; } @@ -226,6 +353,7 @@ load: end: return err; } +#endif /* LSF static config functions */ #ifdef CONFIG_NVGPU_LS_PMU @@ -401,11 +529,20 @@ static void ga10b_acr_sw_init(struct gk20a *g, struct nvgpu_acr *acr) ga10b_acr_default_sw_init(g, &acr->acr_asc); +#ifndef CONFIG_NVGPU_NON_FUSA + acr->prepare_ucode_blob = nvgpu_acr_prepare_ucode_blob_v2; +#else acr->prepare_ucode_blob = nvgpu_acr_prepare_ucode_blob; +#endif + acr->get_wpr_info = nvgpu_acr_wpr_info_sys; acr->alloc_blob_space = nvgpu_acr_alloc_blob_space_sys; acr->bootstrap_hs_acr = ga10b_bootstrap_hs_acr; +#ifndef CONFIG_NVGPU_NON_FUSA + acr->patch_wpr_info_to_ucode = ga10b_safety_acr_patch_wpr_info_to_ucode; +#else acr->patch_wpr_info_to_ucode = ga10b_acr_patch_wpr_info_to_ucode; +#endif } diff --git a/drivers/gpu/nvgpu/common/acr/nvgpu_acr_interface.h b/drivers/gpu/nvgpu/common/acr/nvgpu_acr_interface.h index 5a001db02..924da0575 100644 --- a/drivers/gpu/nvgpu/common/acr/nvgpu_acr_interface.h +++ b/drivers/gpu/nvgpu/common/acr/nvgpu_acr_interface.h @@ -281,12 +281,12 @@ struct lsf_ucode_desc { #define LS_ENCRYPTION_AES_CBC_IV_SIZE_BYTE (16) /*! - * WPR generic struct header - * identifier - To identify type of WPR struct i.e. WPR vs SUBWPR vs LSB vs LSF_UCODE_DESC + * ACR generic struct header + * identifier - To identify type of ACR struct i.e. WPR vs SUBWPR vs LSB vs LSF_UCODE_DESC * version - To specify version of struct, for backward compatibility * size - Size of struct, include header and body */ -struct wpr_generic_header { +struct acr_generic_header { u16 identifier; u16 version; u32 size; @@ -326,7 +326,7 @@ struct lsf_ucode_desc_v2 { * The wrapper for LSF_UCODE_DESC, start support from version 2. */ struct lsf_ucode_desc_wrapper { - struct wpr_generic_header generic_hdr; + struct acr_generic_header generic_hdr; union { struct lsf_ucode_desc_v2 lsf_ucode_desc_v2; @@ -441,7 +441,7 @@ struct lsf_lsb_header { u32 flags; }; -struct lsf_lsb_header_v2 { +struct lsf_lsb_header_v1 { /** Code/data signature details of each LS falcon */ struct lsf_ucode_desc_wrapper signature; /** diff --git a/drivers/gpu/nvgpu/common/acr/nvgpu_acr_interface_v2.h b/drivers/gpu/nvgpu/common/acr/nvgpu_acr_interface_v2.h new file mode 100644 index 000000000..859109666 --- /dev/null +++ b/drivers/gpu/nvgpu/common/acr/nvgpu_acr_interface_v2.h @@ -0,0 +1,244 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef NVGPU_ACR_INTERFACE_V2_H
+#define NVGPU_ACR_INTERFACE_V2_H
+
+#include "nvgpu_acr_interface.h"
+
+// ID to distinguish WPR component
+#define WPR_GENERIC_HEADER_ID_LSF_UCODE_DESC (1)
+#define WPR_GENERIC_HEADER_ID_LSF_WPR_HEADER (2)
+#define WPR_GENERIC_HEADER_ID_LSF_SHARED_SUB_WPR (3)
+#define WPR_GENERIC_HEADER_ID_LSF_LSB_HEADER (4)
+#define WPR_GENERIC_HEADER_ID_ACR_DESC_HEADER (5)
+
+#define ACR_DESC_GENERIC_HEADER_VERSION_1 (1)
+#define LSF_UCODE_DESC_VERSION_2 (2)
+#define LSF_WPR_HEADER_VERSION (2)
+#define LSF_LSB_HEADER_VERSION_2 (2)
+
+// The wrapper for LSF_WPR_HEADER
+typedef struct
+{
+ struct acr_generic_header genericHdr;
+
+ union
+ {
+ struct lsf_wpr_header lsfWprHdr;
+ } u;
+} LSF_WPR_HEADER_WRAPPER, *PLSF_WPR_HEADER_WRAPPER;
+
+//
+// For RSA3K operation, the SE engine needs each input component to be 512 bytes in length.
+// So even though the RSA3K signature is 384 bytes, we still need to pad zeros until the size is 512.
+// Currently, for version 2, we set the MAX LS signature size to be 512 as well.
+//
+#define RSA3K_SIGNATURE_SIZE_BYTE (384)
+#define RSA3K_SIGNATURE_PADDING_SIZE_BYTE (128)
+#define RSA3K_SIGNATURE_PADDED_SIZE_BYTE (RSA3K_SIGNATURE_SIZE_BYTE + RSA3K_SIGNATURE_PADDING_SIZE_BYTE)
+// Size in bytes for RSA 3K (RSA_3K struct from bootrom_pkc_parameters.h)
+#define RSA3K_PK_SIZE_BYTE (2048)
+
+typedef struct
+{
+ bool bHsFmc; // Whether HS Bootloader/First Mutable Code is used or not. The remaining fields are valid only when this is set.
+ u8 padding[3]; // For 32 bit alignment
+ u16 pkcAlgo; // Which PKC Algorithm is used. RSA3K for now
+ u16 pkcAlgoVersion; // Which version of the PKC Algorithm is used.
+ u32 engIdMask;
+ u32 ucodeId;
+ u32 fuseVer; // fuse version corresponding to the signature below
+ u8 pkcSignature[RSA3K_SIGNATURE_PADDED_SIZE_BYTE]; // Max size, pkcAlgo determines the size of the signature.
+ u8 pkcKey[RSA3K_PK_SIZE_BYTE]; // (optional) Max size, pkcAlgo determines the size used. See: pkc_verification_parameters::pk.
+ u8 rsvd[4]; +} HS_FMC_PARAMS; + +typedef struct +{ + bool bHsOvlSigBlobPresent; // Indicates if Runtime patching of HS Ovl Sig Blob is supported + u32 hsOvlSigBlobOffset; // HS Ovl Sig Blob Offset of single version of signature for All HS overlays + u32 hsOvlSigBlobSize; // HS Ovl Sig Blob size +} HS_OVL_SIG_BLOB_PARAMS; + +typedef struct +{ + /** Code/data signature details of each LS falcon */ + struct lsf_ucode_desc_wrapper signature; + /** + * Offset from non-WPR base where UCODE is located, + * Offset = Non-WPR base + #LSF_LSB_HEADER_ALIGNMENT + + * #LSF_UCODE_DATA_ALIGNMENT + ( #LSF_BL_DATA_ALIGNMENT * + * LS Falcon index) + */ + u32 ucode_off; + /** + * Size of LS Falcon ucode, required to perform signature verification + * of LS Falcon ucode by ACR HS. + */ + u32 ucode_size; + /** + * Size of LS Falcon ucode data, required to perform signature + * verification of LS Falcon ucode data by ACR HS. + */ + u32 data_size; + /** + * Size of bootloader that needs to be loaded by bootstrap owner. + * + * On GV11B, respective LS Falcon BL code size should not exceed + * below mentioned size. + * FALCON_ID_FECS IMEM size - 32k + * FALCON_ID_GPCCS IMEM size - 16k + */ + u32 bl_code_size; + /** BL starting virtual address. Need for tagging */ + u32 bl_imem_off; + /** + * Offset from non-WPR base holding the BL data + * Offset = (Non-WPR base + #LSF_LSB_HEADER_ALIGNMENT + + * #LSF_UCODE_DATA_ALIGNMENT + #LSF_BL_DATA_ALIGNMENT) * + * #LS Falcon index + */ + u32 bl_data_off; + /** + * Size of BL data, BL data will be copied to LS Falcon DMEM of + * bl data size + * + * On GV11B, respective LS Falcon BL data size should not exceed + * below mentioned size. + * FALCON_ID_FECS DMEM size - 8k + * FALCON_ID_GPCCS DMEM size - 5k + */ + u32 bl_data_size; + + u32 manifest_size; + /** + * Offset from non-WPR base address where UCODE Application code is + * located. + */ + u32 app_code_off; + /** + * Size of UCODE Application code. + * + * On GV11B, FECS/GPCCS LS Falcon app code size should not exceed + * below mentioned size. + * FALCON_ID_FECS IMEM size - 32k + * FALCON_ID_GPCCS IMEM size - 16k + */ + u32 app_code_size; + /** + * Offset from non-WPR base address where UCODE Application data + * is located + */ + u32 app_data_off; + /** + * Size of UCODE Application data. + * + * On GV11B, respective LS Falcon app data size should not exceed + * below mentioned size. + * FALCON_ID_FECS DMEM size - 8k + * FALCON_ID_GPCCS DMEM size - 5k + */ + u32 app_data_size; + + u32 app_imem_off; + + u32 app_dmem_off; + /** + * NV_FLCN_ACR_LSF_FLAG_LOAD_CODE_AT_0 - Load BL at 0th IMEM offset + * NV_FLCN_ACR_LSF_FLAG_DMACTL_REQ_CTX - This falcon requires a ctx + * before issuing DMAs. 
+ * NV_FLCN_ACR_LSF_FLAG_FORCE_PRIV_LOAD - Use priv loading method
+ * instead of bootloader/DMAs
+ */
+ u32 flags;
+
+ u32 monitor_code_off;
+
+ u32 monitor_data_off;
+
+ u32 manifest_off;
+
+ HS_FMC_PARAMS hs_fmc_params;
+
+ HS_OVL_SIG_BLOB_PARAMS hs_ovl_sig_blob_params;
+
+ u8 rsvd[20];
+} LSF_LSB_HEADER_V2, *PLSF_LSB_HEADER_V2;
+
+// The wrapper for LSF_LSB_HEADER
+typedef struct
+{
+ struct acr_generic_header genericHdr;
+ union
+ {
+ LSF_LSB_HEADER_V2 lsfLsbHdrV2;
+ } u;
+} LSF_LSB_HEADER_WRAPPER, *PLSF_LSB_HEADER_WRAPPER;
+
+typedef struct
+{
+ u32 regionID;
+ u32 startAddress;
+ u32 endAddress;
+ u64 nonWprBlobStart;
+ u32 nonWprBlobSize;
+ u32 reserved[4];
+} RM_RISCV_ACR_REGION_DESC, *PRM_RISCV_ACR_REGION_DESC;
+
+typedef struct
+{
+ RM_RISCV_ACR_REGION_DESC regionDesc[2];
+ u32 mode;
+ u32 reserved[16];
+} RM_RISCV_ACR_DESC_V1, *PRM_RISCV_ACR_DESC_V1;
+
+typedef struct
+{
+ struct acr_generic_header genericHdr;
+ union
+ {
+ RM_RISCV_ACR_DESC_V1 riscvAcrDescV1;
+ } acrDesc;
+} RM_RISCV_ACR_DESC_WRAPPER, *PRM_RISCV_ACR_DESC_WRAPPER;
+
+/**
+ * Maximum WPR Header size
+ */
+#define LSF_WPR_HEADERS_WRAPPER_TOTAL_SIZE_MAX \
+ (ALIGN_UP(((u32)sizeof(LSF_WPR_HEADER_WRAPPER) * FALCON_ID_END), \
+ LSF_WPR_HEADER_ALIGNMENT))
+
+// Wrapper and internal structs size defines
+#define ACR_GENERIC_HEADER_SIZE_BYTE sizeof(struct acr_generic_header)
+
+#define LSF_LSB_HEADER_V2_SIZE_BYTE sizeof(LSF_LSB_HEADER_V2)
+#define RM_RISCV_ACR_DESC_V1_SIZE_BYTE sizeof(RM_RISCV_ACR_DESC_V1)
+#define LSF_WPR_HEADER_SIZE_BYTE sizeof(struct lsf_wpr_header)
+#define LSF_UCODE_DESC_V2_SIZE_BYTE sizeof(struct lsf_ucode_desc_v2)
+
+#define LSF_UCODE_DESC_WRAPPER_V2_SIZE_BYTE (LSF_UCODE_DESC_V2_SIZE_BYTE + ACR_GENERIC_HEADER_SIZE_BYTE)
+#define LSF_WPR_HEADER_WRAPPER_SIZE_BYTE (LSF_WPR_HEADER_SIZE_BYTE + ACR_GENERIC_HEADER_SIZE_BYTE)
+#define RM_RISCV_ACR_DESC_V1_WRAPPER_SIZE_BYTE (RM_RISCV_ACR_DESC_V1_SIZE_BYTE + ACR_GENERIC_HEADER_SIZE_BYTE)
+#define LSF_LSB_HEADER_WRAPPER_V2_SIZE_BYTE (LSF_LSB_HEADER_V2_SIZE_BYTE + ACR_GENERIC_HEADER_SIZE_BYTE)
+
+#endif /* NVGPU_ACR_INTERFACE_V2_H */