Files
linux-nvgpu/drivers/gpu/nvgpu/common/gr/gr_falcon.c
Seshendra Gadagottu 0f1726ae1f gpu: nvgpu: support for non-secure/secure ctxsw loading
Code for secure/non-secure ctxsw booting was spread across gr_gk20a.c
and gr_gm20b.c. With this change, that code is moved into the gr falcon unit.

Ctxsw loading is now supported through two common functions:
1. Non-secure boot:
   int nvgpu_gr_falcon_load_ctxsw_ucode(struct gk20a *g);
2. Secure boot:
   int nvgpu_gr_falcon_load_secure_ctxsw_ucode(struct gk20a *g);

The gr ops function "int (*load_ctxsw_ucode)(struct gk20a *g);" is moved to
gr falcon ops, and each chip HAL sets it to the secure or non-secure boot
function.
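
For example, a chip HAL then picks the boot flavor when its ops table is
filled in (a minimal sketch; the initializer shown is an assumption, only
the op name comes from this change):

	/* hypothetical chip HAL ops wiring, for a secure-boot capable chip */
	.gr.falcon.load_ctxsw_ucode = nvgpu_gr_falcon_load_secure_ctxsw_ucode,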

Non-secure booting: nvgpu_gr_falcon_load_ctxsw_ucode supports ctxsw loading
via two methods: bit-banging the ucode into falcon memory, or booting
through the bootloader.

A. Common and HAL functions for non-secure bit-banging ctxsw loading:
Common: static void nvgpu_gr_falcon_load_dmem(struct gk20a *g) ->
Hals: void (*load_gpccs_dmem)(struct gk20a *g,
			 const u32 *ucode_u32_data, u32 size);
      void (*load_fecs_dmem)(struct gk20a *g,
			const u32 *ucode_u32_data, u32 size);
Common: static void nvgpu_gr_falcon_load_imem(struct gk20a *g) ->
Hals:  void (*load_gpccs_imem)(struct gk20a *g,
			 const u32 *ucode_u32_data, u32 size);
       void (*load_fecs_imem)(struct gk20a *g,
			const u32 *ucode_u32_data, u32 size);
Other basic HALs:
void (*configure_fmodel)(struct gk20a *g); -> configure fmodel for ctxsw loading
void (*start_ucode)(struct gk20a *g);  -> start running ctxsw ucode
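
In this mode the DMEM/IMEM HALs listed above reduce to auto-incremented
register write loops. A minimal sketch of a load_fecs_dmem HAL, modeled on
the pre-split gr_gk20a.c code (the gr_fecs_dmem* accessor names are
assumptions from the gk20a hw headers):

	static void gxxxx_gr_falcon_load_fecs_dmem(struct gk20a *g,
			const u32 *ucode_u32_data, u32 size)
	{
		u32 i;

		/* DMEM port 0, offset 0, auto-increment on every write */
		gk20a_writel(g, gr_fecs_dmemc_r(0), gr_fecs_dmemc_offs_f(0U) |
				gr_fecs_dmemc_blk_f(0U) |
				gr_fecs_dmemc_aincw_f(1U));

		/* bit-bang the data image one 32-bit word at a time */
		for (i = 0U; i < size; i++) {
			gk20a_writel(g, gr_fecs_dmemd_r(0), ucode_u32_data[i]);
		}
	}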

B. Common and HAL functions for non-secure ctxsw loading with bootloader:
First fetch the ctxsw ucode using nvgpu_gr_falcon_init_ctxsw_ucode, then
Common: static void nvgpu_gr_falcon_load_with_bootloader(struct gk20a *g)
        void nvgpu_gr_falcon_bind_instblk(struct gk20a *g) ->
Hal: void (*bind_instblk)(struct gk20a *g, struct nvgpu_mem *mem, u64 inst_ptr);

Common: nvgpu_gr_falcon_load_ctxsw_ucode_segments ->
		nvgpu_gr_falcon_load_ctxsw_ucode_header ->
		nvgpu_gr_falcon_load_ctxsw_ucode_boot for both fecs and gpccs ->
Hals: void (*load_ctxsw_ucode_header)(struct gk20a *g, u32 reg_offset,
	u32 boot_signature, u32 addr_code32, u32 addr_data32,
	u32 code_size, u32 data_size);
void (*load_ctxsw_ucode_boot)(struct gk20a *g, u64 reg_offset, u32 boot_entry,
	u32 addr_load32, u32 blocks, u32 dst);
Another basic HAL, to get the gpccs start register offset:
  u32 (*get_gpccs_start_reg_offset)(void);
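
In this path the boot HAL programs the falcon DMA engine rather than
bit-banging IMEM. A hedged sketch along the lines of the pre-split
gr_gk20a.c code (the gr_fecs_dmatrf*/bootvec accessor names are
assumptions from the gk20a hw headers):

	static void gxxxx_gr_falcon_load_ctxsw_ucode_boot(struct gk20a *g,
			u64 reg_offset, u32 boot_entry, u32 addr_load32,
			u32 blocks, u32 dst)
	{
		u32 base = u64_lo32(reg_offset);
		u32 b;

		/* surface base (in 256-byte units) the falcon DMAs from */
		gk20a_writel(g, base + gr_fecs_dmatrfbase_r(), addr_load32);

		/* pull the boot image into IMEM, 256 bytes per transfer */
		for (b = 0U; b < blocks; b++) {
			gk20a_writel(g, base + gr_fecs_dmatrfmoffs_r(),
					dst + (b << 8));
			gk20a_writel(g, base + gr_fecs_dmatrffboffs_r(),
					dst + (b << 8));
			gk20a_writel(g, base + gr_fecs_dmatrfcmd_r(),
					gr_fecs_dmatrfcmd_imem_f(1U) |
					gr_fecs_dmatrfcmd_size_f(6U));
		}

		/* hand the falcon its boot vector */
		gk20a_writel(g, base + gr_fecs_bootvec_r(),
				gr_fecs_bootvec_vec_f(boot_entry));
	}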

C. Secure booting is supported with the PMU and ACR, using the following
additional common function in gr falcon:
static void nvgpu_gr_falcon_load_gpccs_with_bootloader(struct gk20a *g) ->
  nvgpu_gr_falcon_bind_instblk and nvgpu_gr_falcon_load_ctxsw_ucode_segments
Additional basic HALs:
void (*start_gpccs)(struct gk20a *g);
void (*start_fecs)(struct gk20a *g);
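
Taken together, a secure recovery boot with an LS PMU reduces to roughly
this call sequence (error handling elided; the full branching is in
nvgpu_gr_falcon_load_secure_ctxsw_ucode below):

	/* recovery path, NVGPU_SEC_SECUREGPCCS enabled */
	nvgpu_gr_falcon_bind_instblk(g);	/* bind WPR VA inst block */
	err = g->ops.pmu.load_lsfalcon_ucode(g,
			BIT32(FALCON_ID_FECS) | BIT32(FALCON_ID_GPCCS));
	g->ops.gr.falcon.start_gpccs(g);
	g->ops.gr.falcon.start_fecs(g);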

The following op is removed from gr ops, since chip HALs no longer need to
set it:
void (*falcon_load_ucode)(struct gk20a *g, u64 addr_base,
	struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset);

This is now handled by the static common function:
static int nvgpu_gr_falcon_copy_ctxsw_ucode_segments(struct gk20a *g,
	struct nvgpu_mem *dst, struct gk20a_ctxsw_ucode_segments *segments,
	u32 *bootimage, u32 *code, u32 *data)

JIRA NVGPU-1881

Change-Id: I895a03faaf1a21286316befde24765c8b55075cf
Signed-off-by: Seshendra Gadagottu <sgadagottu@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2083388
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
2019-03-29 01:15:50 -07:00


/*
 * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
#include <nvgpu/gk20a.h>
#include <nvgpu/gr/gr_falcon.h>
#include <nvgpu/debug.h>
#include <nvgpu/firmware.h>
#include <nvgpu/mm.h>

static int nvgpu_gr_falcon_init_ctxsw_ucode_vaspace(struct gk20a *g)
{
	struct mm_gk20a *mm = &g->mm;
	struct vm_gk20a *vm = mm->pmu.vm;
	struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
	int err;

	err = g->ops.mm.alloc_inst_block(g, &ucode_info->inst_blk_desc);
	if (err != 0) {
		return err;
	}

	g->ops.mm.init_inst_block(&ucode_info->inst_blk_desc, vm, 0);

	/* Map ucode surface to GMMU */
	ucode_info->surface_desc.gpu_va = nvgpu_gmmu_map(vm,
					&ucode_info->surface_desc,
					ucode_info->surface_desc.size,
					0, /* flags */
					gk20a_mem_flag_read_only,
					false,
					ucode_info->surface_desc.aperture);
	if (ucode_info->surface_desc.gpu_va == 0ULL) {
		nvgpu_err(g, "failed to update gmmu ptes");
		return -ENOMEM;
	}

	return 0;
}

static void nvgpu_gr_falcon_init_ctxsw_ucode_segment(
	struct gk20a_ctxsw_ucode_segment *p_seg, u32 *offset, u32 size)
{
	p_seg->offset = *offset;
	p_seg->size = size;
	/* segments are packed back to back at 256-byte granularity */
	*offset = ALIGN(*offset + size, SZ_256);
}

static void nvgpu_gr_falcon_init_ctxsw_ucode_segments(
	struct gk20a_ctxsw_ucode_segments *segments, u32 *offset,
	struct gk20a_ctxsw_bootloader_desc *bootdesc,
	u32 code_size, u32 data_size)
{
	u32 boot_size = ALIGN(bootdesc->size, sizeof(u32));

	segments->boot_entry = bootdesc->entry_point;
	segments->boot_imem_offset = bootdesc->imem_offset;
	nvgpu_gr_falcon_init_ctxsw_ucode_segment(&segments->boot,
							offset, boot_size);
	nvgpu_gr_falcon_init_ctxsw_ucode_segment(&segments->code,
							offset, code_size);
	nvgpu_gr_falcon_init_ctxsw_ucode_segment(&segments->data,
							offset, data_size);
}

static int nvgpu_gr_falcon_copy_ctxsw_ucode_segments(
	struct gk20a *g,
	struct nvgpu_mem *dst,
	struct gk20a_ctxsw_ucode_segments *segments,
	u32 *bootimage,
	u32 *code, u32 *data)
{
	unsigned int i;

	nvgpu_mem_wr_n(g, dst, segments->boot.offset, bootimage,
			segments->boot.size);
	nvgpu_mem_wr_n(g, dst, segments->code.offset, code,
			segments->code.size);
	nvgpu_mem_wr_n(g, dst, segments->data.offset, data,
			segments->data.size);

	/* compute a "checksum" for the boot binary to detect its version */
	segments->boot_signature = 0;
	for (i = 0; i < segments->boot.size / sizeof(u32); i++) {
		segments->boot_signature += bootimage[i];
	}

	return 0;
}

int nvgpu_gr_falcon_init_ctxsw_ucode(struct gk20a *g)
{
	struct mm_gk20a *mm = &g->mm;
	struct vm_gk20a *vm = mm->pmu.vm;
	struct gk20a_ctxsw_bootloader_desc *fecs_boot_desc;
	struct gk20a_ctxsw_bootloader_desc *gpccs_boot_desc;
	struct nvgpu_firmware *fecs_fw;
	struct nvgpu_firmware *gpccs_fw;
	u32 *fecs_boot_image;
	u32 *gpccs_boot_image;
	struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
	u32 ucode_size;
	int err = 0;

	fecs_fw = nvgpu_request_firmware(g, GK20A_FECS_UCODE_IMAGE, 0);
	if (fecs_fw == NULL) {
		nvgpu_err(g, "failed to load fecs ucode!!");
		return -ENOENT;
	}

	fecs_boot_desc = (void *)fecs_fw->data;
	fecs_boot_image = (void *)(fecs_fw->data +
				sizeof(struct gk20a_ctxsw_bootloader_desc));

	gpccs_fw = nvgpu_request_firmware(g, GK20A_GPCCS_UCODE_IMAGE, 0);
	if (gpccs_fw == NULL) {
		nvgpu_release_firmware(g, fecs_fw);
		nvgpu_err(g, "failed to load gpccs ucode!!");
		return -ENOENT;
	}

	gpccs_boot_desc = (void *)gpccs_fw->data;
	gpccs_boot_image = (void *)(gpccs_fw->data +
				sizeof(struct gk20a_ctxsw_bootloader_desc));

	ucode_size = 0;
	nvgpu_gr_falcon_init_ctxsw_ucode_segments(&ucode_info->fecs,
		&ucode_size, fecs_boot_desc,
		g->netlist_vars->ucode.fecs.inst.count * (u32)sizeof(u32),
		g->netlist_vars->ucode.fecs.data.count * (u32)sizeof(u32));
	nvgpu_gr_falcon_init_ctxsw_ucode_segments(&ucode_info->gpccs,
		&ucode_size, gpccs_boot_desc,
		g->netlist_vars->ucode.gpccs.inst.count * (u32)sizeof(u32),
		g->netlist_vars->ucode.gpccs.data.count * (u32)sizeof(u32));

	err = nvgpu_dma_alloc_sys(g, ucode_size, &ucode_info->surface_desc);
	if (err != 0) {
		goto clean_up;
	}

	nvgpu_gr_falcon_copy_ctxsw_ucode_segments(g, &ucode_info->surface_desc,
		&ucode_info->fecs,
		fecs_boot_image,
		g->netlist_vars->ucode.fecs.inst.l,
		g->netlist_vars->ucode.fecs.data.l);

	nvgpu_release_firmware(g, fecs_fw);
	fecs_fw = NULL;

	nvgpu_gr_falcon_copy_ctxsw_ucode_segments(g, &ucode_info->surface_desc,
		&ucode_info->gpccs,
		gpccs_boot_image,
		g->netlist_vars->ucode.gpccs.inst.l,
		g->netlist_vars->ucode.gpccs.data.l);

	nvgpu_release_firmware(g, gpccs_fw);
	gpccs_fw = NULL;

	err = nvgpu_gr_falcon_init_ctxsw_ucode_vaspace(g);
	if (err != 0) {
		goto clean_up;
	}

	return 0;

clean_up:
	if (ucode_info->surface_desc.gpu_va != 0ULL) {
		nvgpu_gmmu_unmap(vm, &ucode_info->surface_desc,
				ucode_info->surface_desc.gpu_va);
	}
	nvgpu_dma_free(g, &ucode_info->surface_desc);

	nvgpu_release_firmware(g, gpccs_fw);
	gpccs_fw = NULL;
	nvgpu_release_firmware(g, fecs_fw);
	fecs_fw = NULL;

	return err;
}

static void nvgpu_gr_falcon_load_dmem(struct gk20a *g)
{
	u32 ucode_u32_size;
	const u32 *ucode_u32_data;

	nvgpu_log_fn(g, " ");

	ucode_u32_size = g->netlist_vars->ucode.gpccs.data.count;
	ucode_u32_data = (const u32 *)g->netlist_vars->ucode.gpccs.data.l;
	g->ops.gr.falcon.load_gpccs_dmem(g, ucode_u32_data, ucode_u32_size);

	ucode_u32_size = g->netlist_vars->ucode.fecs.data.count;
	ucode_u32_data = (const u32 *)g->netlist_vars->ucode.fecs.data.l;
	g->ops.gr.falcon.load_fecs_dmem(g, ucode_u32_data, ucode_u32_size);

	nvgpu_log_fn(g, "done");
}

static void nvgpu_gr_falcon_load_imem(struct gk20a *g)
{
	u32 ucode_u32_size;
	const u32 *ucode_u32_data;

	nvgpu_log_fn(g, " ");

	ucode_u32_size = g->netlist_vars->ucode.gpccs.inst.count;
	ucode_u32_data = (const u32 *)g->netlist_vars->ucode.gpccs.inst.l;
	g->ops.gr.falcon.load_gpccs_imem(g, ucode_u32_data, ucode_u32_size);

	ucode_u32_size = g->netlist_vars->ucode.fecs.inst.count;
	ucode_u32_data = (const u32 *)g->netlist_vars->ucode.fecs.inst.l;
	g->ops.gr.falcon.load_fecs_imem(g, ucode_u32_data, ucode_u32_size);

	nvgpu_log_fn(g, "done");
}

static void nvgpu_gr_falcon_bind_instblk(struct gk20a *g)
{
	struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
	u64 inst_ptr;

	inst_ptr = nvgpu_inst_block_addr(g, &ucode_info->inst_blk_desc);
	g->ops.gr.falcon.bind_instblk(g, &ucode_info->inst_blk_desc,
					inst_ptr);
}

static void nvgpu_gr_falcon_load_ctxsw_ucode_header(struct gk20a *g,
	u64 addr_base, struct gk20a_ctxsw_ucode_segments *segments,
	u32 reg_offset)
{
	u32 addr_code32 = u64_lo32((addr_base + segments->code.offset) >> 8);
	u32 addr_data32 = u64_lo32((addr_base + segments->data.offset) >> 8);

	g->ops.gr.falcon.load_ctxsw_ucode_header(g, reg_offset,
		segments->boot_signature, addr_code32, addr_data32,
		segments->code.size, segments->data.size);
}

static void nvgpu_gr_falcon_load_ctxsw_ucode_boot(struct gk20a *g,
	u64 addr_base, struct gk20a_ctxsw_ucode_segments *segments,
	u32 reg_offset)
{
	u32 addr_load32 = u64_lo32((addr_base + segments->boot.offset) >> 8);
	/* number of 256-byte blocks needed to hold the boot image */
	u32 blocks = ((segments->boot.size + 0xFFU) & ~0xFFU) >> 8;
	u32 dst = segments->boot_imem_offset;

	g->ops.gr.falcon.load_ctxsw_ucode_boot(g, reg_offset,
		segments->boot_entry, addr_load32, blocks, dst);
}

static void nvgpu_gr_falcon_load_ctxsw_ucode_segments(
	struct gk20a *g, u64 addr_base,
	struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset)
{
	/* Copy falcon bootloader into dmem */
	nvgpu_gr_falcon_load_ctxsw_ucode_header(g, addr_base,
					segments, reg_offset);
	nvgpu_gr_falcon_load_ctxsw_ucode_boot(g,
					addr_base, segments, reg_offset);
}

static void nvgpu_gr_falcon_load_with_bootloader(struct gk20a *g)
{
	struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
	u64 addr_base = ucode_info->surface_desc.gpu_va;

	nvgpu_gr_falcon_bind_instblk(g);

	nvgpu_gr_falcon_load_ctxsw_ucode_segments(g, addr_base,
		&g->ctxsw_ucode_info.fecs, 0);

	nvgpu_gr_falcon_load_ctxsw_ucode_segments(g, addr_base,
		&g->ctxsw_ucode_info.gpccs,
		g->ops.gr.falcon.get_gpccs_start_reg_offset());
}

int nvgpu_gr_falcon_load_ctxsw_ucode(struct gk20a *g)
{
	int err;

	nvgpu_log_fn(g, " ");

	if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
		g->ops.gr.falcon.configure_fmodel(g);
	}

	/*
	 * In case bootloader is not supported, revert to the old way of
	 * loading gr ucode, without the faster bootstrap routine.
	 */
	if (!nvgpu_is_enabled(g, NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP)) {
		nvgpu_gr_falcon_load_dmem(g);
		nvgpu_gr_falcon_load_imem(g);
		g->ops.gr.falcon.start_ucode(g);
	} else {
		if (!g->gr.skip_ucode_init) {
			err = nvgpu_gr_falcon_init_ctxsw_ucode(g);
			if (err != 0) {
				return err;
			}
		}
		nvgpu_gr_falcon_load_with_bootloader(g);
		g->gr.skip_ucode_init = true;
	}

	nvgpu_log_fn(g, "done");
	return 0;
}

static void nvgpu_gr_falcon_load_gpccs_with_bootloader(struct gk20a *g)
{
	struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
	u64 addr_base = ucode_info->surface_desc.gpu_va;

	nvgpu_gr_falcon_bind_instblk(g);

	nvgpu_gr_falcon_load_ctxsw_ucode_segments(g, addr_base,
		&g->ctxsw_ucode_info.gpccs,
		g->ops.gr.falcon.get_gpccs_start_reg_offset());
}

int nvgpu_gr_falcon_load_secure_ctxsw_ucode(struct gk20a *g)
{
	int err = 0;
	u8 falcon_id_mask = 0;

	nvgpu_log_fn(g, " ");

	if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) {
		g->ops.gr.falcon.configure_fmodel(g);
	}

	g->pmu_lsf_loaded_falcon_id = 0;
	if (nvgpu_is_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE)) {
		/* this must be recovery so bootstrap fecs and gpccs */
		if (!nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) {
			nvgpu_gr_falcon_load_gpccs_with_bootloader(g);
			err = g->ops.pmu.load_lsfalcon_ucode(g,
					BIT32(FALCON_ID_FECS));
		} else {
			/* bind WPR VA inst block */
			nvgpu_gr_falcon_bind_instblk(g);
			if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SEC2_RTOS)) {
				err = nvgpu_sec2_bootstrap_ls_falcons(g,
						&g->sec2, FALCON_ID_FECS);
				err = nvgpu_sec2_bootstrap_ls_falcons(g,
						&g->sec2, FALCON_ID_GPCCS);
			} else if (g->support_ls_pmu) {
				err = g->ops.pmu.load_lsfalcon_ucode(g,
						BIT32(FALCON_ID_FECS) |
						BIT32(FALCON_ID_GPCCS));
			} else {
				err = nvgpu_acr_bootstrap_hs_acr(g, g->acr);
				if (err != 0) {
					nvgpu_err(g,
						"ACR GR LSF bootstrap failed");
				}
			}
		}
		if (err != 0) {
			nvgpu_err(g, "Unable to recover GR falcon");
			return err;
		}
	} else {
		/* cold boot or rg exit */
		nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, true);
		if (!nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) {
			nvgpu_gr_falcon_load_gpccs_with_bootloader(g);
		} else {
			/* bind WPR VA inst block */
			nvgpu_gr_falcon_bind_instblk(g);
			if (nvgpu_acr_is_lsf_lazy_bootstrap(g, g->acr,
					FALCON_ID_FECS)) {
				falcon_id_mask |= BIT8(FALCON_ID_FECS);
			}
			if (nvgpu_acr_is_lsf_lazy_bootstrap(g, g->acr,
					FALCON_ID_GPCCS)) {
				falcon_id_mask |= BIT8(FALCON_ID_GPCCS);
			}
			if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SEC2_RTOS)) {
				err = nvgpu_sec2_bootstrap_ls_falcons(g,
						&g->sec2, FALCON_ID_FECS);
				err = nvgpu_sec2_bootstrap_ls_falcons(g,
						&g->sec2, FALCON_ID_GPCCS);
			} else if (g->support_ls_pmu) {
				err = g->ops.pmu.load_lsfalcon_ucode(g,
						falcon_id_mask);
			} else {
				/* GR falcons bootstrapped by ACR */
				err = 0;
			}
			if (err != 0) {
				nvgpu_err(g, "Unable to boot GPCCS");
				return err;
			}
		}
	}

	g->ops.gr.falcon.start_gpccs(g);
	g->ops.gr.falcon.start_fecs(g);

	nvgpu_log_fn(g, "done");
	return 0;
}