From 0f1726ae1f64eddb1008b2ded9bda67fddccffdd Mon Sep 17 00:00:00 2001
From: Seshendra Gadagottu
Date: Wed, 27 Mar 2019 18:56:51 -0700
Subject: [PATCH] gpu: nvgpu: support for non-secure/secure ctxsw loading

Code for secure and non-secure ctxsw booting was spread across gr_gk20a.c
and gr_gm20b.c. With this change the code is moved to the gr falcon unit.

Ctxsw loading is now supported through two common functions:
1. Non-secure boot: int nvgpu_gr_falcon_load_ctxsw_ucode(struct gk20a *g);
2. Secure boot: int nvgpu_gr_falcon_load_secure_ctxsw_ucode(struct gk20a *g);

The gr ops function "int (*load_ctxsw_ucode)(struct gk20a *g);" is moved to
the gr falcon ops, and each chip HAL sets it to the secure or non-secure
variant.

Non-secure booting: nvgpu_gr_falcon_load_ctxsw_ucode supports ctxsw loading
with two methods: bit-banging the ucode, or booting with the bootloader.

A. Common and HAL functions for non-secure bit-banged ctxsw loading:
Common: static void nvgpu_gr_falcon_load_dmem(struct gk20a *g) ->
HALs: void (*load_gpccs_dmem)(struct gk20a *g, const u32 *ucode_u32_data,
          u32 size);
      void (*load_fecs_dmem)(struct gk20a *g, const u32 *ucode_u32_data,
          u32 size);
Common: static void nvgpu_gr_falcon_load_imem(struct gk20a *g) ->
HALs: void (*load_gpccs_imem)(struct gk20a *g, const u32 *ucode_u32_data,
          u32 size);
      void (*load_fecs_imem)(struct gk20a *g, const u32 *ucode_u32_data,
          u32 size);
Other basic HALs:
void (*configure_fmodel)(struct gk20a *g); -> configure fmodel for ctxsw loading
void (*start_ucode)(struct gk20a *g); -> start running ctxsw ucode

B. Common and HAL functions for non-secure ctxsw loading with the bootloader:
First fetch the ctxsw ucode with nvgpu_gr_falcon_init_ctxsw_ucode, then
Common: static void nvgpu_gr_falcon_load_with_bootloader(struct gk20a *g)
        void nvgpu_gr_falcon_bind_instblk(struct gk20a *g) ->
HAL: void (*bind_instblk)(struct gk20a *g, struct nvgpu_mem *mem,
         u64 inst_ptr);
Common: nvgpu_gr_falcon_load_ctxsw_ucode_segments ->
        nvgpu_gr_falcon_load_ctxsw_ucode_header ->
        nvgpu_gr_falcon_load_ctxsw_ucode_boot, for both fecs and gpccs ->
HALs: void (*load_ctxsw_ucode_header)(struct gk20a *g, u32 reg_offset,
          u32 boot_signature, u32 addr_code32, u32 addr_data32,
          u32 code_size, u32 data_size);
      void (*load_ctxsw_ucode_boot)(struct gk20a *g, u32 reg_offset,
          u32 boot_entry, u32 addr_load32, u32 blocks, u32 dst);
Other basic HAL to get the gpccs start register offset:
u32 (*get_gpccs_start_reg_offset)(void);

C. Secure booting is supported with gpmu and acr, plus one additional
common function in the gr falcon unit (listed after the sketch below).
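Before listing the secure-path helper, here is a condensed sketch of how a
chip HAL ends up wiring the new gr.falcon ops described in sections A and B.
It is abridged from the gm20b HAL hunk later in this patch: only the
ctxsw-loading ops are shown, the initializer name example_gm20b_ops is
hypothetical, and the surrounding gpu_ops fields are omitted.

    /*
     * Sketch, abridged from the gm20b HAL hunk in this patch: the new
     * gr.falcon ops a chip now fills in. "example_gm20b_ops" is a
     * hypothetical name; the real table is gm20b_ops in hal_gm20b.c.
     */
    static const struct gpu_ops example_gm20b_ops = {
        .gr = {
            .falcon = {
                /* A. non-secure, bit-banged load of netlist ucode */
                .load_gpccs_dmem = gm20b_gr_falcon_load_gpccs_dmem,
                .load_fecs_dmem = gm20b_gr_falcon_load_fecs_dmem,
                .load_gpccs_imem = gm20b_gr_falcon_load_gpccs_imem,
                .load_fecs_imem = gm20b_gr_falcon_load_fecs_imem,
                .configure_fmodel = gm20b_gr_falcon_configure_fmodel,
                .start_ucode = gm20b_gr_falcon_start_ucode,
                /* B. non-secure load through the falcon bootloader */
                .bind_instblk = gm20b_gr_falcon_bind_instblk,
                .load_ctxsw_ucode_header =
                    gm20b_gr_falcon_load_ctxsw_ucode_header,
                .load_ctxsw_ucode_boot =
                    gm20b_gr_falcon_load_ctxsw_ucode_boot,
                .get_gpccs_start_reg_offset =
                    gm20b_gr_falcon_get_gpccs_start_reg_offset,
                /* also used by the secure path and recovery */
                .start_gpccs = gm20b_gr_falcon_start_gpccs,
                .start_fecs = gm20b_gr_falcon_start_fecs,
                /* common entry point; init_hal() may override this with
                 * nvgpu_gr_falcon_load_secure_ctxsw_ucode */
                .load_ctxsw_ucode = nvgpu_gr_falcon_load_ctxsw_ucode,
            },
        },
    };

The additional common function used by the secure path is: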
static void nvgpu_gr_falcon_load_gpccs_with_bootloader(struct gk20a *g) -> nvgpu_gr_falcon_bind_instblk and nvgpu_gr_falcon_load_ctxsw_ucode_segments Additional basic hals: void (*start_gpccs)(struct gk20a *g); void (*start_fecs)(struct gk20a *g); Following ops from gr is removed, since it is not required to set by chip hals: void (*falcon_load_ucode)(struct gk20a *g, u64 addr_base, struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset); Now this is handled by static common function: static int nvgpu_gr_falcon_copy_ctxsw_ucode_segments( struct gk20a *g, struct nvgpu_mem *dst, struct gk20a_ctxsw_ucode_segments *segments, u32 *bootimage, u32 *code, u32 *data) JIRA NVGPU-1881 Change-Id: I895a03faaf1a21286316befde24765c8b55075cf Signed-off-by: Seshendra Gadagottu Reviewed-on: https://git-master.nvidia.com/r/2083388 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/common/gr/gr_falcon.c | 234 ++++++++++ .../nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c | 5 +- .../nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c | 5 +- drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 408 +----------------- drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 8 - drivers/gpu/nvgpu/gm20b/gr_gm20b.c | 133 ------ drivers/gpu/nvgpu/gm20b/gr_gm20b.h | 1 - drivers/gpu/nvgpu/gm20b/hal_gm20b.c | 25 +- drivers/gpu/nvgpu/gp10b/hal_gp10b.c | 24 +- drivers/gpu/nvgpu/gv100/hal_gv100.c | 22 +- drivers/gpu/nvgpu/gv11b/hal_gv11b.c | 24 +- .../gpu/nvgpu/hal/gr/falcon/gr_falcon_gm20b.c | 397 +++++++++++++++++ .../gpu/nvgpu/hal/gr/falcon/gr_falcon_gm20b.h | 21 + drivers/gpu/nvgpu/include/nvgpu/gk20a.h | 33 +- .../gpu/nvgpu/include/nvgpu/gr/gr_falcon.h | 5 +- drivers/gpu/nvgpu/tu104/hal_tu104.c | 23 +- 16 files changed, 788 insertions(+), 580 deletions(-) diff --git a/drivers/gpu/nvgpu/common/gr/gr_falcon.c b/drivers/gpu/nvgpu/common/gr/gr_falcon.c index 08cb155e4..86bcd47b5 100644 --- a/drivers/gpu/nvgpu/common/gr/gr_falcon.c +++ b/drivers/gpu/nvgpu/common/gr/gr_falcon.c @@ -195,3 +195,237 @@ clean_up: return err; } + +static void nvgpu_gr_falcon_load_dmem(struct gk20a *g) +{ + u32 ucode_u32_size; + const u32 *ucode_u32_data; + + nvgpu_log_fn(g, " "); + + ucode_u32_size = g->netlist_vars->ucode.gpccs.data.count; + ucode_u32_data = (const u32 *)g->netlist_vars->ucode.gpccs.data.l; + g->ops.gr.falcon.load_gpccs_dmem(g, ucode_u32_data, ucode_u32_size); + + ucode_u32_size = g->netlist_vars->ucode.fecs.data.count; + ucode_u32_data = (const u32 *)g->netlist_vars->ucode.fecs.data.l; + g->ops.gr.falcon.load_fecs_dmem(g, ucode_u32_data, ucode_u32_size); + + nvgpu_log_fn(g, "done"); +} + +static void nvgpu_gr_falcon_load_imem(struct gk20a *g) +{ + u32 ucode_u32_size; + const u32 *ucode_u32_data; + + nvgpu_log_fn(g, " "); + + ucode_u32_size = g->netlist_vars->ucode.gpccs.inst.count; + ucode_u32_data = (const u32 *)g->netlist_vars->ucode.gpccs.inst.l; + g->ops.gr.falcon.load_gpccs_imem(g, ucode_u32_data, ucode_u32_size); + + + ucode_u32_size = g->netlist_vars->ucode.fecs.inst.count; + ucode_u32_data = (const u32 *)g->netlist_vars->ucode.fecs.inst.l; + g->ops.gr.falcon.load_fecs_imem(g, ucode_u32_data, ucode_u32_size); + + nvgpu_log_fn(g, "done"); +} + +static void nvgpu_gr_falcon_bind_instblk(struct gk20a *g) +{ + struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info; + u64 inst_ptr; + + inst_ptr = nvgpu_inst_block_addr(g, &ucode_info->inst_blk_desc); + + g->ops.gr.falcon.bind_instblk(g, &ucode_info->inst_blk_desc, + inst_ptr); + +} + +static void nvgpu_gr_falcon_load_ctxsw_ucode_header(struct gk20a *g, + u64 addr_base, struct 
gk20a_ctxsw_ucode_segments *segments, + u32 reg_offset) +{ + u32 addr_code32 = u64_lo32((addr_base + segments->code.offset) >> 8); + u32 addr_data32 = u64_lo32((addr_base + segments->data.offset) >> 8); + + g->ops.gr.falcon.load_ctxsw_ucode_header(g, reg_offset, + segments->boot_signature, addr_code32, addr_data32, + segments->code.size, segments->data.size); +} + +static void nvgpu_gr_falcon_load_ctxsw_ucode_boot(struct gk20a *g, + u64 addr_base, struct gk20a_ctxsw_ucode_segments *segments, + u32 reg_offset) +{ + u32 addr_load32 = u64_lo32((addr_base + segments->boot.offset) >> 8); + u32 blocks = ((segments->boot.size + 0xFFU) & ~0xFFU) >> 8; + u32 dst = segments->boot_imem_offset; + + g->ops.gr.falcon.load_ctxsw_ucode_boot(g, reg_offset, + segments->boot_entry, addr_load32, blocks, dst); + +} + +static void nvgpu_gr_falcon_load_ctxsw_ucode_segments( + struct gk20a *g, u64 addr_base, + struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset) +{ + + /* Copy falcon bootloader into dmem */ + nvgpu_gr_falcon_load_ctxsw_ucode_header(g, addr_base, + segments, reg_offset); + nvgpu_gr_falcon_load_ctxsw_ucode_boot(g, + addr_base, segments, reg_offset); +} + + +static void nvgpu_gr_falcon_load_with_bootloader(struct gk20a *g) +{ + struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info; + u64 addr_base = ucode_info->surface_desc.gpu_va; + + nvgpu_gr_falcon_bind_instblk(g); + + nvgpu_gr_falcon_load_ctxsw_ucode_segments(g, addr_base, + &g->ctxsw_ucode_info.fecs, 0); + + nvgpu_gr_falcon_load_ctxsw_ucode_segments(g, addr_base, + &g->ctxsw_ucode_info.gpccs, + g->ops.gr.falcon.get_gpccs_start_reg_offset()); +} + +int nvgpu_gr_falcon_load_ctxsw_ucode(struct gk20a *g) +{ + int err; + + nvgpu_log_fn(g, " "); + + if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) { + g->ops.gr.falcon.configure_fmodel(g); + } + + /* + * In case bootloader is not supported, revert to the old way of + * loading gr ucode, without the faster bootstrap routine. 
+ */ + if (!nvgpu_is_enabled(g, NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP)) { + nvgpu_gr_falcon_load_dmem(g); + nvgpu_gr_falcon_load_imem(g); + g->ops.gr.falcon.start_ucode(g); + } else { + if (!g->gr.skip_ucode_init) { + err = nvgpu_gr_falcon_init_ctxsw_ucode(g); + if (err != 0) { + return err; + } + } + nvgpu_gr_falcon_load_with_bootloader(g); + g->gr.skip_ucode_init = true; + } + nvgpu_log_fn(g, "done"); + return 0; +} + +static void nvgpu_gr_falcon_load_gpccs_with_bootloader(struct gk20a *g) +{ + struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info; + u64 addr_base = ucode_info->surface_desc.gpu_va; + + nvgpu_gr_falcon_bind_instblk(g); + + nvgpu_gr_falcon_load_ctxsw_ucode_segments(g, addr_base, + &g->ctxsw_ucode_info.gpccs, + g->ops.gr.falcon.get_gpccs_start_reg_offset()); +} + +int nvgpu_gr_falcon_load_secure_ctxsw_ucode(struct gk20a *g) +{ + int err = 0; + u8 falcon_id_mask = 0; + + nvgpu_log_fn(g, " "); + + if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) { + g->ops.gr.falcon.configure_fmodel(g); + } + + g->pmu_lsf_loaded_falcon_id = 0; + if (nvgpu_is_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE)) { + /* this must be recovery so bootstrap fecs and gpccs */ + if (!nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) { + nvgpu_gr_falcon_load_gpccs_with_bootloader(g); + err = g->ops.pmu.load_lsfalcon_ucode(g, + BIT32(FALCON_ID_FECS)); + } else { + /* bind WPR VA inst block */ + nvgpu_gr_falcon_bind_instblk(g); + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SEC2_RTOS)) { + err = nvgpu_sec2_bootstrap_ls_falcons(g, + &g->sec2, FALCON_ID_FECS); + err = nvgpu_sec2_bootstrap_ls_falcons(g, + &g->sec2, FALCON_ID_GPCCS); + } else if (g->support_ls_pmu) { + err = g->ops.pmu.load_lsfalcon_ucode(g, + BIT32(FALCON_ID_FECS) | + BIT32(FALCON_ID_GPCCS)); + } else { + err = nvgpu_acr_bootstrap_hs_acr(g, g->acr); + if (err != 0) { + nvgpu_err(g, + "ACR GR LSF bootstrap failed"); + } + } + } + if (err != 0) { + nvgpu_err(g, "Unable to recover GR falcon"); + return err; + } + + } else { + /* cold boot or rg exit */ + nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, true); + if (!nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) { + nvgpu_gr_falcon_load_gpccs_with_bootloader(g); + } else { + /* bind WPR VA inst block */ + nvgpu_gr_falcon_bind_instblk(g); + if (nvgpu_acr_is_lsf_lazy_bootstrap(g, g->acr, + FALCON_ID_FECS)) { + falcon_id_mask |= BIT8(FALCON_ID_FECS); + } + if (nvgpu_acr_is_lsf_lazy_bootstrap(g, g->acr, + FALCON_ID_GPCCS)) { + falcon_id_mask |= BIT8(FALCON_ID_GPCCS); + } + + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SEC2_RTOS)) { + err = nvgpu_sec2_bootstrap_ls_falcons(g, + &g->sec2, FALCON_ID_FECS); + err = nvgpu_sec2_bootstrap_ls_falcons(g, + &g->sec2, FALCON_ID_GPCCS); + } else if (g->support_ls_pmu) { + err = g->ops.pmu.load_lsfalcon_ucode(g, + falcon_id_mask); + } else { + /* GR falcons bootstrapped by ACR */ + err = 0; + } + + if (err != 0) { + nvgpu_err(g, "Unable to boot GPCCS"); + return err; + } + } + } + + g->ops.gr.falcon.start_gpccs(g); + g->ops.gr.falcon.start_fecs(g); + + nvgpu_log_fn(g, "done"); + + return 0; +} diff --git a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c index 66674fdd0..8c9a99479 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c +++ b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c @@ -127,8 +127,6 @@ static const struct gpu_ops vgpu_gp10b_ops = { .get_sm_dsm_perf_regs = gr_gm20b_get_sm_dsm_perf_regs, .get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs, .set_hww_esr_report_mask = NULL, - 
.falcon_load_ucode = NULL, - .load_ctxsw_ucode = NULL, .set_gpc_tpc_mask = NULL, .alloc_obj_ctx = vgpu_gr_alloc_obj_ctx, .is_tpc_addr = gr_gm20b_is_tpc_addr, @@ -294,6 +292,9 @@ static const struct gpu_ops vgpu_gp10b_ops = { .get_zcull_info = vgpu_gr_get_zcull_info, .program_zcull_mapping = NULL, }, + .falcon = { + .load_ctxsw_ucode = NULL, + }, #ifdef CONFIG_GK20A_CTXSW_TRACE .fecs_trace = { .alloc_user_buffer = vgpu_alloc_user_buffer, diff --git a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c index 7cfb428ff..36fd2fa45 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c +++ b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c @@ -148,8 +148,6 @@ static const struct gpu_ops vgpu_gv11b_ops = { .get_sm_dsm_perf_regs = gv11b_gr_get_sm_dsm_perf_regs, .get_sm_dsm_perf_ctrl_regs = gv11b_gr_get_sm_dsm_perf_ctrl_regs, .set_hww_esr_report_mask = NULL, - .falcon_load_ucode = NULL, - .load_ctxsw_ucode = NULL, .set_gpc_tpc_mask = NULL, .alloc_obj_ctx = vgpu_gr_alloc_obj_ctx, .is_tpc_addr = gr_gm20b_is_tpc_addr, @@ -344,6 +342,9 @@ static const struct gpu_ops vgpu_gv11b_ops = { .align_regs_perf_pma = gv100_gr_hwpm_map_align_regs_perf_pma, }, + .falcon = { + .load_ctxsw_ucode = NULL, + }, #ifdef CONFIG_GK20A_CTXSW_TRACE .fecs_trace = { .alloc_user_buffer = vgpu_alloc_user_buffer, diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 50ebef43a..db887c2a3 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -69,12 +69,8 @@ #include #include -#define BLK_SIZE (256U) #define CTXSW_MEM_SCRUBBING_TIMEOUT_MAX 1000U #define CTXSW_MEM_SCRUBBING_TIMEOUT_DEFAULT 10U -#define FECS_ARB_CMD_TIMEOUT_MAX 40 -#define FECS_ARB_CMD_TIMEOUT_DEFAULT 2 - static struct channel_gk20a *gk20a_gr_get_channel_from_ctx( struct gk20a *g, u32 curr_ctx, u32 *curr_tsgid); @@ -183,126 +179,6 @@ static void gr_report_ctxsw_error(struct gk20a *g, u32 err_type, u32 chid, } } } - -static void gr_gk20a_load_falcon_dmem(struct gk20a *g) -{ - u32 i, ucode_u32_size; - const u32 *ucode_u32_data; - u32 checksum; - - nvgpu_log_fn(g, " "); - - gk20a_writel(g, gr_gpccs_dmemc_r(0), (gr_gpccs_dmemc_offs_f(0) | - gr_gpccs_dmemc_blk_f(0) | - gr_gpccs_dmemc_aincw_f(1))); - - ucode_u32_size = g->netlist_vars->ucode.gpccs.data.count; - ucode_u32_data = (const u32 *)g->netlist_vars->ucode.gpccs.data.l; - - for (i = 0, checksum = 0; i < ucode_u32_size; i++) { - gk20a_writel(g, gr_gpccs_dmemd_r(0), ucode_u32_data[i]); - checksum += ucode_u32_data[i]; - } - - gk20a_writel(g, gr_fecs_dmemc_r(0), (gr_fecs_dmemc_offs_f(0) | - gr_fecs_dmemc_blk_f(0) | - gr_fecs_dmemc_aincw_f(1))); - - ucode_u32_size = g->netlist_vars->ucode.fecs.data.count; - ucode_u32_data = (const u32 *)g->netlist_vars->ucode.fecs.data.l; - - for (i = 0, checksum = 0; i < ucode_u32_size; i++) { - gk20a_writel(g, gr_fecs_dmemd_r(0), ucode_u32_data[i]); - checksum += ucode_u32_data[i]; - } - nvgpu_log_fn(g, "done"); -} - -static void gr_gk20a_load_falcon_imem(struct gk20a *g) -{ - u32 cfg, fecs_imem_size, gpccs_imem_size, ucode_u32_size; - const u32 *ucode_u32_data; - u32 tag, i, pad_start, pad_end; - u32 checksum; - - nvgpu_log_fn(g, " "); - - cfg = gk20a_readl(g, gr_fecs_cfg_r()); - fecs_imem_size = gr_fecs_cfg_imem_sz_v(cfg); - - cfg = gk20a_readl(g, gr_gpc0_cfg_r()); - gpccs_imem_size = gr_gpc0_cfg_imem_sz_v(cfg); - - /* Use the broadcast address to access all of the GPCCS units. 
*/ - gk20a_writel(g, gr_gpccs_imemc_r(0), (gr_gpccs_imemc_offs_f(0) | - gr_gpccs_imemc_blk_f(0) | - gr_gpccs_imemc_aincw_f(1))); - - /* Setup the tags for the instruction memory. */ - tag = 0; - gk20a_writel(g, gr_gpccs_imemt_r(0), gr_gpccs_imemt_tag_f(tag)); - - ucode_u32_size = g->netlist_vars->ucode.gpccs.inst.count; - ucode_u32_data = (const u32 *)g->netlist_vars->ucode.gpccs.inst.l; - - for (i = 0, checksum = 0; i < ucode_u32_size; i++) { - if ((i != 0U) && ((i % (256U/sizeof(u32))) == 0U)) { - tag++; - gk20a_writel(g, gr_gpccs_imemt_r(0), - gr_gpccs_imemt_tag_f(tag)); - } - gk20a_writel(g, gr_gpccs_imemd_r(0), ucode_u32_data[i]); - checksum += ucode_u32_data[i]; - } - - pad_start = i * 4U; - pad_end = pad_start + (256U - pad_start % 256U) + 256U; - for (i = pad_start; - (i < gpccs_imem_size * 256U) && (i < pad_end); - i += 4U) { - if ((i != 0U) && ((i % 256U) == 0U)) { - tag++; - gk20a_writel(g, gr_gpccs_imemt_r(0), - gr_gpccs_imemt_tag_f(tag)); - } - gk20a_writel(g, gr_gpccs_imemd_r(0), 0); - } - - gk20a_writel(g, gr_fecs_imemc_r(0), (gr_fecs_imemc_offs_f(0) | - gr_fecs_imemc_blk_f(0) | - gr_fecs_imemc_aincw_f(1))); - - /* Setup the tags for the instruction memory. */ - tag = 0; - gk20a_writel(g, gr_fecs_imemt_r(0), gr_fecs_imemt_tag_f(tag)); - - ucode_u32_size = g->netlist_vars->ucode.fecs.inst.count; - ucode_u32_data = (const u32 *)g->netlist_vars->ucode.fecs.inst.l; - - for (i = 0, checksum = 0; i < ucode_u32_size; i++) { - if ((i != 0U) && ((i % (256U/sizeof(u32))) == 0U)) { - tag++; - gk20a_writel(g, gr_fecs_imemt_r(0), - gr_fecs_imemt_tag_f(tag)); - } - gk20a_writel(g, gr_fecs_imemd_r(0), ucode_u32_data[i]); - checksum += ucode_u32_data[i]; - } - - pad_start = i * 4U; - pad_end = pad_start + (256U - pad_start % 256U) + 256U; - for (i = pad_start; - (i < fecs_imem_size * 256U) && i < pad_end; - i += 4U) { - if ((i != 0U) && ((i % 256U) == 0U)) { - tag++; - gk20a_writel(g, gr_fecs_imemt_r(0), - gr_fecs_imemt_tag_f(tag)); - } - gk20a_writel(g, gr_fecs_imemd_r(0), 0); - } -} - int gr_gk20a_ctx_wait_ucode(struct gk20a *g, u32 mailbox_id, u32 *mailbox_ret, u32 opc_success, u32 mailbox_ok, u32 opc_fail, @@ -1076,288 +952,6 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, return ret; } -static void gr_gk20a_start_falcon_ucode(struct gk20a *g) -{ - nvgpu_log_fn(g, " "); - - gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0U), - gr_fecs_ctxsw_mailbox_clear_value_f(~U32(0U))); - - gk20a_writel(g, gr_gpccs_dmactl_r(), gr_gpccs_dmactl_require_ctx_f(0U)); - gk20a_writel(g, gr_fecs_dmactl_r(), gr_fecs_dmactl_require_ctx_f(0U)); - - gk20a_writel(g, gr_gpccs_cpuctl_r(), gr_gpccs_cpuctl_startcpu_f(1U)); - gk20a_writel(g, gr_fecs_cpuctl_r(), gr_fecs_cpuctl_startcpu_f(1U)); - - nvgpu_log_fn(g, "done"); -} - -static void gr_gk20a_wait_for_fecs_arb_idle(struct gk20a *g) -{ - int retries = FECS_ARB_CMD_TIMEOUT_MAX / FECS_ARB_CMD_TIMEOUT_DEFAULT; - u32 val; - - val = gk20a_readl(g, gr_fecs_arb_ctx_cmd_r()); - while ((gr_fecs_arb_ctx_cmd_cmd_v(val) != 0U) && (retries != 0)) { - nvgpu_udelay(FECS_ARB_CMD_TIMEOUT_DEFAULT); - retries--; - val = gk20a_readl(g, gr_fecs_arb_ctx_cmd_r()); - } - - if (retries == 0) { - nvgpu_err(g, "arbiter cmd timeout, fecs arb ctx cmd: 0x%08x", - gk20a_readl(g, gr_fecs_arb_ctx_cmd_r())); - } - - retries = FECS_ARB_CMD_TIMEOUT_MAX / FECS_ARB_CMD_TIMEOUT_DEFAULT; - while (((gk20a_readl(g, gr_fecs_ctxsw_status_1_r()) & - gr_fecs_ctxsw_status_1_arb_busy_m()) != 0U) && - (retries != 0)) { - nvgpu_udelay(FECS_ARB_CMD_TIMEOUT_DEFAULT); - retries--; - } - if (retries == 0) { - 
nvgpu_err(g, - "arbiter idle timeout, fecs ctxsw status: 0x%08x", - gk20a_readl(g, gr_fecs_ctxsw_status_1_r())); - } -} - -void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g) -{ - struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info; - int retries = FECS_ARB_CMD_TIMEOUT_MAX / FECS_ARB_CMD_TIMEOUT_DEFAULT; - u64 inst_ptr_shifted_u64; - u32 inst_ptr_shifted_u32; - - while (((gk20a_readl(g, gr_fecs_ctxsw_status_1_r()) & - gr_fecs_ctxsw_status_1_arb_busy_m()) != 0U) && - (retries != 0)) { - nvgpu_udelay(FECS_ARB_CMD_TIMEOUT_DEFAULT); - retries--; - } - if (retries == 0) { - nvgpu_err(g, - "arbiter idle timeout, status: %08x", - gk20a_readl(g, gr_fecs_ctxsw_status_1_r())); - } - - gk20a_writel(g, gr_fecs_arb_ctx_adr_r(), 0x0); - - inst_ptr_shifted_u64 = nvgpu_inst_block_addr(g, - &ucode_info->inst_blk_desc); - inst_ptr_shifted_u64 >>= 12; - BUG_ON(u64_hi32(inst_ptr_shifted_u64) != 0U); - inst_ptr_shifted_u32 = (u32)inst_ptr_shifted_u64; - gk20a_writel(g, gr_fecs_new_ctx_r(), - gr_fecs_new_ctx_ptr_f(inst_ptr_shifted_u32) | - nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc, - gr_fecs_new_ctx_target_sys_mem_ncoh_f(), - gr_fecs_new_ctx_target_sys_mem_coh_f(), - gr_fecs_new_ctx_target_vid_mem_f()) | - gr_fecs_new_ctx_valid_m()); - - gk20a_writel(g, gr_fecs_arb_ctx_ptr_r(), - gr_fecs_arb_ctx_ptr_ptr_f(inst_ptr_shifted_u32) | - nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc, - gr_fecs_arb_ctx_ptr_target_sys_mem_ncoh_f(), - gr_fecs_arb_ctx_ptr_target_sys_mem_coh_f(), - gr_fecs_arb_ctx_ptr_target_vid_mem_f())); - - gk20a_writel(g, gr_fecs_arb_ctx_cmd_r(), 0x7); - - /* Wait for arbiter command to complete */ - gr_gk20a_wait_for_fecs_arb_idle(g); - - gk20a_writel(g, gr_fecs_current_ctx_r(), - gr_fecs_current_ctx_ptr_f(inst_ptr_shifted_u32) | - gr_fecs_current_ctx_target_m() | - gr_fecs_current_ctx_valid_m()); - /* Send command to arbiter to flush */ - gk20a_writel(g, gr_fecs_arb_ctx_cmd_r(), gr_fecs_arb_ctx_cmd_cmd_s()); - - gr_gk20a_wait_for_fecs_arb_idle(g); - -} - -void gr_gk20a_load_ctxsw_ucode_header(struct gk20a *g, u64 addr_base, - struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset) -{ - u32 addr_code32; - u32 addr_data32; - - addr_code32 = u64_lo32((addr_base + segments->code.offset) >> 8); - addr_data32 = u64_lo32((addr_base + segments->data.offset) >> 8); - - /* - * Copy falcon bootloader header into dmem at offset 0. - * Configure dmem port 0 for auto-incrementing writes starting at dmem - * offset 0. 
- */ - gk20a_writel(g, reg_offset + gr_fecs_dmemc_r(0), - gr_fecs_dmemc_offs_f(0) | - gr_fecs_dmemc_blk_f(0) | - gr_fecs_dmemc_aincw_f(1)); - - /* Write out the actual data */ - switch (segments->boot_signature) { - case FALCON_UCODE_SIG_T18X_GPCCS_WITH_RESERVED: - case FALCON_UCODE_SIG_T21X_FECS_WITH_DMEM_SIZE: - case FALCON_UCODE_SIG_T21X_FECS_WITH_RESERVED: - case FALCON_UCODE_SIG_T21X_GPCCS_WITH_RESERVED: - case FALCON_UCODE_SIG_T12X_FECS_WITH_RESERVED: - case FALCON_UCODE_SIG_T12X_GPCCS_WITH_RESERVED: - gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); - gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); - gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); - gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); - /* fallthrough */ - case FALCON_UCODE_SIG_T12X_FECS_WITHOUT_RESERVED: - case FALCON_UCODE_SIG_T12X_GPCCS_WITHOUT_RESERVED: - case FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED: - case FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED2: - case FALCON_UCODE_SIG_T21X_GPCCS_WITHOUT_RESERVED: - gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); - gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); - gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); - gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); - gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 4); - gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), - addr_code32); - gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); - gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), - segments->code.size); - gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); - gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); - gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); - gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), - addr_data32); - gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), - segments->data.size); - break; - case FALCON_UCODE_SIG_T12X_FECS_OLDER: - case FALCON_UCODE_SIG_T12X_GPCCS_OLDER: - gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); - gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), - addr_code32); - gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); - gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), - segments->code.size); - gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); - gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), - addr_data32); - gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), - segments->data.size); - gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), - addr_code32); - gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); - gk20a_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); - break; - default: - nvgpu_err(g, - "unknown falcon ucode boot signature 0x%08x" - " with reg_offset 0x%08x", - segments->boot_signature, reg_offset); - BUG(); - } -} - -void gr_gk20a_load_ctxsw_ucode_boot(struct gk20a *g, u64 addr_base, - struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset) -{ - u32 addr_load32; - u32 blocks; - u32 b; - u32 dst; - - addr_load32 = u64_lo32((addr_base + segments->boot.offset) >> 8); - blocks = ((segments->boot.size + 0xFFU) & ~0xFFU) >> 8; - - /* - * Set the base FB address for the DMA transfer. Subtract off the 256 - * byte IMEM block offset such that the relative FB and IMEM offsets - * match, allowing the IMEM tags to be properly created. 
- */ - - dst = segments->boot_imem_offset; - gk20a_writel(g, reg_offset + gr_fecs_dmatrfbase_r(), - (addr_load32 - (dst >> 8))); - - for (b = 0; b < blocks; b++) { - /* Setup destination IMEM offset */ - gk20a_writel(g, reg_offset + gr_fecs_dmatrfmoffs_r(), - dst + (b << 8)); - - /* Setup source offset (relative to BASE) */ - gk20a_writel(g, reg_offset + gr_fecs_dmatrffboffs_r(), - dst + (b << 8)); - - gk20a_writel(g, reg_offset + gr_fecs_dmatrfcmd_r(), - gr_fecs_dmatrfcmd_imem_f(0x01) | - gr_fecs_dmatrfcmd_write_f(0x00) | - gr_fecs_dmatrfcmd_size_f(0x06) | - gr_fecs_dmatrfcmd_ctxdma_f(0)); - } - - /* Specify the falcon boot vector */ - gk20a_writel(g, reg_offset + gr_fecs_bootvec_r(), - gr_fecs_bootvec_vec_f(segments->boot_entry)); -} - -static void gr_gk20a_load_falcon_with_bootloader(struct gk20a *g) -{ - struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info; - u64 addr_base = ucode_info->surface_desc.gpu_va; - - gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), 0x0); - - gr_gk20a_load_falcon_bind_instblk(g); - - g->ops.gr.falcon_load_ucode(g, addr_base, - &g->ctxsw_ucode_info.fecs, 0); - - g->ops.gr.falcon_load_ucode(g, addr_base, - &g->ctxsw_ucode_info.gpccs, - gr_gpcs_gpccs_falcon_hwcfg_r() - - gr_fecs_falcon_hwcfg_r()); -} - -int gr_gk20a_load_ctxsw_ucode(struct gk20a *g) -{ - int err; - - nvgpu_log_fn(g, " "); - - if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) { - gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(7), - gr_fecs_ctxsw_mailbox_value_f(0xc0de7777U)); - gk20a_writel(g, gr_gpccs_ctxsw_mailbox_r(7), - gr_gpccs_ctxsw_mailbox_value_f(0xc0de7777U)); - } - - /* - * In case bootloader is not supported, revert to the old way of - * loading gr ucode, without the faster bootstrap routine. - */ - if (!nvgpu_is_enabled(g, NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP)) { - gr_gk20a_load_falcon_dmem(g); - gr_gk20a_load_falcon_imem(g); - gr_gk20a_start_falcon_ucode(g); - } else { - if (!g->gr.skip_ucode_init) { - err = nvgpu_gr_falcon_init_ctxsw_ucode(g); - - if (err != 0) { - return err; - } - } - gr_gk20a_load_falcon_with_bootloader(g); - g->gr.skip_ucode_init = true; - } - nvgpu_log_fn(g, "done"); - return 0; -} - static int gr_gk20a_wait_ctxsw_ready(struct gk20a *g) { int ret; @@ -1939,7 +1533,7 @@ static int gr_gk20a_init_ctxsw(struct gk20a *g) { int err = 0; - err = g->ops.gr.load_ctxsw_ucode(g); + err = g->ops.gr.falcon.load_ctxsw_ucode(g); if (err != 0) { goto out; } diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index 33dbbf672..bef045bef 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h @@ -341,14 +341,6 @@ int gr_gk20a_update_hwpm_ctxsw_mode(struct gk20a *g, u32 mode); void gk20a_gr_set_shader_exceptions(struct gk20a *g, u32 data); -int gr_gk20a_load_ctxsw_ucode(struct gk20a *g); -void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g); -void gr_gk20a_load_ctxsw_ucode_header(struct gk20a *g, u64 addr_base, - struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset); -void gr_gk20a_load_ctxsw_ucode_boot(struct gk20a *g, u64 addr_base, - struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset); - - void gr_gk20a_free_tsg_gr_ctx(struct tsg_gk20a *tsg); int gr_gk20a_disable_ctxsw(struct gk20a *g); int gr_gk20a_enable_ctxsw(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index 805533057..d282dbb64 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c @@ -340,23 +340,6 @@ void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 
gpc_index) } } -void gr_gm20b_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base, - struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset) -{ - gk20a_writel(g, reg_offset + gr_fecs_dmactl_r(), - gr_fecs_dmactl_require_ctx_f(0)); - - /* Copy falcon bootloader into dmem */ - gr_gk20a_load_ctxsw_ucode_header(g, addr_base, segments, reg_offset); - gr_gk20a_load_ctxsw_ucode_boot(g, addr_base, segments, reg_offset); - - /* start the falcon immediately if PRIV security is disabled*/ - if (!nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) { - gk20a_writel(g, reg_offset + gr_fecs_cpuctl_r(), - gr_fecs_cpuctl_startcpu_f(0x01)); - } -} - static bool gr_gm20b_is_tpc_addr_shared(struct gk20a *g, u32 addr) { u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE); @@ -394,122 +377,6 @@ u32 gr_gm20b_get_tpc_num(struct gk20a *g, u32 addr) return 0; } -static void gr_gm20b_load_gpccs_with_bootloader(struct gk20a *g) -{ - struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info; - u64 addr_base = ucode_info->surface_desc.gpu_va; - - gr_gk20a_load_falcon_bind_instblk(g); - - g->ops.gr.falcon_load_ucode(g, addr_base, - &g->ctxsw_ucode_info.gpccs, - gr_gpcs_gpccs_falcon_hwcfg_r() - - gr_fecs_falcon_hwcfg_r()); -} - -int gr_gm20b_load_ctxsw_ucode(struct gk20a *g) -{ - int err = 0; - u32 reg_offset = gr_gpcs_gpccs_falcon_hwcfg_r() - - gr_fecs_falcon_hwcfg_r(); - u8 falcon_id_mask = 0; - - nvgpu_log_fn(g, " "); - - if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) { - gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(7), - gr_fecs_ctxsw_mailbox_value_f(0xc0de7777U)); - gk20a_writel(g, gr_gpccs_ctxsw_mailbox_r(7), - gr_gpccs_ctxsw_mailbox_value_f(0xc0de7777U)); - } - - g->pmu_lsf_loaded_falcon_id = 0; - if (nvgpu_is_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE)) { - /* this must be recovery so bootstrap fecs and gpccs */ - if (!nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) { - gr_gm20b_load_gpccs_with_bootloader(g); - err = g->ops.pmu.load_lsfalcon_ucode(g, - BIT32(FALCON_ID_FECS)); - } else { - /* bind WPR VA inst block */ - gr_gk20a_load_falcon_bind_instblk(g); - if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SEC2_RTOS)) { - err = nvgpu_sec2_bootstrap_ls_falcons(g, &g->sec2, - FALCON_ID_FECS); - err = nvgpu_sec2_bootstrap_ls_falcons(g, &g->sec2, - FALCON_ID_GPCCS); - } else if (g->support_ls_pmu) { - err = g->ops.pmu.load_lsfalcon_ucode(g, - BIT32(FALCON_ID_FECS) | - BIT32(FALCON_ID_GPCCS)); - } else { - err = nvgpu_acr_bootstrap_hs_acr(g, g->acr); - if (err != 0) { - nvgpu_err(g, "GR Recovery: ACR GR LSF bootstrap failed"); - } - } - } - if (err != 0) { - nvgpu_err(g, "Unable to recover GR falcon"); - return err; - } - - } else { - /* cold boot or rg exit */ - nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, true); - if (!nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) { - gr_gm20b_load_gpccs_with_bootloader(g); - } else { - /* bind WPR VA inst block */ - gr_gk20a_load_falcon_bind_instblk(g); - if (nvgpu_acr_is_lsf_lazy_bootstrap(g, g->acr, FALCON_ID_FECS)) { - falcon_id_mask |= BIT8(FALCON_ID_FECS); - } - if (nvgpu_acr_is_lsf_lazy_bootstrap(g, g->acr, FALCON_ID_GPCCS)) { - falcon_id_mask |= BIT8(FALCON_ID_GPCCS); - } - - if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SEC2_RTOS)) { - err = nvgpu_sec2_bootstrap_ls_falcons(g, &g->sec2, - FALCON_ID_FECS); - err = nvgpu_sec2_bootstrap_ls_falcons(g, &g->sec2, - FALCON_ID_GPCCS); - } else if (g->support_ls_pmu) { - err = g->ops.pmu.load_lsfalcon_ucode(g, falcon_id_mask); - } else { - /* GR falcons bootstrapped by ACR */ - err = 0; - } - - if (err != 0) { - nvgpu_err(g, 
"Unable to boot GPCCS"); - return err; - } - } - } - - /*start gpccs */ - if (nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) { - gk20a_writel(g, reg_offset + - gr_fecs_cpuctl_alias_r(), - gr_gpccs_cpuctl_startcpu_f(1U)); - } else { - gk20a_writel(g, gr_gpccs_dmactl_r(), - gr_gpccs_dmactl_require_ctx_f(0U)); - gk20a_writel(g, gr_gpccs_cpuctl_r(), - gr_gpccs_cpuctl_startcpu_f(1U)); - } - /* start fecs */ - gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0U), ~U32(0U)); - gk20a_writel(g, gr_fecs_ctxsw_mailbox_r(1U), 1U); - gk20a_writel(g, gr_fecs_ctxsw_mailbox_clear_r(6U), 0xffffffffU); - gk20a_writel(g, gr_fecs_cpuctl_alias_r(), - gr_fecs_cpuctl_startcpu_f(1U)); - nvgpu_log_fn(g, "done"); - - return 0; -} - void gr_gm20b_detect_sm_arch(struct gk20a *g) { u32 v = gk20a_readl(g, gr_gpc0_tpc0_sm_arch_r()); diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.h b/drivers/gpu/nvgpu/gm20b/gr_gm20b.h index 01df78158..b03b45119 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.h +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.h @@ -68,7 +68,6 @@ void gr_gm20b_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base, struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset); bool gr_gm20b_is_tpc_addr(struct gk20a *g, u32 addr); u32 gr_gm20b_get_tpc_num(struct gk20a *g, u32 addr); -int gr_gm20b_load_ctxsw_ucode(struct gk20a *g); void gr_gm20b_detect_sm_arch(struct gk20a *g); int gr_gm20b_init_ctxsw_preemption_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm, diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c index 9ec80dc2f..e1ac7ccba 100644 --- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c @@ -38,6 +38,7 @@ #include #include #include +#include #include "hal/bus/bus_gm20b.h" #include "hal/bus/bus_gk20a.h" @@ -252,8 +253,6 @@ static const struct gpu_ops gm20b_ops = { .get_sm_dsm_perf_regs = gr_gm20b_get_sm_dsm_perf_regs, .get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs, .set_hww_esr_report_mask = gr_gm20b_set_hww_esr_report_mask, - .falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments, - .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, .set_gpc_tpc_mask = gr_gm20b_set_gpc_tpc_mask, .alloc_obj_ctx = gk20a_alloc_obj_ctx, .is_tpc_addr = gr_gm20b_is_tpc_addr, @@ -488,6 +487,23 @@ static const struct gpu_ops gm20b_ops = { gm20b_gr_falcon_get_fecs_ctxsw_mailbox_size, .get_fecs_ctx_state_store_major_rev_id = gm20b_gr_falcon_get_fecs_ctx_state_store_major_rev_id, + .load_gpccs_dmem = gm20b_gr_falcon_load_gpccs_dmem, + .load_fecs_dmem = gm20b_gr_falcon_load_fecs_dmem, + .load_gpccs_imem = gm20b_gr_falcon_load_gpccs_imem, + .load_fecs_imem = gm20b_gr_falcon_load_fecs_imem, + .configure_fmodel = gm20b_gr_falcon_configure_fmodel, + .start_ucode = gm20b_gr_falcon_start_ucode, + .start_gpccs = gm20b_gr_falcon_start_gpccs, + .start_fecs = gm20b_gr_falcon_start_fecs, + .get_gpccs_start_reg_offset = + gm20b_gr_falcon_get_gpccs_start_reg_offset, + .bind_instblk = gm20b_gr_falcon_bind_instblk, + .load_ctxsw_ucode_header = + gm20b_gr_falcon_load_ctxsw_ucode_header, + .load_ctxsw_ucode_boot = + gm20b_gr_falcon_load_ctxsw_ucode_boot, + .load_ctxsw_ucode = + nvgpu_gr_falcon_load_ctxsw_ucode, }, }, .fb = { @@ -1028,7 +1044,8 @@ int gm20b_init_hal(struct gk20a *g) gops->pmu.init_wpr_region = gm20b_pmu_init_acr; gops->pmu.load_lsfalcon_ucode = gm20b_load_falcon_ucode; - gops->gr.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode; + gops->gr.falcon.load_ctxsw_ucode = + nvgpu_gr_falcon_load_secure_ctxsw_ucode; } else { /* Inherit from gk20a */ 
gops->pmu.pmu_setup_hw_and_bootstrap = @@ -1037,8 +1054,6 @@ int gm20b_init_hal(struct gk20a *g) gops->pmu.load_lsfalcon_ucode = NULL; gops->pmu.init_wpr_region = NULL; - - gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode; } nvgpu_set_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL, false); diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c index b2b5e5e3d..d174fda0d 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include "hal/bus/bus_gk20a.h" @@ -277,8 +278,6 @@ static const struct gpu_ops gp10b_ops = { .get_sm_dsm_perf_regs = gr_gm20b_get_sm_dsm_perf_regs, .get_sm_dsm_perf_ctrl_regs = gr_gm20b_get_sm_dsm_perf_ctrl_regs, .set_hww_esr_report_mask = gr_gm20b_set_hww_esr_report_mask, - .falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments, - .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, .set_gpc_tpc_mask = gr_gp10b_set_gpc_tpc_mask, .alloc_obj_ctx = gk20a_alloc_obj_ctx, .is_tpc_addr = gr_gm20b_is_tpc_addr, @@ -566,6 +565,23 @@ static const struct gpu_ops gp10b_ops = { gm20b_gr_falcon_get_fecs_ctxsw_mailbox_size, .get_fecs_ctx_state_store_major_rev_id = gm20b_gr_falcon_get_fecs_ctx_state_store_major_rev_id, + .load_gpccs_dmem = gm20b_gr_falcon_load_gpccs_dmem, + .load_fecs_dmem = gm20b_gr_falcon_load_fecs_dmem, + .load_gpccs_imem = gm20b_gr_falcon_load_gpccs_imem, + .load_fecs_imem = gm20b_gr_falcon_load_fecs_imem, + .configure_fmodel = gm20b_gr_falcon_configure_fmodel, + .start_ucode = gm20b_gr_falcon_start_ucode, + .start_gpccs = gm20b_gr_falcon_start_gpccs, + .start_fecs = gm20b_gr_falcon_start_fecs, + .get_gpccs_start_reg_offset = + gm20b_gr_falcon_get_gpccs_start_reg_offset, + .bind_instblk = gm20b_gr_falcon_bind_instblk, + .load_ctxsw_ucode_header = + gm20b_gr_falcon_load_ctxsw_ucode_header, + .load_ctxsw_ucode_boot = + gm20b_gr_falcon_load_ctxsw_ucode_boot, + .load_ctxsw_ucode = + nvgpu_gr_falcon_load_ctxsw_ucode, }, }, .fb = { @@ -1107,7 +1123,8 @@ int gp10b_init_hal(struct gk20a *g) gops->pmu.init_wpr_region = gm20b_pmu_init_acr; gops->pmu.load_lsfalcon_ucode = gp10b_load_falcon_ucode; - gops->gr.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode; + gops->gr.falcon.load_ctxsw_ucode = + nvgpu_gr_falcon_load_secure_ctxsw_ucode; } else { /* Inherit from gk20a */ gops->pmu.pmu_setup_hw_and_bootstrap = @@ -1117,7 +1134,6 @@ int gp10b_init_hal(struct gk20a *g) gops->pmu.load_lsfalcon_ucode = NULL; gops->pmu.init_wpr_region = NULL; - gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode; } nvgpu_set_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL, false); diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index 94ba82541..7de1d2b4e 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c @@ -62,6 +62,7 @@ #include "hal/gr/fecs_trace/fecs_trace_gm20b.h" #include "hal/gr/config/gr_config_gm20b.h" #include "hal/gr/config/gr_config_gv100.h" +#include "hal/gr/falcon/gr_falcon_gm20b.h" #include "hal/gr/zbc/zbc_gp10b.h" #include "hal/gr/zbc/zbc_gv11b.h" #include "hal/gr/init/gr_init_gm20b.h" @@ -72,7 +73,6 @@ #include "hal/gr/intr/gr_intr_gv11b.h" #include "hal/gr/zcull/zcull_gm20b.h" #include "hal/gr/zcull/zcull_gv11b.h" -#include "hal/gr/falcon/gr_falcon_gm20b.h" #include "hal/gr/hwpm_map/hwpm_map_gv100.h" #include "hal/gr/ctxsw_prog/ctxsw_prog_gm20b.h" #include "hal/gr/ctxsw_prog/ctxsw_prog_gp10b.h" @@ -166,6 +166,7 @@ #include #include #include +#include #include #include @@ -389,8 +390,6 @@ static 
const struct gpu_ops gv100_ops = { .get_sm_dsm_perf_regs = gv11b_gr_get_sm_dsm_perf_regs, .get_sm_dsm_perf_ctrl_regs = gv11b_gr_get_sm_dsm_perf_ctrl_regs, .set_hww_esr_report_mask = gv11b_gr_set_hww_esr_report_mask, - .falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments, - .load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode, .set_gpc_tpc_mask = gr_gv100_set_gpc_tpc_mask, .alloc_obj_ctx = gk20a_alloc_obj_ctx, .is_tpc_addr = gr_gm20b_is_tpc_addr, @@ -710,6 +709,23 @@ static const struct gpu_ops gv100_ops = { gm20b_gr_falcon_get_fecs_ctxsw_mailbox_size, .get_fecs_ctx_state_store_major_rev_id = gm20b_gr_falcon_get_fecs_ctx_state_store_major_rev_id, + .load_gpccs_dmem = gm20b_gr_falcon_load_gpccs_dmem, + .load_fecs_dmem = gm20b_gr_falcon_load_fecs_dmem, + .load_gpccs_imem = gm20b_gr_falcon_load_gpccs_imem, + .load_fecs_imem = gm20b_gr_falcon_load_fecs_imem, + .configure_fmodel = gm20b_gr_falcon_configure_fmodel, + .start_ucode = gm20b_gr_falcon_start_ucode, + .start_gpccs = gm20b_gr_falcon_start_gpccs, + .start_fecs = gm20b_gr_falcon_start_fecs, + .get_gpccs_start_reg_offset = + gm20b_gr_falcon_get_gpccs_start_reg_offset, + .bind_instblk = gm20b_gr_falcon_bind_instblk, + .load_ctxsw_ucode_header = + gm20b_gr_falcon_load_ctxsw_ucode_header, + .load_ctxsw_ucode_boot = + gm20b_gr_falcon_load_ctxsw_ucode_boot, + .load_ctxsw_ucode = + nvgpu_gr_falcon_load_secure_ctxsw_ucode, }, }, .fb = { diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index a48e7f0d0..8ad5759d4 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "hal/bus/bus_gk20a.h" #include "hal/bus/bus_gp10b.h" @@ -340,8 +341,6 @@ static const struct gpu_ops gv11b_ops = { .get_sm_dsm_perf_regs = gv11b_gr_get_sm_dsm_perf_regs, .get_sm_dsm_perf_ctrl_regs = gv11b_gr_get_sm_dsm_perf_ctrl_regs, .set_hww_esr_report_mask = gv11b_gr_set_hww_esr_report_mask, - .falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments, - .load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode, .set_gpc_tpc_mask = gr_gv11b_set_gpc_tpc_mask, .alloc_obj_ctx = gk20a_alloc_obj_ctx, .is_tpc_addr = gr_gm20b_is_tpc_addr, @@ -669,6 +668,23 @@ static const struct gpu_ops gv11b_ops = { gm20b_gr_falcon_get_fecs_ctxsw_mailbox_size, .get_fecs_ctx_state_store_major_rev_id = gm20b_gr_falcon_get_fecs_ctx_state_store_major_rev_id, + .load_gpccs_dmem = gm20b_gr_falcon_load_gpccs_dmem, + .load_fecs_dmem = gm20b_gr_falcon_load_fecs_dmem, + .load_gpccs_imem = gm20b_gr_falcon_load_gpccs_imem, + .load_fecs_imem = gm20b_gr_falcon_load_fecs_imem, + .configure_fmodel = gm20b_gr_falcon_configure_fmodel, + .start_ucode = gm20b_gr_falcon_start_ucode, + .start_gpccs = gm20b_gr_falcon_start_gpccs, + .start_fecs = gm20b_gr_falcon_start_fecs, + .get_gpccs_start_reg_offset = + gm20b_gr_falcon_get_gpccs_start_reg_offset, + .bind_instblk = gm20b_gr_falcon_bind_instblk, + .load_ctxsw_ucode_header = + gm20b_gr_falcon_load_ctxsw_ucode_header, + .load_ctxsw_ucode_boot = + gm20b_gr_falcon_load_ctxsw_ucode_boot, + .load_ctxsw_ucode = + nvgpu_gr_falcon_load_ctxsw_ucode, }, }, .fb = { @@ -1257,7 +1273,8 @@ int gv11b_init_hal(struct gk20a *g) /* priv security dependent ops */ if (nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) { - gops->gr.load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode; + gops->gr.falcon.load_ctxsw_ucode = + nvgpu_gr_falcon_load_secure_ctxsw_ucode; } else { /* non-secure boot */ gops->pmu.pmu_nsbootstrap = gv11b_pmu_bootstrap; @@ -1267,7 +1284,6 @@ int gv11b_init_hal(struct gk20a 
*g) gops->pmu.load_lsfalcon_ucode = NULL; gops->pmu.init_wpr_region = NULL; - gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode; } nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, false); diff --git a/drivers/gpu/nvgpu/hal/gr/falcon/gr_falcon_gm20b.c b/drivers/gpu/nvgpu/hal/gr/falcon/gr_falcon_gm20b.c index 183ff4ddf..86f51a0d6 100644 --- a/drivers/gpu/nvgpu/hal/gr/falcon/gr_falcon_gm20b.c +++ b/drivers/gpu/nvgpu/hal/gr/falcon/gr_falcon_gm20b.c @@ -21,6 +21,7 @@ */ #include +#include #include #include @@ -28,6 +29,402 @@ #include +#define FECS_ARB_CMD_TIMEOUT_MAX_US 40U +#define FECS_ARB_CMD_TIMEOUT_DEFAULT_US 2U + +void gm20b_gr_falcon_load_gpccs_dmem(struct gk20a *g, + const u32 *ucode_u32_data, u32 ucode_u32_size) +{ + u32 i, checksum; + + /* enable access for gpccs dmem */ + nvgpu_writel(g, gr_gpccs_dmemc_r(0), (gr_gpccs_dmemc_offs_f(0) | + gr_gpccs_dmemc_blk_f(0) | + gr_gpccs_dmemc_aincw_f(1))); + + for (i = 0, checksum = 0; i < ucode_u32_size; i++) { + nvgpu_writel(g, gr_gpccs_dmemd_r(0), ucode_u32_data[i]); + checksum += ucode_u32_data[i]; + } + nvgpu_log_info(g, "gpccs dmem checksum: 0x%x", checksum); +} + +void gm20b_gr_falcon_load_fecs_dmem(struct gk20a *g, + const u32 *ucode_u32_data, u32 ucode_u32_size) +{ + u32 i, checksum; + + /* set access for fecs dmem */ + nvgpu_writel(g, gr_fecs_dmemc_r(0), (gr_fecs_dmemc_offs_f(0) | + gr_fecs_dmemc_blk_f(0) | + gr_fecs_dmemc_aincw_f(1))); + + for (i = 0, checksum = 0; i < ucode_u32_size; i++) { + nvgpu_writel(g, gr_fecs_dmemd_r(0), ucode_u32_data[i]); + checksum += ucode_u32_data[i]; + } + nvgpu_log_info(g, "fecs dmem checksum: 0x%x", checksum); +} + +void gm20b_gr_falcon_load_gpccs_imem(struct gk20a *g, + const u32 *ucode_u32_data, u32 ucode_u32_size) +{ + u32 cfg, gpccs_imem_size; + u32 tag, i, pad_start, pad_end; + u32 checksum; + + /* enable access for gpccs imem */ + nvgpu_writel(g, gr_gpccs_imemc_r(0), (gr_gpccs_imemc_offs_f(0) | + gr_gpccs_imemc_blk_f(0) | + gr_gpccs_imemc_aincw_f(1))); + + cfg = nvgpu_readl(g, gr_gpc0_cfg_r()); + gpccs_imem_size = gr_gpc0_cfg_imem_sz_v(cfg); + + /* Setup the tags for the instruction memory. */ + tag = 0; + nvgpu_writel(g, gr_gpccs_imemt_r(0), gr_gpccs_imemt_tag_f(tag)); + + for (i = 0, checksum = 0; i < ucode_u32_size; i++) { + if ((i != 0U) && ((i % (256U/sizeof(u32))) == 0U)) { + tag++; + nvgpu_writel(g, gr_gpccs_imemt_r(0), + gr_gpccs_imemt_tag_f(tag)); + } + nvgpu_writel(g, gr_gpccs_imemd_r(0), ucode_u32_data[i]); + checksum += ucode_u32_data[i]; + } + + pad_start = i * 4U; + pad_end = pad_start + (256U - pad_start % 256U) + 256U; + for (i = pad_start; + (i < gpccs_imem_size * 256U) && (i < pad_end); i += 4U) { + if ((i != 0U) && ((i % 256U) == 0U)) { + tag++; + nvgpu_writel(g, gr_gpccs_imemt_r(0), + gr_gpccs_imemt_tag_f(tag)); + } + nvgpu_writel(g, gr_gpccs_imemd_r(0), 0); + } + + nvgpu_log_info(g, "gpccs imem checksum: 0x%x", checksum); +} + +void gm20b_gr_falcon_load_fecs_imem(struct gk20a *g, + const u32 *ucode_u32_data, u32 ucode_u32_size) +{ + u32 cfg, fecs_imem_size; + u32 tag, i, pad_start, pad_end; + u32 checksum; + + /* set access for fecs imem */ + nvgpu_writel(g, gr_fecs_imemc_r(0), (gr_fecs_imemc_offs_f(0) | + gr_fecs_imemc_blk_f(0) | + gr_fecs_imemc_aincw_f(1))); + + cfg = nvgpu_readl(g, gr_fecs_cfg_r()); + fecs_imem_size = gr_fecs_cfg_imem_sz_v(cfg); + + /* Setup the tags for the instruction memory. 
*/ + tag = 0; + nvgpu_writel(g, gr_fecs_imemt_r(0), gr_fecs_imemt_tag_f(tag)); + + for (i = 0, checksum = 0; i < ucode_u32_size; i++) { + if ((i != 0U) && ((i % (256U/sizeof(u32))) == 0U)) { + tag++; + nvgpu_writel(g, gr_fecs_imemt_r(0), + gr_fecs_imemt_tag_f(tag)); + } + nvgpu_writel(g, gr_fecs_imemd_r(0), ucode_u32_data[i]); + checksum += ucode_u32_data[i]; + } + + pad_start = i * 4U; + pad_end = pad_start + (256U - pad_start % 256U) + 256U; + for (i = pad_start; + (i < fecs_imem_size * 256U) && i < pad_end; + i += 4U) { + if ((i != 0U) && ((i % 256U) == 0U)) { + tag++; + nvgpu_writel(g, gr_fecs_imemt_r(0), + gr_fecs_imemt_tag_f(tag)); + } + nvgpu_writel(g, gr_fecs_imemd_r(0), 0); + } + nvgpu_log_info(g, "fecs imem checksum: 0x%x", checksum); +} + +u32 gm20b_gr_falcon_get_gpccs_start_reg_offset(void) +{ + return (gr_gpcs_gpccs_falcon_hwcfg_r() - gr_fecs_falcon_hwcfg_r()); +} + +void gm20b_gr_falcon_configure_fmodel(struct gk20a *g) +{ + nvgpu_log_fn(g, " "); + + nvgpu_writel(g, gr_fecs_ctxsw_mailbox_r(7), + gr_fecs_ctxsw_mailbox_value_f(0xc0de7777U)); + nvgpu_writel(g, gr_gpccs_ctxsw_mailbox_r(7), + gr_gpccs_ctxsw_mailbox_value_f(0xc0de7777U)); + +} + +void gm20b_gr_falcon_start_ucode(struct gk20a *g) +{ + nvgpu_log_fn(g, " "); + + nvgpu_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0U), + gr_fecs_ctxsw_mailbox_clear_value_f(~U32(0U))); + + nvgpu_writel(g, gr_gpccs_dmactl_r(), gr_gpccs_dmactl_require_ctx_f(0U)); + nvgpu_writel(g, gr_fecs_dmactl_r(), gr_fecs_dmactl_require_ctx_f(0U)); + + nvgpu_writel(g, gr_gpccs_cpuctl_r(), gr_gpccs_cpuctl_startcpu_f(1U)); + nvgpu_writel(g, gr_fecs_cpuctl_r(), gr_fecs_cpuctl_startcpu_f(1U)); + + nvgpu_log_fn(g, "done"); +} + + +void gm20b_gr_falcon_start_gpccs(struct gk20a *g) +{ + u32 reg_offset = gr_gpcs_gpccs_falcon_hwcfg_r() - + gr_fecs_falcon_hwcfg_r(); + + if (nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) { + nvgpu_writel(g, reg_offset + + gr_fecs_cpuctl_alias_r(), + gr_gpccs_cpuctl_startcpu_f(1U)); + } else { + nvgpu_writel(g, gr_gpccs_dmactl_r(), + gr_gpccs_dmactl_require_ctx_f(0U)); + nvgpu_writel(g, gr_gpccs_cpuctl_r(), + gr_gpccs_cpuctl_startcpu_f(1U)); + } +} + +void gm20b_gr_falcon_start_fecs(struct gk20a *g) +{ + nvgpu_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0U), ~U32(0U)); + nvgpu_writel(g, gr_fecs_ctxsw_mailbox_r(1U), 1U); + nvgpu_writel(g, gr_fecs_ctxsw_mailbox_clear_r(6U), 0xffffffffU); + nvgpu_writel(g, gr_fecs_cpuctl_alias_r(), + gr_fecs_cpuctl_startcpu_f(1U)); +} + +static void gm20b_gr_falcon_wait_for_fecs_arb_idle(struct gk20a *g) +{ + int retries = FECS_ARB_CMD_TIMEOUT_MAX_US / + FECS_ARB_CMD_TIMEOUT_DEFAULT_US; + u32 val; + + val = nvgpu_readl(g, gr_fecs_arb_ctx_cmd_r()); + while ((gr_fecs_arb_ctx_cmd_cmd_v(val) != 0U) && (retries != 0)) { + nvgpu_udelay(FECS_ARB_CMD_TIMEOUT_DEFAULT_US); + retries--; + val = nvgpu_readl(g, gr_fecs_arb_ctx_cmd_r()); + } + + if (retries == 0) { + nvgpu_err(g, "arbiter cmd timeout, fecs arb ctx cmd: 0x%08x", + nvgpu_readl(g, gr_fecs_arb_ctx_cmd_r())); + } + + retries = FECS_ARB_CMD_TIMEOUT_MAX_US / + FECS_ARB_CMD_TIMEOUT_DEFAULT_US; + while (((nvgpu_readl(g, gr_fecs_ctxsw_status_1_r()) & + gr_fecs_ctxsw_status_1_arb_busy_m()) != 0U) && + (retries != 0)) { + nvgpu_udelay(FECS_ARB_CMD_TIMEOUT_DEFAULT_US); + retries--; + } + if (retries == 0) { + nvgpu_err(g, + "arbiter idle timeout, fecs ctxsw status: 0x%08x", + nvgpu_readl(g, gr_fecs_ctxsw_status_1_r())); + } +} + +void gm20b_gr_falcon_bind_instblk(struct gk20a *g, + struct nvgpu_mem *mem, u64 inst_ptr) +{ + u32 retries = FECS_ARB_CMD_TIMEOUT_MAX_US / + 
FECS_ARB_CMD_TIMEOUT_DEFAULT_US; + u32 inst_ptr_u32; + + nvgpu_writel(g, gr_fecs_ctxsw_mailbox_clear_r(0), 0x0); + + while (((nvgpu_readl(g, gr_fecs_ctxsw_status_1_r()) & + gr_fecs_ctxsw_status_1_arb_busy_m()) != 0U) && + (retries != 0)) { + nvgpu_udelay(FECS_ARB_CMD_TIMEOUT_DEFAULT_US); + retries--; + } + if (retries == 0) { + nvgpu_err(g, + "arbiter idle timeout, status: %08x", + nvgpu_readl(g, gr_fecs_ctxsw_status_1_r())); + } + + nvgpu_writel(g, gr_fecs_arb_ctx_adr_r(), 0x0); + + inst_ptr >>= 12; + BUG_ON(u64_hi32(inst_ptr) != 0U); + inst_ptr_u32 = (u32)inst_ptr; + nvgpu_writel(g, gr_fecs_new_ctx_r(), + gr_fecs_new_ctx_ptr_f(inst_ptr_u32) | + nvgpu_aperture_mask(g, mem, + gr_fecs_new_ctx_target_sys_mem_ncoh_f(), + gr_fecs_new_ctx_target_sys_mem_coh_f(), + gr_fecs_new_ctx_target_vid_mem_f()) | + gr_fecs_new_ctx_valid_m()); + + nvgpu_writel(g, gr_fecs_arb_ctx_ptr_r(), + gr_fecs_arb_ctx_ptr_ptr_f(inst_ptr_u32) | + nvgpu_aperture_mask(g, mem, + gr_fecs_arb_ctx_ptr_target_sys_mem_ncoh_f(), + gr_fecs_arb_ctx_ptr_target_sys_mem_coh_f(), + gr_fecs_arb_ctx_ptr_target_vid_mem_f())); + + nvgpu_writel(g, gr_fecs_arb_ctx_cmd_r(), 0x7); + + /* Wait for arbiter command to complete */ + gm20b_gr_falcon_wait_for_fecs_arb_idle(g); + + nvgpu_writel(g, gr_fecs_current_ctx_r(), + gr_fecs_current_ctx_ptr_f(inst_ptr_u32) | + gr_fecs_current_ctx_target_m() | + gr_fecs_current_ctx_valid_m()); + /* Send command to arbiter to flush */ + nvgpu_writel(g, gr_fecs_arb_ctx_cmd_r(), gr_fecs_arb_ctx_cmd_cmd_s()); + + gm20b_gr_falcon_wait_for_fecs_arb_idle(g); + +} + +void gm20b_gr_falcon_load_ctxsw_ucode_header(struct gk20a *g, + u32 reg_offset, u32 boot_signature, u32 addr_code32, + u32 addr_data32, u32 code_size, u32 data_size) +{ + + nvgpu_writel(g, reg_offset + gr_fecs_dmactl_r(), + gr_fecs_dmactl_require_ctx_f(0)); + + /* + * Copy falcon bootloader header into dmem at offset 0. + * Configure dmem port 0 for auto-incrementing writes starting at dmem + * offset 0. 
+ */ + nvgpu_writel(g, reg_offset + gr_fecs_dmemc_r(0), + gr_fecs_dmemc_offs_f(0) | + gr_fecs_dmemc_blk_f(0) | + gr_fecs_dmemc_aincw_f(1)); + + /* Write out the actual data */ + switch (boot_signature) { + case FALCON_UCODE_SIG_T18X_GPCCS_WITH_RESERVED: + case FALCON_UCODE_SIG_T21X_FECS_WITH_DMEM_SIZE: + case FALCON_UCODE_SIG_T21X_FECS_WITH_RESERVED: + case FALCON_UCODE_SIG_T21X_GPCCS_WITH_RESERVED: + case FALCON_UCODE_SIG_T12X_FECS_WITH_RESERVED: + case FALCON_UCODE_SIG_T12X_GPCCS_WITH_RESERVED: + nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); + nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); + nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); + nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); + /* fallthrough */ + case FALCON_UCODE_SIG_T12X_FECS_WITHOUT_RESERVED: + case FALCON_UCODE_SIG_T12X_GPCCS_WITHOUT_RESERVED: + case FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED: + case FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED2: + case FALCON_UCODE_SIG_T21X_GPCCS_WITHOUT_RESERVED: + nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); + nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); + nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); + nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); + nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 4); + nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), + addr_code32); + nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); + nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), + code_size); + nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); + nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); + nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); + nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), + addr_data32); + nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), + data_size); + break; + case FALCON_UCODE_SIG_T12X_FECS_OLDER: + case FALCON_UCODE_SIG_T12X_GPCCS_OLDER: + nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); + nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), + addr_code32); + nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); + nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), + code_size); + nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); + nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), + addr_data32); + nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), + data_size); + nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), + addr_code32); + nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); + nvgpu_writel(g, reg_offset + gr_fecs_dmemd_r(0), 0); + break; + default: + nvgpu_err(g, + "unknown falcon ucode boot signature 0x%08x" + " with reg_offset 0x%08x", + boot_signature, reg_offset); + BUG(); + } +} + +void gm20b_gr_falcon_load_ctxsw_ucode_boot(struct gk20a *g, u32 reg_offset, + u32 boot_entry, u32 addr_load32, u32 blocks, u32 dst) +{ + u32 b; + + /* + * Set the base FB address for the DMA transfer. Subtract off the 256 + * byte IMEM block offset such that the relative FB and IMEM offsets + * match, allowing the IMEM tags to be properly created. 
+ */ + + nvgpu_writel(g, reg_offset + gr_fecs_dmatrfbase_r(), + (addr_load32 - (dst >> 8))); + + for (b = 0; b < blocks; b++) { + /* Setup destination IMEM offset */ + nvgpu_writel(g, reg_offset + gr_fecs_dmatrfmoffs_r(), + dst + (b << 8)); + + /* Setup source offset (relative to BASE) */ + nvgpu_writel(g, reg_offset + gr_fecs_dmatrffboffs_r(), + dst + (b << 8)); + + nvgpu_writel(g, reg_offset + gr_fecs_dmatrfcmd_r(), + gr_fecs_dmatrfcmd_imem_f(0x01) | + gr_fecs_dmatrfcmd_write_f(0x00) | + gr_fecs_dmatrfcmd_size_f(0x06) | + gr_fecs_dmatrfcmd_ctxdma_f(0)); + } + + /* Specify the falcon boot vector */ + nvgpu_writel(g, reg_offset + gr_fecs_bootvec_r(), + gr_fecs_bootvec_vec_f(boot_entry)); + + /* start the falcon immediately if PRIV security is disabled*/ + if (!nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) { + nvgpu_writel(g, reg_offset + gr_fecs_cpuctl_r(), + gr_fecs_cpuctl_startcpu_f(0x01)); + } +} + u32 gm20b_gr_falcon_fecs_base_addr(void) { return gr_fecs_irqsset_r(); diff --git a/drivers/gpu/nvgpu/hal/gr/falcon/gr_falcon_gm20b.h b/drivers/gpu/nvgpu/hal/gr/falcon/gr_falcon_gm20b.h index 853778629..5a991275e 100644 --- a/drivers/gpu/nvgpu/hal/gr/falcon/gr_falcon_gm20b.h +++ b/drivers/gpu/nvgpu/hal/gr/falcon/gr_falcon_gm20b.h @@ -32,5 +32,26 @@ u32 gm20b_gr_falcon_gpccs_base_addr(void); void gm20b_gr_falcon_fecs_dump_stats(struct gk20a *g); u32 gm20b_gr_falcon_get_fecs_ctx_state_store_major_rev_id(struct gk20a *g); u32 gm20b_gr_falcon_get_fecs_ctxsw_mailbox_size(void); +void gm20b_gr_falcon_load_gpccs_dmem(struct gk20a *g, + const u32 *ucode_u32_data, u32 ucode_u32_size); +void gm20b_gr_falcon_load_fecs_dmem(struct gk20a *g, + const u32 *ucode_u32_data, u32 ucode_u32_size); +void gm20b_gr_falcon_load_gpccs_imem(struct gk20a *g, + const u32 *ucode_u32_data, u32 ucode_u32_size); +void gm20b_gr_falcon_load_fecs_imem(struct gk20a *g, + const u32 *ucode_u32_data, u32 ucode_u32_size); +void gm20b_gr_falcon_configure_fmodel(struct gk20a *g); +void gm20b_gr_falcon_start_ucode(struct gk20a *g); +void gm20b_gr_falcon_start_gpccs(struct gk20a *g); +void gm20b_gr_falcon_start_fecs(struct gk20a *g); +u32 gm20b_gr_falcon_get_gpccs_start_reg_offset(void); +void gm20b_gr_falcon_bind_instblk(struct gk20a *g, + struct nvgpu_mem *mem, u64 inst_ptr); +void gm20b_gr_falcon_load_ctxsw_ucode_header(struct gk20a *g, + u32 reg_offset, u32 boot_signature, u32 addr_code32, + u32 addr_data32, u32 code_size, u32 data_size); +void gm20b_gr_falcon_load_ctxsw_ucode_boot(struct gk20a *g, + u32 reg_offset, u32 boot_entry, u32 addr_load32, u32 blocks, + u32 dst); #endif /* NVGPU_GR_FALCON_GM20B_H */ diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index 492758e0a..c0e4f6b05 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h @@ -285,11 +285,6 @@ struct gpu_ops { u32 *num_ovr_perf_regs, u32 **ovr_perf_regsr); void (*set_hww_esr_report_mask)(struct gk20a *g); - void (*falcon_load_ucode)(struct gk20a *g, - u64 addr_base, - struct gk20a_ctxsw_ucode_segments *segments, - u32 reg_offset); - int (*load_ctxsw_ucode)(struct gk20a *g); void (*set_gpc_tpc_mask)(struct gk20a *g, u32 gpc_index); int (*alloc_obj_ctx)(struct channel_gk20a *c, u32 class_num, u32 flags); @@ -574,9 +569,33 @@ struct gpu_ops { u32 (*gpccs_base_addr)(void); void (*dump_stats)(struct gk20a *g); u32 (*fecs_ctxsw_mailbox_size)(void); - u32 (*get_fecs_ctx_state_store_major_rev_id) - (struct gk20a *g); + u32 (*get_fecs_ctx_state_store_major_rev_id)( + struct gk20a *g); + 
void (*load_gpccs_dmem)(struct gk20a *g, + const u32 *ucode_u32_data, u32 size); + void (*load_fecs_dmem)(struct gk20a *g, + const u32 *ucode_u32_data, u32 size); + void (*load_gpccs_imem)(struct gk20a *g, + const u32 *ucode_u32_data, u32 size); + void (*load_fecs_imem)(struct gk20a *g, + const u32 *ucode_u32_data, u32 size); + void (*configure_fmodel)(struct gk20a *g); + void (*start_ucode)(struct gk20a *g); + void (*start_gpccs)(struct gk20a *g); + void (*start_fecs)(struct gk20a *g); + u32 (*get_gpccs_start_reg_offset)(void); + void (*bind_instblk)(struct gk20a *g, + struct nvgpu_mem *mem, u64 inst_ptr); + void (*load_ctxsw_ucode_header)(struct gk20a *g, + u32 reg_offset, u32 boot_signature, + u32 addr_code32, u32 addr_data32, + u32 code_size, u32 data_size); + void (*load_ctxsw_ucode_boot)(struct gk20a *g, + u32 reg_offset, u32 boot_entry, + u32 addr_load32, u32 blocks, u32 dst); + int (*load_ctxsw_ucode)(struct gk20a *g); } falcon; + #ifdef CONFIG_GK20A_CTXSW_TRACE struct { int (*init)(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/gr_falcon.h b/drivers/gpu/nvgpu/include/nvgpu/gr/gr_falcon.h index 5fd5f1db6..39ca6e290 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gr/gr_falcon.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gr/gr_falcon.h @@ -27,6 +27,9 @@ struct gk20a; +int nvgpu_gr_falcon_init_ctxsw(struct gk20a *g); int nvgpu_gr_falcon_init_ctxsw_ucode(struct gk20a *g); +int nvgpu_gr_falcon_load_ctxsw_ucode(struct gk20a *g); +int nvgpu_gr_falcon_load_secure_ctxsw_ucode(struct gk20a *g); -#endif /* NVGPU_GR_SUBCTX_H */ +#endif /* NVGPU_GR_FALCON_H */ diff --git a/drivers/gpu/nvgpu/tu104/hal_tu104.c b/drivers/gpu/nvgpu/tu104/hal_tu104.c index 449679fd2..6269405ea 100644 --- a/drivers/gpu/nvgpu/tu104/hal_tu104.c +++ b/drivers/gpu/nvgpu/tu104/hal_tu104.c @@ -186,6 +186,7 @@ #include #include #include +#include #include #include @@ -410,8 +411,6 @@ static const struct gpu_ops tu104_ops = { .get_sm_dsm_perf_regs = gv11b_gr_get_sm_dsm_perf_regs, .get_sm_dsm_perf_ctrl_regs = gr_tu104_get_sm_dsm_perf_ctrl_regs, .set_hww_esr_report_mask = gv11b_gr_set_hww_esr_report_mask, - .falcon_load_ucode = gr_gm20b_load_ctxsw_ucode_segments, - .load_ctxsw_ucode = gr_gm20b_load_ctxsw_ucode, .set_gpc_tpc_mask = gr_gv100_set_gpc_tpc_mask, .alloc_obj_ctx = gk20a_alloc_obj_ctx, .is_tpc_addr = gr_gm20b_is_tpc_addr, @@ -743,6 +742,23 @@ static const struct gpu_ops tu104_ops = { gm20b_gr_falcon_get_fecs_ctxsw_mailbox_size, .get_fecs_ctx_state_store_major_rev_id = gm20b_gr_falcon_get_fecs_ctx_state_store_major_rev_id, + .load_gpccs_dmem = gm20b_gr_falcon_load_gpccs_dmem, + .load_fecs_dmem = gm20b_gr_falcon_load_fecs_dmem, + .load_gpccs_imem = gm20b_gr_falcon_load_gpccs_imem, + .load_fecs_imem = gm20b_gr_falcon_load_fecs_imem, + .configure_fmodel = gm20b_gr_falcon_configure_fmodel, + .start_ucode = gm20b_gr_falcon_start_ucode, + .start_gpccs = gm20b_gr_falcon_start_gpccs, + .start_fecs = gm20b_gr_falcon_start_fecs, + .get_gpccs_start_reg_offset = + gm20b_gr_falcon_get_gpccs_start_reg_offset, + .bind_instblk = gm20b_gr_falcon_bind_instblk, + .load_ctxsw_ucode_header = + gm20b_gr_falcon_load_ctxsw_ucode_header, + .load_ctxsw_ucode_boot = + gm20b_gr_falcon_load_ctxsw_ucode_boot, + .load_ctxsw_ucode = + nvgpu_gr_falcon_load_secure_ctxsw_ucode, }, }, .fb = { @@ -1465,7 +1481,8 @@ int tu104_init_hal(struct gk20a *g) gops->cbc.ctrl = NULL; gops->cbc.alloc_comptags = NULL; - gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode; + gops->gr.falcon.load_ctxsw_ucode = + nvgpu_gr_falcon_load_ctxsw_ucode; /* Disable 
pmu pstate, as there is no pmu support */ nvgpu_set_enabled(g, NVGPU_PMU_PSTATE, false);
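One closing illustration of how the two common entry points are selected per
chip: a minimal sketch condensed from the gm20b/gp10b/gv11b init_hal hunks
above. The helper name example_select_ctxsw_loader is hypothetical; in the
patch the equivalent assignment sits inline in each chip's init_hal().

    /*
     * Sketch only: chips with PRIV security enabled replace the statically
     * wired non-secure loader with the secure one at HAL init time. In this
     * patch the assignment is done inline in gm20b/gp10b/gv11b_init_hal();
     * this helper exists purely for illustration.
     */
    static void example_select_ctxsw_loader(struct gk20a *g,
                        struct gpu_ops *gops)
    {
        if (nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) {
            /* LS falcons are bootstrapped via PMU/SEC2/ACR */
            gops->gr.falcon.load_ctxsw_ucode =
                nvgpu_gr_falcon_load_secure_ctxsw_ucode;
        }
        /*
         * Otherwise keep nvgpu_gr_falcon_load_ctxsw_ucode, already set in
         * the static ops table, which either bit-bangs the ucode or uses
         * the non-secure bootloader path.
         */
    }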