diff --git a/drivers/gpu/nvgpu/common/acr/acr_blob_construct_v0.c b/drivers/gpu/nvgpu/common/acr/acr_blob_construct_v0.c
index 69859d933..745286450 100644
--- a/drivers/gpu/nvgpu/common/acr/acr_blob_construct_v0.c
+++ b/drivers/gpu/nvgpu/common/acr/acr_blob_construct_v0.c
@@ -67,6 +67,8 @@ int nvgpu_acr_lsf_fecs_ucode_details_v0(struct gk20a *g, void *lsf_ucode_img)
 	struct lsf_ucode_desc *lsf_desc;
 	struct nvgpu_firmware *fecs_sig;
 	struct flcn_ucode_img *p_img = (struct flcn_ucode_img *)lsf_ucode_img;
+	struct nvgpu_ctxsw_ucode_segments *fecs =
+		nvgpu_gr_falcon_get_fecs_ucode_segments(g->gr.falcon);
 	int err;
 
 	fecs_sig = nvgpu_request_firmware(g, GM20B_FECS_UCODE_SIG, 0);
@@ -90,34 +92,25 @@ int nvgpu_acr_lsf_fecs_ucode_details_v0(struct gk20a *g, void *lsf_ucode_img)
 		goto free_lsf_desc;
 	}
 
-	p_img->desc->bootloader_start_offset =
-		g->ctxsw_ucode_info.fecs.boot.offset;
-	p_img->desc->bootloader_size =
-		ALIGN(g->ctxsw_ucode_info.fecs.boot.size, 256);
-	p_img->desc->bootloader_imem_offset =
-		g->ctxsw_ucode_info.fecs.boot_imem_offset;
-	p_img->desc->bootloader_entry_point =
-		g->ctxsw_ucode_info.fecs.boot_entry;
+	p_img->desc->bootloader_start_offset = fecs->boot.offset;
+	p_img->desc->bootloader_size = ALIGN(fecs->boot.size, 256);
+	p_img->desc->bootloader_imem_offset = fecs->boot_imem_offset;
+	p_img->desc->bootloader_entry_point = fecs->boot_entry;
 
-	p_img->desc->image_size =
-		ALIGN(g->ctxsw_ucode_info.fecs.boot.size, 256) +
-		ALIGN(g->ctxsw_ucode_info.fecs.code.size, 256) +
-		ALIGN(g->ctxsw_ucode_info.fecs.data.size, 256);
-	p_img->desc->app_size = ALIGN(g->ctxsw_ucode_info.fecs.code.size, 256) +
-		ALIGN(g->ctxsw_ucode_info.fecs.data.size, 256);
-	p_img->desc->app_start_offset = g->ctxsw_ucode_info.fecs.code.offset;
+	p_img->desc->image_size = ALIGN(fecs->boot.size, 256) +
+		ALIGN(fecs->code.size, 256) + ALIGN(fecs->data.size, 256);
+	p_img->desc->app_size = ALIGN(fecs->code.size, 256) +
+		ALIGN(fecs->data.size, 256);
+	p_img->desc->app_start_offset = fecs->code.offset;
 	p_img->desc->app_imem_offset = 0;
 	p_img->desc->app_imem_entry = 0;
 	p_img->desc->app_dmem_offset = 0;
 	p_img->desc->app_resident_code_offset = 0;
-	p_img->desc->app_resident_code_size =
-		g->ctxsw_ucode_info.fecs.code.size;
+	p_img->desc->app_resident_code_size = fecs->code.size;
 	p_img->desc->app_resident_data_offset =
-		g->ctxsw_ucode_info.fecs.data.offset -
-		g->ctxsw_ucode_info.fecs.code.offset;
-	p_img->desc->app_resident_data_size =
-		g->ctxsw_ucode_info.fecs.data.size;
-	p_img->data = g->ctxsw_ucode_info.surface_desc.cpu_va;
+		fecs->data.offset - fecs->code.offset;
+	p_img->desc->app_resident_data_size = fecs->data.size;
+	p_img->data = nvgpu_gr_falcon_get_surface_desc_cpu_va(g->gr.falcon);
 	p_img->data_size = p_img->desc->image_size;
 
 	p_img->fw_ver = NULL;
@@ -138,6 +131,8 @@ int nvgpu_acr_lsf_gpccs_ucode_details_v0(struct gk20a *g, void *lsf_ucode_img)
 	struct lsf_ucode_desc *lsf_desc;
 	struct nvgpu_firmware *gpccs_sig;
 	struct flcn_ucode_img *p_img = (struct flcn_ucode_img *)lsf_ucode_img;
+	struct nvgpu_ctxsw_ucode_segments *gpccs =
+		nvgpu_gr_falcon_get_gpccs_ucode_segments(g->gr.falcon);
 	int err;
 
 	if (!nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) {
@@ -166,33 +161,26 @@ int nvgpu_acr_lsf_gpccs_ucode_details_v0(struct gk20a *g, void *lsf_ucode_img)
 
 	p_img->desc->bootloader_start_offset = 0;
-	p_img->desc->bootloader_size =
-		ALIGN(g->ctxsw_ucode_info.gpccs.boot.size, 256);
-	p_img->desc->bootloader_imem_offset =
-		g->ctxsw_ucode_info.gpccs.boot_imem_offset;
-	p_img->desc->bootloader_entry_point =
-		g->ctxsw_ucode_info.gpccs.boot_entry;
+	p_img->desc->bootloader_size = ALIGN(gpccs->boot.size, 256);
+	p_img->desc->bootloader_imem_offset = gpccs->boot_imem_offset;
+	p_img->desc->bootloader_entry_point = gpccs->boot_entry;
 
-	p_img->desc->image_size =
-		ALIGN(g->ctxsw_ucode_info.gpccs.boot.size, 256) +
-		ALIGN(g->ctxsw_ucode_info.gpccs.code.size, 256) +
-		ALIGN(g->ctxsw_ucode_info.gpccs.data.size, 256);
-	p_img->desc->app_size = ALIGN(g->ctxsw_ucode_info.gpccs.code.size, 256)
-		+ ALIGN(g->ctxsw_ucode_info.gpccs.data.size, 256);
+	p_img->desc->image_size = ALIGN(gpccs->boot.size, 256) +
+		ALIGN(gpccs->code.size, 256) + ALIGN(gpccs->data.size, 256);
+	p_img->desc->app_size =
+		ALIGN(gpccs->code.size, 256) + ALIGN(gpccs->data.size, 256);
 	p_img->desc->app_start_offset = p_img->desc->bootloader_size;
 	p_img->desc->app_imem_offset = 0;
 	p_img->desc->app_imem_entry = 0;
 	p_img->desc->app_dmem_offset = 0;
 	p_img->desc->app_resident_code_offset = 0;
-	p_img->desc->app_resident_code_size =
-		ALIGN(g->ctxsw_ucode_info.gpccs.code.size, 256);
-	p_img->desc->app_resident_data_offset =
-		ALIGN(g->ctxsw_ucode_info.gpccs.data.offset, 256) -
-		ALIGN(g->ctxsw_ucode_info.gpccs.code.offset, 256);
-	p_img->desc->app_resident_data_size =
-		ALIGN(g->ctxsw_ucode_info.gpccs.data.size, 256);
-	p_img->data = (u32 *)((u8 *)g->ctxsw_ucode_info.surface_desc.cpu_va +
-		g->ctxsw_ucode_info.gpccs.boot.offset);
+	p_img->desc->app_resident_code_size = ALIGN(gpccs->code.size, 256);
+	p_img->desc->app_resident_data_offset = ALIGN(gpccs->data.offset, 256) -
+		ALIGN(gpccs->code.offset, 256);
+	p_img->desc->app_resident_data_size = ALIGN(gpccs->data.size, 256);
+	p_img->data = (u32 *)
+		((u8 *)nvgpu_gr_falcon_get_surface_desc_cpu_va(g->gr.falcon) +
+		gpccs->boot.offset);
 	p_img->data_size = ALIGN(p_img->desc->image_size, 256);
 	p_img->fw_ver = NULL;
 	p_img->header = NULL;
@@ -820,7 +808,7 @@ int nvgpu_acr_prepare_ucode_blob_v0(struct gk20a *g)
 		return err;
 	}
 
-	err = nvgpu_gr_falcon_init_ctxsw_ucode(g);
+	err = nvgpu_gr_falcon_init_ctxsw_ucode(g, g->gr.falcon);
 	if (err != 0) {
 		nvgpu_err(g, "gr_falcon_init_ctxsw_ucode failed err=%d", err);
 		return err;
diff --git a/drivers/gpu/nvgpu/common/acr/acr_blob_construct_v1.c b/drivers/gpu/nvgpu/common/acr/acr_blob_construct_v1.c
index 4ab5fd7f6..264519abc 100644
--- a/drivers/gpu/nvgpu/common/acr/acr_blob_construct_v1.c
+++ b/drivers/gpu/nvgpu/common/acr/acr_blob_construct_v1.c
@@ -75,6 +75,8 @@ int nvgpu_acr_lsf_fecs_ucode_details_v1(struct gk20a *g, void *lsf_ucode_img)
 	struct nvgpu_firmware *fecs_sig = NULL;
 	struct flcn_ucode_img_v1 *p_img =
 		(struct flcn_ucode_img_v1 *)lsf_ucode_img;
+	struct nvgpu_ctxsw_ucode_segments *fecs =
+		nvgpu_gr_falcon_get_fecs_ucode_segments(g->gr.falcon);
 	int err;
 
 	switch (ver) {
@@ -113,34 +115,25 @@
 		goto free_lsf_desc;
 	}
 
-	p_img->desc->bootloader_start_offset =
-		g->ctxsw_ucode_info.fecs.boot.offset;
-	p_img->desc->bootloader_size =
-		ALIGN(g->ctxsw_ucode_info.fecs.boot.size, 256);
-	p_img->desc->bootloader_imem_offset =
-		g->ctxsw_ucode_info.fecs.boot_imem_offset;
-	p_img->desc->bootloader_entry_point =
-		g->ctxsw_ucode_info.fecs.boot_entry;
+	p_img->desc->bootloader_start_offset = fecs->boot.offset;
+	p_img->desc->bootloader_size = ALIGN(fecs->boot.size, 256);
+	p_img->desc->bootloader_imem_offset = fecs->boot_imem_offset;
+	p_img->desc->bootloader_entry_point = fecs->boot_entry;
 
-	p_img->desc->image_size =
-		ALIGN(g->ctxsw_ucode_info.fecs.boot.size, 256) +
-		ALIGN(g->ctxsw_ucode_info.fecs.code.size, 256) +
-		ALIGN(g->ctxsw_ucode_info.fecs.data.size, 256);
-	p_img->desc->app_size = ALIGN(g->ctxsw_ucode_info.fecs.code.size, 256) +
-		ALIGN(g->ctxsw_ucode_info.fecs.data.size, 256);
-	p_img->desc->app_start_offset = g->ctxsw_ucode_info.fecs.code.offset;
+	p_img->desc->image_size = ALIGN(fecs->boot.size, 256) +
+		ALIGN(fecs->code.size, 256) + ALIGN(fecs->data.size, 256);
+	p_img->desc->app_size = ALIGN(fecs->code.size, 256) +
+		ALIGN(fecs->data.size, 256);
+	p_img->desc->app_start_offset = fecs->code.offset;
 	p_img->desc->app_imem_offset = 0;
 	p_img->desc->app_imem_entry = 0;
 	p_img->desc->app_dmem_offset = 0;
 	p_img->desc->app_resident_code_offset = 0;
-	p_img->desc->app_resident_code_size =
-		g->ctxsw_ucode_info.fecs.code.size;
-	p_img->desc->app_resident_data_offset =
-		g->ctxsw_ucode_info.fecs.data.offset -
-		g->ctxsw_ucode_info.fecs.code.offset;
-	p_img->desc->app_resident_data_size =
-		g->ctxsw_ucode_info.fecs.data.size;
-	p_img->data = g->ctxsw_ucode_info.surface_desc.cpu_va;
+	p_img->desc->app_resident_code_size = fecs->code.size;
+	p_img->desc->app_resident_data_offset = fecs->data.offset -
+		fecs->code.offset;
+	p_img->desc->app_resident_data_size = fecs->data.size;
+	p_img->data = nvgpu_gr_falcon_get_surface_desc_cpu_va(g->gr.falcon);
 	p_img->data_size = p_img->desc->image_size;
 
 	p_img->fw_ver = NULL;
@@ -166,6 +159,8 @@ int nvgpu_acr_lsf_gpccs_ucode_details_v1(struct gk20a *g, void *lsf_ucode_img)
 	struct nvgpu_firmware *gpccs_sig = NULL;
 	struct flcn_ucode_img_v1 *p_img =
 		(struct flcn_ucode_img_v1 *)lsf_ucode_img;
+	struct nvgpu_ctxsw_ucode_segments *gpccs =
+		nvgpu_gr_falcon_get_gpccs_ucode_segments(g->gr.falcon);
 	int err;
 
 	if (!nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) {
@@ -208,33 +203,26 @@
 	}
 
 	p_img->desc->bootloader_start_offset = 0;
-	p_img->desc->bootloader_size =
-		ALIGN(g->ctxsw_ucode_info.gpccs.boot.size, 256);
-	p_img->desc->bootloader_imem_offset =
-		g->ctxsw_ucode_info.gpccs.boot_imem_offset;
-	p_img->desc->bootloader_entry_point =
-		g->ctxsw_ucode_info.gpccs.boot_entry;
+	p_img->desc->bootloader_size = ALIGN(gpccs->boot.size, 256);
+	p_img->desc->bootloader_imem_offset = gpccs->boot_imem_offset;
+	p_img->desc->bootloader_entry_point = gpccs->boot_entry;
 
-	p_img->desc->image_size =
-		ALIGN(g->ctxsw_ucode_info.gpccs.boot.size, 256) +
-		ALIGN(g->ctxsw_ucode_info.gpccs.code.size, 256) +
-		ALIGN(g->ctxsw_ucode_info.gpccs.data.size, 256);
-	p_img->desc->app_size = ALIGN(g->ctxsw_ucode_info.gpccs.code.size, 256)
-		+ ALIGN(g->ctxsw_ucode_info.gpccs.data.size, 256);
+	p_img->desc->image_size = ALIGN(gpccs->boot.size, 256) +
+		ALIGN(gpccs->code.size, 256) + ALIGN(gpccs->data.size, 256);
+	p_img->desc->app_size = ALIGN(gpccs->code.size, 256) +
+		ALIGN(gpccs->data.size, 256);
 	p_img->desc->app_start_offset = p_img->desc->bootloader_size;
 	p_img->desc->app_imem_offset = 0;
 	p_img->desc->app_imem_entry = 0;
 	p_img->desc->app_dmem_offset = 0;
 	p_img->desc->app_resident_code_offset = 0;
-	p_img->desc->app_resident_code_size =
-		ALIGN(g->ctxsw_ucode_info.gpccs.code.size, 256);
-	p_img->desc->app_resident_data_offset =
-		ALIGN(g->ctxsw_ucode_info.gpccs.data.offset, 256) -
-		ALIGN(g->ctxsw_ucode_info.gpccs.code.offset, 256);
-	p_img->desc->app_resident_data_size =
-		ALIGN(g->ctxsw_ucode_info.gpccs.data.size, 256);
-	p_img->data = (u32 *)((u8 *)g->ctxsw_ucode_info.surface_desc.cpu_va +
-		g->ctxsw_ucode_info.gpccs.boot.offset);
+	p_img->desc->app_resident_code_size = ALIGN(gpccs->code.size, 256);
+	p_img->desc->app_resident_data_offset = ALIGN(gpccs->data.offset, 256) -
+		ALIGN(gpccs->code.offset, 256);
+	p_img->desc->app_resident_data_size = ALIGN(gpccs->data.size, 256);
+	p_img->data = (u32 *)
+		((u8 *)nvgpu_gr_falcon_get_surface_desc_cpu_va(g->gr.falcon) +
+		gpccs->boot.offset);
 	p_img->data_size = ALIGN(p_img->desc->image_size, 256);
 	p_img->fw_ver = NULL;
 	p_img->header = NULL;
@@ -943,7 +931,7 @@ int nvgpu_acr_prepare_ucode_blob_v1(struct gk20a *g)
 	plsfm = &lsfm_l;
 	(void) memset((void *)plsfm, 0, sizeof(struct ls_flcn_mgr_v1));
 
-	err = nvgpu_gr_falcon_init_ctxsw_ucode(g);
+	err = nvgpu_gr_falcon_init_ctxsw_ucode(g, g->gr.falcon);
 	if (err != 0) {
 		nvgpu_err(g, "gr_falcon_init_ctxsw_ucode failed err=%d", err);
 		return err;
diff --git a/drivers/gpu/nvgpu/common/fifo/channel.c b/drivers/gpu/nvgpu/common/fifo/channel.c
index 87ab67185..2fe1a7d18 100644
--- a/drivers/gpu/nvgpu/common/fifo/channel.c
+++ b/drivers/gpu/nvgpu/common/fifo/channel.c
@@ -1483,7 +1483,7 @@ void nvgpu_channel_recover(struct gk20a *g, struct channel_gk20a *ch,
 	/* stop context switching to prevent engine assignments from
 	   changing until channel is recovered */
 	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
-	err = g->ops.gr.falcon.disable_ctxsw(g);
+	err = g->ops.gr.falcon.disable_ctxsw(g, g->gr.falcon);
 	if (err != 0) {
 		nvgpu_err(g, "failed to disable ctxsw");
 		goto fail;
@@ -1502,7 +1502,7 @@
 		}
 	}
 
-	err = g->ops.gr.falcon.enable_ctxsw(g);
+	err = g->ops.gr.falcon.enable_ctxsw(g, g->gr.falcon);
 	if (err != 0) {
 		nvgpu_err(g, "failed to enable ctxsw");
 	}
diff --git a/drivers/gpu/nvgpu/common/fifo/tsg.c b/drivers/gpu/nvgpu/common/fifo/tsg.c
index 66ce30497..adedc2fbf 100644
--- a/drivers/gpu/nvgpu/common/fifo/tsg.c
+++ b/drivers/gpu/nvgpu/common/fifo/tsg.c
@@ -289,7 +289,7 @@ void nvgpu_tsg_recover(struct gk20a *g, struct tsg_gk20a *tsg,
	 * changing until engine status is checked to make sure tsg
	 * being recovered is not loaded on the engines
	 */
-	err = g->ops.gr.falcon.disable_ctxsw(g);
+	err = g->ops.gr.falcon.disable_ctxsw(g, g->gr.falcon);
 
 	if (err != 0) {
 		/* if failed to disable ctxsw, just abort tsg */
@@ -306,7 +306,7 @@ void nvgpu_tsg_recover(struct gk20a *g, struct tsg_gk20a *tsg,
	 * By that time if tsg is not on the engine, engine need not
	 * be reset.
	 */
-	err = g->ops.gr.falcon.enable_ctxsw(g);
+	err = g->ops.gr.falcon.enable_ctxsw(g, g->gr.falcon);
 	if (err != 0) {
 		nvgpu_err(g, "failed to enable ctxsw");
 	}
diff --git a/drivers/gpu/nvgpu/common/gr/gr.c b/drivers/gpu/nvgpu/common/gr/gr.c
index d60c05c21..126fd3f14 100644
--- a/drivers/gpu/nvgpu/common/gr/gr.c
+++ b/drivers/gpu/nvgpu/common/gr/gr.c
@@ -271,6 +271,9 @@ static void gr_remove_support(struct gr_gk20a *gr)
 
 	nvgpu_gr_hwpm_map_deinit(g, gr->hwpm_map);
 
+	nvgpu_gr_falcon_remove_support(g, gr->falcon);
+	gr->falcon = NULL;
+
 	nvgpu_ecc_remove_support(g);
 	nvgpu_gr_zbc_deinit(g, gr->zbc);
 	nvgpu_gr_zcull_deinit(g, gr->zcull);
@@ -498,7 +501,31 @@ out:
 	return 0;
 }
 
-static void gr_init_prepare(struct gk20a *g)
+int nvgpu_gr_prepare_sw(struct gk20a *g)
+{
+	struct gr_gk20a *gr = &g->gr;
+	int err = 0;
+
+	nvgpu_log_fn(g, " ");
+
+	err = nvgpu_netlist_init_ctx_vars(g);
+	if (err != 0) {
+		nvgpu_err(g, "failed to parse netlist");
+		return err;
+	}
+
+	if (gr->falcon == NULL) {
+		gr->falcon = nvgpu_gr_falcon_init_support(g);
+		if (gr->falcon == NULL) {
+			nvgpu_err(g, "failed to init gr falcon");
+			err = -ENOMEM;
+			return err;
+		}
+	}
+	return err;
+}
+
+static void gr_init_prepare_hw(struct gk20a *g)
 {
 	/* reset gr engine */
 	g->ops.mc.reset(g, g->ops.mc.reset_mask(g, NVGPU_UNIT_GRAPH) |
@@ -520,13 +547,7 @@ int nvgpu_gr_enable_hw(struct gk20a *g)
 
 	nvgpu_log_fn(g, " ");
 
-	gr_init_prepare(g);
-
-	err = nvgpu_netlist_init_ctx_vars(g);
-	if (err != 0) {
-		nvgpu_err(g, "failed to parse netlist");
-		return err;
-	}
+	gr_init_prepare_hw(g);
 
 	err = gr_init_reset_enable_hw(g);
 	if (err != 0) {
@@ -541,30 +562,32 @@ int nvgpu_gr_enable_hw(struct gk20a *g)
 int nvgpu_gr_reset(struct gk20a *g)
 {
 	int err;
+	struct nvgpu_mutex *fecs_mutex =
+		nvgpu_gr_falcon_get_fecs_mutex(g->gr.falcon);
 
 	g->gr.initialized = false;
 
-	nvgpu_mutex_acquire(&g->gr.fecs_mutex);
+	nvgpu_mutex_acquire(fecs_mutex);
 
 	err = nvgpu_gr_enable_hw(g);
 	if (err != 0) {
-		nvgpu_mutex_release(&g->gr.fecs_mutex);
+		nvgpu_mutex_release(fecs_mutex);
 		return err;
 	}
 
 	err = gr_init_setup_hw(g);
 	if (err != 0) {
-		nvgpu_mutex_release(&g->gr.fecs_mutex);
+		nvgpu_mutex_release(fecs_mutex);
 		return err;
 	}
 
-	err = nvgpu_gr_falcon_init_ctxsw(g);
+	err = nvgpu_gr_falcon_init_ctxsw(g, g->gr.falcon);
 	if (err != 0) {
-		nvgpu_mutex_release(&g->gr.fecs_mutex);
+		nvgpu_mutex_release(fecs_mutex);
 		return err;
 	}
 
-	nvgpu_mutex_release(&g->gr.fecs_mutex);
+	nvgpu_mutex_release(fecs_mutex);
 
 	/* this appears query for sw states but fecs actually init
 	   ramchain, etc so this is hw init */
@@ -598,14 +621,7 @@ int nvgpu_gr_init_support(struct gk20a *g)
 
 	g->gr.initialized = false;
 
-	/* this is required before gr_gk20a_init_ctx_state */
-	err = nvgpu_mutex_init(&g->gr.fecs_mutex);
-	if (err != 0) {
-		nvgpu_err(g, "Error in gr.fecs_mutex initialization");
-		return err;
-	}
-
-	err = nvgpu_gr_falcon_init_ctxsw(g);
+	err = nvgpu_gr_falcon_init_ctxsw(g, g->gr.falcon);
 	if (err != 0) {
 		return err;
 	}
diff --git a/drivers/gpu/nvgpu/common/gr/gr_falcon.c b/drivers/gpu/nvgpu/common/gr/gr_falcon.c
index 650b19764..3918763a4 100644
--- a/drivers/gpu/nvgpu/common/gr/gr_falcon.c
+++ b/drivers/gpu/nvgpu/common/gr/gr_falcon.c
@@ -37,6 +37,53 @@
 
 #include "gr_falcon_priv.h"
 
+#define NVGPU_FECS_UCODE_IMAGE "fecs.bin"
+#define NVGPU_GPCCS_UCODE_IMAGE "gpccs.bin"
+
+struct nvgpu_gr_falcon *nvgpu_gr_falcon_init_support(struct gk20a *g)
+{
+	struct nvgpu_gr_falcon *falcon;
+	int err = 0;
+
+	nvgpu_log_fn(g, " ");
+
+	falcon = nvgpu_kzalloc(g, sizeof(*falcon));
+	if (falcon == NULL) {
+		return falcon;
+	}
+
+	err = nvgpu_mutex_init(&falcon->ctxsw_disable_mutex);
+	if (err != 0) {
+		nvgpu_err(g, "Error in ctxsw_disable_mutex init");
+		goto done;
+	}
+	falcon->ctxsw_disable_count = 0;
+
+	err = nvgpu_mutex_init(&falcon->fecs_mutex);
+	if (err != 0) {
+		nvgpu_err(g, "Error in fecs_mutex init");
+		goto done;
+	}
+
+done:
+	if (err != 0) {
+		nvgpu_kfree(g, falcon);
+		falcon = NULL;
+	}
+	return falcon;
+}
+
+void nvgpu_gr_falcon_remove_support(struct gk20a *g,
+		struct nvgpu_gr_falcon *falcon)
+{
+	nvgpu_log_fn(g, " ");
+
+	if (falcon == NULL) {
+		return;
+	}
+	nvgpu_kfree(g, falcon);
+}
+
 int nvgpu_gr_falcon_bind_fecs_elpg(struct gk20a *g)
 {
 	struct nvgpu_pmu *pmu = &g->pmu;
@@ -88,13 +135,13 @@ int nvgpu_gr_falcon_bind_fecs_elpg(struct gk20a *g)
 	return err;
 }
 
-int nvgpu_gr_falcon_init_ctxsw(struct gk20a *g)
+int nvgpu_gr_falcon_init_ctxsw(struct gk20a *g, struct nvgpu_gr_falcon *falcon)
 {
 	int err = 0;
 
 	nvgpu_log_fn(g, " ");
 
-	err = g->ops.gr.falcon.load_ctxsw_ucode(g);
+	err = g->ops.gr.falcon.load_ctxsw_ucode(g, falcon);
 	if (err != 0) {
 		goto out;
 	}
@@ -145,11 +192,12 @@ out:
 	return err;
 }
 
-static int nvgpu_gr_falcon_init_ctxsw_ucode_vaspace(struct gk20a *g)
+static int nvgpu_gr_falcon_init_ctxsw_ucode_vaspace(struct gk20a *g,
+		struct nvgpu_gr_falcon *falcon)
 {
 	struct mm_gk20a *mm = &g->mm;
 	struct vm_gk20a *vm = mm->pmu.vm;
-	struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
+	struct nvgpu_ctxsw_ucode_info *ucode_info = &falcon->ctxsw_ucode_info;
 	int err;
 
 	err = g->ops.mm.alloc_inst_block(g, &ucode_info->inst_blk_desc);
@@ -176,7 +224,7 @@
 }
 
 static void nvgpu_gr_falcon_init_ctxsw_ucode_segment(
-	struct gk20a_ctxsw_ucode_segment *p_seg, u32 *offset, u32 size)
+	struct nvgpu_ctxsw_ucode_segment *p_seg, u32 *offset, u32 size)
 {
 	p_seg->offset = *offset;
 	p_seg->size = size;
@@ -184,7 +232,7 @@
 }
 
 static void nvgpu_gr_falcon_init_ctxsw_ucode_segments(
-	struct gk20a_ctxsw_ucode_segments *segments, u32 *offset,
+	struct nvgpu_ctxsw_ucode_segments *segments, u32 *offset,
 	struct nvgpu_ctxsw_bootloader_desc *bootdesc,
 	u32 code_size, u32 data_size)
 {
@@ -203,7 +251,7 @@
 
 static int nvgpu_gr_falcon_copy_ctxsw_ucode_segments(
 	struct gk20a *g, struct nvgpu_mem *dst,
-	struct gk20a_ctxsw_ucode_segments *segments,
+	struct nvgpu_ctxsw_ucode_segments *segments,
 	u32 *bootimage, u32 *code, u32 *data)
 {
@@ -225,7 +273,8 @@
 	return 0;
 }
 
-int nvgpu_gr_falcon_init_ctxsw_ucode(struct gk20a *g)
+int nvgpu_gr_falcon_init_ctxsw_ucode(struct gk20a *g,
+		struct nvgpu_gr_falcon *falcon)
 {
 	struct mm_gk20a *mm = &g->mm;
 	struct vm_gk20a *vm = mm->pmu.vm;
@@ -235,11 +284,11 @@
 	struct nvgpu_firmware *gpccs_fw;
 	u32 *fecs_boot_image;
 	u32 *gpccs_boot_image;
-	struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
+	struct nvgpu_ctxsw_ucode_info *ucode_info = &falcon->ctxsw_ucode_info;
 	u32 ucode_size;
 	int err = 0;
 
-	fecs_fw = nvgpu_request_firmware(g, GK20A_FECS_UCODE_IMAGE, 0);
+	fecs_fw = nvgpu_request_firmware(g, NVGPU_FECS_UCODE_IMAGE, 0);
 	if (fecs_fw == NULL) {
 		nvgpu_err(g, "failed to load fecs ucode!!");
 		return -ENOENT;
@@ -249,7 +298,7 @@
 	fecs_boot_image = (void *)(fecs_fw->data +
 				sizeof(struct nvgpu_ctxsw_bootloader_desc));
 
-	gpccs_fw = nvgpu_request_firmware(g, GK20A_GPCCS_UCODE_IMAGE, 0);
+	gpccs_fw = nvgpu_request_firmware(g, NVGPU_GPCCS_UCODE_IMAGE, 0);
 	if (gpccs_fw == NULL) {
 		nvgpu_release_firmware(g, fecs_fw);
 		nvgpu_err(g, "failed to load gpccs ucode!!");
@@ -293,7 +342,7 @@
 	nvgpu_release_firmware(g, gpccs_fw);
 	gpccs_fw = NULL;
 
-	err = nvgpu_gr_falcon_init_ctxsw_ucode_vaspace(g);
+	err = nvgpu_gr_falcon_init_ctxsw_ucode_vaspace(g, falcon);
 	if (err != 0) {
 		goto clean_up;
 	}
@@ -352,9 +401,11 @@ static void nvgpu_gr_falcon_load_imem(struct gk20a *g)
 	nvgpu_log_fn(g, "done");
 }
 
-static void nvgpu_gr_falcon_bind_instblk(struct gk20a *g)
+static void nvgpu_gr_falcon_bind_instblk(struct gk20a *g,
+		struct nvgpu_gr_falcon *falcon)
 {
-	struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
+	struct nvgpu_ctxsw_ucode_info *ucode_info =
+		&falcon->ctxsw_ucode_info;
 	u64 inst_ptr;
 
 	inst_ptr = nvgpu_inst_block_addr(g, &ucode_info->inst_blk_desc);
@@ -365,7 +416,7 @@
 }
 
 static void nvgpu_gr_falcon_load_ctxsw_ucode_header(struct gk20a *g,
-	u64 addr_base, struct gk20a_ctxsw_ucode_segments *segments,
+	u64 addr_base, struct nvgpu_ctxsw_ucode_segments *segments,
 	u32 reg_offset)
 {
 	u32 addr_code32 = u64_lo32((addr_base + segments->code.offset) >> 8);
@@ -377,7 +428,7 @@
 }
 
 static void nvgpu_gr_falcon_load_ctxsw_ucode_boot(struct gk20a *g,
-	u64 addr_base, struct gk20a_ctxsw_ucode_segments *segments,
+	u64 addr_base, struct nvgpu_ctxsw_ucode_segments *segments,
 	u32 reg_offset)
 {
 	u32 addr_load32 = u64_lo32((addr_base + segments->boot.offset) >> 8);
@@ -391,7 +442,7 @@
 
 static void nvgpu_gr_falcon_load_ctxsw_ucode_segments(
 	struct gk20a *g, u64 addr_base,
-	struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset)
+	struct nvgpu_ctxsw_ucode_segments *segments, u32 reg_offset)
 {
 
 	/* Copy falcon bootloader into dmem */
@@ -402,24 +453,28 @@
 }
 
-static void nvgpu_gr_falcon_load_with_bootloader(struct gk20a *g)
+static void nvgpu_gr_falcon_load_with_bootloader(struct gk20a *g,
+		struct nvgpu_gr_falcon *falcon)
 {
-	struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
+	struct nvgpu_ctxsw_ucode_info *ucode_info =
+		&falcon->ctxsw_ucode_info;
 	u64 addr_base = ucode_info->surface_desc.gpu_va;
 
-	nvgpu_gr_falcon_bind_instblk(g);
+	nvgpu_gr_falcon_bind_instblk(g, falcon);
 
 	nvgpu_gr_falcon_load_ctxsw_ucode_segments(g, addr_base,
-		&g->ctxsw_ucode_info.fecs, 0);
+		&falcon->ctxsw_ucode_info.fecs, 0);
 
 	nvgpu_gr_falcon_load_ctxsw_ucode_segments(g, addr_base,
-		&g->ctxsw_ucode_info.gpccs,
+		&falcon->ctxsw_ucode_info.gpccs,
 		g->ops.gr.falcon.get_gpccs_start_reg_offset());
 }
 
-int nvgpu_gr_falcon_load_ctxsw_ucode(struct gk20a *g)
+int nvgpu_gr_falcon_load_ctxsw_ucode(struct gk20a *g,
+		struct nvgpu_gr_falcon *falcon)
 {
 	int err;
+	struct gr_gk20a *gr = &g->gr;
 
 	nvgpu_log_fn(g, " ");
 
@@ -436,32 +491,35 @@
 		nvgpu_gr_falcon_load_imem(g);
 		g->ops.gr.falcon.start_ucode(g);
 	} else {
-		if (!g->gr.skip_ucode_init) {
-			err = nvgpu_gr_falcon_init_ctxsw_ucode(g);
+		if (!gr->falcon->skip_ucode_init) {
+			err = nvgpu_gr_falcon_init_ctxsw_ucode(g, falcon);
 			if (err != 0) {
 				return err;
 			}
 		}
-		nvgpu_gr_falcon_load_with_bootloader(g);
-		g->gr.skip_ucode_init = true;
+		nvgpu_gr_falcon_load_with_bootloader(g, falcon);
+		gr->falcon->skip_ucode_init = true;
 	}
 	nvgpu_log_fn(g, "done");
 	return 0;
 }
 
-static void nvgpu_gr_falcon_load_gpccs_with_bootloader(struct gk20a *g)
+static void nvgpu_gr_falcon_load_gpccs_with_bootloader(struct gk20a *g,
+		struct nvgpu_gr_falcon *falcon)
 {
-	struct gk20a_ctxsw_ucode_info *ucode_info = &g->ctxsw_ucode_info;
+	struct nvgpu_ctxsw_ucode_info *ucode_info =
+		&falcon->ctxsw_ucode_info;
 	u64 addr_base = ucode_info->surface_desc.gpu_va;
 
-	nvgpu_gr_falcon_bind_instblk(g);
+	nvgpu_gr_falcon_bind_instblk(g, falcon);
 
 	nvgpu_gr_falcon_load_ctxsw_ucode_segments(g, addr_base,
-		&g->ctxsw_ucode_info.gpccs,
+		&falcon->ctxsw_ucode_info.gpccs,
 		g->ops.gr.falcon.get_gpccs_start_reg_offset());
 }
 
-int nvgpu_gr_falcon_load_secure_ctxsw_ucode(struct gk20a *g)
+int nvgpu_gr_falcon_load_secure_ctxsw_ucode(struct gk20a *g,
+		struct nvgpu_gr_falcon *falcon)
 {
 	int err = 0;
 	u8 falcon_id_mask = 0;
@@ -475,12 +533,12 @@
 	if (nvgpu_is_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE)) {
 		/* this must be recovery so bootstrap fecs and gpccs */
 		if (!nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) {
-			nvgpu_gr_falcon_load_gpccs_with_bootloader(g);
+			nvgpu_gr_falcon_load_gpccs_with_bootloader(g, falcon);
 			err = nvgpu_pmu_lsfm_bootstrap_ls_falcon(g, &g->pmu,
 				g->pmu.lsfm, BIT32(FALCON_ID_FECS));
 		} else {
 			/* bind WPR VA inst block */
-			nvgpu_gr_falcon_bind_instblk(g);
+			nvgpu_gr_falcon_bind_instblk(g, falcon);
 			if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SEC2_RTOS)) {
 				err = nvgpu_sec2_bootstrap_ls_falcons(g,
 					&g->sec2, FALCON_ID_FECS);
@@ -508,10 +566,10 @@
 		/* cold boot or rg exit */
 		nvgpu_set_enabled(g, NVGPU_PMU_FECS_BOOTSTRAP_DONE, true);
 		if (!nvgpu_is_enabled(g, NVGPU_SEC_SECUREGPCCS)) {
-			nvgpu_gr_falcon_load_gpccs_with_bootloader(g);
+			nvgpu_gr_falcon_load_gpccs_with_bootloader(g, falcon);
 		} else {
 			/* bind WPR VA inst block */
-			nvgpu_gr_falcon_bind_instblk(g);
+			nvgpu_gr_falcon_bind_instblk(g, falcon);
 			if (nvgpu_acr_is_lsf_lazy_bootstrap(g, g->acr,
 					FALCON_ID_FECS)) {
 				falcon_id_mask |= BIT8(FALCON_ID_FECS);
@@ -558,53 +616,55 @@
 * to pmu elpg sequence. It could come as pmu halt or abort or
 * maybe ext error too.
 */
-int nvgpu_gr_falcon_disable_ctxsw(struct gk20a *g)
+int nvgpu_gr_falcon_disable_ctxsw(struct gk20a *g,
+		struct nvgpu_gr_falcon *falcon)
 {
 	int err = 0;
 
 	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
 
-	nvgpu_mutex_acquire(&g->ctxsw_disable_lock);
-	g->ctxsw_disable_count++;
-	if (g->ctxsw_disable_count == 1) {
+	nvgpu_mutex_acquire(&falcon->ctxsw_disable_mutex);
+	falcon->ctxsw_disable_count++;
+	if (falcon->ctxsw_disable_count == 1) {
 		err = nvgpu_pg_elpg_disable(g);
 		if (err != 0) {
 			nvgpu_err(g, "failed to disable elpg for stop_ctxsw");
 			/* stop ctxsw command is not sent */
-			g->ctxsw_disable_count--;
+			falcon->ctxsw_disable_count--;
 		} else {
 			err = g->ops.gr.falcon.ctrl_ctxsw(g,
 				NVGPU_GR_FALCON_METHOD_CTXSW_STOP, 0U, NULL);
 			if (err != 0) {
 				nvgpu_err(g, "failed to stop fecs ctxsw");
 				/* stop ctxsw failed */
-				g->ctxsw_disable_count--;
+				falcon->ctxsw_disable_count--;
 			}
 		}
 	} else {
 		nvgpu_log_info(g, "ctxsw disabled, ctxsw_disable_count: %d",
-			g->ctxsw_disable_count);
+			falcon->ctxsw_disable_count);
 	}
-	nvgpu_mutex_release(&g->ctxsw_disable_lock);
+	nvgpu_mutex_release(&falcon->ctxsw_disable_mutex);
 
 	return err;
 }
 
 /* Start processing (continue) context switches at FECS */
-int nvgpu_gr_falcon_enable_ctxsw(struct gk20a *g)
+int nvgpu_gr_falcon_enable_ctxsw(struct gk20a *g,
+		struct nvgpu_gr_falcon *falcon)
 {
 	int err = 0;
 
 	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " ");
 
-	nvgpu_mutex_acquire(&g->ctxsw_disable_lock);
-	if (g->ctxsw_disable_count == 0) {
+	nvgpu_mutex_acquire(&falcon->ctxsw_disable_mutex);
+	if (falcon->ctxsw_disable_count == 0) {
 		goto ctxsw_already_enabled;
 	}
-	g->ctxsw_disable_count--;
-	WARN_ON(g->ctxsw_disable_count < 0);
-	if (g->ctxsw_disable_count == 0) {
+	falcon->ctxsw_disable_count--;
+	WARN_ON(falcon->ctxsw_disable_count < 0);
+	if (falcon->ctxsw_disable_count == 0) {
 		err = g->ops.gr.falcon.ctrl_ctxsw(g,
 			NVGPU_GR_FALCON_METHOD_CTXSW_START, 0U, NULL);
 		if (err != 0) {
@@ -617,10 +677,10 @@
 		}
 	} else {
 		nvgpu_log_info(g, "ctxsw_disable_count: %d is not 0 yet",
-			g->ctxsw_disable_count);
+			falcon->ctxsw_disable_count);
 	}
 ctxsw_already_enabled:
-	nvgpu_mutex_release(&g->ctxsw_disable_lock);
+	nvgpu_mutex_release(&falcon->ctxsw_disable_mutex);
 
 	return err;
 }
@@ -630,3 +690,23 @@
 	return g->ops.gr.falcon.ctrl_ctxsw(g,
 			NVGPU_GR_FALCON_METHOD_HALT_PIPELINE, 0U, NULL);
 }
+
+struct nvgpu_mutex *nvgpu_gr_falcon_get_fecs_mutex(
+		struct nvgpu_gr_falcon *falcon)
+{
+	return &falcon->fecs_mutex;
+}
+struct nvgpu_ctxsw_ucode_segments *nvgpu_gr_falcon_get_fecs_ucode_segments(
+		struct nvgpu_gr_falcon *falcon)
+{
+	return &falcon->ctxsw_ucode_info.fecs;
+}
+struct nvgpu_ctxsw_ucode_segments *nvgpu_gr_falcon_get_gpccs_ucode_segments(
+		struct nvgpu_gr_falcon *falcon)
+{
+	return &falcon->ctxsw_ucode_info.gpccs;
+}
+void *nvgpu_gr_falcon_get_surface_desc_cpu_va(struct nvgpu_gr_falcon *falcon)
+{
+	return falcon->ctxsw_ucode_info.surface_desc.cpu_va;
+}
diff --git a/drivers/gpu/nvgpu/common/gr/gr_falcon_priv.h b/drivers/gpu/nvgpu/common/gr/gr_falcon_priv.h
index e9a940f67..6f5a85880 100644
--- a/drivers/gpu/nvgpu/common/gr/gr_falcon_priv.h
+++ b/drivers/gpu/nvgpu/common/gr/gr_falcon_priv.h
@@ -24,6 +24,9 @@
 #define GR_FALOCN_PRIV_H
 
 #include
+#include
+
+struct nvgpu_ctxsw_ucode_segments;
 
 struct nvgpu_fecs_method_op {
 	struct {
@@ -54,5 +57,66 @@ struct nvgpu_ctxsw_bootloader_desc {
 	u32 entry_point;
 };
 
+struct nvgpu_ctxsw_ucode_info {
+	u64 *p_va;
+	struct nvgpu_mem inst_blk_desc;
+	struct nvgpu_mem surface_desc;
+	struct nvgpu_ctxsw_ucode_segments fecs;
+	struct nvgpu_ctxsw_ucode_segments gpccs;
+};
+
+struct nvgpu_gr_falcon {
+	struct nvgpu_ctxsw_ucode_info ctxsw_ucode_info;
+	struct nvgpu_mutex ctxsw_disable_mutex;
+	int ctxsw_disable_count;
+	struct nvgpu_mutex fecs_mutex; /* protect fecs method */
+	bool skip_ucode_init;
+};
+
+enum wait_ucode_status {
+	WAIT_UCODE_LOOP,
+	WAIT_UCODE_TIMEOUT,
+	WAIT_UCODE_ERROR,
+	WAIT_UCODE_OK
+};
+
+enum {
+	GR_IS_UCODE_OP_EQUAL,
+	GR_IS_UCODE_OP_NOT_EQUAL,
+	GR_IS_UCODE_OP_AND,
+	GR_IS_UCODE_OP_LESSER,
+	GR_IS_UCODE_OP_LESSER_EQUAL,
+	GR_IS_UCODE_OP_SKIP
+};
+
+enum {
+	eUcodeHandshakeInitComplete = 1,
+	eUcodeHandshakeMethodFinished
+};
+
+/* sums over the ucode files as sequences of u32, computed to the
+ * boot_signature field in the structure above */
+
+/* T18X FECS remains same as T21X,
+ * so FALCON_UCODE_SIG_T21X_FECS_WITH_RESERVED used
+ * for T18X*/
+#define FALCON_UCODE_SIG_T18X_GPCCS_WITH_RESERVED	0x68edab34U
+#define FALCON_UCODE_SIG_T21X_FECS_WITH_DMEM_SIZE	0x9121ab5cU
+#define FALCON_UCODE_SIG_T21X_FECS_WITH_RESERVED	0x9125ab5cU
+#define FALCON_UCODE_SIG_T12X_FECS_WITH_RESERVED	0x8a621f78U
+#define FALCON_UCODE_SIG_T12X_FECS_WITHOUT_RESERVED	0x67e5344bU
+#define FALCON_UCODE_SIG_T12X_FECS_OLDER	0x56da09fU
+
+#define FALCON_UCODE_SIG_T21X_GPCCS_WITH_RESERVED	0x3d3d65e2U
+#define FALCON_UCODE_SIG_T12X_GPCCS_WITH_RESERVED	0x303465d5U
+#define FALCON_UCODE_SIG_T12X_GPCCS_WITHOUT_RESERVED	0x3fdd33d3U
+#define FALCON_UCODE_SIG_T12X_GPCCS_OLDER	0x53d7877U
+
+#define FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED	0x93671b7dU
+#define FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED2	0x4d6cbc10U
+
+#define FALCON_UCODE_SIG_T21X_GPCCS_WITHOUT_RESERVED	0x393161daU
+
 #endif /* GR_FALOCN_PRIV_H */
diff --git a/drivers/gpu/nvgpu/common/init/nvgpu_init.c b/drivers/gpu/nvgpu/common/init/nvgpu_init.c
index ad5fb6ef8..483636535 100644
--- a/drivers/gpu/nvgpu/common/init/nvgpu_init.c
+++ b/drivers/gpu/nvgpu/common/init/nvgpu_init.c
@@ -331,6 +331,14 @@ int gk20a_finalize_poweron(struct gk20a *g)
 		}
 	}
 
+	/* prepare portion of sw required for enable hw */
+	err = nvgpu_gr_prepare_sw(g);
+	if (err != 0) {
+		nvgpu_err(g, "failed to prepare sw");
+		nvgpu_mutex_release(&g->tpc_pg_lock);
+		goto done;
+	}
+
 	err = nvgpu_gr_enable_hw(g);
 	if (err != 0) {
 		nvgpu_err(g, "failed to enable gr");
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 4abeac26d..d9597aa93 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -176,7 +176,7 @@ int gk20a_fifo_deferred_reset(struct gk20a *g, struct channel_gk20a *ch)
 		return 0;
 	}
 
-	err = g->ops.gr.falcon.disable_ctxsw(g);
+	err = g->ops.gr.falcon.disable_ctxsw(g, g->gr.falcon);
 	if (err != 0) {
 		nvgpu_err(g, "failed to disable ctxsw");
 		goto fail;
@@ -211,7 +211,7 @@ int gk20a_fifo_deferred_reset(struct gk20a *g, struct channel_gk20a *ch)
 	nvgpu_mutex_release(&f->deferred_reset_mutex);
 
 clean_up:
-	err = g->ops.gr.falcon.enable_ctxsw(g);
+	err = g->ops.gr.falcon.enable_ctxsw(g, g->gr.falcon);
 	if (err != 0) {
 		nvgpu_err(g, "failed to enable ctxsw");
 	}
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index ece997f52..e42daa562 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -2326,7 +2326,7 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
	 * at that point the hardware state can be inspected to
	 * determine if the context we're interested in is current.
	 */
-	err = g->ops.gr.falcon.disable_ctxsw(g);
+	err = g->ops.gr.falcon.disable_ctxsw(g, g->gr.falcon);
 	if (err != 0) {
 		nvgpu_err(g, "unable to stop gr ctxsw");
 		/* this should probably be ctx-fatal... */
@@ -2343,7 +2343,7 @@ int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 	err = __gr_gk20a_exec_ctx_ops(ch, ctx_ops, num_ops, num_ctx_wr_ops,
 				      num_ctx_rd_ops, ch_is_curr_ctx);
 
-	tmp_err = g->ops.gr.falcon.enable_ctxsw(g);
+	tmp_err = g->ops.gr.falcon.enable_ctxsw(g, g->gr.falcon);
 	if (tmp_err != 0) {
 		nvgpu_err(g, "unable to restart ctxsw!");
 		err = tmp_err;
@@ -2689,7 +2689,7 @@ int gr_gk20a_suspend_contexts(struct gk20a *g,
 
 	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
 
-	err = g->ops.gr.falcon.disable_ctxsw(g);
+	err = g->ops.gr.falcon.disable_ctxsw(g, g->gr.falcon);
 	if (err != 0) {
 		nvgpu_err(g, "unable to stop gr ctxsw");
 		goto clean_up;
@@ -2709,7 +2709,7 @@ int gr_gk20a_suspend_contexts(struct gk20a *g,
 
 	nvgpu_mutex_release(&dbg_s->ch_list_lock);
 
-	err = g->ops.gr.falcon.enable_ctxsw(g);
+	err = g->ops.gr.falcon.enable_ctxsw(g, g->gr.falcon);
 	if (err != 0) {
 		nvgpu_err(g, "unable to restart ctxsw!");
 	}
@@ -2734,7 +2734,7 @@ int gr_gk20a_resume_contexts(struct gk20a *g,
 
 	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
 
-	err = g->ops.gr.falcon.disable_ctxsw(g);
+	err = g->ops.gr.falcon.disable_ctxsw(g, g->gr.falcon);
 	if (err != 0) {
 		nvgpu_err(g, "unable to stop gr ctxsw");
 		goto clean_up;
@@ -2750,7 +2750,7 @@ int gr_gk20a_resume_contexts(struct gk20a *g,
 		}
 	}
 
-	err = g->ops.gr.falcon.enable_ctxsw(g);
+	err = g->ops.gr.falcon.enable_ctxsw(g, g->gr.falcon);
 	if (err != 0) {
 		nvgpu_err(g, "unable to restart ctxsw!");
 	}
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index 752cbf96f..81b068f7a 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -31,13 +31,8 @@
 #include
 #include
 
-#define GR_FECS_POLL_INTERVAL	5U /* usec */
-
 #define INVALID_MAX_WAYS	0xFFFFFFFFU
 
-#define GK20A_FECS_UCODE_IMAGE	"fecs.bin"
-#define GK20A_GPCCS_UCODE_IMAGE	"gpccs.bin"
-
 #define GK20A_TIMEOUT_FPGA	100000U /* 100 sec */
 
 /* Flags to be passed to g->ops.gr.alloc_obj_ctx() */
@@ -49,6 +44,7 @@ struct nvgpu_gr_ctx;
 struct channel_gk20a;
 struct nvgpu_warpstate;
 struct nvgpu_gr_ctx_desc;
+struct nvgpu_gr_falcon;
 struct nvgpu_gr_global_ctx_buffer_desc;
 struct nvgpu_gr_global_ctx_local_golden_image;
 struct nvgpu_gr_zbc;
@@ -58,27 +54,6 @@ struct nvgpu_gr_ctx_desc;
 
 enum ctxsw_addr_type;
 
-enum wait_ucode_status {
-	WAIT_UCODE_LOOP,
-	WAIT_UCODE_TIMEOUT,
-	WAIT_UCODE_ERROR,
-	WAIT_UCODE_OK
-};
-
-enum {
-	GR_IS_UCODE_OP_EQUAL,
-	GR_IS_UCODE_OP_NOT_EQUAL,
-	GR_IS_UCODE_OP_AND,
-	GR_IS_UCODE_OP_LESSER,
-	GR_IS_UCODE_OP_LESSER_EQUAL,
-	GR_IS_UCODE_OP_SKIP
-};
-
-enum {
-	eUcodeHandshakeInitComplete = 1,
-	eUcodeHandshakeMethodFinished
-};
-
 enum {
 	ELCG_MODE = (1 << 0),
 	BLCG_MODE = (1 << 1),
@@ -135,7 +110,6 @@ struct gr_gk20a {
 	} ctx_vars;
 
 	struct nvgpu_mutex ctx_mutex; /* protect golden ctx init */
-	struct nvgpu_mutex fecs_mutex; /* protect fecs method */
 
 	struct nvgpu_cond init_wq;
 	bool initialized;
@@ -160,6 +134,8 @@ struct gr_gk20a {
 
 	struct nvgpu_gr_zbc *zbc;
 
+	struct nvgpu_gr_falcon *falcon;
+
 #define GR_CHANNEL_MAP_TLB_SIZE		2U /* must of power of 2 */
 	struct gr_channel_map_tlb_entry chid_tlb[GR_CHANNEL_MAP_TLB_SIZE];
 	u32 channel_tlb_flush_index;
@@ -167,7 +143,6 @@ struct gr_gk20a {
 
 	void (*remove_support)(struct gr_gk20a *gr);
 	bool sw_ready;
-	bool skip_ucode_init;
 
 	u32 fecs_feature_override_ecc_val;
@@ -184,50 +159,7 @@ struct gr_gk20a {
 	u32 max_ctxsw_ring_buffer_size;
 };
 
-struct gk20a_ctxsw_ucode_segment {
-	u32 offset;
-	u32 size;
-};
-
-struct gk20a_ctxsw_ucode_segments {
-	u32 boot_entry;
-	u32 boot_imem_offset;
-	u32 boot_signature;
-	struct gk20a_ctxsw_ucode_segment boot;
-	struct gk20a_ctxsw_ucode_segment code;
-	struct gk20a_ctxsw_ucode_segment data;
-};
-
-/* sums over the ucode files as sequences of u32, computed to the
- * boot_signature field in the structure above */
-
-/* T18X FECS remains same as T21X,
- * so FALCON_UCODE_SIG_T21X_FECS_WITH_RESERVED used
- * for T18X*/
-#define FALCON_UCODE_SIG_T18X_GPCCS_WITH_RESERVED	0x68edab34U
-#define FALCON_UCODE_SIG_T21X_FECS_WITH_DMEM_SIZE	0x9121ab5cU
-#define FALCON_UCODE_SIG_T21X_FECS_WITH_RESERVED	0x9125ab5cU
-#define FALCON_UCODE_SIG_T12X_FECS_WITH_RESERVED	0x8a621f78U
-#define FALCON_UCODE_SIG_T12X_FECS_WITHOUT_RESERVED	0x67e5344bU
-#define FALCON_UCODE_SIG_T12X_FECS_OLDER	0x56da09fU
-
-#define FALCON_UCODE_SIG_T21X_GPCCS_WITH_RESERVED	0x3d3d65e2U
-#define FALCON_UCODE_SIG_T12X_GPCCS_WITH_RESERVED	0x303465d5U
-#define FALCON_UCODE_SIG_T12X_GPCCS_WITHOUT_RESERVED	0x3fdd33d3U
-#define FALCON_UCODE_SIG_T12X_GPCCS_OLDER	0x53d7877U
-
-#define FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED	0x93671b7dU
-#define FALCON_UCODE_SIG_T21X_FECS_WITHOUT_RESERVED2	0x4d6cbc10U
-
-#define FALCON_UCODE_SIG_T21X_GPCCS_WITHOUT_RESERVED	0x393161daU
-
-struct gk20a_ctxsw_ucode_info {
-	u64 *p_va;
-	struct nvgpu_mem inst_blk_desc;
-	struct nvgpu_mem surface_desc;
-	struct gk20a_ctxsw_ucode_segments fecs;
-	struct gk20a_ctxsw_ucode_segments gpccs;
-};
-
 
 struct nvgpu_warpstate {
 	u64 valid_warps[2];
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
index bd327e499..3601352ce 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c
@@ -695,7 +695,7 @@ int gm20b_gr_clear_sm_error_state(struct gk20a *g,
 	(void) memset(&tsg->sm_error_states[sm_id], 0,
 		sizeof(*tsg->sm_error_states));
 
-	err = g->ops.gr.falcon.disable_ctxsw(g);
+	err = g->ops.gr.falcon.disable_ctxsw(g, g->gr.falcon);
 	if (err != 0) {
 		nvgpu_err(g, "unable to stop gr ctxsw");
 		goto fail;
@@ -716,7 +716,7 @@ int gm20b_gr_clear_sm_error_state(struct gk20a *g,
 			0);
 	}
 
-	err = g->ops.gr.falcon.enable_ctxsw(g);
+	err = g->ops.gr.falcon.enable_ctxsw(g, g->gr.falcon);
 
 fail:
 	nvgpu_mutex_release(&g->dbg_sessions_lock);
diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.h b/drivers/gpu/nvgpu/gm20b/gr_gm20b.h
index b22498f78..ab229c7e7 100644
--- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.h
@@ -57,8 +57,6 @@ void gr_gm20b_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
 				u32 **sm_dsm_perf_ctrl_regs,
 				u32 *ctrl_register_stride);
 void gr_gm20b_set_gpc_tpc_mask(struct gk20a *g, u32 gpc_index);
-void gr_gm20b_load_ctxsw_ucode_segments(struct gk20a *g, u64 addr_base,
-	struct gk20a_ctxsw_ucode_segments *segments, u32 reg_offset);
 bool gr_gm20b_is_tpc_addr(struct gk20a *g, u32 addr);
 u32 gr_gm20b_get_tpc_num(struct gk20a *g, u32 addr);
 int gr_gm20b_dump_gr_status_regs(struct gk20a *g,
diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
index 7f984684d..239b9472d 100644
--- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c
@@ -1053,7 +1053,7 @@ int gr_gp10b_suspend_contexts(struct gk20a *g,
 
 	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
 
-	err = g->ops.gr.falcon.disable_ctxsw(g);
+	err = g->ops.gr.falcon.disable_ctxsw(g, g->gr.falcon);
 	if (err != 0) {
 		nvgpu_err(g, "unable to stop gr ctxsw");
 		nvgpu_mutex_release(&g->dbg_sessions_lock);
@@ -1078,7 +1078,7 @@ int gr_gp10b_suspend_contexts(struct gk20a *g,
 
 	nvgpu_mutex_release(&dbg_s->ch_list_lock);
 
-	err = g->ops.gr.falcon.enable_ctxsw(g);
+	err = g->ops.gr.falcon.enable_ctxsw(g, g->gr.falcon);
 	if (err != 0) {
 		nvgpu_mutex_release(&g->dbg_sessions_lock);
 		goto clean_up;
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index 5b57243c4..c3aaaf390 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -3185,7 +3185,7 @@ int gv11b_gr_clear_sm_error_state(struct gk20a *g,
 	(void)memset(&tsg->sm_error_states[sm_id], 0,
 		sizeof(*tsg->sm_error_states));
 
-	err = g->ops.gr.falcon.disable_ctxsw(g);
+	err = g->ops.gr.falcon.disable_ctxsw(g, g->gr.falcon);
 	if (err != 0) {
 		nvgpu_err(g, "unable to stop gr ctxsw");
 		goto fail;
@@ -3217,7 +3217,7 @@ int gv11b_gr_clear_sm_error_state(struct gk20a *g,
 			0);
 	}
 
-	err = g->ops.gr.falcon.enable_ctxsw(g);
+	err = g->ops.gr.falcon.enable_ctxsw(g, g->gr.falcon);
 
 fail:
 	nvgpu_mutex_release(&g->dbg_sessions_lock);
diff --git a/drivers/gpu/nvgpu/hal/gr/falcon/gr_falcon_gm20b.c b/drivers/gpu/nvgpu/hal/gr/falcon/gr_falcon_gm20b.c
index a46f8185a..7af921faf 100644
--- a/drivers/gpu/nvgpu/hal/gr/falcon/gr_falcon_gm20b.c
+++ b/drivers/gpu/nvgpu/hal/gr/falcon/gr_falcon_gm20b.c
@@ -33,6 +33,8 @@
 #include
 #include
 
+#define GR_FECS_POLL_INTERVAL	5U /* usec */
+
 #define FECS_ARB_CMD_TIMEOUT_MAX_US 40U
 #define FECS_ARB_CMD_TIMEOUT_DEFAULT_US 2U
 #define CTXSW_MEM_SCRUBBING_TIMEOUT_MAX_US 1000U
@@ -733,10 +735,10 @@ int gm20b_gr_falcon_submit_fecs_method_op(struct gk20a *g,
 				     struct nvgpu_fecs_method_op op,
 				     bool sleepduringwait)
 {
-	struct gr_gk20a *gr = &g->gr;
 	int ret;
+	struct gr_gk20a *gr = &g->gr;
 
-	nvgpu_mutex_acquire(&gr->fecs_mutex);
+	nvgpu_mutex_acquire(&gr->falcon->fecs_mutex);
 
 	if (op.mailbox.id != 0U) {
 		nvgpu_writel(g, gr_fecs_ctxsw_mailbox_r(op.mailbox.id),
@@ -766,7 +768,7 @@ int gm20b_gr_falcon_submit_fecs_method_op(struct gk20a *g,
 			 op.method.data, op.method.addr);
 	}
 
-	nvgpu_mutex_release(&gr->fecs_mutex);
+	nvgpu_mutex_release(&gr->falcon->fecs_mutex);
 
 	return ret;
 }
@@ -775,10 +777,10 @@ int gm20b_gr_falcon_submit_fecs_method_op(struct gk20a *g,
 int gm20b_gr_falcon_submit_fecs_sideband_method_op(struct gk20a *g,
 		struct nvgpu_fecs_method_op op)
 {
-	struct gr_gk20a *gr = &g->gr;
 	int ret;
+	struct gr_gk20a *gr = &g->gr;
 
-	nvgpu_mutex_acquire(&gr->fecs_mutex);
+	nvgpu_mutex_acquire(&gr->falcon->fecs_mutex);
 
 	nvgpu_writel(g, gr_fecs_ctxsw_mailbox_clear_r(op.mailbox.id),
 		gr_fecs_ctxsw_mailbox_clear_value_f(op.mailbox.clr));
@@ -796,7 +798,7 @@ int gm20b_gr_falcon_submit_fecs_sideband_method_op(struct gk20a *g,
 			op.method.data, op.method.addr);
 	}
 
-	nvgpu_mutex_release(&gr->fecs_mutex);
+	nvgpu_mutex_release(&gr->falcon->fecs_mutex);
 
 	return ret;
 }
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
index 569aba03e..6f94f274e 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
@@ -56,6 +56,7 @@ struct perf_pmupstate;
 struct boardobjgrp;
 struct boardobjgrp_pmu_cmd;
 struct boardobjgrpmask;
+struct nvgpu_gr_falcon;
 struct nvgpu_sgt;
 struct nvgpu_sgl;
 struct nvgpu_device_info;
@@ -564,7 +565,8 @@ struct gpu_ops {
 			void (*load_ctxsw_ucode_boot)(struct gk20a *g,
 				u32 reg_offset, u32 boot_entry,
 				u32 addr_load32, u32 blocks, u32 dst);
-			int (*load_ctxsw_ucode)(struct gk20a *g);
+			int (*load_ctxsw_ucode)(struct gk20a *g,
+					struct nvgpu_gr_falcon *falcon);
 			int (*wait_mem_scrubbing)(struct gk20a *g);
 			int (*wait_ctxsw_ready)(struct gk20a *g);
 			int (*submit_fecs_method_op)(struct gk20a *g,
@@ -575,8 +577,10 @@ struct gpu_ops {
 			int (*ctrl_ctxsw)(struct gk20a *g, u32 fecs_method,
 				u32 fecs_data, u32 *ret_val);
 			int (*halt_pipe)(struct gk20a *g);
-			int (*disable_ctxsw)(struct gk20a *g);
-			int (*enable_ctxsw)(struct gk20a *g);
+			int (*disable_ctxsw)(struct gk20a *g,
+					struct nvgpu_gr_falcon *falcon);
+			int (*enable_ctxsw)(struct gk20a *g,
+					struct nvgpu_gr_falcon *falcon);
 			u32 (*get_current_ctx)(struct gk20a *g);
 			u32 (*get_ctx_ptr)(u32 ctx);
 			u32 (*get_fecs_current_ctx_data)(struct gk20a *g,
@@ -1988,9 +1992,6 @@ struct gk20a {
 
 	nvgpu_atomic_t usage_count;
 
-	struct nvgpu_mutex ctxsw_disable_lock;
-	int ctxsw_disable_count;
-
 	struct nvgpu_ref refcount;
 
 	const char *name;
@@ -2087,8 +2088,6 @@ struct gk20a {
 
 	u32 emc3d_ratio;
 
-	struct gk20a_ctxsw_ucode_info ctxsw_ucode_info;
-
 	/*
	 * A group of semaphore pools. One for each channel.
	 */
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/gr.h b/drivers/gpu/nvgpu/include/nvgpu/gr/gr.h
index 6c741ac65..2e62f30e1 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gr/gr.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gr/gr.h
@@ -25,6 +25,7 @@
 
 #include
 
+int nvgpu_gr_prepare_sw(struct gk20a *g);
 int nvgpu_gr_enable_hw(struct gk20a *g);
 int nvgpu_gr_reset(struct gk20a *g);
 int nvgpu_gr_init_support(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gr/gr_falcon.h b/drivers/gpu/nvgpu/include/nvgpu/gr/gr_falcon.h
index d5668a922..1a14f0567 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gr/gr_falcon.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gr/gr_falcon.h
@@ -26,6 +26,21 @@
 #include
 
 struct gk20a;
+struct nvgpu_gr_falcon;
+
+struct nvgpu_ctxsw_ucode_segment {
+	u32 offset;
+	u32 size;
+};
+
+struct nvgpu_ctxsw_ucode_segments {
+	u32 boot_entry;
+	u32 boot_imem_offset;
+	u32 boot_signature;
+	struct nvgpu_ctxsw_ucode_segment boot;
+	struct nvgpu_ctxsw_ucode_segment code;
+	struct nvgpu_ctxsw_ucode_segment data;
+};
 
 #define NVGPU_GR_FALCON_METHOD_CTXSW_STOP 0
 #define NVGPU_GR_FALCON_METHOD_CTXSW_START 1
@@ -57,14 +72,31 @@ struct nvgpu_fecs_host_intr_status {
 	bool watchdog_active;
 };
 
+struct nvgpu_gr_falcon *nvgpu_gr_falcon_init_support(struct gk20a *g);
+void nvgpu_gr_falcon_remove_support(struct gk20a *g,
+		struct nvgpu_gr_falcon *falcon);
 int nvgpu_gr_falcon_bind_fecs_elpg(struct gk20a *g);
-int nvgpu_gr_falcon_init_ctxsw(struct gk20a *g);
+int nvgpu_gr_falcon_init_ctxsw(struct gk20a *g, struct nvgpu_gr_falcon *falcon);
 int nvgpu_gr_falcon_init_ctx_state(struct gk20a *g);
-int nvgpu_gr_falcon_init_ctxsw_ucode(struct gk20a *g);
-int nvgpu_gr_falcon_load_ctxsw_ucode(struct gk20a *g);
-int nvgpu_gr_falcon_load_secure_ctxsw_ucode(struct gk20a *g);
-int nvgpu_gr_falcon_disable_ctxsw(struct gk20a *g);
-int nvgpu_gr_falcon_enable_ctxsw(struct gk20a *g);
+int nvgpu_gr_falcon_init_ctxsw_ucode(struct gk20a *g,
+		struct nvgpu_gr_falcon *falcon);
+int nvgpu_gr_falcon_load_ctxsw_ucode(struct gk20a *g,
+		struct nvgpu_gr_falcon *falcon);
+int nvgpu_gr_falcon_load_secure_ctxsw_ucode(struct gk20a *g,
+		struct nvgpu_gr_falcon *falcon);
+int nvgpu_gr_falcon_disable_ctxsw(struct gk20a *g,
+		struct nvgpu_gr_falcon *falcon);
+int nvgpu_gr_falcon_enable_ctxsw(struct gk20a *g,
+		struct nvgpu_gr_falcon *falcon);
 int nvgpu_gr_falcon_halt_pipe(struct gk20a *g);
+struct nvgpu_mutex *nvgpu_gr_falcon_get_fecs_mutex(
+		struct nvgpu_gr_falcon *falcon);
+struct nvgpu_ctxsw_ucode_segments *nvgpu_gr_falcon_get_fecs_ucode_segments(
+		struct nvgpu_gr_falcon *falcon);
+struct nvgpu_ctxsw_ucode_segments *nvgpu_gr_falcon_get_gpccs_ucode_segments(
+		struct nvgpu_gr_falcon *falcon);
+void *nvgpu_gr_falcon_get_surface_desc_cpu_va(
+		struct nvgpu_gr_falcon *falcon);
+
 #endif /* NVGPU_GR_FALCON_H */
diff --git a/drivers/gpu/nvgpu/os/linux/driver_common.c b/drivers/gpu/nvgpu/os/linux/driver_common.c
index d87220115..59b707022 100644
--- a/drivers/gpu/nvgpu/os/linux/driver_common.c
+++ b/drivers/gpu/nvgpu/os/linux/driver_common.c
@@ -63,7 +63,6 @@ static void nvgpu_init_vars(struct gk20a *g)
 	nvgpu_mutex_init(&g->dbg_sessions_lock);
 	nvgpu_mutex_init(&g->client_lock);
 	nvgpu_mutex_init(&g->power_lock);
-	nvgpu_mutex_init(&g->ctxsw_disable_lock);
 	nvgpu_mutex_init(&g->tpc_pg_lock);
 	nvgpu_mutex_init(&g->clk_arb_enable_lock);
 	nvgpu_mutex_init(&g->cg_pg_lock);
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
index 33aacf4ad..d3ac492b7 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_dbg.c
@@ -1101,7 +1101,7 @@ static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm(
 	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
 
 	/* Suspend GPU context switching */
-	err = g->ops.gr.falcon.disable_ctxsw(g);
+	err = g->ops.gr.falcon.disable_ctxsw(g, g->gr.falcon);
 	if (err) {
 		nvgpu_err(g, "unable to stop gr ctxsw");
 		/* this should probably be ctx-fatal... */
@@ -1119,7 +1119,7 @@ static int nvgpu_dbg_gpu_ioctl_suspend_resume_sm(
 		break;
 	}
 
-	err = g->ops.gr.falcon.enable_ctxsw(g);
+	err = g->ops.gr.falcon.enable_ctxsw(g, g->gr.falcon);
 	if (err)
 		nvgpu_err(g, "unable to restart ctxsw!");
diff --git a/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.c b/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.c
index ba4875077..b7a98232f 100644
--- a/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.c
+++ b/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.c
@@ -81,7 +81,6 @@ static void vgpu_init_vars(struct gk20a *g, struct gk20a_platform *platform)
 	struct vgpu_priv_data *priv = vgpu_get_priv_data(g);
 
 	nvgpu_mutex_init(&g->power_lock);
-	nvgpu_mutex_init(&g->ctxsw_disable_lock);
 	nvgpu_mutex_init(&g->clk_arb_enable_lock);
 	nvgpu_mutex_init(&g->cg_pg_lock);
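
Usage sketch (annotation, not part of the patch): the change moves the ctxsw
ucode state and the fecs/ctxsw-disable locks out of struct gk20a and struct
gr_gk20a into an opaque struct nvgpu_gr_falcon, allocated once via
nvgpu_gr_prepare_sw() and passed explicitly to every ctxsw entry point. A
hypothetical caller following the new poweron flow would look roughly like
the code below; example_gr_bringup() itself is illustrative only, while the
nvgpu functions it calls are the ones added or re-signed in this change:

	/* Illustrative only: mirrors the order used in gk20a_finalize_poweron(). */
	static int example_gr_bringup(struct gk20a *g)
	{
		int err;

		/* g->gr.falcon (mutexes included) is allocated on the first
		 * call only, so repeating this across railgate is safe. */
		err = nvgpu_gr_prepare_sw(g);
		if (err != 0) {
			return err;
		}

		err = nvgpu_gr_enable_hw(g);
		if (err != 0) {
			return err;
		}

		/* ctxsw init now takes the falcon object explicitly. */
		return nvgpu_gr_falcon_init_ctxsw(g, g->gr.falcon);
	}

Note that disable_ctxsw()/enable_ctxsw() keep their nesting semantics; only
the counter and lock now live in falcon->ctxsw_disable_count and
falcon->ctxsw_disable_mutex instead of struct gk20a.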