From a67729dcfc1ccb07056e03b31b5822c2786bc5f2 Mon Sep 17 00:00:00 2001 From: Mahantesh Kumbar Date: Thu, 21 Mar 2019 12:17:42 +0530 Subject: [PATCH] gpu: nvgpu: Create separate VM space for SEC2/GSP engine Currently SEC2/GSP use the PMU VM space for memory access, which adds a dependency on the PMU. Create a separate VM space for each of SEC2 and GSP, of size 32MB, as currently used for ucode handling by these units. The SEC2/GSP VM space is allocated only if the corresponding NVGPU_SUPPORT_SEC2_VM/NVGPU_SUPPORT_GSP_VM enable flag is set. JIRA NVGPU-2910 Change-Id: I4dfe50a1c0adb7e83379bf6c15343fe57ff44c38 Signed-off-by: Mahantesh Kumbar Reviewed-on: https://git-master.nvidia.com/r/2077596 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/common/acr/acr_bootstrap.c | 47 ++++++++++++++-- drivers/gpu/nvgpu/common/mm/mm.c | 58 ++++++++++++++++++++ drivers/gpu/nvgpu/gp106/sec2_gp106.c | 4 +- drivers/gpu/nvgpu/gv100/hal_gv100.c | 1 + drivers/gpu/nvgpu/hal/gsp/gsp_gv100.c | 4 +- drivers/gpu/nvgpu/include/nvgpu/enabled.h | 8 ++- drivers/gpu/nvgpu/include/nvgpu/mm.h | 4 +- drivers/gpu/nvgpu/tu104/hal_tu104.c | 2 + drivers/gpu/nvgpu/tu104/sec2_tu104.c | 4 +- 9 files changed, 119 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/nvgpu/common/acr/acr_bootstrap.c b/drivers/gpu/nvgpu/common/acr/acr_bootstrap.c index 0588d8dcf..644556d6f 100644 --- a/drivers/gpu/nvgpu/common/acr/acr_bootstrap.c +++ b/drivers/gpu/nvgpu/common/acr/acr_bootstrap.c @@ -33,6 +33,8 @@ #include "acr_bootstrap.h" #include "acr_priv.h" +struct vm_gk20a* acr_get_engine_vm(struct gk20a *g, u32 falcon_id); + static int acr_wait_for_completion(struct gk20a *g, struct nvgpu_falcon *flcn, unsigned int timeout) { @@ -89,6 +91,32 @@ exit: return completion; } +struct vm_gk20a* acr_get_engine_vm(struct gk20a *g, u32 falcon_id) +{ + struct vm_gk20a *vm = NULL; + + switch (falcon_id) { + case FALCON_ID_PMU: + vm = g->mm.pmu.vm; + break; + case FALCON_ID_SEC2: + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SEC2_VM)) { + vm 
= g->mm.sec2.vm; + } + break; + case FALCON_ID_GSPLITE: + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_GSP_VM)) { + vm = g->mm.gsp.vm; + } + break; + default: + vm = NULL; + break; + } + + return vm; +} + static int acr_hs_bl_exec(struct gk20a *g, struct nvgpu_acr *acr, struct hs_acr *acr_desc, bool b_wait_for_halt) { @@ -96,8 +124,7 @@ static int acr_hs_bl_exec(struct gk20a *g, struct nvgpu_acr *acr, struct hsflcn_bl_desc *hs_bl_desc; struct nvgpu_falcon_bl_info bl_info; struct hs_flcn_bl *hs_bl = &acr_desc->acr_hs_bl; - struct mm_gk20a *mm = &g->mm; - struct vm_gk20a *vm = mm->pmu.vm; + struct vm_gk20a *vm = NULL; u32 flcn_id = nvgpu_falcon_get_id(acr_desc->acr_flcn); u32 *hs_bl_code = NULL; int err = 0; @@ -106,6 +133,12 @@ static int acr_hs_bl_exec(struct gk20a *g, struct nvgpu_acr *acr, nvgpu_acr_dbg(g, "Executing ACR HS Bootloader %s on Falcon-ID - %d", hs_bl->bl_fw_name, flcn_id); + vm = acr_get_engine_vm(g, flcn_id); + if (vm == NULL) { + nvgpu_err(g, "vm space not allocated for engine falcon - %d", flcn_id); + return -ENOMEM; + } + if (hs_bl_fw == NULL) { hs_bl_fw = nvgpu_request_firmware(g, hs_bl->bl_fw_name, 0); if (hs_bl_fw == NULL) { @@ -232,12 +265,12 @@ static int acr_ucode_patch_sig(struct gk20a *g, int nvgpu_acr_bootstrap_hs_ucode(struct gk20a *g, struct nvgpu_acr *acr, struct hs_acr *acr_desc) { - struct mm_gk20a *mm = &g->mm; - struct vm_gk20a *vm = mm->pmu.vm; + struct vm_gk20a *vm = NULL; struct nvgpu_firmware *acr_fw = acr_desc->acr_fw; struct bin_hdr *acr_fw_bin_hdr = NULL; struct acr_fw_header *acr_fw_hdr = NULL; struct nvgpu_mem *acr_ucode_mem = &acr_desc->acr_ucode; + u32 flcn_id = nvgpu_falcon_get_id(acr_desc->acr_flcn); u32 img_size_in_bytes = 0; u32 *acr_ucode_data; u32 *acr_ucode_header; @@ -245,6 +278,12 @@ int nvgpu_acr_bootstrap_hs_ucode(struct gk20a *g, struct nvgpu_acr *acr, nvgpu_acr_dbg(g, "ACR TYPE %x ", acr_desc->acr_type); + vm = acr_get_engine_vm(g, flcn_id); + if (vm == NULL) { + nvgpu_err(g, "vm space not allocated for engine 
falcon - %d", flcn_id); + return -ENOMEM; + } + if (acr_fw != NULL) { acr->patch_wpr_info_to_ucode(g, acr, acr_desc, true); } else { diff --git a/drivers/gpu/nvgpu/common/mm/mm.c b/drivers/gpu/nvgpu/common/mm/mm.c index 9412e774d..336f60996 100644 --- a/drivers/gpu/nvgpu/common/mm/mm.c +++ b/drivers/gpu/nvgpu/common/mm/mm.c @@ -199,6 +199,16 @@ static void nvgpu_remove_mm_support(struct mm_gk20a *mm) nvgpu_free_inst_block(g, &mm->hwpm.inst_block); nvgpu_vm_put(mm->pmu.vm); + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SEC2_VM)) { + nvgpu_free_inst_block(g, &mm->sec2.inst_block); + nvgpu_vm_put(mm->sec2.vm); + } + + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_GSP_VM)) { + nvgpu_free_inst_block(g, &mm->gsp.inst_block); + nvgpu_vm_put(mm->gsp.vm); + } + if (g->has_cde) { nvgpu_vm_put(mm->cde.vm); } @@ -405,6 +415,40 @@ clean_up_vm: return err; } +static int nvgpu_init_engine_ucode_vm(struct gk20a *g, + struct engine_ucode *ucode, const char *address_space_name) +{ + int err; + struct nvgpu_mem *inst_block = &ucode->inst_block; + u32 big_page_size = g->ops.mm.get_default_big_page_size(); + + /* ucode aperture size is 32MB */ + ucode->aperture_size = U32(32) << 20U; + nvgpu_log_info(g, "%s vm size = 0x%x", address_space_name, + ucode->aperture_size); + + ucode->vm = nvgpu_vm_init(g, big_page_size, SZ_4K, + ucode->aperture_size - SZ_4K, + ucode->aperture_size, false, false, false, address_space_name); + if (ucode->vm == NULL) { + return -ENOMEM; + } + + /* allocate instance mem for engine ucode */ + err = g->ops.mm.alloc_inst_block(g, inst_block); + if (err != 0) { + goto clean_up_va; + } + + g->ops.mm.init_inst_block(inst_block, ucode->vm, big_page_size); + + return 0; + +clean_up_va: + nvgpu_vm_put(ucode->vm); + return err; +} + static int nvgpu_init_mm_setup_sw(struct gk20a *g) { struct mm_gk20a *mm = &g->mm; @@ -478,6 +522,20 @@ static int nvgpu_init_mm_setup_sw(struct gk20a *g) return err; } + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SEC2_VM)) { + err = 
nvgpu_init_engine_ucode_vm(g, &mm->sec2, "sec2"); + if (err != 0) { + return err; + } + } + + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_GSP_VM)) { + err = nvgpu_init_engine_ucode_vm(g, &mm->gsp, "gsp"); + if (err != 0) { + return err; + } + } + if (g->has_cde) { err = nvgpu_init_cde_vm(mm); if (err != 0) { diff --git a/drivers/gpu/nvgpu/gp106/sec2_gp106.c b/drivers/gpu/nvgpu/gp106/sec2_gp106.c index d379183af..cb0ce7a02 100644 --- a/drivers/gpu/nvgpu/gp106/sec2_gp106.c +++ b/drivers/gpu/nvgpu/gp106/sec2_gp106.c @@ -85,13 +85,13 @@ void gp106_sec2_flcn_setup_boot_config(struct gk20a *g) * The instance block address to write is the lower 32-bits of the 4K- * aligned physical instance block address. */ - tmp_addr = nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12U; + tmp_addr = nvgpu_inst_block_addr(g, &mm->sec2.inst_block) >> 12U; nvgpu_assert(u64_hi32(tmp_addr) == 0U); gk20a_writel(g, psec_falcon_nxtctx_r(), pwr_pmu_new_instblk_ptr_f((u32)tmp_addr) | pwr_pmu_new_instblk_valid_f(1U) | - nvgpu_aperture_mask(g, &mm->pmu.inst_block, + nvgpu_aperture_mask(g, &mm->sec2.inst_block, pwr_pmu_new_instblk_target_sys_ncoh_f(), pwr_pmu_new_instblk_target_sys_coh_f(), pwr_pmu_new_instblk_target_fb_f())); diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index 0477ed747..5fbf8d0f7 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c @@ -1359,6 +1359,7 @@ int gv100_init_hal(struct gk20a *g) nvgpu_set_enabled(g, NVGPU_FECS_TRACE_VA, true); nvgpu_set_enabled(g, NVGPU_SUPPORT_PMU_RTOS_FBQ, true); nvgpu_set_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL, true); + nvgpu_set_enabled(g, NVGPU_SUPPORT_SEC2_VM, true); /* * gv100 bypasses the IOMMU since it uses the nvlink path memory. 
diff --git a/drivers/gpu/nvgpu/hal/gsp/gsp_gv100.c b/drivers/gpu/nvgpu/hal/gsp/gsp_gv100.c index 5f81309cf..cd19dc362 100644 --- a/drivers/gpu/nvgpu/hal/gsp/gsp_gv100.c +++ b/drivers/gpu/nvgpu/hal/gsp/gsp_gv100.c @@ -77,13 +77,13 @@ void gv100_gsp_flcn_setup_boot_config(struct gk20a *g) * The instance block address to write is the lower 32-bits of the 4K- * aligned physical instance block address. */ - tmp_addr = nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12U; + tmp_addr = nvgpu_inst_block_addr(g, &mm->gsp.inst_block) >> 12U; nvgpu_assert(u64_hi32(tmp_addr) == 0U); gk20a_writel(g, pgsp_falcon_nxtctx_r(), pgsp_falcon_nxtctx_ctxptr_f((u32)tmp_addr) | pgsp_falcon_nxtctx_ctxvalid_f(1) | - nvgpu_aperture_mask(g, &mm->pmu.inst_block, + nvgpu_aperture_mask(g, &mm->gsp.inst_block, pgsp_falcon_nxtctx_ctxtgt_sys_ncoh_f(), pgsp_falcon_nxtctx_ctxtgt_sys_coh_f(), pgsp_falcon_nxtctx_ctxtgt_fb_f())); diff --git a/drivers/gpu/nvgpu/include/nvgpu/enabled.h b/drivers/gpu/nvgpu/include/nvgpu/enabled.h index 254e20461..b4dba8bf1 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/enabled.h +++ b/drivers/gpu/nvgpu/include/nvgpu/enabled.h @@ -185,10 +185,16 @@ struct gk20a; /* PLATFORM_ATOMIC support */ #define NVGPU_SUPPORT_PLATFORM_ATOMIC 72 +/* SEC2 VM support */ +#define NVGPU_SUPPORT_SEC2_VM 73 + +/* GSP VM support */ +#define NVGPU_SUPPORT_GSP_VM 74 + /* * Must be greater than the largest bit offset in the above list. */ -#define NVGPU_MAX_ENABLED_BITS 73U +#define NVGPU_MAX_ENABLED_BITS 75U /** * nvgpu_is_enabled - Check if the passed flag is enabled. 
diff --git a/drivers/gpu/nvgpu/include/nvgpu/mm.h b/drivers/gpu/nvgpu/include/nvgpu/mm.h index b2e8ce07f..ca045da27 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/mm.h +++ b/drivers/gpu/nvgpu/include/nvgpu/mm.h @@ -99,11 +99,11 @@ struct mm_gk20a { struct nvgpu_mem inst_block; } bar2; - struct { + struct engine_ucode { u32 aperture_size; struct vm_gk20a *vm; struct nvgpu_mem inst_block; - } pmu; + } pmu, sec2, gsp; struct { /* using pmu vm currently */ diff --git a/drivers/gpu/nvgpu/tu104/hal_tu104.c b/drivers/gpu/nvgpu/tu104/hal_tu104.c index 0012e69ab..30f096145 100644 --- a/drivers/gpu/nvgpu/tu104/hal_tu104.c +++ b/drivers/gpu/nvgpu/tu104/hal_tu104.c @@ -1406,6 +1406,8 @@ int tu104_init_hal(struct gk20a *g) nvgpu_set_enabled(g, NVGPU_SUPPORT_PMU_RTOS_FBQ, true); nvgpu_set_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL, true); nvgpu_set_enabled(g, NVGPU_SUPPORT_PLATFORM_ATOMIC, true); + nvgpu_set_enabled(g, NVGPU_SUPPORT_SEC2_VM, true); + nvgpu_set_enabled(g, NVGPU_SUPPORT_GSP_VM, true); /* for now */ gops->clk.support_clk_freq_controller = false; diff --git a/drivers/gpu/nvgpu/tu104/sec2_tu104.c b/drivers/gpu/nvgpu/tu104/sec2_tu104.c index 3b1b7eb79..d8b2438fd 100644 --- a/drivers/gpu/nvgpu/tu104/sec2_tu104.c +++ b/drivers/gpu/nvgpu/tu104/sec2_tu104.c @@ -211,13 +211,13 @@ void tu104_sec2_flcn_setup_boot_config(struct gk20a *g) * The instance block address to write is the lower 32-bits of the 4K- * aligned physical instance block address. */ - tmp_addr = nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12U; + tmp_addr = nvgpu_inst_block_addr(g, &mm->sec2.inst_block) >> 12U; nvgpu_assert(u64_hi32(tmp_addr) == 0U); gk20a_writel(g, psec_falcon_nxtctx_r(), pwr_pmu_new_instblk_ptr_f((u32)tmp_addr) | pwr_pmu_new_instblk_valid_f(1U) | - nvgpu_aperture_mask(g, &mm->pmu.inst_block, + nvgpu_aperture_mask(g, &mm->sec2.inst_block, pwr_pmu_new_instblk_target_sys_ncoh_f(), pwr_pmu_new_instblk_target_sys_coh_f(), pwr_pmu_new_instblk_target_fb_f()));