From a67729dcfc1ccb07056e03b31b5822c2786bc5f2 Mon Sep 17 00:00:00 2001
From: Mahantesh Kumbar <mkumbar@nvidia.com>
Date: Thu, 21 Mar 2019 12:17:42 +0530
Subject: [PATCH] gpu: nvgpu: Create separate VM space for SEC2/GSP engine

Currently SEC2/GSP use the PMU VM space for memory access, which adds a
dependency on the PMU. So, create a separate VM space of size 32MB for
SEC2/GSP, as it is currently used for ucode handling by these units.

The SEC2/GSP VM space is allocated only if the NVGPU_SUPPORT_SEC2_VM/
NVGPU_SUPPORT_GSP_VM enable flag is set.

JIRA NVGPU-2910

Change-Id: I4dfe50a1c0adb7e83379bf6c15343fe57ff44c38
Signed-off-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2077596
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/common/acr/acr_bootstrap.c | 47 ++++++++++++++--
 drivers/gpu/nvgpu/common/mm/mm.c             | 58 ++++++++++++++++++++
 drivers/gpu/nvgpu/gp106/sec2_gp106.c         |  4 +-
 drivers/gpu/nvgpu/gv100/hal_gv100.c          |  1 +
 drivers/gpu/nvgpu/hal/gsp/gsp_gv100.c        |  4 +-
 drivers/gpu/nvgpu/include/nvgpu/enabled.h    |  8 ++-
 drivers/gpu/nvgpu/include/nvgpu/mm.h         |  4 +-
 drivers/gpu/nvgpu/tu104/hal_tu104.c          |  2 +
 drivers/gpu/nvgpu/tu104/sec2_tu104.c         |  4 +-
 9 files changed, 119 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/nvgpu/common/acr/acr_bootstrap.c b/drivers/gpu/nvgpu/common/acr/acr_bootstrap.c
index 0588d8dcf..644556d6f 100644
--- a/drivers/gpu/nvgpu/common/acr/acr_bootstrap.c
+++ b/drivers/gpu/nvgpu/common/acr/acr_bootstrap.c
@@ -33,6 +33,8 @@
 #include "acr_bootstrap.h"
 #include "acr_priv.h"
 
+struct vm_gk20a* acr_get_engine_vm(struct gk20a *g, u32 falcon_id);
+
 static int acr_wait_for_completion(struct gk20a *g,
 	struct nvgpu_falcon *flcn, unsigned int timeout)
 {
@@ -89,6 +91,32 @@ exit:
 	return completion;
 }
 
+/*
+ * Select the VM for falcon_id (PMU, SEC2 or GSP). NULL is returned for any
+ * other falcon, and for SEC2/GSP when its NVGPU_SUPPORT_*_VM flag is off.
+ */
+struct vm_gk20a* acr_get_engine_vm(struct gk20a *g, u32 falcon_id)
+{
+	struct vm_gk20a *engine_vm = NULL;
+
+	if (falcon_id == FALCON_ID_PMU) {
+		engine_vm = g->mm.pmu.vm;
+	} else if (falcon_id == FALCON_ID_SEC2) {
+		if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SEC2_VM)) {
+			engine_vm = g->mm.sec2.vm;
+		}
+	} else if (falcon_id == FALCON_ID_GSPLITE) {
+		if (nvgpu_is_enabled(g, NVGPU_SUPPORT_GSP_VM)) {
+			engine_vm = g->mm.gsp.vm;
+		}
+	} else {
+		/* no VM is associated with any other falcon */
+		engine_vm = NULL;
+	}
+
+	return engine_vm;
+}
+
 static int acr_hs_bl_exec(struct gk20a *g, struct nvgpu_acr *acr,
 	struct hs_acr *acr_desc, bool b_wait_for_halt)
 {
@@ -96,8 +124,7 @@ static int acr_hs_bl_exec(struct gk20a *g, struct nvgpu_acr *acr,
 	struct hsflcn_bl_desc *hs_bl_desc;
 	struct nvgpu_falcon_bl_info bl_info;
 	struct hs_flcn_bl *hs_bl = &acr_desc->acr_hs_bl;
-	struct mm_gk20a *mm = &g->mm;
-	struct vm_gk20a *vm = mm->pmu.vm;
+	struct vm_gk20a *vm = NULL;
 	u32 flcn_id = nvgpu_falcon_get_id(acr_desc->acr_flcn);
 	u32 *hs_bl_code = NULL;
 	int err = 0;
@@ -106,6 +133,12 @@ static int acr_hs_bl_exec(struct gk20a *g, struct nvgpu_acr *acr,
 	nvgpu_acr_dbg(g, "Executing ACR HS Bootloader %s on Falcon-ID - %d",
 		hs_bl->bl_fw_name, flcn_id);
 
+	vm = acr_get_engine_vm(g, flcn_id);
+	if (vm == NULL) {
+		nvgpu_err(g, "vm space not allocated for engine falcon - %d", flcn_id);
+		return -ENOMEM;
+	}
+
 	if (hs_bl_fw == NULL) {
 		hs_bl_fw = nvgpu_request_firmware(g, hs_bl->bl_fw_name, 0);
 		if (hs_bl_fw == NULL) {
@@ -232,12 +265,12 @@ static int acr_ucode_patch_sig(struct gk20a *g,
 int nvgpu_acr_bootstrap_hs_ucode(struct gk20a *g, struct nvgpu_acr *acr,
 	struct hs_acr *acr_desc)
 {
-	struct mm_gk20a *mm = &g->mm;
-	struct vm_gk20a *vm = mm->pmu.vm;
+	struct vm_gk20a *vm = NULL;
 	struct nvgpu_firmware *acr_fw = acr_desc->acr_fw;
 	struct bin_hdr *acr_fw_bin_hdr = NULL;
 	struct acr_fw_header *acr_fw_hdr = NULL;
 	struct nvgpu_mem *acr_ucode_mem = &acr_desc->acr_ucode;
+	u32 flcn_id = nvgpu_falcon_get_id(acr_desc->acr_flcn);
 	u32 img_size_in_bytes = 0;
 	u32 *acr_ucode_data;
 	u32 *acr_ucode_header;
@@ -245,6 +278,12 @@ int nvgpu_acr_bootstrap_hs_ucode(struct gk20a *g, struct nvgpu_acr *acr,
 
 	nvgpu_acr_dbg(g, "ACR TYPE %x ", acr_desc->acr_type);
 
+	vm = acr_get_engine_vm(g, flcn_id);
+	if (vm == NULL) {
+		nvgpu_err(g, "vm space not allocated for engine falcon - %d", flcn_id);
+		return -ENOMEM;
+	}
+
 	if (acr_fw != NULL) {
 		acr->patch_wpr_info_to_ucode(g, acr, acr_desc, true);
 	} else {
diff --git a/drivers/gpu/nvgpu/common/mm/mm.c b/drivers/gpu/nvgpu/common/mm/mm.c
index 9412e774d..336f60996 100644
--- a/drivers/gpu/nvgpu/common/mm/mm.c
+++ b/drivers/gpu/nvgpu/common/mm/mm.c
@@ -199,6 +199,16 @@ static void nvgpu_remove_mm_support(struct mm_gk20a *mm)
 	nvgpu_free_inst_block(g, &mm->hwpm.inst_block);
 	nvgpu_vm_put(mm->pmu.vm);
 
+	if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SEC2_VM)) {
+		nvgpu_free_inst_block(g, &mm->sec2.inst_block);
+		nvgpu_vm_put(mm->sec2.vm);
+	}
+
+	if (nvgpu_is_enabled(g, NVGPU_SUPPORT_GSP_VM)) {
+		nvgpu_free_inst_block(g, &mm->gsp.inst_block);
+		nvgpu_vm_put(mm->gsp.vm);
+	}
+
 	if (g->has_cde) {
 		nvgpu_vm_put(mm->cde.vm);
 	}
@@ -405,6 +415,40 @@ clean_up_vm:
 	return err;
 }
 
+/*
+ * Create one engine's ucode VM and instance block; ucode->vm NULL on error.
+ */
+static int nvgpu_init_engine_ucode_vm(struct gk20a *g,
+	struct engine_ucode *ucode, const char *address_space_name)
+{
+	int err;
+	u32 big_page_size = g->ops.mm.get_default_big_page_size();
+
+	/* ucode aperture size is 32MB */
+	ucode->aperture_size = U32(32) << 20U;
+	nvgpu_log_info(g, "%s vm size = 0x%x", address_space_name,
+		ucode->aperture_size);
+
+	ucode->vm = nvgpu_vm_init(g, big_page_size, SZ_4K,
+		ucode->aperture_size - SZ_4K,
+		ucode->aperture_size, false, false, false, address_space_name);
+	if (ucode->vm == NULL) {
+		return -ENOMEM;
+	}
+
+	/* allocate instance mem for engine ucode */
+	err = g->ops.mm.alloc_inst_block(g, &ucode->inst_block);
+	if (err != 0) {
+		nvgpu_vm_put(ucode->vm);
+		/* do not leave a stale pointer behind for later NULL checks */
+		ucode->vm = NULL;
+		return err;
+	}
+
+	g->ops.mm.init_inst_block(&ucode->inst_block, ucode->vm, big_page_size);
+	return 0;
+}
+
 static int nvgpu_init_mm_setup_sw(struct gk20a *g)
 {
 	struct mm_gk20a *mm = &g->mm;
@@ -478,6 +522,20 @@ static int nvgpu_init_mm_setup_sw(struct gk20a *g)
 		return err;
 	}
 
+	if (nvgpu_is_enabled(g, NVGPU_SUPPORT_SEC2_VM)) {
+		err = nvgpu_init_engine_ucode_vm(g, &mm->sec2, "sec2");
+		if (err != 0) {
+			return err;
+		}
+	}
+
+	if (nvgpu_is_enabled(g, NVGPU_SUPPORT_GSP_VM)) {
+		err = nvgpu_init_engine_ucode_vm(g, &mm->gsp, "gsp");
+		if (err != 0) {
+			return err;
+		}
+	}
+
 	if (g->has_cde) {
 		err = nvgpu_init_cde_vm(mm);
 			if (err != 0) {
diff --git a/drivers/gpu/nvgpu/gp106/sec2_gp106.c b/drivers/gpu/nvgpu/gp106/sec2_gp106.c
index d379183af..cb0ce7a02 100644
--- a/drivers/gpu/nvgpu/gp106/sec2_gp106.c
+++ b/drivers/gpu/nvgpu/gp106/sec2_gp106.c
@@ -85,13 +85,13 @@ void gp106_sec2_flcn_setup_boot_config(struct gk20a *g)
 	 * The instance block address to write is the lower 32-bits of the 4K-
 	 * aligned physical instance block address.
 	 */
-	tmp_addr = nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12U;
+	tmp_addr = nvgpu_inst_block_addr(g, &mm->sec2.inst_block) >> 12U;
 		nvgpu_assert(u64_hi32(tmp_addr) == 0U);
 
 	gk20a_writel(g, psec_falcon_nxtctx_r(),
 		pwr_pmu_new_instblk_ptr_f((u32)tmp_addr) |
 		pwr_pmu_new_instblk_valid_f(1U) |
-		nvgpu_aperture_mask(g, &mm->pmu.inst_block,
+		nvgpu_aperture_mask(g, &mm->sec2.inst_block,
 			pwr_pmu_new_instblk_target_sys_ncoh_f(),
 			pwr_pmu_new_instblk_target_sys_coh_f(),
 			pwr_pmu_new_instblk_target_fb_f()));
diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c
index 0477ed747..5fbf8d0f7 100644
--- a/drivers/gpu/nvgpu/gv100/hal_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c
@@ -1359,6 +1359,7 @@ int gv100_init_hal(struct gk20a *g)
 	nvgpu_set_enabled(g, NVGPU_FECS_TRACE_VA, true);
 	nvgpu_set_enabled(g, NVGPU_SUPPORT_PMU_RTOS_FBQ, true);
 	nvgpu_set_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL, true);
+	nvgpu_set_enabled(g, NVGPU_SUPPORT_SEC2_VM, true);
 
 	/*
 	 * gv100 bypasses the IOMMU since it uses the nvlink path memory.
diff --git a/drivers/gpu/nvgpu/hal/gsp/gsp_gv100.c b/drivers/gpu/nvgpu/hal/gsp/gsp_gv100.c
index 5f81309cf..cd19dc362 100644
--- a/drivers/gpu/nvgpu/hal/gsp/gsp_gv100.c
+++ b/drivers/gpu/nvgpu/hal/gsp/gsp_gv100.c
@@ -77,13 +77,13 @@ void gv100_gsp_flcn_setup_boot_config(struct gk20a *g)
 	 * The instance block address to write is the lower 32-bits of the 4K-
 	 * aligned physical instance block address.
 	 */
-	tmp_addr = nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12U;
+	tmp_addr = nvgpu_inst_block_addr(g, &mm->gsp.inst_block) >> 12U;
 	nvgpu_assert(u64_hi32(tmp_addr) == 0U);
 
 	gk20a_writel(g, pgsp_falcon_nxtctx_r(),
 		pgsp_falcon_nxtctx_ctxptr_f((u32)tmp_addr) |
 		pgsp_falcon_nxtctx_ctxvalid_f(1) |
-		nvgpu_aperture_mask(g, &mm->pmu.inst_block,
+		nvgpu_aperture_mask(g, &mm->gsp.inst_block,
 			pgsp_falcon_nxtctx_ctxtgt_sys_ncoh_f(),
 			pgsp_falcon_nxtctx_ctxtgt_sys_coh_f(),
 			pgsp_falcon_nxtctx_ctxtgt_fb_f()));
diff --git a/drivers/gpu/nvgpu/include/nvgpu/enabled.h b/drivers/gpu/nvgpu/include/nvgpu/enabled.h
index 254e20461..b4dba8bf1 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/enabled.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/enabled.h
@@ -185,10 +185,16 @@ struct gk20a;
 /* PLATFORM_ATOMIC support */
 #define NVGPU_SUPPORT_PLATFORM_ATOMIC		72
 
+/* SEC2 VM support */
+#define NVGPU_SUPPORT_SEC2_VM			73
+
+/* GSP VM support */
+#define NVGPU_SUPPORT_GSP_VM			74
+
 /*
  * Must be greater than the largest bit offset in the above list.
  */
-#define NVGPU_MAX_ENABLED_BITS			73U
+#define NVGPU_MAX_ENABLED_BITS			75U
 
 /**
  * nvgpu_is_enabled - Check if the passed flag is enabled.
diff --git a/drivers/gpu/nvgpu/include/nvgpu/mm.h b/drivers/gpu/nvgpu/include/nvgpu/mm.h
index b2e8ce07f..ca045da27 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/mm.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/mm.h
@@ -99,11 +99,11 @@ struct mm_gk20a {
 		struct nvgpu_mem inst_block;
 	} bar2;
 
-	struct {
+	struct engine_ucode {
 		u32 aperture_size;
 		struct vm_gk20a *vm;
 		struct nvgpu_mem inst_block;
-	} pmu;
+	} pmu, sec2, gsp;
 
 	struct {
 		/* using pmu vm currently */
diff --git a/drivers/gpu/nvgpu/tu104/hal_tu104.c b/drivers/gpu/nvgpu/tu104/hal_tu104.c
index 0012e69ab..30f096145 100644
--- a/drivers/gpu/nvgpu/tu104/hal_tu104.c
+++ b/drivers/gpu/nvgpu/tu104/hal_tu104.c
@@ -1406,6 +1406,8 @@ int tu104_init_hal(struct gk20a *g)
 	nvgpu_set_enabled(g, NVGPU_SUPPORT_PMU_RTOS_FBQ, true);
 	nvgpu_set_enabled(g, NVGPU_SUPPORT_ZBC_STENCIL, true);
 	nvgpu_set_enabled(g, NVGPU_SUPPORT_PLATFORM_ATOMIC, true);
+	nvgpu_set_enabled(g, NVGPU_SUPPORT_SEC2_VM, true);
+	nvgpu_set_enabled(g, NVGPU_SUPPORT_GSP_VM, true);
 
 	/* for now */
 	gops->clk.support_clk_freq_controller = false;
diff --git a/drivers/gpu/nvgpu/tu104/sec2_tu104.c b/drivers/gpu/nvgpu/tu104/sec2_tu104.c
index 3b1b7eb79..d8b2438fd 100644
--- a/drivers/gpu/nvgpu/tu104/sec2_tu104.c
+++ b/drivers/gpu/nvgpu/tu104/sec2_tu104.c
@@ -211,13 +211,13 @@ void tu104_sec2_flcn_setup_boot_config(struct gk20a *g)
 		 * The instance block address to write is the lower 32-bits of the 4K-
 		 * aligned physical instance block address.
 		 */
-	tmp_addr = nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12U;
+	tmp_addr = nvgpu_inst_block_addr(g, &mm->sec2.inst_block) >> 12U;
 	nvgpu_assert(u64_hi32(tmp_addr) == 0U);
 
 	gk20a_writel(g, psec_falcon_nxtctx_r(),
 			pwr_pmu_new_instblk_ptr_f((u32)tmp_addr) |
 			pwr_pmu_new_instblk_valid_f(1U) |
-			nvgpu_aperture_mask(g, &mm->pmu.inst_block,
+			nvgpu_aperture_mask(g, &mm->sec2.inst_block,
 				pwr_pmu_new_instblk_target_sys_ncoh_f(),
 				pwr_pmu_new_instblk_target_sys_coh_f(),
 				pwr_pmu_new_instblk_target_fb_f()));