Mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
gpu: nvgpu: Use preallocated VPR buffer
To prevent deadlock while allocating VPR in nvgpu, allocate all the
needed VPR memory at probe time and use an internal allocator to hand
out space for VPR buffers.

Change-Id: I584b9a0f746d5d1dec021cdfbd6f26b4b92e4412
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1655324
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Committed by: mobile promotions
Parent: 1582bdb5ee
Commit: ec00a6c2db
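The "internal allocator" mentioned in the commit message is a simple bump
allocator over a single region reserved at probe time. As a rough
illustration of the bookkeeping that the new secure_page_buffer fields in
the first hunk below imply (this is a standalone userspace sketch, not
nvgpu code; vpr_pool, vpr_pool_alloc and the base address are made-up
names for the example):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE     4096u
#define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & ~(size_t)(PAGE_SIZE - 1))

struct vpr_pool {
        uint64_t phys;  /* base of the region reserved once at probe time */
        size_t   size;  /* total bytes reserved */
        size_t   used;  /* bump pointer: bytes handed out so far */
};

/* Carve a page-aligned slice out of the pool; -1 when the pool is exhausted. */
static int vpr_pool_alloc(struct vpr_pool *pool, size_t size, uint64_t *phys_out)
{
        size_t aligned = PAGE_ALIGN(size);

        if (pool->used + aligned > pool->size)
                return -1;
        *phys_out = pool->phys + pool->used;
        pool->used += aligned;
        return 0;
}

int main(void)
{
        /* 335872 bytes is the gm20b secure_buffer_size added by this commit. */
        struct vpr_pool pool = { .phys = 0x80000000ull, .size = 335872, .used = 0 };
        uint64_t phys;

        if (vpr_pool_alloc(&pool, 5000, &phys) == 0)
                printf("carved 5000 bytes at 0x%llx, %zu/%zu bytes used\n",
                       (unsigned long long)phys, pool.used, pool.size);
        return 0;
}

Such an allocator has no per-buffer free; the whole region is released in
one go by the secure_buffer destroy callback, which is why the per-buffer
desc->destroy is set to NULL in the diff below.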
@@ -32,7 +32,8 @@ struct gk20a_scale_profile;
 struct secure_page_buffer {
        void (*destroy)(struct gk20a *, struct secure_page_buffer *);
        size_t size;
-       u64 iova;
+       dma_addr_t phys;
+       size_t used;
 };

 struct gk20a_platform {

@@ -148,6 +149,8 @@ struct gk20a_platform {
        /* Powerdown platform dependencies */
        void (*idle)(struct device *dev);

+       /* Preallocated VPR buffer for kernel */
+       size_t secure_buffer_size;
        struct secure_page_buffer secure_buffer;

        /* Device is going to be suspended */
@@ -103,103 +103,61 @@ static void gk20a_tegra_secure_page_destroy(struct gk20a *g,
        DEFINE_DMA_ATTRS(attrs);
        dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, __DMA_ATTR(attrs));
        dma_free_attrs(&tegra_vpr_dev, secure_buffer->size,
-                       (void *)(uintptr_t)secure_buffer->iova,
-                       secure_buffer->iova, __DMA_ATTR(attrs));
+                       (void *)(uintptr_t)secure_buffer->phys,
+                       secure_buffer->phys, __DMA_ATTR(attrs));

        secure_buffer->destroy = NULL;
 }

-int gk20a_tegra_secure_page_alloc(struct device *dev)
-{
-       struct gk20a_platform *platform = dev_get_drvdata(dev);
-       struct gk20a *g = get_gk20a(dev);
-       struct secure_page_buffer *secure_buffer = &platform->secure_buffer;
-       DEFINE_DMA_ATTRS(attrs);
-       dma_addr_t iova;
-       size_t size = PAGE_SIZE;
-
-       if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL))
-               return -EINVAL;
-
-       dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, __DMA_ATTR(attrs));
-       (void)dma_alloc_attrs(&tegra_vpr_dev, size, &iova,
-                               GFP_KERNEL, __DMA_ATTR(attrs));
-       if (dma_mapping_error(&tegra_vpr_dev, iova))
-               return -ENOMEM;
-
-       secure_buffer->size = size;
-       secure_buffer->iova = iova;
-       secure_buffer->destroy = gk20a_tegra_secure_page_destroy;
-
-       return 0;
-}
-
-static void gk20a_tegra_secure_destroy(struct gk20a *g,
-                                      struct gr_ctx_buffer_desc *desc)
-{
-       DEFINE_DMA_ATTRS(attrs);
-
-       if (desc->mem.priv.sgt) {
-               u64 pa = nvgpu_mem_get_phys_addr(g, &desc->mem);
-
-               dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, __DMA_ATTR(attrs));
-               dma_free_attrs(&tegra_vpr_dev, desc->mem.size,
-                       (void *)(uintptr_t)pa,
-                       pa, __DMA_ATTR(attrs));
-               nvgpu_free_sgtable(g, &desc->mem.priv.sgt);
-               desc->mem.priv.sgt = NULL;
-       }
-}
-
 static int gk20a_tegra_secure_alloc(struct gk20a *g,
                                    struct gr_ctx_buffer_desc *desc,
                                    size_t size)
 {
        struct device *dev = dev_from_gk20a(g);
        struct gk20a_platform *platform = dev_get_drvdata(dev);
-       DEFINE_DMA_ATTRS(attrs);
-       dma_addr_t iova;
+       struct secure_page_buffer *secure_buffer = &platform->secure_buffer;
+       dma_addr_t phys;
        struct sg_table *sgt;
        struct page *page;
        int err = 0;
+       size_t aligned_size = PAGE_ALIGN(size);

-       dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, __DMA_ATTR(attrs));
-       (void)dma_alloc_attrs(&tegra_vpr_dev, size, &iova,
-                               GFP_KERNEL, __DMA_ATTR(attrs));
-       if (dma_mapping_error(&tegra_vpr_dev, iova))
-               return -ENOMEM;
+       /* We ran out of preallocated memory */
+       if (secure_buffer->used + aligned_size > secure_buffer->size) {
+               nvgpu_err(platform->g, "failed to alloc %zu bytes of VPR, %zu/%zu used",
+                               size, secure_buffer->used, secure_buffer->size);
+               return -ENOMEM;
+       }
+
+       phys = secure_buffer->phys + secure_buffer->used;

        sgt = nvgpu_kzalloc(platform->g, sizeof(*sgt));
        if (!sgt) {
                nvgpu_err(platform->g, "failed to allocate memory");
-               goto fail;
+               return -ENOMEM;
        }
        err = sg_alloc_table(sgt, 1, GFP_KERNEL);
        if (err) {
                nvgpu_err(platform->g, "failed to allocate sg_table");
                goto fail_sgt;
        }
-       page = phys_to_page(iova);
+       page = phys_to_page(phys);
        sg_set_page(sgt->sgl, page, size, 0);
        /* This bypasses SMMU for VPR during gmmu_map. */
        sg_dma_address(sgt->sgl) = 0;

-       desc->destroy = gk20a_tegra_secure_destroy;
+       desc->destroy = NULL;

        desc->mem.priv.sgt = sgt;
        desc->mem.size = size;
        desc->mem.aperture = APERTURE_SYSMEM;

-       if (platform->secure_buffer.destroy)
-               platform->secure_buffer.destroy(g, &platform->secure_buffer);
+       secure_buffer->used += aligned_size;

        return err;

 fail_sgt:
        nvgpu_kfree(platform->g, sgt);
-fail:
-       dma_free_attrs(&tegra_vpr_dev, desc->mem.size,
-                       (void *)(uintptr_t)iova, iova, __DMA_ATTR(attrs));
        return err;
 }
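Per-buffer allocation therefore reduces to carving a page-aligned slice out
of the preallocated region and wrapping it in a one-entry sg_table, as
gk20a_tegra_secure_alloc() does above. A kernel-context sketch of that
wrapping step, assuming a hypothetical helper name (wrap_carveout is not
part of this change):

#include <linux/scatterlist.h>
#include <linux/slab.h>

/* Wrap a physically contiguous carve-out slice in a single-entry sg_table.
 * phys_to_page() is provided by the arch headers on Tegra/ARM builds. */
static struct sg_table *wrap_carveout(phys_addr_t phys, size_t size)
{
        struct sg_table *sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);

        if (!sgt)
                return NULL;
        if (sg_alloc_table(sgt, 1, GFP_KERNEL)) {
                kfree(sgt);
                return NULL;
        }
        sg_set_page(sgt->sgl, phys_to_page(phys), size, 0);
        /* A zero dma_address makes the later gmmu_map use the physical
         * address directly, bypassing the SMMU for the VPR aperture. */
        sg_dma_address(sgt->sgl) = 0;
        return sgt;
}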
@@ -664,10 +622,32 @@ void gk20a_tegra_idle(struct device *dev)
 #endif
 }

-void gk20a_tegra_init_secure_alloc(struct gk20a *g)
+int gk20a_tegra_init_secure_alloc(struct gk20a_platform *platform)
 {
+       struct gk20a *g = platform->g;
+       struct secure_page_buffer *secure_buffer = &platform->secure_buffer;
+       DEFINE_DMA_ATTRS(attrs);
+       dma_addr_t iova;
+
+       if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL))
+               return 0;
+
+       dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, __DMA_ATTR(attrs));
+       (void)dma_alloc_attrs(&tegra_vpr_dev, platform->secure_buffer_size, &iova,
+                               GFP_KERNEL, __DMA_ATTR(attrs));
+       /* Some platforms disable VPR. In that case VPR allocations always
+        * fail. Just disable VPR usage in nvgpu in that case. */
+       if (dma_mapping_error(&tegra_vpr_dev, iova))
+               return 0;
+
+       secure_buffer->size = platform->secure_buffer_size;
+       secure_buffer->phys = iova;
+       secure_buffer->destroy = gk20a_tegra_secure_page_destroy;
+
        g->ops.secure_alloc = gk20a_tegra_secure_alloc;
        __nvgpu_set_enabled(g, NVGPU_SUPPORT_VPR, true);
+
+       return 0;
 }

 #ifdef CONFIG_COMMON_CLK
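Note the fallback behavior: on FMODEL, and on platforms where VPR is
disabled so the probe-time dma_alloc_attrs() fails, the function still
returns 0. In those cases g->ops.secure_alloc is left unset and
NVGPU_SUPPORT_VPR stays disabled, so the driver probes normally without
VPR support instead of failing.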
@@ -836,7 +816,9 @@ static int gk20a_tegra_probe(struct device *dev)

        gk20a_tegra_get_clocks(dev);
        nvgpu_linux_init_clk_support(platform->g);
-       gk20a_tegra_init_secure_alloc(platform->g);
+       ret = gk20a_tegra_init_secure_alloc(platform);
+       if (ret)
+               return ret;

        if (platform->clk_register) {
                ret = platform->clk_register(platform->g);

@@ -851,9 +833,6 @@ static int gk20a_tegra_probe(struct device *dev)

 static int gk20a_tegra_late_probe(struct device *dev)
 {
-       /* Cause early VPR resize */
-       gk20a_tegra_secure_page_alloc(dev);
-
        return 0;
 }

@@ -974,4 +953,6 @@ struct gk20a_platform gm20b_tegra_platform = {
        .soc_name = "tegra21x",

        .unified_memory = true,
+
+       .secure_buffer_size = 335872,
 };
@@ -16,10 +16,8 @@
 #ifndef _NVGPU_PLATFORM_GK20A_TEGRA_H_
 #define _NVGPU_PLATFORM_GK20A_TEGRA_H_

-struct device;
-struct gk20a;
+struct gk20a_platform;

-void gk20a_tegra_init_secure_alloc(struct gk20a *g);
-int gk20a_tegra_secure_page_alloc(struct device *dev);
+int gk20a_tegra_init_secure_alloc(struct gk20a_platform *platform);

 #endif
@@ -137,6 +137,10 @@ static int gp10b_tegra_probe(struct device *dev)
                return ret;
 #endif

+       ret = gk20a_tegra_init_secure_alloc(platform);
+       if (ret)
+               return ret;
+
        platform->disable_bigpage = !device_is_iommuable(dev);

        platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close

@@ -149,16 +153,12 @@ static int gp10b_tegra_probe(struct device *dev)

        gp10b_tegra_get_clocks(dev);
        nvgpu_linux_init_clk_support(platform->g);
-       gk20a_tegra_init_secure_alloc(platform->g);

        return 0;
 }

 static int gp10b_tegra_late_probe(struct device *dev)
 {
-       /* Cause early VPR resize */
-       gk20a_tegra_secure_page_alloc(dev);
-
        return 0;
 }

@@ -422,6 +422,8 @@ struct gk20a_platform gp10b_tegra_platform = {
        .unified_memory = true,

        .ltc_streamid = TEGRA_SID_GPUB,

+       .secure_buffer_size = 401408,
+
 };
@@ -81,6 +81,10 @@ static int gv11b_tegra_probe(struct device *dev)
        g->has_syncpoints = false;
 #endif

+       err = gk20a_tegra_init_secure_alloc(platform);
+       if (err)
+               return err;
+
        platform->disable_bigpage = !device_is_iommuable(dev);

        platform->g->gr.ctx_vars.dump_ctxsw_stats_on_channel_close

@@ -93,15 +97,12 @@ static int gv11b_tegra_probe(struct device *dev)

        gp10b_tegra_get_clocks(dev);
        nvgpu_linux_init_clk_support(platform->g);
-       gk20a_tegra_init_secure_alloc(platform->g);

        return 0;
 }

 static int gv11b_tegra_late_probe(struct device *dev)
 {
-       /* Cause early VPR resize */
-       gk20a_tegra_secure_page_alloc(dev);
        return 0;
 }

@@ -263,6 +264,8 @@ struct gk20a_platform gv11b_tegra_platform = {

        .reset_assert = gp10b_tegra_reset_assert,
        .reset_deassert = gp10b_tegra_reset_deassert,

+       .secure_buffer_size = 667648,
+
 };

 static struct device_attribute *dev_attr_sm_l1_tag_ecc_corrected_err_count_array;
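The per-chip .secure_buffer_size values are whole page multiples: 335872
bytes (82 pages) for gm20b, 401408 bytes (98 pages) for gp10b and 667648
bytes (163 pages) for gv11b. They set how much VPR memory is reserved at
probe time for each chip; presumably each is sized to hold that chip's VPR
global context buffers carved out by gk20a_tegra_secure_alloc().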
@@ -2532,10 +2532,13 @@ static int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g)
        if (err)
                goto clean_up;

-       if (g->ops.secure_alloc)
-               g->ops.secure_alloc(g,
+       if (g->ops.secure_alloc) {
+               err = g->ops.secure_alloc(g,
                               &gr->global_ctx_buffer[CIRCULAR_VPR],
                               cb_buffer_size);
+               if (err)
+                       goto clean_up;
+       }

        gk20a_dbg_info("pagepool_buffer_size : %d", pagepool_buffer_size);

@@ -2544,10 +2547,13 @@ static int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g)
        if (err)
                goto clean_up;

-       if (g->ops.secure_alloc)
-               g->ops.secure_alloc(g,
+       if (g->ops.secure_alloc) {
+               err = g->ops.secure_alloc(g,
                               &gr->global_ctx_buffer[PAGEPOOL_VPR],
                               pagepool_buffer_size);
+               if (err)
+                       goto clean_up;
+       }

        gk20a_dbg_info("attr_buffer_size : %d", attr_buffer_size);

@@ -2556,10 +2562,13 @@ static int gr_gk20a_alloc_global_ctx_buffers(struct gk20a *g)
        if (err)
                goto clean_up;

-       if (g->ops.secure_alloc)
-               g->ops.secure_alloc(g,
+       if (g->ops.secure_alloc) {
+               err = g->ops.secure_alloc(g,
                               &gr->global_ctx_buffer[ATTRIBUTE_VPR],
                               attr_buffer_size);
+               if (err)
+                       goto clean_up;
+       }

        gk20a_dbg_info("golden_image_size : %d",
                       gr->ctx_vars.golden_image_size);
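Because VPR buffers now come from a fixed-size preallocated pool,
g->ops.secure_alloc can legitimately fail with -ENOMEM once the pool is
exhausted. The gr_gk20a hunks above therefore check its return value for
the CIRCULAR_VPR, PAGEPOOL_VPR and ATTRIBUTE_VPR buffers and unwind
through clean_up instead of ignoring the result.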