mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 09:12:24 +03:00
gpu: nvgpu: Add non-contiguous memory allocation
The latest GPU uses nvlink and its own MMU to access memory, instead of
the SMMU that other chips use, so it does not go through the IOMMU
framework to allocate physically non-contiguous memory. The DMA API had
a pair of downstream functions to allocate memory for this situation,
but they were removed since they are unlikely to be acceptable in the
upstream kernel.

In order not to hack the dma-direct ops, which by definition are
supposed to provide contiguous memory, this patch adds a pair of
memory-allocation functions inside the GPU driver, since nvgpu is the
only user. This pair of functions is only used when the GPU driver goes
through neither dma-direct (FORCE_CONTIGUOUS) nor the IOMMU. It also
requires the GPU driver to map the non-contiguous pages itself.

Bug 200444660

Change-Id: I26678a3f8d63bba340872beeecbb7b0e1e7a35fa
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2029680
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
committed by mobile promotions
parent a8e6d13652
commit b0d6964325
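
The heart of the patch is the allocation loop: it repeatedly grabs the largest power-of-two block of pages that still fits the remaining count, falling back to smaller orders when a large block is unavailable. As a rough illustration before reading the diff, here is a minimal userspace C sketch of that strategy; fls_u32() and the page count are illustrative stand-ins, not driver code:

#include <stdio.h>

/* Userspace stand-in for the kernel's __fls(): index of the highest set bit */
static int fls_u32(unsigned int v)
{
	int bit = -1;

	while (v) {
		v >>= 1;
		bit++;
	}
	return bit;
}

int main(void)
{
	unsigned int count = 1000;	/* e.g. a ~4 MiB buffer in 4 KiB pages */

	/* Mirrors the while (count) loop added by this patch */
	while (count) {
		int order = fls_u32(count);	/* largest 2^order <= count */

		printf("allocate a block of %u pages (order %d)\n",
		       1u << order, order);
		count -= 1u << order;
	}
	return 0;
}

For count = 1000 this requests blocks of 512, 256, 128, 64, 32, and 8 pages, which is exactly how the driver keeps the number of alloc_pages() calls small while still tolerating fragmented memory.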
@@ -15,6 +15,7 @@
  */
 
 #include <linux/dma-mapping.h>
+#include <linux/slab.h>
 #include <linux/version.h>
 
 #include <nvgpu/log.h>
@@ -150,6 +151,86 @@ static u64 __nvgpu_dma_alloc(struct nvgpu_allocator *allocator, u64 at,
 }
 #endif
 
+/**
+ * The nvgpu_dma_alloc_no_iommu/nvgpu_dma_free_no_iommu() are for use
+ * cases where memory can be physically non-contiguous even if GPU is
+ * not iommuable as GPU uses nvlink to access the memory and lets GMMU
+ * fully control it
+ */
+static void __nvgpu_dma_free_no_iommu(struct page **pages,
+				      int max, bool big_array)
+{
+	int i;
+
+	for (i = 0; i < max; i++)
+		if (pages[i])
+			__free_pages(pages[i], 0);
+
+	if (big_array)
+		vfree(pages);
+	else
+		kfree(pages);
+}
+
+static void *nvgpu_dma_alloc_no_iommu(struct device *dev, size_t size,
+				      dma_addr_t *dma_handle, gfp_t gfps)
+{
+	int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	unsigned int array_size = count * sizeof(struct page *);
+	struct page **pages;
+	int i = 0;
+
+	if (array_size <= PAGE_SIZE)
+		pages = kzalloc(array_size, GFP_KERNEL);
+	else
+		pages = vzalloc(array_size);
+	if (!pages)
+		return NULL;
+
+	gfps |= __GFP_HIGHMEM | __GFP_NOWARN;
+
+	while (count) {
+		int j, order = __fls(count);
+
+		pages[i] = alloc_pages(gfps, order);
+		while (!pages[i] && order)
+			pages[i] = alloc_pages(gfps, --order);
+		if (!pages[i])
+			goto error;
+
+		if (order) {
+			split_page(pages[i], order);
+			j = 1 << order;
+			while (--j)
+				pages[i + j] = pages[i] + j;
+		}
+
+		memset(page_address(pages[i]), 0, PAGE_SIZE << order);
+
+		i += 1 << order;
+		count -= 1 << order;
+	}
+
+	*dma_handle = __pfn_to_phys(page_to_pfn(pages[0]));
+
+	return (void *)pages;
+
+error:
+	__nvgpu_dma_free_no_iommu(pages, i, array_size > PAGE_SIZE);
+	return NULL;
+}
+
+static void nvgpu_dma_free_no_iommu(size_t size, void *vaddr)
+{
+	int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	unsigned int array_size = count * sizeof(struct page *);
+	struct page **pages = vaddr;
+
+	WARN_ON(!pages);
+
+	__nvgpu_dma_free_no_iommu(pages, count, array_size > PAGE_SIZE);
+}
+
 /* Check if IOMMU is available and if GPU uses it */
 #define nvgpu_uses_iommu(g) \
 	(nvgpu_iommuable(g) && !nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG))
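
The split_page() bookkeeping above works because, after a 2^order block is split, its struct page entries sit consecutively in the memmap; pages[i + j] = pages[i] + j therefore records every 4 KiB piece individually, and each one can later be released with __free_pages(page, 0) regardless of the order it was allocated at. A toy model of that indexing, with made-up pfn values in place of real struct page pointers:

#include <stdio.h>

int main(void)
{
	long first_pfn = 0x4200;	/* pretend alloc_pages() returned this block */
	int order = 3;			/* one 2^3 = 8 page block */
	long pages[1 << 3];
	int j;

	/* Mirrors: split_page(pages[i], order); pages[i + j] = pages[i] + j */
	for (j = 0; j < (1 << order); j++)
		pages[j] = first_pfn + j;

	for (j = 0; j < (1 << order); j++)
		printf("pages[%d] -> pfn 0x%lx\n", j, pages[j]);
	return 0;
}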
@@ -163,10 +244,20 @@ static void nvgpu_dma_flags_to_attrs(struct gk20a *g, unsigned long *attrs,
 		*attrs |= DMA_ATTR_FORCE_CONTIGUOUS;
 }
 
+/*
+ * When GPU uses nvlink instead of IOMMU, memory can be non-contiguous if
+ * no NVGPU_DMA_PHYSICALLY_ADDRESSED flag is assigned. This means the GPU
+ * driver will need to map the memory after allocation
+ */
+#define nvgpu_nvlink_non_contig(g, flags) \
+	(nvgpu_is_enabled(g, NVGPU_MM_BYPASSES_IOMMU) && \
+	 !(flags & NVGPU_DMA_PHYSICALLY_ADDRESSED))
+
 int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags,
 			      size_t size, struct nvgpu_mem *mem)
 {
 	struct device *d = dev_from_gk20a(g);
+	gfp_t gfps = GFP_KERNEL|__GFP_ZERO;
 	dma_addr_t iova;
 	unsigned long dma_attrs = 0;
 	void *alloc_ret;
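
The new macro encodes a simple predicate: pages may be physically scattered only when the GPU bypasses the IOMMU (the nvlink + GMMU case) and the caller did not request physically addressed memory. Restating that logic standalone, with hypothetical stand-in names for the nvgpu enable bit and flag:

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in for the nvgpu per-GPU enable bits */
struct gpu_state {
	bool bypasses_iommu;			/* NVGPU_MM_BYPASSES_IOMMU */
};

#define DMA_PHYSICALLY_ADDRESSED (1UL << 0)	/* NVGPU_DMA_PHYSICALLY_ADDRESSED */

static bool nvlink_non_contig(const struct gpu_state *g, unsigned long flags)
{
	return g->bypasses_iommu && !(flags & DMA_PHYSICALLY_ADDRESSED);
}

int main(void)
{
	struct gpu_state g = { .bypasses_iommu = true };

	/* 1: pages may be scattered; the GMMU maps them over nvlink */
	printf("%d\n", nvlink_non_contig(&g, 0));
	/* 0: caller asked for physically addressed, hence contiguous, memory */
	printf("%d\n", nvlink_non_contig(&g, DMA_PHYSICALLY_ADDRESSED));
	return 0;
}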
@@ -193,14 +284,17 @@ int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags,
 	size = PAGE_ALIGN(size);
 
 	nvgpu_dma_flags_to_attrs(g, &dma_attrs, flags);
-	alloc_ret = dma_alloc_attrs(d, size, &iova,
-				    GFP_KERNEL|__GFP_ZERO, dma_attrs);
+	if (nvgpu_nvlink_non_contig(g, flags))
+		alloc_ret = nvgpu_dma_alloc_no_iommu(d, size, &iova, gfps);
+	else
+		alloc_ret = dma_alloc_attrs(d, size, &iova, gfps, dma_attrs);
 	if (!alloc_ret) {
 		err = -ENOMEM;
 		goto print_dma_err;
 	}
 
-	if (flags & NVGPU_DMA_NO_KERNEL_MAPPING) {
+	if (nvgpu_nvlink_non_contig(g, flags) ||
+	    flags & NVGPU_DMA_NO_KERNEL_MAPPING) {
 		mem->priv.pages = alloc_ret;
 		err = nvgpu_get_sgtable_from_pages(g, &mem->priv.sgt,
 						   mem->priv.pages,
@@ -213,6 +307,16 @@ int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags,
 	if (err)
 		goto fail_free_dma;
 
+	/* Map the page list from the non-contiguous allocation */
+	if (nvgpu_nvlink_non_contig(g, flags)) {
+		mem->cpu_va = vmap(mem->priv.pages, size >> PAGE_SHIFT,
+				   0, PAGE_KERNEL);
+		if (!mem->cpu_va) {
+			err = -ENOMEM;
+			goto fail_free_sgt;
+		}
+	}
+
 	mem->aligned_size = size;
 	mem->aperture = APERTURE_SYSMEM;
 	mem->priv.flags = flags;
@@ -221,6 +325,8 @@ int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags,
 
 	return 0;
 
+fail_free_sgt:
+	nvgpu_free_sgtable(g, &mem->priv.sgt);
 fail_free_dma:
 	dma_free_attrs(d, size, alloc_ret, iova, dma_attrs);
 	mem->cpu_va = NULL;
@@ -336,13 +442,21 @@ void nvgpu_dma_free_sys(struct gk20a *g, struct nvgpu_mem *mem)
 	    (mem->cpu_va || mem->priv.pages)) {
 		void *cpu_addr = mem->cpu_va;
 
-		/* NO_KERNEL_MAPPING uses pages pointer instead of cpu_va */
-		if (mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING)
+		/* These two use pages pointer instead of cpu_va */
+		if (nvgpu_nvlink_non_contig(g, mem->priv.flags) ||
+		    mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING)
 			cpu_addr = mem->priv.pages;
 
-		nvgpu_dma_flags_to_attrs(g, &dma_attrs, mem->priv.flags);
-		dma_free_attrs(d, mem->aligned_size, cpu_addr,
-			       sg_dma_address(mem->priv.sgt->sgl), dma_attrs);
+		if (nvgpu_nvlink_non_contig(g, mem->priv.flags)) {
+			vunmap(mem->cpu_va);
+			nvgpu_dma_free_no_iommu(mem->aligned_size, cpu_addr);
+		} else {
+			nvgpu_dma_flags_to_attrs(g, &dma_attrs,
+						 mem->priv.flags);
+			dma_free_attrs(d, mem->aligned_size, cpu_addr,
+				       sg_dma_address(mem->priv.sgt->sgl),
+				       dma_attrs);
+		}
 
 		mem->cpu_va = NULL;
 		mem->priv.pages = NULL;
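
The free path mirrors the allocation: in the nvlink case, cpu_addr actually holds the struct page ** cookie returned by nvgpu_dma_alloc_no_iommu(), so the vmap()ed kernel view is dropped first and then the page array and pages are released; every other case keeps the old dma_free_attrs() behavior. A schematic of the branch, with a hypothetical helper and print statements standing in for the kernel calls:

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical schematic of nvgpu_dma_free_sys()'s two branches */
static void free_sysmem(bool nvlink_non_contig)
{
	if (nvlink_non_contig) {
		puts("vunmap(cpu_va)");			/* drop the vmap()ed view first */
		puts("nvgpu_dma_free_no_iommu(...)");	/* then free array + pages */
	} else {
		puts("dma_free_attrs(...)");		/* contiguous / IOMMU path */
	}
}

int main(void)
{
	free_sysmem(true);
	free_sysmem(false);
	return 0;
}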