diff --git a/drivers/gpu/nvgpu/os/linux/linux-dma.c b/drivers/gpu/nvgpu/os/linux/linux-dma.c
index 9163fb69f..4d1b3ebb1 100644
--- a/drivers/gpu/nvgpu/os/linux/linux-dma.c
+++ b/drivers/gpu/nvgpu/os/linux/linux-dma.c
@@ -15,6 +15,7 @@
  */
 
 #include <linux/dma-mapping.h>
+#include <linux/vmalloc.h>
 #include <linux/version.h>
 
 #include <nvgpu/log.h>
@@ -150,6 +151,86 @@ static u64 __nvgpu_dma_alloc(struct nvgpu_allocator *allocator, u64 at,
 }
 #endif
 
+/**
+ * nvgpu_dma_alloc_no_iommu()/nvgpu_dma_free_no_iommu() are for use
+ * cases where memory can be physically non-contiguous even when the
+ * GPU is not behind an IOMMU, because the GPU accesses the memory
+ * over nvlink and lets the GMMU fully control it.
+ */
+static void __nvgpu_dma_free_no_iommu(struct page **pages,
+		int max, bool big_array)
+{
+	int i;
+
+	for (i = 0; i < max; i++)
+		if (pages[i])
+			__free_pages(pages[i], 0);
+
+	if (big_array)
+		vfree(pages);
+	else
+		kfree(pages);
+}
+
+static void *nvgpu_dma_alloc_no_iommu(struct device *dev, size_t size,
+		dma_addr_t *dma_handle, gfp_t gfps)
+{
+	int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	unsigned int array_size = count * sizeof(struct page *);
+	struct page **pages;
+	int i = 0;
+
+	if (array_size <= PAGE_SIZE)
+		pages = kzalloc(array_size, GFP_KERNEL);
+	else
+		pages = vzalloc(array_size);
+	if (!pages)
+		return NULL;
+
+	gfps |= __GFP_HIGHMEM | __GFP_NOWARN;
+
+	while (count) {
+		int j, order = __fls(count);
+
+		pages[i] = alloc_pages(gfps, order);
+		while (!pages[i] && order)
+			pages[i] = alloc_pages(gfps, --order);
+		if (!pages[i])
+			goto error;
+
+		if (order) {
+			split_page(pages[i], order);
+			j = 1 << order;
+			while (--j)
+				pages[i + j] = pages[i] + j;
+		}
+
+		memset(page_address(pages[i]), 0, PAGE_SIZE << order);
+
+		i += 1 << order;
+		count -= 1 << order;
+	}
+
+	*dma_handle = __pfn_to_phys(page_to_pfn(pages[0]));
+
+	return (void *)pages;
+
+error:
+	__nvgpu_dma_free_no_iommu(pages, i, array_size > PAGE_SIZE);
+	return NULL;
+}
+
+static void nvgpu_dma_free_no_iommu(size_t size, void *vaddr)
+{
+	int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	unsigned int array_size = count * sizeof(struct page *);
+	struct page **pages = vaddr;
+
+	WARN_ON(!pages);
+
+	__nvgpu_dma_free_no_iommu(pages, count, array_size > PAGE_SIZE);
+}
+
 /* Check if IOMMU is available and if GPU uses it */
 #define nvgpu_uses_iommu(g) \
 	(nvgpu_iommuable(g) && !nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG))
@@ -163,10 +244,20 @@ static void nvgpu_dma_flags_to_attrs(struct gk20a *g, unsigned long *attrs,
 		*attrs |= DMA_ATTR_FORCE_CONTIGUOUS;
 }
 
+/*
+ * When the GPU uses nvlink instead of the IOMMU, memory can be
+ * non-contiguous if NVGPU_DMA_PHYSICALLY_ADDRESSED is not set. This
+ * means the GPU driver must map the memory itself after allocation.
+ */
+#define nvgpu_nvlink_non_contig(g, flags) \
+	(nvgpu_is_enabled(g, NVGPU_MM_BYPASSES_IOMMU) && \
+	 !(flags & NVGPU_DMA_PHYSICALLY_ADDRESSED))
+
 int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags,
 		size_t size, struct nvgpu_mem *mem)
 {
 	struct device *d = dev_from_gk20a(g);
+	gfp_t gfps = GFP_KERNEL|__GFP_ZERO;
 	dma_addr_t iova;
 	unsigned long dma_attrs = 0;
 	void *alloc_ret;
@@ -193,14 +284,17 @@ int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags,
 	size = PAGE_ALIGN(size);
 	nvgpu_dma_flags_to_attrs(g, &dma_attrs, flags);
 
-	alloc_ret = dma_alloc_attrs(d, size, &iova,
-			GFP_KERNEL|__GFP_ZERO, dma_attrs);
+	if (nvgpu_nvlink_non_contig(g, flags))
+		alloc_ret = nvgpu_dma_alloc_no_iommu(d, size, &iova, gfps);
+	else
+		alloc_ret = dma_alloc_attrs(d, size, &iova, gfps, dma_attrs);
 	if (!alloc_ret) {
 		err = -ENOMEM;
 		goto print_dma_err;
 	}
 
-	if (flags & NVGPU_DMA_NO_KERNEL_MAPPING) {
+	if (nvgpu_nvlink_non_contig(g, flags) ||
+	    flags & NVGPU_DMA_NO_KERNEL_MAPPING) {
 		mem->priv.pages = alloc_ret;
 		err = nvgpu_get_sgtable_from_pages(g, &mem->priv.sgt,
 						   mem->priv.pages,
@@ -213,6 +307,16 @@
 	if (err)
 		goto fail_free_dma;
 
+	/* Map the page list from the non-contiguous allocation */
+	if (nvgpu_nvlink_non_contig(g, flags)) {
+		mem->cpu_va = vmap(mem->priv.pages, size >> PAGE_SHIFT,
+				0, PAGE_KERNEL);
+		if (!mem->cpu_va) {
+			err = -ENOMEM;
+			goto fail_free_sgt;
+		}
+	}
+
 	mem->aligned_size = size;
 	mem->aperture = APERTURE_SYSMEM;
 	mem->priv.flags = flags;
@@ -221,6 +325,8 @@
 
 	return 0;
 
+fail_free_sgt:
+	nvgpu_free_sgtable(g, &mem->priv.sgt);
 fail_free_dma:
 	dma_free_attrs(d, size, alloc_ret, iova, dma_attrs);
 	mem->cpu_va = NULL;
@@ -336,13 +442,21 @@ void nvgpu_dma_free_sys(struct gk20a *g, struct nvgpu_mem *mem)
 	    (mem->cpu_va || mem->priv.pages)) {
 		void *cpu_addr = mem->cpu_va;
 
-		/* NO_KERNEL_MAPPING uses pages pointer instead of cpu_va */
-		if (mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING)
+		/* These two cases use the pages pointer instead of cpu_va */
+		if (nvgpu_nvlink_non_contig(g, mem->priv.flags) ||
+		    mem->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING)
 			cpu_addr = mem->priv.pages;
 
-		nvgpu_dma_flags_to_attrs(g, &dma_attrs, mem->priv.flags);
-		dma_free_attrs(d, mem->aligned_size, cpu_addr,
-				sg_dma_address(mem->priv.sgt->sgl), dma_attrs);
+		if (nvgpu_nvlink_non_contig(g, mem->priv.flags)) {
+			vunmap(mem->cpu_va);
+			nvgpu_dma_free_no_iommu(mem->aligned_size, cpu_addr);
+		} else {
+			nvgpu_dma_flags_to_attrs(g, &dma_attrs,
+					mem->priv.flags);
+			dma_free_attrs(d, mem->aligned_size, cpu_addr,
+					sg_dma_address(mem->priv.sgt->sgl),
+					dma_attrs);
+		}
 
 		mem->cpu_va = NULL;
 		mem->priv.pages = NULL;
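
Note on the allocation strategy (not part of the patch): nvgpu_dma_alloc_no_iommu()
carves the request into power-of-two chunks, always trying the largest order that
still fits and stepping down on failure. The standalone C sketch below traces that
loop for a hypothetical 13-page request; demo_fls() is an illustrative stand-in for
the kernel's __fls(), and every allocation attempt is assumed to succeed, so the
fallback path is not exercised.

#include <stdio.h>

/* Index of the highest set bit, like the kernel's __fls() */
static int demo_fls(unsigned int v)
{
	int bit = 0;

	while (v >>= 1)
		bit++;
	return bit;
}

int main(void)
{
	unsigned int count = 13;	/* pages still to allocate */

	printf("allocating %u pages:\n", count);
	while (count) {
		int order = demo_fls(count);

		/*
		 * The real code calls alloc_pages(gfps, order) here and
		 * retries with --order on failure; assume each attempt
		 * succeeds so the trace shows the best case.
		 */
		printf("  order %d -> %u pages\n", order, 1u << order);
		count -= 1u << order;
	}
	return 0;
}

For 13 pages this prints order 3 (8 pages), order 2 (4 pages), order 0 (1 page).
Preferring high orders keeps runs of pages physically adjacent, which can shorten
the scatter-gather table that nvgpu_get_sgtable_from_pages() builds afterwards,
since contiguous runs coalesce into single entries.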