diff --git a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
index 1dbbd1a06..2587d56a5 100644
--- a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
@@ -529,6 +529,12 @@ static u64 nvgpu_mem_linux_sgl_gpu_addr(struct gk20a *g, void *sgl,
                             sg_dma_address((struct scatterlist *)sgl));
 }
 
+static bool nvgpu_mem_linux_sgt_iommuable(struct gk20a *g,
+                                          struct nvgpu_sgt *sgt)
+{
+        return true;
+}
+
 static void nvgpu_mem_linux_sgl_free(struct gk20a *g, struct nvgpu_sgt *sgt)
 {
         /*
@@ -539,12 +545,13 @@ static void nvgpu_mem_linux_sgl_free(struct gk20a *g, struct nvgpu_sgt *sgt)
 }
 
 static const struct nvgpu_sgt_ops nvgpu_linux_sgt_ops = {
-        .sgl_next      = nvgpu_mem_linux_sgl_next,
-        .sgl_phys      = nvgpu_mem_linux_sgl_phys,
-        .sgl_dma       = nvgpu_mem_linux_sgl_dma,
-        .sgl_length    = nvgpu_mem_linux_sgl_length,
-        .sgl_gpu_addr  = nvgpu_mem_linux_sgl_gpu_addr,
-        .sgt_free      = nvgpu_mem_linux_sgl_free,
+        .sgl_next      = nvgpu_mem_linux_sgl_next,
+        .sgl_phys      = nvgpu_mem_linux_sgl_phys,
+        .sgl_dma       = nvgpu_mem_linux_sgl_dma,
+        .sgl_length    = nvgpu_mem_linux_sgl_length,
+        .sgl_gpu_addr  = nvgpu_mem_linux_sgl_gpu_addr,
+        .sgt_iommuable = nvgpu_mem_linux_sgt_iommuable,
+        .sgt_free      = nvgpu_mem_linux_sgl_free,
 };
 
 static struct nvgpu_sgt *__nvgpu_mem_get_sgl_from_vidmem(
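
The Linux backend can answer true unconditionally here because its SGTs wrap
DMA API scatterlists, which may be translated by the SMMU. Backends that can
never sit behind the IOMMU simply leave the new op NULL and inherit a false
answer from the nvgpu_sgt_iommuable() wrapper added further down in
common/mm/nvgpu_mem.c. A minimal user-space model of that NULL-means-false
op-table convention (all names below are illustrative, not driver code):

#include <stdbool.h>
#include <stdio.h>

struct sgt;

struct sgt_ops {
        /* If left NULL then iommuable is assumed to be false. */
        bool (*sgt_iommuable)(struct sgt *sgt);
};

struct sgt {
        const struct sgt_ops *ops;
};

/* Mirrors the wrapper's behavior: a missing hook means "not iommuable". */
static bool sgt_iommuable(struct sgt *sgt)
{
        if (sgt->ops->sgt_iommuable)
                return sgt->ops->sgt_iommuable(sgt);
        return false;
}

static bool always_true(struct sgt *sgt)
{
        return true;
}

static const struct sgt_ops sysmem_like_ops = { .sgt_iommuable = always_true };
static const struct sgt_ops vidmem_like_ops = { .sgt_iommuable = NULL };

int main(void)
{
        struct sgt sys = { &sysmem_like_ops };
        struct sgt vid = { &vidmem_like_ops };

        /* Prints "sysmem-like: 1, vidmem-like: 0". */
        printf("sysmem-like: %d, vidmem-like: %d\n",
               sgt_iommuable(&sys), sgt_iommuable(&vid));
        return 0;
}
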
diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c
index 895a5771b..006216c26 100644
--- a/drivers/gpu/nvgpu/common/linux/vm.c
+++ b/drivers/gpu/nvgpu/common/linux/vm.c
@@ -59,62 +59,6 @@ static struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_reverse(
         return NULL;
 }
 
-/*
- * Determine alignment for a passed buffer. Necessary since the buffer may
- * appear big to map with large pages but the SGL may have chunks that are not
- * aligned on a 64/128kB large page boundary.
- */
-static u64 nvgpu_get_buffer_alignment(struct gk20a *g, struct scatterlist *sgl,
-                                      enum nvgpu_aperture aperture)
-{
-        u64 align = 0, chunk_align = 0;
-        u64 buf_addr;
-
-        if (aperture == APERTURE_VIDMEM) {
-                struct nvgpu_page_alloc *alloc =
-                        nvgpu_vidmem_get_page_alloc(sgl);
-                struct nvgpu_sgt *sgt = &alloc->sgt;
-                void *sgl_vid = sgt->sgl;
-
-                while (sgl_vid) {
-                        chunk_align = 1ULL <<
-                                __ffs(nvgpu_sgt_get_phys(sgt, sgl_vid)) |
-                                nvgpu_sgt_get_length(sgt, sgl_vid);
-
-                        if (align)
-                                align = min(align, chunk_align);
-                        else
-                                align = chunk_align;
-
-                        sgl_vid = nvgpu_sgt_get_next(sgt, sgl_vid);
-                }
-
-                return align;
-        }
-
-        buf_addr = (u64)sg_dma_address(sgl);
-
-        if (g->mm.bypass_smmu || buf_addr == DMA_ERROR_CODE || !buf_addr) {
-                while (sgl) {
-                        buf_addr = (u64)sg_phys(sgl);
-                        chunk_align = 1ULL << __ffs(buf_addr |
-                                                    (u64)sgl->length);
-
-                        if (align)
-                                align = min(align, chunk_align);
-                        else
-                                align = chunk_align;
-                        sgl = sg_next(sgl);
-                }
-
-                return align;
-        }
-
-        align = 1ULL << __ffs(buf_addr);
-
-        return align;
-}
-
 int nvgpu_vm_find_buf(struct vm_gk20a *vm, u64 gpu_va,
                       struct dma_buf **dmabuf,
                       u64 *offset)
@@ -218,7 +162,7 @@ int nvgpu_vm_map_linux(struct vm_gk20a *vm,
         struct nvgpu_ctag_buffer_info binfo = { 0 };
         struct gk20a_comptags comptags;
         struct nvgpu_vm_area *vm_area = NULL;
-        struct nvgpu_sgt *nvgpu_sgt;
+        struct nvgpu_sgt *nvgpu_sgt = NULL;
         struct sg_table *sgt;
         struct nvgpu_mapped_buf *mapped_buffer = NULL;
         enum nvgpu_aperture aperture;
@@ -279,6 +223,10 @@ int nvgpu_vm_map_linux(struct vm_gk20a *vm,
                 goto clean_up;
         }
 
+        nvgpu_sgt = nvgpu_linux_sgt_create(g, sgt);
+        if (!nvgpu_sgt)
+                goto clean_up;
+
         aperture = gk20a_dmabuf_aperture(g, dmabuf);
         if (aperture == APERTURE_INVALID) {
                 err = -EINVAL;
@@ -288,7 +236,7 @@ int nvgpu_vm_map_linux(struct vm_gk20a *vm,
         if (flags & NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET)
                 map_offset = offset_align;
 
-        align = nvgpu_get_buffer_alignment(g, sgt->sgl, aperture);
+        align = nvgpu_sgt_alignment(g, nvgpu_sgt);
         if (g->mm.disable_bigpage)
                 binfo.pgsz_idx = gmmu_page_size_small;
         else
@@ -370,8 +318,6 @@ int nvgpu_vm_map_linux(struct vm_gk20a *vm,
                 ctag_offset += buffer_offset >>
                         ilog2(g->ops.fb.compression_page_size(g));
 
-        nvgpu_sgt = nvgpu_linux_sgt_create(g, sgt);
-
         /* update gmmu ptes */
         map_offset = g->ops.mm.gmmu_map(vm,
                                         map_offset,
@@ -391,7 +337,7 @@ int nvgpu_vm_map_linux(struct vm_gk20a *vm,
         if (!map_offset)
                 goto clean_up;
 
-        nvgpu_sgt_free(nvgpu_sgt, g);
+        nvgpu_sgt_free(g, nvgpu_sgt);
 
         mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer));
         if (!mapped_buffer) {
@@ -434,6 +380,9 @@ int nvgpu_vm_map_linux(struct vm_gk20a *vm,
 
 clean_up:
         nvgpu_kfree(g, mapped_buffer);
+
+        if (nvgpu_sgt)
+                nvgpu_sgt_free(g, nvgpu_sgt);
         if (va_allocated)
                 __nvgpu_vm_free_va(vm, map_offset, binfo.pgsz_idx);
         if (!IS_ERR(sgt))
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index 875bcc4e9..4289104d9 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -93,7 +93,7 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm,
                            aperture);
         nvgpu_mutex_release(&vm->update_gmmu_lock);
 
-        nvgpu_sgt_free(sgt, g);
+        nvgpu_sgt_free(g, sgt);
 
         if (!vaddr) {
                 nvgpu_err(g, "failed to map buffer!");
@@ -500,7 +500,7 @@ static int __nvgpu_gmmu_do_update_page_table(struct vm_gk20a *vm,
          * IO address and will be contiguous.
          */
        if (attrs->aperture == APERTURE_SYSMEM && !g->mm.bypass_smmu) {
-                u64 io_addr = nvgpu_sgt_get_gpu_addr(sgt, g, sgt->sgl, attrs);
+                u64 io_addr = nvgpu_sgt_get_gpu_addr(g, sgt, sgt->sgl, attrs);
 
                 io_addr += space_to_skip;
 
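
The vm.c rework above follows a standard kernel error-handling shape: the
nvgpu_sgt pointer is initialized to NULL, the SGT is created before its first
use (the alignment computation), and the single clean_up label frees whatever
was actually created. The other mechanical change in these two files moves the
struct gk20a pointer to the first parameter of nvgpu_sgt_free() and
nvgpu_sgt_get_gpu_addr(). A stripped-down, compilable model of that
create-early/free-once control flow (function and type names are illustrative
only, not driver code):

#include <stdio.h>
#include <stdlib.h>

struct sgt { int unused; };

static struct sgt *sgt_create(int fail)
{
        return fail ? NULL : calloc(1, sizeof(struct sgt));
}

static void sgt_free(struct sgt *sgt)
{
        free(sgt);
}

static int map_buffer(int fail_create, int fail_later)
{
        struct sgt *sgt = NULL; /* NULL until created: cleanup stays safe */
        int err = 0;

        sgt = sgt_create(fail_create);
        if (!sgt) {
                err = -12;      /* -ENOMEM */
                goto clean_up;
        }

        if (fail_later) {       /* any later step can still bail out */
                err = -22;      /* -EINVAL */
                goto clean_up;
        }

clean_up:
        if (sgt)                /* runs on success and on every error path */
                sgt_free(sgt);
        return err;
}

int main(void)
{
        /* Prints "0 -12 -22". */
        printf("%d %d %d\n", map_buffer(0, 0), map_buffer(1, 0),
               map_buffer(0, 1));
        return 0;
}
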
diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
index 9f677058f..b4e718b46 100644
--- a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
@@ -47,13 +47,20 @@ u64 nvgpu_sgt_get_length(struct nvgpu_sgt *sgt, void *sgl)
         return sgt->ops->sgl_length(sgl);
 }
 
-u64 nvgpu_sgt_get_gpu_addr(struct nvgpu_sgt *sgt, struct gk20a *g, void *sgl,
+u64 nvgpu_sgt_get_gpu_addr(struct gk20a *g, struct nvgpu_sgt *sgt, void *sgl,
                            struct nvgpu_gmmu_attrs *attrs)
 {
         return sgt->ops->sgl_gpu_addr(g, sgl, attrs);
 }
 
-void nvgpu_sgt_free(struct nvgpu_sgt *sgt, struct gk20a *g)
+bool nvgpu_sgt_iommuable(struct gk20a *g, struct nvgpu_sgt *sgt)
+{
+        if (sgt->ops->sgt_iommuable)
+                return sgt->ops->sgt_iommuable(g, sgt);
+        return false;
+}
+
+void nvgpu_sgt_free(struct gk20a *g, struct nvgpu_sgt *sgt)
 {
         if (sgt && sgt->ops->sgt_free)
                 sgt->ops->sgt_free(g, sgt);
@@ -69,3 +76,44 @@ u64 nvgpu_mem_iommu_translate(struct gk20a *g, u64 phys)
 
         return phys;
 }
+
+/*
+ * Determine alignment for a passed buffer. Necessary since the buffer may
+ * appear big enough to map with large pages but the SGL may have chunks that
+ * are not aligned on a 64/128kB large page boundary. There's also the
+ * possibility chunks are odd sizes which will necessitate small page mappings
+ * to correctly glue them together into a contiguous virtual mapping.
+ */
+u64 nvgpu_sgt_alignment(struct gk20a *g, struct nvgpu_sgt *sgt)
+{
+        u64 align = 0, chunk_align = 0;
+        void *sgl;
+
+        /*
+         * If this SGT is iommuable and we want to use the IOMMU address then
+         * the SGT's first entry has the IOMMU address. We will align on this
+         * and double check length of buffer later. Also, since there's an
+         * IOMMU we know that this DMA address is contiguous.
+         */
+        if (!g->mm.bypass_smmu &&
+            nvgpu_sgt_iommuable(g, sgt) &&
+            nvgpu_sgt_get_dma(sgt, sgt->sgl))
+                return 1ULL << __ffs(nvgpu_sgt_get_dma(sgt, sgt->sgl));
+
+        /*
+         * Otherwise the buffer is not iommuable (VIDMEM, for example) or we
+         * are bypassing the IOMMU and need to use the underlying physical
+         * entries of the SGT.
+         */
+        nvgpu_sgt_for_each_sgl(sgl, sgt) {
+                chunk_align = 1ULL << __ffs(nvgpu_sgt_get_phys(sgt, sgl) |
+                                            nvgpu_sgt_get_length(sgt, sgl));
+
+                if (align)
+                        align = min(align, chunk_align);
+                else
+                        align = chunk_align;
+        }
+
+        return align;
+}
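
In the slow path above, each chunk is reduced to the largest power of two
that divides both its physical address and its length: kernel __ffs(x) is the
index of the lowest set bit, so 1ULL << __ffs(phys | len) cannot exceed the
alignment of either value, and the minimum across chunks bounds the usable
page size. Worth noting: the vidmem branch of the removed vm.c helper OR'ed
the length in after the shift (operator precedence), whereas the consolidated
helper folds it into the __ffs() argument. A user-space model of the
per-chunk arithmetic, using GCC/Clang __builtin_ctzll in place of the
kernel's __ffs() (chunk values here are made up for illustration):

#include <stdint.h>
#include <stdio.h>

struct chunk {
        uint64_t phys;
        uint64_t len;
};

/* Largest power of two dividing x; assumes x != 0 (ctz of 0 is undefined). */
static uint64_t low_pow2(uint64_t x)
{
        return 1ULL << __builtin_ctzll(x);
}

static uint64_t sgt_alignment(const struct chunk *c, int n)
{
        uint64_t align = 0;

        for (int i = 0; i < n; i++) {
                /* Alignment usable for this chunk's start and size. */
                uint64_t chunk_align = low_pow2(c[i].phys | c[i].len);

                align = align ? (align < chunk_align ? align : chunk_align)
                              : chunk_align;
        }
        return align;
}

int main(void)
{
        /*
         * A 128kB-aligned, 128kB-long chunk followed by a 4kB-aligned,
         * 4kB-long one: the buffer as a whole only supports small pages.
         */
        struct chunk c[] = {
                { 0x100020000ULL, 0x20000 },
                { 0x100041000ULL, 0x1000 },
        };

        /* Prints "alignment = 0x1000". */
        printf("alignment = 0x%llx\n",
               (unsigned long long)sgt_alignment(c, 2));
        return 0;
}
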
diff --git a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
index 6feacff72..2b8b7015e 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h
@@ -61,6 +61,11 @@ struct nvgpu_sgt_ops {
         u64   (*sgl_length)(void *sgl);
         u64   (*sgl_gpu_addr)(struct gk20a *g, void *sgl,
                               struct nvgpu_gmmu_attrs *attrs);
+        /*
+         * If left NULL then iommuable is assumed to be false.
+         */
+        bool  (*sgt_iommuable)(struct gk20a *g, struct nvgpu_sgt *sgt);
+
         /*
          * Note: this operates on the whole SGT not a specific SGL entry.
          */
@@ -232,9 +237,12 @@ void *nvgpu_sgt_get_next(struct nvgpu_sgt *sgt, void *sgl);
 u64 nvgpu_sgt_get_phys(struct nvgpu_sgt *sgt, void *sgl);
 u64 nvgpu_sgt_get_dma(struct nvgpu_sgt *sgt, void *sgl);
 u64 nvgpu_sgt_get_length(struct nvgpu_sgt *sgt, void *sgl);
-u64 nvgpu_sgt_get_gpu_addr(struct nvgpu_sgt *sgt, struct gk20a *g, void *sgl,
+u64 nvgpu_sgt_get_gpu_addr(struct gk20a *g, struct nvgpu_sgt *sgt, void *sgl,
                            struct nvgpu_gmmu_attrs *attrs);
-void nvgpu_sgt_free(struct nvgpu_sgt *sgt, struct gk20a *g);
+void nvgpu_sgt_free(struct gk20a *g, struct nvgpu_sgt *sgt);
+
+bool nvgpu_sgt_iommuable(struct gk20a *g, struct nvgpu_sgt *sgt);
+u64 nvgpu_sgt_alignment(struct gk20a *g, struct nvgpu_sgt *sgt);
 
 /**
  * nvgpu_mem_create_from_mem - Create a new nvgpu_mem struct from an old one.
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
index 498a15284..a125366a4 100644
--- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
@@ -107,7 +107,7 @@ u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm,
         struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d);
         struct tegra_vgpu_cmd_msg msg;
         struct tegra_vgpu_as_map_params *p = &msg.params.as_map;
-        u64 addr = nvgpu_sgt_get_gpu_addr(sgt, g, sgt->sgl, NULL);
+        u64 addr = nvgpu_sgt_get_gpu_addr(g, sgt, sgt->sgl, NULL);
         u8 prot;
 
         gk20a_dbg_fn("");
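
Taken together, the alignment decision now has two tiers: when the SGT is
iommuable, the SMMU is not bypassed, and a DMA address exists, the GPU sees
one contiguous IOVA and only that address's own low bits matter; only
otherwise are the physical chunks walked. A compact model of that decision
(illustrative only, not driver code; x & -x is the same quantity as
1ULL << __ffs(x)):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct buf {
        bool iommuable;          /* backend supports IOMMU addressing */
        uint64_t dma_addr;       /* contiguous IOVA, 0 if none */
        uint64_t min_chunk_align;/* precomputed per-chunk minimum */
};

static uint64_t buf_alignment(const struct buf *b, bool bypass_smmu)
{
        /*
         * Fast path: with an IOMMU in the way the GPU sees one contiguous
         * range, so only the IOVA's own alignment matters.
         */
        if (!bypass_smmu && b->iommuable && b->dma_addr)
                return b->dma_addr & -b->dma_addr;

        /* Slow path: the minimum alignment over all physical chunks. */
        return b->min_chunk_align;
}

int main(void)
{
        struct buf b = { true, 0x80000000ULL, 0x1000 };

        /* Prints "smmu: 0x80000000, bypass: 0x1000". */
        printf("smmu: 0x%llx, bypass: 0x%llx\n",
               (unsigned long long)buf_alignment(&b, false),
               (unsigned long long)buf_alignment(&b, true));
        return 0;
}
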