diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu index d02870fbb..6e475fcb2 100644 --- a/drivers/gpu/nvgpu/Makefile.nvgpu +++ b/drivers/gpu/nvgpu/Makefile.nvgpu @@ -55,6 +55,7 @@ nvgpu-y := \ common/mm/pd_cache.o \ common/mm/vm.o \ common/mm/vm_area.o \ + common/mm/nvgpu_mem.o \ common/bus.o \ common/enabled.o \ common/pramin.o \ diff --git a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c index e4991d0da..eb54f3fd6 100644 --- a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c +++ b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c @@ -21,6 +21,7 @@ #include #include #include +#include #include @@ -395,3 +396,116 @@ int __nvgpu_mem_create_from_pages(struct gk20a *g, struct nvgpu_mem *dest, return 0; } + +static struct nvgpu_mem_sgl *__nvgpu_mem_sgl_dup(struct gk20a *g, + struct nvgpu_mem_sgl *sgl) +{ + struct nvgpu_mem_sgl *head, *next; + + head = nvgpu_kzalloc(g, sizeof(*sgl)); + if (!head) + return NULL; + + next = head; + while (true) { + nvgpu_log(g, gpu_dbg_sgl, + " phys: 0x%-12llx dma: 0x%-12llx len: 0x%llx", + sgl->phys, sgl->dma, sgl->length); + + next->dma = sgl->dma; + next->phys = sgl->phys; + next->length = sgl->length; + next->next = NULL; + + sgl = nvgpu_mem_sgl_next(sgl); + if (!sgl) + break; + + next->next = nvgpu_kzalloc(g, sizeof(*sgl)); + if (!next->next) { + nvgpu_mem_sgl_free(g, head); + return NULL; + } + next = next->next; + } + + return head; +} + +static struct nvgpu_mem_sgl *__nvgpu_mem_sgl_create_from_vidmem( + struct gk20a *g, + struct scatterlist *linux_sgl) +{ + struct nvgpu_page_alloc *vidmem_alloc; + + vidmem_alloc = get_vidmem_page_alloc(linux_sgl); + if (!vidmem_alloc) + return NULL; + + nvgpu_log(g, gpu_dbg_sgl, "Vidmem sgl:"); + + return __nvgpu_mem_sgl_dup(g, vidmem_alloc->sgl); +} + +struct nvgpu_mem_sgl *nvgpu_mem_sgl_create(struct gk20a *g, + struct sg_table *sgt) +{ + struct nvgpu_mem_sgl *head, *sgl, *next; + struct scatterlist *linux_sgl = sgt->sgl; + + if (is_vidmem_page_alloc(sg_dma_address(linux_sgl))) + return __nvgpu_mem_sgl_create_from_vidmem(g, linux_sgl); + + head = nvgpu_kzalloc(g, sizeof(*sgl)); + if (!head) + return NULL; + + nvgpu_log(g, gpu_dbg_sgl, "Making sgl:"); + + sgl = head; + while (true) { + sgl->dma = sg_dma_address(linux_sgl); + sgl->phys = sg_phys(linux_sgl); + sgl->length = linux_sgl->length; + + /* + * We don't like offsets in the pages here. This will cause + * problems. + */ + if (WARN_ON(linux_sgl->offset)) { + nvgpu_mem_sgl_free(g, head); + return NULL; + } + + nvgpu_log(g, gpu_dbg_sgl, + " phys: 0x%-12llx dma: 0x%-12llx len: 0x%llx", + sgl->phys, sgl->dma, sgl->length); + + /* + * When there's no more SGL ents for the Linux SGL we are + * done. Don't bother making any more SGL ents for the nvgpu + * SGL. 
+ */ + linux_sgl = sg_next(linux_sgl); + if (!linux_sgl) + break; + + next = nvgpu_kzalloc(g, sizeof(*sgl)); + if (!next) { + nvgpu_mem_sgl_free(g, head); + return NULL; + } + + sgl->next = next; + sgl = next; + } + + nvgpu_log(g, gpu_dbg_sgl, "Done!"); + return head; +} + +struct nvgpu_mem_sgl *nvgpu_mem_sgl_create_from_mem(struct gk20a *g, + struct nvgpu_mem *mem) +{ + return nvgpu_mem_sgl_create(g, mem->priv.sgt); +} diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c index 86d8bec9c..4a4429dc4 100644 --- a/drivers/gpu/nvgpu/common/linux/vm.c +++ b/drivers/gpu/nvgpu/common/linux/vm.c @@ -21,8 +21,11 @@ #include #include #include +#include #include +#include + #include "gk20a/gk20a.h" #include "gk20a/mm_gk20a.h" #include "gk20a/kind_gk20a.h" @@ -66,17 +69,19 @@ static u64 nvgpu_get_buffer_alignment(struct gk20a *g, struct scatterlist *sgl, if (aperture == APERTURE_VIDMEM) { struct nvgpu_page_alloc *alloc = get_vidmem_page_alloc(sgl); - struct page_alloc_chunk *chunk = NULL; + struct nvgpu_mem_sgl *sgl_vid = alloc->sgl; - nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks, - page_alloc_chunk, list_entry) { - chunk_align = 1ULL << __ffs(chunk->base | - chunk->length); + while (sgl_vid) { + chunk_align = 1ULL << + __ffs(nvgpu_mem_sgl_phys(sgl_vid) | + nvgpu_mem_sgl_length(sgl_vid)); if (align) align = min(align, chunk_align); else align = chunk_align; + + sgl_vid = nvgpu_mem_sgl_next(sgl_vid); } return align; @@ -237,6 +242,7 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm, struct nvgpu_vm_area *vm_area = NULL; u32 ctag_offset; enum nvgpu_aperture aperture; + struct nvgpu_mem_sgl *nvgpu_sgl; /* * The kind used as part of the key for map caching. HW may @@ -393,9 +399,12 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm, ctag_offset += buffer_offset >> ilog2(g->ops.fb.compression_page_size(g)); + nvgpu_sgl = nvgpu_mem_sgl_create(g, bfr.sgt); + /* update gmmu ptes */ - map_offset = g->ops.mm.gmmu_map(vm, map_offset, - bfr.sgt, + map_offset = g->ops.mm.gmmu_map(vm, + map_offset, + nvgpu_sgl, buffer_offset, /* sg offset */ mapping_size, bfr.pgsz_idx, @@ -410,6 +419,8 @@ u64 nvgpu_vm_map(struct vm_gk20a *vm, if (!map_offset) goto clean_up; + nvgpu_mem_sgl_free(g, nvgpu_sgl); + mapped_buffer = nvgpu_kzalloc(g, sizeof(*mapped_buffer)); if (!mapped_buffer) { nvgpu_warn(g, "oom allocating tracking buffer"); diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c index 7f486d682..41f5acdd7 100644 --- a/drivers/gpu/nvgpu/common/mm/gmmu.c +++ b/drivers/gpu/nvgpu/common/mm/gmmu.c @@ -65,11 +65,14 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm, struct gk20a *g = gk20a_from_vm(vm); u64 vaddr; - struct sg_table *sgt = mem->priv.sgt; + struct nvgpu_mem_sgl *sgl = nvgpu_mem_sgl_create_from_mem(g, mem); + + if (!sgl) + return -ENOMEM; nvgpu_mutex_acquire(&vm->update_gmmu_lock); vaddr = g->ops.mm.gmmu_map(vm, addr, - sgt, /* sg table */ + sgl, /* sg list */ 0, /* sg offset */ size, gmmu_page_size_kernel, @@ -82,8 +85,11 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm, NULL, /* mapping_batch handle */ aperture); nvgpu_mutex_release(&vm->update_gmmu_lock); + + nvgpu_mem_sgl_free(g, sgl); + if (!vaddr) { - nvgpu_err(g, "failed to allocate va space"); + nvgpu_err(g, "failed to map buffer!"); return 0; } @@ -91,7 +97,7 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm, } /* - * Convenience wrapper over __nvgpu_gmmu_map() for non-fixed mappings. + * Map a nvgpu_mem into the GMMU. This is for kernel space to use. 
*/ u64 nvgpu_gmmu_map(struct vm_gk20a *vm, struct nvgpu_mem *mem, @@ -106,7 +112,7 @@ u64 nvgpu_gmmu_map(struct vm_gk20a *vm, } /* - * Like nvgpu_gmmu_map() except it can work on a fixed address instead. + * Like nvgpu_gmmu_map() except this can work on a fixed address. */ u64 nvgpu_gmmu_map_fixed(struct vm_gk20a *vm, struct nvgpu_mem *mem, @@ -407,7 +413,7 @@ static int __set_pd_level(struct vm_gk20a *vm, */ target_addr = next_pd ? nvgpu_pde_phys_addr(g, next_pd) : - g->ops.mm.gpu_phys_addr(g, attrs, phys_addr); + phys_addr; l->update_entry(vm, l, pd, pd_idx, @@ -458,18 +464,16 @@ static int __set_pd_level(struct vm_gk20a *vm, * VIDMEM version of the update_ptes logic. */ static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm, - struct sg_table *sgt, + struct nvgpu_mem_sgl *sgl, u64 space_to_skip, u64 virt_addr, u64 length, struct nvgpu_gmmu_attrs *attrs) { - struct nvgpu_page_alloc *alloc = NULL; - struct page_alloc_chunk *chunk = NULL; u64 phys_addr, chunk_length; int err = 0; - if (!sgt) { + if (!sgl) { /* * This is considered an unmap. Just pass in 0 as the physical * address for the entire GPU range. @@ -482,22 +486,21 @@ static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm, return err; } - alloc = get_vidmem_page_alloc(sgt->sgl); - /* * Otherwise iterate across all the chunks in this allocation and * map them. */ - nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks, - page_alloc_chunk, list_entry) { + while (sgl) { if (space_to_skip && - space_to_skip >= chunk->length) { - space_to_skip -= chunk->length; + space_to_skip >= nvgpu_mem_sgl_length(sgl)) { + space_to_skip -= nvgpu_mem_sgl_length(sgl); + sgl = nvgpu_mem_sgl_next(sgl); continue; } - phys_addr = chunk->base + space_to_skip; - chunk_length = min(length, (chunk->length - space_to_skip)); + phys_addr = nvgpu_mem_sgl_phys(sgl) + space_to_skip; + chunk_length = min(length, (nvgpu_mem_sgl_length(sgl) - + space_to_skip)); err = __set_pd_level(vm, &vm->pdb, 0, @@ -518,23 +521,24 @@ static int __nvgpu_gmmu_update_page_table_vidmem(struct vm_gk20a *vm, if (length == 0) break; + + sgl = nvgpu_mem_sgl_next(sgl); } return err; } static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm, - struct sg_table *sgt, + struct nvgpu_mem_sgl *sgl, u64 space_to_skip, u64 virt_addr, u64 length, struct nvgpu_gmmu_attrs *attrs) { int err; - struct scatterlist *sgl; struct gk20a *g = gk20a_from_vm(vm); - if (!sgt) { + if (!sgl) { /* * This is considered an unmap. Just pass in 0 as the physical * address for the entire GPU range. @@ -548,19 +552,15 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm, } /* - * At this point we have a Linux scatter-gather list pointing to some - * number of discontiguous chunks of memory. Iterate over that list and + * At this point we have a scatter-gather list pointing to some number + * of discontiguous chunks of memory. We must iterate over that list and * generate a GMMU map call for each chunk. There are two possibilities: - * either the IOMMU is enabled or not. When the IOMMU is enabled the + * either an IOMMU is enabled or not. When an IOMMU is enabled the * mapping is simple since the "physical" address is actually a virtual - * IO address and will be contiguous. The no-IOMMU case is more - * complicated. We will have to iterate over the SGT and do a separate - * map for each chunk of the SGT. + * IO address and will be contiguous. 
*/ - sgl = sgt->sgl; - if (!g->mm.bypass_smmu) { - u64 io_addr = nvgpu_mem_get_addr_sgl(g, sgl); + u64 io_addr = nvgpu_mem_sgl_gpu_addr(g, sgl, attrs); io_addr += space_to_skip; @@ -585,14 +585,16 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm, /* * Cut out sgl ents for space_to_skip. */ - if (space_to_skip && space_to_skip >= sgl->length) { - space_to_skip -= sgl->length; - sgl = sg_next(sgl); + if (space_to_skip && + space_to_skip >= nvgpu_mem_sgl_length(sgl)) { + space_to_skip -= nvgpu_mem_sgl_length(sgl); + sgl = nvgpu_mem_sgl_next(sgl); continue; } - phys_addr = sg_phys(sgl) + space_to_skip; - chunk_length = min(length, sgl->length - space_to_skip); + phys_addr = nvgpu_mem_sgl_phys(sgl) + space_to_skip; + chunk_length = min(length, + nvgpu_mem_sgl_length(sgl) - space_to_skip); err = __set_pd_level(vm, &vm->pdb, 0, @@ -600,13 +602,11 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm, virt_addr, chunk_length, attrs); - if (err) - return err; space_to_skip = 0; virt_addr += chunk_length; length -= chunk_length; - sgl = sg_next(sgl); + sgl = nvgpu_mem_sgl_next(sgl); if (length == 0) break; @@ -624,22 +624,20 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm, * implementations. But the logic around that is generic to all chips. Every * chip has some number of PDE levels and then a PTE level. * - * Each chunk of the incoming SGT is sent to the chip specific implementation + * Each chunk of the incoming SGL is sent to the chip specific implementation * of page table update. * * [*] Note: the "physical" address may actually be an IO virtual address in the * case of SMMU usage. */ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, - struct sg_table *sgt, + struct nvgpu_mem_sgl *sgl, u64 space_to_skip, u64 virt_addr, u64 length, struct nvgpu_gmmu_attrs *attrs) { struct gk20a *g = gk20a_from_vm(vm); - struct nvgpu_page_alloc *alloc; - u64 phys_addr = 0; u32 page_size; int err; @@ -665,25 +663,16 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, return err; } - if (sgt) { - if (attrs->aperture == APERTURE_VIDMEM) { - alloc = get_vidmem_page_alloc(sgt->sgl); - - phys_addr = alloc->base; - } else - phys_addr = nvgpu_mem_get_addr_sgl(g, sgt->sgl); - } - __gmmu_dbg(g, attrs, "vm=%s " "%-5s GPU virt %#-12llx +%#-9llx phys %#-12llx " "phys offset: %#-4llx; pgsz: %3dkb perm=%-2s | " "kind=%#02x APT=%-6s %c%c%c%c%c", vm->name, - sgt ? "MAP" : "UNMAP", + sgl ? "MAP" : "UNMAP", virt_addr, length, - phys_addr, + sgl ? nvgpu_mem_sgl_phys(sgl) : 0, space_to_skip, page_size >> 10, nvgpu_gmmu_perm_str(attrs->rw_flag), @@ -696,19 +685,19 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, attrs->valid ? 'V' : '-'); /* - * Handle VIDMEM progamming. Currently uses a different scatter list - * format. + * For historical reasons these are separate, but soon these will be + * unified. */ if (attrs->aperture == APERTURE_VIDMEM) err = __nvgpu_gmmu_update_page_table_vidmem(vm, - sgt, + sgl, space_to_skip, virt_addr, length, attrs); else err = __nvgpu_gmmu_update_page_table_sysmem(vm, - sgt, + sgl, space_to_skip, virt_addr, length, @@ -717,7 +706,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, unmap_gmmu_pages(g, &vm->pdb); nvgpu_smp_mb(); - __gmmu_dbg(g, attrs, "%-5s Done!", sgt ? "MAP" : "UNMAP"); + __gmmu_dbg(g, attrs, "%-5s Done!", sgl ? 
"MAP" : "UNMAP"); return err; } @@ -736,7 +725,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm, */ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, u64 vaddr, - struct sg_table *sgt, + struct nvgpu_mem_sgl *sgl, u64 buffer_offset, u64 size, int pgsz_idx, @@ -785,7 +774,7 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, allocated = true; } - err = __nvgpu_gmmu_update_page_table(vm, sgt, buffer_offset, + err = __nvgpu_gmmu_update_page_table(vm, sgl, buffer_offset, vaddr, size, &attrs); if (err) { nvgpu_err(g, "failed to update ptes on map"); diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c new file mode 100644 index 000000000..7296c6738 --- /dev/null +++ b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include + +#include "gk20a/gk20a.h" + +struct nvgpu_mem_sgl *nvgpu_mem_sgl_next(struct nvgpu_mem_sgl *sgl) +{ + return sgl->next; +} + +u64 nvgpu_mem_sgl_phys(struct nvgpu_mem_sgl *sgl) +{ + return sgl->phys; +} + +u64 nvgpu_mem_sgl_dma(struct nvgpu_mem_sgl *sgl) +{ + return sgl->dma; +} + +u64 nvgpu_mem_sgl_length(struct nvgpu_mem_sgl *sgl) +{ + return sgl->length; +} + +/* + * This builds a GPU address for the %sgl based on whether an IOMMU is present + * or not. It also handles turning the physical address into the true GPU + * physical address that should be programmed into the page tables. + */ +u64 nvgpu_mem_sgl_gpu_addr(struct gk20a *g, struct nvgpu_mem_sgl *sgl, + struct nvgpu_gmmu_attrs *attrs) +{ + if (nvgpu_mem_sgl_dma(sgl) == 0) + return g->ops.mm.gpu_phys_addr(g, attrs, + nvgpu_mem_sgl_phys(sgl)); + + if (nvgpu_mem_sgl_dma(sgl) == DMA_ERROR_CODE) + return 0; + + return gk20a_mm_smmu_vaddr_translate(g, nvgpu_mem_sgl_dma(sgl)); +} + +void nvgpu_mem_sgl_free(struct gk20a *g, struct nvgpu_mem_sgl *sgl) +{ + struct nvgpu_mem_sgl *next; + + /* + * Free each of the elements. We expect each element to have been + * nvgpu_k[mz]alloc()ed. 
+ */ + while (sgl) { + next = nvgpu_mem_sgl_next(sgl); + nvgpu_kfree(g, sgl); + sgl = next; + } +} diff --git a/drivers/gpu/nvgpu/common/mm/page_allocator.c b/drivers/gpu/nvgpu/common/mm/page_allocator.c index 72ff8f2dc..6d92b4570 100644 --- a/drivers/gpu/nvgpu/common/mm/page_allocator.c +++ b/drivers/gpu/nvgpu/common/mm/page_allocator.c @@ -147,19 +147,16 @@ static void __nvgpu_free_pages(struct nvgpu_page_allocator *a, struct nvgpu_page_alloc *alloc, bool free_buddy_alloc) { - struct page_alloc_chunk *chunk; + struct nvgpu_mem_sgl *sgl = alloc->sgl; - while (!nvgpu_list_empty(&alloc->alloc_chunks)) { - chunk = nvgpu_list_first_entry(&alloc->alloc_chunks, - page_alloc_chunk, - list_entry); - nvgpu_list_del(&chunk->list_entry); - - if (free_buddy_alloc) - nvgpu_free(&a->source_allocator, chunk->base); - nvgpu_kmem_cache_free(a->chunk_cache, chunk); + if (free_buddy_alloc) { + while (sgl) { + nvgpu_free(&a->source_allocator, sgl->phys); + sgl = nvgpu_mem_sgl_next(sgl); + } } + nvgpu_mem_sgl_free(a->owner->g, alloc->sgl); nvgpu_kmem_cache_free(a->alloc_cache, alloc); } @@ -243,15 +240,14 @@ static void free_slab_page(struct nvgpu_page_allocator *a, } /* - * This expects @alloc to have 1 empty page_alloc_chunk already added to the - * alloc_chunks list. + * This expects @alloc to have 1 empty sgl_entry ready for usage. */ static int __do_slab_alloc(struct nvgpu_page_allocator *a, struct page_alloc_slab *slab, struct nvgpu_page_alloc *alloc) { struct page_alloc_slab_page *slab_page = NULL; - struct page_alloc_chunk *chunk; + struct nvgpu_mem_sgl *sgl; unsigned long offs; /* @@ -302,18 +298,19 @@ static int __do_slab_alloc(struct nvgpu_page_allocator *a, BUG(); /* Should be impossible to hit this. */ /* - * Handle building the nvgpu_page_alloc struct. We expect one - * page_alloc_chunk to be present. + * Handle building the nvgpu_page_alloc struct. We expect one sgl + * to be present. 
*/ alloc->slab_page = slab_page; alloc->nr_chunks = 1; alloc->length = slab_page->slab_size; alloc->base = slab_page->page_addr + (offs * slab_page->slab_size); - chunk = nvgpu_list_first_entry(&alloc->alloc_chunks, - page_alloc_chunk, list_entry); - chunk->base = alloc->base; - chunk->length = alloc->length; + sgl = alloc->sgl; + sgl->phys = alloc->base; + sgl->dma = alloc->base; + sgl->length = alloc->length; + sgl->next = NULL; return 0; } @@ -327,7 +324,7 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_slab( int err, slab_nr; struct page_alloc_slab *slab; struct nvgpu_page_alloc *alloc = NULL; - struct page_alloc_chunk *chunk = NULL; + struct nvgpu_mem_sgl *sgl = NULL; /* * Align the length to a page and then divide by the page size (4k for @@ -341,15 +338,13 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_slab( palloc_dbg(a, "OOM: could not alloc page_alloc struct!\n"); goto fail; } - chunk = nvgpu_kmem_cache_alloc(a->chunk_cache); - if (!chunk) { - palloc_dbg(a, "OOM: could not alloc alloc_chunk struct!\n"); + sgl = nvgpu_kzalloc(a->owner->g, sizeof(*sgl)); + if (!sgl) { + palloc_dbg(a, "OOM: could not alloc sgl struct!\n"); goto fail; } - nvgpu_init_list_node(&alloc->alloc_chunks); - nvgpu_list_add(&chunk->list_entry, &alloc->alloc_chunks); - + alloc->sgl = sgl; err = __do_slab_alloc(a, slab, alloc); if (err) goto fail; @@ -363,8 +358,8 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_slab( fail: if (alloc) nvgpu_kmem_cache_free(a->alloc_cache, alloc); - if (chunk) - nvgpu_kmem_cache_free(a->chunk_cache, chunk); + if (sgl) + nvgpu_kfree(a->owner->g, sgl); return NULL; } @@ -426,7 +421,7 @@ static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages( struct nvgpu_page_allocator *a, u64 pages) { struct nvgpu_page_alloc *alloc; - struct page_alloc_chunk *c; + struct nvgpu_mem_sgl *sgl, *prev_sgl = NULL; u64 max_chunk_len = pages << a->page_shift; int i = 0; @@ -436,7 +431,6 @@ static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages( memset(alloc, 0, sizeof(*alloc)); - nvgpu_init_list_node(&alloc->alloc_chunks); alloc->length = pages << a->page_shift; while (pages) { @@ -482,36 +476,48 @@ static struct nvgpu_page_alloc *__do_nvgpu_alloc_pages( goto fail_cleanup; } - c = nvgpu_kmem_cache_alloc(a->chunk_cache); - if (!c) { + sgl = nvgpu_kzalloc(a->owner->g, sizeof(*sgl)); + if (!sgl) { nvgpu_free(&a->source_allocator, chunk_addr); goto fail_cleanup; } pages -= chunk_pages; - c->base = chunk_addr; - c->length = chunk_len; - nvgpu_list_add(&c->list_entry, &alloc->alloc_chunks); + sgl->phys = chunk_addr; + sgl->dma = chunk_addr; + sgl->length = chunk_len; + + /* + * Build the singly linked list with a head node that is part of + * the list. 
+ */ + if (prev_sgl) + prev_sgl->next = sgl; + else + alloc->sgl = sgl; + + prev_sgl = sgl; i++; } alloc->nr_chunks = i; - c = nvgpu_list_first_entry(&alloc->alloc_chunks, - page_alloc_chunk, list_entry); - alloc->base = c->base; + alloc->base = alloc->sgl->phys; return alloc; fail_cleanup: - while (!nvgpu_list_empty(&alloc->alloc_chunks)) { - c = nvgpu_list_first_entry(&alloc->alloc_chunks, - page_alloc_chunk, list_entry); - nvgpu_list_del(&c->list_entry); - nvgpu_free(&a->source_allocator, c->base); - nvgpu_kmem_cache_free(a->chunk_cache, c); + sgl = alloc->sgl; + while (sgl) { + struct nvgpu_mem_sgl *next = sgl->next; + + nvgpu_free(&a->source_allocator, sgl->phys); + nvgpu_kfree(a->owner->g, sgl); + + sgl = next; } + nvgpu_kmem_cache_free(a->alloc_cache, alloc); fail: return NULL; @@ -521,7 +527,7 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages( struct nvgpu_page_allocator *a, u64 len) { struct nvgpu_page_alloc *alloc = NULL; - struct page_alloc_chunk *c; + struct nvgpu_mem_sgl *sgl; u64 pages; int i = 0; @@ -536,11 +542,15 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages( palloc_dbg(a, "Alloc 0x%llx (%llu) id=0x%010llx\n", pages << a->page_shift, pages, alloc->base); - nvgpu_list_for_each_entry(c, &alloc->alloc_chunks, - page_alloc_chunk, list_entry) { + sgl = alloc->sgl; + while (sgl) { palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n", - i++, c->base, c->length); + i++, + nvgpu_mem_sgl_phys(sgl), + nvgpu_mem_sgl_length(sgl)); + sgl = sgl->next; } + palloc_dbg(a, "Alloc done\n"); return alloc; } @@ -638,11 +648,11 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed( struct nvgpu_page_allocator *a, u64 base, u64 length, u32 unused) { struct nvgpu_page_alloc *alloc; - struct page_alloc_chunk *c; + struct nvgpu_mem_sgl *sgl; alloc = nvgpu_kmem_cache_alloc(a->alloc_cache); - c = nvgpu_kmem_cache_alloc(a->chunk_cache); - if (!alloc || !c) + sgl = nvgpu_kzalloc(a->owner->g, sizeof(*sgl)); + if (!alloc || !sgl) goto fail; alloc->base = nvgpu_alloc_fixed(&a->source_allocator, base, length, 0); @@ -653,17 +663,18 @@ static struct nvgpu_page_alloc *__nvgpu_alloc_pages_fixed( alloc->nr_chunks = 1; alloc->length = length; - nvgpu_init_list_node(&alloc->alloc_chunks); + alloc->sgl = sgl; - c->base = alloc->base; - c->length = length; - nvgpu_list_add(&c->list_entry, &alloc->alloc_chunks); + sgl->phys = alloc->base; + sgl->dma = alloc->base; + sgl->length = length; + sgl->next = NULL; return alloc; fail: - if (c) - nvgpu_kmem_cache_free(a->chunk_cache, c); + if (sgl) + nvgpu_kfree(a->owner->g, sgl); if (alloc) nvgpu_kmem_cache_free(a->alloc_cache, alloc); return NULL; @@ -677,7 +688,7 @@ static u64 nvgpu_page_alloc_fixed(struct nvgpu_allocator *__a, { struct nvgpu_page_allocator *a = page_allocator(__a); struct nvgpu_page_alloc *alloc = NULL; - struct page_alloc_chunk *c; + struct nvgpu_mem_sgl *sgl; u64 aligned_len, pages; int i = 0; @@ -697,10 +708,13 @@ static u64 nvgpu_page_alloc_fixed(struct nvgpu_allocator *__a, palloc_dbg(a, "Alloc [fixed] @ 0x%010llx + 0x%llx (%llu)\n", alloc->base, aligned_len, pages); - nvgpu_list_for_each_entry(c, &alloc->alloc_chunks, - page_alloc_chunk, list_entry) { + sgl = alloc->sgl; + while (sgl) { palloc_dbg(a, " Chunk %2d: 0x%010llx + 0x%llx\n", - i++, c->base, c->length); + i++, + nvgpu_mem_sgl_phys(sgl), + nvgpu_mem_sgl_length(sgl)); + sgl = sgl->next; } a->nr_fixed_allocs++; @@ -896,11 +910,9 @@ int nvgpu_page_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a, a->alloc_cache = nvgpu_kmem_cache_create(g, sizeof(struct 
nvgpu_page_alloc)); - a->chunk_cache = nvgpu_kmem_cache_create(g, - sizeof(struct page_alloc_chunk)); a->slab_page_cache = nvgpu_kmem_cache_create(g, sizeof(struct page_alloc_slab_page)); - if (!a->alloc_cache || !a->chunk_cache || !a->slab_page_cache) { + if (!a->alloc_cache || !a->slab_page_cache) { err = -ENOMEM; goto fail; } @@ -941,8 +953,6 @@ int nvgpu_page_allocator_init(struct gk20a *g, struct nvgpu_allocator *__a, fail: if (a->alloc_cache) nvgpu_kmem_cache_destroy(a->alloc_cache); - if (a->chunk_cache) - nvgpu_kmem_cache_destroy(a->chunk_cache); if (a->slab_page_cache) nvgpu_kmem_cache_destroy(a->slab_page_cache); nvgpu_kfree(g, a); diff --git a/drivers/gpu/nvgpu/common/pramin.c b/drivers/gpu/nvgpu/common/pramin.c index 425bfdb47..bb7d930e2 100644 --- a/drivers/gpu/nvgpu/common/pramin.c +++ b/drivers/gpu/nvgpu/common/pramin.c @@ -84,37 +84,40 @@ void nvgpu_pramin_access_batched(struct gk20a *g, struct nvgpu_mem *mem, u32 offset, u32 size, pramin_access_batch_fn loop, u32 **arg) { struct nvgpu_page_alloc *alloc = NULL; - struct page_alloc_chunk *chunk = NULL; + struct nvgpu_mem_sgl *sgl; u32 byteoff, start_reg, until_end, n; alloc = get_vidmem_page_alloc(mem->priv.sgt->sgl); - nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks, - page_alloc_chunk, list_entry) { - if (offset >= chunk->length) - offset -= chunk->length; - else + sgl = alloc->sgl; + while (sgl) { + if (offset >= nvgpu_mem_sgl_length(sgl)) { + offset -= nvgpu_mem_sgl_length(sgl); + sgl = sgl->next; + } else { break; + } } while (size) { - byteoff = g->ops.pramin.enter(g, mem, chunk, + u32 sgl_len = (u32)nvgpu_mem_sgl_length(sgl); + + byteoff = g->ops.pramin.enter(g, mem, sgl, offset / sizeof(u32)); start_reg = g->ops.pramin.data032_r(byteoff / sizeof(u32)); until_end = SZ_1M - (byteoff & (SZ_1M - 1)); - n = min3(size, until_end, (u32)(chunk->length - offset)); + n = min3(size, until_end, (u32)(sgl_len - offset)); loop(g, start_reg, n / sizeof(u32), arg); /* read back to synchronize accesses */ gk20a_readl(g, start_reg); - g->ops.pramin.exit(g, mem, chunk); + g->ops.pramin.exit(g, mem, sgl); size -= n; - if (n == (chunk->length - offset)) { - chunk = nvgpu_list_next_entry(chunk, page_alloc_chunk, - list_entry); + if (n == (sgl_len - offset)) { + sgl = nvgpu_mem_sgl_next(sgl); offset = 0; } else { offset += n; diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index 7eee2d514..355228dba 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -34,6 +34,7 @@ struct gk20a_debug_output; struct nvgpu_clk_pll_debug_data; struct nvgpu_nvhost_dev; struct nvgpu_cpu_time_correlation_sample; +struct nvgpu_mem_sgl; #include #include @@ -70,8 +71,6 @@ struct nvgpu_cpu_time_correlation_sample; #endif #include "ecc_gk20a.h" -struct page_alloc_chunk; - /* PTIMER_REF_FREQ_HZ corresponds to a period of 32 nanoseconds. 32 ns is the resolution of ptimer. 
*/ #define PTIMER_REF_FREQ_HZ 31250000 @@ -701,7 +700,7 @@ struct gpu_ops { bool (*support_sparse)(struct gk20a *g); u64 (*gmmu_map)(struct vm_gk20a *vm, u64 map_offset, - struct sg_table *sgt, + struct nvgpu_mem_sgl *sgl, u64 buffer_offset, u64 size, int pgsz_idx, @@ -761,9 +760,9 @@ struct gpu_ops { size_t size); struct { u32 (*enter)(struct gk20a *g, struct nvgpu_mem *mem, - struct page_alloc_chunk *chunk, u32 w); + struct nvgpu_mem_sgl *sgl, u32 w); void (*exit)(struct gk20a *g, struct nvgpu_mem *mem, - struct page_alloc_chunk *chunk); + struct nvgpu_mem_sgl *sgl); u32 (*data032_r)(u32 i); } pramin; struct { diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 97b7aa800..cd34e769d 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c @@ -1151,7 +1151,7 @@ static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem) struct gk20a_fence *gk20a_fence_out = NULL; struct gk20a_fence *gk20a_last_fence = NULL; struct nvgpu_page_alloc *alloc = NULL; - struct page_alloc_chunk *chunk = NULL; + struct nvgpu_mem_sgl *sgl = NULL; int err = 0; if (g->mm.vidmem.ce_ctx_id == (u32)~0) @@ -1159,16 +1159,16 @@ static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem) alloc = get_vidmem_page_alloc(mem->priv.sgt->sgl); - nvgpu_list_for_each_entry(chunk, &alloc->alloc_chunks, - page_alloc_chunk, list_entry) { + sgl = alloc->sgl; + while (sgl) { if (gk20a_last_fence) gk20a_fence_put(gk20a_last_fence); err = gk20a_ce_execute_ops(g, g->mm.vidmem.ce_ctx_id, 0, - chunk->base, - chunk->length, + nvgpu_mem_sgl_phys(sgl), + nvgpu_mem_sgl_length(sgl), 0x00000000, NVGPU_CE_DST_LOCATION_LOCAL_FB, NVGPU_CE_MEMSET, @@ -1183,6 +1183,7 @@ static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem) } gk20a_last_fence = gk20a_fence_out; + sgl = nvgpu_mem_sgl_next(sgl); } if (gk20a_last_fence) { @@ -1262,10 +1263,10 @@ dma_addr_t gk20a_mm_gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr) return addr; } -u64 gk20a_mm_smmu_vaddr_translate(struct gk20a *g, dma_addr_t iova) +u64 gk20a_mm_smmu_vaddr_translate(struct gk20a *g, u64 iova) { /* ensure it is not vidmem allocation */ - WARN_ON(is_vidmem_page_alloc((u64)iova)); + WARN_ON(is_vidmem_page_alloc(iova)); if (device_is_iommuable(dev_from_gk20a(g)) && g->ops.mm.get_physical_addr_bits) @@ -2167,11 +2168,6 @@ u32 gk20a_mm_get_physical_addr_bits(struct gk20a *g) return 34; } -u64 gk20a_mm_gpu_phys_addr(struct gk20a *g, u64 phys, u32 flags) -{ - return phys; -} - const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g, u32 big_page_size) { diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index c77bebf8e..2fdc17299 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h @@ -336,7 +336,6 @@ void gk20a_mm_dump_vm(struct vm_gk20a *vm, int gk20a_mm_suspend(struct gk20a *g); -u64 gk20a_mm_gpu_phys_addr(struct gk20a *g, u64 phys, u32 flags); u64 gk20a_mm_smmu_vaddr_translate(struct gk20a *g, dma_addr_t iova); void gk20a_mm_ltc_isr(struct gk20a *g); @@ -361,29 +360,29 @@ static inline phys_addr_t gk20a_mem_phys(struct nvgpu_mem *mem) } u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm, - u64 map_offset, - struct sg_table *sgt, - u64 buffer_offset, - u64 size, - int pgsz_idx, - u8 kind_v, - u32 ctag_offset, - u32 flags, - int rw_flag, - bool clear_ctags, - bool sparse, - bool priv, - struct vm_gk20a_mapping_batch *batch, - enum nvgpu_aperture aperture); + u64 map_offset, + struct 
nvgpu_mem_sgl *sgl, + u64 buffer_offset, + u64 size, + int pgsz_idx, + u8 kind_v, + u32 ctag_offset, + u32 flags, + int rw_flag, + bool clear_ctags, + bool sparse, + bool priv, + struct vm_gk20a_mapping_batch *batch, + enum nvgpu_aperture aperture); void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm, - u64 vaddr, - u64 size, - int pgsz_idx, - bool va_allocated, - int rw_flag, - bool sparse, - struct vm_gk20a_mapping_batch *batch); + u64 vaddr, + u64 size, + int pgsz_idx, + bool va_allocated, + int rw_flag, + bool sparse, + struct vm_gk20a_mapping_batch *batch); struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf); void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf, diff --git a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c index 9d19e9e53..8a34a63c9 100644 --- a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.c @@ -26,9 +26,9 @@ /* WARNING: returns pramin_window_lock taken, complement with pramin_exit() */ u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem, - struct page_alloc_chunk *chunk, u32 w) + struct nvgpu_mem_sgl *sgl, u32 w) { - u64 bufbase = chunk->base; + u64 bufbase = nvgpu_mem_sgl_phys(sgl); u64 addr = bufbase + w * sizeof(u32); u32 hi = (u32)((addr & ~(u64)0xfffff) >> bus_bar0_window_target_bar0_window_base_shift_v()); @@ -40,8 +40,9 @@ u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem, gk20a_dbg(gpu_dbg_mem, "0x%08x:%08x begin for %p,%p at [%llx,%llx] (sz %llx)", - hi, lo, mem, chunk, bufbase, - bufbase + chunk->length, chunk->length); + hi, lo, mem, sgl, bufbase, + bufbase + nvgpu_mem_sgl_phys(sgl), + nvgpu_mem_sgl_length(sgl)); WARN_ON(!bufbase); @@ -57,9 +58,9 @@ u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem, } void gk20a_pramin_exit(struct gk20a *g, struct nvgpu_mem *mem, - struct page_alloc_chunk *chunk) + struct nvgpu_mem_sgl *sgl) { - gk20a_dbg(gpu_dbg_mem, "end for %p,%p", mem, chunk); + gk20a_dbg(gpu_dbg_mem, "end for %p,%p", mem, sgl); nvgpu_spinlock_release(&g->mm.pramin_window_lock); } diff --git a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.h b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.h index 1a1ac8714..fc5ba919e 100644 --- a/drivers/gpu/nvgpu/gk20a/pramin_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/pramin_gk20a.h @@ -19,10 +19,10 @@ struct gk20a; struct nvgpu_mem; -struct page_alloc_chunk; +struct nvgpu_mem_sgl; u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem, - struct page_alloc_chunk *chunk, u32 w); + struct nvgpu_mem_sgl *sgl, u32 w); void gk20a_pramin_exit(struct gk20a *g, struct nvgpu_mem *mem, - struct page_alloc_chunk *chunk); + struct nvgpu_mem_sgl *sgl); #endif diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c index fc27b120a..c276f5a64 100644 --- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c @@ -904,7 +904,7 @@ int gr_gp10b_alloc_buffer(struct vm_gk20a *vm, size_t size, mem->gpu_va = nvgpu_gmmu_map(vm, mem, - size, + mem->aligned_size, NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, gk20a_mem_flag_none, false, diff --git a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h index de129a5f4..11060300b 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h @@ -27,8 +27,6 @@ #include #endif -struct scatterlist; - /* * This is the GMMU API visible to blocks outside of the GMMU. 
Basically this * API supports all the different types of mappings that might be done in the diff --git a/drivers/gpu/nvgpu/include/nvgpu/linux/nvgpu_mem.h b/drivers/gpu/nvgpu/include/nvgpu/linux/nvgpu_mem.h index e2d4d3367..f96c28018 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/linux/nvgpu_mem.h +++ b/drivers/gpu/nvgpu/include/nvgpu/linux/nvgpu_mem.h @@ -32,6 +32,8 @@ struct nvgpu_mem_priv { }; u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl); +struct nvgpu_mem_sgl *nvgpu_mem_sgl_create(struct gk20a *g, + struct sg_table *sgt); /** * __nvgpu_mem_create_from_pages - Create an nvgpu_mem from physical pages. diff --git a/drivers/gpu/nvgpu/include/nvgpu/log.h b/drivers/gpu/nvgpu/include/nvgpu/log.h index 4cac3e702..cfce8c5b2 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/log.h +++ b/drivers/gpu/nvgpu/include/nvgpu/log.h @@ -71,6 +71,7 @@ enum nvgpu_log_categories { gpu_dbg_pd_cache = BIT(20), /* PD cache traces. */ gpu_dbg_alloc = BIT(21), /* Allocator debugging. */ gpu_dbg_dma = BIT(22), /* DMA allocation prints. */ + gpu_dbg_sgl = BIT(23), /* SGL related traces. */ gpu_dbg_mem = BIT(31), /* memory accesses; very verbose. */ }; diff --git a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h index a112623e0..7d19cf81d 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h +++ b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h @@ -33,6 +33,8 @@ struct gk20a; struct nvgpu_allocator; struct nvgpu_gmmu_attrs; +#define NVGPU_MEM_DMA_ERROR (~0ULL) + /* * Real location of a buffer - nvgpu_aperture_mask() will deduce what will be * told to the gpu about the aperture, but this flag designates where the @@ -44,6 +46,28 @@ enum nvgpu_aperture { APERTURE_VIDMEM }; +/* + * This struct holds the necessary information for describing a struct + * nvgpu_mem's scatter gather list. + * + * These are created in a platform dependent way. As a result the function + * definition for allocating these lives in the file. + */ +struct nvgpu_mem_sgl { + /* + * Internally this is implemented as a singly linked list. + */ + struct nvgpu_mem_sgl *next; + + /* + * There is both a phys address and a DMA address since some systems, + * for example ones with an IOMMU, may see these as different addresses. + */ + u64 phys; + u64 dma; + u64 length; +}; + struct nvgpu_mem { /* * Populated for all nvgpu_mem structs - vidmem or system. @@ -176,6 +200,27 @@ int nvgpu_mem_create_from_mem(struct gk20a *g, struct nvgpu_mem *dest, struct nvgpu_mem *src, int start_page, int nr_pages); +/** + * nvgpu_mem_sgl_create_from_mem - Create a scatter list from an nvgpu_mem. + * + * @g - The GPU. + * @mem - The source memory allocation to use. + * + * Create a scatter gather list from the passed @mem struct. This list lets the + * calling code iterate across each chunk of a DMA allocation for when that DMA + * allocation is not completely contiguous. + */ +struct nvgpu_mem_sgl *nvgpu_mem_sgl_create_from_mem(struct gk20a *g, + struct nvgpu_mem *mem); +void nvgpu_mem_sgl_free(struct gk20a *g, struct nvgpu_mem_sgl *sgl); + +struct nvgpu_mem_sgl *nvgpu_mem_sgl_next(struct nvgpu_mem_sgl *sgl); +u64 nvgpu_mem_sgl_phys(struct nvgpu_mem_sgl *sgl); +u64 nvgpu_mem_sgl_dma(struct nvgpu_mem_sgl *sgl); +u64 nvgpu_mem_sgl_length(struct nvgpu_mem_sgl *sgl); +u64 nvgpu_mem_sgl_gpu_addr(struct gk20a *g, struct nvgpu_mem_sgl *sgl, + struct nvgpu_gmmu_attrs *attrs); + /* * Buffer accessors - wrap between begin() and end() if there is no permanent * kernel mapping for this buffer. 
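
For reference, here is a minimal sketch of how calling code might consume the SGL API declared in nvgpu_mem.h above (nvgpu_mem_sgl_create_from_mem() plus the accessor helpers). This is illustrative only and not part of the change: the helper name print_mem_chunks() is invented, and it assumes the nvgpu_mem has already been allocated through the usual DMA paths so that a scatter list can be built from it.

/*
 * Illustrative only: walk the chunks of an nvgpu_mem via the new SGL API.
 * Assumes @mem has already been allocated/populated by the normal DMA code.
 */
static int print_mem_chunks(struct gk20a *g, struct nvgpu_mem *mem)
{
	struct nvgpu_mem_sgl *head, *sgl;

	head = nvgpu_mem_sgl_create_from_mem(g, mem);
	if (!head)
		return -ENOMEM;

	for (sgl = head; sgl; sgl = nvgpu_mem_sgl_next(sgl))
		nvgpu_log(g, gpu_dbg_sgl, "chunk: phys=0x%llx len=0x%llx",
			  nvgpu_mem_sgl_phys(sgl),
			  nvgpu_mem_sgl_length(sgl));

	/* The duplicated list is owned by the caller and must be freed. */
	nvgpu_mem_sgl_free(g, head);
	return 0;
}

On the Linux side the list is duplicated from the underlying sg_table (or from the vidmem page allocator's SGL), so freeing it does not affect the original allocation.
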
diff --git a/drivers/gpu/nvgpu/include/nvgpu/page_allocator.h b/drivers/gpu/nvgpu/include/nvgpu/page_allocator.h index 9a5ef8d37..de83ca7f3 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/page_allocator.h +++ b/drivers/gpu/nvgpu/include/nvgpu/page_allocator.h @@ -18,6 +18,7 @@ #define PAGE_ALLOCATOR_PRIV_H #include +#include #include #include #include @@ -83,27 +84,17 @@ page_alloc_slab_page_from_list_entry(struct nvgpu_list_node *node) ((uintptr_t)node - offsetof(struct page_alloc_slab_page, list_entry)); }; -struct page_alloc_chunk { - struct nvgpu_list_node list_entry; - - u64 base; - u64 length; -}; - -static inline struct page_alloc_chunk * -page_alloc_chunk_from_list_entry(struct nvgpu_list_node *node) -{ - return (struct page_alloc_chunk *) - ((uintptr_t)node - offsetof(struct page_alloc_chunk, list_entry)); -}; - /* * Struct to handle internal management of page allocation. It holds a list * of the chunks of pages that make up the overall allocation - much like a * scatter gather table. */ struct nvgpu_page_alloc { - struct nvgpu_list_node alloc_chunks; + /* + * nvgpu_mem_sgl for describing the actual allocation. Convenient for + * GMMU mapping. + */ + struct nvgpu_mem_sgl *sgl; int nr_chunks; u64 length; @@ -156,7 +147,6 @@ struct nvgpu_page_allocator { int nr_slabs; struct nvgpu_kmem_cache *alloc_cache; - struct nvgpu_kmem_cache *chunk_cache; struct nvgpu_kmem_cache *slab_page_cache; u64 flags; diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c index 85c436e5a..ee9b791af 100644 --- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c +++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c @@ -13,7 +13,6 @@ * more details. */ -#include #include "vgpu/vgpu.h" #include "vgpu_mm_gp10b.h" #include "gk20a/mm_gk20a.h" @@ -41,7 +40,7 @@ static inline int add_mem_desc(struct tegra_vgpu_mem_desc *mem_desc, static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm, u64 map_offset, - struct sg_table *sgt, + struct nvgpu_mem_sgl *sgl, u64 buffer_offset, u64 size, int pgsz_idx, @@ -61,10 +60,9 @@ static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm, struct tegra_vgpu_as_map_ex_params *p = &msg.params.as_map_ex; struct tegra_vgpu_mem_desc *mem_desc; u32 page_size = vm->gmmu_page_sizes[pgsz_idx]; + u64 buffer_size = PAGE_ALIGN(size); u64 space_to_skip = buffer_offset; - u64 buffer_size = 0; u32 mem_desc_count = 0, i; - struct scatterlist *sgl; void *handle = NULL; size_t oob_size; u8 prot; @@ -73,7 +71,7 @@ static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm, /* FIXME: add support for sparse mappings */ - if (WARN_ON(!sgt) || WARN_ON(!g->mm.bypass_smmu)) + if (WARN_ON(!sgl) || WARN_ON(!g->mm.bypass_smmu)) return 0; if (space_to_skip & (page_size - 1)) @@ -100,33 +98,36 @@ static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm, goto fail; } - sgl = sgt->sgl; - while (space_to_skip && sgl && - (space_to_skip + page_size > sgl->length)) { - space_to_skip -= sgl->length; - sgl = sg_next(sgl); - } - WARN_ON(!sgl); + while (sgl) { + u64 phys_addr; + u64 chunk_length; - if (add_mem_desc(&mem_desc[mem_desc_count++], - sg_phys(sgl) + space_to_skip, - sgl->length - space_to_skip, - &oob_size)) { - err = -ENOMEM; - goto fail; - } - buffer_size += sgl->length - space_to_skip; + /* + * Cut out sgl ents for space_to_skip. 
+ */ + if (space_to_skip && + space_to_skip >= nvgpu_mem_sgl_length(sgl)) { + space_to_skip -= nvgpu_mem_sgl_length(sgl); + sgl = nvgpu_mem_sgl_next(sgl); + continue; + } - sgl = sg_next(sgl); - while (sgl && buffer_size < size) { - if (add_mem_desc(&mem_desc[mem_desc_count++], sg_phys(sgl), - sgl->length, &oob_size)) { + phys_addr = nvgpu_mem_sgl_phys(sgl) + space_to_skip; + chunk_length = min(size, + nvgpu_mem_sgl_length(sgl) - space_to_skip); + + if (add_mem_desc(&mem_desc[mem_desc_count++], phys_addr, + chunk_length, &oob_size)) { err = -ENOMEM; goto fail; } - buffer_size += sgl->length; - sgl = sg_next(sgl); + space_to_skip = 0; + size -= chunk_length; + sgl = nvgpu_mem_sgl_next(sgl); + + if (size == 0) + break; } if (rw_flag == gk20a_mem_flag_read_only) @@ -153,7 +154,7 @@ static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm, msg.handle = vgpu_get_handle(g); p->handle = vm->handle; p->gpu_va = map_offset; - p->size = size; + p->size = buffer_size; p->mem_desc_count = mem_desc_count; p->pgsz_idx = pgsz_idx; p->iova = 0; diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c index ef9e00c8c..5da6f158b 100644 --- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c @@ -78,7 +78,7 @@ int vgpu_init_mm_support(struct gk20a *g) static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm, u64 map_offset, - struct sg_table *sgt, + struct nvgpu_mem_sgl *sgl, u64 buffer_offset, u64 size, int pgsz_idx, @@ -98,7 +98,7 @@ static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm, struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d); struct tegra_vgpu_cmd_msg msg; struct tegra_vgpu_as_map_params *p = &msg.params.as_map; - u64 addr = nvgpu_mem_get_addr_sgl(g, sgt->sgl); + u64 addr = nvgpu_mem_sgl_gpu_addr(g, sgl, NULL); u8 prot; gk20a_dbg_fn("");
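
The vidmem and sysmem GMMU update paths and the vgpu map path above all follow the same chunk walk: step over whole SGL entries until space_to_skip lands inside one, then map partial or full chunks until the requested length is consumed. A condensed sketch of that shared pattern follows; map_chunk() is a stand-in for the real per-chunk work (__set_pd_level() or add_mem_desc()) and is not a function introduced by this patch.

/*
 * Sketch of the SGL walk shared by the map paths in this patch.
 * map_chunk() is a placeholder for the real per-chunk operation.
 */
static int walk_sgl_for_map(struct gk20a *g, struct nvgpu_mem_sgl *sgl,
			    u64 space_to_skip, u64 virt_addr, u64 length)
{
	while (sgl) {
		u64 phys_addr, chunk_length;
		int err;

		/* Entirely skipped chunks are just stepped over. */
		if (space_to_skip &&
		    space_to_skip >= nvgpu_mem_sgl_length(sgl)) {
			space_to_skip -= nvgpu_mem_sgl_length(sgl);
			sgl = nvgpu_mem_sgl_next(sgl);
			continue;
		}

		phys_addr = nvgpu_mem_sgl_phys(sgl) + space_to_skip;
		chunk_length = min(length,
				   nvgpu_mem_sgl_length(sgl) - space_to_skip);

		err = map_chunk(g, phys_addr, virt_addr, chunk_length);
		if (err)
			return err;

		/* Only the first mapped chunk can carry a partial offset. */
		space_to_skip = 0;
		virt_addr += chunk_length;
		length -= chunk_length;
		sgl = nvgpu_mem_sgl_next(sgl);

		if (length == 0)
			break;
	}

	return 0;
}
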