From f766c6af91adad5e8920bcb784d357e878e4bf7d Mon Sep 17 00:00:00 2001
From: Alex Waterman
Date: Tue, 15 Jan 2019 13:35:53 -0800
Subject: [PATCH] gpu: nvgpu: Make "phys" nvgpu_mem impl

Make a physical nvgpu_mem implementation in the common code. This
implementation assumes a single, contiguous physical range. GMMU
mappability is provided by building a one-entry SGT.

Since this is now "common" code the original Linux code has been moved
to common/mm/nvgpu_mem.c.

Also drop the '__' prefix from the nvgpu_mem function. The prefix is
not necessary since this function, although somewhat tricky, is
expected to be used by arbitrary callers within the nvgpu driver.

JIRA NVGPU-1029
Bug 2441531

Change-Id: I42313e5c664df3cd94933cc63ff0528326628683
Signed-off-by: Alex Waterman
Reviewed-on: https://git-master.nvidia.com/r/1995866
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/common/init/nvgpu_init.c    |   2 +-
 drivers/gpu/nvgpu/common/mm/nvgpu_mem.c       | 106 ++++++++++++++++++
 drivers/gpu/nvgpu/common/mm/nvgpu_sgt.c       |   5 +
 .../nvgpu/common/sync/syncpt_cmdbuf_gv11b.c   |   4 +-
 .../gpu/nvgpu/include/nvgpu/linux/nvgpu_mem.h |  47 --------
 drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h   |  29 +++++
 drivers/gpu/nvgpu/os/linux/linux-dma.c        |  11 +-
 drivers/gpu/nvgpu/os/linux/nvgpu_mem.c        |  60 ----------
 8 files changed, 150 insertions(+), 114 deletions(-)

diff --git a/drivers/gpu/nvgpu/common/init/nvgpu_init.c b/drivers/gpu/nvgpu/common/init/nvgpu_init.c
index d30266c97..a93fbafde 100644
--- a/drivers/gpu/nvgpu/common/init/nvgpu_init.c
+++ b/drivers/gpu/nvgpu/common/init/nvgpu_init.c
@@ -432,7 +432,7 @@ int gk20a_finalize_poweron(struct gk20a *g)
 	if (nvgpu_has_syncpoints(g) && g->syncpt_unit_size) {
 		if (!nvgpu_mem_is_valid(&g->syncpt_mem)) {
 			nr_pages = DIV_ROUND_UP(g->syncpt_unit_size, PAGE_SIZE);
-			__nvgpu_mem_create_from_phys(g, &g->syncpt_mem,
+			nvgpu_mem_create_from_phys(g, &g->syncpt_mem,
 					g->syncpt_unit_base, nr_pages);
 		}
 	}
diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
index c591c285e..74f4d1e61 100644
--- a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c
@@ -23,6 +23,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -222,3 +223,108 @@ void nvgpu_memset(struct gk20a *g, struct nvgpu_mem *mem, u32 offset,
 		(void)WARN(true, "Accessing unallocated nvgpu_mem");
 	}
 }
+
+static struct nvgpu_sgl *nvgpu_mem_phys_sgl_next(struct nvgpu_sgl *sgl)
+{
+	struct nvgpu_mem_sgl *sgl_impl = (struct nvgpu_mem_sgl *)sgl;
+
+	return (struct nvgpu_sgl *)sgl_impl->next;
+}
+
+/*
+ * Provided for compatibility - the DMA address is the same as the phys address
+ * for these nvgpu_mem's.
+ */ +static u64 nvgpu_mem_phys_sgl_dma(struct nvgpu_sgl *sgl) +{ + struct nvgpu_mem_sgl *sgl_impl = (struct nvgpu_mem_sgl *)sgl; + + return sgl_impl->phys; +} + +static u64 nvgpu_mem_phys_sgl_phys(struct gk20a *g, struct nvgpu_sgl *sgl) +{ + struct nvgpu_mem_sgl *sgl_impl = (struct nvgpu_mem_sgl *)sgl; + + return sgl_impl->phys; +} + +static u64 nvgpu_mem_phys_sgl_ipa_to_pa(struct gk20a *g, + struct nvgpu_sgl *sgl, u64 ipa, u64 *pa_len) +{ + return ipa; +} + +static u64 nvgpu_mem_phys_sgl_length(struct nvgpu_sgl *sgl) +{ + struct nvgpu_mem_sgl *sgl_impl = (struct nvgpu_mem_sgl *)sgl; + + return sgl_impl->length; +} + +static u64 nvgpu_mem_phys_sgl_gpu_addr(struct gk20a *g, + struct nvgpu_sgl *sgl, + struct nvgpu_gmmu_attrs *attrs) +{ + struct nvgpu_mem_sgl *sgl_impl = (struct nvgpu_mem_sgl *)sgl; + + return sgl_impl->phys; +} + +static void nvgpu_mem_phys_sgt_free(struct gk20a *g, struct nvgpu_sgt *sgt) +{ + /* + * No-op here. The free is handled by freeing the nvgpu_mem itself. + */ +} + +static const struct nvgpu_sgt_ops nvgpu_mem_phys_ops = { + .sgl_next = nvgpu_mem_phys_sgl_next, + .sgl_dma = nvgpu_mem_phys_sgl_dma, + .sgl_phys = nvgpu_mem_phys_sgl_phys, + .sgl_ipa = nvgpu_mem_phys_sgl_phys, + .sgl_ipa_to_pa = nvgpu_mem_phys_sgl_ipa_to_pa, + .sgl_length = nvgpu_mem_phys_sgl_length, + .sgl_gpu_addr = nvgpu_mem_phys_sgl_gpu_addr, + .sgt_free = nvgpu_mem_phys_sgt_free, + + /* + * The physical nvgpu_mems are never IOMMU'able by definition. + */ + .sgt_iommuable = NULL +}; + +int nvgpu_mem_create_from_phys(struct gk20a *g, struct nvgpu_mem *dest, + u64 src_phys, u32 nr_pages) +{ + int ret = 0; + struct nvgpu_sgt *sgt; + struct nvgpu_mem_sgl *sgl; + + /* + * Do the two operations that can fail before touching *dest. + */ + sgt = nvgpu_kzalloc(g, sizeof(*sgt)); + sgl = nvgpu_kzalloc(g, sizeof(*sgl)); + if (sgt == NULL || sgl == NULL) { + nvgpu_kfree(g, sgt); + nvgpu_kfree(g, sgl); + return -ENOMEM; + } + + (void) memset(dest, 0, sizeof(*dest)); + + dest->aperture = APERTURE_SYSMEM; + dest->size = (u64)nr_pages * SZ_4K; + dest->aligned_size = dest->size; + dest->mem_flags = __NVGPU_MEM_FLAG_NO_DMA; + dest->phys_sgt = sgt; + + sgl->next = NULL; + sgl->phys = src_phys; + sgl->length = dest->size; + sgt->sgl = (struct nvgpu_sgl *)sgl; + sgt->ops = &nvgpu_mem_phys_ops; + + return ret; +} diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_sgt.c b/drivers/gpu/nvgpu/common/mm/nvgpu_sgt.c index 39d99d433..f2df87e25 100644 --- a/drivers/gpu/nvgpu/common/mm/nvgpu_sgt.c +++ b/drivers/gpu/nvgpu/common/mm/nvgpu_sgt.c @@ -22,6 +22,7 @@ #include #include +#include #include #include @@ -127,5 +128,9 @@ u64 nvgpu_sgt_alignment(struct gk20a *g, struct nvgpu_sgt *sgt) struct nvgpu_sgt *nvgpu_sgt_create_from_mem(struct gk20a *g, struct nvgpu_mem *mem) { + if ((mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA) != 0) { + return mem->phys_sgt; + } + return nvgpu_sgt_os_create_from_mem(g, mem); } diff --git a/drivers/gpu/nvgpu/common/sync/syncpt_cmdbuf_gv11b.c b/drivers/gpu/nvgpu/common/sync/syncpt_cmdbuf_gv11b.c index 9229c8572..cd39ecc27 100644 --- a/drivers/gpu/nvgpu/common/sync/syncpt_cmdbuf_gv11b.c +++ b/drivers/gpu/nvgpu/common/sync/syncpt_cmdbuf_gv11b.c @@ -72,7 +72,7 @@ int gv11b_alloc_syncpt_buf(struct channel_gk20a *c, return err; nr_pages = DIV_ROUND_UP(g->syncpt_size, PAGE_SIZE); - __nvgpu_mem_create_from_phys(g, syncpt_buf, + nvgpu_mem_create_from_phys(g, syncpt_buf, (g->syncpt_unit_base + nvgpu_nvhost_syncpt_unit_interface_get_byte_offset(syncpt_id)), nr_pages); @@ -188,4 +188,4 @@ void 
gv11b_add_syncpt_incr_cmd(struct gk20a *g, u32 gv11b_get_syncpt_incr_cmd_size(bool wfi_cmd) { return 10U; -} \ No newline at end of file +} diff --git a/drivers/gpu/nvgpu/include/nvgpu/linux/nvgpu_mem.h b/drivers/gpu/nvgpu/include/nvgpu/linux/nvgpu_mem.h index e5f5031a2..6c641e855 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/linux/nvgpu_mem.h +++ b/drivers/gpu/nvgpu/include/nvgpu/linux/nvgpu_mem.h @@ -38,52 +38,5 @@ struct nvgpu_sgt *nvgpu_mem_linux_sgt_create(struct gk20a *g, void nvgpu_mem_linux_sgt_free(struct gk20a *g, struct nvgpu_sgt *sgt); struct nvgpu_sgt *nvgpu_linux_sgt_create(struct gk20a *g, struct sg_table *sgt); -/** - * __nvgpu_mem_create_from_pages - Create an nvgpu_mem from physical pages. - * - * @g - The GPU. - * @dest - nvgpu_mem to initialize. - * @pages - A list of page pointers. - * @nr_pages - The number of pages in @pages. - * - * Create a new nvgpu_mem struct from a pre-existing list of physical pages. The - * pages need not be contiguous (the underlying scatter gather list will help - * with that). However, note, this API will explicitly make it so that the GMMU - * mapping code bypasses SMMU access for the passed pages. This allows one to - * make mem_descs that describe MMIO regions or other non-DRAM things. - * - * This only works for SYSMEM (or other things like SYSMEM - basically just not - * VIDMEM). Also, this API is only available for Linux as it heavily depends on - * the notion of struct %page. - * - * The resulting nvgpu_mem should be released with the nvgpu_dma_free() or the - * nvgpu_dma_unmap_free() function depending on whether or not the resulting - * nvgpu_mem has been mapped. The underlying pages themselves must be cleaned up - * by the caller of this API. - * - * Returns 0 on success, or a relevant error otherwise. - */ -int __nvgpu_mem_create_from_pages(struct gk20a *g, struct nvgpu_mem *dest, - struct page **pages, int nr_pages); -/** - * __nvgpu_mem_create_from_phys - Create an nvgpu_mem from physical mem. - * - * @g - The GPU. - * @dest - nvgpu_mem to initialize. - * @src_phys - start address of physical mem - * @nr_pages - The number of pages in phys. - * - * Create a new nvgpu_mem struct from a physical memory aperure. The physical - * memory aperture needs to be contiguous for requested @nr_pages. This API - * only works for SYSMEM. - * - * The resulting nvgpu_mem should be released with the nvgpu_dma_free() or the - * nvgpu_dma_unmap_free() function depending on whether or not the resulting - * nvgpu_mem has been mapped. - * - * Returns 0 on success, or a relevant error otherwise. - */ -int __nvgpu_mem_create_from_phys(struct gk20a *g, struct nvgpu_mem *dest, - u64 src_phys, int nr_pages); #endif diff --git a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h index 3d79893b6..728a5b2f5 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h +++ b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h @@ -42,6 +42,7 @@ struct gk20a; struct nvgpu_allocator; struct nvgpu_gmmu_attrs; struct nvgpu_page_alloc; +struct nvgpu_sgt; #define NVGPU_MEM_DMA_ERROR (~0ULL) @@ -124,6 +125,11 @@ struct nvgpu_mem { struct nvgpu_allocator *allocator; struct nvgpu_list_node clear_list_entry; + /* + * Fields for direct "physical" nvgpu_mem structs. + */ + struct nvgpu_sgt *phys_sgt; + /* * This is defined by the system specific header. It can be empty if * there's no system specific stuff for a given system. 
@@ -212,6 +218,29 @@ int nvgpu_mem_create_from_mem(struct gk20a *g, struct nvgpu_mem *dest, struct nvgpu_mem *src, u64 start_page, int nr_pages); +/** + * nvgpu_mem_create_from_phys - Create an nvgpu_mem from physical mem. + * + * @g - The GPU. + * @dest - nvgpu_mem to initialize. + * @src_phys - start address of physical mem + * @nr_pages - The number of pages in phys. + * + * Create a new nvgpu_mem struct from a physical memory aperture. The physical + * memory aperture needs to be contiguous for requested @nr_pages. This API + * only works for SYSMEM. This also assumes a 4K page granule since the GMMU + * always supports 4K pages. If _system_ pages are larger than 4K then the + * resulting nvgpu_mem will represent less than 1 OS page worth of memory + * + * The resulting nvgpu_mem should be released with the nvgpu_dma_free() or the + * nvgpu_dma_unmap_free() function depending on whether or not the resulting + * nvgpu_mem has been mapped. + * + * Returns 0 on success, or a relevant error otherwise. + */ +int nvgpu_mem_create_from_phys(struct gk20a *g, struct nvgpu_mem *dest, + u64 src_phys, u32 nr_pages); + /* * Really free a vidmem buffer. There's a fair amount of work involved in * freeing vidmem buffers in the DMA API. This handles none of that - it only diff --git a/drivers/gpu/nvgpu/os/linux/linux-dma.c b/drivers/gpu/nvgpu/os/linux/linux-dma.c index 4035e4145..cbaba0de8 100644 --- a/drivers/gpu/nvgpu/os/linux/linux-dma.c +++ b/drivers/gpu/nvgpu/os/linux/linux-dma.c @@ -26,6 +26,7 @@ #include #include #include +#include #include @@ -345,11 +346,13 @@ void nvgpu_dma_free_sys(struct gk20a *g, struct nvgpu_mem *mem) } /* - * When this flag is set we expect that pages is still populated but not - * by the DMA API. + * When this flag is set this means we are freeing a "phys" nvgpu_mem. + * To handle this just nvgpu_kfree() the nvgpu_sgt and nvgpu_sgl. */ - if (mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA) - nvgpu_kfree(g, mem->priv.pages); + if (mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA) { + nvgpu_kfree(g, mem->phys_sgt->sgl); + nvgpu_kfree(g, mem->phys_sgt); + } if ((mem->mem_flags & NVGPU_MEM_FLAG_FOREIGN_SGT) == 0 && mem->priv.sgt != NULL) { diff --git a/drivers/gpu/nvgpu/os/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/os/linux/nvgpu_mem.c index 663b7ac36..3d0f8a6d0 100644 --- a/drivers/gpu/nvgpu/os/linux/nvgpu_mem.c +++ b/drivers/gpu/nvgpu/os/linux/nvgpu_mem.c @@ -188,66 +188,6 @@ int nvgpu_mem_create_from_mem(struct gk20a *g, return ret; } -int __nvgpu_mem_create_from_pages(struct gk20a *g, struct nvgpu_mem *dest, - struct page **pages, int nr_pages) -{ - struct sg_table *sgt; - struct page **our_pages = - nvgpu_kmalloc(g, sizeof(struct page *) * nr_pages); - - if (!our_pages) - return -ENOMEM; - - nvgpu_memcpy((u8 *)our_pages, (u8 *)pages, - sizeof(struct page *) * nr_pages); - - if (nvgpu_get_sgtable_from_pages(g, &sgt, pages, 0, - nr_pages * PAGE_SIZE)) { - nvgpu_kfree(g, our_pages); - return -ENOMEM; - } - - /* - * If we are making an SGT from physical pages we can be reasonably - * certain that this should bypass the SMMU - thus we set the DMA (aka - * IOVA) address to 0. This tells the GMMU mapping code to not make a - * mapping directed to the SMMU. 
- */ - sg_dma_address(sgt->sgl) = 0; - - dest->mem_flags = __NVGPU_MEM_FLAG_NO_DMA; - dest->aperture = APERTURE_SYSMEM; - dest->skip_wmb = 0; - dest->size = PAGE_SIZE * nr_pages; - - dest->priv.flags = 0; - dest->priv.pages = our_pages; - dest->priv.sgt = sgt; - - return 0; -} - -#ifdef CONFIG_TEGRA_GK20A_NVHOST -int __nvgpu_mem_create_from_phys(struct gk20a *g, struct nvgpu_mem *dest, - u64 src_phys, int nr_pages) -{ - struct page **pages = - nvgpu_kmalloc(g, sizeof(struct page *) * nr_pages); - int i, ret = 0; - - if (!pages) - return -ENOMEM; - - for (i = 0; i < nr_pages; i++) - pages[i] = phys_to_page(src_phys + PAGE_SIZE * i); - - ret = __nvgpu_mem_create_from_pages(g, dest, pages, nr_pages); - nvgpu_kfree(g, pages); - - return ret; -} -#endif - static struct nvgpu_sgl *nvgpu_mem_linux_sgl_next(struct nvgpu_sgl *sgl) { return (struct nvgpu_sgl *)sg_next((struct scatterlist *)sgl);
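
Usage note: a minimal sketch of how a caller is expected to use the new common
API, modeled on the syncpoint call sites in this patch. The helper name
example_map_syncpt_window(), its vm/gpu_va parameters, and the nvgpu_gmmu_map()
mapping step are illustrative assumptions based on other call sites in the
driver, not code introduced by this change.

/*
 * Hypothetical helper - mirrors gk20a_finalize_poweron() and
 * gv11b_alloc_syncpt_buf() in this patch. Assumes <nvgpu/nvgpu_mem.h>,
 * <nvgpu/gmmu.h> and <nvgpu/dma.h> are included.
 */
static int example_map_syncpt_window(struct gk20a *g, struct vm_gk20a *vm,
				     u64 *gpu_va)
{
	u32 nr_pages;
	int err;

	if (!nvgpu_mem_is_valid(&g->syncpt_mem)) {
		nr_pages = DIV_ROUND_UP(g->syncpt_unit_size, PAGE_SIZE);

		/*
		 * Builds a one-entry SGT over the contiguous physical range;
		 * no DMA API allocation is involved (__NVGPU_MEM_FLAG_NO_DMA).
		 */
		err = nvgpu_mem_create_from_phys(g, &g->syncpt_mem,
				g->syncpt_unit_base, nr_pages);
		if (err != 0) {
			return err;
		}
	}

	/*
	 * nvgpu_sgt_create_from_mem() now hands back the prebuilt phys_sgt
	 * for NO_DMA buffers, so the regular GMMU map path works unchanged.
	 * The nvgpu_gmmu_map() signature here is assumed from its other
	 * call sites in the driver.
	 */
	*gpu_va = nvgpu_gmmu_map(vm, &g->syncpt_mem, g->syncpt_mem.size,
			0, gk20a_mem_flag_none, false,
			g->syncpt_mem.aperture);
	if (*gpu_va == 0ULL) {
		return -ENOMEM;
	}

	return 0;
}

The nvgpu_mem itself is released through the normal nvgpu_dma_free() /
nvgpu_dma_unmap_free() path; for __NVGPU_MEM_FLAG_NO_DMA buffers that path now
frees the one-entry SGT and SGL allocated by nvgpu_mem_create_from_phys(), as
shown in the linux-dma.c hunk above.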