gpu: nvgpu: Make "phys" nvgpu_mem impl

Make a physical nvgpu_mem implementation in the common code. This
implementation assumes a single, contiguous, physical range. GMMU
mappability is provided by building a one-entry SGT.

Since this is now "common" code, the original Linux code has been
moved to common/mm/nvgpu_mem.c.

Also drop the '__' prefix from the nvgpu_mem function. The prefix is
not necessary: this function, although somewhat tricky, is expected
to be used by arbitrary callers within the nvgpu driver.

JIRA NVGPU-1029
Bug 2441531

Change-Id: I42313e5c664df3cd94933cc63ff0528326628683
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1995866
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Author:    Alex Waterman
Date:      2019-01-15 13:35:53 -08:00
Committer: mobile promotions
Parent:    e2a29dbb96
Commit:    f766c6af91
8 changed files with 150 additions and 114 deletions
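
For orientation before the per-file diffs: a minimal usage sketch assembled
from the call sites in this change. map_syncpt_mem() and its parameters are
hypothetical stand-ins, not code from the patch.

/* Sketch only; assumes the nvgpu headers touched by this change. */
#include <nvgpu/gk20a.h>
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/dma.h>

static int map_syncpt_mem(struct gk20a *g, struct nvgpu_mem *mem,
                          u64 phys_base, u64 size)
{
        /* The real call sites size the range in pages the same way. */
        u32 nr_pages = DIV_ROUND_UP(size, PAGE_SIZE);
        int err;

        /* Wrap the single contiguous physical range in an nvgpu_mem. */
        err = nvgpu_mem_create_from_phys(g, mem, phys_base, nr_pages);
        if (err != 0)
                return err;

        /*
         * mem is now a SYSMEM nvgpu_mem backed by a one-entry SGT, ready
         * for the GMMU mapping path; release it with nvgpu_dma_free().
         */
        return 0;
}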


@@ -432,7 +432,7 @@ int gk20a_finalize_poweron(struct gk20a *g)
 	if (nvgpu_has_syncpoints(g) && g->syncpt_unit_size) {
 		if (!nvgpu_mem_is_valid(&g->syncpt_mem)) {
 			nr_pages = DIV_ROUND_UP(g->syncpt_unit_size, PAGE_SIZE);
-			__nvgpu_mem_create_from_phys(g, &g->syncpt_mem,
+			nvgpu_mem_create_from_phys(g, &g->syncpt_mem,
					g->syncpt_unit_base, nr_pages);
		}
	}


@@ -23,6 +23,7 @@
 #include <nvgpu/bug.h>
 #include <nvgpu/kmem.h>
 #include <nvgpu/nvgpu_mem.h>
+#include <nvgpu/nvgpu_sgt.h>
 #include <nvgpu/dma.h>
 #include <nvgpu/vidmem.h>
 #include <nvgpu/gk20a.h>
@@ -222,3 +223,108 @@ void nvgpu_memset(struct gk20a *g, struct nvgpu_mem *mem, u32 offset,
 		(void)WARN(true, "Accessing unallocated nvgpu_mem");
 	}
 }
+
+static struct nvgpu_sgl *nvgpu_mem_phys_sgl_next(struct nvgpu_sgl *sgl)
+{
+	struct nvgpu_mem_sgl *sgl_impl = (struct nvgpu_mem_sgl *)sgl;
+
+	return (struct nvgpu_sgl *)sgl_impl->next;
+}
+
+/*
+ * Provided for compatibility - the DMA address is the same as the phys address
+ * for these nvgpu_mem's.
+ */
+static u64 nvgpu_mem_phys_sgl_dma(struct nvgpu_sgl *sgl)
+{
+	struct nvgpu_mem_sgl *sgl_impl = (struct nvgpu_mem_sgl *)sgl;
+
+	return sgl_impl->phys;
+}
+
+static u64 nvgpu_mem_phys_sgl_phys(struct gk20a *g, struct nvgpu_sgl *sgl)
+{
+	struct nvgpu_mem_sgl *sgl_impl = (struct nvgpu_mem_sgl *)sgl;
+
+	return sgl_impl->phys;
+}
+
+static u64 nvgpu_mem_phys_sgl_ipa_to_pa(struct gk20a *g,
+		struct nvgpu_sgl *sgl, u64 ipa, u64 *pa_len)
+{
+	return ipa;
+}
+
+static u64 nvgpu_mem_phys_sgl_length(struct nvgpu_sgl *sgl)
+{
+	struct nvgpu_mem_sgl *sgl_impl = (struct nvgpu_mem_sgl *)sgl;
+
+	return sgl_impl->length;
+}
+
+static u64 nvgpu_mem_phys_sgl_gpu_addr(struct gk20a *g,
+		struct nvgpu_sgl *sgl,
+		struct nvgpu_gmmu_attrs *attrs)
+{
+	struct nvgpu_mem_sgl *sgl_impl = (struct nvgpu_mem_sgl *)sgl;
+
+	return sgl_impl->phys;
+}
+
+static void nvgpu_mem_phys_sgt_free(struct gk20a *g, struct nvgpu_sgt *sgt)
+{
+	/*
+	 * No-op here. The free is handled by freeing the nvgpu_mem itself.
+	 */
+}
+
+static const struct nvgpu_sgt_ops nvgpu_mem_phys_ops = {
+	.sgl_next      = nvgpu_mem_phys_sgl_next,
+	.sgl_dma       = nvgpu_mem_phys_sgl_dma,
+	.sgl_phys      = nvgpu_mem_phys_sgl_phys,
+	.sgl_ipa       = nvgpu_mem_phys_sgl_phys,
+	.sgl_ipa_to_pa = nvgpu_mem_phys_sgl_ipa_to_pa,
+	.sgl_length    = nvgpu_mem_phys_sgl_length,
+	.sgl_gpu_addr  = nvgpu_mem_phys_sgl_gpu_addr,
+	.sgt_free      = nvgpu_mem_phys_sgt_free,
+
+	/*
+	 * The physical nvgpu_mems are never IOMMU'able by definition.
+	 */
+	.sgt_iommuable = NULL
+};
+
+int nvgpu_mem_create_from_phys(struct gk20a *g, struct nvgpu_mem *dest,
+			       u64 src_phys, u32 nr_pages)
+{
+	int ret = 0;
+	struct nvgpu_sgt *sgt;
+	struct nvgpu_mem_sgl *sgl;
+
+	/*
+	 * Do the two operations that can fail before touching *dest.
+	 */
+	sgt = nvgpu_kzalloc(g, sizeof(*sgt));
+	sgl = nvgpu_kzalloc(g, sizeof(*sgl));
+	if (sgt == NULL || sgl == NULL) {
+		nvgpu_kfree(g, sgt);
+		nvgpu_kfree(g, sgl);
+		return -ENOMEM;
+	}
+
+	(void) memset(dest, 0, sizeof(*dest));
+
+	dest->aperture = APERTURE_SYSMEM;
+	dest->size = (u64)nr_pages * SZ_4K;
+	dest->aligned_size = dest->size;
+	dest->mem_flags = __NVGPU_MEM_FLAG_NO_DMA;
+	dest->phys_sgt = sgt;
+
+	sgl->next = NULL;
+	sgl->phys = src_phys;
+	sgl->length = dest->size;
+	sgt->sgl = (struct nvgpu_sgl *)sgl;
+	sgt->ops = &nvgpu_mem_phys_ops;
+
+	return ret;
+}
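
A sketch (not part of the patch) of walking the resulting SGT through the
generic accessors; the nvgpu_sgt_get_*() helper names are assumed from the
common SGT layer, and for a phys nvgpu_mem the loop body runs exactly once.

static void dump_phys_mem_sgt(struct gk20a *g, struct nvgpu_mem *mem)
{
        /* For a __NVGPU_MEM_FLAG_NO_DMA mem this returns mem->phys_sgt. */
        struct nvgpu_sgt *sgt = nvgpu_sgt_create_from_mem(g, mem);
        struct nvgpu_sgl *sgl;

        for (sgl = sgt->sgl; sgl != NULL;
             sgl = nvgpu_sgt_get_next(sgt, sgl)) {
                nvgpu_log_info(g, "phys=0x%llx length=0x%llx",
                               nvgpu_sgt_get_phys(g, sgt, sgl),
                               nvgpu_sgt_get_length(sgt, sgl));
        }

        /*
         * A real caller pairs this with nvgpu_sgt_free(g, sgt); see the
         * create/free sketch after the next hunk.
         */
}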


@@ -22,6 +22,7 @@
 #include <nvgpu/dma.h>
 #include <nvgpu/bitops.h>
 #include <nvgpu/nvgpu_mem.h>
+#include <nvgpu/nvgpu_sgt.h>
 #include <nvgpu/nvgpu_sgt_os.h>
@@ -127,5 +128,9 @@ u64 nvgpu_sgt_alignment(struct gk20a *g, struct nvgpu_sgt *sgt)
 struct nvgpu_sgt *nvgpu_sgt_create_from_mem(struct gk20a *g,
					     struct nvgpu_mem *mem)
 {
+	if ((mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA) != 0) {
+		return mem->phys_sgt;
+	}
+
 	return nvgpu_sgt_os_create_from_mem(g, mem);
 }
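
The dispatch above keeps mapping code flavor-agnostic. A hedged sketch of
the intended create/use/free pairing; the surrounding mapping routine is
hypothetical:

        struct nvgpu_sgt *sgt = nvgpu_sgt_create_from_mem(g, mem);

        if (sgt == NULL)
                return -ENOMEM;

        /* ... program GMMU PTEs by walking sgt ... */

        /*
         * Safe for both flavors: the Linux implementation tears down its
         * sg_table here, while the phys implementation's sgt_free() is a
         * no-op and its memory is reclaimed later by nvgpu_dma_free().
         */
        nvgpu_sgt_free(g, sgt);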


@@ -72,7 +72,7 @@ int gv11b_alloc_syncpt_buf(struct channel_gk20a *c,
 		return err;
 
 	nr_pages = DIV_ROUND_UP(g->syncpt_size, PAGE_SIZE);
-	__nvgpu_mem_create_from_phys(g, syncpt_buf,
+	nvgpu_mem_create_from_phys(g, syncpt_buf,
			(g->syncpt_unit_base +
			nvgpu_nvhost_syncpt_unit_interface_get_byte_offset(syncpt_id)),
			nr_pages);
@@ -188,4 +188,4 @@ void gv11b_add_syncpt_incr_cmd(struct gk20a *g,
 u32 gv11b_get_syncpt_incr_cmd_size(bool wfi_cmd)
 {
 	return 10U;
-}
+}


@@ -38,52 +38,5 @@ struct nvgpu_sgt *nvgpu_mem_linux_sgt_create(struct gk20a *g,
 void nvgpu_mem_linux_sgt_free(struct gk20a *g, struct nvgpu_sgt *sgt);
 
 struct nvgpu_sgt *nvgpu_linux_sgt_create(struct gk20a *g,
					  struct sg_table *sgt);
-/**
- * __nvgpu_mem_create_from_pages - Create an nvgpu_mem from physical pages.
- *
- * @g - The GPU.
- * @dest - nvgpu_mem to initialize.
- * @pages - A list of page pointers.
- * @nr_pages - The number of pages in @pages.
- *
- * Create a new nvgpu_mem struct from a pre-existing list of physical pages. The
- * pages need not be contiguous (the underlying scatter gather list will help
- * with that). However, note, this API will explicitly make it so that the GMMU
- * mapping code bypasses SMMU access for the passed pages. This allows one to
- * make mem_descs that describe MMIO regions or other non-DRAM things.
- *
- * This only works for SYSMEM (or other things like SYSMEM - basically just not
- * VIDMEM). Also, this API is only available for Linux as it heavily depends on
- * the notion of struct %page.
- *
- * The resulting nvgpu_mem should be released with the nvgpu_dma_free() or the
- * nvgpu_dma_unmap_free() function depending on whether or not the resulting
- * nvgpu_mem has been mapped. The underlying pages themselves must be cleaned up
- * by the caller of this API.
- *
- * Returns 0 on success, or a relevant error otherwise.
- */
-int __nvgpu_mem_create_from_pages(struct gk20a *g, struct nvgpu_mem *dest,
-				  struct page **pages, int nr_pages);
-/**
- * __nvgpu_mem_create_from_phys - Create an nvgpu_mem from physical mem.
- *
- * @g - The GPU.
- * @dest - nvgpu_mem to initialize.
- * @src_phys - start address of physical mem
- * @nr_pages - The number of pages in phys.
- *
- * Create a new nvgpu_mem struct from a physical memory aperure. The physical
- * memory aperture needs to be contiguous for requested @nr_pages. This API
- * only works for SYSMEM.
- *
- * The resulting nvgpu_mem should be released with the nvgpu_dma_free() or the
- * nvgpu_dma_unmap_free() function depending on whether or not the resulting
- * nvgpu_mem has been mapped.
- *
- * Returns 0 on success, or a relevant error otherwise.
- */
-int __nvgpu_mem_create_from_phys(struct gk20a *g, struct nvgpu_mem *dest,
-				 u64 src_phys, int nr_pages);
 #endif


@@ -42,6 +42,7 @@ struct gk20a;
 struct nvgpu_allocator;
 struct nvgpu_gmmu_attrs;
 struct nvgpu_page_alloc;
+struct nvgpu_sgt;
 
 #define NVGPU_MEM_DMA_ERROR (~0ULL)
@@ -124,6 +125,11 @@ struct nvgpu_mem {
	struct nvgpu_allocator *allocator;
	struct nvgpu_list_node clear_list_entry;
 
+	/*
+	 * Fields for direct "physical" nvgpu_mem structs.
+	 */
+	struct nvgpu_sgt *phys_sgt;
+
	/*
	 * This is defined by the system specific header. It can be empty if
	 * there's no system specific stuff for a given system.
@@ -212,6 +218,29 @@ int nvgpu_mem_create_from_mem(struct gk20a *g,
		struct nvgpu_mem *dest, struct nvgpu_mem *src,
		u64 start_page, int nr_pages);
 
+/**
+ * nvgpu_mem_create_from_phys - Create an nvgpu_mem from physical mem.
+ *
+ * @g - The GPU.
+ * @dest - nvgpu_mem to initialize.
+ * @src_phys - start address of physical mem
+ * @nr_pages - The number of pages in phys.
+ *
+ * Create a new nvgpu_mem struct from a physical memory aperture. The physical
+ * memory aperture needs to be contiguous for requested @nr_pages. This API
+ * only works for SYSMEM. This also assumes a 4K page granule since the GMMU
+ * always supports 4K pages. If _system_ pages are larger than 4K then the
+ * resulting nvgpu_mem will represent less than 1 OS page worth of memory.
+ *
+ * The resulting nvgpu_mem should be released with the nvgpu_dma_free() or the
+ * nvgpu_dma_unmap_free() function depending on whether or not the resulting
+ * nvgpu_mem has been mapped.
+ *
+ * Returns 0 on success, or a relevant error otherwise.
+ */
+int nvgpu_mem_create_from_phys(struct gk20a *g, struct nvgpu_mem *dest,
+			       u64 src_phys, u32 nr_pages);
+
 /*
  * Really free a vidmem buffer. There's a fair amount of work involved in
  * freeing vidmem buffers in the DMA API. This handles none of that - it only
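
Since the granule is fixed at 4K, the size math is independent of the OS
page size. A small sketch (not in the patch; the helper name is made up)
mirroring how nvgpu_mem_create_from_phys() computes dest->size:

static u64 phys_mem_size_bytes(u32 nr_pages)
{
        /*
         * e.g. nr_pages = 4 -> 4 * SZ_4K = 0x4000 bytes, which is less
         * than one OS page on a 64K-page kernel, yet is still mapped
         * with 4K GMMU PTEs.
         */
        return (u64)nr_pages * SZ_4K;
}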


@@ -26,6 +26,7 @@
 #include <nvgpu/enabled.h>
 #include <nvgpu/vidmem.h>
 #include <nvgpu/gk20a.h>
+#include <nvgpu/nvgpu_sgt.h>
 
 #include <nvgpu/linux/dma.h>
@@ -345,11 +346,13 @@ void nvgpu_dma_free_sys(struct gk20a *g, struct nvgpu_mem *mem)
	}
 
	/*
-	 * When this flag is set we expect that pages is still populated but not
-	 * by the DMA API.
+	 * When this flag is set this means we are freeing a "phys" nvgpu_mem.
+	 * To handle this just nvgpu_kfree() the nvgpu_sgt and nvgpu_sgl.
	 */
-	if (mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA)
-		nvgpu_kfree(g, mem->priv.pages);
+	if (mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA) {
+		nvgpu_kfree(g, mem->phys_sgt->sgl);
+		nvgpu_kfree(g, mem->phys_sgt);
+	}
 
	if ((mem->mem_flags & NVGPU_MEM_FLAG_FOREIGN_SGT) == 0 &&
	    mem->priv.sgt != NULL) {


@@ -188,66 +188,6 @@ int nvgpu_mem_create_from_mem(struct gk20a *g,
	return ret;
 }
 
-int __nvgpu_mem_create_from_pages(struct gk20a *g, struct nvgpu_mem *dest,
-				  struct page **pages, int nr_pages)
-{
-	struct sg_table *sgt;
-	struct page **our_pages =
-		nvgpu_kmalloc(g, sizeof(struct page *) * nr_pages);
-
-	if (!our_pages)
-		return -ENOMEM;
-
-	nvgpu_memcpy((u8 *)our_pages, (u8 *)pages,
-		     sizeof(struct page *) * nr_pages);
-
-	if (nvgpu_get_sgtable_from_pages(g, &sgt, pages, 0,
-					 nr_pages * PAGE_SIZE)) {
-		nvgpu_kfree(g, our_pages);
-		return -ENOMEM;
-	}
-
-	/*
-	 * If we are making an SGT from physical pages we can be reasonably
-	 * certain that this should bypass the SMMU - thus we set the DMA (aka
-	 * IOVA) address to 0. This tells the GMMU mapping code to not make a
-	 * mapping directed to the SMMU.
-	 */
-	sg_dma_address(sgt->sgl) = 0;
-
-	dest->mem_flags = __NVGPU_MEM_FLAG_NO_DMA;
-	dest->aperture = APERTURE_SYSMEM;
-	dest->skip_wmb = 0;
-	dest->size = PAGE_SIZE * nr_pages;
-
-	dest->priv.flags = 0;
-	dest->priv.pages = our_pages;
-	dest->priv.sgt = sgt;
-
-	return 0;
-}
-
-#ifdef CONFIG_TEGRA_GK20A_NVHOST
-int __nvgpu_mem_create_from_phys(struct gk20a *g, struct nvgpu_mem *dest,
-				 u64 src_phys, int nr_pages)
-{
-	struct page **pages =
-		nvgpu_kmalloc(g, sizeof(struct page *) * nr_pages);
-	int i, ret = 0;
-
-	if (!pages)
-		return -ENOMEM;
-
-	for (i = 0; i < nr_pages; i++)
-		pages[i] = phys_to_page(src_phys + PAGE_SIZE * i);
-
-	ret = __nvgpu_mem_create_from_pages(g, dest, pages, nr_pages);
-	nvgpu_kfree(g, pages);
-
-	return ret;
-}
-#endif
-
 static struct nvgpu_sgl *nvgpu_mem_linux_sgl_next(struct nvgpu_sgl *sgl)
 {
	return (struct nvgpu_sgl *)sg_next((struct scatterlist *)sgl);