Mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
Synced 2025-12-24 02:22:34 +03:00
gpu: nvgpu: Refactor VM init/cleanup
Refactor the API for initializing and cleaning up VMs. This also involved
moving a bunch of GMMU code out into the gmmu code since part of
initializing a VM involves initializing the page tables for the VM.

JIRA NVGPU-12
JIRA NVGPU-30

Change-Id: I4710f08c26a6e39806f0762a35f6db5c94b64c50
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1477746
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Committed by: mobile promotions
Parent: f76febb962
Commit: fbafc7eba4
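As a rough illustration of the commit message above, the refactor pairs VM init with VM cleanup and lets VM setup delegate page-table construction to the GMMU code. The sketch below is a minimal, self-contained model of that split; the type and function names are stand-ins, not the actual nvgpu API introduced by this change.

/*
 * Illustrative sketch only: a VM init/cleanup pairing where page-table
 * setup lives in the gmmu side. All names here are hypothetical.
 */
#include <stdlib.h>

struct pdb { void *mem; };                  /* stand-in for a page directory    */
struct vm  { struct pdb pdb; int inited; }; /* stand-in for struct vm_gk20a     */

static int gmmu_init_page_table(struct vm *vm)
{
        vm->pdb.mem = calloc(1, 4096);      /* zeroed so every entry faults     */
        return vm->pdb.mem ? 0 : -1;
}

static void gmmu_free_page_table(struct vm *vm)
{
        free(vm->pdb.mem);
        vm->pdb.mem = NULL;
}

static int vm_init(struct vm *vm)
{
        if (gmmu_init_page_table(vm))       /* page-table work done by gmmu code */
                return -1;
        vm->inited = 1;
        return 0;
}

static void vm_cleanup(struct vm *vm)
{
        gmmu_free_page_table(vm);
        vm->inited = 0;
}

int main(void)
{
        struct vm vm = {0};

        if (vm_init(&vm) == 0)
                vm_cleanup(&vm);
        return 0;
}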
@@ -15,12 +15,150 @@
 */

#include <nvgpu/log.h>
#include <nvgpu/dma.h>
#include <nvgpu/gmmu.h>
#include <nvgpu/nvgpu_mem.h>

#include "gk20a/gk20a.h"
#include "gk20a/mm_gk20a.h"

static int alloc_gmmu_phys_pages(struct vm_gk20a *vm, u32 order,
                                 struct gk20a_mm_entry *entry)
{
        u32 num_pages = 1 << order;
        u32 len = num_pages * PAGE_SIZE;
        int err;
        struct page *pages;
        struct gk20a *g = vm->mm->g;

        /* note: mem_desc slightly abused (wrt. alloc_gmmu_pages) */

        pages = alloc_pages(GFP_KERNEL, order);
        if (!pages) {
                nvgpu_log(g, gpu_dbg_pte, "alloc_pages failed");
                goto err_out;
        }
        entry->mem.priv.sgt = nvgpu_kzalloc(g, sizeof(*entry->mem.priv.sgt));
        if (!entry->mem.priv.sgt) {
                nvgpu_log(g, gpu_dbg_pte, "cannot allocate sg table");
                goto err_alloced;
        }
        err = sg_alloc_table(entry->mem.priv.sgt, 1, GFP_KERNEL);
        if (err) {
                nvgpu_log(g, gpu_dbg_pte, "sg_alloc_table failed");
                goto err_sg_table;
        }
        sg_set_page(entry->mem.priv.sgt->sgl, pages, len, 0);
        entry->mem.cpu_va = page_address(pages);
        memset(entry->mem.cpu_va, 0, len);
        entry->mem.size = len;
        entry->mem.aperture = APERTURE_SYSMEM;
        FLUSH_CPU_DCACHE(entry->mem.cpu_va,
                         sg_phys(entry->mem.priv.sgt->sgl), len);

        return 0;

err_sg_table:
        nvgpu_kfree(vm->mm->g, entry->mem.priv.sgt);
err_alloced:
        __free_pages(pages, order);
err_out:
        return -ENOMEM;
}
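
The function above uses the classic staged-unwind pattern: each failure point jumps to a label that frees only what was successfully allocated before it. The sketch below models that pattern in plain userspace C; `acquire_two` and its allocations are stand-ins, not nvgpu code.

/*
 * Illustrative sketch (not part of this change): staged goto unwinding
 * mirroring alloc_gmmu_phys_pages() above.
 */
#include <stdlib.h>

static int acquire_two(void **a, void **b)
{
        *a = malloc(64);
        if (!*a)
                goto err_out;

        *b = malloc(64);
        if (!*b)
                goto err_a;

        return 0;

err_a:
        free(*a);       /* undo only the first allocation */
err_out:
        return -1;      /* mirrors the single -ENOMEM return above */
}

int main(void)
{
        void *a, *b;

        if (acquire_two(&a, &b) == 0) {
                free(b);
                free(a);
        }
        return 0;
}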

static int nvgpu_alloc_gmmu_pages(struct vm_gk20a *vm, u32 order,
                                  struct gk20a_mm_entry *entry)
{
        struct gk20a *g = gk20a_from_vm(vm);
        u32 num_pages = 1 << order;
        u32 len = num_pages * PAGE_SIZE;
        int err;

        if (g->is_fmodel)
                return alloc_gmmu_phys_pages(vm, order, entry);

        /*
         * On arm32 we're limited by vmalloc space, so we do not map pages by
         * default.
         */
        if (IS_ENABLED(CONFIG_ARM64))
                err = nvgpu_dma_alloc(g, len, &entry->mem);
        else
                err = nvgpu_dma_alloc_flags(g, NVGPU_DMA_NO_KERNEL_MAPPING,
                                            len, &entry->mem);

        if (err) {
                nvgpu_err(g, "memory allocation failed");
                return -ENOMEM;
        }

        return 0;
}

/*
 * Allocate a phys contig region big enough for a full
 * sized gmmu page table for the given gmmu_page_size.
 * The whole range is zeroed so it's "invalid"/will fault.
 *
 * If a previous entry is supplied, its memory will be used for
 * suballocation for this next entry too, if there is space.
 */
int nvgpu_zalloc_gmmu_page_table(struct vm_gk20a *vm,
                                 enum gmmu_pgsz_gk20a pgsz_idx,
                                 const struct gk20a_mmu_level *l,
                                 struct gk20a_mm_entry *entry,
                                 struct gk20a_mm_entry *prev_entry)
{
        int err = -ENOMEM;
        int order;
        struct gk20a *g = gk20a_from_vm(vm);
        u32 bytes;

        /* allocate enough pages for the table */
        order = l->hi_bit[pgsz_idx] - l->lo_bit[pgsz_idx] + 1;
        order += ilog2(l->entry_size);
        bytes = 1 << order;
        order -= PAGE_SHIFT;
        if (order < 0 && prev_entry) {
                /* try to suballocate from previous chunk */
                u32 capacity = prev_entry->mem.size / bytes;
                u32 prev = prev_entry->woffset * sizeof(u32) / bytes;
                u32 free = capacity - prev - 1;

                nvgpu_log(g, gpu_dbg_pte, "cap %d prev %d free %d bytes %d",
                          capacity, prev, free, bytes);

                if (free) {
                        memcpy(&entry->mem, &prev_entry->mem,
                               sizeof(entry->mem));
                        entry->woffset = prev_entry->woffset
                                + bytes / sizeof(u32);
                        err = 0;
                }
        }

        if (err) {
                /* no suballoc space */
                order = max(0, order);
                err = nvgpu_alloc_gmmu_pages(vm, order, entry);
                entry->woffset = 0;
        }

        nvgpu_log(g, gpu_dbg_pte, "entry = 0x%p, addr=%08llx, size %d, woff %x",
                  entry,
                  (entry->mem.priv.sgt &&
                   entry->mem.aperture == APERTURE_SYSMEM) ?
                  g->ops.mm.get_iova_addr(g, entry->mem.priv.sgt->sgl, 0) : 0,
                  order, entry->woffset);
        if (err)
                return err;
        entry->pgsz = pgsz_idx;
        entry->mem.skip_wmb = true;

        return err;
}

/*
 * Core GMMU map function for the kernel to use. If @addr is 0 then the GPU
 * VA will be allocated for you. If addr is non-zero then the buffer will be
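The diff view is cut off above, but the visible part of the comment describes an @addr convention: a zero address asks the mapper to pick a GPU VA, while a non-zero address requests a fixed placement. The sketch below only illustrates that convention; the function name and VA bookkeeping are stand-ins, not the real nvgpu map entry point.

/*
 * Illustrative sketch (not nvgpu code): addr == 0 lets the mapper choose a
 * GPU VA; a non-zero addr requests that exact placement.
 */
#include <stdio.h>

static unsigned long long next_free_va = 0x100000ull;

static unsigned long long gmmu_map_sketch(unsigned long long addr, size_t size)
{
        if (addr == 0) {
                /* caller wants the allocator to choose the GPU VA */
                addr = next_free_va;
                next_free_va += size;
        }
        /* (real code would now write PTEs covering [addr, addr + size)) */
        return addr;
}

int main(void)
{
        printf("auto  VA: 0x%llx\n", gmmu_map_sketch(0, 4096));
        printf("fixed VA: 0x%llx\n", gmmu_map_sketch(0x200000ull, 4096));
        return 0;
}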