Mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
Synced 2025-12-24 02:22:34 +03:00
gpu: nvgpu: Refactor VM init/cleanup
Refactor the API for initializing and cleaning up VMs. This also involved
moving a bunch of GMMU code out into the gmmu code since part of
initializing a VM involves initializing the page tables for the VM.

JIRA NVGPU-12
JIRA NVGPU-30

Change-Id: I4710f08c26a6e39806f0762a35f6db5c94b64c50
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1477746
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Committed by: mobile promotions
Parent: f76febb962
Commit: fbafc7eba4
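As a rough illustration of the commit message above, the refactor pairs VM init with VM cleanup and lets VM setup delegate page-table construction to the GMMU code. The sketch below is a minimal, self-contained model of that split; the type and function names are stand-ins, not the actual nvgpu API introduced by this change.

/*
 * Illustrative sketch only: a VM init/cleanup pairing where page-table
 * setup lives in the gmmu side. All names here are hypothetical.
 */
#include <stdlib.h>

struct pdb { void *mem; };                  /* stand-in for a page directory    */
struct vm  { struct pdb pdb; int inited; }; /* stand-in for struct vm_gk20a     */

static int gmmu_init_page_table(struct vm *vm)
{
        vm->pdb.mem = calloc(1, 4096);      /* zeroed so every entry faults     */
        return vm->pdb.mem ? 0 : -1;
}

static void gmmu_free_page_table(struct vm *vm)
{
        free(vm->pdb.mem);
        vm->pdb.mem = NULL;
}

static int vm_init(struct vm *vm)
{
        if (gmmu_init_page_table(vm))       /* page-table work done by gmmu code */
                return -1;
        vm->inited = 1;
        return 0;
}

static void vm_cleanup(struct vm *vm)
{
        gmmu_free_page_table(vm);
        vm->inited = 0;
}

int main(void)
{
        struct vm vm = {0};

        if (vm_init(&vm) == 0)
                vm_cleanup(&vm);
        return 0;
}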
@@ -15,12 +15,150 @@
 */

#include <nvgpu/log.h>
#include <nvgpu/dma.h>
#include <nvgpu/gmmu.h>
#include <nvgpu/nvgpu_mem.h>

#include "gk20a/gk20a.h"
#include "gk20a/mm_gk20a.h"

static int alloc_gmmu_phys_pages(struct vm_gk20a *vm, u32 order,
                                 struct gk20a_mm_entry *entry)
{
        u32 num_pages = 1 << order;
        u32 len = num_pages * PAGE_SIZE;
        int err;
        struct page *pages;
        struct gk20a *g = vm->mm->g;

        /* note: mem_desc slightly abused (wrt. alloc_gmmu_pages) */

        pages = alloc_pages(GFP_KERNEL, order);
        if (!pages) {
                nvgpu_log(g, gpu_dbg_pte, "alloc_pages failed");
                goto err_out;
        }
        entry->mem.priv.sgt = nvgpu_kzalloc(g, sizeof(*entry->mem.priv.sgt));
        if (!entry->mem.priv.sgt) {
                nvgpu_log(g, gpu_dbg_pte, "cannot allocate sg table");
                goto err_alloced;
        }
        err = sg_alloc_table(entry->mem.priv.sgt, 1, GFP_KERNEL);
        if (err) {
                nvgpu_log(g, gpu_dbg_pte, "sg_alloc_table failed");
                goto err_sg_table;
        }
        sg_set_page(entry->mem.priv.sgt->sgl, pages, len, 0);
        entry->mem.cpu_va = page_address(pages);
        memset(entry->mem.cpu_va, 0, len);
        entry->mem.size = len;
        entry->mem.aperture = APERTURE_SYSMEM;
        FLUSH_CPU_DCACHE(entry->mem.cpu_va,
                         sg_phys(entry->mem.priv.sgt->sgl), len);

        return 0;

err_sg_table:
        nvgpu_kfree(vm->mm->g, entry->mem.priv.sgt);
err_alloced:
        __free_pages(pages, order);
err_out:
        return -ENOMEM;
}
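
The function above uses the classic staged-unwind pattern: each failure point jumps to a label that frees only what was successfully allocated before it. The sketch below models that pattern in plain userspace C; `acquire_two` and its allocations are stand-ins, not nvgpu code.

/*
 * Illustrative sketch (not part of this change): staged goto unwinding
 * mirroring alloc_gmmu_phys_pages() above.
 */
#include <stdlib.h>

static int acquire_two(void **a, void **b)
{
        *a = malloc(64);
        if (!*a)
                goto err_out;

        *b = malloc(64);
        if (!*b)
                goto err_a;

        return 0;

err_a:
        free(*a);       /* undo only the first allocation */
err_out:
        return -1;      /* mirrors the single -ENOMEM return above */
}

int main(void)
{
        void *a, *b;

        if (acquire_two(&a, &b) == 0) {
                free(b);
                free(a);
        }
        return 0;
}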

static int nvgpu_alloc_gmmu_pages(struct vm_gk20a *vm, u32 order,
                                  struct gk20a_mm_entry *entry)
{
        struct gk20a *g = gk20a_from_vm(vm);
        u32 num_pages = 1 << order;
        u32 len = num_pages * PAGE_SIZE;
        int err;

        if (g->is_fmodel)
                return alloc_gmmu_phys_pages(vm, order, entry);

        /*
         * On arm32 we're limited by vmalloc space, so we do not map pages by
         * default.
         */
        if (IS_ENABLED(CONFIG_ARM64))
                err = nvgpu_dma_alloc(g, len, &entry->mem);
        else
                err = nvgpu_dma_alloc_flags(g, NVGPU_DMA_NO_KERNEL_MAPPING,
                                            len, &entry->mem);

        if (err) {
                nvgpu_err(g, "memory allocation failed");
                return -ENOMEM;
        }

        return 0;
}

/*
 * Allocate a phys contig region big enough for a full
 * sized gmmu page table for the given gmmu_page_size.
 * The whole range is zeroed so it's "invalid"/will fault.
 *
 * If a previous entry is supplied, its memory will be used for
 * suballocation for this next entry too, if there is space.
 */
int nvgpu_zalloc_gmmu_page_table(struct vm_gk20a *vm,
                                 enum gmmu_pgsz_gk20a pgsz_idx,
                                 const struct gk20a_mmu_level *l,
                                 struct gk20a_mm_entry *entry,
                                 struct gk20a_mm_entry *prev_entry)
{
        int err = -ENOMEM;
        int order;
        struct gk20a *g = gk20a_from_vm(vm);
        u32 bytes;

        /* allocate enough pages for the table */
        order = l->hi_bit[pgsz_idx] - l->lo_bit[pgsz_idx] + 1;
        order += ilog2(l->entry_size);
        bytes = 1 << order;
        order -= PAGE_SHIFT;
        if (order < 0 && prev_entry) {
                /* try to suballocate from previous chunk */
                u32 capacity = prev_entry->mem.size / bytes;
                u32 prev = prev_entry->woffset * sizeof(u32) / bytes;
                u32 free = capacity - prev - 1;

                nvgpu_log(g, gpu_dbg_pte, "cap %d prev %d free %d bytes %d",
                          capacity, prev, free, bytes);

                if (free) {
                        memcpy(&entry->mem, &prev_entry->mem,
                               sizeof(entry->mem));
                        entry->woffset = prev_entry->woffset
                                + bytes / sizeof(u32);
                        err = 0;
                }
        }

        if (err) {
                /* no suballoc space */
                order = max(0, order);
                err = nvgpu_alloc_gmmu_pages(vm, order, entry);
                entry->woffset = 0;
        }

        nvgpu_log(g, gpu_dbg_pte, "entry = 0x%p, addr=%08llx, size %d, woff %x",
                  entry,
                  (entry->mem.priv.sgt &&
                   entry->mem.aperture == APERTURE_SYSMEM) ?
                  g->ops.mm.get_iova_addr(g, entry->mem.priv.sgt->sgl, 0) : 0,
                  order, entry->woffset);
        if (err)
                return err;
        entry->pgsz = pgsz_idx;
        entry->mem.skip_wmb = true;

        return err;
}

/*
 * Core GMMU map function for the kernel to use. If @addr is 0 then the GPU
 * VA will be allocated for you. If addr is non-zero then the buffer will be
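The diff view is cut off above, but the visible part of the comment describes an @addr convention: a zero address asks the mapper to pick a GPU VA, while a non-zero address requests a fixed placement. The sketch below only illustrates that convention; the function name and VA bookkeeping are stand-ins, not the real nvgpu map entry point.

/*
 * Illustrative sketch (not nvgpu code): addr == 0 lets the mapper choose a
 * GPU VA; a non-zero addr requests that exact placement.
 */
#include <stdio.h>

static unsigned long long next_free_va = 0x100000ull;

static unsigned long long gmmu_map_sketch(unsigned long long addr, size_t size)
{
        if (addr == 0) {
                /* caller wants the allocator to choose the GPU VA */
                addr = next_free_va;
                next_free_va += size;
        }
        /* (real code would now write PTEs covering [addr, addr + size)) */
        return addr;
}

int main(void)
{
        printf("auto  VA: 0x%llx\n", gmmu_map_sketch(0, 4096));
        printf("fixed VA: 0x%llx\n", gmmu_map_sketch(0x200000ull, 4096));
        return 0;
}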