Mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
gpu: nvgpu: Refactor VM init/cleanup
Refactor the API for initializing and cleaning up VMs. This also involved
moving a bunch of GMMU code out into the gmmu code, since part of
initializing a VM involves initializing the page tables for the VM.

JIRA NVGPU-12
JIRA NVGPU-30

Change-Id: I4710f08c26a6e39806f0762a35f6db5c94b64c50
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1477746
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Committed by: mobile promotions
Commit: fbafc7eba4 (parent f76febb962)
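Before the diff, a minimal usage sketch of how a caller pairs the refactored
init and teardown entry points. This is illustrative only and not part of the
commit: the sizes and the example function are invented; only the function
names and signatures come from the patch below.

/*
 * Illustrative sketch, assuming the kernel-internal nvgpu headers.
 * All sizes here are made up.
 */
static int example_setup_vm(struct mm_gk20a *mm, struct nvgpu_mem *inst_block)
{
        struct vm_gk20a vm = { 0 };
        int err;

        /*
         * A VM with no user section: low_hole + kernel_reserved sum to
         * exactly aperture_size, as nvgpu_init_vm() requires for
         * bar1-style address spaces.
         */
        err = nvgpu_init_vm(mm, &vm,
                            SZ_64K,          /* big_page_size */
                            SZ_4K,           /* low_hole */
                            SZ_16M - SZ_4K,  /* kernel_reserved */
                            SZ_16M,          /* aperture_size */
                            false,           /* big_pages (recomputed) */
                            false,           /* userspace_managed */
                            "example");
        if (err)
                return err;

        /* ... use the VM ... */

        /* Teardown goes through the renamed nvgpu_vm_remove(). */
        nvgpu_vm_remove(&vm, inst_block);
        return 0;
}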
@@ -15,12 +15,150 @@
  */

 #include <nvgpu/log.h>
+#include <nvgpu/dma.h>
 #include <nvgpu/gmmu.h>
 #include <nvgpu/nvgpu_mem.h>

 #include "gk20a/gk20a.h"
 #include "gk20a/mm_gk20a.h"

+static int alloc_gmmu_phys_pages(struct vm_gk20a *vm, u32 order,
+                                 struct gk20a_mm_entry *entry)
+{
+        u32 num_pages = 1 << order;
+        u32 len = num_pages * PAGE_SIZE;
+        int err;
+        struct page *pages;
+        struct gk20a *g = vm->mm->g;
+
+        /* note: mem_desc slightly abused (wrt. alloc_gmmu_pages) */
+
+        pages = alloc_pages(GFP_KERNEL, order);
+        if (!pages) {
+                nvgpu_log(g, gpu_dbg_pte, "alloc_pages failed");
+                goto err_out;
+        }
+        entry->mem.priv.sgt = nvgpu_kzalloc(g, sizeof(*entry->mem.priv.sgt));
+        if (!entry->mem.priv.sgt) {
+                nvgpu_log(g, gpu_dbg_pte, "cannot allocate sg table");
+                goto err_alloced;
+        }
+        err = sg_alloc_table(entry->mem.priv.sgt, 1, GFP_KERNEL);
+        if (err) {
+                nvgpu_log(g, gpu_dbg_pte, "sg_alloc_table failed");
+                goto err_sg_table;
+        }
+        sg_set_page(entry->mem.priv.sgt->sgl, pages, len, 0);
+        entry->mem.cpu_va = page_address(pages);
+        memset(entry->mem.cpu_va, 0, len);
+        entry->mem.size = len;
+        entry->mem.aperture = APERTURE_SYSMEM;
+        FLUSH_CPU_DCACHE(entry->mem.cpu_va,
+                         sg_phys(entry->mem.priv.sgt->sgl), len);
+
+        return 0;
+
+err_sg_table:
+        nvgpu_kfree(vm->mm->g, entry->mem.priv.sgt);
+err_alloced:
+        __free_pages(pages, order);
+err_out:
+        return -ENOMEM;
+}
+
+static int nvgpu_alloc_gmmu_pages(struct vm_gk20a *vm, u32 order,
+                                  struct gk20a_mm_entry *entry)
+{
+        struct gk20a *g = gk20a_from_vm(vm);
+        u32 num_pages = 1 << order;
+        u32 len = num_pages * PAGE_SIZE;
+        int err;
+
+        if (g->is_fmodel)
+                return alloc_gmmu_phys_pages(vm, order, entry);
+
+        /*
+         * On arm32 we're limited by vmalloc space, so we do not map pages by
+         * default.
+         */
+        if (IS_ENABLED(CONFIG_ARM64))
+                err = nvgpu_dma_alloc(g, len, &entry->mem);
+        else
+                err = nvgpu_dma_alloc_flags(g, NVGPU_DMA_NO_KERNEL_MAPPING,
+                                            len, &entry->mem);
+
+
+        if (err) {
+                nvgpu_err(g, "memory allocation failed");
+                return -ENOMEM;
+        }
+
+        return 0;
+}
+
+/*
+ * Allocate a phys contig region big enough for a full
+ * sized gmmu page table for the given gmmu_page_size.
+ * the whole range is zeroed so it's "invalid"/will fault.
+ *
+ * If a previous entry is supplied, its memory will be used for
+ * suballocation for this next entry too, if there is space.
+ */
+int nvgpu_zalloc_gmmu_page_table(struct vm_gk20a *vm,
+                                 enum gmmu_pgsz_gk20a pgsz_idx,
+                                 const struct gk20a_mmu_level *l,
+                                 struct gk20a_mm_entry *entry,
+                                 struct gk20a_mm_entry *prev_entry)
+{
+        int err = -ENOMEM;
+        int order;
+        struct gk20a *g = gk20a_from_vm(vm);
+        u32 bytes;
+
+        /* allocate enough pages for the table */
+        order = l->hi_bit[pgsz_idx] - l->lo_bit[pgsz_idx] + 1;
+        order += ilog2(l->entry_size);
+        bytes = 1 << order;
+        order -= PAGE_SHIFT;
+        if (order < 0 && prev_entry) {
+                /* try to suballocate from previous chunk */
+                u32 capacity = prev_entry->mem.size / bytes;
+                u32 prev = prev_entry->woffset * sizeof(u32) / bytes;
+                u32 free = capacity - prev - 1;
+
+                nvgpu_log(g, gpu_dbg_pte, "cap %d prev %d free %d bytes %d",
+                          capacity, prev, free, bytes);
+
+                if (free) {
+                        memcpy(&entry->mem, &prev_entry->mem,
+                               sizeof(entry->mem));
+                        entry->woffset = prev_entry->woffset
+                                + bytes / sizeof(u32);
+                        err = 0;
+                }
+        }
+
+        if (err) {
+                /* no suballoc space */
+                order = max(0, order);
+                err = nvgpu_alloc_gmmu_pages(vm, order, entry);
+                entry->woffset = 0;
+        }
+
+        nvgpu_log(g, gpu_dbg_pte, "entry = 0x%p, addr=%08llx, size %d, woff %x",
+                  entry,
+                  (entry->mem.priv.sgt &&
+                   entry->mem.aperture == APERTURE_SYSMEM) ?
+                  g->ops.mm.get_iova_addr(g, entry->mem.priv.sgt->sgl, 0) : 0,
+                  order, entry->woffset);
+        if (err)
+                return err;
+        entry->pgsz = pgsz_idx;
+        entry->mem.skip_wmb = true;
+
+        return err;
+}
+
 /*
  * Core GMMU map function for the kernel to use. If @addr is 0 then the GPU
  * VA will be allocated for you. If addr is non-zero then the buffer will be
@@ -14,6 +14,8 @@
  * along with this program. If not, see <http://www.gnu.org/licenses/>.
  */

+#include <nvgpu/log.h>
+#include <nvgpu/dma.h>
 #include <nvgpu/vm.h>
 #include <nvgpu/vm_area.h>
 #include <nvgpu/lock.h>
@@ -23,6 +25,7 @@

 #include "gk20a/gk20a.h"
 #include "gk20a/mm_gk20a.h"
+#include "gk20a/platform_gk20a.h"

 int vm_aspace_id(struct vm_gk20a *vm)
 {
@@ -104,6 +107,341 @@ void nvgpu_vm_mapping_batch_finish(struct vm_gk20a *vm,
 	nvgpu_mutex_release(&vm->update_gmmu_lock);
 }

+static int nvgpu_vm_init_page_tables(struct vm_gk20a *vm)
+{
+        u32 pde_lo, pde_hi;
+        int err;
+
+        pde_range_from_vaddr_range(vm,
+                                   0, vm->va_limit-1,
+                                   &pde_lo, &pde_hi);
+        vm->pdb.entries = nvgpu_vzalloc(vm->mm->g,
+                                        sizeof(struct gk20a_mm_entry) *
+                                        (pde_hi + 1));
+        vm->pdb.num_entries = pde_hi + 1;
+
+        if (!vm->pdb.entries)
+                return -ENOMEM;
+
+        err = nvgpu_zalloc_gmmu_page_table(vm, 0, &vm->mmu_levels[0],
+                                           &vm->pdb, NULL);
+        if (err) {
+                nvgpu_vfree(vm->mm->g, vm->pdb.entries);
+                return err;
+        }
+
+        return 0;
+}
+
+/*
+ * Determine if the passed address space can support big pages or not.
+ */
+int nvgpu_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size)
+{
+        u64 mask = ((u64)vm->big_page_size << 10) - 1;
+
+        if (base & mask || size & mask)
+                return 0;
+        return 1;
+}
+
+/*
+ * Initialize a semaphore pool. Just return successfully if we do not need
+ * semaphores (i.e when sync-pts are active).
+ */
+static int nvgpu_init_sema_pool(struct vm_gk20a *vm)
+{
+        struct nvgpu_semaphore_sea *sema_sea;
+        struct mm_gk20a *mm = vm->mm;
+        struct gk20a *g = mm->g;
+        int err;
+
+        /*
+         * Don't waste the memory on semaphores if we don't need them.
+         */
+        if (g->gpu_characteristics.flags & NVGPU_GPU_FLAGS_HAS_SYNCPOINTS)
+                return 0;
+
+        if (vm->sema_pool)
+                return 0;
+
+        sema_sea = nvgpu_semaphore_sea_create(g);
+        if (!sema_sea)
+                return -ENOMEM;
+
+        vm->sema_pool = nvgpu_semaphore_pool_alloc(sema_sea);
+        if (!vm->sema_pool)
+                return -ENOMEM;
+
+        /*
+         * Allocate a chunk of GPU VA space for mapping the semaphores. We will
+         * do a fixed alloc in the kernel VM so that all channels have the same
+         * RO address range for the semaphores.
+         *
+         * !!! TODO: cleanup.
+         */
+        sema_sea->gpu_va = nvgpu_alloc_fixed(&vm->kernel,
+                                             vm->va_limit -
+                                             mm->channel.kernel_size,
+                                             512 * PAGE_SIZE,
+                                             SZ_4K);
+        if (!sema_sea->gpu_va) {
+                nvgpu_free(&vm->kernel, sema_sea->gpu_va);
+                nvgpu_vm_put(vm);
+                return -ENOMEM;
+        }
+
+        err = nvgpu_semaphore_pool_map(vm->sema_pool, vm);
+        if (err) {
+                nvgpu_semaphore_pool_unmap(vm->sema_pool, vm);
+                nvgpu_free(vm->vma[gmmu_page_size_small],
+                           vm->sema_pool->gpu_va);
+                return err;
+        }
+
+        return 0;
+}
+
+/**
+ * nvgpu_init_vm() - Initialize an address space.
+ *
+ * @mm - Parent MM.
+ * @vm - The VM to init.
+ * @big_page_size - Size of big pages associated with this VM.
+ * @low_hole - The size of the low hole (unaddressable memory at the bottom of
+ *             the address space.
+ * @kernel_reserved - Space reserved for kernel only allocations.
+ * @aperture_size - Total size of the aperture.
+ * @big_pages - Ignored. Will be set based on other passed params.
+ * @name - Name of the address space.
+ *
+ * This function initializes an address space according to the following map:
+ *
+ *     +--+ 0x0
+ *     |  |
+ *     +--+ @low_hole
+ *     |  |
+ *     ~  ~   This is the "user" section.
+ *     |  |
+ *     +--+ @aperture_size - @kernel_reserved
+ *     |  |
+ *     ~  ~   This is the "kernel" section.
+ *     |  |
+ *     +--+ @aperture_size
+ *
+ * The user section is therefor what ever is left over after the @low_hole and
+ * @kernel_reserved memory have been portioned out. The @kernel_reserved is
+ * always persent at the top of the memory space and the @low_hole is always at
+ * the bottom.
+ *
+ * For certain address spaces a "user" section makes no sense (bar1, etc) so in
+ * such cases the @kernel_reserved and @low_hole should sum to exactly
+ * @aperture_size.
+ */
+int nvgpu_init_vm(struct mm_gk20a *mm,
+                  struct vm_gk20a *vm,
+                  u32 big_page_size,
+                  u64 low_hole,
+                  u64 kernel_reserved,
+                  u64 aperture_size,
+                  bool big_pages,
+                  bool userspace_managed,
+                  char *name)
+{
+        int err;
+        char alloc_name[32];
+        u64 kernel_vma_flags;
+        u64 user_vma_start, user_vma_limit;
+        u64 user_lp_vma_start, user_lp_vma_limit;
+        u64 kernel_vma_start, kernel_vma_limit;
+        struct gk20a *g = mm->g;
+        struct gk20a_platform *p = gk20a_get_platform(g->dev);
+
+        if (WARN_ON(kernel_reserved + low_hole > aperture_size))
+                return -ENOMEM;
+
+        nvgpu_log_info(g, "Init space for %s: valimit=0x%llx, "
+                       "LP size=0x%x lowhole=0x%llx",
+                       name, aperture_size,
+                       (unsigned int)big_page_size, low_hole);
+
+        vm->mm = mm;
+
+        vm->gmmu_page_sizes[gmmu_page_size_small] = SZ_4K;
+        vm->gmmu_page_sizes[gmmu_page_size_big] = big_page_size;
+        vm->gmmu_page_sizes[gmmu_page_size_kernel] = SZ_4K;
+
+        /* Set up vma pointers. */
+        vm->vma[gmmu_page_size_small] = &vm->user;
+        vm->vma[gmmu_page_size_big] = &vm->user;
+        vm->vma[gmmu_page_size_kernel] = &vm->kernel;
+        if (!p->unify_address_spaces)
+                vm->vma[gmmu_page_size_big] = &vm->user_lp;
+
+        vm->va_start = low_hole;
+        vm->va_limit = aperture_size;
+        vm->big_pages = big_pages;
+
+        vm->big_page_size = vm->gmmu_page_sizes[gmmu_page_size_big];
+        vm->userspace_managed = userspace_managed;
+        vm->mmu_levels = g->ops.mm.get_mmu_levels(g, vm->big_page_size);
+
+        /* Initialize the page table data structures. */
+        err = nvgpu_vm_init_page_tables(vm);
+        if (err)
+                return err;
+
+        /* Setup vma limits. */
+        if (kernel_reserved + low_hole < aperture_size) {
+                if (p->unify_address_spaces) {
+                        user_vma_start = low_hole;
+                        user_vma_limit = vm->va_limit - kernel_reserved;
+                        user_lp_vma_start = user_vma_limit;
+                        user_lp_vma_limit = user_vma_limit;
+                } else {
+                        user_vma_start = low_hole;
+                        user_vma_limit = __nv_gmmu_va_small_page_limit();
+                        user_lp_vma_start = __nv_gmmu_va_small_page_limit();
+                        user_lp_vma_limit = vm->va_limit - kernel_reserved;
+                }
+        } else {
+                user_vma_start = 0;
+                user_vma_limit = 0;
+                user_lp_vma_start = 0;
+                user_lp_vma_limit = 0;
+        }
+        kernel_vma_start = vm->va_limit - kernel_reserved;
+        kernel_vma_limit = vm->va_limit;
+
+        nvgpu_log_info(g, "user_vma [0x%llx,0x%llx)",
+                       user_vma_start, user_vma_limit);
+        nvgpu_log_info(g, "user_lp_vma [0x%llx,0x%llx)",
+                       user_lp_vma_start, user_lp_vma_limit);
+        nvgpu_log_info(g, "kernel_vma [0x%llx,0x%llx)",
+                       kernel_vma_start, kernel_vma_limit);
+
+        if (WARN_ON(user_vma_start > user_vma_limit) ||
+            WARN_ON(user_lp_vma_start > user_lp_vma_limit) ||
+            WARN_ON(kernel_vma_start >= kernel_vma_limit)) {
+                err = -EINVAL;
+                goto clean_up_page_tables;
+        }
+
+        kernel_vma_flags = (kernel_reserved + low_hole) == aperture_size ?
+                0 : GPU_ALLOC_GVA_SPACE;
+
+        /*
+         * A "user" area only makes sense for the GVA spaces. For VMs where
+         * there is no "user" area user_vma_start will be equal to
+         * user_vma_limit (i.e a 0 sized space). In such a situation the kernel
+         * area must be non-zero in length.
+         */
+        if (user_vma_start >= user_vma_limit &&
+            kernel_vma_start >= kernel_vma_limit) {
+                err = -EINVAL;
+                goto clean_up_page_tables;
+        }
+
+        /*
+         * Determine if big pages are possible in this VM. If a split address
+         * space is used then check the user_lp vma instead of the user vma.
+         */
+        if (p->unify_address_spaces)
+                vm->big_pages = nvgpu_big_pages_possible(vm, user_vma_start,
+                                        user_vma_limit - user_vma_start);
+        else
+                vm->big_pages = nvgpu_big_pages_possible(vm, user_lp_vma_start,
+                                        user_lp_vma_limit - user_lp_vma_start);
+
+        /*
+         * User VMA.
+         */
+        if (user_vma_start < user_vma_limit) {
+                snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s", name);
+                err = __nvgpu_buddy_allocator_init(g, &vm->user,
+                                                   vm, alloc_name,
+                                                   user_vma_start,
+                                                   user_vma_limit -
+                                                   user_vma_start,
+                                                   SZ_4K,
+                                                   GPU_BALLOC_MAX_ORDER,
+                                                   GPU_ALLOC_GVA_SPACE);
+                if (err)
+                        goto clean_up_page_tables;
+        } else {
+                /*
+                 * Make these allocator pointers point to the kernel allocator
+                 * since we still use the legacy notion of page size to choose
+                 * the allocator.
+                 */
+                vm->vma[0] = &vm->kernel;
+                vm->vma[1] = &vm->kernel;
+        }
+
+        /*
+         * User VMA for large pages when a split address range is used.
+         */
+        if (user_lp_vma_start < user_lp_vma_limit) {
+                snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s_lp", name);
+                err = __nvgpu_buddy_allocator_init(g, &vm->user_lp,
+                                                   vm, alloc_name,
+                                                   user_lp_vma_start,
+                                                   user_lp_vma_limit -
+                                                   user_lp_vma_start,
+                                                   vm->big_page_size,
+                                                   GPU_BALLOC_MAX_ORDER,
+                                                   GPU_ALLOC_GVA_SPACE);
+                if (err)
+                        goto clean_up_allocators;
+        }
+
+        /*
+         * Kernel VMA. Must always exist for an address space.
+         */
+        snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-sys", name);
+        err = __nvgpu_buddy_allocator_init(g, &vm->kernel,
+                                           vm, alloc_name,
+                                           kernel_vma_start,
+                                           kernel_vma_limit - kernel_vma_start,
+                                           SZ_4K,
+                                           GPU_BALLOC_MAX_ORDER,
+                                           kernel_vma_flags);
+        if (err)
+                goto clean_up_allocators;
+
+        vm->mapped_buffers = NULL;
+
+        nvgpu_mutex_init(&vm->update_gmmu_lock);
+        kref_init(&vm->ref);
+        nvgpu_init_list_node(&vm->vm_area_list);
+
+        /*
+         * This is only necessary for channel address spaces. The best way to
+         * distinguish channel address spaces from other address spaces is by
+         * size - if the address space is 4GB or less, it's not a channel.
+         */
+        if (vm->va_limit > SZ_4G) {
+                err = nvgpu_init_sema_pool(vm);
+                if (err)
+                        goto clean_up_allocators;
+        }
+
+        return 0;
+
+clean_up_allocators:
+        if (nvgpu_alloc_initialized(&vm->kernel))
+                nvgpu_alloc_destroy(&vm->kernel);
+        if (nvgpu_alloc_initialized(&vm->user))
+                nvgpu_alloc_destroy(&vm->user);
+        if (nvgpu_alloc_initialized(&vm->user_lp))
+                nvgpu_alloc_destroy(&vm->user_lp);
+clean_up_page_tables:
+        /* Cleans up nvgpu_vm_init_page_tables() */
+        nvgpu_vfree(g, vm->pdb.entries);
+        free_gmmu_pages(vm, &vm->pdb);
+        return err;
+}
+
 void nvgpu_vm_remove_support_nofree(struct vm_gk20a *vm)
 {
 	struct nvgpu_mapped_buf *mapped_buffer;
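To make the layout described in the nvgpu_init_vm() comment concrete, here is
the section arithmetic for one hypothetical unified address space. All of
these numbers are invented for illustration; the logic is the code above.

/*
 * Hypothetical inputs: aperture_size = 128GB, low_hole = 4KB,
 * kernel_reserved = 512MB, p->unify_address_spaces = true.
 *
 *   user_vma   = [4KB, 128GB - 512MB)    -> "gk20a_<name>" buddy allocator
 *   kernel_vma = [128GB - 512MB, 128GB)  -> "gk20a_<name>-sys" allocator
 *
 * kernel_reserved + low_hole < aperture_size, so kernel_vma_flags becomes
 * GPU_ALLOC_GVA_SPACE, and since va_limit > 4GB this VM also gets a
 * semaphore pool via nvgpu_init_sema_pool().
 */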
@@ -111,8 +449,6 @@ void nvgpu_vm_remove_support_nofree(struct vm_gk20a *vm)
 	struct nvgpu_rbtree_node *node = NULL;
 	struct gk20a *g = vm->mm->g;

-	gk20a_dbg_fn("");
-
 	/*
 	 * Do this outside of the update_gmmu_lock since unmapping the semaphore
 	 * pool involves unmapping a GMMU mapping which means aquiring the
@@ -172,12 +508,10 @@ void nvgpu_vm_put(struct vm_gk20a *vm)
 	kref_put(&vm->ref, nvgpu_vm_remove_support_kref);
 }

-void nvgpu_remove_vm(struct vm_gk20a *vm, struct nvgpu_mem *inst_block)
+void nvgpu_vm_remove(struct vm_gk20a *vm, struct nvgpu_mem *inst_block)
 {
 	struct gk20a *g = vm->mm->g;

-	gk20a_dbg_fn("");
-
 	gk20a_free_inst_block(g, inst_block);
 	nvgpu_vm_remove_support_nofree(vm);
 }
@@ -1924,7 +1924,7 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 err_unmap:
 	nvgpu_vm_unmap_buffer(vm, args->offset, NULL);
 err_remove_vm:
-	nvgpu_remove_vm(vm, &mm->perfbuf.inst_block);
+	nvgpu_vm_remove(vm, &mm->perfbuf.inst_block);
 	nvgpu_mutex_release(&g->dbg_sessions_lock);
 	return err;
 }
@@ -1962,7 +1962,7 @@ static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset)
 	err = gk20a_perfbuf_disable_locked(g);

 	nvgpu_vm_unmap_buffer(vm, offset, NULL);
-	nvgpu_remove_vm(vm, &mm->perfbuf.inst_block);
+	nvgpu_vm_remove(vm, &mm->perfbuf.inst_block);

 	g->perfbuf.owner = NULL;
 	g->perfbuf.offset = 0;
@@ -476,9 +476,9 @@ static void gk20a_remove_mm_support(struct mm_gk20a *mm)
 		g->ops.mm.remove_bar2_vm(g);

 	if (g->ops.mm.is_bar1_supported(g))
-		nvgpu_remove_vm(&mm->bar1.vm, &mm->bar1.inst_block);
+		nvgpu_vm_remove(&mm->bar1.vm, &mm->bar1.inst_block);

-	nvgpu_remove_vm(&mm->pmu.vm, &mm->pmu.inst_block);
+	nvgpu_vm_remove(&mm->pmu.vm, &mm->pmu.inst_block);
 	gk20a_free_inst_block(gk20a_from_mm(mm), &mm->hwpm.inst_block);
 	nvgpu_vm_remove_support_nofree(&mm->cde.vm);

@@ -779,52 +779,6 @@ void gk20a_init_mm_ce_context(struct gk20a *g)
 #endif
 }

-static int alloc_gmmu_phys_pages(struct vm_gk20a *vm, u32 order,
-                                 struct gk20a_mm_entry *entry)
-{
-        u32 num_pages = 1 << order;
-        u32 len = num_pages * PAGE_SIZE;
-        int err;
-        struct page *pages;
-        struct gk20a *g = vm->mm->g;
-
-        gk20a_dbg_fn("");
-
-        /* note: mem_desc slightly abused (wrt. alloc_gmmu_pages) */
-
-        pages = alloc_pages(GFP_KERNEL, order);
-        if (!pages) {
-                gk20a_dbg(gpu_dbg_pte, "alloc_pages failed");
-                goto err_out;
-        }
-        entry->mem.priv.sgt = nvgpu_kzalloc(g, sizeof(*entry->mem.priv.sgt));
-        if (!entry->mem.priv.sgt) {
-                gk20a_dbg(gpu_dbg_pte, "cannot allocate sg table");
-                goto err_alloced;
-        }
-        err = sg_alloc_table(entry->mem.priv.sgt, 1, GFP_KERNEL);
-        if (err) {
-                gk20a_dbg(gpu_dbg_pte, "sg_alloc_table failed");
-                goto err_sg_table;
-        }
-        sg_set_page(entry->mem.priv.sgt->sgl, pages, len, 0);
-        entry->mem.cpu_va = page_address(pages);
-        memset(entry->mem.cpu_va, 0, len);
-        entry->mem.size = len;
-        entry->mem.aperture = APERTURE_SYSMEM;
-        FLUSH_CPU_DCACHE(entry->mem.cpu_va,
-                         sg_phys(entry->mem.priv.sgt->sgl), len);
-
-        return 0;
-
-err_sg_table:
-        nvgpu_kfree(vm->mm->g, entry->mem.priv.sgt);
-err_alloced:
-        __free_pages(pages, order);
-err_out:
-        return -ENOMEM;
-}
-
 static void free_gmmu_phys_pages(struct vm_gk20a *vm,
 			struct gk20a_mm_entry *entry)
 {
@@ -857,38 +811,6 @@ static void unmap_gmmu_phys_pages(struct gk20a_mm_entry *entry)
 			entry->mem.priv.sgt->sgl->length);
 }

-static int alloc_gmmu_pages(struct vm_gk20a *vm, u32 order,
-                            struct gk20a_mm_entry *entry)
-{
-        struct gk20a *g = gk20a_from_vm(vm);
-        u32 num_pages = 1 << order;
-        u32 len = num_pages * PAGE_SIZE;
-        int err;
-
-        gk20a_dbg_fn("");
-
-        if (g->is_fmodel)
-                return alloc_gmmu_phys_pages(vm, order, entry);
-
-        /*
-         * On arm32 we're limited by vmalloc space, so we do not map pages by
-         * default.
-         */
-        if (IS_ENABLED(CONFIG_ARM64))
-                err = nvgpu_dma_alloc(g, len, &entry->mem);
-        else
-                err = nvgpu_dma_alloc_flags(g, NVGPU_DMA_NO_KERNEL_MAPPING,
-                                            len, &entry->mem);
-
-
-        if (err) {
-                nvgpu_err(g, "memory allocation failed");
-                return -ENOMEM;
-        }
-
-        return 0;
-}
-
 void free_gmmu_pages(struct vm_gk20a *vm,
 		     struct gk20a_mm_entry *entry)
 {
@@ -955,72 +877,6 @@ void unmap_gmmu_pages(struct gk20a *g, struct gk20a_mm_entry *entry)
 	}
 }

-/*
- * Allocate a phys contig region big enough for a full
- * sized gmmu page table for the given gmmu_page_size.
- * the whole range is zeroed so it's "invalid"/will fault.
- *
- * If a previous entry is supplied, its memory will be used for
- * suballocation for this next entry too, if there is space.
- */
-
-static int gk20a_zalloc_gmmu_page_table(struct vm_gk20a *vm,
-                                 enum gmmu_pgsz_gk20a pgsz_idx,
-                                 const struct gk20a_mmu_level *l,
-                                 struct gk20a_mm_entry *entry,
-                                 struct gk20a_mm_entry *prev_entry)
-{
-        int err = -ENOMEM;
-        int order;
-        struct gk20a *g = gk20a_from_vm(vm);
-        u32 bytes;
-
-        gk20a_dbg_fn("");
-
-        /* allocate enough pages for the table */
-        order = l->hi_bit[pgsz_idx] - l->lo_bit[pgsz_idx] + 1;
-        order += ilog2(l->entry_size);
-        bytes = 1 << order;
-        order -= PAGE_SHIFT;
-        if (order < 0 && prev_entry) {
-                /* try to suballocate from previous chunk */
-                u32 capacity = prev_entry->mem.size / bytes;
-                u32 prev = prev_entry->woffset * sizeof(u32) / bytes;
-                u32 free = capacity - prev - 1;
-
-                gk20a_dbg(gpu_dbg_pte, "cap %d prev %d free %d bytes %d",
-                          capacity, prev, free, bytes);
-
-                if (free) {
-                        memcpy(&entry->mem, &prev_entry->mem,
-                               sizeof(entry->mem));
-                        entry->woffset = prev_entry->woffset
-                                + bytes / sizeof(u32);
-                        err = 0;
-                }
-        }
-
-        if (err) {
-                /* no suballoc space */
-                order = max(0, order);
-                err = alloc_gmmu_pages(vm, order, entry);
-                entry->woffset = 0;
-        }
-
-        gk20a_dbg(gpu_dbg_pte, "entry = 0x%p, addr=%08llx, size %d, woff %x",
-                  entry,
-                  (entry->mem.priv.sgt &&
-                   entry->mem.aperture == APERTURE_SYSMEM) ?
-                  g->ops.mm.get_iova_addr(g, entry->mem.priv.sgt->sgl, 0) : 0,
-                  order, entry->woffset);
-        if (err)
-                return err;
-        entry->pgsz = pgsz_idx;
-        entry->mem.skip_wmb = true;
-
-        return err;
-}
-
 int gk20a_mm_pde_coverage_bit_count(struct vm_gk20a *vm)
 {
 	return vm->mmu_levels[0].lo_bit[0];
@@ -2230,7 +2086,7 @@ static int update_gmmu_level_locked(struct vm_gk20a *vm,
 		next_pte = pte->entries + pde_i;

 		if (!next_pte->mem.size) {
-			err = gk20a_zalloc_gmmu_page_table(vm,
+			err = nvgpu_zalloc_gmmu_page_table(vm,
 				pgsz_idx, next_l, next_pte, prev_pte);
 			if (err)
 				return err;
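The renamed nvgpu_zalloc_gmmu_page_table() keeps the suballocation arithmetic
shown earlier. A worked example, with level parameters invented purely for
illustration:

/*
 * Invented level parameters: hi_bit = 37, lo_bit = 30, entry_size = 8.
 *
 *   order  = 37 - 30 + 1  =  8    (2^8 = 256 entries in the table)
 *   order += ilog2(8)     = 11    (table is 2^11 = 2048 bytes)
 *   bytes  = 1 << 11      = 2048
 *   order -= PAGE_SHIFT   = -1    (table smaller than one 4KB page)
 *
 * order < 0, so when prev_entry is non-NULL the new table is suballocated
 * out of the previous chunk instead of burning a whole page on it.
 */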
@@ -2522,75 +2378,6 @@ const struct gk20a_mmu_level gk20a_mm_levels_128k[] = {
 	{.update_entry = NULL}
 };

-/*
- * Initialize a semaphore pool. Just return successfully if we do not need
- * semaphores (i.e when sync-pts are active).
- */
-static int gk20a_init_sema_pool(struct vm_gk20a *vm)
-{
-        struct nvgpu_semaphore_sea *sema_sea;
-        struct mm_gk20a *mm = vm->mm;
-        struct gk20a *g = mm->g;
-        int err;
-
-        /*
-         * Don't waste the memory on semaphores if we don't need them.
-         */
-        if (g->gpu_characteristics.flags & NVGPU_GPU_FLAGS_HAS_SYNCPOINTS)
-                return 0;
-
-        if (vm->sema_pool)
-                return 0;
-
-        sema_sea = nvgpu_semaphore_sea_create(g);
-        if (!sema_sea)
-                return -ENOMEM;
-
-        vm->sema_pool = nvgpu_semaphore_pool_alloc(sema_sea);
-        if (!vm->sema_pool)
-                return -ENOMEM;
-
-        /*
-         * Allocate a chunk of GPU VA space for mapping the semaphores. We will
-         * do a fixed alloc in the kernel VM so that all channels have the same
-         * RO address range for the semaphores.
-         *
-         * !!! TODO: cleanup.
-         */
-        sema_sea->gpu_va = nvgpu_alloc_fixed(&vm->kernel,
-                                             vm->va_limit -
-                                             mm->channel.kernel_size,
-                                             512 * PAGE_SIZE,
-                                             SZ_4K);
-        if (!sema_sea->gpu_va) {
-                nvgpu_free(&vm->kernel, sema_sea->gpu_va);
-                nvgpu_vm_put(vm);
-                return -ENOMEM;
-        }
-
-        err = nvgpu_semaphore_pool_map(vm->sema_pool, vm);
-        if (err) {
-                nvgpu_semaphore_pool_unmap(vm->sema_pool, vm);
-                nvgpu_free(vm->vma[gmmu_page_size_small],
-                           vm->sema_pool->gpu_va);
-                return err;
-        }
-
-        return 0;
-}
-
-/*
- * Determine if the passed address space can support big pages or not.
- */
-int gk20a_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size)
-{
-        u64 mask = ((u64)vm->big_page_size << 10) - 1;
-
-        if (base & mask || size & mask)
-                return 0;
-        return 1;
-}
-
 /*
  * Attempt to find a reserved memory area to determine PTE size for the passed
  * mapping. If no reserved area can be found use small pages.
@@ -2661,272 +2448,6 @@ enum gmmu_pgsz_gk20a __get_pte_size(struct vm_gk20a *vm, u64 base, u64 size)
 	return gmmu_page_size_small;
 }

-static int init_vm_page_tables(struct vm_gk20a *vm)
-{
-        u32 pde_lo, pde_hi;
-        int err;
-
-        pde_range_from_vaddr_range(vm,
-                                   0, vm->va_limit-1,
-                                   &pde_lo, &pde_hi);
-        vm->pdb.entries = nvgpu_vzalloc(vm->mm->g,
-                                        sizeof(struct gk20a_mm_entry) *
-                                        (pde_hi + 1));
-        vm->pdb.num_entries = pde_hi + 1;
-
-        if (!vm->pdb.entries)
-                return -ENOMEM;
-
-        err = gk20a_zalloc_gmmu_page_table(vm, 0, &vm->mmu_levels[0],
-                                           &vm->pdb, NULL);
-        if (err) {
-                nvgpu_vfree(vm->mm->g, vm->pdb.entries);
-                return err;
-        }
-
-        return 0;
-}
-
-/**
- * nvgpu_init_vm() - Initialize an address space.
- *
- * @mm - Parent MM.
- * @vm - The VM to init.
- * @big_page_size - Size of big pages associated with this VM.
- * @low_hole - The size of the low hole (unaddressable memory at the bottom of
- *             the address space.
- * @kernel_reserved - Space reserved for kernel only allocations.
- * @aperture_size - Total size of the aperture.
- * @big_pages - Ignored. Will be set based on other passed params.
- * @name - Name of the address space.
- *
- * This function initializes an address space according to the following map:
- *
- *     +--+ 0x0
- *     |  |
- *     +--+ @low_hole
- *     |  |
- *     ~  ~   This is the "user" section.
- *     |  |
- *     +--+ @aperture_size - @kernel_reserved
- *     |  |
- *     ~  ~   This is the "kernel" section.
- *     |  |
- *     +--+ @aperture_size
- *
- * The user section is therefor what ever is left over after the @low_hole and
- * @kernel_reserved memory have been portioned out. The @kernel_reserved is
- * always persent at the top of the memory space and the @low_hole is always at
- * the bottom.
- *
- * For certain address spaces a "user" section makes no sense (bar1, etc) so in
- * such cases the @kernel_reserved and @low_hole should sum to exactly
- * @aperture_size.
- */
-int nvgpu_init_vm(struct mm_gk20a *mm,
-                  struct vm_gk20a *vm,
-                  u32 big_page_size,
-                  u64 low_hole,
-                  u64 kernel_reserved,
-                  u64 aperture_size,
-                  bool big_pages,
-                  bool userspace_managed,
-                  char *name)
-{
-        int err;
-        char alloc_name[32];
-        u64 kernel_vma_flags;
-        u64 user_vma_start, user_vma_limit;
-        u64 user_lp_vma_start, user_lp_vma_limit;
-        u64 kernel_vma_start, kernel_vma_limit;
-        struct gk20a *g = mm->g;
-        struct gk20a_platform *p = gk20a_get_platform(g->dev);
-
-        if (WARN_ON(kernel_reserved + low_hole > aperture_size))
-                return -ENOMEM;
-
-        gk20a_dbg_info("Init space for %s: va_limit=0x%llx, "
-                       "big_page_size=0x%x low_hole=0x%llx",
-                       name, aperture_size,
-                       (unsigned int)big_page_size, low_hole);
-
-        vm->mm = mm;
-
-        vm->gmmu_page_sizes[gmmu_page_size_small] = SZ_4K;
-        vm->gmmu_page_sizes[gmmu_page_size_big] = big_page_size;
-        vm->gmmu_page_sizes[gmmu_page_size_kernel] = SZ_4K;
-
-        /* Set up vma pointers. */
-        vm->vma[gmmu_page_size_small] = &vm->user;
-        vm->vma[gmmu_page_size_big] = &vm->user;
-        vm->vma[gmmu_page_size_kernel] = &vm->kernel;
-        if (!p->unify_address_spaces)
-                vm->vma[gmmu_page_size_big] = &vm->user_lp;
-
-        vm->va_start = low_hole;
-        vm->va_limit = aperture_size;
-        vm->big_pages = big_pages;
-
-        vm->big_page_size = vm->gmmu_page_sizes[gmmu_page_size_big];
-        vm->userspace_managed = userspace_managed;
-        vm->mmu_levels = g->ops.mm.get_mmu_levels(g, vm->big_page_size);
-
-        /* Initialize the page table data structures. */
-        err = init_vm_page_tables(vm);
-        if (err)
-                return err;
-
-        /* Setup vma limits. */
-        if (kernel_reserved + low_hole < aperture_size) {
-                if (p->unify_address_spaces) {
-                        user_vma_start = low_hole;
-                        user_vma_limit = vm->va_limit - kernel_reserved;
-                        user_lp_vma_start = user_vma_limit;
-                        user_lp_vma_limit = user_vma_limit;
-                } else {
-                        user_vma_start = low_hole;
-                        user_vma_limit = __nv_gmmu_va_small_page_limit();
-                        user_lp_vma_start = __nv_gmmu_va_small_page_limit();
-                        user_lp_vma_limit = vm->va_limit - kernel_reserved;
-                }
-        } else {
-                user_vma_start = 0;
-                user_vma_limit = 0;
-                user_lp_vma_start = 0;
-                user_lp_vma_limit = 0;
-        }
-        kernel_vma_start = vm->va_limit - kernel_reserved;
-        kernel_vma_limit = vm->va_limit;
-
-        gk20a_dbg_info("user_vma [0x%llx,0x%llx)",
-                       user_vma_start, user_vma_limit);
-        gk20a_dbg_info("user_lp_vma [0x%llx,0x%llx)",
-                       user_lp_vma_start, user_lp_vma_limit);
-        gk20a_dbg_info("kernel_vma [0x%llx,0x%llx)",
-                       kernel_vma_start, kernel_vma_limit);
-
-        if (WARN_ON(user_vma_start > user_vma_limit) ||
-            WARN_ON(user_lp_vma_start > user_lp_vma_limit) ||
-            WARN_ON(kernel_vma_start >= kernel_vma_limit)) {
-                err = -EINVAL;
-                goto clean_up_page_tables;
-        }
-
-        kernel_vma_flags = (kernel_reserved + low_hole) == aperture_size ?
-                0 : GPU_ALLOC_GVA_SPACE;
-
-        /*
-         * A "user" area only makes sense for the GVA spaces. For VMs where
-         * there is no "user" area user_vma_start will be equal to
-         * user_vma_limit (i.e a 0 sized space). In such a situation the kernel
-         * area must be non-zero in length.
-         */
-        if (user_vma_start >= user_vma_limit &&
-            kernel_vma_start >= kernel_vma_limit) {
-                err = -EINVAL;
-                goto clean_up_page_tables;
-        }
-
-        /*
-         * Determine if big pages are possible in this VM. If a split address
-         * space is used then check the user_lp vma instead of the user vma.
-         */
-        if (p->unify_address_spaces)
-                vm->big_pages = gk20a_big_pages_possible(vm, user_vma_start,
-                                        user_vma_limit - user_vma_start);
-        else
-                vm->big_pages = gk20a_big_pages_possible(vm, user_lp_vma_start,
-                                        user_lp_vma_limit - user_lp_vma_start);
-
-        /*
-         * User VMA.
-         */
-        if (user_vma_start < user_vma_limit) {
-                snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s", name);
-                err = __nvgpu_buddy_allocator_init(g, &vm->user,
-                                                   vm, alloc_name,
-                                                   user_vma_start,
-                                                   user_vma_limit -
-                                                   user_vma_start,
-                                                   SZ_4K,
-                                                   GPU_BALLOC_MAX_ORDER,
-                                                   GPU_ALLOC_GVA_SPACE);
-                if (err)
-                        goto clean_up_page_tables;
-        } else {
-                /*
-                 * Make these allocator pointers point to the kernel allocator
-                 * since we still use the legacy notion of page size to choose
-                 * the allocator.
-                 */
-                vm->vma[0] = &vm->kernel;
-                vm->vma[1] = &vm->kernel;
-        }
-
-        /*
-         * User VMA for large pages when a split address range is used.
-         */
-        if (user_lp_vma_start < user_lp_vma_limit) {
-                snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s_lp", name);
-                err = __nvgpu_buddy_allocator_init(g, &vm->user_lp,
-                                                   vm, alloc_name,
-                                                   user_lp_vma_start,
-                                                   user_lp_vma_limit -
-                                                   user_lp_vma_start,
-                                                   vm->big_page_size,
-                                                   GPU_BALLOC_MAX_ORDER,
-                                                   GPU_ALLOC_GVA_SPACE);
-                if (err)
-                        goto clean_up_allocators;
-        }
-
-        /*
-         * Kernel VMA. Must always exist for an address space.
-         */
-        snprintf(alloc_name, sizeof(alloc_name), "gk20a_%s-sys", name);
-        err = __nvgpu_buddy_allocator_init(g, &vm->kernel,
-                                           vm, alloc_name,
-                                           kernel_vma_start,
-                                           kernel_vma_limit - kernel_vma_start,
-                                           SZ_4K,
-                                           GPU_BALLOC_MAX_ORDER,
-                                           kernel_vma_flags);
-        if (err)
-                goto clean_up_allocators;
-
-        vm->mapped_buffers = NULL;
-
-        nvgpu_mutex_init(&vm->update_gmmu_lock);
-        kref_init(&vm->ref);
-        nvgpu_init_list_node(&vm->vm_area_list);
-
-        /*
-         * This is only necessary for channel address spaces. The best way to
-         * distinguish channel address spaces from other address spaces is by
-         * size - if the address space is 4GB or less, it's not a channel.
-         */
-        if (vm->va_limit > SZ_4G) {
-                err = gk20a_init_sema_pool(vm);
-                if (err)
-                        goto clean_up_allocators;
-        }
-
-        return 0;
-
-clean_up_allocators:
-        if (nvgpu_alloc_initialized(&vm->kernel))
-                nvgpu_alloc_destroy(&vm->kernel);
-        if (nvgpu_alloc_initialized(&vm->user))
-                nvgpu_alloc_destroy(&vm->user);
-        if (nvgpu_alloc_initialized(&vm->user_lp))
-                nvgpu_alloc_destroy(&vm->user_lp);
-clean_up_page_tables:
-        /* Cleans up init_vm_page_tables() */
-        nvgpu_vfree(g, vm->pdb.entries);
-        free_gmmu_pages(vm, &vm->pdb);
-        return err;
-}
-
 /* address space interfaces for the gk20a module */
 int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size,
 		u32 flags)
@@ -456,8 +456,6 @@ const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g,
 void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *mem,
 		struct vm_gk20a *vm);

-int gk20a_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size);
-
 extern const struct gk20a_mmu_level gk20a_mm_levels_64k[];
 extern const struct gk20a_mmu_level gk20a_mm_levels_128k[];

@@ -401,7 +401,7 @@ static void gp10b_remove_bar2_vm(struct gk20a *g)
 	struct mm_gk20a *mm = &g->mm;

 	gp10b_replayable_pagefault_buffer_deinit(g);
-	nvgpu_remove_vm(&mm->bar2.vm, &mm->bar2.inst_block);
+	nvgpu_vm_remove(&mm->bar2.vm, &mm->bar2.inst_block);
 }


@@ -63,6 +63,12 @@ struct gk20a_mmu_level {
 	size_t entry_size;
 };

+int nvgpu_zalloc_gmmu_page_table(struct vm_gk20a *vm,
+                                 enum gmmu_pgsz_gk20a pgsz_idx,
+                                 const struct gk20a_mmu_level *l,
+                                 struct gk20a_mm_entry *entry,
+                                 struct gk20a_mm_entry *prev_entry);
+
 /**
  * nvgpu_gmmu_map - Map memory into the GMMU.
  *
@@ -181,6 +181,7 @@ void nvgpu_vm_get(struct vm_gk20a *vm);
 void nvgpu_vm_put(struct vm_gk20a *vm);

 int vm_aspace_id(struct vm_gk20a *vm);
+int nvgpu_big_pages_possible(struct vm_gk20a *vm, u64 base, u64 size);

 /* batching eliminates redundant cache flushes and invalidates */
 void nvgpu_vm_mapping_batch_start(struct vm_gk20a_mapping_batch *batch);
@@ -194,7 +195,6 @@ void nvgpu_vm_mapping_batch_finish_locked(
 int nvgpu_vm_get_buffers(struct vm_gk20a *vm,
 			 struct nvgpu_mapped_buf ***mapped_buffers,
 			 int *num_buffers);
-
 /* put references on the given buffers */
 void nvgpu_vm_put_buffers(struct vm_gk20a *vm,
 			  struct nvgpu_mapped_buf **mapped_buffers,
@@ -220,7 +220,6 @@ struct nvgpu_mapped_buf *__nvgpu_vm_find_mapped_buf_less_than(
 int nvgpu_vm_find_buf(struct vm_gk20a *vm, u64 gpu_va,
 		      struct dma_buf **dmabuf,
 		      u64 *offset);
-
 int nvgpu_insert_mapped_buf(struct vm_gk20a *vm,
 			    struct nvgpu_mapped_buf *mapped_buffer);
 void nvgpu_remove_mapped_buf(struct vm_gk20a *vm,
@@ -228,8 +227,7 @@ void nvgpu_remove_mapped_buf(struct vm_gk20a *vm,

 void nvgpu_vm_remove_support_nofree(struct vm_gk20a *vm);
 void nvgpu_vm_remove_support(struct vm_gk20a *vm);
-
-void nvgpu_remove_vm(struct vm_gk20a *vm, struct nvgpu_mem *inst_block);
+void nvgpu_vm_remove(struct vm_gk20a *vm, struct nvgpu_mem *inst_block);

 int nvgpu_init_vm(struct mm_gk20a *mm,
 		  struct vm_gk20a *vm,
@@ -364,7 +364,7 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
 	if (user_vma_start < user_vma_limit) {
 		snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
 			 gmmu_page_sizes[gmmu_page_size_small] >> 10);
-		if (!gk20a_big_pages_possible(vm, user_vma_start,
+		if (!nvgpu_big_pages_possible(vm, user_vma_start,
 			user_vma_limit - user_vma_start))
 			vm->big_pages = false;

@@ -391,7 +391,7 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,

 	snprintf(name, sizeof(name), "gk20a_as_%dKB-sys",
 		 gmmu_page_sizes[gmmu_page_size_kernel] >> 10);
-	if (!gk20a_big_pages_possible(vm, kernel_vma_start,
+	if (!nvgpu_big_pages_possible(vm, kernel_vma_start,
 		kernel_vma_limit - kernel_vma_start))
 		vm->big_pages = false;
