gpu: nvgpu: vgpu: Alloc kernel address space

JIRA VFND-890

Change-Id: I8eba041b663cead94f2cc3d75d6458d472f1a755
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Signed-off-by: Richard Zhao <rizhao@nvidia.com>
Reviewed-on: http://git-master/r/815378
(cherry picked from commit 4b52329e955758ec4368abcb463ce4e3a2653237)
Reviewed-on: http://git-master/r/820499
Author: Terje Bergstrom
Date:   2015-10-09 11:53:19 -07:00
Commit: 37255d42cc
Parent: fb3a1d31cd

2 changed files with 121 additions and 37 deletions

Changed file 1 of 2:

@@ -144,7 +144,8 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g,
 	/* Circular Buffer */
 	gpu_va = gk20a_vm_alloc_va(ch_vm,
-			gr->global_ctx_buffer[CIRCULAR].mem.size, 0);
+			gr->global_ctx_buffer[CIRCULAR].mem.size,
+			gmmu_page_size_kernel);
 	if (!gpu_va)
 		goto clean_up;
@@ -153,7 +154,8 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g,
 	/* Attribute Buffer */
 	gpu_va = gk20a_vm_alloc_va(ch_vm,
-			gr->global_ctx_buffer[ATTRIBUTE].mem.size, 0);
+			gr->global_ctx_buffer[ATTRIBUTE].mem.size,
+			gmmu_page_size_kernel);
 	if (!gpu_va)
 		goto clean_up;
@@ -162,7 +164,8 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g,
 	/* Page Pool */
 	gpu_va = gk20a_vm_alloc_va(ch_vm,
-			gr->global_ctx_buffer[PAGEPOOL].mem.size, 0);
+			gr->global_ctx_buffer[PAGEPOOL].mem.size,
+			gmmu_page_size_kernel);
 	if (!gpu_va)
 		goto clean_up;
 	g_bfr_va[PAGEPOOL_VA] = gpu_va;
@@ -170,7 +173,8 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g,
 	/* Priv register Access Map */
 	gpu_va = gk20a_vm_alloc_va(ch_vm,
-			gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem.size, 0);
+			gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem.size,
+			gmmu_page_size_kernel);
 	if (!gpu_va)
 		goto clean_up;
 	g_bfr_va[PRIV_ACCESS_MAP_VA] = gpu_va;
@@ -257,7 +261,9 @@ int vgpu_gr_alloc_gr_ctx(struct gk20a *g,
 	gr->ctx_vars.buffer_total_size = gr->ctx_vars.golden_image_size;
 	gr_ctx->mem.size = gr->ctx_vars.buffer_total_size;
-	gr_ctx->mem.gpu_va = gk20a_vm_alloc_va(vm, gr_ctx->mem.size, 0);
+	gr_ctx->mem.gpu_va = gk20a_vm_alloc_va(vm,
+						gr_ctx->mem.size,
+						gmmu_page_size_kernel);
 	if (!gr_ctx->mem.gpu_va) {
 		kfree(gr_ctx);
@@ -351,7 +357,8 @@ static int vgpu_gr_alloc_channel_patch_ctx(struct gk20a *g,
 	patch_ctx->mem.size = 128 * sizeof(u32);
 	patch_ctx->mem.gpu_va = gk20a_vm_alloc_va(ch_vm,
-					patch_ctx->mem.size, 0);
+					patch_ctx->mem.size,
+					gmmu_page_size_kernel);
 	if (!patch_ctx->mem.gpu_va)
 		return -ENOMEM;
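All of the hunks above make the same substitution: the page-size index passed to gk20a_vm_alloc_va changes from the literal 0 (the small-page index) to gmmu_page_size_kernel, so the context buffers are carved out of the new kernel-reserved VA region. A minimal sketch of the resulting call pattern follows; the enum name and values are inferred from this diff (the old code passed 0 for small pages, and gmmu_page_sizes[] now carries three entries), and the helper itself is hypothetical, not driver code:

/* Sketch only: enum values inferred from the diff, helper is hypothetical. */
enum gmmu_pgsz_gk20a {
	gmmu_page_size_small  = 0,
	gmmu_page_size_big    = 1,
	gmmu_page_size_kernel = 2,
	gmmu_nr_page_sizes    = 3,
};

static int map_ctx_buffer_sketch(struct vm_gk20a *ch_vm, u64 size, u64 *gpu_va)
{
	/* every mapping in this file now draws from the kernel VA region */
	*gpu_va = gk20a_vm_alloc_va(ch_vm, size, gmmu_page_size_kernel);
	if (!*gpu_va)
		return -ENOMEM;
	return 0;
}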

Changed file 2 of 2:

@@ -99,7 +99,7 @@ static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm,
 		map_offset = gk20a_vm_alloc_va(vm, size,
 					pgsz_idx);
 		if (!map_offset) {
-			gk20a_err(d, "failed to allocate va space");
+			gk20a_err(d, "failed to allocate va space\n");
 			err = -ENOMEM;
 			goto fail;
 		}
@@ -118,6 +118,20 @@ static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm,
 	p->addr = addr;
 	p->gpu_va = map_offset;
 	p->size = size;
+	if (pgsz_idx == gmmu_page_size_kernel) {
+		u32 page_size = vm->gmmu_page_sizes[pgsz_idx];
+
+		if (page_size == vm->gmmu_page_sizes[gmmu_page_size_small]) {
+			pgsz_idx = gmmu_page_size_small;
+		} else if (page_size ==
+				vm->gmmu_page_sizes[gmmu_page_size_big]) {
+			pgsz_idx = gmmu_page_size_big;
+		} else {
+			gk20a_err(d, "invalid kernel page size %d\n",
+				page_size);
+			goto fail;
+		}
+	}
 	p->pgsz_idx = pgsz_idx;
 	p->iova = mapping ? 1 : 0;
 	p->kind = kind_v;
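The added block folds the kernel page-size index back onto a real hardware page size before the request is sent to the vGPU server, which only distinguishes small and big pages. Restated as a standalone helper (the name is hypothetical; the logic is copied from the hunk above):

/* Hypothetical helper mirroring the remap the hunk performs in place.
 * Returns the hardware page-size index to report to the server, or a
 * negative errno if the kernel VMA uses an unrecognized page size. */
static int vgpu_kernel_pgsz_to_hw_idx(struct vm_gk20a *vm, int pgsz_idx)
{
	u32 page_size;

	if (pgsz_idx != gmmu_page_size_kernel)
		return pgsz_idx;

	page_size = vm->gmmu_page_sizes[gmmu_page_size_kernel];
	if (page_size == vm->gmmu_page_sizes[gmmu_page_size_small])
		return gmmu_page_size_small;
	if (page_size == vm->gmmu_page_sizes[gmmu_page_size_big])
		return gmmu_page_size_big;
	return -EINVAL;
}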
@@ -127,7 +141,8 @@ static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm,
 	p->ctag_offset = ctag_offset;
 	p->clear_ctags = clear_ctags;
 	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
-	if (err || msg.ret)
+	err = err ? err : msg.ret;
+	if (err)
 		goto fail;
 
 	/* TLB invalidate handled on server side */
@@ -214,7 +229,10 @@ static void vgpu_vm_remove_support(struct vm_gk20a *vm)
 	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
 	WARN_ON(err || msg.ret);
 
-	gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]);
-	gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]);
+	gk20a_allocator_destroy(&vm->vma[gmmu_page_size_kernel]);
+	if (vm->vma[gmmu_page_size_small].init)
+		gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]);
+	if (vm->vma[gmmu_page_size_big].init)
+		gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]);
 	mutex_unlock(&vm->update_gmmu_lock);
@@ -258,14 +276,16 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
 	struct tegra_vgpu_as_share_params *p = &msg.params.as_share;
 	struct mm_gk20a *mm = &g->mm;
 	struct vm_gk20a *vm;
-	u64 small_vma_size, large_vma_size;
+	u64 small_vma_start, small_vma_limit, large_vma_start, large_vma_limit,
+		kernel_vma_start, kernel_vma_limit;
 	char name[32];
 	int err, i;
 
 	/* note: keep the page sizes sorted lowest to highest here */
 	u32 gmmu_page_sizes[gmmu_nr_page_sizes] = {
 		SZ_4K,
-		big_page_size ? big_page_size : platform->default_big_page_size
+		big_page_size ? big_page_size : platform->default_big_page_size,
+		SZ_4K
 	};
 
 	gk20a_dbg_fn("");
@@ -288,8 +308,7 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
 	vm->big_page_size = big_page_size;
 
 	vm->va_start = big_page_size << 10;	/* create a one pde hole */
-	vm->va_limit = mm->channel.user_size; /* note this means channel.size
-						 is really just the max */
+	vm->va_limit = mm->channel.user_size + mm->channel.kernel_size;
 
 	msg.cmd = TEGRA_VGPU_CMD_AS_ALLOC_SHARE;
 	msg.handle = platform->virt_handle;
@@ -303,34 +322,88 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
 	vm->handle = p->handle;
 
-	/* First 16GB of the address space goes towards small pages. What ever
-	 * remains is allocated to large pages. */
-	small_vma_size = (u64)16 << 30;
-	large_vma_size = vm->va_limit - small_vma_size;
+	/* setup vma limits */
+	small_vma_start = vm->va_start;
+
+	if (vm->big_pages) {
+		/* First 16GB of the address space goes towards small
+		 * pages. The kernel reserved pages are at the end.
+		 * What ever remains is allocated to large pages.
+		 */
+		small_vma_limit = __nv_gmmu_va_small_page_limit();
+		large_vma_start = small_vma_limit;
+		large_vma_limit = vm->va_limit - mm->channel.kernel_size;
+	} else {
+		small_vma_limit = vm->va_limit - mm->channel.kernel_size;
+		large_vma_start = 0;
+		large_vma_limit = 0;
+	}
+
+	kernel_vma_start = vm->va_limit - mm->channel.kernel_size;
+	kernel_vma_limit = vm->va_limit;
+
+	gk20a_dbg_info(
+		"small_vma=[0x%llx,0x%llx) large_vma=[0x%llx,0x%llx) kernel_vma=[0x%llx,0x%llx)\n",
+		small_vma_start, small_vma_limit,
+		large_vma_start, large_vma_limit,
+		kernel_vma_start, kernel_vma_limit);
+
+	/* check that starts do not exceed limits */
+	WARN_ON(small_vma_start > small_vma_limit);
+	WARN_ON(large_vma_start > large_vma_limit);
+	/* kernel_vma must also be non-zero */
+	WARN_ON(kernel_vma_start >= kernel_vma_limit);
+
+	if (small_vma_start > small_vma_limit ||
+	    large_vma_start > large_vma_limit ||
+	    kernel_vma_start >= kernel_vma_limit) {
+		err = -EINVAL;
+		goto clean_up_share;
+	}
 
-	snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
-		 gmmu_page_sizes[gmmu_page_size_small]>>10);
-	err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_small],
-				     vm, name,
-				     vm->va_start,
-				     small_vma_size - vm->va_start,
-				     SZ_4K,
-				     GPU_BALLOC_MAX_ORDER,
-				     GPU_BALLOC_GVA_SPACE);
-	if (err)
-		goto clean_up_share;
+	if (small_vma_start < small_vma_limit) {
+		snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
+			 gmmu_page_sizes[gmmu_page_size_small] >> 10);
+		err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_small],
+					     vm, name,
+					     small_vma_start,
+					     small_vma_limit - small_vma_start,
+					     SZ_4K,
+					     GPU_BALLOC_MAX_ORDER,
+					     GPU_BALLOC_GVA_SPACE);
+		if (err)
+			goto clean_up_share;
+	}
 
-	snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
-		 gmmu_page_sizes[gmmu_page_size_big]>>10);
-	err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_big],
-				     vm, name,
-				     small_vma_size,
-				     large_vma_size,
-				     big_page_size,
-				     GPU_BALLOC_MAX_ORDER,
-				     GPU_BALLOC_GVA_SPACE);
-	if (err)
-		goto clean_up_small_allocator;
+	if (large_vma_start < large_vma_limit) {
+		snprintf(name, sizeof(name), "gk20a_as_%d-%dKB", as_share->id,
+			 gmmu_page_sizes[gmmu_page_size_big] >> 10);
+		err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_big],
+					     vm, name,
+					     large_vma_start,
+					     large_vma_limit - large_vma_start,
+					     big_page_size,
+					     GPU_BALLOC_MAX_ORDER,
+					     GPU_BALLOC_GVA_SPACE);
+		if (err)
+			goto clean_up_small_allocator;
+	}
+
+	snprintf(name, sizeof(name), "gk20a_as_%dKB-sys",
+		 gmmu_page_sizes[gmmu_page_size_kernel] >> 10);
+	/*
+	 * kernel reserved VMA is at the end of the aperture
+	 */
+	err = __gk20a_allocator_init(&vm->vma[gmmu_page_size_kernel],
+				     vm, name,
+				     kernel_vma_start,
+				     kernel_vma_limit - kernel_vma_start,
+				     SZ_4K,
+				     GPU_BALLOC_MAX_ORDER,
+				     GPU_BALLOC_GVA_SPACE);
+	if (err)
+		goto clean_up_big_allocator;
 
 	vm->mapped_buffers = RB_ROOT;
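To make the resulting split concrete, here is a hedged worked example of the three ranges the hunk above computes when big pages are enabled. The 16 GB small-page boundary is what __nv_gmmu_va_small_page_limit() provides per the comment kept in the hunk; the user and kernel aperture sizes below are illustrative stand-ins, not values taken from the driver:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Illustrative inputs; the driver reads these from the per-VM big
	 * page size and mm->channel.{user,kernel}_size. */
	uint64_t big_page_size = 128 << 10;           /* assumed 128KB big pages */
	uint64_t va_start      = big_page_size << 10; /* one-pde hole, as in the diff */
	uint64_t user_size     = 128ULL << 30;        /* assumed user aperture */
	uint64_t kernel_size   = 512ULL << 20;        /* assumed kernel reservation */
	uint64_t va_limit      = user_size + kernel_size;
	uint64_t small_limit   = 16ULL << 30;         /* assumed __nv_gmmu_va_small_page_limit() */

	/* big pages enabled: small | large | kernel, back to back */
	printf("small  [0x%llx, 0x%llx)\n",
	       (unsigned long long)va_start, (unsigned long long)small_limit);
	printf("large  [0x%llx, 0x%llx)\n",
	       (unsigned long long)small_limit,
	       (unsigned long long)(va_limit - kernel_size));
	printf("kernel [0x%llx, 0x%llx)\n",
	       (unsigned long long)(va_limit - kernel_size),
	       (unsigned long long)va_limit);
	return 0;
}

With big pages disabled, the hunk instead stretches the small range up to va_limit - kernel_size and leaves the large range empty; the kernel range is always the last kernel_size bytes of the aperture.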
@@ -342,7 +415,11 @@ static int vgpu_vm_alloc_share(struct gk20a_as_share *as_share,
 	return 0;
 
+clean_up_big_allocator:
+	if (large_vma_start < large_vma_limit)
+		gk20a_allocator_destroy(&vm->vma[gmmu_page_size_big]);
 clean_up_small_allocator:
-	gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]);
+	if (small_vma_start < small_vma_limit)
+		gk20a_allocator_destroy(&vm->vma[gmmu_page_size_small]);
 clean_up_share:
 	msg.cmd = TEGRA_VGPU_CMD_AS_FREE_SHARE;