diff --git a/drivers/gpu/nvgpu/common/linux/vm.c b/drivers/gpu/nvgpu/common/linux/vm.c
index 5470d9eeb..9238a9dfa 100644
--- a/drivers/gpu/nvgpu/common/linux/vm.c
+++ b/drivers/gpu/nvgpu/common/linux/vm.c
@@ -394,7 +394,7 @@ clean_up:
 	}
 	nvgpu_kfree(g, mapped_buffer);
 	if (va_allocated)
-		gk20a_vm_free_va(vm, map_offset, bfr.size, bfr.pgsz_idx);
+		__nvgpu_vm_free_va(vm, map_offset, bfr.pgsz_idx);
 	if (!IS_ERR(bfr.sgt))
 		gk20a_mm_unpin(g->dev, dmabuf, bfr.sgt);
diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c
index 3bdc905e8..3b3b7a107 100644
--- a/drivers/gpu/nvgpu/common/mm/vm.c
+++ b/drivers/gpu/nvgpu/common/mm/vm.c
@@ -29,6 +29,54 @@ int vm_aspace_id(struct vm_gk20a *vm)
 	return vm->as_share ? vm->as_share->id : -1;
 }
 
+u64 __nvgpu_vm_alloc_va(struct vm_gk20a *vm, u64 size,
+			enum gmmu_pgsz_gk20a pgsz_idx)
+
+{
+	struct gk20a *g = vm->mm->g;
+	struct nvgpu_allocator *vma = NULL;
+	u64 addr;
+	u64 page_size = vm->gmmu_page_sizes[pgsz_idx];
+
+	vma = vm->vma[pgsz_idx];
+
+	if (pgsz_idx >= gmmu_nr_page_sizes) {
+		nvgpu_err(g, "(%s) invalid page size requested", vma->name);
+		return 0;
+	}
+
+	if ((pgsz_idx == gmmu_page_size_big) && !vm->big_pages) {
+		nvgpu_err(g, "(%s) unsupported page size requested", vma->name);
+		return 0;
+	}
+
+	/* Be certain we round up to page_size if needed */
+	size = (size + ((u64)page_size - 1)) & ~((u64)page_size - 1);
+	nvgpu_log(g, gpu_dbg_map, "size=0x%llx @ pgsz=%dKB", size,
+		  vm->gmmu_page_sizes[pgsz_idx] >> 10);
+
+	addr = nvgpu_alloc(vma, size);
+	if (!addr) {
+		nvgpu_err(g, "(%s) oom: sz=0x%llx", vma->name, size);
+		return 0;
+	}
+
+	nvgpu_log(g, gpu_dbg_map, "(%s) addr: 0x%llx", vma->name, addr);
+	return addr;
+}
+
+int __nvgpu_vm_free_va(struct vm_gk20a *vm, u64 addr,
+		       enum gmmu_pgsz_gk20a pgsz_idx)
+{
+	struct gk20a *g = vm->mm->g;
+	struct nvgpu_allocator *vma = vm->vma[pgsz_idx];
+
+	nvgpu_log(g, gpu_dbg_map, "(%s) addr: 0x%llx", vma->name, addr);
+	nvgpu_free(vma, addr);
+
+	return 0;
+}
+
 void nvgpu_vm_mapping_batch_start(struct vm_gk20a_mapping_batch *mapping_batch)
 {
 	memset(mapping_batch, 0, sizeof(*mapping_batch));
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 5051f0283..2642a0b10 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1192,57 +1192,6 @@ static void nvgpu_vm_unmap_user(struct vm_gk20a *vm, u64 offset,
 	nvgpu_mutex_release(&vm->update_gmmu_lock);
 }
 
-u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
-		      u64 size,
-		      enum gmmu_pgsz_gk20a gmmu_pgsz_idx)
-
-{
-	struct nvgpu_allocator *vma = vm->vma[gmmu_pgsz_idx];
-	u64 offset;
-	u64 gmmu_page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx];
-	struct gk20a *g = vm->mm->g;
-
-	if (gmmu_pgsz_idx >= gmmu_nr_page_sizes) {
-		nvgpu_warn(g,
-			   "invalid page size requested in gk20a vm alloc");
-		return 0;
-	}
-
-	if ((gmmu_pgsz_idx == gmmu_page_size_big) && !vm->big_pages) {
-		nvgpu_warn(g, "unsupportd page size requested");
-		return 0;
-
-	}
-
-	/* Be certain we round up to gmmu_page_size if needed */
-	size = (size + ((u64)gmmu_page_size - 1)) & ~((u64)gmmu_page_size - 1);
-	gk20a_dbg_info("size=0x%llx @ pgsz=%dKB", size,
-		       vm->gmmu_page_sizes[gmmu_pgsz_idx]>>10);
-
-	offset = nvgpu_alloc(vma, size);
-	if (!offset) {
-		nvgpu_err(vm->mm->g,
-			  "%s oom: sz=0x%llx", vma->name, size);
-		return 0;
-	}
-
-	gk20a_dbg_fn("%s found addr: 0x%llx", vma->name, offset);
-	return offset;
-}
-
-int gk20a_vm_free_va(struct vm_gk20a *vm,
-		     u64 offset, u64 size,
-		     enum gmmu_pgsz_gk20a pgsz_idx)
-{
-	struct nvgpu_allocator *vma = vm->vma[pgsz_idx];
-
-	gk20a_dbg_info("%s free addr=0x%llx, size=0x%llx",
-		       vma->name, offset, size);
-	nvgpu_free(vma, offset);
-
-	return 0;
-}
-
 int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
 				      u32 flags,
 				      struct buffer_attrs *bfr,
@@ -1313,7 +1262,7 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 
 	/* Allocate (or validate when map_offset != 0) the virtual address. */
 	if (!map_offset) {
-		map_offset = gk20a_vm_alloc_va(vm, size,
+		map_offset = __nvgpu_vm_alloc_va(vm, size,
 					       pgsz_idx);
 		if (!map_offset) {
 			nvgpu_err(g, "failed to allocate va space");
@@ -1364,7 +1313,7 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 	return map_offset;
 fail_validate:
 	if (allocated)
-		gk20a_vm_free_va(vm, map_offset, size, pgsz_idx);
+		__nvgpu_vm_free_va(vm, map_offset, pgsz_idx);
 fail_alloc:
 	nvgpu_err(g, "%s: failed with err=%d\n", __func__, err);
 	return 0;
@@ -1383,7 +1332,7 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
 	struct gk20a *g = gk20a_from_vm(vm);
 
 	if (va_allocated) {
-		err = gk20a_vm_free_va(vm, vaddr, size, pgsz_idx);
+		err = __nvgpu_vm_free_va(vm, vaddr, pgsz_idx);
 		if (err) {
 			nvgpu_err(g, "failed to free va");
 			return;
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 6ddf842a1..276811997 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -412,14 +412,6 @@ int nvgpu_vm_get_compbits_info(struct vm_gk20a *vm,
 			       u32 *mapping_ctagline,
 			       u32 *flags);
 
-u64 gk20a_vm_alloc_va(struct vm_gk20a *vm,
-		      u64 size,
-		      enum gmmu_pgsz_gk20a gmmu_pgsz_idx);
-
-int gk20a_vm_free_va(struct vm_gk20a *vm,
-		     u64 offset, u64 size,
-		     enum gmmu_pgsz_gk20a pgsz_idx);
-
 /* vm-as interface */
 struct nvgpu_as_alloc_space_args;
 struct nvgpu_as_free_space_args;
diff --git a/drivers/gpu/nvgpu/include/nvgpu/vm.h b/drivers/gpu/nvgpu/include/nvgpu/vm.h
index 69c08c77e..fb55483d6 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/vm.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/vm.h
@@ -242,4 +242,17 @@ int nvgpu_init_vm(struct mm_gk20a *mm,
 		  char *name);
 void nvgpu_deinit_vm(struct vm_gk20a *vm);
 
+/*
+ * These are private to the VM code but are unfortunately used by the vgpu
+ * code. They appear to be used as an optimization to reduce the number of
+ * requests sent to the vgpu RM server. Basically, the vgpu implementation of
+ * map_global_ctx_buffers() sends a bunch of VA ranges over to the RM server.
+ * Ideally the RM server could just batch the mappings, but until then these
+ * will be used by the vgpu code.
+ */
+u64 __nvgpu_vm_alloc_va(struct vm_gk20a *vm, u64 size,
+			enum gmmu_pgsz_gk20a pgsz_idx);
+int __nvgpu_vm_free_va(struct vm_gk20a *vm, u64 addr,
+		       enum gmmu_pgsz_gk20a pgsz_idx);
+
 #endif
diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c
index b5c9735c1..cac1db29d 100644
--- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c
+++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c
@@ -40,8 +40,7 @@ static void vgpu_gr_gp10b_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
 	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
 	WARN_ON(err || msg.ret);
 
-	gk20a_vm_free_va(vm, gr_ctx->mem.gpu_va, gr_ctx->mem.size,
-			 gmmu_page_size_kernel);
+	__nvgpu_vm_free_va(vm, gr_ctx->mem.gpu_va, gmmu_page_size_kernel);
 
 	nvgpu_dma_unmap_free(vm, &gr_ctx->t18x.pagepool_ctxsw_buffer);
 	nvgpu_dma_unmap_free(vm, &gr_ctx->t18x.betacb_ctxsw_buffer);
diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c
index 15ff10b9e..f425b7e52 100644
--- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c
+++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_mm_gp10b.c
@@ -81,7 +81,7 @@ static u64 vgpu_gp10b_locked_gmmu_map(struct vm_gk20a *vm,
 
 	/* Allocate (or validate when map_offset != 0) the virtual address. */
 	if (!map_offset) {
-		map_offset = gk20a_vm_alloc_va(vm, size, pgsz_idx);
+		map_offset = __nvgpu_vm_alloc_va(vm, size, pgsz_idx);
 		if (!map_offset) {
 			nvgpu_err(g, "failed to allocate va space");
 			err = -ENOMEM;
diff --git a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
index 42af9ee1c..2198b1153 100644
--- a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c
@@ -156,7 +156,7 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g,
 	/* FIXME: add VPR support */
 
 	/* Circular Buffer */
-	gpu_va = gk20a_vm_alloc_va(ch_vm,
+	gpu_va = __nvgpu_vm_alloc_va(ch_vm,
 			gr->global_ctx_buffer[CIRCULAR].mem.size,
 			gmmu_page_size_kernel);
 
@@ -166,7 +166,7 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g,
 	g_bfr_size[CIRCULAR_VA] = gr->global_ctx_buffer[CIRCULAR].mem.size;
 
 	/* Attribute Buffer */
-	gpu_va = gk20a_vm_alloc_va(ch_vm,
+	gpu_va = __nvgpu_vm_alloc_va(ch_vm,
 			gr->global_ctx_buffer[ATTRIBUTE].mem.size,
 			gmmu_page_size_kernel);
 
@@ -176,7 +176,7 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g,
 	g_bfr_size[ATTRIBUTE_VA] = gr->global_ctx_buffer[ATTRIBUTE].mem.size;
 
 	/* Page Pool */
-	gpu_va = gk20a_vm_alloc_va(ch_vm,
+	gpu_va = __nvgpu_vm_alloc_va(ch_vm,
 			gr->global_ctx_buffer[PAGEPOOL].mem.size,
 			gmmu_page_size_kernel);
 	if (!gpu_va)
@@ -185,7 +185,7 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g,
 	g_bfr_size[PAGEPOOL_VA] = gr->global_ctx_buffer[PAGEPOOL].mem.size;
 
 	/* Priv register Access Map */
-	gpu_va = gk20a_vm_alloc_va(ch_vm,
+	gpu_va = __nvgpu_vm_alloc_va(ch_vm,
 			gr->global_ctx_buffer[PRIV_ACCESS_MAP].mem.size,
 			gmmu_page_size_kernel);
 	if (!gpu_va)
@@ -211,8 +211,8 @@ static int vgpu_gr_map_global_ctx_buffers(struct gk20a *g,
 clean_up:
 	for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) {
 		if (g_bfr_va[i]) {
-			gk20a_vm_free_va(ch_vm, g_bfr_va[i],
-					 g_bfr_size[i], gmmu_page_size_kernel);
+			__nvgpu_vm_free_va(ch_vm, g_bfr_va[i],
+					   gmmu_page_size_kernel);
 			g_bfr_va[i] = 0;
 		}
 	}
@@ -242,8 +242,8 @@ static void vgpu_gr_unmap_global_ctx_buffers(struct channel_gk20a *c)
 
 	for (i = 0; i < NR_GLOBAL_CTX_BUF_VA; i++) {
 		if (g_bfr_va[i]) {
-			gk20a_vm_free_va(ch_vm, g_bfr_va[i], g_bfr_size[i],
-					 gmmu_page_size_kernel);
+			__nvgpu_vm_free_va(ch_vm, g_bfr_va[i],
+					   gmmu_page_size_kernel);
 			g_bfr_va[i] = 0;
 			g_bfr_size[i] = 0;
 		}
@@ -277,7 +277,7 @@ int vgpu_gr_alloc_gr_ctx(struct gk20a *g,
 		return -ENOMEM;
 
 	gr_ctx->mem.size = gr->ctx_vars.buffer_total_size;
-	gr_ctx->mem.gpu_va = gk20a_vm_alloc_va(vm,
+	gr_ctx->mem.gpu_va = __nvgpu_vm_alloc_va(vm,
 						gr_ctx->mem.size,
 						gmmu_page_size_kernel);
 
@@ -296,8 +296,8 @@ int vgpu_gr_alloc_gr_ctx(struct gk20a *g,
 
 	if (unlikely(err)) {
 		nvgpu_err(g, "fail to alloc gr_ctx");
-		gk20a_vm_free_va(vm, gr_ctx->mem.gpu_va,
-				 gr_ctx->mem.size, gmmu_page_size_kernel);
+		__nvgpu_vm_free_va(vm, gr_ctx->mem.gpu_va,
+				   gmmu_page_size_kernel);
 		nvgpu_kfree(g, gr_ctx);
 	} else {
 		gr_ctx->virt_ctx = p->gr_ctx_handle;
@@ -323,8 +323,8 @@ void vgpu_gr_free_gr_ctx(struct gk20a *g, struct vm_gk20a *vm,
 		err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
 		WARN_ON(err || msg.ret);
 
-		gk20a_vm_free_va(vm, gr_ctx->mem.gpu_va, gr_ctx->mem.size,
-				 gmmu_page_size_kernel);
+		__nvgpu_vm_free_va(vm, gr_ctx->mem.gpu_va,
+				   gmmu_page_size_kernel);
 		nvgpu_kfree(g, gr_ctx);
 	}
 }
@@ -349,7 +349,7 @@ static int vgpu_gr_alloc_channel_patch_ctx(struct gk20a *g,
 	gk20a_dbg_fn("");
 
 	patch_ctx->mem.size = 128 * sizeof(u32);
-	patch_ctx->mem.gpu_va = gk20a_vm_alloc_va(ch_vm,
+	patch_ctx->mem.gpu_va = __nvgpu_vm_alloc_va(ch_vm,
 						  patch_ctx->mem.size,
 						  gmmu_page_size_kernel);
 	if (!patch_ctx->mem.gpu_va)
@@ -361,8 +361,8 @@ static int vgpu_gr_alloc_channel_patch_ctx(struct gk20a *g,
 	p->patch_ctx_va = patch_ctx->mem.gpu_va;
 	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
 	if (err || msg.ret) {
-		gk20a_vm_free_va(ch_vm, patch_ctx->mem.gpu_va,
-				 patch_ctx->mem.size, gmmu_page_size_kernel);
+		__nvgpu_vm_free_va(ch_vm, patch_ctx->mem.gpu_va,
+				   gmmu_page_size_kernel);
 		err = -ENOMEM;
 	}
 
@@ -387,8 +387,8 @@ static void vgpu_gr_free_channel_patch_ctx(struct channel_gk20a *c)
 		err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
 		WARN_ON(err || msg.ret);
 
-		gk20a_vm_free_va(ch_vm, patch_ctx->mem.gpu_va,
-				 patch_ctx->mem.size, gmmu_page_size_kernel);
+		__nvgpu_vm_free_va(ch_vm, patch_ctx->mem.gpu_va,
+				   gmmu_page_size_kernel);
 		patch_ctx->mem.gpu_va = 0;
 	}
 }
@@ -413,8 +413,8 @@ static void vgpu_gr_free_channel_pm_ctx(struct channel_gk20a *c)
 	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
 	WARN_ON(err || msg.ret);
 
-	gk20a_vm_free_va(c->vm, pm_ctx->mem.gpu_va, pm_ctx->mem.size,
-			 gmmu_page_size_kernel);
+	__nvgpu_vm_free_va(c->vm, pm_ctx->mem.gpu_va,
+			   gmmu_page_size_kernel);
 	pm_ctx->mem.gpu_va = 0;
 }
 
@@ -1046,7 +1046,7 @@ static int vgpu_gr_update_hwpm_ctxsw_mode(struct gk20a *g,
 
 		/* Allocate buffer if necessary */
 		if (pm_ctx->mem.gpu_va == 0) {
-			pm_ctx->mem.gpu_va = gk20a_vm_alloc_va(ch->vm,
+			pm_ctx->mem.gpu_va = __nvgpu_vm_alloc_va(ch->vm,
 					g->gr.ctx_vars.pm_ctxsw_image_size,
 					gmmu_page_size_kernel);
 
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
index b42fbcb30..b8b5985c1 100644
--- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
@@ -106,7 +106,7 @@ static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm,
 
 	/* Allocate (or validate when map_offset != 0) the virtual address. */
 	if (!map_offset) {
-		map_offset = gk20a_vm_alloc_va(vm, size,
+		map_offset = __nvgpu_vm_alloc_va(vm, size,
 					       pgsz_idx);
 		if (!map_offset) {
 			nvgpu_err(g, "failed to allocate va space\n");
@@ -180,7 +180,7 @@ static void vgpu_locked_gmmu_unmap(struct vm_gk20a *vm,
 	gk20a_dbg_fn("");
 
 	if (va_allocated) {
-		err = gk20a_vm_free_va(vm, vaddr, size, pgsz_idx);
+		err = __nvgpu_vm_free_va(vm, vaddr, pgsz_idx);
 		if (err) {
 			dev_err(dev_from_vm(vm),
 				"failed to free va");
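
For reference, below is a minimal caller-side sketch of the new interface, mirroring how the vgpu paths above pair the two calls. It is illustrative only, not part of the patch, and assumes an in-tree nvgpu build where <nvgpu/vm.h> is on the include path; the example_* function names are made up for this note. The key difference from the old gk20a_vm_free_va() is that __nvgpu_vm_free_va() no longer takes the allocation size, since the underlying nvgpu_allocator tracks it.

/*
 * Illustrative sketch only: reserve and release a kernel-page-size VA range
 * the way the vgpu callers in this patch do. Builds only inside the nvgpu
 * tree; example_* names are hypothetical.
 */
#include <nvgpu/vm.h>

static int example_reserve_va(struct vm_gk20a *vm, u64 size, u64 *out_va)
{
	/* __nvgpu_vm_alloc_va() rounds size up to the page size and returns 0
	 * on failure. */
	u64 va = __nvgpu_vm_alloc_va(vm, size, gmmu_page_size_kernel);

	if (!va)
		return -ENOMEM;

	*out_va = va;
	return 0;
}

static void example_release_va(struct vm_gk20a *vm, u64 va)
{
	/* No size argument any more; the VA allocator tracks it internally. */
	__nvgpu_vm_free_va(vm, va, gmmu_page_size_kernel);
}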