diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu index 04107dbcb..0a60eece0 100644 --- a/drivers/gpu/nvgpu/Makefile.nvgpu +++ b/drivers/gpu/nvgpu/Makefile.nvgpu @@ -45,6 +45,7 @@ nvgpu-y := \ common/mm/page_allocator.o \ common/mm/lockless_allocator.o \ common/mm/gmmu.o \ + common/mm/vm.o \ common/pramin.o \ common/semaphore.o \ common/as.o \ diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_as.c b/drivers/gpu/nvgpu/common/linux/ioctl_as.c index 4bbcedda6..7a24a14f9 100644 --- a/drivers/gpu/nvgpu/common/linux/ioctl_as.c +++ b/drivers/gpu/nvgpu/common/linux/ioctl_as.c @@ -28,6 +28,7 @@ #include "gk20a/gk20a.h" #include "gk20a/platform_gk20a.h" #include "ioctl_as.h" +#include "vm_priv.h" static int gk20a_as_ioctl_bind_channel( struct gk20a_as_share *as_share, @@ -72,7 +73,7 @@ static int gk20a_as_ioctl_map_buffer_ex( { gk20a_dbg_fn(""); - return gk20a_vm_map_buffer(as_share->vm, args->dmabuf_fd, + return nvgpu_vm_map_buffer(as_share->vm, args->dmabuf_fd, &args->offset, args->flags, args->kind, args->buffer_offset, @@ -85,7 +86,7 @@ static int gk20a_as_ioctl_map_buffer( struct nvgpu_as_map_buffer_args *args) { gk20a_dbg_fn(""); - return gk20a_vm_map_buffer(as_share->vm, args->dmabuf_fd, + return nvgpu_vm_map_buffer(as_share->vm, args->dmabuf_fd, &args->o_a.offset, args->flags, NV_KIND_DEFAULT, 0, 0, NULL); @@ -97,7 +98,7 @@ static int gk20a_as_ioctl_unmap_buffer( struct nvgpu_as_unmap_buffer_args *args) { gk20a_dbg_fn(""); - return gk20a_vm_unmap_buffer(as_share->vm, args->offset, NULL); + return nvgpu_vm_unmap_buffer(as_share->vm, args->offset, NULL); } static int gk20a_as_ioctl_map_buffer_batch( @@ -123,7 +124,7 @@ static int gk20a_as_ioctl_map_buffer_batch( args->num_maps > g->gpu_characteristics.map_buffer_batch_limit) return -EINVAL; - gk20a_vm_mapping_batch_start(&batch); + nvgpu_vm_mapping_batch_start(&batch); for (i = 0; i < args->num_unmaps; ++i) { struct nvgpu_as_unmap_buffer_args unmap_args; @@ -134,14 +135,14 @@ static int gk20a_as_ioctl_map_buffer_batch( break; } - err = gk20a_vm_unmap_buffer(as_share->vm, unmap_args.offset, + err = nvgpu_vm_unmap_buffer(as_share->vm, unmap_args.offset, &batch); if (err) break; } if (err) { - gk20a_vm_mapping_batch_finish(as_share->vm, &batch); + nvgpu_vm_mapping_batch_finish(as_share->vm, &batch); args->num_unmaps = i; args->num_maps = 0; @@ -158,7 +159,7 @@ static int gk20a_as_ioctl_map_buffer_batch( break; } - err = gk20a_vm_map_buffer( + err = nvgpu_vm_map_buffer( as_share->vm, map_args.dmabuf_fd, &map_args.offset, map_args.flags, map_args.kind, @@ -169,7 +170,7 @@ static int gk20a_as_ioctl_map_buffer_batch( break; } - gk20a_vm_mapping_batch_finish(as_share->vm, &batch); + nvgpu_vm_mapping_batch_finish(as_share->vm, &batch); if (err) args->num_maps = i; @@ -228,7 +229,7 @@ static int gk20a_as_ioctl_get_buffer_compbits_info( struct nvgpu_as_get_buffer_compbits_info_args *args) { gk20a_dbg_fn(""); - return gk20a_vm_get_compbits_info(as_share->vm, + return nvgpu_vm_get_compbits_info(as_share->vm, args->mapping_gva, &args->compbits_win_size, &args->compbits_win_ctagline, @@ -241,7 +242,7 @@ static int gk20a_as_ioctl_map_buffer_compbits( struct nvgpu_as_map_buffer_compbits_args *args) { gk20a_dbg_fn(""); - return gk20a_vm_map_compbits(as_share->vm, + return nvgpu_vm_map_compbits(as_share->vm, args->mapping_gva, &args->compbits_win_gva, &args->mapping_iova, diff --git a/drivers/gpu/nvgpu/common/linux/vm_priv.h b/drivers/gpu/nvgpu/common/linux/vm_priv.h new file mode 100644 index 000000000..c0fb0ffe4 --- /dev/null +++ 
b/drivers/gpu/nvgpu/common/linux/vm_priv.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __COMMON_LINUX_VM_PRIV_H__ +#define __COMMON_LINUX_VM_PRIV_H__ + +#include + +struct sg_table; +struct dma_buf; + +struct vm_gk20a; +struct vm_gk20a_mapping_batch; + +u64 nvgpu_vm_map(struct vm_gk20a *vm, + struct dma_buf *dmabuf, + u64 offset_align, + u32 flags /*NVGPU_AS_MAP_BUFFER_FLAGS_*/, + int kind, + struct sg_table **sgt, + bool user_mapped, + int rw_flag, + u64 buffer_offset, + u64 mapping_size, + struct vm_gk20a_mapping_batch *mapping_batch); + +int nvgpu_vm_map_compbits(struct vm_gk20a *vm, + u64 mapping_gva, + u64 *compbits_win_gva, + u64 *mapping_iova, + u32 flags); + +/* Note: batch may be NULL if map op is not part of a batch */ +int nvgpu_vm_map_buffer(struct vm_gk20a *vm, + int dmabuf_fd, + u64 *offset_align, + u32 flags, /* NVGPU_AS_MAP_BUFFER_FLAGS_ */ + int kind, + u64 buffer_offset, + u64 mapping_size, + struct vm_gk20a_mapping_batch *batch); + +void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset); + +/* find buffer corresponding to va */ +int nvgpu_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va, + struct dma_buf **dmabuf, + u64 *offset); +#endif diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c new file mode 100644 index 000000000..eaf30fd0f --- /dev/null +++ b/drivers/gpu/nvgpu/common/mm/vm.c @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include + +#include "gk20a/gk20a.h" +#include "gk20a/mm_gk20a.h" + +void nvgpu_vm_mapping_batch_start(struct vm_gk20a_mapping_batch *mapping_batch) +{ + memset(mapping_batch, 0, sizeof(*mapping_batch)); + mapping_batch->gpu_l2_flushed = false; + mapping_batch->need_tlb_invalidate = false; +} + +void nvgpu_vm_mapping_batch_finish_locked( + struct vm_gk20a *vm, struct vm_gk20a_mapping_batch *mapping_batch) +{ + /* hanging kref_put batch pointer? 
*/
+	WARN_ON(vm->kref_put_batch == mapping_batch);
+
+	if (mapping_batch->need_tlb_invalidate) {
+		struct gk20a *g = gk20a_from_vm(vm);
+		g->ops.fb.tlb_invalidate(g, &vm->pdb.mem);
+	}
+}
+
+void nvgpu_vm_mapping_batch_finish(struct vm_gk20a *vm,
+				   struct vm_gk20a_mapping_batch *mapping_batch)
+{
+	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
+	nvgpu_vm_mapping_batch_finish_locked(vm, mapping_batch);
+	nvgpu_mutex_release(&vm->update_gmmu_lock);
+}
+
+void nvgpu_vm_remove_support_nofree(struct vm_gk20a *vm)
+{
+	struct mapped_buffer_node *mapped_buffer;
+	struct vm_reserved_va_node *va_node, *va_node_tmp;
+	struct nvgpu_rbtree_node *node = NULL;
+	struct gk20a *g = vm->mm->g;
+
+	gk20a_dbg_fn("");
+
+	/*
+	 * Do this outside of the update_gmmu_lock since unmapping the semaphore
+	 * pool involves unmapping a GMMU mapping which means acquiring the
+	 * update_gmmu_lock.
+	 */
+	if (!(g->gpu_characteristics.flags & NVGPU_GPU_FLAGS_HAS_SYNCPOINTS)) {
+		if (vm->sema_pool) {
+			nvgpu_semaphore_pool_unmap(vm->sema_pool, vm);
+			nvgpu_semaphore_pool_put(vm->sema_pool);
+		}
+	}
+
+	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
+
+	nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers);
+	while (node) {
+		mapped_buffer = mapped_buffer_from_rbtree_node(node);
+		nvgpu_vm_unmap_locked(mapped_buffer, NULL);
+		nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers);
+	}
+
+	/* destroy remaining reserved memory areas */
+	nvgpu_list_for_each_entry_safe(va_node, va_node_tmp,
+				       &vm->reserved_va_list,
+				       vm_reserved_va_node, reserved_va_list) {
+		nvgpu_list_del(&va_node->reserved_va_list);
+		nvgpu_kfree(vm->mm->g, va_node);
+	}
+
+	nvgpu_deinit_vm(vm);
+
+	nvgpu_mutex_release(&vm->update_gmmu_lock);
+}
+
+void nvgpu_vm_remove_support(struct vm_gk20a *vm)
+{
+	nvgpu_vm_remove_support_nofree(vm);
+	/* vm is not used anymore. release it. */
+	nvgpu_kfree(vm->mm->g, vm);
+}
+
+static void nvgpu_vm_remove_support_kref(struct kref *ref)
+{
+	struct vm_gk20a *vm = container_of(ref, struct vm_gk20a, ref);
+	struct gk20a *g = gk20a_from_vm(vm);
+
+	g->ops.mm.vm_remove(vm);
+}
+
+void nvgpu_vm_get(struct vm_gk20a *vm)
+{
+	kref_get(&vm->ref);
+}
+
+void nvgpu_vm_put(struct vm_gk20a *vm)
+{
+	kref_put(&vm->ref, nvgpu_vm_remove_support_kref);
+}
+
+void gk20a_remove_vm(struct vm_gk20a *vm, struct nvgpu_mem *inst_block)
+{
+	struct gk20a *g = vm->mm->g;
+
+	gk20a_dbg_fn("");
+
+	gk20a_free_inst_block(g, inst_block);
+	nvgpu_vm_remove_support_nofree(vm);
+}
diff --git a/drivers/gpu/nvgpu/common/mm/vm_area.c b/drivers/gpu/nvgpu/common/mm/vm_area.c
new file mode 100644
index 000000000..7b831947f
--- /dev/null
+++ b/drivers/gpu/nvgpu/common/mm/vm_area.c
@@ -0,0 +1,223 @@
+/*
+ * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ */ + +#include +#include + +#include "gk20a/gk20a.h" +#include "gk20a/mm_gk20a.h" + +struct nvgpu_vm_area *nvgpu_vm_area_find(struct vm_gk20a *vm, u64 addr) +{ + struct nvgpu_vm_area *vm_area; + + nvgpu_list_for_each_entry(vm_area, &vm->vm_area_list, + nvgpu_vm_area, vm_area_list) { + if (addr >= vm_area->addr && + addr < (u64)vm_area->addr + (u64)vm_area->size) + return vm_area; + } + + return NULL; +} + +int nvgpu_vm_area_validate_buffer(struct vm_gk20a *vm, + u64 map_addr, u64 map_size, int pgsz_idx, + struct nvgpu_vm_area **pvm_area) +{ + struct gk20a *g = vm->mm->g; + struct nvgpu_vm_area *vm_area; + struct nvgpu_mapped_buf *buffer; + u64 map_end = map_addr + map_size; + + /* can wrap around with insane map_size; zero is disallowed too */ + if (map_end <= map_addr) { + nvgpu_warn(g, "fixed offset mapping with invalid map_size"); + return -EINVAL; + } + + if (map_addr & (vm->gmmu_page_sizes[pgsz_idx] - 1)) { + nvgpu_err(g, "map offset must be buffer page size aligned 0x%llx", + map_addr); + return -EINVAL; + } + + /* Find the space reservation, but it's ok to have none for + * userspace-managed address spaces */ + vm_area = nvgpu_vm_area_find(vm, map_addr); + if (!vm_area && !vm->userspace_managed) { + nvgpu_warn(g, "fixed offset mapping without space allocation"); + return -EINVAL; + } + + /* Mapped area should fit inside va, if there's one */ + if (vm_area && map_end > vm_area->addr + vm_area->size) { + nvgpu_warn(g, "fixed offset mapping size overflows va node"); + return -EINVAL; + } + + /* check that this mapping does not collide with existing + * mappings by checking the buffer with the highest GPU VA + * that is less than our buffer end */ + buffer = __nvgpu_vm_find_mapped_buf_less_than( + vm, map_addr + map_size); + if (buffer && buffer->addr + buffer->size > map_addr) { + nvgpu_warn(g, "overlapping buffer map requested"); + return -EINVAL; + } + + *pvm_area = vm_area; + + return 0; +} + +int nvgpu_vm_area_alloc(struct vm_gk20a *vm, u32 pages, u32 page_size, + u64 *addr, u32 flags) +{ + struct gk20a *g = vm->mm->g; + struct nvgpu_allocator *vma; + struct nvgpu_vm_area *vm_area; + u64 vaddr_start = 0; + int pgsz_idx = gmmu_page_size_small; + + nvgpu_log(g, gpu_dbg_map, + "ADD vm_area: pgsz=%#-8x pages=%-9u addr=%#-14llx flags=0x%x", + page_size, pages, *addr, flags); + + for (; pgsz_idx < gmmu_nr_page_sizes; pgsz_idx++) { + if (vm->gmmu_page_sizes[pgsz_idx] == page_size) + break; + } + + if (pgsz_idx > gmmu_page_size_big) + return -EINVAL; + + if (!vm->big_pages && pgsz_idx == gmmu_page_size_big) + return -EINVAL; + + vm_area = nvgpu_kzalloc(g, sizeof(*vm_area)); + if (!vm_area) + goto clean_up_err; + + vma = vm->vma[pgsz_idx]; + if (flags & NVGPU_AS_ALLOC_SPACE_FLAGS_FIXED_OFFSET) + vaddr_start = nvgpu_alloc_fixed(vma, *addr, + (u64)pages * + (u64)page_size, + page_size); + else + vaddr_start = nvgpu_alloc(vma, + (u64)pages * + (u64)page_size); + + if (!vaddr_start) + goto clean_up_err; + + vm_area->flags = flags; + vm_area->addr = vaddr_start; + vm_area->size = (u64)page_size * (u64)pages; + vm_area->pgsz_idx = pgsz_idx; + nvgpu_init_list_node(&vm_area->buffer_list_head); + nvgpu_init_list_node(&vm_area->vm_area_list); + + nvgpu_mutex_acquire(&vm->update_gmmu_lock); + + if (flags & NVGPU_AS_ALLOC_SPACE_FLAGS_SPARSE) { + u64 map_addr = g->ops.mm.gmmu_map(vm, vaddr_start, + NULL, + 0, + vm_area->size, + pgsz_idx, + 0, + 0, + flags, + gk20a_mem_flag_none, + false, + true, + false, + NULL, + APERTURE_INVALID); + if (!map_addr) { + nvgpu_mutex_release(&vm->update_gmmu_lock); + 
goto clean_up_err; + } + + vm_area->sparse = true; + } + nvgpu_list_add_tail(&vm_area->vm_area_list, &vm->vm_area_list); + + nvgpu_mutex_release(&vm->update_gmmu_lock); + + *addr = vaddr_start; + return 0; + +clean_up_err: + if (vaddr_start) + nvgpu_free(vma, vaddr_start); + if (vm_area) + nvgpu_kfree(g, vm_area); + return -ENOMEM; +} + +int nvgpu_vm_area_free(struct vm_gk20a *vm, u64 addr) +{ + struct gk20a *g = gk20a_from_vm(vm); + struct nvgpu_mapped_buf *buffer, *n; + struct nvgpu_vm_area *vm_area; + + nvgpu_mutex_acquire(&vm->update_gmmu_lock); + vm_area = nvgpu_vm_area_find(vm, addr); + if (!vm_area) { + nvgpu_mutex_release(&vm->update_gmmu_lock); + return 0; + } + nvgpu_list_del(&vm_area->vm_area_list); + nvgpu_mutex_release(&vm->update_gmmu_lock); + + nvgpu_log(g, gpu_dbg_map, + "DEL vm_area: pgsz=%#-8x pages=%-9llu " + "addr=%#-14llx flags=0x%x", + vm->gmmu_page_sizes[vm_area->pgsz_idx], + vm_area->size / vm->gmmu_page_sizes[vm_area->pgsz_idx], + vm_area->addr, + vm_area->flags); + + /* Decrement the ref count on all buffers in this vm_area. This + * allows userspace to let the kernel free mappings that are + * only used by this vm_area. */ + nvgpu_list_for_each_entry_safe(buffer, n, + &vm_area->buffer_list_head, + nvgpu_mapped_buf, buffer_list) { + nvgpu_list_del(&buffer->buffer_list); + kref_put(&buffer->ref, gk20a_vm_unmap_locked_kref); + } + + /* if this was a sparse mapping, free the va */ + if (vm_area->sparse) + g->ops.mm.gmmu_unmap(vm, + vm_area->addr, + vm_area->size, + vm_area->pgsz_idx, + true, + gk20a_mem_flag_none, + true, + NULL); + + nvgpu_free(vm->vma[vm_area->pgsz_idx], vm_area->addr); + nvgpu_kfree(g, vm_area); + + return 0; +} diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c index d5d75be5d..cf95019bc 100644 --- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c @@ -46,6 +46,12 @@ #include #include +/* + * Currently this code uses nvgpu_vm_map() since it takes dmabuf FDs from the + * CDE ioctls. That has to change - instead this needs to take an nvgpu_mem. 
+ */ +#include "common/linux/vm_priv.h" + static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx); static struct gk20a_cde_ctx *gk20a_cde_allocate_context(struct gk20a *g); @@ -1016,8 +1022,8 @@ __releases(&cde_app->mutex) /* map the destination buffer */ - get_dma_buf(compbits_scatter_buf); /* a ref for gk20a_vm_map */ - map_vaddr = gk20a_vm_map(cde_ctx->vm, compbits_scatter_buf, 0, + get_dma_buf(compbits_scatter_buf); /* a ref for nvgpu_vm_map */ + map_vaddr = nvgpu_vm_map(cde_ctx->vm, compbits_scatter_buf, 0, NVGPU_MAP_BUFFER_FLAGS_CACHEABLE_TRUE, compbits_kind, NULL, true, gk20a_mem_flag_none, @@ -1136,7 +1142,7 @@ __releases(&cde_app->mutex) cde_ctx->init_cmd_executed = true; /* unmap the buffers - channel holds references to them now */ - gk20a_vm_unmap(cde_ctx->vm, map_vaddr); + nvgpu_vm_unmap(cde_ctx->vm, map_vaddr); return err; @@ -1144,7 +1150,7 @@ exit_unmap_surface: if (surface) dma_buf_vunmap(compbits_scatter_buf, surface); exit_unmap_vaddr: - gk20a_vm_unmap(cde_ctx->vm, map_vaddr); + nvgpu_vm_unmap(cde_ctx->vm, map_vaddr); exit_idle: gk20a_idle(g); return err; @@ -1277,7 +1283,7 @@ err_init_cde_img: nvgpu_gmmu_unmap(ch->vm, &g->gr.compbit_store.mem, vaddr); err_map_backingstore: err_alloc_gpfifo: - gk20a_vm_put(ch->vm); + nvgpu_vm_put(ch->vm); err_commit_va: err_get_gk20a_channel: nvgpu_release_firmware(g, img); diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 313584682..b7fb363e0 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -38,6 +38,13 @@ #include +/* + * This is required for nvgpu_vm_find_buffer() which is used in the tracing + * code. Once we can get and access userspace buffers without requiring + * direct dma_buf usage this can be removed. + */ +#include "common/linux/vm_priv.h" + /* * Although channels do have pointers back to the gk20a struct that they were * created under in cases where the driver is killed that pointer can be bad. @@ -550,7 +557,7 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force) /* * When releasing the channel we unbind the VM - so release the ref. 
*/ - gk20a_vm_put(ch_vm); + nvgpu_vm_put(ch_vm); nvgpu_spinlock_acquire(&ch->update_fn_lock); ch->update_fn = NULL; @@ -1399,7 +1406,7 @@ static void trace_write_pushbuffer(struct channel_gk20a *c, int err; words = pbdma_gp_entry1_length_v(g->entry1); - err = gk20a_vm_find_buffer(c->vm, gpu_va, &dmabuf, &offset); + err = nvgpu_vm_find_buffer(c->vm, gpu_va, &dmabuf, &offset); if (!err) mem = dma_buf_vmap(dmabuf); } @@ -1901,7 +1908,7 @@ static int gk20a_channel_add_job(struct channel_gk20a *c, bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(c); if (!skip_buffer_refcounting) { - err = gk20a_vm_get_buffers(vm, &mapped_buffers, + err = nvgpu_vm_get_buffers(vm, &mapped_buffers, &num_mapped_buffers); if (err) return err; @@ -1940,7 +1947,7 @@ static int gk20a_channel_add_job(struct channel_gk20a *c, return 0; err_put_buffers: - gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers); + nvgpu_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers); return err; } @@ -2039,7 +2046,7 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c, } if (job->num_mapped_buffers) - gk20a_vm_put_buffers(vm, job->mapped_buffers, + nvgpu_vm_put_buffers(vm, job->mapped_buffers, job->num_mapped_buffers); /* Remove job from channel's job list before we close the diff --git a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c index a08eb047f..5351750a8 100644 --- a/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/dbg_gpu_gk20a.c @@ -26,6 +26,7 @@ #include #include +#include #include "gk20a.h" #include "gk20a/platform_gk20a.h" @@ -37,6 +38,14 @@ #include #include +/* + * Currently this code uses nvgpu_vm_map_buffer() since it takes dmabuf FDs from + * the dbg ioctls. That has to change; this needs to hide the usage of dmabufs + * in Linux specific code. All core driver usage of mapping must be done through + * nvgpu_gmmu_map(). 
+ */
+#include "common/linux/vm_priv.h"
+
 /*
  * API to get first channel from the list of all channels
  * bound to the debug session
@@ -1844,7 +1853,7 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 		return -EBUSY;
 	}
 
-	err = gk20a_init_vm(mm, vm, big_page_size,
+	err = nvgpu_init_vm(mm, vm, big_page_size,
 			big_page_size << 10,
 			NV_MM_DEFAULT_KERNEL_SIZE,
 			NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE,
@@ -1860,7 +1869,7 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 
 	g->ops.mm.init_inst_block(&mm->perfbuf.inst_block, vm, 0);
 
-	err = gk20a_vm_map_buffer(vm,
+	err = nvgpu_vm_map_buffer(vm,
 			args->dmabuf_fd,
 			&args->offset,
 			0,
@@ -1913,7 +1922,7 @@ static int gk20a_perfbuf_map(struct dbg_session_gk20a *dbg_s,
 	return 0;
 
 err_unmap:
-	gk20a_vm_unmap_buffer(vm, args->offset, NULL);
+	nvgpu_vm_unmap_buffer(vm, args->offset, NULL);
 err_remove_vm:
 	gk20a_remove_vm(vm, &mm->perfbuf.inst_block);
 	nvgpu_mutex_release(&g->dbg_sessions_lock);
@@ -1952,7 +1961,7 @@ static int gk20a_perfbuf_release_locked(struct gk20a *g, u64 offset)
 
 	err = gk20a_perfbuf_disable_locked(g);
 
-	gk20a_vm_unmap_buffer(vm, offset, NULL);
+	nvgpu_vm_unmap_buffer(vm, offset, NULL);
 	gk20a_remove_vm(vm, &mm->perfbuf.inst_block);
 
 	g->perfbuf.owner = NULL;
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 4d41f9ff5..9bd078945 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -3182,14 +3182,14 @@ int gk20a_alloc_obj_ctx(struct channel_gk20a *c,
 	} else {
 		if (!tsg->tsg_gr_ctx) {
 			tsg->vm = c->vm;
-			gk20a_vm_get(tsg->vm);
+			nvgpu_vm_get(tsg->vm);
 			err = gr_gk20a_alloc_tsg_gr_ctx(g, tsg,
 							args->class_num,
 							args->flags);
 			if (err) {
 				nvgpu_err(g,
 					"fail to allocate TSG gr ctx buffer");
-				gk20a_vm_put(tsg->vm);
+				nvgpu_vm_put(tsg->vm);
 				tsg->vm = NULL;
 				goto out;
 			}
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 201c2090e..72a3ee13f 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -55,6 +55,12 @@
 #include
 #include
 
+/*
+ * Necessary while transitioning to less coupled code. Will be removed once
+ * all the common APIs no longer have Linux stuff in them.
+ */ +#include "common/linux/vm_priv.h" + #if defined(CONFIG_GK20A_VIDMEM) static void gk20a_vidmem_clear_mem_worker(struct work_struct *work); #endif @@ -177,8 +183,6 @@ struct gk20a_vidmem_buf { void (*dmabuf_priv_delete)(void *); }; -static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm); - static int gk20a_comptaglines_alloc(struct gk20a_comptag_allocator *allocator, u32 *offset, u32 len) { @@ -460,16 +464,6 @@ static int gk20a_init_mm_reset_enable_hw(struct gk20a *g) return 0; } -void gk20a_remove_vm(struct vm_gk20a *vm, struct nvgpu_mem *inst_block) -{ - struct gk20a *g = vm->mm->g; - - gk20a_dbg_fn(""); - - gk20a_free_inst_block(g, inst_block); - gk20a_vm_remove_support_nofree(vm); -} - static void gk20a_vidmem_destroy(struct gk20a *g) { #if defined(CONFIG_GK20A_VIDMEM) @@ -487,7 +481,7 @@ static void gk20a_remove_mm_ce_support(struct mm_gk20a *mm) mm->vidmem.ce_ctx_id = (u32)~0; - gk20a_vm_remove_support_nofree(&mm->ce.vm); + nvgpu_vm_remove_support_nofree(&mm->ce.vm); } @@ -503,7 +497,7 @@ static void gk20a_remove_mm_support(struct mm_gk20a *mm) gk20a_remove_vm(&mm->pmu.vm, &mm->pmu.inst_block); gk20a_free_inst_block(gk20a_from_mm(mm), &mm->hwpm.inst_block); - gk20a_vm_remove_support_nofree(&mm->cde.vm); + nvgpu_vm_remove_support_nofree(&mm->cde.vm); gk20a_semaphore_sea_destroy(g); gk20a_vidmem_destroy(g); @@ -1102,7 +1096,7 @@ static struct vm_reserved_va_node *addr_to_reservation(struct vm_gk20a *vm, return NULL; } -int gk20a_vm_get_buffers(struct vm_gk20a *vm, +int nvgpu_vm_get_buffers(struct vm_gk20a *vm, struct mapped_buffer_node ***mapped_buffers, int *num_buffers) { @@ -1151,37 +1145,10 @@ static void gk20a_vm_unmap_locked_kref(struct kref *ref) { struct mapped_buffer_node *mapped_buffer = container_of(ref, struct mapped_buffer_node, ref); - gk20a_vm_unmap_locked(mapped_buffer, mapped_buffer->vm->kref_put_batch); + nvgpu_vm_unmap_locked(mapped_buffer, mapped_buffer->vm->kref_put_batch); } -void gk20a_vm_mapping_batch_start(struct vm_gk20a_mapping_batch *mapping_batch) -{ - memset(mapping_batch, 0, sizeof(*mapping_batch)); - mapping_batch->gpu_l2_flushed = false; - mapping_batch->need_tlb_invalidate = false; -} - -void gk20a_vm_mapping_batch_finish_locked( - struct vm_gk20a *vm, struct vm_gk20a_mapping_batch *mapping_batch) -{ - /* hanging kref_put batch pointer? 
*/ - WARN_ON(vm->kref_put_batch == mapping_batch); - - if (mapping_batch->need_tlb_invalidate) { - struct gk20a *g = gk20a_from_vm(vm); - g->ops.fb.tlb_invalidate(g, &vm->pdb.mem); - } -} - -void gk20a_vm_mapping_batch_finish(struct vm_gk20a *vm, - struct vm_gk20a_mapping_batch *mapping_batch) -{ - nvgpu_mutex_acquire(&vm->update_gmmu_lock); - gk20a_vm_mapping_batch_finish_locked(vm, mapping_batch); - nvgpu_mutex_release(&vm->update_gmmu_lock); -} - -void gk20a_vm_put_buffers(struct vm_gk20a *vm, +void nvgpu_vm_put_buffers(struct vm_gk20a *vm, struct mapped_buffer_node **mapped_buffers, int num_buffers) { @@ -1192,7 +1159,7 @@ void gk20a_vm_put_buffers(struct vm_gk20a *vm, return; nvgpu_mutex_acquire(&vm->update_gmmu_lock); - gk20a_vm_mapping_batch_start(&batch); + nvgpu_vm_mapping_batch_start(&batch); vm->kref_put_batch = &batch; for (i = 0; i < num_buffers; ++i) @@ -1200,13 +1167,13 @@ void gk20a_vm_put_buffers(struct vm_gk20a *vm, gk20a_vm_unmap_locked_kref); vm->kref_put_batch = NULL; - gk20a_vm_mapping_batch_finish_locked(vm, &batch); + nvgpu_vm_mapping_batch_finish_locked(vm, &batch); nvgpu_mutex_release(&vm->update_gmmu_lock); nvgpu_big_free(vm->mm->g, mapped_buffers); } -static void gk20a_vm_unmap_user(struct vm_gk20a *vm, u64 offset, +static void nvgpu_vm_unmap_user(struct vm_gk20a *vm, u64 offset, struct vm_gk20a_mapping_batch *batch) { struct gk20a *g = vm->mm->g; @@ -1650,7 +1617,7 @@ static enum nvgpu_aperture gk20a_dmabuf_aperture(struct gk20a *g, } } -static u64 gk20a_vm_map_duplicate_locked(struct vm_gk20a *vm, +static u64 nvgpu_vm_map_duplicate_locked(struct vm_gk20a *vm, struct dma_buf *dmabuf, u64 offset_align, u32 flags, @@ -1997,7 +1964,7 @@ static u64 gk20a_mm_get_align(struct gk20a *g, struct scatterlist *sgl, return align; } -u64 gk20a_vm_map(struct vm_gk20a *vm, +u64 nvgpu_vm_map(struct vm_gk20a *vm, struct dma_buf *dmabuf, u64 offset_align, u32 flags /*NVGPU_AS_MAP_BUFFER_FLAGS_*/, @@ -2038,7 +2005,7 @@ u64 gk20a_vm_map(struct vm_gk20a *vm, /* check if this buffer is already mapped */ if (!vm->userspace_managed) { - map_offset = gk20a_vm_map_duplicate_locked( + map_offset = nvgpu_vm_map_duplicate_locked( vm, dmabuf, offset_align, flags, kind, sgt, user_mapped, rw_flag); @@ -2256,7 +2223,7 @@ clean_up: return 0; } -int gk20a_vm_get_compbits_info(struct vm_gk20a *vm, +int nvgpu_vm_get_compbits_info(struct vm_gk20a *vm, u64 mapping_gva, u64 *compbits_win_size, u32 *compbits_win_ctagline, @@ -2298,7 +2265,7 @@ int gk20a_vm_get_compbits_info(struct vm_gk20a *vm, } -int gk20a_vm_map_compbits(struct vm_gk20a *vm, +int nvgpu_vm_map_compbits(struct vm_gk20a *vm, u64 mapping_gva, u64 *compbits_win_gva, u64 *mapping_iova, @@ -3059,7 +3026,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm, } /* NOTE! 
mapped_buffers lock must be held */ -void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer, +void nvgpu_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer, struct vm_gk20a_mapping_batch *batch) { struct vm_gk20a *vm = mapped_buffer->vm; @@ -3115,7 +3082,7 @@ void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer, return; } -void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset) +void nvgpu_vm_unmap(struct vm_gk20a *vm, u64 offset) { struct gk20a *g = vm->mm->g; struct mapped_buffer_node *mapped_buffer; @@ -3148,76 +3115,6 @@ static void gk20a_vm_free_entries(struct vm_gk20a *vm, parent->entries = NULL; } -static void gk20a_vm_remove_support_nofree(struct vm_gk20a *vm) -{ - struct mapped_buffer_node *mapped_buffer; - struct vm_reserved_va_node *va_node, *va_node_tmp; - struct nvgpu_rbtree_node *node = NULL; - struct gk20a *g = vm->mm->g; - - gk20a_dbg_fn(""); - - /* - * Do this outside of the update_gmmu_lock since unmapping the semaphore - * pool involves unmapping a GMMU mapping which means aquiring the - * update_gmmu_lock. - */ - if (!(g->gpu_characteristics.flags & NVGPU_GPU_FLAGS_HAS_SYNCPOINTS)) { - if (vm->sema_pool) { - nvgpu_semaphore_pool_unmap(vm->sema_pool, vm); - nvgpu_semaphore_pool_put(vm->sema_pool); - } - } - - nvgpu_mutex_acquire(&vm->update_gmmu_lock); - - /* TBD: add a flag here for the unmap code to recognize teardown - * and short-circuit any otherwise expensive operations. */ - - nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers); - while (node) { - mapped_buffer = mapped_buffer_from_rbtree_node(node); - gk20a_vm_unmap_locked(mapped_buffer, NULL); - nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers); - } - - /* destroy remaining reserved memory areas */ - nvgpu_list_for_each_entry_safe(va_node, va_node_tmp, - &vm->reserved_va_list, - vm_reserved_va_node, reserved_va_list) { - nvgpu_list_del(&va_node->reserved_va_list); - nvgpu_kfree(vm->mm->g, va_node); - } - - gk20a_deinit_vm(vm); - - nvgpu_mutex_release(&vm->update_gmmu_lock); -} - -void gk20a_vm_remove_support(struct vm_gk20a *vm) -{ - gk20a_vm_remove_support_nofree(vm); - /* vm is not used anymore. release it. */ - nvgpu_kfree(vm->mm->g, vm); -} - -static void gk20a_vm_remove_support_kref(struct kref *ref) -{ - struct vm_gk20a *vm = container_of(ref, struct vm_gk20a, ref); - struct gk20a *g = gk20a_from_vm(vm); - g->ops.mm.vm_remove(vm); -} - -void gk20a_vm_get(struct vm_gk20a *vm) -{ - kref_get(&vm->ref); -} - -void gk20a_vm_put(struct vm_gk20a *vm) -{ - kref_put(&vm->ref, gk20a_vm_remove_support_kref); -} - const struct gk20a_mmu_level gk20a_mm_levels_64k[] = { {.hi_bit = {NV_GMMU_VA_RANGE-1, NV_GMMU_VA_RANGE-1}, .lo_bit = {26, 26}, @@ -3284,7 +3181,7 @@ static int gk20a_init_sema_pool(struct vm_gk20a *vm) SZ_4K); if (!sema_sea->gpu_va) { nvgpu_free(&vm->kernel, sema_sea->gpu_va); - gk20a_vm_put(vm); + nvgpu_vm_put(vm); return -ENOMEM; } @@ -3408,7 +3305,7 @@ static int init_vm_page_tables(struct vm_gk20a *vm) } /** - * gk20a_init_vm() - Initialize an address space. + * nvgpu_init_vm() - Initialize an address space. * * @mm - Parent MM. * @vm - The VM to init. @@ -3443,7 +3340,7 @@ static int init_vm_page_tables(struct vm_gk20a *vm) * such cases the @kernel_reserved and @low_hole should sum to exactly * @aperture_size. 
*/ -int gk20a_init_vm(struct mm_gk20a *mm, +int nvgpu_init_vm(struct mm_gk20a *mm, struct vm_gk20a *vm, u32 big_page_size, u64 low_hole, @@ -3683,7 +3580,7 @@ int gk20a_vm_alloc_share(struct gk20a_as_share *as_share, u32 big_page_size, snprintf(name, sizeof(name), "as_%d", as_share->id); - err = gk20a_init_vm(mm, vm, big_page_size, + err = nvgpu_init_vm(mm, vm, big_page_size, big_page_size << 10, mm->channel.kernel_size, mm->channel.user_size + mm->channel.kernel_size, @@ -3701,7 +3598,7 @@ int gk20a_vm_release_share(struct gk20a_as_share *as_share) vm->as_share = NULL; as_share->vm = NULL; - gk20a_vm_put(vm); + nvgpu_vm_put(vm); return 0; } @@ -3864,7 +3761,7 @@ int __gk20a_vm_bind_channel(struct vm_gk20a *vm, struct channel_gk20a *ch) gk20a_dbg_fn(""); - gk20a_vm_get(vm); + nvgpu_vm_get(vm); ch->vm = vm; err = channel_gk20a_commit_va(ch); if (err) @@ -3960,7 +3857,7 @@ out: } -int gk20a_vm_map_buffer(struct vm_gk20a *vm, +int nvgpu_vm_map_buffer(struct vm_gk20a *vm, int dmabuf_fd, u64 *offset_align, u32 flags, /*NVGPU_AS_MAP_BUFFER_FLAGS_*/ @@ -3989,7 +3886,7 @@ int gk20a_vm_map_buffer(struct vm_gk20a *vm, return err; } - ret_va = gk20a_vm_map(vm, dmabuf, *offset_align, + ret_va = nvgpu_vm_map(vm, dmabuf, *offset_align, flags, kind, NULL, true, gk20a_mem_flag_none, buffer_offset, @@ -4005,16 +3902,16 @@ int gk20a_vm_map_buffer(struct vm_gk20a *vm, return err; } -int gk20a_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset, +int nvgpu_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset, struct vm_gk20a_mapping_batch *batch) { gk20a_dbg_fn(""); - gk20a_vm_unmap_user(vm, offset, batch); + nvgpu_vm_unmap_user(vm, offset, batch); return 0; } -void gk20a_deinit_vm(struct vm_gk20a *vm) +void nvgpu_deinit_vm(struct vm_gk20a *vm) { if (nvgpu_alloc_initialized(&vm->kernel)) nvgpu_alloc_destroy(&vm->kernel); @@ -4069,7 +3966,7 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm) mm->bar1.aperture_size = bar1_aperture_size_mb_gk20a() << 20; gk20a_dbg_info("bar1 vm size = 0x%x", mm->bar1.aperture_size); - gk20a_init_vm(mm, vm, + nvgpu_init_vm(mm, vm, big_page_size, SZ_4K, /* Low hole */ mm->bar1.aperture_size - SZ_4K, /* Kernel reserved. 
*/ @@ -4085,7 +3982,7 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm) return 0; clean_up_va: - gk20a_deinit_vm(vm); + nvgpu_deinit_vm(vm); return err; } @@ -4108,7 +4005,7 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm) mm->pmu.aperture_size = GK20A_PMU_VA_SIZE; gk20a_dbg_info("pmu vm size = 0x%x", mm->pmu.aperture_size); - gk20a_init_vm(mm, vm, big_page_size, + nvgpu_init_vm(mm, vm, big_page_size, low_hole, aperture_size - low_hole, aperture_size, @@ -4124,7 +4021,7 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm) return 0; clean_up_va: - gk20a_deinit_vm(vm); + nvgpu_deinit_vm(vm); return err; } @@ -4149,7 +4046,7 @@ static int gk20a_init_cde_vm(struct mm_gk20a *mm) struct gk20a *g = gk20a_from_mm(mm); u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size; - return gk20a_init_vm(mm, vm, big_page_size, + return nvgpu_init_vm(mm, vm, big_page_size, big_page_size << 10, NV_MM_DEFAULT_KERNEL_SIZE, NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE, @@ -4162,7 +4059,7 @@ static int gk20a_init_ce_vm(struct mm_gk20a *mm) struct gk20a *g = gk20a_from_mm(mm); u32 big_page_size = gk20a_get_platform(g->dev)->default_big_page_size; - return gk20a_init_vm(mm, vm, big_page_size, + return nvgpu_init_vm(mm, vm, big_page_size, big_page_size << 10, NV_MM_DEFAULT_KERNEL_SIZE, NV_MM_DEFAULT_KERNEL_SIZE + NV_MM_DEFAULT_USER_SIZE, @@ -4399,7 +4296,7 @@ hw_was_off: gk20a_idle_nosuspend(g->dev); } -int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va, +int nvgpu_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va, struct dma_buf **dmabuf, u64 *offset) { @@ -4503,7 +4400,7 @@ void gk20a_init_mm(struct gpu_ops *gops) { gops->mm.gmmu_map = gk20a_locked_gmmu_map; gops->mm.gmmu_unmap = gk20a_locked_gmmu_unmap; - gops->mm.vm_remove = gk20a_vm_remove_support; + gops->mm.vm_remove = nvgpu_vm_remove_support; gops->mm.vm_alloc_share = gk20a_vm_alloc_share; gops->mm.vm_bind_channel = gk20a_vm_bind_channel; gops->mm.fb_flush = gk20a_mm_fb_flush; diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 0a102cb26..331843cca 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h @@ -26,6 +26,7 @@ #include #include +#include #include #include #include @@ -116,13 +117,6 @@ gk20a_buffer_state_from_list(struct nvgpu_list_node *node) ((uintptr_t)node - offsetof(struct gk20a_buffer_state, list)); }; -enum gmmu_pgsz_gk20a { - gmmu_page_size_small = 0, - gmmu_page_size_big = 1, - gmmu_page_size_kernel = 2, - gmmu_nr_page_sizes = 3, -}; - struct gk20a_comptags { u32 offset; u32 lines; @@ -130,15 +124,6 @@ struct gk20a_comptags { bool user_mappable; }; -struct gk20a_mm_entry { - /* backing for */ - struct nvgpu_mem mem; - u32 woffset; /* if >0, mem is a shadow copy, owned by another entry */ - int pgsz; - struct gk20a_mm_entry *entries; - int num_entries; -}; - struct priv_cmd_queue { struct nvgpu_mem mem; u32 size; /* num of entries in words */ @@ -214,84 +199,6 @@ vm_reserved_va_node_from_reserved_va_list(struct nvgpu_list_node *node) ((uintptr_t)node - offsetof(struct vm_reserved_va_node, reserved_va_list)); }; -struct gk20a_mmu_level { - int hi_bit[2]; - int lo_bit[2]; - int (*update_entry)(struct vm_gk20a *vm, - struct gk20a_mm_entry *pte, - u32 i, u32 gmmu_pgsz_idx, - struct scatterlist **sgl, - u64 *offset, - u64 *iova, - u32 kind_v, u64 *ctag, - bool cacheable, bool unmapped_pte, - int rw_flag, bool sparse, bool priv, - enum nvgpu_aperture aperture); - size_t entry_size; -}; - -/* map/unmap batch state */ -struct 
vm_gk20a_mapping_batch -{ - bool gpu_l2_flushed; - bool need_tlb_invalidate; -}; - -struct vm_gk20a { - struct mm_gk20a *mm; - struct gk20a_as_share *as_share; /* as_share this represents */ - - u64 va_start; - u64 va_limit; - - int num_user_mapped_buffers; - - bool big_pages; /* enable large page support */ - bool enable_ctag; - bool mapped; - - u32 big_page_size; - - bool userspace_managed; - - const struct gk20a_mmu_level *mmu_levels; - - struct kref ref; - - struct nvgpu_mutex update_gmmu_lock; - - struct gk20a_mm_entry pdb; - - /* - * These structs define the address spaces. In some cases it's possible - * to merge address spaces (user and user_lp) and in other cases it's - * not. vma[] allows the code to be agnostic to this by always using - * address spaces through this pointer array. - */ - struct nvgpu_allocator *vma[gmmu_nr_page_sizes]; - struct nvgpu_allocator kernel; - struct nvgpu_allocator user; - struct nvgpu_allocator user_lp; - - struct nvgpu_rbtree_node *mapped_buffers; - - struct nvgpu_list_node reserved_va_list; - -#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION - u64 handle; -#endif - u32 gmmu_page_sizes[gmmu_nr_page_sizes]; - - /* if non-NULL, kref_put will use this batch when - unmapping. Must hold vm->update_gmmu_lock. */ - struct vm_gk20a_mapping_batch *kref_put_batch; - - /* - * Each address space needs to have a semaphore pool. - */ - struct nvgpu_semaphore_pool *sema_pool; -}; - struct gk20a; struct channel_gk20a; @@ -562,57 +469,13 @@ struct sg_table *gk20a_mm_pin(struct device *dev, struct dma_buf *dmabuf); void gk20a_mm_unpin(struct device *dev, struct dma_buf *dmabuf, struct sg_table *sgt); -u64 gk20a_vm_map(struct vm_gk20a *vm, - struct dma_buf *dmabuf, - u64 offset_align, - u32 flags /*NVGPU_AS_MAP_BUFFER_FLAGS_*/, - int kind, - struct sg_table **sgt, - bool user_mapped, - int rw_flag, - u64 buffer_offset, - u64 mapping_size, - struct vm_gk20a_mapping_batch *mapping_batch); - -int gk20a_vm_get_compbits_info(struct vm_gk20a *vm, +int nvgpu_vm_get_compbits_info(struct vm_gk20a *vm, u64 mapping_gva, u64 *compbits_win_size, u32 *compbits_win_ctagline, u32 *mapping_ctagline, u32 *flags); -int gk20a_vm_map_compbits(struct vm_gk20a *vm, - u64 mapping_gva, - u64 *compbits_win_gva, - u64 *mapping_iova, - u32 flags); - -/* unmap handle from kernel */ -void gk20a_vm_unmap(struct vm_gk20a *vm, u64 offset); - -void gk20a_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer, - struct vm_gk20a_mapping_batch *batch); - -/* get reference to all currently mapped buffers */ -int gk20a_vm_get_buffers(struct vm_gk20a *vm, - struct mapped_buffer_node ***mapped_buffers, - int *num_buffers); - -/* put references on the given buffers */ -void gk20a_vm_put_buffers(struct vm_gk20a *vm, - struct mapped_buffer_node **mapped_buffers, - int num_buffers); - -/* find buffer corresponding to va */ -int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va, - struct dma_buf **dmabuf, - u64 *offset); - -void gk20a_vm_get(struct vm_gk20a *vm); -void gk20a_vm_put(struct vm_gk20a *vm); - -void gk20a_vm_remove_support(struct vm_gk20a *vm); - u64 gk20a_vm_alloc_va(struct vm_gk20a *vm, u64 size, enum gmmu_pgsz_gk20a gmmu_pgsz_idx); @@ -635,44 +498,11 @@ int gk20a_vm_bind_channel(struct gk20a_as_share *as_share, struct channel_gk20a *ch); int __gk20a_vm_bind_channel(struct vm_gk20a *vm, struct channel_gk20a *ch); -/* batching eliminates redundant cache flushes and invalidates */ -void gk20a_vm_mapping_batch_start(struct vm_gk20a_mapping_batch *batch); -void gk20a_vm_mapping_batch_finish( - struct vm_gk20a 
*vm, struct vm_gk20a_mapping_batch *batch); -/* called when holding vm->update_gmmu_lock */ -void gk20a_vm_mapping_batch_finish_locked( - struct vm_gk20a *vm, struct vm_gk20a_mapping_batch *batch); - - int gk20a_vidmem_buf_alloc(struct gk20a *g, size_t bytes); int gk20a_vidmem_get_space(struct gk20a *g, u64 *space); int gk20a_vidbuf_access_memory(struct gk20a *g, struct dma_buf *dmabuf, void *buffer, u64 offset, u64 size, u32 cmd); -/* Note: batch may be NULL if map op is not part of a batch */ -int gk20a_vm_map_buffer(struct vm_gk20a *vm, - int dmabuf_fd, - u64 *offset_align, - u32 flags, /* NVGPU_AS_MAP_BUFFER_FLAGS_ */ - int kind, - u64 buffer_offset, - u64 mapping_size, - struct vm_gk20a_mapping_batch *batch); - -int gk20a_init_vm(struct mm_gk20a *mm, - struct vm_gk20a *vm, - u32 big_page_size, - u64 low_hole, - u64 kernel_reserved, - u64 aperture_size, - bool big_pages, - bool userspace_managed, - char *name); -void gk20a_deinit_vm(struct vm_gk20a *vm); - -/* Note: batch may be NULL if unmap op is not part of a batch */ -int gk20a_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset, - struct vm_gk20a_mapping_batch *batch); void gk20a_get_comptags(struct device *dev, struct dma_buf *dmabuf, struct gk20a_comptags *comptags); dma_addr_t gk20a_mm_gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr); diff --git a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c index d1e667b6d..f9884cfb4 100644 --- a/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/tsg_gk20a.c @@ -289,7 +289,7 @@ void gk20a_tsg_release(struct kref *ref) tsg->tsg_gr_ctx = NULL; } if (tsg->vm) { - gk20a_vm_put(tsg->vm); + nvgpu_vm_put(tsg->vm); tsg->vm = NULL; } diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c index bf3dd240b..1405ef30a 100644 --- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c @@ -60,7 +60,7 @@ void gm20b_init_mm(struct gpu_ops *gops) gops->mm.support_sparse = gm20b_mm_support_sparse; gops->mm.gmmu_map = gk20a_locked_gmmu_map; gops->mm.gmmu_unmap = gk20a_locked_gmmu_unmap; - gops->mm.vm_remove = gk20a_vm_remove_support; + gops->mm.vm_remove = nvgpu_vm_remove_support; gops->mm.vm_alloc_share = gk20a_vm_alloc_share; gops->mm.vm_bind_channel = gk20a_vm_bind_channel; gops->mm.fb_flush = gk20a_mm_fb_flush; diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c index 6b7f63b36..eab51175a 100644 --- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c @@ -74,7 +74,7 @@ static int gb10b_init_bar2_vm(struct gk20a *g) /* BAR2 aperture size is 32MB */ mm->bar2.aperture_size = 32 << 20; gk20a_dbg_info("bar2 vm size = 0x%x", mm->bar2.aperture_size); - gk20a_init_vm(mm, vm, big_page_size, SZ_4K, + nvgpu_init_vm(mm, vm, big_page_size, SZ_4K, mm->bar2.aperture_size - SZ_4K, mm->bar2.aperture_size, false, false, "bar2"); @@ -88,7 +88,7 @@ static int gb10b_init_bar2_vm(struct gk20a *g) return 0; clean_up_va: - gk20a_deinit_vm(vm); + nvgpu_deinit_vm(vm); return err; } diff --git a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h index 7fb0147ea..6d8aa0252 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h @@ -18,6 +18,9 @@ #define __NVGPU_GMMU_H__ #include +#include + +struct scatterlist; /* * This is the GMMU API visible to blocks outside of the GMMU. 
Basically this @@ -28,7 +31,37 @@ struct vm_gk20a; struct nvgpu_mem; -enum nvgpu_aperture; +enum gmmu_pgsz_gk20a { + gmmu_page_size_small = 0, + gmmu_page_size_big = 1, + gmmu_page_size_kernel = 2, + gmmu_nr_page_sizes = 3, +}; + +struct gk20a_mm_entry { + /* backing for */ + struct nvgpu_mem mem; + u32 woffset; /* if >0, mem is a shadow copy, owned by another entry */ + int pgsz; + struct gk20a_mm_entry *entries; + int num_entries; +}; + +struct gk20a_mmu_level { + int hi_bit[2]; + int lo_bit[2]; + int (*update_entry)(struct vm_gk20a *vm, + struct gk20a_mm_entry *pte, + u32 i, u32 gmmu_pgsz_idx, + struct scatterlist **sgl, + u64 *offset, + u64 *iova, + u32 kind_v, u64 *ctag, + bool cacheable, bool unmapped_pte, + int rw_flag, bool sparse, bool priv, + enum nvgpu_aperture aperture); + size_t entry_size; +}; /** * nvgpu_gmmu_map - Map memory into the GMMU. diff --git a/drivers/gpu/nvgpu/include/nvgpu/vm.h b/drivers/gpu/nvgpu/include/nvgpu/vm.h new file mode 100644 index 000000000..1fb772d52 --- /dev/null +++ b/drivers/gpu/nvgpu/include/nvgpu/vm.h @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __NVGPU_VM_H__ +#define __NVGPU_VM_H__ + +#include +#include +#include +#include +#include +#include +#include + +struct vm_gk20a; +struct mapped_buffer_node; + +/** + * This header contains the OS agnostic APIs for dealing with VMs. Most of the + * VM implementation is system specific - it must translate from a platform's + * representation of DMA'able memory to our nvgpu_mem notion. + * + * However, some stuff is platform agnostic. VM ref-counting and the VM struct + * itself are platform agnostic. Also, the initialization and destruction of + * VMs is the same across all platforms (for now). + */ + +/* map/unmap batch state */ +struct vm_gk20a_mapping_batch { + bool gpu_l2_flushed; + bool need_tlb_invalidate; +}; + +struct vm_gk20a { + struct mm_gk20a *mm; + struct gk20a_as_share *as_share; /* as_share this represents */ + + u64 va_start; + u64 va_limit; + + int num_user_mapped_buffers; + + bool big_pages; /* enable large page support */ + bool enable_ctag; + bool mapped; + + u32 big_page_size; + + bool userspace_managed; + + const struct gk20a_mmu_level *mmu_levels; + + struct kref ref; + + struct nvgpu_mutex update_gmmu_lock; + + struct gk20a_mm_entry pdb; + + /* + * These structs define the address spaces. In some cases it's possible + * to merge address spaces (user and user_lp) and in other cases it's + * not. vma[] allows the code to be agnostic to this by always using + * address spaces through this pointer array. 
+ */ + struct nvgpu_allocator *vma[gmmu_nr_page_sizes]; + struct nvgpu_allocator kernel; + struct nvgpu_allocator user; + struct nvgpu_allocator user_lp; + + struct nvgpu_rbtree_node *mapped_buffers; + + struct nvgpu_list_node reserved_va_list; + +#ifdef CONFIG_TEGRA_GR_VIRTUALIZATION + u64 handle; +#endif + u32 gmmu_page_sizes[gmmu_nr_page_sizes]; + + /* if non-NULL, kref_put will use this batch when + unmapping. Must hold vm->update_gmmu_lock. */ + struct vm_gk20a_mapping_batch *kref_put_batch; + + /* + * Each address space needs to have a semaphore pool. + */ + struct nvgpu_semaphore_pool *sema_pool; +}; + +void nvgpu_vm_get(struct vm_gk20a *vm); +void nvgpu_vm_put(struct vm_gk20a *vm); + +/* batching eliminates redundant cache flushes and invalidates */ +void nvgpu_vm_mapping_batch_start(struct vm_gk20a_mapping_batch *batch); +void nvgpu_vm_mapping_batch_finish( + struct vm_gk20a *vm, struct vm_gk20a_mapping_batch *batch); +/* called when holding vm->update_gmmu_lock */ +void nvgpu_vm_mapping_batch_finish_locked( + struct vm_gk20a *vm, struct vm_gk20a_mapping_batch *batch); + +/* get reference to all currently mapped buffers */ +int nvgpu_vm_get_buffers(struct vm_gk20a *vm, + struct mapped_buffer_node ***mapped_buffers, + int *num_buffers); + +/* put references on the given buffers */ +void nvgpu_vm_put_buffers(struct vm_gk20a *vm, + struct mapped_buffer_node **mapped_buffers, + int num_buffers); + +/* Note: batch may be NULL if unmap op is not part of a batch */ +int nvgpu_vm_unmap_buffer(struct vm_gk20a *vm, u64 offset, + struct vm_gk20a_mapping_batch *batch); + +void nvgpu_vm_unmap_locked(struct mapped_buffer_node *mapped_buffer, + struct vm_gk20a_mapping_batch *batch); + +void nvgpu_vm_remove_support_nofree(struct vm_gk20a *vm); +void nvgpu_vm_remove_support(struct vm_gk20a *vm); + +int nvgpu_init_vm(struct mm_gk20a *mm, + struct vm_gk20a *vm, + u32 big_page_size, + u64 low_hole, + u64 kernel_reserved, + u64 aperture_size, + bool big_pages, + bool userspace_managed, + char *name); +void nvgpu_deinit_vm(struct vm_gk20a *vm); + +#endif diff --git a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c index c7960c1f4..dd515f41c 100644 --- a/drivers/gpu/nvgpu/vgpu/gr_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/gr_vgpu.c @@ -526,7 +526,7 @@ static int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c, } else { if (!tsg->tsg_gr_ctx) { tsg->vm = c->vm; - gk20a_vm_get(tsg->vm); + nvgpu_vm_get(tsg->vm); err = g->ops.gr.alloc_gr_ctx(g, &tsg->tsg_gr_ctx, c->vm, args->class_num, @@ -536,7 +536,7 @@ static int vgpu_gr_alloc_obj_ctx(struct channel_gk20a *c, if (err) { nvgpu_err(g, "fail to allocate TSG gr ctx buffer, err=%d", err); - gk20a_vm_put(tsg->vm); + nvgpu_vm_put(tsg->vm); tsg->vm = NULL; goto out; } diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c index 7e42c198e..cfa9e4289 100644 --- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c @@ -22,6 +22,8 @@ #include "vgpu/vgpu.h" #include "gk20a/mm_gk20a.h" +#include "common/linux/vm_priv.h" + static int vgpu_init_mm_setup_sw(struct gk20a *g) { struct mm_gk20a *mm = &g->mm; @@ -216,7 +218,7 @@ static void vgpu_vm_remove_support(struct vm_gk20a *vm) nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers); while (node) { mapped_buffer = mapped_buffer_from_rbtree_node(node); - gk20a_vm_unmap_locked(mapped_buffer, NULL); + nvgpu_vm_unmap_locked(mapped_buffer, NULL); nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers); } @@ -454,7 +456,7 @@ static int vgpu_vm_bind_channel(struct 
gk20a_as_share *as_share, } if (ch->vm) - gk20a_vm_get(ch->vm); + nvgpu_vm_get(ch->vm); return err; }
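
Reviewer note, not part of the patch: a minimal usage sketch of the renamed mapping-batch API, mirroring the gk20a_as_ioctl_map_buffer_batch() path above. The helper name, its arguments, and the error handling are hypothetical; only the nvgpu_vm_* calls and NV_KIND_DEFAULT come from code in this change.

/*
 * Illustrative only: remap a dmabuf within a VM while batching the
 * resulting cache/TLB maintenance. The helper itself is hypothetical.
 */
#include <nvgpu/vm.h>

#include "common/linux/vm_priv.h"

static int example_remap_buffer(struct vm_gk20a *vm, int dmabuf_fd,
				u64 old_offset, u64 *new_offset, u32 flags)
{
	struct vm_gk20a_mapping_batch batch;
	int err;

	/* Batching defers the TLB invalidate until _finish(). */
	nvgpu_vm_mapping_batch_start(&batch);

	err = nvgpu_vm_unmap_buffer(vm, old_offset, &batch);
	if (!err)
		err = nvgpu_vm_map_buffer(vm, dmabuf_fd, new_offset,
					  flags, NV_KIND_DEFAULT,
					  0 /* buffer_offset */,
					  0 /* mapping_size */,
					  &batch);

	/* One TLB invalidate is issued here if any map/unmap required it. */
	nvgpu_vm_mapping_batch_finish(vm, &batch);

	return err;
}

As in the ioctl path, nvgpu_vm_mapping_batch_finish() collapses the per-operation invalidates into a single one once the batch is complete.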