diff --git a/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c b/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c
index 46138e126..7a3b03ef0 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c
@@ -61,6 +61,11 @@ NVGPU_COV_WHITELIST(false_positive, NVGPU_MISRA(Rule, 14_4), "Bug 2623654") \
 	} while (false)
 #endif
 
+/**
+ * Size required to submit work through MMIO.
+ */
+#define NVGPU_GPU_MMIO_SIZE SZ_64K
+
 static int pd_allocate(struct vm_gk20a *vm,
 		       struct nvgpu_gmmu_pd *pd,
 		       const struct gk20a_mmu_level *l,
@@ -196,6 +201,140 @@ void nvgpu_gmmu_unmap(struct vm_gk20a *vm, struct nvgpu_mem *mem)
 	nvgpu_gmmu_unmap_addr(vm, mem, mem->gpu_va);
 }
 
+int nvgpu_channel_setup_mmio_gpu_vas(struct gk20a *g,
+		struct nvgpu_channel *c,
+		u32 gpfifosize)
+{
+	int err = 0;
+	struct nvgpu_sgt *sgt = NULL;
+	struct vm_gk20a *vm = c->vm;
+	u64 virtual_func_offset = 0U;
+
+	/* Initialize the map sizes for userd, gpummio and gpfifo */
+	c->userd_va_mapsize = SZ_4K;
+	c->gpfifo_va_mapsize = gpfifosize;
+
+	sgt = nvgpu_sgt_create_from_mem(g, &c->usermode_userd);
+	if (sgt == NULL) {
+		return -ENOMEM;
+	}
+
+	c->userd_va = nvgpu_gmmu_map_va(vm, sgt, c->userd_va_mapsize,
+			APERTURE_SYSMEM, 0);
+
+	nvgpu_sgt_free(g, sgt);
+	if (c->userd_va == 0U) {
+		return -ENOMEM;
+	}
+
+	sgt = nvgpu_sgt_create_from_mem(g, &c->usermode_gpfifo);
+	if (sgt == NULL) {
+		goto free_userd_va;
+	}
+
+	c->gpfifo_va = nvgpu_gmmu_map_va(vm, sgt, gpfifosize, APERTURE_SYSMEM, 0);
+	nvgpu_sgt_free(g, sgt);
+	if (c->gpfifo_va == 0U) {
+		goto free_userd_va;
+	}
+
+	nvgpu_mutex_acquire(&vm->gpu_mmio_va_map_lock);
+	if (vm->gpummio_va == 0U) {
+		virtual_func_offset = g->ops.usermode.base(g);
+		vm->gpummio_va_mapsize = NVGPU_GPU_MMIO_SIZE;
+		/*
+		 * Create an SGT from the VF address with 64KB size for the first channel.
+		 */
+		err = nvgpu_mem_create_from_phys(g, &vm->gpummio_mem,
+				virtual_func_offset,
+				vm->gpummio_va_mapsize / NVGPU_CPU_PAGE_SIZE);
+		if (err < 0) {
+			nvgpu_mutex_release(&vm->gpu_mmio_va_map_lock);
+			goto free_gpfifo_va;
+		}
+
+		sgt = nvgpu_sgt_create_from_mem(g, &vm->gpummio_mem);
+		if (sgt == NULL) {
+			goto free_mem_and_release_lock;
+		}
+
+		vm->gpummio_va = nvgpu_gmmu_map_va(vm, sgt, vm->gpummio_va_mapsize,
+				APERTURE_SYSMEM_COH, NVGPU_KIND_SMSKED_MESSAGE);
+		nvgpu_sgt_free(g, sgt);
+		if (vm->gpummio_va == 0U) {
+			goto free_mem_and_release_lock;
+		}
+	}
+	nvgpu_mutex_release(&vm->gpu_mmio_va_map_lock);
+	return 0;
+
+free_mem_and_release_lock:
+	nvgpu_dma_free(g, &vm->gpummio_mem);
+	nvgpu_mutex_release(&vm->gpu_mmio_va_map_lock);
+
+free_gpfifo_va:
+	nvgpu_gmmu_unmap_va(c->vm, c->gpfifo_va, c->gpfifo_va_mapsize);
+	c->gpfifo_va = 0U;
+free_userd_va:
+	nvgpu_gmmu_unmap_va(c->vm, c->userd_va, c->userd_va_mapsize);
+	c->userd_va = 0U;
+	return -ENOMEM;
+}
+
+void nvgpu_channel_free_mmio_gpu_vas(struct gk20a *g,
+		struct nvgpu_channel *c)
+{
+	(void)g;
+
+	if (c->gpfifo_va != 0U) {
+		nvgpu_gmmu_unmap_va(c->vm, c->gpfifo_va, c->gpfifo_va_mapsize);
+	}
+
+	if (c->userd_va != 0U) {
+		nvgpu_gmmu_unmap_va(c->vm, c->userd_va, c->userd_va_mapsize);
+	}
+
+	c->userd_va = 0U;
+	c->gpfifo_va = 0U;
+}
+
+u64 nvgpu_gmmu_map_va(struct vm_gk20a *vm,
+		struct nvgpu_sgt *sgt,
+		u64 size,
+		enum nvgpu_aperture aperture,
+		u8 kind)
+{
+	struct gk20a *g = gk20a_from_vm(vm);
+	u64 gpu_va = 0U;
+	u64 vaddr = 0U;
+	u64 buffer_offset = 0U;
+	u32 ctag_offset = 0U;
+	u32 flags = 0U;
+	enum gk20a_mem_rw_flag rw_flag = 0;
+	bool clear_ctags = false;
+	bool sparse = false;
+	bool priv = false;
+	struct vm_gk20a_mapping_batch *batch = NULL;
+
+	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
+	gpu_va = g->ops.mm.gmmu.map(vm, vaddr, sgt /* sg list */,
+			buffer_offset, size, GMMU_PAGE_SIZE_SMALL, kind,
+			ctag_offset, flags, rw_flag, clear_ctags,
+			sparse, priv, batch, aperture);
+	nvgpu_mutex_release(&vm->update_gmmu_lock);
+	return gpu_va;
+}
+
+void nvgpu_gmmu_unmap_va(struct vm_gk20a *vm, u64 gpu_va, u64 size)
+{
+	struct gk20a *g = gk20a_from_vm(vm);
+
+	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
+	g->ops.mm.gmmu.unmap(vm, gpu_va, size, GMMU_PAGE_SIZE_SMALL, false,
+			gk20a_mem_flag_none, false, NULL);
+	nvgpu_mutex_release(&vm->update_gmmu_lock);
+}
+
 int nvgpu_gmmu_init_page_table(struct vm_gk20a *vm)
 {
 	u32 pdb_size;
diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c
index 115381846..3fee0a961 100644
--- a/drivers/gpu/nvgpu/common/mm/vm.c
+++ b/drivers/gpu/nvgpu/common/mm/vm.c
@@ -813,6 +813,7 @@ int nvgpu_vm_do_init(struct mm_gk20a *mm,
 	vm->mapped_buffers = NULL;
 
 	nvgpu_mutex_init(&vm->syncpt_ro_map_lock);
+	nvgpu_mutex_init(&vm->gpu_mmio_va_map_lock);
 	nvgpu_mutex_init(&vm->update_gmmu_lock);
 	nvgpu_ref_init(&vm->ref);
 
@@ -838,6 +839,7 @@ int nvgpu_vm_do_init(struct mm_gk20a *mm,
 clean_up_gmmu_lock:
 	nvgpu_mutex_destroy(&vm->update_gmmu_lock);
 	nvgpu_mutex_destroy(&vm->syncpt_ro_map_lock);
+	nvgpu_mutex_destroy(&vm->gpu_mmio_va_map_lock);
 #endif
 clean_up_gpu_vm:
 	if (g->ops.mm.vm_as_free_share != NULL) {
@@ -943,6 +945,16 @@ static void nvgpu_vm_remove(struct vm_gk20a *vm)
 				vm->syncpt_ro_map_gpu_va);
 	}
 
+	nvgpu_mutex_acquire(&vm->gpu_mmio_va_map_lock);
+	if (vm->gpummio_va != 0U) {
+		nvgpu_gmmu_unmap_va(vm, vm->gpummio_va,
+				vm->gpummio_va_mapsize);
+		nvgpu_dma_free(g, &vm->gpummio_mem);
+		vm->gpummio_va = 0U;
+		vm->gpummio_va_mapsize = 0U;
+	}
+	nvgpu_mutex_release(&vm->gpu_mmio_va_map_lock);
+
 	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
 
 	nvgpu_rbtree_enum_start(0, &node, vm->mapped_buffers);
@@ -988,6 +1000,7 @@ static void nvgpu_vm_remove(struct vm_gk20a *vm)
 
 	nvgpu_mutex_destroy(&vm->update_gmmu_lock);
 	nvgpu_mutex_destroy(&vm->syncpt_ro_map_lock);
+	nvgpu_mutex_destroy(&vm->gpu_mmio_va_map_lock);
 
 	nvgpu_kfree(g, vm);
 }
diff --git a/drivers/gpu/nvgpu/include/nvgpu/channel.h b/drivers/gpu/nvgpu/include/nvgpu/channel.h
index 0f0d65790..f00398943 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/channel.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/channel.h
@@ -63,21 +63,24 @@ struct nvgpu_runlist;
 /**
  * Enable VPR support.
  */
-#define NVGPU_SETUP_BIND_FLAGS_SUPPORT_VPR BIT32(0)
+#define NVGPU_SETUP_BIND_FLAGS_SUPPORT_VPR BIT32(0)
 /**
  * Channel must have deterministic (and low) submit latency.
  * This flag is only valid for kernel mode submit.
  */
-#define NVGPU_SETUP_BIND_FLAGS_SUPPORT_DETERMINISTIC BIT32(1)
+#define NVGPU_SETUP_BIND_FLAGS_SUPPORT_DETERMINISTIC BIT32(1)
 /**
  * Enable replayable faults.
  */
-#define NVGPU_SETUP_BIND_FLAGS_REPLAYABLE_FAULTS_ENABLE BIT32(2)
+#define NVGPU_SETUP_BIND_FLAGS_REPLAYABLE_FAULTS_ENABLE BIT32(2)
 /**
  * Enable usermode submit (mutually exclusive with kernel_mode submit).
  */
-#define NVGPU_SETUP_BIND_FLAGS_USERMODE_SUPPORT BIT32(3)
-
+#define NVGPU_SETUP_BIND_FLAGS_USERMODE_SUPPORT BIT32(3)
+/**
+ * Enable GPU MMIO support.
+ */
+#define NVGPU_SETUP_BIND_FLAGS_USERMODE_GPU_MAP_RESOURCES_SUPPORT BIT32(4)
 /**
  * Insert a wait on previous job's completion fence, before gpfifo entries.
  * See also #nvgpu_fence.
@@ -246,6 +249,9 @@ struct nvgpu_setup_bind_args {
 	u32 gpfifo_dmabuf_fd;
 	u64 gpfifo_dmabuf_offset;
 	u32 work_submit_token;
+	u64 gpfifo_gpu_va;
+	u64 userd_gpu_va;
+	u64 usermode_mmio_gpu_va;
 	u32 flags;
 };
 
@@ -581,6 +587,10 @@ struct nvgpu_channel {
 	 */
 	nvgpu_atomic_t sched_exit_wait_for_errbar_refcnt;
 #endif
+	u64 userd_va;
+	u64 gpfifo_va;
+	u64 userd_va_mapsize;
+	u64 gpfifo_va_mapsize;
 };
 
 #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
diff --git a/drivers/gpu/nvgpu/include/nvgpu/enabled.h b/drivers/gpu/nvgpu/include/nvgpu/enabled.h
index 8b743a557..e4faa164d 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/enabled.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/enabled.h
@@ -235,6 +235,8 @@ struct gk20a;
 		"Multimedia engine support"), \
 	DEFINE_FLAG(NVGPU_SUPPORT_SEMA_BASED_GPFIFO_GET, \
 		"Semaphore based gpfifo get update support"), \
+	DEFINE_FLAG(NVGPU_SUPPORT_GPU_MMIO, \
+		"Support for work submit through GPU MMIO"), \
 	DEFINE_FLAG(NVGPU_MAX_ENABLED_BITS, "Marks max number of flags"),
 
 /**
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
index aec41ef68..c63ca56ca 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
@@ -46,6 +46,7 @@ struct nvgpu_mem;
 struct nvgpu_sgt;
 struct nvgpu_gmmu_pd;
 struct vm_gk20a_mapping_batch;
+struct nvgpu_channel;
 
 /**
  * Small page size (4KB) index in the page size table
@@ -430,6 +431,81 @@ void nvgpu_gmmu_unmap_addr(struct vm_gk20a *vm,
  */
 void nvgpu_gmmu_unmap(struct vm_gk20a *vm, struct nvgpu_mem *mem);
 
+/**
+ * @brief Map the memory described by an SGT into the GMMU.
+ * This is required to add the translations to the GPU page table
+ * for the given channel.
+ *
+ * @param vm [in]	Pointer to virtual memory structure.
+ * @param sgt [in]	Scatter-gather table describing the memory to be mapped.
+ * @param size [in]	Size to be mapped in the GMMU.
+ * @param aperture [in]	Aperture (memory type) of the given memory.
+ * @param kind [in]	Kind to be used for the mapping.
+ *
+ * GMMU map:
+ * - Acquire the VM GMMU lock to avoid races.
+ * - Call the core map routine to map the given SGT into the GMMU.
+ * - Release the VM GMMU lock.
+ *
+ * @return GPU virtual address of the mapping, or 0 on failure.
+ */
+u64 nvgpu_gmmu_map_va(struct vm_gk20a *vm, struct nvgpu_sgt *sgt,
+		u64 size, enum nvgpu_aperture aperture,
+		u8 kind);
+
+/**
+ * @brief Unmap a memory region mapped by nvgpu_gmmu_map_va().
+ * This is required to remove the translations from the GPU page table.
+ *
+ * @param vm [in]	Pointer to virtual memory structure.
+ * @param gpu_va [in]	GPU virtual address.
+ * @param size [in]	Size to be unmapped from the GMMU.
+ *
+ * GMMU unmap:
+ * - Acquire the VM GMMU lock to avoid races.
+ * - Call the core unmap routine to remove the translations from the GMMU.
+ * - Release the VM GMMU lock.
+ *
+ * @return None.
+ */
+void nvgpu_gmmu_unmap_va(struct vm_gk20a *vm, u64 gpu_va, u64 size);
+
+/**
+ * @brief Set up the GMMU mappings needed for GPU work submission.
+ *
+ * @param g [in]	Pointer to the GPU super structure.
+ * @param c [in]	Pointer to the channel structure.
+ * @param gpfifosize [in]	Size of the GPU mapping to create for the gpfifo.
+ *
+ * - Create an SGT from the channel's userd buffer and call
+ *   nvgpu_gmmu_map_va() to map it with a 4K mapping in the GMMU.
+ * - Create an SGT from the channel's gpfifo buffer and call
+ *   nvgpu_gmmu_map_va() to map it with gpfifosize in the GMMU.
+ * - Create an SGT for the GPU MMIO region of the channel's VM and call
+ *   nvgpu_gmmu_map_va() to map it with a 64K mapping.
+ *
+ * @return 0 for success, < 0 for failure.
+ */
+int nvgpu_channel_setup_mmio_gpu_vas(struct gk20a *g,
+		struct nvgpu_channel *c,
+		u32 gpfifosize);
+
+/**
+ * @brief Free the mappings created by nvgpu_channel_setup_mmio_gpu_vas().
+ *
+ * @param g [in]	Pointer to the GPU super structure.
+ * @param c [in]	Pointer to the channel structure.
+ *
+ * Unmap the userd, gpfifo and GPU MMIO GPU VAs created by
+ * nvgpu_channel_setup_mmio_gpu_vas().
+ *
+ * @return None.
+ */
+void nvgpu_channel_free_mmio_gpu_vas(struct gk20a *g,
+		struct nvgpu_channel *c);
+
 /**
  * @brief Compute number of words in a PTE.
  *
diff --git a/drivers/gpu/nvgpu/include/nvgpu/vm.h b/drivers/gpu/nvgpu/include/nvgpu/vm.h
index 94cbcb2bf..d08697985 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/vm.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/vm.h
@@ -338,6 +338,23 @@ struct vm_gk20a {
 	 * Protect allocation of sync point map.
 	 */
 	struct nvgpu_mutex syncpt_ro_map_lock;
+	/**
+	 * GPU VA required to submit work through MMIO.
+	 */
+	u64 gpummio_va;
+	/**
+	 * Size of the gpummio mapping.
+	 */
+	u64 gpummio_va_mapsize;
+	/**
+	 * nvgpu_mem to store the physical address information.
+	 */
+	struct nvgpu_mem gpummio_mem;
+	/**
+	 * Mutex to protect the gpummio mappings.
+	 */
+	struct nvgpu_mutex gpu_mmio_va_map_lock;
+
 };
 
 /*
@@ -354,6 +371,7 @@
 #define NVGPU_VM_MAP_ACCESS_READ_ONLY 1U
 #define NVGPU_VM_MAP_ACCESS_READ_WRITE 2U
 
+#define NVGPU_KIND_SMSKED_MESSAGE 0xF
 #define NVGPU_KIND_INVALID S16(-1)
 
 /**
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c
index d4c8e6d2e..48e9cdf72 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c
@@ -629,6 +629,9 @@ static u32 nvgpu_setup_bind_user_flags_to_common_flags(u32 user_flags)
 	if (user_flags & NVGPU_CHANNEL_SETUP_BIND_FLAGS_USERMODE_SUPPORT)
 		flags |= NVGPU_SETUP_BIND_FLAGS_USERMODE_SUPPORT;
 
+	if (user_flags & NVGPU_CHANNEL_SETUP_BIND_FLAGS_USERMODE_GPU_MAP_RESOURCES_SUPPORT)
+		flags |= NVGPU_SETUP_BIND_FLAGS_USERMODE_GPU_MAP_RESOURCES_SUPPORT;
+
 	return flags;
 }
 
@@ -1328,6 +1331,12 @@ long gk20a_channel_ioctl(struct file *filp,
 		err = nvgpu_channel_setup_bind(ch, &setup_bind_args);
 		channel_setup_bind_args->work_submit_token =
 			setup_bind_args.work_submit_token;
+		channel_setup_bind_args->gpfifo_gpu_va =
+			setup_bind_args.gpfifo_gpu_va;
+		channel_setup_bind_args->userd_gpu_va =
+			setup_bind_args.userd_gpu_va;
+		channel_setup_bind_args->usermode_mmio_gpu_va =
+			setup_bind_args.usermode_mmio_gpu_va;
 		gk20a_idle(ch->g);
 		break;
 	}
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c
index 737d6f8a0..bee0eb4c5 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c
@@ -328,6 +328,8 @@ static struct nvgpu_flags_mapping flags_mapping[] = {
 		NVGPU_SCHED_EXIT_WAIT_FOR_ERRBAR_SUPPORTED},
 	{NVGPU_GPU_FLAGS_MULTI_PROCESS_TSG_SHARING,
 		NVGPU_SUPPORT_MULTI_PROCESS_TSG_SHARING},
+	{NVGPU_GPU_FLAGS_SUPPORT_GPU_MMIO,
+		NVGPU_SUPPORT_GPU_MMIO},
 };
 
 static u64 nvgpu_ctrl_ioctl_gpu_characteristics_flags(struct gk20a *g)
diff --git a/drivers/gpu/nvgpu/os/linux/linux-channel.c b/drivers/gpu/nvgpu/os/linux/linux-channel.c
index 5d834cdf1..4dc31037e 100644
--- a/drivers/gpu/nvgpu/os/linux/linux-channel.c
+++ b/drivers/gpu/nvgpu/os/linux/linux-channel.c
@@ -494,6 +494,10 @@ void nvgpu_os_channel_free_usermode_buffers(struct nvgpu_channel *c)
 	struct gk20a *g = c->g;
 	struct device *dev = dev_from_gk20a(g);
 
+	if (nvgpu_is_enabled(g, NVGPU_SUPPORT_GPU_MMIO)) {
+		nvgpu_channel_free_mmio_gpu_vas(g, c);
+	}
+
 	if (priv->usermode.gpfifo.dmabuf != NULL) {
 		nvgpu_mm_unpin(dev, priv->usermode.gpfifo.dmabuf,
 			priv->usermode.gpfifo.attachment,
@@ -560,7 +564,19 @@ static int nvgpu_channel_alloc_usermode_buffers(struct nvgpu_channel *c,
 		goto unmap_free_gpfifo;
 	}
 
+	if (nvgpu_is_enabled(g, NVGPU_SUPPORT_GPU_MMIO) &&
+	    ((args->flags & NVGPU_SETUP_BIND_FLAGS_USERMODE_GPU_MAP_RESOURCES_SUPPORT) != 0U)) {
+		err = nvgpu_channel_setup_mmio_gpu_vas(g, c, gpfifo_size);
+		if (err < 0) {
+			err = -ENOMEM;
+			goto unmap_free_gpfifo;
+		}
+	}
+
 	args->work_submit_token = g->ops.usermode.doorbell_token(c);
+	args->gpfifo_gpu_va = c->gpfifo_va;
+	args->userd_gpu_va = c->userd_va;
+	args->usermode_mmio_gpu_va = c->vm->gpummio_va;
 
 	return 0;
 unmap_free_gpfifo:
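
Editor's note (not part of the patch): for readers unfamiliar with the setup-bind path, the sketch below illustrates how a usermode client might opt into the new GPU-map-resources behavior and read back the three GPU VAs this change returns. It is a hedged example only: it assumes the existing nvgpu channel uapi (NVGPU_IOCTL_CHANNEL_SETUP_BIND, struct nvgpu_channel_setup_bind_args) extended with the fields added above, and the uapi header path is a placeholder.

/*
 * Illustrative userspace sketch, assuming the nvgpu channel uapi as extended
 * by this change. The header path and helper names are placeholders.
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>	/* placeholder: nvgpu channel uapi definitions */

struct submit_resources {
	uint64_t gpfifo_gpu_va;
	uint64_t userd_gpu_va;
	uint64_t usermode_mmio_gpu_va;
	uint32_t work_submit_token;
};

static int setup_bind_gpu_mmio(int ch_fd, int userd_fd, int gpfifo_fd,
			       uint32_t num_entries, struct submit_resources *out)
{
	struct nvgpu_channel_setup_bind_args args;

	memset(&args, 0, sizeof(args));
	args.userd_dmabuf_fd = userd_fd;
	args.gpfifo_dmabuf_fd = gpfifo_fd;
	args.num_gpfifo_entries = num_entries;
	/* Request usermode submit plus GPU-mapped submit resources. */
	args.flags = NVGPU_CHANNEL_SETUP_BIND_FLAGS_USERMODE_SUPPORT |
		     NVGPU_CHANNEL_SETUP_BIND_FLAGS_USERMODE_GPU_MAP_RESOURCES_SUPPORT;

	if (ioctl(ch_fd, NVGPU_IOCTL_CHANNEL_SETUP_BIND, &args) != 0)
		return -1;

	/* GPU VAs of the usermode submit resources mapped by the new path. */
	out->gpfifo_gpu_va = args.gpfifo_gpu_va;
	out->userd_gpu_va = args.userd_gpu_va;
	out->usermode_mmio_gpu_va = args.usermode_mmio_gpu_va;
	out->work_submit_token = args.work_submit_token;
	return 0;
}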