/* * Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ #include #include #include #include #include #include #include #include #include #define DMA_ERROR_CODE (~(u64)0x0) /* * This function (and the get_addr() and get_phys_addr() functions are somewhat * meaningless in userspace. * * There is no GPU in the loop here, so defining a "GPU physical" address is * difficult. What we do here is simple but limited. We'll treat the GPU physical * address as just the bottom 32 bits of the CPU virtual address. Since the driver * shouldn't be dereferencing these pointers in the first place that's sufficient * to make most tests work. The reason we truncate the CPU VA is because the * address returned from this is programmed into the GMMU PTEs/PDEs. That code * asserts that the address is a valid GPU physical address (i.e less than some * number of bits, depending on chip). * * However, this does lead to some potential quirks: GPU addresses of different * CPU virtual addresses could alias (e.g B and B + 1024GB will both result in * the same value when ANDing with 0xFFFFFFFFFF. * * If there is a buffer with an address range that crosses a 1024GB boundary it'll * be detected here. A more sophisticated buffer to GPU virtual address approach * could be taken, but for now this is probably sufficient. * * For invalid nvgpu_mems and nvgpu_mems with no cpu_va, just return NULL. * There's little else we can do. In many cases in the unit test FW we wind up * getting essentially uninitialized nvgpu_mems. */ static u64 nvgpu_mem_userspace_get_addr(struct gk20a *g, struct nvgpu_mem *mem) { u64 hi_front = ((u64)(uintptr_t)mem->cpu_va) & ~0xffffffffffUL; u64 hi_back = ((u64)(uintptr_t)mem->cpu_va + mem->size - 1U) & ~0xffffffffffUL; if (!nvgpu_mem_is_valid(mem) || mem->cpu_va == NULL) { return 0x0UL; } if (hi_front != hi_back) { nvgpu_err(g, "Mismatching cpu_va calc."); nvgpu_err(g, " valid = %s", nvgpu_mem_is_valid(mem) ? "yes" : "no"); nvgpu_err(g, " cpu_va = %p", mem->cpu_va); nvgpu_err(g, " size = %lx", mem->size); nvgpu_err(g, " hi_front = 0x%llx", hi_front); nvgpu_err(g, " hi_back = 0x%llx", hi_back); } nvgpu_assert(hi_front == hi_back); return ((u64)(uintptr_t)mem->cpu_va) & 0xffffffffffUL; } u64 nvgpu_mem_get_addr(struct gk20a *g, struct nvgpu_mem *mem) { return nvgpu_mem_userspace_get_addr(g, mem); } u64 nvgpu_mem_get_phys_addr(struct gk20a *g, struct nvgpu_mem *mem) { return nvgpu_mem_userspace_get_addr(g, mem); } void *nvgpu_mem_sgl_next(void *sgl) { struct nvgpu_mem_sgl *mem = (struct nvgpu_mem_sgl *)sgl; return (void *) mem->next; } u64 nvgpu_mem_sgl_phys(struct gk20a *g, void *sgl) { struct nvgpu_mem_sgl *mem = (struct nvgpu_mem_sgl *)sgl; (void)g; return (u64)(uintptr_t)mem->phys; } u64 nvgpu_mem_sgl_ipa_to_pa(struct gk20a *g, void *sgl, u64 ipa, u64 *pa_len) { (void)ipa; (void)pa_len; return nvgpu_mem_sgl_phys(g, sgl); } u64 nvgpu_mem_sgl_dma(void *sgl) { struct nvgpu_mem_sgl *mem = (struct nvgpu_mem_sgl *)sgl; return (u64)(uintptr_t)mem->dma; } u64 nvgpu_mem_sgl_length(void *sgl) { struct nvgpu_mem_sgl *mem = (struct nvgpu_mem_sgl *)sgl; return (u64)mem->length; } u64 nvgpu_mem_sgl_gpu_addr(struct gk20a *g, void *sgl, struct nvgpu_gmmu_attrs *attrs) { struct nvgpu_mem_sgl *mem = (struct nvgpu_mem_sgl *)sgl; if (mem->dma == 0U) { return g->ops.mm.gmmu.gpu_phys_addr(g, attrs, mem->phys); } if (mem->dma == DMA_ERROR_CODE) { return 0x0; } return nvgpu_mem_iommu_translate(g, mem->dma); } bool nvgpu_mem_sgt_iommuable(struct gk20a *g, struct nvgpu_sgt *sgt) { struct nvgpu_os_posix *p = nvgpu_os_posix_from_gk20a(g); (void)sgt; return p->mm_sgt_is_iommuable; } void nvgpu_mem_sgl_free(struct gk20a *g, struct nvgpu_mem_sgl *sgl) { struct nvgpu_mem_sgl *tptr; while (sgl != NULL) { tptr = sgl->next; nvgpu_kfree(g, sgl); sgl = tptr; } } void nvgpu_mem_sgt_free(struct gk20a *g, struct nvgpu_sgt *sgt) { nvgpu_mem_sgl_free(g, (struct nvgpu_mem_sgl *)sgt->sgl); nvgpu_kfree(g, sgt); } static struct nvgpu_sgt_ops nvgpu_sgt_posix_ops = { .sgl_next = nvgpu_mem_sgl_next, .sgl_phys = nvgpu_mem_sgl_phys, .sgl_ipa = nvgpu_mem_sgl_phys, .sgl_ipa_to_pa = nvgpu_mem_sgl_ipa_to_pa, .sgl_dma = nvgpu_mem_sgl_dma, .sgl_length = nvgpu_mem_sgl_length, .sgl_gpu_addr = nvgpu_mem_sgl_gpu_addr, .sgt_iommuable = nvgpu_mem_sgt_iommuable, .sgt_free = nvgpu_mem_sgt_free, }; struct nvgpu_mem_sgl *nvgpu_mem_sgl_posix_create_from_list(struct gk20a *g, struct nvgpu_mem_sgl *sgl_list, u32 nr_sgls, u64 *total_size) { struct nvgpu_mem_sgl *sgl_ptr, *tptr, *head = NULL; u32 i; *total_size = 0; for (i = 0; i < nr_sgls; i++) { tptr = (struct nvgpu_mem_sgl *)nvgpu_kzalloc(g, sizeof(struct nvgpu_mem_sgl)); if (tptr == NULL) { goto err; } if (i == 0U) { sgl_ptr = tptr; head = sgl_ptr; } else { sgl_ptr->next = tptr; sgl_ptr = sgl_ptr->next; } sgl_ptr->next = NULL; sgl_ptr->phys = sgl_list[i].phys; sgl_ptr->dma = sgl_list[i].dma; sgl_ptr->length = sgl_list[i].length; *total_size += sgl_list[i].length; } return head; err: while (head != NULL) { struct nvgpu_mem_sgl *tmp = head; head = tmp->next; nvgpu_kfree(g, tmp); } return NULL; } struct nvgpu_sgt *nvgpu_mem_sgt_posix_create_from_list(struct gk20a *g, struct nvgpu_mem_sgl *sgl_list, u32 nr_sgls, u64 *total_size) { struct nvgpu_sgt *sgt = nvgpu_kzalloc(g, sizeof(struct nvgpu_sgt)); struct nvgpu_mem_sgl *sgl; if (sgt == NULL) { return NULL; } sgl = nvgpu_mem_sgl_posix_create_from_list(g, sgl_list, nr_sgls, total_size); if (sgl == NULL) { nvgpu_kfree(g, sgt); return NULL; } sgt->sgl = (void *)sgl; sgt->ops = &nvgpu_sgt_posix_ops; return sgt; } int nvgpu_mem_posix_create_from_list(struct gk20a *g, struct nvgpu_mem *mem, struct nvgpu_mem_sgl *sgl_list, u32 nr_sgls) { u64 sgl_size; mem->priv.sgt = nvgpu_mem_sgt_posix_create_from_list(g, sgl_list, nr_sgls, &sgl_size); if (mem->priv.sgt == NULL) { return -ENOMEM; } mem->aperture = APERTURE_SYSMEM; mem->aligned_size = PAGE_ALIGN(sgl_size); mem->size = sgl_size; return 0; } struct nvgpu_sgt *nvgpu_sgt_os_create_from_mem(struct gk20a *g, struct nvgpu_mem *mem) { struct nvgpu_mem_sgl *sgl; struct nvgpu_sgt *sgt; if (mem->priv.sgt != NULL) { return mem->priv.sgt; } sgt = nvgpu_kzalloc(g, sizeof(*sgt)); if (sgt == NULL) { return NULL; } sgt->ops = &nvgpu_sgt_posix_ops; /* * The userspace implementation is simple: a single 'entry' (which we * only need the nvgpu_mem_sgl struct to describe). A unit test can * easily replace it if needed. */ sgl = (struct nvgpu_mem_sgl *) nvgpu_kzalloc(g, sizeof( struct nvgpu_mem_sgl)); if (sgl == NULL) { nvgpu_kfree(g, sgt); return NULL; } sgl->length = mem->size; sgl->phys = (u64) mem->cpu_va; sgt->sgl = (void *) sgl; return sgt; } int nvgpu_mem_create_from_mem(struct gk20a *g, struct nvgpu_mem *dest, struct nvgpu_mem *src, u64 start_page, size_t nr_pages) { u64 start = start_page * U64(NVGPU_CPU_PAGE_SIZE); u64 size = U64(nr_pages) * U64(NVGPU_CPU_PAGE_SIZE); (void)g; if (!nvgpu_aperture_is_sysmem(src->aperture)) { return -EINVAL; } /* Some silly things a caller might do... */ if (size > src->size) { return -EINVAL; } if ((start + size) > src->size) { return -EINVAL; } (void) memset(dest, 0, sizeof(*dest)); dest->cpu_va = ((char *)src->cpu_va) + start; dest->mem_flags = src->mem_flags | NVGPU_MEM_FLAG_SHADOW_COPY; dest->aperture = src->aperture; dest->skip_wmb = src->skip_wmb; dest->size = size; return 0; } int __nvgpu_mem_create_from_phys(struct gk20a *g, struct nvgpu_mem *dest, u64 src_phys, int nr_pages) { (void)g; (void)dest; (void)src_phys; (void)nr_pages; BUG(); return 0; }