mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-23 18:16:01 +03:00
Not sure if there's an actual bug or JIRA filed for this, but the change here fixes a long standing bug in the MM code for unit tests. The GMMU programming code verifies that the CPU _physical_ address programmed into the GMMU PDE0 is a valid Tegra SoC CPU physical address. That means that it's not too large a value. The POSIX implementation of the nvgpu_mem related code used the CPU virtual address as the "phys" address. Obviously, in userspace, there's no access to physical addresses, so in some sense it's a meaningless function. But the GMMU code does care, as described above, about the format of the address. The fix is simple enough: since the nvgpu_mem_get_addr() and nvgpu_mem_get_phys_addr() values shouldn't actually be accessed by the driver anyway (they could be vidmem addresses or IOVA addresses in real life) ANDing them with 0xffffffff (e.g. 32 bits) truncates the potentially problematic CPU virtual address bits returned by malloc() in the POSIX environment. With this, a run of the unit test framework passes for me locally on my Ubuntu 18 machine. Also, clean up a few whitespace issues I noticed while I debugged this and fix another long standing bug where the NVGPU_DEFAULT_DBG_MASK was not being copied to g->log_mask during gk20a struct init. Change-Id: Ie92d3bd26240d194183b4376973d4d32cb6f9b8f Signed-off-by: Alex Waterman <alexw@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2395953 Tested-by: mobile promotions <svcmobile_promotions@nvidia.com> Reviewed-by: automaticguardword <automaticguardword@nvidia.com> Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com> Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com> Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com> Reviewed-by: Konsta Holtta <kholtta@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> GVS: Gerrit_Virtual_Submit
333 lines
8.9 KiB
C
333 lines
8.9 KiB
C
/*
|
|
* Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included in
|
|
* all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
* DEALINGS IN THE SOFTWARE.
|
|
*/
|
|
|
|
#include <nvgpu/bug.h>
|
|
#include <nvgpu/dma.h>
|
|
#include <nvgpu/gmmu.h>
|
|
#include <nvgpu/kmem.h>
|
|
#include <nvgpu/nvgpu_mem.h>
|
|
#include <nvgpu/nvgpu_sgt.h>
|
|
#include <nvgpu/nvgpu_sgt_os.h>
|
|
#include <nvgpu/gk20a.h>
|
|
#include <os/posix/os_posix.h>
|
|
|
|
#define DMA_ERROR_CODE (~(u64)0x0)
|
|
|
|
/*
|
|
* This function (and the get_addr() and get_phys_addr() functions are somewhat
|
|
* meaningless in userspace.
|
|
*
|
|
* There is no GPU in the loop here, so defining a "GPU physical" address is
|
|
* difficult. What we do here is simple but limited. We'll treat the GPU physical
|
|
* address as just the bottom 32 bits of the CPU virtual address. Since the driver
|
|
* shouldn't be dereferencing these pointers in the first place that's sufficient
|
|
* to make most tests work. The reason we truncate the CPU VA is because the
|
|
* address returned from this is programmed into the GMMU PTEs/PDEs. That code
|
|
* asserts that the address is a valid GPU physical address (i.e less than some
|
|
* number of bits, depending on chip).
|
|
*
|
|
* However, this does lead to some potential quirks: GPU addresses of different
|
|
* CPU virtual addresses could alias (e.g B and B + 4GB will both result in the
|
|
* same value when ANDing with 0xFFFFFFFF.
|
|
*
|
|
* If there is a buffer with an address range that crosses a 4GB boundary it'll
|
|
* be detected here. A more sophisticated buffer to GPU virtual address approach
|
|
* could be taken, but for now this is probably sufficient. At least for one run
|
|
* through the unit test framework, the CPU malloc() address range seemed to be
|
|
* 0x555555000000 - this is a long way away from any 4GB boundary.
|
|
*
|
|
* For invalid nvgpu_mems and nvgpu_mems with no cpu_va, just return NULL.
|
|
* There's little else we can do. In many cases in the unit test FW we wind up
|
|
* getting essentially uninitialized nvgpu_mems.
|
|
*/
|
|
static u64 nvgpu_mem_userspace_get_addr(struct gk20a *g, struct nvgpu_mem *mem)
|
|
{
|
|
u64 hi_front = ((u64)(uintptr_t)mem->cpu_va) & ~0xffffffffUL;
|
|
u64 hi_back = ((u64)(uintptr_t)mem->cpu_va + mem->size - 1U) & ~0xffffffffUL;
|
|
|
|
if (!nvgpu_mem_is_valid(mem) || mem->cpu_va == NULL) {
|
|
return 0x0UL;
|
|
}
|
|
|
|
if (hi_front != hi_back) {
|
|
nvgpu_err(g, "Mismatching cpu_va calc.");
|
|
nvgpu_err(g, " valid = %s", nvgpu_mem_is_valid(mem) ? "yes" : "no");
|
|
nvgpu_err(g, " cpu_va = %p", mem->cpu_va);
|
|
nvgpu_err(g, " size = %lx", mem->size);
|
|
nvgpu_err(g, " hi_front = 0x%llx", hi_front);
|
|
nvgpu_err(g, " hi_back = 0x%llx", hi_back);
|
|
}
|
|
|
|
nvgpu_assert(hi_front == hi_back);
|
|
|
|
return ((u64)(uintptr_t)mem->cpu_va) & 0xffffffffUL;
|
|
}
|
|
|
|
/*
 * Return the "GPU physical" address of @mem; in this userspace build that is
 * simply the truncated CPU VA computed by nvgpu_mem_userspace_get_addr().
 */
u64 nvgpu_mem_get_addr(struct gk20a *g, struct nvgpu_mem *mem)
{
	u64 addr = nvgpu_mem_userspace_get_addr(g, mem);

	return addr;
}
|
|
|
|
/*
 * Return the CPU "physical" address of @mem; in userspace there is no real
 * physical address, so this is the same truncated CPU VA as
 * nvgpu_mem_get_addr().
 */
u64 nvgpu_mem_get_phys_addr(struct gk20a *g, struct nvgpu_mem *mem)
{
	u64 phys = nvgpu_mem_userspace_get_addr(g, mem);

	return phys;
}
|
|
|
|
void *nvgpu_mem_sgl_next(void *sgl)
|
|
{
|
|
struct nvgpu_mem_sgl *mem = (struct nvgpu_mem_sgl *)sgl;
|
|
|
|
return (void *) mem->next;
|
|
}
|
|
|
|
u64 nvgpu_mem_sgl_phys(struct gk20a *g, void *sgl)
|
|
{
|
|
struct nvgpu_mem_sgl *mem = (struct nvgpu_mem_sgl *)sgl;
|
|
|
|
return (u64)(uintptr_t)mem->phys;
|
|
}
|
|
|
|
/*
 * IPA to PA translation. In the userspace build there is no intermediate
 * physical address space, so IPA == PA and @ipa/@pa_len are unused.
 */
u64 nvgpu_mem_sgl_ipa_to_pa(struct gk20a *g, void *sgl, u64 ipa, u64 *pa_len)
{
	(void)ipa;
	(void)pa_len;

	return nvgpu_mem_sgl_phys(g, sgl);
}
|
|
|
|
u64 nvgpu_mem_sgl_dma(void *sgl)
|
|
{
|
|
struct nvgpu_mem_sgl *mem = (struct nvgpu_mem_sgl *)sgl;
|
|
|
|
return (u64)(uintptr_t)mem->dma;
|
|
}
|
|
|
|
u64 nvgpu_mem_sgl_length(void *sgl)
|
|
{
|
|
struct nvgpu_mem_sgl *mem = (struct nvgpu_mem_sgl *)sgl;
|
|
|
|
return (u64)mem->length;
|
|
}
|
|
|
|
u64 nvgpu_mem_sgl_gpu_addr(struct gk20a *g, void *sgl,
|
|
struct nvgpu_gmmu_attrs *attrs)
|
|
{
|
|
struct nvgpu_mem_sgl *mem = (struct nvgpu_mem_sgl *)sgl;
|
|
|
|
if (mem->dma == 0U) {
|
|
return g->ops.mm.gmmu.gpu_phys_addr(g, attrs, mem->phys);
|
|
}
|
|
|
|
if (mem->dma == DMA_ERROR_CODE) {
|
|
return 0x0;
|
|
}
|
|
|
|
return nvgpu_mem_iommu_translate(g, mem->dma);
|
|
}
|
|
|
|
bool nvgpu_mem_sgt_iommuable(struct gk20a *g, struct nvgpu_sgt *sgt)
|
|
{
|
|
struct nvgpu_os_posix *p = nvgpu_os_posix_from_gk20a(g);
|
|
|
|
return p->mm_sgt_is_iommuable;
|
|
}
|
|
|
|
void nvgpu_mem_sgl_free(struct gk20a *g, struct nvgpu_mem_sgl *sgl)
|
|
{
|
|
struct nvgpu_mem_sgl *tptr;
|
|
|
|
while (sgl != NULL) {
|
|
tptr = sgl->next;
|
|
nvgpu_kfree(g, sgl);
|
|
sgl = tptr;
|
|
}
|
|
}
|
|
|
|
void nvgpu_mem_sgt_free(struct gk20a *g, struct nvgpu_sgt *sgt)
|
|
{
|
|
nvgpu_mem_sgl_free(g, (struct nvgpu_mem_sgl *)sgt->sgl);
|
|
nvgpu_kfree(g, sgt);
|
|
}
|
|
|
|
/*
 * SGT operations table for the POSIX/userspace build. Note that sgl_ipa
 * reuses nvgpu_mem_sgl_phys since userspace has no IPA/PA distinction.
 */
static struct nvgpu_sgt_ops nvgpu_sgt_posix_ops = {
	.sgl_next	= nvgpu_mem_sgl_next,
	.sgl_phys	= nvgpu_mem_sgl_phys,
	.sgl_ipa	= nvgpu_mem_sgl_phys,
	.sgl_ipa_to_pa	= nvgpu_mem_sgl_ipa_to_pa,
	.sgl_dma	= nvgpu_mem_sgl_dma,
	.sgl_length	= nvgpu_mem_sgl_length,
	.sgl_gpu_addr	= nvgpu_mem_sgl_gpu_addr,
	.sgt_iommuable	= nvgpu_mem_sgt_iommuable,
	.sgt_free	= nvgpu_mem_sgt_free,
};
|
|
|
|
struct nvgpu_mem_sgl *nvgpu_mem_sgl_posix_create_from_list(struct gk20a *g,
|
|
struct nvgpu_mem_sgl *sgl_list, u32 nr_sgls,
|
|
u64 *total_size)
|
|
{
|
|
struct nvgpu_mem_sgl *sgl_ptr, *tptr, *head = NULL;
|
|
u32 i;
|
|
|
|
*total_size = 0;
|
|
for (i = 0; i < nr_sgls; i++) {
|
|
tptr = (struct nvgpu_mem_sgl *)nvgpu_kzalloc(g,
|
|
sizeof(struct nvgpu_mem_sgl));
|
|
if (tptr == NULL) {
|
|
goto err;
|
|
}
|
|
|
|
if (i == 0U) {
|
|
sgl_ptr = tptr;
|
|
head = sgl_ptr;
|
|
} else {
|
|
sgl_ptr->next = tptr;
|
|
sgl_ptr = sgl_ptr->next;
|
|
}
|
|
sgl_ptr->next = NULL;
|
|
sgl_ptr->phys = sgl_list[i].phys;
|
|
sgl_ptr->dma = sgl_list[i].dma;
|
|
sgl_ptr->length = sgl_list[i].length;
|
|
*total_size += sgl_list[i].length;
|
|
}
|
|
|
|
return head;
|
|
|
|
err:
|
|
while (head != NULL) {
|
|
struct nvgpu_mem_sgl *tmp = head;
|
|
head = tmp->next;
|
|
nvgpu_kfree(g, tmp);
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
struct nvgpu_sgt *nvgpu_mem_sgt_posix_create_from_list(struct gk20a *g,
|
|
struct nvgpu_mem_sgl *sgl_list, u32 nr_sgls,
|
|
u64 *total_size)
|
|
{
|
|
struct nvgpu_sgt *sgt = nvgpu_kzalloc(g, sizeof(struct nvgpu_sgt));
|
|
struct nvgpu_mem_sgl *sgl;
|
|
|
|
if (sgt == NULL) {
|
|
return NULL;
|
|
}
|
|
|
|
sgl = nvgpu_mem_sgl_posix_create_from_list(g, sgl_list, nr_sgls,
|
|
total_size);
|
|
if (sgl == NULL) {
|
|
nvgpu_kfree(g, sgt);
|
|
return NULL;
|
|
}
|
|
sgt->sgl = (void *)sgl;
|
|
sgt->ops = &nvgpu_sgt_posix_ops;
|
|
|
|
return sgt;
|
|
}
|
|
|
|
/*
 * Populate @mem as a SYSMEM nvgpu_mem backed by the given SGL descriptor
 * array. Returns 0 on success, -ENOMEM if the SGT could not be built.
 */
int nvgpu_mem_posix_create_from_list(struct gk20a *g, struct nvgpu_mem *mem,
				     struct nvgpu_mem_sgl *sgl_list,
				     u32 nr_sgls)
{
	u64 total;
	struct nvgpu_sgt *sgt = nvgpu_mem_sgt_posix_create_from_list(g,
							sgl_list, nr_sgls,
							&total);

	if (sgt == NULL) {
		return -ENOMEM;
	}

	mem->priv.sgt = sgt;
	mem->aperture = APERTURE_SYSMEM;
	mem->size = total;
	mem->aligned_size = PAGE_ALIGN(total);

	return 0;
}
|
|
|
|
struct nvgpu_sgt *nvgpu_sgt_os_create_from_mem(struct gk20a *g,
|
|
struct nvgpu_mem *mem)
|
|
{
|
|
struct nvgpu_mem_sgl *sgl;
|
|
struct nvgpu_sgt *sgt;
|
|
|
|
if (mem->priv.sgt != NULL) {
|
|
return mem->priv.sgt;
|
|
}
|
|
|
|
sgt = nvgpu_kzalloc(g, sizeof(*sgt));
|
|
if (sgt == NULL) {
|
|
return NULL;
|
|
}
|
|
|
|
sgt->ops = &nvgpu_sgt_posix_ops;
|
|
|
|
/*
|
|
* The userspace implementation is simple: a single 'entry' (which we
|
|
* only need the nvgpu_mem_sgl struct to describe). A unit test can
|
|
* easily replace it if needed.
|
|
*/
|
|
sgl = (struct nvgpu_mem_sgl *) nvgpu_kzalloc(g, sizeof(
|
|
struct nvgpu_mem_sgl));
|
|
if (sgl == NULL) {
|
|
nvgpu_kfree(g, sgt);
|
|
return NULL;
|
|
}
|
|
|
|
sgl->length = mem->size;
|
|
sgl->phys = (u64) mem->cpu_va;
|
|
sgt->sgl = (void *) sgl;
|
|
|
|
return sgt;
|
|
}
|
|
|
|
int nvgpu_mem_create_from_mem(struct gk20a *g,
|
|
struct nvgpu_mem *dest, struct nvgpu_mem *src,
|
|
u64 start_page, size_t nr_pages)
|
|
{
|
|
u64 start = start_page * U64(PAGE_SIZE);
|
|
u64 size = U64(nr_pages) * U64(PAGE_SIZE);
|
|
|
|
if (src->aperture != APERTURE_SYSMEM) {
|
|
return -EINVAL;
|
|
}
|
|
|
|
/* Some silly things a caller might do... */
|
|
if (size > src->size) {
|
|
return -EINVAL;
|
|
}
|
|
if ((start + size) > src->size) {
|
|
return -EINVAL;
|
|
}
|
|
|
|
(void) memset(dest, 0, sizeof(*dest));
|
|
|
|
dest->cpu_va = ((char *)src->cpu_va) + start;
|
|
dest->mem_flags = src->mem_flags | NVGPU_MEM_FLAG_SHADOW_COPY;
|
|
dest->aperture = src->aperture;
|
|
dest->skip_wmb = src->skip_wmb;
|
|
dest->size = size;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Creating an nvgpu_mem from a raw physical address is not supported in the
 * POSIX/userspace build (there are no physical addresses here); any call is
 * a programming error and trips BUG(). The return is unreachable in
 * practice.
 */
int __nvgpu_mem_create_from_phys(struct gk20a *g, struct nvgpu_mem *dest,
				 u64 src_phys, int nr_pages)
{
	BUG();
	return 0;
}
|