From 27cd70afd8934ed463df49e42ef2488b6b60bd54 Mon Sep 17 00:00:00 2001 From: Alex Waterman Date: Fri, 7 Aug 2020 14:46:51 -0500 Subject: [PATCH] gpu: nvgpu: unit: Fix long standing MM bug Not sure if there's an actual bug or JIRA filed for this, but the change here fixes a long standing bug in the MM code for unit tests. The GMMU programming code verifies that the CPU _physical_ address programmed into the GMMU PDE0 is a valid Tegra SoC CPU physical address. That means that it's not too large a value. The POSIX implementation of the nvgpu_mem related code used the CPU virtual address as the "phys" address. Obviously, in userspace, there's no access to physical addresses, so in some sense it's a meaningless function. But the GMMU code does care, as described above, about the format of the address. The fix is simple enough: since the nvgpu_mem_get_addr() and nvgpu_mem_get_phys_addr() values shouldn't actually be accessed by the driver anyway (they could be vidmem addresses or IOVA addresses in real life) ANDing them with 0xffffffff (i.e. 32 bits) truncates the potentially problematic CPU virtual address bits returned by malloc() in the POSIX environment. With this, a run of the unit test framework passes for me locally on my Ubuntu 18 machine. Also, clean up a few whitespace issues I noticed while I debugged this and fix another long standing bug where the NVGPU_DEFAULT_DBG_MASK was not being copied to g->log_mask during gk20a struct init. 
Change-Id: Ie92d3bd26240d194183b4376973d4d32cb6f9b8f Signed-off-by: Alex Waterman Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2395953 Tested-by: mobile promotions Reviewed-by: automaticguardword Reviewed-by: svc-mobile-coverity Reviewed-by: svc-mobile-cert Reviewed-by: Vijayakumar Subbu Reviewed-by: Konsta Holtta Reviewed-by: mobile promotions GVS: Gerrit_Virtual_Submit --- drivers/gpu/nvgpu/common/mm/gmmu/page_table.c | 3 +- drivers/gpu/nvgpu/os/posix/nvgpu.c | 2 +- drivers/gpu/nvgpu/os/posix/posix-nvgpu_mem.c | 57 +++++++++++++++++-- userspace/units/bus/nvgpu-bus.c | 2 +- userspace/units/fb/fb_gm20b_fusa.c | 2 +- userspace/units/mm/mm/mm.c | 1 + userspace/units/mm/vm/vm.c | 2 +- 7 files changed, 58 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c b/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c index 30677a924..58288590b 100644 --- a/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c +++ b/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c @@ -505,8 +505,7 @@ static int nvgpu_set_pd_level(struct vm_gk20a *vm, * target addr is the real physical address we are aiming for. */ target_addr = (next_pd != NULL) ? 
- nvgpu_pd_gpu_addr(g, next_pd) : - phys_addr; + nvgpu_pd_gpu_addr(g, next_pd) : phys_addr; l->update_entry(vm, l, pd, pd_idx, diff --git a/drivers/gpu/nvgpu/os/posix/nvgpu.c b/drivers/gpu/nvgpu/os/posix/nvgpu.c index 83992d216..02f883818 100644 --- a/drivers/gpu/nvgpu/os/posix/nvgpu.c +++ b/drivers/gpu/nvgpu/os/posix/nvgpu.c @@ -250,7 +250,7 @@ struct gk20a *nvgpu_posix_probe(void) (void) memset(p, 0, sizeof(*p)); g = &p->g; - g->log_mask = 0; + g->log_mask = NVGPU_DEFAULT_DBG_MASK; g->mm.g = g; if (nvgpu_kmem_init(g) != 0) { diff --git a/drivers/gpu/nvgpu/os/posix/posix-nvgpu_mem.c b/drivers/gpu/nvgpu/os/posix/posix-nvgpu_mem.c index a441fe0e8..17562ff55 100644 --- a/drivers/gpu/nvgpu/os/posix/posix-nvgpu_mem.c +++ b/drivers/gpu/nvgpu/os/posix/posix-nvgpu_mem.c @@ -33,16 +33,63 @@ #define DMA_ERROR_CODE (~(u64)0x0) /* - * These functions are somewhat meaningless. + * This function (and the get_addr() and get_phys_addr() functions) are somewhat + * meaningless in userspace. + * + * There is no GPU in the loop here, so defining a "GPU physical" address is + * difficult. What we do here is simple but limited. We'll treat the GPU physical + * address as just the bottom 32 bits of the CPU virtual address. Since the driver + * shouldn't be dereferencing these pointers in the first place that's sufficient + * to make most tests work. The reason we truncate the CPU VA is because the + * address returned from this is programmed into the GMMU PTEs/PDEs. That code + * asserts that the address is a valid GPU physical address (i.e. less than some + * number of bits, depending on chip). + * + * However, this does lead to some potential quirks: GPU addresses of different + * CPU virtual addresses could alias (e.g. B and B + 4GB will both result in the + * same value when ANDing with 0xFFFFFFFF). + * + * If there is a buffer with an address range that crosses a 4GB boundary it'll + * be detected here. 
A more sophisticated buffer to GPU virtual address approach + * could be taken, but for now this is probably sufficient. At least for one run + * through the unit test framework, the CPU malloc() address range seemed to be + * 0x555555000000 - this is a long way away from any 4GB boundary. + * + * For invalid nvgpu_mems and nvgpu_mems with no cpu_va, just return NULL. + * There's little else we can do. In many cases in the unit test FW we wind up + * getting essentially uninitialized nvgpu_mems. */ +static u64 nvgpu_mem_userspace_get_addr(struct gk20a *g, struct nvgpu_mem *mem) +{ + u64 hi_front = ((u64)(uintptr_t)mem->cpu_va) & ~0xffffffffUL; + u64 hi_back = ((u64)(uintptr_t)mem->cpu_va + mem->size - 1U) & ~0xffffffffUL; + + if (!nvgpu_mem_is_valid(mem) || mem->cpu_va == NULL) { + return 0x0UL; + } + + if (hi_front != hi_back) { + nvgpu_err(g, "Mismatching cpu_va calc."); + nvgpu_err(g, " valid = %s", nvgpu_mem_is_valid(mem) ? "yes" : "no"); + nvgpu_err(g, " cpu_va = %p", mem->cpu_va); + nvgpu_err(g, " size = %lx", mem->size); + nvgpu_err(g, " hi_front = 0x%llx", hi_front); + nvgpu_err(g, " hi_back = 0x%llx", hi_back); + } + + nvgpu_assert(hi_front == hi_back); + + return ((u64)(uintptr_t)mem->cpu_va) & 0xffffffffUL; +} + u64 nvgpu_mem_get_addr(struct gk20a *g, struct nvgpu_mem *mem) { - return (u64)(uintptr_t)mem->cpu_va; + return nvgpu_mem_userspace_get_addr(g, mem); } u64 nvgpu_mem_get_phys_addr(struct gk20a *g, struct nvgpu_mem *mem) { - return (u64)(uintptr_t)mem->cpu_va; + return nvgpu_mem_userspace_get_addr(g, mem); } void *nvgpu_mem_sgl_next(void *sgl) @@ -121,8 +168,8 @@ void nvgpu_mem_sgt_free(struct gk20a *g, struct nvgpu_sgt *sgt) static struct nvgpu_sgt_ops nvgpu_sgt_posix_ops = { .sgl_next = nvgpu_mem_sgl_next, .sgl_phys = nvgpu_mem_sgl_phys, - .sgl_ipa = nvgpu_mem_sgl_phys, - .sgl_ipa_to_pa = nvgpu_mem_sgl_ipa_to_pa, + .sgl_ipa = nvgpu_mem_sgl_phys, + .sgl_ipa_to_pa = nvgpu_mem_sgl_ipa_to_pa, .sgl_dma = nvgpu_mem_sgl_dma, .sgl_length = 
nvgpu_mem_sgl_length, .sgl_gpu_addr = nvgpu_mem_sgl_gpu_addr, diff --git a/userspace/units/bus/nvgpu-bus.c b/userspace/units/bus/nvgpu-bus.c index 60c146c61..de407f8f3 100644 --- a/userspace/units/bus/nvgpu-bus.c +++ b/userspace/units/bus/nvgpu-bus.c @@ -208,7 +208,7 @@ done: int test_bar_bind(struct unit_module *m, struct gk20a *g, void *args) { int ret = UNIT_FAIL; - struct nvgpu_mem bar_inst; + struct nvgpu_mem bar_inst = {0}; struct nvgpu_posix_fault_inj *timer_fi = nvgpu_timers_get_fault_injection(); diff --git a/userspace/units/fb/fb_gm20b_fusa.c b/userspace/units/fb/fb_gm20b_fusa.c index c61f1f5e6..61e089d42 100644 --- a/userspace/units/fb/fb_gm20b_fusa.c +++ b/userspace/units/fb/fb_gm20b_fusa.c @@ -42,7 +42,7 @@ int fb_gm20b_tlb_invalidate_test(struct unit_module *m, struct gk20a *g, void *args) { int err; - struct nvgpu_mem pdb; + struct nvgpu_mem pdb = {0}; struct nvgpu_posix_fault_inj *timer_fi = nvgpu_timers_get_fault_injection(); diff --git a/userspace/units/mm/mm/mm.c b/userspace/units/mm/mm/mm.c index ae7920770..741f66bdb 100644 --- a/userspace/units/mm/mm/mm.c +++ b/userspace/units/mm/mm/mm.c @@ -637,6 +637,7 @@ int test_mm_inst_block(struct unit_module *m, struct gk20a *g, struct nvgpu_mem *block = malloc(sizeof(struct nvgpu_mem)); int ret = UNIT_FAIL; + memset(block, 0, sizeof(*block)); block->aperture = APERTURE_SYSMEM; block->cpu_va = (void *) TEST_ADDRESS; diff --git a/userspace/units/mm/vm/vm.c b/userspace/units/mm/vm/vm.c index b2997791a..6283e804b 100644 --- a/userspace/units/mm/vm/vm.c +++ b/userspace/units/mm/vm/vm.c @@ -48,7 +48,7 @@ #include /* Random CPU physical address for the buffers we'll map */ -#define BUF_CPU_PA 0xEFAD80000000ULL +#define BUF_CPU_PA 0xEFAD0000ULL #define TEST_BATCH_NUM_BUFFERS 10 #define PHYS_ADDR_BITS_HIGH 0x00FFFFFFU #define PHYS_ADDR_BITS_LOW 0xFFFFFF00U