gpu: nvgpu: unit: Fix long standing MM bug

Not sure if there's an actual bug or JIRA filed for this, but the change here fixes a long standing bug in the MM code for unit tests. The GMMU programming code verifies that the CPU _physical_ address programmed into the GMMU PDE0 is a valid Tegra SoC CPU physical address. That means that it's not too large a value. The POSIX implementation of the nvgpu_mem related code used the CPU virtual address as the "phys" address. Obviously, in userspace, there's no access to physical addresses, so in some sense it's a meaningless function. But the GMMU code does care, as described above, about the format of the address. The fix is simple enough: since the nvgpu_mem_get_addr() and nvgpu_mem_get_phys_addr() values shouldn't actually be accessed by the driver anyway (they could be vidmem addresses or IOVA addresses in real life), ANDing them with 0xffffffff (i.e. 32 bits) truncates the potentially problematic CPU virtual address bits returned by malloc() in the POSIX environment. With this, a run of the unit test framework passes for me locally on my Ubuntu 18 machine. Also, clean up a few whitespace issues I noticed while I debugged this and fix another long standing bug where the NVGPU_DEFAULT_DBG_MASK was not being copied to g->log_mask during gk20a struct init. Change-Id: Ie92d3bd26240d194183b4376973d4d32cb6f9b8f Signed-off-by: Alex Waterman <alexw@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2395953 Tested-by: mobile promotions <svcmobile_promotions@nvidia.com> Reviewed-by: automaticguardword <automaticguardword@nvidia.com> Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com> Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com> Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com> Reviewed-by: Konsta Holtta <kholtta@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> GVS: Gerrit_Virtual_Submit
2025-12-22 17:36:20 +03:00 · 2020-08-07 14:46:51 -05:00
parent 71b005c1ef
commit 27cd70afd8
7 changed files with 58 additions and 11 deletions
--- a/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c
@@ -505,8 +505,7 @@ static int nvgpu_set_pd_level(struct vm_gk20a *vm,
 		 * target addr is the real physical address we are aiming for.
 		 */
 		target_addr = (next_pd != NULL) ?
-			nvgpu_pd_gpu_addr(g, next_pd) :
-			phys_addr;
+			nvgpu_pd_gpu_addr(g, next_pd) : phys_addr;

 		l->update_entry(vm, l,
 				pd, pd_idx,
--- a/drivers/gpu/nvgpu/os/posix/nvgpu.c
+++ b/drivers/gpu/nvgpu/os/posix/nvgpu.c
@@ -250,7 +250,7 @@ struct gk20a *nvgpu_posix_probe(void)
 	(void) memset(p, 0, sizeof(*p));

 	g = &p->g;
-	g->log_mask = 0;
+	g->log_mask = NVGPU_DEFAULT_DBG_MASK;
 	g->mm.g = g;

 	if (nvgpu_kmem_init(g) != 0) {
--- a/drivers/gpu/nvgpu/os/posix/posix-nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/os/posix/posix-nvgpu_mem.c
@@ -33,16 +33,63 @@
 #define DMA_ERROR_CODE	(~(u64)0x0)

 /*
- * These functions are somewhat meaningless.
+ * This function (and the get_addr() and get_phys_addr() functions) is
+ * somewhat meaningless in userspace.
+ *
+ * There is no GPU in the loop here, so defining a "GPU physical" address is
+ * difficult. What we do here is simple but limited. We'll treat the GPU physical
+ * address as just the bottom 32 bits of the CPU virtual address. Since the driver
+ * shouldn't be dereferencing these pointers in the first place that's sufficient
+ * to make most tests work. The reason we truncate the CPU VA is because the
+ * address returned from this is programmed into the GMMU PTEs/PDEs. That code
+ * asserts that the address is a valid GPU physical address (i.e. it fits in
+ * some number of bits, depending on chip).
+ *
+ * However, this does lead to some potential quirks: GPU addresses of different
+ * CPU virtual addresses could alias (e.g. B and B + 4GB will both result in
+ * the same value when ANDing with 0xFFFFFFFF).
+ *
+ * If there is a buffer with an address range that crosses a 4GB boundary it'll
+ * be detected here. A more sophisticated buffer to GPU virtual address approach
+ * could be taken, but for now this is probably sufficient. At least for one run
+ * through the unit test framework, the CPU malloc() address range seemed to be
+ * 0x555555000000 - this is a long way away from any 4GB boundary.
+ *
+ * For invalid nvgpu_mems and nvgpu_mems with no cpu_va, just return 0.
+ * There's little else we can do. In many cases in the unit test FW we wind up
+ * getting essentially uninitialized nvgpu_mems.
 */
+static u64 nvgpu_mem_userspace_get_addr(struct gk20a *g, struct nvgpu_mem *mem)
+{
+	u64 hi_front = ((u64)(uintptr_t)mem->cpu_va) & ~0xffffffffUL;
+	u64 hi_back  = ((u64)(uintptr_t)mem->cpu_va + mem->size - 1U) & ~0xffffffffUL;
+
+	if (!nvgpu_mem_is_valid(mem) || mem->cpu_va == NULL) {
+		return 0x0UL;
+	}
+
+	if (hi_front != hi_back) {
+		nvgpu_err(g, "Mismatching cpu_va calc.");
+		nvgpu_err(g, "  valid = %s", nvgpu_mem_is_valid(mem) ? "yes" : "no");
+		nvgpu_err(g, "  cpu_va = %p", mem->cpu_va);
+		nvgpu_err(g, "  size   = %lx", mem->size);
+		nvgpu_err(g, "  hi_front = 0x%llx", hi_front);
+		nvgpu_err(g, "  hi_back  = 0x%llx", hi_back);
+	}
+
+	nvgpu_assert(hi_front == hi_back);
+
+	return ((u64)(uintptr_t)mem->cpu_va) & 0xffffffffUL;
+}
+
 u64 nvgpu_mem_get_addr(struct gk20a *g, struct nvgpu_mem *mem)
 {
-	return (u64)(uintptr_t)mem->cpu_va;
+	return nvgpu_mem_userspace_get_addr(g, mem);
 }

 u64 nvgpu_mem_get_phys_addr(struct gk20a *g, struct nvgpu_mem *mem)
 {
-	return (u64)(uintptr_t)mem->cpu_va;
+	return nvgpu_mem_userspace_get_addr(g, mem);
 }

 void *nvgpu_mem_sgl_next(void *sgl)
--- a/userspace/units/bus/nvgpu-bus.c
+++ b/userspace/units/bus/nvgpu-bus.c
@@ -208,7 +208,7 @@ done:
 int test_bar_bind(struct unit_module *m, struct gk20a *g, void *args)
 {
 	int ret = UNIT_FAIL;
-	struct nvgpu_mem bar_inst;
+	struct nvgpu_mem bar_inst = {0};
 	struct nvgpu_posix_fault_inj *timer_fi =
 					nvgpu_timers_get_fault_injection();

--- a/userspace/units/fb/fb_gm20b_fusa.c
+++ b/userspace/units/fb/fb_gm20b_fusa.c
@@ -42,7 +42,7 @@ int fb_gm20b_tlb_invalidate_test(struct unit_module *m, struct gk20a *g,
 	void *args)
 {
 	int err;
-	struct nvgpu_mem pdb;
+	struct nvgpu_mem pdb = {0};
 	struct nvgpu_posix_fault_inj *timer_fi =
 			nvgpu_timers_get_fault_injection();

--- a/userspace/units/mm/mm/mm.c
+++ b/userspace/units/mm/mm/mm.c
@@ -637,6 +637,7 @@ int test_mm_inst_block(struct unit_module *m, struct gk20a *g,
 	struct nvgpu_mem *block = malloc(sizeof(struct nvgpu_mem));
 	int ret = UNIT_FAIL;

+	memset(block, 0, sizeof(*block));
 	block->aperture = APERTURE_SYSMEM;
 	block->cpu_va = (void *) TEST_ADDRESS;

--- a/userspace/units/mm/vm/vm.c
+++ b/userspace/units/mm/vm/vm.c
@@ -48,7 +48,7 @@
 #include <nvgpu/posix/posix-fault-injection.h>

 /* Random CPU physical address for the buffers we'll map */
-#define BUF_CPU_PA		0xEFAD80000000ULL
+#define BUF_CPU_PA		0xEFAD0000ULL
 #define TEST_BATCH_NUM_BUFFERS	10
 #define PHYS_ADDR_BITS_HIGH	0x00FFFFFFU
 #define PHYS_ADDR_BITS_LOW	0xFFFFFF00U