/*
 * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <stdlib.h>
#include <string.h>

#include <unit/io.h>
#include <unit/unit.h>

#include <nvgpu/bug.h>
#include <nvgpu/io.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/enabled.h>
#include <nvgpu/sizes.h>
#include <nvgpu/pramin.h>
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/page_allocator.h>
#include <nvgpu/posix/io.h>

#include "hal/bus/bus_gk20a.h"

#include <nvgpu/hw/gk20a/hw_bus_gk20a.h>
#include <nvgpu/hw/gk20a/hw_pram_gk20a.h>

static u32 *rand_test_data;
static u32 *vidmem;

/*
 * VIDMEM_ADDRESS represents an arbitrary VIDMEM address that will be passed
 * to the PRAMIN module to set the PRAM window to.
 */
#define VIDMEM_ADDRESS	0x00100100
#define VIDMEM_SIZE	(8 * SZ_1M)

/*
 * Amount of data to use in the tests.
 * Must be less than or equal to both VIDMEM_SIZE and RAND_DATA_SIZE.
 * To exercise multiple PRAM windows, TEST_SIZE should be > 1 MB.
 */
#define TEST_SIZE	(2 * SZ_1M)

/* Size of the random data to generate; must be >= TEST_SIZE. */
#define RAND_DATA_SIZE	(2 * SZ_1M)

/* Simple pattern for memset operations */
#define MEMSET_PATTERN	0x12345678

static bool is_PRAM_range(struct gk20a *g, u32 addr)
{
	/* The PRAM window covers 1MB of BAR0 space from pram_data032_r(0) */
	if ((addr >= pram_data032_r(0)) &&
	    (addr < (pram_data032_r(0) + SZ_1M))) {
		return true;
	}
	return false;
}

static u32 PRAM_get_u32_index(struct gk20a *g, u32 addr)
{
	/* Offset is based on the currently set 1MB PRAM window */
	u32 offset = (g->mm.pramin_window) <<
			bus_bar0_window_target_bar0_window_base_shift_v();

	/* addr must be 32-bit aligned */
	BUG_ON(addr & 0x3);

	return (addr + offset) / sizeof(u32);
}

static u32 PRAM_read(struct gk20a *g, u32 addr)
{
	return vidmem[PRAM_get_u32_index(g, addr)];
}

static void PRAM_write(struct gk20a *g, u32 addr, u32 value)
{
	vidmem[PRAM_get_u32_index(g, addr)] = value;
}

/*
 * Write callback (for all nvgpu_writel calls). If the address belongs to the
 * PRAM range, route the call to our own handler, otherwise call the IO
 * framework.
 */
static void writel_access_reg_fn(struct gk20a *g,
				 struct nvgpu_reg_access *access)
{
	if (is_PRAM_range(g, access->addr)) {
		PRAM_write(g, access->addr - pram_data032_r(0),
			   access->value);
	} else {
		nvgpu_posix_io_writel_reg_space(g, access->addr,
						access->value);
	}
	nvgpu_posix_io_record_access(g, access);
}

/*
 * Read callback, similar to the write callback above.
 */
static void readl_access_reg_fn(struct gk20a *g,
				struct nvgpu_reg_access *access)
{
	if (is_PRAM_range(g, access->addr)) {
		access->value = PRAM_read(g, access->addr - pram_data032_r(0));
	} else {
		access->value = nvgpu_posix_io_readl_reg_space(g,
							       access->addr);
	}
}
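
/*
 * Worked example of the routing above (a sketch; the 16-bit shift, i.e. a
 * 64KB window-base granularity, is an assumption based on the gk20a BAR0
 * window layout and is not asserted by this test):
 *
 *   - The PRAM window exposes 1MB of VIDMEM through BAR0, starting at
 *     pram_data032_r(0).
 *   - To reach VIDMEM_ADDRESS = 0x00100100, the PRAMIN core code programs
 *     g->mm.pramin_window = 0x00100000 >> 16 = 0x10.
 *   - A register access at pram_data032_r(0) + 0x100 then resolves in
 *     PRAM_get_u32_index() to (0x10 << 16) + 0x100 = 0x00100100, i.e.
 *     vidmem[0x00100100 / 4], which is exactly the targeted VIDMEM word.
 */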

/*
 * Define all the callbacks to be used during the test. Typically all
 * write operations use the same callback, likewise for all read operations.
 */
static struct nvgpu_posix_io_callbacks pramin_callbacks = {
	/* Write APIs all can use the same accessor. */
	.writel = writel_access_reg_fn,
	.writel_check = writel_access_reg_fn,
	.bar1_writel = writel_access_reg_fn,
	.usermode_writel = writel_access_reg_fn,

	/* Likewise for the read APIs. */
	.__readl = readl_access_reg_fn,
	.readl = readl_access_reg_fn,
	.bar1_readl = readl_access_reg_fn,
};

static void init_rand_buffer(void)
{
	u32 i;

	/*
	 * Fill the test buffer with random data. Always use the same seed to
	 * make the test deterministic.
	 */
	srand(0);
	for (i = 0; i < RAND_DATA_SIZE / sizeof(u32); i++) {
		rand_test_data[i] = (u32) rand();
	}
}

static int init_test_env(struct unit_module *m, struct gk20a *g)
{
	static bool first_init = true;
	int err = 0;

	if (!first_init) {
		/*
		 * If already initialized, just refill the test buffer with
		 * new random data.
		 */
		init_rand_buffer();
		return 0;
	}
	first_init = false;

	nvgpu_init_pramin(&g->mm);

	/* Create a test buffer to be filled with random data */
	rand_test_data = (u32 *) malloc(RAND_DATA_SIZE);
	if (rand_test_data == NULL) {
		return -ENOMEM;
	}

	/* Create the VIDMEM */
	vidmem = (u32 *) malloc(VIDMEM_SIZE);
	if (vidmem == NULL) {
		err = -ENOMEM;
		goto clean_rand_data;
	}

	nvgpu_posix_register_io(g, &pramin_callbacks);
	nvgpu_posix_io_init_reg_space(g);

	/* Minimum HAL init for PRAMIN */
	g->ops.bus.set_bar0_window = gk20a_bus_set_bar0_window;
	g->ops.pramin.data032_r = pram_data032_r;

	/* Register space: BUS_BAR0 */
	if (nvgpu_posix_io_add_reg_space(g, bus_bar0_window_r(), 0x100) != 0) {
		err = -ENOMEM;
		goto clean_vidmem;
	}

	init_rand_buffer();
	return 0;

clean_vidmem:
	free(vidmem);
clean_rand_data:
	free(rand_test_data);
	return err;
}

static int free_test_env(struct unit_module *m, struct gk20a *g, void *__args)
{
	free(rand_test_data);
	free(vidmem);
	nvgpu_posix_io_delete_reg_space(g, bus_bar0_window_r());
	return UNIT_SUCCESS;
}

static int create_alloc_and_sgt(struct unit_module *m, struct gk20a *g,
				struct nvgpu_mem *mem)
{
	struct nvgpu_sgt *sgt;

	mem->vidmem_alloc = (struct nvgpu_page_alloc *)
		malloc(sizeof(struct nvgpu_page_alloc));
	if (mem->vidmem_alloc == NULL) {
		unit_err(m, "Memory allocation failed\n");
		return -ENOMEM;
	}

	/* All we need from the SGT are the ops */
	sgt = nvgpu_sgt_create_from_mem(g, mem);
	if (sgt == NULL) {
		unit_err(m, "Memory allocation failed\n");
		free(mem->vidmem_alloc);
		return -ENOMEM;
	}
	mem->vidmem_alloc->sgt.ops = sgt->ops;
	mem->vidmem_alloc->sgt.sgl = (struct nvgpu_sgl *) mem;
	free(sgt);

	/* All PRAMIN accesses must target a VIDMEM aperture */
	mem->aperture = APERTURE_VIDMEM;

	return 0;
}

static struct nvgpu_mem_sgl *create_sgl(struct unit_module *m, u64 length,
					u64 phys)
{
	struct nvgpu_mem_sgl *sgl = (struct nvgpu_mem_sgl *)
		malloc(sizeof(struct nvgpu_mem_sgl));

	if (sgl == NULL) {
		unit_err(m, "Memory allocation failed\n");
		return NULL;
	}

	sgl->length = length;
	sgl->phys = phys;
	/* Callers chain SGLs explicitly; default to a single-entry list */
	sgl->next = NULL;

	return sgl;
}
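
/*
 * Illustrative only: a minimal sketch of how create_alloc_and_sgt() and
 * create_sgl() compose into a PRAMIN-accessible nvgpu_mem. The test cases
 * below inline this pattern (some with multiple chained SGLs), so this
 * helper is not called anywhere; it documents the common setup.
 */
static int __attribute__((unused)) setup_single_sgl_mem(struct unit_module *m,
		struct gk20a *g, struct nvgpu_mem *mem, u64 length, u64 phys)
{
	struct nvgpu_mem_sgl *sgl;

	if (create_alloc_and_sgt(m, g, mem) != 0) {
		return -ENOMEM;
	}

	sgl = create_sgl(m, length, phys);
	if (sgl == NULL) {
		free(mem->vidmem_alloc);
		return -ENOMEM;
	}
	mem->vidmem_alloc->sgt.sgl = (struct nvgpu_sgl *) sgl;

	return 0;
}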

/*
 * Test case to exercise "nvgpu_pramin_rd_n". It will read TEST_SIZE bytes
 * from VIDMEM base address VIDMEM_ADDRESS.
 * Only one SGL is used in this test.
 */
static int test_pramin_rd_n_single(struct unit_module *m, struct gk20a *g,
				   void *__args)
{
	u32 *dest;
	struct nvgpu_mem mem = { };
	struct nvgpu_mem_sgl *sgl;
	u32 byte_cnt = TEST_SIZE;
	bool success = false;

	if (init_test_env(m, g) != 0) {
		unit_return_fail(m, "Module init failed\n");
	}

	unit_info(m, "Reading %d bytes via PRAMIN\n", byte_cnt);

	/*
	 * Get the first byte_cnt bytes from the test buffer and copy them
	 * into VIDMEM.
	 */
	memcpy(((u8 *) vidmem) + VIDMEM_ADDRESS, (void *) rand_test_data,
	       byte_cnt);

	/* PRAMIN will copy data into the buffer below */
	dest = malloc(byte_cnt);
	if (dest == NULL) {
		unit_return_fail(m, "Memory allocation failed\n");
	}

	if (create_alloc_and_sgt(m, g, &mem) != 0) {
		goto free_dest;
	}

	sgl = create_sgl(m, byte_cnt, VIDMEM_ADDRESS);
	if (sgl == NULL) {
		goto free_vidmem;
	}
	mem.vidmem_alloc->sgt.sgl = (struct nvgpu_sgl *) sgl;

	nvgpu_pramin_rd_n(g, &mem, 0, byte_cnt, (void *) dest);

	if (memcmp((void *) dest, (void *) rand_test_data, byte_cnt) != 0) {
		unit_err(m, "Mismatch comparing copied data\n");
	} else {
		success = true;
	}

	free(sgl);
free_vidmem:
	free(mem.vidmem_alloc);
free_dest:
	free(dest);

	if (success)
		return UNIT_SUCCESS;
	else
		return UNIT_FAIL;
}

/*
 * Test case to exercise "nvgpu_pramin_wr_n" with a couple of advanced cases:
 * - Use multiple SGLs
 * - Use a byte offset
 */
static int test_pramin_wr_n_multi(struct unit_module *m, struct gk20a *g,
				  void *__args)
{
	u32 *src;
	struct nvgpu_mem mem = { };
	struct nvgpu_mem_sgl *sgl1, *sgl2, *sgl3;
	u32 byte_cnt = TEST_SIZE;
	u32 byte_offset = SZ_128K;
	void *vidmem_data;
	bool success = false;

	if (init_test_env(m, g) != 0) {
		unit_return_fail(m, "Module init failed\n");
	}

	/* This is where the written data should end up in VIDMEM */
	vidmem_data = ((u8 *) vidmem) + VIDMEM_ADDRESS + byte_offset;

	unit_info(m, "Writing %d bytes via PRAMIN\n", byte_cnt);

	/* Source data contains random data from the test buffer */
	src = malloc(byte_cnt);
	if (src == NULL) {
		unit_return_fail(m, "Memory allocation failed\n");
	}
	memcpy((void *) src, (void *) rand_test_data, byte_cnt);

	if (create_alloc_and_sgt(m, g, &mem) != 0) {
		goto free_src;
	}

	/*
	 * If the PRAMIN access has an offset that is greater than the length
	 * of the first SGL, then PRAMIN will move to the next SGL, and so on
	 * until the total length of encountered SGLs has reached the offset.
	 * Practically for this test, it means that the total length of all
	 * SGLs must be greater than or equal to the byte offset plus the
	 * number of bytes to write.
	 * Below, the first SGL has a length of byte_offset, so PRAMIN will
	 * skip it. Then 2 more SGLs each cover half of the data to be copied.
	 */
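	/*
	 * Concretely, with byte_cnt = TEST_SIZE = 2MB and byte_offset =
	 * SZ_128K:
	 *   sgl1: phys = VIDMEM_ADDRESS,               length = 128KB (skipped)
	 *   sgl2: phys = VIDMEM_ADDRESS + 128KB,       length = 1MB
	 *   sgl3: phys = VIDMEM_ADDRESS + 128KB + 1MB, length = 1MB
	 * Total SGL length = 128KB + 2MB = byte_offset + byte_cnt, so the
	 * write lands entirely within the chain.
	 */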
	sgl1 = create_sgl(m, byte_offset, VIDMEM_ADDRESS);
	if (sgl1 == NULL) {
		goto free_vidmem;
	}

	sgl2 = create_sgl(m, byte_cnt / 2, sgl1->phys + sgl1->length);
	if (sgl2 == NULL) {
		goto free_sgl1;
	}

	sgl3 = create_sgl(m, byte_cnt / 2, sgl2->phys + sgl2->length);
	if (sgl3 == NULL) {
		goto free_sgl2;
	}

	sgl1->next = sgl2;
	sgl2->next = sgl3;
	sgl3->next = NULL;
	mem.vidmem_alloc->sgt.sgl = (struct nvgpu_sgl *) sgl1;

	nvgpu_pramin_wr_n(g, &mem, byte_offset, byte_cnt, (void *) src);

	if (memcmp((void *) src, vidmem_data, byte_cnt) != 0) {
		unit_err(m, "Mismatch comparing copied data\n");
	} else {
		success = true;
	}

	free(sgl3);
free_sgl2:
	free(sgl2);
free_sgl1:
	free(sgl1);
free_vidmem:
	free(mem.vidmem_alloc);
free_src:
	free(src);

	if (success)
		return UNIT_SUCCESS;
	else
		return UNIT_FAIL;
}

/*
 * Test case to exercise "nvgpu_pramin_memset".
 */
static int test_pramin_memset(struct unit_module *m, struct gk20a *g,
			      void *__args)
{
	struct nvgpu_mem mem = { };
	struct nvgpu_mem_sgl *sgl;
	u32 byte_cnt = TEST_SIZE;
	u32 word_cnt = byte_cnt / sizeof(u32);
	u32 vidmem_index = VIDMEM_ADDRESS / sizeof(u32);
	bool success = false;
	u32 i;

	if (init_test_env(m, g) != 0) {
		unit_return_fail(m, "Module init failed\n");
	}

	unit_info(m, "Memsetting %d bytes in PRAM\n", byte_cnt);

	if (create_alloc_and_sgt(m, g, &mem) != 0) {
		return UNIT_FAIL;
	}

	sgl = create_sgl(m, byte_cnt, VIDMEM_ADDRESS);
	if (sgl == NULL) {
		goto free_vidmem;
	}
	mem.vidmem_alloc->sgt.sgl = (struct nvgpu_sgl *) sgl;

	nvgpu_pramin_memset(g, &mem, 0, byte_cnt, MEMSET_PATTERN);

	for (i = 0; i < word_cnt; i++) {
		if (vidmem[vidmem_index + i] != MEMSET_PATTERN) {
			unit_err(m, "Memset pattern not found at offset %d\n",
				 i);
			goto free_sgl;
		}
	}
	success = true;

free_sgl:
	free(sgl);
free_vidmem:
	free(mem.vidmem_alloc);

	if (success)
		return UNIT_SUCCESS;
	else
		return UNIT_FAIL;
}

/*
 * Test case to exercise the special case where NVGPU is dying. In that case,
 * PRAM is not available and PRAMIN should handle the situation by not trying
 * to access PRAM.
 */
static int test_pramin_nvgpu_dying(struct unit_module *m, struct gk20a *g,
				   void *__args)
{
	if (init_test_env(m, g) != 0) {
		unit_return_fail(m, "Module init failed\n");
	}

	nvgpu_set_enabled(g, NVGPU_DRIVER_IS_DYING, true);

	/*
	 * When the GPU is dying, PRAMIN should prevent any accesses, so
	 * pointers to nvgpu_mem and destination data don't matter and can be
	 * left NULL. If the PRAMIN call below causes a SIGSEGV, it is a test
	 * failure; otherwise it is a success.
	 */
	nvgpu_pramin_rd_n(g, NULL, 0, 1, NULL);

	/* Restore GPU driver state for other tests */
	nvgpu_set_enabled(g, NVGPU_DRIVER_IS_DYING, false);

	return UNIT_SUCCESS;
}

struct unit_module_test pramin_tests[] = {
	UNIT_TEST(nvgpu_pramin_rd_n_1_sgl, test_pramin_rd_n_single, NULL),
	UNIT_TEST(nvgpu_pramin_wr_n_3_sgl, test_pramin_wr_n_multi, NULL),
	UNIT_TEST(nvgpu_pramin_memset, test_pramin_memset, NULL),
	UNIT_TEST(nvgpu_pramin_dying, test_pramin_nvgpu_dying, NULL),
	UNIT_TEST(nvgpu_pramin_free_test_env, free_test_env, NULL),
};

UNIT_MODULE(pramin, pramin_tests, UNIT_PRIO_NVGPU_TEST);