diff --git a/drivers/gpu/nvgpu/include/nvgpu/posix/sizes.h b/drivers/gpu/nvgpu/include/nvgpu/posix/sizes.h index c60ef5d39..e296d563a 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/posix/sizes.h +++ b/drivers/gpu/nvgpu/include/nvgpu/posix/sizes.h @@ -23,6 +23,8 @@ #ifndef NVGPU_POSIX_SIZES_H #define NVGPU_POSIX_SIZES_H +#define SZ_256 256UL + #define SZ_1K (1UL << 10) #define SZ_4K (SZ_1K << 2) #define SZ_64K (SZ_1K << 6) diff --git a/drivers/gpu/nvgpu/libnvgpu-drv.export b/drivers/gpu/nvgpu/libnvgpu-drv.export index 922b495d0..0ec0f55fe 100644 --- a/drivers/gpu/nvgpu/libnvgpu-drv.export +++ b/drivers/gpu/nvgpu/libnvgpu-drv.export @@ -107,6 +107,8 @@ nvgpu_pd_alloc nvgpu_pd_cache_fini nvgpu_pd_cache_init nvgpu_pd_free +nvgpu_pd_gpu_addr +nvgpu_pd_offset_from_index nvgpu_pd_write nvgpu_posix_cleanup nvgpu_posix_enable_fault_injection diff --git a/drivers/gpu/nvgpu/os/posix/posix-dma.c b/drivers/gpu/nvgpu/os/posix/posix-dma.c index 50ff0397e..0fc4fee24 100644 --- a/drivers/gpu/nvgpu/os/posix/posix-dma.c +++ b/drivers/gpu/nvgpu/os/posix/posix-dma.c @@ -62,6 +62,8 @@ static int __nvgpu_do_dma_alloc(struct gk20a *g, unsigned long flags, return -ENOMEM; } + memset(memory, 0, PAGE_ALIGN(size)); + mem->cpu_va = memory; mem->aperture = ap; mem->size = size; diff --git a/userspace/include/unit/unit-requirement-ids.h b/userspace/include/unit/unit-requirement-ids.h index 758a02a06..d731c8ba0 100644 --- a/userspace/include/unit/unit-requirement-ids.h +++ b/userspace/include/unit/unit-requirement-ids.h @@ -28,6 +28,13 @@ */ #define PD_CACHE_REQ1_UID "6439202" #define PD_CACHE_REQ2_UID "6898078" +#define PD_CACHE_REQ3_UID "6957786" +#define PD_CACHE_REQ4_UID "6962424" +#define PD_CACHE_REQ5_UID "6963067" +#define PD_CACHE_REQ6_UID "6962548" +#define PD_CACHE_REQ7_UID "7138651" +#define PD_CACHE_REQ8_UID "6962610" + #define PAGE_TABLE_REQ1_UID "6439094" #endif diff --git a/userspace/required_tests.json b/userspace/required_tests.json index 892f85e4e..1706f3788 100644 --- 
a/userspace/required_tests.json +++ b/userspace/required_tests.json @@ -505,6 +505,13 @@ "test": "alloc_oom", "unit": "pd_cache" }, + { + "test": "deinit", + "req": "NVGPU-RQCD-125.C1", + "vc": "V2", + "uid": "6962610", + "unit": "pd_cache" + }, { "test": "env_init", "unit": "pd_cache" @@ -517,14 +524,38 @@ "test": "free_empty", "unit": "pd_cache" }, + { + "test": "gpu_address", + "req": "NVGPU-RQCD-123.C1", + "vc": "V2", + "uid": "6962424", + "unit": "pd_cache" + }, { "test": "init", + "req": "NVGPU-RQCD-124.C1", + "vc": "V3", + "uid": "6962548", "unit": "pd_cache" }, { "test": "invalid_pd_alloc", "unit": "pd_cache" }, + { + "test": "multi_init", + "req": "NVGPU-RQCD-155.C1", + "vc": "V2", + "uid": "7138651", + "unit": "pd_cache" + }, + { + "test": "offset_comp", + "req": "NVGPU-RQCD-126.C1,2", + "vc": "V1", + "uid": "6963067", + "unit": "pd_cache" + }, { "test": "pd_packing", "req": "NVGPU-RQCD-68.C3", @@ -546,6 +577,13 @@ "uid": "6439202", "unit": "pd_cache" }, + { + "test": "write", + "req": "NVGPU-RQCD-122.C1", + "vc": "V3", + "uid": "6957786", + "unit": "pd_cache" + }, { "test": "bit_clear", "unit": "posix_bitops" diff --git a/userspace/units/mm/gmmu/pd_cache/pd_cache.c b/userspace/units/mm/gmmu/pd_cache/pd_cache.c index e159745c1..202443f96 100644 --- a/userspace/units/mm/gmmu/pd_cache/pd_cache.c +++ b/userspace/units/mm/gmmu/pd_cache/pd_cache.c @@ -32,7 +32,9 @@ #include #include -#include +#include "common/mm/gmmu/pd_cache_priv.h" + +#include "gp10b/mm_gp10b.h" /* * Direct allocs are allocs large enough to just pass straight on to the @@ -572,7 +574,7 @@ static int test_pd_cache_fini(struct unit_module *m, return UNIT_SUCCESS; } -/* +/** * Requirement NVGPU-RQCD-68.C1 * * Valid/Invalid: The pd_cache does/does not allocate a suitable DMA'able @@ -582,6 +584,11 @@ static int test_pd_cache_fini(struct unit_module *m, * * Valid/Invalid: The allocated PD is/is not sufficiently aligned for use by * the GMMU. 
+ * + * Requirement NVGPU-RQCD-124.C1 + * + * Valid/Invalid: After initialization of the pd_cache the pd_cache can/cannot + * allocate valid PDs. */ static int test_pd_cache_valid_alloc(struct unit_module *m, struct gk20a *g, void *args) @@ -643,7 +650,7 @@ fail: return err; } -/* +/** * Requirement NVGPU-RQCD-68.C3 * * Valid/Invalid: 16 256B, 8 512B, etc, PDs can/cannot fit into a single @@ -701,7 +708,7 @@ cleanup: return err; } -/* +/** * Requirement NVGPU-RQCD-118.C1 * * Valid/Invalid: Previously allocated PD entries are/are not re-usable. @@ -784,6 +791,233 @@ cleanup: return err; } +/* + * Read back and compare the pattern to the word in the page directory. Return + * true if they match, false otherwise. + */ +static bool readback_pd_write(struct gk20a *g, struct nvgpu_gmmu_pd *pd, + u32 index, u32 pattern) +{ + u32 offset = index + (pd->mem_offs / sizeof(u32)); + + return nvgpu_mem_rd32(g, pd->mem, offset) == pattern; +} + +/** + * Requirement NVGPU-RQCD-122.C1 + * + * Valid/Invalid: The pd_cache writes/does not write a word of memory in a + * passed PD. + * + * Requirement NVGPU-RQCD-126.C1,2 + * + * C1: Valid/Invalid: The pd_cache unit does/does not return a valid word + * offset for a 2 word PDE/PTE. + * C2: Valid/Invalid: The pd_cache unit does/does not return a valid word + * offset for a 4 word PDE/PTE. + * + * This test hits both the pd_write() and the pd_nvgpu_pd_offset_from_index() + * functions since these are used to validate each other. + */ +static int test_pd_write(struct unit_module *m, struct gk20a *g, void *args) +{ + int err = UNIT_SUCCESS; + struct vm_gk20a vm; + struct nvgpu_gmmu_pd pd_2w, pd_4w; + const struct gk20a_mmu_level *mm_levels = + gp10b_mm_get_mmu_levels(g, SZ_64K); + u32 i, indexes[] = { 0U, 16U, 255U }; + + err = init_pd_cache(m, g, &vm); + if (err != UNIT_SUCCESS) { + return err; + } + + /* + * Typical size of the last level dual page PD is 4K bytes - 256 entries + * at 16 bytes an entry. 
+ */ + err = nvgpu_pd_alloc(&vm, &pd_4w, SZ_4K); + if (err != UNIT_SUCCESS) { + goto cleanup; + } + + /* + * Most upper level PDs are 512 entries with 8 bytes per entry: again 4K + * bytes. + */ + err = nvgpu_pd_alloc(&vm, &pd_2w, SZ_4K); + if (err != UNIT_SUCCESS) { + goto cleanup; + } + + /* + * Write to PDs at the given index and read back the value from the + * underlying nvgpu_mem. + */ + for (i = 0U; i < sizeof(indexes) / sizeof(*indexes); i++) { + u32 offs_2w = nvgpu_pd_offset_from_index(&mm_levels[2], + indexes[i]); + u32 offs_4w = nvgpu_pd_offset_from_index(&mm_levels[3], + indexes[i]); + + nvgpu_pd_write(g, &pd_2w, offs_2w, 0xA5A5A5A5); + nvgpu_pd_write(g, &pd_4w, offs_4w, 0xA5A5A5A5); + + /* Read back. */ + if (!readback_pd_write(g, &pd_2w, offs_2w, 0xA5A5A5A5)) { + err = UNIT_FAIL; + goto cleanup; + } + if (!readback_pd_write(g, &pd_4w, offs_4w, 0xA5A5A5A5)) { + err = UNIT_FAIL; + goto cleanup; + } + } + +cleanup: + nvgpu_pd_free(&vm, &pd_2w); + nvgpu_pd_free(&vm, &pd_4w); + nvgpu_pd_cache_fini(g); + + return err; +} + +/** + * Requirement NVGPU-RQCD-123.C1 + * + * C1: Valid/Invalid: The pd_cache does/does not provide a valid GPU physical + * address for a given PD. + */ +static int test_gpu_address(struct unit_module *m, struct gk20a *g, void *args) +{ + int err; + struct vm_gk20a vm; + struct nvgpu_gmmu_pd pd; + u64 addr; + + err = init_pd_cache(m, g, &vm); + if (err != UNIT_SUCCESS) { + return err; + } + + err = nvgpu_pd_alloc(&vm, &pd, SZ_4K); + if (err != UNIT_SUCCESS) { + nvgpu_pd_cache_fini(g); + return UNIT_FAIL; + } + + addr = nvgpu_pd_gpu_addr(g, &pd); + if (addr == 0ULL) { + unit_return_fail(m, "GPU address of PD is NULL\n"); + } + + nvgpu_pd_free(&vm, &pd); + nvgpu_pd_cache_fini(g); + + return UNIT_SUCCESS; +} + +/** + * Requirement NVGPU-RQCD-126.C1,2 + * + * C1: Valid/Invalid: The pd_cache unit does/does not return a valid word + * offset for a 2 word PDE/PTE. 
+ * C2: Valid/Invalid: The pd_cache unit does/does not return a valid word + * offset for a 4 word PDE/PTE. + */ +static int test_offset_computation(struct unit_module *m, struct gk20a *g, + void *args) +{ + const struct gk20a_mmu_level *mm_levels = + gp10b_mm_get_mmu_levels(g, SZ_64K); + u32 indexes[] = { 0U, 4U, 16U, 255U }; + u32 offsets_2w[] = { 0U, 8U, 32U, 510U }; + u32 offsets_4w[] = { 0U, 16U, 64U, 1020U }; + bool fail = false; + u32 i; + + for (i = 0U; i < sizeof(indexes) / sizeof(*indexes); i++) { + u32 offs_2w = nvgpu_pd_offset_from_index(&mm_levels[2], + indexes[i]); + u32 offs_4w = nvgpu_pd_offset_from_index(&mm_levels[3], + indexes[i]); + + if (offs_2w != offsets_2w[i]) { + unit_err(m, "2w offset comp failed: [%u] %u -> %u\n", + i, indexes[i], offs_2w); + fail = true; + } + if (offs_4w != offsets_4w[i]) { + unit_err(m, "4w offset comp failed: [%u] %u -> %u\n", + i, indexes[i], offs_4w); + fail = true; + } + } + + return fail ? UNIT_FAIL : UNIT_SUCCESS; +} + +/** + * Call this to cover a range of requirement tests: + * + * NVGPU-RQCD-124.C1 + * C1: Valid/Invalid: After initialization of pd_cache pd_cache can/cannot + * allocate valid PDs + * + * NVGPU-RQCD-155.C1 + * C1: Valid/Invalid: Re-initialization does not/does cause subsequent kernel + * or DMA allocations. + * + * NVGPU-RQCD-125.C1 + * C1: Valid/Invalid: The pd_cache unit does/does not release all its + * allocated memory (kernel and DMA). + * + * It's redundant, certainly, but that's fine as this test runs fast. 
+ */ +static int test_init_deinit(struct unit_module *m, struct gk20a *g, void *args) +{ + int err, status = UNIT_SUCCESS; + struct vm_gk20a vm; + struct nvgpu_gmmu_pd pd; + struct nvgpu_posix_fault_inj *kmem_fi = + nvgpu_kmem_get_fault_injection(); + struct nvgpu_posix_fault_inj *dma_fi = + nvgpu_dma_alloc_get_fault_injection(); + + err = init_pd_cache(m, g, &vm); + if (err != UNIT_SUCCESS) { + return err; + } + + err = nvgpu_pd_alloc(&vm, &pd, SZ_4K); + if (err != UNIT_SUCCESS) { + nvgpu_pd_cache_fini(g); + return UNIT_FAIL; + } + + nvgpu_posix_enable_fault_injection(kmem_fi, true, 0); + nvgpu_posix_enable_fault_injection(dma_fi, true, 0); + + /* + * Block all allocs and check that we don't hit a -ENOMEM. This proves + * that we haven't done any extra allocations on subsequent init calls. + */ + err = nvgpu_pd_cache_init(g); + if (err == -ENOMEM) { + unit_err(m, "Attempted allocation during multi-init\n"); + status = UNIT_FAIL; + } + + nvgpu_posix_enable_fault_injection(kmem_fi, false, 0); + nvgpu_posix_enable_fault_injection(dma_fi, false, 0); + + nvgpu_pd_free(&vm, &pd); + nvgpu_pd_cache_fini(g); + + return status; +} + /* * Init the global env - just make sure we don't try and allocate from VIDMEM * when doing dma allocs. @@ -804,12 +1038,24 @@ struct unit_module_test pd_cache_tests[] = { /* * Requirement verification tests. 
*/ - UNIT_TEST_REQ("NVGPU-RQCD-68.C1,2", PD_CACHE_REQ1_UID, "V4", + UNIT_TEST_REQ("NVGPU-RQCD-68.C1,2", PD_CACHE_REQ1_UID, "V4", valid_alloc, test_pd_cache_valid_alloc, NULL), - UNIT_TEST_REQ("NVGPU-RQCD-68.C3", PD_CACHE_REQ1_UID, "V4", + UNIT_TEST_REQ("NVGPU-RQCD-68.C3", PD_CACHE_REQ1_UID, "V4", pd_packing, test_per_pd_size, do_test_pd_cache_packing_size), - UNIT_TEST_REQ("NVGPU-RQCD-118.C1", PD_CACHE_REQ2_UID, "V3", + UNIT_TEST_REQ("NVGPU-RQCD-118.C1", PD_CACHE_REQ2_UID, "V3", pd_reusability, test_per_pd_size, do_test_pd_reusability), + UNIT_TEST_REQ("NVGPU-RQCD-122.C1", PD_CACHE_REQ3_UID, "V3", + write, test_pd_write, NULL), + UNIT_TEST_REQ("NVGPU-RQCD-123.C1", PD_CACHE_REQ4_UID, "V2", + gpu_address, test_gpu_address, NULL), + UNIT_TEST_REQ("NVGPU-RQCD-126.C1,2", PD_CACHE_REQ5_UID, "V1", + offset_comp, test_offset_computation, NULL), + UNIT_TEST_REQ("NVGPU-RQCD-124.C1", PD_CACHE_REQ6_UID, "V3", + init, test_init_deinit, NULL), + UNIT_TEST_REQ("NVGPU-RQCD-155.C1", PD_CACHE_REQ7_UID, "V2", + multi_init, test_init_deinit, NULL), + UNIT_TEST_REQ("NVGPU-RQCD-125.C1", PD_CACHE_REQ8_UID, "V2", + deinit, test_init_deinit, NULL), /* * Direct allocs.