diff --git a/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c b/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c
index 583c27694..ca77f00de 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c
@@ -197,13 +197,18 @@ int nvgpu_gmmu_init_page_table(struct vm_gk20a *vm)
 	 *
 	 * Currently PAGE_SIZE is used, even when 64K, to work around an issue
 	 * with the PDB TLB invalidate code not being pd_cache aware yet.
+	 *
+	 * Similarly, we can't use nvgpu_pd_alloc() here, because the top-level
+	 * PD must have a mem_offs of 0 for the invalidate code to work, so we
+	 * can't use the PD cache.
 	 */
 	pdb_size = ALIGN(pd_get_size(&vm->mmu_levels[0], &attrs), PAGE_SIZE);
 
-	err = nvgpu_pd_alloc(vm, &vm->pdb, pdb_size);
+	err = nvgpu_pd_cache_alloc_direct(vm->mm->g, &vm->pdb, pdb_size);
 	if (err != 0) {
 		return err;
 	}
+	vm->pdb.pd_size = pdb_size;
 
 	/*
 	 * One nvgpu_mb() is done after all mapping operations. Don't need
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu/pd_cache.c b/drivers/gpu/nvgpu/common/mm/gmmu/pd_cache.c
index f1681a3d6..33272d4b5 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu/pd_cache.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu/pd_cache.c
@@ -62,7 +62,8 @@ static u32 nvgpu_pd_cache_get_nr_entries(struct nvgpu_pd_mem_entry *pentry)
 {
 	BUG_ON(pentry->pd_size == 0);
 
-	return PAGE_SIZE / pentry->pd_size;
+	return (nvgpu_safe_cast_u64_to_u32(NVGPU_PD_CACHE_SIZE)) /
+		pentry->pd_size;
 }
 
 /*
@@ -155,7 +156,7 @@ void nvgpu_pd_cache_fini(struct gk20a *g)
  * Note: this does not need the cache lock since it does not modify any of the
  * PD cache data structures.
  */
-static int nvgpu_pd_cache_alloc_direct(struct gk20a *g,
+int nvgpu_pd_cache_alloc_direct(struct gk20a *g,
 				       struct nvgpu_gmmu_pd *pd, u32 bytes)
 {
 	int err;
@@ -206,6 +207,8 @@ static int nvgpu_pd_cache_alloc_new(struct gk20a *g,
 				    u32 bytes)
 {
 	struct nvgpu_pd_mem_entry *pentry;
+	u64 flags = 0UL;
+	int32_t err;
 
 	pd_dbg(g, "PD-Alloc [C] New: offs=0");
 
@@ -215,8 +218,21 @@ static int nvgpu_pd_cache_alloc_new(struct gk20a *g,
 		return -ENOMEM;
 	}
 
-	if (nvgpu_dma_alloc(g, PAGE_SIZE, &pentry->mem) != 0) {
+	if (!nvgpu_iommuable(g) && (NVGPU_PD_CACHE_SIZE > PAGE_SIZE)) {
+		flags = NVGPU_DMA_PHYSICALLY_ADDRESSED;
+	}
+
+	err = nvgpu_dma_alloc_flags(g, flags,
+				    NVGPU_PD_CACHE_SIZE, &pentry->mem);
+	if (err != 0) {
 		nvgpu_kfree(g, pentry);
+
+		/* Not enough contiguous space, but a direct
+		 * allocation may work.
+		 */
+		if (err == -ENOMEM) {
+			return nvgpu_pd_cache_alloc_direct(g, pd, bytes);
+		}
 		nvgpu_err(g, "Unable to DMA alloc!");
 		return -ENOMEM;
 	}
@@ -330,7 +346,7 @@ static int nvgpu_pd_cache_alloc(struct gk20a *g, struct nvgpu_pd_cache *cache,
 		return -EINVAL;
 	}
 
-	nvgpu_assert(bytes < PAGE_SIZE);
+	nvgpu_assert(bytes < NVGPU_PD_CACHE_SIZE);
 
 	pentry = nvgpu_pd_cache_get_partial(cache, bytes);
 	if (pentry == NULL) {
@@ -360,7 +376,7 @@ int nvgpu_pd_alloc(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd, u32 bytes)
 	 * Simple case: PD is bigger than a page so just do a regular DMA
 	 * alloc.
 	 */
-	if (bytes >= PAGE_SIZE) {
+	if (bytes >= NVGPU_PD_CACHE_SIZE) {
 		err = nvgpu_pd_cache_alloc_direct(g, pd, bytes);
 		if (err != 0) {
 			return err;
@@ -424,7 +440,21 @@ static void nvgpu_pd_cache_do_free(struct gk20a *g,
 	/*
 	 * Partially full still. If it was already on the partial list
 	 * this just re-adds it.
+	 *
+	 * Since the memory used for the entries is still mapped, on iGPU
+	 * make sure the entries are invalidated so that the hw doesn't
+	 * accidentally try to prefetch non-existent fb memory.
+	 *
+	 * Since IOMMU prefetching of invalid pd entries causes an IOMMU
+	 * fault, fill them with zeros.
 	 */
+	if ((nvgpu_iommuable(g)) &&
+	    (NVGPU_PD_CACHE_SIZE > PAGE_SIZE) &&
+	    (pd->mem->cpu_va != NULL)) {
+		(void)memset(((u8 *)pd->mem->cpu_va + pd->mem_offs), 0,
+			pd->pd_size);
+	}
+
 	nvgpu_list_del(&pentry->list_entry);
 	nvgpu_list_add(&pentry->list_entry,
 		&cache->partial[nvgpu_pd_cache_nr(pentry->pd_size)]);
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu/pd_cache_priv.h b/drivers/gpu/nvgpu/common/mm/gmmu/pd_cache_priv.h
index 2e2d6562c..742b2f9dd 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu/pd_cache_priv.h
+++ b/drivers/gpu/nvgpu/common/mm/gmmu/pd_cache_priv.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -80,23 +80,37 @@
  * Minimum size of a cache. The number of different caches in the nvgpu_pd_cache
  * structure is of course depending on this.
  */
-#define NVGPU_PD_CACHE_MIN		256U
+#define NVGPU_PD_CACHE_MIN		256UL
 /**
  * MIN_SHIFT is the right number of bits to shift to determine
  * which list to use in the array of lists.
  */
-#define NVGPU_PD_CACHE_MIN_SHIFT	9U
+#define NVGPU_PD_CACHE_MIN_SHIFT	9UL
+
 /**
- * Maximum PD cache count. This value varies depending on PAGE_SIZE.
+ * Maximum PD cache count. This specifies the number of slabs; since each
+ * slab represents a PO2 increase in size, a count of 8 leads to:
+ *
+ *   NVGPU_PD_CACHE_SIZE = 256B * 2^8 = 64KB
+ *
+ * For Linux with 4K pages, if the cache size is larger than 4KB then we
+ * need to allocate from CMA. This puts a lot of pressure on the CMA space.
+ * For kernels with a PAGE_SIZE of 64K this isn't the case, so allow the
+ * PD cache size to be 64K when PAGE_SIZE > 4K (i.e. PAGE_SIZE == 64K).
  */
-#if PAGE_SIZE == 4096
-#define NVGPU_PD_CACHE_COUNT		4U
-#elif PAGE_SIZE == 65536
-#define NVGPU_PD_CACHE_COUNT		8U
+#ifdef __KERNEL__
+# if PAGE_SIZE > 4096
+#  define NVGPU_PD_CACHE_COUNT		8UL
+# else
+#  define NVGPU_PD_CACHE_COUNT		4UL
+# endif
 #else
-#error "Unsupported page size."
+#define NVGPU_PD_CACHE_COUNT		8UL
 #endif
 
+#define NVGPU_PD_CACHE_SIZE		(NVGPU_PD_CACHE_MIN * \
+					 (1UL << NVGPU_PD_CACHE_COUNT))
+
 /**
  * This structure describes a slab within the slab allocator.
  */
@@ -115,7 +129,7 @@ struct nvgpu_pd_mem_entry {
 	 * The size of mem will always
 	 * be one page. pd_size will always be a power of 2.
 	 */
-	DECLARE_BITMAP(alloc_map, PAGE_SIZE / NVGPU_PD_CACHE_MIN);
+	DECLARE_BITMAP(alloc_map, NVGPU_PD_CACHE_SIZE / NVGPU_PD_CACHE_MIN);
 	/**
 	 * Total number of allocations in this PD.
 	 */
diff --git a/drivers/gpu/nvgpu/include/nvgpu/pd_cache.h b/drivers/gpu/nvgpu/include/nvgpu/pd_cache.h
index 1426db648..16b166775 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/pd_cache.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/pd_cache.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -189,4 +189,6 @@ void nvgpu_pd_write(struct gk20a *g, struct nvgpu_gmmu_pd *pd,
  */
 u64 nvgpu_pd_gpu_addr(struct gk20a *g, struct nvgpu_gmmu_pd *pd);
 
+int nvgpu_pd_cache_alloc_direct(struct gk20a *g,
+		struct nvgpu_gmmu_pd *pd, u32 bytes);
 #endif
diff --git a/drivers/gpu/nvgpu/os/linux/linux-dma.c b/drivers/gpu/nvgpu/os/linux/linux-dma.c
index 83e238d83..8eebb6f7f 100644
--- a/drivers/gpu/nvgpu/os/linux/linux-dma.c
+++ b/drivers/gpu/nvgpu/os/linux/linux-dma.c
@@ -111,7 +111,7 @@ static void nvgpu_dma_print_err(struct gk20a *g, size_t size,
 
 	nvgpu_dma_flags_to_str(g, flags, flags_str);
 
-	nvgpu_err(g,
+	nvgpu_info(g,
 		"DMA %s FAILED: [%s] size=%-7zu "
 		"aligned=%-7zu flags:%s",
 		what, type,
diff --git a/userspace/units/acr/nvgpu-acr.c b/userspace/units/acr/nvgpu-acr.c
index edeb8a6d9..6b3ddf5ce 100644
--- a/userspace/units/acr/nvgpu-acr.c
+++ b/userspace/units/acr/nvgpu-acr.c
@@ -308,6 +308,11 @@ static int init_test_env(struct unit_module *m, struct gk20a *g)
 		unit_return_fail(m, "ecc init failed\n");
 	}
 
+	err = nvgpu_pd_cache_init(g);
+	if (err != 0) {
+		unit_return_fail(m, "failed to init pd cache");
+	}
+
 	err = g->ops.mm.init_mm_support(g);
 	if (err != 0) {
 		unit_return_fail(m, "failed to init gk20a mm");
diff --git a/userspace/units/fifo/nvgpu-fifo-common.c b/userspace/units/fifo/nvgpu-fifo-common.c
index fe9327359..81c604ec1 100644
--- a/userspace/units/fifo/nvgpu-fifo-common.c
+++ b/userspace/units/fifo/nvgpu-fifo-common.c
@@ -174,6 +174,10 @@ int test_fifo_init_support(struct unit_module *m, struct gk20a *g, void *args)
 	g->ops.userd.setup_sw = stub_userd_setup_sw;
 #endif
 	g->ops.ecc.ecc_init_support(g);
+
+	/* PD cache must be initialized prior to mm init */
+	err = nvgpu_pd_cache_init(g);
+
 	g->ops.mm.init_mm_support(g);
 
 	nvgpu_device_init(g);
diff --git a/userspace/units/gr/nvgpu-gr.c b/userspace/units/gr/nvgpu-gr.c
index 2f1de6383..efbef32e1 100644
--- a/userspace/units/gr/nvgpu-gr.c
+++ b/userspace/units/gr/nvgpu-gr.c
@@ -84,6 +84,12 @@ int test_gr_init_setup(struct unit_module *m, struct gk20a *g, void *args)
 		return -ENOMEM;
 	}
 
+	err = nvgpu_pd_cache_init(g);
+	if (err != 0) {
+		unit_err(m, "PD cache initialization failed\n");
+		return -ENOMEM;
+	}
+
 	return UNIT_SUCCESS;
 
 fail:
diff --git a/userspace/units/mm/as/as.c b/userspace/units/mm/as/as.c
index a14ce17e3..8cd7239de 100644
--- a/userspace/units/mm/as/as.c
+++ b/userspace/units/mm/as/as.c
@@ -158,6 +158,11 @@ int test_init_mm(struct unit_module *m, struct gk20a *g, void *args)
 	g->ops.fb.intr.enable = gv11b_fb_intr_enable;
 	g->ops.fb.ecc.init = NULL;
 
+	err = nvgpu_pd_cache_init(g);
+	if (err != 0) {
+		unit_return_fail(m, "pd cache initialization failed\n");
+	}
+
 	err = nvgpu_init_mm_support(g);
 	if (err != 0) {
 		unit_return_fail(m, "nvgpu_init_mm_support failed err=%d\n",
diff --git a/userspace/units/mm/dma/dma.c b/userspace/units/mm/dma/dma.c
index beb1f92a1..3485f73cc 100644
--- a/userspace/units/mm/dma/dma.c
+++ b/userspace/units/mm/dma/dma.c
@@ -204,6 +204,10 @@ static int init_mm(struct unit_module *m, struct gk20a *g)
 		unit_return_fail(m, "nvgpu_vm_init failed\n");
 	}
 
+	if (nvgpu_pd_cache_init(g) != 0) {
+		unit_return_fail(m, "pd cache initialization failed\n");
+	}
+
 	return UNIT_SUCCESS;
 }
 
diff --git a/userspace/units/mm/gmmu/page_table/page_table.c b/userspace/units/mm/gmmu/page_table/page_table.c
index 41adb4349..fae1d7a8a 100644
--- a/userspace/units/mm/gmmu/page_table/page_table.c
+++ b/userspace/units/mm/gmmu/page_table/page_table.c
@@ -383,6 +383,10 @@ int test_nvgpu_gmmu_init(struct unit_module *m, struct gk20a *g, void *args)
 
 	init_platform(m, g, true);
 
+	if (nvgpu_pd_cache_init(g) != 0) {
+		unit_return_fail(m, "PD cache initialization failed\n");
+	}
+
 	if (init_mm(m, g) != 0) {
 		unit_return_fail(m, "nvgpu_init_mm_support failed\n");
 	}
diff --git a/userspace/units/mm/gmmu/pd_cache/pd_cache.c b/userspace/units/mm/gmmu/pd_cache/pd_cache.c
index de067e4a0..9be380470 100644
--- a/userspace/units/mm/gmmu/pd_cache/pd_cache.c
+++ b/userspace/units/mm/gmmu/pd_cache/pd_cache.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -321,7 +321,7 @@ int test_pd_free_empty_pd(struct unit_module *m, struct gk20a *g,
 
 	/* And now direct frees. */
 	memset(&pd, 0U, sizeof(pd));
-	err = nvgpu_pd_alloc(&vm, &pd, PAGE_SIZE);
+	err = nvgpu_pd_alloc(&vm, &pd, NVGPU_PD_CACHE_SIZE);
 	if (err != 0) {
 		unit_return_fail(m, "PD alloc failed");
 	}
@@ -610,7 +610,7 @@ static int do_test_pd_cache_packing_size(struct unit_module *m, struct gk20a *g,
 {
 	int err;
 	u32 i;
-	u32 n = PAGE_SIZE / pd_size;
+	u32 n = NVGPU_PD_CACHE_SIZE / pd_size;
 	struct nvgpu_gmmu_pd pds[n], pd;
 	struct nvgpu_posix_fault_inj *dma_fi =
 		nvgpu_dma_alloc_get_fault_injection();
@@ -667,7 +667,7 @@ static int do_test_pd_reusability(struct unit_module *m, struct gk20a *g,
 {
 	int err = UNIT_SUCCESS;
 	u32 i;
-	u32 n = PAGE_SIZE / pd_size;
+	u32 n = NVGPU_PD_CACHE_SIZE / pd_size;
 	struct nvgpu_gmmu_pd pds[n];
 	struct nvgpu_posix_fault_inj *dma_fi =
 		nvgpu_dma_alloc_get_fault_injection();
diff --git a/userspace/units/mm/hal/mmu_fault/gv11b_fusa/mmu-fault-gv11b-fusa.c b/userspace/units/mm/hal/mmu_fault/gv11b_fusa/mmu-fault-gv11b-fusa.c
index dd1dc2151..42980f291 100644
--- a/userspace/units/mm/hal/mmu_fault/gv11b_fusa/mmu-fault-gv11b-fusa.c
+++ b/userspace/units/mm/hal/mmu_fault/gv11b_fusa/mmu-fault-gv11b-fusa.c
@@ -126,6 +126,7 @@ static int init_mm(struct unit_module *m, struct gk20a *g)
 	u64 low_hole, aperture_size;
 	struct nvgpu_os_posix *p = nvgpu_os_posix_from_gk20a(g);
 	struct mm_gk20a *mm = &g->mm;
+	int err;
 
 	p->mm_is_iommuable = true;
 
@@ -193,6 +194,11 @@ static int init_mm(struct unit_module *m, struct gk20a *g)
 		unit_return_fail(m, "'bar2' nvgpu_vm_init failed\n");
 	}
 
+	err = nvgpu_pd_cache_init(g);
+	if (err != 0) {
+		unit_return_fail(m, "PD cache init failed\n");
+	}
+
 	/*
 	 * This initialization will make sure that correct aperture mask
 	 * is returned */
diff --git a/userspace/units/pmu/nvgpu-pmu.c b/userspace/units/pmu/nvgpu-pmu.c
index f99c81d10..b757e45ce 100644
--- a/userspace/units/pmu/nvgpu-pmu.c
+++ b/userspace/units/pmu/nvgpu-pmu.c
@@ -229,6 +229,12 @@ static int init_pmu_falcon_test_env(struct unit_module *m, struct gk20a *g)
 		return -ENOMEM;
 	}
 
+	err = nvgpu_pd_cache_init(g);
+	if (err != 0) {
+		unit_err(m, "PD cache allocation failed!\n");
+		return -ENOMEM;
+	}
+
 	return 0;
 }
 
diff --git a/userspace/units/rc/nvgpu-rc.c b/userspace/units/rc/nvgpu-rc.c
index c41faa70c..6fe968073 100644
--- a/userspace/units/rc/nvgpu-rc.c
+++ b/userspace/units/rc/nvgpu-rc.c
@@ -89,6 +89,11 @@ int test_rc_init(struct unit_module *m, struct gk20a *g, void *args)
 		unit_return_fail(m, "fifo reg_space failure");
 	}
 
+	ret = nvgpu_pd_cache_init(g);
+	if (ret != 0) {
+		unit_return_fail(m, "PD cache initialization failure");
+	}
+
 	nvgpu_device_init(g);
 
 	g->ops.gr.init.get_no_of_sm = stub_gv11b_gr_init_get_no_of_sm;
diff --git a/userspace/units/sync/nvgpu-sync.c b/userspace/units/sync/nvgpu-sync.c
index df1542e20..59437d448 100644
--- a/userspace/units/sync/nvgpu-sync.c
+++ b/userspace/units/sync/nvgpu-sync.c
@@ -98,6 +98,10 @@ static int init_channel_vm(struct unit_module *m, struct nvgpu_channel *ch)
 
 	ch->vm = mm->pmu.vm;
 
+	if (nvgpu_pd_cache_init(g) != 0) {
+		unit_return_fail(m, "pd cache initialization failed\n");
+	}
+
 	return UNIT_SUCCESS;
 }
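
A quick standalone illustration (not part of the patch) of the slab sizing the new NVGPU_PD_CACHE_SIZE macro encodes: with NVGPU_PD_CACHE_MIN = 256 bytes and NVGPU_PD_CACHE_COUNT = 8, one slab is 256B * 2^8 = 64KB, and a power-of-two sized PD packs NVGPU_PD_CACHE_SIZE / pd_size entries per slab, which is what nvgpu_pd_cache_get_nr_entries now returns. The macro names below mirror the patch; the rest of the program is illustrative only and builds with any C compiler.

/*
 * Standalone sketch of the PD cache slab arithmetic; not nvgpu code.
 * The three macros mirror the values the patch selects for the 64KB case.
 */
#include <stdio.h>

#define NVGPU_PD_CACHE_MIN	256UL
#define NVGPU_PD_CACHE_COUNT	8UL
#define NVGPU_PD_CACHE_SIZE	(NVGPU_PD_CACHE_MIN * (1UL << NVGPU_PD_CACHE_COUNT))

int main(void)
{
	unsigned long pd_size;

	/* 256 * 2^8 = 65536 bytes per slab. */
	printf("PD cache slab size: %lu bytes\n", NVGPU_PD_CACHE_SIZE);

	/* Each slab holds NVGPU_PD_CACHE_SIZE / pd_size sub-allocations. */
	for (pd_size = NVGPU_PD_CACHE_MIN; pd_size < NVGPU_PD_CACHE_SIZE;
	     pd_size <<= 1UL) {
		printf("pd_size %6lu -> %3lu entries per slab\n",
		       pd_size, NVGPU_PD_CACHE_SIZE / pd_size);
	}

	return 0;
}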