diff --git a/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c b/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c index 796169abb..f3fbb413e 100644 --- a/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c +++ b/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -218,22 +219,6 @@ int nvgpu_gmmu_init_page_table(struct vm_gk20a *vm) return 0; } -/* - * Return the _physical_ address of a page directory. - */ -u64 nvgpu_pde_gpu_addr(struct gk20a *g, struct nvgpu_gmmu_pd *pd) -{ - u64 page_addr; - - if (nvgpu_is_enabled(g, NVGPU_SUPPORT_NVLINK)) { - page_addr = nvgpu_mem_get_phys_addr(g, pd->mem); - } else { - page_addr = nvgpu_mem_get_addr(g, pd->mem); - } - - return page_addr + pd->mem_offs; -} - /* * Return the aligned length based on the page size in attrs. */ @@ -477,7 +462,7 @@ static int __set_pd_level(struct vm_gk20a *vm, * target addr is the real physical address we are aiming for. */ target_addr = (next_pd != NULL) ? - nvgpu_pde_gpu_addr(g, next_pd) : + nvgpu_pd_gpu_addr(g, next_pd) : phys_addr; l->update_entry(vm, l, @@ -987,8 +972,8 @@ static int __nvgpu_locate_pte(struct gk20a *g, struct vm_gk20a *vm, * Take into account the real offset into the nvgpu_mem since the PD * may be located at an offset other than 0 (due to PD packing). */ - pte_base = (pd->mem_offs / sizeof(u32)) + - pd_offset_from_index(l, pd_idx); + pte_base = (u32)(pd->mem_offs / sizeof(u32)) + + nvgpu_pd_offset_from_index(l, pd_idx); pte_size = (u32)(l->entry_size / sizeof(u32)); if (data != NULL) { @@ -1006,7 +991,7 @@ static int __nvgpu_locate_pte(struct gk20a *g, struct vm_gk20a *vm, } if (pd_offs_out != NULL) { - *pd_offs_out = pd_offset_from_index(l, pd_idx); + *pd_offs_out = nvgpu_pd_offset_from_index(l, pd_idx); } return 0; @@ -1043,7 +1028,7 @@ int __nvgpu_set_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte) pte_size = __nvgpu_pte_words(g); for (i = 0; i < pte_size; i++) { - pd_write(g, pd, (size_t)pd_offs + (size_t)i, pte[i]); + nvgpu_pd_write(g, pd, (size_t)pd_offs + (size_t)i, pte[i]); pte_dbg(g, attrs_ptr, "PTE: idx=%-4u (%d) 0x%08x", pd_idx, i, pte[i]); } diff --git a/drivers/gpu/nvgpu/common/mm/gmmu/pd_cache.c b/drivers/gpu/nvgpu/common/mm/gmmu/pd_cache.c index 8db71b5f6..838fc7d6e 100644 --- a/drivers/gpu/nvgpu/common/mm/gmmu/pd_cache.c +++ b/drivers/gpu/nvgpu/common/mm/gmmu/pd_cache.c @@ -28,6 +28,7 @@ #include #include #include +#include #define pd_dbg(g, fmt, args...) nvgpu_log(g, gpu_dbg_pd_cache, fmt, ##args) @@ -159,6 +160,34 @@ static u32 nvgpu_pd_cache_get_nr_entries(struct nvgpu_pd_mem_entry *pentry) return PAGE_SIZE / pentry->pd_size; } +/* + * Return the _physical_ address of a page directory. + */ +u64 nvgpu_pd_gpu_addr(struct gk20a *g, struct nvgpu_gmmu_pd *pd) +{ + u64 page_addr; + + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_NVLINK)) { + page_addr = nvgpu_mem_get_phys_addr(g, pd->mem); + } else { + page_addr = nvgpu_mem_get_addr(g, pd->mem); + } + + return page_addr + pd->mem_offs; +} + +u32 nvgpu_pd_offset_from_index(const struct gk20a_mmu_level *l, u32 pd_idx) +{ + return (pd_idx * l->entry_size) / U32(sizeof(u32)); +} + +void nvgpu_pd_write(struct gk20a *g, struct nvgpu_gmmu_pd *pd, + size_t w, u32 data) +{ + nvgpu_mem_wr32(g, pd->mem, + (u32)((pd->mem_offs / sizeof(u32)) + w), data); +} + int nvgpu_pd_cache_init(struct gk20a *g) { struct nvgpu_pd_cache *cache; diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index 5231f3f59..8e7c17edd 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c @@ -44,6 +44,7 @@ #include #include #include +#include #include "mm_gk20a.h" #include "fence_gk20a.h" @@ -162,7 +163,7 @@ static void update_gmmu_pde_locked(struct vm_gk20a *vm, { struct gk20a *g = gk20a_from_vm(vm); bool small_valid, big_valid; - u32 pd_offset = pd_offset_from_index(l, pd_idx); + u32 pd_offset = nvgpu_pd_offset_from_index(l, pd_idx); u32 pde_v[2] = {0, 0}; small_valid = attrs->pgsz == GMMU_PAGE_SIZE_SMALL; @@ -190,8 +191,8 @@ static void update_gmmu_pde_locked(struct vm_gk20a *vm, virt_addr, phys_addr, pde_v[1], pde_v[0]); - pd_write(g, &vm->pdb, (size_t)pd_offset + (size_t)0, pde_v[0]); - pd_write(g, &vm->pdb, (size_t)pd_offset + (size_t)1, pde_v[1]); + nvgpu_pd_write(g, &vm->pdb, (size_t)pd_offset + (size_t)0, pde_v[0]); + nvgpu_pd_write(g, &vm->pdb, (size_t)pd_offset + (size_t)1, pde_v[1]); } static void __update_pte_sparse(u32 *pte_w) @@ -268,7 +269,7 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm, { struct gk20a *g = gk20a_from_vm(vm); u32 page_size = vm->gmmu_page_sizes[attrs->pgsz]; - u32 pd_offset = pd_offset_from_index(l, pd_idx); + u32 pd_offset = nvgpu_pd_offset_from_index(l, pd_idx); u32 pte_w[2] = {0, 0}; int ctag_shift = 0; int shamt = ilog2(g->ops.fb.compression_page_size(g)); @@ -304,8 +305,8 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm, (u32)attrs->ctag >> ctag_shift, pte_w[1], pte_w[0]); - pd_write(g, pd, (size_t)pd_offset + (size_t)0, pte_w[0]); - pd_write(g, pd, (size_t)pd_offset + (size_t)1, pte_w[1]); + nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)0, pte_w[0]); + nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)1, pte_w[1]); } u32 gk20a_get_pde_pgsz(struct gk20a *g, const struct gk20a_mmu_level *l, @@ -376,7 +377,7 @@ int gk20a_vm_bind_channel(struct vm_gk20a *vm, struct channel_gk20a *ch) void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block, struct vm_gk20a *vm) { - u64 pdb_addr = nvgpu_pde_gpu_addr(g, &vm->pdb); + u64 pdb_addr = nvgpu_pd_gpu_addr(g, &vm->pdb); u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v()); u32 pdb_addr_hi = u64_hi32(pdb_addr); diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c index 2a1b065eb..9e0bf185d 100644 --- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -88,7 +89,7 @@ static void update_gmmu_pde3_locked(struct vm_gk20a *vm, { struct gk20a *g = gk20a_from_vm(vm); struct nvgpu_gmmu_pd *next_pd = &pd->entries[pd_idx]; - u32 pd_offset = pd_offset_from_index(l, pd_idx); + u32 pd_offset = nvgpu_pd_offset_from_index(l, pd_idx); u32 pde_v[2] = {0, 0}; phys_addr >>= gmmu_new_pde_address_shift_v(); @@ -101,8 +102,8 @@ static void update_gmmu_pde3_locked(struct vm_gk20a *vm, pde_v[0] |= gmmu_new_pde_vol_true_f(); pde_v[1] |= phys_addr >> 24; - pd_write(g, pd, (size_t)pd_offset + (size_t)0, pde_v[0]); - pd_write(g, pd, (size_t)pd_offset + (size_t)1, pde_v[1]); + nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)0, pde_v[0]); + nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)1, pde_v[1]); pte_dbg(g, attrs, "PDE: i=%-4u size=%-2u offs=%-4u pgsz: -- | " @@ -125,7 +126,7 @@ static void update_gmmu_pde0_locked(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *next_pd = &pd->entries[pd_idx]; bool small_valid, big_valid; u32 small_addr = 0, big_addr = 0; - u32 pd_offset = pd_offset_from_index(l, pd_idx); + u32 pd_offset = nvgpu_pd_offset_from_index(l, pd_idx); u32 pde_v[4] = {0, 0, 0, 0}; small_valid = attrs->pgsz == GMMU_PAGE_SIZE_SMALL; @@ -160,10 +161,10 @@ static void update_gmmu_pde0_locked(struct vm_gk20a *vm, pde_v[1] |= big_addr >> 28; } - pd_write(g, pd, (size_t)pd_offset + (size_t)0, pde_v[0]); - pd_write(g, pd, (size_t)pd_offset + (size_t)1, pde_v[1]); - pd_write(g, pd, (size_t)pd_offset + (size_t)2, pde_v[2]); - pd_write(g, pd, (size_t)pd_offset + (size_t)3, pde_v[3]); + nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)0, pde_v[0]); + nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)1, pde_v[1]); + nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)2, pde_v[2]); + nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)3, pde_v[3]); pte_dbg(g, attrs, "PDE: i=%-4u size=%-2u offs=%-4u pgsz: %c%c | " @@ -240,7 +241,7 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm, { struct gk20a *g = vm->mm->g; u32 page_size = vm->gmmu_page_sizes[attrs->pgsz]; - u32 pd_offset = pd_offset_from_index(l, pd_idx); + u32 pd_offset = nvgpu_pd_offset_from_index(l, pd_idx); u32 pte_w[2] = {0, 0}; if (phys_addr != 0ULL) { @@ -271,8 +272,8 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm, (u32)attrs->ctag / g->ops.fb.compression_page_size(g), pte_w[1], pte_w[0]); - pd_write(g, pd, (size_t)pd_offset + (size_t)0, pte_w[0]); - pd_write(g, pd, (size_t)pd_offset + (size_t)1, pte_w[1]); + nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)0, pte_w[0]); + nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)1, pte_w[1]); } #define GP10B_PDE0_ENTRY_SIZE 16U @@ -287,7 +288,7 @@ static u32 gp10b_get_pde0_pgsz(struct gk20a *g, const struct gk20a_mmu_level *l, struct nvgpu_gmmu_pd *pd, u32 pd_idx) { u32 pde_base = pd->mem_offs / sizeof(u32); - u32 pde_offset = pde_base + pd_offset_from_index(l, pd_idx); + u32 pde_offset = pde_base + nvgpu_pd_offset_from_index(l, pd_idx); u32 pde_v[GP10B_PDE0_ENTRY_SIZE >> 2]; u32 i; u32 pgsz = GMMU_NR_PAGE_SIZES; @@ -380,7 +381,7 @@ const struct gk20a_mmu_level *gp10b_mm_get_mmu_levels(struct gk20a *g, void gp10b_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block, struct vm_gk20a *vm) { - u64 pdb_addr = nvgpu_pde_gpu_addr(g, &vm->pdb); + u64 pdb_addr = nvgpu_pd_gpu_addr(g, &vm->pdb); u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v()); u32 pdb_addr_hi = u64_hi32(pdb_addr); diff --git a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h index a77786201..d7db66fd4 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h @@ -37,6 +37,7 @@ struct vm_gk20a; struct nvgpu_mem; +struct nvgpu_gmmu_pd; #define GMMU_PAGE_SIZE_SMALL 0U #define GMMU_PAGE_SIZE_BIG 1U @@ -49,29 +50,6 @@ enum gk20a_mem_rw_flag { gk20a_mem_flag_write_only = 2, /* WO */ }; -/* - * GMMU page directory. This is the kernel's tracking of a list of PDEs or PTEs - * in the GMMU. - */ -struct nvgpu_gmmu_pd { - /* - * DMA memory describing the PTEs or PDEs. @mem_offs describes the - * offset of the PDE table in @mem. @cached specifies if this PD is - * using pd_cache memory. - */ - struct nvgpu_mem *mem; - u32 mem_offs; - bool cached; - u32 pd_size; /* In bytes. */ - - /* - * List of pointers to the next level of page tables. Will not be - * populated when this PD is pointing to PTEs. - */ - struct nvgpu_gmmu_pd *entries; - int num_entries; -}; - /* * Reduce the number of arguments getting passed through the various levels of * GMMU mapping functions. @@ -185,27 +163,6 @@ void nvgpu_gmmu_unmap(struct vm_gk20a *vm, struct nvgpu_mem *mem, u64 gpu_va); -int nvgpu_pd_alloc(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd, u32 bytes); -void nvgpu_pd_free(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd); -int nvgpu_pd_cache_init(struct gk20a *g); -void nvgpu_pd_cache_fini(struct gk20a *g); -u64 nvgpu_pde_gpu_addr(struct gk20a *g, struct nvgpu_gmmu_pd *pd); - -/* - * Some useful routines that are shared across chips. - */ -static inline u32 pd_offset_from_index(const struct gk20a_mmu_level *l, - u32 pd_idx) -{ - return (pd_idx * l->entry_size) / U32(sizeof(u32)); -} - -static inline void pd_write(struct gk20a *g, struct nvgpu_gmmu_pd *pd, - size_t w, size_t data) -{ - nvgpu_mem_wr32(g, pd->mem, (pd->mem_offs / sizeof(u32)) + w, data); -} - /** * __nvgpu_pte_words - Compute number of words in a PTE. * diff --git a/drivers/gpu/nvgpu/include/nvgpu/pd_cache.h b/drivers/gpu/nvgpu/include/nvgpu/pd_cache.h new file mode 100644 index 000000000..557c808d9 --- /dev/null +++ b/drivers/gpu/nvgpu/include/nvgpu/pd_cache.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_PD_CACHE_H +#define NVGPU_PD_CACHE_H + +#include + +struct gk20a; +struct vm_gk20a; +struct nvgpu_mem; +struct gk20a_mmu_level; + +/* + * GMMU page directory. This is the kernel's tracking of a list of PDEs or PTEs + * in the GMMU. + */ +struct nvgpu_gmmu_pd { + /* + * DMA memory describing the PTEs or PDEs. @mem_offs describes the + * offset of the PDE table in @mem. @cached specifies if this PD is + * using pd_cache memory. + */ + struct nvgpu_mem *mem; + u32 mem_offs; + bool cached; + u32 pd_size; /* In bytes. */ + + /* + * List of pointers to the next level of page tables. Does not + * need to be populated when this PD is pointing to PTEs. + */ + struct nvgpu_gmmu_pd *entries; + int num_entries; +}; + +int nvgpu_pd_alloc(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd, u32 bytes); +void nvgpu_pd_free(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd); +int nvgpu_pd_cache_init(struct gk20a *g); +void nvgpu_pd_cache_fini(struct gk20a *g); +u32 nvgpu_pd_offset_from_index(const struct gk20a_mmu_level *l, u32 pd_idx); +void nvgpu_pd_write(struct gk20a *g, struct nvgpu_gmmu_pd *pd, + size_t w, u32 data); +u64 nvgpu_pd_gpu_addr(struct gk20a *g, struct nvgpu_gmmu_pd *pd); + +#endif diff --git a/drivers/gpu/nvgpu/include/nvgpu/vm.h b/drivers/gpu/nvgpu/include/nvgpu/vm.h index 50f530714..5b07a1857 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/vm.h +++ b/drivers/gpu/nvgpu/include/nvgpu/vm.h @@ -28,6 +28,7 @@ #include #include #include +#include #include #include diff --git a/drivers/gpu/nvgpu/libnvgpu-drv.export b/drivers/gpu/nvgpu/libnvgpu-drv.export index a5cd2a3ac..59353b52d 100644 --- a/drivers/gpu/nvgpu/libnvgpu-drv.export +++ b/drivers/gpu/nvgpu/libnvgpu-drv.export @@ -73,6 +73,7 @@ nvgpu_pd_alloc nvgpu_pd_cache_fini nvgpu_pd_cache_init nvgpu_pd_free +nvgpu_pd_write nvgpu_mem_rd32 nvgpu_mem_wr32 nvgpu_mutex_acquire diff --git a/userspace/units/mm/pd_cache/pd_cache.c b/userspace/units/mm/pd_cache/pd_cache.c index fda8ddc6d..ca674ee7e 100644 --- a/userspace/units/mm/pd_cache/pd_cache.c +++ b/userspace/units/mm/pd_cache/pd_cache.c @@ -26,6 +26,7 @@ #include #include +#include #include #include @@ -598,7 +599,7 @@ static int test_pd_cache_valid_alloc(struct unit_module *m, * the nvgpu_mem. Using the zeroth word makes it easy to read * back. */ - pd_write(g, &pd, 0, 0x12345678); + nvgpu_pd_write(g, &pd, 0, 0x12345678); if (0x12345678 != nvgpu_mem_rd32(g, pd.mem, pd.mem_offs / sizeof(u32))) {