From 00d7b53b73c9051c0b9f068c43b1adfd4433c565 Mon Sep 17 00:00:00 2001
From: Alex Waterman
Date: Tue, 23 Apr 2019 13:34:30 -0700
Subject: [PATCH] gpu: nvgpu: Move remaining GMMU HAL code to hal/mm/gmmu/

Move the remaining GMMU HAL-related code from the gm20b/, gp10b/, and
gv11b/ directories to new GMMU HAL source files. Also update all
makefiles and HAL init code to reflect the new location of the headers
and source code.

JIRA NVGPU-2042

Change-Id: Ic9b85cc547bd0f994ad11042fc4093c517327399
Signed-off-by: Alex Waterman
Reviewed-on: https://git-master.nvidia.com/r/2103672
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/Makefile                    |   3 +
 drivers/gpu/nvgpu/Makefile.sources            |   3 +
 .../nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c  |   2 +
 .../nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c  |   2 +
 drivers/gpu/nvgpu/gm20b/mm_gm20b.c            |  16 -
 drivers/gpu/nvgpu/gm20b/mm_gm20b.h            |   5 +-
 drivers/gpu/nvgpu/gp10b/mm_gp10b.c            | 348 ----------------
 drivers/gpu/nvgpu/gp10b/mm_gp10b.h            |   4 -
 drivers/gpu/nvgpu/gv11b/mm_gv11b.c            |  17 -
 drivers/gpu/nvgpu/gv11b/mm_gv11b.h            |   2 -
 drivers/gpu/nvgpu/hal/init/hal_gm20b.c        |   1 +
 drivers/gpu/nvgpu/hal/init/hal_gp10b.c        |   3 +-
 drivers/gpu/nvgpu/hal/init/hal_gv100.c        |   4 +-
 drivers/gpu/nvgpu/hal/init/hal_gv11b.c        |   4 +-
 drivers/gpu/nvgpu/hal/init/hal_tu104.c        |   4 +-
 drivers/gpu/nvgpu/hal/mm/gmmu/gmmu_gm20b.c    |  44 +++
 drivers/gpu/nvgpu/hal/mm/gmmu/gmmu_gm20b.h    |  36 ++
 drivers/gpu/nvgpu/hal/mm/gmmu/gmmu_gp10b.c    | 374 ++++++++++++++++++
 drivers/gpu/nvgpu/hal/mm/gmmu/gmmu_gp10b.h    |  36 ++
 drivers/gpu/nvgpu/hal/mm/gmmu/gmmu_gv11b.c    |  43 ++
 drivers/gpu/nvgpu/hal/mm/gmmu/gmmu_gv11b.h    |  34 ++
 .../buddy_allocator/buddy_allocator.c         |   3 +-
 .../units/mm/gmmu/page_table/page_table.c     |   8 +-
 userspace/units/mm/gmmu/pd_cache/pd_cache.c   |   2 +-
 userspace/units/mm/nvgpu_mem/nvgpu_mem.c      |   3 +-
 .../mm/page_table_faults/page_table_faults.c  |  10 +-
 userspace/units/mm/vm/vm.c                    |   4 +-
 27 files changed, 607 insertions(+), 408 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/hal/mm/gmmu/gmmu_gm20b.c
 create mode 100644 drivers/gpu/nvgpu/hal/mm/gmmu/gmmu_gm20b.h
 create mode 100644 drivers/gpu/nvgpu/hal/mm/gmmu/gmmu_gp10b.c
 create mode 100644 drivers/gpu/nvgpu/hal/mm/gmmu/gmmu_gp10b.h
 create mode 100644 drivers/gpu/nvgpu/hal/mm/gmmu/gmmu_gv11b.c
 create mode 100644 drivers/gpu/nvgpu/hal/mm/gmmu/gmmu_gv11b.h

diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
index b779246f0..931e81bce 100644
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -155,6 +155,9 @@ nvgpu-y += \
 	hal/mm/cache/flush_gk20a.o \
 	hal/mm/cache/flush_gv11b.o \
 	hal/mm/gmmu/gmmu_gk20a.o \
+	hal/mm/gmmu/gmmu_gm20b.o \
+	hal/mm/gmmu/gmmu_gp10b.o \
+	hal/mm/gmmu/gmmu_gv11b.o \
 	hal/mc/mc_gm20b.o \
 	hal/mc/mc_gp10b.o \
 	hal/mc/mc_gv11b.o \
diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources
index 6f9aaf211..88b059168 100644
--- a/drivers/gpu/nvgpu/Makefile.sources
+++ b/drivers/gpu/nvgpu/Makefile.sources
@@ -263,6 +263,9 @@ srcs += common/sim/sim.c \
 	hal/mm/cache/flush_gk20a.c \
 	hal/mm/cache/flush_gv11b.c \
 	hal/mm/gmmu/gmmu_gk20a.c \
+	hal/mm/gmmu/gmmu_gm20b.c \
+	hal/mm/gmmu/gmmu_gp10b.c \
+	hal/mm/gmmu/gmmu_gv11b.c \
 	hal/mc/mc_gm20b.c \
 	hal/mc/mc_gp10b.c \
 	hal/mc/mc_gv11b.c \
diff --git a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c
index af1e1ef4b..aaf827da9 100644
--- a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c
+++ b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c
@@ -33,6 +33,8 @@
#include "hal/bus/bus_gk20a.h" #include "hal/bus/bus_gm20b.h" +#include "hal/mm/gmmu/gmmu_gm20b.h" +#include "hal/mm/gmmu/gmmu_gp10b.h" #include "hal/regops/regops_gp10b.h" #include "hal/class/class_gp10b.h" #include "hal/fifo/engines_gm20b.h" diff --git a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c index ac86c993b..5f87b970b 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c +++ b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c @@ -22,6 +22,8 @@ #include "hal/bus/bus_gk20a.h" #include "hal/bus/bus_gm20b.h" +#include "hal/mm/gmmu/gmmu_gm20b.h" +#include "hal/mm/gmmu/gmmu_gp10b.h" #include "hal/regops/regops_gv11b.h" #include "hal/class/class_gv11b.h" #include "hal/fifo/fifo_gv11b.h" diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c index 744130dd4..1621e5379 100644 --- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c @@ -29,23 +29,7 @@ #include -u32 gm20b_mm_get_big_page_sizes(void) -{ - return SZ_64K | SZ_128K; -} - -u32 gm20b_mm_get_default_big_page_size(void) -{ - return SZ_64K; -} - bool gm20b_mm_is_bar1_supported(struct gk20a *g) { return true; } - -u64 gm20b_gpu_phys_addr(struct gk20a *g, - struct nvgpu_gmmu_attrs *attrs, u64 phys) -{ - return phys; -} diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.h b/drivers/gpu/nvgpu/gm20b/mm_gm20b.h index 6c6b78f7a..df0a51a41 100644 --- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.h +++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.h @@ -27,10 +27,7 @@ struct gk20a; #define PDE_ADDR_START(x, y) ((x) & ~((0x1UL << (y)) - 1)) #define PDE_ADDR_END(x, y) ((x) | ((0x1UL << (y)) - 1)) -u32 gm20b_mm_get_big_page_sizes(void); -u32 gm20b_mm_get_default_big_page_size(void); bool gm20b_mm_support_sparse(struct gk20a *g); bool gm20b_mm_is_bar1_supported(struct gk20a *g); -u64 gm20b_gpu_phys_addr(struct gk20a *g, - struct nvgpu_gmmu_attrs *attrs, u64 phys); + #endif /* NVGPU_GM20B_MM_GM20B_H */ diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c index 8c489d096..4089115b4 100644 --- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c @@ -34,20 +34,6 @@ #include "gm20b/mm_gm20b.h" #include "mm_gp10b.h" -#include "hal/mm/gmmu/gmmu_gk20a.h" - -#include - -u32 gp10b_mm_get_default_big_page_size(void) -{ - return (u32)SZ_64K; -} - -u32 gp10b_mm_get_iommu_bit(struct gk20a *g) -{ - return 36; -} - int gp10b_init_bar2_vm(struct gk20a *g) { int err; @@ -81,340 +67,6 @@ clean_up_va: return err; } -/* - * For GV11B and TU104 MSS NVLINK HW settings are in force_snoop mode. - * This will force all the GPU mappings to be coherent. - * By default the mem aperture sets as sysmem_non_coherent and will use L2 mode. - * Change target pte aperture to sysmem_coherent if mem attribute requests for - * platform atomics to use rmw atomic capability. 
- * - */ -static u32 gmmu_aperture_mask(struct gk20a *g, - enum nvgpu_aperture mem_ap, - bool platform_atomic_attr, - u32 sysmem_mask, - u32 sysmem_coh_mask, - u32 vidmem_mask) -{ - if (nvgpu_is_enabled(g, NVGPU_SUPPORT_PLATFORM_ATOMIC) && - platform_atomic_attr) { - mem_ap = APERTURE_SYSMEM_COH; - } - - return nvgpu_aperture_mask_raw(g, mem_ap, - sysmem_mask, - sysmem_coh_mask, - vidmem_mask); -} - -static void update_gmmu_pde3_locked(struct vm_gk20a *vm, - const struct gk20a_mmu_level *l, - struct nvgpu_gmmu_pd *pd, - u32 pd_idx, - u64 virt_addr, - u64 phys_addr, - struct nvgpu_gmmu_attrs *attrs) -{ - struct gk20a *g = gk20a_from_vm(vm); - struct nvgpu_gmmu_pd *next_pd = &pd->entries[pd_idx]; - u32 pd_offset = nvgpu_pd_offset_from_index(l, pd_idx); - u32 pde_v[2] = {0, 0}; - - phys_addr >>= gmmu_new_pde_address_shift_v(); - - pde_v[0] |= nvgpu_aperture_mask(g, next_pd->mem, - gmmu_new_pde_aperture_sys_mem_ncoh_f(), - gmmu_new_pde_aperture_sys_mem_coh_f(), - gmmu_new_pde_aperture_video_memory_f()); - pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(phys_addr)); - pde_v[0] |= gmmu_new_pde_vol_true_f(); - nvgpu_assert(u64_hi32(phys_addr >> 24) == 0U); - pde_v[1] |= (u32)(phys_addr >> 24); - - nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)0, pde_v[0]); - nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)1, pde_v[1]); - - pte_dbg(g, attrs, - "PDE: i=%-4u size=%-2u offs=%-4u pgsz: -- | " - "GPU %#-12llx phys %#-12llx " - "[0x%08x, 0x%08x]", - pd_idx, l->entry_size, pd_offset, - virt_addr, phys_addr, - pde_v[1], pde_v[0]); -} - -static void update_gmmu_pde0_locked(struct vm_gk20a *vm, - const struct gk20a_mmu_level *l, - struct nvgpu_gmmu_pd *pd, - u32 pd_idx, - u64 virt_addr, - u64 phys_addr, - struct nvgpu_gmmu_attrs *attrs) -{ - struct gk20a *g = gk20a_from_vm(vm); - struct nvgpu_gmmu_pd *next_pd = &pd->entries[pd_idx]; - bool small_valid, big_valid; - u32 small_addr = 0, big_addr = 0; - u32 pd_offset = nvgpu_pd_offset_from_index(l, pd_idx); - u32 pde_v[4] = {0, 0, 0, 0}; - u64 tmp_addr; - - small_valid = attrs->pgsz == GMMU_PAGE_SIZE_SMALL; - big_valid = attrs->pgsz == GMMU_PAGE_SIZE_BIG; - - if (small_valid) { - tmp_addr = phys_addr >> gmmu_new_dual_pde_address_shift_v(); - nvgpu_assert(u64_hi32(tmp_addr) == 0U); - small_addr = (u32)tmp_addr; - } - - if (big_valid) { - tmp_addr = phys_addr >> gmmu_new_dual_pde_address_big_shift_v(); - nvgpu_assert(u64_hi32(tmp_addr) == 0U); - big_addr = (u32)tmp_addr; - } - - if (small_valid) { - pde_v[2] |= - gmmu_new_dual_pde_address_small_sys_f(small_addr); - pde_v[2] |= nvgpu_aperture_mask(g, next_pd->mem, - gmmu_new_dual_pde_aperture_small_sys_mem_ncoh_f(), - gmmu_new_dual_pde_aperture_small_sys_mem_coh_f(), - gmmu_new_dual_pde_aperture_small_video_memory_f()); - pde_v[2] |= gmmu_new_dual_pde_vol_small_true_f(); - pde_v[3] |= small_addr >> 24; - } - - if (big_valid) { - pde_v[0] |= gmmu_new_dual_pde_address_big_sys_f(big_addr); - pde_v[0] |= gmmu_new_dual_pde_vol_big_true_f(); - pde_v[0] |= nvgpu_aperture_mask(g, next_pd->mem, - gmmu_new_dual_pde_aperture_big_sys_mem_ncoh_f(), - gmmu_new_dual_pde_aperture_big_sys_mem_coh_f(), - gmmu_new_dual_pde_aperture_big_video_memory_f()); - pde_v[1] |= big_addr >> 28; - } - - nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)0, pde_v[0]); - nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)1, pde_v[1]); - nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)2, pde_v[2]); - nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)3, pde_v[3]); - - pte_dbg(g, attrs, - "PDE: i=%-4u size=%-2u offs=%-4u pgsz: %c%c | " - "GPU 
%#-12llx phys %#-12llx " - "[0x%08x, 0x%08x, 0x%08x, 0x%08x]", - pd_idx, l->entry_size, pd_offset, - small_valid ? 'S' : '-', - big_valid ? 'B' : '-', - virt_addr, phys_addr, - pde_v[3], pde_v[2], pde_v[1], pde_v[0]); -} - -static void __update_pte(struct vm_gk20a *vm, - u32 *pte_w, - u64 phys_addr, - struct nvgpu_gmmu_attrs *attrs) -{ - struct gk20a *g = gk20a_from_vm(vm); - u64 ctag_granularity = g->ops.fb.compression_page_size(g); - u32 page_size = vm->gmmu_page_sizes[attrs->pgsz]; - u32 pte_valid = attrs->valid ? - gmmu_new_pte_valid_true_f() : - gmmu_new_pte_valid_false_f(); - u64 phys_shifted = phys_addr >> gmmu_new_pte_address_shift_v(); - u32 pte_addr = attrs->aperture == APERTURE_SYSMEM ? - gmmu_new_pte_address_sys_f(u64_lo32(phys_shifted)) : - gmmu_new_pte_address_vid_f(u64_lo32(phys_shifted)); - u32 pte_tgt = gmmu_aperture_mask(g, - attrs->aperture, - attrs->platform_atomic, - gmmu_new_pte_aperture_sys_mem_ncoh_f(), - gmmu_new_pte_aperture_sys_mem_coh_f(), - gmmu_new_pte_aperture_video_memory_f()); - u64 tmp_addr; - - pte_w[0] = pte_valid | pte_addr | pte_tgt; - - if (attrs->priv) { - pte_w[0] |= gmmu_new_pte_privilege_true_f(); - } - - tmp_addr = phys_addr >> (24U + gmmu_new_pte_address_shift_v()); - nvgpu_assert(u64_hi32(tmp_addr) == 0U); - pte_w[1] = (u32)tmp_addr | - gmmu_new_pte_kind_f(attrs->kind_v) | - gmmu_new_pte_comptagline_f((u32)(attrs->ctag / - ctag_granularity)); - - if (attrs->rw_flag == gk20a_mem_flag_read_only) { - pte_w[0] |= gmmu_new_pte_read_only_true_f(); - } - - if (!attrs->valid && !attrs->cacheable) { - pte_w[0] |= gmmu_new_pte_read_only_true_f(); - } else if (!attrs->cacheable) { - pte_w[0] |= gmmu_new_pte_vol_true_f(); - } - - if (attrs->ctag != 0ULL) { - attrs->ctag += page_size; - } - -} - -static void __update_pte_sparse(u32 *pte_w) -{ - pte_w[0] = gmmu_new_pte_valid_false_f(); - pte_w[0] |= gmmu_new_pte_vol_true_f(); -} - -static void update_gmmu_pte_locked(struct vm_gk20a *vm, - const struct gk20a_mmu_level *l, - struct nvgpu_gmmu_pd *pd, - u32 pd_idx, - u64 virt_addr, - u64 phys_addr, - struct nvgpu_gmmu_attrs *attrs) -{ - struct gk20a *g = vm->mm->g; - u32 page_size = vm->gmmu_page_sizes[attrs->pgsz]; - u32 pd_offset = nvgpu_pd_offset_from_index(l, pd_idx); - u32 pte_w[2] = {0, 0}; - - if (phys_addr != 0ULL) { - __update_pte(vm, pte_w, phys_addr, attrs); - } else if (attrs->sparse) { - __update_pte_sparse(pte_w); - } - - pte_dbg(g, attrs, - "vm=%s " - "PTE: i=%-4u size=%-2u | " - "GPU %#-12llx phys %#-12llx " - "pgsz: %3dkb perm=%-2s kind=%#02x APT=%-6s %c%c%c%c%c " - "ctag=0x%08llx " - "[0x%08x, 0x%08x]", - vm->name, - pd_idx, l->entry_size, - virt_addr, phys_addr, - page_size >> 10, - nvgpu_gmmu_perm_str(attrs->rw_flag), - attrs->kind_v, - nvgpu_aperture_str(g, attrs->aperture), - attrs->cacheable ? 'C' : '-', - attrs->sparse ? 'S' : '-', - attrs->priv ? 'P' : '-', - attrs->valid ? 'V' : '-', - attrs->platform_atomic ? 'A' : '-', - attrs->ctag / g->ops.fb.compression_page_size(g), - pte_w[1], pte_w[0]); - - nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)0, pte_w[0]); - nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)1, pte_w[1]); -} - -#define GP10B_PDE0_ENTRY_SIZE 16U - -/* - * Calculate the pgsz of the pde level - * Pascal+ implements a 5 level page table structure with only the last - * level having a different number of entries depending on whether it holds - * big pages or small pages. 
- */ -static u32 gp10b_get_pde0_pgsz(struct gk20a *g, const struct gk20a_mmu_level *l, - struct nvgpu_gmmu_pd *pd, u32 pd_idx) -{ - u32 pde_base = pd->mem_offs / (u32)sizeof(u32); - u32 pde_offset = pde_base + nvgpu_pd_offset_from_index(l, pd_idx); - u32 pde_v[GP10B_PDE0_ENTRY_SIZE >> 2]; - u32 i; - u32 pgsz = GMMU_NR_PAGE_SIZES; - - if (pd->mem == NULL) { - return pgsz; - } - - for (i = 0; i < GP10B_PDE0_ENTRY_SIZE >> 2; i++) { - pde_v[i] = nvgpu_mem_rd32(g, pd->mem, pde_offset + i); - } - - /* - * Check if the aperture AND address are set - */ - if ((pde_v[2] & - (gmmu_new_dual_pde_aperture_small_sys_mem_ncoh_f() | - gmmu_new_dual_pde_aperture_small_sys_mem_coh_f() | - gmmu_new_dual_pde_aperture_small_video_memory_f())) != 0U) { - u64 addr = ((U64(pde_v[3]) << U64(32)) | (U64(pde_v[2]) & - U64(gmmu_new_dual_pde_address_small_sys_f(~U32(0U))))) << - U64(gmmu_new_dual_pde_address_shift_v()); - - if (addr != 0ULL) { - pgsz = GMMU_PAGE_SIZE_SMALL; - } - } - - if ((pde_v[0] & - (gmmu_new_dual_pde_aperture_big_sys_mem_ncoh_f() | - gmmu_new_dual_pde_aperture_big_sys_mem_coh_f() | - gmmu_new_dual_pde_aperture_big_video_memory_f())) != 0U) { - u64 addr = ((U64(pde_v[1]) << U64(32)) | (U64(pde_v[0]) & - U64(gmmu_new_dual_pde_address_big_sys_f(~U32(0U))))) << - U64(gmmu_new_dual_pde_address_big_shift_v()); - - if (addr != 0ULL) { - /* - * If small is set that means that somehow MM allowed - * both small and big to be set, the PDE is not valid - * and may be corrupted - */ - if (pgsz == GMMU_PAGE_SIZE_SMALL) { - nvgpu_err(g, - "both small and big apertures enabled"); - return GMMU_NR_PAGE_SIZES; - } - pgsz = GMMU_PAGE_SIZE_BIG; - } - } - - return pgsz; -} - -static const struct gk20a_mmu_level gp10b_mm_levels[] = { - {.hi_bit = {48, 48}, - .lo_bit = {47, 47}, - .update_entry = update_gmmu_pde3_locked, - .entry_size = 8, - .get_pgsz = gk20a_get_pde_pgsz}, - {.hi_bit = {46, 46}, - .lo_bit = {38, 38}, - .update_entry = update_gmmu_pde3_locked, - .entry_size = 8, - .get_pgsz = gk20a_get_pde_pgsz}, - {.hi_bit = {37, 37}, - .lo_bit = {29, 29}, - .update_entry = update_gmmu_pde3_locked, - .entry_size = 8, - .get_pgsz = gk20a_get_pde_pgsz}, - {.hi_bit = {28, 28}, - .lo_bit = {21, 21}, - .update_entry = update_gmmu_pde0_locked, - .entry_size = GP10B_PDE0_ENTRY_SIZE, - .get_pgsz = gp10b_get_pde0_pgsz}, - {.hi_bit = {20, 20}, - .lo_bit = {12, 16}, - .update_entry = update_gmmu_pte_locked, - .entry_size = 8, - .get_pgsz = gk20a_get_pte_pgsz}, - {.update_entry = NULL} -}; - -const struct gk20a_mmu_level *gp10b_mm_get_mmu_levels(struct gk20a *g, - u32 big_page_size) -{ - return gp10b_mm_levels; -} void gp10b_remove_bar2_vm(struct gk20a *g) { diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.h b/drivers/gpu/nvgpu/gp10b/mm_gp10b.h index 35ee17407..b7a37d56b 100644 --- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.h +++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.h @@ -28,11 +28,7 @@ struct gk20a_mmu_level; struct nvgpu_mem; struct vm_gk20a; -u32 gp10b_mm_get_default_big_page_size(void); -u32 gp10b_mm_get_iommu_bit(struct gk20a *g); int gp10b_init_bar2_vm(struct gk20a *g); -const struct gk20a_mmu_level *gp10b_mm_get_mmu_levels(struct gk20a *g, - u32 big_page_size); void gp10b_remove_bar2_vm(struct gk20a *g); #endif diff --git a/drivers/gpu/nvgpu/gv11b/mm_gv11b.c b/drivers/gpu/nvgpu/gv11b/mm_gv11b.c index a0895e446..272fb97cb 100644 --- a/drivers/gpu/nvgpu/gv11b/mm_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/mm_gv11b.c @@ -37,8 +37,6 @@ #include -#define NVGPU_L3_ALLOC_BIT BIT64(36) - bool gv11b_mm_is_bar1_supported(struct gk20a *g) { return 
false; @@ -199,18 +197,3 @@ int gv11b_init_mm_setup_hw(struct gk20a *g) return err; } - -/* - * On Volta the GPU determines whether to do L3 allocation for a mapping by - * checking bit 36 of the phsyical address. So if a mapping should allocte lines - * in the L3 this bit must be set. - */ -u64 gv11b_gpu_phys_addr(struct gk20a *g, - struct nvgpu_gmmu_attrs *attrs, u64 phys) -{ - if ((attrs != NULL) && attrs->l3_alloc) { - return phys | NVGPU_L3_ALLOC_BIT; - } - - return phys; -} diff --git a/drivers/gpu/nvgpu/gv11b/mm_gv11b.h b/drivers/gpu/nvgpu/gv11b/mm_gv11b.h index d9d1fe0c0..f9bf346d9 100644 --- a/drivers/gpu/nvgpu/gv11b/mm_gv11b.h +++ b/drivers/gpu/nvgpu/gv11b/mm_gv11b.h @@ -32,8 +32,6 @@ bool gv11b_mm_is_bar1_supported(struct gk20a *g); void gv11b_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm, u32 big_page_size); int gv11b_init_mm_setup_hw(struct gk20a *g); -u64 gv11b_gpu_phys_addr(struct gk20a *g, - struct nvgpu_gmmu_attrs *attrs, u64 phys); void gv11b_mm_fault_info_mem_destroy(struct gk20a *g); void gv11b_mm_mmu_fault_disable_hw(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/hal/init/hal_gm20b.c b/drivers/gpu/nvgpu/hal/init/hal_gm20b.c index 1aa9bc61f..20605557b 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gm20b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gm20b.c @@ -43,6 +43,7 @@ #include "hal/mm/cache/flush_gk20a.h" #include "hal/mm/gmmu/gmmu_gk20a.h" +#include "hal/mm/gmmu/gmmu_gm20b.h" #include "hal/mc/mc_gm20b.h" #include "hal/bus/bus_gm20b.h" #include "hal/bus/bus_gk20a.h" diff --git a/drivers/gpu/nvgpu/hal/init/hal_gp10b.c b/drivers/gpu/nvgpu/hal/init/hal_gp10b.c index 77e7676d9..cfa743337 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gp10b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gp10b.c @@ -42,7 +42,8 @@ #include #include "hal/mm/cache/flush_gk20a.h" -#include "hal/mm/gmmu/gmmu_gk20a.h" +#include "hal/mm/gmmu/gmmu_gm20b.h" +#include "hal/mm/gmmu/gmmu_gp10b.h" #include "hal/mc/mc_gm20b.h" #include "hal/mc/mc_gp10b.h" #include "hal/bus/bus_gk20a.h" diff --git a/drivers/gpu/nvgpu/hal/init/hal_gv100.c b/drivers/gpu/nvgpu/hal/init/hal_gv100.c index b424a4129..5eafa56ed 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gv100.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gv100.c @@ -24,7 +24,9 @@ #include "hal/mm/cache/flush_gk20a.h" #include "hal/mm/cache/flush_gv11b.h" -#include "hal/mm/gmmu/gmmu_gk20a.h" +#include "hal/mm/gmmu/gmmu_gm20b.h" +#include "hal/mm/gmmu/gmmu_gp10b.h" +#include "hal/mm/gmmu/gmmu_gv11b.h" #include "hal/mc/mc_gm20b.h" #include "hal/mc/mc_gp10b.h" #include "hal/mc/mc_gv11b.h" diff --git a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c index 959469553..47f43d59f 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c @@ -32,7 +32,9 @@ #include "hal/mm/cache/flush_gk20a.h" #include "hal/mm/cache/flush_gv11b.h" -#include "hal/mm/gmmu/gmmu_gk20a.h" +#include "hal/mm/gmmu/gmmu_gm20b.h" +#include "hal/mm/gmmu/gmmu_gp10b.h" +#include "hal/mm/gmmu/gmmu_gv11b.h" #include "hal/mc/mc_gm20b.h" #include "hal/mc/mc_gp10b.h" #include "hal/mc/mc_gv11b.h" diff --git a/drivers/gpu/nvgpu/hal/init/hal_tu104.c b/drivers/gpu/nvgpu/hal/init/hal_tu104.c index 8d4a61c96..384a83f47 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_tu104.c +++ b/drivers/gpu/nvgpu/hal/init/hal_tu104.c @@ -24,7 +24,9 @@ #include "hal/mm/cache/flush_gk20a.h" #include "hal/mm/cache/flush_gv11b.h" -#include "hal/mm/gmmu/gmmu_gk20a.h" +#include "hal/mm/gmmu/gmmu_gm20b.h" +#include "hal/mm/gmmu/gmmu_gp10b.h" +#include 
"hal/mm/gmmu/gmmu_gv11b.h" #include "hal/mc/mc_gm20b.h" #include "hal/mc/mc_gp10b.h" #include "hal/mc/mc_gv11b.h" diff --git a/drivers/gpu/nvgpu/hal/mm/gmmu/gmmu_gm20b.c b/drivers/gpu/nvgpu/hal/mm/gmmu/gmmu_gm20b.c new file mode 100644 index 000000000..ca9e99c2f --- /dev/null +++ b/drivers/gpu/nvgpu/hal/mm/gmmu/gmmu_gm20b.c @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include + +#include + +#include "gmmu_gm20b.h" + +u32 gm20b_mm_get_big_page_sizes(void) +{ + return SZ_64K | SZ_128K; +} + +u32 gm20b_mm_get_default_big_page_size(void) +{ + return SZ_64K; +} + +u64 gm20b_gpu_phys_addr(struct gk20a *g, + struct nvgpu_gmmu_attrs *attrs, u64 phys) +{ + return phys; +} diff --git a/drivers/gpu/nvgpu/hal/mm/gmmu/gmmu_gm20b.h b/drivers/gpu/nvgpu/hal/mm/gmmu/gmmu_gm20b.h new file mode 100644 index 000000000..9cb2c5baa --- /dev/null +++ b/drivers/gpu/nvgpu/hal/mm/gmmu/gmmu_gm20b.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */
+
+#ifndef HAL_MM_GMMU_GMMU_GM20B_H
+#define HAL_MM_GMMU_GMMU_GM20B_H
+
+#include
+
+struct gk20a;
+struct nvgpu_gmmu_attrs;
+
+u32 gm20b_mm_get_big_page_sizes(void);
+u32 gm20b_mm_get_default_big_page_size(void);
+u64 gm20b_gpu_phys_addr(struct gk20a *g,
+		struct nvgpu_gmmu_attrs *attrs, u64 phys);
+
+#endif
diff --git a/drivers/gpu/nvgpu/hal/mm/gmmu/gmmu_gp10b.c b/drivers/gpu/nvgpu/hal/mm/gmmu/gmmu_gp10b.c
new file mode 100644
index 000000000..84ca90479
--- /dev/null
+++ b/drivers/gpu/nvgpu/hal/mm/gmmu/gmmu_gp10b.c
@@ -0,0 +1,374 @@
+/*
+ * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include
+#include
+
+#include
+
+#include "gmmu_gk20a.h"
+#include "gmmu_gp10b.h"
+
+u32 gp10b_mm_get_default_big_page_size(void)
+{
+	return (u32)SZ_64K;
+}
+
+u32 gp10b_mm_get_iommu_bit(struct gk20a *g)
+{
+	return 36;
+}
+
+/*
+ * For GV11B and TU104, the MSS NVLINK HW settings are in force_snoop mode,
+ * which forces all GPU mappings to be coherent.
+ * By default the mem aperture is set to sysmem_non_coherent and uses L2
+ * mode. Change the target PTE aperture to sysmem_coherent if the mem
+ * attributes request platform atomics (RMW atomic capability).
+ * + */ +static u32 gmmu_aperture_mask(struct gk20a *g, + enum nvgpu_aperture mem_ap, + bool platform_atomic_attr, + u32 sysmem_mask, + u32 sysmem_coh_mask, + u32 vidmem_mask) +{ + if (nvgpu_is_enabled(g, NVGPU_SUPPORT_PLATFORM_ATOMIC) && + platform_atomic_attr) { + mem_ap = APERTURE_SYSMEM_COH; + } + + return nvgpu_aperture_mask_raw(g, mem_ap, + sysmem_mask, + sysmem_coh_mask, + vidmem_mask); +} + +static void update_gmmu_pde3_locked(struct vm_gk20a *vm, + const struct gk20a_mmu_level *l, + struct nvgpu_gmmu_pd *pd, + u32 pd_idx, + u64 virt_addr, + u64 phys_addr, + struct nvgpu_gmmu_attrs *attrs) +{ + struct gk20a *g = gk20a_from_vm(vm); + struct nvgpu_gmmu_pd *next_pd = &pd->entries[pd_idx]; + u32 pd_offset = nvgpu_pd_offset_from_index(l, pd_idx); + u32 pde_v[2] = {0, 0}; + + phys_addr >>= gmmu_new_pde_address_shift_v(); + + pde_v[0] |= nvgpu_aperture_mask(g, next_pd->mem, + gmmu_new_pde_aperture_sys_mem_ncoh_f(), + gmmu_new_pde_aperture_sys_mem_coh_f(), + gmmu_new_pde_aperture_video_memory_f()); + pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(phys_addr)); + pde_v[0] |= gmmu_new_pde_vol_true_f(); + nvgpu_assert(u64_hi32(phys_addr >> 24) == 0U); + pde_v[1] |= (u32)(phys_addr >> 24); + + nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)0, pde_v[0]); + nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)1, pde_v[1]); + + pte_dbg(g, attrs, + "PDE: i=%-4u size=%-2u offs=%-4u pgsz: -- | " + "GPU %#-12llx phys %#-12llx " + "[0x%08x, 0x%08x]", + pd_idx, l->entry_size, pd_offset, + virt_addr, phys_addr, + pde_v[1], pde_v[0]); +} + +static void update_gmmu_pde0_locked(struct vm_gk20a *vm, + const struct gk20a_mmu_level *l, + struct nvgpu_gmmu_pd *pd, + u32 pd_idx, + u64 virt_addr, + u64 phys_addr, + struct nvgpu_gmmu_attrs *attrs) +{ + struct gk20a *g = gk20a_from_vm(vm); + struct nvgpu_gmmu_pd *next_pd = &pd->entries[pd_idx]; + bool small_valid, big_valid; + u32 small_addr = 0, big_addr = 0; + u32 pd_offset = nvgpu_pd_offset_from_index(l, pd_idx); + u32 pde_v[4] = {0, 0, 0, 0}; + u64 tmp_addr; + + small_valid = attrs->pgsz == GMMU_PAGE_SIZE_SMALL; + big_valid = attrs->pgsz == GMMU_PAGE_SIZE_BIG; + + if (small_valid) { + tmp_addr = phys_addr >> gmmu_new_dual_pde_address_shift_v(); + nvgpu_assert(u64_hi32(tmp_addr) == 0U); + small_addr = (u32)tmp_addr; + } + + if (big_valid) { + tmp_addr = phys_addr >> gmmu_new_dual_pde_address_big_shift_v(); + nvgpu_assert(u64_hi32(tmp_addr) == 0U); + big_addr = (u32)tmp_addr; + } + + if (small_valid) { + pde_v[2] |= + gmmu_new_dual_pde_address_small_sys_f(small_addr); + pde_v[2] |= nvgpu_aperture_mask(g, next_pd->mem, + gmmu_new_dual_pde_aperture_small_sys_mem_ncoh_f(), + gmmu_new_dual_pde_aperture_small_sys_mem_coh_f(), + gmmu_new_dual_pde_aperture_small_video_memory_f()); + pde_v[2] |= gmmu_new_dual_pde_vol_small_true_f(); + pde_v[3] |= small_addr >> 24; + } + + if (big_valid) { + pde_v[0] |= gmmu_new_dual_pde_address_big_sys_f(big_addr); + pde_v[0] |= gmmu_new_dual_pde_vol_big_true_f(); + pde_v[0] |= nvgpu_aperture_mask(g, next_pd->mem, + gmmu_new_dual_pde_aperture_big_sys_mem_ncoh_f(), + gmmu_new_dual_pde_aperture_big_sys_mem_coh_f(), + gmmu_new_dual_pde_aperture_big_video_memory_f()); + pde_v[1] |= big_addr >> 28; + } + + nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)0, pde_v[0]); + nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)1, pde_v[1]); + nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)2, pde_v[2]); + nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)3, pde_v[3]); + + pte_dbg(g, attrs, + "PDE: i=%-4u size=%-2u offs=%-4u pgsz: %c%c | " + "GPU 
%#-12llx phys %#-12llx " + "[0x%08x, 0x%08x, 0x%08x, 0x%08x]", + pd_idx, l->entry_size, pd_offset, + small_valid ? 'S' : '-', + big_valid ? 'B' : '-', + virt_addr, phys_addr, + pde_v[3], pde_v[2], pde_v[1], pde_v[0]); +} + +static void update_pte(struct vm_gk20a *vm, + u32 *pte_w, + u64 phys_addr, + struct nvgpu_gmmu_attrs *attrs) +{ + struct gk20a *g = gk20a_from_vm(vm); + u64 ctag_granularity = g->ops.fb.compression_page_size(g); + u32 page_size = vm->gmmu_page_sizes[attrs->pgsz]; + u32 pte_valid = attrs->valid ? + gmmu_new_pte_valid_true_f() : + gmmu_new_pte_valid_false_f(); + u64 phys_shifted = phys_addr >> gmmu_new_pte_address_shift_v(); + u32 pte_addr = attrs->aperture == APERTURE_SYSMEM ? + gmmu_new_pte_address_sys_f(u64_lo32(phys_shifted)) : + gmmu_new_pte_address_vid_f(u64_lo32(phys_shifted)); + u32 pte_tgt = gmmu_aperture_mask(g, + attrs->aperture, + attrs->platform_atomic, + gmmu_new_pte_aperture_sys_mem_ncoh_f(), + gmmu_new_pte_aperture_sys_mem_coh_f(), + gmmu_new_pte_aperture_video_memory_f()); + u64 tmp_addr; + + pte_w[0] = pte_valid | pte_addr | pte_tgt; + + if (attrs->priv) { + pte_w[0] |= gmmu_new_pte_privilege_true_f(); + } + + tmp_addr = phys_addr >> (24U + gmmu_new_pte_address_shift_v()); + nvgpu_assert(u64_hi32(tmp_addr) == 0U); + pte_w[1] = (u32)tmp_addr | + gmmu_new_pte_kind_f(attrs->kind_v) | + gmmu_new_pte_comptagline_f((u32)(attrs->ctag / + ctag_granularity)); + + if (attrs->rw_flag == gk20a_mem_flag_read_only) { + pte_w[0] |= gmmu_new_pte_read_only_true_f(); + } + + if (!attrs->valid && !attrs->cacheable) { + pte_w[0] |= gmmu_new_pte_read_only_true_f(); + } else if (!attrs->cacheable) { + pte_w[0] |= gmmu_new_pte_vol_true_f(); + } + + if (attrs->ctag != 0ULL) { + attrs->ctag += page_size; + } + +} + +static void update_pte_sparse(u32 *pte_w) +{ + pte_w[0] = gmmu_new_pte_valid_false_f(); + pte_w[0] |= gmmu_new_pte_vol_true_f(); +} + +static void update_gmmu_pte_locked(struct vm_gk20a *vm, + const struct gk20a_mmu_level *l, + struct nvgpu_gmmu_pd *pd, + u32 pd_idx, + u64 virt_addr, + u64 phys_addr, + struct nvgpu_gmmu_attrs *attrs) +{ + struct gk20a *g = vm->mm->g; + u32 page_size = vm->gmmu_page_sizes[attrs->pgsz]; + u32 pd_offset = nvgpu_pd_offset_from_index(l, pd_idx); + u32 pte_w[2] = {0, 0}; + + if (phys_addr != 0ULL) { + update_pte(vm, pte_w, phys_addr, attrs); + } else if (attrs->sparse) { + update_pte_sparse(pte_w); + } + + pte_dbg(g, attrs, + "vm=%s " + "PTE: i=%-4u size=%-2u | " + "GPU %#-12llx phys %#-12llx " + "pgsz: %3dkb perm=%-2s kind=%#02x APT=%-6s %c%c%c%c%c " + "ctag=0x%08llx " + "[0x%08x, 0x%08x]", + vm->name, + pd_idx, l->entry_size, + virt_addr, phys_addr, + page_size >> 10, + nvgpu_gmmu_perm_str(attrs->rw_flag), + attrs->kind_v, + nvgpu_aperture_str(g, attrs->aperture), + attrs->cacheable ? 'C' : '-', + attrs->sparse ? 'S' : '-', + attrs->priv ? 'P' : '-', + attrs->valid ? 'V' : '-', + attrs->platform_atomic ? 'A' : '-', + attrs->ctag / g->ops.fb.compression_page_size(g), + pte_w[1], pte_w[0]); + + nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)0, pte_w[0]); + nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)1, pte_w[1]); +} + +#define GP10B_PDE0_ENTRY_SIZE 16U + +/* + * Calculate the pgsz of the pde level + * Pascal+ implements a 5 level page table structure with only the last + * level having a different number of entries depending on whether it holds + * big pages or small pages. 
+ */ +static u32 gp10b_get_pde0_pgsz(struct gk20a *g, const struct gk20a_mmu_level *l, + struct nvgpu_gmmu_pd *pd, u32 pd_idx) +{ + u32 pde_base = pd->mem_offs / (u32)sizeof(u32); + u32 pde_offset = pde_base + nvgpu_pd_offset_from_index(l, pd_idx); + u32 pde_v[GP10B_PDE0_ENTRY_SIZE >> 2]; + u32 i; + u32 pgsz = GMMU_NR_PAGE_SIZES; + + if (pd->mem == NULL) { + return pgsz; + } + + for (i = 0; i < GP10B_PDE0_ENTRY_SIZE >> 2; i++) { + pde_v[i] = nvgpu_mem_rd32(g, pd->mem, pde_offset + i); + } + + /* + * Check if the aperture AND address are set + */ + if ((pde_v[2] & + (gmmu_new_dual_pde_aperture_small_sys_mem_ncoh_f() | + gmmu_new_dual_pde_aperture_small_sys_mem_coh_f() | + gmmu_new_dual_pde_aperture_small_video_memory_f())) != 0U) { + u64 addr = ((U64(pde_v[3]) << U64(32)) | (U64(pde_v[2]) & + U64(gmmu_new_dual_pde_address_small_sys_f(~U32(0U))))) << + U64(gmmu_new_dual_pde_address_shift_v()); + + if (addr != 0ULL) { + pgsz = GMMU_PAGE_SIZE_SMALL; + } + } + + if ((pde_v[0] & + (gmmu_new_dual_pde_aperture_big_sys_mem_ncoh_f() | + gmmu_new_dual_pde_aperture_big_sys_mem_coh_f() | + gmmu_new_dual_pde_aperture_big_video_memory_f())) != 0U) { + u64 addr = ((U64(pde_v[1]) << U64(32)) | (U64(pde_v[0]) & + U64(gmmu_new_dual_pde_address_big_sys_f(~U32(0U))))) << + U64(gmmu_new_dual_pde_address_big_shift_v()); + + if (addr != 0ULL) { + /* + * If small is set that means that somehow MM allowed + * both small and big to be set, the PDE is not valid + * and may be corrupted + */ + if (pgsz == GMMU_PAGE_SIZE_SMALL) { + nvgpu_err(g, + "both small and big apertures enabled"); + return GMMU_NR_PAGE_SIZES; + } + pgsz = GMMU_PAGE_SIZE_BIG; + } + } + + return pgsz; +} + +static const struct gk20a_mmu_level gp10b_mm_levels[] = { + {.hi_bit = {48, 48}, + .lo_bit = {47, 47}, + .update_entry = update_gmmu_pde3_locked, + .entry_size = 8, + .get_pgsz = gk20a_get_pde_pgsz}, + {.hi_bit = {46, 46}, + .lo_bit = {38, 38}, + .update_entry = update_gmmu_pde3_locked, + .entry_size = 8, + .get_pgsz = gk20a_get_pde_pgsz}, + {.hi_bit = {37, 37}, + .lo_bit = {29, 29}, + .update_entry = update_gmmu_pde3_locked, + .entry_size = 8, + .get_pgsz = gk20a_get_pde_pgsz}, + {.hi_bit = {28, 28}, + .lo_bit = {21, 21}, + .update_entry = update_gmmu_pde0_locked, + .entry_size = GP10B_PDE0_ENTRY_SIZE, + .get_pgsz = gp10b_get_pde0_pgsz}, + {.hi_bit = {20, 20}, + .lo_bit = {12, 16}, + .update_entry = update_gmmu_pte_locked, + .entry_size = 8, + .get_pgsz = gk20a_get_pte_pgsz}, + {.update_entry = NULL} +}; + +const struct gk20a_mmu_level *gp10b_mm_get_mmu_levels(struct gk20a *g, + u32 big_page_size) +{ + return gp10b_mm_levels; +} diff --git a/drivers/gpu/nvgpu/hal/mm/gmmu/gmmu_gp10b.h b/drivers/gpu/nvgpu/hal/mm/gmmu/gmmu_gp10b.h new file mode 100644 index 000000000..f370b03eb --- /dev/null +++ b/drivers/gpu/nvgpu/hal/mm/gmmu/gmmu_gp10b.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef HAL_MM_GMMU_GMMU_GP10B_H
+#define HAL_MM_GMMU_GMMU_GP10B_H
+
+#include
+
+struct gk20a;
+struct gk20a_mmu_level;
+
+u32 gp10b_mm_get_default_big_page_size(void);
+u32 gp10b_mm_get_iommu_bit(struct gk20a *g);
+const struct gk20a_mmu_level *gp10b_mm_get_mmu_levels(
+	struct gk20a *g, u32 big_page_size);
+
+#endif
diff --git a/drivers/gpu/nvgpu/hal/mm/gmmu/gmmu_gv11b.c b/drivers/gpu/nvgpu/hal/mm/gmmu/gmmu_gv11b.c
new file mode 100644
index 000000000..53875754b
--- /dev/null
+++ b/drivers/gpu/nvgpu/hal/mm/gmmu/gmmu_gv11b.c
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include
+#include
+
+#include "gmmu_gv11b.h"
+
+#define NVGPU_L3_ALLOC_BIT BIT64(36)
+
+/*
+ * On Volta the GPU determines whether to do L3 allocation for a mapping by
+ * checking bit 36 of the physical address. So if a mapping should allocate
+ * lines in the L3, this bit must be set.
+ */
+u64 gv11b_gpu_phys_addr(struct gk20a *g,
+		struct nvgpu_gmmu_attrs *attrs, u64 phys)
+{
+	if ((attrs != NULL) && attrs->l3_alloc) {
+		return phys | NVGPU_L3_ALLOC_BIT;
+	}
+
+	return phys;
+}
diff --git a/drivers/gpu/nvgpu/hal/mm/gmmu/gmmu_gv11b.h b/drivers/gpu/nvgpu/hal/mm/gmmu/gmmu_gv11b.h
new file mode 100644
index 000000000..4077eff3e
--- /dev/null
+++ b/drivers/gpu/nvgpu/hal/mm/gmmu/gmmu_gv11b.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef HAL_MM_GMMU_GMMU_GV11B_H +#define HAL_MM_GMMU_GMMU_GV11B_H + +#include + +struct gk20a; +struct gk20a_mmu_level; + +u64 gv11b_gpu_phys_addr(struct gk20a *g, + struct nvgpu_gmmu_attrs *attrs, u64 phys); + +#endif diff --git a/userspace/units/mm/allocators/buddy_allocator/buddy_allocator.c b/userspace/units/mm/allocators/buddy_allocator/buddy_allocator.c index b438169f0..c9b095f33 100644 --- a/userspace/units/mm/allocators/buddy_allocator/buddy_allocator.c +++ b/userspace/units/mm/allocators/buddy_allocator/buddy_allocator.c @@ -32,8 +32,9 @@ #include "common/mm/allocators/buddy_allocator_priv.h" -#include #include +#include + #include #define SZ_8K (SZ_4K << 1) diff --git a/userspace/units/mm/gmmu/page_table/page_table.c b/userspace/units/mm/gmmu/page_table/page_table.c index e398182f4..fc990eb55 100644 --- a/userspace/units/mm/gmmu/page_table/page_table.c +++ b/userspace/units/mm/gmmu/page_table/page_table.c @@ -35,19 +35,19 @@ #include #include -#include -#include -#include #include -#include #include #include +#include +#include #include #include #include #include +#include + #define TEST_PA_ADDRESS 0xEFAD80000000 #define TEST_GPU_VA 0x102040600000 #define TEST_PA_ADDRESS_64K 0x1FAD80010000 diff --git a/userspace/units/mm/gmmu/pd_cache/pd_cache.c b/userspace/units/mm/gmmu/pd_cache/pd_cache.c index cab43046d..096ac9c20 100644 --- a/userspace/units/mm/gmmu/pd_cache/pd_cache.c +++ b/userspace/units/mm/gmmu/pd_cache/pd_cache.c @@ -36,7 +36,7 @@ #include "common/mm/gmmu/pd_cache_priv.h" -#include "gp10b/mm_gp10b.h" +#include "hal/mm/gmmu/gmmu_gp10b.h" /* * Direct allocs are allocs large enough to just pass straight on to the diff --git a/userspace/units/mm/nvgpu_mem/nvgpu_mem.c b/userspace/units/mm/nvgpu_mem/nvgpu_mem.c index 52a05560f..99cfcbd53 100644 --- a/userspace/units/mm/nvgpu_mem/nvgpu_mem.c +++ b/userspace/units/mm/nvgpu_mem/nvgpu_mem.c @@ -35,8 +35,9 @@ #include #include -#include +#include #include + #include #include diff --git a/userspace/units/mm/page_table_faults/page_table_faults.c b/userspace/units/mm/page_table_faults/page_table_faults.c index 0375218d9..03e36a526 100644 --- a/userspace/units/mm/page_table_faults/page_table_faults.c +++ b/userspace/units/mm/page_table_faults/page_table_faults.c @@ -35,16 +35,13 @@ #include #include #include "os/posix/os_posix.h" -#include "gk20a/mm_gk20a.h" -#include "gm20b/mm_gm20b.h" -#include "gp10b/mm_gp10b.h" #include "gv11b/mm_gv11b.h" #include 
"common/fifo/channel_gv11b.h" -#include "nvgpu/hw/gv11b/hw_gmmu_gv11b.h" -#include "nvgpu/hw/gv11b/hw_fb_gv11b.h" #include "hal/mm/cache/flush_gk20a.h" #include "hal/mm/cache/flush_gv11b.h" +#include "hal/mm/gmmu/gmmu_gp10b.h" +#include "hal/mm/gmmu/gmmu_gv11b.h" #include "hal/mc/mc_gv11b.h" #include "hal/fb/fb_gp10b.h" #include "hal/fb/fb_gm20b.h" @@ -54,6 +51,9 @@ #include "hal/fifo/ramin_gm20b.h" #include "hal/fifo/ramin_gp10b.h" +#include +#include + #define TEST_PA_ADDRESS 0xEFAD80000000 #define TEST_COMP_TAG 0xEF #define TEST_INVALID_ADDRESS 0xAAC0000000 diff --git a/userspace/units/mm/vm/vm.c b/userspace/units/mm/vm/vm.c index 5b01facad..7fa7ae9f1 100644 --- a/userspace/units/mm/vm/vm.c +++ b/userspace/units/mm/vm/vm.c @@ -35,9 +35,11 @@ #include #include #include +#include +#include #include #include -#include + #include /* Random CPU physical address for the buffers we'll map */