Mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git (synced 2025-12-23 18:12:41 +03:00)
Add the following info into GPU characteristics: available big page
sizes, support indicators for sync fence fds and cycle stats, gpc mask,
SM version, SM SPA version and warp count, and IOCTL interface levels.
Also, add a new IOCTL to fetch TPC masks.

Bug 1551769
Bug 1558186

Change-Id: I8a47d882645f29c7bf0c8f74334ebf47240e41de
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: http://git-master/r/562904
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
305 lines · 7.5 KiB · C
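
The commit message above extends the GPU characteristics structure and adds a
TPC-mask query IOCTL. As a rough sketch of how userspace might consume the new
fields: the device node path, the ioctl request name, and the struct and field
names below are patterned on the nvgpu UAPI but are assumptions for
illustration, not taken from this file.

/* Hedged sketch: device node, ioctl name, and struct/field names are
 * assumptions patterned on the nvgpu UAPI, not verified against it. */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

static void print_gpu_characteristics(void)
{
	struct nvgpu_gpu_characteristics chars = {0};
	struct nvgpu_gpu_get_characteristics req = {
		.gpu_characteristics_buf_size = sizeof(chars),
		.gpu_characteristics_buf_addr = (uintptr_t)&chars,
	};
	int fd = open("/dev/nvhost-ctrl-gpu", O_RDONLY); /* assumed node */

	if (fd < 0)
		return;
	if (ioctl(fd, NVGPU_GPU_IOCTL_GET_CHARACTERISTICS, &req) == 0)
		printf("big page sizes: 0x%x, gpc mask: 0x%x, SM version: 0x%x\n",
		       chars.available_big_page_sizes, chars.gpc_mask,
		       chars.sm_arch_sm_version);
	close(fd);
}

The buffer-size handshake lets the struct grow across releases while older
userspace keeps working; the "IOCTL interface levels" in the commit message
presumably advertise which revision of the interface the driver speaks.
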
/*
 * GM20B MMU
 *
 * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */

#include <linux/pm_runtime.h>

#include "gk20a/gk20a.h"
#include "mm_gm20b.h"
#include "hw_gmmu_gm20b.h"
#include "hw_fb_gm20b.h"
#include "hw_gr_gm20b.h"
#include "hw_ram_gm20b.h"
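
/*
 * Write sparse PTEs for [first_vaddr, last_vaddr]: each entry is marked
 * invalid and, unless @clear is set, also volatile, which is the GMMU
 * encoding for sparse (reads return zero rather than faulting). All PTEs
 * must fall within a single PDE; its refcount is bumped when @refplus is
 * set.
 */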
static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm,
				enum gmmu_pgsz_gk20a pgsz_idx,
				u64 first_vaddr, u64 last_vaddr,
				bool clear, bool refplus)
{
	int err;
	u32 pte_lo, pte_hi;
	u32 pde_lo, pde_hi;
	u32 pte_w[2] = {0, 0}; /* invalid pte */
	u64 addr = 0;
	u32 pte_cur;
	void *pte_kv_cur;
	struct page_table_gk20a *pte;

	gk20a_dbg_fn("");

	pde_range_from_vaddr_range(vm, first_vaddr, last_vaddr,
				   &pde_lo, &pde_hi);

	gk20a_dbg(gpu_dbg_pte, "size_idx=%d, pde_lo=%d, pde_hi=%d",
		  pgsz_idx, pde_lo, pde_hi);

	/* Expect ptes of the same pde */
	BUG_ON(pde_lo != pde_hi);

	pte = vm->pdes.ptes[pgsz_idx] + pde_lo;
	if (refplus)
		pte->ref_cnt++;

	pte_lo = pte_index_from_vaddr(vm, first_vaddr, pgsz_idx);
	pte_hi = pte_index_from_vaddr(vm, last_vaddr, pgsz_idx);

	/* get cpu access to the ptes */
	err = map_gmmu_pages(pte->ref, pte->sgt, &pte_kv_cur, pte->size);
	if (err)
		goto fail;

	gk20a_dbg(gpu_dbg_pte, "pte_lo=%d, pte_hi=%d", pte_lo, pte_hi);
	for (pte_cur = pte_lo; pte_cur <= pte_hi; pte_cur++) {
		pte_w[0] = gmmu_pte_valid_false_f();
		pte_w[1] = clear ? 0 : gmmu_pte_vol_true_f();

		gk20a_dbg(gpu_dbg_pte,
			  "pte_cur=%d addr=%llx refs=%d [0x%08x,0x%08x]",
			  pte_cur, addr, pte->ref_cnt, pte_w[1], pte_w[0]);

		gk20a_mem_wr32(pte_kv_cur + pte_cur*8, 0, pte_w[0]);
		gk20a_mem_wr32(pte_kv_cur + pte_cur*8, 1, pte_w[1]);
	}

	unmap_gmmu_pages(pte->ref, pte->sgt, pte_kv_cur);

	smp_mb();
	vm->tlb_dirty = true;
	gk20a_dbg_fn("set tlb dirty");

	return 0;

fail:
	return err;
}
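
/*
 * True when [vaddr_lo, vaddr_hi] fully covers the virtual address span of
 * the given PDE, i.e. the whole PDE can be handled in one pass with no
 * per-PTE edge trimming.
 */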
static bool gm20b_vm_is_pde_in_range(struct vm_gk20a *vm, u64 vaddr_lo,
				     u64 vaddr_hi, u32 pde)
{
	u64 pde_vaddr_lo, pde_vaddr_hi;

	gk20a_dbg_fn("");

	pde_vaddr_lo = (u64)pde << vm->pde_stride_shift;
	pde_vaddr_hi = pde_vaddr_lo |
		((0x1UL << vm->pde_stride_shift) - 1);

	return (vaddr_lo <= pde_vaddr_lo) && (vaddr_hi >= pde_vaddr_hi);
}
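
/*
 * Mark @num_pages pages starting at @vaddr as sparse. PDEs fully covered
 * by the range get all of their PTEs written at once; partial coverage at
 * the head or tail of the range is trimmed to the PDE boundary with
 * PDE_ADDR_START()/PDE_ADDR_END().
 */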
static int gm20b_vm_put_sparse(struct vm_gk20a *vm, u64 vaddr,
			       u32 num_pages, u32 pgsz_idx, bool refplus)
{
	struct mm_gk20a *mm = vm->mm;
	u32 pgsz = vm->gmmu_page_sizes[pgsz_idx];
	u32 pde_shift = vm->pde_stride_shift;
	u64 vaddr_hi;
	u64 vaddr_pde_start;
	u32 i;
	u32 pde_lo, pde_hi;
	int err;

	gk20a_dbg_fn("");

	vaddr_hi = vaddr + pgsz * num_pages - 1;
	pde_range_from_vaddr_range(vm, vaddr, vaddr_hi, &pde_lo, &pde_hi);

	gk20a_dbg_info("vaddr: 0x%llx, vaddr_hi: 0x%llx, pde_lo: 0x%x, "
		       "pde_hi: 0x%x, pgsz: %d, pde_stride_shift: %d",
		       vaddr, vaddr_hi, pde_lo, pde_hi, pgsz,
		       vm->pde_stride_shift);

	for (i = pde_lo; i <= pde_hi; i++) {
		/* Mark all ptes as sparse. */
		err = validate_gmmu_page_table_gk20a_locked(vm, i, pgsz_idx);
		if (err) {
			gk20a_err(dev_from_vm(vm),
				  "failed to validate page table %d: %d",
				  i, err);
			goto fail;
		}

		if (gm20b_vm_is_pde_in_range(vm, vaddr, vaddr_hi, i)) {
			/* entire pde is marked as sparse */
			vaddr_pde_start = (u64)i << pde_shift;
			allocate_gmmu_ptes_sparse(vm, pgsz_idx,
						  vaddr_pde_start,
						  PDE_ADDR_END(vaddr_pde_start,
							       pde_shift),
						  false, refplus);
		} else {
			/* Handle leading and trailing spans which don't fit
			 * into an entire pde. */
			if (pde_lo == pde_hi)
				allocate_gmmu_ptes_sparse(vm, pgsz_idx, vaddr,
							  vaddr_hi, false,
							  refplus);
			else if (i == pde_lo)
				allocate_gmmu_ptes_sparse(vm, pgsz_idx, vaddr,
							  PDE_ADDR_END(vaddr,
								pde_shift),
							  false, refplus);
			else
				allocate_gmmu_ptes_sparse(vm, pgsz_idx,
					PDE_ADDR_START(vaddr_hi, pde_shift),
					vaddr_hi, false, refplus);
		}
	}

	gk20a_mm_l2_flush(mm->g, true);

	return 0;

fail:
	WARN_ON(1);
	return err;
}
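
/*
 * Poll fb_mmu_vpr_info until the FETCH bit deasserts. On silicon the wait
 * is bounded by a jiffies timeout; on pre-silicon platforms @msec is used
 * as a plain loop counter instead.
 */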
static int gm20b_mm_mmu_vpr_info_fetch_wait(struct gk20a *g,
					    const unsigned int msec)
{
	unsigned long timeout;

	if (tegra_platform_is_silicon())
		timeout = jiffies + msecs_to_jiffies(msec);
	else
		timeout = msecs_to_jiffies(msec);

	while (1) {
		u32 val;

		val = gk20a_readl(g, fb_mmu_vpr_info_r());
		if (fb_mmu_vpr_info_fetch_v(val) ==
		    fb_mmu_vpr_info_fetch_false_v())
			break;

		if (tegra_platform_is_silicon()) {
			if (WARN_ON(time_after(jiffies, timeout)))
				return -ETIME;
		} else if (--timeout == 0) {
			return -ETIME;
		}
	}

	return 0;
}
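
/*
 * Trigger a (re)fetch of the VPR (video protected region) configuration
 * into the MMU: bail out early when the device is not runtime-active,
 * otherwise wait out any fetch already in flight, write the fetch
 * trigger, and wait for it to complete.
 */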
int gm20b_mm_mmu_vpr_info_fetch(struct gk20a *g)
{
	int ret = 0;

	gk20a_busy_noresume(g->dev);
#ifdef CONFIG_PM_RUNTIME
	if (!pm_runtime_active(&g->dev->dev))
		goto fail;
#endif

	if (gm20b_mm_mmu_vpr_info_fetch_wait(g, VPR_INFO_FETCH_WAIT)) {
		ret = -ETIME;
		goto fail;
	}

	gk20a_writel(g, fb_mmu_vpr_info_r(),
		     fb_mmu_vpr_info_fetch_true_v());

	ret = gm20b_mm_mmu_vpr_info_fetch_wait(g, VPR_INFO_FETCH_WAIT);

fail:
	pm_runtime_put(&g->dev->dev);
	return ret;
}
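
/*
 * Undo a sparse mapping: drop one reference from each PDE in the range
 * and, once a PDE's refcount reaches zero, free its page table pages and
 * rewrite the PDE itself.
 */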
static void gm20b_vm_clear_sparse(struct vm_gk20a *vm, u64 vaddr,
				  u64 size, u32 pgsz_idx)
{
	u64 vaddr_hi;
	u32 pde_lo, pde_hi, pde_i;

	gk20a_dbg_fn("");

	vaddr_hi = vaddr + size - 1;
	pde_range_from_vaddr_range(vm, vaddr, vaddr_hi, &pde_lo, &pde_hi);

	gk20a_dbg_info("vaddr: 0x%llx, vaddr_hi: 0x%llx, pde_lo: 0x%x, "
		       "pde_hi: 0x%x, pgsz_idx: %d, pde_stride_shift: %d",
		       vaddr, vaddr_hi, pde_lo, pde_hi, pgsz_idx,
		       vm->pde_stride_shift);

	for (pde_i = pde_lo; pde_i <= pde_hi; pde_i++) {
		struct page_table_gk20a *pte = vm->pdes.ptes[pgsz_idx] + pde_i;

		pte->ref_cnt--;

		if (pte->ref_cnt == 0) {
			free_gmmu_pages(vm, pte->ref, pte->sgt,
					vm->page_table_sizing[pgsz_idx].order,
					pte->size);
			pte->ref = NULL;
			update_gmmu_pde_locked(vm, pde_i);
		}
	}
}
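
/*
 * Report whether per-GPC MMU debug mode is currently enabled, as read
 * back from gr_gpcs_pri_mmu_debug_ctrl.
 */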
static bool gm20b_mm_mmu_debug_mode_enabled(struct gk20a *g)
{
	u32 debug_ctrl = gk20a_readl(g, gr_gpcs_pri_mmu_debug_ctrl_r());

	return gr_gpcs_pri_mmu_debug_ctrl_debug_v(debug_ctrl) ==
	       gr_gpcs_pri_mmu_debug_ctrl_debug_enabled_v();
}
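
/*
 * Program the big page size field of an instance block. Only 64K and
 * 128K are supported; any @size other than SZ_64K falls through to the
 * 128K encoding.
 */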
static void gm20b_mm_set_big_page_size(struct gk20a *g,
				       void *inst_ptr, int size)
{
	u32 val;

	gk20a_dbg_fn("");

	gk20a_dbg_info("big page size %d\n", size);
	val = gk20a_mem_rd32(inst_ptr, ram_in_big_page_size_w());
	val &= ~ram_in_big_page_size_m();

	if (size == SZ_64K)
		val |= ram_in_big_page_size_64kb_f();
	else
		val |= ram_in_big_page_size_128kb_f();

	gk20a_mem_wr32(inst_ptr, ram_in_big_page_size_w(), val);
	gk20a_dbg_fn("done");
}
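
/*
 * Supported big page sizes as a bitmask of byte sizes; since both values
 * are powers of two, OR-ing them yields a well-formed mask. This likely
 * feeds the available_big_page_sizes GPU characteristics field mentioned
 * in the commit message above.
 */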
u32 gm20b_mm_get_big_page_sizes(void)
{
	return SZ_64K | SZ_128K;
}
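
/*
 * Hook up the GM20B MM HAL. Sparse mapping, debug-mode query, and big
 * page handling use the GM20B-specific implementations above; the rest
 * is inherited unchanged from the GK20A code.
 */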
void gm20b_init_mm(struct gpu_ops *gops)
{
	gops->mm.set_sparse = gm20b_vm_put_sparse;
	gops->mm.clear_sparse = gm20b_vm_clear_sparse;
	gops->mm.is_debug_mode_enabled = gm20b_mm_mmu_debug_mode_enabled;
	gops->mm.gmmu_map = gk20a_locked_gmmu_map;
	gops->mm.gmmu_unmap = gk20a_locked_gmmu_unmap;
	gops->mm.vm_remove = gk20a_vm_remove_support;
	gops->mm.vm_alloc_share = gk20a_vm_alloc_share;
	gops->mm.vm_bind_channel = gk20a_vm_bind_channel;
	gops->mm.fb_flush = gk20a_mm_fb_flush;
	gops->mm.l2_invalidate = gk20a_mm_l2_invalidate;
	gops->mm.l2_flush = gk20a_mm_l2_flush;
	gops->mm.tlb_invalidate = gk20a_mm_tlb_invalidate;
	gops->mm.set_big_page_size = gm20b_mm_set_big_page_size;
	gops->mm.get_big_page_sizes = gm20b_mm_get_big_page_sizes;
}