gpu: nvgpu: gp10b: Implement new page table format

Implement the 5-level Pascal page table format. It is enabled
only for simulation.

Change-Id: I6767fac8b52fe0f6a2e2f86312de5fc93af6518e
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/682114
This commit is contained in:
Terje Bergstrom
2015-01-29 11:29:56 -08:00
committed by Deepak Nibade
parent 1fcd7fd547
commit ac0cd782ab
2 changed files with 333 additions and 92 deletions

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -50,163 +50,207 @@
#ifndef _hw_gmmu_gp10b_h_
#define _hw_gmmu_gp10b_h_
static inline u32 gmmu_pde_aperture_big_w(void)
static inline u32 gmmu_new_pde_is_pte_w(void)
{
return 0;
}
static inline u32 gmmu_pde_aperture_big_invalid_f(void)
static inline u32 gmmu_new_pde_is_pte_false_f(void)
{
return 0x0;
}
static inline u32 gmmu_pde_aperture_big_video_memory_f(void)
{
return 0x1;
}
static inline u32 gmmu_pde_size_w(void)
static inline u32 gmmu_new_pde_aperture_w(void)
{
return 0;
}
static inline u32 gmmu_pde_size_full_f(void)
static inline u32 gmmu_new_pde_aperture_invalid_f(void)
{
return 0x0;
}
static inline u32 gmmu_pde_address_big_sys_f(u32 v)
static inline u32 gmmu_new_pde_aperture_video_memory_f(void)
{
return (v & 0xfffffff) << 4;
return 0x2;
}
static inline u32 gmmu_pde_address_big_sys_w(void)
static inline u32 gmmu_new_pde_address_sys_f(u32 v)
{
return (v & 0xffffff) << 8;
}
/*
 * Returns 0. The "_w" accessors in these hw headers are used as 32-bit
 * word indices (see how ram_in_page_dir_base_lo_w() is passed to
 * gk20a_mem_wr32()); update_gmmu_pde3_locked() correspondingly ORs the
 * value of gmmu_new_pde_address_sys_f() into word 0 of the PDE.
 */
static inline u32 gmmu_new_pde_address_sys_w(void)
{
return 0;
}
static inline u32 gmmu_pde_aperture_small_w(void)
static inline u32 gmmu_new_pde_vol_w(void)
{
return 1;
return 0;
}
static inline u32 gmmu_pde_aperture_small_invalid_f(void)
{
return 0x0;
}
static inline u32 gmmu_pde_aperture_small_video_memory_f(void)
{
return 0x1;
}
static inline u32 gmmu_pde_vol_small_w(void)
{
return 1;
}
static inline u32 gmmu_pde_vol_small_true_f(void)
{
return 0x4;
}
static inline u32 gmmu_pde_vol_small_false_f(void)
{
return 0x0;
}
static inline u32 gmmu_pde_vol_big_w(void)
{
return 1;
}
static inline u32 gmmu_pde_vol_big_true_f(void)
static inline u32 gmmu_new_pde_vol_true_f(void)
{
return 0x8;
}
static inline u32 gmmu_pde_vol_big_false_f(void)
static inline u32 gmmu_new_pde_vol_false_f(void)
{
return 0x0;
}
static inline u32 gmmu_pde_address_small_sys_f(u32 v)
{
return (v & 0xfffffff) << 4;
}
static inline u32 gmmu_pde_address_small_sys_w(void)
{
return 1;
}
static inline u32 gmmu_pde_address_shift_v(void)
static inline u32 gmmu_new_pde_address_shift_v(void)
{
return 0x0000000c;
}
static inline u32 gmmu_pde__size_v(void)
static inline u32 gmmu_new_pde__size_v(void)
{
return 0x00000008;
}
static inline u32 gmmu_pte__size_v(void)
{
return 0x00000008;
}
static inline u32 gmmu_pte_valid_w(void)
static inline u32 gmmu_new_dual_pde_is_pte_w(void)
{
return 0;
}
static inline u32 gmmu_pte_valid_true_f(void)
{
return 0x1;
}
static inline u32 gmmu_pte_valid_false_f(void)
static inline u32 gmmu_new_dual_pde_is_pte_false_f(void)
{
return 0x0;
}
static inline u32 gmmu_pte_address_sys_f(u32 v)
static inline u32 gmmu_new_dual_pde_aperture_big_w(void)
{
return 0;
}
/* Returns 0x0: aperture_big field value named "invalid". */
static inline u32 gmmu_new_dual_pde_aperture_big_invalid_f(void)
{
return 0x0;
}
/*
 * Returns 0x2: aperture_big field value named "video memory".
 * update_gmmu_pde0_locked() ORs it into word 0 of a dual PDE when the
 * big-page table is valid.
 */
static inline u32 gmmu_new_dual_pde_aperture_big_video_memory_f(void)
{
return 0x2;
}
/*
 * Packs the low 28 bits of v into bits 4..31 of a 32-bit word.
 * update_gmmu_pde0_locked() passes the big-page table address, already
 * shifted right by gmmu_new_dual_pde_address_big_shift_v(), and ORs the
 * result into word 0.
 */
static inline u32 gmmu_new_dual_pde_address_big_sys_f(u32 v)
{
return (v & 0xfffffff) << 4;
}
static inline u32 gmmu_pte_address_sys_w(void)
static inline u32 gmmu_new_dual_pde_address_big_sys_w(void)
{
return 0;
}
static inline u32 gmmu_pte_vol_w(void)
static inline u32 gmmu_new_dual_pde_aperture_small_w(void)
{
return 1;
return 2;
}
static inline u32 gmmu_pte_vol_true_f(void)
static inline u32 gmmu_new_dual_pde_aperture_small_invalid_f(void)
{
return 0x0;
}
/*
 * Returns 0x2: aperture_small field value named "video memory".
 * update_gmmu_pde0_locked() ORs it into word 2 of a dual PDE when the
 * small-page table is valid.
 */
static inline u32 gmmu_new_dual_pde_aperture_small_video_memory_f(void)
{
return 0x2;
}
/* Returns 2: 32-bit word index of the vol_small field in a dual PDE. */
static inline u32 gmmu_new_dual_pde_vol_small_w(void)
{
return 2;
}
/*
 * Returns 0x8: vol_small "true" value; ORed into word 2 by
 * update_gmmu_pde0_locked() for a valid small-page table.
 */
static inline u32 gmmu_new_dual_pde_vol_small_true_f(void)
{
return 0x8;
}
/* Returns 0x0: vol_small "false" value. */
static inline u32 gmmu_new_dual_pde_vol_small_false_f(void)
{
return 0x0;
}
/* Returns 0: 32-bit word index of the vol_big field in a dual PDE. */
static inline u32 gmmu_new_dual_pde_vol_big_w(void)
{
return 0;
}
/*
 * Returns 0x8: vol_big "true" value; ORed into word 0 by
 * update_gmmu_pde0_locked() for a valid big-page table.
 */
static inline u32 gmmu_new_dual_pde_vol_big_true_f(void)
{
return 0x8;
}
/* Returns 0x0: vol_big "false" value. */
static inline u32 gmmu_new_dual_pde_vol_big_false_f(void)
{
return 0x0;
}
/*
 * Packs the low 24 bits of v into bits 8..31 of a 32-bit word.
 * update_gmmu_pde0_locked() passes the small-page table address, already
 * shifted right by gmmu_new_dual_pde_address_shift_v(), and ORs the
 * result into word 2.
 */
static inline u32 gmmu_new_dual_pde_address_small_sys_f(u32 v)
{
return (v & 0xffffff) << 8;
}
/* Returns 2: 32-bit word index of the address_small_sys field. */
static inline u32 gmmu_new_dual_pde_address_small_sys_w(void)
{
return 2;
}
/*
 * Returns 12 (0xc): right shift applied to the small-page table address
 * before it is packed with gmmu_new_dual_pde_address_small_sys_f().
 */
static inline u32 gmmu_new_dual_pde_address_shift_v(void)
{
return 0x0000000c;
}
/*
 * Returns 8: right shift applied to the big-page table address before it
 * is packed with gmmu_new_dual_pde_address_big_sys_f().
 */
static inline u32 gmmu_new_dual_pde_address_big_shift_v(void)
{
return 0x00000008;
}
/*
 * Returns 16 (0x10): byte stride between dual PDEs, as used by
 * pde0_from_index() to locate entry i.
 */
static inline u32 gmmu_new_dual_pde__size_v(void)
{
return 0x00000010;
}
/* Returns 8: size constant for a new-format PTE (presumably in bytes). */
static inline u32 gmmu_new_pte__size_v(void)
{
return 0x00000008;
}
/* Returns 0: 32-bit word index of the valid field in a PTE. */
static inline u32 gmmu_new_pte_valid_w(void)
{
return 0;
}
/*
 * Returns 0x1: valid "true" value; update_gmmu_pte_locked() sets it in
 * PTE word 0 when a non-zero iova is mapped.
 */
static inline u32 gmmu_new_pte_valid_true_f(void)
{
return 0x1;
}
static inline u32 gmmu_pte_vol_false_f(void)
static inline u32 gmmu_new_pte_valid_false_f(void)
{
return 0x0;
}
static inline u32 gmmu_pte_aperture_w(void)
static inline u32 gmmu_new_pte_address_sys_f(u32 v)
{
return 1;
return (v & 0xffffff) << 8;
}
static inline u32 gmmu_pte_aperture_video_memory_f(void)
{
return 0x0;
}
static inline u32 gmmu_pte_read_only_w(void)
static inline u32 gmmu_new_pte_address_sys_w(void)
{
return 0;
}
static inline u32 gmmu_pte_read_only_true_f(void)
static inline u32 gmmu_new_pte_vol_w(void)
{
return 0x4;
return 0;
}
static inline u32 gmmu_pte_write_disable_w(void)
static inline u32 gmmu_new_pte_vol_true_f(void)
{
return 0x8;
}
/* Returns 0x0: vol "false" value for a PTE. */
static inline u32 gmmu_new_pte_vol_false_f(void)
{
return 0x0;
}
/* Returns 0: 32-bit word index of the aperture field in a PTE. */
static inline u32 gmmu_new_pte_aperture_w(void)
{
return 0;
}
/* Returns 0x0: aperture field value named "video memory". */
static inline u32 gmmu_new_pte_aperture_video_memory_f(void)
{
return 0x0;
}
/* Returns 0: 32-bit word index of the read_only field in a PTE. */
static inline u32 gmmu_new_pte_read_only_w(void)
{
return 0;
}
/*
 * Returns 0x40: read_only "true" value; update_gmmu_pte_locked() ORs it
 * into PTE word 0 when rw_flag is gk20a_mem_flag_read_only.
 */
static inline u32 gmmu_new_pte_read_only_true_f(void)
{
return 0x40;
}
/*
 * Packs the low 18 bits of v into bits 4..21 of a 32-bit word;
 * update_gmmu_pte_locked() places the result in PTE word 1.
 */
static inline u32 gmmu_new_pte_comptagline_f(u32 v)
{
return (v & 0x3ffff) << 4;
}
/* Returns 1: 32-bit word index of the comptagline field in a PTE. */
static inline u32 gmmu_new_pte_comptagline_w(void)
{
return 1;
}
static inline u32 gmmu_pte_write_disable_true_f(void)
static inline u32 gmmu_new_pte_kind_f(u32 v)
{
return 0x80000000;
return (v & 0xff) << 24;
}
static inline u32 gmmu_pte_read_disable_w(void)
static inline u32 gmmu_new_pte_kind_w(void)
{
return 1;
}
static inline u32 gmmu_pte_read_disable_true_f(void)
{
return 0x40000000;
}
static inline u32 gmmu_pte_comptagline_f(u32 v)
{
return (v & 0x3ffff) << 12;
}
static inline u32 gmmu_pte_comptagline_w(void)
{
return 1;
}
static inline u32 gmmu_pte_address_shift_v(void)
static inline u32 gmmu_new_pte_address_shift_v(void)
{
return 0x0000000c;
}

View File

@@ -20,6 +20,8 @@
#include "rpfb_gp10b.h"
#include "hw_ram_gp10b.h"
#include "hw_bus_gp10b.h"
#include "hw_gmmu_gp10b.h"
#include "gk20a/semaphore_gk20a.h"
static u32 gp10b_mm_get_physical_addr_bits(struct gk20a *g)
{
@@ -138,6 +140,197 @@ static u64 gp10b_mm_iova_addr(struct gk20a *g, struct scatterlist *sgl,
return gk20a_mm_smmu_vaddr_translate(g, sg_dma_address(sgl));
}
/*
 * Locate entry @i of a single-pointer page directory.
 *
 * @entry: directory whose CPU mapping (entry->cpu_va) holds the entries
 * @i:     index of the wanted entry
 *
 * Entries are gmmu_new_pde__size_v() bytes apart. Returns a pointer to the
 * first 32-bit word of entry @i; no bounds check is performed.
 */
u32 *pde3_from_index(struct gk20a_mm_entry *entry, u32 i)
{
	u8 *dir_base = (u8 *)entry->cpu_va;
	u32 byte_offset = i * gmmu_new_pde__size_v();

	return (u32 *)(dir_base + byte_offset);
}
/*
 * Write the single-pointer PDE at index @i of @parent so that it points at
 * the next-level table parent->entries[i].
 *
 * @vm:            address space being updated (used to reach struct gk20a)
 * @parent:        directory containing the entry to write
 * @i:             index of the entry inside @parent
 * @gmmu_pgsz_idx: page size index; only used in the debug print
 *
 * The remaining parameters (@iova, @kind_v, @ctag, @cacheable,
 * @unmapped_pte, @rw_flag, @sparse, @flags) belong to the common
 * update_entry signature and are not used at this level.
 *
 * Word 0 receives the video-memory aperture value and the child table
 * address (shifted right by gmmu_new_pde_address_shift_v() and packed by
 * gmmu_new_pde_address_sys_f()); word 1 is written as zero.
 *
 * Always returns 0.
 */
static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
			   struct gk20a_mm_entry *parent,
			   u32 i, u32 gmmu_pgsz_idx,
			   u64 iova,
			   u32 kind_v, u32 *ctag,
			   bool cacheable, bool unmapped_pte,
			   int rw_flag, bool sparse, u32 flags)
{
	u64 pte_addr;
	struct gk20a_mm_entry *pte = parent->entries + i;
	u32 pde_v[2] = {0, 0};
	u32 *pde;
	struct gk20a *g = vm->mm->g;

	gk20a_dbg_fn("");

	/*
	 * Only the child table's address is needed. The address of @parent
	 * itself was previously looked up as well but never used, so that
	 * dead lookup has been dropped.
	 */
	pte_addr = g->ops.mm.get_iova_addr(g, pte->sgt->sgl, 0)
			>> gmmu_new_pde_address_shift_v();

	pde_v[0] |= gmmu_new_pde_aperture_video_memory_f();
	pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(pte_addr));

	pde = pde3_from_index(parent, i);

	gk20a_mem_wr32(pde, 0, pde_v[0]);
	gk20a_mem_wr32(pde, 1, pde_v[1]);

	gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d = 0x%x,0x%08x",
		  i, gmmu_pgsz_idx, pde_v[1], pde_v[0]);
	gk20a_dbg_fn("done");
	return 0;
}
/*
 * Locate entry @i of a dual (big + small page table) page directory.
 *
 * @entry: directory whose CPU mapping (entry->cpu_va) holds the entries
 * @i:     index of the wanted entry
 *
 * Entries are gmmu_new_dual_pde__size_v() bytes apart. Returns a pointer to
 * the first 32-bit word of entry @i; no bounds check is performed.
 */
u32 *pde0_from_index(struct gk20a_mm_entry *entry, u32 i)
{
	u8 *dir_base = (u8 *)entry->cpu_va;
	u32 byte_offset = i * gmmu_new_dual_pde__size_v();

	return (u32 *)(dir_base + byte_offset);
}
/*
 * Write the dual PDE at index @i of the directory @pte.
 *
 * @vm:            address space being updated (used to reach struct gk20a)
 * @pte:           directory containing the entry to write (despite the
 *                 name, this is the parent of the page table)
 * @i:             index of the entry inside @pte
 * @gmmu_pgsz_idx: page size index; only used in the debug print
 *
 * The remaining parameters (@iova, @kind_v, @ctag, @cacheable,
 * @unmapped_pte, @rw_flag, @sparse, @flags) belong to the common
 * update_entry signature and are not used at this level.
 *
 * The child pte->entries[i] is considered populated when its size is
 * non-zero; its pgsz then selects which half of the dual PDE is filled:
 * words 0 (big pages) or word 2 (small pages). All four words are always
 * written, so an unpopulated child yields an all-zero entry.
 *
 * Always returns 0.
 */
static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
			   struct gk20a_mm_entry *pte,
			   u32 i, u32 gmmu_pgsz_idx,
			   u64 iova,
			   u32 kind_v, u32 *ctag,
			   bool cacheable, bool unmapped_pte,
			   int rw_flag, bool sparse, u32 flags)
{
	struct gk20a *g = vm->mm->g;
	struct gk20a_mm_entry *child = pte->entries + i;
	bool small_valid = false;
	bool big_valid = false;
	u32 small_pt_addr = 0;
	u32 big_pt_addr = 0;
	u32 words[4] = {0, 0, 0, 0};
	u32 *slot;

	gk20a_dbg_fn("");

	if (child->size) {
		small_valid = child->pgsz == gmmu_page_size_small;
		big_valid = child->pgsz == gmmu_page_size_big;
	}

	/* Small-page table pointer lives in word 2. */
	if (small_valid) {
		small_pt_addr = g->ops.mm.get_iova_addr(g, child->sgt->sgl, 0)
				>> gmmu_new_dual_pde_address_shift_v();

		words[2] |= gmmu_new_dual_pde_address_small_sys_f(small_pt_addr);
		words[2] |= gmmu_new_dual_pde_aperture_small_video_memory_f();
		words[2] |= gmmu_new_dual_pde_vol_small_true_f();
	}

	/* Big-page table pointer lives in word 0. */
	if (big_valid) {
		big_pt_addr = g->ops.mm.get_iova_addr(g, child->sgt->sgl, 0)
				>> gmmu_new_dual_pde_address_big_shift_v();

		words[0] |= gmmu_new_dual_pde_address_big_sys_f(big_pt_addr);
		words[0] |= gmmu_new_dual_pde_vol_big_true_f();
		words[0] |= gmmu_new_dual_pde_aperture_big_video_memory_f();
	}

	slot = pde0_from_index(pte, i);

	gk20a_mem_wr32(slot, 0, words[0]);
	gk20a_mem_wr32(slot, 1, words[1]);
	gk20a_mem_wr32(slot, 2, words[2]);
	gk20a_mem_wr32(slot, 3, words[3]);

	gk20a_dbg(gpu_dbg_pte, "pde:%d,sz=%d [0x%08x, 0x%08x, 0x%x, 0x%08x]",
		  i, gmmu_pgsz_idx, words[3], words[2], words[1], words[0]);
	gk20a_dbg_fn("done");
	return 0;
}
/*
 * Write the new-format PTE at index @i of the page table @pte.
 *
 * @vm:            address space; supplies the page size for @gmmu_pgsz_idx
 * @pte:           page table whose CPU mapping (pte->cpu_va) is written
 * @i:             index of the PTE inside @pte
 * @gmmu_pgsz_idx: index into vm->gmmu_page_sizes[]
 * @iova:          address to map; 0 means "no mapping"
 * @kind_v:        kind value stored in the PTE
 * @ctag:          in/out compression tag offset; advanced by one page size
 *                 after a mapped PTE is built, unless it is 0
 * @cacheable:     when false the PTE is marked volatile
 * @rw_flag:       gk20a_mem_flag_read_only marks the PTE read-only
 * @sparse:        when @iova is 0, write an invalid-but-volatile PTE
 *                 instead of an all-zero one
 *
 * @unmapped_pte and @flags are accepted for the common update_entry
 * signature but not used here.
 *
 * Field placement follows the hw_gmmu_gp10b.h accessors: valid, aperture,
 * address, read_only and vol are in word 0 (their *_w() accessors return
 * 0); kind and comptagline are in word 1. The volatile bit used to be ORed
 * into word 1, which does not match gmmu_new_pte_vol_w(); it is now set in
 * word 0. The aperture value (0x0 for video memory) is likewise assembled
 * into word 0, which does not change the value written.
 *
 * Always returns 0.
 */
static int update_gmmu_pte_locked(struct vm_gk20a *vm,
			   struct gk20a_mm_entry *pte,
			   u32 i, u32 gmmu_pgsz_idx,
			   u64 iova,
			   u32 kind_v, u32 *ctag,
			   bool cacheable, bool unmapped_pte,
			   int rw_flag, bool sparse, u32 flags)
{
	u32 page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx];
	u32 pte_w[2] = {0, 0}; /* invalid pte */

	gk20a_dbg_fn("");

	if (iova) {
		pte_w[0] = gmmu_new_pte_valid_true_f() |
			gmmu_new_pte_aperture_video_memory_f() |
			gmmu_new_pte_address_sys_f(iova
				>> gmmu_new_pte_address_shift_v());
		pte_w[1] = gmmu_new_pte_kind_f(kind_v) |
			gmmu_new_pte_comptagline_f(*ctag / SZ_128K);

		if (rw_flag == gk20a_mem_flag_read_only)
			pte_w[0] |= gmmu_new_pte_read_only_true_f();
		/* vol is a word-0 field, see gmmu_new_pte_vol_w() */
		if (!cacheable)
			pte_w[0] |= gmmu_new_pte_vol_true_f();

		gk20a_dbg(gpu_dbg_pte, "pte=%d iova=0x%llx kind=%d"
			   " ctag=%d vol=%d"
			   " [0x%08x, 0x%08x]",
			   i, iova,
			   kind_v, *ctag, !cacheable,
			   pte_w[1], pte_w[0]);

		if (*ctag)
			*ctag += page_size;
	} else if (sparse) {
		/* invalid but volatile; both fields live in word 0 */
		pte_w[0] = gmmu_new_pte_valid_false_f() |
			gmmu_new_pte_vol_true_f();
	} else {
		gk20a_dbg(gpu_dbg_pte, "pte_cur=%d [0x0,0x0]", i);
	}

	gk20a_mem_wr32(pte->cpu_va + i*8, 0, pte_w[0]);
	gk20a_mem_wr32(pte->cpu_va + i*8, 1, pte_w[1]);

	gk20a_dbg_fn("done");
	return 0;
}
/*
 * Description of the five page table levels, outermost first, terminated
 * by an entry whose update_entry is NULL.
 *
 * Each level names the routine that writes one of its entries and the size
 * of an entry in bytes (8, or 16 for the dual PDE level). hi_bit/lo_bit are
 * two-element arrays; presumably they give the virtual address bit range
 * indexing the level, one value per page size (small, big) - only the last
 * level differs between the two (12 vs 16).
 */
const struct gk20a_mmu_level gp10b_mm_levels[] = {
	{	/* level 0: single-pointer PDE */
		.hi_bit = {48, 48},
		.lo_bit = {47, 47},
		.entry_size = 8,
		.update_entry = update_gmmu_pde3_locked,
	},
	{	/* level 1: single-pointer PDE */
		.hi_bit = {46, 46},
		.lo_bit = {38, 38},
		.entry_size = 8,
		.update_entry = update_gmmu_pde3_locked,
	},
	{	/* level 2: single-pointer PDE */
		.hi_bit = {37, 37},
		.lo_bit = {29, 29},
		.entry_size = 8,
		.update_entry = update_gmmu_pde3_locked,
	},
	{	/* level 3: dual PDE (big and small page table pointers) */
		.hi_bit = {28, 28},
		.lo_bit = {21, 21},
		.entry_size = 16,
		.update_entry = update_gmmu_pde0_locked,
	},
	{	/* level 4: PTE */
		.hi_bit = {20, 20},
		.lo_bit = {12, 16},
		.entry_size = 8,
		.update_entry = update_gmmu_pte_locked,
	},
	{	/* terminator */
		.update_entry = NULL,
	},
};
/*
 * Return the page table level description for gp10b.
 *
 * The same table, gp10b_mm_levels, is returned regardless of @g and
 * @big_page_size; both parameters are unused.
 */
const struct gk20a_mmu_level *gp10b_mm_get_mmu_levels(struct gk20a *g,
						      u32 big_page_size)
{
	return &gp10b_mm_levels[0];
}
/*
 * Program the page directory base into an instance block.
 *
 * @g:        unused
 * @inst_ptr: CPU pointer to the instance block being written
 * @pdb_addr: address of the top-level page directory
 *
 * The low word combines the video-memory target, the volatile flag, the
 * low part of the address (pdb_addr shifted right by ram_in_base_shift_v())
 * and bit 10. The high word carries the upper 32 bits of @pdb_addr.
 */
static void gp10b_mm_init_pdb(struct gk20a *g, void *inst_ptr, u64 pdb_addr)
{
	u32 lo_bits = u64_lo32(pdb_addr >> ram_in_base_shift_v());
	u32 hi_bits = u64_hi32(pdb_addr);
	u32 lo_word;

	lo_word = ram_in_page_dir_base_target_vid_mem_f();
	lo_word |= ram_in_page_dir_base_vol_true_f();
	lo_word |= ram_in_page_dir_base_lo_f(lo_bits);
	/*
	 * NOTE(review): bit 10 has no named accessor here; presumably it
	 * selects the new page table format - confirm against the hw manual.
	 */
	lo_word |= 1 << 10;

	gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(), lo_word);
	gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_hi_w(),
		       ram_in_page_dir_base_hi_f(hi_bits));
}
void gp10b_init_mm(struct gpu_ops *gops)
{
gm20b_init_mm(gops);
@@ -146,4 +339,8 @@ void gp10b_init_mm(struct gpu_ops *gops)
gops->mm.init_bar2_vm = gb10b_init_bar2_vm;
gops->mm.init_bar2_mm_hw_setup = gb10b_init_bar2_mm_hw_setup;
gops->mm.get_iova_addr = gp10b_mm_iova_addr;
if (tegra_platform_is_linsim()) {
gops->mm.get_mmu_levels = gp10b_mm_get_mmu_levels;
gops->mm.init_pdb = gp10b_mm_init_pdb;
}
}