gpu: nvgpu: gp10x: add support for vidmem in page tables

Modify page table updates to take an aperture flag (up until
gk20a_locked_gmmu_map()), don't hard-assume sysmem and propagate it to
hardware.

Jira DNVGPU-76

Change-Id: I797fdaaf5f42a84fa0446577359147fb6908a720
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: http://git-master/r/1169295
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
This commit is contained in:
Konsta Holtta
2016-06-17 15:45:31 +03:00
committed by Deepak Nibade
parent 81756640cb
commit d6730d5214
2 changed files with 35 additions and 21 deletions

View File

@@ -242,6 +242,14 @@ static inline u32 gmmu_new_pte_address_sys_w(void)
{ {
return 0; return 0;
} }
static inline u32 gmmu_new_pte_address_vid_f(u32 v)
{
return (v & 0xffffff) << 8;
}
static inline u32 gmmu_new_pte_address_vid_w(void)
{
return 0;
}
static inline u32 gmmu_new_pte_vol_w(void) static inline u32 gmmu_new_pte_vol_w(void)
{ {
return 0; return 0;

View File

@@ -170,7 +170,8 @@ static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
u64 *iova, u64 *iova,
u32 kind_v, u64 *ctag, u32 kind_v, u64 *ctag,
bool cacheable, bool unmapped_pte, bool cacheable, bool unmapped_pte,
int rw_flag, bool sparse, bool priv) int rw_flag, bool sparse, bool priv,
enum gk20a_aperture aperture)
{ {
struct gk20a *g = gk20a_from_vm(vm); struct gk20a *g = gk20a_from_vm(vm);
u64 pte_addr = 0; u64 pte_addr = 0;
@@ -184,9 +185,9 @@ static int update_gmmu_pde3_locked(struct vm_gk20a *vm,
pte_addr = entry_addr(g, pte) >> gmmu_new_pde_address_shift_v(); pte_addr = entry_addr(g, pte) >> gmmu_new_pde_address_shift_v();
pde_addr = entry_addr(g, parent); pde_addr = entry_addr(g, parent);
pde_v[0] |= g->mm.vidmem_is_vidmem ? pde_v[0] |= gk20a_aperture_mask(g, &pte->mem,
gmmu_new_pde_aperture_sys_mem_ncoh_f() : gmmu_new_pde_aperture_sys_mem_ncoh_f(),
gmmu_new_pde_aperture_video_memory_f(); gmmu_new_pde_aperture_video_memory_f());
pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(pte_addr)); pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(pte_addr));
pde_v[0] |= gmmu_new_pde_vol_true_f(); pde_v[0] |= gmmu_new_pde_vol_true_f();
pde_v[1] |= pte_addr >> 24; pde_v[1] |= pte_addr >> 24;
@@ -214,7 +215,8 @@ static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
u64 *iova, u64 *iova,
u32 kind_v, u64 *ctag, u32 kind_v, u64 *ctag,
bool cacheable, bool unmapped_pte, bool cacheable, bool unmapped_pte,
int rw_flag, bool sparse, bool priv) int rw_flag, bool sparse, bool priv,
enum gk20a_aperture aperture)
{ {
struct gk20a *g = gk20a_from_vm(vm); struct gk20a *g = gk20a_from_vm(vm);
bool small_valid, big_valid; bool small_valid, big_valid;
@@ -239,9 +241,9 @@ static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
if (small_valid) { if (small_valid) {
pde_v[2] |= gmmu_new_dual_pde_address_small_sys_f(pte_addr_small); pde_v[2] |= gmmu_new_dual_pde_address_small_sys_f(pte_addr_small);
pde_v[2] |= g->mm.vidmem_is_vidmem ? pde_v[2] |= gk20a_aperture_mask(g, &pte->mem,
gmmu_new_dual_pde_aperture_small_sys_mem_ncoh_f() : gmmu_new_dual_pde_aperture_small_sys_mem_ncoh_f(),
gmmu_new_dual_pde_aperture_small_video_memory_f(); gmmu_new_dual_pde_aperture_small_video_memory_f());
pde_v[2] |= gmmu_new_dual_pde_vol_small_true_f(); pde_v[2] |= gmmu_new_dual_pde_vol_small_true_f();
pde_v[3] |= pte_addr_small >> 24; pde_v[3] |= pte_addr_small >> 24;
} }
@@ -249,9 +251,9 @@ static int update_gmmu_pde0_locked(struct vm_gk20a *vm,
if (big_valid) { if (big_valid) {
pde_v[0] |= gmmu_new_dual_pde_address_big_sys_f(pte_addr_big); pde_v[0] |= gmmu_new_dual_pde_address_big_sys_f(pte_addr_big);
pde_v[0] |= gmmu_new_dual_pde_vol_big_true_f(); pde_v[0] |= gmmu_new_dual_pde_vol_big_true_f();
pde_v[0] |= g->mm.vidmem_is_vidmem ? pde_v[0] |= gk20a_aperture_mask(g, &pte->mem,
gmmu_new_dual_pde_aperture_big_sys_mem_ncoh_f() : gmmu_new_dual_pde_aperture_big_sys_mem_ncoh_f(),
gmmu_new_dual_pde_aperture_big_video_memory_f(); gmmu_new_dual_pde_aperture_big_video_memory_f());
pde_v[1] |= pte_addr_big >> 28; pde_v[1] |= pte_addr_big >> 28;
} }
@@ -276,7 +278,8 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
u64 *iova, u64 *iova,
u32 kind_v, u64 *ctag, u32 kind_v, u64 *ctag,
bool cacheable, bool unmapped_pte, bool cacheable, bool unmapped_pte,
int rw_flag, bool sparse, bool priv) int rw_flag, bool sparse, bool priv,
enum gk20a_aperture aperture)
{ {
struct gk20a *g = vm->mm->g; struct gk20a *g = vm->mm->g;
u32 page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx]; u32 page_size = vm->gmmu_page_sizes[gmmu_pgsz_idx];
@@ -284,15 +287,18 @@ static int update_gmmu_pte_locked(struct vm_gk20a *vm,
u32 pte_w[2] = {0, 0}; /* invalid pte */ u32 pte_w[2] = {0, 0}; /* invalid pte */
if (*iova) { if (*iova) {
if (unmapped_pte) u32 pte_valid = unmapped_pte ?
pte_w[0] = gmmu_new_pte_valid_false_f(); gmmu_new_pte_valid_false_f() :
else gmmu_new_pte_valid_true_f();
pte_w[0] = gmmu_new_pte_valid_true_f(); u32 iova_v = *iova >> gmmu_new_pte_address_shift_v();
pte_w[0] |= g->mm.vidmem_is_vidmem ? u32 pte_addr = aperture == APERTURE_SYSMEM ?
gmmu_new_pte_aperture_sys_mem_ncoh_f() : gmmu_new_pte_address_sys_f(iova_v) :
gmmu_new_pte_aperture_video_memory_f(); gmmu_new_pte_address_vid_f(iova_v);
pte_w[0] |= gmmu_new_pte_address_sys_f(*iova u32 pte_tgt = __gk20a_aperture_mask(g, aperture,
>> gmmu_new_pte_address_shift_v()); gmmu_new_pte_aperture_sys_mem_ncoh_f(),
gmmu_new_pte_aperture_video_memory_f());
pte_w[0] = pte_valid | pte_addr | pte_tgt;
if (priv) if (priv)
pte_w[0] |= gmmu_new_pte_privilege_true_f(); pte_w[0] |= gmmu_new_pte_privilege_true_f();