gpu: nvgpu: Move pd_cache declarations to new header

The pd_cache declarations were originally part of the gmmu.h header.
That is bad from a unit isolation perspective, so this patch moves all
of the pd_cache specifics into a new header file: <nvgpu/pd_cache.h>.
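
For reference, the API surface that moves (taken from the new header
added in the diff below) is:

    /* Now declared in <nvgpu/pd_cache.h> instead of <nvgpu/gmmu.h>. */
    int  nvgpu_pd_alloc(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd, u32 bytes);
    void nvgpu_pd_free(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd);
    int  nvgpu_pd_cache_init(struct gk20a *g);
    void nvgpu_pd_cache_fini(struct gk20a *g);
    u32  nvgpu_pd_offset_from_index(const struct gk20a_mmu_level *l, u32 pd_idx);
    void nvgpu_pd_write(struct gk20a *g, struct nvgpu_gmmu_pd *pd,
                        size_t w, u32 data);
    u64  nvgpu_pd_gpu_addr(struct gk20a *g, struct nvgpu_gmmu_pd *pd);

The struct nvgpu_gmmu_pd definition moves along with these declarations.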

A couple of static inlines that were only viable while the code lived in
gmmu.h have also been turned into real, first-class functions. This lets
the pd_cache.h header avoid including the gmmu.h header file.
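
A compressed sketch of why the conversion breaks the include dependency:
once the bodies are out of line, pd_cache.h only needs incomplete
(forward-declared) types, and only pd_cache.c has to see the full struct
definitions from gmmu.h.

    /* pd_cache.h: incomplete types are enough for the prototypes. */
    struct gk20a;
    struct gk20a_mmu_level;
    struct nvgpu_gmmu_pd;

    u32 nvgpu_pd_offset_from_index(const struct gk20a_mmu_level *l, u32 pd_idx);

    /*
     * pd_cache.c sees the full struct gk20a_mmu_level and provides the body:
     *
     *     u32 nvgpu_pd_offset_from_index(const struct gk20a_mmu_level *l,
     *                                    u32 pd_idx)
     *     {
     *             return (pd_idx * l->entry_size) / U32(sizeof(u32));
     *     }
     */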

Also fix an issue in nvgpu_pd_write() where the data argument was, for
no clear reason, being passed as a size_t. It is now a u32.
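
The before/after of that helper, condensed from the diff (only the data
argument changes; the word index w stays a size_t):

    /* Before: static inline in gmmu.h; data was a size_t. */
    static inline void pd_write(struct gk20a *g, struct nvgpu_gmmu_pd *pd,
                                size_t w, size_t data)
    {
            nvgpu_mem_wr32(g, pd->mem, (pd->mem_offs / sizeof(u32)) + w, data);
    }

    /* After: real function declared in pd_cache.h, defined in pd_cache.c;
     * data is a u32, matching the 32-bit word that nvgpu_mem_wr32() writes. */
    void nvgpu_pd_write(struct gk20a *g, struct nvgpu_gmmu_pd *pd,
                        size_t w, u32 data)
    {
            nvgpu_mem_wr32(g, pd->mem,
                           (u32)((pd->mem_offs / sizeof(u32)) + w), data);
    }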

JIRA NVGPU-1444

Change-Id: Iead9a0d998396d2289ffcb3b48765d770400397b
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1965271
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Author: Alex Waterman
Date: 2018-11-13 13:08:05 -08:00
Committed by: mobile promotions
Commit: 15603b9fd5 (parent: f110d6b2f1)
8 changed files with 125 additions and 85 deletions


@@ -25,6 +25,7 @@
#include <nvgpu/list.h>
#include <nvgpu/dma.h>
#include <nvgpu/gmmu.h>
#include <nvgpu/pd_cache.h>
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/nvgpu_sgt.h>
#include <nvgpu/enabled.h>
@@ -218,22 +219,6 @@ int nvgpu_gmmu_init_page_table(struct vm_gk20a *vm)
return 0;
}
/*
* Return the _physical_ address of a page directory.
*/
u64 nvgpu_pde_gpu_addr(struct gk20a *g, struct nvgpu_gmmu_pd *pd)
{
u64 page_addr;
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_NVLINK)) {
page_addr = nvgpu_mem_get_phys_addr(g, pd->mem);
} else {
page_addr = nvgpu_mem_get_addr(g, pd->mem);
}
return page_addr + pd->mem_offs;
}
/*
* Return the aligned length based on the page size in attrs.
*/
@@ -477,7 +462,7 @@ static int __set_pd_level(struct vm_gk20a *vm,
* target addr is the real physical address we are aiming for.
*/
target_addr = (next_pd != NULL) ?
nvgpu_pde_gpu_addr(g, next_pd) :
nvgpu_pd_gpu_addr(g, next_pd) :
phys_addr;
l->update_entry(vm, l,
@@ -987,8 +972,8 @@ static int __nvgpu_locate_pte(struct gk20a *g, struct vm_gk20a *vm,
* Take into account the real offset into the nvgpu_mem since the PD
* may be located at an offset other than 0 (due to PD packing).
*/
pte_base = (pd->mem_offs / sizeof(u32)) +
pd_offset_from_index(l, pd_idx);
pte_base = (u32)(pd->mem_offs / sizeof(u32)) +
nvgpu_pd_offset_from_index(l, pd_idx);
pte_size = (u32)(l->entry_size / sizeof(u32));
if (data != NULL) {
@@ -1006,7 +991,7 @@ static int __nvgpu_locate_pte(struct gk20a *g, struct vm_gk20a *vm,
}
if (pd_offs_out != NULL) {
*pd_offs_out = pd_offset_from_index(l, pd_idx);
*pd_offs_out = nvgpu_pd_offset_from_index(l, pd_idx);
}
return 0;
@@ -1043,7 +1028,7 @@ int __nvgpu_set_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte)
pte_size = __nvgpu_pte_words(g);
for (i = 0; i < pte_size; i++) {
pd_write(g, pd, (size_t)pd_offs + (size_t)i, pte[i]);
nvgpu_pd_write(g, pd, (size_t)pd_offs + (size_t)i, pte[i]);
pte_dbg(g, attrs_ptr,
"PTE: idx=%-4u (%d) 0x%08x", pd_idx, i, pte[i]);
}


@@ -28,6 +28,7 @@
#include <nvgpu/list.h>
#include <nvgpu/log2.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/enabled.h>
#include "gk20a/mm_gk20a.h"
@@ -161,6 +162,34 @@ static u32 nvgpu_pd_cache_get_nr_entries(struct nvgpu_pd_mem_entry *pentry)
return PAGE_SIZE / pentry->pd_size;
}
/*
* Return the _physical_ address of a page directory.
*/
u64 nvgpu_pd_gpu_addr(struct gk20a *g, struct nvgpu_gmmu_pd *pd)
{
u64 page_addr;
if (nvgpu_is_enabled(g, NVGPU_SUPPORT_NVLINK)) {
page_addr = nvgpu_mem_get_phys_addr(g, pd->mem);
} else {
page_addr = nvgpu_mem_get_addr(g, pd->mem);
}
return page_addr + pd->mem_offs;
}
u32 nvgpu_pd_offset_from_index(const struct gk20a_mmu_level *l, u32 pd_idx)
{
return (pd_idx * l->entry_size) / U32(sizeof(u32));
}
void nvgpu_pd_write(struct gk20a *g, struct nvgpu_gmmu_pd *pd,
size_t w, u32 data)
{
nvgpu_mem_wr32(g, pd->mem,
(u32)((pd->mem_offs / sizeof(u32)) + w), data);
}
int nvgpu_pd_cache_init(struct gk20a *g)
{
struct nvgpu_pd_cache *cache;


@@ -44,6 +44,7 @@
#include <nvgpu/utils.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/channel.h>
#include <nvgpu/pd_cache.h>
#include "mm_gk20a.h"
#include "fence_gk20a.h"
@@ -162,7 +163,7 @@ static void update_gmmu_pde_locked(struct vm_gk20a *vm,
{
struct gk20a *g = gk20a_from_vm(vm);
bool small_valid, big_valid;
u32 pd_offset = pd_offset_from_index(l, pd_idx);
u32 pd_offset = nvgpu_pd_offset_from_index(l, pd_idx);
u32 pde_v[2] = {0, 0};
small_valid = attrs->pgsz == GMMU_PAGE_SIZE_SMALL;
@@ -190,8 +191,8 @@ static void update_gmmu_pde_locked(struct vm_gk20a *vm,
virt_addr, phys_addr,
pde_v[1], pde_v[0]);
pd_write(g, &vm->pdb, (size_t)pd_offset + (size_t)0, pde_v[0]);
pd_write(g, &vm->pdb, (size_t)pd_offset + (size_t)1, pde_v[1]);
nvgpu_pd_write(g, &vm->pdb, (size_t)pd_offset + (size_t)0, pde_v[0]);
nvgpu_pd_write(g, &vm->pdb, (size_t)pd_offset + (size_t)1, pde_v[1]);
}
static void __update_pte_sparse(u32 *pte_w)
@@ -268,7 +269,7 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm,
{
struct gk20a *g = gk20a_from_vm(vm);
u32 page_size = vm->gmmu_page_sizes[attrs->pgsz];
u32 pd_offset = pd_offset_from_index(l, pd_idx);
u32 pd_offset = nvgpu_pd_offset_from_index(l, pd_idx);
u32 pte_w[2] = {0, 0};
int ctag_shift = 0;
int shamt = ilog2(g->ops.fb.compression_page_size(g));
@@ -304,8 +305,8 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm,
(u32)attrs->ctag >> ctag_shift,
pte_w[1], pte_w[0]);
pd_write(g, pd, (size_t)pd_offset + (size_t)0, pte_w[0]);
pd_write(g, pd, (size_t)pd_offset + (size_t)1, pte_w[1]);
nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)0, pte_w[0]);
nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)1, pte_w[1]);
}
u32 gk20a_get_pde_pgsz(struct gk20a *g, const struct gk20a_mmu_level *l,
@@ -376,7 +377,7 @@ int gk20a_vm_bind_channel(struct vm_gk20a *vm, struct channel_gk20a *ch)
void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block,
struct vm_gk20a *vm)
{
u64 pdb_addr = nvgpu_pde_gpu_addr(g, &vm->pdb);
u64 pdb_addr = nvgpu_pd_gpu_addr(g, &vm->pdb);
u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
u32 pdb_addr_hi = u64_hi32(pdb_addr);


@@ -25,6 +25,7 @@
#include <nvgpu/mm.h>
#include <nvgpu/dma.h>
#include <nvgpu/gmmu.h>
#include <nvgpu/pd_cache.h>
#include <nvgpu/sizes.h>
#include <nvgpu/utils.h>
#include <nvgpu/gk20a.h>
@@ -88,7 +89,7 @@ static void update_gmmu_pde3_locked(struct vm_gk20a *vm,
{
struct gk20a *g = gk20a_from_vm(vm);
struct nvgpu_gmmu_pd *next_pd = &pd->entries[pd_idx];
u32 pd_offset = pd_offset_from_index(l, pd_idx);
u32 pd_offset = nvgpu_pd_offset_from_index(l, pd_idx);
u32 pde_v[2] = {0, 0};
phys_addr >>= gmmu_new_pde_address_shift_v();
@@ -101,8 +102,8 @@ static void update_gmmu_pde3_locked(struct vm_gk20a *vm,
pde_v[0] |= gmmu_new_pde_vol_true_f();
pde_v[1] |= phys_addr >> 24;
pd_write(g, pd, (size_t)pd_offset + (size_t)0, pde_v[0]);
pd_write(g, pd, (size_t)pd_offset + (size_t)1, pde_v[1]);
nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)0, pde_v[0]);
nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)1, pde_v[1]);
pte_dbg(g, attrs,
"PDE: i=%-4u size=%-2u offs=%-4u pgsz: -- | "
@@ -125,7 +126,7 @@ static void update_gmmu_pde0_locked(struct vm_gk20a *vm,
struct nvgpu_gmmu_pd *next_pd = &pd->entries[pd_idx];
bool small_valid, big_valid;
u32 small_addr = 0, big_addr = 0;
u32 pd_offset = pd_offset_from_index(l, pd_idx);
u32 pd_offset = nvgpu_pd_offset_from_index(l, pd_idx);
u32 pde_v[4] = {0, 0, 0, 0};
small_valid = attrs->pgsz == GMMU_PAGE_SIZE_SMALL;
@@ -160,10 +161,10 @@ static void update_gmmu_pde0_locked(struct vm_gk20a *vm,
pde_v[1] |= big_addr >> 28;
}
pd_write(g, pd, (size_t)pd_offset + (size_t)0, pde_v[0]);
pd_write(g, pd, (size_t)pd_offset + (size_t)1, pde_v[1]);
pd_write(g, pd, (size_t)pd_offset + (size_t)2, pde_v[2]);
pd_write(g, pd, (size_t)pd_offset + (size_t)3, pde_v[3]);
nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)0, pde_v[0]);
nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)1, pde_v[1]);
nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)2, pde_v[2]);
nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)3, pde_v[3]);
pte_dbg(g, attrs,
"PDE: i=%-4u size=%-2u offs=%-4u pgsz: %c%c | "
@@ -240,7 +241,7 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm,
{
struct gk20a *g = vm->mm->g;
u32 page_size = vm->gmmu_page_sizes[attrs->pgsz];
u32 pd_offset = pd_offset_from_index(l, pd_idx);
u32 pd_offset = nvgpu_pd_offset_from_index(l, pd_idx);
u32 pte_w[2] = {0, 0};
if (phys_addr != 0ULL) {
@@ -271,8 +272,8 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm,
(u32)attrs->ctag / g->ops.fb.compression_page_size(g),
pte_w[1], pte_w[0]);
pd_write(g, pd, (size_t)pd_offset + (size_t)0, pte_w[0]);
pd_write(g, pd, (size_t)pd_offset + (size_t)1, pte_w[1]);
nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)0, pte_w[0]);
nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)1, pte_w[1]);
}
#define GP10B_PDE0_ENTRY_SIZE 16U
@@ -287,7 +288,7 @@ static u32 gp10b_get_pde0_pgsz(struct gk20a *g, const struct gk20a_mmu_level *l,
struct nvgpu_gmmu_pd *pd, u32 pd_idx)
{
u32 pde_base = pd->mem_offs / sizeof(u32);
u32 pde_offset = pde_base + pd_offset_from_index(l, pd_idx);
u32 pde_offset = pde_base + nvgpu_pd_offset_from_index(l, pd_idx);
u32 pde_v[GP10B_PDE0_ENTRY_SIZE >> 2];
u32 i;
u32 pgsz = GMMU_NR_PAGE_SIZES;
@@ -380,7 +381,7 @@ const struct gk20a_mmu_level *gp10b_mm_get_mmu_levels(struct gk20a *g,
void gp10b_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block,
struct vm_gk20a *vm)
{
u64 pdb_addr = nvgpu_pde_gpu_addr(g, &vm->pdb);
u64 pdb_addr = nvgpu_pd_gpu_addr(g, &vm->pdb);
u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
u32 pdb_addr_hi = u64_hi32(pdb_addr);


@@ -37,6 +37,7 @@
struct vm_gk20a;
struct nvgpu_mem;
struct nvgpu_gmmu_pd;
#define GMMU_PAGE_SIZE_SMALL 0U
#define GMMU_PAGE_SIZE_BIG 1U
@@ -49,29 +50,6 @@ enum gk20a_mem_rw_flag {
gk20a_mem_flag_write_only = 2, /* WO */
};
/*
* GMMU page directory. This is the kernel's tracking of a list of PDEs or PTEs
* in the GMMU.
*/
struct nvgpu_gmmu_pd {
/*
* DMA memory describing the PTEs or PDEs. @mem_offs describes the
* offset of the PDE table in @mem. @cached specifies if this PD is
* using pd_cache memory.
*/
struct nvgpu_mem *mem;
u32 mem_offs;
bool cached;
u32 pd_size; /* In bytes. */
/*
* List of pointers to the next level of page tables. Will not be
* populated when this PD is pointing to PTEs.
*/
struct nvgpu_gmmu_pd *entries;
int num_entries;
};
/*
* Reduce the number of arguments getting passed through the various levels of
* GMMU mapping functions.
@@ -185,27 +163,6 @@ void nvgpu_gmmu_unmap(struct vm_gk20a *vm,
struct nvgpu_mem *mem,
u64 gpu_va);
int nvgpu_pd_alloc(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd, u32 bytes);
void nvgpu_pd_free(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd);
int nvgpu_pd_cache_init(struct gk20a *g);
void nvgpu_pd_cache_fini(struct gk20a *g);
u64 nvgpu_pde_gpu_addr(struct gk20a *g, struct nvgpu_gmmu_pd *pd);
/*
* Some useful routines that are shared across chips.
*/
static inline u32 pd_offset_from_index(const struct gk20a_mmu_level *l,
u32 pd_idx)
{
return (pd_idx * l->entry_size) / U32(sizeof(u32));
}
static inline void pd_write(struct gk20a *g, struct nvgpu_gmmu_pd *pd,
size_t w, size_t data)
{
nvgpu_mem_wr32(g, pd->mem, (pd->mem_offs / sizeof(u32)) + w, data);
}
/**
* __nvgpu_pte_words - Compute number of words in a PTE.
*


@@ -0,0 +1,65 @@
/*
* Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef NVGPU_PD_CACHE_H
#define NVGPU_PD_CACHE_H
#include <nvgpu/types.h>
struct gk20a;
struct vm_gk20a;
struct nvgpu_mem;
struct gk20a_mmu_level;
/*
* GMMU page directory. This is the kernel's tracking of a list of PDEs or PTEs
* in the GMMU.
*/
struct nvgpu_gmmu_pd {
/*
* DMA memory describing the PTEs or PDEs. @mem_offs describes the
* offset of the PDE table in @mem. @cached specifies if this PD is
* using pd_cache memory.
*/
struct nvgpu_mem *mem;
u32 mem_offs;
bool cached;
u32 pd_size; /* In bytes. */
/*
* List of pointers to the next level of page tables. Does not
* need to be populated when this PD is pointing to PTEs.
*/
struct nvgpu_gmmu_pd *entries;
int num_entries;
};
int nvgpu_pd_alloc(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd, u32 bytes);
void nvgpu_pd_free(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd);
int nvgpu_pd_cache_init(struct gk20a *g);
void nvgpu_pd_cache_fini(struct gk20a *g);
u32 nvgpu_pd_offset_from_index(const struct gk20a_mmu_level *l, u32 pd_idx);
void nvgpu_pd_write(struct gk20a *g, struct nvgpu_gmmu_pd *pd,
size_t w, u32 data);
u64 nvgpu_pd_gpu_addr(struct gk20a *g, struct nvgpu_gmmu_pd *pd);
#endif
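
For illustration only, a hypothetical caller that builds against
<nvgpu/pd_cache.h> alone might look like this (the function name and the
4096-byte size are invented for the example; setup of g and vm is elided):

    #include <nvgpu/pd_cache.h>

    static int example_pd_use(struct gk20a *g, struct vm_gk20a *vm)
    {
            struct nvgpu_gmmu_pd pd = { 0 };
            int err;

            /* Allocate a (hypothetically) 4K page directory from the cache. */
            err = nvgpu_pd_alloc(vm, &pd, 4096U);
            if (err != 0) {
                    return err;
            }

            /* Write a 32-bit word at word index 0 of this PD. */
            nvgpu_pd_write(g, &pd, 0, 0U);

            /* Address suitable for programming into the PDB. */
            (void)nvgpu_pd_gpu_addr(g, &pd);

            nvgpu_pd_free(vm, &pd);
            return 0;
    }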


@@ -28,6 +28,7 @@
#include <nvgpu/rbtree.h>
#include <nvgpu/types.h>
#include <nvgpu/gmmu.h>
#include <nvgpu/pd_cache.h>
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/allocator.h>