Mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
Synced 2025-12-24 02:22:34 +03:00
gpu: nvgpu: Physical page bits to be per chip
Retrieve the number of physical page bits based on the chip.

Bug 1567274

Change-Id: I5a0f6a66be37f2cf720d66b5bdb2b704cd992234
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/601700
commit 2d71d633cf
parent 1deb73b9c6
committed by Dan Willemsen
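In brief: the SMMU "translate this address" marker, previously hard-coded at bit 34 via the NV_MC_SMMU_VADDR_TRANSLATE macro in mm_gk20a.h, is now derived from a per-chip gpu_ops callback. A minimal stand-alone sketch of the resulting call path; the function names and bit math follow the diff below, while the userspace scaffolding (stdint types, main) is illustrative only:

#include <stdio.h>
#include <stdint.h>

struct gk20a;

struct gpu_ops {
	struct {
		/* per-chip hook introduced by this change */
		uint32_t (*get_physical_addr_bits)(struct gk20a *g);
	} mm;
};

struct gk20a {
	struct gpu_ops ops;
};

/* gk20a (and gm20b, which reuses it) reports 34 physical address bits */
static uint32_t gk20a_mm_get_physical_addr_bits(struct gk20a *g)
{
	(void)g;
	return 34;
}

/* Replaces NV_MC_SMMU_VADDR_TRANSLATE(x): set the chip's SMMU
 * translation bit just above its physical address range. */
static uint64_t gk20a_mm_smmu_vaddr_translate(struct gk20a *g, uint64_t iova)
{
	return iova | 1ULL << g->ops.mm.get_physical_addr_bits(g);
}

int main(void)
{
	struct gk20a g = {
		.ops.mm.get_physical_addr_bits = gk20a_mm_get_physical_addr_bits,
	};

	/* prints 0x400001000: bit 34 marks 0x1000 for SMMU translation */
	printf("0x%llx\n",
	       (unsigned long long)gk20a_mm_smmu_vaddr_translate(&g, 0x1000));
	return 0;
}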
@@ -98,7 +98,7 @@ int channel_gk20a_commit_va(struct channel_gk20a *c)
 	if (!inst_ptr)
 		return -ENOMEM;
 
-	addr = gk20a_mm_iova_addr(c->vm->pdes.sgt->sgl);
+	addr = gk20a_mm_iova_addr(c->g, c->vm->pdes.sgt->sgl);
 	addr_lo = u64_lo32(addr >> 12);
 	addr_hi = u64_hi32(addr);
 
@@ -581,7 +581,7 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g)
 		f->channel[chid].userd_cpu_va =
 			f->userd.cpuva + chid * f->userd_entry_size;
 		f->channel[chid].userd_iova =
-			NV_MC_SMMU_VADDR_TRANSLATE(f->userd.iova)
+			gk20a_mm_smmu_vaddr_translate(g, f->userd.iova)
 			+ chid * f->userd_entry_size;
 		f->channel[chid].userd_gpu_va =
 			f->userd.gpu_va + chid * f->userd_entry_size;
@@ -312,6 +312,7 @@ struct gpu_ops {
 		void (*set_big_page_size)(struct gk20a *g,
 					  void *inst_ptr, int size);
 		u32 (*get_big_page_sizes)(void);
+		u32 (*get_physical_addr_bits)(struct gk20a *g);
 	} mm;
 	struct {
 		int (*prepare_ucode)(struct gk20a *g);
@@ -1734,7 +1734,7 @@ static int gr_gk20a_init_ctxsw_ucode_vaspace(struct gk20a *g)
 	gk20a_mem_wr32(inst_ptr, ram_in_adr_limit_hi_w(),
 		ram_in_adr_limit_hi_f(u64_hi32(vm->va_limit)));
 
-	pde_addr = gk20a_mm_iova_addr(vm->pdes.sgt->sgl);
+	pde_addr = gk20a_mm_iova_addr(g, vm->pdes.sgt->sgl);
 	pde_addr_lo = u64_lo32(pde_addr >> 12);
 	pde_addr_hi = u64_hi32(pde_addr);
 	gk20a_mem_wr32(inst_ptr, ram_in_page_dir_base_lo_w(),
@@ -4255,7 +4255,7 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g)
 	gk20a_dbg_fn("");
 
 	/* init mmu debug buffer */
-	addr = NV_MC_SMMU_VADDR_TRANSLATE(gr->mmu_wr_mem.iova);
+	addr = gk20a_mm_smmu_vaddr_translate(g, gr->mmu_wr_mem.iova);
 	addr >>= fb_mmu_debug_wr_addr_alignment_v();
 
 	gk20a_writel(g, fb_mmu_debug_wr_r(),
@@ -4263,7 +4263,7 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g)
 		fb_mmu_debug_wr_vol_false_f() |
 		fb_mmu_debug_wr_addr_f(addr));
 
-	addr = NV_MC_SMMU_VADDR_TRANSLATE(gr->mmu_rd_mem.iova);
+	addr = gk20a_mm_smmu_vaddr_translate(g, gr->mmu_rd_mem.iova);
 	addr >>= fb_mmu_debug_rd_addr_alignment_v();
 
 	gk20a_writel(g, fb_mmu_debug_rd_r(),
@@ -173,7 +173,7 @@ static void gk20a_ltc_init_cbc(struct gk20a *g, struct gr_gk20a *gr)
 	if (tegra_platform_is_linsim())
 		compbit_store_base_iova = gr->compbit_store.base_iova;
 	else
-		compbit_store_base_iova = NV_MC_SMMU_VADDR_TRANSLATE(
+		compbit_store_base_iova = gk20a_mm_smmu_vaddr_translate(g,
				gr->compbit_store.base_iova);
 
 	compbit_base_post_divide64 = compbit_store_base_iova >>
@@ -597,7 +597,7 @@ int zalloc_gmmu_page_table_gk20a(struct vm_gk20a *vm,
 		return err;
 
 	gk20a_dbg(gpu_dbg_pte, "pte = 0x%p, addr=%08llx, size %d",
-			pte, gk20a_mm_iova_addr(sgt->sgl), pte_order);
+			pte, gk20a_mm_iova_addr(vm->mm->g, sgt->sgl), pte_order);
 
 	pte->ref = handle;
 	pte->sgt = sgt;
@@ -1554,7 +1554,7 @@ dma_addr_t gk20a_mm_gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr)
 	mutex_lock(&vm->update_gmmu_lock);
 	buffer = find_mapped_buffer_locked(&vm->mapped_buffers, gpu_vaddr);
 	if (buffer)
-		addr = gk20a_mm_iova_addr(buffer->sgt->sgl);
+		addr = gk20a_mm_iova_addr(vm->mm->g, buffer->sgt->sgl);
 	mutex_unlock(&vm->update_gmmu_lock);
 
 	return addr;
@@ -1657,16 +1657,19 @@ void gk20a_free_sgtable(struct sg_table **sgt)
 	*sgt = NULL;
 }
 
-u64 gk20a_mm_iova_addr(struct scatterlist *sgl)
+u64 gk20a_mm_smmu_vaddr_translate(struct gk20a *g, dma_addr_t iova)
+{
+	return iova | 1ULL << g->ops.mm.get_physical_addr_bits(g);
+}
+
+u64 gk20a_mm_iova_addr(struct gk20a *g, struct scatterlist *sgl)
 {
 	u64 result = sg_phys(sgl);
 #ifdef CONFIG_TEGRA_IOMMU_SMMU
 	if (sg_dma_address(sgl) == DMA_ERROR_CODE)
 		result = 0;
-	else if (sg_dma_address(sgl)) {
-		result = sg_dma_address(sgl) |
-			1ULL << NV_MC_SMMU_VADDR_TRANSLATION_BIT;
-	}
+	else if (sg_dma_address(sgl))
+		result = gk20a_mm_smmu_vaddr_translate(g, sg_dma_address(sgl));
 #endif
 	return result;
 }
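Note the three-way behaviour gk20a_mm_iova_addr keeps after the rewrite: a failed DMA mapping yields 0, a valid SMMU mapping yields the DMA address with the chip's translation bit set, and an unmapped scatterlist falls back to the CPU-physical address from sg_phys(). A condensed stand-alone sketch of that control flow, with the error sentinel mocked and phys_bits standing in for g->ops.mm.get_physical_addr_bits(g):

#include <stdint.h>

#define DMA_ERROR_CODE (~(uint64_t)0)	/* stand-in for the arch sentinel */

uint64_t iova_addr(uint64_t phys, uint64_t dma, uint32_t phys_bits)
{
	uint64_t result = phys;		/* default: CPU-physical address */

	if (dma == DMA_ERROR_CODE)
		result = 0;		/* DMA mapping failed */
	else if (dma)
		result = dma | 1ULL << phys_bits;	/* SMMU-translated */

	return result;
}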
@@ -1709,7 +1712,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 	BUG_ON(space_to_skip & (page_size - 1));
 
 	while (space_to_skip > 0 && cur_chunk) {
-		u64 new_addr = gk20a_mm_iova_addr(cur_chunk);
+		u64 new_addr = gk20a_mm_iova_addr(vm->mm->g, cur_chunk);
 		if (new_addr) {
 			addr = new_addr;
 			addr += cur_offset;
@@ -1759,7 +1762,8 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 	gk20a_dbg(gpu_dbg_pte, "pte_lo=%d, pte_hi=%d", pte_lo, pte_hi);
 	for (pte_cur = pte_lo; pte_cur <= pte_hi; pte_cur++) {
 		if (likely(sgt)) {
-			u64 new_addr = gk20a_mm_iova_addr(cur_chunk);
+			u64 new_addr = gk20a_mm_iova_addr(vm->mm->g,
+					cur_chunk);
 			if (new_addr) {
 				addr = new_addr;
 				addr += cur_offset;
@@ -1886,11 +1890,11 @@ void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i)
 
 	if (small_valid)
 		pte_addr[gmmu_page_size_small] =
-			gk20a_mm_iova_addr(small_pte->sgt->sgl);
+			gk20a_mm_iova_addr(vm->mm->g, small_pte->sgt->sgl);
 
 	if (big_valid)
 		pte_addr[gmmu_page_size_big] =
-			gk20a_mm_iova_addr(big_pte->sgt->sgl);
+			gk20a_mm_iova_addr(vm->mm->g, big_pte->sgt->sgl);
 
 	pde_v[0] = gmmu_pde_size_full_f();
 	pde_v[0] |= big_valid ?
@@ -2270,7 +2274,7 @@ static int gk20a_init_vm(struct mm_gk20a *mm,
 		goto clean_up_ptes;
 	}
 	gk20a_dbg(gpu_dbg_pte, "bar 1 pdes.kv = 0x%p, pdes.phys = 0x%llx",
-			vm->pdes.kv, gk20a_mm_iova_addr(vm->pdes.sgt->sgl));
+			vm->pdes.kv, gk20a_mm_iova_addr(vm->mm->g, vm->pdes.sgt->sgl));
 	/* we could release vm->pdes.kv but it's only one page... */
 
 	/* low-half: alloc small pages */
@@ -2728,9 +2732,9 @@ static int gk20a_init_bar1_vm(struct mm_gk20a *mm)
 			mm->bar1.aperture_size, false, "bar1");
 
 	gk20a_dbg_info("pde pa=0x%llx",
-			(u64)gk20a_mm_iova_addr(vm->pdes.sgt->sgl));
+			(u64)gk20a_mm_iova_addr(g, vm->pdes.sgt->sgl));
 
-	pde_addr = gk20a_mm_iova_addr(vm->pdes.sgt->sgl);
+	pde_addr = gk20a_mm_iova_addr(g, vm->pdes.sgt->sgl);
 	pde_addr_lo = u64_lo32(pde_addr >> ram_in_base_shift_v());
 	pde_addr_hi = u64_hi32(pde_addr);
 
@@ -2814,9 +2818,9 @@ static int gk20a_init_system_vm(struct mm_gk20a *mm)
 			SZ_128K << 10, GK20A_PMU_VA_SIZE, false, "system");
 
 	gk20a_dbg_info("pde pa=0x%llx",
-			(u64)gk20a_mm_iova_addr(vm->pdes.sgt->sgl));
+			(u64)gk20a_mm_iova_addr(g, vm->pdes.sgt->sgl));
 
-	pde_addr = gk20a_mm_iova_addr(vm->pdes.sgt->sgl);
+	pde_addr = gk20a_mm_iova_addr(g, vm->pdes.sgt->sgl);
 	pde_addr_lo = u64_lo32(pde_addr >> ram_in_base_shift_v());
 	pde_addr_hi = u64_hi32(pde_addr);
 
@@ -3034,7 +3038,8 @@ int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va,
 void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm)
 {
 	struct gk20a *g = gk20a_from_vm(vm);
-	u32 addr_lo = u64_lo32(gk20a_mm_iova_addr(vm->pdes.sgt->sgl) >> 12);
+	u32 addr_lo = u64_lo32(gk20a_mm_iova_addr(vm->mm->g,
+						  vm->pdes.sgt->sgl) >> 12);
 	u32 data;
 	s32 retry = 200;
 	static DEFINE_MUTEX(tlb_lock);
@@ -3116,6 +3121,11 @@ bool gk20a_mm_mmu_debug_mode_enabled(struct gk20a *g)
 		fb_mmu_debug_ctrl_debug_enabled_v();
 }
 
+u32 gk20a_mm_get_physical_addr_bits(struct gk20a *g)
+{
+	return 34;
+}
+
 void gk20a_init_mm(struct gpu_ops *gops)
 {
 	/* remember to remove NVGPU_GPU_FLAGS_SUPPORT_SPARSE_ALLOCS in
@@ -3134,5 +3144,6 @@ void gk20a_init_mm(struct gpu_ops *gops)
 	gops->mm.l2_invalidate = gk20a_mm_l2_invalidate;
 	gops->mm.l2_flush = gk20a_mm_l2_flush;
 	gops->mm.tlb_invalidate = gk20a_mm_tlb_invalidate;
+	gops->mm.get_physical_addr_bits = gk20a_mm_get_physical_addr_bits;
 }
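Both gk20a and gm20b report 34 bits, so they share gk20a_mm_get_physical_addr_bits (see the gm20b_init_mm hunk further down). A chip with a wider physical address range would only need its own callback; a hypothetical sketch against the headers shown in this diff, with the chip name and bit count invented for illustration, not taken from any manual:

/* Hypothetical, not part of this commit: a future chip whose SMMU
 * translation bit sits above 36 physical address bits. */
static u32 gmXXXb_mm_get_physical_addr_bits(struct gk20a *g)
{
	return 36;	/* illustrative value */
}

void gmXXXb_init_mm(struct gpu_ops *gops)
{
	gops->mm.get_physical_addr_bits = gmXXXb_mm_get_physical_addr_bits;
}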
@@ -25,15 +25,6 @@
 #include <asm/cacheflush.h>
 #include "gk20a_allocator.h"
 
-/* This "address bit" in the gmmu ptes (and other gk20a accesses)
- * signals the address as presented should be translated by the SMMU.
- * Without this bit present gk20a accesses are *not* translated.
- */
-/* Hack, get this from manuals somehow... */
-#define NV_MC_SMMU_VADDR_TRANSLATION_BIT 34
-#define NV_MC_SMMU_VADDR_TRANSLATE(x) (x | \
-		(1ULL << NV_MC_SMMU_VADDR_TRANSLATION_BIT))
-
 /* For now keep the size relatively small-ish compared to the full
  * 40b va. 32GB for now. It consists of two 16GB spaces. */
 #define NV_GMMU_VA_RANGE 35ULL
@@ -360,6 +351,7 @@ struct mm_gk20a {
 
 	void (*remove_support)(struct mm_gk20a *mm);
 	bool sw_ready;
+	int physical_bits;
 #ifdef CONFIG_DEBUG_FS
 	u32 ltc_enabled;
 	u32 ltc_enabled_debug;
@@ -420,7 +412,8 @@ int gk20a_get_sgtable_from_pages(struct device *d, struct sg_table **sgt,
 
 void gk20a_free_sgtable(struct sg_table **sgt);
 
-u64 gk20a_mm_iova_addr(struct scatterlist *sgl);
+u64 gk20a_mm_iova_addr(struct gk20a *g, struct scatterlist *sgl);
+u64 gk20a_mm_smmu_vaddr_translate(struct gk20a *g, dma_addr_t iova);
 
 void gk20a_mm_ltc_isr(struct gk20a *g);
@@ -557,6 +550,8 @@ void free_gmmu_pages(struct vm_gk20a *vm, void *handle,
 		size_t size);
 void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i);
 
+u32 gk20a_mm_get_physical_addr_bits(struct gk20a *g);
+
 struct gpu_ops;
 void gk20a_init_mm(struct gpu_ops *gops);
 #endif /* MM_GK20A_H */
@@ -258,7 +258,8 @@ int prepare_ucode_blob(struct gk20a *g)
 	gm20b_dbg_pmu("managed LS falcon %d, WPR size %d bytes.\n",
 		plsfm->managed_flcn_cnt, plsfm->wpr_size);
 	lsfm_init_wpr_contents(g, plsfm, nonwpr_addr);
-	g->acr.ucode_blob_start = NV_MC_SMMU_VADDR_TRANSLATE(iova);
+	g->acr.ucode_blob_start =
+		gk20a_mm_smmu_vaddr_translate(g, iova);
 	g->acr.ucode_blob_size = plsfm->wpr_size;
 	gm20b_dbg_pmu("base reg carveout 2:%x\n",
 		readl(mc + MC_SECURITY_CARVEOUT2_BOM_0));
@@ -301,4 +301,5 @@ void gm20b_init_mm(struct gpu_ops *gops)
 	gops->mm.tlb_invalidate = gk20a_mm_tlb_invalidate;
 	gops->mm.set_big_page_size = gm20b_mm_set_big_page_size;
 	gops->mm.get_big_page_sizes = gm20b_mm_get_big_page_sizes;
+	gops->mm.get_physical_addr_bits = gk20a_mm_get_physical_addr_bits;
 }
@@ -317,7 +317,7 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g)
 		f->channel[chid].userd_cpu_va =
 			f->userd.cpuva + chid * f->userd_entry_size;
 		f->channel[chid].userd_iova =
-			NV_MC_SMMU_VADDR_TRANSLATE(f->userd.iova)
+			gk20a_mm_smmu_vaddr_translate(g, f->userd.iova)
 			+ chid * f->userd_entry_size;
 		f->channel[chid].userd_gpu_va =
 			f->userd.gpu_va + chid * f->userd_entry_size;
@@ -74,7 +74,7 @@ static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm,
 	struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d);
 	struct tegra_vgpu_cmd_msg msg;
 	struct tegra_vgpu_as_map_params *p = &msg.params.as_map;
-	u64 addr = gk20a_mm_iova_addr(sgt->sgl);
+	u64 addr = gk20a_mm_iova_addr(g, sgt->sgl);
 	u8 prot;
 
 	gk20a_dbg_fn("");
@@ -210,7 +210,7 @@ u64 vgpu_bar1_map(struct gk20a *g, struct sg_table **sgt, u64 size)
 	struct gk20a_platform *platform = gk20a_get_platform(g->dev);
 	struct dma_iommu_mapping *mapping =
 			to_dma_iommu_mapping(dev_from_gk20a(g));
-	u64 addr = gk20a_mm_iova_addr((*sgt)->sgl);
+	u64 addr = gk20a_mm_iova_addr(g, (*sgt)->sgl);
 	struct tegra_vgpu_cmd_msg msg;
 	struct tegra_vgpu_as_map_params *p = &msg.params.as_map;
 	int err;
@@ -429,4 +429,5 @@ void vgpu_init_mm_ops(struct gpu_ops *gops)
 	gops->mm.l2_invalidate = vgpu_mm_l2_invalidate;
 	gops->mm.l2_flush = vgpu_mm_l2_flush;
 	gops->mm.tlb_invalidate = vgpu_mm_tlb_invalidate;
+	gops->mm.get_physical_addr_bits = gk20a_mm_get_physical_addr_bits;
 }