gpu: nvgpu: Remove mm.get_iova_addr

Remove the mm.get_iova_addr() HAL and replace it with a new HAL
called mm.gpu_phys_addr(). The new HAL returns the real physical
address that should be passed to the GPU, given a physical address
obtained from a scatter list. It also provides a mechanism by which
the HAL code can add extra bits to a GPU physical address based on
the attributes passed in; this is necessary during GMMU page table
programming.
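
For illustration, a minimal sketch of the shape a mm.gpu_phys_addr()
implementation can take. The attribute field and bit position below
are hypothetical placeholders, not the encoding used by any real chip:

    /*
     * The trivial version returns the physical address unchanged; a
     * chip that must tag GPU physical addresses can OR in extra bits
     * based on the GMMU mapping attributes. "l3_alloc" and bit 36 are
     * made-up examples.
     */
    static u64 example_gpu_phys_addr(struct gk20a *g,
                                     struct nvgpu_gmmu_attrs *attrs,
                                     u64 phys)
    {
            if (attrs != NULL && attrs->l3_alloc)
                    phys |= 1ULL << 36;

            return phys;
    }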

Also remove the flags argument from the various address functions.
This flag was used to add an IO coherence bit to the GPU physical
address, which is not supported.
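
At call sites the change reduces to dropping the raw SGL plumbing and
the unused flags parameter, as in this pair modeled on the conversions
in the diff below:

    /* Before: pass the SGL and an always-zero flags argument. */
    addr = g->ops.mm.get_iova_addr(g, mem->priv.sgt->sgl, 0);

    /* After: the helper handles aperture and IOMMU translation itself. */
    addr = nvgpu_mem_get_addr(g, mem);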

JIRA NVGPU-30

Change-Id: I69af5b1c6bd905c4077c26c098fac101c6b41a33
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1530864
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Author:    Alex Waterman <alexw@nvidia.com>
Date:      2017-06-07 17:32:56 -07:00
Committer: mobile promotions
Commit:    1da69dd8b2 (parent 192cf8c1f8)

17 changed files with 101 additions and 74 deletions


@@ -15,6 +15,7 @@
*/
#include <nvgpu/dma.h>
#include <nvgpu/gmmu.h>
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/page_allocator.h>
#include <nvgpu/log.h>
@@ -23,6 +24,8 @@
#include <nvgpu/linux/dma.h>
#include "os_linux.h"
#include "gk20a/gk20a.h"
#include "gk20a/mm_gk20a.h"
@@ -246,6 +249,61 @@ void nvgpu_memset(struct gk20a *g, struct nvgpu_mem *mem, u32 offset,
}
}
+/*
+ * Obtain a SYSMEM address from a Linux SGL. This should eventually go away
+ * and/or become private to this file once all bad usages of Linux SGLs are
+ * cleaned up in the driver.
+ */
+u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl)
+{
+        struct nvgpu_os_linux *l = container_of(g, struct nvgpu_os_linux, g);
+
+        if (!device_is_iommuable(l->dev))
+                return g->ops.mm.gpu_phys_addr(g, NULL, sg_phys(sgl));
+
+        if (sg_dma_address(sgl) == 0)
+                return g->ops.mm.gpu_phys_addr(g, NULL, sg_phys(sgl));
+
+        if (sg_dma_address(sgl) == DMA_ERROR_CODE)
+                return 0;
+
+        return gk20a_mm_smmu_vaddr_translate(g, sg_dma_address(sgl));
+}
+
+/*
+ * Obtain the address the GPU should use from the %mem assuming this is a SYSMEM
+ * allocation.
+ */
+static u64 nvgpu_mem_get_addr_sysmem(struct gk20a *g, struct nvgpu_mem *mem)
+{
+        return nvgpu_mem_get_addr_sgl(g, mem->priv.sgt->sgl);
+}
+
+/*
+ * Return the base address of %mem. Handles whether this is a VIDMEM or SYSMEM
+ * allocation.
+ *
+ * %attrs can be NULL. If it is not NULL then it may be inspected to determine
+ * if the address needs to be modified before writing into a PTE.
+ */
+u64 nvgpu_mem_get_addr(struct gk20a *g, struct nvgpu_mem *mem)
+{
+        struct nvgpu_page_alloc *alloc;
+
+        if (mem->aperture == APERTURE_SYSMEM)
+                return nvgpu_mem_get_addr_sysmem(g, mem);
+
+        /*
+         * Otherwise get the vidmem address.
+         */
+        alloc = get_vidmem_page_alloc(mem->priv.sgt->sgl);
+
+        /* This API should not be used with > 1 chunks */
+        WARN_ON(alloc->nr_chunks != 1);
+
+        return alloc->base;
+}
/*
* Be careful how you use this! You are responsible for correctly freeing this
* memory.


@@ -201,7 +201,7 @@ u64 nvgpu_pde_phys_addr(struct gk20a *g, struct nvgpu_gmmu_pd *pd)
if (g->mm.has_physical_mode)
page_addr = sg_phys(pd->mem->priv.sgt->sgl);
else
-page_addr = nvgpu_mem_get_base_addr(g, pd->mem, 0);
+page_addr = nvgpu_mem_get_addr(g, pd->mem);
return page_addr + pd->mem_offs;
}
@@ -559,7 +559,7 @@ static int __nvgpu_gmmu_update_page_table_sysmem(struct vm_gk20a *vm,
sgl = sgt->sgl;
if (!g->mm.bypass_smmu) {
-u64 io_addr = g->ops.mm.get_iova_addr(g, sgl, 0);
+u64 io_addr = nvgpu_mem_get_addr_sgl(g, sgl);
io_addr += space_to_skip;
@@ -670,7 +670,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
phys_addr = alloc->base;
} else
-phys_addr = g->ops.mm.get_iova_addr(g, sgt->sgl, 0);
+phys_addr = nvgpu_mem_get_addr_sgl(g, sgt->sgl);
}
__gmmu_dbg(g, attrs,


@@ -44,8 +44,7 @@ void fb_gk20a_reset(struct gk20a *g)
void gk20a_fb_init_hw(struct gk20a *g)
{
-u32 addr = g->ops.mm.get_iova_addr(g,
-        g->mm.sysmem_flush.priv.sgt->sgl, 0) >> 8;
+u32 addr = nvgpu_mem_get_addr(g, &g->mm.sysmem_flush) >> 8;
gk20a_writel(g, fb_niso_flush_sysmem_addr_r(), addr);
}
@@ -67,7 +66,7 @@ void gk20a_fb_tlb_invalidate(struct gk20a *g, struct nvgpu_mem *pdb)
if (!g->power_on)
return;
-addr_lo = u64_lo32(nvgpu_mem_get_base_addr(g, pdb, 0) >> 12);
+addr_lo = u64_lo32(nvgpu_mem_get_addr(g, pdb) >> 12);
nvgpu_mutex_acquire(&g->mm.tlb_lock);


@@ -891,8 +891,8 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g)
for (chid = 0; chid < f->num_channels; chid++) {
f->channel[chid].userd_iova =
-        g->ops.mm.get_iova_addr(g, f->userd.priv.sgt->sgl, 0)
-                + chid * f->userd_entry_size;
+        nvgpu_mem_get_addr(g, &f->userd) +
+                chid * f->userd_entry_size;
f->channel[chid].userd_gpu_va =
f->userd.gpu_va + chid * f->userd_entry_size;
gk20a_init_channel_support(g, chid);
@@ -3106,8 +3106,7 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
old_buf = runlist->cur_buffer;
new_buf = !runlist->cur_buffer;
-runlist_iova = g->ops.mm.get_iova_addr(
-        g, runlist->mem[new_buf].priv.sgt->sgl, 0);
+runlist_iova = nvgpu_mem_get_addr(g, &runlist->mem[new_buf]);
gk20a_dbg_info("runlist_id : %d, switch to new buffer 0x%16llx",
runlist_id, (u64)runlist_iova);


@@ -730,8 +730,6 @@ struct gpu_ops {
struct vm_gk20a *vm);
u64 (*gpu_phys_addr)(struct gk20a *g,
struct nvgpu_gmmu_attrs *attrs, u64 phys);
-u64 (*get_iova_addr)(struct gk20a *g, struct scatterlist *sgl,
-        u32 flags);
size_t (*get_vidmem_size)(struct gk20a *g);
void (*init_inst_block)(struct nvgpu_mem *inst_block,
struct vm_gk20a *vm, u32 big_page_size);


@@ -4443,7 +4443,7 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g)
gk20a_dbg_fn("");
/* init mmu debug buffer */
-addr = g->ops.mm.get_iova_addr(g, gr->mmu_wr_mem.priv.sgt->sgl, 0);
+addr = nvgpu_mem_get_addr(g, &gr->mmu_wr_mem);
addr >>= fb_mmu_debug_wr_addr_alignment_v();
gk20a_writel(g, fb_mmu_debug_wr_r(),
@@ -4453,7 +4453,7 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g)
fb_mmu_debug_wr_vol_false_f() |
fb_mmu_debug_wr_addr_f(addr));
-addr = g->ops.mm.get_iova_addr(g, gr->mmu_rd_mem.priv.sgt->sgl, 0);
+addr = nvgpu_mem_get_addr(g, &gr->mmu_rd_mem);
addr >>= fb_mmu_debug_rd_addr_alignment_v();
gk20a_writel(g, fb_mmu_debug_rd_r(),


@@ -1383,7 +1383,7 @@ int nvgpu_vm_map_compbits(struct vm_gk20a *vm,
return -EINVAL;
}
-*mapping_iova = gk20a_mm_iova_addr(g, mapped_buffer->sgt->sgl, 0);
+*mapping_iova = nvgpu_mem_get_addr_sgl(g, mapped_buffer->sgt->sgl);
*compbits_win_gva = mapped_buffer->ctag_map_win_addr;
nvgpu_mutex_release(&vm->update_gmmu_lock);
@@ -1454,30 +1454,6 @@ static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem)
}
#endif
-/*
- * If mem is in VIDMEM, return base address in vidmem
- * else return IOVA address for SYSMEM
- */
-u64 nvgpu_mem_get_base_addr(struct gk20a *g, struct nvgpu_mem *mem,
-        u32 flags)
-{
-        struct nvgpu_page_alloc *alloc;
-        u64 addr;
-
-        if (mem->aperture == APERTURE_VIDMEM) {
-                alloc = get_vidmem_page_alloc(mem->priv.sgt->sgl);
-
-                /* This API should not be used with > 1 chunks */
-                WARN_ON(alloc->nr_chunks != 1);
-
-                addr = alloc->base;
-        } else {
-                addr = g->ops.mm.get_iova_addr(g, mem->priv.sgt->sgl, flags);
-        }
-
-        return addr;
-}
#if defined(CONFIG_GK20A_VIDMEM)
static struct nvgpu_mem *get_pending_mem_desc(struct mm_gk20a *mm)
{
@@ -1526,8 +1502,7 @@ dma_addr_t gk20a_mm_gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr)
nvgpu_mutex_acquire(&vm->update_gmmu_lock);
buffer = __nvgpu_vm_find_mapped_buf(vm, gpu_vaddr);
if (buffer)
-addr = g->ops.mm.get_iova_addr(g, buffer->sgt->sgl,
-        buffer->flags);
+addr = nvgpu_mem_get_addr_sgl(g, buffer->sgt->sgl);
nvgpu_mutex_release(&vm->update_gmmu_lock);
return addr;
@@ -1545,21 +1520,6 @@ u64 gk20a_mm_smmu_vaddr_translate(struct gk20a *g, dma_addr_t iova)
return iova;
}
-u64 gk20a_mm_iova_addr(struct gk20a *g, struct scatterlist *sgl,
-        u32 flags)
-{
-        if (!device_is_iommuable(dev_from_gk20a(g)))
-                return sg_phys(sgl);
-
-        if (sg_dma_address(sgl) == 0)
-                return sg_phys(sgl);
-
-        if (sg_dma_address(sgl) == DMA_ERROR_CODE)
-                return 0;
-
-        return gk20a_mm_smmu_vaddr_translate(g, sg_dma_address(sgl));
-}
/* for gk20a the "video memory" apertures here are misnomers. */
static inline u32 big_valid_pde0_bits(struct gk20a *g,
struct nvgpu_gmmu_pd *pd, u64 addr)
@@ -2071,7 +2031,7 @@ u64 gk20a_mm_inst_block_addr(struct gk20a *g, struct nvgpu_mem *inst_block)
if (g->mm.has_physical_mode)
addr = gk20a_mem_phys(inst_block);
else
-addr = nvgpu_mem_get_base_addr(g, inst_block, 0);
+addr = nvgpu_mem_get_addr(g, inst_block);
return addr;
}
@@ -2194,7 +2154,7 @@ static int gk20a_init_ce_vm(struct mm_gk20a *mm)
void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block,
struct vm_gk20a *vm)
{
-u64 pdb_addr = nvgpu_mem_get_base_addr(g, vm->pdb.mem, 0);
+u64 pdb_addr = nvgpu_mem_get_addr(g, vm->pdb.mem);
u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
u32 pdb_addr_hi = u64_hi32(pdb_addr);
@@ -2465,6 +2425,11 @@ u32 gk20a_mm_get_physical_addr_bits(struct gk20a *g)
return 34;
}
+u64 gk20a_mm_gpu_phys_addr(struct gk20a *g, u64 phys, u32 flags)
+{
+        return phys;
+}
const struct gk20a_mmu_level *gk20a_mm_get_mmu_levels(struct gk20a *g,
u32 big_page_size)
{


@@ -345,11 +345,8 @@ void gk20a_mm_dump_vm(struct vm_gk20a *vm,
int gk20a_mm_suspend(struct gk20a *g);
-u64 gk20a_mm_iova_addr(struct gk20a *g, struct scatterlist *sgl,
-        u32 flags);
+u64 gk20a_mm_gpu_phys_addr(struct gk20a *g, u64 phys, u32 flags);
u64 gk20a_mm_smmu_vaddr_translate(struct gk20a *g, dma_addr_t iova);
-u64 nvgpu_mem_get_base_addr(struct gk20a *g, struct nvgpu_mem *mem,
-        u32 flags);
void gk20a_mm_ltc_isr(struct gk20a *g);


@@ -1081,7 +1081,7 @@ static int gm20b_bootstrap_hs_flcn(struct gk20a *g)
u32 *acr_ucode_header_t210_load;
u32 *acr_ucode_data_t210_load;
-start = g->ops.mm.get_iova_addr(g, acr->ucode_blob.priv.sgt->sgl, 0);
+start = nvgpu_mem_get_addr(g, &acr->ucode_blob);
size = acr->ucode_blob.size;
gm20b_dbg_pmu("");


@@ -401,8 +401,8 @@ void gm20b_ltc_init_cbc(struct gk20a *g, struct gr_gk20a *gr)
if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL))
compbit_store_iova = gk20a_mem_phys(&gr->compbit_store.mem);
else
-compbit_store_iova = g->ops.mm.get_iova_addr(g,
-        gr->compbit_store.mem.priv.sgt->sgl, 0);
+compbit_store_iova = nvgpu_mem_get_addr(g,
+        &gr->compbit_store.mem);
compbit_base_post_divide64 = compbit_store_iova >>
ltc_ltcs_ltss_cbc_base_alignment_shift_v();


@@ -80,7 +80,6 @@ void gm20b_init_mm(struct gpu_ops *gops)
gops->mm.get_big_page_sizes = gm20b_mm_get_big_page_sizes;
gops->mm.get_default_big_page_size = gm20b_mm_get_default_big_page_size;
gops->mm.gpu_phys_addr = gm20b_gpu_phys_addr;
-gops->mm.get_iova_addr = gk20a_mm_iova_addr;
gops->mm.get_physical_addr_bits = gk20a_mm_get_physical_addr_bits;
gops->mm.get_mmu_levels = gk20a_mm_get_mmu_levels;
gops->mm.init_pdb = gk20a_mm_init_pdb;


@@ -20,6 +20,9 @@ struct gk20a;
#define PDE_ADDR_START(x, y) ((x) & ~((0x1UL << (y)) - 1))
#define PDE_ADDR_END(x, y) ((x) | ((0x1UL << (y)) - 1))
u64 gm20b_gpu_phys_addr(struct gk20a *g,
struct nvgpu_gmmu_attrs *attrs, u64 phys);
void gm20b_init_mm(struct gpu_ops *gops);
int gm20b_mm_mmu_vpr_info_fetch(struct gk20a *g);
#endif


@@ -48,8 +48,7 @@ static int gp10b_init_mm_setup_hw(struct gk20a *g)
g->ops.fb.set_mmu_page_size(g);
gk20a_writel(g, fb_niso_flush_sysmem_addr_r(),
-        (g->ops.mm.get_iova_addr(g, g->mm.sysmem_flush.priv.sgt->sgl, 0)
-                >> 8ULL));
+        nvgpu_mem_get_addr(g, &g->mm.sysmem_flush) >> 8ULL);
g->ops.bus.bar1_bind(g, inst_block);
@@ -343,7 +342,7 @@ static const struct gk20a_mmu_level *gp10b_mm_get_mmu_levels(struct gk20a *g,
static void gp10b_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block,
struct vm_gk20a *vm)
{
-u64 pdb_addr = nvgpu_mem_get_base_addr(g, vm->pdb.mem, 0);
+u64 pdb_addr = nvgpu_mem_get_addr(g, vm->pdb.mem);
u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
u32 pdb_addr_hi = u64_hi32(pdb_addr);


@@ -19,9 +19,11 @@
struct page;
struct sg_table;
struct scatterlist;
struct gk20a;
struct nvgpu_mem;
struct nvgpu_gmmu_attrs;
struct nvgpu_mem_priv {
struct page **pages;
@@ -29,6 +31,8 @@ struct nvgpu_mem_priv {
unsigned long flags;
};
+u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl);
/**
* __nvgpu_mem_create_from_pages - Create an nvgpu_mem from physical pages.
*


@@ -27,6 +27,7 @@ struct sg_table;
struct gk20a;
struct nvgpu_allocator;
struct nvgpu_gmmu_attrs;
/*
* Real location of a buffer - nvgpu_aperture_mask() will deduce what will be
@@ -180,6 +181,8 @@ void nvgpu_mem_wr_n(struct gk20a *g, struct nvgpu_mem *mem, u32 offset,
void nvgpu_memset(struct gk20a *g, struct nvgpu_mem *mem, u32 offset,
u32 c, u32 size);
+u64 nvgpu_mem_get_addr(struct gk20a *g, struct nvgpu_mem *mem);
u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture,
u32 sysmem_mask, u32 vidmem_mask);
u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem,


@@ -308,8 +308,8 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g)
for (chid = 0; chid < f->num_channels; chid++) {
f->channel[chid].userd_iova =
-        g->ops.mm.get_iova_addr(g, f->userd.priv.sgt->sgl, 0)
-                + chid * f->userd_entry_size;
+        nvgpu_mem_get_addr(g, &f->userd) +
+                chid * f->userd_entry_size;
f->channel[chid].userd_gpu_va =
f->userd.gpu_va + chid * f->userd_entry_size;


@@ -23,8 +23,11 @@
#include <nvgpu/vgpu/vm.h>
#include <nvgpu/linux/nvgpu_mem.h>
#include "vgpu/vgpu.h"
#include "gk20a/mm_gk20a.h"
#include "gm20b/mm_gm20b.h"
#include "common/linux/vm_priv.h"
@@ -95,7 +98,7 @@ static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm,
struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(d);
struct tegra_vgpu_cmd_msg msg;
struct tegra_vgpu_as_map_params *p = &msg.params.as_map;
-u64 addr = g->ops.mm.get_iova_addr(g, sgt->sgl, flags);
+u64 addr = nvgpu_mem_get_addr_sgl(g, sgt->sgl);
u8 prot;
gk20a_dbg_fn("");
@@ -243,7 +246,7 @@ u64 vgpu_bar1_map(struct gk20a *g, struct sg_table **sgt, u64 size)
{
struct dma_iommu_mapping *mapping =
to_dma_iommu_mapping(dev_from_gk20a(g));
-u64 addr = g->ops.mm.get_iova_addr(g, (*sgt)->sgl, 0);
+u64 addr = nvgpu_mem_get_addr_sgl(g, (*sgt)->sgl);
struct tegra_vgpu_cmd_msg msg;
struct tegra_vgpu_as_map_params *p = &msg.params.as_map;
int err;
@@ -368,6 +371,6 @@ void vgpu_init_mm_ops(struct gpu_ops *gops)
gops->mm.l2_flush = vgpu_mm_l2_flush;
gops->fb.tlb_invalidate = vgpu_mm_tlb_invalidate;
gops->mm.get_physical_addr_bits = gk20a_mm_get_physical_addr_bits;
-gops->mm.get_iova_addr = gk20a_mm_iova_addr;
+gops->mm.gpu_phys_addr = gm20b_gpu_phys_addr;
gops->mm.init_mm_setup_hw = NULL;
}