gpu: nvgpu: Use coherent aperture flag

When using a coherent DMA API we must make sure to program
any aperture fields with the coherent aperture setting. To
do this the nvgpu_aperture_mask() function was modified to
take a third aperture mask argument, a coherent setting, so
that code can use this function to generate coherent aperture
settings.

The aperture choice is somewhat tricky: the default version
of this function uses the state of the DMA API to determine
what aperture to use for SYSMEM: either coherent or
non-coherent internally. Thus a kernel user need only specify
the normal nvgpu_mem struct and the correct mask should be
chosen. Due to many uses of nvgpu_mem structs not created
directly from the DMA API wrapper it's easier to translate
SYSMEM to SYSMEM_COH after creation.

However, the GMMU mapping code will encounter buffers from
userspace with different coherency attributes than the DMA
API. Thus the __nvgpu_aperture_mask() really respects the
aperture setting passed in regardless of the DMA API state.
This aperture setting is pulled from NVGPU_VM_MAP_IO_COHERENT
since this is either passed in from userspace or set by the
kernel when using coherent DMA. The aperture field in attrs
is upgraded to coh if this flag is set.

This change also adds a coherent sysmem mask everywhere that
it can. There's a couple places that do not have a coherent
register field defined yet. These need to eventually be
defined and added.

Lastly the aperture mask code has been moved from the Linux
vm.c code to the general vm.c code since this function has
no Linux dependencies.

Note: depends on https://git-master.nvidia.com/r/1664536 for
new register fields.

JIRA EVLR-2333

Change-Id: I4b347911ecb7c511738563fe6c34d0e6aa380d71
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1655220
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Alex Waterman
2018-02-09 11:57:54 -08:00
committed by mobile promotions
parent 71f53272b2
commit 1170687c33
24 changed files with 184 additions and 103 deletions

View File

@@ -34,30 +34,6 @@
#include "gk20a/gk20a.h"
#include "gk20a/mm_gk20a.h"
/*
 * Pick the register field value that corresponds to an aperture.
 *
 * @g:           GPU instance; queried for NVGPU_MM_HONORS_APERTURE.
 * @aperture:    Aperture to translate.
 * @sysmem_mask: Field value returned for system memory.
 * @vidmem_mask: Field value returned for video memory.
 *
 * Returns sysmem_mask or vidmem_mask as selected below, or 0 (after a
 * warning) when the aperture is APERTURE_INVALID. Any value not listed in
 * the switch also yields 0.
 */
u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture,
u32 sysmem_mask, u32 vidmem_mask)
{
switch (aperture) {
case APERTURE_SYSMEM:
/* some igpus consider system memory vidmem */
return nvgpu_is_enabled(g, NVGPU_MM_HONORS_APERTURE)
? sysmem_mask : vidmem_mask;
case APERTURE_VIDMEM:
/* for dgpus only */
return vidmem_mask;
case APERTURE_INVALID:
/* Warn, then leave the switch so the function returns 0. */
WARN_ON("Bad aperture");
}
return 0;
}
/*
 * Convenience wrapper: look up the aperture stored in @mem and translate it
 * to a register field value via __nvgpu_aperture_mask(). The mask arguments
 * are passed through unchanged.
 */
u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem,
u32 sysmem_mask, u32 vidmem_mask)
{
return __nvgpu_aperture_mask(g, mem->aperture,
sysmem_mask, vidmem_mask);
}
int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem)
{
void *cpu_va;

View File

@@ -166,7 +166,8 @@ struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm,
vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10,
vm_aspace_id(vm),
mapped_buffer->flags,
nvgpu_aperture_str(gk20a_dmabuf_aperture(g, os_buf->dmabuf)));
nvgpu_aperture_str(g,
gk20a_dmabuf_aperture(g, os_buf->dmabuf)));
return mapped_buffer;
}

View File

@@ -634,7 +634,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
page_size >> 10,
nvgpu_gmmu_perm_str(attrs->rw_flag),
attrs->kind_v,
nvgpu_aperture_str(attrs->aperture),
nvgpu_aperture_str(g, attrs->aperture),
attrs->cacheable ? 'C' : '-',
attrs->sparse ? 'S' : '-',
attrs->priv ? 'P' : '-',
@@ -711,6 +711,13 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
attrs.l3_alloc = (bool)(flags & NVGPU_VM_MAP_L3_ALLOC);
/*
* Handle the IO coherency aperture: make sure the .aperture field is
* correct based on the IO coherency flag.
*/
if (attrs.coherent && attrs.aperture == APERTURE_SYSMEM)
attrs.aperture = __APERTURE_SYSMEM_COH;
/*
* Only allocate a new GPU VA range if we haven't already been passed a
* GPU VA range. This facilitates fixed mappings.

View File

@@ -28,6 +28,52 @@
#include "gk20a/gk20a.h"
/*
 * Translate an aperture into the matching register field value.
 *
 * Make sure to use the right coherency aperture if you use this function! This
 * will not add any checks. If you want to simply use the default coherency then
 * use nvgpu_aperture_mask().
 *
 * @g:               GPU instance; queried for NVGPU_MM_HONORS_APERTURE.
 * @aperture:        Aperture to translate. Honoured as passed in, including
 *                   __APERTURE_SYSMEM_COH.
 * @sysmem_mask:     Field value for non-coherent system memory.
 * @sysmem_coh_mask: Field value for coherent system memory.
 * @vidmem_mask:     Field value for video memory.
 *
 * Returns the selected field value, or 0 (after a warning) when @aperture is
 * APERTURE_INVALID.
 */
u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture,
			  u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask)
{
	/*
	 * Reject an invalid aperture before any translation takes place.
	 * Otherwise the VIDMEM override below would hide the error on chips
	 * that do not honour the aperture field and hand back vidmem_mask.
	 */
	if (aperture == APERTURE_INVALID) {
		WARN_ON("Bad aperture");
		return 0;
	}

	/*
	 * Some iGPUs treat sysmem (i.e SoC DRAM) as vidmem. In these cases the
	 * "sysmem" aperture should really be translated to VIDMEM.
	 */
	if (!nvgpu_is_enabled(g, NVGPU_MM_HONORS_APERTURE))
		aperture = APERTURE_VIDMEM;

	switch (aperture) {
	case __APERTURE_SYSMEM_COH:
		return sysmem_coh_mask;
	case APERTURE_SYSMEM:
		return sysmem_mask;
	case APERTURE_VIDMEM:
		return vidmem_mask;
	case APERTURE_INVALID:
		/* Already handled above; listed to keep the switch exhaustive. */
		break;
	}

	return 0;
}
/*
 * Translate the aperture of @mem into a register field value using the
 * driver's default coherency policy.
 *
 * Most of the driver only knows about APERTURE_SYSMEM. When
 * NVGPU_USE_COHERENT_SYSMEM is enabled, a plain sysmem aperture is upgraded
 * to the coherent variant here before the lookup; every other aperture is
 * passed through untouched.
 */
u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem,
			u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask)
{
	enum nvgpu_aperture effective = mem->aperture;

	/* No upgrade needed: coherent sysmem is off, or this is not sysmem. */
	if (!nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ||
	    effective != APERTURE_SYSMEM)
		return __nvgpu_aperture_mask(g, effective, sysmem_mask,
					     sysmem_coh_mask, vidmem_mask);

	return __nvgpu_aperture_mask(g, __APERTURE_SYSMEM_COH, sysmem_mask,
				     sysmem_coh_mask, vidmem_mask);
}
void *nvgpu_sgt_get_next(struct nvgpu_sgt *sgt, void *sgl)
{
return sgt->ops->sgl_next(sgl);

View File

@@ -21,6 +21,7 @@
*/
#include <nvgpu/page_allocator.h>
#include <nvgpu/enabled.h>
#include <nvgpu/log.h>
#include <nvgpu/soc.h>
#include <nvgpu/bus.h>
@@ -155,8 +156,9 @@ int gk20a_bus_bar1_bind(struct gk20a *g, struct nvgpu_mem *bar1_inst)
gk20a_writel(g, bus_bar1_block_r(),
nvgpu_aperture_mask(g, bar1_inst,
bus_bar1_block_target_sys_mem_ncoh_f(),
bus_bar1_block_target_vid_mem_f()) |
bus_bar1_block_target_sys_mem_ncoh_f(),
bus_bar1_block_target_sys_mem_coh_f(),
bus_bar1_block_target_vid_mem_f()) |
bus_bar1_block_mode_virtual_f() |
bus_bar1_block_ptr_f(ptr_v));

View File

@@ -98,8 +98,9 @@ void gk20a_fb_tlb_invalidate(struct gk20a *g, struct nvgpu_mem *pdb)
gk20a_writel(g, fb_mmu_invalidate_pdb_r(),
fb_mmu_invalidate_pdb_addr_f(addr_lo) |
nvgpu_aperture_mask(g, pdb,
fb_mmu_invalidate_pdb_aperture_sys_mem_f(),
fb_mmu_invalidate_pdb_aperture_vid_mem_f()));
fb_mmu_invalidate_pdb_aperture_sys_mem_f(),
fb_mmu_invalidate_pdb_aperture_sys_mem_f(),
fb_mmu_invalidate_pdb_aperture_vid_mem_f()));
gk20a_writel(g, fb_mmu_invalidate_r(),
fb_mmu_invalidate_all_va_true_f() |

View File

@@ -653,6 +653,7 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g,
return -ENOMEM;
aperture = nvgpu_aperture_mask(g, &trace->trace_buf,
ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(),
ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_coherent_f(),
ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f());
if (nvgpu_mem_begin(g, mem))

View File

@@ -3240,8 +3240,9 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
gk20a_writel(g, fifo_runlist_base_r(),
fifo_runlist_base_ptr_f(u64_lo32(runlist_iova >> 12)) |
nvgpu_aperture_mask(g, &runlist->mem[new_buf],
fifo_runlist_base_target_sys_mem_ncoh_f(),
fifo_runlist_base_target_vid_mem_f()));
fifo_runlist_base_target_sys_mem_ncoh_f(),
fifo_runlist_base_target_sys_mem_coh_f(),
fifo_runlist_base_target_vid_mem_f()));
}
gk20a_writel(g, fifo_runlist_r(),
@@ -3763,8 +3764,9 @@ static int gk20a_fifo_commit_userd(struct channel_gk20a *c)
nvgpu_mem_wr32(g, &c->inst_block,
ram_in_ramfc_w() + ram_fc_userd_w(),
nvgpu_aperture_mask(g, &g->fifo.userd,
pbdma_userd_target_sys_mem_ncoh_f(),
pbdma_userd_target_vid_mem_f()) |
pbdma_userd_target_sys_mem_ncoh_f(),
pbdma_userd_target_sys_mem_coh_f(),
pbdma_userd_target_vid_mem_f()) |
pbdma_userd_addr_f(addr_lo));
nvgpu_mem_wr32(g, &c->inst_block,

View File

@@ -745,8 +745,9 @@ static u32 fecs_current_ctx_data(struct gk20a *g, struct nvgpu_mem *inst_block)
u64 ptr = nvgpu_inst_block_addr(g, inst_block) >>
ram_in_base_shift_v();
u32 aperture = nvgpu_aperture_mask(g, inst_block,
gr_fecs_current_ctx_target_sys_mem_ncoh_f(),
gr_fecs_current_ctx_target_vid_mem_f());
gr_fecs_current_ctx_target_sys_mem_ncoh_f(),
gr_fecs_current_ctx_target_sys_mem_coh_f(),
gr_fecs_current_ctx_target_vid_mem_f());
return gr_fecs_current_ctx_ptr_f(u64_lo32(ptr)) | aperture |
gr_fecs_current_ctx_valid_f(1);
@@ -2171,16 +2172,18 @@ void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g)
inst_ptr = nvgpu_inst_block_addr(g, &ucode_info->inst_blk_desc);
gk20a_writel(g, gr_fecs_new_ctx_r(),
gr_fecs_new_ctx_ptr_f(inst_ptr >> 12) |
nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc,
gr_fecs_new_ctx_ptr_f(inst_ptr >> 12) |
nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc,
gr_fecs_new_ctx_target_sys_mem_ncoh_f(),
gr_fecs_new_ctx_target_sys_mem_coh_f(),
gr_fecs_new_ctx_target_vid_mem_f()) |
gr_fecs_new_ctx_valid_m());
gr_fecs_new_ctx_valid_m());
gk20a_writel(g, gr_fecs_arb_ctx_ptr_r(),
gr_fecs_arb_ctx_ptr_ptr_f(inst_ptr >> 12) |
nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc,
gr_fecs_arb_ctx_ptr_ptr_f(inst_ptr >> 12) |
nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc,
gr_fecs_arb_ctx_ptr_target_sys_mem_ncoh_f(),
gr_fecs_arb_ctx_ptr_target_sys_mem_coh_f(),
gr_fecs_arb_ctx_ptr_target_vid_mem_f()));
gk20a_writel(g, gr_fecs_arb_ctx_cmd_r(), 0x7);
@@ -4379,8 +4382,9 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g)
gk20a_writel(g, fb_mmu_debug_wr_r(),
nvgpu_aperture_mask(g, &gr->mmu_wr_mem,
fb_mmu_debug_wr_aperture_sys_mem_ncoh_f(),
fb_mmu_debug_wr_aperture_vid_mem_f()) |
fb_mmu_debug_wr_aperture_sys_mem_ncoh_f(),
fb_mmu_debug_wr_aperture_sys_mem_coh_f(),
fb_mmu_debug_wr_aperture_vid_mem_f()) |
fb_mmu_debug_wr_vol_false_f() |
fb_mmu_debug_wr_addr_f(addr));
@@ -4389,8 +4393,9 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g)
gk20a_writel(g, fb_mmu_debug_rd_r(),
nvgpu_aperture_mask(g, &gr->mmu_rd_mem,
fb_mmu_debug_wr_aperture_sys_mem_ncoh_f(),
fb_mmu_debug_rd_aperture_vid_mem_f()) |
fb_mmu_debug_wr_aperture_sys_mem_ncoh_f(),
fb_mmu_debug_wr_aperture_sys_mem_coh_f(),
fb_mmu_debug_rd_aperture_vid_mem_f()) |
fb_mmu_debug_rd_vol_false_f() |
fb_mmu_debug_rd_addr_f(addr));

View File

@@ -122,8 +122,9 @@ static inline u32 big_valid_pde0_bits(struct gk20a *g,
{
u32 pde0_bits =
nvgpu_aperture_mask(g, pd->mem,
gmmu_pde_aperture_big_sys_mem_ncoh_f(),
gmmu_pde_aperture_big_video_memory_f()) |
gmmu_pde_aperture_big_sys_mem_ncoh_f(),
gmmu_pde_aperture_big_sys_mem_coh_f(),
gmmu_pde_aperture_big_video_memory_f()) |
gmmu_pde_address_big_sys_f(
(u32)(addr >> gmmu_pde_address_shift_v()));
@@ -135,8 +136,9 @@ static inline u32 small_valid_pde1_bits(struct gk20a *g,
{
u32 pde1_bits =
nvgpu_aperture_mask(g, pd->mem,
gmmu_pde_aperture_small_sys_mem_ncoh_f(),
gmmu_pde_aperture_small_video_memory_f()) |
gmmu_pde_aperture_small_sys_mem_ncoh_f(),
gmmu_pde_aperture_small_sys_mem_coh_f(),
gmmu_pde_aperture_small_video_memory_f()) |
gmmu_pde_vol_small_true_f() | /* tbd: why? */
gmmu_pde_address_small_sys_f(
(u32)(addr >> gmmu_pde_address_shift_v()));
@@ -215,6 +217,7 @@ static void __update_pte(struct vm_gk20a *vm,
pte_w[1] = __nvgpu_aperture_mask(g, attrs->aperture,
gmmu_pte_aperture_sys_mem_ncoh_f(),
gmmu_pte_aperture_sys_mem_coh_f(),
gmmu_pte_aperture_video_memory_f()) |
gmmu_pte_kind_f(attrs->kind_v) |
gmmu_pte_comptagline_f((u32)(attrs->ctag >> ctag_shift));
@@ -268,7 +271,7 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm,
page_size >> 10,
nvgpu_gmmu_perm_str(attrs->rw_flag),
attrs->kind_v,
nvgpu_aperture_str(attrs->aperture),
nvgpu_aperture_str(g, attrs->aperture),
attrs->cacheable ? 'C' : '-',
attrs->sparse ? 'S' : '-',
attrs->priv ? 'P' : '-',
@@ -363,11 +366,12 @@ void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block,
gk20a_dbg_info("pde pa=0x%llx", pdb_addr);
nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_lo_w(),
nvgpu_aperture_mask(g, vm->pdb.mem,
ram_in_page_dir_base_target_sys_mem_ncoh_f(),
ram_in_page_dir_base_target_vid_mem_f()) |
ram_in_page_dir_base_vol_true_f() |
ram_in_page_dir_base_lo_f(pdb_addr_lo));
nvgpu_aperture_mask(g, vm->pdb.mem,
ram_in_page_dir_base_target_sys_mem_ncoh_f(),
ram_in_page_dir_base_target_sys_mem_coh_f(),
ram_in_page_dir_base_target_vid_mem_f()) |
ram_in_page_dir_base_vol_true_f() |
ram_in_page_dir_base_lo_f(pdb_addr_lo));
nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_hi_w(),
ram_in_page_dir_base_hi_f(pdb_addr_hi));

View File

@@ -41,6 +41,7 @@ u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem,
u32 lo = (u32)(addr & 0xfffff);
u32 win = nvgpu_aperture_mask(g, mem,
bus_bar0_window_target_sys_mem_noncoherent_f(),
bus_bar0_window_target_sys_mem_coherent_f(),
bus_bar0_window_target_vid_mem_f()) |
bus_bar0_window_base_f(hi);

View File

@@ -25,6 +25,7 @@
#include <nvgpu/timers.h>
#include <nvgpu/bus.h>
#include <nvgpu/mm.h>
#include <nvgpu/enabled.h>
#include "bus_gm20b.h"
#include "gk20a/gk20a.h"
@@ -43,8 +44,9 @@ int gm20b_bus_bar1_bind(struct gk20a *g, struct nvgpu_mem *bar1_inst)
gk20a_writel(g, bus_bar1_block_r(),
nvgpu_aperture_mask(g, bar1_inst,
bus_bar1_block_target_sys_mem_ncoh_f(),
bus_bar1_block_target_vid_mem_f()) |
bus_bar1_block_target_sys_mem_ncoh_f(),
bus_bar1_block_target_sys_mem_coh_f(),
bus_bar1_block_target_vid_mem_f()) |
bus_bar1_block_mode_virtual_f() |
bus_bar1_block_ptr_f(ptr_v));
nvgpu_timeout_init(g, &timeout, 1000, NVGPU_TIMER_RETRY_TIMER);

View File

@@ -32,6 +32,7 @@
#include <nvgpu/atomic.h>
#include <nvgpu/barrier.h>
#include <nvgpu/mm.h>
#include <nvgpu/enabled.h>
#include <nvgpu/hw/gm20b/hw_ccsr_gm20b.h>
#include <nvgpu/hw/gm20b/hw_ram_gm20b.h>
@@ -51,11 +52,12 @@ void channel_gm20b_bind(struct channel_gk20a *c)
gk20a_writel(g, ccsr_channel_inst_r(c->chid),
ccsr_channel_inst_ptr_f(inst_ptr) |
nvgpu_aperture_mask(g, &c->inst_block,
ccsr_channel_inst_target_sys_mem_ncoh_f(),
ccsr_channel_inst_target_vid_mem_f()) |
ccsr_channel_inst_bind_true_f());
ccsr_channel_inst_ptr_f(inst_ptr) |
nvgpu_aperture_mask(g, &c->inst_block,
ccsr_channel_inst_target_sys_mem_ncoh_f(),
ccsr_channel_inst_target_sys_mem_coh_f(),
ccsr_channel_inst_target_vid_mem_f()) |
ccsr_channel_inst_bind_true_f());
gk20a_writel(g, ccsr_channel_r(c->chid),
(gk20a_readl(g, ccsr_channel_r(c->chid)) &

View File

@@ -99,6 +99,7 @@ int bl_bootstrap_sec2(struct nvgpu_pmu *pmu,
nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) |
pwr_pmu_new_instblk_valid_f(1) |
nvgpu_aperture_mask(g, &mm->pmu.inst_block,
pwr_pmu_new_instblk_target_sys_ncoh_f(),
pwr_pmu_new_instblk_target_sys_coh_f(),
pwr_pmu_new_instblk_target_fb_f()));
@@ -165,6 +166,7 @@ void init_pmu_setup_hw1(struct gk20a *g)
nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) |
pwr_pmu_new_instblk_valid_f(1) |
nvgpu_aperture_mask(g, &mm->pmu.inst_block,
pwr_pmu_new_instblk_target_sys_ncoh_f(),
pwr_pmu_new_instblk_target_sys_coh_f(),
pwr_pmu_new_instblk_target_fb_f()));

View File

@@ -25,6 +25,7 @@
#include <nvgpu/dma.h>
#include <nvgpu/bug.h>
#include <nvgpu/log2.h>
#include <nvgpu/enabled.h>
#include "fifo_gp10b.h"
@@ -78,8 +79,9 @@ int channel_gp10b_commit_userd(struct channel_gk20a *c)
nvgpu_mem_wr32(g, &c->inst_block,
ram_in_ramfc_w() + ram_fc_userd_w(),
nvgpu_aperture_mask(g, &g->fifo.userd,
pbdma_userd_target_sys_mem_ncoh_f(),
pbdma_userd_target_vid_mem_f()) |
pbdma_userd_target_sys_mem_ncoh_f(),
pbdma_userd_target_sys_mem_coh_f(),
pbdma_userd_target_vid_mem_f()) |
pbdma_userd_addr_f(addr_lo));
nvgpu_mem_wr32(g, &c->inst_block,

View File

@@ -124,8 +124,9 @@ int gp10b_init_bar2_mm_hw_setup(struct gk20a *g)
gk20a_writel(g, bus_bar2_block_r(),
nvgpu_aperture_mask(g, inst_block,
bus_bar2_block_target_sys_mem_ncoh_f(),
bus_bar2_block_target_vid_mem_f()) |
bus_bar2_block_target_sys_mem_ncoh_f(),
bus_bar2_block_target_sys_mem_coh_f(),
bus_bar2_block_target_vid_mem_f()) |
bus_bar2_block_mode_virtual_f() |
bus_bar2_block_ptr_f(inst_pa));
@@ -148,8 +149,9 @@ static void update_gmmu_pde3_locked(struct vm_gk20a *vm,
phys_addr >>= gmmu_new_pde_address_shift_v();
pde_v[0] |= nvgpu_aperture_mask(g, pd->mem,
gmmu_new_pde_aperture_sys_mem_ncoh_f(),
gmmu_new_pde_aperture_video_memory_f());
gmmu_new_pde_aperture_sys_mem_ncoh_f(),
gmmu_new_pde_aperture_sys_mem_coh_f(),
gmmu_new_pde_aperture_video_memory_f());
pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(phys_addr));
pde_v[0] |= gmmu_new_pde_vol_true_f();
pde_v[1] |= phys_addr >> 24;
@@ -194,6 +196,7 @@ static void update_gmmu_pde0_locked(struct vm_gk20a *vm,
gmmu_new_dual_pde_address_small_sys_f(small_addr);
pde_v[2] |= nvgpu_aperture_mask(g, pd->mem,
gmmu_new_dual_pde_aperture_small_sys_mem_ncoh_f(),
gmmu_new_dual_pde_aperture_small_sys_mem_coh_f(),
gmmu_new_dual_pde_aperture_small_video_memory_f());
pde_v[2] |= gmmu_new_dual_pde_vol_small_true_f();
pde_v[3] |= small_addr >> 24;
@@ -204,6 +207,7 @@ static void update_gmmu_pde0_locked(struct vm_gk20a *vm,
pde_v[0] |= gmmu_new_dual_pde_vol_big_true_f();
pde_v[0] |= nvgpu_aperture_mask(g, pd->mem,
gmmu_new_dual_pde_aperture_big_sys_mem_ncoh_f(),
gmmu_new_dual_pde_aperture_big_sys_mem_coh_f(),
gmmu_new_dual_pde_aperture_big_video_memory_f());
pde_v[1] |= big_addr >> 28;
}
@@ -240,11 +244,10 @@ static void __update_pte(struct vm_gk20a *vm,
gmmu_new_pte_address_sys_f(phys_shifted) :
gmmu_new_pte_address_vid_f(phys_shifted);
u32 pte_tgt = __nvgpu_aperture_mask(g,
attrs->aperture,
attrs->coherent ?
gmmu_new_pte_aperture_sys_mem_coh_f() :
gmmu_new_pte_aperture_sys_mem_ncoh_f(),
gmmu_new_pte_aperture_video_memory_f());
attrs->aperture,
gmmu_new_pte_aperture_sys_mem_ncoh_f(),
gmmu_new_pte_aperture_sys_mem_coh_f(),
gmmu_new_pte_aperture_video_memory_f());
pte_w[0] = pte_valid | pte_addr | pte_tgt;
@@ -306,7 +309,7 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm,
page_size >> 10,
nvgpu_gmmu_perm_str(attrs->rw_flag),
attrs->kind_v,
nvgpu_aperture_str(attrs->aperture),
nvgpu_aperture_str(g, attrs->aperture),
attrs->cacheable ? 'C' : '-',
attrs->sparse ? 'S' : '-',
attrs->priv ? 'P' : '-',
@@ -428,8 +431,9 @@ void gp10b_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block,
nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_lo_w(),
nvgpu_aperture_mask(g, vm->pdb.mem,
ram_in_page_dir_base_target_sys_mem_ncoh_f(),
ram_in_page_dir_base_target_vid_mem_f()) |
ram_in_page_dir_base_target_sys_mem_ncoh_f(),
ram_in_page_dir_base_target_sys_mem_coh_f(),
ram_in_page_dir_base_target_vid_mem_f()) |
ram_in_page_dir_base_vol_true_f() |
ram_in_big_page_size_64kb_f() |
ram_in_page_dir_base_lo_f(pdb_addr_lo) |

View File

@@ -27,9 +27,10 @@
#include <nvgpu/nvgpu_common.h>
#include <nvgpu/kmem.h>
#include <nvgpu/nvgpu_mem.h>
#include <nvgpu/acr/nvgpu_acr.h>
#include <nvgpu/firmware.h>
#include <nvgpu/mm.h>
#include <nvgpu/enabled.h>
#include <nvgpu/acr/nvgpu_acr.h>
#include "gk20a/gk20a.h"
#include "acr_gv11b.h"
@@ -220,7 +221,9 @@ static int bl_bootstrap(struct nvgpu_pmu *pmu,
pwr_pmu_new_instblk_ptr_f(
nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) |
pwr_pmu_new_instblk_valid_f(1) |
pwr_pmu_new_instblk_target_sys_ncoh_f());
(nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ?
pwr_pmu_new_instblk_target_sys_coh_f() :
pwr_pmu_new_instblk_target_sys_ncoh_f())) ;
/*copy bootloader interface structure to dmem*/
nvgpu_flcn_copy_to_dmem(pmu->flcn, 0, (u8 *)pbl_desc,

View File

@@ -31,14 +31,14 @@
#include <nvgpu/dma.h>
#include <nvgpu/mm.h>
#include <nvgpu/sizes.h>
#include <nvgpu/enabled.h>
#include <nvgpu/log.h>
#include <nvgpu/bug.h>
#include "gk20a/gk20a.h"
#include "gk20a/css_gr_gk20a.h"
#include "css_gr_gv11b.h"
#include <nvgpu/log.h>
#include <nvgpu/bug.h>
#include <nvgpu/hw/gv11b/hw_perf_gv11b.h>
#include <nvgpu/hw/gv11b/hw_mc_gv11b.h>
@@ -144,6 +144,7 @@ int gv11b_css_hw_enable_snapshot(struct channel_gk20a *ch,
perf_pmasys_mem_block_valid_true_f() |
nvgpu_aperture_mask(g, &g->mm.hwpm.inst_block,
perf_pmasys_mem_block_target_sys_ncoh_f(),
perf_pmasys_mem_block_target_sys_coh_f(),
perf_pmasys_mem_block_target_lfb_f()));

View File

@@ -59,11 +59,12 @@ int gv11b_perfbuf_enable_locked(struct gk20a *g, u64 offset, u32 size)
inst_pa_page = nvgpu_inst_block_addr(g, &mm->perfbuf.inst_block) >> 12;
gk20a_writel(g, perf_pmasys_mem_block_r(),
perf_pmasys_mem_block_base_f(inst_pa_page) |
perf_pmasys_mem_block_valid_true_f() |
nvgpu_aperture_mask(g, &mm->perfbuf.inst_block,
+ perf_pmasys_mem_block_target_sys_ncoh_f(),
+ perf_pmasys_mem_block_target_lfb_f()));
perf_pmasys_mem_block_base_f(inst_pa_page) |
perf_pmasys_mem_block_valid_true_f() |
nvgpu_aperture_mask(g, &mm->perfbuf.inst_block,
perf_pmasys_mem_block_target_sys_ncoh_f(),
perf_pmasys_mem_block_target_sys_coh_f(),
perf_pmasys_mem_block_target_lfb_f()));
gk20a_idle(g);
return 0;

View File

@@ -101,12 +101,14 @@ void gv11b_get_ch_runlist_entry(struct channel_gk20a *c, u32 *runlist)
c->runqueue_sel) |
ram_rl_entry_chan_userd_target_f(
nvgpu_aperture_mask(g, &g->fifo.userd,
ram_rl_entry_chan_userd_target_sys_mem_ncoh_v(),
ram_rl_entry_chan_userd_target_vid_mem_v())) |
ram_rl_entry_chan_userd_target_sys_mem_ncoh_v(),
ram_rl_entry_chan_userd_target_sys_mem_coh_v(),
ram_rl_entry_chan_userd_target_vid_mem_v())) |
ram_rl_entry_chan_inst_target_f(
nvgpu_aperture_mask(g, &c->inst_block,
ram_rl_entry_chan_inst_target_sys_mem_ncoh_v(),
ram_rl_entry_chan_inst_target_vid_mem_v()));
ram_rl_entry_chan_inst_target_sys_mem_ncoh_v(),
ram_rl_entry_chan_inst_target_sys_mem_coh_v(),
ram_rl_entry_chan_inst_target_vid_mem_v()));
addr_lo = u64_lo32(c->userd_iova) >>
ram_rl_entry_chan_userd_ptr_align_shift_v();

View File

@@ -26,6 +26,7 @@
#include <nvgpu/dma.h>
#include <nvgpu/log.h>
#include <nvgpu/mm.h>
#include <nvgpu/enabled.h>
#include "gk20a/gk20a.h"
#include "gk20a/mm_gk20a.h"
@@ -292,8 +293,9 @@ int gv11b_init_bar2_mm_hw_setup(struct gk20a *g)
gk20a_writel(g, bus_bar2_block_r(),
nvgpu_aperture_mask(g, inst_block,
bus_bar2_block_target_sys_mem_ncoh_f(),
bus_bar2_block_target_vid_mem_f()) |
bus_bar2_block_target_sys_mem_ncoh_f(),
bus_bar2_block_target_sys_mem_coh_f(),
bus_bar2_block_target_vid_mem_f()) |
bus_bar2_block_mode_virtual_f() |
bus_bar2_block_ptr_f(inst_pa));

View File

@@ -195,9 +195,11 @@ int gv11b_pmu_bootstrap(struct nvgpu_pmu *pmu)
gk20a_writel(g, pwr_pmu_new_instblk_r(),
pwr_pmu_new_instblk_ptr_f(
nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> ALIGN_4KB)
| pwr_pmu_new_instblk_valid_f(1)
| pwr_pmu_new_instblk_target_sys_ncoh_f());
nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> ALIGN_4KB) |
pwr_pmu_new_instblk_valid_f(1) |
(nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ?
pwr_pmu_new_instblk_target_sys_coh_f() :
pwr_pmu_new_instblk_target_sys_ncoh_f()));
/* TBD: load all other surfaces */
g->ops.pmu_ver.set_pmu_cmdline_args_trace_size(

View File

@@ -177,8 +177,9 @@ void gv11b_subctx_commit_pdb(struct vm_gk20a *vm,
u32 pdb_addr_lo, pdb_addr_hi;
u64 pdb_addr;
u32 aperture = nvgpu_aperture_mask(g, vm->pdb.mem,
ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(),
ram_in_sc_page_dir_base_target_vid_mem_v());
ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(),
ram_in_sc_page_dir_base_target_sys_mem_coh_v(),
ram_in_sc_page_dir_base_target_vid_mem_v());
pdb_addr = nvgpu_mem_get_addr(g, vm->pdb.mem);
pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());

View File

@@ -25,6 +25,7 @@
#include <nvgpu/types.h>
#include <nvgpu/list.h>
#include <nvgpu/enabled.h>
#ifdef __KERNEL__
#include <nvgpu/linux/nvgpu_mem.h>
@@ -51,6 +52,10 @@ struct nvgpu_page_alloc;
/*
 * Memory aperture that a buffer or mapping targets.
 *
 * Only APERTURE_INVALID has an explicit value; the remaining enumerators are
 * numbered implicitly in declaration order, so inserting a member changes the
 * numeric value of every enumerator after it.
 */
enum nvgpu_aperture {
APERTURE_INVALID = 0, /* unallocated or N/A */
APERTURE_SYSMEM,
/* Don't use directly. Use APERTURE_SYSMEM, this is used internally. */
__APERTURE_SYSMEM_COH,
APERTURE_VIDMEM
};
@@ -187,12 +192,18 @@ nvgpu_mem_from_clear_list_entry(struct nvgpu_list_node *node)
clear_list_entry));
};
static inline const char *nvgpu_aperture_str(enum nvgpu_aperture aperture)
static inline const char *nvgpu_aperture_str(struct gk20a *g,
enum nvgpu_aperture aperture)
{
switch (aperture) {
case APERTURE_INVALID: return "INVAL";
case APERTURE_SYSMEM: return "SYSMEM";
case APERTURE_VIDMEM: return "VIDMEM";
case APERTURE_INVALID:
return "INVAL";
case APERTURE_SYSMEM:
return "SYSMEM";
case __APERTURE_SYSMEM_COH:
return "SYSCOH";
case APERTURE_VIDMEM:
return "VIDMEM";
};
return "UNKNOWN";
}
@@ -322,9 +333,9 @@ u64 nvgpu_mem_get_addr(struct gk20a *g, struct nvgpu_mem *mem);
u64 nvgpu_mem_get_phys_addr(struct gk20a *g, struct nvgpu_mem *mem);
u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture,
u32 sysmem_mask, u32 vidmem_mask);
u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask);
u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem,
u32 sysmem_mask, u32 vidmem_mask);
u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask);
u64 nvgpu_mem_iommu_translate(struct gk20a *g, u64 phys);