gpu: nvgpu: Enable IO coherency on GV100

This reverts commit 848af2ce6d.

This is the latest in a series of reverts of reverts; the net effect is that IO coherence is enabled again.

JIRA EVLR-2333

Change-Id: Ibf97dce2f892e48a1200a06cd38a1c5d9603be04
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1669722
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Alex Waterman
2018-03-06 10:43:16 -08:00
committed by mobile promotions
parent f85a0d3e00
commit 418f31cd91
28 changed files with 275 additions and 126 deletions

View File

@@ -226,6 +226,16 @@ int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags,
WARN_ON(1); WARN_ON(1);
} }
/*
* WAR for IO coherent chips: the DMA API does not seem to generate
* mappings that work correctly. Unclear why - Bug ID: 2040115.
*
* Basically we pass NO_KERNEL_MAPPING so that the DMA API does not create
* a kernel mapping, and then make a vmap() ourselves.
*/
if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
flags |= NVGPU_DMA_NO_KERNEL_MAPPING;
/* /*
* Before the debug print so we see this in the total. But during * Before the debug print so we see this in the total. But during
* cleanup in the fail path this has to be subtracted. * cleanup in the fail path this has to be subtracted.
@@ -260,7 +270,17 @@ int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags,
iova, size, flags); iova, size, flags);
} }
if (err) if (err)
goto fail_free; goto fail_free_dma;
if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) {
mem->cpu_va = vmap(mem->priv.pages,
size >> PAGE_SHIFT,
0, PAGE_KERNEL);
if (!mem->cpu_va) {
err = -ENOMEM;
goto fail_free_sgt;
}
}
mem->aligned_size = size; mem->aligned_size = size;
mem->aperture = APERTURE_SYSMEM; mem->aperture = APERTURE_SYSMEM;
@@ -270,12 +290,14 @@ int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags,
return 0; return 0;
fail_free: fail_free_sgt:
g->dma_memory_used -= mem->aligned_size; nvgpu_free_sgtable(g, &mem->priv.sgt);
fail_free_dma:
dma_free_attrs(d, size, alloc_ret, iova, NVGPU_DMA_ATTR(dma_attrs)); dma_free_attrs(d, size, alloc_ret, iova, NVGPU_DMA_ATTR(dma_attrs));
mem->cpu_va = NULL; mem->cpu_va = NULL;
mem->priv.sgt = NULL; mem->priv.sgt = NULL;
mem->size = 0; mem->size = 0;
g->dma_memory_used -= mem->aligned_size;
return err; return err;
} }
@@ -476,6 +498,12 @@ static void nvgpu_dma_free_sys(struct gk20a *g, struct nvgpu_mem *mem)
if (!(mem->mem_flags & NVGPU_MEM_FLAG_SHADOW_COPY) && if (!(mem->mem_flags & NVGPU_MEM_FLAG_SHADOW_COPY) &&
!(mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA) && !(mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA) &&
(mem->cpu_va || mem->priv.pages)) { (mem->cpu_va || mem->priv.pages)) {
/*
* Free side of WAR for bug 2040115.
*/
if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
vunmap(mem->cpu_va);
if (mem->priv.flags) { if (mem->priv.flags) {
NVGPU_DEFINE_DMA_ATTRS(dma_attrs); NVGPU_DEFINE_DMA_ATTRS(dma_attrs);

View File

@@ -20,6 +20,7 @@
#include <linux/of.h> #include <linux/of.h>
#include <linux/of_device.h> #include <linux/of_device.h>
#include <linux/of_platform.h> #include <linux/of_platform.h>
#include <linux/of_address.h>
#include <linux/interrupt.h> #include <linux/interrupt.h>
#include <linux/pm_runtime.h> #include <linux/pm_runtime.h>
#include <linux/reset.h> #include <linux/reset.h>
@@ -1111,6 +1112,7 @@ static int gk20a_probe(struct platform_device *dev)
struct gk20a *gk20a; struct gk20a *gk20a;
int err; int err;
struct gk20a_platform *platform = NULL; struct gk20a_platform *platform = NULL;
struct device_node *np;
if (dev->dev.of_node) { if (dev->dev.of_node) {
const struct of_device_id *match; const struct of_device_id *match;
@@ -1151,6 +1153,12 @@ static int gk20a_probe(struct platform_device *dev)
if (err) if (err)
goto return_err; goto return_err;
np = nvgpu_get_node(gk20a);
if (of_dma_is_coherent(np)) {
__nvgpu_set_enabled(gk20a, NVGPU_USE_COHERENT_SYSMEM, true);
__nvgpu_set_enabled(gk20a, NVGPU_SUPPORT_IO_COHERENCE, true);
}
if (nvgpu_platform_is_simulation(gk20a)) if (nvgpu_platform_is_simulation(gk20a))
__nvgpu_set_enabled(gk20a, NVGPU_IS_FMODEL, true); __nvgpu_set_enabled(gk20a, NVGPU_IS_FMODEL, true);

View File

@@ -34,39 +34,24 @@
#include "gk20a/gk20a.h" #include "gk20a/gk20a.h"
#include "gk20a/mm_gk20a.h" #include "gk20a/mm_gk20a.h"
/*
 * Pick the register field value that matches @aperture.
 *
 * @sysmem_mask is returned for APERTURE_SYSMEM only when the
 * NVGPU_MM_HONORS_APERTURE flag is enabled; otherwise sysmem is reported
 * with @vidmem_mask. APERTURE_VIDMEM always yields @vidmem_mask.
 * APERTURE_INVALID triggers a warning and, like any value not listed in the
 * switch, yields 0.
 */
u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture,
		u32 sysmem_mask, u32 vidmem_mask)
{
	u32 mask = 0;

	switch (aperture) {
	case APERTURE_SYSMEM:
		/* some igpus consider system memory vidmem */
		if (nvgpu_is_enabled(g, NVGPU_MM_HONORS_APERTURE))
			mask = sysmem_mask;
		else
			mask = vidmem_mask;
		break;
	case APERTURE_VIDMEM:
		/* for dgpus only */
		mask = vidmem_mask;
		break;
	case APERTURE_INVALID:
		WARN_ON("Bad aperture");
		break;
	}

	return mask;
}
/*
 * Convenience wrapper: look up the aperture recorded in @mem and hand it to
 * __nvgpu_aperture_mask() together with the caller's two field values.
 */
u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem,
		u32 sysmem_mask, u32 vidmem_mask)
{
	enum nvgpu_aperture target = mem->aperture;

	return __nvgpu_aperture_mask(g, target, sysmem_mask, vidmem_mask);
}
int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem) int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem)
{ {
void *cpu_va; void *cpu_va;
pgprot_t prot = nvgpu_is_enabled(g, NVGPU_DMA_COHERENT) ? PAGE_KERNEL : pgprot_t prot = nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ?
PAGE_KERNEL :
pgprot_writecombine(PAGE_KERNEL); pgprot_writecombine(PAGE_KERNEL);
if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin) if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin)
return 0; return 0;
/*
* WAR for bug 2040115: every sysmem buffer already has a coherent
* vmap(), so there is nothing to map here. The prot settings are left alone since
* eventually this should be deleted.
*/
if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
return 0;
/* /*
* A CPU mapping is implicitly made for all SYSMEM DMA allocations that * A CPU mapping is implicitly made for all SYSMEM DMA allocations that
* don't have NVGPU_DMA_NO_KERNEL_MAPPING. Thus we don't need to make * don't have NVGPU_DMA_NO_KERNEL_MAPPING. Thus we don't need to make
@@ -96,6 +81,13 @@ void nvgpu_mem_end(struct gk20a *g, struct nvgpu_mem *mem)
if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin) if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin)
return; return;
/*
* WAR for bug 2040115: skip this since the map will be taken care of
* during the free in the DMA API.
*/
if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
return;
/* /*
* Similar to nvgpu_mem_begin() we don't need to unmap the CPU mapping * Similar to nvgpu_mem_begin() we don't need to unmap the CPU mapping
* already made by the DMA API. * already made by the DMA API.
@@ -315,7 +307,8 @@ void nvgpu_memset(struct gk20a *g, struct nvgpu_mem *mem, u32 offset,
*/ */
u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl) u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl)
{ {
if (!nvgpu_iommuable(g)) if (nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG) ||
!nvgpu_iommuable(g))
return g->ops.mm.gpu_phys_addr(g, NULL, sg_phys(sgl)); return g->ops.mm.gpu_phys_addr(g, NULL, sg_phys(sgl));
if (sg_dma_address(sgl) == 0) if (sg_dma_address(sgl) == 0)
@@ -415,8 +408,12 @@ int nvgpu_mem_create_from_mem(struct gk20a *g,
/* /*
* Re-use the CPU mapping only if the mapping was made by the DMA API. * Re-use the CPU mapping only if the mapping was made by the DMA API.
*
* Bug 2040115: the DMA API wrapper makes the mapping that we should
* re-use.
*/ */
if (!(src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING)) if (!(src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING) ||
nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
dest->cpu_va = src->cpu_va + (PAGE_SIZE * start_page); dest->cpu_va = src->cpu_va + (PAGE_SIZE * start_page);
dest->priv.pages = src->priv.pages + start_page; dest->priv.pages = src->priv.pages + start_page;

View File

@@ -17,13 +17,13 @@
#include <linux/pci.h> #include <linux/pci.h>
#include <linux/interrupt.h> #include <linux/interrupt.h>
#include <linux/pm_runtime.h> #include <linux/pm_runtime.h>
#include <linux/of_platform.h>
#include <linux/of_address.h>
#include <nvgpu/nvgpu_common.h> #include <nvgpu/nvgpu_common.h>
#include <nvgpu/kmem.h> #include <nvgpu/kmem.h>
#include <nvgpu/enabled.h> #include <nvgpu/enabled.h>
#include <nvgpu/nvlink.h> #include <nvgpu/nvlink.h>
#include <linux/of_platform.h>
#include <linux/of_address.h>
#include "gk20a/gk20a.h" #include "gk20a/gk20a.h"
#include "clk/clk.h" #include "clk/clk.h"
@@ -566,6 +566,12 @@ static int nvgpu_pci_probe(struct pci_dev *pdev,
platform->g = g; platform->g = g;
l->dev = &pdev->dev; l->dev = &pdev->dev;
np = nvgpu_get_node(g);
if (of_dma_is_coherent(np)) {
__nvgpu_set_enabled(g, NVGPU_USE_COHERENT_SYSMEM, true);
__nvgpu_set_enabled(g, NVGPU_SUPPORT_IO_COHERENCE, true);
}
err = pci_enable_device(pdev); err = pci_enable_device(pdev);
if (err) if (err)
return err; return err;
@@ -644,13 +650,6 @@ static int nvgpu_pci_probe(struct pci_dev *pdev,
g->mm.has_physical_mode = false; g->mm.has_physical_mode = false;
np = nvgpu_get_node(g);
if (of_dma_is_coherent(np)) {
__nvgpu_set_enabled(g, NVGPU_DMA_COHERENT, true);
__nvgpu_set_enabled(g, NVGPU_SUPPORT_IO_COHERENCE, true);
}
return 0; return 0;
} }

View File

@@ -166,7 +166,8 @@ struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm,
vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10, vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10,
vm_aspace_id(vm), vm_aspace_id(vm),
mapped_buffer->flags, mapped_buffer->flags,
nvgpu_aperture_str(gk20a_dmabuf_aperture(g, os_buf->dmabuf))); nvgpu_aperture_str(g,
gk20a_dmabuf_aperture(g, os_buf->dmabuf)));
return mapped_buffer; return mapped_buffer;
} }

View File

@@ -79,6 +79,13 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm,
if (!sgt) if (!sgt)
return -ENOMEM; return -ENOMEM;
/*
* If the GPU is IO coherent and the DMA API is giving us IO coherent
* CPU mappings then we gotta make sure we use the IO coherent aperture.
*/
if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
flags |= NVGPU_VM_MAP_IO_COHERENT;
nvgpu_mutex_acquire(&vm->update_gmmu_lock); nvgpu_mutex_acquire(&vm->update_gmmu_lock);
vaddr = g->ops.mm.gmmu_map(vm, addr, vaddr = g->ops.mm.gmmu_map(vm, addr,
sgt, /* sg list */ sgt, /* sg list */
@@ -627,7 +634,7 @@ static int __nvgpu_gmmu_update_page_table(struct vm_gk20a *vm,
page_size >> 10, page_size >> 10,
nvgpu_gmmu_perm_str(attrs->rw_flag), nvgpu_gmmu_perm_str(attrs->rw_flag),
attrs->kind_v, attrs->kind_v,
nvgpu_aperture_str(attrs->aperture), nvgpu_aperture_str(g, attrs->aperture),
attrs->cacheable ? 'C' : '-', attrs->cacheable ? 'C' : '-',
attrs->sparse ? 'S' : '-', attrs->sparse ? 'S' : '-',
attrs->priv ? 'P' : '-', attrs->priv ? 'P' : '-',
@@ -704,6 +711,13 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
attrs.l3_alloc = (bool)(flags & NVGPU_VM_MAP_L3_ALLOC); attrs.l3_alloc = (bool)(flags & NVGPU_VM_MAP_L3_ALLOC);
/*
* Handle the IO coherency aperture: make sure the .aperture field is
* correct based on the IO coherency flag.
*/
if (attrs.coherent && attrs.aperture == APERTURE_SYSMEM)
attrs.aperture = __APERTURE_SYSMEM_COH;
/* /*
* Only allocate a new GPU VA range if we haven't already been passed a * Only allocate a new GPU VA range if we haven't already been passed a
* GPU VA range. This facilitates fixed mappings. * GPU VA range. This facilitates fixed mappings.

View File

@@ -28,6 +28,53 @@
#include "gk20a/gk20a.h" #include "gk20a/gk20a.h"
/*
 * Translate @aperture into one of the caller-supplied register field values.
 *
 * Make sure to use the right coherency aperture if you use this function! This
 * will not add any checks. If you want to simply use the default coherency then
 * use nvgpu_aperture_mask().
 *
 * @g:               GPU whose NVGPU_MM_HONORS_APERTURE flag is consulted.
 * @aperture:        aperture to translate.
 * @sysmem_mask:     value returned for APERTURE_SYSMEM.
 * @sysmem_coh_mask: value returned for __APERTURE_SYSMEM_COH.
 * @vidmem_mask:     value returned for APERTURE_VIDMEM, and for every valid
 *                   aperture when NVGPU_MM_HONORS_APERTURE is not enabled.
 *
 * Returns the selected mask. APERTURE_INVALID warns and returns 0, and any
 * value not named in the switch also returns 0.
 */
u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture,
		u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask)
{
	/*
	 * Reject an invalid aperture before the translation below. Otherwise
	 * the override to VIDMEM would hide the bad value: the warning would
	 * never fire and the caller would get vidmem_mask back.
	 */
	if (aperture == APERTURE_INVALID) {
		WARN_ON("Bad aperture");
		return 0;
	}

	/*
	 * Some iGPUs treat sysmem (i.e SoC DRAM) as vidmem. In these cases the
	 * "sysmem" aperture should really be translated to VIDMEM.
	 */
	if (!nvgpu_is_enabled(g, NVGPU_MM_HONORS_APERTURE))
		aperture = APERTURE_VIDMEM;

	switch (aperture) {
	case __APERTURE_SYSMEM_COH:
		return sysmem_coh_mask;
	case APERTURE_SYSMEM:
		return sysmem_mask;
	case APERTURE_VIDMEM:
		return vidmem_mask;
	case APERTURE_INVALID:
		/* Already handled above; listed so the switch stays exhaustive. */
		break;
	}

	return 0;
}
/*
 * Select the register field value for the aperture recorded in @mem.
 *
 * When NVGPU_USE_COHERENT_SYSMEM is enabled, a plain APERTURE_SYSMEM buffer is
 * promoted to __APERTURE_SYSMEM_COH before the lookup, so callers do not have
 * to know about the coherent/non-coherent split themselves. The actual
 * selection is done by __nvgpu_aperture_mask().
 */
u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem,
		u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask)
{
	enum nvgpu_aperture effective = mem->aperture;

	if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) {
		/* Only plain sysmem is promoted; other apertures pass through. */
		if (effective == APERTURE_SYSMEM)
			effective = __APERTURE_SYSMEM_COH;
	}

	return __nvgpu_aperture_mask(g, effective, sysmem_mask,
				     sysmem_coh_mask, vidmem_mask);
}
struct nvgpu_sgl *nvgpu_sgt_get_next(struct nvgpu_sgt *sgt, struct nvgpu_sgl *nvgpu_sgt_get_next(struct nvgpu_sgt *sgt,
struct nvgpu_sgl *sgl) struct nvgpu_sgl *sgl)
{ {

View File

@@ -21,6 +21,7 @@
*/ */
#include <nvgpu/page_allocator.h> #include <nvgpu/page_allocator.h>
#include <nvgpu/enabled.h>
#include <nvgpu/log.h> #include <nvgpu/log.h>
#include <nvgpu/soc.h> #include <nvgpu/soc.h>
#include <nvgpu/bus.h> #include <nvgpu/bus.h>
@@ -155,8 +156,9 @@ int gk20a_bus_bar1_bind(struct gk20a *g, struct nvgpu_mem *bar1_inst)
gk20a_writel(g, bus_bar1_block_r(), gk20a_writel(g, bus_bar1_block_r(),
nvgpu_aperture_mask(g, bar1_inst, nvgpu_aperture_mask(g, bar1_inst,
bus_bar1_block_target_sys_mem_ncoh_f(), bus_bar1_block_target_sys_mem_ncoh_f(),
bus_bar1_block_target_vid_mem_f()) | bus_bar1_block_target_sys_mem_coh_f(),
bus_bar1_block_target_vid_mem_f()) |
bus_bar1_block_mode_virtual_f() | bus_bar1_block_mode_virtual_f() |
bus_bar1_block_ptr_f(ptr_v)); bus_bar1_block_ptr_f(ptr_v));

View File

@@ -98,8 +98,9 @@ void gk20a_fb_tlb_invalidate(struct gk20a *g, struct nvgpu_mem *pdb)
gk20a_writel(g, fb_mmu_invalidate_pdb_r(), gk20a_writel(g, fb_mmu_invalidate_pdb_r(),
fb_mmu_invalidate_pdb_addr_f(addr_lo) | fb_mmu_invalidate_pdb_addr_f(addr_lo) |
nvgpu_aperture_mask(g, pdb, nvgpu_aperture_mask(g, pdb,
fb_mmu_invalidate_pdb_aperture_sys_mem_f(), fb_mmu_invalidate_pdb_aperture_sys_mem_f(),
fb_mmu_invalidate_pdb_aperture_vid_mem_f())); fb_mmu_invalidate_pdb_aperture_sys_mem_f(),
fb_mmu_invalidate_pdb_aperture_vid_mem_f()));
gk20a_writel(g, fb_mmu_invalidate_r(), gk20a_writel(g, fb_mmu_invalidate_r(),
fb_mmu_invalidate_all_va_true_f() | fb_mmu_invalidate_all_va_true_f() |

View File

@@ -653,6 +653,7 @@ int gk20a_fecs_trace_bind_channel(struct gk20a *g,
return -ENOMEM; return -ENOMEM;
aperture = nvgpu_aperture_mask(g, &trace->trace_buf, aperture = nvgpu_aperture_mask(g, &trace->trace_buf,
ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(), ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_noncoherent_f(),
ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_sys_mem_coherent_f(),
ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f()); ctxsw_prog_main_image_context_timestamp_buffer_ptr_hi_target_vid_mem_f());
if (nvgpu_mem_begin(g, mem)) if (nvgpu_mem_begin(g, mem))

View File

@@ -28,6 +28,7 @@
#include <nvgpu/dma.h> #include <nvgpu/dma.h>
#include <nvgpu/timers.h> #include <nvgpu/timers.h>
#include <nvgpu/semaphore.h> #include <nvgpu/semaphore.h>
#include <nvgpu/enabled.h>
#include <nvgpu/kmem.h> #include <nvgpu/kmem.h>
#include <nvgpu/log.h> #include <nvgpu/log.h>
#include <nvgpu/soc.h> #include <nvgpu/soc.h>
@@ -666,11 +667,13 @@ static void fifo_engine_exception_status(struct gk20a *g,
static int init_runlist(struct gk20a *g, struct fifo_gk20a *f) static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
{ {
struct fifo_runlist_info_gk20a *runlist; struct fifo_runlist_info_gk20a *runlist;
struct fifo_engine_info_gk20a *engine_info;
unsigned int runlist_id; unsigned int runlist_id;
u32 i; u32 i;
size_t runlist_size; size_t runlist_size;
u32 active_engine_id, pbdma_id, engine_id; u32 active_engine_id, pbdma_id, engine_id;
struct fifo_engine_info_gk20a *engine_info; int flags = nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG) ?
NVGPU_DMA_FORCE_CONTIGUOUS : 0;
nvgpu_log_fn(g, " "); nvgpu_log_fn(g, " ");
@@ -705,8 +708,9 @@ static int init_runlist(struct gk20a *g, struct fifo_gk20a *f)
f->num_runlist_entries, runlist_size); f->num_runlist_entries, runlist_size);
for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) { for (i = 0; i < MAX_RUNLIST_BUFFERS; i++) {
int err = nvgpu_dma_alloc_sys(g, runlist_size, int err = nvgpu_dma_alloc_flags_sys(g, flags,
&runlist->mem[i]); runlist_size,
&runlist->mem[i]);
if (err) { if (err) {
nvgpu_err(g, "memory allocation failed"); nvgpu_err(g, "memory allocation failed");
goto clean_up_runlist; goto clean_up_runlist;
@@ -3240,8 +3244,9 @@ static int gk20a_fifo_update_runlist_locked(struct gk20a *g, u32 runlist_id,
gk20a_writel(g, fifo_runlist_base_r(), gk20a_writel(g, fifo_runlist_base_r(),
fifo_runlist_base_ptr_f(u64_lo32(runlist_iova >> 12)) | fifo_runlist_base_ptr_f(u64_lo32(runlist_iova >> 12)) |
nvgpu_aperture_mask(g, &runlist->mem[new_buf], nvgpu_aperture_mask(g, &runlist->mem[new_buf],
fifo_runlist_base_target_sys_mem_ncoh_f(), fifo_runlist_base_target_sys_mem_ncoh_f(),
fifo_runlist_base_target_vid_mem_f())); fifo_runlist_base_target_sys_mem_coh_f(),
fifo_runlist_base_target_vid_mem_f()));
} }
gk20a_writel(g, fifo_runlist_r(), gk20a_writel(g, fifo_runlist_r(),
@@ -3763,8 +3768,9 @@ static int gk20a_fifo_commit_userd(struct channel_gk20a *c)
nvgpu_mem_wr32(g, &c->inst_block, nvgpu_mem_wr32(g, &c->inst_block,
ram_in_ramfc_w() + ram_fc_userd_w(), ram_in_ramfc_w() + ram_fc_userd_w(),
nvgpu_aperture_mask(g, &g->fifo.userd, nvgpu_aperture_mask(g, &g->fifo.userd,
pbdma_userd_target_sys_mem_ncoh_f(), pbdma_userd_target_sys_mem_ncoh_f(),
pbdma_userd_target_vid_mem_f()) | pbdma_userd_target_sys_mem_coh_f(),
pbdma_userd_target_vid_mem_f()) |
pbdma_userd_addr_f(addr_lo)); pbdma_userd_addr_f(addr_lo));
nvgpu_mem_wr32(g, &c->inst_block, nvgpu_mem_wr32(g, &c->inst_block,

View File

@@ -742,13 +742,14 @@ void gr_gk20a_ctx_patch_write(struct gk20a *g,
static u32 fecs_current_ctx_data(struct gk20a *g, struct nvgpu_mem *inst_block) static u32 fecs_current_ctx_data(struct gk20a *g, struct nvgpu_mem *inst_block)
{ {
u32 ptr = u64_lo32(nvgpu_inst_block_addr(g, inst_block) u64 ptr = nvgpu_inst_block_addr(g, inst_block) >>
>> ram_in_base_shift_v()); ram_in_base_shift_v();
u32 aperture = nvgpu_aperture_mask(g, inst_block, u32 aperture = nvgpu_aperture_mask(g, inst_block,
gr_fecs_current_ctx_target_sys_mem_ncoh_f(), gr_fecs_current_ctx_target_sys_mem_ncoh_f(),
gr_fecs_current_ctx_target_vid_mem_f()); gr_fecs_current_ctx_target_sys_mem_coh_f(),
gr_fecs_current_ctx_target_vid_mem_f());
return gr_fecs_current_ctx_ptr_f(ptr) | aperture | return gr_fecs_current_ctx_ptr_f(u64_lo32(ptr)) | aperture |
gr_fecs_current_ctx_valid_f(1); gr_fecs_current_ctx_valid_f(1);
} }
@@ -2199,16 +2200,18 @@ void gr_gk20a_load_falcon_bind_instblk(struct gk20a *g)
inst_ptr = nvgpu_inst_block_addr(g, &ucode_info->inst_blk_desc); inst_ptr = nvgpu_inst_block_addr(g, &ucode_info->inst_blk_desc);
gk20a_writel(g, gr_fecs_new_ctx_r(), gk20a_writel(g, gr_fecs_new_ctx_r(),
gr_fecs_new_ctx_ptr_f(inst_ptr >> 12) | gr_fecs_new_ctx_ptr_f(inst_ptr >> 12) |
nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc, nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc,
gr_fecs_new_ctx_target_sys_mem_ncoh_f(), gr_fecs_new_ctx_target_sys_mem_ncoh_f(),
gr_fecs_new_ctx_target_sys_mem_coh_f(),
gr_fecs_new_ctx_target_vid_mem_f()) | gr_fecs_new_ctx_target_vid_mem_f()) |
gr_fecs_new_ctx_valid_m()); gr_fecs_new_ctx_valid_m());
gk20a_writel(g, gr_fecs_arb_ctx_ptr_r(), gk20a_writel(g, gr_fecs_arb_ctx_ptr_r(),
gr_fecs_arb_ctx_ptr_ptr_f(inst_ptr >> 12) | gr_fecs_arb_ctx_ptr_ptr_f(inst_ptr >> 12) |
nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc, nvgpu_aperture_mask(g, &ucode_info->inst_blk_desc,
gr_fecs_arb_ctx_ptr_target_sys_mem_ncoh_f(), gr_fecs_arb_ctx_ptr_target_sys_mem_ncoh_f(),
gr_fecs_arb_ctx_ptr_target_sys_mem_coh_f(),
gr_fecs_arb_ctx_ptr_target_vid_mem_f())); gr_fecs_arb_ctx_ptr_target_vid_mem_f()));
gk20a_writel(g, gr_fecs_arb_ctx_cmd_r(), 0x7); gk20a_writel(g, gr_fecs_arb_ctx_cmd_r(), 0x7);
@@ -4440,8 +4443,9 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g)
gk20a_writel(g, fb_mmu_debug_wr_r(), gk20a_writel(g, fb_mmu_debug_wr_r(),
nvgpu_aperture_mask(g, &gr->mmu_wr_mem, nvgpu_aperture_mask(g, &gr->mmu_wr_mem,
fb_mmu_debug_wr_aperture_sys_mem_ncoh_f(), fb_mmu_debug_wr_aperture_sys_mem_ncoh_f(),
fb_mmu_debug_wr_aperture_vid_mem_f()) | fb_mmu_debug_wr_aperture_sys_mem_coh_f(),
fb_mmu_debug_wr_aperture_vid_mem_f()) |
fb_mmu_debug_wr_vol_false_f() | fb_mmu_debug_wr_vol_false_f() |
fb_mmu_debug_wr_addr_f(addr)); fb_mmu_debug_wr_addr_f(addr));
@@ -4450,8 +4454,9 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g)
gk20a_writel(g, fb_mmu_debug_rd_r(), gk20a_writel(g, fb_mmu_debug_rd_r(),
nvgpu_aperture_mask(g, &gr->mmu_rd_mem, nvgpu_aperture_mask(g, &gr->mmu_rd_mem,
fb_mmu_debug_wr_aperture_sys_mem_ncoh_f(), fb_mmu_debug_wr_aperture_sys_mem_ncoh_f(),
fb_mmu_debug_rd_aperture_vid_mem_f()) | fb_mmu_debug_wr_aperture_sys_mem_coh_f(),
fb_mmu_debug_rd_aperture_vid_mem_f()) |
fb_mmu_debug_rd_vol_false_f() | fb_mmu_debug_rd_vol_false_f() |
fb_mmu_debug_rd_addr_f(addr)); fb_mmu_debug_rd_addr_f(addr));

View File

@@ -122,8 +122,9 @@ static inline u32 big_valid_pde0_bits(struct gk20a *g,
{ {
u32 pde0_bits = u32 pde0_bits =
nvgpu_aperture_mask(g, pd->mem, nvgpu_aperture_mask(g, pd->mem,
gmmu_pde_aperture_big_sys_mem_ncoh_f(), gmmu_pde_aperture_big_sys_mem_ncoh_f(),
gmmu_pde_aperture_big_video_memory_f()) | gmmu_pde_aperture_big_sys_mem_coh_f(),
gmmu_pde_aperture_big_video_memory_f()) |
gmmu_pde_address_big_sys_f( gmmu_pde_address_big_sys_f(
(u32)(addr >> gmmu_pde_address_shift_v())); (u32)(addr >> gmmu_pde_address_shift_v()));
@@ -135,8 +136,9 @@ static inline u32 small_valid_pde1_bits(struct gk20a *g,
{ {
u32 pde1_bits = u32 pde1_bits =
nvgpu_aperture_mask(g, pd->mem, nvgpu_aperture_mask(g, pd->mem,
gmmu_pde_aperture_small_sys_mem_ncoh_f(), gmmu_pde_aperture_small_sys_mem_ncoh_f(),
gmmu_pde_aperture_small_video_memory_f()) | gmmu_pde_aperture_small_sys_mem_coh_f(),
gmmu_pde_aperture_small_video_memory_f()) |
gmmu_pde_vol_small_true_f() | /* tbd: why? */ gmmu_pde_vol_small_true_f() | /* tbd: why? */
gmmu_pde_address_small_sys_f( gmmu_pde_address_small_sys_f(
(u32)(addr >> gmmu_pde_address_shift_v())); (u32)(addr >> gmmu_pde_address_shift_v()));
@@ -215,6 +217,7 @@ static void __update_pte(struct vm_gk20a *vm,
pte_w[1] = __nvgpu_aperture_mask(g, attrs->aperture, pte_w[1] = __nvgpu_aperture_mask(g, attrs->aperture,
gmmu_pte_aperture_sys_mem_ncoh_f(), gmmu_pte_aperture_sys_mem_ncoh_f(),
gmmu_pte_aperture_sys_mem_coh_f(),
gmmu_pte_aperture_video_memory_f()) | gmmu_pte_aperture_video_memory_f()) |
gmmu_pte_kind_f(attrs->kind_v) | gmmu_pte_kind_f(attrs->kind_v) |
gmmu_pte_comptagline_f((u32)(attrs->ctag >> ctag_shift)); gmmu_pte_comptagline_f((u32)(attrs->ctag >> ctag_shift));
@@ -268,7 +271,7 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm,
page_size >> 10, page_size >> 10,
nvgpu_gmmu_perm_str(attrs->rw_flag), nvgpu_gmmu_perm_str(attrs->rw_flag),
attrs->kind_v, attrs->kind_v,
nvgpu_aperture_str(attrs->aperture), nvgpu_aperture_str(g, attrs->aperture),
attrs->cacheable ? 'C' : '-', attrs->cacheable ? 'C' : '-',
attrs->sparse ? 'S' : '-', attrs->sparse ? 'S' : '-',
attrs->priv ? 'P' : '-', attrs->priv ? 'P' : '-',
@@ -363,11 +366,12 @@ void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block,
gk20a_dbg_info("pde pa=0x%llx", pdb_addr); gk20a_dbg_info("pde pa=0x%llx", pdb_addr);
nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_lo_w(), nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_lo_w(),
nvgpu_aperture_mask(g, vm->pdb.mem, nvgpu_aperture_mask(g, vm->pdb.mem,
ram_in_page_dir_base_target_sys_mem_ncoh_f(), ram_in_page_dir_base_target_sys_mem_ncoh_f(),
ram_in_page_dir_base_target_vid_mem_f()) | ram_in_page_dir_base_target_sys_mem_coh_f(),
ram_in_page_dir_base_vol_true_f() | ram_in_page_dir_base_target_vid_mem_f()) |
ram_in_page_dir_base_lo_f(pdb_addr_lo)); ram_in_page_dir_base_vol_true_f() |
ram_in_page_dir_base_lo_f(pdb_addr_lo));
nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_hi_w(), nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_hi_w(),
ram_in_page_dir_base_hi_f(pdb_addr_hi)); ram_in_page_dir_base_hi_f(pdb_addr_hi));

View File

@@ -41,6 +41,7 @@ u32 gk20a_pramin_enter(struct gk20a *g, struct nvgpu_mem *mem,
u32 lo = (u32)(addr & 0xfffff); u32 lo = (u32)(addr & 0xfffff);
u32 win = nvgpu_aperture_mask(g, mem, u32 win = nvgpu_aperture_mask(g, mem,
bus_bar0_window_target_sys_mem_noncoherent_f(), bus_bar0_window_target_sys_mem_noncoherent_f(),
bus_bar0_window_target_sys_mem_coherent_f(),
bus_bar0_window_target_vid_mem_f()) | bus_bar0_window_target_vid_mem_f()) |
bus_bar0_window_base_f(hi); bus_bar0_window_base_f(hi);

View File

@@ -25,6 +25,7 @@
#include <nvgpu/timers.h> #include <nvgpu/timers.h>
#include <nvgpu/bus.h> #include <nvgpu/bus.h>
#include <nvgpu/mm.h> #include <nvgpu/mm.h>
#include <nvgpu/enabled.h>
#include "bus_gm20b.h" #include "bus_gm20b.h"
#include "gk20a/gk20a.h" #include "gk20a/gk20a.h"
@@ -43,8 +44,9 @@ int gm20b_bus_bar1_bind(struct gk20a *g, struct nvgpu_mem *bar1_inst)
gk20a_writel(g, bus_bar1_block_r(), gk20a_writel(g, bus_bar1_block_r(),
nvgpu_aperture_mask(g, bar1_inst, nvgpu_aperture_mask(g, bar1_inst,
bus_bar1_block_target_sys_mem_ncoh_f(), bus_bar1_block_target_sys_mem_ncoh_f(),
bus_bar1_block_target_vid_mem_f()) | bus_bar1_block_target_sys_mem_coh_f(),
bus_bar1_block_target_vid_mem_f()) |
bus_bar1_block_mode_virtual_f() | bus_bar1_block_mode_virtual_f() |
bus_bar1_block_ptr_f(ptr_v)); bus_bar1_block_ptr_f(ptr_v));
nvgpu_timeout_init(g, &timeout, 1000, NVGPU_TIMER_RETRY_TIMER); nvgpu_timeout_init(g, &timeout, 1000, NVGPU_TIMER_RETRY_TIMER);

View File

@@ -32,6 +32,7 @@
#include <nvgpu/atomic.h> #include <nvgpu/atomic.h>
#include <nvgpu/barrier.h> #include <nvgpu/barrier.h>
#include <nvgpu/mm.h> #include <nvgpu/mm.h>
#include <nvgpu/enabled.h>
#include <nvgpu/hw/gm20b/hw_ccsr_gm20b.h> #include <nvgpu/hw/gm20b/hw_ccsr_gm20b.h>
#include <nvgpu/hw/gm20b/hw_ram_gm20b.h> #include <nvgpu/hw/gm20b/hw_ram_gm20b.h>
@@ -51,11 +52,12 @@ void channel_gm20b_bind(struct channel_gk20a *c)
gk20a_writel(g, ccsr_channel_inst_r(c->chid), gk20a_writel(g, ccsr_channel_inst_r(c->chid),
ccsr_channel_inst_ptr_f(inst_ptr) | ccsr_channel_inst_ptr_f(inst_ptr) |
nvgpu_aperture_mask(g, &c->inst_block, nvgpu_aperture_mask(g, &c->inst_block,
ccsr_channel_inst_target_sys_mem_ncoh_f(), ccsr_channel_inst_target_sys_mem_ncoh_f(),
ccsr_channel_inst_target_vid_mem_f()) | ccsr_channel_inst_target_sys_mem_coh_f(),
ccsr_channel_inst_bind_true_f()); ccsr_channel_inst_target_vid_mem_f()) |
ccsr_channel_inst_bind_true_f());
gk20a_writel(g, ccsr_channel_r(c->chid), gk20a_writel(g, ccsr_channel_r(c->chid),
(gk20a_readl(g, ccsr_channel_r(c->chid)) & (gk20a_readl(g, ccsr_channel_r(c->chid)) &

View File

@@ -99,6 +99,7 @@ int bl_bootstrap_sec2(struct nvgpu_pmu *pmu,
nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) |
pwr_pmu_new_instblk_valid_f(1) | pwr_pmu_new_instblk_valid_f(1) |
nvgpu_aperture_mask(g, &mm->pmu.inst_block, nvgpu_aperture_mask(g, &mm->pmu.inst_block,
pwr_pmu_new_instblk_target_sys_ncoh_f(),
pwr_pmu_new_instblk_target_sys_coh_f(), pwr_pmu_new_instblk_target_sys_coh_f(),
pwr_pmu_new_instblk_target_fb_f())); pwr_pmu_new_instblk_target_fb_f()));
@@ -165,6 +166,7 @@ void init_pmu_setup_hw1(struct gk20a *g)
nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) |
pwr_pmu_new_instblk_valid_f(1) | pwr_pmu_new_instblk_valid_f(1) |
nvgpu_aperture_mask(g, &mm->pmu.inst_block, nvgpu_aperture_mask(g, &mm->pmu.inst_block,
pwr_pmu_new_instblk_target_sys_ncoh_f(),
pwr_pmu_new_instblk_target_sys_coh_f(), pwr_pmu_new_instblk_target_sys_coh_f(),
pwr_pmu_new_instblk_target_fb_f())); pwr_pmu_new_instblk_target_fb_f()));

View File

@@ -25,6 +25,7 @@
#include <nvgpu/dma.h> #include <nvgpu/dma.h>
#include <nvgpu/bug.h> #include <nvgpu/bug.h>
#include <nvgpu/log2.h> #include <nvgpu/log2.h>
#include <nvgpu/enabled.h>
#include "fifo_gp10b.h" #include "fifo_gp10b.h"
@@ -78,8 +79,9 @@ int channel_gp10b_commit_userd(struct channel_gk20a *c)
nvgpu_mem_wr32(g, &c->inst_block, nvgpu_mem_wr32(g, &c->inst_block,
ram_in_ramfc_w() + ram_fc_userd_w(), ram_in_ramfc_w() + ram_fc_userd_w(),
nvgpu_aperture_mask(g, &g->fifo.userd, nvgpu_aperture_mask(g, &g->fifo.userd,
pbdma_userd_target_sys_mem_ncoh_f(), pbdma_userd_target_sys_mem_ncoh_f(),
pbdma_userd_target_vid_mem_f()) | pbdma_userd_target_sys_mem_coh_f(),
pbdma_userd_target_vid_mem_f()) |
pbdma_userd_addr_f(addr_lo)); pbdma_userd_addr_f(addr_lo));
nvgpu_mem_wr32(g, &c->inst_block, nvgpu_mem_wr32(g, &c->inst_block,

View File

@@ -124,8 +124,9 @@ int gp10b_init_bar2_mm_hw_setup(struct gk20a *g)
gk20a_writel(g, bus_bar2_block_r(), gk20a_writel(g, bus_bar2_block_r(),
nvgpu_aperture_mask(g, inst_block, nvgpu_aperture_mask(g, inst_block,
bus_bar2_block_target_sys_mem_ncoh_f(), bus_bar2_block_target_sys_mem_ncoh_f(),
bus_bar2_block_target_vid_mem_f()) | bus_bar2_block_target_sys_mem_coh_f(),
bus_bar2_block_target_vid_mem_f()) |
bus_bar2_block_mode_virtual_f() | bus_bar2_block_mode_virtual_f() |
bus_bar2_block_ptr_f(inst_pa)); bus_bar2_block_ptr_f(inst_pa));
@@ -148,8 +149,9 @@ static void update_gmmu_pde3_locked(struct vm_gk20a *vm,
phys_addr >>= gmmu_new_pde_address_shift_v(); phys_addr >>= gmmu_new_pde_address_shift_v();
pde_v[0] |= nvgpu_aperture_mask(g, pd->mem, pde_v[0] |= nvgpu_aperture_mask(g, pd->mem,
gmmu_new_pde_aperture_sys_mem_ncoh_f(), gmmu_new_pde_aperture_sys_mem_ncoh_f(),
gmmu_new_pde_aperture_video_memory_f()); gmmu_new_pde_aperture_sys_mem_coh_f(),
gmmu_new_pde_aperture_video_memory_f());
pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(phys_addr)); pde_v[0] |= gmmu_new_pde_address_sys_f(u64_lo32(phys_addr));
pde_v[0] |= gmmu_new_pde_vol_true_f(); pde_v[0] |= gmmu_new_pde_vol_true_f();
pde_v[1] |= phys_addr >> 24; pde_v[1] |= phys_addr >> 24;
@@ -194,6 +196,7 @@ static void update_gmmu_pde0_locked(struct vm_gk20a *vm,
gmmu_new_dual_pde_address_small_sys_f(small_addr); gmmu_new_dual_pde_address_small_sys_f(small_addr);
pde_v[2] |= nvgpu_aperture_mask(g, pd->mem, pde_v[2] |= nvgpu_aperture_mask(g, pd->mem,
gmmu_new_dual_pde_aperture_small_sys_mem_ncoh_f(), gmmu_new_dual_pde_aperture_small_sys_mem_ncoh_f(),
gmmu_new_dual_pde_aperture_small_sys_mem_coh_f(),
gmmu_new_dual_pde_aperture_small_video_memory_f()); gmmu_new_dual_pde_aperture_small_video_memory_f());
pde_v[2] |= gmmu_new_dual_pde_vol_small_true_f(); pde_v[2] |= gmmu_new_dual_pde_vol_small_true_f();
pde_v[3] |= small_addr >> 24; pde_v[3] |= small_addr >> 24;
@@ -204,6 +207,7 @@ static void update_gmmu_pde0_locked(struct vm_gk20a *vm,
pde_v[0] |= gmmu_new_dual_pde_vol_big_true_f(); pde_v[0] |= gmmu_new_dual_pde_vol_big_true_f();
pde_v[0] |= nvgpu_aperture_mask(g, pd->mem, pde_v[0] |= nvgpu_aperture_mask(g, pd->mem,
gmmu_new_dual_pde_aperture_big_sys_mem_ncoh_f(), gmmu_new_dual_pde_aperture_big_sys_mem_ncoh_f(),
gmmu_new_dual_pde_aperture_big_sys_mem_coh_f(),
gmmu_new_dual_pde_aperture_big_video_memory_f()); gmmu_new_dual_pde_aperture_big_video_memory_f());
pde_v[1] |= big_addr >> 28; pde_v[1] |= big_addr >> 28;
} }
@@ -240,11 +244,10 @@ static void __update_pte(struct vm_gk20a *vm,
gmmu_new_pte_address_sys_f(phys_shifted) : gmmu_new_pte_address_sys_f(phys_shifted) :
gmmu_new_pte_address_vid_f(phys_shifted); gmmu_new_pte_address_vid_f(phys_shifted);
u32 pte_tgt = __nvgpu_aperture_mask(g, u32 pte_tgt = __nvgpu_aperture_mask(g,
attrs->aperture, attrs->aperture,
attrs->coherent ? gmmu_new_pte_aperture_sys_mem_ncoh_f(),
gmmu_new_pte_aperture_sys_mem_coh_f() : gmmu_new_pte_aperture_sys_mem_coh_f(),
gmmu_new_pte_aperture_sys_mem_ncoh_f(), gmmu_new_pte_aperture_video_memory_f());
gmmu_new_pte_aperture_video_memory_f());
pte_w[0] = pte_valid | pte_addr | pte_tgt; pte_w[0] = pte_valid | pte_addr | pte_tgt;
@@ -306,7 +309,7 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm,
page_size >> 10, page_size >> 10,
nvgpu_gmmu_perm_str(attrs->rw_flag), nvgpu_gmmu_perm_str(attrs->rw_flag),
attrs->kind_v, attrs->kind_v,
nvgpu_aperture_str(attrs->aperture), nvgpu_aperture_str(g, attrs->aperture),
attrs->cacheable ? 'C' : '-', attrs->cacheable ? 'C' : '-',
attrs->sparse ? 'S' : '-', attrs->sparse ? 'S' : '-',
attrs->priv ? 'P' : '-', attrs->priv ? 'P' : '-',
@@ -428,8 +431,9 @@ void gp10b_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block,
nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_lo_w(), nvgpu_mem_wr32(g, inst_block, ram_in_page_dir_base_lo_w(),
nvgpu_aperture_mask(g, vm->pdb.mem, nvgpu_aperture_mask(g, vm->pdb.mem,
ram_in_page_dir_base_target_sys_mem_ncoh_f(), ram_in_page_dir_base_target_sys_mem_ncoh_f(),
ram_in_page_dir_base_target_vid_mem_f()) | ram_in_page_dir_base_target_sys_mem_coh_f(),
ram_in_page_dir_base_target_vid_mem_f()) |
ram_in_page_dir_base_vol_true_f() | ram_in_page_dir_base_vol_true_f() |
ram_in_big_page_size_64kb_f() | ram_in_big_page_size_64kb_f() |
ram_in_page_dir_base_lo_f(pdb_addr_lo) | ram_in_page_dir_base_lo_f(pdb_addr_lo) |

View File

@@ -27,9 +27,10 @@
#include <nvgpu/nvgpu_common.h> #include <nvgpu/nvgpu_common.h>
#include <nvgpu/kmem.h> #include <nvgpu/kmem.h>
#include <nvgpu/nvgpu_mem.h> #include <nvgpu/nvgpu_mem.h>
#include <nvgpu/acr/nvgpu_acr.h>
#include <nvgpu/firmware.h> #include <nvgpu/firmware.h>
#include <nvgpu/mm.h> #include <nvgpu/mm.h>
#include <nvgpu/enabled.h>
#include <nvgpu/acr/nvgpu_acr.h>
#include "gk20a/gk20a.h" #include "gk20a/gk20a.h"
#include "acr_gv11b.h" #include "acr_gv11b.h"
@@ -220,7 +221,9 @@ static int bl_bootstrap(struct nvgpu_pmu *pmu,
pwr_pmu_new_instblk_ptr_f( pwr_pmu_new_instblk_ptr_f(
nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) | nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> 12) |
pwr_pmu_new_instblk_valid_f(1) | pwr_pmu_new_instblk_valid_f(1) |
pwr_pmu_new_instblk_target_sys_ncoh_f()); (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ?
pwr_pmu_new_instblk_target_sys_coh_f() :
pwr_pmu_new_instblk_target_sys_ncoh_f())) ;
/*copy bootloader interface structure to dmem*/ /*copy bootloader interface structure to dmem*/
nvgpu_flcn_copy_to_dmem(pmu->flcn, 0, (u8 *)pbl_desc, nvgpu_flcn_copy_to_dmem(pmu->flcn, 0, (u8 *)pbl_desc,

View File

@@ -31,14 +31,14 @@
#include <nvgpu/dma.h> #include <nvgpu/dma.h>
#include <nvgpu/mm.h> #include <nvgpu/mm.h>
#include <nvgpu/sizes.h> #include <nvgpu/sizes.h>
#include <nvgpu/enabled.h>
#include <nvgpu/log.h>
#include <nvgpu/bug.h>
#include "gk20a/gk20a.h" #include "gk20a/gk20a.h"
#include "gk20a/css_gr_gk20a.h" #include "gk20a/css_gr_gk20a.h"
#include "css_gr_gv11b.h" #include "css_gr_gv11b.h"
#include <nvgpu/log.h>
#include <nvgpu/bug.h>
#include <nvgpu/hw/gv11b/hw_perf_gv11b.h> #include <nvgpu/hw/gv11b/hw_perf_gv11b.h>
#include <nvgpu/hw/gv11b/hw_mc_gv11b.h> #include <nvgpu/hw/gv11b/hw_mc_gv11b.h>
@@ -144,6 +144,7 @@ int gv11b_css_hw_enable_snapshot(struct channel_gk20a *ch,
perf_pmasys_mem_block_valid_true_f() | perf_pmasys_mem_block_valid_true_f() |
nvgpu_aperture_mask(g, &g->mm.hwpm.inst_block, nvgpu_aperture_mask(g, &g->mm.hwpm.inst_block,
perf_pmasys_mem_block_target_sys_ncoh_f(), perf_pmasys_mem_block_target_sys_ncoh_f(),
perf_pmasys_mem_block_target_sys_coh_f(),
perf_pmasys_mem_block_target_lfb_f())); perf_pmasys_mem_block_target_lfb_f()));

View File

@@ -59,11 +59,12 @@ int gv11b_perfbuf_enable_locked(struct gk20a *g, u64 offset, u32 size)
inst_pa_page = nvgpu_inst_block_addr(g, &mm->perfbuf.inst_block) >> 12; inst_pa_page = nvgpu_inst_block_addr(g, &mm->perfbuf.inst_block) >> 12;
gk20a_writel(g, perf_pmasys_mem_block_r(), gk20a_writel(g, perf_pmasys_mem_block_r(),
perf_pmasys_mem_block_base_f(inst_pa_page) | perf_pmasys_mem_block_base_f(inst_pa_page) |
perf_pmasys_mem_block_valid_true_f() | perf_pmasys_mem_block_valid_true_f() |
nvgpu_aperture_mask(g, &mm->perfbuf.inst_block, nvgpu_aperture_mask(g, &mm->perfbuf.inst_block,
+ perf_pmasys_mem_block_target_sys_ncoh_f(), perf_pmasys_mem_block_target_sys_ncoh_f(),
+ perf_pmasys_mem_block_target_lfb_f())); perf_pmasys_mem_block_target_sys_coh_f(),
perf_pmasys_mem_block_target_lfb_f()));
gk20a_idle(g); gk20a_idle(g);
return 0; return 0;

View File

@@ -101,12 +101,14 @@ void gv11b_get_ch_runlist_entry(struct channel_gk20a *c, u32 *runlist)
c->runqueue_sel) | c->runqueue_sel) |
ram_rl_entry_chan_userd_target_f( ram_rl_entry_chan_userd_target_f(
nvgpu_aperture_mask(g, &g->fifo.userd, nvgpu_aperture_mask(g, &g->fifo.userd,
ram_rl_entry_chan_userd_target_sys_mem_ncoh_v(), ram_rl_entry_chan_userd_target_sys_mem_ncoh_v(),
ram_rl_entry_chan_userd_target_vid_mem_v())) | ram_rl_entry_chan_userd_target_sys_mem_coh_v(),
ram_rl_entry_chan_userd_target_vid_mem_v())) |
ram_rl_entry_chan_inst_target_f( ram_rl_entry_chan_inst_target_f(
nvgpu_aperture_mask(g, &c->inst_block, nvgpu_aperture_mask(g, &c->inst_block,
ram_rl_entry_chan_inst_target_sys_mem_ncoh_v(), ram_rl_entry_chan_inst_target_sys_mem_ncoh_v(),
ram_rl_entry_chan_inst_target_vid_mem_v())); ram_rl_entry_chan_inst_target_sys_mem_coh_v(),
ram_rl_entry_chan_inst_target_vid_mem_v()));
addr_lo = u64_lo32(c->userd_iova) >> addr_lo = u64_lo32(c->userd_iova) >>
ram_rl_entry_chan_userd_ptr_align_shift_v(); ram_rl_entry_chan_userd_ptr_align_shift_v();

View File

@@ -26,6 +26,7 @@
#include <nvgpu/dma.h> #include <nvgpu/dma.h>
#include <nvgpu/log.h> #include <nvgpu/log.h>
#include <nvgpu/mm.h> #include <nvgpu/mm.h>
#include <nvgpu/enabled.h>
#include "gk20a/gk20a.h" #include "gk20a/gk20a.h"
#include "gk20a/mm_gk20a.h" #include "gk20a/mm_gk20a.h"
@@ -292,8 +293,9 @@ int gv11b_init_bar2_mm_hw_setup(struct gk20a *g)
gk20a_writel(g, bus_bar2_block_r(), gk20a_writel(g, bus_bar2_block_r(),
nvgpu_aperture_mask(g, inst_block, nvgpu_aperture_mask(g, inst_block,
bus_bar2_block_target_sys_mem_ncoh_f(), bus_bar2_block_target_sys_mem_ncoh_f(),
bus_bar2_block_target_vid_mem_f()) | bus_bar2_block_target_sys_mem_coh_f(),
bus_bar2_block_target_vid_mem_f()) |
bus_bar2_block_mode_virtual_f() | bus_bar2_block_mode_virtual_f() |
bus_bar2_block_ptr_f(inst_pa)); bus_bar2_block_ptr_f(inst_pa));

View File

@@ -195,9 +195,11 @@ int gv11b_pmu_bootstrap(struct nvgpu_pmu *pmu)
gk20a_writel(g, pwr_pmu_new_instblk_r(), gk20a_writel(g, pwr_pmu_new_instblk_r(),
pwr_pmu_new_instblk_ptr_f( pwr_pmu_new_instblk_ptr_f(
nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> ALIGN_4KB) nvgpu_inst_block_addr(g, &mm->pmu.inst_block) >> ALIGN_4KB) |
| pwr_pmu_new_instblk_valid_f(1) pwr_pmu_new_instblk_valid_f(1) |
| pwr_pmu_new_instblk_target_sys_ncoh_f()); (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ?
pwr_pmu_new_instblk_target_sys_coh_f() :
pwr_pmu_new_instblk_target_sys_ncoh_f()));
/* TBD: load all other surfaces */ /* TBD: load all other surfaces */
g->ops.pmu_ver.set_pmu_cmdline_args_trace_size( g->ops.pmu_ver.set_pmu_cmdline_args_trace_size(

View File

@@ -178,8 +178,9 @@ void gv11b_subctx_commit_pdb(struct vm_gk20a *vm,
u64 pdb_addr; u64 pdb_addr;
u32 max_subctx_count = gr_pri_fe_chip_def_info_max_veid_count_init_v(); u32 max_subctx_count = gr_pri_fe_chip_def_info_max_veid_count_init_v();
u32 aperture = nvgpu_aperture_mask(g, vm->pdb.mem, u32 aperture = nvgpu_aperture_mask(g, vm->pdb.mem,
ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(), ram_in_sc_page_dir_base_target_sys_mem_ncoh_v(),
ram_in_sc_page_dir_base_target_vid_mem_v()); ram_in_sc_page_dir_base_target_sys_mem_coh_v(),
ram_in_sc_page_dir_base_target_vid_mem_v());
pdb_addr = nvgpu_mem_get_addr(g, vm->pdb.mem); pdb_addr = nvgpu_mem_get_addr(g, vm->pdb.mem);
pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v()); pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());

View File

@@ -75,8 +75,8 @@ struct gk20a;
#define NVGPU_SUPPORT_MAP_DIRECT_KIND_CTRL 24 #define NVGPU_SUPPORT_MAP_DIRECT_KIND_CTRL 24
/* Support batch mapping */ /* Support batch mapping */
#define NVGPU_SUPPORT_MAP_BUFFER_BATCH 25 #define NVGPU_SUPPORT_MAP_BUFFER_BATCH 25
/* Support DMA coherence */ /* Use coherent aperture for sysmem. */
#define NVGPU_DMA_COHERENT 26 #define NVGPU_USE_COHERENT_SYSMEM 26
/* Use physical scatter tables instead of IOMMU */ /* Use physical scatter tables instead of IOMMU */
#define NVGPU_MM_USE_PHYSICAL_SG 27 #define NVGPU_MM_USE_PHYSICAL_SG 27

View File

@@ -25,6 +25,7 @@
#include <nvgpu/types.h> #include <nvgpu/types.h>
#include <nvgpu/list.h> #include <nvgpu/list.h>
#include <nvgpu/enabled.h>
#ifdef __KERNEL__ #ifdef __KERNEL__
#include <nvgpu/linux/nvgpu_mem.h> #include <nvgpu/linux/nvgpu_mem.h>
@@ -51,6 +52,10 @@ struct nvgpu_page_alloc;
enum nvgpu_aperture { enum nvgpu_aperture {
APERTURE_INVALID = 0, /* unallocated or N/A */ APERTURE_INVALID = 0, /* unallocated or N/A */
APERTURE_SYSMEM, APERTURE_SYSMEM,
/* Don't use directly. Use APERTURE_SYSMEM, this is used internally. */
__APERTURE_SYSMEM_COH,
APERTURE_VIDMEM APERTURE_VIDMEM
}; };
@@ -195,12 +200,18 @@ nvgpu_mem_from_clear_list_entry(struct nvgpu_list_node *node)
clear_list_entry)); clear_list_entry));
}; };
static inline const char *nvgpu_aperture_str(enum nvgpu_aperture aperture) static inline const char *nvgpu_aperture_str(struct gk20a *g,
enum nvgpu_aperture aperture)
{ {
switch (aperture) { switch (aperture) {
case APERTURE_INVALID: return "INVAL"; case APERTURE_INVALID:
case APERTURE_SYSMEM: return "SYSMEM"; return "INVAL";
case APERTURE_VIDMEM: return "VIDMEM"; case APERTURE_SYSMEM:
return "SYSMEM";
case __APERTURE_SYSMEM_COH:
return "SYSCOH";
case APERTURE_VIDMEM:
return "VIDMEM";
}; };
return "UNKNOWN"; return "UNKNOWN";
} }
@@ -332,9 +343,9 @@ u64 nvgpu_mem_get_addr(struct gk20a *g, struct nvgpu_mem *mem);
u64 nvgpu_mem_get_phys_addr(struct gk20a *g, struct nvgpu_mem *mem); u64 nvgpu_mem_get_phys_addr(struct gk20a *g, struct nvgpu_mem *mem);
u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture, u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture,
u32 sysmem_mask, u32 vidmem_mask); u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask);
u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem, u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem,
u32 sysmem_mask, u32 vidmem_mask); u32 sysmem_mask, u32 sysmem_coh_mask, u32 vidmem_mask);
u64 nvgpu_mem_iommu_translate(struct gk20a *g, u64 phys); u64 nvgpu_mem_iommu_translate(struct gk20a *g, u64 phys);