Revert "Revert "gpu: nvgpu: Get coherency on gv100 + NVLINK working""

This reverts commit 5a35a95654.

JIRA EVLR-2333

Change-Id: I923c32496c343d39d34f6d406c38a9f6ce7dc6e0
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1667167
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Alex Waterman
2018-03-01 20:47:25 -08:00
committed by mobile promotions
parent ef116a6e63
commit 89fbf39a05
28 changed files with 275 additions and 126 deletions

View File

@@ -221,6 +221,16 @@ int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags,
NVGPU_DEFINE_DMA_ATTRS(dma_attrs);
void *alloc_ret;
/*
* WAR for IO coherent chips: the DMA API does not seem to generate
* mappings that work correctly. Unclear why - Bug ID: 2040115.
*
* Basically we just tell the DMA API not to map with NO_KERNEL_MAPPING
* and then make a vmap() ourselves.
*/
if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
flags |= NVGPU_DMA_NO_KERNEL_MAPPING;
/*
* Before the debug print so we see this in the total. But during
* cleanup in the fail path this has to be subtracted.
@@ -255,7 +265,17 @@ int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags,
iova, size, flags);
}
if (err)
goto fail_free;
goto fail_free_dma;
if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) {
mem->cpu_va = vmap(mem->priv.pages,
size >> PAGE_SHIFT,
0, PAGE_KERNEL);
if (!mem->cpu_va) {
err = -ENOMEM;
goto fail_free_sgt;
}
}
mem->aligned_size = size;
mem->aperture = APERTURE_SYSMEM;
@@ -265,12 +285,14 @@ int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags,
return 0;
fail_free:
g->dma_memory_used -= mem->aligned_size;
fail_free_sgt:
nvgpu_free_sgtable(g, &mem->priv.sgt);
fail_free_dma:
dma_free_attrs(d, size, alloc_ret, iova, NVGPU_DMA_ATTR(dma_attrs));
mem->cpu_va = NULL;
mem->priv.sgt = NULL;
mem->size = 0;
g->dma_memory_used -= mem->aligned_size;
return err;
}
@@ -466,6 +488,12 @@ static void nvgpu_dma_free_sys(struct gk20a *g, struct nvgpu_mem *mem)
if (!(mem->mem_flags & NVGPU_MEM_FLAG_SHADOW_COPY) &&
!(mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA) &&
(mem->cpu_va || mem->priv.pages)) {
/*
* Free side of WAR for bug 2040115.
*/
if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
vunmap(mem->cpu_va);
if (mem->priv.flags) {
NVGPU_DEFINE_DMA_ATTRS(dma_attrs);

View File

@@ -20,6 +20,7 @@
#include <linux/of.h>
#include <linux/of_device.h>
#include <linux/of_platform.h>
#include <linux/of_address.h>
#include <linux/interrupt.h>
#include <linux/pm_runtime.h>
#include <linux/reset.h>
@@ -1107,6 +1108,7 @@ static int gk20a_probe(struct platform_device *dev)
struct gk20a *gk20a;
int err;
struct gk20a_platform *platform = NULL;
struct device_node *np;
if (dev->dev.of_node) {
const struct of_device_id *match;
@@ -1147,6 +1149,12 @@ static int gk20a_probe(struct platform_device *dev)
if (err)
goto return_err;
np = nvgpu_get_node(gk20a);
if (of_dma_is_coherent(np)) {
__nvgpu_set_enabled(gk20a, NVGPU_USE_COHERENT_SYSMEM, true);
__nvgpu_set_enabled(gk20a, NVGPU_SUPPORT_IO_COHERENCE, true);
}
if (nvgpu_platform_is_simulation(gk20a))
__nvgpu_set_enabled(gk20a, NVGPU_IS_FMODEL, true);

View File

@@ -34,39 +34,24 @@
#include "gk20a/gk20a.h"
#include "gk20a/mm_gk20a.h"
/*
 * Translate an nvgpu aperture enum into a register mask value.
 *
 * Returns @sysmem_mask or @vidmem_mask depending on @aperture:
 *  - APERTURE_SYSMEM: @sysmem_mask when the chip honors the aperture
 *    field (NVGPU_MM_HONORS_APERTURE is enabled); otherwise
 *    @vidmem_mask, since some iGPUs treat system memory as vidmem.
 *  - APERTURE_VIDMEM: @vidmem_mask (dGPU-only case).
 *  - APERTURE_INVALID: warns, then falls through to return 0.
 *
 * NOTE(review): WARN_ON() is passed a string literal, i.e. an
 * always-true pointer, so it fires unconditionally on this path —
 * presumably intended as WARN_ON(1) with an in-source hint; confirm
 * against the WARN_ON() definition used by this driver.
 */
u32 __nvgpu_aperture_mask(struct gk20a *g, enum nvgpu_aperture aperture,
u32 sysmem_mask, u32 vidmem_mask)
{
switch (aperture) {
case APERTURE_SYSMEM:
/* some igpus consider system memory vidmem */
return nvgpu_is_enabled(g, NVGPU_MM_HONORS_APERTURE)
? sysmem_mask : vidmem_mask;
case APERTURE_VIDMEM:
/* for dgpus only */
return vidmem_mask;
case APERTURE_INVALID:
WARN_ON("Bad aperture");
}
/* Invalid/unhandled aperture: report no bits set. */
return 0;
}
/*
 * Convenience wrapper around __nvgpu_aperture_mask(): reads the
 * aperture kind from @mem rather than taking it as an explicit
 * argument. Mask-selection semantics are identical.
 */
u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem,
u32 sysmem_mask, u32 vidmem_mask)
{
return __nvgpu_aperture_mask(g, mem->aperture,
sysmem_mask, vidmem_mask);
}
int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem)
{
void *cpu_va;
pgprot_t prot = nvgpu_is_enabled(g, NVGPU_DMA_COHERENT) ? PAGE_KERNEL :
pgprot_t prot = nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ?
PAGE_KERNEL :
pgprot_writecombine(PAGE_KERNEL);
if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin)
return 0;
/*
* WAR for bug 2040115: we already will always have a coherent vmap()
* for all sysmem buffers. The prot settings are left alone since
* eventually this should be deleted.
*/
if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
return 0;
/*
* A CPU mapping is implicitly made for all SYSMEM DMA allocations that
* don't have NVGPU_DMA_NO_KERNEL_MAPPING. Thus we don't need to make
@@ -96,6 +81,13 @@ void nvgpu_mem_end(struct gk20a *g, struct nvgpu_mem *mem)
if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin)
return;
/*
* WAR for bug 2040115: skip this since the map will be taken care of
* during the free in the DMA API.
*/
if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
return;
/*
* Similar to nvgpu_mem_begin() we don't need to unmap the CPU mapping
* already made by the DMA API.
@@ -315,7 +307,8 @@ void nvgpu_memset(struct gk20a *g, struct nvgpu_mem *mem, u32 offset,
*/
u64 nvgpu_mem_get_addr_sgl(struct gk20a *g, struct scatterlist *sgl)
{
if (!nvgpu_iommuable(g))
if (nvgpu_is_enabled(g, NVGPU_MM_USE_PHYSICAL_SG) ||
!nvgpu_iommuable(g))
return g->ops.mm.gpu_phys_addr(g, NULL, sg_phys(sgl));
if (sg_dma_address(sgl) == 0)
@@ -415,8 +408,12 @@ int nvgpu_mem_create_from_mem(struct gk20a *g,
/*
* Re-use the CPU mapping only if the mapping was made by the DMA API.
*
* Bug 2040115: the DMA API wrapper makes the mapping that we should
* re-use.
*/
if (!(src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING))
if (!(src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING) ||
nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
dest->cpu_va = src->cpu_va + (PAGE_SIZE * start_page);
dest->priv.pages = src->priv.pages + start_page;

View File

@@ -17,13 +17,13 @@
#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/pm_runtime.h>
#include <linux/of_platform.h>
#include <linux/of_address.h>
#include <nvgpu/nvgpu_common.h>
#include <nvgpu/kmem.h>
#include <nvgpu/enabled.h>
#include <nvgpu/nvlink.h>
#include <linux/of_platform.h>
#include <linux/of_address.h>
#include "gk20a/gk20a.h"
#include "clk/clk.h"
@@ -566,6 +566,12 @@ static int nvgpu_pci_probe(struct pci_dev *pdev,
platform->g = g;
l->dev = &pdev->dev;
np = nvgpu_get_node(g);
if (of_dma_is_coherent(np)) {
__nvgpu_set_enabled(g, NVGPU_USE_COHERENT_SYSMEM, true);
__nvgpu_set_enabled(g, NVGPU_SUPPORT_IO_COHERENCE, true);
}
err = pci_enable_device(pdev);
if (err)
return err;
@@ -644,13 +650,6 @@ static int nvgpu_pci_probe(struct pci_dev *pdev,
g->mm.has_physical_mode = false;
np = nvgpu_get_node(g);
if (of_dma_is_coherent(np)) {
__nvgpu_set_enabled(g, NVGPU_DMA_COHERENT, true);
__nvgpu_set_enabled(g, NVGPU_SUPPORT_IO_COHERENCE, true);
}
return 0;
}

View File

@@ -166,7 +166,8 @@ struct nvgpu_mapped_buf *nvgpu_vm_find_mapping(struct vm_gk20a *vm,
vm->gmmu_page_sizes[mapped_buffer->pgsz_idx] >> 10,
vm_aspace_id(vm),
mapped_buffer->flags,
nvgpu_aperture_str(gk20a_dmabuf_aperture(g, os_buf->dmabuf)));
nvgpu_aperture_str(g,
gk20a_dmabuf_aperture(g, os_buf->dmabuf)));
return mapped_buffer;
}