diff --git a/drivers/gpu/nvgpu/common/linux/dma.c b/drivers/gpu/nvgpu/common/linux/dma.c index c13dae8b3..81aebb7d7 100644 --- a/drivers/gpu/nvgpu/common/linux/dma.c +++ b/drivers/gpu/nvgpu/common/linux/dma.c @@ -221,6 +221,16 @@ int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags, NVGPU_DEFINE_DMA_ATTRS(dma_attrs); void *alloc_ret; + /* + * WAR for IO coherent chips: the DMA API does not seem to generate + * mappings that work correctly. Unclear why - Bug ID: 2040115. + * + * Basically we just tell the DMA API not to map with NO_KERNEL_MAPPING + * and then make a vmap() ourselves. + */ + if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) + flags |= NVGPU_DMA_NO_KERNEL_MAPPING; + /* * Before the debug print so we see this in the total. But during * cleanup in the fail path this has to be subtracted. @@ -255,7 +265,17 @@ int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags, iova, size, flags); } if (err) - goto fail_free; + goto fail_free_dma; + + if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) { + mem->cpu_va = vmap(mem->priv.pages, + size >> PAGE_SHIFT, + 0, PAGE_KERNEL); + if (!mem->cpu_va) { + err = -ENOMEM; + goto fail_free_sgt; + } + } mem->aligned_size = size; mem->aperture = APERTURE_SYSMEM; @@ -265,12 +285,14 @@ int nvgpu_dma_alloc_flags_sys(struct gk20a *g, unsigned long flags, return 0; -fail_free: - g->dma_memory_used -= mem->aligned_size; +fail_free_sgt: + nvgpu_free_sgtable(g, &mem->priv.sgt); +fail_free_dma: dma_free_attrs(d, size, alloc_ret, iova, NVGPU_DMA_ATTR(dma_attrs)); mem->cpu_va = NULL; mem->priv.sgt = NULL; mem->size = 0; + g->dma_memory_used -= mem->aligned_size; return err; } @@ -466,6 +488,12 @@ static void nvgpu_dma_free_sys(struct gk20a *g, struct nvgpu_mem *mem) if (!(mem->mem_flags & NVGPU_MEM_FLAG_SHADOW_COPY) && !(mem->mem_flags & __NVGPU_MEM_FLAG_NO_DMA) && (mem->cpu_va || mem->priv.pages)) { + /* + * Free side of WAR for bug 2040115. + */ + if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) + vunmap(mem->cpu_va); + if (mem->priv.flags) { NVGPU_DEFINE_DMA_ATTRS(dma_attrs); diff --git a/drivers/gpu/nvgpu/common/linux/module.c b/drivers/gpu/nvgpu/common/linux/module.c index 52348db00..741c86e77 100644 --- a/drivers/gpu/nvgpu/common/linux/module.c +++ b/drivers/gpu/nvgpu/common/linux/module.c @@ -1149,6 +1149,12 @@ static int gk20a_probe(struct platform_device *dev) if (err) goto return_err; + np = nvgpu_get_node(gk20a); + if (of_dma_is_coherent(np)) { + __nvgpu_set_enabled(gk20a, NVGPU_USE_COHERENT_SYSMEM, true); + __nvgpu_set_enabled(gk20a, NVGPU_SUPPORT_IO_COHERENCE, true); + } + if (nvgpu_platform_is_simulation(gk20a)) __nvgpu_set_enabled(gk20a, NVGPU_IS_FMODEL, true); @@ -1208,12 +1214,6 @@ static int gk20a_probe(struct platform_device *dev) gk20a->mm.has_physical_mode = !nvgpu_is_hypervisor_mode(gk20a); - np = nvgpu_get_node(gk20a); - if (of_dma_is_coherent(np)) { - __nvgpu_set_enabled(gk20a, NVGPU_USE_COHERENT_SYSMEM, true); - __nvgpu_set_enabled(gk20a, NVGPU_SUPPORT_IO_COHERENCE, true); - } - return 0; return_err: diff --git a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c index c859520d0..698976940 100644 --- a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c +++ b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c @@ -44,6 +44,14 @@ int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem) if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin) return 0; + /* + * WAR for bug 2040115: we already will always have a coherent vmap() + * for all sysmem buffers. The prot settings are left alone since + * eventually this should be deleted. + */ + if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) + return 0; + /* * A CPU mapping is implicitly made for all SYSMEM DMA allocations that * don't have NVGPU_DMA_NO_KERNEL_MAPPING. Thus we don't need to make @@ -73,6 +81,13 @@ void nvgpu_mem_end(struct gk20a *g, struct nvgpu_mem *mem) if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin) return; + /* + * WAR for bug 2040115: skip this since the map will be taken care of + * during the free in the DMA API. + */ + if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) + return; + /* * Similar to nvgpu_mem_begin() we don't need to unmap the CPU mapping * already made by the DMA API. @@ -393,8 +408,12 @@ int nvgpu_mem_create_from_mem(struct gk20a *g, /* * Re-use the CPU mapping only if the mapping was made by the DMA API. + * + * Bug 2040115: the DMA API wrapper makes the mapping that we should + * re-use. */ - if (!(src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING)) + if (!(src->priv.flags & NVGPU_DMA_NO_KERNEL_MAPPING) || + nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM)) dest->cpu_va = src->cpu_va + (PAGE_SIZE * start_page); dest->priv.pages = src->priv.pages + start_page; diff --git a/drivers/gpu/nvgpu/common/linux/pci.c b/drivers/gpu/nvgpu/common/linux/pci.c index 4ba839c40..973da9ca1 100644 --- a/drivers/gpu/nvgpu/common/linux/pci.c +++ b/drivers/gpu/nvgpu/common/linux/pci.c @@ -566,6 +566,12 @@ static int nvgpu_pci_probe(struct pci_dev *pdev, platform->g = g; l->dev = &pdev->dev; + np = nvgpu_get_node(g); + if (of_dma_is_coherent(np)) { + __nvgpu_set_enabled(g, NVGPU_USE_COHERENT_SYSMEM, true); + __nvgpu_set_enabled(g, NVGPU_SUPPORT_IO_COHERENCE, true); + } + err = pci_enable_device(pdev); if (err) return err; @@ -644,13 +650,6 @@ static int nvgpu_pci_probe(struct pci_dev *pdev, g->mm.has_physical_mode = false; - np = nvgpu_get_node(g); - - if (of_dma_is_coherent(np)) { - __nvgpu_set_enabled(g, NVGPU_USE_COHERENT_SYSMEM, true); - __nvgpu_set_enabled(g, NVGPU_SUPPORT_IO_COHERENCE, true); - } - return 0; }