diff --git a/drivers/gpu/nvgpu/common/linux/module.c b/drivers/gpu/nvgpu/common/linux/module.c
index b103fcea4..52348db00 100644
--- a/drivers/gpu/nvgpu/common/linux/module.c
+++ b/drivers/gpu/nvgpu/common/linux/module.c
@@ -20,6 +20,7 @@
 #include <linux/of.h>
 #include <linux/of_device.h>
 #include <linux/of_platform.h>
+#include <linux/of_address.h>
 #include <linux/interrupt.h>
 #include <linux/pm_runtime.h>
 #include <linux/reset.h>
@@ -1107,6 +1108,7 @@ static int gk20a_probe(struct platform_device *dev)
 	struct gk20a *gk20a;
 	int err;
 	struct gk20a_platform *platform = NULL;
+	struct device_node *np;
 
 	if (dev->dev.of_node) {
 		const struct of_device_id *match;
@@ -1206,6 +1208,12 @@ static int gk20a_probe(struct platform_device *dev)
 
 	gk20a->mm.has_physical_mode = !nvgpu_is_hypervisor_mode(gk20a);
 
+	np = nvgpu_get_node(gk20a);
+	if (of_dma_is_coherent(np)) {
+		__nvgpu_set_enabled(gk20a, NVGPU_USE_COHERENT_SYSMEM, true);
+		__nvgpu_set_enabled(gk20a, NVGPU_SUPPORT_IO_COHERENCE, true);
+	}
+
 	return 0;
 
 return_err:
diff --git a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
index 206b83e15..64f638e2b 100644
--- a/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
+++ b/drivers/gpu/nvgpu/common/linux/nvgpu_mem.c
@@ -61,7 +61,8 @@ u32 nvgpu_aperture_mask(struct gk20a *g, struct nvgpu_mem *mem,
 int nvgpu_mem_begin(struct gk20a *g, struct nvgpu_mem *mem)
 {
 	void *cpu_va;
-	pgprot_t prot = nvgpu_is_enabled(g, NVGPU_DMA_COHERENT) ? PAGE_KERNEL :
+	pgprot_t prot = nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM) ?
+		PAGE_KERNEL :
 		pgprot_writecombine(PAGE_KERNEL);
 
 	if (mem->aperture != APERTURE_SYSMEM || g->mm.force_pramin)
diff --git a/drivers/gpu/nvgpu/common/linux/pci.c b/drivers/gpu/nvgpu/common/linux/pci.c
index 6ebe8dda5..4ba839c40 100644
--- a/drivers/gpu/nvgpu/common/linux/pci.c
+++ b/drivers/gpu/nvgpu/common/linux/pci.c
@@ -17,13 +17,13 @@
 #include <linux/pci.h>
 #include <linux/interrupt.h>
 #include <linux/pm_runtime.h>
+#include <linux/of_platform.h>
+#include <linux/of_address.h>
 
 #include <nvgpu/nvgpu_common.h>
 #include <nvgpu/kmem.h>
 #include <nvgpu/enabled.h>
 #include <nvgpu/nvlink.h>
-#include <linux/of_platform.h>
-#include <linux/of_address.h>
 
 #include "gk20a/gk20a.h"
 #include "clk/clk.h"
@@ -647,7 +647,7 @@ static int nvgpu_pci_probe(struct pci_dev *pdev,
 	np = nvgpu_get_node(g);
 
 	if (of_dma_is_coherent(np)) {
-		__nvgpu_set_enabled(g, NVGPU_DMA_COHERENT, true);
+		__nvgpu_set_enabled(g, NVGPU_USE_COHERENT_SYSMEM, true);
 		__nvgpu_set_enabled(g, NVGPU_SUPPORT_IO_COHERENCE, true);
 	}
 
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index ffac324ce..5abf59513 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -79,6 +79,13 @@ static u64 __nvgpu_gmmu_map(struct vm_gk20a *vm,
 	if (!sgt)
 		return -ENOMEM;
 
+	/*
+	 * If the GPU is IO coherent and the DMA API is giving us IO coherent
+	 * CPU mappings, then we must make sure we use the IO coherent aperture.
+	 */
+	if (nvgpu_is_enabled(g, NVGPU_USE_COHERENT_SYSMEM))
+		flags |= NVGPU_VM_MAP_IO_COHERENT;
+
 	nvgpu_mutex_acquire(&vm->update_gmmu_lock);
 	vaddr = g->ops.mm.gmmu_map(vm, addr,
 				   sgt,    /* sg list */
diff --git a/drivers/gpu/nvgpu/include/nvgpu/enabled.h b/drivers/gpu/nvgpu/include/nvgpu/enabled.h
index a3d9df24a..24748a194 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/enabled.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/enabled.h
@@ -75,8 +75,8 @@ struct gk20a;
 #define NVGPU_SUPPORT_MAP_DIRECT_KIND_CTRL	24
 /* Support batch mapping */
 #define NVGPU_SUPPORT_MAP_BUFFER_BATCH		25
-/* Support DMA coherence */
-#define NVGPU_DMA_COHERENT			26
+/* Use coherent aperture for sysmem. */
+#define NVGPU_USE_COHERENT_SYSMEM		26
 /* Use physical scatter tables instead of IOMMU */
 #define NVGPU_MM_USE_PHYSICAL_SG		27