diff --git a/drivers/gpu/nvgpu/Makefile.nvgpu b/drivers/gpu/nvgpu/Makefile.nvgpu index 34d8d19f2..d994ac1dd 100644 --- a/drivers/gpu/nvgpu/Makefile.nvgpu +++ b/drivers/gpu/nvgpu/Makefile.nvgpu @@ -32,6 +32,7 @@ nvgpu-y := \ common/linux/ioctl_tsg.o \ common/linux/log.o \ common/linux/nvgpu_mem.o \ + common/linux/dma.o \ common/mm/nvgpu_allocator.o \ common/mm/bitmap_allocator.o \ common/mm/buddy_allocator.o \ diff --git a/drivers/gpu/nvgpu/common/linux/dma.c b/drivers/gpu/nvgpu/common/linux/dma.c new file mode 100644 index 000000000..755848eae --- /dev/null +++ b/drivers/gpu/nvgpu/common/linux/dma.c @@ -0,0 +1,415 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include + +#include +#include + +#include "gk20a/gk20a.h" + +#if defined(CONFIG_GK20A_VIDMEM) +static u64 __gk20a_gmmu_alloc(struct nvgpu_allocator *allocator, dma_addr_t at, + size_t size) +{ + u64 addr = 0; + + if (at) + addr = nvgpu_alloc_fixed(allocator, at, size, 0); + else + addr = nvgpu_alloc(allocator, size); + + return addr; +} +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0) +static void gk20a_dma_flags_to_attrs(unsigned long *attrs, + unsigned long flags) +#define ATTR_ARG(x) *x +#else +static void gk20a_dma_flags_to_attrs(struct dma_attrs *attrs, + unsigned long flags) +#define ATTR_ARG(x) x +#endif +{ + if (flags & NVGPU_DMA_NO_KERNEL_MAPPING) + dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, ATTR_ARG(attrs)); + if (flags & NVGPU_DMA_FORCE_CONTIGUOUS) + dma_set_attr(DMA_ATTR_FORCE_CONTIGUOUS, ATTR_ARG(attrs)); + if (flags & NVGPU_DMA_READ_ONLY) + dma_set_attr(DMA_ATTR_READ_ONLY, ATTR_ARG(attrs)); +#undef ATTR_ARG +} + +int gk20a_gmmu_alloc(struct gk20a *g, size_t size, struct nvgpu_mem *mem) +{ + return gk20a_gmmu_alloc_flags(g, 0, size, mem); +} + +int gk20a_gmmu_alloc_flags(struct gk20a *g, unsigned long flags, size_t size, + struct nvgpu_mem *mem) +{ + if (g->mm.vidmem_is_vidmem) { + /* + * Force the no-kernel-mapping flag on because we don't support + * the lack of it for vidmem - the user should not care when + * using gk20a_gmmu_alloc_map and it's vidmem, or if there's a + * difference, the user should use the flag explicitly anyway. + */ + int err = gk20a_gmmu_alloc_flags_vid(g, + flags | NVGPU_DMA_NO_KERNEL_MAPPING, + size, mem); + + if (!err) + return 0; + /* + * Fall back to sysmem (which may then also fail) in case + * vidmem is exhausted. 
+ */ + } + + return gk20a_gmmu_alloc_flags_sys(g, flags, size, mem); +} + +int gk20a_gmmu_alloc_sys(struct gk20a *g, size_t size, struct nvgpu_mem *mem) +{ + return gk20a_gmmu_alloc_flags_sys(g, 0, size, mem); +} + +int gk20a_gmmu_alloc_flags_sys(struct gk20a *g, unsigned long flags, + size_t size, struct nvgpu_mem *mem) +{ + struct device *d = dev_from_gk20a(g); + int err; + dma_addr_t iova; + + gk20a_dbg_fn(""); + + if (flags) { + DEFINE_DMA_ATTRS(dma_attrs); + + gk20a_dma_flags_to_attrs(&dma_attrs, flags); + + if (flags & NVGPU_DMA_NO_KERNEL_MAPPING) { + mem->pages = dma_alloc_attrs(d, + size, &iova, GFP_KERNEL, + __DMA_ATTR(dma_attrs)); + if (!mem->pages) + return -ENOMEM; + } else { + mem->cpu_va = dma_alloc_attrs(d, + size, &iova, GFP_KERNEL, + __DMA_ATTR(dma_attrs)); + if (!mem->cpu_va) + return -ENOMEM; + } + } else { + mem->cpu_va = dma_alloc_coherent(d, size, &iova, GFP_KERNEL); + if (!mem->cpu_va) + return -ENOMEM; + } + + if (flags & NVGPU_DMA_NO_KERNEL_MAPPING) + err = gk20a_get_sgtable_from_pages(d, &mem->sgt, mem->pages, + iova, size); + else { + err = gk20a_get_sgtable(d, &mem->sgt, mem->cpu_va, iova, size); + memset(mem->cpu_va, 0, size); + } + if (err) + goto fail_free; + + mem->size = size; + mem->aperture = APERTURE_SYSMEM; + mem->flags = flags; + + gk20a_dbg_fn("done"); + + return 0; + +fail_free: + dma_free_coherent(d, size, mem->cpu_va, iova); + mem->cpu_va = NULL; + mem->sgt = NULL; + return err; +} + +int gk20a_gmmu_alloc_vid(struct gk20a *g, size_t size, struct nvgpu_mem *mem) +{ + return gk20a_gmmu_alloc_flags_vid(g, + NVGPU_DMA_NO_KERNEL_MAPPING, size, mem); +} + +int gk20a_gmmu_alloc_flags_vid(struct gk20a *g, unsigned long flags, + size_t size, struct nvgpu_mem *mem) +{ + return gk20a_gmmu_alloc_flags_vid_at(g, flags, size, mem, 0); +} + +int gk20a_gmmu_alloc_flags_vid_at(struct gk20a *g, unsigned long flags, + size_t size, struct nvgpu_mem *mem, dma_addr_t at) +{ +#if defined(CONFIG_GK20A_VIDMEM) + u64 addr; + int err; + struct 
nvgpu_allocator *vidmem_alloc = g->mm.vidmem.cleared ? + &g->mm.vidmem.allocator : + &g->mm.vidmem.bootstrap_allocator; + int before_pending; + + gk20a_dbg_fn(""); + + if (!nvgpu_alloc_initialized(&g->mm.vidmem.allocator)) + return -ENOSYS; + + /* + * Our own allocator doesn't have any flags yet, and we can't + * kernel-map these, so require explicit flags. + */ + WARN_ON(flags != NVGPU_DMA_NO_KERNEL_MAPPING); + + nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex); + before_pending = atomic64_read(&g->mm.vidmem.bytes_pending); + addr = __gk20a_gmmu_alloc(vidmem_alloc, at, size); + nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex); + if (!addr) { + /* + * If memory is known to be freed soon, let the user know that + * it may be available after a while. + */ + if (before_pending) + return -EAGAIN; + else + return -ENOMEM; + } + + if (at) + mem->fixed = true; + else + mem->fixed = false; + + mem->sgt = nvgpu_kzalloc(g, sizeof(struct sg_table)); + if (!mem->sgt) { + err = -ENOMEM; + goto fail_physfree; + } + + err = sg_alloc_table(mem->sgt, 1, GFP_KERNEL); + if (err) + goto fail_kfree; + + set_vidmem_page_alloc(mem->sgt->sgl, addr); + sg_set_page(mem->sgt->sgl, NULL, size, 0); + + mem->size = size; + mem->aperture = APERTURE_VIDMEM; + mem->allocator = vidmem_alloc; + mem->flags = flags; + + nvgpu_init_list_node(&mem->clear_list_entry); + + gk20a_dbg_fn("done at 0x%llx size %zu", addr, size); + + return 0; + +fail_kfree: + nvgpu_kfree(g, mem->sgt); +fail_physfree: + nvgpu_free(&g->mm.vidmem.allocator, addr); + return err; +#else + return -ENOSYS; +#endif +} + +int gk20a_gmmu_alloc_map(struct vm_gk20a *vm, size_t size, + struct nvgpu_mem *mem) +{ + return gk20a_gmmu_alloc_map_flags(vm, 0, size, mem); +} + +int gk20a_gmmu_alloc_map_flags(struct vm_gk20a *vm, unsigned long flags, + size_t size, struct nvgpu_mem *mem) +{ + if (vm->mm->vidmem_is_vidmem) { + /* + * Force the no-kernel-mapping flag on because we don't support + * the lack of it for vidmem - the user should 
not care when + * using gk20a_gmmu_alloc_map and it's vidmem, or if there's a + * difference, the user should use the flag explicitly anyway. + */ + int err = gk20a_gmmu_alloc_map_flags_vid(vm, + flags | NVGPU_DMA_NO_KERNEL_MAPPING, + size, mem); + + if (!err) + return 0; + /* + * Fall back to sysmem (which may then also fail) in case + * vidmem is exhausted. + */ + } + + return gk20a_gmmu_alloc_map_flags_sys(vm, flags, size, mem); +} + +int gk20a_gmmu_alloc_map_sys(struct vm_gk20a *vm, size_t size, + struct nvgpu_mem *mem) +{ + return gk20a_gmmu_alloc_map_flags_sys(vm, 0, size, mem); +} + +int gk20a_gmmu_alloc_map_flags_sys(struct vm_gk20a *vm, unsigned long flags, + size_t size, struct nvgpu_mem *mem) +{ + int err = gk20a_gmmu_alloc_flags_sys(vm->mm->g, flags, size, mem); + + if (err) + return err; + + mem->gpu_va = gk20a_gmmu_map(vm, &mem->sgt, size, 0, + gk20a_mem_flag_none, false, + mem->aperture); + if (!mem->gpu_va) { + err = -ENOMEM; + goto fail_free; + } + + return 0; + +fail_free: + gk20a_gmmu_free(vm->mm->g, mem); + return err; +} + +int gk20a_gmmu_alloc_map_vid(struct vm_gk20a *vm, size_t size, + struct nvgpu_mem *mem) +{ + return gk20a_gmmu_alloc_map_flags_vid(vm, + NVGPU_DMA_NO_KERNEL_MAPPING, size, mem); +} + +int gk20a_gmmu_alloc_map_flags_vid(struct vm_gk20a *vm, unsigned long flags, + size_t size, struct nvgpu_mem *mem) +{ + int err = gk20a_gmmu_alloc_flags_vid(vm->mm->g, flags, size, mem); + + if (err) + return err; + + mem->gpu_va = gk20a_gmmu_map(vm, &mem->sgt, size, 0, + gk20a_mem_flag_none, false, + mem->aperture); + if (!mem->gpu_va) { + err = -ENOMEM; + goto fail_free; + } + + return 0; + +fail_free: + gk20a_gmmu_free(vm->mm->g, mem); + return err; +} + +static void gk20a_gmmu_free_sys(struct gk20a *g, struct nvgpu_mem *mem) +{ + struct device *d = dev_from_gk20a(g); + + if (mem->cpu_va || mem->pages) { + if (mem->flags) { + DEFINE_DMA_ATTRS(dma_attrs); + + gk20a_dma_flags_to_attrs(&dma_attrs, mem->flags); + + if (mem->flags & 
NVGPU_DMA_NO_KERNEL_MAPPING) { + dma_free_attrs(d, mem->size, mem->pages, + sg_dma_address(mem->sgt->sgl), + __DMA_ATTR(dma_attrs)); + } else { + dma_free_attrs(d, mem->size, mem->cpu_va, + sg_dma_address(mem->sgt->sgl), + __DMA_ATTR(dma_attrs)); + } + } else { + dma_free_coherent(d, mem->size, mem->cpu_va, + sg_dma_address(mem->sgt->sgl)); + } + mem->cpu_va = NULL; + mem->pages = NULL; + } + + if (mem->sgt) + gk20a_free_sgtable(g, &mem->sgt); + + mem->size = 0; + mem->aperture = APERTURE_INVALID; +} + +static void gk20a_gmmu_free_vid(struct gk20a *g, struct nvgpu_mem *mem) +{ +#if defined(CONFIG_GK20A_VIDMEM) + bool was_empty; + + /* Sanity check - only this supported when allocating. */ + WARN_ON(mem->flags != NVGPU_DMA_NO_KERNEL_MAPPING); + + if (mem->user_mem) { + nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex); + was_empty = nvgpu_list_empty(&g->mm.vidmem.clear_list_head); + nvgpu_list_add_tail(&mem->clear_list_entry, + &g->mm.vidmem.clear_list_head); + atomic64_add(mem->size, &g->mm.vidmem.bytes_pending); + nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex); + + if (was_empty) { + cancel_work_sync(&g->mm.vidmem.clear_mem_worker); + schedule_work(&g->mm.vidmem.clear_mem_worker); + } + } else { + nvgpu_memset(g, mem, 0, 0, mem->size); + nvgpu_free(mem->allocator, + (u64)get_vidmem_page_alloc(mem->sgt->sgl)); + gk20a_free_sgtable(g, &mem->sgt); + + mem->size = 0; + mem->aperture = APERTURE_INVALID; + } +#endif +} + +void gk20a_gmmu_free(struct gk20a *g, struct nvgpu_mem *mem) +{ + switch (mem->aperture) { + case APERTURE_SYSMEM: + return gk20a_gmmu_free_sys(g, mem); + case APERTURE_VIDMEM: + return gk20a_gmmu_free_vid(g, mem); + default: + break; /* like free() on "null" memory */ + } +} + +void gk20a_gmmu_unmap_free(struct vm_gk20a *vm, struct nvgpu_mem *mem) +{ + if (mem->gpu_va) + gk20a_gmmu_unmap(vm, mem->gpu_va, mem->size, gk20a_mem_flag_none); + mem->gpu_va = 0; + + gk20a_gmmu_free(vm->mm->g, mem); +} diff --git a/drivers/gpu/nvgpu/common/semaphore.c 
b/drivers/gpu/nvgpu/common/semaphore.c index 6fb6c27e2..cfe1149f7 100644 --- a/drivers/gpu/nvgpu/common/semaphore.c +++ b/drivers/gpu/nvgpu/common/semaphore.c @@ -18,6 +18,7 @@ #include #include +#include #include #include diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c index 0db6c21ae..e70ee4a67 100644 --- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c @@ -24,6 +24,7 @@ #include +#include #include #include #include diff --git a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c index ce76bfc32..9cc4b6783 100644 --- a/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ce2_gk20a.c @@ -26,6 +26,7 @@ #include #include +#include #include "gk20a.h" #include "debug_gk20a.h" diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index d0e2be79e..6be616b37 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "gk20a.h" #include "debug_gk20a.h" diff --git a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c index 98fa53ab9..738e8c1cd 100644 --- a/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/css_gr_gk20a.c @@ -22,6 +22,7 @@ #include #include +#include #include "gk20a.h" #include "css_gr_gk20a.h" diff --git a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c index 3ed28718e..d8fa75057 100644 --- a/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fecs_trace_gk20a.c @@ -24,6 +24,7 @@ #include #include +#include #include "ctxsw_trace_gk20a.h" #include "fecs_trace_gk20a.h" diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index 1e9a8e15c..c1f94eb38 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c @@ -26,6 +26,7 @@ #endif #include 
+#include #include #include #include diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 971e23209..a9b6a5460 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -29,6 +29,7 @@ #include #include +#include #include #include #include diff --git a/drivers/gpu/nvgpu/gk20a/ltc_common.c b/drivers/gpu/nvgpu/gk20a/ltc_common.c index 7c4db84ec..7c73be777 100644 --- a/drivers/gpu/nvgpu/gk20a/ltc_common.c +++ b/drivers/gpu/nvgpu/gk20a/ltc_common.c @@ -21,6 +21,8 @@ #include #include +#include + #include "gk20a.h" #include "gr_gk20a.h" diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index cdd0e541f..79654af30 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c @@ -30,6 +30,7 @@ #include #include +#include #include #include #include @@ -2519,152 +2520,6 @@ u64 gk20a_gmmu_fixed_map(struct vm_gk20a *vm, aperture); } -int gk20a_gmmu_alloc(struct gk20a *g, size_t size, struct nvgpu_mem *mem) -{ - return gk20a_gmmu_alloc_flags(g, 0, size, mem); -} - -int gk20a_gmmu_alloc_flags(struct gk20a *g, unsigned long flags, size_t size, - struct nvgpu_mem *mem) -{ - if (g->mm.vidmem_is_vidmem) { - /* - * Force the no-kernel-mapping flag on because we don't support - * the lack of it for vidmem - the user should not care when - * using gk20a_gmmu_alloc_map and it's vidmem, or if there's a - * difference, the user should use the flag explicitly anyway. - */ - int err = gk20a_gmmu_alloc_flags_vid(g, - flags | NVGPU_DMA_NO_KERNEL_MAPPING, - size, mem); - - if (!err) - return 0; - /* - * Fall back to sysmem (which may then also fail) in case - * vidmem is exhausted. 
- */ - } - - return gk20a_gmmu_alloc_flags_sys(g, flags, size, mem); -} - -int gk20a_gmmu_alloc_sys(struct gk20a *g, size_t size, struct nvgpu_mem *mem) -{ - return gk20a_gmmu_alloc_flags_sys(g, 0, size, mem); -} - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0) -static void gk20a_dma_flags_to_attrs(unsigned long *attrs, - unsigned long flags) -#define ATTR_ARG(x) *x -#else -static void gk20a_dma_flags_to_attrs(struct dma_attrs *attrs, - unsigned long flags) -#define ATTR_ARG(x) x -#endif -{ - if (flags & NVGPU_DMA_NO_KERNEL_MAPPING) - dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, ATTR_ARG(attrs)); - if (flags & NVGPU_DMA_FORCE_CONTIGUOUS) - dma_set_attr(DMA_ATTR_FORCE_CONTIGUOUS, ATTR_ARG(attrs)); - if (flags & NVGPU_DMA_READ_ONLY) - dma_set_attr(DMA_ATTR_READ_ONLY, ATTR_ARG(attrs)); -#undef ATTR_ARG -} - -int gk20a_gmmu_alloc_flags_sys(struct gk20a *g, unsigned long flags, - size_t size, struct nvgpu_mem *mem) -{ - struct device *d = dev_from_gk20a(g); - int err; - dma_addr_t iova; - - gk20a_dbg_fn(""); - - if (flags) { - DEFINE_DMA_ATTRS(dma_attrs); - - gk20a_dma_flags_to_attrs(&dma_attrs, flags); - - if (flags & NVGPU_DMA_NO_KERNEL_MAPPING) { - mem->pages = dma_alloc_attrs(d, - size, &iova, GFP_KERNEL, - __DMA_ATTR(dma_attrs)); - if (!mem->pages) - return -ENOMEM; - } else { - mem->cpu_va = dma_alloc_attrs(d, - size, &iova, GFP_KERNEL, - __DMA_ATTR(dma_attrs)); - if (!mem->cpu_va) - return -ENOMEM; - } - } else { - mem->cpu_va = dma_alloc_coherent(d, size, &iova, GFP_KERNEL); - if (!mem->cpu_va) - return -ENOMEM; - } - - if (flags & NVGPU_DMA_NO_KERNEL_MAPPING) - err = gk20a_get_sgtable_from_pages(d, &mem->sgt, mem->pages, - iova, size); - else { - err = gk20a_get_sgtable(d, &mem->sgt, mem->cpu_va, iova, size); - memset(mem->cpu_va, 0, size); - } - if (err) - goto fail_free; - - mem->size = size; - mem->aperture = APERTURE_SYSMEM; - mem->flags = flags; - - gk20a_dbg_fn("done"); - - return 0; - -fail_free: - dma_free_coherent(d, size, mem->cpu_va, iova); - 
mem->cpu_va = NULL; - mem->sgt = NULL; - return err; -} - -static void gk20a_gmmu_free_sys(struct gk20a *g, struct nvgpu_mem *mem) -{ - struct device *d = dev_from_gk20a(g); - - if (mem->cpu_va || mem->pages) { - if (mem->flags) { - DEFINE_DMA_ATTRS(dma_attrs); - - gk20a_dma_flags_to_attrs(&dma_attrs, mem->flags); - - if (mem->flags & NVGPU_DMA_NO_KERNEL_MAPPING) { - dma_free_attrs(d, mem->size, mem->pages, - sg_dma_address(mem->sgt->sgl), - __DMA_ATTR(dma_attrs)); - } else { - dma_free_attrs(d, mem->size, mem->cpu_va, - sg_dma_address(mem->sgt->sgl), - __DMA_ATTR(dma_attrs)); - } - } else { - dma_free_coherent(d, mem->size, mem->cpu_va, - sg_dma_address(mem->sgt->sgl)); - } - mem->cpu_va = NULL; - mem->pages = NULL; - } - - if (mem->sgt) - gk20a_free_sgtable(g, &mem->sgt); - - mem->size = 0; - mem->aperture = APERTURE_INVALID; -} - #if defined(CONFIG_GK20A_VIDMEM) static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem) { @@ -2728,153 +2583,6 @@ static int gk20a_gmmu_clear_vidmem_mem(struct gk20a *g, struct nvgpu_mem *mem) } #endif -int gk20a_gmmu_alloc_vid(struct gk20a *g, size_t size, struct nvgpu_mem *mem) -{ - return gk20a_gmmu_alloc_flags_vid(g, - NVGPU_DMA_NO_KERNEL_MAPPING, size, mem); -} - -int gk20a_gmmu_alloc_flags_vid(struct gk20a *g, unsigned long flags, - size_t size, struct nvgpu_mem *mem) -{ - return gk20a_gmmu_alloc_flags_vid_at(g, flags, size, mem, 0); -} - -#if defined(CONFIG_GK20A_VIDMEM) -static u64 __gk20a_gmmu_alloc(struct nvgpu_allocator *allocator, dma_addr_t at, - size_t size) -{ - u64 addr = 0; - - if (at) - addr = nvgpu_alloc_fixed(allocator, at, size, 0); - else - addr = nvgpu_alloc(allocator, size); - - return addr; -} -#endif - -int gk20a_gmmu_alloc_flags_vid_at(struct gk20a *g, unsigned long flags, - size_t size, struct nvgpu_mem *mem, dma_addr_t at) -{ -#if defined(CONFIG_GK20A_VIDMEM) - u64 addr; - int err; - struct nvgpu_allocator *vidmem_alloc = g->mm.vidmem.cleared ? 
- &g->mm.vidmem.allocator : - &g->mm.vidmem.bootstrap_allocator; - int before_pending; - - gk20a_dbg_fn(""); - - if (!nvgpu_alloc_initialized(&g->mm.vidmem.allocator)) - return -ENOSYS; - - /* - * Our own allocator doesn't have any flags yet, and we can't - * kernel-map these, so require explicit flags. - */ - WARN_ON(flags != NVGPU_DMA_NO_KERNEL_MAPPING); - - nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex); - before_pending = atomic64_read(&g->mm.vidmem.bytes_pending); - addr = __gk20a_gmmu_alloc(vidmem_alloc, at, size); - nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex); - if (!addr) { - /* - * If memory is known to be freed soon, let the user know that - * it may be available after a while. - */ - if (before_pending) - return -EAGAIN; - else - return -ENOMEM; - } - - if (at) - mem->fixed = true; - else - mem->fixed = false; - - mem->sgt = nvgpu_kzalloc(g, sizeof(struct sg_table)); - if (!mem->sgt) { - err = -ENOMEM; - goto fail_physfree; - } - - err = sg_alloc_table(mem->sgt, 1, GFP_KERNEL); - if (err) - goto fail_kfree; - - set_vidmem_page_alloc(mem->sgt->sgl, addr); - sg_set_page(mem->sgt->sgl, NULL, size, 0); - - mem->size = size; - mem->aperture = APERTURE_VIDMEM; - mem->allocator = vidmem_alloc; - mem->flags = flags; - - nvgpu_init_list_node(&mem->clear_list_entry); - - gk20a_dbg_fn("done at 0x%llx size %zu", addr, size); - - return 0; - -fail_kfree: - nvgpu_kfree(g, mem->sgt); -fail_physfree: - nvgpu_free(&g->mm.vidmem.allocator, addr); - return err; -#else - return -ENOSYS; -#endif -} - -static void gk20a_gmmu_free_vid(struct gk20a *g, struct nvgpu_mem *mem) -{ -#if defined(CONFIG_GK20A_VIDMEM) - bool was_empty; - - /* Sanity check - only this supported when allocating. 
*/ - WARN_ON(mem->flags != NVGPU_DMA_NO_KERNEL_MAPPING); - - if (mem->user_mem) { - nvgpu_mutex_acquire(&g->mm.vidmem.clear_list_mutex); - was_empty = nvgpu_list_empty(&g->mm.vidmem.clear_list_head); - nvgpu_list_add_tail(&mem->clear_list_entry, - &g->mm.vidmem.clear_list_head); - atomic64_add(mem->size, &g->mm.vidmem.bytes_pending); - nvgpu_mutex_release(&g->mm.vidmem.clear_list_mutex); - - if (was_empty) { - cancel_work_sync(&g->mm.vidmem.clear_mem_worker); - schedule_work(&g->mm.vidmem.clear_mem_worker); - } - } else { - nvgpu_memset(g, mem, 0, 0, mem->size); - nvgpu_free(mem->allocator, - (u64)get_vidmem_page_alloc(mem->sgt->sgl)); - gk20a_free_sgtable(g, &mem->sgt); - - mem->size = 0; - mem->aperture = APERTURE_INVALID; - } -#endif -} - -void gk20a_gmmu_free(struct gk20a *g, struct nvgpu_mem *mem) -{ - switch (mem->aperture) { - case APERTURE_SYSMEM: - return gk20a_gmmu_free_sys(g, mem); - case APERTURE_VIDMEM: - return gk20a_gmmu_free_vid(g, mem); - default: - break; /* like free() on "null" memory */ - } -} - /* * If mem is in VIDMEM, return base address in vidmem * else return IOVA address for SYSMEM @@ -2938,105 +2646,6 @@ static void gk20a_vidmem_clear_mem_worker(struct work_struct *work) } #endif -int gk20a_gmmu_alloc_map(struct vm_gk20a *vm, size_t size, - struct nvgpu_mem *mem) -{ - return gk20a_gmmu_alloc_map_flags(vm, 0, size, mem); -} - -int gk20a_gmmu_alloc_map_flags(struct vm_gk20a *vm, unsigned long flags, - size_t size, struct nvgpu_mem *mem) -{ - if (vm->mm->vidmem_is_vidmem) { - /* - * Force the no-kernel-mapping flag on because we don't support - * the lack of it for vidmem - the user should not care when - * using gk20a_gmmu_alloc_map and it's vidmem, or if there's a - * difference, the user should use the flag explicitly anyway. 
- */ - int err = gk20a_gmmu_alloc_map_flags_vid(vm, - flags | NVGPU_DMA_NO_KERNEL_MAPPING, - size, mem); - - if (!err) - return 0; - /* - * Fall back to sysmem (which may then also fail) in case - * vidmem is exhausted. - */ - } - - return gk20a_gmmu_alloc_map_flags_sys(vm, flags, size, mem); -} - -int gk20a_gmmu_alloc_map_sys(struct vm_gk20a *vm, size_t size, - struct nvgpu_mem *mem) -{ - return gk20a_gmmu_alloc_map_flags_sys(vm, 0, size, mem); -} - -int gk20a_gmmu_alloc_map_flags_sys(struct vm_gk20a *vm, unsigned long flags, - size_t size, struct nvgpu_mem *mem) -{ - int err = gk20a_gmmu_alloc_flags_sys(vm->mm->g, flags, size, mem); - - if (err) - return err; - - mem->gpu_va = gk20a_gmmu_map(vm, &mem->sgt, size, 0, - gk20a_mem_flag_none, false, - mem->aperture); - if (!mem->gpu_va) { - err = -ENOMEM; - goto fail_free; - } - - return 0; - -fail_free: - gk20a_gmmu_free(vm->mm->g, mem); - return err; -} - -int gk20a_gmmu_alloc_map_vid(struct vm_gk20a *vm, size_t size, - struct nvgpu_mem *mem) -{ - return gk20a_gmmu_alloc_map_flags_vid(vm, - NVGPU_DMA_NO_KERNEL_MAPPING, size, mem); -} - -int gk20a_gmmu_alloc_map_flags_vid(struct vm_gk20a *vm, unsigned long flags, - size_t size, struct nvgpu_mem *mem) -{ - int err = gk20a_gmmu_alloc_flags_vid(vm->mm->g, flags, size, mem); - - if (err) - return err; - - mem->gpu_va = gk20a_gmmu_map(vm, &mem->sgt, size, 0, - gk20a_mem_flag_none, false, - mem->aperture); - if (!mem->gpu_va) { - err = -ENOMEM; - goto fail_free; - } - - return 0; - -fail_free: - gk20a_gmmu_free(vm->mm->g, mem); - return err; -} - -void gk20a_gmmu_unmap_free(struct vm_gk20a *vm, struct nvgpu_mem *mem) -{ - if (mem->gpu_va) - gk20a_gmmu_unmap(vm, mem->gpu_va, mem->size, gk20a_mem_flag_none); - mem->gpu_va = 0; - - gk20a_gmmu_free(vm->mm->g, mem); -} - dma_addr_t gk20a_mm_gpuva_to_iova_base(struct vm_gk20a *vm, u64 gpu_vaddr) { struct mapped_buffer_node *buffer; diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index 
db72ca79f..53366caf0 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h @@ -524,56 +524,6 @@ u64 gk20a_gmmu_fixed_map(struct vm_gk20a *vm, bool priv, enum nvgpu_aperture aperture); -/* Flags for the below gk20a_gmmu_{alloc,alloc_map}_flags* */ - -/* - * Don't create a virtual kernel mapping for the buffer but only allocate it; - * this may save some resources. The buffer can be mapped later explicitly. - */ -#define NVGPU_DMA_NO_KERNEL_MAPPING (1 << 0) -/* - * Don't allow building the buffer from individual pages but require a - * physically contiguous block. - */ -#define NVGPU_DMA_FORCE_CONTIGUOUS (1 << 1) -/* - * Make the mapping read-only. - */ -#define NVGPU_DMA_READ_ONLY (1 << 2) - -int gk20a_gmmu_alloc_map(struct vm_gk20a *vm, size_t size, - struct nvgpu_mem *mem); -int gk20a_gmmu_alloc_map_flags(struct vm_gk20a *vm, unsigned long flags, - size_t size, struct nvgpu_mem *mem); - -int gk20a_gmmu_alloc_map_sys(struct vm_gk20a *vm, size_t size, - struct nvgpu_mem *mem); -int gk20a_gmmu_alloc_map_flags_sys(struct vm_gk20a *vm, unsigned long flags, - size_t size, struct nvgpu_mem *mem); - -int gk20a_gmmu_alloc_map_vid(struct vm_gk20a *vm, size_t size, - struct nvgpu_mem *mem); -int gk20a_gmmu_alloc_map_flags_vid(struct vm_gk20a *vm, unsigned long flags, - size_t size, struct nvgpu_mem *mem); - -void gk20a_gmmu_unmap_free(struct vm_gk20a *vm, struct nvgpu_mem *mem); - -int gk20a_gmmu_alloc(struct gk20a *g, size_t size, struct nvgpu_mem *mem); -int gk20a_gmmu_alloc_flags(struct gk20a *g, unsigned long flags, size_t size, - struct nvgpu_mem *mem); - -int gk20a_gmmu_alloc_sys(struct gk20a *g, size_t size, struct nvgpu_mem *mem); -int gk20a_gmmu_alloc_flags_sys(struct gk20a *g, unsigned long flags, - size_t size, struct nvgpu_mem *mem); - -int gk20a_gmmu_alloc_vid(struct gk20a *g, size_t size, struct nvgpu_mem *mem); -int gk20a_gmmu_alloc_flags_vid(struct gk20a *g, unsigned long flags, - size_t size, struct nvgpu_mem *mem); -int 
gk20a_gmmu_alloc_flags_vid_at(struct gk20a *g, unsigned long flags, - size_t size, struct nvgpu_mem *mem, dma_addr_t at); - -void gk20a_gmmu_free(struct gk20a *g, struct nvgpu_mem *mem); - static inline phys_addr_t gk20a_mem_phys(struct nvgpu_mem *mem) { /* FIXME: the sgt/sgl may get null if this is accessed e.g. in an isr diff --git a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c index 591b71636..7a6bfe227 100644 --- a/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/pmu_gk20a.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "gk20a.h" #include "gr_gk20a.h" diff --git a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c index 013ce43af..3cfcbb193 100644 --- a/drivers/gpu/nvgpu/gm20b/acr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/acr_gm20b.c @@ -20,6 +20,7 @@ #include +#include #include #include #include diff --git a/drivers/gpu/nvgpu/gp106/acr_gp106.c b/drivers/gpu/nvgpu/gp106/acr_gp106.c index 41c4981d7..9acc8edad 100644 --- a/drivers/gpu/nvgpu/gp106/acr_gp106.c +++ b/drivers/gpu/nvgpu/gp106/acr_gp106.c @@ -20,6 +20,7 @@ #include #include +#include #include #include "gk20a/gk20a.h" diff --git a/drivers/gpu/nvgpu/gp106/gr_gp106.c b/drivers/gpu/nvgpu/gp106/gr_gp106.c index dae23374e..78859f881 100644 --- a/drivers/gpu/nvgpu/gp106/gr_gp106.c +++ b/drivers/gpu/nvgpu/gp106/gr_gp106.c @@ -13,6 +13,8 @@ * more details. 
*/ +#include + #include "gk20a/gk20a.h" #include "gk20a/gr_gk20a.h" #include "gm20b/gr_gm20b.h" diff --git a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c index a7e77232a..b305b8959 100644 --- a/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/fifo_gp10b.c @@ -15,6 +15,8 @@ #include +#include + #include "fifo_gp10b.h" #include "gk20a/gk20a.h" diff --git a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c index 8e1517f63..b93671201 100644 --- a/drivers/gpu/nvgpu/gp10b/gr_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/gr_gp10b.c @@ -22,6 +22,7 @@ #include #include +#include #include "gk20a/gk20a.h" #include "gk20a/gr_gk20a.h" diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c index a0dc8c55f..2f894435e 100644 --- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c @@ -16,6 +16,8 @@ #include #include +#include + #include "gk20a/gk20a.h" #include "gm20b/mm_gm20b.h" #include "mm_gp10b.h" diff --git a/drivers/gpu/nvgpu/gp10b/rpfb_gp10b.c b/drivers/gpu/nvgpu/gp10b/rpfb_gp10b.c index e73bcd8f1..bf52b5c97 100644 --- a/drivers/gpu/nvgpu/gp10b/rpfb_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/rpfb_gp10b.c @@ -16,6 +16,8 @@ #include #include +#include + #include "gk20a/gk20a.h" #include "rpfb_gp10b.h" diff --git a/drivers/gpu/nvgpu/include/nvgpu/dma.h b/drivers/gpu/nvgpu/include/nvgpu/dma.h new file mode 100644 index 000000000..d4fad584e --- /dev/null +++ b/drivers/gpu/nvgpu/include/nvgpu/dma.h @@ -0,0 +1,312 @@ +/* + * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __NVGPU_DMA_H__ +#define __NVGPU_DMA_H__ + +#include + +struct gk20a; +struct vm_gk20a; +struct nvgpu_mem; + +/* + * Flags for the below gk20a_gmmu_{alloc,alloc_map}_flags* + */ + +/* + * Don't create a virtual kernel mapping for the buffer but only allocate it; + * this may save some resources. The buffer can be mapped later explicitly. + */ +#define NVGPU_DMA_NO_KERNEL_MAPPING (1 << 0) + +/* + * Don't allow building the buffer from individual pages but require a + * physically contiguous block. + */ +#define NVGPU_DMA_FORCE_CONTIGUOUS (1 << 1) + +/* + * Make the mapping read-only. + */ +#define NVGPU_DMA_READ_ONLY (1 << 2) + +/** + * gk20a_gmmu_alloc - Allocate DMA memory + * + * @g - The GPU. + * @size - Size of the allocation in bytes. + * @mem - Struct for storing the allocation information. + * + * Allocate memory suitable for doing DMA. Store the allocation info in @mem. + * Returns 0 on success and a suitable error code when there's an error. This + * memory can be either placed in VIDMEM or SYSMEM, which ever is more + * convenient for the driver. + */ +int gk20a_gmmu_alloc(struct gk20a *g, size_t size, struct nvgpu_mem *mem); + +/** + * gk20a_gmmu_alloc_flags - Allocate DMA memory + * + * @g - The GPU. + * @flags - Flags modifying the operation of the DMA allocation. + * @size - Size of the allocation in bytes. + * @mem - Struct for storing the allocation information. + * + * Allocate memory suitable for doing DMA. Store the allocation info in @mem. + * Returns 0 on success and a suitable error code when there's an error. 
This + * memory can be either placed in VIDMEM or SYSMEM, which ever is more + * convenient for the driver. + * + * The following flags are accepted: + * + * %NVGPU_DMA_NO_KERNEL_MAPPING + * %NVGPU_DMA_FORCE_CONTIGUOUS + * %NVGPU_DMA_READ_ONLY + */ +int gk20a_gmmu_alloc_flags(struct gk20a *g, unsigned long flags, size_t size, + struct nvgpu_mem *mem); + +/** + * gk20a_gmmu_alloc_sys - Allocate DMA memory + * + * @g - The GPU. + * @size - Size of the allocation in bytes. + * @mem - Struct for storing the allocation information. + * + * Allocate memory suitable for doing DMA. Store the allocation info in @mem. + * Returns 0 on success and a suitable error code when there's an error. This + * allocates memory specifically in SYSMEM. + */ +int gk20a_gmmu_alloc_sys(struct gk20a *g, size_t size, struct nvgpu_mem *mem); + +/** + * gk20a_gmmu_alloc_flags_sys - Allocate DMA memory + * + * @g - The GPU. + * @flags - Flags modifying the operation of the DMA allocation. + * @size - Size of the allocation in bytes. + * @mem - Struct for storing the allocation information. + * + * Allocate memory suitable for doing DMA. Store the allocation info in @mem. + * Returns 0 on success and a suitable error code when there's an error. This + * allocates memory specifically in SYSMEM. + * + * The following flags are accepted: + * + * %NVGPU_DMA_NO_KERNEL_MAPPING + * %NVGPU_DMA_FORCE_CONTIGUOUS + * %NVGPU_DMA_READ_ONLY + */ +int gk20a_gmmu_alloc_flags_sys(struct gk20a *g, unsigned long flags, + size_t size, struct nvgpu_mem *mem); + +/** + * gk20a_gmmu_alloc_vid - Allocate DMA memory + * + * @g - The GPU. + * @size - Size of the allocation in bytes. + * @mem - Struct for storing the allocation information. + * + * Allocate memory suitable for doing DMA. Store the allocation info in @mem. + * Returns 0 on success and a suitable error code when there's an error. This + * allocates memory specifically in VIDMEM. 
+ */ +int gk20a_gmmu_alloc_vid(struct gk20a *g, size_t size, struct nvgpu_mem *mem); + +/** + * gk20a_gmmu_alloc_flags_vid - Allocate DMA memory + * + * @g - The GPU. + * @flags - Flags modifying the operation of the DMA allocation. + * @size - Size of the allocation in bytes. + * @mem - Struct for storing the allocation information. + * + * Allocate memory suitable for doing DMA. Store the allocation info in @mem. + * Returns 0 on success and a suitable error code when there's an error. This + * allocates memory specifically in VIDMEM. + * + * Only the following flags are accepted: + * + * %NVGPU_DMA_NO_KERNEL_MAPPING + * + */ +int gk20a_gmmu_alloc_flags_vid(struct gk20a *g, unsigned long flags, + size_t size, struct nvgpu_mem *mem); + +/** + * gk20a_gmmu_alloc_flags_vid_at - Allocate DMA memory + * + * @g - The GPU. + * @flags - Flags modifying the operation of the DMA allocation. + * @size - Size of the allocation in bytes. + * @mem - Struct for storing the allocation information. + * @at - A specific location to attempt to allocate memory from or 0 if the + * caller does not care what the address is. + * + * Allocate memory suitable for doing DMA. Store the allocation info in @mem. + * Returns 0 on success and a suitable error code when there's an error. This + * allocates memory specifically in VIDMEM. + * + * Only the following flags are accepted: + * + * %NVGPU_DMA_NO_KERNEL_MAPPING + */ +int gk20a_gmmu_alloc_flags_vid_at(struct gk20a *g, unsigned long flags, + size_t size, struct nvgpu_mem *mem, dma_addr_t at); + +/** + * gk20a_gmmu_free - Free a DMA allocation + * + * @g - The GPU. + * @mem - An allocation to free. 
+ * + * Free memory created with any of: + * + * gk20a_gmmu_alloc() + * gk20a_gmmu_alloc_flags() + * gk20a_gmmu_alloc_sys() + * gk20a_gmmu_alloc_flags_sys() + * gk20a_gmmu_alloc_vid() + * gk20a_gmmu_alloc_flags_vid() + * gk20a_gmmu_alloc_flags_vid_at() + */ +void gk20a_gmmu_free(struct gk20a *g, struct nvgpu_mem *mem); + +/** + * gk20a_gmmu_alloc_map - Allocate DMA memory and map into GMMU. + * + * @vm - VM context for GMMU mapping. + * @size - Size of the allocation in bytes. + * @mem - Struct for storing the allocation information. + * + * Allocate memory suitable for doing DMA and map that memory into the GMMU. + * Note this is different than mapping it into the CPU. This memory can be + * either placed in VIDMEM or SYSMEM, which ever is more convenient for the + * driver. + */ +int gk20a_gmmu_alloc_map(struct vm_gk20a *vm, size_t size, + struct nvgpu_mem *mem); + +/** + * gk20a_gmmu_alloc_map_flags - Allocate DMA memory and map into GMMU. + * + * @vm - VM context for GMMU mapping. + * @flags - Flags modifying the operation of the DMA allocation. + * @size - Size of the allocation in bytes. + * @mem - Struct for storing the allocation information. + * + * Allocate memory suitable for doing DMA and map that memory into the GMMU. + * Note this is different than mapping it into the CPU. This memory can be + * either placed in VIDMEM or SYSMEM, which ever is more convenient for the + * driver. + * + * This version passes @flags on to the underlying DMA allocation. The accepted + * flags are: + * + * %NVGPU_DMA_NO_KERNEL_MAPPING + * %NVGPU_DMA_FORCE_CONTIGUOUS + * %NVGPU_DMA_READ_ONLY + */ +int gk20a_gmmu_alloc_map_flags(struct vm_gk20a *vm, unsigned long flags, + size_t size, struct nvgpu_mem *mem); + +/** + * gk20a_gmmu_alloc_map_sys - Allocate DMA memory and map into GMMU. + * + * @vm - VM context for GMMU mapping. + * @size - Size of the allocation in bytes. + * @mem - Struct for storing the allocation information. 
+ * + * Allocate memory suitable for doing DMA and map that memory into the GMMU. + * This memory will be placed in SYSMEM. + */ +int gk20a_gmmu_alloc_map_sys(struct vm_gk20a *vm, size_t size, + struct nvgpu_mem *mem); + +/** + * gk20a_gmmu_alloc_map_flags_sys - Allocate DMA memory and map into GMMU. + * + * @vm - VM context for GMMU mapping. + * @flags - Flags modifying the operation of the DMA allocation. + * @size - Size of the allocation in bytes. + * @mem - Struct for storing the allocation information. + * + * Allocate memory suitable for doing DMA and map that memory into the GMMU. + * This memory will be placed in SYSMEM. + * + * This version passes @flags on to the underlying DMA allocation. The accepted + * flags are: + * + * %NVGPU_DMA_NO_KERNEL_MAPPING + * %NVGPU_DMA_FORCE_CONTIGUOUS + * %NVGPU_DMA_READ_ONLY + */ +int gk20a_gmmu_alloc_map_flags_sys(struct vm_gk20a *vm, unsigned long flags, + size_t size, struct nvgpu_mem *mem); + +/** + * gk20a_gmmu_alloc_map_vid - Allocate DMA memory and map into GMMU. + * + * @vm - VM context for GMMU mapping. + * @size - Size of the allocation in bytes. + * @mem - Struct for storing the allocation information. + * + * Allocate memory suitable for doing DMA and map that memory into the GMMU. + * This memory will be placed in VIDMEM. + */ +int gk20a_gmmu_alloc_map_vid(struct vm_gk20a *vm, size_t size, + struct nvgpu_mem *mem); + +/** + * gk20a_gmmu_alloc_map_flags_vid - Allocate DMA memory and map into GMMU. + * + * @vm - VM context for GMMU mapping. + * @flags - Flags modifying the operation of the DMA allocation. + * @size - Size of the allocation in bytes. + * @mem - Struct for storing the allocation information. + * + * Allocate memory suitable for doing DMA and map that memory into the GMMU. + * This memory will be placed in VIDMEM. + * + * This version passes @flags on to the underlying DMA allocation. 
The accepted
+ * flags are:
+ *
+ * %NVGPU_DMA_NO_KERNEL_MAPPING
+ * %NVGPU_DMA_FORCE_CONTIGUOUS
+ * %NVGPU_DMA_READ_ONLY
+ */
+int gk20a_gmmu_alloc_map_flags_vid(struct vm_gk20a *vm, unsigned long flags,
+		size_t size, struct nvgpu_mem *mem);
+
+/**
+ * gk20a_gmmu_unmap_free - Free a DMA allocation
+ *
+ * @g - The GPU.
+ * @mem - An allocation to free.
+ *
+ * Free memory created with any of:
+ *
+ * gk20a_gmmu_alloc_map()
+ * gk20a_gmmu_alloc_map_flags()
+ * gk20a_gmmu_alloc_map_sys()
+ * gk20a_gmmu_alloc_map_flags_sys()
+ * gk20a_gmmu_alloc_map_vid()
+ * gk20a_gmmu_alloc_map_flags_vid()
+ */
+void gk20a_gmmu_unmap_free(struct vm_gk20a *vm, struct nvgpu_mem *mem);
+
+#endif
diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
index cfe9322e1..59fb0c4af 100644
--- a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
@@ -17,6 +17,7 @@
 #include
 #include
+#include <nvgpu/dma.h>
 #include "vgpu/vgpu.h"
 #include "gk20a/ctxsw_trace_gk20a.h"
diff --git a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c
index 8cb5b0299..527e12e4a 100644
--- a/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c
+++ b/drivers/gpu/nvgpu/vgpu/gp10b/vgpu_gr_gp10b.c
@@ -12,6 +12,7 @@
 */
 #include
+#include <nvgpu/dma.h>
 #include "vgpu/vgpu.h"
 #include "vgpu/gm20b/vgpu_gr_gm20b.h"
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
index ea81cefe3..b12f8a53d 100644
--- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
@@ -16,6 +16,7 @@
 #include
 #include
+#include <nvgpu/dma.h>
 #include "vgpu/vgpu.h"
 #include "gk20a/mm_gk20a.h"