diff --git a/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c b/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c
index 796169abb..f3fbb413e 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c
@@ -25,6 +25,7 @@
 #include <nvgpu/list.h>
 #include <nvgpu/dma.h>
 #include <nvgpu/gmmu.h>
+#include <nvgpu/pd_cache.h>
 #include <nvgpu/nvgpu_mem.h>
 #include <nvgpu/nvgpu_sgt.h>
 #include <nvgpu/enabled.h>
@@ -218,22 +219,6 @@ int nvgpu_gmmu_init_page_table(struct vm_gk20a *vm)
 	return 0;
 }
 
-/*
- * Return the _physical_ address of a page directory.
- */
-u64 nvgpu_pde_gpu_addr(struct gk20a *g, struct nvgpu_gmmu_pd *pd)
-{
-	u64 page_addr;
-
-	if (nvgpu_is_enabled(g, NVGPU_SUPPORT_NVLINK)) {
-		page_addr = nvgpu_mem_get_phys_addr(g, pd->mem);
-	} else {
-		page_addr = nvgpu_mem_get_addr(g, pd->mem);
-	}
-
-	return page_addr + pd->mem_offs;
-}
-
 /*
  * Return the aligned length based on the page size in attrs.
  */
@@ -477,7 +462,7 @@ static int __set_pd_level(struct vm_gk20a *vm,
 		 * target addr is the real physical address we are aiming for.
 		 */
 		target_addr = (next_pd != NULL) ?
-			nvgpu_pde_gpu_addr(g, next_pd) :
+			nvgpu_pd_gpu_addr(g, next_pd) :
 			phys_addr;
 
 		l->update_entry(vm, l,
@@ -987,8 +972,8 @@ static int __nvgpu_locate_pte(struct gk20a *g, struct vm_gk20a *vm,
 	 * Take into account the real offset into the nvgpu_mem since the PD
 	 * may be located at an offset other than 0 (due to PD packing).
 	 */
-	pte_base = (pd->mem_offs / sizeof(u32)) +
-		pd_offset_from_index(l, pd_idx);
+	pte_base = (u32)(pd->mem_offs / sizeof(u32)) +
+		nvgpu_pd_offset_from_index(l, pd_idx);
 	pte_size = (u32)(l->entry_size / sizeof(u32));
 
 	if (data != NULL) {
@@ -1006,7 +991,7 @@ static int __nvgpu_locate_pte(struct gk20a *g, struct vm_gk20a *vm,
 	}
 
 	if (pd_offs_out != NULL) {
-		*pd_offs_out = pd_offset_from_index(l, pd_idx);
+		*pd_offs_out = nvgpu_pd_offset_from_index(l, pd_idx);
 	}
 
 	return 0;
@@ -1043,7 +1028,7 @@ int __nvgpu_set_pte(struct gk20a *g, struct vm_gk20a *vm, u64 vaddr, u32 *pte)
 	pte_size = __nvgpu_pte_words(g);
 
 	for (i = 0; i < pte_size; i++) {
-		pd_write(g, pd, (size_t)pd_offs + (size_t)i, pte[i]);
+		nvgpu_pd_write(g, pd, (size_t)pd_offs + (size_t)i, pte[i]);
 		pte_dbg(g, attrs_ptr,
 			"PTE: idx=%-4u (%d) 0x%08x", pd_idx, i, pte[i]);
 	}
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu/pd_cache.c b/drivers/gpu/nvgpu/common/mm/gmmu/pd_cache.c
index 8db71b5f6..838fc7d6e 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu/pd_cache.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu/pd_cache.c
@@ -28,6 +28,7 @@
 #include <nvgpu/list.h>
 #include <nvgpu/log2.h>
 #include <nvgpu/gk20a.h>
+#include <nvgpu/enabled.h>
 
 #define pd_dbg(g, fmt, args...) nvgpu_log(g, gpu_dbg_pd_cache, fmt, ##args)
 
@@ -159,6 +160,34 @@ static u32 nvgpu_pd_cache_get_nr_entries(struct nvgpu_pd_mem_entry *pentry)
 	return PAGE_SIZE / pentry->pd_size;
 }
 
+/* Return the address of a page directory (PD base in pd->mem + mem_offs).
+ * The base is nvgpu_mem_get_phys_addr() with NVLINK, else nvgpu_mem_get_addr().
+ */
+u64 nvgpu_pd_gpu_addr(struct gk20a *g, struct nvgpu_gmmu_pd *pd)
+{
+	u64 page_addr;
+
+	if (nvgpu_is_enabled(g, NVGPU_SUPPORT_NVLINK)) {
+		page_addr = nvgpu_mem_get_phys_addr(g, pd->mem);
+	} else {
+		page_addr = nvgpu_mem_get_addr(g, pd->mem);
+	}
+
+	return page_addr + pd->mem_offs;
+}
+
+u32 nvgpu_pd_offset_from_index(const struct gk20a_mmu_level *l, u32 pd_idx)
+{
+	return (pd_idx * l->entry_size) / U32(sizeof(u32)); /* in u32 words */
+}
+
+void nvgpu_pd_write(struct gk20a *g, struct nvgpu_gmmu_pd *pd,
+		    size_t w, u32 data)
+{
+	nvgpu_mem_wr32(g, pd->mem, /* u32 index = PD base word + w */
+		       (u32)((pd->mem_offs / sizeof(u32)) + w), data);
+}
+
 int nvgpu_pd_cache_init(struct gk20a *g)
 {
 	struct nvgpu_pd_cache *cache;
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 5231f3f59..8e7c17edd 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -44,6 +44,7 @@
 #include <nvgpu/utils.h>
 #include <nvgpu/gk20a.h>
 #include <nvgpu/channel.h>
+#include <nvgpu/pd_cache.h>
 
 #include "mm_gk20a.h"
 #include "fence_gk20a.h"
@@ -162,7 +163,7 @@ static void update_gmmu_pde_locked(struct vm_gk20a *vm,
 {
 	struct gk20a *g = gk20a_from_vm(vm);
 	bool small_valid, big_valid;
-	u32 pd_offset = pd_offset_from_index(l, pd_idx);
+	u32 pd_offset = nvgpu_pd_offset_from_index(l, pd_idx);
 	u32 pde_v[2] = {0, 0};
 
 	small_valid = attrs->pgsz == GMMU_PAGE_SIZE_SMALL;
@@ -190,8 +191,8 @@ static void update_gmmu_pde_locked(struct vm_gk20a *vm,
 		virt_addr, phys_addr,
 		pde_v[1], pde_v[0]);
 
-	pd_write(g, &vm->pdb, (size_t)pd_offset + (size_t)0, pde_v[0]);
-	pd_write(g, &vm->pdb, (size_t)pd_offset + (size_t)1, pde_v[1]);
+	nvgpu_pd_write(g, &vm->pdb, (size_t)pd_offset + (size_t)0, pde_v[0]);
+	nvgpu_pd_write(g, &vm->pdb, (size_t)pd_offset + (size_t)1, pde_v[1]);
 }
 
 static void __update_pte_sparse(u32 *pte_w)
@@ -268,7 +269,7 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm,
 {
 	struct gk20a *g = gk20a_from_vm(vm);
 	u32 page_size  = vm->gmmu_page_sizes[attrs->pgsz];
-	u32 pd_offset = pd_offset_from_index(l, pd_idx);
+	u32 pd_offset = nvgpu_pd_offset_from_index(l, pd_idx);
 	u32 pte_w[2] = {0, 0};
 	int ctag_shift = 0;
 	int shamt = ilog2(g->ops.fb.compression_page_size(g));
@@ -304,8 +305,8 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm,
 		(u32)attrs->ctag >> ctag_shift,
 		pte_w[1], pte_w[0]);
 
-	pd_write(g, pd, (size_t)pd_offset + (size_t)0, pte_w[0]);
-	pd_write(g, pd, (size_t)pd_offset + (size_t)1, pte_w[1]);
+	nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)0, pte_w[0]);
+	nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)1, pte_w[1]);
 }
 
 u32 gk20a_get_pde_pgsz(struct gk20a *g, const struct gk20a_mmu_level *l,
@@ -376,7 +377,7 @@ int gk20a_vm_bind_channel(struct vm_gk20a *vm, struct channel_gk20a *ch)
 void gk20a_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block,
 		struct vm_gk20a *vm)
 {
-	u64 pdb_addr = nvgpu_pde_gpu_addr(g, &vm->pdb);
+	u64 pdb_addr = nvgpu_pd_gpu_addr(g, &vm->pdb);
 	u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
 	u32 pdb_addr_hi = u64_hi32(pdb_addr);
 
diff --git a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
index 2a1b065eb..9e0bf185d 100644
--- a/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/mm_gp10b.c
@@ -25,6 +25,7 @@
 #include <nvgpu/mm.h>
 #include <nvgpu/dma.h>
 #include <nvgpu/gmmu.h>
+#include <nvgpu/pd_cache.h>
 #include <nvgpu/sizes.h>
 #include <nvgpu/utils.h>
 #include <nvgpu/gk20a.h>
@@ -88,7 +89,7 @@ static void update_gmmu_pde3_locked(struct vm_gk20a *vm,
 {
 	struct gk20a *g = gk20a_from_vm(vm);
 	struct nvgpu_gmmu_pd *next_pd = &pd->entries[pd_idx];
-	u32 pd_offset = pd_offset_from_index(l, pd_idx);
+	u32 pd_offset = nvgpu_pd_offset_from_index(l, pd_idx);
 	u32 pde_v[2] = {0, 0};
 
 	phys_addr >>= gmmu_new_pde_address_shift_v();
@@ -101,8 +102,8 @@ static void update_gmmu_pde3_locked(struct vm_gk20a *vm,
 	pde_v[0] |= gmmu_new_pde_vol_true_f();
 	pde_v[1] |= phys_addr >> 24;
 
-	pd_write(g, pd, (size_t)pd_offset + (size_t)0, pde_v[0]);
-	pd_write(g, pd, (size_t)pd_offset + (size_t)1, pde_v[1]);
+	nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)0, pde_v[0]);
+	nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)1, pde_v[1]);
 
 	pte_dbg(g, attrs,
 		"PDE: i=%-4u size=%-2u offs=%-4u pgsz: -- | "
@@ -125,7 +126,7 @@ static void update_gmmu_pde0_locked(struct vm_gk20a *vm,
 	struct nvgpu_gmmu_pd *next_pd = &pd->entries[pd_idx];
 	bool small_valid, big_valid;
 	u32 small_addr = 0, big_addr = 0;
-	u32 pd_offset = pd_offset_from_index(l, pd_idx);
+	u32 pd_offset = nvgpu_pd_offset_from_index(l, pd_idx);
 	u32 pde_v[4] = {0, 0, 0, 0};
 
 	small_valid = attrs->pgsz == GMMU_PAGE_SIZE_SMALL;
@@ -160,10 +161,10 @@ static void update_gmmu_pde0_locked(struct vm_gk20a *vm,
 		pde_v[1] |= big_addr >> 28;
 	}
 
-	pd_write(g, pd, (size_t)pd_offset + (size_t)0, pde_v[0]);
-	pd_write(g, pd, (size_t)pd_offset + (size_t)1, pde_v[1]);
-	pd_write(g, pd, (size_t)pd_offset + (size_t)2, pde_v[2]);
-	pd_write(g, pd, (size_t)pd_offset + (size_t)3, pde_v[3]);
+	nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)0, pde_v[0]);
+	nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)1, pde_v[1]);
+	nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)2, pde_v[2]);
+	nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)3, pde_v[3]);
 
 	pte_dbg(g, attrs,
 		"PDE: i=%-4u size=%-2u offs=%-4u pgsz: %c%c | "
@@ -240,7 +241,7 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm,
 {
 	struct gk20a *g = vm->mm->g;
 	u32 page_size  = vm->gmmu_page_sizes[attrs->pgsz];
-	u32 pd_offset = pd_offset_from_index(l, pd_idx);
+	u32 pd_offset = nvgpu_pd_offset_from_index(l, pd_idx);
 	u32 pte_w[2] = {0, 0};
 
 	if (phys_addr != 0ULL) {
@@ -271,8 +272,8 @@ static void update_gmmu_pte_locked(struct vm_gk20a *vm,
 		(u32)attrs->ctag / g->ops.fb.compression_page_size(g),
 		pte_w[1], pte_w[0]);
 
-	pd_write(g, pd, (size_t)pd_offset + (size_t)0, pte_w[0]);
-	pd_write(g, pd, (size_t)pd_offset + (size_t)1, pte_w[1]);
+	nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)0, pte_w[0]);
+	nvgpu_pd_write(g, pd, (size_t)pd_offset + (size_t)1, pte_w[1]);
 }
 
 #define GP10B_PDE0_ENTRY_SIZE 16U
@@ -287,7 +288,7 @@ static u32 gp10b_get_pde0_pgsz(struct gk20a *g, const struct gk20a_mmu_level *l,
 				struct nvgpu_gmmu_pd *pd, u32 pd_idx)
 {
 	u32 pde_base = pd->mem_offs / sizeof(u32);
-	u32 pde_offset = pde_base + pd_offset_from_index(l, pd_idx);
+	u32 pde_offset = pde_base + nvgpu_pd_offset_from_index(l, pd_idx);
 	u32 pde_v[GP10B_PDE0_ENTRY_SIZE >> 2];
 	u32 i;
 	u32 pgsz = GMMU_NR_PAGE_SIZES;
@@ -380,7 +381,7 @@ const struct gk20a_mmu_level *gp10b_mm_get_mmu_levels(struct gk20a *g,
 void gp10b_mm_init_pdb(struct gk20a *g, struct nvgpu_mem *inst_block,
 		struct vm_gk20a *vm)
 {
-	u64 pdb_addr = nvgpu_pde_gpu_addr(g, &vm->pdb);
+	u64 pdb_addr = nvgpu_pd_gpu_addr(g, &vm->pdb);
 	u32 pdb_addr_lo = u64_lo32(pdb_addr >> ram_in_base_shift_v());
 	u32 pdb_addr_hi = u64_hi32(pdb_addr);
 
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
index a77786201..d7db66fd4 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gmmu.h
@@ -37,6 +37,7 @@
 
 struct vm_gk20a;
 struct nvgpu_mem;
+struct nvgpu_gmmu_pd;
 
 #define GMMU_PAGE_SIZE_SMALL	0U
 #define GMMU_PAGE_SIZE_BIG	1U
@@ -49,29 +50,6 @@ enum gk20a_mem_rw_flag {
 	gk20a_mem_flag_write_only = 2,	/* WO */
 };
 
-/*
- * GMMU page directory. This is the kernel's tracking of a list of PDEs or PTEs
- * in the GMMU.
- */
-struct nvgpu_gmmu_pd {
-	/*
-	 * DMA memory describing the PTEs or PDEs. @mem_offs describes the
-	 * offset of the PDE table in @mem. @cached specifies if this PD is
-	 * using pd_cache memory.
-	 */
-	struct nvgpu_mem	*mem;
-	u32			 mem_offs;
-	bool			 cached;
-	u32			 pd_size; /* In bytes. */
-
-	/*
-	 * List of pointers to the next level of page tables. Will not be
-	 * populated when this PD is pointing to PTEs.
-	 */
-	struct nvgpu_gmmu_pd	*entries;
-	int			 num_entries;
-};
-
 /*
  * Reduce the number of arguments getting passed through the various levels of
  * GMMU mapping functions.
@@ -185,27 +163,6 @@ void nvgpu_gmmu_unmap(struct vm_gk20a *vm,
 		      struct nvgpu_mem *mem,
 		      u64 gpu_va);
 
-int nvgpu_pd_alloc(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd, u32 bytes);
-void nvgpu_pd_free(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd);
-int nvgpu_pd_cache_init(struct gk20a *g);
-void nvgpu_pd_cache_fini(struct gk20a *g);
-u64 nvgpu_pde_gpu_addr(struct gk20a *g, struct nvgpu_gmmu_pd *pd);
-
-/*
- * Some useful routines that are shared across chips.
- */
-static inline u32 pd_offset_from_index(const struct gk20a_mmu_level *l,
-				       u32 pd_idx)
-{
-	return (pd_idx * l->entry_size) / U32(sizeof(u32));
-}
-
-static inline void pd_write(struct gk20a *g, struct nvgpu_gmmu_pd *pd,
-			    size_t w, size_t data)
-{
-	nvgpu_mem_wr32(g, pd->mem, (pd->mem_offs / sizeof(u32)) + w, data);
-}
-
 /**
  * __nvgpu_pte_words - Compute number of words in a PTE.
  *
diff --git a/drivers/gpu/nvgpu/include/nvgpu/pd_cache.h b/drivers/gpu/nvgpu/include/nvgpu/pd_cache.h
new file mode 100644
index 000000000..557c808d9
--- /dev/null
+++ b/drivers/gpu/nvgpu/include/nvgpu/pd_cache.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2018, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef NVGPU_PD_CACHE_H
+#define NVGPU_PD_CACHE_H
+
+#include <nvgpu/types.h>
+
+struct gk20a;
+struct vm_gk20a;
+struct nvgpu_mem;
+struct gk20a_mmu_level;
+
+/*
+ * GMMU page directory. This is the kernel's tracking of a list of PDEs or PTEs
+ * in the GMMU.
+ */
+struct nvgpu_gmmu_pd {
+	/*
+	 * DMA memory describing the PTEs or PDEs. @mem_offs describes the
+	 * offset of the PDE table in @mem. @cached specifies if this PD is
+	 * using pd_cache memory.
+	 */
+	struct nvgpu_mem	*mem;
+	u32			 mem_offs; /* In bytes. */
+	bool			 cached;
+	u32			 pd_size; /* In bytes. */
+
+	/*
+	 * List of pointers to the next level of page tables. Does not
+	 * need to be populated when this PD is pointing to PTEs.
+	 */
+	struct nvgpu_gmmu_pd	*entries;
+	int			 num_entries;
+};
+
+int  nvgpu_pd_alloc(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd, u32 bytes);
+void nvgpu_pd_free(struct vm_gk20a *vm, struct nvgpu_gmmu_pd *pd);
+int  nvgpu_pd_cache_init(struct gk20a *g);
+void nvgpu_pd_cache_fini(struct gk20a *g);
+u32  nvgpu_pd_offset_from_index(const struct gk20a_mmu_level *l, u32 pd_idx);
+void nvgpu_pd_write(struct gk20a *g, struct nvgpu_gmmu_pd *pd,
+		    size_t w, u32 data); /* w: u32 word index in the PD */
+u64  nvgpu_pd_gpu_addr(struct gk20a *g, struct nvgpu_gmmu_pd *pd);
+
+#endif /* NVGPU_PD_CACHE_H */
diff --git a/drivers/gpu/nvgpu/include/nvgpu/vm.h b/drivers/gpu/nvgpu/include/nvgpu/vm.h
index 50f530714..5b07a1857 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/vm.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/vm.h
@@ -28,6 +28,7 @@
 #include <nvgpu/rbtree.h>
 #include <nvgpu/types.h>
 #include <nvgpu/gmmu.h>
+#include <nvgpu/pd_cache.h>
 #include <nvgpu/nvgpu_mem.h>
 #include <nvgpu/allocator.h>
 
diff --git a/drivers/gpu/nvgpu/libnvgpu-drv.export b/drivers/gpu/nvgpu/libnvgpu-drv.export
index a5cd2a3ac..59353b52d 100644
--- a/drivers/gpu/nvgpu/libnvgpu-drv.export
+++ b/drivers/gpu/nvgpu/libnvgpu-drv.export
@@ -73,6 +73,7 @@ nvgpu_pd_alloc
 nvgpu_pd_cache_fini
 nvgpu_pd_cache_init
 nvgpu_pd_free
+nvgpu_pd_write
 nvgpu_mem_rd32
 nvgpu_mem_wr32
 nvgpu_mutex_acquire
diff --git a/userspace/units/mm/pd_cache/pd_cache.c b/userspace/units/mm/pd_cache/pd_cache.c
index fda8ddc6d..ca674ee7e 100644
--- a/userspace/units/mm/pd_cache/pd_cache.c
+++ b/userspace/units/mm/pd_cache/pd_cache.c
@@ -26,6 +26,7 @@
 
 #include <nvgpu/gk20a.h>
 #include <nvgpu/gmmu.h>
+#include <nvgpu/pd_cache.h>
 #include <nvgpu/enabled.h>
 
 #include <nvgpu/posix/kmem.h>
@@ -598,7 +599,7 @@ static int test_pd_cache_valid_alloc(struct unit_module *m,
 		 * the nvgpu_mem. Using the zeroth word makes it easy to read
 		 * back.
 		 */
-		pd_write(g, &pd, 0, 0x12345678);
+		nvgpu_pd_write(g, &pd, 0, 0x12345678);
 
 		if (0x12345678 !=
 		    nvgpu_mem_rd32(g, pd.mem, pd.mem_offs / sizeof(u32))) {