diff --git a/drivers/gpu/nvgpu/common/mm/mm.c b/drivers/gpu/nvgpu/common/mm/mm.c
index 758344412..a471ec500 100644
--- a/drivers/gpu/nvgpu/common/mm/mm.c
+++ b/drivers/gpu/nvgpu/common/mm/mm.c
@@ -562,6 +562,17 @@ static int nvgpu_init_mm_setup_sw(struct gk20a *g)
 		return err;
 	}
 
+	/*
+	 * Some chips support replayable MMU faults. For such chips make sure
+	 * SW is initialized.
+	 */
+	if (g->ops.mm.mmu_fault.setup_sw != NULL) {
+		err = g->ops.mm.mmu_fault.setup_sw(g);
+		if (err != 0) {
+			return err;
+		}
+	}
+
 	mm->remove_support = nvgpu_remove_mm_support;
 	mm->remove_ce_support = nvgpu_remove_mm_ce_support;
 
@@ -591,6 +602,52 @@ static int nvgpu_init_mm_pdb_cache_war(struct gk20a *g)
 	return 0;
 }
 
+/*
+ * Program MM HW: FB setup, BAR1/BAR2 binds, FB flush, optional MMU fault setup.
+ * Reached via g->ops.mm.setup_hw (NULL on vGPU: no HW to init); 0 on success.
+ */
+int nvgpu_mm_setup_hw(struct gk20a *g)
+{
+	struct mm_gk20a *mm = &g->mm;
+	int err;
+
+	nvgpu_log_fn(g, " ");
+
+	if (g->ops.fb.set_mmu_page_size != NULL) {
+		g->ops.fb.set_mmu_page_size(g);
+	}
+
+	if (g->ops.fb.set_use_full_comp_tag_line != NULL) {
+		mm->use_full_comp_tag_line =
+			g->ops.fb.set_use_full_comp_tag_line(g);
+	}
+
+	g->ops.fb.init_hw(g);
+
+	if (g->ops.bus.bar1_bind != NULL) {
+		g->ops.bus.bar1_bind(g, &mm->bar1.inst_block);
+	}
+
+	if (g->ops.bus.bar2_bind != NULL) {
+		err = g->ops.bus.bar2_bind(g, &mm->bar2.inst_block);
+		if (err != 0) {
+			return err;
+		}
+	}
+
+	if (g->ops.mm.cache.fb_flush(g) != 0 ||
+	    g->ops.mm.cache.fb_flush(g) != 0) {
+		return -EBUSY;
+	}
+
+	if (g->ops.mm.mmu_fault.setup_hw != NULL) {
+		g->ops.mm.mmu_fault.setup_hw(g);
+	}
+
+	nvgpu_log_fn(g, "done");
+	return 0;
+}
+
 int nvgpu_init_mm_support(struct gk20a *g)
 {
 	int err;
@@ -610,8 +667,8 @@ int nvgpu_init_mm_support(struct gk20a *g)
 		return err;
 	}
 
-	if (g->ops.mm.init_mm_setup_hw != NULL) {
-		err = g->ops.mm.init_mm_setup_hw(g);
+	if (g->ops.mm.setup_hw != NULL) {
+		err = g->ops.mm.setup_hw(g);
 	}
 
 	return err;
diff --git a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c
index aaf827da9..62963fc18 100644
--- a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c
+++ b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c
@@ -569,7 +569,7 @@ static const struct gpu_ops vgpu_gp10b_ops = {
 	},
 	.mm = {
 		.vm_bind_channel = vgpu_vm_bind_channel,
-		.init_mm_setup_hw = NULL,
+		.setup_hw = NULL,
 		.is_bar1_supported = gm20b_mm_is_bar1_supported,
 		.init_inst_block = gk20a_init_inst_block,
 		.init_bar2_vm = gp10b_init_bar2_vm,
diff --git a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c
index 5f87b970b..0ea64822f 100644
--- a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c
+++ b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c
@@ -663,7 +663,7 @@ static const struct gpu_ops vgpu_gv11b_ops = {
 	},
 	.mm = {
 		.vm_bind_channel = vgpu_vm_bind_channel,
-		.init_mm_setup_hw = NULL,
+		.setup_hw = NULL,
 		.is_bar1_supported = gv11b_mm_is_bar1_supported,
 		.init_inst_block = gv11b_init_inst_block,
 		.init_bar2_vm = gp10b_init_bar2_vm,
diff --git a/drivers/gpu/nvgpu/common/vgpu/mm/mm_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/mm/mm_vgpu.c
index b1c70ade2..6512f6bea 100644
--- a/drivers/gpu/nvgpu/common/vgpu/mm/mm_vgpu.c
+++ b/drivers/gpu/nvgpu/common/vgpu/mm/mm_vgpu.c
@@ -77,10 +77,6 @@ int vgpu_init_mm_support(struct gk20a *g)
 		return err;
 	}
 
-	if (g->ops.mm.init_mm_setup_hw) {
-		err = g->ops.mm.init_mm_setup_hw(g);
-	}
-
 	return err;
 }
 
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index da67471cd..cc6fc8d79 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -82,45 +82,6 @@
  *
  */
 
-/* make sure gk20a_init_mm_support is called before */
-int gk20a_init_mm_setup_hw(struct gk20a *g)
-{
-	struct mm_gk20a *mm = &g->mm;
-	int err;
-
-	nvgpu_log_fn(g, " ");
-
-	if (g->ops.fb.set_mmu_page_size != NULL) {
-		g->ops.fb.set_mmu_page_size(g);
-	}
-
-	if (g->ops.fb.set_use_full_comp_tag_line != NULL) {
-		mm->use_full_comp_tag_line =
-			g->ops.fb.set_use_full_comp_tag_line(g);
-	}
-
-	g->ops.fb.init_hw(g);
-
-	if (g->ops.bus.bar1_bind != NULL) {
-		g->ops.bus.bar1_bind(g, &mm->bar1.inst_block);
-	}
-
-	if (g->ops.bus.bar2_bind != NULL) {
-		err = g->ops.bus.bar2_bind(g, &mm->bar2.inst_block);
-		if (err != 0) {
-			return err;
-		}
-	}
-
-	if (g->ops.mm.cache.fb_flush(g) != 0 ||
-	    g->ops.mm.cache.fb_flush(g) != 0) {
-		return -EBUSY;
-	}
-
-	nvgpu_log_fn(g, "done");
-	return 0;
-}
-
 void gk20a_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm,
 		u32 big_page_size)
 {
diff --git a/drivers/gpu/nvgpu/gv11b/mm_gv11b.c b/drivers/gpu/nvgpu/gv11b/mm_gv11b.c
index 272fb97cb..82d9b5f29 100644
--- a/drivers/gpu/nvgpu/gv11b/mm_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/mm_gv11b.c
@@ -146,7 +146,7 @@ static void gv11b_mm_mmu_hw_fault_buf_init(struct gk20a *g)
 	}
 }
 
-static void gv11b_mm_mmu_fault_setup_hw(struct gk20a *g)
+void gv11b_mm_mmu_fault_setup_hw(struct gk20a *g)
 {
 	if (nvgpu_mem_is_valid(
 			&g->mm.hw_fault_buf[NVGPU_MM_MMU_FAULT_TYPE_OTHER_AND_NONREPLAY])) {
@@ -159,7 +159,7 @@ static void gv11b_mm_mmu_fault_setup_hw(struct gk20a *g)
 	}
 }
 
-static int gv11b_mm_mmu_fault_setup_sw(struct gk20a *g)
+int gv11b_mm_mmu_fault_setup_sw(struct gk20a *g)
 {
 	int err = 0;
 
@@ -179,21 +179,3 @@ static int gv11b_mm_mmu_fault_setup_sw(struct gk20a *g)
 
 	return err;
 }
-
-int gv11b_init_mm_setup_hw(struct gk20a *g)
-{
-	int err = 0;
-
-	nvgpu_log_fn(g, " ");
-
-	err = gk20a_init_mm_setup_hw(g);
-
-	err = gv11b_mm_mmu_fault_setup_sw(g);
-	if (err == 0) {
-		gv11b_mm_mmu_fault_setup_hw(g);
-	}
-
-	nvgpu_log_fn(g, "end");
-
-	return err;
-}
diff --git a/drivers/gpu/nvgpu/gv11b/mm_gv11b.h b/drivers/gpu/nvgpu/gv11b/mm_gv11b.h
index f9bf346d9..c76fec94b 100644
--- a/drivers/gpu/nvgpu/gv11b/mm_gv11b.h
+++ b/drivers/gpu/nvgpu/gv11b/mm_gv11b.h
@@ -31,8 +31,10 @@ struct vm_gk20a;
 bool gv11b_mm_is_bar1_supported(struct gk20a *g);
 void gv11b_init_inst_block(struct nvgpu_mem *inst_block,
 		struct vm_gk20a *vm, u32 big_page_size);
-int gv11b_init_mm_setup_hw(struct gk20a *g);
 void gv11b_mm_fault_info_mem_destroy(struct gk20a *g);
 void gv11b_mm_mmu_fault_disable_hw(struct gk20a *g);
 
+void gv11b_mm_mmu_fault_setup_hw(struct gk20a *g);
+int  gv11b_mm_mmu_fault_setup_sw(struct gk20a *g);
+
 #endif
diff --git a/drivers/gpu/nvgpu/hal/init/hal_gm20b.c b/drivers/gpu/nvgpu/hal/init/hal_gm20b.c
index 3279ca2b7..d5c3fa5b8 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_gm20b.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_gm20b.c
@@ -849,7 +849,7 @@ static const struct gpu_ops gm20b_ops = {
 	},
 	.mm = {
 		.vm_bind_channel = nvgpu_vm_bind_channel,
-		.init_mm_setup_hw = gk20a_init_mm_setup_hw,
+		.setup_hw = nvgpu_mm_setup_hw,
 		.is_bar1_supported = gm20b_mm_is_bar1_supported,
 		.alloc_inst_block = gk20a_alloc_inst_block,
 		.init_inst_block = gk20a_init_inst_block,
diff --git a/drivers/gpu/nvgpu/hal/init/hal_gp10b.c b/drivers/gpu/nvgpu/hal/init/hal_gp10b.c
index ad3d16e5e..f2e7e181f 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_gp10b.c
@@ -929,7 +929,7 @@ static const struct gpu_ops gp10b_ops = {
 	},
 	.mm = {
 		.vm_bind_channel = nvgpu_vm_bind_channel,
-		.init_mm_setup_hw = gk20a_init_mm_setup_hw,
+		.setup_hw = nvgpu_mm_setup_hw,
 		.is_bar1_supported = gm20b_mm_is_bar1_supported,
 		.alloc_inst_block = gk20a_alloc_inst_block,
 		.init_inst_block = gk20a_init_inst_block,
diff --git a/drivers/gpu/nvgpu/hal/init/hal_gv100.c b/drivers/gpu/nvgpu/hal/init/hal_gv100.c
index 087ccad9e..6a6ba0612 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_gv100.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_gv100.c
@@ -1118,7 +1118,7 @@ static const struct gpu_ops gv100_ops = {
 	},
 	.mm = {
 		.vm_bind_channel = nvgpu_vm_bind_channel,
-		.init_mm_setup_hw = gv11b_init_mm_setup_hw,
+		.setup_hw = nvgpu_mm_setup_hw,
 		.is_bar1_supported = gv11b_mm_is_bar1_supported,
 		.alloc_inst_block = gk20a_alloc_inst_block,
 		.init_inst_block = gv11b_init_inst_block,
@@ -1128,6 +1128,10 @@ static const struct gpu_ops gv100_ops = {
 		.mmu_fault_disable_hw = gv11b_mm_mmu_fault_disable_hw,
 		.get_flush_retries = gv100_mm_get_flush_retries,
 		.bar1_map_userd = NULL,
+		.mmu_fault = {
+			.setup_sw = gv11b_mm_mmu_fault_setup_sw,
+			.setup_hw = gv11b_mm_mmu_fault_setup_hw,
+		},
 		.cache = {
 			.fb_flush = gk20a_mm_fb_flush,
 			.l2_invalidate = gk20a_mm_l2_invalidate,
diff --git a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c
index b16ff54fd..8320255ad 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c
@@ -1093,7 +1093,7 @@ static const struct gpu_ops gv11b_ops = {
 	},
 	.mm = {
 		.vm_bind_channel = nvgpu_vm_bind_channel,
-		.init_mm_setup_hw = gv11b_init_mm_setup_hw,
+		.setup_hw = nvgpu_mm_setup_hw,
 		.is_bar1_supported = gv11b_mm_is_bar1_supported,
 		.alloc_inst_block = gk20a_alloc_inst_block,
 		.init_inst_block = gv11b_init_inst_block,
@@ -1102,6 +1102,10 @@ static const struct gpu_ops gv11b_ops = {
 		.fault_info_mem_destroy = gv11b_mm_fault_info_mem_destroy,
 		.mmu_fault_disable_hw = gv11b_mm_mmu_fault_disable_hw,
 		.bar1_map_userd = NULL,
+		.mmu_fault = {
+			.setup_sw = gv11b_mm_mmu_fault_setup_sw,
+			.setup_hw = gv11b_mm_mmu_fault_setup_hw,
+		},
 		.cache = {
 			.fb_flush = gk20a_mm_fb_flush,
 			.l2_invalidate = gk20a_mm_l2_invalidate,
diff --git a/drivers/gpu/nvgpu/hal/init/hal_tu104.c b/drivers/gpu/nvgpu/hal/init/hal_tu104.c
index 554f08fd6..c2407a8df 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_tu104.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_tu104.c
@@ -1153,7 +1153,7 @@ static const struct gpu_ops tu104_ops = {
 	},
 	.mm = {
 		.vm_bind_channel = nvgpu_vm_bind_channel,
-		.init_mm_setup_hw = gv11b_init_mm_setup_hw,
+		.setup_hw = nvgpu_mm_setup_hw,
 		.is_bar1_supported = gv11b_mm_is_bar1_supported,
 		.alloc_inst_block = gk20a_alloc_inst_block,
 		.init_inst_block = gv11b_init_inst_block,
@@ -1163,6 +1163,10 @@ static const struct gpu_ops tu104_ops = {
 		.mmu_fault_disable_hw = gv11b_mm_mmu_fault_disable_hw,
 		.get_flush_retries = tu104_mm_get_flush_retries,
 		.bar1_map_userd = NULL,
+		.mmu_fault = {
+			.setup_sw = gv11b_mm_mmu_fault_setup_sw,
+			.setup_hw = gv11b_mm_mmu_fault_setup_hw,
+		},
 		.cache = {
 			.fb_flush = gk20a_mm_fb_flush,
 			.l2_invalidate = gk20a_mm_l2_invalidate,
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
index e8723d121..65ff6e972 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
@@ -1265,7 +1265,7 @@ struct gpu_ops {
 	struct {
 		int (*vm_bind_channel)(struct vm_gk20a *vm,
 				struct channel_gk20a *ch);
-		int (*init_mm_setup_hw)(struct gk20a *g);
+		int (*setup_hw)(struct gk20a *g);
 		bool (*is_bar1_supported)(struct gk20a *g);
 		int (*init_bar2_vm)(struct gk20a *g);
 		void (*remove_bar2_vm)(struct gk20a *g);
@@ -1280,6 +1280,10 @@ struct gpu_ops {
 		u64 (*bar1_map_userd)(struct gk20a *g, struct nvgpu_mem *mem, u32 offset);
 		int (*vm_as_alloc_share)(struct gk20a *g, struct vm_gk20a *vm);
 		void (*vm_as_free_share)(struct vm_gk20a *vm);
+		struct {
+			int (*setup_sw)(struct gk20a *g);
+			void (*setup_hw)(struct gk20a *g);
+		} mmu_fault;
 		struct {
 			int (*fb_flush)(struct gk20a *g);
 			void (*l2_invalidate)(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/include/nvgpu/mm.h b/drivers/gpu/nvgpu/include/nvgpu/mm.h
index 65f5458fd..c83805f4e 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/mm.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/mm.h
@@ -221,4 +221,6 @@ int nvgpu_mm_suspend(struct gk20a *g);
 u32 nvgpu_mm_get_default_big_page_size(struct gk20a *g);
 u32 nvgpu_mm_get_available_big_page_sizes(struct gk20a *g);
 
+int nvgpu_mm_setup_hw(struct gk20a *g);
+
 #endif /* NVGPU_MM_H */
diff --git a/drivers/gpu/nvgpu/libnvgpu-drv.export b/drivers/gpu/nvgpu/libnvgpu-drv.export
index 3cc689d2e..a082a28be 100644
--- a/drivers/gpu/nvgpu/libnvgpu-drv.export
+++ b/drivers/gpu/nvgpu/libnvgpu-drv.export
@@ -51,7 +51,6 @@ gv11b_fb_is_fault_buf_enabled
 gv11b_fb_intr_is_mmu_fault_pending
 gv11b_gpu_phys_addr
 gv11b_init_inst_block
-gv11b_init_mm_setup_hw
 gv11b_mm_fault_info_mem_destroy
 gv11b_mm_is_bar1_supported
 gv11b_mm_l2_flush
@@ -116,6 +115,7 @@ nvgpu_mem_sgt_posix_create_from_list
 nvgpu_mem_wr
 nvgpu_mem_wr32
 nvgpu_mem_wr_n
+nvgpu_mm_setup_hw
 nvgpu_mutex_acquire
 nvgpu_mutex_destroy
 nvgpu_mutex_init
diff --git a/userspace/units/mm/page_table_faults/page_table_faults.c b/userspace/units/mm/page_table_faults/page_table_faults.c
index 03e36a526..3853f84a8 100644
--- a/userspace/units/mm/page_table_faults/page_table_faults.c
+++ b/userspace/units/mm/page_table_faults/page_table_faults.c
@@ -134,7 +134,7 @@ static int init_mm(struct unit_module *m, struct gk20a *g)
 	g->ops.mc.is_mmu_fault_pending = gv11b_mc_is_mmu_fault_pending;
 	g->ops.mm.fault_info_mem_destroy = gv11b_mm_fault_info_mem_destroy;
 	g->ops.mm.mmu_fault_disable_hw = gv11b_mm_mmu_fault_disable_hw;
-	g->ops.mm.init_mm_setup_hw = gv11b_init_mm_setup_hw;
+	g->ops.mm.setup_hw = nvgpu_mm_setup_hw;
 	g->ops.mm.cache.l2_flush = gv11b_mm_l2_flush;
 	g->ops.mm.cache.fb_flush = gk20a_mm_fb_flush;
 	g->ops.fb.init_hw = gv11b_fb_init_hw;
@@ -209,7 +209,7 @@ static int init_mm(struct unit_module *m, struct gk20a *g)
 	g->mm.mmu_rd_mem.aperture = APERTURE_SYSMEM;
 
 	/* Init MM H/W */
-	err = g->ops.mm.init_mm_setup_hw(g);
+	err = g->ops.mm.setup_hw(g);
 	if (err != 0) {
 		unit_return_fail(m, "init_mm_setup_hw failed code=%d\n", err);
 	}
@@ -218,7 +218,7 @@ static int init_mm(struct unit_module *m, struct gk20a *g)
 	 * Call the init function again to cover branches checking for already
 	 * initialized structures
 	 */
-	err = g->ops.mm.init_mm_setup_hw(g);
+	err = g->ops.mm.setup_hw(g);
 	if (err != 0) {
 		unit_return_fail(m, "init_mm_setup_hw/2 failed code=%d\n", err);
 	}