diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index 27d1f592f..f131803fa 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile @@ -150,6 +150,8 @@ nvgpu-y += \ common/nvlink/nvlink.o \ common/nvlink/nvlink_gv100.o \ common/nvlink/nvlink_tu104.o \ + hal/mm/cache/flush_gk20a.o \ + hal/mm/cache/flush_gv11b.o \ hal/mc/mc_gm20b.o \ hal/mc/mc_gp10b.o \ hal/mc/mc_gv11b.o \ diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources index c368ca439..886222958 100644 --- a/drivers/gpu/nvgpu/Makefile.sources +++ b/drivers/gpu/nvgpu/Makefile.sources @@ -271,6 +271,8 @@ srcs += common/sim.c \ tu104/mm_tu104.c \ tu104/hal_tu104.c \ tu104/func_tu104.c \ + hal/mm/cache/flush_gk20a.c \ + hal/mm/cache/flush_gv11b.c \ hal/mc/mc_gm20b.c \ hal/mc/mc_gp10b.c \ hal/mc/mc_gv11b.c \ diff --git a/drivers/gpu/nvgpu/common/fifo/channel.c b/drivers/gpu/nvgpu/common/fifo/channel.c index b564c7e49..385f4824b 100644 --- a/drivers/gpu/nvgpu/common/fifo/channel.c +++ b/drivers/gpu/nvgpu/common/fifo/channel.c @@ -2594,7 +2594,7 @@ void gk20a_channel_semaphore_wakeup(struct gk20a *g, bool post_events) * Ensure that all pending writes are actually done before trying to * read semaphore values from DRAM. */ - g->ops.mm.fb_flush(g); + g->ops.mm.cache.fb_flush(g); for (chid = 0; chid < f->num_channels; chid++) { struct channel_gk20a *c = g->fifo.channel+chid; diff --git a/drivers/gpu/nvgpu/common/gr/ctx.c b/drivers/gpu/nvgpu/common/gr/ctx.c index d24599159..2ba41525d 100644 --- a/drivers/gpu/nvgpu/common/gr/ctx.c +++ b/drivers/gpu/nvgpu/common/gr/ctx.c @@ -689,7 +689,7 @@ u32 nvgpu_gr_ctx_get_ctx_id(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx) if (!gr_ctx->ctx_id_valid) { /* Channel gr_ctx buffer is gpu cacheable. Flush and invalidate before cpu update. 
*/ - if (g->ops.mm.l2_flush(g, true) != 0) { + if (g->ops.mm.cache.l2_flush(g, true) != 0) { nvgpu_err(g, "l2_flush failed"); } @@ -707,7 +707,7 @@ int nvgpu_gr_ctx_init_zcull(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx) { int err; - err = g->ops.mm.l2_flush(g, true); + err = g->ops.mm.cache.l2_flush(g, true); if (err != 0) { nvgpu_err(g, "l2_flush failed"); return err; @@ -753,7 +753,7 @@ int nvgpu_gr_ctx_set_smpc_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, /* Channel gr_ctx buffer is gpu cacheable. Flush and invalidate before cpu update. */ - err = g->ops.mm.l2_flush(g, true); + err = g->ops.mm.cache.l2_flush(g, true); if (err != 0) { nvgpu_err(g, "l2_flush failed"); return err; @@ -828,7 +828,7 @@ int nvgpu_gr_ctx_set_hwpm_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx, /* Channel gr_ctx buffer is gpu cacheable. Flush and invalidate before cpu update. */ - err = g->ops.mm.l2_flush(g, true); + err = g->ops.mm.cache.l2_flush(g, true); if (err != 0) { nvgpu_err(g, "l2_flush failed"); return err; diff --git a/drivers/gpu/nvgpu/common/gr/fecs_trace.c b/drivers/gpu/nvgpu/common/gr/fecs_trace.c index 51f1cdae7..9d2adc143 100644 --- a/drivers/gpu/nvgpu/common/gr/fecs_trace.c +++ b/drivers/gpu/nvgpu/common/gr/fecs_trace.c @@ -479,7 +479,7 @@ int nvgpu_gr_fecs_trace_poll(struct gk20a *g) read, g->ops.gr.fecs_trace.get_read_index(g), write, cnt); /* Ensure all FECS writes have made it to SYSMEM */ - g->ops.mm.fb_flush(g); + g->ops.mm.cache.fb_flush(g); while (read != write) { cnt = nvgpu_gr_fecs_trace_ring_read(g, read, &vm_update_mask); diff --git a/drivers/gpu/nvgpu/common/gr/global_ctx.c b/drivers/gpu/nvgpu/common/gr/global_ctx.c index 85b98aa33..2cffe2612 100644 --- a/drivers/gpu/nvgpu/common/gr/global_ctx.c +++ b/drivers/gpu/nvgpu/common/gr/global_ctx.c @@ -284,7 +284,7 @@ void nvgpu_gr_global_ctx_load_local_golden_image(struct gk20a *g, { /* Channel gr_ctx buffer is gpu cacheable. Flush and invalidate before cpu update. 
*/ - if (g->ops.mm.l2_flush(g, true) != 0) { + if (g->ops.mm.cache.l2_flush(g, true) != 0) { nvgpu_err(g, "l2_flush failed"); } diff --git a/drivers/gpu/nvgpu/common/gr/subctx.c b/drivers/gpu/nvgpu/common/gr/subctx.c index 9a3b33104..d425dab50 100644 --- a/drivers/gpu/nvgpu/common/gr/subctx.c +++ b/drivers/gpu/nvgpu/common/gr/subctx.c @@ -88,7 +88,7 @@ void nvgpu_gr_subctx_load_ctx_header(struct gk20a *g, struct nvgpu_mem *ctxheader = &subctx->ctx_header; int err = 0; - err = g->ops.mm.l2_flush(g, true); + err = g->ops.mm.cache.l2_flush(g, true); if (err != 0) { nvgpu_err(g, "l2_flush failed"); } @@ -154,4 +154,3 @@ struct nvgpu_mem *nvgpu_gr_subctx_get_ctx_header(struct gk20a *g, { return &subctx->ctx_header; } - diff --git a/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c b/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c index 413a7d549..cef21117a 100644 --- a/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c +++ b/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c @@ -882,7 +882,7 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm, } if (batch == NULL) { - if (gk20a_mm_l2_flush(g, true) != 0) { + if (g->ops.mm.cache.l2_flush(g, true) != 0) { nvgpu_err(g, "gk20a_mm_l2_flush[1] failed"); } err = g->ops.fb.tlb_invalidate(g, vm->pdb.mem); @@ -891,7 +891,7 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm, } } else { if (!batch->gpu_l2_flushed) { - if (gk20a_mm_l2_flush(g, true) != 0) { + if (g->ops.mm.cache.l2_flush(g, true) != 0) { nvgpu_err(g, "gk20a_mm_l2_flush[2] failed"); } batch->gpu_l2_flushed = true; diff --git a/drivers/gpu/nvgpu/common/mm/mm.c b/drivers/gpu/nvgpu/common/mm/mm.c index afdb2c8a3..47990a03f 100644 --- a/drivers/gpu/nvgpu/common/mm/mm.c +++ b/drivers/gpu/nvgpu/common/mm/mm.c @@ -124,8 +124,8 @@ int nvgpu_mm_suspend(struct gk20a *g) nvgpu_vidmem_thread_pause_sync(&g->mm); - g->ops.mm.cbc_clean(g); - err = g->ops.mm.l2_flush(g, false); + g->ops.mm.cache.cbc_clean(g); + err = g->ops.mm.cache.l2_flush(g, false); if (err != 0) { nvgpu_err(g, "l2_flush 
failed"); return err; diff --git a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c index 4717b0e39..023ee9819 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c +++ b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c @@ -571,10 +571,6 @@ static const struct gpu_ops vgpu_gp10b_ops = { .gmmu_map = vgpu_locked_gmmu_map, .gmmu_unmap = vgpu_locked_gmmu_unmap, .vm_bind_channel = vgpu_vm_bind_channel, - .fb_flush = vgpu_mm_fb_flush, - .l2_invalidate = vgpu_mm_l2_invalidate, - .l2_flush = vgpu_mm_l2_flush, - .cbc_clean = NULL, .get_big_page_sizes = gm20b_mm_get_big_page_sizes, .get_default_big_page_size = gp10b_mm_get_default_big_page_size, .gpu_phys_addr = gm20b_gpu_phys_addr, @@ -590,6 +586,12 @@ static const struct gpu_ops vgpu_gp10b_ops = { .bar1_map_userd = vgpu_mm_bar1_map_userd, .vm_as_alloc_share = vgpu_vm_as_alloc_share, .vm_as_free_share = vgpu_vm_as_free_share, + .cache = { + .fb_flush = vgpu_mm_fb_flush, + .l2_invalidate = vgpu_mm_l2_invalidate, + .l2_flush = vgpu_mm_l2_flush, + .cbc_clean = NULL, + }, }, .pramin = { .data032_r = NULL, diff --git a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c index 11769ee5f..022706e4b 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c +++ b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c @@ -663,10 +663,6 @@ static const struct gpu_ops vgpu_gv11b_ops = { .gmmu_map = vgpu_locked_gmmu_map, .gmmu_unmap = vgpu_locked_gmmu_unmap, .vm_bind_channel = vgpu_vm_bind_channel, - .fb_flush = vgpu_mm_fb_flush, - .l2_invalidate = vgpu_mm_l2_invalidate, - .l2_flush = vgpu_mm_l2_flush, - .cbc_clean = NULL, .get_big_page_sizes = gm20b_mm_get_big_page_sizes, .get_default_big_page_size = gp10b_mm_get_default_big_page_size, .gpu_phys_addr = gm20b_gpu_phys_addr, @@ -683,6 +679,12 @@ static const struct gpu_ops vgpu_gv11b_ops = { .bar1_map_userd = vgpu_mm_bar1_map_userd, .vm_as_alloc_share = 
vgpu_vm_as_alloc_share, .vm_as_free_share = vgpu_vm_as_free_share, + .cache = { + .fb_flush = vgpu_mm_fb_flush, + .l2_invalidate = vgpu_mm_l2_invalidate, + .l2_flush = vgpu_mm_l2_flush, + .cbc_clean = NULL, + }, }, .therm = { .init_therm_setup_hw = NULL, diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 51e4f5bed..c90b6f9b4 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -2158,7 +2158,7 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, goto cleanup; } - err = g->ops.mm.l2_flush(g, true); + err = g->ops.mm.cache.l2_flush(g, true); if (err != 0) { nvgpu_err(g, "l2_flush failed"); goto cleanup; diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c index af6f36e18..f6bc19255 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c @@ -51,7 +51,6 @@ #include #include -#include /* * GPU mapping life cycle @@ -114,7 +113,8 @@ int gk20a_init_mm_setup_hw(struct gk20a *g) } } - if ((gk20a_mm_fb_flush(g) != 0) || (gk20a_mm_fb_flush(g) != 0)) { + if (g->ops.mm.cache.fb_flush(g) != 0 || + g->ops.mm.cache.fb_flush(g) != 0) { return -EBUSY; } @@ -406,234 +406,6 @@ int gk20a_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block) return 0; } -int gk20a_mm_fb_flush(struct gk20a *g) -{ - struct mm_gk20a *mm = &g->mm; - u32 data; - int ret = 0; - struct nvgpu_timeout timeout; - u32 retries; - - nvgpu_log_fn(g, " "); - - gk20a_busy_noresume(g); - if (!g->power_on) { - gk20a_idle_nosuspend(g); - return 0; - } - - retries = 100; - - if (g->ops.mm.get_flush_retries != NULL) { - retries = g->ops.mm.get_flush_retries(g, NVGPU_FLUSH_FB); - } - - nvgpu_timeout_init(g, &timeout, retries, NVGPU_TIMER_RETRY_TIMER); - - nvgpu_mutex_acquire(&mm->l2_op_lock); - - /* Make sure all previous writes are committed to the L2. There's no - guarantee that writes are to DRAM. This will be a sysmembar internal - to the L2. 
*/ - - trace_gk20a_mm_fb_flush(g->name); - - gk20a_writel(g, flush_fb_flush_r(), - flush_fb_flush_pending_busy_f()); - - do { - data = gk20a_readl(g, flush_fb_flush_r()); - - if (flush_fb_flush_outstanding_v(data) == - flush_fb_flush_outstanding_true_v() || - flush_fb_flush_pending_v(data) == - flush_fb_flush_pending_busy_v()) { - nvgpu_log_info(g, "fb_flush 0x%x", data); - nvgpu_udelay(5); - } else { - break; - } - } while (nvgpu_timeout_expired(&timeout) == 0); - - if (nvgpu_timeout_peek_expired(&timeout) != 0) { - if (g->ops.fb.dump_vpr_info != NULL) { - g->ops.fb.dump_vpr_info(g); - } - if (g->ops.fb.dump_wpr_info != NULL) { - g->ops.fb.dump_wpr_info(g); - } - ret = -EBUSY; - } - - trace_gk20a_mm_fb_flush_done(g->name); - - nvgpu_mutex_release(&mm->l2_op_lock); - - gk20a_idle_nosuspend(g); - - return ret; -} - -static void gk20a_mm_l2_invalidate_locked(struct gk20a *g) -{ - u32 data; - struct nvgpu_timeout timeout; - u32 retries = 200; - - trace_gk20a_mm_l2_invalidate(g->name); - - if (g->ops.mm.get_flush_retries != NULL) { - retries = g->ops.mm.get_flush_retries(g, NVGPU_FLUSH_L2_INV); - } - - nvgpu_timeout_init(g, &timeout, retries, NVGPU_TIMER_RETRY_TIMER); - - /* Invalidate any clean lines from the L2 so subsequent reads go to - DRAM. Dirty lines are not affected by this operation. 
*/ - gk20a_writel(g, flush_l2_system_invalidate_r(), - flush_l2_system_invalidate_pending_busy_f()); - - do { - data = gk20a_readl(g, flush_l2_system_invalidate_r()); - - if (flush_l2_system_invalidate_outstanding_v(data) == - flush_l2_system_invalidate_outstanding_true_v() || - flush_l2_system_invalidate_pending_v(data) == - flush_l2_system_invalidate_pending_busy_v()) { - nvgpu_log_info(g, "l2_system_invalidate 0x%x", - data); - nvgpu_udelay(5); - } else { - break; - } - } while (nvgpu_timeout_expired(&timeout) == 0); - - if (nvgpu_timeout_peek_expired(&timeout) != 0) { - nvgpu_warn(g, "l2_system_invalidate too many retries"); - } - - trace_gk20a_mm_l2_invalidate_done(g->name); -} - -void gk20a_mm_l2_invalidate(struct gk20a *g) -{ - struct mm_gk20a *mm = &g->mm; - gk20a_busy_noresume(g); - if (g->power_on) { - nvgpu_mutex_acquire(&mm->l2_op_lock); - gk20a_mm_l2_invalidate_locked(g); - nvgpu_mutex_release(&mm->l2_op_lock); - } - gk20a_idle_nosuspend(g); -} - -int gk20a_mm_l2_flush(struct gk20a *g, bool invalidate) -{ - struct mm_gk20a *mm = &g->mm; - u32 data; - struct nvgpu_timeout timeout; - u32 retries = 2000; - int err = -ETIMEDOUT; - - nvgpu_log_fn(g, " "); - - gk20a_busy_noresume(g); - if (!g->power_on) { - goto hw_was_off; - } - - if (g->ops.mm.get_flush_retries != NULL) { - retries = g->ops.mm.get_flush_retries(g, NVGPU_FLUSH_L2_FLUSH); - } - - nvgpu_timeout_init(g, &timeout, retries, NVGPU_TIMER_RETRY_TIMER); - - nvgpu_mutex_acquire(&mm->l2_op_lock); - - trace_gk20a_mm_l2_flush(g->name); - - /* Flush all dirty lines from the L2 to DRAM. Lines are left in the L2 - as clean, so subsequent reads might hit in the L2. 
*/ - gk20a_writel(g, flush_l2_flush_dirty_r(), - flush_l2_flush_dirty_pending_busy_f()); - - do { - data = gk20a_readl(g, flush_l2_flush_dirty_r()); - - if (flush_l2_flush_dirty_outstanding_v(data) == - flush_l2_flush_dirty_outstanding_true_v() || - flush_l2_flush_dirty_pending_v(data) == - flush_l2_flush_dirty_pending_busy_v()) { - nvgpu_log_info(g, "l2_flush_dirty 0x%x", data); - nvgpu_udelay(5); - } else { - err = 0; - break; - } - } while (nvgpu_timeout_expired_msg(&timeout, - "l2_flush_dirty too many retries") == 0); - - trace_gk20a_mm_l2_flush_done(g->name); - - if (invalidate) { - gk20a_mm_l2_invalidate_locked(g); - } - - nvgpu_mutex_release(&mm->l2_op_lock); - -hw_was_off: - gk20a_idle_nosuspend(g); - - return err; -} - -void gk20a_mm_cbc_clean(struct gk20a *g) -{ - struct mm_gk20a *mm = &g->mm; - u32 data; - struct nvgpu_timeout timeout; - u32 retries = 200; - - nvgpu_log_fn(g, " "); - - gk20a_busy_noresume(g); - if (!g->power_on) { - goto hw_was_off; - } - - if (g->ops.mm.get_flush_retries != NULL) { - retries = g->ops.mm.get_flush_retries(g, NVGPU_FLUSH_CBC_CLEAN); - } - - nvgpu_timeout_init(g, &timeout, retries, NVGPU_TIMER_RETRY_TIMER); - - nvgpu_mutex_acquire(&mm->l2_op_lock); - - /* Flush all dirty lines from the CBC to L2 */ - gk20a_writel(g, flush_l2_clean_comptags_r(), - flush_l2_clean_comptags_pending_busy_f()); - - do { - data = gk20a_readl(g, flush_l2_clean_comptags_r()); - - if (flush_l2_clean_comptags_outstanding_v(data) == - flush_l2_clean_comptags_outstanding_true_v() || - flush_l2_clean_comptags_pending_v(data) == - flush_l2_clean_comptags_pending_busy_v()) { - nvgpu_log_info(g, "l2_clean_comptags 0x%x", data); - nvgpu_udelay(5); - } else { - break; - } - } while (nvgpu_timeout_expired_msg(&timeout, - "l2_clean_comptags too many retries") == 0); - - nvgpu_mutex_release(&mm->l2_op_lock); - -hw_was_off: - gk20a_idle_nosuspend(g); -} - u32 gk20a_mm_get_iommu_bit(struct gk20a *g) { return 34; @@ -656,4 +428,3 @@ u64 
gk20a_mm_bar1_map_userd(struct gk20a *g, struct nvgpu_mem *mem, u32 offset) gk20a_mem_flag_none, false, mem->aperture); } - diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h index b7749a897..0426bd912 100644 --- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h @@ -72,11 +72,6 @@ gk20a_buffer_state_from_list(struct nvgpu_list_node *node) struct gk20a; struct channel_gk20a; -int gk20a_mm_fb_flush(struct gk20a *g); -int gk20a_mm_l2_flush(struct gk20a *g, bool invalidate); -void gk20a_mm_cbc_clean(struct gk20a *g); -void gk20a_mm_l2_invalidate(struct gk20a *g); - #define dev_from_vm(vm) dev_from_gk20a(vm->mm->g) void gk20a_mm_ltc_isr(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c index be9a39270..40c4e0193 100644 --- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c @@ -46,6 +46,7 @@ #include #include +#include "hal/mm/cache/flush_gk20a.h" #include "hal/mc/mc_gm20b.h" #include "hal/bus/bus_gm20b.h" #include "hal/bus/bus_gk20a.h" @@ -806,10 +807,6 @@ static const struct gpu_ops gm20b_ops = { .gmmu_map = gk20a_locked_gmmu_map, .gmmu_unmap = gk20a_locked_gmmu_unmap, .vm_bind_channel = gk20a_vm_bind_channel, - .fb_flush = gk20a_mm_fb_flush, - .l2_invalidate = gk20a_mm_l2_invalidate, - .l2_flush = gk20a_mm_l2_flush, - .cbc_clean = gk20a_mm_cbc_clean, .get_big_page_sizes = gm20b_mm_get_big_page_sizes, .get_default_big_page_size = gm20b_mm_get_default_big_page_size, .gpu_phys_addr = gm20b_gpu_phys_addr, @@ -822,6 +819,12 @@ static const struct gpu_ops gm20b_ops = { .get_kind_invalid = gm20b_get_kind_invalid, .get_kind_pitch = gm20b_get_kind_pitch, .bar1_map_userd = gk20a_mm_bar1_map_userd, + .cache = { + .fb_flush = gk20a_mm_fb_flush, + .l2_invalidate = gk20a_mm_l2_invalidate, + .l2_flush = gk20a_mm_l2_flush, + .cbc_clean = gk20a_mm_cbc_clean, + }, }, .therm = { .init_therm_setup_hw = gm20b_init_therm_setup_hw, diff --git 
a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c index a5000410a..857bb8cfe 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c @@ -47,6 +47,7 @@ #include #include +#include "hal/mm/cache/flush_gk20a.h" #include "hal/mc/mc_gm20b.h" #include "hal/mc/mc_gp10b.h" #include "hal/bus/bus_gk20a.h" @@ -906,10 +907,6 @@ static const struct gpu_ops gp10b_ops = { .gmmu_map = gk20a_locked_gmmu_map, .gmmu_unmap = gk20a_locked_gmmu_unmap, .vm_bind_channel = gk20a_vm_bind_channel, - .fb_flush = gk20a_mm_fb_flush, - .l2_invalidate = gk20a_mm_l2_invalidate, - .l2_flush = gk20a_mm_l2_flush, - .cbc_clean = gk20a_mm_cbc_clean, .get_big_page_sizes = gm20b_mm_get_big_page_sizes, .get_default_big_page_size = gp10b_mm_get_default_big_page_size, .gpu_phys_addr = gm20b_gpu_phys_addr, @@ -924,6 +921,12 @@ static const struct gpu_ops gp10b_ops = { .get_kind_invalid = gm20b_get_kind_invalid, .get_kind_pitch = gm20b_get_kind_pitch, .bar1_map_userd = gk20a_mm_bar1_map_userd, + .cache = { + .fb_flush = gk20a_mm_fb_flush, + .l2_invalidate = gk20a_mm_l2_invalidate, + .l2_flush = gk20a_mm_l2_flush, + .cbc_clean = gk20a_mm_cbc_clean, + }, }, .pramin = { .data032_r = pram_data032_r, diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index fce669fe9..a041aa45d 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c @@ -22,6 +22,8 @@ * DEALINGS IN THE SOFTWARE. 
*/ +#include "hal/mm/cache/flush_gk20a.h" +#include "hal/mm/cache/flush_gv11b.h" #include "hal/mc/mc_gm20b.h" #include "hal/mc/mc_gp10b.h" #include "hal/mc/mc_gv11b.h" @@ -1088,10 +1090,6 @@ static const struct gpu_ops gv100_ops = { .gmmu_map = gk20a_locked_gmmu_map, .gmmu_unmap = gk20a_locked_gmmu_unmap, .vm_bind_channel = gk20a_vm_bind_channel, - .fb_flush = gk20a_mm_fb_flush, - .l2_invalidate = gk20a_mm_l2_invalidate, - .l2_flush = gv11b_mm_l2_flush, - .cbc_clean = gk20a_mm_cbc_clean, .get_big_page_sizes = gm20b_mm_get_big_page_sizes, .get_default_big_page_size = gp10b_mm_get_default_big_page_size, .gpu_phys_addr = gv11b_gpu_phys_addr, @@ -1108,6 +1106,12 @@ static const struct gpu_ops gv100_ops = { .mmu_fault_disable_hw = gv11b_mm_mmu_fault_disable_hw, .get_flush_retries = gv100_mm_get_flush_retries, .bar1_map_userd = NULL, + .cache = { + .fb_flush = gk20a_mm_fb_flush, + .l2_invalidate = gk20a_mm_l2_invalidate, + .l2_flush = gv11b_mm_l2_flush, + .cbc_clean = gk20a_mm_cbc_clean, + }, }, .pramin = { .data032_r = pram_data032_r, diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index f740101a7..c2c76753c 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c @@ -30,6 +30,8 @@ #include #include +#include "hal/mm/cache/flush_gk20a.h" +#include "hal/mm/cache/flush_gv11b.h" #include "hal/mc/mc_gm20b.h" #include "hal/mc/mc_gp10b.h" #include "hal/mc/mc_gv11b.h" @@ -1064,10 +1066,6 @@ static const struct gpu_ops gv11b_ops = { .gmmu_map = gk20a_locked_gmmu_map, .gmmu_unmap = gk20a_locked_gmmu_unmap, .vm_bind_channel = gk20a_vm_bind_channel, - .fb_flush = gk20a_mm_fb_flush, - .l2_invalidate = gk20a_mm_l2_invalidate, - .l2_flush = gv11b_mm_l2_flush, - .cbc_clean = gk20a_mm_cbc_clean, .get_big_page_sizes = gm20b_mm_get_big_page_sizes, .get_default_big_page_size = gp10b_mm_get_default_big_page_size, .gpu_phys_addr = gv11b_gpu_phys_addr, @@ -1084,6 +1082,12 @@ static const struct gpu_ops gv11b_ops = { 
.fault_info_mem_destroy = gv11b_mm_fault_info_mem_destroy, .mmu_fault_disable_hw = gv11b_mm_mmu_fault_disable_hw, .bar1_map_userd = NULL, + .cache = { + .fb_flush = gk20a_mm_fb_flush, + .l2_invalidate = gk20a_mm_l2_invalidate, + .l2_flush = gv11b_mm_l2_flush, + .cbc_clean = gk20a_mm_cbc_clean, + }, }, .therm = { .init_therm_setup_hw = gv11b_init_therm_setup_hw, diff --git a/drivers/gpu/nvgpu/gv11b/mm_gv11b.c b/drivers/gpu/nvgpu/gv11b/mm_gv11b.c index 0932a6d3a..a0895e446 100644 --- a/drivers/gpu/nvgpu/gv11b/mm_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/mm_gv11b.c @@ -200,40 +200,6 @@ int gv11b_init_mm_setup_hw(struct gk20a *g) return err; } -int gv11b_mm_l2_flush(struct gk20a *g, bool invalidate) -{ - int err = 0; - - nvgpu_log(g, gpu_dbg_fn, "gv11b_mm_l2_flush"); - - err = g->ops.mm.fb_flush(g); - if (err != 0) { - nvgpu_err(g, "mm.fb_flush()[1] failed err=%d", err); - return err; - } - err = gk20a_mm_l2_flush(g, invalidate); - if (err != 0) { - nvgpu_err(g, "gk20a_mm_l2_flush failed"); - return err; - } - if (g->ops.bus.bar1_bind != NULL) { - err = g->ops.fb.tlb_invalidate(g, - g->mm.bar1.vm->pdb.mem); - if (err != 0) { - nvgpu_err(g, "fb.tlb_invalidate() failed err=%d", err); - return err; - } - } else { - err = g->ops.mm.fb_flush(g); - if (err != 0) { - nvgpu_err(g, "mm.fb_flush()[2] failed err=%d", err); - return err; - } - } - - return err; -} - /* * On Volta the GPU determines whether to do L3 allocation for a mapping by * checking bit 36 of the phsyical address. 
So if a mapping should allocte lines diff --git a/drivers/gpu/nvgpu/gv11b/mm_gv11b.h b/drivers/gpu/nvgpu/gv11b/mm_gv11b.h index bca67f083..d9d1fe0c0 100644 --- a/drivers/gpu/nvgpu/gv11b/mm_gv11b.h +++ b/drivers/gpu/nvgpu/gv11b/mm_gv11b.h @@ -32,7 +32,6 @@ bool gv11b_mm_is_bar1_supported(struct gk20a *g); void gv11b_init_inst_block(struct nvgpu_mem *inst_block, struct vm_gk20a *vm, u32 big_page_size); int gv11b_init_mm_setup_hw(struct gk20a *g); -int gv11b_mm_l2_flush(struct gk20a *g, bool invalidate); u64 gv11b_gpu_phys_addr(struct gk20a *g, struct nvgpu_gmmu_attrs *attrs, u64 phys); void gv11b_mm_fault_info_mem_destroy(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/hal/ltc/intr/ltc_intr_gp10b.c b/drivers/gpu/nvgpu/hal/ltc/intr/ltc_intr_gp10b.c index 58575848e..0d62f99de 100644 --- a/drivers/gpu/nvgpu/hal/ltc/intr/ltc_intr_gp10b.c +++ b/drivers/gpu/nvgpu/hal/ltc/intr/ltc_intr_gp10b.c @@ -61,7 +61,7 @@ void gp10b_ltc_intr_handle_lts_interrupts(struct gk20a *g, u32 ltc, u32 slice) nvgpu_writel_check(g, ltc_ltc0_lts0_dstg_ecc_report_r() + offset, ecc_stats_reg_val); - if (g->ops.mm.l2_flush(g, true) != 0) { + if (g->ops.mm.cache.l2_flush(g, true) != 0) { nvgpu_err(g, "l2_flush failed"); } } diff --git a/drivers/gpu/nvgpu/hal/mm/cache/flush_gk20a.c b/drivers/gpu/nvgpu/hal/mm/cache/flush_gk20a.c new file mode 100644 index 000000000..a51fd6dd5 --- /dev/null +++ b/drivers/gpu/nvgpu/hal/mm/cache/flush_gk20a.c @@ -0,0 +1,260 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include + +#include +#include +#include +#include + +#include + +#include "flush_gk20a.h" + +int gk20a_mm_fb_flush(struct gk20a *g) +{ + struct mm_gk20a *mm = &g->mm; + u32 data; + int ret = 0; + struct nvgpu_timeout timeout; + u32 retries; + + nvgpu_log_fn(g, " "); + + gk20a_busy_noresume(g); + if (!g->power_on) { + gk20a_idle_nosuspend(g); + return 0; + } + + retries = 100; + + if (g->ops.mm.get_flush_retries != NULL) { + retries = g->ops.mm.get_flush_retries(g, NVGPU_FLUSH_FB); + } + + nvgpu_timeout_init(g, &timeout, retries, NVGPU_TIMER_RETRY_TIMER); + + nvgpu_mutex_acquire(&mm->l2_op_lock); + + /* Make sure all previous writes are committed to the L2. There's no + guarantee that writes are to DRAM. This will be a sysmembar internal + to the L2. 
*/ + + trace_gk20a_mm_fb_flush(g->name); + + nvgpu_writel(g, flush_fb_flush_r(), + flush_fb_flush_pending_busy_f()); + + do { + data = nvgpu_readl(g, flush_fb_flush_r()); + + if (flush_fb_flush_outstanding_v(data) == + flush_fb_flush_outstanding_true_v() || + flush_fb_flush_pending_v(data) == + flush_fb_flush_pending_busy_v()) { + nvgpu_log_info(g, "fb_flush 0x%x", data); + nvgpu_udelay(5); + } else { + break; + } + } while (nvgpu_timeout_expired(&timeout) == 0); + + if (nvgpu_timeout_peek_expired(&timeout) != 0) { + if (g->ops.fb.dump_vpr_info != NULL) { + g->ops.fb.dump_vpr_info(g); + } + if (g->ops.fb.dump_wpr_info != NULL) { + g->ops.fb.dump_wpr_info(g); + } + ret = -EBUSY; + } + + trace_gk20a_mm_fb_flush_done(g->name); + + nvgpu_mutex_release(&mm->l2_op_lock); + + gk20a_idle_nosuspend(g); + + return ret; +} + +static void gk20a_mm_l2_invalidate_locked(struct gk20a *g) +{ + u32 data; + struct nvgpu_timeout timeout; + u32 retries = 200; + + trace_gk20a_mm_l2_invalidate(g->name); + + if (g->ops.mm.get_flush_retries != NULL) { + retries = g->ops.mm.get_flush_retries(g, NVGPU_FLUSH_L2_INV); + } + + nvgpu_timeout_init(g, &timeout, retries, NVGPU_TIMER_RETRY_TIMER); + + /* Invalidate any clean lines from the L2 so subsequent reads go to + DRAM. Dirty lines are not affected by this operation. 
*/ + nvgpu_writel(g, flush_l2_system_invalidate_r(), + flush_l2_system_invalidate_pending_busy_f()); + + do { + data = nvgpu_readl(g, flush_l2_system_invalidate_r()); + + if (flush_l2_system_invalidate_outstanding_v(data) == + flush_l2_system_invalidate_outstanding_true_v() || + flush_l2_system_invalidate_pending_v(data) == + flush_l2_system_invalidate_pending_busy_v()) { + nvgpu_log_info(g, "l2_system_invalidate 0x%x", + data); + nvgpu_udelay(5); + } else { + break; + } + } while (nvgpu_timeout_expired(&timeout) == 0); + + if (nvgpu_timeout_peek_expired(&timeout) != 0) { + nvgpu_warn(g, "l2_system_invalidate too many retries"); + } + + trace_gk20a_mm_l2_invalidate_done(g->name); +} + +void gk20a_mm_l2_invalidate(struct gk20a *g) +{ + struct mm_gk20a *mm = &g->mm; + gk20a_busy_noresume(g); + if (g->power_on) { + nvgpu_mutex_acquire(&mm->l2_op_lock); + gk20a_mm_l2_invalidate_locked(g); + nvgpu_mutex_release(&mm->l2_op_lock); + } + gk20a_idle_nosuspend(g); +} + +int gk20a_mm_l2_flush(struct gk20a *g, bool invalidate) +{ + struct mm_gk20a *mm = &g->mm; + u32 data; + struct nvgpu_timeout timeout; + u32 retries = 2000; + int err = -ETIMEDOUT; + + nvgpu_log_fn(g, " "); + + gk20a_busy_noresume(g); + if (!g->power_on) { + goto hw_was_off; + } + + if (g->ops.mm.get_flush_retries != NULL) { + retries = g->ops.mm.get_flush_retries(g, NVGPU_FLUSH_L2_FLUSH); + } + + nvgpu_timeout_init(g, &timeout, retries, NVGPU_TIMER_RETRY_TIMER); + + nvgpu_mutex_acquire(&mm->l2_op_lock); + + trace_gk20a_mm_l2_flush(g->name); + + /* Flush all dirty lines from the L2 to DRAM. Lines are left in the L2 + as clean, so subsequent reads might hit in the L2. 
*/ + nvgpu_writel(g, flush_l2_flush_dirty_r(), + flush_l2_flush_dirty_pending_busy_f()); + + do { + data = nvgpu_readl(g, flush_l2_flush_dirty_r()); + + if (flush_l2_flush_dirty_outstanding_v(data) == + flush_l2_flush_dirty_outstanding_true_v() || + flush_l2_flush_dirty_pending_v(data) == + flush_l2_flush_dirty_pending_busy_v()) { + nvgpu_log_info(g, "l2_flush_dirty 0x%x", data); + nvgpu_udelay(5); + } else { + err = 0; + break; + } + } while (nvgpu_timeout_expired_msg(&timeout, + "l2_flush_dirty too many retries") == 0); + + trace_gk20a_mm_l2_flush_done(g->name); + + if (invalidate) { + gk20a_mm_l2_invalidate_locked(g); + } + + nvgpu_mutex_release(&mm->l2_op_lock); + +hw_was_off: + gk20a_idle_nosuspend(g); + + return err; +} + +void gk20a_mm_cbc_clean(struct gk20a *g) +{ + struct mm_gk20a *mm = &g->mm; + u32 data; + struct nvgpu_timeout timeout; + u32 retries = 200; + + nvgpu_log_fn(g, " "); + + gk20a_busy_noresume(g); + if (!g->power_on) { + goto hw_was_off; + } + + if (g->ops.mm.get_flush_retries != NULL) { + retries = g->ops.mm.get_flush_retries(g, NVGPU_FLUSH_CBC_CLEAN); + } + + nvgpu_timeout_init(g, &timeout, retries, NVGPU_TIMER_RETRY_TIMER); + + nvgpu_mutex_acquire(&mm->l2_op_lock); + + /* Flush all dirty lines from the CBC to L2 */ + nvgpu_writel(g, flush_l2_clean_comptags_r(), + flush_l2_clean_comptags_pending_busy_f()); + + do { + data = nvgpu_readl(g, flush_l2_clean_comptags_r()); + + if (flush_l2_clean_comptags_outstanding_v(data) == + flush_l2_clean_comptags_outstanding_true_v() || + flush_l2_clean_comptags_pending_v(data) == + flush_l2_clean_comptags_pending_busy_v()) { + nvgpu_log_info(g, "l2_clean_comptags 0x%x", data); + nvgpu_udelay(5); + } else { + break; + } + } while (nvgpu_timeout_expired_msg(&timeout, + "l2_clean_comptags too many retries") == 0); + + nvgpu_mutex_release(&mm->l2_op_lock); + +hw_was_off: + gk20a_idle_nosuspend(g); +} diff --git a/drivers/gpu/nvgpu/hal/mm/cache/flush_gk20a.h b/drivers/gpu/nvgpu/hal/mm/cache/flush_gk20a.h new 
file mode 100644 index 000000000..f157e7898 --- /dev/null +++ b/drivers/gpu/nvgpu/hal/mm/cache/flush_gk20a.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef HAL_MM_FLUSH_FLUSH_GK20A_H +#define HAL_MM_FLUSH_FLUSH_GK20A_H + +#include + +struct gk20a; + +int gk20a_mm_fb_flush(struct gk20a *g); +int gk20a_mm_l2_flush(struct gk20a *g, bool invalidate); +void gk20a_mm_cbc_clean(struct gk20a *g); +void gk20a_mm_l2_invalidate(struct gk20a *g); + +#endif diff --git a/drivers/gpu/nvgpu/hal/mm/cache/flush_gv11b.c b/drivers/gpu/nvgpu/hal/mm/cache/flush_gv11b.c new file mode 100644 index 000000000..0ee040b8a --- /dev/null +++ b/drivers/gpu/nvgpu/hal/mm/cache/flush_gv11b.c @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */
+
+#include <nvgpu/log.h>
+
+#include <nvgpu/gk20a.h>
+
+#include "flush_gk20a.h"
+#include "flush_gv11b.h"
+
+int gv11b_mm_l2_flush(struct gk20a *g, bool invalidate)
+{
+	int err = 0;
+
+	nvgpu_log(g, gpu_dbg_fn, "gv11b_mm_l2_flush");
+
+	err = g->ops.mm.cache.fb_flush(g);
+	if (err != 0) {
+		nvgpu_err(g, "mm.cache.fb_flush()[1] failed err=%d", err);
+		return err;
+	}
+	err = gk20a_mm_l2_flush(g, invalidate);
+	if (err != 0) {
+		nvgpu_err(g, "gk20a_mm_l2_flush failed");
+		return err;
+	}
+	if (g->ops.bus.bar1_bind != NULL) {
+		err = g->ops.fb.tlb_invalidate(g, g->mm.bar1.vm->pdb.mem);
+		if (err != 0) {
+			nvgpu_err(g, "fb.tlb_invalidate() failed err=%d", err);
+			return err;
+		}
+	} else {
+		err = g->ops.mm.cache.fb_flush(g);
+		if (err != 0) {
+			nvgpu_err(g, "mm.cache.fb_flush()[2] failed err=%d",
+				err);
+			return err;
+		}
+	}
+
+	return err;
+}
diff --git a/drivers/gpu/nvgpu/hal/mm/cache/flush_gv11b.h b/drivers/gpu/nvgpu/hal/mm/cache/flush_gv11b.h
new file mode 100644
index 000000000..c0ff42532
--- /dev/null
+++ b/drivers/gpu/nvgpu/hal/mm/cache/flush_gv11b.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef HAL_MM_FLUSH_FLUSH_GV11B_H
+#define HAL_MM_FLUSH_FLUSH_GV11B_H
+
+#include <nvgpu/types.h>
+
+struct gk20a;
+
+int gv11b_mm_l2_flush(struct gk20a *g, bool invalidate);
+
+#endif
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
index 86ea6bc22..d5cd9562a 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
@@ -1363,10 +1363,6 @@ struct gpu_ops {
 				struct vm_gk20a_mapping_batch *batch);
 		int (*vm_bind_channel)(struct vm_gk20a *vm,
 				struct channel_gk20a *ch);
-		int (*fb_flush)(struct gk20a *g);
-		void (*l2_invalidate)(struct gk20a *g);
-		int (*l2_flush)(struct gk20a *g, bool invalidate);
-		void (*cbc_clean)(struct gk20a *g);
 		u32 (*get_big_page_sizes)(void);
 		u32 (*get_default_big_page_size)(void);
 		u32 (*get_iommu_bit)(struct gk20a *g);
@@ -1391,6 +1387,12 @@
 		u64 (*bar1_map_userd)(struct gk20a *g, struct nvgpu_mem *mem, u32 offset);
 		int (*vm_as_alloc_share)(struct gk20a *g, struct vm_gk20a *vm);
 		void (*vm_as_free_share)(struct vm_gk20a *vm);
+		struct {
+			int (*fb_flush)(struct gk20a *g);
+			void (*l2_invalidate)(struct gk20a *g);
+			int (*l2_flush)(struct gk20a *g, bool invalidate);
+			void (*cbc_clean)(struct gk20a *g);
+		} cache;
 	} mm;
 	/*
 	 * This function is called to allocate secure memory (memory
diff --git a/drivers/gpu/nvgpu/libnvgpu-drv.export b/drivers/gpu/nvgpu/libnvgpu-drv.export
index 6433009f2..b06247574 100644
--- a/drivers/gpu/nvgpu/libnvgpu-drv.export
+++ b/drivers/gpu/nvgpu/libnvgpu-drv.export
@@ -31,6 +31,7 @@ gk20a_runlist_get_tsg_entry
 gk20a_locked_gmmu_map
 gk20a_locked_gmmu_unmap
 gk20a_ramin_alloc_size
+gk20a_mm_fb_flush
 gm20b_fb_tlb_invalidate
 gm20b_fuse_status_opt_gpc
gm20b_ramin_set_big_page_size diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c index 470e6d4e3..a733ab2e5 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c @@ -609,7 +609,8 @@ static int nvgpu_gpu_ioctl_l2_fb_ops(struct gk20a *g, return -EINVAL; if (args->l2_flush) { - err = g->ops.mm.l2_flush(g, args->l2_invalidate ? true : false); + err = g->ops.mm.cache.l2_flush(g, args->l2_invalidate ? + true : false); if (err != 0) { nvgpu_err(g, "l2_flush failed"); return err; @@ -617,7 +618,7 @@ static int nvgpu_gpu_ioctl_l2_fb_ops(struct gk20a *g, } if (args->fb_flush) { - g->ops.mm.fb_flush(g); + g->ops.mm.cache.fb_flush(g); } return err; diff --git a/drivers/gpu/nvgpu/tu104/hal_tu104.c b/drivers/gpu/nvgpu/tu104/hal_tu104.c index 576bef22a..348a5ed96 100644 --- a/drivers/gpu/nvgpu/tu104/hal_tu104.c +++ b/drivers/gpu/nvgpu/tu104/hal_tu104.c @@ -22,6 +22,8 @@ * DEALINGS IN THE SOFTWARE. */ +#include "hal/mm/cache/flush_gk20a.h" +#include "hal/mm/cache/flush_gv11b.h" #include "hal/mc/mc_gm20b.h" #include "hal/mc/mc_gp10b.h" #include "hal/mc/mc_gv11b.h" @@ -1126,10 +1128,6 @@ static const struct gpu_ops tu104_ops = { .gmmu_map = gk20a_locked_gmmu_map, .gmmu_unmap = gk20a_locked_gmmu_unmap, .vm_bind_channel = gk20a_vm_bind_channel, - .fb_flush = gk20a_mm_fb_flush, - .l2_invalidate = gk20a_mm_l2_invalidate, - .l2_flush = gv11b_mm_l2_flush, - .cbc_clean = gk20a_mm_cbc_clean, .get_big_page_sizes = gm20b_mm_get_big_page_sizes, .get_default_big_page_size = gp10b_mm_get_default_big_page_size, .gpu_phys_addr = gv11b_gpu_phys_addr, @@ -1146,6 +1144,12 @@ static const struct gpu_ops tu104_ops = { .mmu_fault_disable_hw = gv11b_mm_mmu_fault_disable_hw, .get_flush_retries = tu104_mm_get_flush_retries, .bar1_map_userd = NULL, + .cache = { + .fb_flush = gk20a_mm_fb_flush, + .l2_invalidate = gk20a_mm_l2_invalidate, + .l2_flush = gv11b_mm_l2_flush, + .cbc_clean = gk20a_mm_cbc_clean, + }, }, .pramin = 
{ .data032_r = pram_data032_r, diff --git a/userspace/units/mm/gmmu/page_table/page_table.c b/userspace/units/mm/gmmu/page_table/page_table.c index 1e9800bae..bcdc1e264 100644 --- a/userspace/units/mm/gmmu/page_table/page_table.c +++ b/userspace/units/mm/gmmu/page_table/page_table.c @@ -41,6 +41,8 @@ #include #include +#include +#include #include #include #include @@ -303,6 +305,8 @@ static int init_mm(struct unit_module *m, struct gk20a *g) g->ops.mm.gmmu_unmap = gk20a_locked_gmmu_unmap; g->ops.mm.gpu_phys_addr = gv11b_gpu_phys_addr; g->ops.mm.is_bar1_supported = gv11b_mm_is_bar1_supported; + g->ops.mm.cache.l2_flush = gv11b_mm_l2_flush; + g->ops.mm.cache.fb_flush = gk20a_mm_fb_flush; g->ops.fb.compression_page_size = gp10b_fb_compression_page_size; g->ops.fb.tlb_invalidate = gm20b_fb_tlb_invalidate; g->ops.ramin.init_pdb = gp10b_ramin_init_pdb; diff --git a/userspace/units/mm/page_table_faults/page_table_faults.c b/userspace/units/mm/page_table_faults/page_table_faults.c index b86d60983..7df3c80a3 100644 --- a/userspace/units/mm/page_table_faults/page_table_faults.c +++ b/userspace/units/mm/page_table_faults/page_table_faults.c @@ -43,12 +43,14 @@ #include "nvgpu/hw/gv11b/hw_gmmu_gv11b.h" #include "nvgpu/hw/gv11b/hw_fb_gv11b.h" +#include "hal/mm/cache/flush_gk20a.h" +#include "hal/mm/cache/flush_gv11b.h" #include "hal/mc/mc_gv11b.h" #include "hal/fb/fb_gp10b.h" #include "hal/fb/fb_gm20b.h" #include "hal/fb/fb_gv11b.h" -#include "hal/fifo/ramin_gk20a.h" #include "hal/fb/intr/fb_intr_gv11b.h" +#include "hal/fifo/ramin_gk20a.h" #include "hal/fifo/ramin_gm20b.h" #include "hal/fifo/ramin_gp10b.h" @@ -133,7 +135,8 @@ static int init_mm(struct unit_module *m, struct gk20a *g) g->ops.mm.fault_info_mem_destroy = gv11b_mm_fault_info_mem_destroy; g->ops.mm.mmu_fault_disable_hw = gv11b_mm_mmu_fault_disable_hw; g->ops.mm.init_mm_setup_hw = gv11b_init_mm_setup_hw; - g->ops.mm.l2_flush = gv11b_mm_l2_flush; + g->ops.mm.cache.l2_flush = gv11b_mm_l2_flush; + 
g->ops.mm.cache.fb_flush = gk20a_mm_fb_flush; g->ops.fb.init_hw = gv11b_fb_init_hw; g->ops.fb.intr.enable = gv11b_fb_intr_enable; g->ops.fb.fault_buf_configure_hw = gv11b_fb_fault_buf_configure_hw; diff --git a/userspace/units/mm/vm/vm.c b/userspace/units/mm/vm/vm.c index 271d9214c..615928bb6 100644 --- a/userspace/units/mm/vm/vm.c +++ b/userspace/units/mm/vm/vm.c @@ -33,6 +33,8 @@ #include #include #include +#include +#include #include #include #include @@ -137,6 +139,8 @@ static int init_test_env(struct unit_module *m, struct gk20a *g) g->ops.mm.gmmu_map = gk20a_locked_gmmu_map; g->ops.mm.gmmu_unmap = gk20a_locked_gmmu_unmap; g->ops.mm.gpu_phys_addr = gv11b_gpu_phys_addr; + g->ops.mm.cache.l2_flush = gv11b_mm_l2_flush; + g->ops.mm.cache.fb_flush = gk20a_mm_fb_flush; return UNIT_SUCCESS; }