From 3a764030b1404554bd6f21445485506418750ba1 Mon Sep 17 00:00:00 2001
From: Alex Waterman <alexw@nvidia.com>
Date: Thu, 11 Apr 2019 16:25:36 -0700
Subject: [PATCH] gpu: nvgpu: Add new mm HAL and move cache code to that HAL

Add a new MM HAL directory to contain all MM-related HAL units.
As part of this change, add a cache unit to the MM HAL. This contains
several related fixes:

1. Move the cache code in gk20a/mm_gk20a.c and gv11b/mm_gv11b.c to
   the new cache HAL. Update makefiles and header includes to take
   this into account. Also rename gk20a_{read,write}l() to their
   nvgpu_ variants.

2. Update the MM gops: move the cache related functions to the new
   cache HAL and update all calls to this HAL to reflect the new
   name.

3. Update some direct calls to gk20a MM cache ops to pass through
   the HAL instead.

4. Update the unit tests for various MM related things to use the
   new MM HAL locations.

This change accomplishes two architecture design goals. Firstly, it
removes one of the multiple HW header includes from mm_gk20a.c (the
flush HW header). Secondly, it moves code from the gk20a/ and gv11b/
directories into more proper locations under hal/.

JIRA NVGPU-2042

Change-Id: I91e4bdca4341be4dbb46fabd72622b917769f4a6
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2095749
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/Makefile                    |   2 +
 drivers/gpu/nvgpu/Makefile.sources            |   2 +
 drivers/gpu/nvgpu/common/fifo/channel.c       |   2 +-
 drivers/gpu/nvgpu/common/gr/ctx.c             |   8 +-
 drivers/gpu/nvgpu/common/gr/fecs_trace.c      |   2 +-
 drivers/gpu/nvgpu/common/gr/global_ctx.c      |   2 +-
 drivers/gpu/nvgpu/common/gr/subctx.c          |   3 +-
 drivers/gpu/nvgpu/common/mm/gmmu/page_table.c |   4 +-
 drivers/gpu/nvgpu/common/mm/mm.c              |   4 +-
 .../nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c  |  10 +-
 .../nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c  |  10 +-
 drivers/gpu/nvgpu/gk20a/gr_gk20a.c            |   2 +-
 drivers/gpu/nvgpu/gk20a/mm_gk20a.c            | 233 +---------------
 drivers/gpu/nvgpu/gk20a/mm_gk20a.h            |   5 -
 drivers/gpu/nvgpu/gm20b/hal_gm20b.c           |  11 +-
 drivers/gpu/nvgpu/gp10b/hal_gp10b.c           |  11 +-
 drivers/gpu/nvgpu/gv100/hal_gv100.c           |  12 +-
 drivers/gpu/nvgpu/gv11b/hal_gv11b.c           |  12 +-
 drivers/gpu/nvgpu/gv11b/mm_gv11b.c            |  34 ---
 drivers/gpu/nvgpu/gv11b/mm_gv11b.h            |   1 -
 .../gpu/nvgpu/hal/ltc/intr/ltc_intr_gp10b.c   |   2 +-
 drivers/gpu/nvgpu/hal/mm/cache/flush_gk20a.c  | 260 ++++++++++++++++++
 drivers/gpu/nvgpu/hal/mm/cache/flush_gk20a.h  |  35 +++
 drivers/gpu/nvgpu/hal/mm/cache/flush_gv11b.c  |  62 +++++
 drivers/gpu/nvgpu/hal/mm/cache/flush_gv11b.h  |  32 +++
 drivers/gpu/nvgpu/include/nvgpu/gk20a.h       |  10 +-
 drivers/gpu/nvgpu/libnvgpu-drv.export         |   1 +
 drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c       |   5 +-
 drivers/gpu/nvgpu/tu104/hal_tu104.c           |  12 +-
 .../units/mm/gmmu/page_table/page_table.c     |   4 +
 .../mm/page_table_faults/page_table_faults.c  |   7 +-
 userspace/units/mm/vm/vm.c                    |   4 +
 32 files changed, 482 insertions(+), 322 deletions(-)
 create mode 100644 drivers/gpu/nvgpu/hal/mm/cache/flush_gk20a.c
 create mode 100644 drivers/gpu/nvgpu/hal/mm/cache/flush_gk20a.h
 create mode 100644 drivers/gpu/nvgpu/hal/mm/cache/flush_gv11b.c
 create mode 100644 drivers/gpu/nvgpu/hal/mm/cache/flush_gv11b.h

diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile
index 27d1f592f..f131803fa 100644
--- a/drivers/gpu/nvgpu/Makefile
+++ b/drivers/gpu/nvgpu/Makefile
@@ -150,6 +150,8 @@ nvgpu-y += \
 	common/nvlink/nvlink.o \
 	common/nvlink/nvlink_gv100.o \
 	common/nvlink/nvlink_tu104.o \
+	hal/mm/cache/flush_gk20a.o \
+	hal/mm/cache/flush_gv11b.o \
 	hal/mc/mc_gm20b.o \
 	hal/mc/mc_gp10b.o  \
 	hal/mc/mc_gv11b.o  \
diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources
index c368ca439..886222958 100644
--- a/drivers/gpu/nvgpu/Makefile.sources
+++ b/drivers/gpu/nvgpu/Makefile.sources
@@ -271,6 +271,8 @@ srcs += common/sim.c \
 	tu104/mm_tu104.c \
 	tu104/hal_tu104.c \
 	tu104/func_tu104.c \
+	hal/mm/cache/flush_gk20a.c \
+	hal/mm/cache/flush_gv11b.c \
 	hal/mc/mc_gm20b.c  \
 	hal/mc/mc_gp10b.c  \
 	hal/mc/mc_gv11b.c  \
diff --git a/drivers/gpu/nvgpu/common/fifo/channel.c b/drivers/gpu/nvgpu/common/fifo/channel.c
index b564c7e49..385f4824b 100644
--- a/drivers/gpu/nvgpu/common/fifo/channel.c
+++ b/drivers/gpu/nvgpu/common/fifo/channel.c
@@ -2594,7 +2594,7 @@ void gk20a_channel_semaphore_wakeup(struct gk20a *g, bool post_events)
 	 * Ensure that all pending writes are actually done  before trying to
 	 * read semaphore values from DRAM.
 	 */
-	g->ops.mm.fb_flush(g);
+	g->ops.mm.cache.fb_flush(g);
 
 	for (chid = 0; chid < f->num_channels; chid++) {
 		struct channel_gk20a *c = g->fifo.channel+chid;
diff --git a/drivers/gpu/nvgpu/common/gr/ctx.c b/drivers/gpu/nvgpu/common/gr/ctx.c
index d24599159..2ba41525d 100644
--- a/drivers/gpu/nvgpu/common/gr/ctx.c
+++ b/drivers/gpu/nvgpu/common/gr/ctx.c
@@ -689,7 +689,7 @@ u32 nvgpu_gr_ctx_get_ctx_id(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx)
 	if (!gr_ctx->ctx_id_valid) {
 		/* Channel gr_ctx buffer is gpu cacheable.
 		   Flush and invalidate before cpu update. */
-		if (g->ops.mm.l2_flush(g, true) != 0) {
+		if (g->ops.mm.cache.l2_flush(g, true) != 0) {
 			nvgpu_err(g, "l2_flush failed");
 		}
 
@@ -707,7 +707,7 @@ int nvgpu_gr_ctx_init_zcull(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx)
 {
 	int err;
 
-	err = g->ops.mm.l2_flush(g, true);
+	err = g->ops.mm.cache.l2_flush(g, true);
 	if (err != 0) {
 		nvgpu_err(g, "l2_flush failed");
 		return err;
@@ -753,7 +753,7 @@ int nvgpu_gr_ctx_set_smpc_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
 
 	/* Channel gr_ctx buffer is gpu cacheable.
 	   Flush and invalidate before cpu update. */
-	err = g->ops.mm.l2_flush(g, true);
+	err = g->ops.mm.cache.l2_flush(g, true);
 	if (err != 0) {
 		nvgpu_err(g, "l2_flush failed");
 		return err;
@@ -828,7 +828,7 @@ int nvgpu_gr_ctx_set_hwpm_mode(struct gk20a *g, struct nvgpu_gr_ctx *gr_ctx,
 
 	/* Channel gr_ctx buffer is gpu cacheable.
 	   Flush and invalidate before cpu update. */
-	err = g->ops.mm.l2_flush(g, true);
+	err = g->ops.mm.cache.l2_flush(g, true);
 	if (err != 0) {
 		nvgpu_err(g, "l2_flush failed");
 		return err;
diff --git a/drivers/gpu/nvgpu/common/gr/fecs_trace.c b/drivers/gpu/nvgpu/common/gr/fecs_trace.c
index 51f1cdae7..9d2adc143 100644
--- a/drivers/gpu/nvgpu/common/gr/fecs_trace.c
+++ b/drivers/gpu/nvgpu/common/gr/fecs_trace.c
@@ -479,7 +479,7 @@ int nvgpu_gr_fecs_trace_poll(struct gk20a *g)
 		read, g->ops.gr.fecs_trace.get_read_index(g), write, cnt);
 
 	/* Ensure all FECS writes have made it to SYSMEM */
-	g->ops.mm.fb_flush(g);
+	g->ops.mm.cache.fb_flush(g);
 
 	while (read != write) {
 		cnt = nvgpu_gr_fecs_trace_ring_read(g, read, &vm_update_mask);
diff --git a/drivers/gpu/nvgpu/common/gr/global_ctx.c b/drivers/gpu/nvgpu/common/gr/global_ctx.c
index 85b98aa33..2cffe2612 100644
--- a/drivers/gpu/nvgpu/common/gr/global_ctx.c
+++ b/drivers/gpu/nvgpu/common/gr/global_ctx.c
@@ -284,7 +284,7 @@ void nvgpu_gr_global_ctx_load_local_golden_image(struct gk20a *g,
 {
 	/* Channel gr_ctx buffer is gpu cacheable.
 	   Flush and invalidate before cpu update. */
-	if (g->ops.mm.l2_flush(g, true) != 0) {
+	if (g->ops.mm.cache.l2_flush(g, true) != 0) {
 		nvgpu_err(g, "l2_flush failed");
 	}
 
diff --git a/drivers/gpu/nvgpu/common/gr/subctx.c b/drivers/gpu/nvgpu/common/gr/subctx.c
index 9a3b33104..d425dab50 100644
--- a/drivers/gpu/nvgpu/common/gr/subctx.c
+++ b/drivers/gpu/nvgpu/common/gr/subctx.c
@@ -88,7 +88,7 @@ void nvgpu_gr_subctx_load_ctx_header(struct gk20a *g,
 	struct nvgpu_mem *ctxheader = &subctx->ctx_header;
 	int err = 0;
 
-	err = g->ops.mm.l2_flush(g, true);
+	err = g->ops.mm.cache.l2_flush(g, true);
 	if (err != 0) {
 		nvgpu_err(g, "l2_flush failed");
 	}
@@ -154,4 +154,3 @@ struct nvgpu_mem *nvgpu_gr_subctx_get_ctx_header(struct gk20a *g,
 {
 	return &subctx->ctx_header;
 }
-
diff --git a/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c b/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c
index 413a7d549..cef21117a 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu/page_table.c
@@ -882,7 +882,7 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
 	}
 
 	if (batch == NULL) {
-		if (gk20a_mm_l2_flush(g, true) != 0) {
+		if (g->ops.mm.cache.l2_flush(g, true) != 0) {
 			nvgpu_err(g, "gk20a_mm_l2_flush[1] failed");
 		}
 		err = g->ops.fb.tlb_invalidate(g, vm->pdb.mem);
@@ -891,7 +891,7 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
 		}
 	} else {
 		if (!batch->gpu_l2_flushed) {
-			if (gk20a_mm_l2_flush(g, true) != 0) {
+			if (g->ops.mm.cache.l2_flush(g, true) != 0) {
 				nvgpu_err(g, "gk20a_mm_l2_flush[2] failed");
 			}
 			batch->gpu_l2_flushed = true;
diff --git a/drivers/gpu/nvgpu/common/mm/mm.c b/drivers/gpu/nvgpu/common/mm/mm.c
index afdb2c8a3..47990a03f 100644
--- a/drivers/gpu/nvgpu/common/mm/mm.c
+++ b/drivers/gpu/nvgpu/common/mm/mm.c
@@ -124,8 +124,8 @@ int nvgpu_mm_suspend(struct gk20a *g)
 
 	nvgpu_vidmem_thread_pause_sync(&g->mm);
 
-	g->ops.mm.cbc_clean(g);
-	err = g->ops.mm.l2_flush(g, false);
+	g->ops.mm.cache.cbc_clean(g);
+	err = g->ops.mm.cache.l2_flush(g, false);
 	if (err != 0) {
 		nvgpu_err(g, "l2_flush failed");
 		return err;
diff --git a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c
index 4717b0e39..023ee9819 100644
--- a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c
+++ b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c
@@ -571,10 +571,6 @@ static const struct gpu_ops vgpu_gp10b_ops = {
 		.gmmu_map = vgpu_locked_gmmu_map,
 		.gmmu_unmap = vgpu_locked_gmmu_unmap,
 		.vm_bind_channel = vgpu_vm_bind_channel,
-		.fb_flush = vgpu_mm_fb_flush,
-		.l2_invalidate = vgpu_mm_l2_invalidate,
-		.l2_flush = vgpu_mm_l2_flush,
-		.cbc_clean = NULL,
 		.get_big_page_sizes = gm20b_mm_get_big_page_sizes,
 		.get_default_big_page_size = gp10b_mm_get_default_big_page_size,
 		.gpu_phys_addr = gm20b_gpu_phys_addr,
@@ -590,6 +586,12 @@ static const struct gpu_ops vgpu_gp10b_ops = {
 		.bar1_map_userd = vgpu_mm_bar1_map_userd,
 		.vm_as_alloc_share = vgpu_vm_as_alloc_share,
 		.vm_as_free_share = vgpu_vm_as_free_share,
+		.cache = {
+			.fb_flush = vgpu_mm_fb_flush,
+			.l2_invalidate = vgpu_mm_l2_invalidate,
+			.l2_flush = vgpu_mm_l2_flush,
+			.cbc_clean = NULL,
+		},
 	},
 	.pramin = {
 		.data032_r = NULL,
diff --git a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c
index 11769ee5f..022706e4b 100644
--- a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c
+++ b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c
@@ -663,10 +663,6 @@ static const struct gpu_ops vgpu_gv11b_ops = {
 		.gmmu_map = vgpu_locked_gmmu_map,
 		.gmmu_unmap = vgpu_locked_gmmu_unmap,
 		.vm_bind_channel = vgpu_vm_bind_channel,
-		.fb_flush = vgpu_mm_fb_flush,
-		.l2_invalidate = vgpu_mm_l2_invalidate,
-		.l2_flush = vgpu_mm_l2_flush,
-		.cbc_clean = NULL,
 		.get_big_page_sizes = gm20b_mm_get_big_page_sizes,
 		.get_default_big_page_size = gp10b_mm_get_default_big_page_size,
 		.gpu_phys_addr = gm20b_gpu_phys_addr,
@@ -683,6 +679,12 @@ static const struct gpu_ops vgpu_gv11b_ops = {
 		.bar1_map_userd = vgpu_mm_bar1_map_userd,
 		.vm_as_alloc_share = vgpu_vm_as_alloc_share,
 		.vm_as_free_share = vgpu_vm_as_free_share,
+		.cache = {
+			.fb_flush = vgpu_mm_fb_flush,
+			.l2_invalidate = vgpu_mm_l2_invalidate,
+			.l2_flush = vgpu_mm_l2_flush,
+			.cbc_clean = NULL,
+		},
 	},
 	.therm = {
 		.init_therm_setup_hw = NULL,
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index 51e4f5bed..c90b6f9b4 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -2158,7 +2158,7 @@ int __gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch,
 		goto cleanup;
 	}
 
-	err = g->ops.mm.l2_flush(g, true);
+	err = g->ops.mm.cache.l2_flush(g, true);
 	if (err != 0) {
 		nvgpu_err(g, "l2_flush failed");
 		goto cleanup;
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index af6f36e18..f6bc19255 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -51,7 +51,6 @@
 
 #include <nvgpu/hw/gk20a/hw_gmmu_gk20a.h>
 #include <nvgpu/hw/gk20a/hw_pram_gk20a.h>
-#include <nvgpu/hw/gk20a/hw_flush_gk20a.h>
 
 /*
  * GPU mapping life cycle
@@ -114,7 +113,8 @@ int gk20a_init_mm_setup_hw(struct gk20a *g)
 		}
 	}
 
-	if ((gk20a_mm_fb_flush(g) != 0) || (gk20a_mm_fb_flush(g) != 0)) {
+	if (g->ops.mm.cache.fb_flush(g) != 0 ||
+	    g->ops.mm.cache.fb_flush(g) != 0) {
 		return -EBUSY;
 	}
 
@@ -406,234 +406,6 @@ int gk20a_alloc_inst_block(struct gk20a *g, struct nvgpu_mem *inst_block)
 	return 0;
 }
 
-int gk20a_mm_fb_flush(struct gk20a *g)
-{
-	struct mm_gk20a *mm = &g->mm;
-	u32 data;
-	int ret = 0;
-	struct nvgpu_timeout timeout;
-	u32 retries;
-
-	nvgpu_log_fn(g, " ");
-
-	gk20a_busy_noresume(g);
-	if (!g->power_on) {
-		gk20a_idle_nosuspend(g);
-		return 0;
-	}
-
-	retries = 100;
-
-	if (g->ops.mm.get_flush_retries != NULL) {
-		retries = g->ops.mm.get_flush_retries(g, NVGPU_FLUSH_FB);
-	}
-
-	nvgpu_timeout_init(g, &timeout, retries, NVGPU_TIMER_RETRY_TIMER);
-
-	nvgpu_mutex_acquire(&mm->l2_op_lock);
-
-	/* Make sure all previous writes are committed to the L2. There's no
-	   guarantee that writes are to DRAM. This will be a sysmembar internal
-	   to the L2. */
-
-	trace_gk20a_mm_fb_flush(g->name);
-
-	gk20a_writel(g, flush_fb_flush_r(),
-		flush_fb_flush_pending_busy_f());
-
-	do {
-		data = gk20a_readl(g, flush_fb_flush_r());
-
-		if (flush_fb_flush_outstanding_v(data) ==
-			flush_fb_flush_outstanding_true_v() ||
-		    flush_fb_flush_pending_v(data) ==
-			flush_fb_flush_pending_busy_v()) {
-				nvgpu_log_info(g, "fb_flush 0x%x", data);
-				nvgpu_udelay(5);
-		} else {
-			break;
-		}
-	} while (nvgpu_timeout_expired(&timeout) == 0);
-
-	if (nvgpu_timeout_peek_expired(&timeout) != 0) {
-		if (g->ops.fb.dump_vpr_info != NULL) {
-			g->ops.fb.dump_vpr_info(g);
-		}
-		if (g->ops.fb.dump_wpr_info != NULL) {
-			g->ops.fb.dump_wpr_info(g);
-		}
-		ret = -EBUSY;
-	}
-
-	trace_gk20a_mm_fb_flush_done(g->name);
-
-	nvgpu_mutex_release(&mm->l2_op_lock);
-
-	gk20a_idle_nosuspend(g);
-
-	return ret;
-}
-
-static void gk20a_mm_l2_invalidate_locked(struct gk20a *g)
-{
-	u32 data;
-	struct nvgpu_timeout timeout;
-	u32 retries = 200;
-
-	trace_gk20a_mm_l2_invalidate(g->name);
-
-	if (g->ops.mm.get_flush_retries != NULL) {
-		retries = g->ops.mm.get_flush_retries(g, NVGPU_FLUSH_L2_INV);
-	}
-
-	nvgpu_timeout_init(g, &timeout, retries, NVGPU_TIMER_RETRY_TIMER);
-
-	/* Invalidate any clean lines from the L2 so subsequent reads go to
-	   DRAM. Dirty lines are not affected by this operation. */
-	gk20a_writel(g, flush_l2_system_invalidate_r(),
-		flush_l2_system_invalidate_pending_busy_f());
-
-	do {
-		data = gk20a_readl(g, flush_l2_system_invalidate_r());
-
-		if (flush_l2_system_invalidate_outstanding_v(data) ==
-			flush_l2_system_invalidate_outstanding_true_v() ||
-		    flush_l2_system_invalidate_pending_v(data) ==
-			flush_l2_system_invalidate_pending_busy_v()) {
-				nvgpu_log_info(g, "l2_system_invalidate 0x%x",
-						data);
-				nvgpu_udelay(5);
-		} else {
-			break;
-		}
-	} while (nvgpu_timeout_expired(&timeout) == 0);
-
-	if (nvgpu_timeout_peek_expired(&timeout) != 0) {
-		nvgpu_warn(g, "l2_system_invalidate too many retries");
-	}
-
-	trace_gk20a_mm_l2_invalidate_done(g->name);
-}
-
-void gk20a_mm_l2_invalidate(struct gk20a *g)
-{
-	struct mm_gk20a *mm = &g->mm;
-	gk20a_busy_noresume(g);
-	if (g->power_on) {
-		nvgpu_mutex_acquire(&mm->l2_op_lock);
-		gk20a_mm_l2_invalidate_locked(g);
-		nvgpu_mutex_release(&mm->l2_op_lock);
-	}
-	gk20a_idle_nosuspend(g);
-}
-
-int gk20a_mm_l2_flush(struct gk20a *g, bool invalidate)
-{
-	struct mm_gk20a *mm = &g->mm;
-	u32 data;
-	struct nvgpu_timeout timeout;
-	u32 retries = 2000;
-	int err = -ETIMEDOUT;
-
-	nvgpu_log_fn(g, " ");
-
-	gk20a_busy_noresume(g);
-	if (!g->power_on) {
-		goto hw_was_off;
-	}
-
-	if (g->ops.mm.get_flush_retries != NULL) {
-		retries = g->ops.mm.get_flush_retries(g, NVGPU_FLUSH_L2_FLUSH);
-	}
-
-	nvgpu_timeout_init(g, &timeout, retries, NVGPU_TIMER_RETRY_TIMER);
-
-	nvgpu_mutex_acquire(&mm->l2_op_lock);
-
-	trace_gk20a_mm_l2_flush(g->name);
-
-	/* Flush all dirty lines from the L2 to DRAM. Lines are left in the L2
-	   as clean, so subsequent reads might hit in the L2. */
-	gk20a_writel(g, flush_l2_flush_dirty_r(),
-		flush_l2_flush_dirty_pending_busy_f());
-
-	do {
-		data = gk20a_readl(g, flush_l2_flush_dirty_r());
-
-		if (flush_l2_flush_dirty_outstanding_v(data) ==
-			flush_l2_flush_dirty_outstanding_true_v() ||
-		    flush_l2_flush_dirty_pending_v(data) ==
-			flush_l2_flush_dirty_pending_busy_v()) {
-				nvgpu_log_info(g, "l2_flush_dirty 0x%x", data);
-				nvgpu_udelay(5);
-		} else {
-			err = 0;
-			break;
-		}
-	} while (nvgpu_timeout_expired_msg(&timeout,
-				"l2_flush_dirty too many retries") == 0);
-
-	trace_gk20a_mm_l2_flush_done(g->name);
-
-	if (invalidate) {
-		gk20a_mm_l2_invalidate_locked(g);
-	}
-
-	nvgpu_mutex_release(&mm->l2_op_lock);
-
-hw_was_off:
-	gk20a_idle_nosuspend(g);
-
-	return err;
-}
-
-void gk20a_mm_cbc_clean(struct gk20a *g)
-{
-	struct mm_gk20a *mm = &g->mm;
-	u32 data;
-	struct nvgpu_timeout timeout;
-	u32 retries = 200;
-
-	nvgpu_log_fn(g, " ");
-
-	gk20a_busy_noresume(g);
-	if (!g->power_on) {
-		goto hw_was_off;
-	}
-
-	if (g->ops.mm.get_flush_retries != NULL) {
-		retries = g->ops.mm.get_flush_retries(g, NVGPU_FLUSH_CBC_CLEAN);
-	}
-
-	nvgpu_timeout_init(g, &timeout, retries, NVGPU_TIMER_RETRY_TIMER);
-
-	nvgpu_mutex_acquire(&mm->l2_op_lock);
-
-	/* Flush all dirty lines from the CBC to L2 */
-	gk20a_writel(g, flush_l2_clean_comptags_r(),
-		flush_l2_clean_comptags_pending_busy_f());
-
-	do {
-		data = gk20a_readl(g, flush_l2_clean_comptags_r());
-
-		if (flush_l2_clean_comptags_outstanding_v(data) ==
-			flush_l2_clean_comptags_outstanding_true_v() ||
-		    flush_l2_clean_comptags_pending_v(data) ==
-			flush_l2_clean_comptags_pending_busy_v()) {
-				nvgpu_log_info(g, "l2_clean_comptags 0x%x", data);
-				nvgpu_udelay(5);
-		} else {
-			break;
-		}
-	} while (nvgpu_timeout_expired_msg(&timeout,
-				"l2_clean_comptags too many retries") == 0);
-
-	nvgpu_mutex_release(&mm->l2_op_lock);
-
-hw_was_off:
-	gk20a_idle_nosuspend(g);
-}
-
 u32 gk20a_mm_get_iommu_bit(struct gk20a *g)
 {
 	return 34;
@@ -656,4 +428,3 @@ u64 gk20a_mm_bar1_map_userd(struct gk20a *g, struct nvgpu_mem *mem, u32 offset)
 				    gk20a_mem_flag_none, false,
 				    mem->aperture);
 }
-
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index b7749a897..0426bd912 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -72,11 +72,6 @@ gk20a_buffer_state_from_list(struct nvgpu_list_node *node)
 struct gk20a;
 struct channel_gk20a;
 
-int gk20a_mm_fb_flush(struct gk20a *g);
-int gk20a_mm_l2_flush(struct gk20a *g, bool invalidate);
-void gk20a_mm_cbc_clean(struct gk20a *g);
-void gk20a_mm_l2_invalidate(struct gk20a *g);
-
 #define dev_from_vm(vm) dev_from_gk20a(vm->mm->g)
 
 void gk20a_mm_ltc_isr(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
index be9a39270..40c4e0193 100644
--- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c
@@ -46,6 +46,7 @@
 #include <nvgpu/gr/setup.h>
 #include <nvgpu/pmu/pmu_perfmon.h>
 
+#include "hal/mm/cache/flush_gk20a.h"
 #include "hal/mc/mc_gm20b.h"
 #include "hal/bus/bus_gm20b.h"
 #include "hal/bus/bus_gk20a.h"
@@ -806,10 +807,6 @@ static const struct gpu_ops gm20b_ops = {
 		.gmmu_map = gk20a_locked_gmmu_map,
 		.gmmu_unmap = gk20a_locked_gmmu_unmap,
 		.vm_bind_channel = gk20a_vm_bind_channel,
-		.fb_flush = gk20a_mm_fb_flush,
-		.l2_invalidate = gk20a_mm_l2_invalidate,
-		.l2_flush = gk20a_mm_l2_flush,
-		.cbc_clean = gk20a_mm_cbc_clean,
 		.get_big_page_sizes = gm20b_mm_get_big_page_sizes,
 		.get_default_big_page_size = gm20b_mm_get_default_big_page_size,
 		.gpu_phys_addr = gm20b_gpu_phys_addr,
@@ -822,6 +819,12 @@ static const struct gpu_ops gm20b_ops = {
 		.get_kind_invalid = gm20b_get_kind_invalid,
 		.get_kind_pitch = gm20b_get_kind_pitch,
 		.bar1_map_userd = gk20a_mm_bar1_map_userd,
+		.cache = {
+			.fb_flush = gk20a_mm_fb_flush,
+			.l2_invalidate = gk20a_mm_l2_invalidate,
+			.l2_flush = gk20a_mm_l2_flush,
+			.cbc_clean = gk20a_mm_cbc_clean,
+		},
 	},
 	.therm = {
 		.init_therm_setup_hw = gm20b_init_therm_setup_hw,
diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
index a5000410a..857bb8cfe 100644
--- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c
@@ -47,6 +47,7 @@
 #include <nvgpu/gr/gr_intr.h>
 #include <nvgpu/pmu/pmu_perfmon.h>
 
+#include "hal/mm/cache/flush_gk20a.h"
 #include "hal/mc/mc_gm20b.h"
 #include "hal/mc/mc_gp10b.h"
 #include "hal/bus/bus_gk20a.h"
@@ -906,10 +907,6 @@ static const struct gpu_ops gp10b_ops = {
 		.gmmu_map = gk20a_locked_gmmu_map,
 		.gmmu_unmap = gk20a_locked_gmmu_unmap,
 		.vm_bind_channel = gk20a_vm_bind_channel,
-		.fb_flush = gk20a_mm_fb_flush,
-		.l2_invalidate = gk20a_mm_l2_invalidate,
-		.l2_flush = gk20a_mm_l2_flush,
-		.cbc_clean = gk20a_mm_cbc_clean,
 		.get_big_page_sizes = gm20b_mm_get_big_page_sizes,
 		.get_default_big_page_size = gp10b_mm_get_default_big_page_size,
 		.gpu_phys_addr = gm20b_gpu_phys_addr,
@@ -924,6 +921,12 @@ static const struct gpu_ops gp10b_ops = {
 		.get_kind_invalid = gm20b_get_kind_invalid,
 		.get_kind_pitch = gm20b_get_kind_pitch,
 		.bar1_map_userd = gk20a_mm_bar1_map_userd,
+		.cache = {
+			.fb_flush = gk20a_mm_fb_flush,
+			.l2_invalidate = gk20a_mm_l2_invalidate,
+			.l2_flush = gk20a_mm_l2_flush,
+			.cbc_clean = gk20a_mm_cbc_clean,
+		},
 	},
 	.pramin = {
 		.data032_r = pram_data032_r,
diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c
index fce669fe9..a041aa45d 100644
--- a/drivers/gpu/nvgpu/gv100/hal_gv100.c
+++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c
@@ -22,6 +22,8 @@
  * DEALINGS IN THE SOFTWARE.
  */
 
+#include "hal/mm/cache/flush_gk20a.h"
+#include "hal/mm/cache/flush_gv11b.h"
 #include "hal/mc/mc_gm20b.h"
 #include "hal/mc/mc_gp10b.h"
 #include "hal/mc/mc_gv11b.h"
@@ -1088,10 +1090,6 @@ static const struct gpu_ops gv100_ops = {
 		.gmmu_map = gk20a_locked_gmmu_map,
 		.gmmu_unmap = gk20a_locked_gmmu_unmap,
 		.vm_bind_channel = gk20a_vm_bind_channel,
-		.fb_flush = gk20a_mm_fb_flush,
-		.l2_invalidate = gk20a_mm_l2_invalidate,
-		.l2_flush = gv11b_mm_l2_flush,
-		.cbc_clean = gk20a_mm_cbc_clean,
 		.get_big_page_sizes = gm20b_mm_get_big_page_sizes,
 		.get_default_big_page_size = gp10b_mm_get_default_big_page_size,
 		.gpu_phys_addr = gv11b_gpu_phys_addr,
@@ -1108,6 +1106,12 @@ static const struct gpu_ops gv100_ops = {
 		.mmu_fault_disable_hw = gv11b_mm_mmu_fault_disable_hw,
 		.get_flush_retries = gv100_mm_get_flush_retries,
 		.bar1_map_userd = NULL,
+		.cache = {
+			.fb_flush = gk20a_mm_fb_flush,
+			.l2_invalidate = gk20a_mm_l2_invalidate,
+			.l2_flush = gv11b_mm_l2_flush,
+			.cbc_clean = gk20a_mm_cbc_clean,
+		},
 	},
 	.pramin = {
 		.data032_r = pram_data032_r,
diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
index f740101a7..c2c76753c 100644
--- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c
@@ -30,6 +30,8 @@
 #include <nvgpu/gr/gr.h>
 #include <nvgpu/pmu/pmu_perfmon.h>
 
+#include "hal/mm/cache/flush_gk20a.h"
+#include "hal/mm/cache/flush_gv11b.h"
 #include "hal/mc/mc_gm20b.h"
 #include "hal/mc/mc_gp10b.h"
 #include "hal/mc/mc_gv11b.h"
@@ -1064,10 +1066,6 @@ static const struct gpu_ops gv11b_ops = {
 		.gmmu_map = gk20a_locked_gmmu_map,
 		.gmmu_unmap = gk20a_locked_gmmu_unmap,
 		.vm_bind_channel = gk20a_vm_bind_channel,
-		.fb_flush = gk20a_mm_fb_flush,
-		.l2_invalidate = gk20a_mm_l2_invalidate,
-		.l2_flush = gv11b_mm_l2_flush,
-		.cbc_clean = gk20a_mm_cbc_clean,
 		.get_big_page_sizes = gm20b_mm_get_big_page_sizes,
 		.get_default_big_page_size = gp10b_mm_get_default_big_page_size,
 		.gpu_phys_addr = gv11b_gpu_phys_addr,
@@ -1084,6 +1082,12 @@ static const struct gpu_ops gv11b_ops = {
 		.fault_info_mem_destroy = gv11b_mm_fault_info_mem_destroy,
 		.mmu_fault_disable_hw = gv11b_mm_mmu_fault_disable_hw,
 		.bar1_map_userd = NULL,
+		.cache = {
+			.fb_flush = gk20a_mm_fb_flush,
+			.l2_invalidate = gk20a_mm_l2_invalidate,
+			.l2_flush = gv11b_mm_l2_flush,
+			.cbc_clean = gk20a_mm_cbc_clean,
+		},
 	},
 	.therm = {
 		.init_therm_setup_hw = gv11b_init_therm_setup_hw,
diff --git a/drivers/gpu/nvgpu/gv11b/mm_gv11b.c b/drivers/gpu/nvgpu/gv11b/mm_gv11b.c
index 0932a6d3a..a0895e446 100644
--- a/drivers/gpu/nvgpu/gv11b/mm_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/mm_gv11b.c
@@ -200,40 +200,6 @@ int gv11b_init_mm_setup_hw(struct gk20a *g)
 	return err;
 }
 
-int gv11b_mm_l2_flush(struct gk20a *g, bool invalidate)
-{
-	int err = 0;
-
-	nvgpu_log(g, gpu_dbg_fn, "gv11b_mm_l2_flush");
-
-	err = g->ops.mm.fb_flush(g);
-	if (err != 0) {
-		nvgpu_err(g, "mm.fb_flush()[1] failed err=%d", err);
-		return err;
-	}
-	err = gk20a_mm_l2_flush(g, invalidate);
-	if (err != 0) {
-		nvgpu_err(g, "gk20a_mm_l2_flush failed");
-		return err;
-	}
-	if (g->ops.bus.bar1_bind != NULL) {
-		err = g->ops.fb.tlb_invalidate(g,
-				g->mm.bar1.vm->pdb.mem);
-		if (err != 0) {
-			nvgpu_err(g, "fb.tlb_invalidate() failed err=%d", err);
-			return err;
-		}
-	} else {
-		err = g->ops.mm.fb_flush(g);
-		if (err != 0) {
-			nvgpu_err(g, "mm.fb_flush()[2] failed err=%d", err);
-			return err;
-		}
-	}
-
-	return err;
-}
-
 /*
  * On Volta the GPU determines whether to do L3 allocation for a mapping by
  * checking bit 36 of the phsyical address. So if a mapping should allocte lines
diff --git a/drivers/gpu/nvgpu/gv11b/mm_gv11b.h b/drivers/gpu/nvgpu/gv11b/mm_gv11b.h
index bca67f083..d9d1fe0c0 100644
--- a/drivers/gpu/nvgpu/gv11b/mm_gv11b.h
+++ b/drivers/gpu/nvgpu/gv11b/mm_gv11b.h
@@ -32,7 +32,6 @@ bool gv11b_mm_is_bar1_supported(struct gk20a *g);
 void gv11b_init_inst_block(struct nvgpu_mem *inst_block,
 		struct vm_gk20a *vm, u32 big_page_size);
 int gv11b_init_mm_setup_hw(struct gk20a *g);
-int gv11b_mm_l2_flush(struct gk20a *g, bool invalidate);
 u64 gv11b_gpu_phys_addr(struct gk20a *g,
 			struct nvgpu_gmmu_attrs *attrs, u64 phys);
 void gv11b_mm_fault_info_mem_destroy(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/hal/ltc/intr/ltc_intr_gp10b.c b/drivers/gpu/nvgpu/hal/ltc/intr/ltc_intr_gp10b.c
index 58575848e..0d62f99de 100644
--- a/drivers/gpu/nvgpu/hal/ltc/intr/ltc_intr_gp10b.c
+++ b/drivers/gpu/nvgpu/hal/ltc/intr/ltc_intr_gp10b.c
@@ -61,7 +61,7 @@ void gp10b_ltc_intr_handle_lts_interrupts(struct gk20a *g, u32 ltc, u32 slice)
 		nvgpu_writel_check(g,
 			ltc_ltc0_lts0_dstg_ecc_report_r() + offset,
 			ecc_stats_reg_val);
-		if (g->ops.mm.l2_flush(g, true) != 0) {
+		if (g->ops.mm.cache.l2_flush(g, true) != 0) {
 			nvgpu_err(g, "l2_flush failed");
 		}
 	}
diff --git a/drivers/gpu/nvgpu/hal/mm/cache/flush_gk20a.c b/drivers/gpu/nvgpu/hal/mm/cache/flush_gk20a.c
new file mode 100644
index 000000000..a51fd6dd5
--- /dev/null
+++ b/drivers/gpu/nvgpu/hal/mm/cache/flush_gk20a.c
@@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 2019, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <trace/events/gk20a.h>
+
+#include <nvgpu/mm.h>
+#include <nvgpu/io.h>
+#include <nvgpu/gk20a.h>
+#include <nvgpu/timers.h>
+
+#include <nvgpu/hw/gk20a/hw_flush_gk20a.h>
+
+#include "flush_gk20a.h"
+
+int gk20a_mm_fb_flush(struct gk20a *g)
+{
+	struct mm_gk20a *mm = &g->mm;
+	u32 data;
+	int ret = 0;
+	struct nvgpu_timeout timeout;
+	u32 retries;
+
+	nvgpu_log_fn(g, " ");
+
+	gk20a_busy_noresume(g);
+	if (!g->power_on) {
+		gk20a_idle_nosuspend(g);
+		return 0;
+	}
+
+	retries = 100;
+
+	if (g->ops.mm.get_flush_retries != NULL) {
+		retries = g->ops.mm.get_flush_retries(g, NVGPU_FLUSH_FB);
+	}
+
+	nvgpu_timeout_init(g, &timeout, retries, NVGPU_TIMER_RETRY_TIMER);
+
+	nvgpu_mutex_acquire(&mm->l2_op_lock);
+
+	/* Make sure all previous writes are committed to the L2. There's no
+	   guarantee that writes are to DRAM. This will be a sysmembar internal
+	   to the L2. */
+
+	trace_gk20a_mm_fb_flush(g->name);
+
+	nvgpu_writel(g, flush_fb_flush_r(),
+		flush_fb_flush_pending_busy_f());
+
+	do {
+		data = nvgpu_readl(g, flush_fb_flush_r());
+
+		if (flush_fb_flush_outstanding_v(data) ==
+			flush_fb_flush_outstanding_true_v() ||
+		    flush_fb_flush_pending_v(data) ==
+			flush_fb_flush_pending_busy_v()) {
+				nvgpu_log_info(g, "fb_flush 0x%x", data);
+				nvgpu_udelay(5);
+		} else {
+			break;
+		}
+	} while (nvgpu_timeout_expired(&timeout) == 0);
+
+	if (nvgpu_timeout_peek_expired(&timeout) != 0) {
+		if (g->ops.fb.dump_vpr_info != NULL) {
+			g->ops.fb.dump_vpr_info(g);
+		}
+		if (g->ops.fb.dump_wpr_info != NULL) {
+			g->ops.fb.dump_wpr_info(g);
+		}
+		ret = -EBUSY;
+	}
+
+	trace_gk20a_mm_fb_flush_done(g->name);
+
+	nvgpu_mutex_release(&mm->l2_op_lock);
+
+	gk20a_idle_nosuspend(g);
+
+	return ret;
+}
+
+static void gk20a_mm_l2_invalidate_locked(struct gk20a *g)
+{
+	u32 data;
+	struct nvgpu_timeout timeout;
+	u32 retries = 200;
+
+	trace_gk20a_mm_l2_invalidate(g->name);
+
+	if (g->ops.mm.get_flush_retries != NULL) {
+		retries = g->ops.mm.get_flush_retries(g, NVGPU_FLUSH_L2_INV);
+	}
+
+	nvgpu_timeout_init(g, &timeout, retries, NVGPU_TIMER_RETRY_TIMER);
+
+	/* Invalidate any clean lines from the L2 so subsequent reads go to
+	   DRAM. Dirty lines are not affected by this operation. */
+	nvgpu_writel(g, flush_l2_system_invalidate_r(),
+		flush_l2_system_invalidate_pending_busy_f());
+
+	do {
+		data = nvgpu_readl(g, flush_l2_system_invalidate_r());
+
+		if (flush_l2_system_invalidate_outstanding_v(data) ==
+			flush_l2_system_invalidate_outstanding_true_v() ||
+		    flush_l2_system_invalidate_pending_v(data) ==
+			flush_l2_system_invalidate_pending_busy_v()) {
+				nvgpu_log_info(g, "l2_system_invalidate 0x%x",
+						data);
+				nvgpu_udelay(5);
+		} else {
+			break;
+		}
+	} while (nvgpu_timeout_expired(&timeout) == 0);
+
+	if (nvgpu_timeout_peek_expired(&timeout) != 0) {
+		nvgpu_warn(g, "l2_system_invalidate too many retries");
+	}
+
+	trace_gk20a_mm_l2_invalidate_done(g->name);
+}
+
+void gk20a_mm_l2_invalidate(struct gk20a *g)
+{
+	struct mm_gk20a *mm = &g->mm;
+	gk20a_busy_noresume(g);
+	if (g->power_on) {
+		nvgpu_mutex_acquire(&mm->l2_op_lock);
+		gk20a_mm_l2_invalidate_locked(g);
+		nvgpu_mutex_release(&mm->l2_op_lock);
+	}
+	gk20a_idle_nosuspend(g);
+}
+
+int gk20a_mm_l2_flush(struct gk20a *g, bool invalidate)
+{
+	struct mm_gk20a *mm = &g->mm;
+	u32 data;
+	struct nvgpu_timeout timeout;
+	u32 retries = 2000;
+	int err = -ETIMEDOUT;
+
+	nvgpu_log_fn(g, " ");
+
+	gk20a_busy_noresume(g);
+	if (!g->power_on) {
+		goto hw_was_off;
+	}
+
+	if (g->ops.mm.get_flush_retries != NULL) {
+		retries = g->ops.mm.get_flush_retries(g, NVGPU_FLUSH_L2_FLUSH);
+	}
+
+	nvgpu_timeout_init(g, &timeout, retries, NVGPU_TIMER_RETRY_TIMER);
+
+	nvgpu_mutex_acquire(&mm->l2_op_lock);
+
+	trace_gk20a_mm_l2_flush(g->name);
+
+	/* Flush all dirty lines from the L2 to DRAM. Lines are left in the L2
+	   as clean, so subsequent reads might hit in the L2. */
+	nvgpu_writel(g, flush_l2_flush_dirty_r(),
+		flush_l2_flush_dirty_pending_busy_f());
+
+	do {
+		data = nvgpu_readl(g, flush_l2_flush_dirty_r());
+
+		if (flush_l2_flush_dirty_outstanding_v(data) ==
+			flush_l2_flush_dirty_outstanding_true_v() ||
+		    flush_l2_flush_dirty_pending_v(data) ==
+			flush_l2_flush_dirty_pending_busy_v()) {
+				nvgpu_log_info(g, "l2_flush_dirty 0x%x", data);
+				nvgpu_udelay(5);
+		} else {
+			err = 0;
+			break;
+		}
+	} while (nvgpu_timeout_expired_msg(&timeout,
+				"l2_flush_dirty too many retries") == 0);
+
+	trace_gk20a_mm_l2_flush_done(g->name);
+
+	if (invalidate) {
+		gk20a_mm_l2_invalidate_locked(g);
+	}
+
+	nvgpu_mutex_release(&mm->l2_op_lock);
+
+hw_was_off:
+	gk20a_idle_nosuspend(g);
+
+	return err;
+}
+
+void gk20a_mm_cbc_clean(struct gk20a *g)
+{
+	struct mm_gk20a *mm = &g->mm;
+	u32 data;
+	struct nvgpu_timeout timeout;
+	u32 retries = 200;
+
+	nvgpu_log_fn(g, " ");
+
+	gk20a_busy_noresume(g);
+	if (!g->power_on) {
+		goto hw_was_off;
+	}
+
+	if (g->ops.mm.get_flush_retries != NULL) {
+		retries = g->ops.mm.get_flush_retries(g, NVGPU_FLUSH_CBC_CLEAN);
+	}
+
+	nvgpu_timeout_init(g, &timeout, retries, NVGPU_TIMER_RETRY_TIMER);
+
+	nvgpu_mutex_acquire(&mm->l2_op_lock);
+
+	/* Flush all dirty lines from the CBC to L2 */
+	nvgpu_writel(g, flush_l2_clean_comptags_r(),
+		flush_l2_clean_comptags_pending_busy_f());
+
+	do {
+		data = nvgpu_readl(g, flush_l2_clean_comptags_r());
+
+		if (flush_l2_clean_comptags_outstanding_v(data) ==
+			flush_l2_clean_comptags_outstanding_true_v() ||
+		    flush_l2_clean_comptags_pending_v(data) ==
+			flush_l2_clean_comptags_pending_busy_v()) {
+				nvgpu_log_info(g, "l2_clean_comptags 0x%x", data);
+				nvgpu_udelay(5);
+		} else {
+			break;
+		}
+	} while (nvgpu_timeout_expired_msg(&timeout,
+				"l2_clean_comptags too many retries") == 0);
+
+	nvgpu_mutex_release(&mm->l2_op_lock);
+
+hw_was_off:
+	gk20a_idle_nosuspend(g);
+}
diff --git a/drivers/gpu/nvgpu/hal/mm/cache/flush_gk20a.h b/drivers/gpu/nvgpu/hal/mm/cache/flush_gk20a.h
new file mode 100644
index 000000000..f157e7898
--- /dev/null
+++ b/drivers/gpu/nvgpu/hal/mm/cache/flush_gk20a.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2019, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef HAL_MM_FLUSH_FLUSH_GK20A_H
+#define HAL_MM_FLUSH_FLUSH_GK20A_H
+
+#include <nvgpu/types.h>
+
+struct gk20a;
+/* gk20a cache flush/invalidate/clean entry points for the mm cache HAL. */
+int gk20a_mm_fb_flush(struct gk20a *g);
+int gk20a_mm_l2_flush(struct gk20a *g, bool invalidate);
+void gk20a_mm_cbc_clean(struct gk20a *g);
+void gk20a_mm_l2_invalidate(struct gk20a *g);
+
+#endif /* HAL_MM_FLUSH_FLUSH_GK20A_H */
diff --git a/drivers/gpu/nvgpu/hal/mm/cache/flush_gv11b.c b/drivers/gpu/nvgpu/hal/mm/cache/flush_gv11b.c
new file mode 100644
index 000000000..0ee040b8a
--- /dev/null
+++ b/drivers/gpu/nvgpu/hal/mm/cache/flush_gv11b.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2019, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <nvgpu/gk20a.h>
+
+#include <nvgpu/hw/gv11b/hw_flush_gv11b.h>
+
+#include "flush_gk20a.h"
+#include "flush_gv11b.h"
+
+/*
+ * FB flush, then gk20a L2 flush, then a TLB invalidate of the BAR1 VM if
+ * bus.bar1_bind is set, else a second FB flush. Returns the first error hit.
+ */
+int gv11b_mm_l2_flush(struct gk20a *g, bool invalidate)
+{
+	int err;
+
+	nvgpu_log(g, gpu_dbg_fn, "gv11b_mm_l2_flush");
+
+	err = g->ops.mm.cache.fb_flush(g);
+	if (err != 0) {
+		nvgpu_err(g, "mm.cache.fb_flush()[1] failed err=%d", err);
+		return err;
+	}
+	err = gk20a_mm_l2_flush(g, invalidate);
+	if (err != 0) {
+		nvgpu_err(g, "gk20a_mm_l2_flush failed err=%d", err);
+		return err;
+	}
+	if (g->ops.bus.bar1_bind != NULL) {
+		err = g->ops.fb.tlb_invalidate(g, g->mm.bar1.vm->pdb.mem);
+		if (err != 0) {
+			nvgpu_err(g, "fb.tlb_invalidate() failed err=%d", err);
+		}
+		return err;
+	}
+	err = g->ops.mm.cache.fb_flush(g);
+	if (err != 0) {
+		nvgpu_err(g, "mm.cache.fb_flush()[2] failed err=%d", err);
+	}
+	return err;
+}
diff --git a/drivers/gpu/nvgpu/hal/mm/cache/flush_gv11b.h b/drivers/gpu/nvgpu/hal/mm/cache/flush_gv11b.h
new file mode 100644
index 000000000..c0ff42532
--- /dev/null
+++ b/drivers/gpu/nvgpu/hal/mm/cache/flush_gv11b.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2019, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef HAL_MM_FLUSH_FLUSH_GV11B_H
+#define HAL_MM_FLUSH_FLUSH_GV11B_H
+
+#include <nvgpu/types.h>
+
+struct gk20a;
+/* Returns 0, or the error from the first flush/invalidate step that fails. */
+int gv11b_mm_l2_flush(struct gk20a *g, bool invalidate);
+
+#endif /* HAL_MM_FLUSH_FLUSH_GV11B_H */
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
index 86ea6bc22..d5cd9562a 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
@@ -1363,10 +1363,6 @@ struct gpu_ops {
 				struct vm_gk20a_mapping_batch *batch);
 		int (*vm_bind_channel)(struct vm_gk20a *vm,
 				struct channel_gk20a *ch);
-		int (*fb_flush)(struct gk20a *g);
-		void (*l2_invalidate)(struct gk20a *g);
-		int (*l2_flush)(struct gk20a *g, bool invalidate);
-		void (*cbc_clean)(struct gk20a *g);
 		u32 (*get_big_page_sizes)(void);
 		u32 (*get_default_big_page_size)(void);
 		u32 (*get_iommu_bit)(struct gk20a *g);
@@ -1391,6 +1387,12 @@ struct gpu_ops {
 		u64 (*bar1_map_userd)(struct gk20a *g, struct nvgpu_mem *mem, u32 offset);
 		int (*vm_as_alloc_share)(struct gk20a *g, struct vm_gk20a *vm);
 		void (*vm_as_free_share)(struct vm_gk20a *vm);
+		struct {
+			int (*fb_flush)(struct gk20a *g);
+			void (*l2_invalidate)(struct gk20a *g);
+			int (*l2_flush)(struct gk20a *g, bool invalidate);
+			void (*cbc_clean)(struct gk20a *g);
+		} cache; /* cache ops; implementations in hal/mm/cache/ */
 	} mm;
 	/*
 	 * This function is called to allocate secure memory (memory
diff --git a/drivers/gpu/nvgpu/libnvgpu-drv.export b/drivers/gpu/nvgpu/libnvgpu-drv.export
index 6433009f2..b06247574 100644
--- a/drivers/gpu/nvgpu/libnvgpu-drv.export
+++ b/drivers/gpu/nvgpu/libnvgpu-drv.export
@@ -31,6 +31,7 @@ gk20a_runlist_get_tsg_entry
 gk20a_locked_gmmu_map
 gk20a_locked_gmmu_unmap
 gk20a_ramin_alloc_size
+gk20a_mm_fb_flush
 gm20b_fb_tlb_invalidate
 gm20b_fuse_status_opt_gpc
 gm20b_ramin_set_big_page_size
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c
index 470e6d4e3..a733ab2e5 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c
@@ -609,7 +609,8 @@ static int nvgpu_gpu_ioctl_l2_fb_ops(struct gk20a *g,
 		return -EINVAL;
 
 	if (args->l2_flush) {
-		err = g->ops.mm.l2_flush(g, args->l2_invalidate ? true : false);
+		err = g->ops.mm.cache.l2_flush(g, args->l2_invalidate ?
+								true : false);
 		if (err != 0) {
 			nvgpu_err(g, "l2_flush failed");
 			return err;
@@ -617,7 +618,7 @@ static int nvgpu_gpu_ioctl_l2_fb_ops(struct gk20a *g,
 	}
 
 	if (args->fb_flush) {
-		g->ops.mm.fb_flush(g);
+		err = g->ops.mm.cache.fb_flush(g); /* don't drop failure */
 	}
 
 	return err;
diff --git a/drivers/gpu/nvgpu/tu104/hal_tu104.c b/drivers/gpu/nvgpu/tu104/hal_tu104.c
index 576bef22a..348a5ed96 100644
--- a/drivers/gpu/nvgpu/tu104/hal_tu104.c
+++ b/drivers/gpu/nvgpu/tu104/hal_tu104.c
@@ -22,6 +22,8 @@
  * DEALINGS IN THE SOFTWARE.
  */
 
+#include "hal/mm/cache/flush_gk20a.h"
+#include "hal/mm/cache/flush_gv11b.h"
 #include "hal/mc/mc_gm20b.h"
 #include "hal/mc/mc_gp10b.h"
 #include "hal/mc/mc_gv11b.h"
@@ -1126,10 +1128,6 @@ static const struct gpu_ops tu104_ops = {
 		.gmmu_map = gk20a_locked_gmmu_map,
 		.gmmu_unmap = gk20a_locked_gmmu_unmap,
 		.vm_bind_channel = gk20a_vm_bind_channel,
-		.fb_flush = gk20a_mm_fb_flush,
-		.l2_invalidate = gk20a_mm_l2_invalidate,
-		.l2_flush = gv11b_mm_l2_flush,
-		.cbc_clean = gk20a_mm_cbc_clean,
 		.get_big_page_sizes = gm20b_mm_get_big_page_sizes,
 		.get_default_big_page_size = gp10b_mm_get_default_big_page_size,
 		.gpu_phys_addr = gv11b_gpu_phys_addr,
@@ -1146,6 +1144,12 @@ static const struct gpu_ops tu104_ops = {
 		.mmu_fault_disable_hw = gv11b_mm_mmu_fault_disable_hw,
 		.get_flush_retries = tu104_mm_get_flush_retries,
 		.bar1_map_userd = NULL,
+		.cache = {
+			.fb_flush = gk20a_mm_fb_flush,
+			.l2_invalidate = gk20a_mm_l2_invalidate,
+			.l2_flush = gv11b_mm_l2_flush,
+			.cbc_clean = gk20a_mm_cbc_clean,
+		},
 	},
 	.pramin = {
 		.data032_r = pram_data032_r,
diff --git a/userspace/units/mm/gmmu/page_table/page_table.c b/userspace/units/mm/gmmu/page_table/page_table.c
index 1e9800bae..bcdc1e264 100644
--- a/userspace/units/mm/gmmu/page_table/page_table.c
+++ b/userspace/units/mm/gmmu/page_table/page_table.c
@@ -41,6 +41,8 @@
 #include <gv11b/mm_gv11b.h>
 #include <nvgpu/hw/gv11b/hw_gmmu_gv11b.h>
 
+#include <hal/mm/cache/flush_gk20a.h>
+#include <hal/mm/cache/flush_gv11b.h>
 #include <hal/fb/fb_gp10b.h>
 #include <hal/fb/fb_gm20b.h>
 #include <hal/fifo/ramin_gk20a.h>
@@ -303,6 +305,8 @@ static int init_mm(struct unit_module *m, struct gk20a *g)
 	g->ops.mm.gmmu_unmap = gk20a_locked_gmmu_unmap;
 	g->ops.mm.gpu_phys_addr = gv11b_gpu_phys_addr;
 	g->ops.mm.is_bar1_supported = gv11b_mm_is_bar1_supported;
+	g->ops.mm.cache.l2_flush = gv11b_mm_l2_flush;
+	g->ops.mm.cache.fb_flush = gk20a_mm_fb_flush;
 	g->ops.fb.compression_page_size = gp10b_fb_compression_page_size;
 	g->ops.fb.tlb_invalidate = gm20b_fb_tlb_invalidate;
 	g->ops.ramin.init_pdb = gp10b_ramin_init_pdb;
diff --git a/userspace/units/mm/page_table_faults/page_table_faults.c b/userspace/units/mm/page_table_faults/page_table_faults.c
index b86d60983..7df3c80a3 100644
--- a/userspace/units/mm/page_table_faults/page_table_faults.c
+++ b/userspace/units/mm/page_table_faults/page_table_faults.c
@@ -43,12 +43,14 @@
 #include "nvgpu/hw/gv11b/hw_gmmu_gv11b.h"
 #include "nvgpu/hw/gv11b/hw_fb_gv11b.h"
 
+#include "hal/mm/cache/flush_gk20a.h"
+#include "hal/mm/cache/flush_gv11b.h"
 #include "hal/mc/mc_gv11b.h"
 #include "hal/fb/fb_gp10b.h"
 #include "hal/fb/fb_gm20b.h"
 #include "hal/fb/fb_gv11b.h"
-#include "hal/fifo/ramin_gk20a.h"
 #include "hal/fb/intr/fb_intr_gv11b.h"
+#include "hal/fifo/ramin_gk20a.h"
 #include "hal/fifo/ramin_gm20b.h"
 #include "hal/fifo/ramin_gp10b.h"
 
@@ -133,7 +135,8 @@ static int init_mm(struct unit_module *m, struct gk20a *g)
 	g->ops.mm.fault_info_mem_destroy = gv11b_mm_fault_info_mem_destroy;
 	g->ops.mm.mmu_fault_disable_hw = gv11b_mm_mmu_fault_disable_hw;
 	g->ops.mm.init_mm_setup_hw = gv11b_init_mm_setup_hw;
-	g->ops.mm.l2_flush = gv11b_mm_l2_flush;
+	g->ops.mm.cache.l2_flush = gv11b_mm_l2_flush;
+	g->ops.mm.cache.fb_flush = gk20a_mm_fb_flush;
 	g->ops.fb.init_hw = gv11b_fb_init_hw;
 	g->ops.fb.intr.enable = gv11b_fb_intr_enable;
 	g->ops.fb.fault_buf_configure_hw = gv11b_fb_fault_buf_configure_hw;
diff --git a/userspace/units/mm/vm/vm.c b/userspace/units/mm/vm/vm.c
index 271d9214c..615928bb6 100644
--- a/userspace/units/mm/vm/vm.c
+++ b/userspace/units/mm/vm/vm.c
@@ -33,6 +33,8 @@
 #include <nvgpu/nvgpu_sgt.h>
 #include <nvgpu/vm_area.h>
 #include <gp10b/mm_gp10b.h>
+#include <hal/mm/cache/flush_gk20a.h>
+#include <hal/mm/cache/flush_gv11b.h>
 #include <hal/fb/fb_gp10b.h>
 #include <hal/fb/fb_gm20b.h>
 #include <gv11b/mm_gv11b.h>
@@ -137,6 +139,8 @@ static int init_test_env(struct unit_module *m, struct gk20a *g)
 	g->ops.mm.gmmu_map = gk20a_locked_gmmu_map;
 	g->ops.mm.gmmu_unmap = gk20a_locked_gmmu_unmap;
 	g->ops.mm.gpu_phys_addr = gv11b_gpu_phys_addr;
+	g->ops.mm.cache.l2_flush = gv11b_mm_l2_flush;
+	g->ops.mm.cache.fb_flush = gk20a_mm_fb_flush;
 
 	return UNIT_SUCCESS;
 }