gpu: nvgpu: Move all FB programming to FB HAL

Move all programming of FB to fb_*.c files, and remove the inclusion
of FB hardware headers from other files.

The TLB invalidate function previously took a pointer to a VM, but the new
API takes only a PDB mem_desc, because FB does not need to know about the
higher-level VM.
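At the call sites this means swapping the mm op for the new fb op and
passing the VM's PDB descriptor explicitly; an illustrative sketch
matching the mm_gk20a.c hunks below:

/* old mm HAL op: resolved the PDB from the VM internally */
g->ops.mm.tlb_invalidate(vm);

/* new fb HAL op: the caller hands over only the PDB mem_desc */
g->ops.fb.tlb_invalidate(g, &vm->pdb.mem);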

GPC MMU is programmed from the same function as FB MMU, so FB now has a
dependency on the GR hardware header.
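Condensed sketch of the gm20b set_debug_mode op (the g->mmu_debug_ctrl
bookkeeping is omitted); touching the gr_gpcs_pri_* accessors is what
pulls the GR header into fb_gm20b.c:

static void gm20b_fb_set_debug_mode(struct gk20a *g, bool enable)
{
	u32 reg_val;

	/* FB-side MMU debug control */
	reg_val = gk20a_readl(g, fb_mmu_debug_ctrl_r());
	reg_val = set_field(reg_val, fb_mmu_debug_ctrl_debug_m(),
			enable ? fb_mmu_debug_ctrl_debug_enabled_f() :
				 fb_mmu_debug_ctrl_debug_disabled_f());
	gk20a_writel(g, fb_mmu_debug_ctrl_r(), reg_val);

	/* GPC-side MMU debug control, programmed from the same function */
	reg_val = gk20a_readl(g, gr_gpcs_pri_mmu_debug_ctrl_r());
	reg_val = set_field(reg_val, gr_gpcs_pri_mmu_debug_ctrl_debug_m(),
			enable ? gr_gpcs_pri_mmu_debug_ctrl_debug_enabled_f() :
				 gr_gpcs_pri_mmu_debug_ctrl_debug_disabled_f());
	gk20a_writel(g, gr_gpcs_pri_mmu_debug_ctrl_r(), reg_val);
}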

GP106 ACR was also triggering a VPR fetch, but that is not applicable to
dGPU, so that call was removed.
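In gp106_prepare_ucode_blob() the call (and its debug print) is dropped
outright, while gm20b now reaches the fetch through the HAL:

/* gp106 (dGPU) -- before */
gp106_dbg_pmu("fetching GMMU regs\n");
gm20b_mm_mmu_vpr_info_fetch(g);
gr_gk20a_init_ctxsw_ucode(g);

/* gp106 (dGPU) -- after: no VPR fetch on dGPU */
gr_gk20a_init_ctxsw_ucode(g);

/* gm20b (iGPU) keeps fetching, via the new op */
g->ops.fb.vpr_info_fetch(g);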

Change-Id: I4eb69377ac3745da205907626cf60948b7c5392a
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/1321516
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Author:       Terje Bergstrom
Date:         2017-03-15 14:08:32 -07:00
Committed-by: mobile promotions
parent 0742f4e703
commit ca762e4220
15 changed files with 197 additions and 187 deletions


@@ -385,7 +385,7 @@ static int nvgpu_gpu_ioctl_set_mmu_debug_mode(
}
nvgpu_mutex_acquire(&g->dbg_sessions_lock);
g->ops.mm.set_debug_mode(g, args->state == 1);
g->ops.fb.set_debug_mode(g, args->state == 1);
nvgpu_mutex_release(&g->dbg_sessions_lock);
gk20a_idle(g->dev);


@@ -14,6 +14,8 @@
*/
#include <linux/types.h>
#include <trace/events/gk20a.h>
#include <linux/delay.h>
#include "gk20a.h"
#include "kind_gk20a.h"
@@ -40,6 +42,13 @@ void fb_gk20a_reset(struct gk20a *g)
gk20a_writel(g, mc_elpg_enable_r(), val);
}
void gk20a_fb_init_hw(struct gk20a *g)
{
gk20a_writel(g, fb_niso_flush_sysmem_addr_r(),
g->ops.mm.get_iova_addr(g, g->mm.sysmem_flush.sgt->sgl, 0)
>> 8);
}
static void gk20a_fb_set_mmu_page_size(struct gk20a *g)
{
/* set large page size in fb */
@@ -62,12 +71,104 @@ static unsigned int gk20a_fb_compressible_page_size(struct gk20a *g)
return SZ_64K;
}
bool gk20a_fb_debug_mode_enabled(struct gk20a *g)
{
u32 debug_ctrl = gk20a_readl(g, fb_mmu_debug_ctrl_r());
return fb_mmu_debug_ctrl_debug_v(debug_ctrl) ==
fb_mmu_debug_ctrl_debug_enabled_v();
}
static void gk20a_fb_set_debug_mode(struct gk20a *g, bool enable)
{
u32 reg_val, debug_ctrl;
reg_val = gk20a_readl(g, fb_mmu_debug_ctrl_r());
if (enable) {
debug_ctrl = fb_mmu_debug_ctrl_debug_enabled_f();
g->mmu_debug_ctrl = true;
} else {
debug_ctrl = fb_mmu_debug_ctrl_debug_disabled_f();
g->mmu_debug_ctrl = false;
}
reg_val = set_field(reg_val,
fb_mmu_debug_ctrl_debug_m(), debug_ctrl);
gk20a_writel(g, fb_mmu_debug_ctrl_r(), reg_val);
}
void gk20a_fb_tlb_invalidate(struct gk20a *g, struct mem_desc *pdb)
{
struct nvgpu_timeout timeout;
u32 addr_lo;
u32 data;
gk20a_dbg_fn("");
/* pagetables are considered sw states which are preserved after
prepare_poweroff. When gk20a deinit releases those pagetables,
common code in vm unmap path calls tlb invalidate that touches
hw. Use the power_on flag to skip tlb invalidation when gpu
power is turned off */
if (!g->power_on)
return;
addr_lo = u64_lo32(gk20a_mem_get_base_addr(g, pdb, 0) >> 12);
nvgpu_mutex_acquire(&g->mm.tlb_lock);
trace_gk20a_mm_tlb_invalidate(dev_name(g->dev));
nvgpu_timeout_init(g, &timeout, 1000, NVGPU_TIMER_RETRY_TIMER);
do {
data = gk20a_readl(g, fb_mmu_ctrl_r());
if (fb_mmu_ctrl_pri_fifo_space_v(data) != 0)
break;
udelay(2);
} while (!nvgpu_timeout_expired_msg(&timeout,
"wait mmu fifo space"));
if (nvgpu_timeout_peek_expired(&timeout))
goto out;
nvgpu_timeout_init(g, &timeout, 1000, NVGPU_TIMER_RETRY_TIMER);
gk20a_writel(g, fb_mmu_invalidate_pdb_r(),
fb_mmu_invalidate_pdb_addr_f(addr_lo) |
gk20a_aperture_mask(g, pdb,
fb_mmu_invalidate_pdb_aperture_sys_mem_f(),
fb_mmu_invalidate_pdb_aperture_vid_mem_f()));
gk20a_writel(g, fb_mmu_invalidate_r(),
fb_mmu_invalidate_all_va_true_f() |
fb_mmu_invalidate_trigger_true_f());
do {
data = gk20a_readl(g, fb_mmu_ctrl_r());
if (fb_mmu_ctrl_pri_fifo_empty_v(data) !=
fb_mmu_ctrl_pri_fifo_empty_false_f())
break;
udelay(2);
} while (!nvgpu_timeout_expired_msg(&timeout,
"wait mmu invalidate"));
trace_gk20a_mm_tlb_invalidate_done(dev_name(g->dev));
out:
nvgpu_mutex_release(&g->mm.tlb_lock);
}
void gk20a_init_fb(struct gpu_ops *gops)
{
gops->fb.init_hw = gk20a_fb_init_hw;
gops->fb.reset = fb_gk20a_reset;
gops->fb.set_mmu_page_size = gk20a_fb_set_mmu_page_size;
gops->fb.compression_page_size = gk20a_fb_compression_page_size;
gops->fb.compressible_page_size = gk20a_fb_compressible_page_size;
gops->fb.is_debug_mode_enabled = gk20a_fb_debug_mode_enabled;
gops->fb.set_debug_mode = gk20a_fb_set_debug_mode;
gops->fb.tlb_invalidate = gk20a_fb_tlb_invalidate;
gk20a_init_uncompressed_kind_map();
gk20a_init_kind_attr();
}


@@ -13,8 +13,13 @@
#ifndef FB_GK20A_H
#define FB_GK20A_H
struct gk20a;
struct mem_desc;
void gk20a_init_fb(struct gpu_ops *gops);
void fb_gk20a_reset(struct gk20a *g);
void gk20a_fb_init_hw(struct gk20a *g);
void gk20a_fb_tlb_invalidate(struct gk20a *g, struct mem_desc *pdb);
#endif


@@ -1267,7 +1267,7 @@ static bool gk20a_fifo_should_defer_engine_reset(struct gk20a *g, u32 engine_id,
/* channel recovery is only deferred if an sm debugger
is attached and MMU debug mode is enabled */
if (!gk20a_gr_sm_debugger_attached(g) ||
!g->ops.mm.is_debug_mode_enabled(g))
!g->ops.fb.is_debug_mode_enabled(g))
return false;
/* if this fault is fake (due to RC recovery), don't defer recovery */


@@ -1083,7 +1083,7 @@ int gk20a_pm_finalize_poweron(struct device *dev)
}
/* Restore the debug setting */
g->ops.mm.set_debug_mode(g, g->mmu_debug_ctrl);
g->ops.fb.set_debug_mode(g, g->mmu_debug_ctrl);
gk20a_channel_resume(g);
set_user_nice(current, nice_value);


@@ -349,6 +349,7 @@ struct gpu_ops {
} gr;
const char *name;
struct {
void (*init_hw)(struct gk20a *g);
void (*init_fs_state)(struct gk20a *g);
void (*reset)(struct gk20a *g);
void (*init_uncompressed_kind_map)(struct gk20a *g);
@@ -358,6 +359,10 @@ struct gpu_ops {
unsigned int (*compression_page_size)(struct gk20a *g);
unsigned int (*compressible_page_size)(struct gk20a *g);
void (*dump_vpr_wpr_info)(struct gk20a *g);
int (*vpr_info_fetch)(struct gk20a *g);
bool (*is_debug_mode_enabled)(struct gk20a *g);
void (*set_debug_mode)(struct gk20a *g, bool enable);
void (*tlb_invalidate)(struct gk20a *g, struct mem_desc *pdb);
} fb;
struct {
void (*slcg_bus_load_gating_prod)(struct gk20a *g, bool prod);
@@ -573,8 +578,6 @@ struct gpu_ops {
} fecs_trace;
struct {
bool (*support_sparse)(struct gk20a *g);
bool (*is_debug_mode_enabled)(struct gk20a *g);
void (*set_debug_mode)(struct gk20a *g, bool enable);
u64 (*gmmu_map)(struct vm_gk20a *vm,
u64 map_offset,
struct sg_table *sgt,
@@ -607,7 +610,6 @@ struct gpu_ops {
void (*l2_invalidate)(struct gk20a *g);
void (*l2_flush)(struct gk20a *g, bool invalidate);
void (*cbc_clean)(struct gk20a *g);
void (*tlb_invalidate)(struct vm_gk20a *vm);
void (*set_big_page_size)(struct gk20a *g,
struct mem_desc *mem, int size);
u32 (*get_big_page_sizes)(void);


@@ -8583,7 +8583,7 @@ int gk20a_gr_wait_for_sm_lock_down(struct gk20a *g, u32 gpc, u32 tpc,
bool locked_down;
bool no_error_pending;
u32 delay = GR_IDLE_CHECK_DEFAULT;
bool mmu_debug_mode_enabled = g->ops.mm.is_debug_mode_enabled(g);
bool mmu_debug_mode_enabled = g->ops.fb.is_debug_mode_enabled(g);
u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);
u32 offset =


@@ -42,7 +42,6 @@
#include "kind_gk20a.h"
#include <nvgpu/hw/gk20a/hw_gmmu_gk20a.h>
#include <nvgpu/hw/gk20a/hw_fb_gk20a.h>
#include <nvgpu/hw/gk20a/hw_bus_gk20a.h>
#include <nvgpu/hw/gk20a/hw_ram_gk20a.h>
#include <nvgpu/hw/gk20a/hw_pram_gk20a.h>
@@ -1084,9 +1083,7 @@ int gk20a_init_mm_setup_hw(struct gk20a *g)
mm->use_full_comp_tag_line =
g->ops.fb.set_use_full_comp_tag_line(g);
gk20a_writel(g, fb_niso_flush_sysmem_addr_r(),
g->ops.mm.get_iova_addr(g, g->mm.sysmem_flush.sgt->sgl, 0)
>> 8);
g->ops.fb.init_hw(g);
if (g->ops.mm.bar1_bind)
g->ops.mm.bar1_bind(g, &mm->bar1.inst_block);
@@ -1538,7 +1535,7 @@ void gk20a_vm_mapping_batch_finish_locked(
if (mapping_batch->need_tlb_invalidate) {
struct gk20a *g = gk20a_from_vm(vm);
g->ops.mm.tlb_invalidate(vm);
g->ops.fb.tlb_invalidate(g, &vm->pdb.mem);
}
}
@@ -1959,7 +1956,7 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
}
if (!batch)
g->ops.mm.tlb_invalidate(vm);
g->ops.fb.tlb_invalidate(g, &vm->pdb.mem);
else
batch->need_tlb_invalidate = true;
@@ -2018,7 +2015,7 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
if (!batch) {
gk20a_mm_l2_flush(g, true);
g->ops.mm.tlb_invalidate(vm);
g->ops.fb.tlb_invalidate(g, &vm->pdb.mem);
} else {
if (!batch->gpu_l2_flushed) {
gk20a_mm_l2_flush(g, true);
@@ -5344,70 +5341,6 @@ int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va,
return 0;
}
void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm)
{
struct gk20a *g = gk20a_from_vm(vm);
struct nvgpu_timeout timeout;
u32 addr_lo;
u32 data;
gk20a_dbg_fn("");
/* pagetables are considered sw states which are preserved after
prepare_poweroff. When gk20a deinit releases those pagetables,
common code in vm unmap path calls tlb invalidate that touches
hw. Use the power_on flag to skip tlb invalidation when gpu
power is turned off */
if (!g->power_on)
return;
addr_lo = u64_lo32(gk20a_mem_get_base_addr(g, &vm->pdb.mem, 0) >> 12);
nvgpu_mutex_acquire(&g->mm.tlb_lock);
trace_gk20a_mm_tlb_invalidate(dev_name(g->dev));
nvgpu_timeout_init(g, &timeout, 1000, NVGPU_TIMER_RETRY_TIMER);
do {
data = gk20a_readl(g, fb_mmu_ctrl_r());
if (fb_mmu_ctrl_pri_fifo_space_v(data) != 0)
break;
udelay(2);
} while (!nvgpu_timeout_expired_msg(&timeout,
"wait mmu fifo space"));
if (nvgpu_timeout_peek_expired(&timeout))
goto out;
nvgpu_timeout_init(g, &timeout, 1000, NVGPU_TIMER_RETRY_TIMER);
gk20a_writel(g, fb_mmu_invalidate_pdb_r(),
fb_mmu_invalidate_pdb_addr_f(addr_lo) |
gk20a_aperture_mask(g, &vm->pdb.mem,
fb_mmu_invalidate_pdb_aperture_sys_mem_f(),
fb_mmu_invalidate_pdb_aperture_vid_mem_f()));
gk20a_writel(g, fb_mmu_invalidate_r(),
fb_mmu_invalidate_all_va_true_f() |
fb_mmu_invalidate_trigger_true_f());
do {
data = gk20a_readl(g, fb_mmu_ctrl_r());
if (fb_mmu_ctrl_pri_fifo_empty_v(data) !=
fb_mmu_ctrl_pri_fifo_empty_false_f())
break;
udelay(2);
} while (!nvgpu_timeout_expired_msg(&timeout,
"wait mmu invalidate"));
trace_gk20a_mm_tlb_invalidate_done(dev_name(g->dev));
out:
nvgpu_mutex_release(&g->mm.tlb_lock);
}
int gk20a_mm_suspend(struct gk20a *g)
{
gk20a_dbg_fn("");
@@ -5423,31 +5356,6 @@ int gk20a_mm_suspend(struct gk20a *g)
return 0;
}
bool gk20a_mm_mmu_debug_mode_enabled(struct gk20a *g)
{
u32 debug_ctrl = gk20a_readl(g, fb_mmu_debug_ctrl_r());
return fb_mmu_debug_ctrl_debug_v(debug_ctrl) ==
fb_mmu_debug_ctrl_debug_enabled_v();
}
static void gk20a_mm_mmu_set_debug_mode(struct gk20a *g, bool enable)
{
u32 reg_val, debug_ctrl;
reg_val = gk20a_readl(g, fb_mmu_debug_ctrl_r());
if (enable) {
debug_ctrl = fb_mmu_debug_ctrl_debug_enabled_f();
g->mmu_debug_ctrl = true;
} else {
debug_ctrl = fb_mmu_debug_ctrl_debug_disabled_f();
g->mmu_debug_ctrl = false;
}
reg_val = set_field(reg_val,
fb_mmu_debug_ctrl_debug_m(), debug_ctrl);
gk20a_writel(g, fb_mmu_debug_ctrl_r(), reg_val);
}
u32 gk20a_mm_get_physical_addr_bits(struct gk20a *g)
{
return 34;
@@ -5510,8 +5418,6 @@ void gk20a_mm_debugfs_init(struct device *dev)
void gk20a_init_mm(struct gpu_ops *gops)
{
gops->mm.is_debug_mode_enabled = gk20a_mm_mmu_debug_mode_enabled;
gops->mm.set_debug_mode = gk20a_mm_mmu_set_debug_mode;
gops->mm.gmmu_map = gk20a_locked_gmmu_map;
gops->mm.gmmu_unmap = gk20a_locked_gmmu_unmap;
gops->mm.vm_remove = gk20a_vm_remove_support;
@@ -5521,7 +5427,6 @@ void gk20a_init_mm(struct gpu_ops *gops)
gops->mm.l2_invalidate = gk20a_mm_l2_invalidate;
gops->mm.l2_flush = gk20a_mm_l2_flush;
gops->mm.cbc_clean = gk20a_mm_cbc_clean;
gops->mm.tlb_invalidate = gk20a_mm_tlb_invalidate;
gops->mm.get_iova_addr = gk20a_mm_iova_addr;
gops->mm.get_physical_addr_bits = gk20a_mm_get_physical_addr_bits;
gops->mm.get_mmu_levels = gk20a_mm_get_mmu_levels;


@@ -679,9 +679,6 @@ void gk20a_vm_put_buffers(struct vm_gk20a *vm,
struct mapped_buffer_node **mapped_buffers,
int num_buffers);
/* invalidate tlbs for the vm area */
void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm);
/* find buffer corresponding to va */
int gk20a_vm_find_buffer(struct vm_gk20a *vm, u64 gpu_va,
struct dma_buf **dmabuf,


@@ -397,7 +397,7 @@ int prepare_ucode_blob(struct gk20a *g)
plsfm = &lsfm_l;
memset((void *)plsfm, 0, sizeof(struct ls_flcn_mgr));
gm20b_dbg_pmu("fetching GMMU regs\n");
gm20b_mm_mmu_vpr_info_fetch(g);
g->ops.fb.vpr_info_fetch(g);
gr_gk20a_init_ctxsw_ucode(g);
g->ops.pmu.get_wpr(g, &wpr_inf);


@@ -22,6 +22,9 @@
#include <nvgpu/hw/gm20b/hw_fb_gm20b.h>
#include <nvgpu/hw/gm20b/hw_top_gm20b.h>
#include <nvgpu/hw/gm20b/hw_gmmu_gm20b.h>
#include <nvgpu/hw/gm20b/hw_gr_gm20b.h>
#define VPR_INFO_FETCH_WAIT (5)
static void fb_gm20b_init_fs_state(struct gk20a *g)
{
@@ -140,15 +143,84 @@ static void gm20b_fb_dump_vpr_wpr_info(struct gk20a *g)
}
static int gm20b_fb_vpr_info_fetch_wait(struct gk20a *g,
unsigned int msec)
{
struct nvgpu_timeout timeout;
nvgpu_timeout_init(g, &timeout, msec, NVGPU_TIMER_CPU_TIMER);
do {
u32 val;
val = gk20a_readl(g, fb_mmu_vpr_info_r());
if (fb_mmu_vpr_info_fetch_v(val) ==
fb_mmu_vpr_info_fetch_false_v())
return 0;
} while (!nvgpu_timeout_expired(&timeout));
return -ETIMEDOUT;
}
int gm20b_fb_vpr_info_fetch(struct gk20a *g)
{
if (gm20b_fb_vpr_info_fetch_wait(g, VPR_INFO_FETCH_WAIT)) {
return -ETIME;
}
gk20a_writel(g, fb_mmu_vpr_info_r(),
fb_mmu_vpr_info_fetch_true_v());
return gm20b_fb_vpr_info_fetch_wait(g, VPR_INFO_FETCH_WAIT);
}
static bool gm20b_fb_debug_mode_enabled(struct gk20a *g)
{
u32 debug_ctrl = gk20a_readl(g, gr_gpcs_pri_mmu_debug_ctrl_r());
return gr_gpcs_pri_mmu_debug_ctrl_debug_v(debug_ctrl) ==
gr_gpcs_pri_mmu_debug_ctrl_debug_enabled_v();
}
static void gm20b_fb_set_debug_mode(struct gk20a *g, bool enable)
{
u32 reg_val, fb_debug_ctrl, gpc_debug_ctrl;
if (enable) {
fb_debug_ctrl = fb_mmu_debug_ctrl_debug_enabled_f();
gpc_debug_ctrl = gr_gpcs_pri_mmu_debug_ctrl_debug_enabled_f();
g->mmu_debug_ctrl = true;
} else {
fb_debug_ctrl = fb_mmu_debug_ctrl_debug_disabled_f();
gpc_debug_ctrl = gr_gpcs_pri_mmu_debug_ctrl_debug_disabled_f();
g->mmu_debug_ctrl = false;
}
reg_val = gk20a_readl(g, fb_mmu_debug_ctrl_r());
reg_val = set_field(reg_val,
fb_mmu_debug_ctrl_debug_m(), fb_debug_ctrl);
gk20a_writel(g, fb_mmu_debug_ctrl_r(), reg_val);
reg_val = gk20a_readl(g, gr_gpcs_pri_mmu_debug_ctrl_r());
reg_val = set_field(reg_val,
gr_gpcs_pri_mmu_debug_ctrl_debug_m(), gpc_debug_ctrl);
gk20a_writel(g, gr_gpcs_pri_mmu_debug_ctrl_r(), reg_val);
}
void gm20b_init_fb(struct gpu_ops *gops)
{
gops->fb.reset = fb_gk20a_reset;
gops->fb.init_hw = gk20a_fb_init_hw;
gops->fb.init_fs_state = fb_gm20b_init_fs_state;
gops->fb.set_mmu_page_size = gm20b_fb_set_mmu_page_size;
gops->fb.set_use_full_comp_tag_line = gm20b_fb_set_use_full_comp_tag_line;
gops->fb.compression_page_size = gm20b_fb_compression_page_size;
gops->fb.compressible_page_size = gm20b_fb_compressible_page_size;
gops->fb.vpr_info_fetch = gm20b_fb_vpr_info_fetch;
gops->fb.dump_vpr_wpr_info = gm20b_fb_dump_vpr_wpr_info;
gops->fb.is_debug_mode_enabled = gm20b_fb_debug_mode_enabled;
gops->fb.set_debug_mode = gm20b_fb_set_debug_mode;
gops->fb.tlb_invalidate = gk20a_fb_tlb_invalidate;
gm20b_init_uncompressed_kind_map();
gm20b_init_kind_attr();
}


@@ -22,75 +22,9 @@
#include <nvgpu/timers.h>
#include <nvgpu/hw/gm20b/hw_gmmu_gm20b.h>
#include <nvgpu/hw/gm20b/hw_fb_gm20b.h>
#include <nvgpu/hw/gm20b/hw_gr_gm20b.h>
#include <nvgpu/hw/gm20b/hw_ram_gm20b.h>
#include <nvgpu/hw/gm20b/hw_bus_gm20b.h>
static int gm20b_mm_mmu_vpr_info_fetch_wait(struct gk20a *g,
unsigned int msec)
{
struct nvgpu_timeout timeout;
nvgpu_timeout_init(g, &timeout, msec, NVGPU_TIMER_CPU_TIMER);
do {
u32 val;
val = gk20a_readl(g, fb_mmu_vpr_info_r());
if (fb_mmu_vpr_info_fetch_v(val) ==
fb_mmu_vpr_info_fetch_false_v())
return 0;
} while (!nvgpu_timeout_expired(&timeout));
return -ETIMEDOUT;
}
int gm20b_mm_mmu_vpr_info_fetch(struct gk20a *g)
{
if (gm20b_mm_mmu_vpr_info_fetch_wait(g, VPR_INFO_FETCH_WAIT)) {
return -ETIME;
}
gk20a_writel(g, fb_mmu_vpr_info_r(),
fb_mmu_vpr_info_fetch_true_v());
return gm20b_mm_mmu_vpr_info_fetch_wait(g, VPR_INFO_FETCH_WAIT);
}
static bool gm20b_mm_mmu_debug_mode_enabled(struct gk20a *g)
{
u32 debug_ctrl = gk20a_readl(g, gr_gpcs_pri_mmu_debug_ctrl_r());
return gr_gpcs_pri_mmu_debug_ctrl_debug_v(debug_ctrl) ==
gr_gpcs_pri_mmu_debug_ctrl_debug_enabled_v();
}
static void gm20b_mm_mmu_set_debug_mode(struct gk20a *g, bool enable)
{
u32 reg_val, fb_debug_ctrl, gpc_debug_ctrl;
if (enable) {
fb_debug_ctrl = fb_mmu_debug_ctrl_debug_enabled_f();
gpc_debug_ctrl = gr_gpcs_pri_mmu_debug_ctrl_debug_enabled_f();
g->mmu_debug_ctrl = true;
} else {
fb_debug_ctrl = fb_mmu_debug_ctrl_debug_disabled_f();
gpc_debug_ctrl = gr_gpcs_pri_mmu_debug_ctrl_debug_disabled_f();
g->mmu_debug_ctrl = false;
}
reg_val = gk20a_readl(g, fb_mmu_debug_ctrl_r());
reg_val = set_field(reg_val,
fb_mmu_debug_ctrl_debug_m(), fb_debug_ctrl);
gk20a_writel(g, fb_mmu_debug_ctrl_r(), reg_val);
reg_val = gk20a_readl(g, gr_gpcs_pri_mmu_debug_ctrl_r());
reg_val = set_field(reg_val,
gr_gpcs_pri_mmu_debug_ctrl_debug_m(), gpc_debug_ctrl);
gk20a_writel(g, gr_gpcs_pri_mmu_debug_ctrl_r(), reg_val);
}
static void gm20b_mm_set_big_page_size(struct gk20a *g,
struct mem_desc *mem, int size)
{
@@ -157,8 +91,6 @@ static bool gm20b_mm_is_bar1_supported(struct gk20a *g)
void gm20b_init_mm(struct gpu_ops *gops)
{
gops->mm.support_sparse = gm20b_mm_support_sparse;
gops->mm.is_debug_mode_enabled = gm20b_mm_mmu_debug_mode_enabled;
gops->mm.set_debug_mode = gm20b_mm_mmu_set_debug_mode;
gops->mm.gmmu_map = gk20a_locked_gmmu_map;
gops->mm.gmmu_unmap = gk20a_locked_gmmu_unmap;
gops->mm.vm_remove = gk20a_vm_remove_support;
@@ -168,7 +100,6 @@ void gm20b_init_mm(struct gpu_ops *gops)
gops->mm.l2_invalidate = gk20a_mm_l2_invalidate;
gops->mm.l2_flush = gk20a_mm_l2_flush;
gops->mm.cbc_clean = gk20a_mm_cbc_clean;
gops->mm.tlb_invalidate = gk20a_mm_tlb_invalidate;
gops->mm.set_big_page_size = gm20b_mm_set_big_page_size;
gops->mm.get_big_page_sizes = gm20b_mm_get_big_page_sizes;
gops->mm.get_iova_addr = gk20a_mm_iova_addr;


@@ -19,7 +19,6 @@ struct gk20a;
#define PDE_ADDR_START(x, y) ((x) & ~((0x1UL << (y)) - 1))
#define PDE_ADDR_END(x, y) ((x) | ((0x1UL << (y)) - 1))
#define VPR_INFO_FETCH_WAIT (5)
void gm20b_init_mm(struct gpu_ops *gops);
int gm20b_mm_mmu_vpr_info_fetch(struct gk20a *g);


@@ -394,8 +394,6 @@ static int gp106_prepare_ucode_blob(struct gk20a *g)
}
plsfm = &lsfm_l;
memset((void *)plsfm, 0, sizeof(struct ls_flcn_mgr_v1));
gp106_dbg_pmu("fetching GMMU regs\n");
gm20b_mm_mmu_vpr_info_fetch(g);
gr_gk20a_init_ctxsw_ucode(g);
g->ops.pmu.get_wpr(g, &wpr_inf);


@@ -498,11 +498,11 @@ static void vgpu_mm_l2_flush(struct gk20a *g, bool invalidate)
vgpu_cache_maint(vgpu_get_handle(g), op);
}
static void vgpu_mm_tlb_invalidate(struct vm_gk20a *vm)
static void vgpu_mm_tlb_invalidate(struct gk20a *g, struct mem_desc *pdb)
{
gk20a_dbg_fn("");
gk20a_err(dev_from_vm(vm), "%s: call to RM server not supported",
gk20a_err(g->dev, "%s: call to RM server not supported",
__func__);
}
@@ -523,8 +523,8 @@ static void vgpu_mm_mmu_set_debug_mode(struct gk20a *g, bool enable)
void vgpu_init_mm_ops(struct gpu_ops *gops)
{
gops->mm.is_debug_mode_enabled = NULL;
gops->mm.set_debug_mode = vgpu_mm_mmu_set_debug_mode;
gops->fb.is_debug_mode_enabled = NULL;
gops->fb.set_debug_mode = vgpu_mm_mmu_set_debug_mode;
gops->mm.gmmu_map = vgpu_locked_gmmu_map;
gops->mm.gmmu_unmap = vgpu_locked_gmmu_unmap;
gops->mm.vm_remove = vgpu_vm_remove_support;
@@ -533,7 +533,7 @@ void vgpu_init_mm_ops(struct gpu_ops *gops)
gops->mm.fb_flush = vgpu_mm_fb_flush;
gops->mm.l2_invalidate = vgpu_mm_l2_invalidate;
gops->mm.l2_flush = vgpu_mm_l2_flush;
gops->mm.tlb_invalidate = vgpu_mm_tlb_invalidate;
gops->fb.tlb_invalidate = vgpu_mm_tlb_invalidate;
gops->mm.get_physical_addr_bits = gk20a_mm_get_physical_addr_bits;
gops->mm.get_iova_addr = gk20a_mm_iova_addr;
gops->mm.init_mm_setup_hw = NULL;