diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index d61656fc8..a32496a26 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -1589,13 +1589,6 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 	gk20a_dbg_info("pre-submit put %d, get %d, size %d",
 		c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
 
-	/* Invalidate tlb if it's dirty... */
-	/* TBD: this should be done in the cmd stream, not with PRIs. */
-	/* We don't know what context is currently running... */
-	/* Note also: there can be more than one context associated with the */
-	/* address space (vm). */
-	g->ops.mm.tlb_invalidate(c->vm);
-
 	/* Make sure we have enough space for gpfifo entries. If not,
 	 * wait for signals from completed submits */
 	if (gp_free_count(c) < num_entries + extra_entries) {
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 17cf8d83c..3f71fc5ee 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1161,6 +1161,8 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 		goto fail_validate;
 	}
 
+	g->ops.mm.tlb_invalidate(vm);
+
 	return map_offset;
 fail_validate:
 	if (allocated)
@@ -1202,8 +1204,6 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
 		dev_err(dev_from_vm(vm),
 			"failed to update gmmu ptes on unmap");
 
-	/* detect which if any pdes/ptes can now be released */
-
 	/* flush l2 so any dirty lines are written out *now*.
 	 * also as we could potentially be switching this buffer
 	 * from nonvolatile (l2 cacheable) to volatile (l2 non-cacheable) at
@@ -1213,6 +1213,8 @@
 	 * unmapping (below). */
 
 	gk20a_mm_l2_flush(g, true);
+
+	g->ops.mm.tlb_invalidate(vm);
 }
 
 static u64 gk20a_vm_map_duplicate_locked(struct vm_gk20a *vm,
@@ -1502,10 +1504,6 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 
 	mutex_unlock(&vm->update_gmmu_lock);
 
-	/* Invalidate kernel mappings immediately */
-	if (vm_aspace_id(vm) == -1)
-		gk20a_mm_tlb_invalidate(vm);
-
 	return map_offset;
 
 clean_up:
@@ -1549,9 +1547,6 @@ u64 gk20a_gmmu_map(struct vm_gk20a *vm,
 		return 0;
 	}
 
-	/* Invalidate kernel mappings immediately */
-	g->ops.mm.tlb_invalidate(vm);
-
 	return vaddr;
 }
 
@@ -1882,8 +1877,6 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 	}
 
 	smp_mb();
-	vm->tlb_dirty = true;
-	gk20a_dbg_fn("set tlb dirty");
 
 	return 0;
 
@@ -1972,8 +1965,6 @@ void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i)
 	gk20a_mm_l2_invalidate(vm->mm->g);
 
 	gk20a_dbg(gpu_dbg_pte, "pde:%d = 0x%x,0x%08x\n", i, pde_v[1], pde_v[0]);
-
-	vm->tlb_dirty = true;
 }
 
 /* NOTE! mapped_buffers lock must be held */
@@ -2992,13 +2983,6 @@ void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm)
 	if (!g->power_on)
 		return;
 
-	/* No need to invalidate if tlb is clean */
-	mutex_lock(&vm->update_gmmu_lock);
-	if (!vm->tlb_dirty) {
-		mutex_unlock(&vm->update_gmmu_lock);
-		return;
-	}
-
 	mutex_lock(&tlb_lock);
 	trace_gk20a_mm_tlb_invalidate(g->dev->name);
 
@@ -3042,8 +3026,6 @@ void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm)
 
 out:
 	mutex_unlock(&tlb_lock);
-	vm->tlb_dirty = false;
-	mutex_unlock(&vm->update_gmmu_lock);
 }
 
 int gk20a_mm_suspend(struct gk20a *g)
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 596661ca9..663bd5d3f 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -290,7 +290,6 @@ struct vm_gk20a {
 	bool big_pages;		/* enable large page support */
 	bool enable_ctag;
-	bool tlb_dirty;
 	bool mapped;
 
 	u32 big_page_size;
diff --git a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
index 1adff5abe..605464d41 100644
--- a/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/mm_gm20b.c
@@ -1,7 +1,7 @@
 /*
  * GM20B MMU
  *
- * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -34,6 +34,7 @@ static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm,
 	u32 pte_cur;
 	void *pte_kv_cur;
 	struct page_table_gk20a *pte;
+	struct gk20a *g = gk20a_from_vm(vm);
 
 	gk20a_dbg_fn("");
 
@@ -74,8 +75,7 @@ static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm,
 	unmap_gmmu_pages(pte->ref, pte->sgt, pte_kv_cur);
 
 	smp_mb();
-	vm->tlb_dirty = true;
-	gk20a_dbg_fn("set tlb dirty");
+	g->ops.mm.tlb_invalidate(vm);
 
 	return 0;
 fail:
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
index 6817b107b..82d16bd1c 100644
--- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
@@ -1,7 +1,7 @@
 /*
  * Virtualized GPU Memory Management
  *
- * Copyright (c) 2014 NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -114,7 +114,8 @@ static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm,
 	if (err || msg.ret)
 		goto fail;
 
-	vm->tlb_dirty = true;
+	g->ops.mm.tlb_invalidate(vm);
+
 	return map_offset;
 fail:
 	gk20a_err(d, "%s: failed with err=%d\n", __func__, err);
@@ -154,7 +155,7 @@ static void vgpu_locked_gmmu_unmap(struct vm_gk20a *vm,
 		dev_err(dev_from_vm(vm),
 			"failed to update gmmu ptes on unmap");
 
-	vm->tlb_dirty = true;
+	g->ops.mm.tlb_invalidate(vm);
 }
 
 static void vgpu_vm_remove_support(struct vm_gk20a *vm)
@@ -402,20 +403,11 @@ static void vgpu_mm_tlb_invalidate(struct vm_gk20a *vm)
 
 	gk20a_dbg_fn("");
 
-	/* No need to invalidate if tlb is clean */
-	mutex_lock(&vm->update_gmmu_lock);
-	if (!vm->tlb_dirty) {
-		mutex_unlock(&vm->update_gmmu_lock);
-		return;
-	}
-
 	msg.cmd = TEGRA_VGPU_CMD_AS_INVALIDATE;
 	msg.handle = platform->virt_handle;
 	p->handle = vm->handle;
 	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
 	WARN_ON(err || msg.ret);
-	vm->tlb_dirty = false;
-	mutex_unlock(&vm->update_gmmu_lock);
 }
 
 void vgpu_init_mm_ops(struct gpu_ops *gops)
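
For reviewers: the patch drops the lazy vm->tlb_dirty flag, which was set when PTEs were rewritten and only acted on later at submit time, and instead calls g->ops.mm.tlb_invalidate(vm) eagerly as part of each map/unmap, so no stale TLB entry can be used in the window between a PTE update and the next submit. Below is a minimal user-space sketch of the two orderings; struct vm, fake_hw_tlb_invalidate, map_lazy, and map_eager are invented stand-ins for illustration, not the driver's real types or HAL.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins for the driver's vm and its HAL op. */
struct vm {
	bool tlb_dirty;                        /* old scheme: lazy flag */
	void (*tlb_invalidate)(struct vm *vm);
};

static void fake_hw_tlb_invalidate(struct vm *vm)
{
	printf("TLB invalidated\n");           /* models the register write */
	vm->tlb_dirty = false;
}

/* Old scheme: mark dirty on map; a later submit does the invalidate.
 * Stale TLB entries can be used in the window in between. */
static void map_lazy(struct vm *vm)
{
	/* ... write PTEs ... */
	vm->tlb_dirty = true;
}

/* New scheme (this patch): invalidate immediately after the PTE write,
 * before the new mapping is handed back to the caller. */
static void map_eager(struct vm *vm)
{
	/* ... write PTEs ... */
	vm->tlb_invalidate(vm);                /* g->ops.mm.tlb_invalidate(vm) */
}

int main(void)
{
	struct vm vm = { .tlb_dirty = false,
			 .tlb_invalidate = fake_hw_tlb_invalidate };
	map_eager(&vm);                        /* TLB coherent once map returns */
	(void)map_lazy;                        /* old ordering, kept for contrast */
	return 0;
}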