gpu: nvgpu: TLB invalidate after map/unmap

Always invalidate TLB after mapping or unmapping, and remove the
delayed TLB invalidate.

Change-Id: I6df3c5c1fcca59f0f9e3f911168cb2f913c42815
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/696413
Reviewed-by: Automatic_Commit_Validation_User
Author:     Terje Bergstrom
Date:       2015-02-09 20:37:02 -08:00
Committer:  Dan Willemsen
Parent:     80d26baf05
Commit:     a3b26f25a2

5 changed files with 11 additions and 45 deletions
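For orientation, the change replaces the delayed ("dirty flag") invalidation with an immediate TLB invalidate on every map and unmap. The sketch below is a minimal, self-contained illustration of that pattern change only; the struct and helper names (update_gmmu_ptes, hw_tlb_invalidate) are hypothetical simplifications, not the nvgpu APIs.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical, simplified stand-ins for the real gmmu/TLB operations. */
struct vm { bool tlb_dirty; };
static void update_gmmu_ptes(struct vm *vm)  { (void)vm; puts("ptes updated"); }
static void hw_tlb_invalidate(struct vm *vm) { (void)vm; puts("tlb invalidated"); }

/* Before: map/unmap only marked the address space dirty... */
static void map_before(struct vm *vm)
{
        update_gmmu_ptes(vm);
        vm->tlb_dirty = true;           /* invalidation deferred */
}

/* ...and the invalidate ran lazily, e.g. on the next submit,
 * and was skipped entirely while the flag stayed clean. */
static void submit_before(struct vm *vm)
{
        if (vm->tlb_dirty) {
                hw_tlb_invalidate(vm);
                vm->tlb_dirty = false;
        }
}

/* After: every map/unmap invalidates the TLB right away, so there is
 * no per-vm dirty flag and no invalidate in the submit path. */
static void map_after(struct vm *vm)
{
        update_gmmu_ptes(vm);
        hw_tlb_invalidate(vm);
}

int main(void)
{
        struct vm vm = { .tlb_dirty = false };
        map_before(&vm);
        submit_before(&vm);
        map_after(&vm);
        return 0;
}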


@@ -1589,13 +1589,6 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 	gk20a_dbg_info("pre-submit put %d, get %d, size %d",
 		c->gpfifo.put, c->gpfifo.get, c->gpfifo.entry_num);
 
-	/* Invalidate tlb if it's dirty... */
-	/* TBD: this should be done in the cmd stream, not with PRIs. */
-	/* We don't know what context is currently running... */
-	/* Note also: there can be more than one context associated with the */
-	/* address space (vm). */
-	g->ops.mm.tlb_invalidate(c->vm);
-
 	/* Make sure we have enough space for gpfifo entries. If not,
 	 * wait for signals from completed submits */
 	if (gp_free_count(c) < num_entries + extra_entries) {


@@ -1161,6 +1161,8 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 		goto fail_validate;
 	}
 
+	g->ops.mm.tlb_invalidate(vm);
+
 	return map_offset;
 fail_validate:
 	if (allocated)
@@ -1202,8 +1204,6 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
 		dev_err(dev_from_vm(vm),
 			"failed to update gmmu ptes on unmap");
 
-	/* detect which if any pdes/ptes can now be released */
-
 	/* flush l2 so any dirty lines are written out *now*.
 	 * also as we could potentially be switching this buffer
 	 * from nonvolatile (l2 cacheable) to volatile (l2 non-cacheable) at
@@ -1213,6 +1213,8 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
 	 * unmapping (below). */
 	gk20a_mm_l2_flush(g, true);
+
+	g->ops.mm.tlb_invalidate(vm);
 }
 
 static u64 gk20a_vm_map_duplicate_locked(struct vm_gk20a *vm,
@@ -1502,10 +1504,6 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
 	mutex_unlock(&vm->update_gmmu_lock);
 
-	/* Invalidate kernel mappings immediately */
-	if (vm_aspace_id(vm) == -1)
-		gk20a_mm_tlb_invalidate(vm);
-
 	return map_offset;
 
 clean_up:
@@ -1549,9 +1547,6 @@ u64 gk20a_gmmu_map(struct vm_gk20a *vm,
 		return 0;
 	}
 
-	/* Invalidate kernel mappings immediately */
-	g->ops.mm.tlb_invalidate(vm);
-
 	return vaddr;
 }
@@ -1882,8 +1877,6 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 	}
 
 	smp_mb();
-	vm->tlb_dirty = true;
-	gk20a_dbg_fn("set tlb dirty");
 
 	return 0;
@@ -1972,8 +1965,6 @@ void update_gmmu_pde_locked(struct vm_gk20a *vm, u32 i)
 	gk20a_mm_l2_invalidate(vm->mm->g);
 
 	gk20a_dbg(gpu_dbg_pte, "pde:%d = 0x%x,0x%08x\n", i, pde_v[1], pde_v[0]);
-
-	vm->tlb_dirty = true;
 }
 
 /* NOTE! mapped_buffers lock must be held */
@@ -2992,13 +2983,6 @@ void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm)
 	if (!g->power_on)
 		return;
 
-	/* No need to invalidate if tlb is clean */
-	mutex_lock(&vm->update_gmmu_lock);
-	if (!vm->tlb_dirty) {
-		mutex_unlock(&vm->update_gmmu_lock);
-		return;
-	}
-
 	mutex_lock(&tlb_lock);
 	trace_gk20a_mm_tlb_invalidate(g->dev->name);
@@ -3042,8 +3026,6 @@ void gk20a_mm_tlb_invalidate(struct vm_gk20a *vm)
 out:
 	mutex_unlock(&tlb_lock);
-	vm->tlb_dirty = false;
-	mutex_unlock(&vm->update_gmmu_lock);
 }
 
 int gk20a_mm_suspend(struct gk20a *g)


@@ -290,7 +290,6 @@ struct vm_gk20a {
 	bool big_pages; /* enable large page support */
 	bool enable_ctag;
-	bool tlb_dirty;
 	bool mapped;
 
 	u32 big_page_size;


@@ -1,7 +1,7 @@
 /*
  * GM20B MMU
  *
- * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -34,6 +34,7 @@ static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm,
 	u32 pte_cur;
 	void *pte_kv_cur;
 	struct page_table_gk20a *pte;
+	struct gk20a *g = gk20a_from_vm(vm);
 
 	gk20a_dbg_fn("");
@@ -74,8 +75,7 @@ static int allocate_gmmu_ptes_sparse(struct vm_gk20a *vm,
 	unmap_gmmu_pages(pte->ref, pte->sgt, pte_kv_cur);
 
 	smp_mb();
-	vm->tlb_dirty = true;
-	gk20a_dbg_fn("set tlb dirty");
+	g->ops.mm.tlb_invalidate(vm);
 
 	return 0;
 fail:


@@ -1,7 +1,7 @@
 /*
  * Virtualized GPU Memory Management
  *
- * Copyright (c) 2014 NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -114,7 +114,8 @@ static u64 vgpu_locked_gmmu_map(struct vm_gk20a *vm,
 	if (err || msg.ret)
 		goto fail;
 
-	vm->tlb_dirty = true;
+	g->ops.mm.tlb_invalidate(vm);
+
 	return map_offset;
 fail:
 	gk20a_err(d, "%s: failed with err=%d\n", __func__, err);
@@ -154,7 +155,7 @@ static void vgpu_locked_gmmu_unmap(struct vm_gk20a *vm,
 		dev_err(dev_from_vm(vm),
 			"failed to update gmmu ptes on unmap");
 
-	vm->tlb_dirty = true;
+	g->ops.mm.tlb_invalidate(vm);
 }
 
 static void vgpu_vm_remove_support(struct vm_gk20a *vm)
@@ -402,20 +403,11 @@ static void vgpu_mm_tlb_invalidate(struct vm_gk20a *vm)
 	gk20a_dbg_fn("");
 
-	/* No need to invalidate if tlb is clean */
-	mutex_lock(&vm->update_gmmu_lock);
-	if (!vm->tlb_dirty) {
-		mutex_unlock(&vm->update_gmmu_lock);
-		return;
-	}
-
 	msg.cmd = TEGRA_VGPU_CMD_AS_INVALIDATE;
 	msg.handle = platform->virt_handle;
 	p->handle = vm->handle;
 	err = vgpu_comm_sendrecv(&msg, sizeof(msg), sizeof(msg));
 	WARN_ON(err || msg.ret);
-	vm->tlb_dirty = false;
-	mutex_unlock(&vm->update_gmmu_lock);
 }
 
 void vgpu_init_mm_ops(struct gpu_ops *gops)