From 7fbefcbf4b39643ab414454e6e4154865b333757 Mon Sep 17 00:00:00 2001
From: Thomas Steinle
Date: Wed, 23 Aug 2023 18:15:56 +0200
Subject: [PATCH] gpu: nvgpu: clear leftover ptes after failed map

The gmmu mapping code forgot to clear the already written gmmu entries
if a PD allocation failed in the middle.

If nvgpu_set_pd_level() fails when attempting to map, call it again with
the same virt addr, but as an unmap. This may fail again if we're low on
memory, but the entries that were already written are guaranteed to
exist and therefore get cleared again.

Ensure that the TLB is invalidated even in error conditions, since the
GPU may have already accessed the partially written entries that are now
unmapped again. Likewise, flush L2 as well, because an unmap happened.

Unify the unmap call a bit so that the gmmu attrs for an unmap are now
built in only one place; this also drops the unnecessary
cbc_comptagline_mode assignment, as it is not used for unmap.

Bug 3529753

Change-Id: I37e31f25b9b303f4b2220f490535945c7389f9d7
Signed-off-by: Thomas Steinle
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2966793
Reviewed-by: Dmitry Pervushin
Tested-by: Dmitry Pervushin
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/common/mm/gmmu.c | 260 +++++++++++++++++++++--------
 1 file changed, 186 insertions(+), 74 deletions(-)

diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index 748e9f455..0d88fa6b5 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -467,6 +467,29 @@ static int __set_pd_level(struct vm_gk20a *vm,
 	return 0;
 }
 
+static struct nvgpu_gmmu_attrs gmmu_unmap_attrs(u32 pgsz)
+{
+	/*
+	 * Most fields are not relevant for unmapping (zero physical address)
+	 * because the lowest PTE-level entries are written with only zeros.
+	 */
+	return (struct nvgpu_gmmu_attrs){
+		/*
+		 * page size has to match the original mapping, so that we'll
+		 * reach the correct PDEs/PTEs.
+		 */
+		.pgsz = pgsz,
+		/* just in case as this is an enum */
+		.aperture = APERTURE_INVALID,
+		/*
+		 * note: mappings with zero phys addr may be sparse; access to
+		 * such memory would not fault, so we'll want this to be false
+		 * explicitly.
+		 */
+		.sparse = false,
+	};
+}
+
 static int __nvgpu_gmmu_do_update_page_table(struct vm_gk20a *vm,
 					     struct nvgpu_sgt *sgt,
 					     u64 space_to_skip,
@@ -522,57 +545,114 @@ static int __nvgpu_gmmu_do_update_page_table(struct vm_gk20a *vm,
 				     attrs);
 
 		return err;
+	} else {
+
+		/*
+		 * Handle cases (2), (3), and (4): do the no-IOMMU mapping. In this case
+		 * we really are mapping physical pages directly.
+		 */
+		nvgpu_sgt_for_each_sgl(sgl, sgt) {
+			u64 phys_addr;
+			u64 chunk_length;
+
+			/*
+			 * Cut out sgl ents for space_to_skip.
+			 */
+			if (space_to_skip != 0ULL &&
+			    space_to_skip >= nvgpu_sgt_get_length(sgt, sgl)) {
+				space_to_skip -= nvgpu_sgt_get_length(sgt, sgl);
+				continue;
+			}
+
+			phys_addr = g->ops.mm.gpu_phys_addr(g, attrs,
+					nvgpu_sgt_get_phys(g, sgt, sgl)) + space_to_skip;
+			chunk_length = min(length,
+				nvgpu_sgt_get_length(sgt, sgl) - space_to_skip);
+
+			err = __set_pd_level(vm, &vm->pdb,
+					     0,
+					     phys_addr,
+					     virt_addr,
+					     chunk_length,
+					     attrs);
+			if (err) {
+				break;
+			}
+
+			/* Space has been skipped so zero this for future chunks. */
+			space_to_skip = 0;
+
+			/*
+			 * Update the map pointer and the remaining length.
+			 */
+			virt_addr += chunk_length;
+			length -= chunk_length;
+
+			if (length == 0U) {
+				break;
+			}
+		}
 	}
-
-	/*
-	 * Handle cases (2), (3), and (4): do the no-IOMMU mapping. In this case
-	 * we really are mapping physical pages directly.
-	 */
-	nvgpu_sgt_for_each_sgl(sgl, sgt) {
-		u64 phys_addr;
-		u64 chunk_length;
-
+
+	if (err < 0) {
+		struct nvgpu_gmmu_attrs unmap_attrs = gmmu_unmap_attrs(attrs->pgsz);
+		int err_unmap;
+		nvgpu_err(g, "Map failed! Backing off.");
+		err_unmap = __set_pd_level(vm, &vm->pdb,
+					   0U,
+					   0,
+					   virt_addr, length,
+					   &unmap_attrs);
 		/*
-		 * Cut out sgl ents for space_to_skip.
+		 * If the mapping attempt failed, this unmap attempt may also
+		 * fail, but it can only fail up to the point where the map
+		 * did, correctly undoing what was mapped at first. Log and
+		 * discard this error code.
 		 */
-		if (space_to_skip != 0ULL &&
-		    space_to_skip >= nvgpu_sgt_get_length(sgt, sgl)) {
-			space_to_skip -= nvgpu_sgt_get_length(sgt, sgl);
-			continue;
-		}
-
-		phys_addr = g->ops.mm.gpu_phys_addr(g, attrs,
-				nvgpu_sgt_get_phys(g, sgt, sgl)) + space_to_skip;
-		chunk_length = min(length,
-			nvgpu_sgt_get_length(sgt, sgl) - space_to_skip);
-
-		err = __set_pd_level(vm, &vm->pdb,
-				     0,
-				     phys_addr,
-				     virt_addr,
-				     chunk_length,
-				     attrs);
-		if (err) {
-			break;
-		}
-
-		/* Space has been skipped so zero this for future chunks. */
-		space_to_skip = 0;
-
-		/*
-		 * Update the map pointer and the remaining length.
-		 */
-		virt_addr += chunk_length;
-		length -= chunk_length;
-
-		if (length == 0U) {
-			break;
+		if (err_unmap != 0) {
+			nvgpu_err(g, "unmap err: %d", err_unmap);
 		}
 	}
 
 	return err;
 }
 
+static int gk20a_gmmu_cache_maint_map(struct gk20a *g, struct vm_gk20a *vm,
+		struct vm_gk20a_mapping_batch *batch)
+{
+	int err = 0;
+	if (batch == NULL) {
+		err = g->ops.fb.tlb_invalidate(g, vm->pdb.mem);
+		if (err != 0) {
+			nvgpu_err(g, "fb.tlb_invalidate() failed err=%d", err);
+		}
+	} else {
+		batch->need_tlb_invalidate = true;
+	}
+	return err;
+}
+
+static int gk20a_gmmu_cache_maint_unmap(struct gk20a *g, struct vm_gk20a *vm,
+		struct vm_gk20a_mapping_batch *batch)
+{
+	int err = 0;
+	if (batch == NULL) {
+		gk20a_mm_l2_flush(g, true);
+		err = g->ops.fb.tlb_invalidate(g, vm->pdb.mem);
+		if (err != 0) {
+			nvgpu_err(g, "fb.tlb_invalidate() failed err=%d", err);
+		}
+	} else {
+		if (!batch->gpu_l2_flushed) {
+			gk20a_mm_l2_flush(g, true);
+			batch->gpu_l2_flushed = true;
+		}
+		batch->need_tlb_invalidate = true;
+	}
+
+	return err;
+}
+
 /*
  * This is the true top level GMMU mapping logic. This breaks down the incoming
  * scatter gather table and does actual programming of GPU virtual address to
@@ -682,6 +762,7 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 {
 	struct gk20a *g = gk20a_from_vm(vm);
 	int err = 0;
+	int err_maint;
 	bool allocated = false;
 	int ctag_granularity = g->ops.fb.compression_page_size(g);
 	struct nvgpu_gmmu_attrs attrs = {
@@ -724,20 +805,59 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 
 	err = __nvgpu_gmmu_update_page_table(vm, sgt, buffer_offset,
 					     vaddr, size, &attrs);
-	if (err) {
-		nvgpu_err(g, "failed to update ptes on map");
-		goto fail_validate;
-	}
-
-	if (batch == NULL) {
-		g->ops.fb.tlb_invalidate(g, vm->pdb.mem);
+	if (err != 0) {
+		nvgpu_err(g, "failed to update ptes on map, err=%d", err);
+		/*
+		 * The PTEs were partially filled and then unmapped again. Act
+		 * as if this was an unmap to guard against concurrent GPU
+		 * accesses to the buffer.
+		 */
+		err_maint = gk20a_gmmu_cache_maint_unmap(g, vm, batch);
+		if (err_maint != 0) {
+			nvgpu_err(g,
+				"failed cache maintenance on failed map, err=%d",
+				err_maint);
+			err = err_maint;
+		}
 	} else {
-		batch->need_tlb_invalidate = true;
+		err_maint = gk20a_gmmu_cache_maint_map(g, vm, batch);
+		if (err_maint != 0) {
+			nvgpu_err(g,
+				"failed cache maintenance on map! Backing off, err=%d",
+				err_maint);
+			/*
+			 * Record this original error, and log and discard any
+			 * further errors from the clean-up below.
+			 */
+			err = err_maint;
+			/*
+			 * This should not fail because the PTEs were just
+			 * filled successfully above.
+			 */
+			attrs = gmmu_unmap_attrs(pgsz_idx);
+			err_maint = __nvgpu_gmmu_update_page_table(vm, NULL, 0, vaddr,
+								   size, &attrs);
+			if (err_maint != 0) {
+				nvgpu_err(g,
+					"failed to update gmmu ptes, err=%d",
+					err_maint);
+			}
+			/* Try the unmap maintenance in any case */
+			err_maint = gk20a_gmmu_cache_maint_unmap(g, vm, batch);
+			if (err_maint != 0) {
+				nvgpu_err(g,
+					"failed cache maintenance twice, err=%d",
+					err_maint);
+			}
+		}
+	}
+	if (err != 0) {
+		goto fail_free_va;
 	}
 
 	return vaddr;
 
-fail_validate:
+fail_free_va:
 	if (allocated) {
 		__nvgpu_vm_free_va(vm, vaddr, pgsz_idx);
 	}
@@ -757,17 +877,8 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
 {
 	int err = 0;
 	struct gk20a *g = gk20a_from_vm(vm);
-	struct nvgpu_gmmu_attrs attrs = {
-		.pgsz = pgsz_idx,
-		.kind_v = 0,
-		.ctag = 0,
-		.cacheable = 0,
-		.rw_flag = rw_flag,
-		.sparse = sparse,
-		.priv = 0,
-		.valid = 0,
-		.aperture = APERTURE_INVALID,
-	};
+	struct nvgpu_gmmu_attrs attrs = gmmu_unmap_attrs(pgsz_idx);
+	attrs.sparse = sparse;
 
 	if (va_allocated) {
 		err = __nvgpu_vm_free_va(vm, vaddr, pgsz_idx);
@@ -777,23 +888,18 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
 		}
 	}
 
-	/* unmap here needs to know the page size we assigned at mapping */
 	err = __nvgpu_gmmu_update_page_table(vm, NULL, 0, vaddr, size, &attrs);
 	if (err) {
 		nvgpu_err(g, "failed to update gmmu ptes on unmap");
 	}
 
-	if (batch == NULL) {
-		gk20a_mm_l2_flush(g, true);
-		g->ops.fb.tlb_invalidate(g, vm->pdb.mem);
-	} else {
-		if (!batch->gpu_l2_flushed) {
-			gk20a_mm_l2_flush(g, true);
-			batch->gpu_l2_flushed = true;
-		}
-		batch->need_tlb_invalidate = true;
-	}
+	err = gk20a_gmmu_cache_maint_unmap(g, vm, batch);
+	/*
+	 * Can't do anything at the moment if this fails, but if it does, the
+	 * gpu is likely out of reach anyway.
+	 */
+	(void)err;
 }
 
 u32 __nvgpu_pte_words(struct gk20a *g)
@@ -840,7 +946,13 @@ static int __nvgpu_locate_pte(struct gk20a *g, struct vm_gk20a *vm,
 	 * then find the next level PD and recurse.
	 */
 	if (next_l->update_entry) {
-		struct nvgpu_gmmu_pd *pd_next = pd->entries + pd_idx;
+		struct nvgpu_gmmu_pd *pd_next;
+
+		/* Not mapped yet, invalid entry */
+		if (pd->entries == NULL) {
+			return -EINVAL;
+		}
+		pd_next = pd->entries + pd_idx;
 
 		/* Invalid entry! */
 		if (pd_next->mem == NULL) {