diff --git a/drivers/gpu/nvgpu/common/mm/gmmu.c b/drivers/gpu/nvgpu/common/mm/gmmu.c
index 748e9f455..0d88fa6b5 100644
--- a/drivers/gpu/nvgpu/common/mm/gmmu.c
+++ b/drivers/gpu/nvgpu/common/mm/gmmu.c
@@ -467,6 +467,29 @@ static int __set_pd_level(struct vm_gk20a *vm,
 	return 0;
 }
 
+static struct nvgpu_gmmu_attrs gmmu_unmap_attrs(u32 pgsz)
+{
+	/*
+	 * Most fields are not relevant for unmapping (zero physical address)
+	 * because the lowest-level PTE entries are written with zeros only.
+	 */
+	return (struct nvgpu_gmmu_attrs){
+		/*
+		 * The page size has to match the original mapping so that we
+		 * reach the correct PDEs/PTEs.
+		 */
+		.pgsz = pgsz,
+		/* Explicitly invalid, just in case, since this is an enum. */
+		.aperture = APERTURE_INVALID,
+		/*
+		 * Note: mappings with a zero phys addr may be sparse; accesses
+		 * to such memory do not fault, so this must explicitly be
+		 * false.
+		 */
+		.sparse = false,
+	};
+}
+
 static int __nvgpu_gmmu_do_update_page_table(struct vm_gk20a *vm,
 					     struct nvgpu_sgt *sgt,
 					     u64 space_to_skip,
@@ -522,57 +545,114 @@ static int __nvgpu_gmmu_do_update_page_table(struct vm_gk20a *vm,
 				     attrs);
 
 		return err;
+	} else {
+
+		/*
+		 * Handle cases (2), (3), and (4): do the no-IOMMU mapping. In
+		 * this case we really are mapping physical pages directly.
+		 */
+		nvgpu_sgt_for_each_sgl(sgl, sgt) {
+			u64 phys_addr;
+			u64 chunk_length;
+
+			/*
+			 * Cut out sgl entries for space_to_skip.
+			 */
+			if (space_to_skip != 0ULL &&
+			    space_to_skip >= nvgpu_sgt_get_length(sgt, sgl)) {
+				space_to_skip -= nvgpu_sgt_get_length(sgt, sgl);
+				continue;
+			}
+
+			phys_addr = g->ops.mm.gpu_phys_addr(g, attrs,
+				nvgpu_sgt_get_phys(g, sgt, sgl)) + space_to_skip;
+			chunk_length = min(length,
+				nvgpu_sgt_get_length(sgt, sgl) - space_to_skip);
+
+			err = __set_pd_level(vm, &vm->pdb,
+					     0,
+					     phys_addr,
+					     virt_addr,
+					     chunk_length,
+					     attrs);
+			if (err) {
+				break;
+			}
+
+			/* Space has been skipped so zero this for future chunks. */
+			space_to_skip = 0;
+
+			/*
+			 * Update the map pointer and the remaining length.
+			 */
+			virt_addr += chunk_length;
+			length -= chunk_length;
+
+			if (length == 0U) {
+				break;
+			}
+		}
 	}
-
-	/*
-	 * Handle cases (2), (3), and (4): do the no-IOMMU mapping. In this case
-	 * we really are mapping physical pages directly.
-	 */
-	nvgpu_sgt_for_each_sgl(sgl, sgt) {
-		u64 phys_addr;
-		u64 chunk_length;
-
+
+	if (err < 0) {
+		struct nvgpu_gmmu_attrs unmap_attrs = gmmu_unmap_attrs(attrs->pgsz);
+		int err_unmap;
+		nvgpu_err(g, "Map failed! Backing off.");
+		err_unmap = __set_pd_level(vm, &vm->pdb,
+					   0U,
+					   0,
+					   virt_addr, length,
+					   &unmap_attrs);
 		/*
-		 * Cut out sgl ents for space_to_skip.
+		 * If the mapping attempt failed, this unmap attempt may also
+		 * fail, but only at the point where the map did, after
+		 * correctly undoing everything mapped before that point. Log
+		 * and discard this error code.
 		 */
-		if (space_to_skip != 0ULL &&
-		    space_to_skip >= nvgpu_sgt_get_length(sgt, sgl)) {
-			space_to_skip -= nvgpu_sgt_get_length(sgt, sgl);
-			continue;
-		}
-
-		phys_addr = g->ops.mm.gpu_phys_addr(g, attrs,
-			nvgpu_sgt_get_phys(g, sgt, sgl)) + space_to_skip;
-		chunk_length = min(length,
-			nvgpu_sgt_get_length(sgt, sgl) - space_to_skip);
-
-		err = __set_pd_level(vm, &vm->pdb,
-				     0,
-				     phys_addr,
-				     virt_addr,
-				     chunk_length,
-				     attrs);
-		if (err) {
-			break;
-		}
-
-		/* Space has been skipped so zero this for future chunks. */
-		space_to_skip = 0;
-
-		/*
-		 * Update the map pointer and the remaining length.
-		 */
-		virt_addr += chunk_length;
-		length -= chunk_length;
-
-		if (length == 0U) {
-			break;
+		if (err_unmap != 0) {
+			nvgpu_err(g, "unmap err: %d", err_unmap);
 		}
 	}
 
 	return err;
 }
 
+static int gk20a_gmmu_cache_maint_map(struct gk20a *g, struct vm_gk20a *vm,
+		struct vm_gk20a_mapping_batch *batch)
+{
+	int err = 0;
+	if (batch == NULL) {
+		err = g->ops.fb.tlb_invalidate(g, vm->pdb.mem);
+		if (err != 0) {
+			nvgpu_err(g, "fb.tlb_invalidate() failed err=%d", err);
+		}
+	} else {
+		batch->need_tlb_invalidate = true;
+	}
+	return err;
+}
+
+static int gk20a_gmmu_cache_maint_unmap(struct gk20a *g, struct vm_gk20a *vm,
+		struct vm_gk20a_mapping_batch *batch)
+{
+	int err = 0;
+	if (batch == NULL) {
+		gk20a_mm_l2_flush(g, true);
+		err = g->ops.fb.tlb_invalidate(g, vm->pdb.mem);
+		if (err != 0) {
+			nvgpu_err(g, "fb.tlb_invalidate() failed err=%d", err);
+		}
+	} else {
+		if (!batch->gpu_l2_flushed) {
+			gk20a_mm_l2_flush(g, true);
+			batch->gpu_l2_flushed = true;
+		}
+		batch->need_tlb_invalidate = true;
+	}
+
+	return err;
+}
+
 /*
  * This is the true top level GMMU mapping logic. This breaks down the incoming
  * scatter gather table and does actual programming of GPU virtual address to
@@ -682,6 +762,7 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 {
 	struct gk20a *g = gk20a_from_vm(vm);
 	int err = 0;
+	int err_maint;
 	bool allocated = false;
 	int ctag_granularity = g->ops.fb.compression_page_size(g);
 	struct nvgpu_gmmu_attrs attrs = {
@@ -724,20 +805,59 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 	err = __nvgpu_gmmu_update_page_table(vm, sgt, buffer_offset,
 					     vaddr, size, &attrs);
-	if (err) {
-		nvgpu_err(g, "failed to update ptes on map");
-		goto fail_validate;
-	}
-
-	if (batch == NULL) {
-		g->ops.fb.tlb_invalidate(g, vm->pdb.mem);
+	if (err != 0) {
+		nvgpu_err(g, "failed to update ptes on map, err=%d", err);
+		/*
+		 * The PTEs were partially filled and then unmapped again. Act
+		 * as if this were an unmap to guard against concurrent GPU
+		 * accesses to the buffer.
+		 */
+		err_maint = gk20a_gmmu_cache_maint_unmap(g, vm, batch);
+		if (err_maint != 0) {
+			nvgpu_err(g,
+				"failed cache maintenance on failed map, err=%d",
+				err_maint);
+			err = err_maint;
+		}
 	} else {
-		batch->need_tlb_invalidate = true;
+		err_maint = gk20a_gmmu_cache_maint_map(g, vm, batch);
+		if (err_maint != 0) {
+			nvgpu_err(g,
+				"failed cache maintenance on map! Backing off, err=%d",
+				err_maint);
+			/*
+			 * Record the original error; log and discard any
+			 * further errors from the recovery path below.
+			 */
+			err = err_maint;
+			/*
+			 * This should not fail because the PTEs were just
+			 * filled successfully above.
+			 */
+			attrs = gmmu_unmap_attrs(pgsz_idx);
+			err_maint = __nvgpu_gmmu_update_page_table(vm, NULL, 0, vaddr,
+					size, &attrs);
+			if (err_maint != 0) {
+				nvgpu_err(g,
+					"failed to update gmmu ptes, err=%d",
+					err_maint);
+			}
+			/* Attempt the unmap cache maintenance in any case. */
+			err_maint = gk20a_gmmu_cache_maint_unmap(g, vm, batch);
+			if (err_maint != 0) {
+				nvgpu_err(g,
					"failed cache maintenance twice, err=%d",
+					err_maint);
+			}
+		}
+	}
+
+	if (err != 0) {
+		goto fail_free_va;
 	}
 
 	return vaddr;
 
-fail_validate:
+fail_free_va:
 	if (allocated) {
 		__nvgpu_vm_free_va(vm, vaddr, pgsz_idx);
 	}
@@ -757,17 +877,8 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
 {
 	int err = 0;
 	struct gk20a *g = gk20a_from_vm(vm);
-	struct nvgpu_gmmu_attrs attrs = {
-		.pgsz = pgsz_idx,
-		.kind_v = 0,
-		.ctag = 0,
-		.cacheable = 0,
-		.rw_flag = rw_flag,
-		.sparse = sparse,
-		.priv = 0,
-		.valid = 0,
-		.aperture = APERTURE_INVALID,
-	};
+	struct nvgpu_gmmu_attrs attrs = gmmu_unmap_attrs(pgsz_idx);
+	attrs.sparse = sparse;
 
 	if (va_allocated) {
 		err = __nvgpu_vm_free_va(vm, vaddr, pgsz_idx);
@@ -777,23 +888,18 @@ void gk20a_locked_gmmu_unmap(struct vm_gk20a *vm,
 		}
 	}
 
-	/* unmap here needs to know the page size we assigned at mapping */
 	err = __nvgpu_gmmu_update_page_table(vm, NULL, 0,
 					     vaddr, size, &attrs);
 	if (err) {
 		nvgpu_err(g, "failed to update gmmu ptes on unmap");
 	}
 
-	if (batch == NULL) {
-		gk20a_mm_l2_flush(g, true);
-		g->ops.fb.tlb_invalidate(g, vm->pdb.mem);
-	} else {
-		if (!batch->gpu_l2_flushed) {
-			gk20a_mm_l2_flush(g, true);
-			batch->gpu_l2_flushed = true;
-		}
-		batch->need_tlb_invalidate = true;
-	}
+	err = gk20a_gmmu_cache_maint_unmap(g, vm, batch);
+	/*
+	 * Can't do anything at the moment if this fails, but if it does, the
+	 * GPU is likely out of reach anyway.
+	 */
+	(void)err;
 }
 
 u32 __nvgpu_pte_words(struct gk20a *g)
@@ -840,7 +946,13 @@ static int __nvgpu_locate_pte(struct gk20a *g, struct vm_gk20a *vm,
 	 * then find the next level PD and recurse.
 	 */
 	if (next_l->update_entry) {
-		struct nvgpu_gmmu_pd *pd_next = pd->entries + pd_idx;
+		struct nvgpu_gmmu_pd *pd_next;
+
+		/* Not mapped yet: invalid entry. */
+		if (pd->entries == NULL) {
+			return -EINVAL;
+		}
+		pd_next = pd->entries + pd_idx;
 
 		/* Invalid entry! */
 		if (pd_next->mem == NULL) {
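
Reviewer note: the back-off in __nvgpu_gmmu_do_update_page_table() is easy to
lose in the re-indentation noise above. Below is a minimal, self-contained C
sketch of the same pattern under simplified assumptions: struct chunk,
set_ptes(), and map_chunks() are hypothetical stand-ins for the sgl entries,
__set_pd_level(), and the mapping loop, and a zero phys address stands in for
the all-zero PTE writes selected by gmmu_unmap_attrs(). This illustrates the
control flow only; it is not the driver's actual implementation.

#include <stddef.h>
#include <stdio.h>

/* Hypothetical stand-in for one scatter-gather list entry. */
struct chunk {
	unsigned long long phys;
	unsigned long long len;
};

/*
 * Stand-in for __set_pd_level(): write PTEs for [virt, virt + len). A zero
 * phys address clears the entries instead, as with gmmu_unmap_attrs().
 */
static int set_ptes(unsigned long long virt, unsigned long long phys,
		    unsigned long long len)
{
	printf("%s virt=0x%llx len=0x%llx\n",
	       phys != 0ULL ? "map" : "clear", virt, len);
	return 0;	/* a real page-table walk could fail mid-range */
}

/*
 * Walk the chunks as the patched loop does; on failure, replay the remaining
 * range with a zero phys address so that any PTEs the failing call partially
 * wrote are cleared again.
 */
static int map_chunks(const struct chunk *chunks, size_t n,
		      unsigned long long virt_addr, unsigned long long length)
{
	int err = 0;
	size_t i;

	for (i = 0; i < n && length > 0ULL; i++) {
		unsigned long long chunk_len =
			chunks[i].len < length ? chunks[i].len : length;

		err = set_ptes(virt_addr, chunks[i].phys, chunk_len);
		if (err != 0) {
			break;
		}
		/* Advance past the successfully mapped chunk. */
		virt_addr += chunk_len;
		length -= chunk_len;
	}

	if (err < 0) {
		/* Back off: clear the partially written remainder. */
		(void)set_ptes(virt_addr, 0ULL, length);
	}
	return err;
}

int main(void)
{
	const struct chunk sgl[] = { { 0x1000, 0x2000 }, { 0x8000, 0x1000 } };

	return map_chunks(sgl, 2, 0x40000000ULL, 0x3000ULL);
}

Reusing the mapping walk with a zero physical address is what keeps
gmmu_unmap_attrs() so small: only .pgsz must match the original mapping, so
the walk visits the same PDEs/PTEs it filled on the way in.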
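Reviewer note: gk20a_gmmu_cache_maint_map()/gk20a_gmmu_cache_maint_unmap()
centralize batching logic that was previously inlined in the unmap path. A
rough, compilable sketch of that deferral pattern follows; maint_batch,
l2_flush(), and tlb_invalidate() are hypothetical stand-ins for struct
vm_gk20a_mapping_batch, gk20a_mm_l2_flush(), and g->ops.fb.tlb_invalidate().

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Stand-ins for gk20a_mm_l2_flush() and g->ops.fb.tlb_invalidate(). */
static void l2_flush(void)       { puts("L2 flush"); }
static void tlb_invalidate(void) { puts("TLB invalidate"); }

/* Mirrors the two flags of struct vm_gk20a_mapping_batch. */
struct maint_batch {
	bool l2_flushed;
	bool need_tlb_invalidate;
};

/*
 * Unmap-side maintenance: unbatched callers pay for an L2 flush and a TLB
 * invalidate on every call; batched callers flush L2 at most once and defer
 * the TLB invalidate until the batch is finished.
 */
static void maint_unmap(struct maint_batch *batch)
{
	if (batch == NULL) {
		l2_flush();
		tlb_invalidate();
		return;
	}
	if (!batch->l2_flushed) {
		l2_flush();
		batch->l2_flushed = true;
	}
	batch->need_tlb_invalidate = true;
}

/* Run once by the caller after the last (un)map of a batch. */
static void maint_batch_finish(struct maint_batch *batch)
{
	if (batch->need_tlb_invalidate) {
		tlb_invalidate();
	}
}

int main(void)
{
	struct maint_batch batch = { false, false };

	maint_unmap(&batch);		/* flushes L2, defers TLB invalidate */
	maint_unmap(&batch);		/* no further L2 flush */
	maint_batch_finish(&batch);	/* single TLB invalidate */
	return 0;
}

The point of the batch is cost amortization: N unmaps in one batch pay for
one L2 flush and one TLB invalidate instead of N of each.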