gpu: nvgpu: Implement compbits padding for mapping

Implement NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS, which adds extra
alignment and trail padding to the compbits allocation so that a
buffer's compbits can be mapped safely, i.e. without sharing a small
page with the compbits of a neighboring allocation.

Bug 200077571

Change-Id: I3a74ebb81412e4e1e69501debeb9ef4e2056ef1a
Signed-off-by: Sami Kiminki <skiminki@nvidia.com>
Reviewed-on: http://git-master/r/730763
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-on: http://git-master/r/740693
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Author:    Sami Kiminki
Date:      2015-04-10 15:40:46 +03:00
Committer: Terje Bergstrom
Commit:    8d6fe0f2ef
Parent:    5a5662fffb

3 changed files with 132 additions and 11 deletions

@@ -26,6 +26,7 @@
 #include <linux/tegra-soc.h>
 #include <linux/vmalloc.h>
 #include <linux/dma-buf.h>
+#include <linux/lcm.h>

 #include <uapi/linux/nvgpu.h>
 #include <trace/events/gk20a.h>
@@ -133,7 +134,8 @@ static void gk20a_mm_delete_priv(void *_priv)
                 BUG_ON(!priv->comptag_allocator);
                 priv->comptag_allocator->free(priv->comptag_allocator,
                                 priv->comptags.offset,
-                                priv->comptags.lines, 1);
+                                priv->comptags.allocated_lines,
+                                1);
         }

         /* Free buffer states */
@@ -208,22 +210,28 @@ void gk20a_get_comptags(struct device *dev, struct dma_buf *dmabuf,
                 return;

         if (!priv) {
-                comptags->lines = 0;
-                comptags->offset = 0;
+                memset(comptags, 0, sizeof(*comptags));
                 return;
         }

         *comptags = priv->comptags;
 }

-static int gk20a_alloc_comptags(struct device *dev,
+static int gk20a_alloc_comptags(struct gk20a *g,
+                                struct device *dev,
                                 struct dma_buf *dmabuf,
                                 struct gk20a_allocator *allocator,
-                                int lines)
+                                u32 lines, bool user_mappable)
 {
         struct gk20a_dmabuf_priv *priv = dma_buf_get_drvdata(dmabuf, dev);
         u32 offset = 0;
         int err;
+        u32 ctaglines_to_allocate;
+        u32 ctagline_align;
+        const u32 aggregate_cacheline_sz =
+                g->gr.cacheline_size * g->gr.slices_per_ltc *
+                g->ltc_count;
+        const u32 small_pgsz = 4096;

         if (!priv)
                 return -ENOSYS;
@@ -231,12 +239,99 @@ static int gk20a_alloc_comptags(struct device *dev,
         if (!lines)
                 return -EINVAL;

+        if (!user_mappable) {
+                ctaglines_to_allocate = lines;
+                ctagline_align = 1;
+        } else {
+                /* Unfortunately, we cannot use allocation alignment
+                 * here, since compbits per cacheline is not always a
+                 * power of two. So, we just have to allocate enough
+                 * extra that we're guaranteed to find a ctagline
+                 * inside the allocation so that: 1) it is the first
+                 * ctagline in a cacheline that starts at a page
+                 * boundary, and 2) we can add enough overallocation
+                 * that the ctaglines of the succeeding allocation
+                 * are on different page than ours
+                 */
+
+                ctagline_align =
+                        (lcm(aggregate_cacheline_sz, small_pgsz) /
+                         aggregate_cacheline_sz) *
+                        g->gr.comptags_per_cacheline;
+
+                ctaglines_to_allocate =
+                        /* for alignment */
+                        ctagline_align +
+
+                        /* lines rounded up to cachelines */
+                        DIV_ROUND_UP(lines, g->gr.comptags_per_cacheline) *
+                        g->gr.comptags_per_cacheline +
+
+                        /* trail-padding */
+                        DIV_ROUND_UP(aggregate_cacheline_sz, small_pgsz) *
+                        g->gr.comptags_per_cacheline;
+
+                if (ctaglines_to_allocate < lines)
+                        return -EINVAL; /* integer overflow */
+        }
+
         /* store the allocator so we can use it when we free the ctags */
         priv->comptag_allocator = allocator;
-        err = allocator->alloc(allocator, &offset, lines, 1);
+        err = allocator->alloc(allocator, &offset,
+                               ctaglines_to_allocate, 1);
         if (!err) {
-                priv->comptags.lines = lines;
+                const u32 alignment_lines =
+                        DIV_ROUND_UP(offset, ctagline_align) * ctagline_align -
+                        offset;
+
+                /* prune the preceding ctaglines that were allocated
+                   for alignment */
+                if (alignment_lines) {
+                        /* free alignment lines */
+                        int tmp =
+                                allocator->free(allocator, offset,
+                                                alignment_lines,
+                                                1);
+                        WARN_ON(tmp);
+
+                        offset += alignment_lines;
+                        ctaglines_to_allocate -= alignment_lines;
+                }
+
+                /* check if we can prune the trailing, too */
+                if (user_mappable)
+                {
+                        u32 needed_cachelines =
+                                DIV_ROUND_UP(lines, g->gr.comptags_per_cacheline);
+
+                        u32 first_unneeded_cacheline =
+                                DIV_ROUND_UP(round_up(needed_cachelines *
+                                                      aggregate_cacheline_sz,
+                                                      small_pgsz),
+                                             aggregate_cacheline_sz);
+                        u32 needed_ctaglines =
+                                first_unneeded_cacheline *
+                                g->gr.comptags_per_cacheline;
+
+                        if (needed_ctaglines < ctaglines_to_allocate) {
+                                /* free alignment lines */
+                                int tmp =
+                                        allocator->free(
+                                                allocator,
+                                                offset + needed_ctaglines,
+                                                (ctaglines_to_allocate -
+                                                        needed_ctaglines),
+                                                1);
+                                WARN_ON(tmp);
+
+                                ctaglines_to_allocate = needed_ctaglines;
+                        }
+                }
+
                 priv->comptags.offset = offset;
+                priv->comptags.lines = lines;
+                priv->comptags.allocated_lines = ctaglines_to_allocate;
+                priv->comptags.user_mappable = user_mappable;
         }
         return err;
 }
@@ -955,9 +1050,11 @@ struct buffer_attrs {
         u64 align;
         u32 ctag_offset;
         u32 ctag_lines;
+        u32 ctag_allocated_lines;
         int pgsz_idx;
         u8 kind_v;
         u8 uc_kind_v;
+        bool ctag_user_mappable;
 };

 static void gmmu_select_page_size(struct vm_gk20a *vm,
@@ -1399,22 +1496,37 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
                                  g->ops.fb.compression_page_size(g));

         if (bfr.ctag_lines && !comptags.lines) {
+                const bool user_mappable =
+                        !!(flags & NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS);
+
                 /* allocate compression resources if needed */
-                err = gk20a_alloc_comptags(d, dmabuf, ctag_allocator,
-                                           bfr.ctag_lines);
+                err = gk20a_alloc_comptags(g, d, dmabuf, ctag_allocator,
+                                           bfr.ctag_lines, user_mappable);
                 if (err) {
                         /* ok to fall back here if we ran out */
                         /* TBD: we can partially alloc ctags as well... */
-                        bfr.ctag_lines = bfr.ctag_offset = 0;
                         bfr.kind_v = bfr.uc_kind_v;
                 } else {
                         gk20a_get_comptags(d, dmabuf, &comptags);
                         clear_ctags = true;
+
+                        if (comptags.lines < comptags.allocated_lines) {
+                                /* clear tail-padding comptags */
+                                u32 ctagmin = comptags.offset + comptags.lines;
+                                u32 ctagmax = comptags.offset +
+                                        comptags.allocated_lines - 1;
+
+                                g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear,
+                                                    ctagmin, ctagmax);
+                        }
                 }
         }

         /* store the comptag info */
         bfr.ctag_offset = comptags.offset;
+        bfr.ctag_lines = comptags.lines;
+        bfr.ctag_allocated_lines = comptags.allocated_lines;
+        bfr.ctag_user_mappable = comptags.user_mappable;

         /* update gmmu ptes */
         map_offset = g->ops.mm.gmmu_map(vm, map_offset,
@@ -1433,10 +1545,11 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
         gk20a_dbg(gpu_dbg_map,
                 "as=%d pgsz=%d "
                 "kind=0x%x kind_uc=0x%x flags=0x%x "
-                "ctags=%d start=%d gv=0x%x,%08x -> 0x%x,%08x -> 0x%x,%08x",
+                "ctags=%d start=%d ctags_allocated=%d ctags_mappable=%d gv=0x%x,%08x -> 0x%x,%08x -> 0x%x,%08x",
                 vm_aspace_id(vm), gmmu_page_size,
                 bfr.kind_v, bfr.uc_kind_v, flags,
                 bfr.ctag_lines, bfr.ctag_offset,
+                bfr.ctag_allocated_lines, bfr.ctag_user_mappable,
                 hi32(map_offset), lo32(map_offset),
                 hi32((u64)sg_dma_address(bfr.sgt->sgl)),
                 lo32((u64)sg_dma_address(bfr.sgt->sgl)),
@@ -1473,6 +1586,8 @@ u64 gk20a_vm_map(struct vm_gk20a *vm,
         mapped_buffer->pgsz_idx = bfr.pgsz_idx;
         mapped_buffer->ctag_offset = bfr.ctag_offset;
         mapped_buffer->ctag_lines = bfr.ctag_lines;
+        mapped_buffer->ctag_allocated_lines = bfr.ctag_allocated_lines;
+        mapped_buffer->ctags_mappable = bfr.ctag_user_mappable;
         mapped_buffer->vm = vm;
         mapped_buffer->flags = flags;
         mapped_buffer->kind = kind;
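
For orientation, the over-allocation math in gk20a_alloc_comptags() can be tried out in isolation. The sketch below is illustrative only: cacheline_size, slices_per_ltc, ltc_count and comptags_per_cacheline are made-up values standing in for the g->gr/g->ltc_count fields the kernel reads, and lcm()/DIV_ROUND_UP() are reimplemented for userspace.

/* Illustrative, self-contained rework of the ctagline padding math.
 * All hardware parameters below are assumptions for the example, not
 * real gk20a configuration.  Compile with: cc -o ctagmath ctagmath.c
 */
#include <stdio.h>
#include <stdint.h>

#define DIV_ROUND_UP(n, d)      (((n) + (d) - 1) / (d))

static uint32_t gcd(uint32_t a, uint32_t b)
{
        while (b) {
                uint32_t t = a % b;
                a = b;
                b = t;
        }
        return a;
}

static uint32_t lcm(uint32_t a, uint32_t b)
{
        return a / gcd(a, b) * b;       /* same contract as <linux/lcm.h> */
}

int main(void)
{
        const uint32_t cacheline_size = 512;            /* assumed */
        const uint32_t slices_per_ltc = 2;              /* assumed */
        const uint32_t ltc_count = 2;                   /* assumed */
        const uint32_t comptags_per_cacheline = 128;    /* assumed */
        const uint32_t small_pgsz = 4096;
        const uint32_t lines = 100;                     /* requested ctaglines */

        const uint32_t aggregate_cacheline_sz =
                cacheline_size * slices_per_ltc * ltc_count;            /* 2048 */

        /* ctaglines from one page-aligned cacheline start to the next */
        const uint32_t ctagline_align =
                (lcm(aggregate_cacheline_sz, small_pgsz) /
                 aggregate_cacheline_sz) * comptags_per_cacheline;      /* 256 */

        const uint32_t ctaglines_to_allocate =
                ctagline_align +                                /* head alignment: 256 */
                DIV_ROUND_UP(lines, comptags_per_cacheline) *
                comptags_per_cacheline +                        /* payload rounded up: 128 */
                DIV_ROUND_UP(aggregate_cacheline_sz, small_pgsz) *
                comptags_per_cacheline;                         /* trail padding: 128 */

        printf("request=%u align=%u allocate=%u ctaglines\n",
               (unsigned)lines, (unsigned)ctagline_align,
               (unsigned)ctaglines_to_allocate);                /* 100, 256, 512 */
        return 0;
}

With these assumed numbers, a 100-line request transiently reserves 512 ctaglines; the head and trail pruning in the hunk above then gives back everything except the cachelines covering one whole small page, so 256 ctaglines stay allocated.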

@@ -140,6 +140,8 @@ enum gmmu_pgsz_gk20a {
 struct gk20a_comptags {
         u32 offset;
         u32 lines;
+        u32 allocated_lines;
+        bool user_mappable;
 };

 struct gk20a_mm_entry {
@@ -190,6 +192,9 @@ struct mapped_buffer_node {
         u32 pgsz_idx;
         u32 ctag_offset;
         u32 ctag_lines;
+        u32 ctag_allocated_lines;
+        bool ctags_mappable;
+
         u32 flags;
         u32 kind;
         bool va_allocated;

@@ -859,6 +859,7 @@ struct nvgpu_as_map_buffer_args {
         __u32 flags;            /* in/out */
 #define NVGPU_AS_MAP_BUFFER_FLAGS_FIXED_OFFSET      (1 << 0)
 #define NVGPU_AS_MAP_BUFFER_FLAGS_CACHEABLE         (1 << 2)
+#define NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS (1 << 6)
         __u32 reserved;         /* in */
         __u32 dmabuf_fd;        /* in */
         __u32 page_size;        /* inout, 0:= best fit to buffer */
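
From the userspace side, opting in to mappable compbits is just a matter of setting the new flag in the map-buffer ioctl arguments. A minimal sketch follows, assuming the pre-existing NVGPU_AS_IOCTL_MAP_BUFFER ioctl and the installed <linux/nvgpu.h> uapi header path; only the flag itself comes from this change.

/* Minimal sketch: map a dma-buf into a GPU address space and request
 * user-mappable compbits.  NVGPU_AS_IOCTL_MAP_BUFFER and the
 * <linux/nvgpu.h> install path are assumed from the existing nvgpu UAPI.
 */
#include <string.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

static int map_with_mappable_compbits(int as_fd, int dmabuf_fd)
{
        struct nvgpu_as_map_buffer_args args;

        memset(&args, 0, sizeof(args));
        args.flags = NVGPU_AS_MAP_BUFFER_FLAGS_MAPPABLE_COMPBITS;
        args.dmabuf_fd = dmabuf_fd;
        args.page_size = 0;     /* 0 := best fit to buffer */

        return ioctl(as_fd, NVGPU_AS_IOCTL_MAP_BUFFER, &args);
}

When the flag is set, the driver pads the comptag allocation as in the first file, so the buffer's compbits start at a ctagline whose cacheline begins on a page boundary and the ctaglines of the succeeding allocation end up on a different small page.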