mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
gpu: nvgpu: update nvgpu_mem to accept u64 args
Currently, nvgpu_vidmem_buf_access_memory() accepts u64 size/offset values to access memory. However, the underlying nvgpu_mem read and write functions truncate the size/offset values to u32, so any part of a VIDMEM buffer above 4GB is inaccessible through the userspace IOCTL. This patch updates nvgpu_mem_rd_n() and nvgpu_mem_wr_n() to accept u64 size and u64 offset values.

Bug 2489032

Change-Id: I299742b1813e5e343a96ce25f649a39e792c3393
Signed-off-by: Vedashree Vidwans <vvidwans@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2143138
Reviewed-by: Automatic_Commit_Validation_User
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: Alex Waterman <alexw@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Vinod Gopalakrishnakurup <vinodg@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
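For illustration only (not part of the patch): when a u64 byte offset is passed through a u32 parameter, the high 32 bits are silently dropped, so offsets at or above 4GB alias back into the first 4GB of the buffer. A minimal standalone sketch of the failure mode, with hypothetical stand-in types:

#include <stdint.h>
#include <stdio.h>

typedef uint32_t u32;
typedef uint64_t u64;

/* Old-style interface: the u64 argument is truncated at the call boundary. */
static u64 truncate_to_u32(u64 offset)
{
	u32 narrowed = (u32)offset;	/* high 32 bits silently dropped */
	return narrowed;
}

int main(void)
{
	u64 offset = 0x100000400ULL;	/* 4GB + 1KB into a >4GB buffer */

	/* Prints 0x400: the access lands 1KB into the buffer, not 4GB+1KB. */
	printf("0x%llx\n", (unsigned long long)truncate_to_u32(offset));
	return 0;
}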
committed by mobile promotions
parent c69c5a7a60
commit 2fc673df49
@@ -108,7 +108,7 @@ u64 nvgpu_mem_iommu_translate(struct gk20a *g, u64 phys)
 	return phys;
 }
 
-u32 nvgpu_mem_rd32(struct gk20a *g, struct nvgpu_mem *mem, u32 w)
+u32 nvgpu_mem_rd32(struct gk20a *g, struct nvgpu_mem *mem, u64 w)
 {
 	u32 data = 0;
 
@@ -120,8 +120,8 @@ u32 nvgpu_mem_rd32(struct gk20a *g, struct nvgpu_mem *mem, u32 w)
 	}
 #ifdef CONFIG_NVGPU_DGPU
 	else if (mem->aperture == APERTURE_VIDMEM) {
-		nvgpu_pramin_rd_n(g, mem, w * (u32)sizeof(u32),
-				(u32)sizeof(u32), &data);
+		nvgpu_pramin_rd_n(g, mem, w * (u64)sizeof(u32),
+				(u64)sizeof(u32), &data);
 	}
 #endif
 	else {
@@ -139,17 +139,17 @@ u64 nvgpu_mem_rd32_pair(struct gk20a *g, struct nvgpu_mem *mem, u32 lo, u32 hi)
 	return lo_data | (hi_data << 32ULL);
 }
 
-u32 nvgpu_mem_rd(struct gk20a *g, struct nvgpu_mem *mem, u32 offset)
+u32 nvgpu_mem_rd(struct gk20a *g, struct nvgpu_mem *mem, u64 offset)
 {
-	WARN_ON((offset & 3U) != 0U);
-	return nvgpu_mem_rd32(g, mem, offset / (u32)sizeof(u32));
+	WARN_ON((offset & 3ULL) != 0ULL);
+	return nvgpu_mem_rd32(g, mem, offset / (u64)sizeof(u32));
 }
 
 void nvgpu_mem_rd_n(struct gk20a *g, struct nvgpu_mem *mem,
-		u32 offset, void *dest, u32 size)
+		u64 offset, void *dest, u64 size)
 {
-	WARN_ON((offset & 3U) != 0U);
-	WARN_ON((size & 3U) != 0U);
+	WARN_ON((offset & 3ULL) != 0ULL);
+	WARN_ON((size & 3ULL) != 0ULL);
 
 	if (mem->aperture == APERTURE_SYSMEM) {
 		u8 *src = (u8 *)mem->cpu_va + offset;
@@ -167,7 +167,7 @@ void nvgpu_mem_rd_n(struct gk20a *g, struct nvgpu_mem *mem,
 	}
 }
 
-void nvgpu_mem_wr32(struct gk20a *g, struct nvgpu_mem *mem, u32 w, u32 data)
+void nvgpu_mem_wr32(struct gk20a *g, struct nvgpu_mem *mem, u64 w, u32 data)
 {
 	if (mem->aperture == APERTURE_SYSMEM) {
 		u32 *ptr = mem->cpu_va;
@@ -177,8 +177,9 @@ void nvgpu_mem_wr32(struct gk20a *g, struct nvgpu_mem *mem, u32 w, u32 data)
 	}
 #ifdef CONFIG_NVGPU_DGPU
 	else if (mem->aperture == APERTURE_VIDMEM) {
-		nvgpu_pramin_wr_n(g, mem, w * (u32)sizeof(u32),
-				(u32)sizeof(u32), &data);
+		nvgpu_pramin_wr_n(g, mem, w * (u64)sizeof(u32),
+				(u64)sizeof(u32), &data);
+
 		if (!mem->skip_wmb) {
 			nvgpu_wmb();
 		}
@@ -189,17 +190,17 @@ void nvgpu_mem_wr32(struct gk20a *g, struct nvgpu_mem *mem, u32 w, u32 data)
 	}
 }
 
-void nvgpu_mem_wr(struct gk20a *g, struct nvgpu_mem *mem, u32 offset, u32 data)
+void nvgpu_mem_wr(struct gk20a *g, struct nvgpu_mem *mem, u64 offset, u32 data)
 {
-	WARN_ON((offset & 3U) != 0U);
-	nvgpu_mem_wr32(g, mem, offset / (u32)sizeof(u32), data);
+	WARN_ON((offset & 3ULL) != 0ULL);
+	nvgpu_mem_wr32(g, mem, offset / (u64)sizeof(u32), data);
 }
 
-void nvgpu_mem_wr_n(struct gk20a *g, struct nvgpu_mem *mem, u32 offset,
-		void *src, u32 size)
+void nvgpu_mem_wr_n(struct gk20a *g, struct nvgpu_mem *mem, u64 offset,
+		void *src, u64 size)
 {
-	WARN_ON((offset & 3U) != 0U);
-	WARN_ON((size & 3U) != 0U);
+	WARN_ON((offset & 3ULL) != 0ULL);
+	WARN_ON((size & 3ULL) != 0ULL);
 
 	if (mem->aperture == APERTURE_SYSMEM) {
 		u8 *dest = (u8 *)mem->cpu_va + offset;
@@ -220,11 +221,11 @@ void nvgpu_mem_wr_n(struct gk20a *g, struct nvgpu_mem *mem, u32 offset,
 	}
 }
 
-void nvgpu_memset(struct gk20a *g, struct nvgpu_mem *mem, u32 offset,
-		u32 c, u32 size)
+void nvgpu_memset(struct gk20a *g, struct nvgpu_mem *mem, u64 offset,
+		u32 c, u64 size)
 {
-	WARN_ON((offset & 3U) != 0U);
-	WARN_ON((size & 3U) != 0U);
+	WARN_ON((offset & 3ULL) != 0ULL);
+	WARN_ON((size & 3ULL) != 0ULL);
 	WARN_ON((c & ~0xffU) != 0U);
 
 	c &= 0xffU;
@@ -32,7 +32,7 @@
  * This typedef is for functions that get called during the access_batched()
  * operation.
  */
-typedef void (*pramin_access_batch_fn)(struct gk20a *g, u32 start, u32 words,
+typedef void (*pramin_access_batch_fn)(struct gk20a *g, u64 start, u64 words,
 		u32 **arg);
 
 /*
@@ -41,12 +41,12 @@ typedef void (*pramin_access_batch_fn)(struct gk20a *g, u32 start, u32 words,
  * One call to "loop" is done per range, with "arg" supplied.
 */
 static void nvgpu_pramin_access_batched(struct gk20a *g, struct nvgpu_mem *mem,
-		u32 offset, u32 size, pramin_access_batch_fn loop, u32 **arg)
+		u64 offset, u64 size, pramin_access_batch_fn loop, u32 **arg)
 {
 	struct nvgpu_page_alloc *alloc = NULL;
 	struct nvgpu_sgt *sgt;
 	struct nvgpu_sgl *sgl;
-	u32 byteoff, start_reg, until_end, n;
+	u64 byteoff, start_reg, until_end, n;
 
 	/*
 	 * TODO: Vidmem is not accesible through pramin on shutdown path.
@@ -64,26 +64,26 @@ static void nvgpu_pramin_access_batched(struct gk20a *g, struct nvgpu_mem *mem,
 		if (offset >= nvgpu_sgt_get_length(sgt, sgl)) {
 			u64 tmp_offset = nvgpu_sgt_get_length(sgt, sgl);
 
-			nvgpu_assert(tmp_offset <= U64(offset));
-			offset -= U32(tmp_offset);
+			nvgpu_assert(tmp_offset <= offset);
+			offset -= tmp_offset;
 		} else {
 			break;
 		}
 	}
 
 	while (size != 0U) {
-		u32 sgl_len;
+		u64 sgl_len;
 
 		BUG_ON(sgl == NULL);
-		sgl_len = (u32)nvgpu_sgt_get_length(sgt, sgl);
+		sgl_len = nvgpu_sgt_get_length(sgt, sgl);
 
 		nvgpu_spinlock_acquire(&g->mm.pramin_window_lock);
 		byteoff = g->ops.bus.set_bar0_window(g, mem, sgt, sgl,
 				offset / sizeof(u32));
 		start_reg = g->ops.pramin.data032_r(byteoff / sizeof(u32));
-		until_end = U32(SZ_1M) - (byteoff & (U32(SZ_1M) - 1U));
+		until_end = U64(SZ_1M) - (byteoff & (U64(SZ_1M) - 1U));
 
-		n = min3(size, until_end, (u32)(sgl_len - offset));
+		n = min3(size, until_end, (sgl_len - offset));
 
 		loop(g, start_reg, n / sizeof(u32), arg);
 
@@ -104,9 +104,10 @@ static void nvgpu_pramin_access_batched(struct gk20a *g, struct nvgpu_mem *mem,
 }
 
 static void nvgpu_pramin_access_batch_rd_n(struct gk20a *g,
-		u32 start, u32 words, u32 **arg)
+		u64 start, u64 words, u32 **arg)
 {
-	u32 r = start, *dest_u32 = *arg;
+	u32 *dest_u32 = *arg;
+	u64 r = start;
 
 	while (words != 0U) {
 		words--;
@@ -118,7 +119,7 @@ static void nvgpu_pramin_access_batch_rd_n(struct gk20a *g,
 }
 
 void nvgpu_pramin_rd_n(struct gk20a *g, struct nvgpu_mem *mem,
-		u32 start, u32 size, void *dest)
+		u64 start, u64 size, void *dest)
 {
 	u32 *dest_u32 = dest;
 
@@ -127,9 +128,10 @@ void nvgpu_pramin_rd_n(struct gk20a *g, struct nvgpu_mem *mem,
 }
 
 static void nvgpu_pramin_access_batch_wr_n(struct gk20a *g,
-		u32 start, u32 words, u32 **arg)
+		u64 start, u64 words, u32 **arg)
 {
-	u32 r = start, *src_u32 = *arg;
+	u32 *src_u32 = *arg;
+	u64 r = start;
 
 	while (words != 0U) {
 		words--;
@@ -141,7 +143,7 @@ static void nvgpu_pramin_access_batch_wr_n(struct gk20a *g,
 }
 
 void nvgpu_pramin_wr_n(struct gk20a *g, struct nvgpu_mem *mem,
-		u32 start, u32 size, void *src)
+		u64 start, u64 size, void *src)
 {
 	u32 *src_u32 = src;
 
@@ -150,9 +152,10 @@ void nvgpu_pramin_wr_n(struct gk20a *g, struct nvgpu_mem *mem,
 }
 
 static void nvgpu_pramin_access_batch_set(struct gk20a *g,
-		u32 start, u32 words, u32 **arg)
+		u64 start, u64 words, u32 **arg)
 {
-	u32 r = start, repeat = **arg;
+	u32 repeat = **arg;
+	u64 r = start;
 
 	while (words != 0U) {
 		words--;
@@ -162,7 +165,7 @@ static void nvgpu_pramin_access_batch_set(struct gk20a *g,
 }
 
 void nvgpu_pramin_memset(struct gk20a *g, struct nvgpu_mem *mem,
-		u32 start, u32 size, u32 w)
+		u64 start, u64 size, u32 w)
 {
 	u32 *p = &w;
 
@@ -265,26 +265,26 @@ void __nvgpu_mem_free_vidmem_alloc(struct gk20a *g, struct nvgpu_mem *vidmem);
 */
 
 /* word-indexed offset */
-u32 nvgpu_mem_rd32(struct gk20a *g, struct nvgpu_mem *mem, u32 w);
+u32 nvgpu_mem_rd32(struct gk20a *g, struct nvgpu_mem *mem, u64 w);
 /* word-indexed offsets */
 u64 nvgpu_mem_rd32_pair(struct gk20a *g, struct nvgpu_mem *mem,
 		u32 lo, u32 hi);
 /* byte offset (32b-aligned) */
-u32 nvgpu_mem_rd(struct gk20a *g, struct nvgpu_mem *mem, u32 offset);
+u32 nvgpu_mem_rd(struct gk20a *g, struct nvgpu_mem *mem, u64 offset);
 /* memcpy to cpu, offset and size in bytes (32b-aligned) */
-void nvgpu_mem_rd_n(struct gk20a *g, struct nvgpu_mem *mem, u32 offset,
-		void *dest, u32 size);
+void nvgpu_mem_rd_n(struct gk20a *g, struct nvgpu_mem *mem, u64 offset,
+		void *dest, u64 size);
 
 /* word-indexed offset */
-void nvgpu_mem_wr32(struct gk20a *g, struct nvgpu_mem *mem, u32 w, u32 data);
+void nvgpu_mem_wr32(struct gk20a *g, struct nvgpu_mem *mem, u64 w, u32 data);
 /* byte offset (32b-aligned) */
-void nvgpu_mem_wr(struct gk20a *g, struct nvgpu_mem *mem, u32 offset, u32 data);
+void nvgpu_mem_wr(struct gk20a *g, struct nvgpu_mem *mem, u64 offset, u32 data);
 /* memcpy from cpu, offset and size in bytes (32b-aligned) */
-void nvgpu_mem_wr_n(struct gk20a *g, struct nvgpu_mem *mem, u32 offset,
-		void *src, u32 size);
+void nvgpu_mem_wr_n(struct gk20a *g, struct nvgpu_mem *mem, u64 offset,
+		void *src, u64 size);
 /* size and offset in bytes (32b-aligned), filled with the constant byte c */
-void nvgpu_memset(struct gk20a *g, struct nvgpu_mem *mem, u32 offset,
-		u32 c, u32 size);
+void nvgpu_memset(struct gk20a *g, struct nvgpu_mem *mem, u64 offset,
+		u32 c, u64 size);
 
 u64 nvgpu_mem_get_addr(struct gk20a *g, struct nvgpu_mem *mem);
 u64 nvgpu_mem_get_phys_addr(struct gk20a *g, struct nvgpu_mem *mem);
@@ -32,9 +32,12 @@ struct mm_gk20a;
 struct nvgpu_mem;
 
 
-void nvgpu_pramin_rd_n(struct gk20a *g, struct nvgpu_mem *mem, u32 start, u32 size, void *dest);
-void nvgpu_pramin_wr_n(struct gk20a *g, struct nvgpu_mem *mem, u32 start, u32 size, void *src);
-void nvgpu_pramin_memset(struct gk20a *g, struct nvgpu_mem *mem, u32 start, u32 size, u32 w);
+void nvgpu_pramin_rd_n(struct gk20a *g, struct nvgpu_mem *mem, u64 start,
+		u64 size, void *dest);
+void nvgpu_pramin_wr_n(struct gk20a *g, struct nvgpu_mem *mem, u64 start,
+		u64 size, void *src);
+void nvgpu_pramin_memset(struct gk20a *g, struct nvgpu_mem *mem, u64 start,
+		u64 size, u32 w);
 
 void nvgpu_init_pramin(struct mm_gk20a *mm);
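A side note on the word-indexed helpers (illustrative, not part of the patch): nvgpu_mem_rd32() and nvgpu_mem_wr32() compute the byte offset as w * sizeof(u32). With a u32 word index and 32-bit arithmetic, that multiply wraps once w reaches 0x40000000, which is another way a >4GB buffer would alias into its first 4GB; widening w to u64, as the hunks above do, makes the multiply 64-bit. A standalone sketch of the wraparound:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Word index for byte offset 4GB: 4GB / 4 bytes per word. */
	uint32_t w = 0x40000000u;

	/* 32-bit multiply: 0x40000000 * 4 wraps to 0. */
	uint32_t byteoff32 = w * (uint32_t)sizeof(uint32_t);

	/* 64-bit multiply, as after the patch: yields 0x100000000. */
	uint64_t byteoff64 = (uint64_t)w * sizeof(uint32_t);

	printf("u32 math: 0x%x, u64 math: 0x%llx\n",
	       byteoff32, (unsigned long long)byteoff64);
	return 0;
}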