diff --git a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c index 66eafec37..d0ebd47ea 100644 --- a/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c +++ b/drivers/gpu/nvgpu/common/mm/nvgpu_mem.c @@ -108,7 +108,7 @@ u64 nvgpu_mem_iommu_translate(struct gk20a *g, u64 phys) return phys; } -u32 nvgpu_mem_rd32(struct gk20a *g, struct nvgpu_mem *mem, u32 w) +u32 nvgpu_mem_rd32(struct gk20a *g, struct nvgpu_mem *mem, u64 w) { u32 data = 0; @@ -120,8 +120,8 @@ u32 nvgpu_mem_rd32(struct gk20a *g, struct nvgpu_mem *mem, u32 w) } #ifdef CONFIG_NVGPU_DGPU else if (mem->aperture == APERTURE_VIDMEM) { - nvgpu_pramin_rd_n(g, mem, w * (u32)sizeof(u32), - (u32)sizeof(u32), &data); + nvgpu_pramin_rd_n(g, mem, w * (u64)sizeof(u32), + (u64)sizeof(u32), &data); } #endif else { @@ -139,17 +139,17 @@ u64 nvgpu_mem_rd32_pair(struct gk20a *g, struct nvgpu_mem *mem, u32 lo, u32 hi) return lo_data | (hi_data << 32ULL); } -u32 nvgpu_mem_rd(struct gk20a *g, struct nvgpu_mem *mem, u32 offset) +u32 nvgpu_mem_rd(struct gk20a *g, struct nvgpu_mem *mem, u64 offset) { - WARN_ON((offset & 3U) != 0U); - return nvgpu_mem_rd32(g, mem, offset / (u32)sizeof(u32)); + WARN_ON((offset & 3ULL) != 0ULL); + return nvgpu_mem_rd32(g, mem, offset / (u64)sizeof(u32)); } void nvgpu_mem_rd_n(struct gk20a *g, struct nvgpu_mem *mem, - u32 offset, void *dest, u32 size) + u64 offset, void *dest, u64 size) { - WARN_ON((offset & 3U) != 0U); - WARN_ON((size & 3U) != 0U); + WARN_ON((offset & 3ULL) != 0ULL); + WARN_ON((size & 3ULL) != 0ULL); if (mem->aperture == APERTURE_SYSMEM) { u8 *src = (u8 *)mem->cpu_va + offset; @@ -167,7 +167,7 @@ void nvgpu_mem_rd_n(struct gk20a *g, struct nvgpu_mem *mem, } } -void nvgpu_mem_wr32(struct gk20a *g, struct nvgpu_mem *mem, u32 w, u32 data) +void nvgpu_mem_wr32(struct gk20a *g, struct nvgpu_mem *mem, u64 w, u32 data) { if (mem->aperture == APERTURE_SYSMEM) { u32 *ptr = mem->cpu_va; @@ -177,8 +177,9 @@ void nvgpu_mem_wr32(struct gk20a *g, struct nvgpu_mem *mem, u32 w, u32 data) } #ifdef CONFIG_NVGPU_DGPU else if (mem->aperture == APERTURE_VIDMEM) { - nvgpu_pramin_wr_n(g, mem, w * (u32)sizeof(u32), - (u32)sizeof(u32), &data); + nvgpu_pramin_wr_n(g, mem, w * (u64)sizeof(u32), + (u64)sizeof(u32), &data); + if (!mem->skip_wmb) { nvgpu_wmb(); } @@ -189,17 +190,17 @@ void nvgpu_mem_wr32(struct gk20a *g, struct nvgpu_mem *mem, u32 w, u32 data) } } -void nvgpu_mem_wr(struct gk20a *g, struct nvgpu_mem *mem, u32 offset, u32 data) +void nvgpu_mem_wr(struct gk20a *g, struct nvgpu_mem *mem, u64 offset, u32 data) { - WARN_ON((offset & 3U) != 0U); - nvgpu_mem_wr32(g, mem, offset / (u32)sizeof(u32), data); + WARN_ON((offset & 3ULL) != 0ULL); + nvgpu_mem_wr32(g, mem, offset / (u64)sizeof(u32), data); } -void nvgpu_mem_wr_n(struct gk20a *g, struct nvgpu_mem *mem, u32 offset, - void *src, u32 size) +void nvgpu_mem_wr_n(struct gk20a *g, struct nvgpu_mem *mem, u64 offset, + void *src, u64 size) { - WARN_ON((offset & 3U) != 0U); - WARN_ON((size & 3U) != 0U); + WARN_ON((offset & 3ULL) != 0ULL); + WARN_ON((size & 3ULL) != 0ULL); if (mem->aperture == APERTURE_SYSMEM) { u8 *dest = (u8 *)mem->cpu_va + offset; @@ -220,11 +221,11 @@ void nvgpu_mem_wr_n(struct gk20a *g, struct nvgpu_mem *mem, u32 offset, } } -void nvgpu_memset(struct gk20a *g, struct nvgpu_mem *mem, u32 offset, - u32 c, u32 size) +void nvgpu_memset(struct gk20a *g, struct nvgpu_mem *mem, u64 offset, + u32 c, u64 size) { - WARN_ON((offset & 3U) != 0U); - WARN_ON((size & 3U) != 0U); + WARN_ON((offset & 3ULL) != 0ULL); + WARN_ON((size & 3ULL) != 0ULL); WARN_ON((c & ~0xffU) != 0U); c &= 0xffU; diff --git a/drivers/gpu/nvgpu/common/pramin.c b/drivers/gpu/nvgpu/common/pramin.c index c2be054b1..561eab2f8 100644 --- a/drivers/gpu/nvgpu/common/pramin.c +++ b/drivers/gpu/nvgpu/common/pramin.c @@ -32,7 +32,7 @@ * This typedef is for functions that get called during the access_batched() * operation. */ -typedef void (*pramin_access_batch_fn)(struct gk20a *g, u32 start, u32 words, +typedef void (*pramin_access_batch_fn)(struct gk20a *g, u64 start, u64 words, u32 **arg); /* @@ -41,12 +41,12 @@ typedef void (*pramin_access_batch_fn)(struct gk20a *g, u32 start, u32 words, * One call to "loop" is done per range, with "arg" supplied. */ static void nvgpu_pramin_access_batched(struct gk20a *g, struct nvgpu_mem *mem, - u32 offset, u32 size, pramin_access_batch_fn loop, u32 **arg) + u64 offset, u64 size, pramin_access_batch_fn loop, u32 **arg) { struct nvgpu_page_alloc *alloc = NULL; struct nvgpu_sgt *sgt; struct nvgpu_sgl *sgl; - u32 byteoff, start_reg, until_end, n; + u64 byteoff, start_reg, until_end, n; /* * TODO: Vidmem is not accesible through pramin on shutdown path. @@ -64,26 +64,26 @@ static void nvgpu_pramin_access_batched(struct gk20a *g, struct nvgpu_mem *mem, if (offset >= nvgpu_sgt_get_length(sgt, sgl)) { u64 tmp_offset = nvgpu_sgt_get_length(sgt, sgl); - nvgpu_assert(tmp_offset <= U64(offset)); - offset -= U32(tmp_offset); + nvgpu_assert(tmp_offset <= offset); + offset -= tmp_offset; } else { break; } } while (size != 0U) { - u32 sgl_len; + u64 sgl_len; BUG_ON(sgl == NULL); - sgl_len = (u32)nvgpu_sgt_get_length(sgt, sgl); + sgl_len = nvgpu_sgt_get_length(sgt, sgl); nvgpu_spinlock_acquire(&g->mm.pramin_window_lock); byteoff = g->ops.bus.set_bar0_window(g, mem, sgt, sgl, offset / sizeof(u32)); start_reg = g->ops.pramin.data032_r(byteoff / sizeof(u32)); - until_end = U32(SZ_1M) - (byteoff & (U32(SZ_1M) - 1U)); + until_end = U64(SZ_1M) - (byteoff & (U64(SZ_1M) - 1U)); - n = min3(size, until_end, (u32)(sgl_len - offset)); + n = min3(size, until_end, (sgl_len - offset)); loop(g, start_reg, n / sizeof(u32), arg); @@ -104,9 +104,10 @@ static void nvgpu_pramin_access_batched(struct gk20a *g, struct nvgpu_mem *mem, } static void nvgpu_pramin_access_batch_rd_n(struct gk20a *g, - u32 start, u32 words, u32 **arg) + u64 start, u64 words, u32 **arg) { - u32 r = start, *dest_u32 = *arg; + u32 *dest_u32 = *arg; + u64 r = start; while (words != 0U) { words--; @@ -118,7 +119,7 @@ static void nvgpu_pramin_access_batch_rd_n(struct gk20a *g, } void nvgpu_pramin_rd_n(struct gk20a *g, struct nvgpu_mem *mem, - u32 start, u32 size, void *dest) + u64 start, u64 size, void *dest) { u32 *dest_u32 = dest; @@ -127,9 +128,10 @@ void nvgpu_pramin_rd_n(struct gk20a *g, struct nvgpu_mem *mem, } static void nvgpu_pramin_access_batch_wr_n(struct gk20a *g, - u32 start, u32 words, u32 **arg) + u64 start, u64 words, u32 **arg) { - u32 r = start, *src_u32 = *arg; + u32 *src_u32 = *arg; + u64 r = start; while (words != 0U) { words--; @@ -141,7 +143,7 @@ static void nvgpu_pramin_access_batch_wr_n(struct gk20a *g, } void nvgpu_pramin_wr_n(struct gk20a *g, struct nvgpu_mem *mem, - u32 start, u32 size, void *src) + u64 start, u64 size, void *src) { u32 *src_u32 = src; @@ -150,9 +152,10 @@ void nvgpu_pramin_wr_n(struct gk20a *g, struct nvgpu_mem *mem, } static void nvgpu_pramin_access_batch_set(struct gk20a *g, - u32 start, u32 words, u32 **arg) + u64 start, u64 words, u32 **arg) { - u32 r = start, repeat = **arg; + u32 repeat = **arg; + u64 r = start; while (words != 0U) { words--; @@ -162,7 +165,7 @@ static void nvgpu_pramin_access_batch_set(struct gk20a *g, } void nvgpu_pramin_memset(struct gk20a *g, struct nvgpu_mem *mem, - u32 start, u32 size, u32 w) + u64 start, u64 size, u32 w) { u32 *p = &w; diff --git a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h index 1fac7d52a..55cf3066a 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h +++ b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_mem.h @@ -265,26 +265,26 @@ void __nvgpu_mem_free_vidmem_alloc(struct gk20a *g, struct nvgpu_mem *vidmem); */ /* word-indexed offset */ -u32 nvgpu_mem_rd32(struct gk20a *g, struct nvgpu_mem *mem, u32 w); +u32 nvgpu_mem_rd32(struct gk20a *g, struct nvgpu_mem *mem, u64 w); /* word-indexed offsets */ u64 nvgpu_mem_rd32_pair(struct gk20a *g, struct nvgpu_mem *mem, u32 lo, u32 hi); /* byte offset (32b-aligned) */ -u32 nvgpu_mem_rd(struct gk20a *g, struct nvgpu_mem *mem, u32 offset); +u32 nvgpu_mem_rd(struct gk20a *g, struct nvgpu_mem *mem, u64 offset); /* memcpy to cpu, offset and size in bytes (32b-aligned) */ -void nvgpu_mem_rd_n(struct gk20a *g, struct nvgpu_mem *mem, u32 offset, - void *dest, u32 size); +void nvgpu_mem_rd_n(struct gk20a *g, struct nvgpu_mem *mem, u64 offset, + void *dest, u64 size); /* word-indexed offset */ -void nvgpu_mem_wr32(struct gk20a *g, struct nvgpu_mem *mem, u32 w, u32 data); +void nvgpu_mem_wr32(struct gk20a *g, struct nvgpu_mem *mem, u64 w, u32 data); /* byte offset (32b-aligned) */ -void nvgpu_mem_wr(struct gk20a *g, struct nvgpu_mem *mem, u32 offset, u32 data); +void nvgpu_mem_wr(struct gk20a *g, struct nvgpu_mem *mem, u64 offset, u32 data); /* memcpy from cpu, offset and size in bytes (32b-aligned) */ -void nvgpu_mem_wr_n(struct gk20a *g, struct nvgpu_mem *mem, u32 offset, - void *src, u32 size); +void nvgpu_mem_wr_n(struct gk20a *g, struct nvgpu_mem *mem, u64 offset, + void *src, u64 size); /* size and offset in bytes (32b-aligned), filled with the constant byte c */ -void nvgpu_memset(struct gk20a *g, struct nvgpu_mem *mem, u32 offset, - u32 c, u32 size); +void nvgpu_memset(struct gk20a *g, struct nvgpu_mem *mem, u64 offset, + u32 c, u64 size); u64 nvgpu_mem_get_addr(struct gk20a *g, struct nvgpu_mem *mem); u64 nvgpu_mem_get_phys_addr(struct gk20a *g, struct nvgpu_mem *mem); diff --git a/drivers/gpu/nvgpu/include/nvgpu/pramin.h b/drivers/gpu/nvgpu/include/nvgpu/pramin.h index 759a5d0ae..c92e383b3 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/pramin.h +++ b/drivers/gpu/nvgpu/include/nvgpu/pramin.h @@ -32,9 +32,12 @@ struct mm_gk20a; struct nvgpu_mem; -void nvgpu_pramin_rd_n(struct gk20a *g, struct nvgpu_mem *mem, u32 start, u32 size, void *dest); -void nvgpu_pramin_wr_n(struct gk20a *g, struct nvgpu_mem *mem, u32 start, u32 size, void *src); -void nvgpu_pramin_memset(struct gk20a *g, struct nvgpu_mem *mem, u32 start, u32 size, u32 w); +void nvgpu_pramin_rd_n(struct gk20a *g, struct nvgpu_mem *mem, u64 start, + u64 size, void *dest); +void nvgpu_pramin_wr_n(struct gk20a *g, struct nvgpu_mem *mem, u64 start, + u64 size, void *src); +void nvgpu_pramin_memset(struct gk20a *g, struct nvgpu_mem *mem, u64 start, + u64 size, u32 w); void nvgpu_init_pramin(struct mm_gk20a *mm);