diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 08dd41c53..c37f790e3 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1685,7 +1685,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 	struct scatterlist *cur_chunk;
 	unsigned int cur_offset;
 	u32 pte_w[2] = {0, 0}; /* invalid pte */
-	u32 ctag = ctag_offset;
+	u32 ctag = ctag_offset * SZ_128K;
 	u32 ctag_incr;
 	u32 page_size = vm->gmmu_page_sizes[pgsz_idx];
 	u64 addr = 0;
@@ -1700,7 +1700,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
-	/* If ctag_offset !=0 add 1 else add 0. The idea is to avoid a branch
-	 * below (per-pte). Note: this doesn't work unless page size (when
-	 * comptags are active) is 128KB. We have checks elsewhere for that. */
-	ctag_incr = !!ctag_offset;
+	/* Track ctag in bytes and advance it by page_size per PTE; dividing
+	 * by 128KB when the PTE is written yields the comptag line, so page
+	 * sizes smaller than 128KB can share a line without a per-pte branch. */
+	ctag_incr = ctag_offset ? page_size : 0;
 
 	cur_offset = 0;
 	if (sgt) {
@@ -1769,7 +1769,7 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 				>> gmmu_pte_address_shift_v());
 		pte_w[1] = gmmu_pte_aperture_video_memory_f() |
 			gmmu_pte_kind_f(kind_v) |
-			gmmu_pte_comptagline_f(ctag);
+			gmmu_pte_comptagline_f(ctag / SZ_128K);
 
 		if (rw_flag == gk20a_mem_flag_read_only) {
 			pte_w[0] |= gmmu_pte_read_only_true_f();
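
For intuition, here is a minimal standalone sketch of the new comptag
arithmetic. This is not driver code: SZ_128K is redefined locally, and the
ctag_offset and page_size values are made-up examples. It shows ctag being
tracked in bytes, advanced by page_size per PTE, and divided back down to a
comptag line index at the point where the PTE field would be written.

#include <stdio.h>

#define SZ_128K (128u * 1024u)	/* one comptag line covers 128KB of VA */

int main(void)
{
	unsigned int ctag_offset = 3;		/* made-up first comptag line */
	unsigned int page_size = 64u * 1024u;	/* 64KB small pages */
	/* As in the patch: track ctag in bytes so it can step per page. */
	unsigned int ctag = ctag_offset * SZ_128K;
	unsigned int ctag_incr = ctag_offset ? page_size : 0;
	unsigned int pte;

	for (pte = 0; pte < 6; pte++) {
		/* Value that would feed gmmu_pte_comptagline_f() per PTE. */
		printf("pte %u -> comptag line %u\n", pte, ctag / SZ_128K);
		ctag += ctag_incr;
	}
	return 0;
}

With 64KB pages this prints comptag lines 3, 3, 4, 4, 5, 5: each pair of
consecutive small-page PTEs shares one 128KB comptag line. The old code
incremented the line index by one per PTE, which was only correct when the
page size was itself 128KB.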