From 4aef10c9507a19fb288936b88b0faeb62a520817 Mon Sep 17 00:00:00 2001
From: Terje Bergstrom
Date: Mon, 19 Jan 2015 14:50:57 -0800
Subject: [PATCH] gpu: nvgpu: Set compression page per SoC

Compression page size varies depending on architecture, so query it
through a new per-SoC hook, gops->fb.compression_page_size(), instead
of hard-coding it. Make it 128kB on gk20a and gm20b. Also export some
common LTC functions from gm20b.

Bug 1592495

Change-Id: Ifb1c5b15d25fa961dab097021080055fc385fecd
Signed-off-by: Terje Bergstrom
Reviewed-on: http://git-master/r/673790
---
(An illustrative sketch of wiring the new hook for a hypothetical
future chip follows the diff, along with a note on the ctag_lines
arithmetic.)

 drivers/gpu/nvgpu/gk20a/fb_gk20a.c  |  6 ++++++
 drivers/gpu/nvgpu/gk20a/gk20a.c     |  2 +-
 drivers/gpu/nvgpu/gk20a/gk20a.h     |  1 +
 drivers/gpu/nvgpu/gk20a/gr_gk20a.h  |  3 ---
 drivers/gpu/nvgpu/gk20a/mm_gk20a.c  | 22 +++++++++++-----------
 drivers/gpu/nvgpu/gk20a/mm_gk20a.h  |  1 -
 drivers/gpu/nvgpu/gm20b/fb_gm20b.c  |  6 ++++++
 drivers/gpu/nvgpu/gm20b/ltc_gm20b.c | 14 +++++++-------
 drivers/gpu/nvgpu/gm20b/ltc_gm20b.h |  7 +++++++
 drivers/gpu/nvgpu/vgpu/mm_vgpu.c    |  1 -
 10 files changed, 39 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/nvgpu/gk20a/fb_gk20a.c b/drivers/gpu/nvgpu/gk20a/fb_gk20a.c
index d5b3fd877..568aed7a7 100644
--- a/drivers/gpu/nvgpu/gk20a/fb_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fb_gk20a.c
@@ -42,10 +42,16 @@ static void gk20a_fb_set_mmu_page_size(struct gk20a *g)
 	gk20a_writel(g, fb_mmu_ctrl_r(), fb_mmu_ctrl);
 }
 
+static int gk20a_fb_compression_page_size(struct gk20a *g)
+{
+	return SZ_128K;
+}
+
 void gk20a_init_fb(struct gpu_ops *gops)
 {
 	gops->fb.reset = fb_gk20a_reset;
 	gops->fb.set_mmu_page_size = gk20a_fb_set_mmu_page_size;
+	gops->fb.compression_page_size = gk20a_fb_compression_page_size;
 	gk20a_init_uncompressed_kind_map();
 	gk20a_init_kind_attr();
 }
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c
index 6c18c895a..57d5f09ac 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -1788,7 +1788,7 @@ int gk20a_init_gpu_characteristics(struct gk20a *g)
 	gpu->bus_type = NVGPU_GPU_BUS_TYPE_AXI; /* always AXI for now */
 
 	gpu->big_page_size = g->mm.pmu.vm.big_page_size;
-	gpu->compression_page_size = g->mm.pmu.vm.compression_page_size;
+	gpu->compression_page_size = g->ops.fb.compression_page_size(g);
 	gpu->pde_coverage_bit_count = g->mm.pmu.vm.pde_stride_shift;
 	gpu->available_big_page_sizes = gpu->big_page_size;
 
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index b9796faab..4fbc25be6 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -167,6 +167,7 @@ struct gpu_ops {
 		void (*init_uncompressed_kind_map)(struct gk20a *g);
 		void (*init_kind_attr)(struct gk20a *g);
 		void (*set_mmu_page_size)(struct gk20a *g);
+		int (*compression_page_size)(struct gk20a *g);
 	} fb;
 	struct {
 		void (*slcg_bus_load_gating_prod)(struct gk20a *g, bool prod);
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
index f130b8306..cd6fe9cb4 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h
@@ -258,9 +258,6 @@ struct gr_gk20a {
 	u32 map_tile_count;
 	u32 map_row_offset;
 
-#define COMP_TAG_LINE_SIZE_SHIFT (17) /* one tag covers 128K */
-#define COMP_TAG_LINE_SIZE (1 << COMP_TAG_LINE_SIZE_SHIFT)
-
 	u32 max_comptag_mem; /* max memory size (MB) for comptag */
 	struct compbit_store_desc compbit_store;
 	struct gk20a_allocator comp_tags;
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
index 3bce3c74c..6b7f84a35 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.c
@@ -1001,7 +1001,9 @@ static int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
 			enum gmmu_pgsz_gk20a pgsz_idx)
 {
 	bool kind_compressible;
-	struct device *d = dev_from_gk20a(vm->mm->g);
+	struct gk20a *g = gk20a_from_vm(vm);
+	struct device *d = dev_from_gk20a(g);
+	int ctag_granularity = g->ops.fb.compression_page_size(g);
 
 	if (unlikely(bfr->kind_v == gmmu_pte_kind_invalid_v()))
 		bfr->kind_v = gmmu_pte_kind_pitch_v();
@@ -1036,8 +1038,7 @@ static int setup_buffer_kind_and_compression(struct vm_gk20a *vm,
 		kind_compressible = false;
 	}
 	if (kind_compressible)
-		bfr->ctag_lines = ALIGN(bfr->size, COMP_TAG_LINE_SIZE) >>
-			COMP_TAG_LINE_SIZE_SHIFT;
+		bfr->ctag_lines = DIV_ROUND_UP_ULL(bfr->size, ctag_granularity);
 	else
 		bfr->ctag_lines = 0;
 
@@ -1113,10 +1114,10 @@ u64 gk20a_locked_gmmu_map(struct vm_gk20a *vm,
 	u32 pde_lo, pde_hi;
 	struct device *d = dev_from_vm(vm);
 	struct gk20a *g = gk20a_from_vm(vm);
+	int ctag_granularity = g->ops.fb.compression_page_size(g);
 
 	if (clear_ctags && ctag_offset) {
-		u32 ctag_lines = ALIGN(size, COMP_TAG_LINE_SIZE) >>
-			COMP_TAG_LINE_SIZE_SHIFT;
+		u32 ctag_lines = DIV_ROUND_UP_ULL(size, ctag_granularity);
 
 		/* init/clear the ctag buffer */
 		g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_clear,
@@ -1756,7 +1757,9 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 	struct scatterlist *cur_chunk;
 	unsigned int cur_offset;
 	u32 pte_w[2] = {0, 0}; /* invalid pte */
-	u32 ctag = ctag_offset * SZ_128K;
+	struct gk20a *g = gk20a_from_vm(vm);
+	u32 ctag_granularity = g->ops.fb.compression_page_size(g);
+	u32 ctag = ctag_offset * ctag_granularity;
 	u32 ctag_incr;
 	u32 page_size = vm->gmmu_page_sizes[pgsz_idx];
 	u64 addr = 0;
@@ -1768,9 +1771,6 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 	gk20a_dbg(gpu_dbg_pte, "size_idx=%d, pde_lo=%d, pde_hi=%d",
 		   pgsz_idx, pde_lo, pde_hi);
 
-	/* If ctag_offset !=0 add 1 else add 0. The idea is to avoid a branch
-	 * below (per-pte). Note: this doesn't work unless page size (when
-	 * comptags are active) is 128KB. We have checks elsewhere for that. */
 	ctag_incr = ctag_offset ? page_size : 0;
 
 	cur_offset = 0;
@@ -1843,7 +1843,8 @@ static int update_gmmu_ptes_locked(struct vm_gk20a *vm,
 				 >> gmmu_pte_address_shift_v());
 			pte_w[1] = gmmu_pte_aperture_video_memory_f() |
 				gmmu_pte_kind_f(kind_v) |
-				gmmu_pte_comptagline_f(ctag / SZ_128K);
+				gmmu_pte_comptagline_f(ctag
+					/ ctag_granularity);
 
 			if (rw_flag == gk20a_mem_flag_read_only) {
 				pte_w[0] |= gmmu_pte_read_only_true_f();
@@ -2161,7 +2162,6 @@ int gk20a_init_vm(struct mm_gk20a *mm,
 
 	vm->big_pages = big_pages;
 	vm->big_page_size = gmmu_page_sizes[gmmu_page_size_big];
-	vm->compression_page_size = gmmu_page_sizes[gmmu_page_size_big];
 	vm->pde_stride = vm->big_page_size << 10;
 	vm->pde_stride_shift = ilog2(vm->pde_stride);
 
diff --git a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
index 8470a7ac2..e4fc30852 100644
--- a/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/mm_gk20a.h
@@ -293,7 +293,6 @@ struct vm_gk20a {
 	bool tlb_dirty;
 	bool mapped;
 
-	u32 compression_page_size;
 	u32 big_page_size;
 	u32 pde_stride;
 	u32 pde_stride_shift;
diff --git a/drivers/gpu/nvgpu/gm20b/fb_gm20b.c b/drivers/gpu/nvgpu/gm20b/fb_gm20b.c
index 7cdd776ee..deef78965 100644
--- a/drivers/gpu/nvgpu/gm20b/fb_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/fb_gm20b.c
@@ -90,10 +90,16 @@ static void gm20b_fb_set_mmu_page_size(struct gk20a *g)
 	gk20a_writel(g, fb_mmu_ctrl_r(), fb_mmu_ctrl);
 }
 
+static int gm20b_fb_compression_page_size(struct gk20a *g)
+{
+	return SZ_128K;
+}
+
 void gm20b_init_fb(struct gpu_ops *gops)
 {
 	gops->fb.init_fs_state = fb_gm20b_init_fs_state;
 	gops->fb.set_mmu_page_size = gm20b_fb_set_mmu_page_size;
+	gops->fb.compression_page_size = gm20b_fb_compression_page_size;
 	gm20b_init_uncompressed_kind_map();
 	gm20b_init_kind_attr();
 }
diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
index fe2e06d5f..0a0efe414 100644
--- a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
+++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c
@@ -101,8 +101,8 @@ static int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr)
 	return 0;
 }
 
-static int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
-			      u32 min, u32 max)
+int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
+		       u32 min, u32 max)
 {
 	int err = 0;
 	struct gr_gk20a *gr = &g->gr;
@@ -170,7 +170,7 @@ out:
 	return 0;
 }
 
-static void gm20b_ltc_init_fs_state(struct gk20a *g)
+void gm20b_ltc_init_fs_state(struct gk20a *g)
 {
 	u32 reg;
 
@@ -196,7 +196,7 @@ static void gm20b_ltc_init_fs_state(struct gk20a *g)
 	gk20a_writel(g, ltc_ltcs_ltss_intr_r(), reg);
 }
 
-static void gm20b_ltc_isr(struct gk20a *g)
+void gm20b_ltc_isr(struct gk20a *g)
 {
 	u32 mc_intr, ltc_intr;
 	int ltc, slice;
@@ -221,7 +221,7 @@ static void gm20b_ltc_isr(struct gk20a *g)
 	}
 }
 
-static void gm20b_ltc_g_elpg_flush_locked(struct gk20a *g)
+void gm20b_ltc_g_elpg_flush_locked(struct gk20a *g)
 {
 	u32 data;
 	bool done[g->ltc_count];
@@ -265,7 +265,7 @@ static void gm20b_ltc_g_elpg_flush_locked(struct gk20a *g)
 		"g_elpg_flush too many retries");
 }
 
-static u32 gm20b_ltc_cbc_fix_config(struct gk20a *g, int base)
+u32 gm20b_ltc_cbc_fix_config(struct gk20a *g, int base)
 {
 	u32 val = gk20a_readl(g, ltc_ltcs_ltss_cbc_num_active_ltcs_r());
 	if (val == 2) {
@@ -281,7 +281,7 @@ static u32 gm20b_ltc_cbc_fix_config(struct gk20a *g, int base)
 /*
  * Performs a full flush of the L2 cache.
  */
-static void gm20b_flush_ltc(struct gk20a *g)
+void gm20b_flush_ltc(struct gk20a *g)
 {
 	u32 op_pending;
 	unsigned long now, timeout;
diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.h b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.h
index c7524264d..288e193a7 100644
--- a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.h
+++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.h
@@ -18,4 +18,11 @@ struct gpu_ops;
 
 void gm20b_init_ltc(struct gpu_ops *gops);
 
+void gm20b_ltc_init_fs_state(struct gk20a *g);
+int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op,
+		u32 min, u32 max);
+void gm20b_ltc_g_elpg_flush_locked(struct gk20a *g);
+void gm20b_ltc_isr(struct gk20a *g);
+u32 gm20b_ltc_cbc_fix_config(struct gk20a *g, int base);
+void gm20b_flush_ltc(struct gk20a *g);
 #endif
diff --git a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
index 2dd8cb68c..6817b107b 100644
--- a/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/mm_vgpu.c
@@ -39,7 +39,6 @@ static int vgpu_init_mm_setup_sw(struct gk20a *g)
 
 	/* gk20a_init_gpu_characteristics expects this to be populated */
 	vm->big_page_size = big_page_size;
-	vm->compression_page_size = big_page_size;
 	vm->pde_stride = vm->big_page_size << 10;
 	vm->pde_stride_shift = ilog2(vm->pde_stride);
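
Illustrative sketch (not part of the patch): with the granularity behind
gops->fb, a chip that compresses at a different granularity only needs
its own hook. The chip prefix "gpxxx" and the 64kB value below are
hypothetical placeholders, assuming the same gpu_ops wiring this patch
adds for gk20a and gm20b:

	/* Hypothetical chip whose comptags each cover 64kB. */
	static int gpxxx_fb_compression_page_size(struct gk20a *g)
	{
		return SZ_64K;
	}

	void gpxxx_init_fb(struct gpu_ops *gops)
	{
		/* ... this chip's other fb hooks ... */
		gops->fb.compression_page_size = gpxxx_fb_compression_page_size;
	}

All consumers (gk20a_init_gpu_characteristics, setup_buffer_kind_and_compression,
gk20a_locked_gmmu_map, update_gmmu_ptes_locked) already go through
g->ops.fb.compression_page_size(g), so no further changes would be needed.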
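
Note on the ctag_lines conversion: for the power-of-two granularity used
here the old and new forms agree, e.g. for a 300kB (307200 byte) buffer
both ALIGN(307200, SZ_128K) >> 17 and DIV_ROUND_UP_ULL(307200, SZ_128K)
yield 3 comptag lines. The division form simply stops baking in the
128kB assumption that the removed COMP_TAG_LINE_SIZE defines (and the
deleted comment in update_gmmu_ptes_locked) relied on, and would also
hold for a non-power-of-two granularity.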