diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index 82aa25d36..ae8ae2244 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile @@ -80,6 +80,7 @@ nvgpu-y += \ common/ltc/ltc_gv11b.o \ common/ltc/ltc_tu104.o \ common/gr/gr.o \ + common/cbc/cbc.o \ common/gr/ctxsw_prog/ctxsw_prog_gm20b.o \ common/gr/ctxsw_prog/ctxsw_prog_gp10b.o \ common/gr/ctxsw_prog/ctxsw_prog_gv11b.o \ diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources index 07bbd5f81..9803b78ff 100644 --- a/drivers/gpu/nvgpu/Makefile.sources +++ b/drivers/gpu/nvgpu/Makefile.sources @@ -97,6 +97,7 @@ srcs += common/sim.c \ common/string.c \ common/rbtree.c \ common/ltc/ltc.c \ + common/cbc/cbc.c \ common/ltc/ltc_gm20b.c \ common/ltc/ltc_gp10b.c \ common/ltc/ltc_gv11b.c \ diff --git a/drivers/gpu/nvgpu/common/cbc/cbc.c b/drivers/gpu/nvgpu/common/cbc/cbc.c new file mode 100644 index 000000000..57c8915ce --- /dev/null +++ b/drivers/gpu/nvgpu/common/cbc/cbc.c @@ -0,0 +1,92 @@ +/* + * CBC + * + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + + +#include +#include +#include +#include +#include +#include + +/* + * Tear down what nvgpu_cbc_init_support() built: free the compbit backing + * store if one is valid, destroy the comptag allocator, then free the + * nvgpu_cbc object and clear g->cbc. Returns early when g->cbc is NULL. + */ +void nvgpu_cbc_remove_support(struct gk20a *g) +{ + struct nvgpu_cbc *cbc = g->cbc; + + nvgpu_log_fn(g, " "); + + if (cbc == NULL) { + return; + } + + if (nvgpu_mem_is_valid(&cbc->compbit_store.mem)) { + nvgpu_dma_free(g, &cbc->compbit_store.mem); + (void) memset(&cbc->compbit_store, 0, + sizeof(struct compbit_store_desc)); + } + gk20a_comptag_allocator_destroy(g, &cbc->comp_tags); + + nvgpu_kfree(g, cbc); + g->cbc = NULL; +} + +/* + * Allocate g->cbc and its comptags the first time through, then run the chip + * cbc.init hook on every call. Returns 0 or a negative errno. If comptag + * allocation fails, g->cbc is torn down again and the init hook is skipped. + */ +int nvgpu_cbc_init_support(struct gk20a *g) +{ + int err = 0; + struct nvgpu_cbc *cbc = g->cbc; + + nvgpu_log_fn(g, " "); + + if (cbc == NULL) { + cbc = nvgpu_kzalloc(g, sizeof(*cbc)); + if (cbc == NULL) { + return -ENOMEM; + } + g->cbc = cbc; + if (g->ops.cbc.alloc_comptags != NULL) { + err = g->ops.cbc.alloc_comptags(g, g->cbc); + if (err != 0) { + /* Drop the half-built cbc so a retry reallocates and HW init never sees it. */ + nvgpu_cbc_remove_support(g); + return err; + } + } + } + + if (g->ops.cbc.init != NULL) { + g->ops.cbc.init(g, g->cbc); + } + + return err; +} diff --git a/drivers/gpu/nvgpu/common/ecc.c b/drivers/gpu/nvgpu/common/ecc.c index 605ea3755..06655f3fd 100644 --- a/drivers/gpu/nvgpu/common/ecc.c +++ b/drivers/gpu/nvgpu/common/ecc.c @@ -128,7 +128,6 @@ int nvgpu_ecc_counter_init(struct gk20a *g, int nvgpu_ecc_counter_init_per_lts(struct gk20a *g, struct nvgpu_ecc_stat ***stat, const char *name) { - struct gr_gk20a *gr = &g->gr; struct nvgpu_ecc_stat **stats; u32 ltc, lts; int err = 0; @@ -139,7 +138,7 @@ int nvgpu_ecc_counter_init_per_lts(struct gk20a *g, } for (ltc = 0; ltc < g->ltc_count; ltc++) { stats[ltc] = nvgpu_kzalloc(g, - sizeof(*stats[ltc]) * gr->slices_per_ltc); + sizeof(*stats[ltc]) * g->slices_per_ltc); if (stats[ltc] == NULL) { err = -ENOMEM; break; @@ -156,7 +155,7 @@ int nvgpu_ecc_counter_init_per_lts(struct 
gk20a *g, } for (ltc = 0; ltc < g->ltc_count; ltc++) { - for (lts = 0; lts < gr->slices_per_ltc; lts++) { + for (lts = 0; lts < g->slices_per_ltc; lts++) { (void) snprintf(stats[ltc][lts].name, NVGPU_ECC_STAT_NAME_MAX_SIZE, "ltc%d_lts%d_%s", ltc, lts, name); diff --git a/drivers/gpu/nvgpu/common/fb/fb_gv11b.c b/drivers/gpu/nvgpu/common/fb/fb_gv11b.c index 17e0a0a3f..b2372183b 100644 --- a/drivers/gpu/nvgpu/common/fb/fb_gv11b.c +++ b/drivers/gpu/nvgpu/common/fb/fb_gv11b.c @@ -121,7 +121,7 @@ void gv11b_fb_init_fs_state(struct gk20a *g) } } -void gv11b_fb_cbc_configure(struct gk20a *g, struct gr_gk20a *gr) +void gv11b_fb_cbc_configure(struct gk20a *g, struct nvgpu_cbc *cbc) { u32 compbit_base_post_divide; u64 compbit_base_post_multiply64; @@ -130,10 +130,10 @@ void gv11b_fb_cbc_configure(struct gk20a *g, struct gr_gk20a *gr) if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) { compbit_store_iova = nvgpu_mem_get_phys_addr(g, - &gr->compbit_store.mem); + &cbc->compbit_store.mem); } else { compbit_store_iova = nvgpu_mem_get_addr(g, - &gr->compbit_store.mem); + &cbc->compbit_store.mem); } /* must be aligned to 64 KB */ compbit_store_iova = roundup(compbit_store_iova, (u64)SZ_64K); @@ -167,7 +167,7 @@ void gv11b_fb_cbc_configure(struct gk20a *g, struct gr_gk20a *gr) nvgpu_log(g, gpu_dbg_fn, "cbc base %x", gk20a_readl(g, fb_mmu_cbc_base_r())); - gr->compbit_store.base_hw = compbit_base_post_divide; + cbc->compbit_store.base_hw = compbit_base_post_divide; } diff --git a/drivers/gpu/nvgpu/common/fb/fb_gv11b.h b/drivers/gpu/nvgpu/common/fb/fb_gv11b.h index 1c68ff222..8883179eb 100644 --- a/drivers/gpu/nvgpu/common/fb/fb_gv11b.h +++ b/drivers/gpu/nvgpu/common/fb/fb_gv11b.h @@ -30,11 +30,12 @@ struct gk20a; struct gr_gk20a; +struct nvgpu_cbc; void gv11b_fb_init_hw(struct gk20a *g); void gv11b_fb_init_fs_state(struct gk20a *g); -void gv11b_fb_cbc_configure(struct gk20a *g, struct gr_gk20a *gr); +void gv11b_fb_cbc_configure(struct gk20a *g, struct nvgpu_cbc *cbc); void 
gv11b_fb_reset(struct gk20a *g); void gv11b_fb_hub_isr(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/common/fb/fb_tu104.c b/drivers/gpu/nvgpu/common/fb/fb_tu104.c index d883262b8..ea1aebb2c 100644 --- a/drivers/gpu/nvgpu/common/fb/fb_tu104.c +++ b/drivers/gpu/nvgpu/common/fb/fb_tu104.c @@ -424,7 +424,7 @@ int fb_tu104_mmu_invalidate_replay(struct gk20a *g, return err; } -void tu104_fb_cbc_configure(struct gk20a *g, struct gr_gk20a *gr) +void tu104_fb_cbc_configure(struct gk20a *g, struct nvgpu_cbc *cbc) { u64 base_divisor; u64 compbit_store_base; @@ -434,13 +434,13 @@ void tu104_fb_cbc_configure(struct gk20a *g, struct gr_gk20a *gr) u32 cbc_top_size; u32 cbc_max; - compbit_store_pa = nvgpu_mem_get_addr(g, &gr->compbit_store.mem); + compbit_store_pa = nvgpu_mem_get_addr(g, &cbc->compbit_store.mem); base_divisor = g->ops.cbc.get_base_divisor(g); compbit_store_base = DIV_ROUND_UP(compbit_store_pa, base_divisor); cbc_start_addr = (u64)g->ltc_count * (compbit_store_base << fb_mmu_cbc_base_address_alignment_shift_v()); - cbc_end_addr = cbc_start_addr + gr->compbit_backing_size; + cbc_end_addr = cbc_start_addr + cbc->compbit_backing_size; cbc_top = (cbc_end_addr / g->ltc_count) >> fb_mmu_cbc_base_address_alignment_shift_v(); @@ -452,7 +452,7 @@ void tu104_fb_cbc_configure(struct gk20a *g, struct gr_gk20a *gr) cbc_max = nvgpu_readl(g, fb_mmu_cbc_max_r()); cbc_max = set_field(cbc_max, fb_mmu_cbc_max_comptagline_m(), - fb_mmu_cbc_max_comptagline_f(gr->max_comptag_lines)); + fb_mmu_cbc_max_comptagline_f(cbc->max_comptag_lines)); nvgpu_writel(g, fb_mmu_cbc_max_r(), cbc_max); nvgpu_writel(g, fb_mmu_cbc_base_r(), @@ -464,7 +464,7 @@ void tu104_fb_cbc_configure(struct gk20a *g, struct gr_gk20a *gr) (u32)(compbit_store_pa & 0xffffffffU), compbit_store_base); - gr->compbit_store.base_hw = compbit_store_base; + cbc->compbit_store.base_hw = compbit_store_base; } diff --git a/drivers/gpu/nvgpu/common/fb/fb_tu104.h b/drivers/gpu/nvgpu/common/fb/fb_tu104.h index 68d4ad52b..db2ccdf04 
100644 --- a/drivers/gpu/nvgpu/common/fb/fb_tu104.h +++ b/drivers/gpu/nvgpu/common/fb/fb_tu104.h @@ -28,6 +28,7 @@ struct gk20a; struct gr_gk20a; struct nvgpu_mem; +struct nvgpu_cbc; void tu104_fb_enable_hub_intr(struct gk20a *g); void tu104_fb_disable_hub_intr(struct gk20a *g); @@ -55,7 +56,7 @@ int fb_tu104_tlb_invalidate(struct gk20a *g, struct nvgpu_mem *pdb); int fb_tu104_mmu_invalidate_replay(struct gk20a *g, u32 invalidate_replay_val); -void tu104_fb_cbc_configure(struct gk20a *g, struct gr_gk20a *gr); +void tu104_fb_cbc_configure(struct gk20a *g, struct nvgpu_cbc *cbc); int tu104_fb_apply_pdb_cache_war(struct gk20a *g); size_t tu104_fb_get_vidmem_size(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/common/init/nvgpu_init.c b/drivers/gpu/nvgpu/common/init/nvgpu_init.c index 361d6f880..d52e298a9 100644 --- a/drivers/gpu/nvgpu/common/init/nvgpu_init.c +++ b/drivers/gpu/nvgpu/common/init/nvgpu_init.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -373,6 +374,12 @@ int gk20a_finalize_poweron(struct gk20a *g) goto done; } + err = nvgpu_cbc_init_support(g); + if (err != 0) { + nvgpu_err(g, "failed to init cbc"); + goto done; + } + g->ops.chip_init_gpu_characteristics(g); /* Restore the debug setting */ diff --git a/drivers/gpu/nvgpu/common/ltc/ltc_gm20b.c b/drivers/gpu/nvgpu/common/ltc/ltc_gm20b.c index b50f2ee3d..496302a78 100644 --- a/drivers/gpu/nvgpu/common/ltc/ltc_gm20b.c +++ b/drivers/gpu/nvgpu/common/ltc/ltc_gm20b.c @@ -40,7 +40,6 @@ void gm20b_ltc_init_fs_state(struct gk20a *g) { - struct gr_gk20a *gr = &g->gr; u32 reg; nvgpu_log_info(g, "initialize gm20b l2"); @@ -50,8 +49,8 @@ void gm20b_ltc_init_fs_state(struct gk20a *g) nvgpu_log_info(g, "%d ltcs out of %d", g->ltc_count, g->max_ltc_count); reg = gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()); - gr->slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(reg);; - gr->cacheline_size = + g->slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(reg);; + 
g->cacheline_size = U32(512) << ltc_ltcs_ltss_cbc_param_cache_line_size_v(reg); gk20a_writel(g, ltc_ltcs_ltss_cbc_num_active_ltcs_r(), @@ -92,7 +91,7 @@ void gm20b_ltc_isr(struct gk20a *g, unsigned int ltc) { unsigned int slice; - for (slice = 0U; slice < g->gr.slices_per_ltc; slice++) { + for (slice = 0U; slice < g->slices_per_ltc; slice++) { gm20b_ltc_lts_isr(g, ltc, slice); } } diff --git a/drivers/gpu/nvgpu/common/ltc/ltc_gp10b.c b/drivers/gpu/nvgpu/common/ltc/ltc_gp10b.c index fdacb7b7a..4f586131e 100644 --- a/drivers/gpu/nvgpu/common/ltc/ltc_gp10b.c +++ b/drivers/gpu/nvgpu/common/ltc/ltc_gp10b.c @@ -118,7 +118,7 @@ void gp10b_ltc_isr(struct gk20a *g, unsigned int ltc) { unsigned int slice; - for (slice = 0U; slice < g->gr.slices_per_ltc; slice++) { + for (slice = 0U; slice < g->slices_per_ltc; slice++) { gp10b_ltc_lts_isr(g, ltc, slice); } } diff --git a/drivers/gpu/nvgpu/common/ltc/ltc_gv11b.c b/drivers/gpu/nvgpu/common/ltc/ltc_gv11b.c index 54dbf1cdb..f78157a24 100644 --- a/drivers/gpu/nvgpu/common/ltc/ltc_gv11b.c +++ b/drivers/gpu/nvgpu/common/ltc/ltc_gv11b.c @@ -53,7 +53,6 @@ void gv11b_ltc_set_zbc_stencil_entry(struct gk20a *g, void gv11b_ltc_init_fs_state(struct gk20a *g) { - struct gr_gk20a *gr = &g->gr; u32 ltc_intr; u32 reg; @@ -64,8 +63,8 @@ void gv11b_ltc_init_fs_state(struct gk20a *g) nvgpu_log_info(g, "%u ltcs out of %u", g->ltc_count, g->max_ltc_count); reg = gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()); - gr->slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(reg);; - gr->cacheline_size = + g->slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(reg);; + g->cacheline_size = U32(512) << ltc_ltcs_ltss_cbc_param_cache_line_size_v(reg); /* Disable LTC interrupts */ @@ -239,7 +238,7 @@ void gv11b_ltc_isr(struct gk20a *g, unsigned int ltc) { unsigned int slice; - for (slice = 0U; slice < g->gr.slices_per_ltc; slice++) { + for (slice = 0U; slice < g->slices_per_ltc; slice++) { gv11b_ltc_lts_isr(g, ltc, slice); } } diff --git 
a/drivers/gpu/nvgpu/common/ltc/ltc_tu104.c b/drivers/gpu/nvgpu/common/ltc/ltc_tu104.c index ba194622a..314f699fa 100644 --- a/drivers/gpu/nvgpu/common/ltc/ltc_tu104.c +++ b/drivers/gpu/nvgpu/common/ltc/ltc_tu104.c @@ -37,15 +37,14 @@ void ltc_tu104_init_fs_state(struct gk20a *g) { - struct gr_gk20a *gr = &g->gr; u32 reg; gv11b_ltc_init_fs_state(g); reg = nvgpu_readl(g, ltc_ltcs_ltss_cbc_param2_r()); - gr->slices_per_ltc = + g->slices_per_ltc = ltc_ltcs_ltss_cbc_param2_slices_per_ltc_v(reg); - gr->cacheline_size = + g->cacheline_size = U32(512) << ltc_ltcs_ltss_cbc_param2_cache_line_size_v(reg); /* disable PLC compression */ diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c index 3ff414513..8080e6f33 100644 --- a/drivers/gpu/nvgpu/common/mm/vm.c +++ b/drivers/gpu/nvgpu/common/mm/vm.c @@ -983,7 +983,7 @@ struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm, * Get the comptags state, alloc if necessary */ err = gk20a_alloc_or_get_comptags(g, os_buf, - &g->gr.comp_tags, + &g->cbc->comp_tags, &comptags); if (err != 0) { /* diff --git a/drivers/gpu/nvgpu/common/vgpu/cbc/cbc_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/cbc/cbc_vgpu.c index 189b54d2c..aeb4c2cd2 100644 --- a/drivers/gpu/nvgpu/common/vgpu/cbc/cbc_vgpu.c +++ b/drivers/gpu/nvgpu/common/vgpu/cbc/cbc_vgpu.c @@ -27,7 +27,7 @@ #include "cbc_vgpu.h" -int vgpu_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr) +int vgpu_cbc_alloc_comptags(struct gk20a *g, struct nvgpu_cbc *cbc) { struct vgpu_priv_data *priv = vgpu_get_priv_data(g); u32 max_comptag_lines = 0; @@ -35,19 +35,19 @@ int vgpu_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr) nvgpu_log_fn(g, " "); - gr->comptags_per_cacheline = priv->constants.comptags_per_cacheline; + cbc->comptags_per_cacheline = priv->constants.comptags_per_cacheline; max_comptag_lines = priv->constants.comptag_lines; if (max_comptag_lines < 2) { return -ENXIO; } - err = gk20a_comptag_allocator_init(g, &gr->comp_tags, max_comptag_lines); + err 
= gk20a_comptag_allocator_init(g, &cbc->comp_tags, max_comptag_lines); if (err) { return err; } - gr->max_comptag_lines = max_comptag_lines; + cbc->max_comptag_lines = max_comptag_lines; return 0; } diff --git a/drivers/gpu/nvgpu/common/vgpu/cbc/cbc_vgpu.h b/drivers/gpu/nvgpu/common/vgpu/cbc/cbc_vgpu.h index 2ebb66df8..451f5a667 100644 --- a/drivers/gpu/nvgpu/common/vgpu/cbc/cbc_vgpu.h +++ b/drivers/gpu/nvgpu/common/vgpu/cbc/cbc_vgpu.h @@ -24,8 +24,8 @@ #define NVGPU_CBC_VGPU_H struct gk20a; -struct gr_gk20a; +struct nvgpu_cbc; -int vgpu_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr); +int vgpu_cbc_alloc_comptags(struct gk20a *g, struct nvgpu_cbc *cbc); #endif /* NVGPU_CBC_VGPU_H */ diff --git a/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c index fe9940ea1..4576d8ef2 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c +++ b/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c @@ -646,8 +646,6 @@ static void vgpu_remove_gr_support(struct gr_gk20a *gr) { nvgpu_log_fn(gr->g, " "); - gk20a_comptag_allocator_destroy(gr->g, &gr->comp_tags); - nvgpu_gr_config_deinit(gr->g, gr->config); nvgpu_kfree(gr->g, gr->sm_to_cluster); @@ -685,11 +683,6 @@ static int vgpu_gr_init_gr_setup_sw(struct gk20a *g) goto clean_up; } - err = g->ops.cbc.alloc_comptags(g, gr); - if (err) { - goto clean_up; - } - err = vgpu_gr_alloc_global_ctx_buffers(g); if (err) { goto clean_up; diff --git a/drivers/gpu/nvgpu/common/vgpu/ltc/ltc_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/ltc/ltc_vgpu.c index 54006d700..16f78a235 100644 --- a/drivers/gpu/nvgpu/common/vgpu/ltc/ltc_vgpu.c +++ b/drivers/gpu/nvgpu/common/vgpu/ltc/ltc_vgpu.c @@ -39,11 +39,10 @@ int vgpu_determine_L2_size_bytes(struct gk20a *g) void vgpu_ltc_init_fs_state(struct gk20a *g) { struct vgpu_priv_data *priv = vgpu_get_priv_data(g); - struct gr_gk20a *gr = &g->gr; nvgpu_log_fn(g, " "); g->ltc_count = priv->constants.ltc_count; - gr->cacheline_size = priv->constants.cacheline_size; - 
gr->slices_per_ltc = priv->constants.slices_per_ltc; + g->cacheline_size = priv->constants.cacheline_size; + g->slices_per_ltc = priv->constants.slices_per_ltc; } diff --git a/drivers/gpu/nvgpu/common/vgpu/vgpu.c b/drivers/gpu/nvgpu/common/vgpu/vgpu.c index 5d8df3fc2..709d223eb 100644 --- a/drivers/gpu/nvgpu/common/vgpu/vgpu.c +++ b/drivers/gpu/nvgpu/common/vgpu/vgpu.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "fecs_trace_vgpu.h" @@ -453,6 +454,12 @@ int vgpu_finalize_poweron_common(struct gk20a *g) return err; } + err = nvgpu_cbc_init_support(g); + if (err != 0) { + nvgpu_err(g, "failed to init cbc"); + return err; + } + g->ops.chip_init_gpu_characteristics(g); g->ops.fifo.channel_resume(g); diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 18999c5d0..cd1d5cc02 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -2092,11 +2093,6 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr) nvgpu_gr_ctx_desc_free(g, gr->gr_ctx_desc); - nvgpu_dma_free(g, &gr->compbit_store.mem); - - (void) memset(&gr->compbit_store, 0, - sizeof(struct compbit_store_desc)); - nvgpu_gr_config_deinit(g, gr->config); nvgpu_kfree(g, gr->sm_to_cluster); @@ -2114,7 +2110,7 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr) nvgpu_gr_hwpm_map_deinit(g, gr->hwpm_map); - gk20a_comptag_allocator_destroy(g, &gr->comp_tags); + nvgpu_cbc_remove_support(g); nvgpu_ecc_remove_support(g); nvgpu_gr_zbc_deinit(g, gr->zbc); @@ -2594,10 +2590,6 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g) goto out; } - if (g->ops.cbc.init != NULL) { - g->ops.cbc.init(g, gr); - } - if (g->ops.gr.disable_rd_coalesce != NULL) { g->ops.gr.disable_rd_coalesce(g); } @@ -2804,13 +2796,6 @@ static int gk20a_init_gr_setup_sw(struct gk20a *g) goto clean_up; } - if (g->ops.cbc.alloc_comptags != NULL) { - err = 
g->ops.cbc.alloc_comptags(g, gr); - if (err != 0) { - goto clean_up; - } - } - err = gr_gk20a_init_zcull(g, gr); if (err != 0) { goto clean_up; diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index d07656604..0c9928f61 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h @@ -208,13 +208,6 @@ struct gr_gk20a { u32 num_fbps; u32 max_fbps_count; - u32 max_comptag_lines; - u32 compbit_backing_size; - u32 comptags_per_cacheline; - u32 slices_per_ltc; - u32 cacheline_size; - u32 gobs_per_comptagline_per_slice; - u32 bundle_cb_default_size; u32 min_gpm_fifo_depth; u32 bundle_cb_token_limit; @@ -228,12 +221,6 @@ struct gr_gk20a { u32 gfxp_wfi_timeout_count; bool gfxp_wfi_timeout_unit_usec; - /* - * The deductible memory size for max_comptag_mem (in MBytes) - * Usually close to memory size that running system is taking - */ - u32 comptag_mem_deduct; - struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer; struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image; @@ -243,10 +230,6 @@ struct gr_gk20a { struct nvgpu_gr_hwpm_map *hwpm_map; - u32 max_comptag_mem; /* max memory size (MB) for comptag */ - struct compbit_store_desc compbit_store; - struct gk20a_comptag_allocator comp_tags; - struct gr_zcull_gk20a zcull; struct nvgpu_gr_zbc *zbc; diff --git a/drivers/gpu/nvgpu/hal/cbc/cbc_gm20b.c b/drivers/gpu/nvgpu/hal/cbc/cbc_gm20b.c index c550b702b..80787d204 100644 --- a/drivers/gpu/nvgpu/hal/cbc/cbc_gm20b.c +++ b/drivers/gpu/nvgpu/hal/cbc/cbc_gm20b.c @@ -37,10 +37,10 @@ #include "cbc_gm20b.h" -int gm20b_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr) +int gm20b_cbc_alloc_comptags(struct gk20a *g, struct nvgpu_cbc *cbc) { /* max memory size (MB) to cover */ - u32 max_size = gr->max_comptag_mem; + u32 max_size = g->max_comptag_mem; /* one tag line covers 128KB */ u32 max_comptag_lines = max_size << 3U; @@ -62,13 +62,18 @@ int gm20b_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a 
*gr) return 0; } + /* Already initialized */ + if (cbc->max_comptag_lines != 0U) { + return 0; + } + if (max_comptag_lines > hw_max_comptag_lines) { max_comptag_lines = hw_max_comptag_lines; } compbit_backing_size = DIV_ROUND_UP(max_comptag_lines, comptags_per_cacheline) * - gr->cacheline_size * gr->slices_per_ltc * g->ltc_count; + g->cacheline_size * g->slices_per_ltc * g->ltc_count; /* aligned to 2KB * ltc_count */ compbit_backing_size += @@ -80,7 +85,7 @@ int gm20b_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr) max_comptag_lines = (compbit_backing_size * comptags_per_cacheline) / - (gr->cacheline_size * gr->slices_per_ltc * g->ltc_count); + (g->cacheline_size * g->slices_per_ltc * g->ltc_count); if (max_comptag_lines > hw_max_comptag_lines) { max_comptag_lines = hw_max_comptag_lines; @@ -96,14 +101,14 @@ int gm20b_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr) return err; } - err = gk20a_comptag_allocator_init(g, &gr->comp_tags, max_comptag_lines); + err = gk20a_comptag_allocator_init(g, &cbc->comp_tags, max_comptag_lines); if (err != 0) { return err; } - gr->max_comptag_lines = max_comptag_lines; - gr->comptags_per_cacheline = comptags_per_cacheline; - gr->compbit_backing_size = compbit_backing_size; + cbc->max_comptag_lines = max_comptag_lines; + cbc->comptags_per_cacheline = comptags_per_cacheline; + cbc->compbit_backing_size = compbit_backing_size; return 0; } @@ -111,7 +116,6 @@ int gm20b_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr) int gm20b_cbc_ctrl(struct gk20a *g, enum nvgpu_cbc_op op, u32 min, u32 max) { - struct gr_gk20a *gr = &g->gr; struct nvgpu_timeout timeout; int err = 0; u32 ltc, slice, ctrl1, val, hw_op = 0U; @@ -125,7 +129,7 @@ int gm20b_cbc_ctrl(struct gk20a *g, enum nvgpu_cbc_op op, trace_gk20a_ltc_cbc_ctrl_start(g->name, op, min, max); - if (gr->compbit_store.mem.size == 0ULL) { + if (g->cbc->compbit_store.mem.size == 0ULL) { return 0; } @@ -217,9 +221,9 @@ u32 gm20b_cbc_fix_config(struct gk20a *g, int base) 
} -void gm20b_cbc_init(struct gk20a *g, struct gr_gk20a *gr) +void gm20b_cbc_init(struct gk20a *g, struct nvgpu_cbc *cbc) { - u32 max_size = gr->max_comptag_mem; + u32 max_size = g->max_comptag_mem; u32 max_comptag_lines = max_size << 3U; u32 compbit_base_post_divide; @@ -229,10 +233,10 @@ void gm20b_cbc_init(struct gk20a *g, struct gr_gk20a *gr) if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) { compbit_store_iova = nvgpu_mem_get_phys_addr(g, - &gr->compbit_store.mem); + &cbc->compbit_store.mem); } else { compbit_store_iova = nvgpu_mem_get_addr(g, - &gr->compbit_store.mem); + &cbc->compbit_store.mem); } compbit_base_post_divide64 = compbit_store_iova >> @@ -263,7 +267,7 @@ void gm20b_cbc_init(struct gk20a *g, struct gr_gk20a *gr) (u32)(compbit_store_iova & 0xffffffffU), compbit_base_post_divide); - gr->compbit_store.base_hw = compbit_base_post_divide; + cbc->compbit_store.base_hw = compbit_base_post_divide; g->ops.cbc.ctrl(g, nvgpu_cbc_op_invalidate, 0, max_comptag_lines - 1U); diff --git a/drivers/gpu/nvgpu/hal/cbc/cbc_gm20b.h b/drivers/gpu/nvgpu/hal/cbc/cbc_gm20b.h index 808ddc021..b249ac658 100644 --- a/drivers/gpu/nvgpu/hal/cbc/cbc_gm20b.h +++ b/drivers/gpu/nvgpu/hal/cbc/cbc_gm20b.h @@ -28,12 +28,12 @@ #include struct gk20a; -struct gr_gk20a; struct gpu_ops; +struct nvgpu_cbc; enum nvgpu_cbc_op; -int gm20b_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr); -void gm20b_cbc_init(struct gk20a *g, struct gr_gk20a *gr); +int gm20b_cbc_alloc_comptags(struct gk20a *g, struct nvgpu_cbc *cbc); +void gm20b_cbc_init(struct gk20a *g, struct nvgpu_cbc *cbc); int gm20b_cbc_ctrl(struct gk20a *g, enum nvgpu_cbc_op op, u32 min, u32 max); u32 gm20b_cbc_fix_config(struct gk20a *g, int base); diff --git a/drivers/gpu/nvgpu/hal/cbc/cbc_gp10b.c b/drivers/gpu/nvgpu/hal/cbc/cbc_gp10b.c index 117785147..65390506c 100644 --- a/drivers/gpu/nvgpu/hal/cbc/cbc_gp10b.c +++ b/drivers/gpu/nvgpu/hal/cbc/cbc_gp10b.c @@ -36,10 +36,10 @@ #include "cbc_gp10b.h" -int gp10b_cbc_alloc_comptags(struct 
gk20a *g, struct gr_gk20a *gr) +int gp10b_cbc_alloc_comptags(struct gk20a *g, struct nvgpu_cbc *cbc) { /* max memory size (MB) to cover */ - u32 max_size = gr->max_comptag_mem; + u32 max_size = g->max_comptag_mem; /* one tag line covers 64KB */ u32 max_comptag_lines = max_size << 4U; @@ -66,7 +66,7 @@ int gp10b_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr) } /* Already initialized */ - if (gr->max_comptag_lines != 0U) { + if (cbc->max_comptag_lines != 0U) { return 0; } @@ -76,9 +76,9 @@ int gp10b_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr) compbit_backing_size = roundup(max_comptag_lines * gobs_per_comptagline_per_slice, - gr->cacheline_size); + g->cacheline_size); compbit_backing_size = roundup( - compbit_backing_size * gr->slices_per_ltc * g->ltc_count, + compbit_backing_size * g->slices_per_ltc * g->ltc_count, g->ops.fb.compressible_page_size(g)); /* aligned to 2KB * ltc_count */ @@ -101,15 +101,16 @@ int gp10b_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr) return err; } - err = gk20a_comptag_allocator_init(g, &gr->comp_tags, max_comptag_lines); + err = gk20a_comptag_allocator_init(g, &cbc->comp_tags, + max_comptag_lines); if (err != 0) { return err; } - gr->max_comptag_lines = max_comptag_lines; - gr->comptags_per_cacheline = comptags_per_cacheline; - gr->gobs_per_comptagline_per_slice = gobs_per_comptagline_per_slice; - gr->compbit_backing_size = compbit_backing_size; + cbc->max_comptag_lines = max_comptag_lines; + cbc->comptags_per_cacheline = comptags_per_cacheline; + cbc->gobs_per_comptagline_per_slice = gobs_per_comptagline_per_slice; + cbc->compbit_backing_size = compbit_backing_size; return 0; } @@ -117,7 +118,6 @@ int gp10b_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr) int gp10b_cbc_ctrl(struct gk20a *g, enum nvgpu_cbc_op op, u32 min, u32 max) { - struct gr_gk20a *gr = &g->gr; struct nvgpu_timeout timeout; int err = 0; u32 ltc, slice, ctrl1, val, hw_op = 0U; @@ -131,7 +131,7 @@ int gp10b_cbc_ctrl(struct gk20a 
*g, enum nvgpu_cbc_op op, trace_gk20a_ltc_cbc_ctrl_start(g->name, op, min, max); - if (gr->compbit_store.mem.size == 0U) { + if (g->cbc->compbit_store.mem.size == 0U) { return 0; } diff --git a/drivers/gpu/nvgpu/hal/cbc/cbc_gp10b.h b/drivers/gpu/nvgpu/hal/cbc/cbc_gp10b.h index 7a891b028..d0b22d76c 100644 --- a/drivers/gpu/nvgpu/hal/cbc/cbc_gp10b.h +++ b/drivers/gpu/nvgpu/hal/cbc/cbc_gp10b.h @@ -23,11 +23,11 @@ #ifndef CBC_GP10B_H #define CBC_GP10B_H struct gk20a; -struct gpu_ops; +struct nvgpu_cbc; #include -int gp10b_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr); -int gp10b_cbc_ctrl(struct gk20a *g, enum nvgpu_cbc_op op, - u32 min, u32 max); +int gp10b_cbc_alloc_comptags(struct gk20a *g, struct nvgpu_cbc *cbc); +int gp10b_cbc_ctrl(struct gk20a *g, enum nvgpu_cbc_op op, u32 min, u32 max); + #endif diff --git a/drivers/gpu/nvgpu/hal/cbc/cbc_gv11b.c b/drivers/gpu/nvgpu/hal/cbc/cbc_gv11b.c index aa2df74dc..047758999 100644 --- a/drivers/gpu/nvgpu/hal/cbc/cbc_gv11b.c +++ b/drivers/gpu/nvgpu/hal/cbc/cbc_gv11b.c @@ -1,5 +1,5 @@ /* - * GP10B CBC + * GV11B CBC * * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 
* @@ -29,15 +29,15 @@ #include "cbc_gv11b.h" -void gv11b_cbc_init(struct gk20a *g, struct gr_gk20a *gr) +void gv11b_cbc_init(struct gk20a *g, struct nvgpu_cbc *cbc) { - u32 max_size = gr->max_comptag_mem; + u32 max_size = g->max_comptag_mem; /* one tag line covers 64KB */ u32 max_comptag_lines = max_size << 4; nvgpu_log_fn(g, " "); - g->ops.fb.cbc_configure(g, gr); + g->ops.fb.cbc_configure(g, cbc); g->ops.cbc.ctrl(g, nvgpu_cbc_op_invalidate, 0, max_comptag_lines - 1U); diff --git a/drivers/gpu/nvgpu/hal/cbc/cbc_gv11b.h b/drivers/gpu/nvgpu/hal/cbc/cbc_gv11b.h index 3bc33c528..c9366e7cd 100644 --- a/drivers/gpu/nvgpu/hal/cbc/cbc_gv11b.h +++ b/drivers/gpu/nvgpu/hal/cbc/cbc_gv11b.h @@ -23,8 +23,8 @@ #ifndef CBC_GV11B_H #define CBC_GV11B_H struct gk20a; -struct gpu_ops; +struct nvgpu_cbc; -void gv11b_cbc_init(struct gk20a *g, struct gr_gk20a *gr); +void gv11b_cbc_init(struct gk20a *g, struct nvgpu_cbc *cbc); #endif diff --git a/drivers/gpu/nvgpu/hal/cbc/cbc_tu104.c b/drivers/gpu/nvgpu/hal/cbc/cbc_tu104.c index f5ed389a3..c200dd50c 100644 --- a/drivers/gpu/nvgpu/hal/cbc/cbc_tu104.c +++ b/drivers/gpu/nvgpu/hal/cbc/cbc_tu104.c @@ -1,4 +1,6 @@ /* + * TU104 CBC + * * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a @@ -43,10 +45,10 @@ u64 tu104_cbc_get_base_divisor(struct gk20a *g) ltc_ltcs_ltss_cbc_base_alignment_shift_v(); } -int tu104_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr) +int tu104_cbc_alloc_comptags(struct gk20a *g, struct nvgpu_cbc *cbc) { /* max memory size (MB) to cover */ - u32 max_size = gr->max_comptag_mem; + u32 max_size = g->max_comptag_mem; /* one tag line covers 64KB */ u32 max_comptag_lines = max_size << 4U; u32 compbit_backing_size; @@ -64,7 +66,7 @@ int tu104_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr) } /* Already initialized */ - if (gr->max_comptag_lines != 0U) { + if (cbc->max_comptag_lines != 0U) { return 0; } @@ -78,7 +80,7 @@ int tu104_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr) ctags_size = ltc_ltcs_ltss_cbc_param_bytes_per_comptagline_per_slice_v(cbc_param); - ctags_per_cacheline = gr->cacheline_size / ctags_size; + ctags_per_cacheline = g->cacheline_size / ctags_size; amap_divide_rounding = (U32(2U) * U32(1024U)) << ltc_ltcs_ltss_cbc_param_amap_divide_rounding_v(cbc_param); @@ -86,9 +88,9 @@ int tu104_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr) ltc_ltcs_ltss_cbc_param_amap_swizzle_rounding_v(cbc_param); compbit_backing_size = - roundup(max_comptag_lines * ctags_size, gr->cacheline_size); + roundup(max_comptag_lines * ctags_size, g->cacheline_size); compbit_backing_size = - compbit_backing_size * gr->slices_per_ltc * g->ltc_count; + compbit_backing_size * g->slices_per_ltc * g->ltc_count; compbit_backing_size += g->ltc_count * amap_divide_rounding; compbit_backing_size += amap_swizzle_rounding; @@ -102,22 +104,22 @@ int tu104_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr) return err; } - err = gk20a_comptag_allocator_init(g, &gr->comp_tags, max_comptag_lines); + err = gk20a_comptag_allocator_init(g, &cbc->comp_tags, max_comptag_lines); if (err != 0) { return err; } - gr->max_comptag_lines = 
max_comptag_lines; - gr->comptags_per_cacheline = ctags_per_cacheline; - gr->gobs_per_comptagline_per_slice = ctags_size; - gr->compbit_backing_size = compbit_backing_size; + cbc->max_comptag_lines = max_comptag_lines; + cbc->comptags_per_cacheline = ctags_per_cacheline; + cbc->gobs_per_comptagline_per_slice = ctags_size; + cbc->compbit_backing_size = compbit_backing_size; nvgpu_log_info(g, "compbit backing store size : %d", compbit_backing_size); nvgpu_log_info(g, "max comptag lines : %d", max_comptag_lines); nvgpu_log_info(g, "gobs_per_comptagline_per_slice: %d", - gr->gobs_per_comptagline_per_slice); + cbc->gobs_per_comptagline_per_slice); return 0; } @@ -125,11 +127,10 @@ int tu104_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr) int tu104_cbc_ctrl(struct gk20a *g, enum nvgpu_cbc_op op, u32 min, u32 max) { - struct gr_gk20a *gr = &g->gr; struct nvgpu_timeout timeout; int err = 0; u32 ltc, slice, ctrl1, val, hw_op = 0U; - u32 slices_per_ltc = gr->slices_per_ltc; + u32 slices_per_ltc = g->slices_per_ltc; u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); const u32 max_lines = 16384U; @@ -138,7 +139,7 @@ int tu104_cbc_ctrl(struct gk20a *g, enum nvgpu_cbc_op op, trace_gk20a_ltc_cbc_ctrl_start(g->name, op, min, max); - if (gr->compbit_store.mem.size == 0U) { + if (g->cbc->compbit_store.mem.size == 0U) { return 0; } @@ -218,11 +219,9 @@ out: return err; } -void tu104_cbc_init(struct gk20a *g, struct gr_gk20a *gr) +void tu104_cbc_init(struct gk20a *g, struct nvgpu_cbc *cbc) { - - g->ops.fb.cbc_configure(g, gr); - + g->ops.fb.cbc_configure(g, cbc); g->ops.cbc.ctrl(g, nvgpu_cbc_op_invalidate, - 0, gr->max_comptag_lines - 1U); + 0, cbc->max_comptag_lines - 1U); } diff --git a/drivers/gpu/nvgpu/hal/cbc/cbc_tu104.h b/drivers/gpu/nvgpu/hal/cbc/cbc_tu104.h index 5a3c701ad..5c3b94552 100644 --- a/drivers/gpu/nvgpu/hal/cbc/cbc_tu104.h +++ b/drivers/gpu/nvgpu/hal/cbc/cbc_tu104.h @@ -27,12 
+27,12 @@ enum nvgpu_cbc_op; struct gk20a; -struct gr_gk20a; +struct nvgpu_cbc; u64 tu104_cbc_get_base_divisor(struct gk20a *g); -int tu104_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr); +int tu104_cbc_alloc_comptags(struct gk20a *g, struct nvgpu_cbc *cbc); int tu104_cbc_ctrl(struct gk20a *g, enum nvgpu_cbc_op op, u32 min, u32 max); -void tu104_cbc_init(struct gk20a *g, struct gr_gk20a *gr); +void tu104_cbc_init(struct gk20a *g, struct nvgpu_cbc *cbc); #endif diff --git a/drivers/gpu/nvgpu/include/nvgpu/cbc.h b/drivers/gpu/nvgpu/include/nvgpu/cbc.h index a92b98484..0bc298165 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/cbc.h +++ b/drivers/gpu/nvgpu/include/nvgpu/cbc.h @@ -24,6 +24,9 @@ #define NVGPU_CBC_H #include +#include + +#include "gk20a/mm_gk20a.h" struct gk20a; @@ -33,6 +36,18 @@ enum nvgpu_cbc_op { nvgpu_cbc_op_invalidate, }; + +struct nvgpu_cbc { + u32 compbit_backing_size; + u32 comptags_per_cacheline; + u32 gobs_per_comptagline_per_slice; + u32 max_comptag_lines; + struct gk20a_comptag_allocator comp_tags; + struct compbit_store_desc compbit_store; +}; + +int nvgpu_cbc_init_support(struct gk20a *g); +void nvgpu_cbc_remove_support(struct gk20a *g); int nvgpu_cbc_alloc(struct gk20a *g, size_t compbit_backing_size, bool vidmem_alloc); diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index 3e3f7c978..b5c36aedb 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h @@ -71,6 +71,7 @@ struct nvgpu_channel_hw_state; struct nvgpu_engine_status_info; struct nvgpu_pbdma_status_info; enum nvgpu_nvlink_minion_dlcmd; +struct nvgpu_cbc; #include #include @@ -232,9 +233,10 @@ struct gpu_ops { } err_ops; } ltc; struct { - void (*init)(struct gk20a *g, struct gr_gk20a *gr); + void (*init)(struct gk20a *g, struct nvgpu_cbc *cbc); u64 (*get_base_divisor)(struct gk20a *g); - int (*alloc_comptags)(struct gk20a *g, struct gr_gk20a *gr); + int (*alloc_comptags)(struct gk20a *g, 
+ struct nvgpu_cbc *cbc); int (*ctrl)(struct gk20a *g, enum nvgpu_cbc_op op, u32 min, u32 max); u32 (*fix_config)(struct gk20a *g, int base); @@ -703,7 +705,7 @@ struct gpu_ops { } gr; struct { void (*init_hw)(struct gk20a *g); - void (*cbc_configure)(struct gk20a *g, struct gr_gk20a *gr); + void (*cbc_configure)(struct gk20a *g, struct nvgpu_cbc *cbc); void (*init_fs_state)(struct gk20a *g); void (*init_uncompressed_kind_map)(struct gk20a *g); void (*init_kind_attr)(struct gk20a *g); @@ -1926,10 +1928,24 @@ struct gk20a { int irqs_enabled; int irq_stall; /* can be same as irq_nonstall in case of PCI */ int irq_nonstall; + + /* This data will be moved to nvgpu_ltc_info */ u32 max_ltc_count; u32 ltc_count; + u32 slices_per_ltc; + u32 cacheline_size; u32 ltc_streamid; + /* + * The deductible memory size for max_comptag_mem (in MBytes) + * Usually close to memory size that running system is taking + */ + u32 comptag_mem_deduct; + + u32 max_comptag_mem; /* max memory size (MB) for comptag */ + + struct nvgpu_cbc *cbc; + struct gk20a_worker { struct nvgpu_thread poll_task; nvgpu_atomic_t put; diff --git a/drivers/gpu/nvgpu/os/linux/cde.c b/drivers/gpu/nvgpu/os/linux/cde.c index 0bcca6e54..86e204113 100644 --- a/drivers/gpu/nvgpu/os/linux/cde.c +++ b/drivers/gpu/nvgpu/os/linux/cde.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -102,12 +103,13 @@ __must_hold(&cde_app->mutex) struct gk20a *g = &l->g; struct channel_gk20a *ch = cde_ctx->ch; struct vm_gk20a *vm = ch->vm; + struct nvgpu_cbc *cbc = g->cbc; trace_gk20a_cde_remove_ctx(cde_ctx); /* release mapped memory */ gk20a_deinit_cde_img(cde_ctx); - nvgpu_gmmu_unmap(vm, &g->gr.compbit_store.mem, + nvgpu_gmmu_unmap(vm, &cbc->compbit_store.mem, cde_ctx->backing_store_vaddr); /* @@ -403,6 +405,7 @@ static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx) { struct nvgpu_os_linux *l = cde_ctx->l; struct gk20a *g = &l->g; + struct nvgpu_cbc *cbc = g->cbc; struct nvgpu_mem 
*target_mem; u32 *target_mem_ptr; u64 new_data; @@ -417,11 +420,11 @@ static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx) switch (param->id) { case TYPE_PARAM_COMPTAGS_PER_CACHELINE: - new_data = g->gr.comptags_per_cacheline; + new_data = cbc->comptags_per_cacheline; break; case TYPE_PARAM_GPU_CONFIGURATION: - new_data = (u64)g->ltc_count * g->gr.slices_per_ltc * - g->gr.cacheline_size; + new_data = (u64)g->ltc_count * g->slices_per_ltc * + g->cacheline_size; break; case TYPE_PARAM_FIRSTPAGEOFFSET: new_data = cde_ctx->surf_param_offset; @@ -439,7 +442,7 @@ static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx) new_data = cde_ctx->compbit_size; break; case TYPE_PARAM_BACKINGSTORE_SIZE: - new_data = g->gr.compbit_store.mem.size; + new_data = cbc->compbit_store.mem.size; break; case TYPE_PARAM_SOURCE_SMMU_ADDR: new_data = gpuva_to_iova_base(cde_ctx->vm, @@ -451,10 +454,10 @@ static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx) } break; case TYPE_PARAM_BACKINGSTORE_BASE_HW: - new_data = g->gr.compbit_store.base_hw; + new_data = cbc->compbit_store.base_hw; break; case TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE: - new_data = g->gr.gobs_per_comptagline_per_slice; + new_data = cbc->gobs_per_comptagline_per_slice; break; case TYPE_PARAM_SCATTERBUFFER: new_data = cde_ctx->scatterbuffer_vaddr; @@ -1014,6 +1017,7 @@ __releases(&l->cde_app->mutex) { struct gk20a *g = &l->g; struct gk20a_cde_ctx *cde_ctx = NULL; + struct nvgpu_cbc *cbc = g->cbc; struct gk20a_comptags comptags; struct nvgpu_os_buffer os_buf = { compbits_scatter_buf, @@ -1199,7 +1203,7 @@ __releases(&l->cde_app->mutex) } nvgpu_log(g, gpu_dbg_cde, "cde: buffer=cbc, size=%zu, gpuva=%llx\n", - g->gr.compbit_store.mem.size, cde_ctx->backing_store_vaddr); + cbc->compbit_store.mem.size, cde_ctx->backing_store_vaddr); nvgpu_log(g, gpu_dbg_cde, "cde: buffer=compbits, size=%llu, gpuva=%llx\n", cde_ctx->compbit_size, cde_ctx->compbit_vaddr); nvgpu_log(g, gpu_dbg_cde, "cde: 
buffer=scatterbuffer, size=%llu, gpuva=%llx\n", @@ -1310,10 +1314,10 @@ static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx) { struct nvgpu_os_linux *l = cde_ctx->l; struct gk20a *g = &l->g; + struct nvgpu_cbc *cbc = g->cbc; struct nvgpu_firmware *img; struct channel_gk20a *ch; struct tsg_gk20a *tsg; - struct gr_gk20a *gr = &g->gr; struct nvgpu_setup_bind_args setup_bind_args; int err = 0; u64 vaddr; @@ -1366,12 +1370,12 @@ static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx) } /* map backing store to gpu virtual space */ - vaddr = nvgpu_gmmu_map(ch->vm, &gr->compbit_store.mem, - g->gr.compbit_store.mem.size, + vaddr = nvgpu_gmmu_map(ch->vm, &cbc->compbit_store.mem, + cbc->compbit_store.mem.size, NVGPU_VM_MAP_CACHEABLE, gk20a_mem_flag_read_only, false, - gr->compbit_store.mem.aperture); + cbc->compbit_store.mem.aperture); if (!vaddr) { nvgpu_warn(g, "cde: cannot map compression bit backing store"); @@ -1398,7 +1402,7 @@ static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx) return 0; err_init_cde_img: - nvgpu_gmmu_unmap(ch->vm, &g->gr.compbit_store.mem, vaddr); + nvgpu_gmmu_unmap(ch->vm, &cbc->compbit_store.mem, vaddr); err_map_backingstore: err_setup_bind: nvgpu_vm_put(ch->vm); diff --git a/drivers/gpu/nvgpu/os/linux/driver_common.c b/drivers/gpu/nvgpu/os/linux/driver_common.c index 6f9ffd75e..e890d0e49 100644 --- a/drivers/gpu/nvgpu/os/linux/driver_common.c +++ b/drivers/gpu/nvgpu/os/linux/driver_common.c @@ -105,7 +105,7 @@ static void nvgpu_init_gr_vars(struct gk20a *g) gk20a_init_gr(g); nvgpu_log_info(g, "total ram pages : %lu", totalram_pages); - g->gr.max_comptag_mem = totalram_size_in_mb; + g->max_comptag_mem = totalram_size_in_mb; } static void nvgpu_init_timeout(struct gk20a *g) diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c index 4a04204c9..3365393b7 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c @@ -340,13 +340,13 @@ 
gk20a_ctrl_ioctl_gpu_characteristics( gpu.fbp_en_mask = g->ops.gr.get_fbp_en_mask(g); gpu.max_ltc_per_fbp = g->ops.top.get_max_ltc_per_fbp(g); gpu.max_lts_per_ltc = g->ops.top.get_max_lts_per_ltc(g); - gpu.gr_compbit_store_base_hw = g->gr.compbit_store.base_hw; + gpu.gr_compbit_store_base_hw = g->cbc->compbit_store.base_hw; gpu.gr_gobs_per_comptagline_per_slice = - g->gr.gobs_per_comptagline_per_slice; + g->cbc->gobs_per_comptagline_per_slice; gpu.num_ltc = g->ltc_count; - gpu.lts_per_ltc = g->gr.slices_per_ltc; - gpu.cbc_cache_line_size = g->gr.cacheline_size; - gpu.cbc_comptags_per_line = g->gr.comptags_per_cacheline; + gpu.lts_per_ltc = g->slices_per_ltc; + gpu.cbc_cache_line_size = g->cacheline_size; + gpu.cbc_comptags_per_line = g->cbc->comptags_per_cacheline; if (g->ops.clk.get_maxrate) gpu.max_freq = g->ops.clk.get_maxrate(g, CTRL_CLK_DOMAIN_GPCCLK); diff --git a/drivers/gpu/nvgpu/os/linux/linux-cbc.c b/drivers/gpu/nvgpu/os/linux/linux-cbc.c index 056798c2c..f60ece563 100644 --- a/drivers/gpu/nvgpu/os/linux/linux-cbc.c +++ b/drivers/gpu/nvgpu/os/linux/linux-cbc.c @@ -19,14 +19,12 @@ #include #include -#include "gk20a/gr_gk20a.h" - int nvgpu_cbc_alloc(struct gk20a *g, size_t compbit_backing_size, bool vidmem_alloc) { - struct gr_gk20a *gr = &g->gr; + struct nvgpu_cbc *cbc = g->cbc; - if (nvgpu_mem_is_valid(&gr->compbit_store.mem)) + if (nvgpu_mem_is_valid(&cbc->compbit_store.mem)) return 0; if (vidmem_alloc) { @@ -40,11 +38,11 @@ int nvgpu_cbc_alloc(struct gk20a *g, size_t compbit_backing_size, */ return nvgpu_dma_alloc_vid(g, compbit_backing_size, - &gr->compbit_store.mem); + &cbc->compbit_store.mem); } else { return nvgpu_dma_alloc_flags_sys(g, NVGPU_DMA_PHYSICALLY_ADDRESSED, compbit_backing_size, - &gr->compbit_store.mem); + &cbc->compbit_store.mem); } } diff --git a/drivers/gpu/nvgpu/os/linux/sysfs.c b/drivers/gpu/nvgpu/os/linux/sysfs.c index afce42a24..95f312012 100644 --- a/drivers/gpu/nvgpu/os/linux/sysfs.c +++ b/drivers/gpu/nvgpu/os/linux/sysfs.c @@ 
-1097,9 +1097,9 @@ static ssize_t comptag_mem_deduct_store(struct device *dev, return -EINVAL; } - g->gr.comptag_mem_deduct = val; + g->comptag_mem_deduct = val; /* Deduct the part taken by the running system */ - g->gr.max_comptag_mem -= val; + g->max_comptag_mem -= val; return count; } @@ -1109,7 +1109,7 @@ static ssize_t comptag_mem_deduct_show(struct device *dev, { struct gk20a *g = get_gk20a(dev); - return sprintf(buf, "%d\n", g->gr.comptag_mem_deduct); + return sprintf(buf, "%d\n", g->comptag_mem_deduct); } static DEVICE_ATTR(comptag_mem_deduct, ROOTRW, diff --git a/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.c b/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.c index cab428c9c..e91804732 100644 --- a/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.c +++ b/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.c @@ -434,7 +434,7 @@ int vgpu_probe(struct platform_device *pdev) gk20a_init_gr(gk20a); nvgpu_log_info(gk20a, "total ram pages : %lu", totalram_pages); - gk20a->gr.max_comptag_mem = totalram_size_in_mb; + gk20a->max_comptag_mem = totalram_size_in_mb; nvgpu_ref_init(&gk20a->refcount);