From a2bc7d59231ca2f0552efc08df4a0da732d8d9f1 Mon Sep 17 00:00:00 2001 From: Seshendra Gadagottu Date: Wed, 13 Mar 2019 16:59:06 -0700 Subject: [PATCH] gpu: nvgpu: cbc: move cbc related code from gr Move the cbc-related code and data from the gr unit to the cbc unit. The ltc- and cbc-related data is moved out of the gr header as follows: 1. The ltc-related data is moved from gr_gk20a -> gk20a; it will eventually be moved to the ltc unit: u32 slices_per_ltc; u32 cacheline_size; 2. The cbc data is moved from gr_gk20a -> nvgpu_cbc: u32 compbit_backing_size; u32 comptags_per_cacheline; u32 gobs_per_comptagline_per_slice; u32 max_comptag_lines; struct gk20a_comptag_allocator comp_tags; struct compbit_store_desc compbit_store; 3. The following config data is moved from gr_gk20a -> gk20a: u32 comptag_mem_deduct; u32 max_comptag_mem; These are part of the initial config, which should be available during nvgpu_probe, so they can't be moved to nvgpu_cbc. Modified the code to use the updated data structures above. Removed the cbc init sequence from gr and added it to the common cbc unit. This sequence is now called from the common nvgpu init code. 
JIRA NVGPU-2896 JIRA NVGPU-2897 Change-Id: I1a1b1e73b75396d61de684f413ebc551a1202a57 Signed-off-by: Seshendra Gadagottu Reviewed-on: https://git-master.nvidia.com/r/2033286 Reviewed-by: svc-misra-checker GVS: Gerrit_Virtual_Submit Reviewed-by: Deepak Nibade Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/Makefile | 1 + drivers/gpu/nvgpu/Makefile.sources | 1 + drivers/gpu/nvgpu/common/cbc/cbc.c | 77 ++++++++++++++++++++ drivers/gpu/nvgpu/common/ecc.c | 5 +- drivers/gpu/nvgpu/common/fb/fb_gv11b.c | 8 +- drivers/gpu/nvgpu/common/fb/fb_gv11b.h | 3 +- drivers/gpu/nvgpu/common/fb/fb_tu104.c | 10 +-- drivers/gpu/nvgpu/common/fb/fb_tu104.h | 3 +- drivers/gpu/nvgpu/common/init/nvgpu_init.c | 7 ++ drivers/gpu/nvgpu/common/ltc/ltc_gm20b.c | 7 +- drivers/gpu/nvgpu/common/ltc/ltc_gp10b.c | 2 +- drivers/gpu/nvgpu/common/ltc/ltc_gv11b.c | 7 +- drivers/gpu/nvgpu/common/ltc/ltc_tu104.c | 5 +- drivers/gpu/nvgpu/common/mm/vm.c | 2 +- drivers/gpu/nvgpu/common/vgpu/cbc/cbc_vgpu.c | 8 +- drivers/gpu/nvgpu/common/vgpu/cbc/cbc_vgpu.h | 4 +- drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c | 7 -- drivers/gpu/nvgpu/common/vgpu/ltc/ltc_vgpu.c | 5 +- drivers/gpu/nvgpu/common/vgpu/vgpu.c | 7 ++ drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 19 +---- drivers/gpu/nvgpu/gk20a/gr_gk20a.h | 17 ----- drivers/gpu/nvgpu/hal/cbc/cbc_gm20b.c | 34 +++++---- drivers/gpu/nvgpu/hal/cbc/cbc_gm20b.h | 6 +- drivers/gpu/nvgpu/hal/cbc/cbc_gp10b.c | 24 +++--- drivers/gpu/nvgpu/hal/cbc/cbc_gp10b.h | 8 +- drivers/gpu/nvgpu/hal/cbc/cbc_gv11b.c | 8 +- drivers/gpu/nvgpu/hal/cbc/cbc_gv11b.h | 4 +- drivers/gpu/nvgpu/hal/cbc/cbc_tu104.c | 39 +++++----- drivers/gpu/nvgpu/hal/cbc/cbc_tu104.h | 6 +- drivers/gpu/nvgpu/include/nvgpu/cbc.h | 15 ++++ drivers/gpu/nvgpu/include/nvgpu/gk20a.h | 22 +++++- drivers/gpu/nvgpu/os/linux/cde.c | 30 ++++---- drivers/gpu/nvgpu/os/linux/driver_common.c | 2 +- drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c | 10 +-- drivers/gpu/nvgpu/os/linux/linux-cbc.c | 10 +-- 
drivers/gpu/nvgpu/os/linux/sysfs.c | 6 +- drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.c | 2 +- 37 files changed, 259 insertions(+), 172 deletions(-) create mode 100644 drivers/gpu/nvgpu/common/cbc/cbc.c diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index 82aa25d36..ae8ae2244 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile @@ -80,6 +80,7 @@ nvgpu-y += \ common/ltc/ltc_gv11b.o \ common/ltc/ltc_tu104.o \ common/gr/gr.o \ + common/cbc/cbc.o \ common/gr/ctxsw_prog/ctxsw_prog_gm20b.o \ common/gr/ctxsw_prog/ctxsw_prog_gp10b.o \ common/gr/ctxsw_prog/ctxsw_prog_gv11b.o \ diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources index 07bbd5f81..9803b78ff 100644 --- a/drivers/gpu/nvgpu/Makefile.sources +++ b/drivers/gpu/nvgpu/Makefile.sources @@ -97,6 +97,7 @@ srcs += common/sim.c \ common/string.c \ common/rbtree.c \ common/ltc/ltc.c \ + common/cbc/cbc.c \ common/ltc/ltc_gm20b.c \ common/ltc/ltc_gp10b.c \ common/ltc/ltc_gv11b.c \ diff --git a/drivers/gpu/nvgpu/common/cbc/cbc.c b/drivers/gpu/nvgpu/common/cbc/cbc.c new file mode 100644 index 000000000..57c8915ce --- /dev/null +++ b/drivers/gpu/nvgpu/common/cbc/cbc.c @@ -0,0 +1,77 @@ +/* + * CBC + * + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + + +#include +#include +#include +#include +#include +#include + +void nvgpu_cbc_remove_support(struct gk20a *g) +{ + struct nvgpu_cbc *cbc = g->cbc; + + nvgpu_log_fn(g, " "); + + if (cbc == NULL) { + return; + } + + if (nvgpu_mem_is_valid(&cbc->compbit_store.mem)) { + nvgpu_dma_free(g, &cbc->compbit_store.mem); + (void) memset(&cbc->compbit_store, 0, + sizeof(struct compbit_store_desc)); + } + gk20a_comptag_allocator_destroy(g, &cbc->comp_tags); + + nvgpu_kfree(g, cbc); + g->cbc = NULL; +} + +int nvgpu_cbc_init_support(struct gk20a *g) +{ + int err = 0; + struct nvgpu_cbc *cbc = g->cbc; + + nvgpu_log_fn(g, " "); + + if (cbc == NULL) { + cbc = nvgpu_kzalloc(g, sizeof(*cbc)); + if (cbc == NULL) { + return -ENOMEM; + } + g->cbc = cbc; + if (g->ops.cbc.alloc_comptags != NULL) { + err = g->ops.cbc.alloc_comptags(g, g->cbc); + } + } + + if (g->ops.cbc.init != NULL) { + g->ops.cbc.init(g, g->cbc); + } + + return err; +} diff --git a/drivers/gpu/nvgpu/common/ecc.c b/drivers/gpu/nvgpu/common/ecc.c index 605ea3755..06655f3fd 100644 --- a/drivers/gpu/nvgpu/common/ecc.c +++ b/drivers/gpu/nvgpu/common/ecc.c @@ -128,7 +128,6 @@ int nvgpu_ecc_counter_init(struct gk20a *g, int nvgpu_ecc_counter_init_per_lts(struct gk20a *g, struct nvgpu_ecc_stat ***stat, const char *name) { - struct gr_gk20a *gr = &g->gr; struct nvgpu_ecc_stat **stats; u32 ltc, lts; int err = 0; @@ -139,7 +138,7 @@ int nvgpu_ecc_counter_init_per_lts(struct gk20a *g, } for (ltc = 0; ltc < g->ltc_count; ltc++) { stats[ltc] = 
nvgpu_kzalloc(g, - sizeof(*stats[ltc]) * gr->slices_per_ltc); + sizeof(*stats[ltc]) * g->slices_per_ltc); if (stats[ltc] == NULL) { err = -ENOMEM; break; @@ -156,7 +155,7 @@ int nvgpu_ecc_counter_init_per_lts(struct gk20a *g, } for (ltc = 0; ltc < g->ltc_count; ltc++) { - for (lts = 0; lts < gr->slices_per_ltc; lts++) { + for (lts = 0; lts < g->slices_per_ltc; lts++) { (void) snprintf(stats[ltc][lts].name, NVGPU_ECC_STAT_NAME_MAX_SIZE, "ltc%d_lts%d_%s", ltc, lts, name); diff --git a/drivers/gpu/nvgpu/common/fb/fb_gv11b.c b/drivers/gpu/nvgpu/common/fb/fb_gv11b.c index 17e0a0a3f..b2372183b 100644 --- a/drivers/gpu/nvgpu/common/fb/fb_gv11b.c +++ b/drivers/gpu/nvgpu/common/fb/fb_gv11b.c @@ -121,7 +121,7 @@ void gv11b_fb_init_fs_state(struct gk20a *g) } } -void gv11b_fb_cbc_configure(struct gk20a *g, struct gr_gk20a *gr) +void gv11b_fb_cbc_configure(struct gk20a *g, struct nvgpu_cbc *cbc) { u32 compbit_base_post_divide; u64 compbit_base_post_multiply64; @@ -130,10 +130,10 @@ void gv11b_fb_cbc_configure(struct gk20a *g, struct gr_gk20a *gr) if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) { compbit_store_iova = nvgpu_mem_get_phys_addr(g, - &gr->compbit_store.mem); + &cbc->compbit_store.mem); } else { compbit_store_iova = nvgpu_mem_get_addr(g, - &gr->compbit_store.mem); + &cbc->compbit_store.mem); } /* must be aligned to 64 KB */ compbit_store_iova = roundup(compbit_store_iova, (u64)SZ_64K); @@ -167,7 +167,7 @@ void gv11b_fb_cbc_configure(struct gk20a *g, struct gr_gk20a *gr) nvgpu_log(g, gpu_dbg_fn, "cbc base %x", gk20a_readl(g, fb_mmu_cbc_base_r())); - gr->compbit_store.base_hw = compbit_base_post_divide; + cbc->compbit_store.base_hw = compbit_base_post_divide; } diff --git a/drivers/gpu/nvgpu/common/fb/fb_gv11b.h b/drivers/gpu/nvgpu/common/fb/fb_gv11b.h index 1c68ff222..8883179eb 100644 --- a/drivers/gpu/nvgpu/common/fb/fb_gv11b.h +++ b/drivers/gpu/nvgpu/common/fb/fb_gv11b.h @@ -30,11 +30,12 @@ struct gk20a; struct gr_gk20a; +struct nvgpu_cbc; void gv11b_fb_init_hw(struct 
gk20a *g); void gv11b_fb_init_fs_state(struct gk20a *g); -void gv11b_fb_cbc_configure(struct gk20a *g, struct gr_gk20a *gr); +void gv11b_fb_cbc_configure(struct gk20a *g, struct nvgpu_cbc *cbc); void gv11b_fb_reset(struct gk20a *g); void gv11b_fb_hub_isr(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/common/fb/fb_tu104.c b/drivers/gpu/nvgpu/common/fb/fb_tu104.c index d883262b8..ea1aebb2c 100644 --- a/drivers/gpu/nvgpu/common/fb/fb_tu104.c +++ b/drivers/gpu/nvgpu/common/fb/fb_tu104.c @@ -424,7 +424,7 @@ int fb_tu104_mmu_invalidate_replay(struct gk20a *g, return err; } -void tu104_fb_cbc_configure(struct gk20a *g, struct gr_gk20a *gr) +void tu104_fb_cbc_configure(struct gk20a *g, struct nvgpu_cbc *cbc) { u64 base_divisor; u64 compbit_store_base; @@ -434,13 +434,13 @@ void tu104_fb_cbc_configure(struct gk20a *g, struct gr_gk20a *gr) u32 cbc_top_size; u32 cbc_max; - compbit_store_pa = nvgpu_mem_get_addr(g, &gr->compbit_store.mem); + compbit_store_pa = nvgpu_mem_get_addr(g, &cbc->compbit_store.mem); base_divisor = g->ops.cbc.get_base_divisor(g); compbit_store_base = DIV_ROUND_UP(compbit_store_pa, base_divisor); cbc_start_addr = (u64)g->ltc_count * (compbit_store_base << fb_mmu_cbc_base_address_alignment_shift_v()); - cbc_end_addr = cbc_start_addr + gr->compbit_backing_size; + cbc_end_addr = cbc_start_addr + cbc->compbit_backing_size; cbc_top = (cbc_end_addr / g->ltc_count) >> fb_mmu_cbc_base_address_alignment_shift_v(); @@ -452,7 +452,7 @@ void tu104_fb_cbc_configure(struct gk20a *g, struct gr_gk20a *gr) cbc_max = nvgpu_readl(g, fb_mmu_cbc_max_r()); cbc_max = set_field(cbc_max, fb_mmu_cbc_max_comptagline_m(), - fb_mmu_cbc_max_comptagline_f(gr->max_comptag_lines)); + fb_mmu_cbc_max_comptagline_f(cbc->max_comptag_lines)); nvgpu_writel(g, fb_mmu_cbc_max_r(), cbc_max); nvgpu_writel(g, fb_mmu_cbc_base_r(), @@ -464,7 +464,7 @@ void tu104_fb_cbc_configure(struct gk20a *g, struct gr_gk20a *gr) (u32)(compbit_store_pa & 0xffffffffU), compbit_store_base); - 
gr->compbit_store.base_hw = compbit_store_base; + cbc->compbit_store.base_hw = compbit_store_base; } diff --git a/drivers/gpu/nvgpu/common/fb/fb_tu104.h b/drivers/gpu/nvgpu/common/fb/fb_tu104.h index 68d4ad52b..db2ccdf04 100644 --- a/drivers/gpu/nvgpu/common/fb/fb_tu104.h +++ b/drivers/gpu/nvgpu/common/fb/fb_tu104.h @@ -28,6 +28,7 @@ struct gk20a; struct gr_gk20a; struct nvgpu_mem; +struct nvgpu_cbc; void tu104_fb_enable_hub_intr(struct gk20a *g); void tu104_fb_disable_hub_intr(struct gk20a *g); @@ -55,7 +56,7 @@ int fb_tu104_tlb_invalidate(struct gk20a *g, struct nvgpu_mem *pdb); int fb_tu104_mmu_invalidate_replay(struct gk20a *g, u32 invalidate_replay_val); -void tu104_fb_cbc_configure(struct gk20a *g, struct gr_gk20a *gr); +void tu104_fb_cbc_configure(struct gk20a *g, struct nvgpu_cbc *cbc); int tu104_fb_apply_pdb_cache_war(struct gk20a *g); size_t tu104_fb_get_vidmem_size(struct gk20a *g); diff --git a/drivers/gpu/nvgpu/common/init/nvgpu_init.c b/drivers/gpu/nvgpu/common/init/nvgpu_init.c index 361d6f880..d52e298a9 100644 --- a/drivers/gpu/nvgpu/common/init/nvgpu_init.c +++ b/drivers/gpu/nvgpu/common/init/nvgpu_init.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -373,6 +374,12 @@ int gk20a_finalize_poweron(struct gk20a *g) goto done; } + err = nvgpu_cbc_init_support(g); + if (err != 0) { + nvgpu_err(g, "failed to init cbc"); + goto done; + } + g->ops.chip_init_gpu_characteristics(g); /* Restore the debug setting */ diff --git a/drivers/gpu/nvgpu/common/ltc/ltc_gm20b.c b/drivers/gpu/nvgpu/common/ltc/ltc_gm20b.c index b50f2ee3d..496302a78 100644 --- a/drivers/gpu/nvgpu/common/ltc/ltc_gm20b.c +++ b/drivers/gpu/nvgpu/common/ltc/ltc_gm20b.c @@ -40,7 +40,6 @@ void gm20b_ltc_init_fs_state(struct gk20a *g) { - struct gr_gk20a *gr = &g->gr; u32 reg; nvgpu_log_info(g, "initialize gm20b l2"); @@ -50,8 +49,8 @@ void gm20b_ltc_init_fs_state(struct gk20a *g) nvgpu_log_info(g, "%d ltcs out of %d", g->ltc_count, g->max_ltc_count); reg = 
gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()); - gr->slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(reg);; - gr->cacheline_size = + g->slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(reg);; + g->cacheline_size = U32(512) << ltc_ltcs_ltss_cbc_param_cache_line_size_v(reg); gk20a_writel(g, ltc_ltcs_ltss_cbc_num_active_ltcs_r(), @@ -92,7 +91,7 @@ void gm20b_ltc_isr(struct gk20a *g, unsigned int ltc) { unsigned int slice; - for (slice = 0U; slice < g->gr.slices_per_ltc; slice++) { + for (slice = 0U; slice < g->slices_per_ltc; slice++) { gm20b_ltc_lts_isr(g, ltc, slice); } } diff --git a/drivers/gpu/nvgpu/common/ltc/ltc_gp10b.c b/drivers/gpu/nvgpu/common/ltc/ltc_gp10b.c index fdacb7b7a..4f586131e 100644 --- a/drivers/gpu/nvgpu/common/ltc/ltc_gp10b.c +++ b/drivers/gpu/nvgpu/common/ltc/ltc_gp10b.c @@ -118,7 +118,7 @@ void gp10b_ltc_isr(struct gk20a *g, unsigned int ltc) { unsigned int slice; - for (slice = 0U; slice < g->gr.slices_per_ltc; slice++) { + for (slice = 0U; slice < g->slices_per_ltc; slice++) { gp10b_ltc_lts_isr(g, ltc, slice); } } diff --git a/drivers/gpu/nvgpu/common/ltc/ltc_gv11b.c b/drivers/gpu/nvgpu/common/ltc/ltc_gv11b.c index 54dbf1cdb..f78157a24 100644 --- a/drivers/gpu/nvgpu/common/ltc/ltc_gv11b.c +++ b/drivers/gpu/nvgpu/common/ltc/ltc_gv11b.c @@ -53,7 +53,6 @@ void gv11b_ltc_set_zbc_stencil_entry(struct gk20a *g, void gv11b_ltc_init_fs_state(struct gk20a *g) { - struct gr_gk20a *gr = &g->gr; u32 ltc_intr; u32 reg; @@ -64,8 +63,8 @@ void gv11b_ltc_init_fs_state(struct gk20a *g) nvgpu_log_info(g, "%u ltcs out of %u", g->ltc_count, g->max_ltc_count); reg = gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()); - gr->slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(reg);; - gr->cacheline_size = + g->slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(reg);; + g->cacheline_size = U32(512) << ltc_ltcs_ltss_cbc_param_cache_line_size_v(reg); /* Disable LTC interrupts */ @@ -239,7 +238,7 @@ void gv11b_ltc_isr(struct gk20a *g, unsigned 
int ltc) { unsigned int slice; - for (slice = 0U; slice < g->gr.slices_per_ltc; slice++) { + for (slice = 0U; slice < g->slices_per_ltc; slice++) { gv11b_ltc_lts_isr(g, ltc, slice); } } diff --git a/drivers/gpu/nvgpu/common/ltc/ltc_tu104.c b/drivers/gpu/nvgpu/common/ltc/ltc_tu104.c index ba194622a..314f699fa 100644 --- a/drivers/gpu/nvgpu/common/ltc/ltc_tu104.c +++ b/drivers/gpu/nvgpu/common/ltc/ltc_tu104.c @@ -37,15 +37,14 @@ void ltc_tu104_init_fs_state(struct gk20a *g) { - struct gr_gk20a *gr = &g->gr; u32 reg; gv11b_ltc_init_fs_state(g); reg = nvgpu_readl(g, ltc_ltcs_ltss_cbc_param2_r()); - gr->slices_per_ltc = + g->slices_per_ltc = ltc_ltcs_ltss_cbc_param2_slices_per_ltc_v(reg); - gr->cacheline_size = + g->cacheline_size = U32(512) << ltc_ltcs_ltss_cbc_param2_cache_line_size_v(reg); /* disable PLC compression */ diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c index 3ff414513..8080e6f33 100644 --- a/drivers/gpu/nvgpu/common/mm/vm.c +++ b/drivers/gpu/nvgpu/common/mm/vm.c @@ -983,7 +983,7 @@ struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm, * Get the comptags state, alloc if necessary */ err = gk20a_alloc_or_get_comptags(g, os_buf, - &g->gr.comp_tags, + &g->cbc->comp_tags, &comptags); if (err != 0) { /* diff --git a/drivers/gpu/nvgpu/common/vgpu/cbc/cbc_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/cbc/cbc_vgpu.c index 189b54d2c..aeb4c2cd2 100644 --- a/drivers/gpu/nvgpu/common/vgpu/cbc/cbc_vgpu.c +++ b/drivers/gpu/nvgpu/common/vgpu/cbc/cbc_vgpu.c @@ -27,7 +27,7 @@ #include "cbc_vgpu.h" -int vgpu_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr) +int vgpu_cbc_alloc_comptags(struct gk20a *g, struct nvgpu_cbc *cbc) { struct vgpu_priv_data *priv = vgpu_get_priv_data(g); u32 max_comptag_lines = 0; @@ -35,19 +35,19 @@ int vgpu_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr) nvgpu_log_fn(g, " "); - gr->comptags_per_cacheline = priv->constants.comptags_per_cacheline; + cbc->comptags_per_cacheline = 
priv->constants.comptags_per_cacheline; max_comptag_lines = priv->constants.comptag_lines; if (max_comptag_lines < 2) { return -ENXIO; } - err = gk20a_comptag_allocator_init(g, &gr->comp_tags, max_comptag_lines); + err = gk20a_comptag_allocator_init(g, &cbc->comp_tags, max_comptag_lines); if (err) { return err; } - gr->max_comptag_lines = max_comptag_lines; + cbc->max_comptag_lines = max_comptag_lines; return 0; } diff --git a/drivers/gpu/nvgpu/common/vgpu/cbc/cbc_vgpu.h b/drivers/gpu/nvgpu/common/vgpu/cbc/cbc_vgpu.h index 2ebb66df8..451f5a667 100644 --- a/drivers/gpu/nvgpu/common/vgpu/cbc/cbc_vgpu.h +++ b/drivers/gpu/nvgpu/common/vgpu/cbc/cbc_vgpu.h @@ -24,8 +24,8 @@ #define NVGPU_CBC_VGPU_H struct gk20a; -struct gr_gk20a; +struct nvgpu_cbc; -int vgpu_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr); +int vgpu_cbc_alloc_comptags(struct gk20a *g, struct nvgpu_cbc *cbc); #endif /* NVGPU_CBC_VGPU_H */ diff --git a/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c index fe9940ea1..4576d8ef2 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c +++ b/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c @@ -646,8 +646,6 @@ static void vgpu_remove_gr_support(struct gr_gk20a *gr) { nvgpu_log_fn(gr->g, " "); - gk20a_comptag_allocator_destroy(gr->g, &gr->comp_tags); - nvgpu_gr_config_deinit(gr->g, gr->config); nvgpu_kfree(gr->g, gr->sm_to_cluster); @@ -685,11 +683,6 @@ static int vgpu_gr_init_gr_setup_sw(struct gk20a *g) goto clean_up; } - err = g->ops.cbc.alloc_comptags(g, gr); - if (err) { - goto clean_up; - } - err = vgpu_gr_alloc_global_ctx_buffers(g); if (err) { goto clean_up; diff --git a/drivers/gpu/nvgpu/common/vgpu/ltc/ltc_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/ltc/ltc_vgpu.c index 54006d700..16f78a235 100644 --- a/drivers/gpu/nvgpu/common/vgpu/ltc/ltc_vgpu.c +++ b/drivers/gpu/nvgpu/common/vgpu/ltc/ltc_vgpu.c @@ -39,11 +39,10 @@ int vgpu_determine_L2_size_bytes(struct gk20a *g) void vgpu_ltc_init_fs_state(struct gk20a *g) { 
struct vgpu_priv_data *priv = vgpu_get_priv_data(g); - struct gr_gk20a *gr = &g->gr; nvgpu_log_fn(g, " "); g->ltc_count = priv->constants.ltc_count; - gr->cacheline_size = priv->constants.cacheline_size; - gr->slices_per_ltc = priv->constants.slices_per_ltc; + g->cacheline_size = priv->constants.cacheline_size; + g->slices_per_ltc = priv->constants.slices_per_ltc; } diff --git a/drivers/gpu/nvgpu/common/vgpu/vgpu.c b/drivers/gpu/nvgpu/common/vgpu/vgpu.c index 5d8df3fc2..709d223eb 100644 --- a/drivers/gpu/nvgpu/common/vgpu/vgpu.c +++ b/drivers/gpu/nvgpu/common/vgpu/vgpu.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "fecs_trace_vgpu.h" @@ -453,6 +454,12 @@ int vgpu_finalize_poweron_common(struct gk20a *g) return err; } + err = nvgpu_cbc_init_support(g); + if (err != 0) { + nvgpu_err(g, "failed to init cbc"); + return err; + } + g->ops.chip_init_gpu_characteristics(g); g->ops.fifo.channel_resume(g); diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index 18999c5d0..cd1d5cc02 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -2092,11 +2093,6 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr) nvgpu_gr_ctx_desc_free(g, gr->gr_ctx_desc); - nvgpu_dma_free(g, &gr->compbit_store.mem); - - (void) memset(&gr->compbit_store, 0, - sizeof(struct compbit_store_desc)); - nvgpu_gr_config_deinit(g, gr->config); nvgpu_kfree(g, gr->sm_to_cluster); @@ -2114,7 +2110,7 @@ static void gk20a_remove_gr_support(struct gr_gk20a *gr) nvgpu_gr_hwpm_map_deinit(g, gr->hwpm_map); - gk20a_comptag_allocator_destroy(g, &gr->comp_tags); + nvgpu_cbc_remove_support(g); nvgpu_ecc_remove_support(g); nvgpu_gr_zbc_deinit(g, gr->zbc); @@ -2594,10 +2590,6 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g) goto out; } - if (g->ops.cbc.init != NULL) { - g->ops.cbc.init(g, gr); - } - if (g->ops.gr.disable_rd_coalesce != NULL) 
{ g->ops.gr.disable_rd_coalesce(g); } @@ -2804,13 +2796,6 @@ static int gk20a_init_gr_setup_sw(struct gk20a *g) goto clean_up; } - if (g->ops.cbc.alloc_comptags != NULL) { - err = g->ops.cbc.alloc_comptags(g, gr); - if (err != 0) { - goto clean_up; - } - } - err = gr_gk20a_init_zcull(g, gr); if (err != 0) { goto clean_up; diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h index d07656604..0c9928f61 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.h @@ -208,13 +208,6 @@ struct gr_gk20a { u32 num_fbps; u32 max_fbps_count; - u32 max_comptag_lines; - u32 compbit_backing_size; - u32 comptags_per_cacheline; - u32 slices_per_ltc; - u32 cacheline_size; - u32 gobs_per_comptagline_per_slice; - u32 bundle_cb_default_size; u32 min_gpm_fifo_depth; u32 bundle_cb_token_limit; @@ -228,12 +221,6 @@ struct gr_gk20a { u32 gfxp_wfi_timeout_count; bool gfxp_wfi_timeout_unit_usec; - /* - * The deductible memory size for max_comptag_mem (in MBytes) - * Usually close to memory size that running system is taking - */ - u32 comptag_mem_deduct; - struct nvgpu_gr_global_ctx_buffer_desc *global_ctx_buffer; struct nvgpu_gr_global_ctx_local_golden_image *local_golden_image; @@ -243,10 +230,6 @@ struct gr_gk20a { struct nvgpu_gr_hwpm_map *hwpm_map; - u32 max_comptag_mem; /* max memory size (MB) for comptag */ - struct compbit_store_desc compbit_store; - struct gk20a_comptag_allocator comp_tags; - struct gr_zcull_gk20a zcull; struct nvgpu_gr_zbc *zbc; diff --git a/drivers/gpu/nvgpu/hal/cbc/cbc_gm20b.c b/drivers/gpu/nvgpu/hal/cbc/cbc_gm20b.c index c550b702b..80787d204 100644 --- a/drivers/gpu/nvgpu/hal/cbc/cbc_gm20b.c +++ b/drivers/gpu/nvgpu/hal/cbc/cbc_gm20b.c @@ -37,10 +37,10 @@ #include "cbc_gm20b.h" -int gm20b_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr) +int gm20b_cbc_alloc_comptags(struct gk20a *g, struct nvgpu_cbc *cbc) { /* max memory size (MB) to cover */ - u32 max_size = gr->max_comptag_mem; + u32 max_size 
= g->max_comptag_mem; /* one tag line covers 128KB */ u32 max_comptag_lines = max_size << 3U; @@ -62,13 +62,18 @@ int gm20b_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr) return 0; } + /* Already initialized */ + if (cbc->max_comptag_lines != 0U) { + return 0; + } + if (max_comptag_lines > hw_max_comptag_lines) { max_comptag_lines = hw_max_comptag_lines; } compbit_backing_size = DIV_ROUND_UP(max_comptag_lines, comptags_per_cacheline) * - gr->cacheline_size * gr->slices_per_ltc * g->ltc_count; + g->cacheline_size * g->slices_per_ltc * g->ltc_count; /* aligned to 2KB * ltc_count */ compbit_backing_size += @@ -80,7 +85,7 @@ int gm20b_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr) max_comptag_lines = (compbit_backing_size * comptags_per_cacheline) / - (gr->cacheline_size * gr->slices_per_ltc * g->ltc_count); + (g->cacheline_size * g->slices_per_ltc * g->ltc_count); if (max_comptag_lines > hw_max_comptag_lines) { max_comptag_lines = hw_max_comptag_lines; @@ -96,14 +101,14 @@ int gm20b_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr) return err; } - err = gk20a_comptag_allocator_init(g, &gr->comp_tags, max_comptag_lines); + err = gk20a_comptag_allocator_init(g, &cbc->comp_tags, max_comptag_lines); if (err != 0) { return err; } - gr->max_comptag_lines = max_comptag_lines; - gr->comptags_per_cacheline = comptags_per_cacheline; - gr->compbit_backing_size = compbit_backing_size; + cbc->max_comptag_lines = max_comptag_lines; + cbc->comptags_per_cacheline = comptags_per_cacheline; + cbc->compbit_backing_size = compbit_backing_size; return 0; } @@ -111,7 +116,6 @@ int gm20b_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr) int gm20b_cbc_ctrl(struct gk20a *g, enum nvgpu_cbc_op op, u32 min, u32 max) { - struct gr_gk20a *gr = &g->gr; struct nvgpu_timeout timeout; int err = 0; u32 ltc, slice, ctrl1, val, hw_op = 0U; @@ -125,7 +129,7 @@ int gm20b_cbc_ctrl(struct gk20a *g, enum nvgpu_cbc_op op, trace_gk20a_ltc_cbc_ctrl_start(g->name, op, min, max); 
- if (gr->compbit_store.mem.size == 0ULL) { + if (g->cbc->compbit_store.mem.size == 0ULL) { return 0; } @@ -217,9 +221,9 @@ u32 gm20b_cbc_fix_config(struct gk20a *g, int base) } -void gm20b_cbc_init(struct gk20a *g, struct gr_gk20a *gr) +void gm20b_cbc_init(struct gk20a *g, struct nvgpu_cbc *cbc) { - u32 max_size = gr->max_comptag_mem; + u32 max_size = g->max_comptag_mem; u32 max_comptag_lines = max_size << 3U; u32 compbit_base_post_divide; @@ -229,10 +233,10 @@ void gm20b_cbc_init(struct gk20a *g, struct gr_gk20a *gr) if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) { compbit_store_iova = nvgpu_mem_get_phys_addr(g, - &gr->compbit_store.mem); + &cbc->compbit_store.mem); } else { compbit_store_iova = nvgpu_mem_get_addr(g, - &gr->compbit_store.mem); + &cbc->compbit_store.mem); } compbit_base_post_divide64 = compbit_store_iova >> @@ -263,7 +267,7 @@ void gm20b_cbc_init(struct gk20a *g, struct gr_gk20a *gr) (u32)(compbit_store_iova & 0xffffffffU), compbit_base_post_divide); - gr->compbit_store.base_hw = compbit_base_post_divide; + cbc->compbit_store.base_hw = compbit_base_post_divide; g->ops.cbc.ctrl(g, nvgpu_cbc_op_invalidate, 0, max_comptag_lines - 1U); diff --git a/drivers/gpu/nvgpu/hal/cbc/cbc_gm20b.h b/drivers/gpu/nvgpu/hal/cbc/cbc_gm20b.h index 808ddc021..b249ac658 100644 --- a/drivers/gpu/nvgpu/hal/cbc/cbc_gm20b.h +++ b/drivers/gpu/nvgpu/hal/cbc/cbc_gm20b.h @@ -28,12 +28,12 @@ #include struct gk20a; -struct gr_gk20a; struct gpu_ops; +struct nvgpu_cbc; enum nvgpu_cbc_op; -int gm20b_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr); -void gm20b_cbc_init(struct gk20a *g, struct gr_gk20a *gr); +int gm20b_cbc_alloc_comptags(struct gk20a *g, struct nvgpu_cbc *cbc); +void gm20b_cbc_init(struct gk20a *g, struct nvgpu_cbc *cbc); int gm20b_cbc_ctrl(struct gk20a *g, enum nvgpu_cbc_op op, u32 min, u32 max); u32 gm20b_cbc_fix_config(struct gk20a *g, int base); diff --git a/drivers/gpu/nvgpu/hal/cbc/cbc_gp10b.c b/drivers/gpu/nvgpu/hal/cbc/cbc_gp10b.c index 
117785147..65390506c 100644 --- a/drivers/gpu/nvgpu/hal/cbc/cbc_gp10b.c +++ b/drivers/gpu/nvgpu/hal/cbc/cbc_gp10b.c @@ -36,10 +36,10 @@ #include "cbc_gp10b.h" -int gp10b_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr) +int gp10b_cbc_alloc_comptags(struct gk20a *g, struct nvgpu_cbc *cbc) { /* max memory size (MB) to cover */ - u32 max_size = gr->max_comptag_mem; + u32 max_size = g->max_comptag_mem; /* one tag line covers 64KB */ u32 max_comptag_lines = max_size << 4U; @@ -66,7 +66,7 @@ int gp10b_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr) } /* Already initialized */ - if (gr->max_comptag_lines != 0U) { + if (cbc->max_comptag_lines != 0U) { return 0; } @@ -76,9 +76,9 @@ int gp10b_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr) compbit_backing_size = roundup(max_comptag_lines * gobs_per_comptagline_per_slice, - gr->cacheline_size); + g->cacheline_size); compbit_backing_size = roundup( - compbit_backing_size * gr->slices_per_ltc * g->ltc_count, + compbit_backing_size * g->slices_per_ltc * g->ltc_count, g->ops.fb.compressible_page_size(g)); /* aligned to 2KB * ltc_count */ @@ -101,15 +101,16 @@ int gp10b_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr) return err; } - err = gk20a_comptag_allocator_init(g, &gr->comp_tags, max_comptag_lines); + err = gk20a_comptag_allocator_init(g, &cbc->comp_tags, + max_comptag_lines); if (err != 0) { return err; } - gr->max_comptag_lines = max_comptag_lines; - gr->comptags_per_cacheline = comptags_per_cacheline; - gr->gobs_per_comptagline_per_slice = gobs_per_comptagline_per_slice; - gr->compbit_backing_size = compbit_backing_size; + cbc->max_comptag_lines = max_comptag_lines; + cbc->comptags_per_cacheline = comptags_per_cacheline; + cbc->gobs_per_comptagline_per_slice = gobs_per_comptagline_per_slice; + cbc->compbit_backing_size = compbit_backing_size; return 0; } @@ -117,7 +118,6 @@ int gp10b_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr) int gp10b_cbc_ctrl(struct gk20a *g, enum 
nvgpu_cbc_op op, u32 min, u32 max) { - struct gr_gk20a *gr = &g->gr; struct nvgpu_timeout timeout; int err = 0; u32 ltc, slice, ctrl1, val, hw_op = 0U; @@ -131,7 +131,7 @@ int gp10b_cbc_ctrl(struct gk20a *g, enum nvgpu_cbc_op op, trace_gk20a_ltc_cbc_ctrl_start(g->name, op, min, max); - if (gr->compbit_store.mem.size == 0U) { + if (g->cbc->compbit_store.mem.size == 0U) { return 0; } diff --git a/drivers/gpu/nvgpu/hal/cbc/cbc_gp10b.h b/drivers/gpu/nvgpu/hal/cbc/cbc_gp10b.h index 7a891b028..d0b22d76c 100644 --- a/drivers/gpu/nvgpu/hal/cbc/cbc_gp10b.h +++ b/drivers/gpu/nvgpu/hal/cbc/cbc_gp10b.h @@ -23,11 +23,11 @@ #ifndef CBC_GP10B_H #define CBC_GP10B_H struct gk20a; -struct gpu_ops; +struct nvgpu_cbc; #include -int gp10b_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr); -int gp10b_cbc_ctrl(struct gk20a *g, enum nvgpu_cbc_op op, - u32 min, u32 max); +int gp10b_cbc_alloc_comptags(struct gk20a *g, struct nvgpu_cbc *cbc); +int gp10b_cbc_ctrl(struct gk20a *g, enum nvgpu_cbc_op op, u32 min, u32 max); + #endif diff --git a/drivers/gpu/nvgpu/hal/cbc/cbc_gv11b.c b/drivers/gpu/nvgpu/hal/cbc/cbc_gv11b.c index aa2df74dc..047758999 100644 --- a/drivers/gpu/nvgpu/hal/cbc/cbc_gv11b.c +++ b/drivers/gpu/nvgpu/hal/cbc/cbc_gv11b.c @@ -1,5 +1,5 @@ /* - * GP10B CBC + * GV11B CBC * * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 
* @@ -29,15 +29,15 @@ #include "cbc_gv11b.h" -void gv11b_cbc_init(struct gk20a *g, struct gr_gk20a *gr) +void gv11b_cbc_init(struct gk20a *g, struct nvgpu_cbc *cbc) { - u32 max_size = gr->max_comptag_mem; + u32 max_size = g->max_comptag_mem; /* one tag line covers 64KB */ u32 max_comptag_lines = max_size << 4; nvgpu_log_fn(g, " "); - g->ops.fb.cbc_configure(g, gr); + g->ops.fb.cbc_configure(g, cbc); g->ops.cbc.ctrl(g, nvgpu_cbc_op_invalidate, 0, max_comptag_lines - 1U); diff --git a/drivers/gpu/nvgpu/hal/cbc/cbc_gv11b.h b/drivers/gpu/nvgpu/hal/cbc/cbc_gv11b.h index 3bc33c528..c9366e7cd 100644 --- a/drivers/gpu/nvgpu/hal/cbc/cbc_gv11b.h +++ b/drivers/gpu/nvgpu/hal/cbc/cbc_gv11b.h @@ -23,8 +23,8 @@ #ifndef CBC_GV11B_H #define CBC_GV11B_H struct gk20a; -struct gpu_ops; +struct nvgpu_cbc; -void gv11b_cbc_init(struct gk20a *g, struct gr_gk20a *gr); +void gv11b_cbc_init(struct gk20a *g, struct nvgpu_cbc *cbc); #endif diff --git a/drivers/gpu/nvgpu/hal/cbc/cbc_tu104.c b/drivers/gpu/nvgpu/hal/cbc/cbc_tu104.c index f5ed389a3..c200dd50c 100644 --- a/drivers/gpu/nvgpu/hal/cbc/cbc_tu104.c +++ b/drivers/gpu/nvgpu/hal/cbc/cbc_tu104.c @@ -1,4 +1,6 @@ /* + * TU104 CBC + * * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a @@ -43,10 +45,10 @@ u64 tu104_cbc_get_base_divisor(struct gk20a *g) ltc_ltcs_ltss_cbc_base_alignment_shift_v(); } -int tu104_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr) +int tu104_cbc_alloc_comptags(struct gk20a *g, struct nvgpu_cbc *cbc) { /* max memory size (MB) to cover */ - u32 max_size = gr->max_comptag_mem; + u32 max_size = g->max_comptag_mem; /* one tag line covers 64KB */ u32 max_comptag_lines = max_size << 4U; u32 compbit_backing_size; @@ -64,7 +66,7 @@ int tu104_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr) } /* Already initialized */ - if (gr->max_comptag_lines != 0U) { + if (cbc->max_comptag_lines != 0U) { return 0; } @@ -78,7 +80,7 @@ int tu104_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr) ctags_size = ltc_ltcs_ltss_cbc_param_bytes_per_comptagline_per_slice_v(cbc_param); - ctags_per_cacheline = gr->cacheline_size / ctags_size; + ctags_per_cacheline = g->cacheline_size / ctags_size; amap_divide_rounding = (U32(2U) * U32(1024U)) << ltc_ltcs_ltss_cbc_param_amap_divide_rounding_v(cbc_param); @@ -86,9 +88,9 @@ int tu104_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr) ltc_ltcs_ltss_cbc_param_amap_swizzle_rounding_v(cbc_param); compbit_backing_size = - roundup(max_comptag_lines * ctags_size, gr->cacheline_size); + roundup(max_comptag_lines * ctags_size, g->cacheline_size); compbit_backing_size = - compbit_backing_size * gr->slices_per_ltc * g->ltc_count; + compbit_backing_size * g->slices_per_ltc * g->ltc_count; compbit_backing_size += g->ltc_count * amap_divide_rounding; compbit_backing_size += amap_swizzle_rounding; @@ -102,22 +104,22 @@ int tu104_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr) return err; } - err = gk20a_comptag_allocator_init(g, &gr->comp_tags, max_comptag_lines); + err = gk20a_comptag_allocator_init(g, &cbc->comp_tags, max_comptag_lines); if (err != 0) { return err; } - gr->max_comptag_lines = 
max_comptag_lines; - gr->comptags_per_cacheline = ctags_per_cacheline; - gr->gobs_per_comptagline_per_slice = ctags_size; - gr->compbit_backing_size = compbit_backing_size; + cbc->max_comptag_lines = max_comptag_lines; + cbc->comptags_per_cacheline = ctags_per_cacheline; + cbc->gobs_per_comptagline_per_slice = ctags_size; + cbc->compbit_backing_size = compbit_backing_size; nvgpu_log_info(g, "compbit backing store size : %d", compbit_backing_size); nvgpu_log_info(g, "max comptag lines : %d", max_comptag_lines); nvgpu_log_info(g, "gobs_per_comptagline_per_slice: %d", - gr->gobs_per_comptagline_per_slice); + cbc->gobs_per_comptagline_per_slice); return 0; } @@ -125,11 +127,10 @@ int tu104_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr) int tu104_cbc_ctrl(struct gk20a *g, enum nvgpu_cbc_op op, u32 min, u32 max) { - struct gr_gk20a *gr = &g->gr; struct nvgpu_timeout timeout; int err = 0; u32 ltc, slice, ctrl1, val, hw_op = 0U; - u32 slices_per_ltc = gr->slices_per_ltc; + u32 slices_per_ltc = g->slices_per_ltc; u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); const u32 max_lines = 16384U; @@ -138,7 +139,7 @@ int tu104_cbc_ctrl(struct gk20a *g, enum nvgpu_cbc_op op, trace_gk20a_ltc_cbc_ctrl_start(g->name, op, min, max); - if (gr->compbit_store.mem.size == 0U) { + if (g->cbc->compbit_store.mem.size == 0U) { return 0; } @@ -218,11 +219,9 @@ out: return err; } -void tu104_cbc_init(struct gk20a *g, struct gr_gk20a *gr) +void tu104_cbc_init(struct gk20a *g, struct nvgpu_cbc *cbc) { - - g->ops.fb.cbc_configure(g, gr); - + g->ops.fb.cbc_configure(g, cbc); g->ops.cbc.ctrl(g, nvgpu_cbc_op_invalidate, - 0, gr->max_comptag_lines - 1U); + 0, cbc->max_comptag_lines - 1U); } diff --git a/drivers/gpu/nvgpu/hal/cbc/cbc_tu104.h b/drivers/gpu/nvgpu/hal/cbc/cbc_tu104.h index 5a3c701ad..5c3b94552 100644 --- a/drivers/gpu/nvgpu/hal/cbc/cbc_tu104.h +++ b/drivers/gpu/nvgpu/hal/cbc/cbc_tu104.h @@ -27,12 
+27,12 @@ enum nvgpu_cbc_op; struct gk20a; -struct gr_gk20a; +struct nvgpu_cbc; u64 tu104_cbc_get_base_divisor(struct gk20a *g); -int tu104_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr); +int tu104_cbc_alloc_comptags(struct gk20a *g, struct nvgpu_cbc *cbc); int tu104_cbc_ctrl(struct gk20a *g, enum nvgpu_cbc_op op, u32 min, u32 max); -void tu104_cbc_init(struct gk20a *g, struct gr_gk20a *gr); +void tu104_cbc_init(struct gk20a *g, struct nvgpu_cbc *cbc); #endif diff --git a/drivers/gpu/nvgpu/include/nvgpu/cbc.h b/drivers/gpu/nvgpu/include/nvgpu/cbc.h index a92b98484..0bc298165 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/cbc.h +++ b/drivers/gpu/nvgpu/include/nvgpu/cbc.h @@ -24,6 +24,9 @@ #define NVGPU_CBC_H #include +#include + +#include "gk20a/mm_gk20a.h" struct gk20a; @@ -33,6 +36,18 @@ enum nvgpu_cbc_op { nvgpu_cbc_op_invalidate, }; + +struct nvgpu_cbc { + u32 compbit_backing_size; + u32 comptags_per_cacheline; + u32 gobs_per_comptagline_per_slice; + u32 max_comptag_lines; + struct gk20a_comptag_allocator comp_tags; + struct compbit_store_desc compbit_store; +}; + +int nvgpu_cbc_init_support(struct gk20a *g); +void nvgpu_cbc_remove_support(struct gk20a *g); int nvgpu_cbc_alloc(struct gk20a *g, size_t compbit_backing_size, bool vidmem_alloc); diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index 3e3f7c978..b5c36aedb 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h @@ -71,6 +71,7 @@ struct nvgpu_channel_hw_state; struct nvgpu_engine_status_info; struct nvgpu_pbdma_status_info; enum nvgpu_nvlink_minion_dlcmd; +struct nvgpu_cbc; #include #include @@ -232,9 +233,10 @@ struct gpu_ops { } err_ops; } ltc; struct { - void (*init)(struct gk20a *g, struct gr_gk20a *gr); + void (*init)(struct gk20a *g, struct nvgpu_cbc *cbc); u64 (*get_base_divisor)(struct gk20a *g); - int (*alloc_comptags)(struct gk20a *g, struct gr_gk20a *gr); + int (*alloc_comptags)(struct gk20a *g, 
+ struct nvgpu_cbc *cbc); int (*ctrl)(struct gk20a *g, enum nvgpu_cbc_op op, u32 min, u32 max); u32 (*fix_config)(struct gk20a *g, int base); @@ -703,7 +705,7 @@ struct gpu_ops { } gr; struct { void (*init_hw)(struct gk20a *g); - void (*cbc_configure)(struct gk20a *g, struct gr_gk20a *gr); + void (*cbc_configure)(struct gk20a *g, struct nvgpu_cbc *cbc); void (*init_fs_state)(struct gk20a *g); void (*init_uncompressed_kind_map)(struct gk20a *g); void (*init_kind_attr)(struct gk20a *g); @@ -1926,10 +1928,24 @@ struct gk20a { int irqs_enabled; int irq_stall; /* can be same as irq_nonstall in case of PCI */ int irq_nonstall; + + /* This data will be moved to nvgpu_ltc_info */ u32 max_ltc_count; u32 ltc_count; + u32 slices_per_ltc; + u32 cacheline_size; u32 ltc_streamid; + /* + * The deductible memory size for max_comptag_mem (in MBytes) + * Usually close to memory size that running system is taking + */ + u32 comptag_mem_deduct; + + u32 max_comptag_mem; /* max memory size (MB) for comptag */ + + struct nvgpu_cbc *cbc; + struct gk20a_worker { struct nvgpu_thread poll_task; nvgpu_atomic_t put; diff --git a/drivers/gpu/nvgpu/os/linux/cde.c b/drivers/gpu/nvgpu/os/linux/cde.c index 0bcca6e54..86e204113 100644 --- a/drivers/gpu/nvgpu/os/linux/cde.c +++ b/drivers/gpu/nvgpu/os/linux/cde.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -102,12 +103,13 @@ __must_hold(&cde_app->mutex) struct gk20a *g = &l->g; struct channel_gk20a *ch = cde_ctx->ch; struct vm_gk20a *vm = ch->vm; + struct nvgpu_cbc *cbc = g->cbc; trace_gk20a_cde_remove_ctx(cde_ctx); /* release mapped memory */ gk20a_deinit_cde_img(cde_ctx); - nvgpu_gmmu_unmap(vm, &g->gr.compbit_store.mem, + nvgpu_gmmu_unmap(vm, &cbc->compbit_store.mem, cde_ctx->backing_store_vaddr); /* @@ -403,6 +405,7 @@ static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx) { struct nvgpu_os_linux *l = cde_ctx->l; struct gk20a *g = &l->g; + struct nvgpu_cbc *cbc = g->cbc; struct nvgpu_mem 
*target_mem; u32 *target_mem_ptr; u64 new_data; @@ -417,11 +420,11 @@ static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx) switch (param->id) { case TYPE_PARAM_COMPTAGS_PER_CACHELINE: - new_data = g->gr.comptags_per_cacheline; + new_data = cbc->comptags_per_cacheline; break; case TYPE_PARAM_GPU_CONFIGURATION: - new_data = (u64)g->ltc_count * g->gr.slices_per_ltc * - g->gr.cacheline_size; + new_data = (u64)g->ltc_count * g->slices_per_ltc * + g->cacheline_size; break; case TYPE_PARAM_FIRSTPAGEOFFSET: new_data = cde_ctx->surf_param_offset; @@ -439,7 +442,7 @@ static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx) new_data = cde_ctx->compbit_size; break; case TYPE_PARAM_BACKINGSTORE_SIZE: - new_data = g->gr.compbit_store.mem.size; + new_data = cbc->compbit_store.mem.size; break; case TYPE_PARAM_SOURCE_SMMU_ADDR: new_data = gpuva_to_iova_base(cde_ctx->vm, @@ -451,10 +454,10 @@ static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx) } break; case TYPE_PARAM_BACKINGSTORE_BASE_HW: - new_data = g->gr.compbit_store.base_hw; + new_data = cbc->compbit_store.base_hw; break; case TYPE_PARAM_GOBS_PER_COMPTAGLINE_PER_SLICE: - new_data = g->gr.gobs_per_comptagline_per_slice; + new_data = cbc->gobs_per_comptagline_per_slice; break; case TYPE_PARAM_SCATTERBUFFER: new_data = cde_ctx->scatterbuffer_vaddr; @@ -1014,6 +1017,7 @@ __releases(&l->cde_app->mutex) { struct gk20a *g = &l->g; struct gk20a_cde_ctx *cde_ctx = NULL; + struct nvgpu_cbc *cbc = g->cbc; struct gk20a_comptags comptags; struct nvgpu_os_buffer os_buf = { compbits_scatter_buf, @@ -1199,7 +1203,7 @@ __releases(&l->cde_app->mutex) } nvgpu_log(g, gpu_dbg_cde, "cde: buffer=cbc, size=%zu, gpuva=%llx\n", - g->gr.compbit_store.mem.size, cde_ctx->backing_store_vaddr); + cbc->compbit_store.mem.size, cde_ctx->backing_store_vaddr); nvgpu_log(g, gpu_dbg_cde, "cde: buffer=compbits, size=%llu, gpuva=%llx\n", cde_ctx->compbit_size, cde_ctx->compbit_vaddr); nvgpu_log(g, gpu_dbg_cde, "cde: 
buffer=scatterbuffer, size=%llu, gpuva=%llx\n", @@ -1310,10 +1314,10 @@ static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx) { struct nvgpu_os_linux *l = cde_ctx->l; struct gk20a *g = &l->g; + struct nvgpu_cbc *cbc = g->cbc; struct nvgpu_firmware *img; struct channel_gk20a *ch; struct tsg_gk20a *tsg; - struct gr_gk20a *gr = &g->gr; struct nvgpu_setup_bind_args setup_bind_args; int err = 0; u64 vaddr; @@ -1366,12 +1370,12 @@ static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx) } /* map backing store to gpu virtual space */ - vaddr = nvgpu_gmmu_map(ch->vm, &gr->compbit_store.mem, - g->gr.compbit_store.mem.size, + vaddr = nvgpu_gmmu_map(ch->vm, &cbc->compbit_store.mem, + cbc->compbit_store.mem.size, NVGPU_VM_MAP_CACHEABLE, gk20a_mem_flag_read_only, false, - gr->compbit_store.mem.aperture); + cbc->compbit_store.mem.aperture); if (!vaddr) { nvgpu_warn(g, "cde: cannot map compression bit backing store"); @@ -1398,7 +1402,7 @@ static int gk20a_cde_load(struct gk20a_cde_ctx *cde_ctx) return 0; err_init_cde_img: - nvgpu_gmmu_unmap(ch->vm, &g->gr.compbit_store.mem, vaddr); + nvgpu_gmmu_unmap(ch->vm, &cbc->compbit_store.mem, vaddr); err_map_backingstore: err_setup_bind: nvgpu_vm_put(ch->vm); diff --git a/drivers/gpu/nvgpu/os/linux/driver_common.c b/drivers/gpu/nvgpu/os/linux/driver_common.c index 6f9ffd75e..e890d0e49 100644 --- a/drivers/gpu/nvgpu/os/linux/driver_common.c +++ b/drivers/gpu/nvgpu/os/linux/driver_common.c @@ -105,7 +105,7 @@ static void nvgpu_init_gr_vars(struct gk20a *g) gk20a_init_gr(g); nvgpu_log_info(g, "total ram pages : %lu", totalram_pages); - g->gr.max_comptag_mem = totalram_size_in_mb; + g->max_comptag_mem = totalram_size_in_mb; } static void nvgpu_init_timeout(struct gk20a *g) diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c index 4a04204c9..3365393b7 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c @@ -340,13 +340,13 @@ 
gk20a_ctrl_ioctl_gpu_characteristics( gpu.fbp_en_mask = g->ops.gr.get_fbp_en_mask(g); gpu.max_ltc_per_fbp = g->ops.top.get_max_ltc_per_fbp(g); gpu.max_lts_per_ltc = g->ops.top.get_max_lts_per_ltc(g); - gpu.gr_compbit_store_base_hw = g->gr.compbit_store.base_hw; + gpu.gr_compbit_store_base_hw = g->cbc->compbit_store.base_hw; gpu.gr_gobs_per_comptagline_per_slice = - g->gr.gobs_per_comptagline_per_slice; + g->cbc->gobs_per_comptagline_per_slice; gpu.num_ltc = g->ltc_count; - gpu.lts_per_ltc = g->gr.slices_per_ltc; - gpu.cbc_cache_line_size = g->gr.cacheline_size; - gpu.cbc_comptags_per_line = g->gr.comptags_per_cacheline; + gpu.lts_per_ltc = g->slices_per_ltc; + gpu.cbc_cache_line_size = g->cacheline_size; + gpu.cbc_comptags_per_line = g->cbc->comptags_per_cacheline; if (g->ops.clk.get_maxrate) gpu.max_freq = g->ops.clk.get_maxrate(g, CTRL_CLK_DOMAIN_GPCCLK); diff --git a/drivers/gpu/nvgpu/os/linux/linux-cbc.c b/drivers/gpu/nvgpu/os/linux/linux-cbc.c index 056798c2c..f60ece563 100644 --- a/drivers/gpu/nvgpu/os/linux/linux-cbc.c +++ b/drivers/gpu/nvgpu/os/linux/linux-cbc.c @@ -19,14 +19,12 @@ #include #include -#include "gk20a/gr_gk20a.h" - int nvgpu_cbc_alloc(struct gk20a *g, size_t compbit_backing_size, bool vidmem_alloc) { - struct gr_gk20a *gr = &g->gr; + struct nvgpu_cbc *cbc = g->cbc; - if (nvgpu_mem_is_valid(&gr->compbit_store.mem)) + if (nvgpu_mem_is_valid(&cbc->compbit_store.mem)) return 0; if (vidmem_alloc) { @@ -40,11 +38,11 @@ int nvgpu_cbc_alloc(struct gk20a *g, size_t compbit_backing_size, */ return nvgpu_dma_alloc_vid(g, compbit_backing_size, - &gr->compbit_store.mem); + &cbc->compbit_store.mem); } else { return nvgpu_dma_alloc_flags_sys(g, NVGPU_DMA_PHYSICALLY_ADDRESSED, compbit_backing_size, - &gr->compbit_store.mem); + &cbc->compbit_store.mem); } } diff --git a/drivers/gpu/nvgpu/os/linux/sysfs.c b/drivers/gpu/nvgpu/os/linux/sysfs.c index afce42a24..95f312012 100644 --- a/drivers/gpu/nvgpu/os/linux/sysfs.c +++ b/drivers/gpu/nvgpu/os/linux/sysfs.c @@ 
-1097,9 +1097,9 @@ static ssize_t comptag_mem_deduct_store(struct device *dev, return -EINVAL; } - g->gr.comptag_mem_deduct = val; + g->comptag_mem_deduct = val; /* Deduct the part taken by the running system */ - g->gr.max_comptag_mem -= val; + g->max_comptag_mem -= val; return count; } @@ -1109,7 +1109,7 @@ static ssize_t comptag_mem_deduct_show(struct device *dev, { struct gk20a *g = get_gk20a(dev); - return sprintf(buf, "%d\n", g->gr.comptag_mem_deduct); + return sprintf(buf, "%d\n", g->comptag_mem_deduct); } static DEVICE_ATTR(comptag_mem_deduct, ROOTRW, diff --git a/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.c b/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.c index cab428c9c..e91804732 100644 --- a/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.c +++ b/drivers/gpu/nvgpu/os/linux/vgpu/vgpu_linux.c @@ -434,7 +434,7 @@ int vgpu_probe(struct platform_device *pdev) gk20a_init_gr(gk20a); nvgpu_log_info(gk20a, "total ram pages : %lu", totalram_pages); - gk20a->gr.max_comptag_mem = totalram_size_in_mb; + gk20a->max_comptag_mem = totalram_size_in_mb; nvgpu_ref_init(&gk20a->refcount);