From a3289cb80c7baa4fd5fe31fc64e6ba950d58b384 Mon Sep 17 00:00:00 2001 From: Seshendra Gadagottu Date: Thu, 28 Feb 2019 11:08:56 -0800 Subject: [PATCH] gpu: nvgpu: create cbc unit Create a Compression Bit Cache (CBC) unit to keep comptag-cache-related functionality in one place. This patch moves the following gpu ops from ltc to cbc and renames them accordingly: void (*init)(struct gk20a *g, struct gr_gk20a *gr); u64 (*get_base_divisor)(struct gk20a *g); int (*alloc_comptags)(struct gk20a *g, struct gr_gk20a *gr); int (*ctrl)(struct gk20a *g, enum gk20a_cbc_op op, u32 min, u32 max); u32 (*fix_config)(struct gk20a *g, int base); To avoid ambiguity, the function pointer init_comptags is renamed to alloc_comptags. The following function is moved from ltc.h to cbc.h: nvgpu_ltc_alloc_cbc -> nvgpu_cbc_alloc. The file implementing nvgpu_cbc_alloc is also renamed: os/linux/ltc.c -> os/linux/linux-cbc.c JIRA NVGPU-2897 Change-Id: Ide32a98567e9a3f0a784d62221a6f484f8343e53 Signed-off-by: Seshendra Gadagottu Reviewed-on: https://git-master.nvidia.com/r/2030194 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/Makefile | 6 +- drivers/gpu/nvgpu/Makefile.sources | 4 + drivers/gpu/nvgpu/common/fb/fb_gv11b.c | 6 +- drivers/gpu/nvgpu/common/fb/fb_tu104.c | 4 +- drivers/gpu/nvgpu/common/ltc/ltc_gm20b.c | 232 --------------- drivers/gpu/nvgpu/common/ltc/ltc_gm20b.h | 11 - drivers/gpu/nvgpu/common/ltc/ltc_gp10b.c | 178 +----------- drivers/gpu/nvgpu/common/ltc/ltc_gp10b.h | 5 +- drivers/gpu/nvgpu/common/ltc/ltc_tu104.c | 181 ------------ drivers/gpu/nvgpu/common/ltc/ltc_tu104.h | 8 +- drivers/gpu/nvgpu/common/mm/vm.c | 4 +- drivers/gpu/nvgpu/common/vgpu/cbc/cbc_vgpu.c | 53 ++++ drivers/gpu/nvgpu/common/vgpu/cbc/cbc_vgpu.h | 31 ++ .../nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c | 12 +- drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c | 2 +- .../nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c | 10 +- drivers/gpu/nvgpu/common/vgpu/ltc/ltc_vgpu.c | 25 -- 
drivers/gpu/nvgpu/common/vgpu/ltc/ltc_vgpu.h | 1 - drivers/gpu/nvgpu/gk20a/gr_gk20a.c | 8 +- drivers/gpu/nvgpu/gm20b/hal_gm20b.c | 12 +- drivers/gpu/nvgpu/gp10b/hal_gp10b.c | 13 +- drivers/gpu/nvgpu/gv100/hal_gv100.c | 11 +- drivers/gpu/nvgpu/gv11b/hal_gv11b.c | 11 +- drivers/gpu/nvgpu/hal/cbc/cbc_gm20b.c | 271 ++++++++++++++++++ drivers/gpu/nvgpu/hal/cbc/cbc_gm20b.h | 45 +++ drivers/gpu/nvgpu/hal/cbc/cbc_gp10b.c | 213 ++++++++++++++ drivers/gpu/nvgpu/hal/cbc/cbc_gp10b.h | 33 +++ drivers/gpu/nvgpu/hal/cbc/cbc_tu104.c | 218 ++++++++++++++ drivers/gpu/nvgpu/hal/cbc/cbc_tu104.h | 37 +++ drivers/gpu/nvgpu/include/nvgpu/cbc.h | 33 +++ drivers/gpu/nvgpu/include/nvgpu/gk20a.h | 14 +- drivers/gpu/nvgpu/include/nvgpu/ltc.h | 4 +- .../gpu/nvgpu/os/linux/{ltc.c => linux-cbc.c} | 6 +- drivers/gpu/nvgpu/os/posix/stubs.c | 6 +- drivers/gpu/nvgpu/tu104/hal_tu104.c | 20 +- 35 files changed, 1034 insertions(+), 694 deletions(-) create mode 100644 drivers/gpu/nvgpu/common/vgpu/cbc/cbc_vgpu.c create mode 100644 drivers/gpu/nvgpu/common/vgpu/cbc/cbc_vgpu.h create mode 100644 drivers/gpu/nvgpu/hal/cbc/cbc_gm20b.c create mode 100644 drivers/gpu/nvgpu/hal/cbc/cbc_gm20b.h create mode 100644 drivers/gpu/nvgpu/hal/cbc/cbc_gp10b.c create mode 100644 drivers/gpu/nvgpu/hal/cbc/cbc_gp10b.h create mode 100644 drivers/gpu/nvgpu/hal/cbc/cbc_tu104.c create mode 100644 drivers/gpu/nvgpu/hal/cbc/cbc_tu104.h create mode 100644 drivers/gpu/nvgpu/include/nvgpu/cbc.h rename drivers/gpu/nvgpu/os/linux/{ltc.c => linux-cbc.c} (89%) diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index 1739ba6e8..cc76de2a3 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile @@ -178,6 +178,9 @@ nvgpu-y += \ hal/power_features/cg/gv100_gating_reglist.o \ hal/power_features/cg/gv11b_gating_reglist.o \ hal/power_features/cg/tu104_gating_reglist.o \ + hal/cbc/cbc_gm20b.o \ + hal/cbc/cbc_gp10b.o \ + hal/cbc/cbc_tu104.o \ hal/fuse/fuse_gm20b.o \ hal/fuse/fuse_gp10b.o \ hal/fuse/fuse_gp106.o 
@@ -222,7 +225,7 @@ nvgpu-y += \ os/linux/nvlink.o \ os/linux/dt.o \ os/linux/ecc_sysfs.o \ - os/linux/ltc.o \ + os/linux/linux-cbc.o \ os/linux/os_ops_tu104.o \ os/linux/bsearch.o @@ -407,6 +410,7 @@ nvgpu-$(CONFIG_GK20A_VIDMEM) += \ nvgpu-$(CONFIG_TEGRA_GR_VIRTUALIZATION) += \ common/vgpu/ltc/ltc_vgpu.o \ + common/vgpu/cbc/cbc_vgpu.o \ common/vgpu/gr/gr_vgpu.o \ common/vgpu/gr/ctx_vgpu.o \ common/vgpu/fifo/fifo_vgpu.o \ diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources index 90abc7b44..393049b8b 100644 --- a/drivers/gpu/nvgpu/Makefile.sources +++ b/drivers/gpu/nvgpu/Makefile.sources @@ -323,6 +323,7 @@ srcs += common/sim.c \ common/vgpu/clk_vgpu.c \ common/vgpu/debugger_vgpu.c \ common/vgpu/ltc/ltc_vgpu.c \ + common/vgpu/cbc/cbc_vgpu.c \ common/vgpu/ce_vgpu.c \ common/vgpu/gv11b/vgpu_gv11b.c \ common/vgpu/gv11b/vgpu_hal_gv11b.c \ @@ -343,6 +344,9 @@ srcs += common/sim.c \ hal/power_features/cg/gp106_gating_reglist.c \ hal/power_features/cg/gv100_gating_reglist.c \ hal/power_features/cg/tu104_gating_reglist.c \ + hal/cbc/cbc_gm20b.c \ + hal/cbc/cbc_gp10b.c \ + hal/cbc/cbc_tu104.c \ hal/fuse/fuse_gm20b.c \ hal/fuse/fuse_gp10b.c \ hal/fuse/fuse_gp106.c diff --git a/drivers/gpu/nvgpu/common/fb/fb_gv11b.c b/drivers/gpu/nvgpu/common/fb/fb_gv11b.c index eb6e6447e..378b6005d 100644 --- a/drivers/gpu/nvgpu/common/fb/fb_gv11b.c +++ b/drivers/gpu/nvgpu/common/fb/fb_gv11b.c @@ -154,9 +154,9 @@ void gv11b_fb_init_cbc(struct gk20a *g, struct gr_gk20a *gr) compbit_base_post_divide++; } - if (g->ops.ltc.cbc_fix_config != NULL) { + if (g->ops.cbc.fix_config != NULL) { compbit_base_post_divide = - g->ops.ltc.cbc_fix_config(g, compbit_base_post_divide); + g->ops.cbc.fix_config(g, compbit_base_post_divide); } gk20a_writel(g, fb_mmu_cbc_base_r(), @@ -172,7 +172,7 @@ void gv11b_fb_init_cbc(struct gk20a *g, struct gr_gk20a *gr) gr->compbit_store.base_hw = compbit_base_post_divide; - g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_invalidate, + g->ops.cbc.ctrl(g, 
gk20a_cbc_op_invalidate, 0, max_comptag_lines - 1U); } diff --git a/drivers/gpu/nvgpu/common/fb/fb_tu104.c b/drivers/gpu/nvgpu/common/fb/fb_tu104.c index b2740d95f..d414ce708 100644 --- a/drivers/gpu/nvgpu/common/fb/fb_tu104.c +++ b/drivers/gpu/nvgpu/common/fb/fb_tu104.c @@ -435,7 +435,7 @@ void fb_tu104_init_cbc(struct gk20a *g, struct gr_gk20a *gr) u32 cbc_max; compbit_store_pa = nvgpu_mem_get_addr(g, &gr->compbit_store.mem); - base_divisor = g->ops.ltc.get_cbc_base_divisor(g); + base_divisor = g->ops.cbc.get_base_divisor(g); compbit_store_base = DIV_ROUND_UP(compbit_store_pa, base_divisor); cbc_start_addr = (u64)g->ltc_count * (compbit_store_base << @@ -466,7 +466,7 @@ void fb_tu104_init_cbc(struct gk20a *g, struct gr_gk20a *gr) gr->compbit_store.base_hw = compbit_store_base; - g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_invalidate, + g->ops.cbc.ctrl(g, gk20a_cbc_op_invalidate, 0, gr->max_comptag_lines - 1U); } diff --git a/drivers/gpu/nvgpu/common/ltc/ltc_gm20b.c b/drivers/gpu/nvgpu/common/ltc/ltc_gm20b.c index 56b01165e..b50f2ee3d 100644 --- a/drivers/gpu/nvgpu/common/ltc/ltc_gm20b.c +++ b/drivers/gpu/nvgpu/common/ltc/ltc_gm20b.c @@ -38,173 +38,6 @@ #include "ltc_gm20b.h" -int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) -{ - /* max memory size (MB) to cover */ - u32 max_size = gr->max_comptag_mem; - /* one tag line covers 128KB */ - u32 max_comptag_lines = max_size << 3U; - - u32 hw_max_comptag_lines = - ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_init_v(); - - u32 cbc_param = - gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()); - u32 comptags_per_cacheline = - ltc_ltcs_ltss_cbc_param_comptags_per_cache_line_v(cbc_param); - - u32 compbit_backing_size; - - int err; - - nvgpu_log_fn(g, " "); - - if (max_comptag_lines == 0U) { - return 0; - } - - if (max_comptag_lines > hw_max_comptag_lines) { - max_comptag_lines = hw_max_comptag_lines; - } - - compbit_backing_size = - DIV_ROUND_UP(max_comptag_lines, comptags_per_cacheline) * - gr->cacheline_size * 
gr->slices_per_ltc * g->ltc_count; - - /* aligned to 2KB * ltc_count */ - compbit_backing_size += - g->ltc_count << ltc_ltcs_ltss_cbc_base_alignment_shift_v(); - - /* must be a multiple of 64KB */ - compbit_backing_size = roundup(compbit_backing_size, - U32(64) * U32(1024)); - - max_comptag_lines = - (compbit_backing_size * comptags_per_cacheline) / - (gr->cacheline_size * gr->slices_per_ltc * g->ltc_count); - - if (max_comptag_lines > hw_max_comptag_lines) { - max_comptag_lines = hw_max_comptag_lines; - } - - nvgpu_log_info(g, "compbit backing store size : %d", - compbit_backing_size); - nvgpu_log_info(g, "max comptag lines : %d", - max_comptag_lines); - - err = nvgpu_ltc_alloc_cbc(g, compbit_backing_size, false); - if (err != 0) { - return err; - } - - err = gk20a_comptag_allocator_init(g, &gr->comp_tags, max_comptag_lines); - if (err != 0) { - return err; - } - - gr->max_comptag_lines = max_comptag_lines; - gr->comptags_per_cacheline = comptags_per_cacheline; - gr->compbit_backing_size = compbit_backing_size; - - return 0; -} - -int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, - u32 min, u32 max) -{ - struct gr_gk20a *gr = &g->gr; - struct nvgpu_timeout timeout; - int err = 0; - u32 ltc, slice, ctrl1, val, hw_op = 0U; - u32 slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v( - gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r())); - u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); - u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); - const u32 max_lines = 16384U; - - nvgpu_log_fn(g, " "); - - trace_gk20a_ltc_cbc_ctrl_start(g->name, op, min, max); - - if (gr->compbit_store.mem.size == 0ULL) { - return 0; - } - - while (true) { - const u32 iter_max = min(min + max_lines - 1U, max); - bool full_cache_op = true; - - nvgpu_mutex_acquire(&g->mm.l2_op_lock); - - nvgpu_log_info(g, "clearing CBC lines %u..%u", min, iter_max); - - if (op == gk20a_cbc_op_clear) { - gk20a_writel( - g, ltc_ltcs_ltss_cbc_ctrl2_r(), - 
ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f( - min)); - gk20a_writel( - g, ltc_ltcs_ltss_cbc_ctrl3_r(), - ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f( - iter_max)); - hw_op = ltc_ltcs_ltss_cbc_ctrl1_clear_active_f(); - full_cache_op = false; - } else if (op == gk20a_cbc_op_clean) { - /* this is full-cache op */ - hw_op = ltc_ltcs_ltss_cbc_ctrl1_clean_active_f(); - } else if (op == gk20a_cbc_op_invalidate) { - /* this is full-cache op */ - hw_op = ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f(); - } else { - nvgpu_err(g, "Unknown op: %u", (unsigned)op); - err = -EINVAL; - goto out; - } - gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl1_r(), - gk20a_readl(g, - ltc_ltcs_ltss_cbc_ctrl1_r()) | hw_op); - - for (ltc = 0; ltc < g->ltc_count; ltc++) { - for (slice = 0; slice < slices_per_ltc; slice++) { - - ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() + - ltc * ltc_stride + slice * lts_stride; - - nvgpu_timeout_init(g, &timeout, 2000, - NVGPU_TIMER_RETRY_TIMER); - do { - val = gk20a_readl(g, ctrl1); - if ((val & hw_op) == 0U) { - break; - } - nvgpu_udelay(5); - } while (nvgpu_timeout_expired(&timeout) == 0); - - if (nvgpu_timeout_peek_expired(&timeout) != 0) { - nvgpu_err(g, "comp tag clear timeout"); - err = -EBUSY; - goto out; - } - } - } - - /* are we done? 
*/ - if (full_cache_op || iter_max == max) { - break; - } - - /* note: iter_max is inclusive upper bound */ - min = iter_max + 1U; - - /* give a chance for higher-priority threads to progress */ - nvgpu_mutex_release(&g->mm.l2_op_lock); - } -out: - trace_gk20a_ltc_cbc_ctrl_done(g->name); - nvgpu_mutex_release(&g->mm.l2_op_lock); - return err; -} - void gm20b_ltc_init_fs_state(struct gk20a *g) { struct gr_gk20a *gr = &g->gr; @@ -264,18 +97,6 @@ void gm20b_ltc_isr(struct gk20a *g, unsigned int ltc) } } -u32 gm20b_ltc_cbc_fix_config(struct gk20a *g, int base) -{ - u32 val = gk20a_readl(g, ltc_ltcs_ltss_cbc_num_active_ltcs_r()); - if (val == 2U) { - return base * 2; - } else if (val != 1U) { - nvgpu_err(g, "Invalid number of active ltcs: %08x", val); - } - - return base; -} - /* * Performs a full flush of the L2 cache. */ @@ -432,59 +253,6 @@ void gm20b_ltc_set_zbc_depth_entry(struct gk20a *g, depth_val); } -void gm20b_ltc_init_cbc(struct gk20a *g, struct gr_gk20a *gr) -{ - u32 max_size = gr->max_comptag_mem; - u32 max_comptag_lines = max_size << 3U; - - u32 compbit_base_post_divide; - u64 compbit_base_post_multiply64; - u64 compbit_store_iova; - u64 compbit_base_post_divide64; - - if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) { - compbit_store_iova = nvgpu_mem_get_phys_addr(g, - &gr->compbit_store.mem); - } else { - compbit_store_iova = nvgpu_mem_get_addr(g, - &gr->compbit_store.mem); - } - - compbit_base_post_divide64 = compbit_store_iova >> - ltc_ltcs_ltss_cbc_base_alignment_shift_v(); - - do_div(compbit_base_post_divide64, g->ltc_count); - compbit_base_post_divide = u64_lo32(compbit_base_post_divide64); - - compbit_base_post_multiply64 = ((u64)compbit_base_post_divide * - g->ltc_count) << ltc_ltcs_ltss_cbc_base_alignment_shift_v(); - - if (compbit_base_post_multiply64 < compbit_store_iova) { - compbit_base_post_divide++; - } - - /* Bug 1477079 indicates sw adjustment on the posted divided base. 
*/ - if (g->ops.ltc.cbc_fix_config != NULL) { - compbit_base_post_divide = - g->ops.ltc.cbc_fix_config(g, compbit_base_post_divide); - } - - gk20a_writel(g, ltc_ltcs_ltss_cbc_base_r(), - compbit_base_post_divide); - - nvgpu_log(g, gpu_dbg_info | gpu_dbg_map_v | gpu_dbg_pte, - "compbit base.pa: 0x%x,%08x cbc_base:0x%08x\n", - (u32)(compbit_store_iova >> 32), - (u32)(compbit_store_iova & 0xffffffffU), - compbit_base_post_divide); - - gr->compbit_store.base_hw = compbit_base_post_divide; - - g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_invalidate, - 0, max_comptag_lines - 1U); - -} - void gm20b_ltc_set_enabled(struct gk20a *g, bool enabled) { u32 reg_f = ltc_ltcs_ltss_tstg_set_mgmt_2_l2_bypass_mode_enabled_f(); diff --git a/drivers/gpu/nvgpu/common/ltc/ltc_gm20b.h b/drivers/gpu/nvgpu/common/ltc/ltc_gm20b.h index 86989798b..cff668899 100644 --- a/drivers/gpu/nvgpu/common/ltc/ltc_gm20b.h +++ b/drivers/gpu/nvgpu/common/ltc/ltc_gm20b.h @@ -29,10 +29,7 @@ struct gk20a; struct gr_gk20a; -struct gpu_ops; -enum gk20a_cbc_op; -int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr); int gm20b_determine_L2_size_bytes(struct gk20a *g); void gm20b_ltc_set_zbc_color_entry(struct gk20a *g, u32 *color_l2, @@ -40,19 +37,11 @@ void gm20b_ltc_set_zbc_color_entry(struct gk20a *g, void gm20b_ltc_set_zbc_depth_entry(struct gk20a *g, u32 depth_val, u32 index); -void gm20b_ltc_init_cbc(struct gk20a *g, struct gr_gk20a *gr); void gm20b_ltc_set_enabled(struct gk20a *g, bool enabled); void gm20b_ltc_init_fs_state(struct gk20a *g); -int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, - u32 min, u32 max); void gm20b_ltc_isr(struct gk20a *g, unsigned int ltc); void gm20b_ltc_lts_isr(struct gk20a *g, unsigned int ltc, unsigned int slice); -u32 gm20b_ltc_cbc_fix_config(struct gk20a *g, int base); void gm20b_flush_ltc(struct gk20a *g); -int gm20b_ltc_alloc_phys_cbc(struct gk20a *g, - size_t compbit_backing_size); -int gm20b_ltc_alloc_virt_cbc(struct gk20a *g, - size_t 
compbit_backing_size); bool gm20b_ltc_pri_is_ltc_addr(struct gk20a *g, u32 addr); bool gm20b_ltc_is_ltcs_ltss_addr(struct gk20a *g, u32 addr); bool gm20b_ltc_is_ltcn_ltss_addr(struct gk20a *g, u32 addr); diff --git a/drivers/gpu/nvgpu/common/ltc/ltc_gp10b.c b/drivers/gpu/nvgpu/common/ltc/ltc_gp10b.c index 08e6f20d3..fdacb7b7a 100644 --- a/drivers/gpu/nvgpu/common/ltc/ltc_gp10b.c +++ b/drivers/gpu/nvgpu/common/ltc/ltc_gp10b.c @@ -1,7 +1,7 @@ /* * GP10B L2 * - * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2019, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -56,182 +56,6 @@ int gp10b_determine_L2_size_bytes(struct gk20a *g) return ret; } -int gp10b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) -{ - /* max memory size (MB) to cover */ - u32 max_size = gr->max_comptag_mem; - /* one tag line covers 64KB */ - u32 max_comptag_lines = max_size << 4U; - - u32 hw_max_comptag_lines = - ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_init_v(); - - u32 cbc_param = - gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()); - u32 comptags_per_cacheline = - ltc_ltcs_ltss_cbc_param_comptags_per_cache_line_v(cbc_param); - u32 cbc_param2 = - gk20a_readl(g, ltc_ltcs_ltss_cbc_param2_r()); - u32 gobs_per_comptagline_per_slice = - ltc_ltcs_ltss_cbc_param2_gobs_per_comptagline_per_slice_v(cbc_param2); - - u32 compbit_backing_size; - - int err; - - nvgpu_log_fn(g, " "); - - if (max_comptag_lines == 0U) { - return 0; - } - - /* Already initialized */ - if (gr->max_comptag_lines != 0U) { - return 0; - } - - if (max_comptag_lines > hw_max_comptag_lines) { - max_comptag_lines = hw_max_comptag_lines; - } - - compbit_backing_size = - roundup(max_comptag_lines * gobs_per_comptagline_per_slice, - gr->cacheline_size); - compbit_backing_size = roundup( - compbit_backing_size * gr->slices_per_ltc * g->ltc_count, - 
g->ops.fb.compressible_page_size(g)); - - /* aligned to 2KB * ltc_count */ - compbit_backing_size += - g->ltc_count << ltc_ltcs_ltss_cbc_base_alignment_shift_v(); - - /* must be a multiple of 64KB */ - compbit_backing_size = roundup(compbit_backing_size, - U32(64) * U32(1024)); - - nvgpu_log_info(g, "compbit backing store size : %d", - compbit_backing_size); - nvgpu_log_info(g, "max comptag lines : %d", - max_comptag_lines); - nvgpu_log_info(g, "gobs_per_comptagline_per_slice: %d", - gobs_per_comptagline_per_slice); - - err = nvgpu_ltc_alloc_cbc(g, compbit_backing_size, false); - if (err != 0) { - return err; - } - - err = gk20a_comptag_allocator_init(g, &gr->comp_tags, max_comptag_lines); - if (err != 0) { - return err; - } - - gr->max_comptag_lines = max_comptag_lines; - gr->comptags_per_cacheline = comptags_per_cacheline; - gr->gobs_per_comptagline_per_slice = gobs_per_comptagline_per_slice; - gr->compbit_backing_size = compbit_backing_size; - - return 0; -} - -int gp10b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, - u32 min, u32 max) -{ - struct gr_gk20a *gr = &g->gr; - struct nvgpu_timeout timeout; - int err = 0; - u32 ltc, slice, ctrl1, val, hw_op = 0U; - u32 slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v( - gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r())); - u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); - u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); - const u32 max_lines = 16384U; - - nvgpu_log_fn(g, " "); - - trace_gk20a_ltc_cbc_ctrl_start(g->name, op, min, max); - - if (gr->compbit_store.mem.size == 0U) { - return 0; - } - - while (true) { - const u32 iter_max = min(min + max_lines - 1U, max); - bool full_cache_op = true; - - nvgpu_mutex_acquire(&g->mm.l2_op_lock); - - nvgpu_log_info(g, "clearing CBC lines %u..%u", min, iter_max); - - if (op == gk20a_cbc_op_clear) { - nvgpu_writel_check( - g, ltc_ltcs_ltss_cbc_ctrl2_r(), - ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f( - min)); - - nvgpu_writel_check( - g, 
ltc_ltcs_ltss_cbc_ctrl3_r(), - ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f( - iter_max)); - - hw_op = ltc_ltcs_ltss_cbc_ctrl1_clear_active_f(); - full_cache_op = false; - } else if (op == gk20a_cbc_op_clean) { - /* this is full-cache op */ - hw_op = ltc_ltcs_ltss_cbc_ctrl1_clean_active_f(); - } else if (op == gk20a_cbc_op_invalidate) { - /* this is full-cache op */ - hw_op = ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f(); - } else { - nvgpu_err(g, "Unknown op: %u", (unsigned)op); - err = -EINVAL; - goto out; - } - gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl1_r(), - gk20a_readl(g, - ltc_ltcs_ltss_cbc_ctrl1_r()) | hw_op); - - for (ltc = 0; ltc < g->ltc_count; ltc++) { - for (slice = 0; slice < slices_per_ltc; slice++) { - - ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() + - ltc * ltc_stride + slice * lts_stride; - - nvgpu_timeout_init(g, &timeout, 2000, - NVGPU_TIMER_RETRY_TIMER); - do { - val = gk20a_readl(g, ctrl1); - if ((val & hw_op) == 0U) { - break; - } - nvgpu_udelay(5); - } while (nvgpu_timeout_expired(&timeout) == 0); - - if (nvgpu_timeout_peek_expired(&timeout) != 0) { - nvgpu_err(g, "comp tag clear timeout"); - err = -EBUSY; - goto out; - } - } - } - - /* are we done? */ - if (full_cache_op || iter_max == max) { - break; - } - - /* note: iter_max is inclusive upper bound */ - min = iter_max + 1U; - - /* give a chance for higher-priority threads to progress */ - nvgpu_mutex_release(&g->mm.l2_op_lock); - } -out: - trace_gk20a_ltc_cbc_ctrl_done(g->name); - nvgpu_mutex_release(&g->mm.l2_op_lock); - return err; -} - void gp10b_ltc_lts_isr(struct gk20a *g, unsigned int ltc, unsigned int slice) { u32 offset; diff --git a/drivers/gpu/nvgpu/common/ltc/ltc_gp10b.h b/drivers/gpu/nvgpu/common/ltc/ltc_gp10b.h index 10b2713e1..387208842 100644 --- a/drivers/gpu/nvgpu/common/ltc/ltc_gp10b.h +++ b/drivers/gpu/nvgpu/common/ltc/ltc_gp10b.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2019, NVIDIA CORPORATION. 
All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -26,10 +26,7 @@ struct gk20a; struct gpu_ops; int gp10b_determine_L2_size_bytes(struct gk20a *g); -int gp10b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr); void gp10b_ltc_init_fs_state(struct gk20a *g); -int gp10b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, - u32 min, u32 max); void gp10b_ltc_set_enabled(struct gk20a *g, bool enabled); void gp10b_ltc_isr(struct gk20a *g, unsigned int ltc); void gp10b_ltc_lts_isr(struct gk20a *g, unsigned int ltc, unsigned int slice); diff --git a/drivers/gpu/nvgpu/common/ltc/ltc_tu104.c b/drivers/gpu/nvgpu/common/ltc/ltc_tu104.c index 871a08767..ba194622a 100644 --- a/drivers/gpu/nvgpu/common/ltc/ltc_tu104.c +++ b/drivers/gpu/nvgpu/common/ltc/ltc_tu104.c @@ -58,184 +58,3 @@ void ltc_tu104_init_fs_state(struct gk20a *g) ltc_ltcs_ltss_tstg_set_mgmt_1_plc_recompress_rmw_disabled_f()); nvgpu_writel(g, ltc_ltcs_ltss_tstg_set_mgmt_1_r(), reg); } - -u64 ltc_tu104_get_cbc_base_divisor(struct gk20a *g) -{ - return (u64)g->ltc_count << - ltc_ltcs_ltss_cbc_base_alignment_shift_v(); -} - -int ltc_tu104_init_comptags(struct gk20a *g, struct gr_gk20a *gr) -{ - /* max memory size (MB) to cover */ - u32 max_size = gr->max_comptag_mem; - /* one tag line covers 64KB */ - u32 max_comptag_lines = max_size << 4U; - u32 compbit_backing_size; - u32 hw_max_comptag_lines; - u32 cbc_param; - u32 ctags_size; - u32 ctags_per_cacheline; - u32 amap_divide_rounding, amap_swizzle_rounding; - int err; - - nvgpu_log_fn(g, " "); - - if (max_comptag_lines == 0U) { - return 0; - } - - /* Already initialized */ - if (gr->max_comptag_lines != 0U) { - return 0; - } - - hw_max_comptag_lines = - ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_init_v(); - if (max_comptag_lines > hw_max_comptag_lines) { - max_comptag_lines = hw_max_comptag_lines; - } - - cbc_param = nvgpu_readl(g, 
ltc_ltcs_ltss_cbc_param_r()); - - ctags_size = - ltc_ltcs_ltss_cbc_param_bytes_per_comptagline_per_slice_v(cbc_param); - ctags_per_cacheline = gr->cacheline_size / ctags_size; - - amap_divide_rounding = (U32(2U) * U32(1024U)) << - ltc_ltcs_ltss_cbc_param_amap_divide_rounding_v(cbc_param); - amap_swizzle_rounding = (U32(64U) * U32(1024U)) << - ltc_ltcs_ltss_cbc_param_amap_swizzle_rounding_v(cbc_param); - - compbit_backing_size = - roundup(max_comptag_lines * ctags_size, gr->cacheline_size); - compbit_backing_size = - compbit_backing_size * gr->slices_per_ltc * g->ltc_count; - - compbit_backing_size += g->ltc_count * amap_divide_rounding; - compbit_backing_size += amap_swizzle_rounding; - - /* must be a multiple of 64KB */ - compbit_backing_size = roundup(compbit_backing_size, - U32(64) * U32(1024)); - - err = nvgpu_ltc_alloc_cbc(g, compbit_backing_size, true); - if (err != 0) { - return err; - } - - err = gk20a_comptag_allocator_init(g, &gr->comp_tags, max_comptag_lines); - if (err != 0) { - return err; - } - - gr->max_comptag_lines = max_comptag_lines; - gr->comptags_per_cacheline = ctags_per_cacheline; - gr->gobs_per_comptagline_per_slice = ctags_size; - gr->compbit_backing_size = compbit_backing_size; - - nvgpu_log_info(g, "compbit backing store size : %d", - compbit_backing_size); - nvgpu_log_info(g, "max comptag lines : %d", - max_comptag_lines); - nvgpu_log_info(g, "gobs_per_comptagline_per_slice: %d", - gr->gobs_per_comptagline_per_slice); - - return 0; -} - -int ltc_tu104_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, - u32 min, u32 max) -{ - struct gr_gk20a *gr = &g->gr; - struct nvgpu_timeout timeout; - int err = 0; - u32 ltc, slice, ctrl1, val, hw_op = 0U; - u32 slices_per_ltc = gr->slices_per_ltc; - u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); - u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); - const u32 max_lines = 16384U; - - nvgpu_log_fn(g, " "); - - trace_gk20a_ltc_cbc_ctrl_start(g->name, op, min, max); - - if 
(gr->compbit_store.mem.size == 0U) { - return 0; - } - - while (true) { - const u32 iter_max = min(min + max_lines - 1U, max); - bool full_cache_op = true; - - nvgpu_mutex_acquire(&g->mm.l2_op_lock); - - nvgpu_log_info(g, "clearing CBC lines %u..%u", min, iter_max); - - if (op == gk20a_cbc_op_clear) { - nvgpu_writel( - g, ltc_ltcs_ltss_cbc_ctrl2_r(), - ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f( - min)); - nvgpu_writel( - g, ltc_ltcs_ltss_cbc_ctrl3_r(), - ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f( - iter_max)); - hw_op = ltc_ltcs_ltss_cbc_ctrl1_clear_active_f(); - full_cache_op = false; - } else if (op == gk20a_cbc_op_clean) { - /* this is full-cache op */ - hw_op = ltc_ltcs_ltss_cbc_ctrl1_clean_active_f(); - } else if (op == gk20a_cbc_op_invalidate) { - /* this is full-cache op */ - hw_op = ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f(); - } else { - nvgpu_err(g, "Unknown op: %u", (unsigned)op); - err = -EINVAL; - goto out; - } - - nvgpu_writel(g, ltc_ltcs_ltss_cbc_ctrl1_r(), - nvgpu_readl(g, - ltc_ltcs_ltss_cbc_ctrl1_r()) | hw_op); - - for (ltc = 0; ltc < g->ltc_count; ltc++) { - for (slice = 0; slice < slices_per_ltc; slice++) { - - ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() + - ltc * ltc_stride + slice * lts_stride; - - nvgpu_timeout_init(g, &timeout, 2000, - NVGPU_TIMER_RETRY_TIMER); - do { - val = nvgpu_readl(g, ctrl1); - if ((val & hw_op) == 0U) { - break; - } - nvgpu_udelay(5); - } while (nvgpu_timeout_expired(&timeout) == 0); - - if (nvgpu_timeout_peek_expired(&timeout) != 0) { - nvgpu_err(g, "comp tag clear timeout"); - err = -EBUSY; - goto out; - } - } - } - - /* are we done? 
*/ - if (full_cache_op || iter_max == max) { - break; - } - - /* note: iter_max is inclusive upper bound */ - min = iter_max + 1U; - - /* give a chance for higher-priority threads to progress */ - nvgpu_mutex_release(&g->mm.l2_op_lock); - } -out: - trace_gk20a_ltc_cbc_ctrl_done(g->name); - nvgpu_mutex_release(&g->mm.l2_op_lock); - return err; -} diff --git a/drivers/gpu/nvgpu/common/ltc/ltc_tu104.h b/drivers/gpu/nvgpu/common/ltc/ltc_tu104.h index eca61388f..c5e8cd4c7 100644 --- a/drivers/gpu/nvgpu/common/ltc/ltc_tu104.h +++ b/drivers/gpu/nvgpu/common/ltc/ltc_tu104.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -25,14 +25,8 @@ #include -enum gk20a_cbc_op; struct gk20a; -struct gr_gk20a; -u64 ltc_tu104_get_cbc_base_divisor(struct gk20a *g); void ltc_tu104_init_fs_state(struct gk20a *g); -int ltc_tu104_init_comptags(struct gk20a *g, struct gr_gk20a *gr); -int ltc_tu104_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, - u32 min, u32 max); #endif diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c index 7632396b0..70f7c133b 100644 --- a/drivers/gpu/nvgpu/common/mm/vm.c +++ b/drivers/gpu/nvgpu/common/mm/vm.c @@ -1000,9 +1000,9 @@ struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm, * Newly allocated comptags needs to be cleared */ if (comptags.needs_clear) { - if (g->ops.ltc.cbc_ctrl != NULL) { + if (g->ops.cbc.ctrl != NULL) { if (gk20a_comptags_start_clear(os_buf)) { - err = g->ops.ltc.cbc_ctrl( + err = g->ops.cbc.ctrl( g, gk20a_cbc_op_clear, comptags.offset, (comptags.offset + diff --git a/drivers/gpu/nvgpu/common/vgpu/cbc/cbc_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/cbc/cbc_vgpu.c new file mode 100644 index 000000000..189b54d2c --- /dev/null +++ 
b/drivers/gpu/nvgpu/common/vgpu/cbc/cbc_vgpu.c @@ -0,0 +1,53 @@ +/* + * Virtualized GPU CBC + * + * Copyright (c) 2019 NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include + +#include "cbc_vgpu.h" + +int vgpu_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr) +{ + struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + u32 max_comptag_lines = 0; + int err; + + nvgpu_log_fn(g, " "); + + gr->comptags_per_cacheline = priv->constants.comptags_per_cacheline; + max_comptag_lines = priv->constants.comptag_lines; + + if (max_comptag_lines < 2) { + return -ENXIO; + } + + err = gk20a_comptag_allocator_init(g, &gr->comp_tags, max_comptag_lines); + if (err) { + return err; + } + + gr->max_comptag_lines = max_comptag_lines; + + return 0; +} diff --git a/drivers/gpu/nvgpu/common/vgpu/cbc/cbc_vgpu.h b/drivers/gpu/nvgpu/common/vgpu/cbc/cbc_vgpu.h new file mode 100644 index 000000000..2ebb66df8 --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/cbc/cbc_vgpu.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef NVGPU_CBC_VGPU_H +#define NVGPU_CBC_VGPU_H + +struct gk20a; +struct gr_gk20a; + +int vgpu_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr); + +#endif /* NVGPU_CBC_VGPU_H */ diff --git a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c index 78d52a506..1dca118ab 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c +++ b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c @@ -46,6 +46,7 @@ #include "common/vgpu/gr/ctx_vgpu.h" #include "common/vgpu/ltc/ltc_vgpu.h" #include "common/vgpu/mm/mm_vgpu.h" +#include "common/vgpu/cbc/cbc_vgpu.h" #include "common/vgpu/debugger_vgpu.h" #include "common/vgpu/fecs_trace_vgpu.h" #include "common/vgpu/perf/perf_vgpu.h" @@ -82,12 +83,8 @@ static const struct gpu_ops vgpu_gp10b_ops = { .determine_L2_size_bytes = vgpu_determine_L2_size_bytes, .set_zbc_color_entry = NULL, .set_zbc_depth_entry = NULL, - .init_cbc = NULL, .init_fs_state = vgpu_ltc_init_fs_state, - .init_comptags = vgpu_ltc_init_comptags, - .cbc_ctrl = NULL, .isr = NULL, - .cbc_fix_config = NULL, .flush = NULL, .set_enabled = NULL, .pri_is_ltc_addr = gm20b_ltc_pri_is_ltc_addr, @@ -96,6 +93,12 @@ static const struct gpu_ops vgpu_gp10b_ops = { .split_lts_broadcast_addr = gm20b_ltc_split_lts_broadcast_addr, .split_ltc_broadcast_addr = gm20b_ltc_split_ltc_broadcast_addr, }, + .cbc = { + .init = NULL, + .alloc_comptags = vgpu_cbc_alloc_comptags, + .ctrl = NULL, + .fix_config = NULL, + }, .ce2 = { .isr_stall = NULL, .isr_nonstall = NULL, @@ -675,6 +678,7 @@ int vgpu_gp10b_init_hal(struct gk20a *g) struct vgpu_priv_data *priv = vgpu_get_priv_data(g); gops->ltc = vgpu_gp10b_ops.ltc; + gops->cbc = vgpu_gp10b_ops.cbc; gops->ce2 = vgpu_gp10b_ops.ce2; gops->gr = vgpu_gp10b_ops.gr; gops->gr.ctxsw_prog = vgpu_gp10b_ops.gr.ctxsw_prog; diff --git a/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c index a40680c1b..fe9940ea1 100644 --- 
a/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c +++ b/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c @@ -685,7 +685,7 @@ static int vgpu_gr_init_gr_setup_sw(struct gk20a *g) goto clean_up; } - err = g->ops.ltc.init_comptags(g, gr); + err = g->ops.cbc.alloc_comptags(g, gr); if (err) { goto clean_up; } diff --git a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c index 61567e2f4..17d5a4c72 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c +++ b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c @@ -59,6 +59,7 @@ #include "common/vgpu/gr/ctx_vgpu.h" #include "common/vgpu/ltc/ltc_vgpu.h" #include "common/vgpu/mm/mm_vgpu.h" +#include "common/vgpu/cbc/cbc_vgpu.h" #include "common/vgpu/debugger_vgpu.h" #include "common/vgpu/perf/perf_vgpu.h" #include "common/vgpu/fecs_trace_vgpu.h" @@ -103,10 +104,7 @@ static const struct gpu_ops vgpu_gv11b_ops = { .set_zbc_s_entry = NULL, .set_zbc_color_entry = NULL, .set_zbc_depth_entry = NULL, - .init_cbc = NULL, .init_fs_state = vgpu_ltc_init_fs_state, - .init_comptags = vgpu_ltc_init_comptags, - .cbc_ctrl = NULL, .isr = NULL, .flush = NULL, .set_enabled = NULL, @@ -116,6 +114,11 @@ static const struct gpu_ops vgpu_gv11b_ops = { .split_lts_broadcast_addr = gm20b_ltc_split_lts_broadcast_addr, .split_ltc_broadcast_addr = gm20b_ltc_split_ltc_broadcast_addr, }, + .cbc = { + .init = NULL, + .ctrl = NULL, + .alloc_comptags = vgpu_cbc_alloc_comptags, + }, .ce2 = { .isr_stall = NULL, .isr_nonstall = NULL, @@ -756,6 +759,7 @@ int vgpu_gv11b_init_hal(struct gk20a *g) struct vgpu_priv_data *priv = vgpu_get_priv_data(g); gops->ltc = vgpu_gv11b_ops.ltc; + gops->cbc = vgpu_gv11b_ops.cbc; gops->ce2 = vgpu_gv11b_ops.ce2; gops->gr = vgpu_gv11b_ops.gr; gops->gr.ctxsw_prog = vgpu_gv11b_ops.gr.ctxsw_prog; diff --git a/drivers/gpu/nvgpu/common/vgpu/ltc/ltc_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/ltc/ltc_vgpu.c index 3a5fdc4c1..54006d700 100644 --- 
a/drivers/gpu/nvgpu/common/vgpu/ltc/ltc_vgpu.c +++ b/drivers/gpu/nvgpu/common/vgpu/ltc/ltc_vgpu.c @@ -36,31 +36,6 @@ int vgpu_determine_L2_size_bytes(struct gk20a *g) return priv->constants.l2_size; } -int vgpu_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) -{ - struct vgpu_priv_data *priv = vgpu_get_priv_data(g); - u32 max_comptag_lines = 0; - int err; - - nvgpu_log_fn(g, " "); - - gr->comptags_per_cacheline = priv->constants.comptags_per_cacheline; - max_comptag_lines = priv->constants.comptag_lines; - - if (max_comptag_lines < 2) { - return -ENXIO; - } - - err = gk20a_comptag_allocator_init(g, &gr->comp_tags, max_comptag_lines); - if (err) { - return err; - } - - gr->max_comptag_lines = max_comptag_lines; - - return 0; -} - void vgpu_ltc_init_fs_state(struct gk20a *g) { struct vgpu_priv_data *priv = vgpu_get_priv_data(g); diff --git a/drivers/gpu/nvgpu/common/vgpu/ltc/ltc_vgpu.h b/drivers/gpu/nvgpu/common/vgpu/ltc/ltc_vgpu.h index c3457a80e..bd6676209 100644 --- a/drivers/gpu/nvgpu/common/vgpu/ltc/ltc_vgpu.h +++ b/drivers/gpu/nvgpu/common/vgpu/ltc/ltc_vgpu.h @@ -27,7 +27,6 @@ struct gk20a; struct gr_gk20a; int vgpu_determine_L2_size_bytes(struct gk20a *g); -int vgpu_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr); void vgpu_ltc_init_fs_state(struct gk20a *g); #endif /* NVGPU_LTC_VGPU_H */ diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index e00d85f9c..49ac09014 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -2811,8 +2811,8 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g) goto out; } - if (g->ops.ltc.init_cbc != NULL) { - g->ops.ltc.init_cbc(g, gr); + if (g->ops.cbc.init != NULL) { + g->ops.cbc.init(g, gr); } if (g->ops.fb.init_cbc != NULL) { @@ -3024,8 +3024,8 @@ static int gk20a_init_gr_setup_sw(struct gk20a *g) goto clean_up; } - if (g->ops.ltc.init_comptags != NULL) { - err = g->ops.ltc.init_comptags(g, gr); + if (g->ops.cbc.alloc_comptags != NULL) { + err 
= g->ops.cbc.alloc_comptags(g, gr); if (err != 0) { goto clean_up; } diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c index 0c5c64aab..55e054778 100644 --- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c @@ -41,6 +41,7 @@ #include "hal/bus/bus_gk20a.h" #include "hal/priv_ring/priv_ring_gm20b.h" #include "hal/power_features/cg/gm20b_gating_reglist.h" +#include "hal/cbc/cbc_gm20b.h" #include "hal/fuse/fuse_gm20b.h" #include "common/ptimer/ptimer_gk20a.h" @@ -202,12 +203,8 @@ static const struct gpu_ops gm20b_ops = { .determine_L2_size_bytes = gm20b_determine_L2_size_bytes, .set_zbc_color_entry = gm20b_ltc_set_zbc_color_entry, .set_zbc_depth_entry = gm20b_ltc_set_zbc_depth_entry, - .init_cbc = gm20b_ltc_init_cbc, .init_fs_state = gm20b_ltc_init_fs_state, - .init_comptags = gm20b_ltc_init_comptags, - .cbc_ctrl = gm20b_ltc_cbc_ctrl, .isr = gm20b_ltc_isr, - .cbc_fix_config = gm20b_ltc_cbc_fix_config, .flush = gm20b_flush_ltc, .set_enabled = gm20b_ltc_set_enabled, .pri_is_ltc_addr = gm20b_ltc_pri_is_ltc_addr, @@ -216,6 +213,12 @@ static const struct gpu_ops gm20b_ops = { .split_lts_broadcast_addr = gm20b_ltc_split_lts_broadcast_addr, .split_ltc_broadcast_addr = gm20b_ltc_split_ltc_broadcast_addr, }, + .cbc = { + .init = gm20b_cbc_init, + .ctrl = gm20b_cbc_ctrl, + .alloc_comptags = gm20b_cbc_alloc_comptags, + .fix_config = gm20b_cbc_fix_config, + }, .ce2 = { .isr_stall = gk20a_ce2_isr, .isr_nonstall = gk20a_ce2_nonstall_isr, @@ -827,6 +830,7 @@ int gm20b_init_hal(struct gk20a *g) struct gpu_ops *gops = &g->ops; gops->ltc = gm20b_ops.ltc; + gops->cbc = gm20b_ops.cbc; gops->ce2 = gm20b_ops.ce2; gops->gr = gm20b_ops.gr; gops->gr.ctxsw_prog = gm20b_ops.gr.ctxsw_prog; diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c index dcaf2bba1..1b4ad45aa 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c @@ -45,6 +45,8 @@ #include 
"hal/priv_ring/priv_ring_gm20b.h" #include "hal/priv_ring/priv_ring_gp10b.h" #include "hal/power_features/cg/gp10b_gating_reglist.h" +#include "hal/cbc/cbc_gm20b.h" +#include "hal/cbc/cbc_gp10b.h" #include "hal/fuse/fuse_gm20b.h" #include "hal/fuse/fuse_gp10b.h" @@ -223,12 +225,8 @@ static const struct gpu_ops gp10b_ops = { .determine_L2_size_bytes = gp10b_determine_L2_size_bytes, .set_zbc_color_entry = gm20b_ltc_set_zbc_color_entry, .set_zbc_depth_entry = gm20b_ltc_set_zbc_depth_entry, - .init_cbc = gm20b_ltc_init_cbc, .init_fs_state = gp10b_ltc_init_fs_state, - .init_comptags = gp10b_ltc_init_comptags, - .cbc_ctrl = gp10b_ltc_cbc_ctrl, .isr = gp10b_ltc_isr, - .cbc_fix_config = gm20b_ltc_cbc_fix_config, .flush = gm20b_flush_ltc, .set_enabled = gp10b_ltc_set_enabled, .pri_is_ltc_addr = gm20b_ltc_pri_is_ltc_addr, @@ -237,6 +235,12 @@ static const struct gpu_ops gp10b_ops = { .split_lts_broadcast_addr = gm20b_ltc_split_lts_broadcast_addr, .split_ltc_broadcast_addr = gm20b_ltc_split_ltc_broadcast_addr, }, + .cbc = { + .init = gm20b_cbc_init, + .alloc_comptags = gp10b_cbc_alloc_comptags, + .ctrl = gp10b_cbc_ctrl, + .fix_config = gm20b_cbc_fix_config, + }, .ce2 = { .isr_stall = gp10b_ce_isr, .isr_nonstall = gp10b_ce_nonstall_isr, @@ -911,6 +915,7 @@ int gp10b_init_hal(struct gk20a *g) struct gpu_ops *gops = &g->ops; gops->ltc = gp10b_ops.ltc; + gops->cbc = gp10b_ops.cbc; gops->ce2 = gp10b_ops.ce2; gops->gr = gp10b_ops.gr; gops->gr.ctxsw_prog = gp10b_ops.gr.ctxsw_prog; diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index 0120a177d..d230d317b 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c @@ -28,6 +28,8 @@ #include "hal/priv_ring/priv_ring_gm20b.h" #include "hal/priv_ring/priv_ring_gp10b.h" #include "hal/power_features/cg/gv100_gating_reglist.h" +#include "hal/cbc/cbc_gm20b.h" +#include "hal/cbc/cbc_gp10b.h" #include "hal/fuse/fuse_gm20b.h" #include "hal/fuse/fuse_gp10b.h" #include 
"hal/fuse/fuse_gp106.h" @@ -326,11 +328,8 @@ static const struct gpu_ops gv100_ops = { .set_zbc_s_entry = gv11b_ltc_set_zbc_stencil_entry, .set_zbc_color_entry = gm20b_ltc_set_zbc_color_entry, .set_zbc_depth_entry = gm20b_ltc_set_zbc_depth_entry, - .init_cbc = NULL, .init_fs_state = gv11b_ltc_init_fs_state, - .cbc_ctrl = gp10b_ltc_cbc_ctrl, .isr = gv11b_ltc_isr, - .cbc_fix_config = NULL, .flush = gm20b_flush_ltc, .set_enabled = gp10b_ltc_set_enabled, .intr_en_illegal_compstat = gv11b_ltc_intr_en_illegal_compstat, @@ -340,6 +339,11 @@ static const struct gpu_ops gv100_ops = { .split_lts_broadcast_addr = gm20b_ltc_split_lts_broadcast_addr, .split_ltc_broadcast_addr = gm20b_ltc_split_ltc_broadcast_addr, }, + .cbc = { + .init = NULL, + .ctrl = gp10b_cbc_ctrl, + .fix_config = NULL, + }, .ce2 = { .isr_stall = gv11b_ce_isr, .isr_nonstall = gp10b_ce_nonstall_isr, @@ -1190,6 +1194,7 @@ int gv100_init_hal(struct gk20a *g) gops->bios = gv100_ops.bios; gops->ltc = gv100_ops.ltc; + gops->cbc = gv100_ops.cbc; gops->ce2 = gv100_ops.ce2; gops->gr = gv100_ops.gr; gops->gr.ctxsw_prog = gv100_ops.gr.ctxsw_prog; diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index 69885a9e8..7e2d8e48c 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c @@ -31,6 +31,8 @@ #include "hal/priv_ring/priv_ring_gm20b.h" #include "hal/priv_ring/priv_ring_gp10b.h" #include "hal/power_features/cg/gv11b_gating_reglist.h" +#include "hal/cbc/cbc_gm20b.h" +#include "hal/cbc/cbc_gp10b.h" #include "hal/fuse/fuse_gm20b.h" #include "hal/fuse/fuse_gp10b.h" @@ -276,10 +278,7 @@ static const struct gpu_ops gv11b_ops = { .set_zbc_s_entry = gv11b_ltc_set_zbc_stencil_entry, .set_zbc_color_entry = gm20b_ltc_set_zbc_color_entry, .set_zbc_depth_entry = gm20b_ltc_set_zbc_depth_entry, - .init_cbc = NULL, .init_fs_state = gv11b_ltc_init_fs_state, - .init_comptags = gp10b_ltc_init_comptags, - .cbc_ctrl = gp10b_ltc_cbc_ctrl, .isr = gv11b_ltc_isr, .flush = 
gm20b_flush_ltc, .set_enabled = gp10b_ltc_set_enabled, @@ -290,6 +289,11 @@ static const struct gpu_ops gv11b_ops = { .split_lts_broadcast_addr = gm20b_ltc_split_lts_broadcast_addr, .split_ltc_broadcast_addr = gm20b_ltc_split_ltc_broadcast_addr, }, + .cbc = { + .init = NULL, + .alloc_comptags = gp10b_cbc_alloc_comptags, + .ctrl = gp10b_cbc_ctrl, + }, .ce2 = { .isr_stall = gv11b_ce_isr, .isr_nonstall = gp10b_ce_nonstall_isr, @@ -1065,6 +1069,7 @@ int gv11b_init_hal(struct gk20a *g) struct gpu_ops *gops = &g->ops; gops->ltc = gv11b_ops.ltc; + gops->cbc = gv11b_ops.cbc; gops->ce2 = gv11b_ops.ce2; gops->gr = gv11b_ops.gr; gops->gr.ctxsw_prog = gv11b_ops.gr.ctxsw_prog; diff --git a/drivers/gpu/nvgpu/hal/cbc/cbc_gm20b.c b/drivers/gpu/nvgpu/hal/cbc/cbc_gm20b.c new file mode 100644 index 000000000..bc117856c --- /dev/null +++ b/drivers/gpu/nvgpu/hal/cbc/cbc_gm20b.c @@ -0,0 +1,271 @@ +/* + * GM20B CBC + * + * Copyright (c) 2019 NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "cbc_gm20b.h" + +int gm20b_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr) +{ + /* max memory size (MB) to cover */ + u32 max_size = gr->max_comptag_mem; + /* one tag line covers 128KB */ + u32 max_comptag_lines = max_size << 3U; + + u32 hw_max_comptag_lines = + ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_init_v(); + + u32 cbc_param = + gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()); + u32 comptags_per_cacheline = + ltc_ltcs_ltss_cbc_param_comptags_per_cache_line_v(cbc_param); + + u32 compbit_backing_size; + + int err; + + nvgpu_log_fn(g, " "); + + if (max_comptag_lines == 0U) { + return 0; + } + + if (max_comptag_lines > hw_max_comptag_lines) { + max_comptag_lines = hw_max_comptag_lines; + } + + compbit_backing_size = + DIV_ROUND_UP(max_comptag_lines, comptags_per_cacheline) * + gr->cacheline_size * gr->slices_per_ltc * g->ltc_count; + + /* aligned to 2KB * ltc_count */ + compbit_backing_size += + g->ltc_count << ltc_ltcs_ltss_cbc_base_alignment_shift_v(); + + /* must be a multiple of 64KB */ + compbit_backing_size = roundup(compbit_backing_size, + U32(64) * U32(1024)); + + max_comptag_lines = + (compbit_backing_size * comptags_per_cacheline) / + (gr->cacheline_size * gr->slices_per_ltc * g->ltc_count); + + if (max_comptag_lines > hw_max_comptag_lines) { + max_comptag_lines = hw_max_comptag_lines; + } + + nvgpu_log_info(g, "compbit backing store size : %d", + compbit_backing_size); + nvgpu_log_info(g, "max comptag lines : %d", + max_comptag_lines); + + err = nvgpu_cbc_alloc(g, compbit_backing_size, false); + if (err != 0) { + return err; + } + + err = 
gk20a_comptag_allocator_init(g, &gr->comp_tags, max_comptag_lines); + if (err != 0) { + return err; + } + + gr->max_comptag_lines = max_comptag_lines; + gr->comptags_per_cacheline = comptags_per_cacheline; + gr->compbit_backing_size = compbit_backing_size; + + return 0; +} + +int gm20b_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, + u32 min, u32 max) +{ + struct gr_gk20a *gr = &g->gr; + struct nvgpu_timeout timeout; + int err = 0; + u32 ltc, slice, ctrl1, val, hw_op = 0U; + u32 slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v( + gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r())); + u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); + u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); + const u32 max_lines = 16384U; + + nvgpu_log_fn(g, " "); + + trace_gk20a_ltc_cbc_ctrl_start(g->name, op, min, max); + + if (gr->compbit_store.mem.size == 0ULL) { + return 0; + } + + while (true) { + const u32 iter_max = min(min + max_lines - 1U, max); + bool full_cache_op = true; + + nvgpu_mutex_acquire(&g->mm.l2_op_lock); + + nvgpu_log_info(g, "clearing CBC lines %u..%u", min, iter_max); + + if (op == gk20a_cbc_op_clear) { + gk20a_writel( + g, ltc_ltcs_ltss_cbc_ctrl2_r(), + ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f( + min)); + gk20a_writel( + g, ltc_ltcs_ltss_cbc_ctrl3_r(), + ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f( + iter_max)); + hw_op = ltc_ltcs_ltss_cbc_ctrl1_clear_active_f(); + full_cache_op = false; + } else if (op == gk20a_cbc_op_clean) { + /* this is full-cache op */ + hw_op = ltc_ltcs_ltss_cbc_ctrl1_clean_active_f(); + } else if (op == gk20a_cbc_op_invalidate) { + /* this is full-cache op */ + hw_op = ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f(); + } else { + nvgpu_err(g, "Unknown op: %u", (unsigned)op); + err = -EINVAL; + goto out; + } + gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl1_r(), + gk20a_readl(g, + ltc_ltcs_ltss_cbc_ctrl1_r()) | hw_op); + + for (ltc = 0; ltc < g->ltc_count; ltc++) { + for (slice = 0; slice < slices_per_ltc; slice++) { 
+ + ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() + + ltc * ltc_stride + slice * lts_stride; + + nvgpu_timeout_init(g, &timeout, 2000, + NVGPU_TIMER_RETRY_TIMER); + do { + val = gk20a_readl(g, ctrl1); + if ((val & hw_op) == 0U) { + break; + } + nvgpu_udelay(5); + } while (nvgpu_timeout_expired(&timeout) == 0); + + if (nvgpu_timeout_peek_expired(&timeout) != 0) { + nvgpu_err(g, "comp tag clear timeout"); + err = -EBUSY; + goto out; + } + } + } + + /* are we done? */ + if (full_cache_op || iter_max == max) { + break; + } + + /* note: iter_max is inclusive upper bound */ + min = iter_max + 1U; + + /* give a chance for higher-priority threads to progress */ + nvgpu_mutex_release(&g->mm.l2_op_lock); + } +out: + trace_gk20a_ltc_cbc_ctrl_done(g->name); + nvgpu_mutex_release(&g->mm.l2_op_lock); + return err; +} + +u32 gm20b_cbc_fix_config(struct gk20a *g, int base) +{ + u32 val = gk20a_readl(g, ltc_ltcs_ltss_cbc_num_active_ltcs_r()); + if (val == 2U) { + return base * 2; + } else if (val != 1U) { + nvgpu_err(g, "Invalid number of active ltcs: %08x", val); + } + + return base; +} + + +void gm20b_cbc_init(struct gk20a *g, struct gr_gk20a *gr) +{ + u32 max_size = gr->max_comptag_mem; + u32 max_comptag_lines = max_size << 3U; + + u32 compbit_base_post_divide; + u64 compbit_base_post_multiply64; + u64 compbit_store_iova; + u64 compbit_base_post_divide64; + + if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) { + compbit_store_iova = nvgpu_mem_get_phys_addr(g, + &gr->compbit_store.mem); + } else { + compbit_store_iova = nvgpu_mem_get_addr(g, + &gr->compbit_store.mem); + } + + compbit_base_post_divide64 = compbit_store_iova >> + ltc_ltcs_ltss_cbc_base_alignment_shift_v(); + + do_div(compbit_base_post_divide64, g->ltc_count); + compbit_base_post_divide = u64_lo32(compbit_base_post_divide64); + + compbit_base_post_multiply64 = ((u64)compbit_base_post_divide * + g->ltc_count) << ltc_ltcs_ltss_cbc_base_alignment_shift_v(); + + if (compbit_base_post_multiply64 < compbit_store_iova) { + 
compbit_base_post_divide++; + } + + /* Bug 1477079 indicates sw adjustment on the posted divided base. */ + if (g->ops.cbc.fix_config != NULL) { + compbit_base_post_divide = + g->ops.cbc.fix_config(g, compbit_base_post_divide); + } + + gk20a_writel(g, ltc_ltcs_ltss_cbc_base_r(), + compbit_base_post_divide); + + nvgpu_log(g, gpu_dbg_info | gpu_dbg_map_v | gpu_dbg_pte, + "compbit base.pa: 0x%x,%08x cbc_base:0x%08x\n", + (u32)(compbit_store_iova >> 32), + (u32)(compbit_store_iova & 0xffffffffU), + compbit_base_post_divide); + + gr->compbit_store.base_hw = compbit_base_post_divide; + + g->ops.cbc.ctrl(g, gk20a_cbc_op_invalidate, + 0, max_comptag_lines - 1U); + +} diff --git a/drivers/gpu/nvgpu/hal/cbc/cbc_gm20b.h b/drivers/gpu/nvgpu/hal/cbc/cbc_gm20b.h new file mode 100644 index 000000000..0776275c6 --- /dev/null +++ b/drivers/gpu/nvgpu/hal/cbc/cbc_gm20b.h @@ -0,0 +1,45 @@ +/* + * GM20B CBC + * + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_CBC_GM20B +#define NVGPU_CBC_GM20B + +#include + +struct gk20a; +struct gr_gk20a; +struct gpu_ops; +enum gk20a_cbc_op; + +int gm20b_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr); +void gm20b_cbc_init(struct gk20a *g, struct gr_gk20a *gr); +int gm20b_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, + u32 min, u32 max); +u32 gm20b_cbc_fix_config(struct gk20a *g, int base); +int gm20b_cbc_alloc_phys(struct gk20a *g, + size_t compbit_backing_size); +int gm20b_cbc_alloc_virtc(struct gk20a *g, + size_t compbit_backing_size); + +#endif diff --git a/drivers/gpu/nvgpu/hal/cbc/cbc_gp10b.c b/drivers/gpu/nvgpu/hal/cbc/cbc_gp10b.c new file mode 100644 index 000000000..a453de990 --- /dev/null +++ b/drivers/gpu/nvgpu/hal/cbc/cbc_gp10b.c @@ -0,0 +1,213 @@ +/* + * GP10B CBC + * + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "cbc_gp10b.h" + +int gp10b_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr) +{ + /* max memory size (MB) to cover */ + u32 max_size = gr->max_comptag_mem; + /* one tag line covers 64KB */ + u32 max_comptag_lines = max_size << 4U; + + u32 hw_max_comptag_lines = + ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_init_v(); + + u32 cbc_param = + gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()); + u32 comptags_per_cacheline = + ltc_ltcs_ltss_cbc_param_comptags_per_cache_line_v(cbc_param); + u32 cbc_param2 = + gk20a_readl(g, ltc_ltcs_ltss_cbc_param2_r()); + u32 gobs_per_comptagline_per_slice = + ltc_ltcs_ltss_cbc_param2_gobs_per_comptagline_per_slice_v(cbc_param2); + + u32 compbit_backing_size; + + int err; + + nvgpu_log_fn(g, " "); + + if (max_comptag_lines == 0U) { + return 0; + } + + /* Already initialized */ + if (gr->max_comptag_lines != 0U) { + return 0; + } + + if (max_comptag_lines > hw_max_comptag_lines) { + max_comptag_lines = hw_max_comptag_lines; + } + + compbit_backing_size = + roundup(max_comptag_lines * gobs_per_comptagline_per_slice, + gr->cacheline_size); + compbit_backing_size = roundup( + compbit_backing_size * gr->slices_per_ltc * g->ltc_count, + g->ops.fb.compressible_page_size(g)); + + /* aligned to 2KB * ltc_count */ + compbit_backing_size += + g->ltc_count << ltc_ltcs_ltss_cbc_base_alignment_shift_v(); + + /* must be a multiple of 64KB */ + compbit_backing_size = roundup(compbit_backing_size, + U32(64) * U32(1024)); + + nvgpu_log_info(g, "compbit backing store size : %d", + compbit_backing_size); + nvgpu_log_info(g, "max comptag lines : %d", + max_comptag_lines); + 
nvgpu_log_info(g, "gobs_per_comptagline_per_slice: %d", + gobs_per_comptagline_per_slice); + + err = nvgpu_cbc_alloc(g, compbit_backing_size, false); + if (err != 0) { + return err; + } + + err = gk20a_comptag_allocator_init(g, &gr->comp_tags, max_comptag_lines); + if (err != 0) { + return err; + } + + gr->max_comptag_lines = max_comptag_lines; + gr->comptags_per_cacheline = comptags_per_cacheline; + gr->gobs_per_comptagline_per_slice = gobs_per_comptagline_per_slice; + gr->compbit_backing_size = compbit_backing_size; + + return 0; +} + +int gp10b_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, + u32 min, u32 max) +{ + struct gr_gk20a *gr = &g->gr; + struct nvgpu_timeout timeout; + int err = 0; + u32 ltc, slice, ctrl1, val, hw_op = 0U; + u32 slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v( + gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r())); + u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); + u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); + const u32 max_lines = 16384U; + + nvgpu_log_fn(g, " "); + + trace_gk20a_ltc_cbc_ctrl_start(g->name, op, min, max); + + if (gr->compbit_store.mem.size == 0U) { + return 0; + } + + while (true) { + const u32 iter_max = min(min + max_lines - 1U, max); + bool full_cache_op = true; + + nvgpu_mutex_acquire(&g->mm.l2_op_lock); + + nvgpu_log_info(g, "clearing CBC lines %u..%u", min, iter_max); + + if (op == gk20a_cbc_op_clear) { + nvgpu_writel_check( + g, ltc_ltcs_ltss_cbc_ctrl2_r(), + ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f( + min)); + + nvgpu_writel_check( + g, ltc_ltcs_ltss_cbc_ctrl3_r(), + ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f( + iter_max)); + + hw_op = ltc_ltcs_ltss_cbc_ctrl1_clear_active_f(); + full_cache_op = false; + } else if (op == gk20a_cbc_op_clean) { + /* this is full-cache op */ + hw_op = ltc_ltcs_ltss_cbc_ctrl1_clean_active_f(); + } else if (op == gk20a_cbc_op_invalidate) { + /* this is full-cache op */ + hw_op = ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f(); + } else { + 
nvgpu_err(g, "Unknown op: %u", (unsigned)op); + err = -EINVAL; + goto out; + } + gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl1_r(), + gk20a_readl(g, + ltc_ltcs_ltss_cbc_ctrl1_r()) | hw_op); + + for (ltc = 0; ltc < g->ltc_count; ltc++) { + for (slice = 0; slice < slices_per_ltc; slice++) { + + ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() + + ltc * ltc_stride + slice * lts_stride; + + nvgpu_timeout_init(g, &timeout, 2000, + NVGPU_TIMER_RETRY_TIMER); + do { + val = gk20a_readl(g, ctrl1); + if ((val & hw_op) == 0U) { + break; + } + nvgpu_udelay(5); + } while (nvgpu_timeout_expired(&timeout) == 0); + + if (nvgpu_timeout_peek_expired(&timeout) != 0) { + nvgpu_err(g, "comp tag clear timeout"); + err = -EBUSY; + goto out; + } + } + } + + /* are we done? */ + if (full_cache_op || iter_max == max) { + break; + } + + /* note: iter_max is inclusive upper bound */ + min = iter_max + 1U; + + /* give a chance for higher-priority threads to progress */ + nvgpu_mutex_release(&g->mm.l2_op_lock); + } +out: + trace_gk20a_ltc_cbc_ctrl_done(g->name); + nvgpu_mutex_release(&g->mm.l2_op_lock); + return err; +} diff --git a/drivers/gpu/nvgpu/hal/cbc/cbc_gp10b.h b/drivers/gpu/nvgpu/hal/cbc/cbc_gp10b.h new file mode 100644 index 000000000..272ef8c43 --- /dev/null +++ b/drivers/gpu/nvgpu/hal/cbc/cbc_gp10b.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef CBC_GP10B_H +#define CBC_GP10B_H +struct gk20a; +struct gpu_ops; + +#include + +int gp10b_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr); +int gp10b_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, + u32 min, u32 max); +#endif diff --git a/drivers/gpu/nvgpu/hal/cbc/cbc_tu104.c b/drivers/gpu/nvgpu/hal/cbc/cbc_tu104.c new file mode 100644 index 000000000..64470ff8c --- /dev/null +++ b/drivers/gpu/nvgpu/hal/cbc/cbc_tu104.c @@ -0,0 +1,218 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "cbc_tu104.h" + +#include "common/ltc/ltc_gv11b.h" + +#include + +/* Return g->ltc_count, widened to u64, shifted left by the CBC base alignment shift. */ u64 tu104_cbc_get_base_divisor(struct gk20a *g) +{ + return (u64)g->ltc_count << + ltc_ltcs_ltss_cbc_base_alignment_shift_v(); +} + +/* Size and allocate the compbit backing store and set up the comptag allocator. Returns 0 on success, when gr->max_comptag_mem yields zero comptag lines, or when gr->max_comptag_lines is already non-zero; otherwise returns the error from nvgpu_cbc_alloc() or gk20a_comptag_allocator_init(). */ int tu104_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr) +{ + /* max memory size (MB) to cover */ + u32 max_size = gr->max_comptag_mem; + /* one tag line covers 64KB */ + u32 max_comptag_lines = max_size << 4U; + u32 compbit_backing_size; + u32 hw_max_comptag_lines; + u32 cbc_param; + u32 ctags_size; + u32 ctags_per_cacheline; + u32 amap_divide_rounding, amap_swizzle_rounding; + int err; + + nvgpu_log_fn(g, " "); + + /* nothing to cover */ if (max_comptag_lines == 0U) { + return 0; + } + + /* Already initialized */ + if (gr->max_comptag_lines != 0U) { + return 0; + } + + /* clamp the requested line count to the ctrl3 clear upper bound init value */ hw_max_comptag_lines = + ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_init_v(); + if (max_comptag_lines > hw_max_comptag_lines) { + max_comptag_lines = hw_max_comptag_lines; + } + + cbc_param = nvgpu_readl(g, ltc_ltcs_ltss_cbc_param_r()); + + ctags_size = + ltc_ltcs_ltss_cbc_param_bytes_per_comptagline_per_slice_v(cbc_param); + /* NOTE(review): ctags_size is decoded from the CBC param register; a zero value would divide by zero here - confirm it can never read back as zero */ ctags_per_cacheline = gr->cacheline_size / ctags_size; + + amap_divide_rounding = (U32(2U) * U32(1024U)) << + ltc_ltcs_ltss_cbc_param_amap_divide_rounding_v(cbc_param); + amap_swizzle_rounding = (U32(64U) * U32(1024U)) << + ltc_ltcs_ltss_cbc_param_amap_swizzle_rounding_v(cbc_param); + + /* per-slice store rounded up to a cacheline, scaled by slices_per_ltc and ltc_count, then padded with the amap rounding terms */ compbit_backing_size = + roundup(max_comptag_lines * ctags_size, gr->cacheline_size); + compbit_backing_size = + compbit_backing_size * gr->slices_per_ltc * g->ltc_count; + + compbit_backing_size += g->ltc_count * amap_divide_rounding; + 
compbit_backing_size += amap_swizzle_rounding; + + /* must be a multiple of 64KB */ + compbit_backing_size = roundup(compbit_backing_size, + U32(64) * U32(1024)); + + /* third argument is vidmem_alloc */ err = nvgpu_cbc_alloc(g, compbit_backing_size, true); + if (err != 0) { + return err; + } + + err = gk20a_comptag_allocator_init(g, &gr->comp_tags, max_comptag_lines); + /* NOTE(review): on this failure path the backing store obtained via nvgpu_cbc_alloc() is not released in this function - confirm it is cleaned up elsewhere */ if (err != 0) { + return err; + } + + /* publish the results; a non-zero gr->max_comptag_lines makes later calls return early */ gr->max_comptag_lines = max_comptag_lines; + gr->comptags_per_cacheline = ctags_per_cacheline; + gr->gobs_per_comptagline_per_slice = ctags_size; + gr->compbit_backing_size = compbit_backing_size; + + /* NOTE(review): the values logged below are u32 but formatted with %d - consider %u */ nvgpu_log_info(g, "compbit backing store size : %d", + compbit_backing_size); + nvgpu_log_info(g, "max comptag lines : %d", + max_comptag_lines); + nvgpu_log_info(g, "gobs_per_comptagline_per_slice: %d", + gr->gobs_per_comptagline_per_slice); + + return 0; +} + +/* Run a CBC operation (clear, clean or invalidate) and wait for it to finish on every LTC slice. For gk20a_cbc_op_clear, comptag lines min..max (inclusive) are processed in chunks of at most max_lines lines, dropping l2_op_lock between chunks; clean and invalidate make a single pass. Returns 0 on success or when the compbit store size is zero, -EINVAL for an unknown op, and -EBUSY if a slice does not report completion before the poll timeout expires. */ int tu104_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, + u32 min, u32 max) +{ + struct gr_gk20a *gr = &g->gr; + struct nvgpu_timeout timeout; + int err = 0; + u32 ltc, slice, ctrl1, val, hw_op = 0U; + u32 slices_per_ltc = gr->slices_per_ltc; + u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); + u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); + const u32 max_lines = 16384U; + + nvgpu_log_fn(g, " "); + + trace_gk20a_ltc_cbc_ctrl_start(g->name, op, min, max); + + /* NOTE(review): this early return happens after the ctrl_start trace but skips the ctrl_done trace emitted at out: - confirm that is intended */ if (gr->compbit_store.mem.size == 0U) { + return 0; + } + + while (true) { + const u32 iter_max = min(min + max_lines - 1U, max); + bool full_cache_op = true; + + nvgpu_mutex_acquire(&g->mm.l2_op_lock); + + /* NOTE(review): the message says "clearing" but it is logged for every op, including clean and invalidate */ nvgpu_log_info(g, "clearing CBC lines %u..%u", min, iter_max); + + if (op == gk20a_cbc_op_clear) { + /* program the inclusive line range for this chunk before triggering the clear */ nvgpu_writel( + g, ltc_ltcs_ltss_cbc_ctrl2_r(), + ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f( + min)); + nvgpu_writel( + g, ltc_ltcs_ltss_cbc_ctrl3_r(), + ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f( + iter_max)); + hw_op = ltc_ltcs_ltss_cbc_ctrl1_clear_active_f(); + full_cache_op = false; + } else if (op == gk20a_cbc_op_clean) { + /* this is 
full-cache op */ + hw_op = ltc_ltcs_ltss_cbc_ctrl1_clean_active_f(); + } else if (op == gk20a_cbc_op_invalidate) { + /* this is full-cache op */ + hw_op = ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f(); + } else { + nvgpu_err(g, "Unknown op: %u", (unsigned)op); + err = -EINVAL; + goto out; + } + + /* trigger the operation by OR-ing the op bit into the current ctrl1 value */ nvgpu_writel(g, ltc_ltcs_ltss_cbc_ctrl1_r(), + nvgpu_readl(g, + ltc_ltcs_ltss_cbc_ctrl1_r()) | hw_op); + + for (ltc = 0; ltc < g->ltc_count; ltc++) { + for (slice = 0; slice < slices_per_ltc; slice++) { + + /* per-slice ctrl1 address: ltc0/lts0 register offset plus the LTC and LTS strides */ ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() + + ltc * ltc_stride + slice * lts_stride; + + /* poll this slice's ctrl1 until the hw_op bit reads back clear or the retry timeout expires */ nvgpu_timeout_init(g, &timeout, 2000, + NVGPU_TIMER_RETRY_TIMER); + do { + val = nvgpu_readl(g, ctrl1); + if ((val & hw_op) == 0U) { + break; + } + nvgpu_udelay(5); + } while (nvgpu_timeout_expired(&timeout) == 0); + + if (nvgpu_timeout_peek_expired(&timeout) != 0) { + nvgpu_err(g, "comp tag clear timeout"); + err = -EBUSY; + goto out; + } + } + } + + /* are we done? */ + if (full_cache_op || iter_max == max) { + break; + } + + /* note: iter_max is inclusive upper bound */ + min = iter_max + 1U; + + /* give a chance for higher-priority threads to progress */ + nvgpu_mutex_release(&g->mm.l2_op_lock); + } +out: + /* reached with l2_op_lock still held, whether by break or by goto from inside the loop */ trace_gk20a_ltc_cbc_ctrl_done(g->name); + nvgpu_mutex_release(&g->mm.l2_op_lock); + return err; +} diff --git a/drivers/gpu/nvgpu/hal/cbc/cbc_tu104.h b/drivers/gpu/nvgpu/hal/cbc/cbc_tu104.h new file mode 100644 index 000000000..0c3a94f28 --- /dev/null +++ b/drivers/gpu/nvgpu/hal/cbc/cbc_tu104.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef CBC_TU104_H +#define CBC_TU104_H + +#include + +enum gk20a_cbc_op; +struct gk20a; +struct gr_gk20a; + +u64 tu104_cbc_get_base_divisor(struct gk20a *g); +int tu104_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr); +int tu104_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, + u32 min, u32 max); + +#endif diff --git a/drivers/gpu/nvgpu/include/nvgpu/cbc.h b/drivers/gpu/nvgpu/include/nvgpu/cbc.h new file mode 100644 index 000000000..429f596f0 --- /dev/null +++ b/drivers/gpu/nvgpu/include/nvgpu/cbc.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef NVGPU_CBC_H +#define NVGPU_CBC_H + +#include + +struct gk20a; + +int nvgpu_cbc_alloc(struct gk20a *g, size_t compbit_backing_size, + bool vidmem_alloc); + +#endif /* NVGPU_CBC_H */ diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index 7939e6851..84936f6e1 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h @@ -207,10 +207,6 @@ enum { struct gpu_ops { struct { int (*determine_L2_size_bytes)(struct gk20a *gk20a); - u64 (*get_cbc_base_divisor)(struct gk20a *g); - int (*init_comptags)(struct gk20a *g, struct gr_gk20a *gr); - int (*cbc_ctrl)(struct gk20a *g, enum gk20a_cbc_op op, - u32 min, u32 max); void (*set_zbc_color_entry)(struct gk20a *g, u32 *color_val_l2, u32 index); @@ -220,11 +216,9 @@ struct gpu_ops { void (*set_zbc_s_entry)(struct gk20a *g, u32 s_val, u32 index); - void (*init_cbc)(struct gk20a *g, struct gr_gk20a *gr); void (*set_enabled)(struct gk20a *g, bool enabled); void (*init_fs_state)(struct gk20a *g); void (*isr)(struct gk20a *g, unsigned int ltc); - u32 (*cbc_fix_config)(struct gk20a *g, int base); void (*flush)(struct gk20a *g); void (*intr_en_illegal_compstat)(struct gk20a *g, bool enable); bool (*pri_is_ltc_addr)(struct gk20a *g, u32 addr); @@ -242,6 +236,14 @@ struct gpu_ops { u64 err_addr, u64 count); } err_ops; } ltc; + struct { + void (*init)(struct gk20a *g, struct gr_gk20a *gr); + u64 (*get_base_divisor)(struct gk20a *g); + int (*alloc_comptags)(struct gk20a *g, struct gr_gk20a *gr); + int (*ctrl)(struct gk20a *g, enum gk20a_cbc_op op, + u32 min, u32 max); + u32 (*fix_config)(struct gk20a *g, int base); + } cbc; struct { void (*isr_stall)(struct gk20a *g, u32 inst_id, u32 pri_base); u32 (*isr_nonstall)(struct gk20a *g, u32 inst_id, u32 pri_base); diff --git a/drivers/gpu/nvgpu/include/nvgpu/ltc.h b/drivers/gpu/nvgpu/include/nvgpu/ltc.h index a674a2913..ba554a356 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/ltc.h +++ 
b/drivers/gpu/nvgpu/include/nvgpu/ltc.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -29,7 +29,5 @@ struct gk20a; int nvgpu_init_ltc_support(struct gk20a *g); void nvgpu_ltc_sync_enabled(struct gk20a *g); -int nvgpu_ltc_alloc_cbc(struct gk20a *g, size_t compbit_backing_size, - bool vidmem_alloc); #endif /* NVGPU_LTC_H */ diff --git a/drivers/gpu/nvgpu/os/linux/ltc.c b/drivers/gpu/nvgpu/os/linux/linux-cbc.c similarity index 89% rename from drivers/gpu/nvgpu/os/linux/ltc.c rename to drivers/gpu/nvgpu/os/linux/linux-cbc.c index 8a892381d..056798c2c 100644 --- a/drivers/gpu/nvgpu/os/linux/ltc.c +++ b/drivers/gpu/nvgpu/os/linux/linux-cbc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -14,14 +14,14 @@ * along with this program. If not, see . */ -#include +#include #include #include #include #include "gk20a/gr_gk20a.h" -int nvgpu_ltc_alloc_cbc(struct gk20a *g, size_t compbit_backing_size, +int nvgpu_cbc_alloc(struct gk20a *g, size_t compbit_backing_size, bool vidmem_alloc) { struct gr_gk20a *gr = &g->gr; diff --git a/drivers/gpu/nvgpu/os/posix/stubs.c b/drivers/gpu/nvgpu/os/posix/stubs.c index 348910340..02c211bb1 100644 --- a/drivers/gpu/nvgpu/os/posix/stubs.c +++ b/drivers/gpu/nvgpu/os/posix/stubs.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -26,7 +26,7 @@ */ #include -#include +#include #include @@ -43,7 +43,7 @@ void nvgpu_ecc_sysfs_remove(struct gk20a *g) { } -int nvgpu_ltc_alloc_cbc(struct gk20a *g, size_t compbit_backing_size, +int nvgpu_cbc_alloc(struct gk20a *g, size_t compbit_backing_size, bool vidmem_alloc) { return 0; diff --git a/drivers/gpu/nvgpu/tu104/hal_tu104.c b/drivers/gpu/nvgpu/tu104/hal_tu104.c index 88e7deefc..6b9a417de 100644 --- a/drivers/gpu/nvgpu/tu104/hal_tu104.c +++ b/drivers/gpu/nvgpu/tu104/hal_tu104.c @@ -29,6 +29,9 @@ #include "hal/priv_ring/priv_ring_gm20b.h" #include "hal/priv_ring/priv_ring_gp10b.h" #include "hal/power_features/cg/tu104_gating_reglist.h" +#include "hal/cbc/cbc_gm20b.h" +#include "hal/cbc/cbc_gp10b.h" +#include "hal/cbc/cbc_tu104.h" #include "hal/fuse/fuse_gm20b.h" #include "hal/fuse/fuse_gp10b.h" #include "hal/fuse/fuse_gp106.h" @@ -341,13 +344,8 @@ static const struct gpu_ops tu104_ops = { .set_zbc_s_entry = gv11b_ltc_set_zbc_stencil_entry, .set_zbc_color_entry = gm20b_ltc_set_zbc_color_entry, .set_zbc_depth_entry = gm20b_ltc_set_zbc_depth_entry, - .init_cbc = NULL, - .get_cbc_base_divisor = ltc_tu104_get_cbc_base_divisor, .init_fs_state = ltc_tu104_init_fs_state, - .init_comptags = ltc_tu104_init_comptags, - .cbc_ctrl = ltc_tu104_cbc_ctrl, .isr = gv11b_ltc_isr, - .cbc_fix_config = NULL, .flush = gm20b_flush_ltc, .set_enabled = gp10b_ltc_set_enabled, .intr_en_illegal_compstat = gv11b_ltc_intr_en_illegal_compstat, @@ -357,6 +355,13 @@ static const struct gpu_ops tu104_ops = { .split_lts_broadcast_addr = gm20b_ltc_split_lts_broadcast_addr, .split_ltc_broadcast_addr = gm20b_ltc_split_ltc_broadcast_addr, }, + .cbc = { + .init = NULL, + .get_base_divisor = tu104_cbc_get_base_divisor, + .alloc_comptags = tu104_cbc_alloc_comptags, + .ctrl = tu104_cbc_ctrl, + .fix_config = NULL, + }, .ce2 = { .isr_stall = 
gv11b_ce_isr, .isr_nonstall = NULL, @@ -1229,6 +1234,7 @@ int tu104_init_hal(struct gk20a *g) gops->bios = tu104_ops.bios; gops->ltc = tu104_ops.ltc; + gops->cbc = tu104_ops.cbc; gops->ce2 = tu104_ops.ce2; gops->gr = tu104_ops.gr; gops->gr.ctxsw_prog = tu104_ops.gr.ctxsw_prog; @@ -1312,8 +1318,8 @@ int tu104_init_hal(struct gk20a *g) /* dGpu VDK support */ if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)){ /* Disable compression */ - gops->ltc.cbc_ctrl = NULL; - gops->ltc.init_comptags = NULL; + gops->cbc.ctrl = NULL; + gops->cbc.alloc_comptags = NULL; gops->fb.init_cbc = NULL; gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode;