diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index 1739ba6e8..cc76de2a3 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile @@ -178,6 +178,9 @@ nvgpu-y += \ hal/power_features/cg/gv100_gating_reglist.o \ hal/power_features/cg/gv11b_gating_reglist.o \ hal/power_features/cg/tu104_gating_reglist.o \ + hal/cbc/cbc_gm20b.o \ + hal/cbc/cbc_gp10b.o \ + hal/cbc/cbc_tu104.o \ hal/fuse/fuse_gm20b.o \ hal/fuse/fuse_gp10b.o \ hal/fuse/fuse_gp106.o @@ -222,7 +225,7 @@ nvgpu-y += \ os/linux/nvlink.o \ os/linux/dt.o \ os/linux/ecc_sysfs.o \ - os/linux/ltc.o \ + os/linux/linux-cbc.o \ os/linux/os_ops_tu104.o \ os/linux/bsearch.o @@ -407,6 +410,7 @@ nvgpu-$(CONFIG_GK20A_VIDMEM) += \ nvgpu-$(CONFIG_TEGRA_GR_VIRTUALIZATION) += \ common/vgpu/ltc/ltc_vgpu.o \ + common/vgpu/cbc/cbc_vgpu.o \ common/vgpu/gr/gr_vgpu.o \ common/vgpu/gr/ctx_vgpu.o \ common/vgpu/fifo/fifo_vgpu.o \ diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources index 90abc7b44..393049b8b 100644 --- a/drivers/gpu/nvgpu/Makefile.sources +++ b/drivers/gpu/nvgpu/Makefile.sources @@ -323,6 +323,7 @@ srcs += common/sim.c \ common/vgpu/clk_vgpu.c \ common/vgpu/debugger_vgpu.c \ common/vgpu/ltc/ltc_vgpu.c \ + common/vgpu/cbc/cbc_vgpu.c \ common/vgpu/ce_vgpu.c \ common/vgpu/gv11b/vgpu_gv11b.c \ common/vgpu/gv11b/vgpu_hal_gv11b.c \ @@ -343,6 +344,9 @@ srcs += common/sim.c \ hal/power_features/cg/gp106_gating_reglist.c \ hal/power_features/cg/gv100_gating_reglist.c \ hal/power_features/cg/tu104_gating_reglist.c \ + hal/cbc/cbc_gm20b.c \ + hal/cbc/cbc_gp10b.c \ + hal/cbc/cbc_tu104.c \ hal/fuse/fuse_gm20b.c \ hal/fuse/fuse_gp10b.c \ hal/fuse/fuse_gp106.c diff --git a/drivers/gpu/nvgpu/common/fb/fb_gv11b.c b/drivers/gpu/nvgpu/common/fb/fb_gv11b.c index eb6e6447e..378b6005d 100644 --- a/drivers/gpu/nvgpu/common/fb/fb_gv11b.c +++ b/drivers/gpu/nvgpu/common/fb/fb_gv11b.c @@ -154,9 +154,9 @@ void gv11b_fb_init_cbc(struct gk20a *g, struct gr_gk20a *gr) 
compbit_base_post_divide++; } - if (g->ops.ltc.cbc_fix_config != NULL) { + if (g->ops.cbc.fix_config != NULL) { compbit_base_post_divide = - g->ops.ltc.cbc_fix_config(g, compbit_base_post_divide); + g->ops.cbc.fix_config(g, compbit_base_post_divide); } gk20a_writel(g, fb_mmu_cbc_base_r(), @@ -172,7 +172,7 @@ void gv11b_fb_init_cbc(struct gk20a *g, struct gr_gk20a *gr) gr->compbit_store.base_hw = compbit_base_post_divide; - g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_invalidate, + g->ops.cbc.ctrl(g, gk20a_cbc_op_invalidate, 0, max_comptag_lines - 1U); } diff --git a/drivers/gpu/nvgpu/common/fb/fb_tu104.c b/drivers/gpu/nvgpu/common/fb/fb_tu104.c index b2740d95f..d414ce708 100644 --- a/drivers/gpu/nvgpu/common/fb/fb_tu104.c +++ b/drivers/gpu/nvgpu/common/fb/fb_tu104.c @@ -435,7 +435,7 @@ void fb_tu104_init_cbc(struct gk20a *g, struct gr_gk20a *gr) u32 cbc_max; compbit_store_pa = nvgpu_mem_get_addr(g, &gr->compbit_store.mem); - base_divisor = g->ops.ltc.get_cbc_base_divisor(g); + base_divisor = g->ops.cbc.get_base_divisor(g); compbit_store_base = DIV_ROUND_UP(compbit_store_pa, base_divisor); cbc_start_addr = (u64)g->ltc_count * (compbit_store_base << @@ -466,7 +466,7 @@ void fb_tu104_init_cbc(struct gk20a *g, struct gr_gk20a *gr) gr->compbit_store.base_hw = compbit_store_base; - g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_invalidate, + g->ops.cbc.ctrl(g, gk20a_cbc_op_invalidate, 0, gr->max_comptag_lines - 1U); } diff --git a/drivers/gpu/nvgpu/common/ltc/ltc_gm20b.c b/drivers/gpu/nvgpu/common/ltc/ltc_gm20b.c index 56b01165e..b50f2ee3d 100644 --- a/drivers/gpu/nvgpu/common/ltc/ltc_gm20b.c +++ b/drivers/gpu/nvgpu/common/ltc/ltc_gm20b.c @@ -38,173 +38,6 @@ #include "ltc_gm20b.h" -int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) -{ - /* max memory size (MB) to cover */ - u32 max_size = gr->max_comptag_mem; - /* one tag line covers 128KB */ - u32 max_comptag_lines = max_size << 3U; - - u32 hw_max_comptag_lines = - ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_init_v(); - - u32 
cbc_param = - gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()); - u32 comptags_per_cacheline = - ltc_ltcs_ltss_cbc_param_comptags_per_cache_line_v(cbc_param); - - u32 compbit_backing_size; - - int err; - - nvgpu_log_fn(g, " "); - - if (max_comptag_lines == 0U) { - return 0; - } - - if (max_comptag_lines > hw_max_comptag_lines) { - max_comptag_lines = hw_max_comptag_lines; - } - - compbit_backing_size = - DIV_ROUND_UP(max_comptag_lines, comptags_per_cacheline) * - gr->cacheline_size * gr->slices_per_ltc * g->ltc_count; - - /* aligned to 2KB * ltc_count */ - compbit_backing_size += - g->ltc_count << ltc_ltcs_ltss_cbc_base_alignment_shift_v(); - - /* must be a multiple of 64KB */ - compbit_backing_size = roundup(compbit_backing_size, - U32(64) * U32(1024)); - - max_comptag_lines = - (compbit_backing_size * comptags_per_cacheline) / - (gr->cacheline_size * gr->slices_per_ltc * g->ltc_count); - - if (max_comptag_lines > hw_max_comptag_lines) { - max_comptag_lines = hw_max_comptag_lines; - } - - nvgpu_log_info(g, "compbit backing store size : %d", - compbit_backing_size); - nvgpu_log_info(g, "max comptag lines : %d", - max_comptag_lines); - - err = nvgpu_ltc_alloc_cbc(g, compbit_backing_size, false); - if (err != 0) { - return err; - } - - err = gk20a_comptag_allocator_init(g, &gr->comp_tags, max_comptag_lines); - if (err != 0) { - return err; - } - - gr->max_comptag_lines = max_comptag_lines; - gr->comptags_per_cacheline = comptags_per_cacheline; - gr->compbit_backing_size = compbit_backing_size; - - return 0; -} - -int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, - u32 min, u32 max) -{ - struct gr_gk20a *gr = &g->gr; - struct nvgpu_timeout timeout; - int err = 0; - u32 ltc, slice, ctrl1, val, hw_op = 0U; - u32 slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v( - gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r())); - u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); - u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); - const u32 
max_lines = 16384U; - - nvgpu_log_fn(g, " "); - - trace_gk20a_ltc_cbc_ctrl_start(g->name, op, min, max); - - if (gr->compbit_store.mem.size == 0ULL) { - return 0; - } - - while (true) { - const u32 iter_max = min(min + max_lines - 1U, max); - bool full_cache_op = true; - - nvgpu_mutex_acquire(&g->mm.l2_op_lock); - - nvgpu_log_info(g, "clearing CBC lines %u..%u", min, iter_max); - - if (op == gk20a_cbc_op_clear) { - gk20a_writel( - g, ltc_ltcs_ltss_cbc_ctrl2_r(), - ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f( - min)); - gk20a_writel( - g, ltc_ltcs_ltss_cbc_ctrl3_r(), - ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f( - iter_max)); - hw_op = ltc_ltcs_ltss_cbc_ctrl1_clear_active_f(); - full_cache_op = false; - } else if (op == gk20a_cbc_op_clean) { - /* this is full-cache op */ - hw_op = ltc_ltcs_ltss_cbc_ctrl1_clean_active_f(); - } else if (op == gk20a_cbc_op_invalidate) { - /* this is full-cache op */ - hw_op = ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f(); - } else { - nvgpu_err(g, "Unknown op: %u", (unsigned)op); - err = -EINVAL; - goto out; - } - gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl1_r(), - gk20a_readl(g, - ltc_ltcs_ltss_cbc_ctrl1_r()) | hw_op); - - for (ltc = 0; ltc < g->ltc_count; ltc++) { - for (slice = 0; slice < slices_per_ltc; slice++) { - - ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() + - ltc * ltc_stride + slice * lts_stride; - - nvgpu_timeout_init(g, &timeout, 2000, - NVGPU_TIMER_RETRY_TIMER); - do { - val = gk20a_readl(g, ctrl1); - if ((val & hw_op) == 0U) { - break; - } - nvgpu_udelay(5); - } while (nvgpu_timeout_expired(&timeout) == 0); - - if (nvgpu_timeout_peek_expired(&timeout) != 0) { - nvgpu_err(g, "comp tag clear timeout"); - err = -EBUSY; - goto out; - } - } - } - - /* are we done? 
*/ - if (full_cache_op || iter_max == max) { - break; - } - - /* note: iter_max is inclusive upper bound */ - min = iter_max + 1U; - - /* give a chance for higher-priority threads to progress */ - nvgpu_mutex_release(&g->mm.l2_op_lock); - } -out: - trace_gk20a_ltc_cbc_ctrl_done(g->name); - nvgpu_mutex_release(&g->mm.l2_op_lock); - return err; -} - void gm20b_ltc_init_fs_state(struct gk20a *g) { struct gr_gk20a *gr = &g->gr; @@ -264,18 +97,6 @@ void gm20b_ltc_isr(struct gk20a *g, unsigned int ltc) } } -u32 gm20b_ltc_cbc_fix_config(struct gk20a *g, int base) -{ - u32 val = gk20a_readl(g, ltc_ltcs_ltss_cbc_num_active_ltcs_r()); - if (val == 2U) { - return base * 2; - } else if (val != 1U) { - nvgpu_err(g, "Invalid number of active ltcs: %08x", val); - } - - return base; -} - /* * Performs a full flush of the L2 cache. */ @@ -432,59 +253,6 @@ void gm20b_ltc_set_zbc_depth_entry(struct gk20a *g, depth_val); } -void gm20b_ltc_init_cbc(struct gk20a *g, struct gr_gk20a *gr) -{ - u32 max_size = gr->max_comptag_mem; - u32 max_comptag_lines = max_size << 3U; - - u32 compbit_base_post_divide; - u64 compbit_base_post_multiply64; - u64 compbit_store_iova; - u64 compbit_base_post_divide64; - - if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) { - compbit_store_iova = nvgpu_mem_get_phys_addr(g, - &gr->compbit_store.mem); - } else { - compbit_store_iova = nvgpu_mem_get_addr(g, - &gr->compbit_store.mem); - } - - compbit_base_post_divide64 = compbit_store_iova >> - ltc_ltcs_ltss_cbc_base_alignment_shift_v(); - - do_div(compbit_base_post_divide64, g->ltc_count); - compbit_base_post_divide = u64_lo32(compbit_base_post_divide64); - - compbit_base_post_multiply64 = ((u64)compbit_base_post_divide * - g->ltc_count) << ltc_ltcs_ltss_cbc_base_alignment_shift_v(); - - if (compbit_base_post_multiply64 < compbit_store_iova) { - compbit_base_post_divide++; - } - - /* Bug 1477079 indicates sw adjustment on the posted divided base. 
*/ - if (g->ops.ltc.cbc_fix_config != NULL) { - compbit_base_post_divide = - g->ops.ltc.cbc_fix_config(g, compbit_base_post_divide); - } - - gk20a_writel(g, ltc_ltcs_ltss_cbc_base_r(), - compbit_base_post_divide); - - nvgpu_log(g, gpu_dbg_info | gpu_dbg_map_v | gpu_dbg_pte, - "compbit base.pa: 0x%x,%08x cbc_base:0x%08x\n", - (u32)(compbit_store_iova >> 32), - (u32)(compbit_store_iova & 0xffffffffU), - compbit_base_post_divide); - - gr->compbit_store.base_hw = compbit_base_post_divide; - - g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_invalidate, - 0, max_comptag_lines - 1U); - -} - void gm20b_ltc_set_enabled(struct gk20a *g, bool enabled) { u32 reg_f = ltc_ltcs_ltss_tstg_set_mgmt_2_l2_bypass_mode_enabled_f(); diff --git a/drivers/gpu/nvgpu/common/ltc/ltc_gm20b.h b/drivers/gpu/nvgpu/common/ltc/ltc_gm20b.h index 86989798b..cff668899 100644 --- a/drivers/gpu/nvgpu/common/ltc/ltc_gm20b.h +++ b/drivers/gpu/nvgpu/common/ltc/ltc_gm20b.h @@ -29,10 +29,7 @@ struct gk20a; struct gr_gk20a; -struct gpu_ops; -enum gk20a_cbc_op; -int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr); int gm20b_determine_L2_size_bytes(struct gk20a *g); void gm20b_ltc_set_zbc_color_entry(struct gk20a *g, u32 *color_l2, @@ -40,19 +37,11 @@ void gm20b_ltc_set_zbc_color_entry(struct gk20a *g, void gm20b_ltc_set_zbc_depth_entry(struct gk20a *g, u32 depth_val, u32 index); -void gm20b_ltc_init_cbc(struct gk20a *g, struct gr_gk20a *gr); void gm20b_ltc_set_enabled(struct gk20a *g, bool enabled); void gm20b_ltc_init_fs_state(struct gk20a *g); -int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, - u32 min, u32 max); void gm20b_ltc_isr(struct gk20a *g, unsigned int ltc); void gm20b_ltc_lts_isr(struct gk20a *g, unsigned int ltc, unsigned int slice); -u32 gm20b_ltc_cbc_fix_config(struct gk20a *g, int base); void gm20b_flush_ltc(struct gk20a *g); -int gm20b_ltc_alloc_phys_cbc(struct gk20a *g, - size_t compbit_backing_size); -int gm20b_ltc_alloc_virt_cbc(struct gk20a *g, - size_t 
compbit_backing_size); bool gm20b_ltc_pri_is_ltc_addr(struct gk20a *g, u32 addr); bool gm20b_ltc_is_ltcs_ltss_addr(struct gk20a *g, u32 addr); bool gm20b_ltc_is_ltcn_ltss_addr(struct gk20a *g, u32 addr); diff --git a/drivers/gpu/nvgpu/common/ltc/ltc_gp10b.c b/drivers/gpu/nvgpu/common/ltc/ltc_gp10b.c index 08e6f20d3..fdacb7b7a 100644 --- a/drivers/gpu/nvgpu/common/ltc/ltc_gp10b.c +++ b/drivers/gpu/nvgpu/common/ltc/ltc_gp10b.c @@ -1,7 +1,7 @@ /* * GP10B L2 * - * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2019, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -56,182 +56,6 @@ int gp10b_determine_L2_size_bytes(struct gk20a *g) return ret; } -int gp10b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) -{ - /* max memory size (MB) to cover */ - u32 max_size = gr->max_comptag_mem; - /* one tag line covers 64KB */ - u32 max_comptag_lines = max_size << 4U; - - u32 hw_max_comptag_lines = - ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_init_v(); - - u32 cbc_param = - gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()); - u32 comptags_per_cacheline = - ltc_ltcs_ltss_cbc_param_comptags_per_cache_line_v(cbc_param); - u32 cbc_param2 = - gk20a_readl(g, ltc_ltcs_ltss_cbc_param2_r()); - u32 gobs_per_comptagline_per_slice = - ltc_ltcs_ltss_cbc_param2_gobs_per_comptagline_per_slice_v(cbc_param2); - - u32 compbit_backing_size; - - int err; - - nvgpu_log_fn(g, " "); - - if (max_comptag_lines == 0U) { - return 0; - } - - /* Already initialized */ - if (gr->max_comptag_lines != 0U) { - return 0; - } - - if (max_comptag_lines > hw_max_comptag_lines) { - max_comptag_lines = hw_max_comptag_lines; - } - - compbit_backing_size = - roundup(max_comptag_lines * gobs_per_comptagline_per_slice, - gr->cacheline_size); - compbit_backing_size = roundup( - compbit_backing_size * gr->slices_per_ltc * g->ltc_count, - 
g->ops.fb.compressible_page_size(g)); - - /* aligned to 2KB * ltc_count */ - compbit_backing_size += - g->ltc_count << ltc_ltcs_ltss_cbc_base_alignment_shift_v(); - - /* must be a multiple of 64KB */ - compbit_backing_size = roundup(compbit_backing_size, - U32(64) * U32(1024)); - - nvgpu_log_info(g, "compbit backing store size : %d", - compbit_backing_size); - nvgpu_log_info(g, "max comptag lines : %d", - max_comptag_lines); - nvgpu_log_info(g, "gobs_per_comptagline_per_slice: %d", - gobs_per_comptagline_per_slice); - - err = nvgpu_ltc_alloc_cbc(g, compbit_backing_size, false); - if (err != 0) { - return err; - } - - err = gk20a_comptag_allocator_init(g, &gr->comp_tags, max_comptag_lines); - if (err != 0) { - return err; - } - - gr->max_comptag_lines = max_comptag_lines; - gr->comptags_per_cacheline = comptags_per_cacheline; - gr->gobs_per_comptagline_per_slice = gobs_per_comptagline_per_slice; - gr->compbit_backing_size = compbit_backing_size; - - return 0; -} - -int gp10b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, - u32 min, u32 max) -{ - struct gr_gk20a *gr = &g->gr; - struct nvgpu_timeout timeout; - int err = 0; - u32 ltc, slice, ctrl1, val, hw_op = 0U; - u32 slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v( - gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r())); - u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); - u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); - const u32 max_lines = 16384U; - - nvgpu_log_fn(g, " "); - - trace_gk20a_ltc_cbc_ctrl_start(g->name, op, min, max); - - if (gr->compbit_store.mem.size == 0U) { - return 0; - } - - while (true) { - const u32 iter_max = min(min + max_lines - 1U, max); - bool full_cache_op = true; - - nvgpu_mutex_acquire(&g->mm.l2_op_lock); - - nvgpu_log_info(g, "clearing CBC lines %u..%u", min, iter_max); - - if (op == gk20a_cbc_op_clear) { - nvgpu_writel_check( - g, ltc_ltcs_ltss_cbc_ctrl2_r(), - ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f( - min)); - - nvgpu_writel_check( - g, 
ltc_ltcs_ltss_cbc_ctrl3_r(), - ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f( - iter_max)); - - hw_op = ltc_ltcs_ltss_cbc_ctrl1_clear_active_f(); - full_cache_op = false; - } else if (op == gk20a_cbc_op_clean) { - /* this is full-cache op */ - hw_op = ltc_ltcs_ltss_cbc_ctrl1_clean_active_f(); - } else if (op == gk20a_cbc_op_invalidate) { - /* this is full-cache op */ - hw_op = ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f(); - } else { - nvgpu_err(g, "Unknown op: %u", (unsigned)op); - err = -EINVAL; - goto out; - } - gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl1_r(), - gk20a_readl(g, - ltc_ltcs_ltss_cbc_ctrl1_r()) | hw_op); - - for (ltc = 0; ltc < g->ltc_count; ltc++) { - for (slice = 0; slice < slices_per_ltc; slice++) { - - ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() + - ltc * ltc_stride + slice * lts_stride; - - nvgpu_timeout_init(g, &timeout, 2000, - NVGPU_TIMER_RETRY_TIMER); - do { - val = gk20a_readl(g, ctrl1); - if ((val & hw_op) == 0U) { - break; - } - nvgpu_udelay(5); - } while (nvgpu_timeout_expired(&timeout) == 0); - - if (nvgpu_timeout_peek_expired(&timeout) != 0) { - nvgpu_err(g, "comp tag clear timeout"); - err = -EBUSY; - goto out; - } - } - } - - /* are we done? */ - if (full_cache_op || iter_max == max) { - break; - } - - /* note: iter_max is inclusive upper bound */ - min = iter_max + 1U; - - /* give a chance for higher-priority threads to progress */ - nvgpu_mutex_release(&g->mm.l2_op_lock); - } -out: - trace_gk20a_ltc_cbc_ctrl_done(g->name); - nvgpu_mutex_release(&g->mm.l2_op_lock); - return err; -} - void gp10b_ltc_lts_isr(struct gk20a *g, unsigned int ltc, unsigned int slice) { u32 offset; diff --git a/drivers/gpu/nvgpu/common/ltc/ltc_gp10b.h b/drivers/gpu/nvgpu/common/ltc/ltc_gp10b.h index 10b2713e1..387208842 100644 --- a/drivers/gpu/nvgpu/common/ltc/ltc_gp10b.h +++ b/drivers/gpu/nvgpu/common/ltc/ltc_gp10b.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2019, NVIDIA CORPORATION. 
All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -26,10 +26,7 @@ struct gk20a; struct gpu_ops; int gp10b_determine_L2_size_bytes(struct gk20a *g); -int gp10b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr); void gp10b_ltc_init_fs_state(struct gk20a *g); -int gp10b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, - u32 min, u32 max); void gp10b_ltc_set_enabled(struct gk20a *g, bool enabled); void gp10b_ltc_isr(struct gk20a *g, unsigned int ltc); void gp10b_ltc_lts_isr(struct gk20a *g, unsigned int ltc, unsigned int slice); diff --git a/drivers/gpu/nvgpu/common/ltc/ltc_tu104.c b/drivers/gpu/nvgpu/common/ltc/ltc_tu104.c index 871a08767..ba194622a 100644 --- a/drivers/gpu/nvgpu/common/ltc/ltc_tu104.c +++ b/drivers/gpu/nvgpu/common/ltc/ltc_tu104.c @@ -58,184 +58,3 @@ void ltc_tu104_init_fs_state(struct gk20a *g) ltc_ltcs_ltss_tstg_set_mgmt_1_plc_recompress_rmw_disabled_f()); nvgpu_writel(g, ltc_ltcs_ltss_tstg_set_mgmt_1_r(), reg); } - -u64 ltc_tu104_get_cbc_base_divisor(struct gk20a *g) -{ - return (u64)g->ltc_count << - ltc_ltcs_ltss_cbc_base_alignment_shift_v(); -} - -int ltc_tu104_init_comptags(struct gk20a *g, struct gr_gk20a *gr) -{ - /* max memory size (MB) to cover */ - u32 max_size = gr->max_comptag_mem; - /* one tag line covers 64KB */ - u32 max_comptag_lines = max_size << 4U; - u32 compbit_backing_size; - u32 hw_max_comptag_lines; - u32 cbc_param; - u32 ctags_size; - u32 ctags_per_cacheline; - u32 amap_divide_rounding, amap_swizzle_rounding; - int err; - - nvgpu_log_fn(g, " "); - - if (max_comptag_lines == 0U) { - return 0; - } - - /* Already initialized */ - if (gr->max_comptag_lines != 0U) { - return 0; - } - - hw_max_comptag_lines = - ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_init_v(); - if (max_comptag_lines > hw_max_comptag_lines) { - max_comptag_lines = hw_max_comptag_lines; - } - - cbc_param = nvgpu_readl(g, 
ltc_ltcs_ltss_cbc_param_r()); - - ctags_size = - ltc_ltcs_ltss_cbc_param_bytes_per_comptagline_per_slice_v(cbc_param); - ctags_per_cacheline = gr->cacheline_size / ctags_size; - - amap_divide_rounding = (U32(2U) * U32(1024U)) << - ltc_ltcs_ltss_cbc_param_amap_divide_rounding_v(cbc_param); - amap_swizzle_rounding = (U32(64U) * U32(1024U)) << - ltc_ltcs_ltss_cbc_param_amap_swizzle_rounding_v(cbc_param); - - compbit_backing_size = - roundup(max_comptag_lines * ctags_size, gr->cacheline_size); - compbit_backing_size = - compbit_backing_size * gr->slices_per_ltc * g->ltc_count; - - compbit_backing_size += g->ltc_count * amap_divide_rounding; - compbit_backing_size += amap_swizzle_rounding; - - /* must be a multiple of 64KB */ - compbit_backing_size = roundup(compbit_backing_size, - U32(64) * U32(1024)); - - err = nvgpu_ltc_alloc_cbc(g, compbit_backing_size, true); - if (err != 0) { - return err; - } - - err = gk20a_comptag_allocator_init(g, &gr->comp_tags, max_comptag_lines); - if (err != 0) { - return err; - } - - gr->max_comptag_lines = max_comptag_lines; - gr->comptags_per_cacheline = ctags_per_cacheline; - gr->gobs_per_comptagline_per_slice = ctags_size; - gr->compbit_backing_size = compbit_backing_size; - - nvgpu_log_info(g, "compbit backing store size : %d", - compbit_backing_size); - nvgpu_log_info(g, "max comptag lines : %d", - max_comptag_lines); - nvgpu_log_info(g, "gobs_per_comptagline_per_slice: %d", - gr->gobs_per_comptagline_per_slice); - - return 0; -} - -int ltc_tu104_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, - u32 min, u32 max) -{ - struct gr_gk20a *gr = &g->gr; - struct nvgpu_timeout timeout; - int err = 0; - u32 ltc, slice, ctrl1, val, hw_op = 0U; - u32 slices_per_ltc = gr->slices_per_ltc; - u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); - u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); - const u32 max_lines = 16384U; - - nvgpu_log_fn(g, " "); - - trace_gk20a_ltc_cbc_ctrl_start(g->name, op, min, max); - - if 
(gr->compbit_store.mem.size == 0U) { - return 0; - } - - while (true) { - const u32 iter_max = min(min + max_lines - 1U, max); - bool full_cache_op = true; - - nvgpu_mutex_acquire(&g->mm.l2_op_lock); - - nvgpu_log_info(g, "clearing CBC lines %u..%u", min, iter_max); - - if (op == gk20a_cbc_op_clear) { - nvgpu_writel( - g, ltc_ltcs_ltss_cbc_ctrl2_r(), - ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f( - min)); - nvgpu_writel( - g, ltc_ltcs_ltss_cbc_ctrl3_r(), - ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f( - iter_max)); - hw_op = ltc_ltcs_ltss_cbc_ctrl1_clear_active_f(); - full_cache_op = false; - } else if (op == gk20a_cbc_op_clean) { - /* this is full-cache op */ - hw_op = ltc_ltcs_ltss_cbc_ctrl1_clean_active_f(); - } else if (op == gk20a_cbc_op_invalidate) { - /* this is full-cache op */ - hw_op = ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f(); - } else { - nvgpu_err(g, "Unknown op: %u", (unsigned)op); - err = -EINVAL; - goto out; - } - - nvgpu_writel(g, ltc_ltcs_ltss_cbc_ctrl1_r(), - nvgpu_readl(g, - ltc_ltcs_ltss_cbc_ctrl1_r()) | hw_op); - - for (ltc = 0; ltc < g->ltc_count; ltc++) { - for (slice = 0; slice < slices_per_ltc; slice++) { - - ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() + - ltc * ltc_stride + slice * lts_stride; - - nvgpu_timeout_init(g, &timeout, 2000, - NVGPU_TIMER_RETRY_TIMER); - do { - val = nvgpu_readl(g, ctrl1); - if ((val & hw_op) == 0U) { - break; - } - nvgpu_udelay(5); - } while (nvgpu_timeout_expired(&timeout) == 0); - - if (nvgpu_timeout_peek_expired(&timeout) != 0) { - nvgpu_err(g, "comp tag clear timeout"); - err = -EBUSY; - goto out; - } - } - } - - /* are we done? 
*/ - if (full_cache_op || iter_max == max) { - break; - } - - /* note: iter_max is inclusive upper bound */ - min = iter_max + 1U; - - /* give a chance for higher-priority threads to progress */ - nvgpu_mutex_release(&g->mm.l2_op_lock); - } -out: - trace_gk20a_ltc_cbc_ctrl_done(g->name); - nvgpu_mutex_release(&g->mm.l2_op_lock); - return err; -} diff --git a/drivers/gpu/nvgpu/common/ltc/ltc_tu104.h b/drivers/gpu/nvgpu/common/ltc/ltc_tu104.h index eca61388f..c5e8cd4c7 100644 --- a/drivers/gpu/nvgpu/common/ltc/ltc_tu104.h +++ b/drivers/gpu/nvgpu/common/ltc/ltc_tu104.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -25,14 +25,8 @@ #include -enum gk20a_cbc_op; struct gk20a; -struct gr_gk20a; -u64 ltc_tu104_get_cbc_base_divisor(struct gk20a *g); void ltc_tu104_init_fs_state(struct gk20a *g); -int ltc_tu104_init_comptags(struct gk20a *g, struct gr_gk20a *gr); -int ltc_tu104_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, - u32 min, u32 max); #endif diff --git a/drivers/gpu/nvgpu/common/mm/vm.c b/drivers/gpu/nvgpu/common/mm/vm.c index 7632396b0..70f7c133b 100644 --- a/drivers/gpu/nvgpu/common/mm/vm.c +++ b/drivers/gpu/nvgpu/common/mm/vm.c @@ -1000,9 +1000,9 @@ struct nvgpu_mapped_buf *nvgpu_vm_map(struct vm_gk20a *vm, * Newly allocated comptags needs to be cleared */ if (comptags.needs_clear) { - if (g->ops.ltc.cbc_ctrl != NULL) { + if (g->ops.cbc.ctrl != NULL) { if (gk20a_comptags_start_clear(os_buf)) { - err = g->ops.ltc.cbc_ctrl( + err = g->ops.cbc.ctrl( g, gk20a_cbc_op_clear, comptags.offset, (comptags.offset + diff --git a/drivers/gpu/nvgpu/common/vgpu/cbc/cbc_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/cbc/cbc_vgpu.c new file mode 100644 index 000000000..189b54d2c --- /dev/null +++ 
b/drivers/gpu/nvgpu/common/vgpu/cbc/cbc_vgpu.c @@ -0,0 +1,53 @@ +/* + * Virtualized GPU CBC + * + * Copyright (c) 2019 NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include + +#include "cbc_vgpu.h" + +int vgpu_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr) +{ + struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + u32 max_comptag_lines = 0; + int err; + + nvgpu_log_fn(g, " "); + + gr->comptags_per_cacheline = priv->constants.comptags_per_cacheline; + max_comptag_lines = priv->constants.comptag_lines; + + if (max_comptag_lines < 2) { + return -ENXIO; + } + + err = gk20a_comptag_allocator_init(g, &gr->comp_tags, max_comptag_lines); + if (err) { + return err; + } + + gr->max_comptag_lines = max_comptag_lines; + + return 0; +} diff --git a/drivers/gpu/nvgpu/common/vgpu/cbc/cbc_vgpu.h b/drivers/gpu/nvgpu/common/vgpu/cbc/cbc_vgpu.h new file mode 100644 index 000000000..2ebb66df8 --- /dev/null +++ b/drivers/gpu/nvgpu/common/vgpu/cbc/cbc_vgpu.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef NVGPU_CBC_VGPU_H +#define NVGPU_CBC_VGPU_H + +struct gk20a; +struct gr_gk20a; + +int vgpu_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr); + +#endif /* NVGPU_CBC_VGPU_H */ diff --git a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c index 78d52a506..1dca118ab 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c +++ b/drivers/gpu/nvgpu/common/vgpu/gp10b/vgpu_hal_gp10b.c @@ -46,6 +46,7 @@ #include "common/vgpu/gr/ctx_vgpu.h" #include "common/vgpu/ltc/ltc_vgpu.h" #include "common/vgpu/mm/mm_vgpu.h" +#include "common/vgpu/cbc/cbc_vgpu.h" #include "common/vgpu/debugger_vgpu.h" #include "common/vgpu/fecs_trace_vgpu.h" #include "common/vgpu/perf/perf_vgpu.h" @@ -82,12 +83,8 @@ static const struct gpu_ops vgpu_gp10b_ops = { .determine_L2_size_bytes = vgpu_determine_L2_size_bytes, .set_zbc_color_entry = NULL, .set_zbc_depth_entry = NULL, - .init_cbc = NULL, .init_fs_state = vgpu_ltc_init_fs_state, - .init_comptags = vgpu_ltc_init_comptags, - .cbc_ctrl = NULL, .isr = NULL, - .cbc_fix_config = NULL, .flush = NULL, .set_enabled = NULL, .pri_is_ltc_addr = gm20b_ltc_pri_is_ltc_addr, @@ -96,6 +93,12 @@ static const struct gpu_ops vgpu_gp10b_ops = { .split_lts_broadcast_addr = gm20b_ltc_split_lts_broadcast_addr, .split_ltc_broadcast_addr = gm20b_ltc_split_ltc_broadcast_addr, }, + .cbc = { + .init = NULL, + .alloc_comptags = vgpu_cbc_alloc_comptags, + .ctrl = NULL, + .fix_config = NULL, + }, .ce2 = { .isr_stall = NULL, .isr_nonstall = NULL, @@ -675,6 +678,7 @@ int vgpu_gp10b_init_hal(struct gk20a *g) struct vgpu_priv_data *priv = vgpu_get_priv_data(g); gops->ltc = vgpu_gp10b_ops.ltc; + gops->cbc = vgpu_gp10b_ops.cbc; gops->ce2 = vgpu_gp10b_ops.ce2; gops->gr = vgpu_gp10b_ops.gr; gops->gr.ctxsw_prog = vgpu_gp10b_ops.gr.ctxsw_prog; diff --git a/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c index a40680c1b..fe9940ea1 100644 --- 
a/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c +++ b/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c @@ -685,7 +685,7 @@ static int vgpu_gr_init_gr_setup_sw(struct gk20a *g) goto clean_up; } - err = g->ops.ltc.init_comptags(g, gr); + err = g->ops.cbc.alloc_comptags(g, gr); if (err) { goto clean_up; } diff --git a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c index 61567e2f4..17d5a4c72 100644 --- a/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c +++ b/drivers/gpu/nvgpu/common/vgpu/gv11b/vgpu_hal_gv11b.c @@ -59,6 +59,7 @@ #include "common/vgpu/gr/ctx_vgpu.h" #include "common/vgpu/ltc/ltc_vgpu.h" #include "common/vgpu/mm/mm_vgpu.h" +#include "common/vgpu/cbc/cbc_vgpu.h" #include "common/vgpu/debugger_vgpu.h" #include "common/vgpu/perf/perf_vgpu.h" #include "common/vgpu/fecs_trace_vgpu.h" @@ -103,10 +104,7 @@ static const struct gpu_ops vgpu_gv11b_ops = { .set_zbc_s_entry = NULL, .set_zbc_color_entry = NULL, .set_zbc_depth_entry = NULL, - .init_cbc = NULL, .init_fs_state = vgpu_ltc_init_fs_state, - .init_comptags = vgpu_ltc_init_comptags, - .cbc_ctrl = NULL, .isr = NULL, .flush = NULL, .set_enabled = NULL, @@ -116,6 +114,11 @@ static const struct gpu_ops vgpu_gv11b_ops = { .split_lts_broadcast_addr = gm20b_ltc_split_lts_broadcast_addr, .split_ltc_broadcast_addr = gm20b_ltc_split_ltc_broadcast_addr, }, + .cbc = { + .init = NULL, + .ctrl = NULL, + .alloc_comptags = vgpu_cbc_alloc_comptags, + }, .ce2 = { .isr_stall = NULL, .isr_nonstall = NULL, @@ -756,6 +759,7 @@ int vgpu_gv11b_init_hal(struct gk20a *g) struct vgpu_priv_data *priv = vgpu_get_priv_data(g); gops->ltc = vgpu_gv11b_ops.ltc; + gops->cbc = vgpu_gv11b_ops.cbc; gops->ce2 = vgpu_gv11b_ops.ce2; gops->gr = vgpu_gv11b_ops.gr; gops->gr.ctxsw_prog = vgpu_gv11b_ops.gr.ctxsw_prog; diff --git a/drivers/gpu/nvgpu/common/vgpu/ltc/ltc_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/ltc/ltc_vgpu.c index 3a5fdc4c1..54006d700 100644 --- 
a/drivers/gpu/nvgpu/common/vgpu/ltc/ltc_vgpu.c +++ b/drivers/gpu/nvgpu/common/vgpu/ltc/ltc_vgpu.c @@ -36,31 +36,6 @@ int vgpu_determine_L2_size_bytes(struct gk20a *g) return priv->constants.l2_size; } -int vgpu_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) -{ - struct vgpu_priv_data *priv = vgpu_get_priv_data(g); - u32 max_comptag_lines = 0; - int err; - - nvgpu_log_fn(g, " "); - - gr->comptags_per_cacheline = priv->constants.comptags_per_cacheline; - max_comptag_lines = priv->constants.comptag_lines; - - if (max_comptag_lines < 2) { - return -ENXIO; - } - - err = gk20a_comptag_allocator_init(g, &gr->comp_tags, max_comptag_lines); - if (err) { - return err; - } - - gr->max_comptag_lines = max_comptag_lines; - - return 0; -} - void vgpu_ltc_init_fs_state(struct gk20a *g) { struct vgpu_priv_data *priv = vgpu_get_priv_data(g); diff --git a/drivers/gpu/nvgpu/common/vgpu/ltc/ltc_vgpu.h b/drivers/gpu/nvgpu/common/vgpu/ltc/ltc_vgpu.h index c3457a80e..bd6676209 100644 --- a/drivers/gpu/nvgpu/common/vgpu/ltc/ltc_vgpu.h +++ b/drivers/gpu/nvgpu/common/vgpu/ltc/ltc_vgpu.h @@ -27,7 +27,6 @@ struct gk20a; struct gr_gk20a; int vgpu_determine_L2_size_bytes(struct gk20a *g); -int vgpu_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr); void vgpu_ltc_init_fs_state(struct gk20a *g); #endif /* NVGPU_LTC_VGPU_H */ diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index e00d85f9c..49ac09014 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -2811,8 +2811,8 @@ static int gk20a_init_gr_setup_hw(struct gk20a *g) goto out; } - if (g->ops.ltc.init_cbc != NULL) { - g->ops.ltc.init_cbc(g, gr); + if (g->ops.cbc.init != NULL) { + g->ops.cbc.init(g, gr); } if (g->ops.fb.init_cbc != NULL) { @@ -3024,8 +3024,8 @@ static int gk20a_init_gr_setup_sw(struct gk20a *g) goto clean_up; } - if (g->ops.ltc.init_comptags != NULL) { - err = g->ops.ltc.init_comptags(g, gr); + if (g->ops.cbc.alloc_comptags != NULL) { + err 
= g->ops.cbc.alloc_comptags(g, gr); if (err != 0) { goto clean_up; } diff --git a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c index 0c5c64aab..55e054778 100644 --- a/drivers/gpu/nvgpu/gm20b/hal_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/hal_gm20b.c @@ -41,6 +41,7 @@ #include "hal/bus/bus_gk20a.h" #include "hal/priv_ring/priv_ring_gm20b.h" #include "hal/power_features/cg/gm20b_gating_reglist.h" +#include "hal/cbc/cbc_gm20b.h" #include "hal/fuse/fuse_gm20b.h" #include "common/ptimer/ptimer_gk20a.h" @@ -202,12 +203,8 @@ static const struct gpu_ops gm20b_ops = { .determine_L2_size_bytes = gm20b_determine_L2_size_bytes, .set_zbc_color_entry = gm20b_ltc_set_zbc_color_entry, .set_zbc_depth_entry = gm20b_ltc_set_zbc_depth_entry, - .init_cbc = gm20b_ltc_init_cbc, .init_fs_state = gm20b_ltc_init_fs_state, - .init_comptags = gm20b_ltc_init_comptags, - .cbc_ctrl = gm20b_ltc_cbc_ctrl, .isr = gm20b_ltc_isr, - .cbc_fix_config = gm20b_ltc_cbc_fix_config, .flush = gm20b_flush_ltc, .set_enabled = gm20b_ltc_set_enabled, .pri_is_ltc_addr = gm20b_ltc_pri_is_ltc_addr, @@ -216,6 +213,12 @@ static const struct gpu_ops gm20b_ops = { .split_lts_broadcast_addr = gm20b_ltc_split_lts_broadcast_addr, .split_ltc_broadcast_addr = gm20b_ltc_split_ltc_broadcast_addr, }, + .cbc = { + .init = gm20b_cbc_init, + .ctrl = gm20b_cbc_ctrl, + .alloc_comptags = gm20b_cbc_alloc_comptags, + .fix_config = gm20b_cbc_fix_config, + }, .ce2 = { .isr_stall = gk20a_ce2_isr, .isr_nonstall = gk20a_ce2_nonstall_isr, @@ -827,6 +830,7 @@ int gm20b_init_hal(struct gk20a *g) struct gpu_ops *gops = &g->ops; gops->ltc = gm20b_ops.ltc; + gops->cbc = gm20b_ops.cbc; gops->ce2 = gm20b_ops.ce2; gops->gr = gm20b_ops.gr; gops->gr.ctxsw_prog = gm20b_ops.gr.ctxsw_prog; diff --git a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c index dcaf2bba1..1b4ad45aa 100644 --- a/drivers/gpu/nvgpu/gp10b/hal_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/hal_gp10b.c @@ -45,6 +45,8 @@ #include 
"hal/priv_ring/priv_ring_gm20b.h" #include "hal/priv_ring/priv_ring_gp10b.h" #include "hal/power_features/cg/gp10b_gating_reglist.h" +#include "hal/cbc/cbc_gm20b.h" +#include "hal/cbc/cbc_gp10b.h" #include "hal/fuse/fuse_gm20b.h" #include "hal/fuse/fuse_gp10b.h" @@ -223,12 +225,8 @@ static const struct gpu_ops gp10b_ops = { .determine_L2_size_bytes = gp10b_determine_L2_size_bytes, .set_zbc_color_entry = gm20b_ltc_set_zbc_color_entry, .set_zbc_depth_entry = gm20b_ltc_set_zbc_depth_entry, - .init_cbc = gm20b_ltc_init_cbc, .init_fs_state = gp10b_ltc_init_fs_state, - .init_comptags = gp10b_ltc_init_comptags, - .cbc_ctrl = gp10b_ltc_cbc_ctrl, .isr = gp10b_ltc_isr, - .cbc_fix_config = gm20b_ltc_cbc_fix_config, .flush = gm20b_flush_ltc, .set_enabled = gp10b_ltc_set_enabled, .pri_is_ltc_addr = gm20b_ltc_pri_is_ltc_addr, @@ -237,6 +235,12 @@ static const struct gpu_ops gp10b_ops = { .split_lts_broadcast_addr = gm20b_ltc_split_lts_broadcast_addr, .split_ltc_broadcast_addr = gm20b_ltc_split_ltc_broadcast_addr, }, + .cbc = { + .init = gm20b_cbc_init, + .alloc_comptags = gp10b_cbc_alloc_comptags, + .ctrl = gp10b_cbc_ctrl, + .fix_config = gm20b_cbc_fix_config, + }, .ce2 = { .isr_stall = gp10b_ce_isr, .isr_nonstall = gp10b_ce_nonstall_isr, @@ -911,6 +915,7 @@ int gp10b_init_hal(struct gk20a *g) struct gpu_ops *gops = &g->ops; gops->ltc = gp10b_ops.ltc; + gops->cbc = gp10b_ops.cbc; gops->ce2 = gp10b_ops.ce2; gops->gr = gp10b_ops.gr; gops->gr.ctxsw_prog = gp10b_ops.gr.ctxsw_prog; diff --git a/drivers/gpu/nvgpu/gv100/hal_gv100.c b/drivers/gpu/nvgpu/gv100/hal_gv100.c index 0120a177d..d230d317b 100644 --- a/drivers/gpu/nvgpu/gv100/hal_gv100.c +++ b/drivers/gpu/nvgpu/gv100/hal_gv100.c @@ -28,6 +28,8 @@ #include "hal/priv_ring/priv_ring_gm20b.h" #include "hal/priv_ring/priv_ring_gp10b.h" #include "hal/power_features/cg/gv100_gating_reglist.h" +#include "hal/cbc/cbc_gm20b.h" +#include "hal/cbc/cbc_gp10b.h" #include "hal/fuse/fuse_gm20b.h" #include "hal/fuse/fuse_gp10b.h" #include 
"hal/fuse/fuse_gp106.h" @@ -326,11 +328,8 @@ static const struct gpu_ops gv100_ops = { .set_zbc_s_entry = gv11b_ltc_set_zbc_stencil_entry, .set_zbc_color_entry = gm20b_ltc_set_zbc_color_entry, .set_zbc_depth_entry = gm20b_ltc_set_zbc_depth_entry, - .init_cbc = NULL, .init_fs_state = gv11b_ltc_init_fs_state, - .cbc_ctrl = gp10b_ltc_cbc_ctrl, .isr = gv11b_ltc_isr, - .cbc_fix_config = NULL, .flush = gm20b_flush_ltc, .set_enabled = gp10b_ltc_set_enabled, .intr_en_illegal_compstat = gv11b_ltc_intr_en_illegal_compstat, @@ -340,6 +339,11 @@ static const struct gpu_ops gv100_ops = { .split_lts_broadcast_addr = gm20b_ltc_split_lts_broadcast_addr, .split_ltc_broadcast_addr = gm20b_ltc_split_ltc_broadcast_addr, }, + .cbc = { + .init = NULL, + .ctrl = gp10b_cbc_ctrl, + .fix_config = NULL, + }, .ce2 = { .isr_stall = gv11b_ce_isr, .isr_nonstall = gp10b_ce_nonstall_isr, @@ -1190,6 +1194,7 @@ int gv100_init_hal(struct gk20a *g) gops->bios = gv100_ops.bios; gops->ltc = gv100_ops.ltc; + gops->cbc = gv100_ops.cbc; gops->ce2 = gv100_ops.ce2; gops->gr = gv100_ops.gr; gops->gr.ctxsw_prog = gv100_ops.gr.ctxsw_prog; diff --git a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c index 69885a9e8..7e2d8e48c 100644 --- a/drivers/gpu/nvgpu/gv11b/hal_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/hal_gv11b.c @@ -31,6 +31,8 @@ #include "hal/priv_ring/priv_ring_gm20b.h" #include "hal/priv_ring/priv_ring_gp10b.h" #include "hal/power_features/cg/gv11b_gating_reglist.h" +#include "hal/cbc/cbc_gm20b.h" +#include "hal/cbc/cbc_gp10b.h" #include "hal/fuse/fuse_gm20b.h" #include "hal/fuse/fuse_gp10b.h" @@ -276,10 +278,7 @@ static const struct gpu_ops gv11b_ops = { .set_zbc_s_entry = gv11b_ltc_set_zbc_stencil_entry, .set_zbc_color_entry = gm20b_ltc_set_zbc_color_entry, .set_zbc_depth_entry = gm20b_ltc_set_zbc_depth_entry, - .init_cbc = NULL, .init_fs_state = gv11b_ltc_init_fs_state, - .init_comptags = gp10b_ltc_init_comptags, - .cbc_ctrl = gp10b_ltc_cbc_ctrl, .isr = gv11b_ltc_isr, .flush = 
gm20b_flush_ltc, .set_enabled = gp10b_ltc_set_enabled, @@ -290,6 +289,11 @@ static const struct gpu_ops gv11b_ops = { .split_lts_broadcast_addr = gm20b_ltc_split_lts_broadcast_addr, .split_ltc_broadcast_addr = gm20b_ltc_split_ltc_broadcast_addr, }, + .cbc = { + .init = NULL, + .alloc_comptags = gp10b_cbc_alloc_comptags, + .ctrl = gp10b_cbc_ctrl, + }, .ce2 = { .isr_stall = gv11b_ce_isr, .isr_nonstall = gp10b_ce_nonstall_isr, @@ -1065,6 +1069,7 @@ int gv11b_init_hal(struct gk20a *g) struct gpu_ops *gops = &g->ops; gops->ltc = gv11b_ops.ltc; + gops->cbc = gv11b_ops.cbc; gops->ce2 = gv11b_ops.ce2; gops->gr = gv11b_ops.gr; gops->gr.ctxsw_prog = gv11b_ops.gr.ctxsw_prog; diff --git a/drivers/gpu/nvgpu/hal/cbc/cbc_gm20b.c b/drivers/gpu/nvgpu/hal/cbc/cbc_gm20b.c new file mode 100644 index 000000000..bc117856c --- /dev/null +++ b/drivers/gpu/nvgpu/hal/cbc/cbc_gm20b.c @@ -0,0 +1,271 @@ +/* + * GM20B CBC + * + * Copyright (c) 2019 NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "cbc_gm20b.h" + +int gm20b_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr) +{ + /* max memory size (MB) to cover */ + u32 max_size = gr->max_comptag_mem; + /* one tag line covers 128KB */ + u32 max_comptag_lines = max_size << 3U; + + u32 hw_max_comptag_lines = + ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_init_v(); + + u32 cbc_param = + gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()); + u32 comptags_per_cacheline = + ltc_ltcs_ltss_cbc_param_comptags_per_cache_line_v(cbc_param); + + u32 compbit_backing_size; + + int err; + + nvgpu_log_fn(g, " "); + + if (max_comptag_lines == 0U) { + return 0; + } + + if (max_comptag_lines > hw_max_comptag_lines) { + max_comptag_lines = hw_max_comptag_lines; + } + + compbit_backing_size = + DIV_ROUND_UP(max_comptag_lines, comptags_per_cacheline) * + gr->cacheline_size * gr->slices_per_ltc * g->ltc_count; + + /* aligned to 2KB * ltc_count */ + compbit_backing_size += + g->ltc_count << ltc_ltcs_ltss_cbc_base_alignment_shift_v(); + + /* must be a multiple of 64KB */ + compbit_backing_size = roundup(compbit_backing_size, + U32(64) * U32(1024)); + + max_comptag_lines = + (compbit_backing_size * comptags_per_cacheline) / + (gr->cacheline_size * gr->slices_per_ltc * g->ltc_count); + + if (max_comptag_lines > hw_max_comptag_lines) { + max_comptag_lines = hw_max_comptag_lines; + } + + nvgpu_log_info(g, "compbit backing store size : %d", + compbit_backing_size); + nvgpu_log_info(g, "max comptag lines : %d", + max_comptag_lines); + + err = nvgpu_cbc_alloc(g, compbit_backing_size, false); + if (err != 0) { + return err; + } + + err = 
gk20a_comptag_allocator_init(g, &gr->comp_tags, max_comptag_lines); + if (err != 0) { + return err; + } + + gr->max_comptag_lines = max_comptag_lines; + gr->comptags_per_cacheline = comptags_per_cacheline; + gr->compbit_backing_size = compbit_backing_size; + + return 0; +} + +int gm20b_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, + u32 min, u32 max) +{ + struct gr_gk20a *gr = &g->gr; + struct nvgpu_timeout timeout; + int err = 0; + u32 ltc, slice, ctrl1, val, hw_op = 0U; + u32 slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v( + gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r())); + u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); + u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); + const u32 max_lines = 16384U; + + nvgpu_log_fn(g, " "); + + trace_gk20a_ltc_cbc_ctrl_start(g->name, op, min, max); + + if (gr->compbit_store.mem.size == 0ULL) { + return 0; + } + + while (true) { + const u32 iter_max = min(min + max_lines - 1U, max); + bool full_cache_op = true; + + nvgpu_mutex_acquire(&g->mm.l2_op_lock); + + nvgpu_log_info(g, "clearing CBC lines %u..%u", min, iter_max); + + if (op == gk20a_cbc_op_clear) { + gk20a_writel( + g, ltc_ltcs_ltss_cbc_ctrl2_r(), + ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f( + min)); + gk20a_writel( + g, ltc_ltcs_ltss_cbc_ctrl3_r(), + ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f( + iter_max)); + hw_op = ltc_ltcs_ltss_cbc_ctrl1_clear_active_f(); + full_cache_op = false; + } else if (op == gk20a_cbc_op_clean) { + /* this is full-cache op */ + hw_op = ltc_ltcs_ltss_cbc_ctrl1_clean_active_f(); + } else if (op == gk20a_cbc_op_invalidate) { + /* this is full-cache op */ + hw_op = ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f(); + } else { + nvgpu_err(g, "Unknown op: %u", (unsigned)op); + err = -EINVAL; + goto out; + } + gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl1_r(), + gk20a_readl(g, + ltc_ltcs_ltss_cbc_ctrl1_r()) | hw_op); + + for (ltc = 0; ltc < g->ltc_count; ltc++) { + for (slice = 0; slice < slices_per_ltc; slice++) { 
+ + ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() + + ltc * ltc_stride + slice * lts_stride; + + nvgpu_timeout_init(g, &timeout, 2000, + NVGPU_TIMER_RETRY_TIMER); + do { + val = gk20a_readl(g, ctrl1); + if ((val & hw_op) == 0U) { + break; + } + nvgpu_udelay(5); + } while (nvgpu_timeout_expired(&timeout) == 0); + + if (nvgpu_timeout_peek_expired(&timeout) != 0) { + nvgpu_err(g, "comp tag clear timeout"); + err = -EBUSY; + goto out; + } + } + } + + /* are we done? */ + if (full_cache_op || iter_max == max) { + break; + } + + /* note: iter_max is inclusive upper bound */ + min = iter_max + 1U; + + /* give a chance for higher-priority threads to progress */ + nvgpu_mutex_release(&g->mm.l2_op_lock); + } +out: + trace_gk20a_ltc_cbc_ctrl_done(g->name); + nvgpu_mutex_release(&g->mm.l2_op_lock); + return err; +} + +u32 gm20b_cbc_fix_config(struct gk20a *g, int base) +{ + u32 val = gk20a_readl(g, ltc_ltcs_ltss_cbc_num_active_ltcs_r()); + if (val == 2U) { + return base * 2; + } else if (val != 1U) { + nvgpu_err(g, "Invalid number of active ltcs: %08x", val); + } + + return base; +} + + +void gm20b_cbc_init(struct gk20a *g, struct gr_gk20a *gr) +{ + u32 max_size = gr->max_comptag_mem; + u32 max_comptag_lines = max_size << 3U; + + u32 compbit_base_post_divide; + u64 compbit_base_post_multiply64; + u64 compbit_store_iova; + u64 compbit_base_post_divide64; + + if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) { + compbit_store_iova = nvgpu_mem_get_phys_addr(g, + &gr->compbit_store.mem); + } else { + compbit_store_iova = nvgpu_mem_get_addr(g, + &gr->compbit_store.mem); + } + + compbit_base_post_divide64 = compbit_store_iova >> + ltc_ltcs_ltss_cbc_base_alignment_shift_v(); + + do_div(compbit_base_post_divide64, g->ltc_count); + compbit_base_post_divide = u64_lo32(compbit_base_post_divide64); + + compbit_base_post_multiply64 = ((u64)compbit_base_post_divide * + g->ltc_count) << ltc_ltcs_ltss_cbc_base_alignment_shift_v(); + + if (compbit_base_post_multiply64 < compbit_store_iova) { + 
compbit_base_post_divide++; + } + + /* Bug 1477079 indicates sw adjustment on the posted divided base. */ + if (g->ops.cbc.fix_config != NULL) { + compbit_base_post_divide = + g->ops.cbc.fix_config(g, compbit_base_post_divide); + } + + gk20a_writel(g, ltc_ltcs_ltss_cbc_base_r(), + compbit_base_post_divide); + + nvgpu_log(g, gpu_dbg_info | gpu_dbg_map_v | gpu_dbg_pte, + "compbit base.pa: 0x%x,%08x cbc_base:0x%08x\n", + (u32)(compbit_store_iova >> 32), + (u32)(compbit_store_iova & 0xffffffffU), + compbit_base_post_divide); + + gr->compbit_store.base_hw = compbit_base_post_divide; + + g->ops.cbc.ctrl(g, gk20a_cbc_op_invalidate, + 0, max_comptag_lines - 1U); + +} diff --git a/drivers/gpu/nvgpu/hal/cbc/cbc_gm20b.h b/drivers/gpu/nvgpu/hal/cbc/cbc_gm20b.h new file mode 100644 index 000000000..0776275c6 --- /dev/null +++ b/drivers/gpu/nvgpu/hal/cbc/cbc_gm20b.h @@ -0,0 +1,45 @@ +/* + * GM20B CBC + * + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef NVGPU_CBC_GM20B +#define NVGPU_CBC_GM20B + +#include + +struct gk20a; +struct gr_gk20a; +struct gpu_ops; +enum gk20a_cbc_op; + +int gm20b_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr); +void gm20b_cbc_init(struct gk20a *g, struct gr_gk20a *gr); +int gm20b_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, + u32 min, u32 max); +u32 gm20b_cbc_fix_config(struct gk20a *g, int base); +int gm20b_cbc_alloc_phys(struct gk20a *g, + size_t compbit_backing_size); +int gm20b_cbc_alloc_virtc(struct gk20a *g, + size_t compbit_backing_size); + +#endif diff --git a/drivers/gpu/nvgpu/hal/cbc/cbc_gp10b.c b/drivers/gpu/nvgpu/hal/cbc/cbc_gp10b.c new file mode 100644 index 000000000..a453de990 --- /dev/null +++ b/drivers/gpu/nvgpu/hal/cbc/cbc_gp10b.c @@ -0,0 +1,213 @@ +/* + * GP10B CBC + * + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "cbc_gp10b.h" + +int gp10b_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr) +{ + /* max memory size (MB) to cover */ + u32 max_size = gr->max_comptag_mem; + /* one tag line covers 64KB */ + u32 max_comptag_lines = max_size << 4U; + + u32 hw_max_comptag_lines = + ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_init_v(); + + u32 cbc_param = + gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()); + u32 comptags_per_cacheline = + ltc_ltcs_ltss_cbc_param_comptags_per_cache_line_v(cbc_param); + u32 cbc_param2 = + gk20a_readl(g, ltc_ltcs_ltss_cbc_param2_r()); + u32 gobs_per_comptagline_per_slice = + ltc_ltcs_ltss_cbc_param2_gobs_per_comptagline_per_slice_v(cbc_param2); + + u32 compbit_backing_size; + + int err; + + nvgpu_log_fn(g, " "); + + if (max_comptag_lines == 0U) { + return 0; + } + + /* Already initialized */ + if (gr->max_comptag_lines != 0U) { + return 0; + } + + if (max_comptag_lines > hw_max_comptag_lines) { + max_comptag_lines = hw_max_comptag_lines; + } + + compbit_backing_size = + roundup(max_comptag_lines * gobs_per_comptagline_per_slice, + gr->cacheline_size); + compbit_backing_size = roundup( + compbit_backing_size * gr->slices_per_ltc * g->ltc_count, + g->ops.fb.compressible_page_size(g)); + + /* aligned to 2KB * ltc_count */ + compbit_backing_size += + g->ltc_count << ltc_ltcs_ltss_cbc_base_alignment_shift_v(); + + /* must be a multiple of 64KB */ + compbit_backing_size = roundup(compbit_backing_size, + U32(64) * U32(1024)); + + nvgpu_log_info(g, "compbit backing store size : %d", + compbit_backing_size); + nvgpu_log_info(g, "max comptag lines : %d", + max_comptag_lines); + 
nvgpu_log_info(g, "gobs_per_comptagline_per_slice: %d", + gobs_per_comptagline_per_slice); + + err = nvgpu_cbc_alloc(g, compbit_backing_size, false); + if (err != 0) { + return err; + } + + err = gk20a_comptag_allocator_init(g, &gr->comp_tags, max_comptag_lines); + if (err != 0) { + return err; + } + + gr->max_comptag_lines = max_comptag_lines; + gr->comptags_per_cacheline = comptags_per_cacheline; + gr->gobs_per_comptagline_per_slice = gobs_per_comptagline_per_slice; + gr->compbit_backing_size = compbit_backing_size; + + return 0; +} + +int gp10b_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, + u32 min, u32 max) +{ + struct gr_gk20a *gr = &g->gr; + struct nvgpu_timeout timeout; + int err = 0; + u32 ltc, slice, ctrl1, val, hw_op = 0U; + u32 slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v( + gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r())); + u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); + u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); + const u32 max_lines = 16384U; + + nvgpu_log_fn(g, " "); + + trace_gk20a_ltc_cbc_ctrl_start(g->name, op, min, max); + + if (gr->compbit_store.mem.size == 0U) { + return 0; + } + + while (true) { + const u32 iter_max = min(min + max_lines - 1U, max); + bool full_cache_op = true; + + nvgpu_mutex_acquire(&g->mm.l2_op_lock); + + nvgpu_log_info(g, "clearing CBC lines %u..%u", min, iter_max); + + if (op == gk20a_cbc_op_clear) { + nvgpu_writel_check( + g, ltc_ltcs_ltss_cbc_ctrl2_r(), + ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f( + min)); + + nvgpu_writel_check( + g, ltc_ltcs_ltss_cbc_ctrl3_r(), + ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f( + iter_max)); + + hw_op = ltc_ltcs_ltss_cbc_ctrl1_clear_active_f(); + full_cache_op = false; + } else if (op == gk20a_cbc_op_clean) { + /* this is full-cache op */ + hw_op = ltc_ltcs_ltss_cbc_ctrl1_clean_active_f(); + } else if (op == gk20a_cbc_op_invalidate) { + /* this is full-cache op */ + hw_op = ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f(); + } else { + 
nvgpu_err(g, "Unknown op: %u", (unsigned)op); + err = -EINVAL; + goto out; + } + gk20a_writel(g, ltc_ltcs_ltss_cbc_ctrl1_r(), + gk20a_readl(g, + ltc_ltcs_ltss_cbc_ctrl1_r()) | hw_op); + + for (ltc = 0; ltc < g->ltc_count; ltc++) { + for (slice = 0; slice < slices_per_ltc; slice++) { + + ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() + + ltc * ltc_stride + slice * lts_stride; + + nvgpu_timeout_init(g, &timeout, 2000, + NVGPU_TIMER_RETRY_TIMER); + do { + val = gk20a_readl(g, ctrl1); + if ((val & hw_op) == 0U) { + break; + } + nvgpu_udelay(5); + } while (nvgpu_timeout_expired(&timeout) == 0); + + if (nvgpu_timeout_peek_expired(&timeout) != 0) { + nvgpu_err(g, "comp tag clear timeout"); + err = -EBUSY; + goto out; + } + } + } + + /* are we done? */ + if (full_cache_op || iter_max == max) { + break; + } + + /* note: iter_max is inclusive upper bound */ + min = iter_max + 1U; + + /* give a chance for higher-priority threads to progress */ + nvgpu_mutex_release(&g->mm.l2_op_lock); + } +out: + trace_gk20a_ltc_cbc_ctrl_done(g->name); + nvgpu_mutex_release(&g->mm.l2_op_lock); + return err; +} diff --git a/drivers/gpu/nvgpu/hal/cbc/cbc_gp10b.h b/drivers/gpu/nvgpu/hal/cbc/cbc_gp10b.h new file mode 100644 index 000000000..272ef8c43 --- /dev/null +++ b/drivers/gpu/nvgpu/hal/cbc/cbc_gp10b.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef CBC_GP10B_H +#define CBC_GP10B_H +struct gk20a; +struct gpu_ops; + +#include + +int gp10b_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr); +int gp10b_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, + u32 min, u32 max); +#endif diff --git a/drivers/gpu/nvgpu/hal/cbc/cbc_tu104.c b/drivers/gpu/nvgpu/hal/cbc/cbc_tu104.c new file mode 100644 index 000000000..64470ff8c --- /dev/null +++ b/drivers/gpu/nvgpu/hal/cbc/cbc_tu104.c @@ -0,0 +1,218 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "cbc_tu104.h" + +#include "common/ltc/ltc_gv11b.h" + +#include + +u64 tu104_cbc_get_base_divisor(struct gk20a *g) +{ + return (u64)g->ltc_count << + ltc_ltcs_ltss_cbc_base_alignment_shift_v(); +} + +int tu104_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr) +{ + /* max memory size (MB) to cover */ + u32 max_size = gr->max_comptag_mem; + /* one tag line covers 64KB */ + u32 max_comptag_lines = max_size << 4U; + u32 compbit_backing_size; + u32 hw_max_comptag_lines; + u32 cbc_param; + u32 ctags_size; + u32 ctags_per_cacheline; + u32 amap_divide_rounding, amap_swizzle_rounding; + int err; + + nvgpu_log_fn(g, " "); + + if (max_comptag_lines == 0U) { + return 0; + } + + /* Already initialized */ + if (gr->max_comptag_lines != 0U) { + return 0; + } + + hw_max_comptag_lines = + ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_init_v(); + if (max_comptag_lines > hw_max_comptag_lines) { + max_comptag_lines = hw_max_comptag_lines; + } + + cbc_param = nvgpu_readl(g, ltc_ltcs_ltss_cbc_param_r()); + + ctags_size = + ltc_ltcs_ltss_cbc_param_bytes_per_comptagline_per_slice_v(cbc_param); + ctags_per_cacheline = gr->cacheline_size / ctags_size; + + amap_divide_rounding = (U32(2U) * U32(1024U)) << + ltc_ltcs_ltss_cbc_param_amap_divide_rounding_v(cbc_param); + amap_swizzle_rounding = (U32(64U) * U32(1024U)) << + ltc_ltcs_ltss_cbc_param_amap_swizzle_rounding_v(cbc_param); + + compbit_backing_size = + roundup(max_comptag_lines * ctags_size, gr->cacheline_size); + compbit_backing_size = + compbit_backing_size * gr->slices_per_ltc * g->ltc_count; + + compbit_backing_size += g->ltc_count * amap_divide_rounding; + 
compbit_backing_size += amap_swizzle_rounding; + + /* must be a multiple of 64KB */ + compbit_backing_size = roundup(compbit_backing_size, + U32(64) * U32(1024)); + + err = nvgpu_cbc_alloc(g, compbit_backing_size, true); + if (err != 0) { + return err; + } + + err = gk20a_comptag_allocator_init(g, &gr->comp_tags, max_comptag_lines); + if (err != 0) { + return err; + } + + gr->max_comptag_lines = max_comptag_lines; + gr->comptags_per_cacheline = ctags_per_cacheline; + gr->gobs_per_comptagline_per_slice = ctags_size; + gr->compbit_backing_size = compbit_backing_size; + + nvgpu_log_info(g, "compbit backing store size : %d", + compbit_backing_size); + nvgpu_log_info(g, "max comptag lines : %d", + max_comptag_lines); + nvgpu_log_info(g, "gobs_per_comptagline_per_slice: %d", + gr->gobs_per_comptagline_per_slice); + + return 0; +} + +int tu104_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, + u32 min, u32 max) +{ + struct gr_gk20a *gr = &g->gr; + struct nvgpu_timeout timeout; + int err = 0; + u32 ltc, slice, ctrl1, val, hw_op = 0U; + u32 slices_per_ltc = gr->slices_per_ltc; + u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); + u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); + const u32 max_lines = 16384U; + + nvgpu_log_fn(g, " "); + + trace_gk20a_ltc_cbc_ctrl_start(g->name, op, min, max); + + if (gr->compbit_store.mem.size == 0U) { + return 0; + } + + while (true) { + const u32 iter_max = min(min + max_lines - 1U, max); + bool full_cache_op = true; + + nvgpu_mutex_acquire(&g->mm.l2_op_lock); + + nvgpu_log_info(g, "clearing CBC lines %u..%u", min, iter_max); + + if (op == gk20a_cbc_op_clear) { + nvgpu_writel( + g, ltc_ltcs_ltss_cbc_ctrl2_r(), + ltc_ltcs_ltss_cbc_ctrl2_clear_lower_bound_f( + min)); + nvgpu_writel( + g, ltc_ltcs_ltss_cbc_ctrl3_r(), + ltc_ltcs_ltss_cbc_ctrl3_clear_upper_bound_f( + iter_max)); + hw_op = ltc_ltcs_ltss_cbc_ctrl1_clear_active_f(); + full_cache_op = false; + } else if (op == gk20a_cbc_op_clean) { + /* this is 
full-cache op */ + hw_op = ltc_ltcs_ltss_cbc_ctrl1_clean_active_f(); + } else if (op == gk20a_cbc_op_invalidate) { + /* this is full-cache op */ + hw_op = ltc_ltcs_ltss_cbc_ctrl1_invalidate_active_f(); + } else { + nvgpu_err(g, "Unknown op: %u", (unsigned)op); + err = -EINVAL; + goto out; + } + + nvgpu_writel(g, ltc_ltcs_ltss_cbc_ctrl1_r(), + nvgpu_readl(g, + ltc_ltcs_ltss_cbc_ctrl1_r()) | hw_op); + + for (ltc = 0; ltc < g->ltc_count; ltc++) { + for (slice = 0; slice < slices_per_ltc; slice++) { + + ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() + + ltc * ltc_stride + slice * lts_stride; + + nvgpu_timeout_init(g, &timeout, 2000, + NVGPU_TIMER_RETRY_TIMER); + do { + val = nvgpu_readl(g, ctrl1); + if ((val & hw_op) == 0U) { + break; + } + nvgpu_udelay(5); + } while (nvgpu_timeout_expired(&timeout) == 0); + + if (nvgpu_timeout_peek_expired(&timeout) != 0) { + nvgpu_err(g, "comp tag clear timeout"); + err = -EBUSY; + goto out; + } + } + } + + /* are we done? */ + if (full_cache_op || iter_max == max) { + break; + } + + /* note: iter_max is inclusive upper bound */ + min = iter_max + 1U; + + /* give a chance for higher-priority threads to progress */ + nvgpu_mutex_release(&g->mm.l2_op_lock); + } +out: + trace_gk20a_ltc_cbc_ctrl_done(g->name); + nvgpu_mutex_release(&g->mm.l2_op_lock); + return err; +} diff --git a/drivers/gpu/nvgpu/hal/cbc/cbc_tu104.h b/drivers/gpu/nvgpu/hal/cbc/cbc_tu104.h new file mode 100644 index 000000000..0c3a94f28 --- /dev/null +++ b/drivers/gpu/nvgpu/hal/cbc/cbc_tu104.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef CBC_TU104_H +#define CBC_TU104_H + +#include <nvgpu/types.h> + +enum gk20a_cbc_op; +struct gk20a; +struct gr_gk20a; + +u64 tu104_cbc_get_base_divisor(struct gk20a *g); +int tu104_cbc_alloc_comptags(struct gk20a *g, struct gr_gk20a *gr); +int tu104_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, + u32 min, u32 max); + +#endif diff --git a/drivers/gpu/nvgpu/include/nvgpu/cbc.h b/drivers/gpu/nvgpu/include/nvgpu/cbc.h new file mode 100644 index 000000000..429f596f0 --- /dev/null +++ b/drivers/gpu/nvgpu/include/nvgpu/cbc.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef NVGPU_CBC_H +#define NVGPU_CBC_H + +#include <nvgpu/types.h> + +struct gk20a; + +int nvgpu_cbc_alloc(struct gk20a *g, size_t compbit_backing_size, + bool vidmem_alloc); + +#endif /* NVGPU_CBC_H */ diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index 7939e6851..84936f6e1 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h @@ -207,10 +207,6 @@ enum { struct gpu_ops { struct { int (*determine_L2_size_bytes)(struct gk20a *gk20a); - u64 (*get_cbc_base_divisor)(struct gk20a *g); - int (*init_comptags)(struct gk20a *g, struct gr_gk20a *gr); - int (*cbc_ctrl)(struct gk20a *g, enum gk20a_cbc_op op, - u32 min, u32 max); void (*set_zbc_color_entry)(struct gk20a *g, u32 *color_val_l2, u32 index); @@ -220,11 +216,9 @@ struct gpu_ops { void (*set_zbc_s_entry)(struct gk20a *g, u32 s_val, u32 index); - void (*init_cbc)(struct gk20a *g, struct gr_gk20a *gr); void (*set_enabled)(struct gk20a *g, bool enabled); void (*init_fs_state)(struct gk20a *g); void (*isr)(struct gk20a *g, unsigned int ltc); - u32 (*cbc_fix_config)(struct gk20a *g, int base); void (*flush)(struct gk20a *g); void (*intr_en_illegal_compstat)(struct gk20a *g, bool enable); bool (*pri_is_ltc_addr)(struct gk20a *g, u32 addr); @@ -242,6 +236,14 @@ struct gpu_ops { u64 err_addr, u64 count); } err_ops; } ltc; + struct { + void (*init)(struct gk20a *g, struct gr_gk20a *gr); + u64 (*get_base_divisor)(struct gk20a *g); + int (*alloc_comptags)(struct gk20a *g, struct gr_gk20a *gr); + int (*ctrl)(struct gk20a *g, enum gk20a_cbc_op op, + u32 min, u32 max); + u32 (*fix_config)(struct gk20a *g, int base); + } cbc; struct { void (*isr_stall)(struct gk20a *g, u32 inst_id, u32 pri_base); u32 (*isr_nonstall)(struct gk20a *g, u32 inst_id, u32 pri_base); diff --git a/drivers/gpu/nvgpu/include/nvgpu/ltc.h b/drivers/gpu/nvgpu/include/nvgpu/ltc.h index a674a2913..ba554a356 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/ltc.h +++ 
b/drivers/gpu/nvgpu/include/nvgpu/ltc.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -29,7 +29,5 @@ struct gk20a; int nvgpu_init_ltc_support(struct gk20a *g); void nvgpu_ltc_sync_enabled(struct gk20a *g); -int nvgpu_ltc_alloc_cbc(struct gk20a *g, size_t compbit_backing_size, - bool vidmem_alloc); #endif /* NVGPU_LTC_H */ diff --git a/drivers/gpu/nvgpu/os/linux/ltc.c b/drivers/gpu/nvgpu/os/linux/linux-cbc.c similarity index 89% rename from drivers/gpu/nvgpu/os/linux/ltc.c rename to drivers/gpu/nvgpu/os/linux/linux-cbc.c index 8a892381d..056798c2c 100644 --- a/drivers/gpu/nvgpu/os/linux/ltc.c +++ b/drivers/gpu/nvgpu/os/linux/linux-cbc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -14,14 +14,14 @@ * along with this program. If not, see . */ -#include +#include #include #include #include #include "gk20a/gr_gk20a.h" -int nvgpu_ltc_alloc_cbc(struct gk20a *g, size_t compbit_backing_size, +int nvgpu_cbc_alloc(struct gk20a *g, size_t compbit_backing_size, bool vidmem_alloc) { struct gr_gk20a *gr = &g->gr; diff --git a/drivers/gpu/nvgpu/os/posix/stubs.c b/drivers/gpu/nvgpu/os/posix/stubs.c index 348910340..02c211bb1 100644 --- a/drivers/gpu/nvgpu/os/posix/stubs.c +++ b/drivers/gpu/nvgpu/os/posix/stubs.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -26,7 +26,7 @@ */ #include -#include +#include #include @@ -43,7 +43,7 @@ void nvgpu_ecc_sysfs_remove(struct gk20a *g) { } -int nvgpu_ltc_alloc_cbc(struct gk20a *g, size_t compbit_backing_size, +int nvgpu_cbc_alloc(struct gk20a *g, size_t compbit_backing_size, bool vidmem_alloc) { return 0; diff --git a/drivers/gpu/nvgpu/tu104/hal_tu104.c b/drivers/gpu/nvgpu/tu104/hal_tu104.c index 88e7deefc..6b9a417de 100644 --- a/drivers/gpu/nvgpu/tu104/hal_tu104.c +++ b/drivers/gpu/nvgpu/tu104/hal_tu104.c @@ -29,6 +29,9 @@ #include "hal/priv_ring/priv_ring_gm20b.h" #include "hal/priv_ring/priv_ring_gp10b.h" #include "hal/power_features/cg/tu104_gating_reglist.h" +#include "hal/cbc/cbc_gm20b.h" +#include "hal/cbc/cbc_gp10b.h" +#include "hal/cbc/cbc_tu104.h" #include "hal/fuse/fuse_gm20b.h" #include "hal/fuse/fuse_gp10b.h" #include "hal/fuse/fuse_gp106.h" @@ -341,13 +344,8 @@ static const struct gpu_ops tu104_ops = { .set_zbc_s_entry = gv11b_ltc_set_zbc_stencil_entry, .set_zbc_color_entry = gm20b_ltc_set_zbc_color_entry, .set_zbc_depth_entry = gm20b_ltc_set_zbc_depth_entry, - .init_cbc = NULL, - .get_cbc_base_divisor = ltc_tu104_get_cbc_base_divisor, .init_fs_state = ltc_tu104_init_fs_state, - .init_comptags = ltc_tu104_init_comptags, - .cbc_ctrl = ltc_tu104_cbc_ctrl, .isr = gv11b_ltc_isr, - .cbc_fix_config = NULL, .flush = gm20b_flush_ltc, .set_enabled = gp10b_ltc_set_enabled, .intr_en_illegal_compstat = gv11b_ltc_intr_en_illegal_compstat, @@ -357,6 +355,13 @@ static const struct gpu_ops tu104_ops = { .split_lts_broadcast_addr = gm20b_ltc_split_lts_broadcast_addr, .split_ltc_broadcast_addr = gm20b_ltc_split_ltc_broadcast_addr, }, + .cbc = { + .init = NULL, + .get_base_divisor = tu104_cbc_get_base_divisor, + .alloc_comptags = tu104_cbc_alloc_comptags, + .ctrl = tu104_cbc_ctrl, + .fix_config = NULL, + }, .ce2 = { .isr_stall = 
gv11b_ce_isr, .isr_nonstall = NULL, @@ -1229,6 +1234,7 @@ int tu104_init_hal(struct gk20a *g) gops->bios = tu104_ops.bios; gops->ltc = tu104_ops.ltc; + gops->cbc = tu104_ops.cbc; gops->ce2 = tu104_ops.ce2; gops->gr = tu104_ops.gr; gops->gr.ctxsw_prog = tu104_ops.gr.ctxsw_prog; @@ -1312,8 +1318,8 @@ int tu104_init_hal(struct gk20a *g) /* dGpu VDK support */ if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)){ /* Disable compression */ - gops->ltc.cbc_ctrl = NULL; - gops->ltc.init_comptags = NULL; + gops->cbc.ctrl = NULL; + gops->cbc.alloc_comptags = NULL; gops->fb.init_cbc = NULL; gops->gr.load_ctxsw_ucode = gr_gk20a_load_ctxsw_ucode;