From 60073d2156674770d71d25ac00f5af496c7d4d67 Mon Sep 17 00:00:00 2001
From: Seshendra Gadagottu
Date: Thu, 14 Mar 2019 22:58:44 -0700
Subject: [PATCH] gpu: nvgpu: move ltc related data to nvgpu_ltc

Moved the following LTC-related data into struct nvgpu_ltc, and added
a reference to it in struct gk20a:

struct nvgpu_spinlock ltc_enabled_lock;
u32 max_ltc_count;
u32 ltc_count;
u32 slices_per_ltc;
u32 cacheline_size;

Added a remove_support function for LTC; it is called during the
nvgpu remove sequence.

Added the following helper functions in ltc.h:

u32 nvgpu_ltc_get_ltc_count(struct gk20a *g);
u32 nvgpu_ltc_get_slices_per_ltc(struct gk20a *g);
u32 nvgpu_ltc_get_cacheline_size(struct gk20a *g);

Removed the redundant ltc.init_fs_state call from the vgpu init
sequence, since it is already called from nvgpu_init_ltc_support.

NVGPU-2044

Change-Id: I3c256dc3866f894c38715aa2609e85bd2e5cfe5a
Signed-off-by: Seshendra Gadagottu
Reviewed-on: https://git-master.nvidia.com/r/2073417
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/common/ecc.c                | 15 +++---
 drivers/gpu/nvgpu/common/init/nvgpu_init.c    |  2 +
 drivers/gpu/nvgpu/common/ltc/ltc.c            | 50 +++++++++++++++++--
 drivers/gpu/nvgpu/common/mc/mc_gm20b.c        |  3 +-
 drivers/gpu/nvgpu/common/mc/mc_gp10b.c        |  5 +-
 drivers/gpu/nvgpu/common/mc/mc_tu104.c        |  3 +-
 drivers/gpu/nvgpu/common/vgpu/ltc/ltc_vgpu.c  |  7 +--
 drivers/gpu/nvgpu/common/vgpu/vgpu.c          |  4 --
 drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h        |  4 +-
 drivers/gpu/nvgpu/gm20b/gr_gm20b.c            |  4 +-
 drivers/gpu/nvgpu/hal/cbc/cbc_gm20b.c         | 26 ++++++----
 drivers/gpu/nvgpu/hal/cbc/cbc_gp10b.c         | 20 ++++----
 drivers/gpu/nvgpu/hal/cbc/cbc_tu104.c         | 26 ++++++----
 drivers/gpu/nvgpu/hal/fb/fb_gm20b.c           |  3 +-
 drivers/gpu/nvgpu/hal/fb/fb_gv11b.c           |  6 ++-
 drivers/gpu/nvgpu/hal/fb/fb_tu104.c           |  5 +-
 drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c |  7 ++-
 drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c |  7 ++-
 drivers/gpu/nvgpu/hal/ltc/ltc_gm20b.c         | 30 +++++------
 drivers/gpu/nvgpu/hal/ltc/ltc_gp10b.c         |  4 +-
 drivers/gpu/nvgpu/hal/ltc/ltc_gv11b.c         | 20 ++++----
 drivers/gpu/nvgpu/hal/ltc/ltc_tu104.c         |  4 +-
 drivers/gpu/nvgpu/include/nvgpu/gk20a.h       | 13 ++---
 drivers/gpu/nvgpu/include/nvgpu/ltc.h         | 13 +++++
 drivers/gpu/nvgpu/os/linux/cde.c              |  6 ++-
 drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c       |  6 +--
 26 files changed, 189 insertions(+), 104 deletions(-)

diff --git a/drivers/gpu/nvgpu/common/ecc.c b/drivers/gpu/nvgpu/common/ecc.c index 06655f3fd..0f853981c 100644 --- a/drivers/gpu/nvgpu/common/ecc.c +++ b/drivers/gpu/nvgpu/common/ecc.c @@ -22,6 +22,7 @@ #include #include +#include #include static void nvgpu_ecc_stat_add(struct gk20a *g, struct nvgpu_ecc_stat *stat) @@ -131,14 +132,16 @@ int nvgpu_ecc_counter_init_per_lts(struct gk20a *g, struct nvgpu_ecc_stat **stats; u32 ltc, lts; int err = 0; + u32 ltc_count = nvgpu_ltc_get_ltc_count(g); + u32 slices_per_ltc = nvgpu_ltc_get_slices_per_ltc(g); - stats = nvgpu_kzalloc(g, sizeof(*stats) * g->ltc_count); + stats = nvgpu_kzalloc(g, sizeof(*stats) * ltc_count); if (stats == NULL) { return -ENOMEM; } - for (ltc = 0; ltc < g->ltc_count; ltc++) { stats[ltc] = nvgpu_kzalloc(g, - sizeof(*stats[ltc]) * g->slices_per_ltc); + sizeof(*stats[ltc]) * slices_per_ltc); if (stats[ltc] == NULL) { err = -ENOMEM; break; } @@ -154,8 +157,8 @@ int nvgpu_ecc_counter_init_per_lts(struct gk20a *g, return err; } - for (ltc = 0; ltc < g->ltc_count; ltc++) { - for (lts = 0; lts < g->slices_per_ltc; lts++) { + for (ltc = 0; ltc < ltc_count; ltc++) { + for (lts = 0; lts < slices_per_ltc;
lts++) { (void) snprintf(stats[ltc][lts].name, NVGPU_ECC_STAT_NAME_MAX_SIZE, "ltc%d_lts%d_%s", ltc, lts, name); @@ -313,7 +316,7 @@ void nvgpu_ecc_free(struct gk20a *g) nvgpu_kfree(g, ecc->gr.fecs_ecc_corrected_err_count); nvgpu_kfree(g, ecc->gr.fecs_ecc_uncorrected_err_count); - for (i = 0; i < g->ltc_count; i++) { + for (i = 0; i < nvgpu_ltc_get_ltc_count(g); i++) { if (ecc->ltc.ecc_sec_count != NULL) { nvgpu_kfree(g, ecc->ltc.ecc_sec_count[i]); } diff --git a/drivers/gpu/nvgpu/common/init/nvgpu_init.c b/drivers/gpu/nvgpu/common/init/nvgpu_init.c index c60dc1bcb..b7a2fbef1 100644 --- a/drivers/gpu/nvgpu/common/init/nvgpu_init.c +++ b/drivers/gpu/nvgpu/common/init/nvgpu_init.c @@ -553,6 +553,8 @@ static void gk20a_free_cb(struct nvgpu_ref *refcount) g->remove_support(g); } + nvgpu_ltc_remove_support(g); + if (g->free != NULL) { g->free(g); } diff --git a/drivers/gpu/nvgpu/common/ltc/ltc.c b/drivers/gpu/nvgpu/common/ltc/ltc.c index 4302d6f80..545fbdf7a 100644 --- a/drivers/gpu/nvgpu/common/ltc/ltc.c +++ b/drivers/gpu/nvgpu/common/ltc/ltc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -27,13 +27,40 @@ #include "gk20a/gr_gk20a.h" +void nvgpu_ltc_remove_support(struct gk20a *g) +{ + struct nvgpu_ltc *ltc = g->ltc; + + nvgpu_log_fn(g, " "); + + if (ltc == NULL) { + return; + } + + nvgpu_kfree(g, ltc); + g->ltc = NULL; +} + + int nvgpu_init_ltc_support(struct gk20a *g) { - nvgpu_spinlock_init(&g->ltc_enabled_lock); + struct nvgpu_ltc *ltc = g->ltc; + + nvgpu_log_fn(g, " "); + g->mm.ltc_enabled_current = true; g->mm.ltc_enabled_target = true; + if (ltc == NULL) { + ltc = nvgpu_kzalloc(g, sizeof(*ltc)); + if (ltc == NULL) { + return -ENOMEM; + } + g->ltc = ltc; + nvgpu_spinlock_init(&g->ltc->ltc_enabled_lock); + } + if (g->ops.ltc.init_fs_state != NULL) { g->ops.ltc.init_fs_state(g); } @@ -47,10 +74,25 @@ void nvgpu_ltc_sync_enabled(struct gk20a *g) return; } - nvgpu_spinlock_acquire(&g->ltc_enabled_lock); + nvgpu_spinlock_acquire(&g->ltc->ltc_enabled_lock); if (g->mm.ltc_enabled_current != g->mm.ltc_enabled_target) { g->ops.ltc.set_enabled(g, g->mm.ltc_enabled_target); g->mm.ltc_enabled_current = g->mm.ltc_enabled_target; } - nvgpu_spinlock_release(&g->ltc_enabled_lock); + nvgpu_spinlock_release(&g->ltc->ltc_enabled_lock); +} + +u32 nvgpu_ltc_get_ltc_count(struct gk20a *g) +{ + return g->ltc->ltc_count; +} + +u32 nvgpu_ltc_get_slices_per_ltc(struct gk20a *g) +{ + return g->ltc->slices_per_ltc; +} + +u32 nvgpu_ltc_get_cacheline_size(struct gk20a *g) +{ + return g->ltc->cacheline_size; } diff --git a/drivers/gpu/nvgpu/common/mc/mc_gm20b.c b/drivers/gpu/nvgpu/common/mc/mc_gm20b.c index aa213d777..2389f42a9 100644 --- a/drivers/gpu/nvgpu/common/mc/mc_gm20b.c +++ b/drivers/gpu/nvgpu/common/mc/mc_gm20b.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -359,7 +360,7 @@ void gm20b_mc_ltc_isr(struct gk20a *g) mc_intr = gk20a_readl(g, mc_intr_ltc_r()); nvgpu_err(g, "mc_ltc_intr: %08x", mc_intr); - for (ltc = 0; ltc < g->ltc_count; ltc++) { + for (ltc = 0; ltc < nvgpu_ltc_get_ltc_count(g); ltc++) { if ((mc_intr & BIT32(ltc)) == 0U) { continue; } diff --git a/drivers/gpu/nvgpu/common/mc/mc_gp10b.c b/drivers/gpu/nvgpu/common/mc/mc_gp10b.c index 6a173e7bc..152b43bb1 100644 --- 
a/drivers/gpu/nvgpu/common/mc/mc_gp10b.c +++ b/drivers/gpu/nvgpu/common/mc/mc_gp10b.c @@ -1,7 +1,7 @@ /* * GP10B master * - * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2019, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -233,7 +234,7 @@ void mc_gp10b_ltc_isr(struct gk20a *g) mc_intr = gk20a_readl(g, mc_intr_ltc_r()); nvgpu_err(g, "mc_ltc_intr: %08x", mc_intr); - for (ltc = 0; ltc < g->ltc_count; ltc++) { + for (ltc = 0; ltc < nvgpu_ltc_get_ltc_count(g); ltc++) { if ((mc_intr & BIT32(ltc)) == 0U) { continue; } diff --git a/drivers/gpu/nvgpu/common/mc/mc_tu104.c b/drivers/gpu/nvgpu/common/mc/mc_tu104.c index a8e3e0e7b..1b6897cec 100644 --- a/drivers/gpu/nvgpu/common/mc/mc_tu104.c +++ b/drivers/gpu/nvgpu/common/mc/mc_tu104.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -418,7 +419,7 @@ void mc_tu104_ltc_isr(struct gk20a *g) unsigned int ltc; /* Go through all the LTCs explicitly */ - for (ltc = 0; ltc < g->ltc_count; ltc++) { + for (ltc = 0; ltc < nvgpu_ltc_get_ltc_count(g); ltc++) { g->ops.ltc.isr(g, ltc); } } diff --git a/drivers/gpu/nvgpu/common/vgpu/ltc/ltc_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/ltc/ltc_vgpu.c index 16f78a235..ab24e4a5d 100644 --- a/drivers/gpu/nvgpu/common/vgpu/ltc/ltc_vgpu.c +++ b/drivers/gpu/nvgpu/common/vgpu/ltc/ltc_vgpu.c @@ -39,10 +39,11 @@ int vgpu_determine_L2_size_bytes(struct gk20a *g) void vgpu_ltc_init_fs_state(struct gk20a *g) { struct vgpu_priv_data *priv = vgpu_get_priv_data(g); + struct nvgpu_ltc *ltc = g->ltc; nvgpu_log_fn(g, " "); - g->ltc_count = priv->constants.ltc_count; - g->cacheline_size = priv->constants.cacheline_size; - g->slices_per_ltc = priv->constants.slices_per_ltc; + ltc->ltc_count = priv->constants.ltc_count; + ltc->cacheline_size = priv->constants.cacheline_size; + ltc->slices_per_ltc = priv->constants.slices_per_ltc; } diff --git a/drivers/gpu/nvgpu/common/vgpu/vgpu.c b/drivers/gpu/nvgpu/common/vgpu/vgpu.c index e38c2e434..fe7314295 100644 --- a/drivers/gpu/nvgpu/common/vgpu/vgpu.c +++ b/drivers/gpu/nvgpu/common/vgpu/vgpu.c @@ -417,10 +417,6 @@ int vgpu_finalize_poweron_common(struct gk20a *g) return err; } - if (g->ops.ltc.init_fs_state != NULL) { - g->ops.ltc.init_fs_state(g); - } - err = nvgpu_init_ltc_support(g); if (err != 0) { nvgpu_err(g, "failed to init ltc"); diff --git a/drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h b/drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h index e0537157b..2386dfca3 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gr_pri_gk20a.h @@ -1,7 +1,7 @@ /* * GK20A Graphics Context Pri Register Addressing * - * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2019, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -187,7 +187,7 @@ static inline bool pri_is_be_addr(struct gk20a *g, u32 addr) u32 rop_base = nvgpu_get_litter_value(g, GPU_LIT_ROP_BASE); u32 rop_stride = nvgpu_get_litter_value(g, GPU_LIT_ROP_STRIDE); return ((addr >= rop_base) && - (addr < rop_base + g->ltc_count * rop_stride)) || + (addr < rop_base + nvgpu_ltc_get_ltc_count(g) * rop_stride)) || pri_is_be_addr_shared(g, addr); } diff --git a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c index dfa86e6ef..62d3ba6a3 100644 --- a/drivers/gpu/nvgpu/gm20b/gr_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/gr_gm20b.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -73,7 +74,8 @@ void gr_gm20b_init_gpc_mmu(struct gk20a *g) gk20a_writel(g, gr_gpcs_pri_mmu_debug_rd_r(), g->ops.fb.mmu_debug_rd(g)); - gk20a_writel(g, gr_gpcs_mmu_num_active_ltcs_r(), g->ltc_count); + gk20a_writel(g, gr_gpcs_mmu_num_active_ltcs_r(), + nvgpu_ltc_get_ltc_count(g)); } void gr_gm20b_commit_global_attrib_cb(struct gk20a *g, diff --git a/drivers/gpu/nvgpu/hal/cbc/cbc_gm20b.c b/drivers/gpu/nvgpu/hal/cbc/cbc_gm20b.c index 80787d204..747e3570a 100644 --- a/drivers/gpu/nvgpu/hal/cbc/cbc_gm20b.c +++ b/drivers/gpu/nvgpu/hal/cbc/cbc_gm20b.c @@ -73,11 +73,14 @@ int gm20b_cbc_alloc_comptags(struct gk20a *g, struct nvgpu_cbc *cbc) compbit_backing_size = DIV_ROUND_UP(max_comptag_lines, comptags_per_cacheline) * - g->cacheline_size * g->slices_per_ltc * g->ltc_count; + (nvgpu_ltc_get_ltc_count(g) * + nvgpu_ltc_get_slices_per_ltc(g) * + nvgpu_ltc_get_cacheline_size(g)); /* aligned to 2KB * ltc_count */ compbit_backing_size += - g->ltc_count << ltc_ltcs_ltss_cbc_base_alignment_shift_v(); + nvgpu_ltc_get_ltc_count(g) << + ltc_ltcs_ltss_cbc_base_alignment_shift_v(); /* must be a multiple of 64KB */ compbit_backing_size = roundup(compbit_backing_size, @@ -85,7 +88,9 @@ int gm20b_cbc_alloc_comptags(struct gk20a *g, struct nvgpu_cbc *cbc) max_comptag_lines = (compbit_backing_size * comptags_per_cacheline) / - (g->cacheline_size * g->slices_per_ltc * g->ltc_count); + (nvgpu_ltc_get_ltc_count(g) * + nvgpu_ltc_get_slices_per_ltc(g) * + nvgpu_ltc_get_cacheline_size(g)); if (max_comptag_lines > hw_max_comptag_lines) { max_comptag_lines = hw_max_comptag_lines; @@ -101,7 +106,8 @@ int gm20b_cbc_alloc_comptags(struct gk20a *g, struct nvgpu_cbc *cbc) return err; } - err = gk20a_comptag_allocator_init(g, &cbc->comp_tags, max_comptag_lines); + err = gk20a_comptag_allocator_init(g, &cbc->comp_tags, + max_comptag_lines); if (err != 0) { return err; } @@ -167,7 +173,7 @@ int gm20b_cbc_ctrl(struct gk20a *g, enum nvgpu_cbc_op op, gk20a_readl(g, ltc_ltcs_ltss_cbc_ctrl1_r()) | hw_op); - for (ltc = 0; ltc < g->ltc_count; ltc++) { + for (ltc = 0; ltc < nvgpu_ltc_get_ltc_count(g); ltc++) { for (slice = 0; slice < slices_per_ltc; slice++) { ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() + @@ -211,6 +217,7 @@ out: u32 gm20b_cbc_fix_config(struct gk20a *g, int base) { u32 val = gk20a_readl(g, ltc_ltcs_ltss_cbc_num_active_ltcs_r()); + if (val == 2U) { return base * 2; } else if (val != 1U) { @@ -233,20 +240,21 @@ void gm20b_cbc_init(struct gk20a *g, struct nvgpu_cbc *cbc) if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) { compbit_store_iova = nvgpu_mem_get_phys_addr(g, - &cbc->compbit_store.mem); + &cbc->compbit_store.mem); } else { compbit_store_iova = nvgpu_mem_get_addr(g, - &cbc->compbit_store.mem); + &cbc->compbit_store.mem); } 
compbit_base_post_divide64 = compbit_store_iova >> ltc_ltcs_ltss_cbc_base_alignment_shift_v(); - do_div(compbit_base_post_divide64, g->ltc_count); + do_div(compbit_base_post_divide64, nvgpu_ltc_get_ltc_count(g)); compbit_base_post_divide = u64_lo32(compbit_base_post_divide64); compbit_base_post_multiply64 = ((u64)compbit_base_post_divide * - g->ltc_count) << ltc_ltcs_ltss_cbc_base_alignment_shift_v(); + nvgpu_ltc_get_ltc_count(g)) << + ltc_ltcs_ltss_cbc_base_alignment_shift_v(); if (compbit_base_post_multiply64 < compbit_store_iova) { compbit_base_post_divide++; diff --git a/drivers/gpu/nvgpu/hal/cbc/cbc_gp10b.c b/drivers/gpu/nvgpu/hal/cbc/cbc_gp10b.c index 65390506c..de88c7db4 100644 --- a/drivers/gpu/nvgpu/hal/cbc/cbc_gp10b.c +++ b/drivers/gpu/nvgpu/hal/cbc/cbc_gp10b.c @@ -53,7 +53,8 @@ int gp10b_cbc_alloc_comptags(struct gk20a *g, struct nvgpu_cbc *cbc) u32 cbc_param2 = gk20a_readl(g, ltc_ltcs_ltss_cbc_param2_r()); u32 gobs_per_comptagline_per_slice = - ltc_ltcs_ltss_cbc_param2_gobs_per_comptagline_per_slice_v(cbc_param2); + ltc_ltcs_ltss_cbc_param2_gobs_per_comptagline_per_slice_v( + cbc_param2); u32 compbit_backing_size; @@ -76,14 +77,16 @@ int gp10b_cbc_alloc_comptags(struct gk20a *g, struct nvgpu_cbc *cbc) compbit_backing_size = roundup(max_comptag_lines * gobs_per_comptagline_per_slice, - g->cacheline_size); + nvgpu_ltc_get_cacheline_size(g)); compbit_backing_size = roundup( - compbit_backing_size * g->slices_per_ltc * g->ltc_count, - g->ops.fb.compressible_page_size(g)); + compbit_backing_size * nvgpu_ltc_get_slices_per_ltc(g) * + nvgpu_ltc_get_ltc_count(g), + g->ops.fb.compressible_page_size(g)); /* aligned to 2KB * ltc_count */ compbit_backing_size += - g->ltc_count << ltc_ltcs_ltss_cbc_base_alignment_shift_v(); + nvgpu_ltc_get_ltc_count(g) << + ltc_ltcs_ltss_cbc_base_alignment_shift_v(); /* must be a multiple of 64KB */ compbit_backing_size = roundup(compbit_backing_size, @@ -121,8 +124,6 @@ int gp10b_cbc_ctrl(struct gk20a *g, enum nvgpu_cbc_op op, struct nvgpu_timeout timeout; int err = 0; u32 ltc, slice, ctrl1, val, hw_op = 0U; - u32 slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v( - gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r())); u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); const u32 max_lines = 16384U; @@ -171,8 +172,9 @@ int gp10b_cbc_ctrl(struct gk20a *g, enum nvgpu_cbc_op op, gk20a_readl(g, ltc_ltcs_ltss_cbc_ctrl1_r()) | hw_op); - for (ltc = 0; ltc < g->ltc_count; ltc++) { - for (slice = 0; slice < slices_per_ltc; slice++) { + for (ltc = 0; ltc < nvgpu_ltc_get_ltc_count(g); ltc++) { + for (slice = 0; slice < + nvgpu_ltc_get_slices_per_ltc(g); slice++) { ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() + ltc * ltc_stride + slice * lts_stride; diff --git a/drivers/gpu/nvgpu/hal/cbc/cbc_tu104.c b/drivers/gpu/nvgpu/hal/cbc/cbc_tu104.c index a9adfdf21..19c9fc83f 100644 --- a/drivers/gpu/nvgpu/hal/cbc/cbc_tu104.c +++ b/drivers/gpu/nvgpu/hal/cbc/cbc_tu104.c @@ -39,7 +39,7 @@ u64 tu104_cbc_get_base_divisor(struct gk20a *g) { - return (u64)g->ltc_count << + return (u64)nvgpu_ltc_get_ltc_count(g) << ltc_ltcs_ltss_cbc_base_alignment_shift_v(); } @@ -76,21 +76,24 @@ int tu104_cbc_alloc_comptags(struct gk20a *g, struct nvgpu_cbc *cbc) cbc_param = nvgpu_readl(g, ltc_ltcs_ltss_cbc_param_r()); - ctags_size = - ltc_ltcs_ltss_cbc_param_bytes_per_comptagline_per_slice_v(cbc_param); - ctags_per_cacheline = g->cacheline_size / ctags_size; + ctags_size = ltc_ltcs_ltss_cbc_param_bytes_per_comptagline_per_slice_v( + 
cbc_param); amap_divide_rounding = (U32(2U) * U32(1024U)) << ltc_ltcs_ltss_cbc_param_amap_divide_rounding_v(cbc_param); amap_swizzle_rounding = (U32(64U) * U32(1024U)) << ltc_ltcs_ltss_cbc_param_amap_swizzle_rounding_v(cbc_param); + ctags_per_cacheline = nvgpu_ltc_get_cacheline_size(g) / ctags_size; compbit_backing_size = - roundup(max_comptag_lines * ctags_size, g->cacheline_size); + roundup(max_comptag_lines * ctags_size, + nvgpu_ltc_get_cacheline_size(g)); compbit_backing_size = - compbit_backing_size * g->slices_per_ltc * g->ltc_count; + compbit_backing_size * nvgpu_ltc_get_slices_per_ltc(g) * + nvgpu_ltc_get_ltc_count(g); - compbit_backing_size += g->ltc_count * amap_divide_rounding; + compbit_backing_size += nvgpu_ltc_get_ltc_count(g) * + amap_divide_rounding; compbit_backing_size += amap_swizzle_rounding; /* must be a multiple of 64KB */ @@ -102,7 +105,8 @@ int tu104_cbc_alloc_comptags(struct gk20a *g, struct nvgpu_cbc *cbc) return err; } - err = gk20a_comptag_allocator_init(g, &cbc->comp_tags, max_comptag_lines); + err = gk20a_comptag_allocator_init(g, &cbc->comp_tags, + max_comptag_lines); if (err != 0) { return err; } @@ -128,7 +132,6 @@ int tu104_cbc_ctrl(struct gk20a *g, enum nvgpu_cbc_op op, struct nvgpu_timeout timeout; int err = 0; u32 ltc, slice, ctrl1, val, hw_op = 0U; - u32 slices_per_ltc = g->slices_per_ltc; u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); const u32 max_lines = 16384U; @@ -176,8 +179,9 @@ int tu104_cbc_ctrl(struct gk20a *g, enum nvgpu_cbc_op op, nvgpu_readl(g, ltc_ltcs_ltss_cbc_ctrl1_r()) | hw_op); - for (ltc = 0; ltc < g->ltc_count; ltc++) { - for (slice = 0; slice < slices_per_ltc; slice++) { + for (ltc = 0; ltc < nvgpu_ltc_get_ltc_count(g); ltc++) { + for (slice = 0; slice < + nvgpu_ltc_get_slices_per_ltc(g); slice++) { ctrl1 = ltc_ltc0_lts0_cbc_ctrl1_r() + ltc * ltc_stride + slice * lts_stride; diff --git a/drivers/gpu/nvgpu/hal/fb/fb_gm20b.c b/drivers/gpu/nvgpu/hal/fb/fb_gm20b.c index 7cc587eb8..a5b889a77 100644 --- a/drivers/gpu/nvgpu/hal/fb/fb_gm20b.c +++ b/drivers/gpu/nvgpu/hal/fb/fb_gm20b.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "fb_gm20b.h" @@ -148,7 +149,7 @@ void fb_gm20b_init_fs_state(struct gk20a *g) nvgpu_log_info(g, "initialize gm20b fb"); gk20a_writel(g, fb_fbhub_num_active_ltcs_r(), - g->ltc_count); + nvgpu_ltc_get_ltc_count(g)); if (!nvgpu_is_enabled(g, NVGPU_SEC_PRIVSECURITY)) { /* Bypass MMU check for non-secure boot. 
For diff --git a/drivers/gpu/nvgpu/hal/fb/fb_gv11b.c b/drivers/gpu/nvgpu/hal/fb/fb_gv11b.c index e6e617d39..6d15944d8 100644 --- a/drivers/gpu/nvgpu/hal/fb/fb_gv11b.c +++ b/drivers/gpu/nvgpu/hal/fb/fb_gv11b.c @@ -37,6 +37,7 @@ #include #include #include +#include #include "gk20a/mm_gk20a.h" @@ -141,11 +142,12 @@ void gv11b_fb_cbc_configure(struct gk20a *g, struct nvgpu_cbc *cbc) compbit_base_post_divide64 = compbit_store_iova >> fb_mmu_cbc_base_address_alignment_shift_v(); - do_div(compbit_base_post_divide64, g->ltc_count); + do_div(compbit_base_post_divide64, nvgpu_ltc_get_ltc_count(g)); compbit_base_post_divide = u64_lo32(compbit_base_post_divide64); compbit_base_post_multiply64 = ((u64)compbit_base_post_divide * - g->ltc_count) << fb_mmu_cbc_base_address_alignment_shift_v(); + nvgpu_ltc_get_ltc_count(g)) + << fb_mmu_cbc_base_address_alignment_shift_v(); if (compbit_base_post_multiply64 < compbit_store_iova) { compbit_base_post_divide++; diff --git a/drivers/gpu/nvgpu/hal/fb/fb_tu104.c b/drivers/gpu/nvgpu/hal/fb/fb_tu104.c index 3159a620b..eae9da9b8 100644 --- a/drivers/gpu/nvgpu/hal/fb/fb_tu104.c +++ b/drivers/gpu/nvgpu/hal/fb/fb_tu104.c @@ -439,11 +439,12 @@ void tu104_fb_cbc_configure(struct gk20a *g, struct nvgpu_cbc *cbc) base_divisor = g->ops.cbc.get_base_divisor(g); compbit_store_base = DIV_ROUND_UP(compbit_store_pa, base_divisor); - cbc_start_addr = (u64)g->ltc_count * (compbit_store_base << + cbc_start_addr = (u64)nvgpu_ltc_get_ltc_count(g) * + (compbit_store_base << fb_mmu_cbc_base_address_alignment_shift_v()); cbc_end_addr = cbc_start_addr + cbc->compbit_backing_size; - cbc_top = (cbc_end_addr / g->ltc_count) >> + cbc_top = (cbc_end_addr / nvgpu_ltc_get_ltc_count(g)) >> fb_mmu_cbc_base_address_alignment_shift_v(); cbc_top_size = u64_lo32(cbc_top) - compbit_store_base; diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c index 7d9e15ce9..66a96c8b1 100644 --- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c +++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gm20b.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -357,9 +358,11 @@ int gm20b_gr_init_fs_state(struct gk20a *g) nvgpu_log_fn(g, " "); nvgpu_writel(g, gr_bes_zrop_settings_r(), - gr_bes_zrop_settings_num_active_ltcs_f(g->ltc_count)); + gr_bes_zrop_settings_num_active_ltcs_f( + nvgpu_ltc_get_ltc_count(g))); nvgpu_writel(g, gr_bes_crop_settings_r(), - gr_bes_crop_settings_num_active_ltcs_f(g->ltc_count)); + gr_bes_crop_settings_num_active_ltcs_f( + nvgpu_ltc_get_ltc_count(g))); nvgpu_writel(g, gr_bes_crop_debug3_r(), gk20a_readl(g, gr_be0_crop_debug3_r()) | diff --git a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c index 77e56295e..2f9a3c596 100644 --- a/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c +++ b/drivers/gpu/nvgpu/hal/gr/init/gr_init_gv11b.c @@ -24,6 +24,7 @@ #include #include #include +#include #include @@ -311,9 +312,11 @@ int gv11b_gr_init_fs_state(struct gk20a *g) nvgpu_writel(g, gr_debug_0_r(), data); nvgpu_writel(g, gr_bes_zrop_settings_r(), - gr_bes_zrop_settings_num_active_ltcs_f(g->ltc_count)); + gr_bes_zrop_settings_num_active_ltcs_f( + nvgpu_ltc_get_ltc_count(g))); nvgpu_writel(g, gr_bes_crop_settings_r(), - gr_bes_crop_settings_num_active_ltcs_f(g->ltc_count)); + gr_bes_crop_settings_num_active_ltcs_f( + nvgpu_ltc_get_ltc_count(g))); return err; } diff --git a/drivers/gpu/nvgpu/hal/ltc/ltc_gm20b.c b/drivers/gpu/nvgpu/hal/ltc/ltc_gm20b.c index 496302a78..bbca8b762 100644 --- 
a/drivers/gpu/nvgpu/hal/ltc/ltc_gm20b.c +++ b/drivers/gpu/nvgpu/hal/ltc/ltc_gm20b.c @@ -44,19 +44,20 @@ void gm20b_ltc_init_fs_state(struct gk20a *g) nvgpu_log_info(g, "initialize gm20b l2"); - g->max_ltc_count = gk20a_readl(g, top_num_ltcs_r()); - g->ltc_count = gk20a_readl(g, pri_ringmaster_enum_ltc_r()); - nvgpu_log_info(g, "%d ltcs out of %d", g->ltc_count, g->max_ltc_count); + g->ltc->max_ltc_count = gk20a_readl(g, top_num_ltcs_r()); + g->ltc->ltc_count = gk20a_readl(g, pri_ringmaster_enum_ltc_r()); + nvgpu_log_info(g, "%d ltcs out of %d", g->ltc->ltc_count, + g->ltc->max_ltc_count); reg = gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()); - g->slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(reg);; - g->cacheline_size = + g->ltc->slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(reg); + g->ltc->cacheline_size = U32(512) << ltc_ltcs_ltss_cbc_param_cache_line_size_v(reg); gk20a_writel(g, ltc_ltcs_ltss_cbc_num_active_ltcs_r(), - g->ltc_count); + g->ltc->ltc_count); gk20a_writel(g, ltc_ltcs_misc_ltc_num_active_ltcs_r(), - g->ltc_count); + g->ltc->ltc_count); gk20a_writel(g, ltc_ltcs_ltss_dstg_cfg0_r(), gk20a_readl(g, ltc_ltc0_lts0_dstg_cfg0_r()) | @@ -91,7 +92,7 @@ void gm20b_ltc_isr(struct gk20a *g, unsigned int ltc) { unsigned int slice; - for (slice = 0U; slice < g->slices_per_ltc; slice++) { + for (slice = 0U; slice < g->ltc->slices_per_ltc; slice++) { gm20b_ltc_lts_isr(g, ltc, slice); } } @@ -117,7 +118,7 @@ void gm20b_flush_ltc(struct gk20a *g) ltc_ltcs_ltss_tstg_cmgmt1_clean_evict_first_class_true_f()); /* Wait on each LTC individually. */ - for (ltc = 0; ltc < g->ltc_count; ltc++) { + for (ltc = 0; ltc < g->ltc->ltc_count; ltc++) { u32 op_pending; /* @@ -159,7 +160,7 @@ void gm20b_flush_ltc(struct gk20a *g) ltc_ltcs_ltss_tstg_cmgmt0_invalidate_evict_first_class_true_f()); /* Wait on each LTC individually. */ - for (ltc = 0; ltc < g->ltc_count; ltc++) { + for (ltc = 0; ltc < g->ltc->ltc_count; ltc++) { u32 op_pending; /* Again, 5ms. 
*/ @@ -298,9 +299,8 @@ bool gm20b_ltc_is_ltcn_ltss_addr(struct gk20a *g, u32 addr) ((addr & addr_mask) < end_offset); } -static void gm20b_ltc_update_ltc_lts_addr(struct gk20a *g, u32 addr, u32 ltc_num, - u32 *priv_addr_table, - u32 *priv_addr_table_index) +static void gm20b_ltc_update_ltc_lts_addr(struct gk20a *g, u32 addr, + u32 ltc_num, u32 *priv_addr_table, u32 *priv_addr_table_index) { u32 num_ltc_slices = g->ops.top.get_max_lts_per_ltc(g); u32 index = *priv_addr_table_index; @@ -322,7 +322,7 @@ void gm20b_ltc_split_lts_broadcast_addr(struct gk20a *g, u32 addr, u32 *priv_addr_table, u32 *priv_addr_table_index) { - u32 num_ltc = g->ltc_count; + u32 num_ltc = g->ltc->ltc_count; u32 i, start, ltc_num = 0; u32 pltcg_base = ltc_pltcg_base_v(); u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); @@ -342,7 +342,7 @@ void gm20b_ltc_split_ltc_broadcast_addr(struct gk20a *g, u32 addr, u32 *priv_addr_table, u32 *priv_addr_table_index) { - u32 num_ltc = g->ltc_count; + u32 num_ltc = g->ltc->ltc_count; u32 ltc_num; for (ltc_num = 0; ltc_num < num_ltc; ltc_num++) { diff --git a/drivers/gpu/nvgpu/hal/ltc/ltc_gp10b.c b/drivers/gpu/nvgpu/hal/ltc/ltc_gp10b.c index 4f586131e..3566a013e 100644 --- a/drivers/gpu/nvgpu/hal/ltc/ltc_gp10b.c +++ b/drivers/gpu/nvgpu/hal/ltc/ltc_gp10b.c @@ -45,7 +45,7 @@ int gp10b_determine_L2_size_bytes(struct gk20a *g) tmp = gk20a_readl(g, ltc_ltc0_lts0_tstg_info_1_r()); - ret = g->ltc_count * + ret = g->ltc->ltc_count * ltc_ltc0_lts0_tstg_info_1_slice_size_in_kb_v(tmp) * 1024U * ltc_ltc0_lts0_tstg_info_1_slices_per_l2_v(tmp); @@ -118,7 +118,7 @@ void gp10b_ltc_isr(struct gk20a *g, unsigned int ltc) { unsigned int slice; - for (slice = 0U; slice < g->slices_per_ltc; slice++) { + for (slice = 0U; slice < g->ltc->slices_per_ltc; slice++) { gp10b_ltc_lts_isr(g, ltc, slice); } } diff --git a/drivers/gpu/nvgpu/hal/ltc/ltc_gv11b.c b/drivers/gpu/nvgpu/hal/ltc/ltc_gv11b.c index f78157a24..6774fb00e 100644 --- a/drivers/gpu/nvgpu/hal/ltc/ltc_gv11b.c +++ b/drivers/gpu/nvgpu/hal/ltc/ltc_gv11b.c @@ -58,13 +58,14 @@ void gv11b_ltc_init_fs_state(struct gk20a *g) nvgpu_log_info(g, "initialize gv11b l2"); - g->max_ltc_count = gk20a_readl(g, top_num_ltcs_r()); - g->ltc_count = g->ops.priv_ring.enum_ltc(g); - nvgpu_log_info(g, "%u ltcs out of %u", g->ltc_count, g->max_ltc_count); + g->ltc->max_ltc_count = gk20a_readl(g, top_num_ltcs_r()); + g->ltc->ltc_count = g->ops.priv_ring.enum_ltc(g); + nvgpu_log_info(g, "%u ltcs out of %u", g->ltc->ltc_count, + g->ltc->max_ltc_count); reg = gk20a_readl(g, ltc_ltcs_ltss_cbc_param_r()); - g->slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(reg);; - g->cacheline_size = + g->ltc->slices_per_ltc = ltc_ltcs_ltss_cbc_param_slices_per_ltc_v(reg); + g->ltc->cacheline_size = U32(512) << ltc_ltcs_ltss_cbc_param_cache_line_size_v(reg); /* Disable LTC interrupts */ @@ -100,7 +101,7 @@ void gv11b_ltc_intr_en_illegal_compstat(struct gk20a *g, bool enable) val = set_field(val, ltc_ltcs_ltss_intr_en_illegal_compstat_m(), ltc_ltcs_ltss_intr_en_illegal_compstat_disabled_f()); - } + } gk20a_writel(g, ltc_ltcs_ltss_intr_r(), val); } @@ -152,7 +153,7 @@ void gv11b_ltc_lts_isr(struct gk20a *g, unsigned int ltc, unsigned int slice) nvgpu_writel_check(g, ltc_ltc0_lts0_l2_cache_ecc_corrected_err_count_r() + offset, 0); } - if ((uncorrected_delta > 0U) || (uncorrected_overflow !=0U)) { + if ((uncorrected_delta > 0U) || (uncorrected_overflow != 0U)) { nvgpu_writel_check(g, ltc_ltc0_lts0_l2_cache_ecc_uncorrected_err_count_r() + offset, 0); } @@ -223,7 +224,8 @@
void gv11b_ltc_lts_isr(struct gk20a *g, unsigned int ltc, unsigned int slice) nvgpu_log(g, gpu_dbg_intr, "dstg ecc error uncorrected"); } - if ((corrected_overflow != 0U) || (uncorrected_overflow != 0U)) { + if ((corrected_overflow != 0U) || + (uncorrected_overflow != 0U)) { nvgpu_info(g, "ecc counter overflow!"); } @@ -238,7 +240,7 @@ void gv11b_ltc_isr(struct gk20a *g, unsigned int ltc) { unsigned int slice; - for (slice = 0U; slice < g->slices_per_ltc; slice++) { + for (slice = 0U; slice < g->ltc->slices_per_ltc; slice++) { gv11b_ltc_lts_isr(g, ltc, slice); } } diff --git a/drivers/gpu/nvgpu/hal/ltc/ltc_tu104.c b/drivers/gpu/nvgpu/hal/ltc/ltc_tu104.c index c08d1eba4..8465287a0 100644 --- a/drivers/gpu/nvgpu/hal/ltc/ltc_tu104.c +++ b/drivers/gpu/nvgpu/hal/ltc/ltc_tu104.c @@ -42,9 +42,9 @@ void ltc_tu104_init_fs_state(struct gk20a *g) gv11b_ltc_init_fs_state(g); reg = nvgpu_readl(g, ltc_ltcs_ltss_cbc_param2_r()); - g->slices_per_ltc = + g->ltc->slices_per_ltc = ltc_ltcs_ltss_cbc_param2_slices_per_ltc_v(reg); - g->cacheline_size = + g->ltc->cacheline_size = U32(512) << ltc_ltcs_ltss_cbc_param2_cache_line_size_v(reg); /* disable PLC compression */ diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index d9a5aba29..7b102b319 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h @@ -95,6 +95,7 @@ struct nvgpu_cbc; #include #include #include +#include #include "gk20a/clk_gk20a.h" #include "gk20a/fifo_gk20a.h" @@ -1897,8 +1898,6 @@ struct gk20a { u32 emc3d_ratio; - struct nvgpu_spinlock ltc_enabled_lock; - struct gk20a_ctxsw_ucode_info ctxsw_ucode_info; /* @@ -1962,13 +1961,6 @@ struct gk20a { int irq_stall; /* can be same as irq_nonstall in case of PCI */ int irq_nonstall; - /* This data will be moved to nvgpu_ltc_info */ - u32 max_ltc_count; - u32 ltc_count; - u32 slices_per_ltc; - u32 cacheline_size; - u32 ltc_streamid; - /* * The deductible memory size for max_comptag_mem (in MBytes) * Usually close to memory size that running system is taking @@ -1977,7 +1969,10 @@ struct gk20a { u32 max_comptag_mem; /* max memory size (MB) for comptag */ + u32 ltc_streamid; + struct nvgpu_cbc *cbc; + struct nvgpu_ltc *ltc; struct gk20a_worker { struct nvgpu_thread poll_task; diff --git a/drivers/gpu/nvgpu/include/nvgpu/ltc.h b/drivers/gpu/nvgpu/include/nvgpu/ltc.h index ba554a356..38b279091 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/ltc.h +++ b/drivers/gpu/nvgpu/include/nvgpu/ltc.h @@ -24,10 +24,23 @@ #define NVGPU_LTC_H #include +#include struct gk20a; +struct nvgpu_ltc { + struct nvgpu_spinlock ltc_enabled_lock; + u32 max_ltc_count; + u32 ltc_count; + u32 slices_per_ltc; + u32 cacheline_size; +}; + +void nvgpu_ltc_remove_support(struct gk20a *g); int nvgpu_init_ltc_support(struct gk20a *g); void nvgpu_ltc_sync_enabled(struct gk20a *g); +u32 nvgpu_ltc_get_ltc_count(struct gk20a *g); +u32 nvgpu_ltc_get_slices_per_ltc(struct gk20a *g); +u32 nvgpu_ltc_get_cacheline_size(struct gk20a *g); #endif /* NVGPU_LTC_H */ diff --git a/drivers/gpu/nvgpu/os/linux/cde.c b/drivers/gpu/nvgpu/os/linux/cde.c index 86e204113..005f421ab 100644 --- a/drivers/gpu/nvgpu/os/linux/cde.c +++ b/drivers/gpu/nvgpu/os/linux/cde.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -423,8 +424,9 @@ static int gk20a_cde_patch_params(struct gk20a_cde_ctx *cde_ctx) new_data = cbc->comptags_per_cacheline; break; case TYPE_PARAM_GPU_CONFIGURATION: - new_data = (u64)g->ltc_count * g->slices_per_ltc * - g->cacheline_size; 
+ new_data = (u64) (nvgpu_ltc_get_ltc_count(g) * + nvgpu_ltc_get_slices_per_ltc(g) * + nvgpu_ltc_get_cacheline_size(g)); break; case TYPE_PARAM_FIRSTPAGEOFFSET: new_data = cde_ctx->surf_param_offset; diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c index 41df911b5..afd157813 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c @@ -343,9 +343,9 @@ gk20a_ctrl_ioctl_gpu_characteristics( gpu.gr_compbit_store_base_hw = g->cbc->compbit_store.base_hw; gpu.gr_gobs_per_comptagline_per_slice = g->cbc->gobs_per_comptagline_per_slice; - gpu.num_ltc = g->ltc_count; - gpu.lts_per_ltc = g->slices_per_ltc; - gpu.cbc_cache_line_size = g->cacheline_size; + gpu.num_ltc = nvgpu_ltc_get_ltc_count(g); + gpu.lts_per_ltc = nvgpu_ltc_get_slices_per_ltc(g); + gpu.cbc_cache_line_size = nvgpu_ltc_get_cacheline_size(g); gpu.cbc_comptags_per_line = g->cbc->comptags_per_cacheline; if (g->ops.clk.get_maxrate)