diff --git a/drivers/gpu/nvgpu/gk20a/ltc_common.c b/drivers/gpu/nvgpu/gk20a/ltc_common.c deleted file mode 100644 index 2b015fa0b..000000000 --- a/drivers/gpu/nvgpu/gk20a/ltc_common.c +++ /dev/null @@ -1,155 +0,0 @@ -/* - * drivers/video/tegra/host/gk20a/ltc_common.c - * - * GK20A Graphics - * - * Copyright (c) 2011-2017, NVIDIA CORPORATION. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include - -#include "gk20a.h" -#include "gr_gk20a.h" - -/* - * Sets the ZBC color for the passed index. - */ -static void gk20a_ltc_set_zbc_color_entry(struct gk20a *g, - struct zbc_entry *color_val, - u32 index) -{ - u32 i; - u32 real_index = index + GK20A_STARTOF_ZBC_TABLE; - - gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(), - ltc_ltcs_ltss_dstg_zbc_index_address_f(real_index)); - - for (i = 0; - i < ltc_ltcs_ltss_dstg_zbc_color_clear_value__size_1_v(); i++) { - gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_color_clear_value_r(i), - color_val->color_l2[i]); - } - gk20a_readl(g, ltc_ltcs_ltss_dstg_zbc_index_r()); -} - -/* - * Sets the ZBC depth for the passed index. - */ -static void gk20a_ltc_set_zbc_depth_entry(struct gk20a *g, - struct zbc_entry *depth_val, - u32 index) -{ - u32 real_index = index + GK20A_STARTOF_ZBC_TABLE; - - gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(), - ltc_ltcs_ltss_dstg_zbc_index_address_f(real_index)); - - gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_depth_clear_value_r(), - depth_val->depth); - - gk20a_readl(g, ltc_ltcs_ltss_dstg_zbc_index_r()); -} - -static int gk20a_ltc_alloc_phys_cbc(struct gk20a *g, - size_t compbit_backing_size) -{ - struct gr_gk20a *gr = &g->gr; - - return nvgpu_dma_alloc_flags_sys(g, NVGPU_DMA_FORCE_CONTIGUOUS, - compbit_backing_size, - &gr->compbit_store.mem); -} - -static int gk20a_ltc_alloc_virt_cbc(struct gk20a *g, - size_t compbit_backing_size) -{ - struct gr_gk20a *gr = &g->gr; - - return nvgpu_dma_alloc_flags_sys(g, NVGPU_DMA_NO_KERNEL_MAPPING, - compbit_backing_size, - &gr->compbit_store.mem); -} - -static void gk20a_ltc_init_cbc(struct gk20a *g, struct gr_gk20a *gr) -{ - u32 max_size = gr->max_comptag_mem; - u32 max_comptag_lines = max_size << 3; - - u32 compbit_base_post_divide; - u64 compbit_base_post_multiply64; - u64 compbit_store_iova; - u64 compbit_base_post_divide64; - - if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) - compbit_store_iova = gk20a_mem_phys(&gr->compbit_store.mem); - else - compbit_store_iova = g->ops.mm.get_iova_addr(g, - gr->compbit_store.mem.priv.sgt->sgl, 0); - - compbit_base_post_divide64 = compbit_store_iova >> - ltc_ltcs_ltss_cbc_base_alignment_shift_v(); - - do_div(compbit_base_post_divide64, g->ltc_count); - compbit_base_post_divide = u64_lo32(compbit_base_post_divide64); - - compbit_base_post_multiply64 = ((u64)compbit_base_post_divide * - g->ltc_count) << ltc_ltcs_ltss_cbc_base_alignment_shift_v(); - - if (compbit_base_post_multiply64 < compbit_store_iova) - compbit_base_post_divide++; - - /* Bug 1477079 indicates sw adjustment on the posted divided base. */ - if (g->ops.ltc.cbc_fix_config) - compbit_base_post_divide = - g->ops.ltc.cbc_fix_config(g, compbit_base_post_divide); - - gk20a_writel(g, ltc_ltcs_ltss_cbc_base_r(), - compbit_base_post_divide); - - gk20a_dbg(gpu_dbg_info | gpu_dbg_map_v | gpu_dbg_pte, - "compbit base.pa: 0x%x,%08x cbc_base:0x%08x\n", - (u32)(compbit_store_iova >> 32), - (u32)(compbit_store_iova & 0xffffffff), - compbit_base_post_divide); - - gr->compbit_store.base_hw = compbit_base_post_divide; - - g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_invalidate, - 0, max_comptag_lines - 1); - -} - -#ifdef CONFIG_DEBUG_FS -static void gk20a_ltc_sync_debugfs(struct gk20a *g) -{ - u32 reg_f = ltc_ltcs_ltss_tstg_set_mgmt_2_l2_bypass_mode_enabled_f(); - - nvgpu_spinlock_acquire(&g->debugfs_lock); - if (g->mm.ltc_enabled != g->mm.ltc_enabled_debug) { - u32 reg = gk20a_readl(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r()); - if (g->mm.ltc_enabled_debug) - /* bypass disabled (normal caching ops)*/ - reg &= ~reg_f; - else - /* bypass enabled (no caching) */ - reg |= reg_f; - - gk20a_writel(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r(), reg); - g->mm.ltc_enabled = g->mm.ltc_enabled_debug; - } - nvgpu_spinlock_release(&g->debugfs_lock); -} -#endif diff --git a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c index 8867202f8..1d517c274 100644 --- a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.c @@ -21,14 +21,36 @@ #include #include #include +#include #include "gk20a.h" +#include "gr_gk20a.h" #include "ltc_gk20a.h" #include -#include "ltc_common.c" +/* Non HW reg dependent stuff: */ + +int gk20a_ltc_alloc_phys_cbc(struct gk20a *g, size_t compbit_backing_size) +{ + struct gr_gk20a *gr = &g->gr; + + return nvgpu_dma_alloc_flags_sys(g, NVGPU_DMA_FORCE_CONTIGUOUS, + compbit_backing_size, + &gr->compbit_store.mem); +} + +int gk20a_ltc_alloc_virt_cbc(struct gk20a *g, size_t compbit_backing_size) +{ + struct gr_gk20a *gr = &g->gr; + + return nvgpu_dma_alloc_flags_sys(g, NVGPU_DMA_NO_KERNEL_MAPPING, + compbit_backing_size, + &gr->compbit_store.mem); +} + +/* HW reg dependent stuff: */ static int gk20a_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) { /* max memory size (MB) to cover */ @@ -231,6 +253,117 @@ static int gk20a_determine_L2_size_bytes(struct gk20a *g) return cache_size; } +/* + * Sets the ZBC color for the passed index. + */ +static void gk20a_ltc_set_zbc_color_entry(struct gk20a *g, + struct zbc_entry *color_val, + u32 index) +{ + u32 i; + u32 real_index = index + GK20A_STARTOF_ZBC_TABLE; + + gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(), + ltc_ltcs_ltss_dstg_zbc_index_address_f(real_index)); + + for (i = 0; + i < ltc_ltcs_ltss_dstg_zbc_color_clear_value__size_1_v(); i++) { + gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_color_clear_value_r(i), + color_val->color_l2[i]); + } + gk20a_readl(g, ltc_ltcs_ltss_dstg_zbc_index_r()); +} + +/* + * Sets the ZBC depth for the passed index. + */ +static void gk20a_ltc_set_zbc_depth_entry(struct gk20a *g, + struct zbc_entry *depth_val, + u32 index) +{ + u32 real_index = index + GK20A_STARTOF_ZBC_TABLE; + + gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(), + ltc_ltcs_ltss_dstg_zbc_index_address_f(real_index)); + + gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_depth_clear_value_r(), + depth_val->depth); + + gk20a_readl(g, ltc_ltcs_ltss_dstg_zbc_index_r()); +} + +static void gk20a_ltc_init_cbc(struct gk20a *g, struct gr_gk20a *gr) +{ + u32 max_size = gr->max_comptag_mem; + u32 max_comptag_lines = max_size << 3; + + u32 compbit_base_post_divide; + u64 compbit_base_post_multiply64; + u64 compbit_store_iova; + u64 compbit_base_post_divide64; + + if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) + compbit_store_iova = gk20a_mem_phys(&gr->compbit_store.mem); + else + compbit_store_iova = g->ops.mm.get_iova_addr(g, + gr->compbit_store.mem.priv.sgt->sgl, 0); + + compbit_base_post_divide64 = compbit_store_iova >> + ltc_ltcs_ltss_cbc_base_alignment_shift_v(); + + do_div(compbit_base_post_divide64, g->ltc_count); + compbit_base_post_divide = u64_lo32(compbit_base_post_divide64); + + compbit_base_post_multiply64 = ((u64)compbit_base_post_divide * + g->ltc_count) << ltc_ltcs_ltss_cbc_base_alignment_shift_v(); + + if (compbit_base_post_multiply64 < compbit_store_iova) + compbit_base_post_divide++; + + /* Bug 1477079 indicates sw adjustment on the posted divided base. */ + if (g->ops.ltc.cbc_fix_config) + compbit_base_post_divide = + g->ops.ltc.cbc_fix_config(g, compbit_base_post_divide); + + gk20a_writel(g, ltc_ltcs_ltss_cbc_base_r(), + compbit_base_post_divide); + + gk20a_dbg(gpu_dbg_info | gpu_dbg_map_v | gpu_dbg_pte, + "compbit base.pa: 0x%x,%08x cbc_base:0x%08x\n", + (u32)(compbit_store_iova >> 32), + (u32)(compbit_store_iova & 0xffffffff), + compbit_base_post_divide); + + gr->compbit_store.base_hw = compbit_base_post_divide; + + g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_invalidate, + 0, max_comptag_lines - 1); + +} + +#ifdef CONFIG_DEBUG_FS +static void gk20a_ltc_sync_debugfs(struct gk20a *g) +{ + u32 reg_f = ltc_ltcs_ltss_tstg_set_mgmt_2_l2_bypass_mode_enabled_f(); + + nvgpu_spinlock_acquire(&g->debugfs_lock); + if (g->mm.ltc_enabled != g->mm.ltc_enabled_debug) { + u32 reg = gk20a_readl(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r()); + + if (g->mm.ltc_enabled_debug) + /* bypass disabled (normal caching ops)*/ + reg &= ~reg_f; + else + /* bypass enabled (no caching) */ + reg |= reg_f; + + gk20a_writel(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r(), reg); + g->mm.ltc_enabled = g->mm.ltc_enabled_debug; + } + nvgpu_spinlock_release(&g->debugfs_lock); +} +#endif + void gk20a_init_ltc(struct gpu_ops *gops) { gops->ltc.determine_L2_size_bytes = gk20a_determine_L2_size_bytes; diff --git a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.h b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.h index 3cace0fd3..30d4163fc 100644 --- a/drivers/gpu/nvgpu/gk20a/ltc_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/ltc_gk20a.h @@ -1,7 +1,7 @@ /* * GK20A L2 * - * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -18,4 +18,6 @@ struct gpu_ops; void gk20a_init_ltc(struct gpu_ops *gops); +int gk20a_ltc_alloc_phys_cbc(struct gk20a *g, size_t compbit_backing_size); +int gk20a_ltc_alloc_virt_cbc(struct gk20a *g, size_t compbit_backing_size); #endif diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c index 791cc45b4..e4e385fb1 100644 --- a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c +++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.c @@ -26,7 +26,7 @@ #include #include -#include "gk20a/ltc_common.c" +#include "gk20a/ltc_gk20a.h" #include "ltc_gm20b.h" static int gm20b_ltc_init_comptags(struct gk20a *g, struct gr_gk20a *gr) @@ -349,13 +349,124 @@ static int gm20b_determine_L2_size_bytes(struct gk20a *g) return cache_size; } +/* + * Sets the ZBC color for the passed index. + */ +void gm20b_ltc_set_zbc_color_entry(struct gk20a *g, + struct zbc_entry *color_val, + u32 index) +{ + u32 i; + u32 real_index = index + GK20A_STARTOF_ZBC_TABLE; + + gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(), + ltc_ltcs_ltss_dstg_zbc_index_address_f(real_index)); + + for (i = 0; + i < ltc_ltcs_ltss_dstg_zbc_color_clear_value__size_1_v(); i++) { + gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_color_clear_value_r(i), + color_val->color_l2[i]); + } + gk20a_readl(g, ltc_ltcs_ltss_dstg_zbc_index_r()); +} + +/* + * Sets the ZBC depth for the passed index. + */ +void gm20b_ltc_set_zbc_depth_entry(struct gk20a *g, + struct zbc_entry *depth_val, + u32 index) +{ + u32 real_index = index + GK20A_STARTOF_ZBC_TABLE; + + gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_index_r(), + ltc_ltcs_ltss_dstg_zbc_index_address_f(real_index)); + + gk20a_writel(g, ltc_ltcs_ltss_dstg_zbc_depth_clear_value_r(), + depth_val->depth); + + gk20a_readl(g, ltc_ltcs_ltss_dstg_zbc_index_r()); +} + +void gm20b_ltc_init_cbc(struct gk20a *g, struct gr_gk20a *gr) +{ + u32 max_size = gr->max_comptag_mem; + u32 max_comptag_lines = max_size << 3; + + u32 compbit_base_post_divide; + u64 compbit_base_post_multiply64; + u64 compbit_store_iova; + u64 compbit_base_post_divide64; + + if (nvgpu_is_enabled(g, NVGPU_IS_FMODEL)) + compbit_store_iova = gk20a_mem_phys(&gr->compbit_store.mem); + else + compbit_store_iova = g->ops.mm.get_iova_addr(g, + gr->compbit_store.mem.priv.sgt->sgl, 0); + + compbit_base_post_divide64 = compbit_store_iova >> + ltc_ltcs_ltss_cbc_base_alignment_shift_v(); + + do_div(compbit_base_post_divide64, g->ltc_count); + compbit_base_post_divide = u64_lo32(compbit_base_post_divide64); + + compbit_base_post_multiply64 = ((u64)compbit_base_post_divide * + g->ltc_count) << ltc_ltcs_ltss_cbc_base_alignment_shift_v(); + + if (compbit_base_post_multiply64 < compbit_store_iova) + compbit_base_post_divide++; + + /* Bug 1477079 indicates sw adjustment on the posted divided base. */ + if (g->ops.ltc.cbc_fix_config) + compbit_base_post_divide = + g->ops.ltc.cbc_fix_config(g, compbit_base_post_divide); + + gk20a_writel(g, ltc_ltcs_ltss_cbc_base_r(), + compbit_base_post_divide); + + gk20a_dbg(gpu_dbg_info | gpu_dbg_map_v | gpu_dbg_pte, + "compbit base.pa: 0x%x,%08x cbc_base:0x%08x\n", + (u32)(compbit_store_iova >> 32), + (u32)(compbit_store_iova & 0xffffffff), + compbit_base_post_divide); + + gr->compbit_store.base_hw = compbit_base_post_divide; + + g->ops.ltc.cbc_ctrl(g, gk20a_cbc_op_invalidate, + 0, max_comptag_lines - 1); + +} + +#ifdef CONFIG_DEBUG_FS +static void gm20b_ltc_sync_debugfs(struct gk20a *g) +{ + u32 reg_f = ltc_ltcs_ltss_tstg_set_mgmt_2_l2_bypass_mode_enabled_f(); + + nvgpu_spinlock_acquire(&g->debugfs_lock); + if (g->mm.ltc_enabled != g->mm.ltc_enabled_debug) { + u32 reg = gk20a_readl(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r()); + + if (g->mm.ltc_enabled_debug) + /* bypass disabled (normal caching ops)*/ + reg &= ~reg_f; + else + /* bypass enabled (no caching) */ + reg |= reg_f; + + gk20a_writel(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r(), reg); + g->mm.ltc_enabled = g->mm.ltc_enabled_debug; + } + nvgpu_spinlock_release(&g->debugfs_lock); +} +#endif + void gm20b_init_ltc(struct gpu_ops *gops) { /* Gk20a reused ops. */ gops->ltc.determine_L2_size_bytes = gm20b_determine_L2_size_bytes; - gops->ltc.set_zbc_color_entry = gk20a_ltc_set_zbc_color_entry; - gops->ltc.set_zbc_depth_entry = gk20a_ltc_set_zbc_depth_entry; - gops->ltc.init_cbc = gk20a_ltc_init_cbc; + gops->ltc.set_zbc_color_entry = gm20b_ltc_set_zbc_color_entry; + gops->ltc.set_zbc_depth_entry = gm20b_ltc_set_zbc_depth_entry; + gops->ltc.init_cbc = gm20b_ltc_init_cbc; /* GM20b specific ops. */ gops->ltc.init_fs_state = gm20b_ltc_init_fs_state; @@ -365,6 +476,6 @@ void gm20b_init_ltc(struct gpu_ops *gops) gops->ltc.cbc_fix_config = gm20b_ltc_cbc_fix_config; gops->ltc.flush = gm20b_flush_ltc; #ifdef CONFIG_DEBUG_FS - gops->ltc.sync_debugfs = gk20a_ltc_sync_debugfs; + gops->ltc.sync_debugfs = gm20b_ltc_sync_debugfs; #endif } diff --git a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.h b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.h index fcd263de0..4fe832503 100644 --- a/drivers/gpu/nvgpu/gm20b/ltc_gm20b.h +++ b/drivers/gpu/nvgpu/gm20b/ltc_gm20b.h @@ -1,7 +1,7 @@ /* * GM20B L2 * - * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -17,6 +17,14 @@ #define _NVHOST_GM20B_LTC struct gpu_ops; +void gm20b_ltc_set_zbc_color_entry(struct gk20a *g, + struct zbc_entry *color_val, + u32 index); +void gm20b_ltc_set_zbc_depth_entry(struct gk20a *g, + struct zbc_entry *depth_val, + u32 index); +void gm20b_ltc_init_cbc(struct gk20a *g, struct gr_gk20a *gr); + void gm20b_init_ltc(struct gpu_ops *gops); void gm20b_ltc_init_fs_state(struct gk20a *g); int gm20b_ltc_cbc_ctrl(struct gk20a *g, enum gk20a_cbc_op op, diff --git a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c index b3026059d..d94e56ceb 100644 --- a/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c +++ b/drivers/gpu/nvgpu/gp10b/ltc_gp10b.c @@ -1,7 +1,7 @@ /* * GP10B L2 * - * Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2017, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -24,7 +24,7 @@ #include #include -#include "gk20a/ltc_common.c" +#include "gk20a/ltc_gk20a.h" #include "ltc_gp10b.h" static int gp10b_determine_L2_size_bytes(struct gk20a *g) @@ -205,12 +205,35 @@ static void gp10b_ltc_init_fs_state(struct gk20a *g) ltc_intr); } +#ifdef CONFIG_DEBUG_FS +static void gp10b_ltc_sync_debugfs(struct gk20a *g) +{ + u32 reg_f = ltc_ltcs_ltss_tstg_set_mgmt_2_l2_bypass_mode_enabled_f(); + + nvgpu_spinlock_acquire(&g->debugfs_lock); + if (g->mm.ltc_enabled != g->mm.ltc_enabled_debug) { + u32 reg = gk20a_readl(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r()); + + if (g->mm.ltc_enabled_debug) + /* bypass disabled (normal caching ops)*/ + reg &= ~reg_f; + else + /* bypass enabled (no caching) */ + reg |= reg_f; + + gk20a_writel(g, ltc_ltcs_ltss_tstg_set_mgmt_2_r(), reg); + g->mm.ltc_enabled = g->mm.ltc_enabled_debug; + } + nvgpu_spinlock_release(&g->debugfs_lock); +} +#endif + void gp10b_init_ltc(struct gpu_ops *gops) { gops->ltc.determine_L2_size_bytes = gp10b_determine_L2_size_bytes; - gops->ltc.set_zbc_color_entry = gk20a_ltc_set_zbc_color_entry; - gops->ltc.set_zbc_depth_entry = gk20a_ltc_set_zbc_depth_entry; - gops->ltc.init_cbc = gk20a_ltc_init_cbc; + gops->ltc.set_zbc_color_entry = gm20b_ltc_set_zbc_color_entry; + gops->ltc.set_zbc_depth_entry = gm20b_ltc_set_zbc_depth_entry; + gops->ltc.init_cbc = gm20b_ltc_init_cbc; /* GM20b specific ops. */ gops->ltc.init_fs_state = gp10b_ltc_init_fs_state; @@ -220,6 +243,6 @@ void gp10b_init_ltc(struct gpu_ops *gops) gops->ltc.cbc_fix_config = gm20b_ltc_cbc_fix_config; gops->ltc.flush = gm20b_flush_ltc; #ifdef CONFIG_DEBUG_FS - gops->ltc.sync_debugfs = gk20a_ltc_sync_debugfs; + gops->ltc.sync_debugfs = gp10b_ltc_sync_debugfs; #endif }