From f576bd8f84513678440ce6e426064ef1bb1d455d Mon Sep 17 00:00:00 2001 From: Nicolas Benech Date: Thu, 18 Jul 2019 20:25:17 -0400 Subject: [PATCH] gpu: nvgpu: gm20b: split HALs for FUSA Only some HALs are functionally safe (FUSA), so this patch splits the GM20B-related HALs into FUSA and non-FUSA source files. JIRA NVGPU-3690 Change-Id: I3a558b1f3cc713a98e9eab366c49f7ab8ee2e5a2 Signed-off-by: Nicolas Benech Reviewed-on: https://git-master.nvidia.com/r/2156609 Reviewed-by: mobile promotions Tested-by: mobile promotions --- arch/nvgpu-hal-new.yaml | 7 +- drivers/gpu/nvgpu/Makefile | 38 ++-- drivers/gpu/nvgpu/Makefile.sources | 13 +- .../hal/gr/ctxsw_prog/ctxsw_prog_gm20b_dbg.c | 170 ++++++++++++++++++ .../hal/gr/ctxsw_prog/ctxsw_prog_gm20b_fusa.c | 140 --------------- drivers/gpu/nvgpu/hal/ltc/ltc_gm20b_dbg.c | 139 ++++++++++++++ drivers/gpu/nvgpu/hal/ltc/ltc_gm20b_fusa.c | 96 ---------- 7 files changed, 343 insertions(+), 260 deletions(-) create mode 100644 drivers/gpu/nvgpu/hal/gr/ctxsw_prog/ctxsw_prog_gm20b_dbg.c create mode 100644 drivers/gpu/nvgpu/hal/ltc/ltc_gm20b_dbg.c diff --git a/arch/nvgpu-hal-new.yaml b/arch/nvgpu-hal-new.yaml index abd29cb43..1ccfb85b1 100644 --- a/arch/nvgpu-hal-new.yaml +++ b/arch/nvgpu-hal-new.yaml @@ -19,7 +19,9 @@ ltc: children: ltc_fusa: safe: yes - sources: [ hal/ltc/ltc_gm20b_fusa.c, + sources: [ hal/ltc/ltc_gm20b.c, + hal/ltc/ltc_gm20b_dbg.c, + hal/ltc/ltc_gm20b_fusa.c, hal/ltc/ltc_gm20b.h, hal/ltc/ltc_gp10b_fusa.c, hal/ltc/ltc_gp10b.h, @@ -405,8 +407,9 @@ gr: ctxsw_prog: safe: no sources: [ hal/gr/ctxsw_prog/ctxsw_prog_gm20b.c, + hal/gr/ctxsw_prog/ctxsw_prog_gm20b_dbg.c, hal/gr/ctxsw_prog/ctxsw_prog_gp10b.c, - hal/gr/ctxsw_prog/ctxsw_prog_gv11b.c ] + hal/gr/ctxsw_prog/ctxsw_prog_gv11b.c] config: safe: yes sources: [ hal/gr/config/gr_config_gm20b.c, diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index 1afd2235d..789e14bf2 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile @@ -183,7 +183,6 @@ nvgpu-y += \ common/nvlink/nvlink.o \ common/nvlink/nvlink_gv100.o \ common/nvlink/nvlink_tu104.o \ - hal/mc/mc_gm20b.o \ hal/mc/mc_gp10b.o \ hal/mc/mc_gv100.o \ hal/mc/mc_tu104.o \ @@ -198,7 +197,6 @@ nvgpu-y += \ hal/gr/ecc/ecc_tu104.o \ hal/gr/zcull/zcull_gm20b.o \ hal/gr/zcull/zcull_gv11b.o \ - hal/gr/ctxsw_prog/ctxsw_prog_gm20b.o \ hal/gr/ctxsw_prog/ctxsw_prog_gp10b.o \ hal/gr/ctxsw_prog/ctxsw_prog_gv11b.o \ hal/gr/init/gr_init_gm20b.o \ @@ -223,26 +221,20 @@ nvgpu-y += \ hal/init/hal_gv11b.o \ hal/init/hal_gv11b_litter.o \ hal/init/hal_init.o \ - hal/perf/perf_gm20b.o \ hal/perf/perf_gv11b.o \ - hal/priv_ring/priv_ring_gm20b.o \ - hal/power_features/cg/gm20b_gating_reglist.o \ hal/power_features/cg/gp106_gating_reglist.o \ hal/power_features/cg/gp10b_gating_reglist.o \ hal/power_features/cg/gv100_gating_reglist.o \ hal/power_features/cg/gv11b_gating_reglist.o \ hal/power_features/cg/tu104_gating_reglist.o \ - hal/regops/regops_gm20b.o \ hal/regops/regops_gp10b.o \ hal/regops/regops_gv100.o \ hal/regops/regops_gv11b.o \ hal/regops/regops_tu104.o \ hal/ce/ce2_gk20a.o \ - hal/therm/therm_gm20b.o \ hal/therm/therm_gp10b.o \ hal/therm/therm_tu104.o \ hal/gr/falcon/gr_falcon_gm20b.o \ - hal/ltc/ltc_gm20b.o \ hal/ltc/ltc_gp10b.o \ hal/ltc/ltc_gv11b.o \ hal/ltc/ltc_tu104.o \ @@ -265,11 +257,8 @@ nvgpu-y += \ hal/fifo/fifo_tu104.o \ hal/fifo/preempt_gk20a.o \ hal/fifo/usermode_tu104.o \ - hal/fifo/engines_gm20b.o \ - hal/fifo/pbdma_gm20b.o \ hal/fifo/pbdma_gp10b.o \ hal/fifo/pbdma_tu104.o \ - hal/fifo/engine_status_gm20b.o \ hal/fifo/ramfc_gk20a.o \ hal/fifo/ramfc_gp10b.o \ hal/fifo/ramfc_tu104.o \ @@ -293,7 +282,6 @@ nvgpu-y += \ hal/fifo/mmu_fault_gm20b.o \ hal/fifo/mmu_fault_gp10b.o \ hal/fifo/ctxsw_timeout_gk20a.o \ - hal/netlist/netlist_gm20b.o \ hal/netlist/netlist_gp10b.o \ hal/netlist/netlist_gv100.o \ hal/netlist/netlist_tu104.o \ @@ -319,7 +307,6 @@ nvgpu-y += \ hal/pramin/pramin_init.o \ hal/pramin/pramin_tu104.o \ hal/bios/bios_tu104.o \ - hal/top/top_gm20b.o \ hal/top/top_gp106.o \ hal/top/top_gv100.o \ hal/xve/xve_gp106.o @@ -528,9 +515,6 @@ nvgpu-y += \ common/ce/ce.o \ common/debugger.o -nvgpu-y += \ - hal/gr/config/gr_config_gm20b.o - nvgpu-$(CONFIG_TEGRA_GR_VIRTUALIZATION) += \ common/vgpu/ltc/ltc_vgpu.o \ common/vgpu/fbp/fbp_vgpu.o \ @@ -675,19 +659,39 @@ nvgpu-y += \ nvgpu-$(CONFIG_NVGPU_HAL_NON_FUSA) += \ hal/ce/ce_tu104.o \ + hal/clk/clk_gm20b.o \ hal/init/hal_gp10b.o \ hal/init/hal_gp10b_litter.o \ hal/init/hal_gm20b.o \ hal/init/hal_gm20b_litter.o \ hal/init/hal_tu104.o \ hal/init/hal_tu104_litter.o \ + hal/fifo/engine_status_gm20b.o \ + hal/fifo/engines_gm20b.o \ + hal/fifo/pbdma_gm20b.o \ + hal/gr/config/gr_config_gm20b.o \ + hal/gr/ctxsw_prog/ctxsw_prog_gm20b.o \ + hal/gr/ctxsw_prog/ctxsw_prog_gm20b_dbg.o \ + hal/gr/gr/gr_gk20a.o \ + hal/gr/gr/gr_gm20b.o \ + hal/gr/gr/gr_gp10b.o \ + hal/ltc/ltc_gm20b.o \ + hal/ltc/ltc_gm20b_dbg.o \ + hal/mc/mc_gm20b.o \ hal/mm/mm_gm20b.o \ hal/mm/mm_gk20a.o \ hal/mm/mm_gv100.o \ hal/mm/mm_tu104.o \ hal/mm/gmmu/gmmu_gk20a.o \ hal/mm/gmmu/gmmu_gm20b.o \ - hal/falcon/falcon_gk20a.o + hal/falcon/falcon_gk20a.o \ + hal/netlist/netlist_gm20b.o \ + hal/perf/perf_gm20b.o \ + hal/power_features/cg/gm20b_gating_reglist.o \ + hal/priv_ring/priv_ring_gm20b.o \ + hal/regops/regops_gm20b.o \ + hal/therm/therm_gm20b.o \ + hal/top/top_gm20b.o ifeq ($(CONFIG_TEGRA_GR_VIRTUALIZATION),y) nvgpu-$(CONFIG_NVGPU_HAL_NON_FUSA) += \ diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources index 18333845c..750b76982 100644 --- a/drivers/gpu/nvgpu/Makefile.sources +++ b/drivers/gpu/nvgpu/Makefile.sources @@ -264,6 +264,7 @@ srcs += hal/init/hal_gp10b.c \ hal/class/class_gm20b.c \ hal/clk/clk_gm20b.c \ hal/falcon/falcon_gk20a.c \ + hal/gr/config/gr_config_gm20b.c \ hal/gr/ecc/ecc_gp10b.c \ hal/gr/init/gr_init_gm20b.c \ hal/gr/init/gr_init_gp10b.c \ @@ -278,8 +279,8 @@ srcs += hal/init/hal_gp10b.c \ hal/ce/ce2_gk20a.c \ hal/therm/therm_gm20b.c \ hal/therm/therm_gp10b.c \ - hal/ltc/ltc_gp10b.c \ hal/ltc/ltc_gm20b.c \ + hal/ltc/ltc_gp10b.c \ hal/ltc/ltc_gv11b.c \ hal/ltc/intr/ltc_intr_gm20b.c \ hal/ltc/intr/ltc_intr_gp10b.c \ @@ -351,13 +352,13 @@ srcs += common/debugger.c \ common/regops/regops.c \ common/gr/hwpm_map.c \ common/perf/perfbuf.c \ - hal/regops/regops_gm20b.c \ hal/regops/regops_gp10b.c \ hal/regops/regops_gv11b.c \ hal/regops/regops_gv100.c \ hal/regops/regops_tu104.c \ + hal/gr/ctxsw_prog/ctxsw_prog_gm20b_dbg.c \ hal/gr/hwpm_map/hwpm_map_gv100.c \ - hal/perf/perf_gm20b.c \ + hal/ltc/ltc_gm20b_dbg.c \ hal/perf/perf_gv11b.c \ hal/gr/gr/gr_gk20a.c \ hal/gr/gr/gr_gm20b.c \ @@ -365,6 +366,10 @@ srcs += common/debugger.c \ hal/gr/gr/gr_gv11b.c \ hal/gr/gr/gr_gv100.c \ hal/gr/gr/gr_tu104.c +ifeq ($(CONFIG_NVGPU_HAL_NON_FUSA),1) +srcs += hal/regops/regops_gm20b.c \ + hal/perf/perf_gm20b.c +endif endif ifeq ($(CONFIG_NVGPU_CE),1) @@ -409,8 +414,6 @@ ifdef NVGPU_FAULT_INJECTION_ENABLEMENT srcs += os/posix/posix-fault-injection.c endif -srcs += hal/gr/config/gr_config_gm20b.c - ifeq ($(CONFIG_NVGPU_LS_PMU),1) # Add LS PMU files which are required for normal build srcs += \ diff --git a/drivers/gpu/nvgpu/hal/gr/ctxsw_prog/ctxsw_prog_gm20b_dbg.c b/drivers/gpu/nvgpu/hal/gr/ctxsw_prog/ctxsw_prog_gm20b_dbg.c new file mode 100644 index 000000000..d6727fb75 --- /dev/null +++ b/drivers/gpu/nvgpu/hal/gr/ctxsw_prog/ctxsw_prog_gm20b_dbg.c @@ -0,0 +1,170 @@ +/* + * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include + +#include "ctxsw_prog_gm20b.h" + +#include + +#ifdef CONFIG_NVGPU_DEBUGGER +u32 gm20b_ctxsw_prog_hw_get_gpccs_header_size(void) +{ + return ctxsw_prog_gpccs_header_stride_v(); +} + +u32 gm20b_ctxsw_prog_hw_get_extended_buffer_segments_size_in_bytes(void) +{ + return ctxsw_prog_extended_buffer_segments_size_in_bytes_v(); +} + +u32 gm20b_ctxsw_prog_hw_extended_marker_size_in_bytes(void) +{ + return ctxsw_prog_extended_marker_size_in_bytes_v(); +} + +u32 gm20b_ctxsw_prog_hw_get_perf_counter_control_register_stride(void) +{ + return ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v(); +} + +u32 gm20b_ctxsw_prog_get_main_image_ctx_id(struct gk20a *g, + struct nvgpu_mem *ctx_mem) +{ + return nvgpu_mem_rd(g, ctx_mem, ctxsw_prog_main_image_context_id_o()); +} + +void gm20b_ctxsw_prog_set_pm_ptr(struct gk20a *g, struct nvgpu_mem *ctx_mem, + u64 addr) +{ + addr = addr >> 8; + nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_pm_ptr_o(), + u64_lo32(addr)); +} + +void gm20b_ctxsw_prog_set_pm_mode(struct gk20a *g, + struct nvgpu_mem *ctx_mem, u32 mode) +{ + u32 data; + + data = nvgpu_mem_rd(g, ctx_mem, ctxsw_prog_main_image_pm_o()); + + data = data & ~ctxsw_prog_main_image_pm_mode_m(); + data |= mode; + + nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_pm_o(), data); +} + +void gm20b_ctxsw_prog_set_pm_smpc_mode(struct gk20a *g, + struct nvgpu_mem *ctx_mem, bool enable) +{ + u32 data; + + data = nvgpu_mem_rd(g, ctx_mem, ctxsw_prog_main_image_pm_o()); + + data = data & ~ctxsw_prog_main_image_pm_smpc_mode_m(); + data |= enable ? + ctxsw_prog_main_image_pm_smpc_mode_ctxsw_f() : + ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f(); + + nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_pm_o(), data); +} + +u32 gm20b_ctxsw_prog_hw_get_pm_mode_no_ctxsw(void) +{ + return ctxsw_prog_main_image_pm_mode_no_ctxsw_f(); +} + +u32 gm20b_ctxsw_prog_hw_get_pm_mode_ctxsw(void) +{ + return ctxsw_prog_main_image_pm_mode_ctxsw_f(); +} + +void gm20b_ctxsw_prog_set_cde_enabled(struct gk20a *g, + struct nvgpu_mem *ctx_mem) +{ + u32 data = nvgpu_mem_rd(g, ctx_mem, ctxsw_prog_main_image_ctl_o()); + + data |= ctxsw_prog_main_image_ctl_cde_enabled_f(); + nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_ctl_o(), data); +} + +void gm20b_ctxsw_prog_set_pc_sampling(struct gk20a *g, + struct nvgpu_mem *ctx_mem, bool enable) +{ + u32 data = nvgpu_mem_rd(g, ctx_mem, ctxsw_prog_main_image_pm_o()); + + data &= ~ctxsw_prog_main_image_pm_pc_sampling_m(); + data |= ctxsw_prog_main_image_pm_pc_sampling_f( + nvgpu_safe_cast_bool_to_u32(enable)); + + nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_pm_o(), data); +} + +bool gm20b_ctxsw_prog_check_main_image_header_magic(u32 *context) +{ + u32 magic = *(context + (ctxsw_prog_main_image_magic_value_o() >> 2)); + return magic == ctxsw_prog_main_image_magic_value_v_value_v(); +} + +bool gm20b_ctxsw_prog_check_local_header_magic(u32 *context) +{ + u32 magic = *(context + (ctxsw_prog_local_magic_value_o() >> 2)); + return magic == ctxsw_prog_local_magic_value_v_value_v(); +} + +u32 gm20b_ctxsw_prog_get_num_gpcs(u32 *context) +{ + return *(context + (ctxsw_prog_main_image_num_gpcs_o() >> 2)); +} + +u32 gm20b_ctxsw_prog_get_num_tpcs(u32 *context) +{ + return *(context + (ctxsw_prog_local_image_num_tpcs_o() >> 2)); +} + +void gm20b_ctxsw_prog_get_extended_buffer_size_offset(u32 *context, + u32 *size, u32 *offset) +{ + u32 data = *(context + (ctxsw_prog_main_extended_buffer_ctl_o() >> 2)); + + *size = ctxsw_prog_main_extended_buffer_ctl_size_v(data); + *offset = ctxsw_prog_main_extended_buffer_ctl_offset_v(data); +} + +void gm20b_ctxsw_prog_get_ppc_info(u32 *context, u32 *num_ppcs, u32 *ppc_mask) +{ + u32 data = *(context + (ctxsw_prog_local_image_ppc_info_o() >> 2)); + + *num_ppcs = ctxsw_prog_local_image_ppc_info_num_ppcs_v(data); + *ppc_mask = ctxsw_prog_local_image_ppc_info_ppc_mask_v(data); +} + +u32 gm20b_ctxsw_prog_get_local_priv_register_ctl_offset(u32 *context) +{ + u32 data = *(context + (ctxsw_prog_local_priv_register_ctl_o() >> 2)); + return ctxsw_prog_local_priv_register_ctl_offset_v(data); +} +#endif /* CONFIG_NVGPU_DEBUGGER */ diff --git a/drivers/gpu/nvgpu/hal/gr/ctxsw_prog/ctxsw_prog_gm20b_fusa.c b/drivers/gpu/nvgpu/hal/gr/ctxsw_prog/ctxsw_prog_gm20b_fusa.c index 9547cde8a..34ed8115e 100644 --- a/drivers/gpu/nvgpu/hal/gr/ctxsw_prog/ctxsw_prog_gm20b_fusa.c +++ b/drivers/gpu/nvgpu/hal/gr/ctxsw_prog/ctxsw_prog_gm20b_fusa.c @@ -29,146 +29,6 @@ #include -#ifdef CONFIG_NVGPU_DEBUGGER -u32 gm20b_ctxsw_prog_hw_get_gpccs_header_size(void) -{ - return ctxsw_prog_gpccs_header_stride_v(); -} - -u32 gm20b_ctxsw_prog_hw_get_extended_buffer_segments_size_in_bytes(void) -{ - return ctxsw_prog_extended_buffer_segments_size_in_bytes_v(); -} - -u32 gm20b_ctxsw_prog_hw_extended_marker_size_in_bytes(void) -{ - return ctxsw_prog_extended_marker_size_in_bytes_v(); -} - -u32 gm20b_ctxsw_prog_hw_get_perf_counter_control_register_stride(void) -{ - return ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v(); -} - -u32 gm20b_ctxsw_prog_get_main_image_ctx_id(struct gk20a *g, - struct nvgpu_mem *ctx_mem) -{ - return nvgpu_mem_rd(g, ctx_mem, ctxsw_prog_main_image_context_id_o()); -} - -void gm20b_ctxsw_prog_set_pm_ptr(struct gk20a *g, struct nvgpu_mem *ctx_mem, - u64 addr) -{ - addr = addr >> 8; - nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_pm_ptr_o(), - u64_lo32(addr)); -} - -void gm20b_ctxsw_prog_set_pm_mode(struct gk20a *g, - struct nvgpu_mem *ctx_mem, u32 mode) -{ - u32 data; - - data = nvgpu_mem_rd(g, ctx_mem, ctxsw_prog_main_image_pm_o()); - - data = data & ~ctxsw_prog_main_image_pm_mode_m(); - data |= mode; - - nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_pm_o(), data); -} - -void gm20b_ctxsw_prog_set_pm_smpc_mode(struct gk20a *g, - struct nvgpu_mem *ctx_mem, bool enable) -{ - u32 data; - - data = nvgpu_mem_rd(g, ctx_mem, ctxsw_prog_main_image_pm_o()); - - data = data & ~ctxsw_prog_main_image_pm_smpc_mode_m(); - data |= enable ? - ctxsw_prog_main_image_pm_smpc_mode_ctxsw_f() : - ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f(); - - nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_pm_o(), data); -} - -u32 gm20b_ctxsw_prog_hw_get_pm_mode_no_ctxsw(void) -{ - return ctxsw_prog_main_image_pm_mode_no_ctxsw_f(); -} - -u32 gm20b_ctxsw_prog_hw_get_pm_mode_ctxsw(void) -{ - return ctxsw_prog_main_image_pm_mode_ctxsw_f(); -} - -void gm20b_ctxsw_prog_set_cde_enabled(struct gk20a *g, - struct nvgpu_mem *ctx_mem) -{ - u32 data = nvgpu_mem_rd(g, ctx_mem, ctxsw_prog_main_image_ctl_o()); - - data |= ctxsw_prog_main_image_ctl_cde_enabled_f(); - nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_ctl_o(), data); -} - -void gm20b_ctxsw_prog_set_pc_sampling(struct gk20a *g, - struct nvgpu_mem *ctx_mem, bool enable) -{ - u32 data = nvgpu_mem_rd(g, ctx_mem, ctxsw_prog_main_image_pm_o()); - - data &= ~ctxsw_prog_main_image_pm_pc_sampling_m(); - data |= ctxsw_prog_main_image_pm_pc_sampling_f( - nvgpu_safe_cast_bool_to_u32(enable)); - - nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_pm_o(), data); -} - -bool gm20b_ctxsw_prog_check_main_image_header_magic(u32 *context) -{ - u32 magic = *(context + (ctxsw_prog_main_image_magic_value_o() >> 2)); - return magic == ctxsw_prog_main_image_magic_value_v_value_v(); -} - -bool gm20b_ctxsw_prog_check_local_header_magic(u32 *context) -{ - u32 magic = *(context + (ctxsw_prog_local_magic_value_o() >> 2)); - return magic == ctxsw_prog_local_magic_value_v_value_v(); -} - -u32 gm20b_ctxsw_prog_get_num_gpcs(u32 *context) -{ - return *(context + (ctxsw_prog_main_image_num_gpcs_o() >> 2)); -} - -u32 gm20b_ctxsw_prog_get_num_tpcs(u32 *context) -{ - return *(context + (ctxsw_prog_local_image_num_tpcs_o() >> 2)); -} - -void gm20b_ctxsw_prog_get_extended_buffer_size_offset(u32 *context, - u32 *size, u32 *offset) -{ - u32 data = *(context + (ctxsw_prog_main_extended_buffer_ctl_o() >> 2)); - - *size = ctxsw_prog_main_extended_buffer_ctl_size_v(data); - *offset = ctxsw_prog_main_extended_buffer_ctl_offset_v(data); -} - -void gm20b_ctxsw_prog_get_ppc_info(u32 *context, u32 *num_ppcs, u32 *ppc_mask) -{ - u32 data = *(context + (ctxsw_prog_local_image_ppc_info_o() >> 2)); - - *num_ppcs = ctxsw_prog_local_image_ppc_info_num_ppcs_v(data); - *ppc_mask = ctxsw_prog_local_image_ppc_info_ppc_mask_v(data); -} - -u32 gm20b_ctxsw_prog_get_local_priv_register_ctl_offset(u32 *context) -{ - u32 data = *(context + (ctxsw_prog_local_priv_register_ctl_o() >> 2)); - return ctxsw_prog_local_priv_register_ctl_offset_v(data); -} -#endif /* CONFIG_NVGPU_DEBUGGER */ - u32 gm20b_ctxsw_prog_hw_get_fecs_header_size(void) { return ctxsw_prog_fecs_header_v(); diff --git a/drivers/gpu/nvgpu/hal/ltc/ltc_gm20b_dbg.c b/drivers/gpu/nvgpu/hal/ltc/ltc_gm20b_dbg.c new file mode 100644 index 000000000..b36a08753 --- /dev/null +++ b/drivers/gpu/nvgpu/hal/ltc/ltc_gm20b_dbg.c @@ -0,0 +1,139 @@ +/* + * GM20B L2 + * + * Copyright (c) 2014-2019 NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifdef CONFIG_NVGPU_TRACE +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "ltc_gm20b.h" + +#ifdef CONFIG_NVGPU_DEBUGGER +/* + * LTC pri addressing + */ +bool gm20b_ltc_pri_is_ltc_addr(struct gk20a *g, u32 addr) +{ + return ((addr >= ltc_pltcg_base_v()) && (addr < ltc_pltcg_extent_v())); +} + +bool gm20b_ltc_is_ltcs_ltss_addr(struct gk20a *g, u32 addr) +{ + u32 ltc_shared_base = ltc_ltcs_ltss_v(); + u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); + + if (addr >= ltc_shared_base) { + return (addr < nvgpu_safe_add_u32(ltc_shared_base, lts_stride)); + } + return false; +} + +bool gm20b_ltc_is_ltcn_ltss_addr(struct gk20a *g, u32 addr) +{ + u32 lts_shared_base = ltc_ltc0_ltss_v(); + u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); + u32 addr_mask = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE) - 1U; + u32 base_offset = lts_shared_base & addr_mask; + u32 end_offset = nvgpu_safe_add_u32(base_offset, lts_stride); + + return (!gm20b_ltc_is_ltcs_ltss_addr(g, addr)) && + ((addr & addr_mask) >= base_offset) && + ((addr & addr_mask) < end_offset); +} + +static void gm20b_ltc_update_ltc_lts_addr(struct gk20a *g, u32 addr, + u32 ltc_num, u32 *priv_addr_table, u32 *priv_addr_table_index) +{ + u32 num_ltc_slices = g->ops.top.get_max_lts_per_ltc(g); + u32 index = *priv_addr_table_index; + u32 lts_num; + u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); + u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); + + for (lts_num = 0; lts_num < num_ltc_slices; + lts_num = nvgpu_safe_add_u32(lts_num, 1U)) { + priv_addr_table[index] = nvgpu_safe_add_u32( + ltc_ltc0_lts0_v(), + nvgpu_safe_add_u32( + nvgpu_safe_add_u32( + nvgpu_safe_mult_u32(ltc_num, ltc_stride), + nvgpu_safe_mult_u32(lts_num, lts_stride)), + (addr & nvgpu_safe_sub_u32( + lts_stride, 1U)))); + index = nvgpu_safe_add_u32(index, 1U); + } + + *priv_addr_table_index = index; +} + +void gm20b_ltc_split_lts_broadcast_addr(struct gk20a *g, u32 addr, + u32 *priv_addr_table, + u32 *priv_addr_table_index) +{ + u32 num_ltc = g->ltc->ltc_count; + u32 i, start, ltc_num = 0; + u32 pltcg_base = ltc_pltcg_base_v(); + u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); + + for (i = 0; i < num_ltc; i++) { + start = nvgpu_safe_add_u32(pltcg_base, + nvgpu_safe_mult_u32(i, ltc_stride)); + if (addr >= start) { + if (addr < nvgpu_safe_add_u32(start, ltc_stride)) { + ltc_num = i; + break; + } + } + } + gm20b_ltc_update_ltc_lts_addr(g, addr, ltc_num, priv_addr_table, + priv_addr_table_index); +} + +void gm20b_ltc_split_ltc_broadcast_addr(struct gk20a *g, u32 addr, + u32 *priv_addr_table, + u32 *priv_addr_table_index) +{ + u32 num_ltc = g->ltc->ltc_count; + u32 ltc_num; + + for (ltc_num = 0; ltc_num < num_ltc; ltc_num = + nvgpu_safe_add_u32(ltc_num, 1U)) { + gm20b_ltc_update_ltc_lts_addr(g, addr, ltc_num, + priv_addr_table, priv_addr_table_index); + } +} +#endif /* CONFIG_NVGPU_DEBUGGER */ diff --git a/drivers/gpu/nvgpu/hal/ltc/ltc_gm20b_fusa.c b/drivers/gpu/nvgpu/hal/ltc/ltc_gm20b_fusa.c index eb2a3eced..09e082981 100644 --- a/drivers/gpu/nvgpu/hal/ltc/ltc_gm20b_fusa.c +++ b/drivers/gpu/nvgpu/hal/ltc/ltc_gm20b_fusa.c @@ -42,102 +42,6 @@ #include "ltc_gm20b.h" -#ifdef CONFIG_NVGPU_DEBUGGER -/* - * LTC pri addressing - */ -bool gm20b_ltc_pri_is_ltc_addr(struct gk20a *g, u32 addr) -{ - return ((addr >= ltc_pltcg_base_v()) && (addr < ltc_pltcg_extent_v())); -} - -bool gm20b_ltc_is_ltcs_ltss_addr(struct gk20a *g, u32 addr) -{ - u32 ltc_shared_base = ltc_ltcs_ltss_v(); - u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); - - if (addr >= ltc_shared_base) { - return (addr < nvgpu_safe_add_u32(ltc_shared_base, lts_stride)); - } - return false; -} - -bool gm20b_ltc_is_ltcn_ltss_addr(struct gk20a *g, u32 addr) -{ - u32 lts_shared_base = ltc_ltc0_ltss_v(); - u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); - u32 addr_mask = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE) - 1U; - u32 base_offset = lts_shared_base & addr_mask; - u32 end_offset = nvgpu_safe_add_u32(base_offset, lts_stride); - - return (!gm20b_ltc_is_ltcs_ltss_addr(g, addr)) && - ((addr & addr_mask) >= base_offset) && - ((addr & addr_mask) < end_offset); -} - -static void gm20b_ltc_update_ltc_lts_addr(struct gk20a *g, u32 addr, - u32 ltc_num, u32 *priv_addr_table, u32 *priv_addr_table_index) -{ - u32 num_ltc_slices = g->ops.top.get_max_lts_per_ltc(g); - u32 index = *priv_addr_table_index; - u32 lts_num; - u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); - u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE); - - for (lts_num = 0; lts_num < num_ltc_slices; - lts_num = nvgpu_safe_add_u32(lts_num, 1U)) { - priv_addr_table[index] = nvgpu_safe_add_u32( - ltc_ltc0_lts0_v(), - nvgpu_safe_add_u32( - nvgpu_safe_add_u32( - nvgpu_safe_mult_u32(ltc_num, ltc_stride), - nvgpu_safe_mult_u32(lts_num, lts_stride)), - (addr & nvgpu_safe_sub_u32( - lts_stride, 1U)))); - index = nvgpu_safe_add_u32(index, 1U); - } - - *priv_addr_table_index = index; -} - -void gm20b_ltc_split_lts_broadcast_addr(struct gk20a *g, u32 addr, - u32 *priv_addr_table, - u32 *priv_addr_table_index) -{ - u32 num_ltc = g->ltc->ltc_count; - u32 i, start, ltc_num = 0; - u32 pltcg_base = ltc_pltcg_base_v(); - u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE); - - for (i = 0; i < num_ltc; i++) { - start = nvgpu_safe_add_u32(pltcg_base, - nvgpu_safe_mult_u32(i, ltc_stride)); - if (addr >= start) { - if (addr < nvgpu_safe_add_u32(start, ltc_stride)) { - ltc_num = i; - break; - } - } - } - gm20b_ltc_update_ltc_lts_addr(g, addr, ltc_num, priv_addr_table, - priv_addr_table_index); -} - -void gm20b_ltc_split_ltc_broadcast_addr(struct gk20a *g, u32 addr, - u32 *priv_addr_table, - u32 *priv_addr_table_index) -{ - u32 num_ltc = g->ltc->ltc_count; - u32 ltc_num; - - for (ltc_num = 0; ltc_num < num_ltc; ltc_num = - nvgpu_safe_add_u32(ltc_num, 1U)) { - gm20b_ltc_update_ltc_lts_addr(g, addr, ltc_num, - priv_addr_table, priv_addr_table_index); - } -} -#endif /* CONFIG_NVGPU_DEBUGGER */ - /* * Performs a full flush of the L2 cache. */