gpu: nvgpu: gm20b: split HALs for FUSA

Only some HALs are functionally safe (FUSA), so this patch splits
the GM20B-related HALs into FUSA and non-FUSA source files.

JIRA NVGPU-3690

Change-Id: I3a558b1f3cc713a98e9eab366c49f7ab8ee2e5a2
Signed-off-by: Nicolas Benech <nbenech@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2156609
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Authored by Nicolas Benech on 2019-07-18 20:25:17 -04:00, committed by mobile promotions
parent e35cfa6ca3, commit f576bd8f84
7 changed files with 343 additions and 260 deletions
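
The change is easiest to see in miniature. Below is a hedged sketch of the split pattern (the foo_* names and register values are invented for illustration and are not nvgpu code): debugger-only HAL helpers move into a separate <unit>_<chip>_dbg.c translation unit, and the build stops compiling non-FUSA objects into safety images by listing them under nvgpu-$(CONFIG_NVGPU_HAL_NON_FUSA) instead of nvgpu-y.

/*
 * Hypothetical sketch, not nvgpu code. Compile as-is for a "safety build",
 * or with -DCONFIG_NVGPU_DEBUGGER for a "debug build" that also pulls in
 * the debugger-only helper, mirroring how ltc_gm20b_dbg.c and
 * ctxsw_prog_gm20b_dbg.c are built only for non-FUSA configurations.
 */
#include <stdio.h>

typedef unsigned int u32;

/* would live in foo_gm20b_fusa.c: safety-certified, present in every build */
static u32 gm20b_foo_hw_get_header_size(void)
{
	return 256U;
}

#ifdef CONFIG_NVGPU_DEBUGGER
/* would live in foo_gm20b_dbg.c: compiled out of FUSA (safety) builds */
static u32 gm20b_foo_hw_get_debug_stride(void)
{
	return 32U;
}
#endif

int main(void)
{
	printf("header size: %u\n", gm20b_foo_hw_get_header_size());
#ifdef CONFIG_NVGPU_DEBUGGER
	printf("debug stride: %u\n", gm20b_foo_hw_get_debug_stride());
#endif
	return 0;
}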

Changed file: HAL unit sources list (YAML)

@@ -19,7 +19,9 @@ ltc:
   children:
     ltc_fusa:
       safe: yes
-      sources: [ hal/ltc/ltc_gm20b_fusa.c,
+      sources: [ hal/ltc/ltc_gm20b.c,
+                 hal/ltc/ltc_gm20b_dbg.c,
+                 hal/ltc/ltc_gm20b_fusa.c,
                  hal/ltc/ltc_gm20b.h,
                  hal/ltc/ltc_gp10b_fusa.c,
                  hal/ltc/ltc_gp10b.h,

@@ -405,8 +407,9 @@ gr:
     ctxsw_prog:
       safe: no
       sources: [ hal/gr/ctxsw_prog/ctxsw_prog_gm20b.c,
+                 hal/gr/ctxsw_prog/ctxsw_prog_gm20b_dbg.c,
                  hal/gr/ctxsw_prog/ctxsw_prog_gp10b.c,
-                 hal/gr/ctxsw_prog/ctxsw_prog_gv11b.c ]
+                 hal/gr/ctxsw_prog/ctxsw_prog_gv11b.c]
     config:
       safe: yes
       sources: [ hal/gr/config/gr_config_gm20b.c,

Changed file: Makefile (Kbuild)

@@ -183,7 +183,6 @@ nvgpu-y += \
 	common/nvlink/nvlink.o \
 	common/nvlink/nvlink_gv100.o \
 	common/nvlink/nvlink_tu104.o \
-	hal/mc/mc_gm20b.o \
 	hal/mc/mc_gp10b.o \
 	hal/mc/mc_gv100.o \
 	hal/mc/mc_tu104.o \

@@ -198,7 +197,6 @@ nvgpu-y += \
 	hal/gr/ecc/ecc_tu104.o \
 	hal/gr/zcull/zcull_gm20b.o \
 	hal/gr/zcull/zcull_gv11b.o \
-	hal/gr/ctxsw_prog/ctxsw_prog_gm20b.o \
 	hal/gr/ctxsw_prog/ctxsw_prog_gp10b.o \
 	hal/gr/ctxsw_prog/ctxsw_prog_gv11b.o \
 	hal/gr/init/gr_init_gm20b.o \

@@ -223,26 +221,20 @@ nvgpu-y += \
 	hal/init/hal_gv11b.o \
 	hal/init/hal_gv11b_litter.o \
 	hal/init/hal_init.o \
-	hal/perf/perf_gm20b.o \
 	hal/perf/perf_gv11b.o \
-	hal/priv_ring/priv_ring_gm20b.o \
-	hal/power_features/cg/gm20b_gating_reglist.o \
 	hal/power_features/cg/gp106_gating_reglist.o \
 	hal/power_features/cg/gp10b_gating_reglist.o \
 	hal/power_features/cg/gv100_gating_reglist.o \
 	hal/power_features/cg/gv11b_gating_reglist.o \
 	hal/power_features/cg/tu104_gating_reglist.o \
-	hal/regops/regops_gm20b.o \
 	hal/regops/regops_gp10b.o \
 	hal/regops/regops_gv100.o \
 	hal/regops/regops_gv11b.o \
 	hal/regops/regops_tu104.o \
 	hal/ce/ce2_gk20a.o \
-	hal/therm/therm_gm20b.o \
 	hal/therm/therm_gp10b.o \
 	hal/therm/therm_tu104.o \
 	hal/gr/falcon/gr_falcon_gm20b.o \
-	hal/ltc/ltc_gm20b.o \
 	hal/ltc/ltc_gp10b.o \
 	hal/ltc/ltc_gv11b.o \
 	hal/ltc/ltc_tu104.o \

@@ -265,11 +257,8 @@ nvgpu-y += \
 	hal/fifo/fifo_tu104.o \
 	hal/fifo/preempt_gk20a.o \
 	hal/fifo/usermode_tu104.o \
-	hal/fifo/engines_gm20b.o \
-	hal/fifo/pbdma_gm20b.o \
 	hal/fifo/pbdma_gp10b.o \
 	hal/fifo/pbdma_tu104.o \
-	hal/fifo/engine_status_gm20b.o \
 	hal/fifo/ramfc_gk20a.o \
 	hal/fifo/ramfc_gp10b.o \
 	hal/fifo/ramfc_tu104.o \

@@ -293,7 +282,6 @@ nvgpu-y += \
 	hal/fifo/mmu_fault_gm20b.o \
 	hal/fifo/mmu_fault_gp10b.o \
 	hal/fifo/ctxsw_timeout_gk20a.o \
-	hal/netlist/netlist_gm20b.o \
 	hal/netlist/netlist_gp10b.o \
 	hal/netlist/netlist_gv100.o \
 	hal/netlist/netlist_tu104.o \

@@ -319,7 +307,6 @@ nvgpu-y += \
 	hal/pramin/pramin_init.o \
 	hal/pramin/pramin_tu104.o \
 	hal/bios/bios_tu104.o \
-	hal/top/top_gm20b.o \
 	hal/top/top_gp106.o \
 	hal/top/top_gv100.o \
 	hal/xve/xve_gp106.o

@@ -528,9 +515,6 @@ nvgpu-y += \
 	common/ce/ce.o \
 	common/debugger.o
 
-nvgpu-y += \
-	hal/gr/config/gr_config_gm20b.o
-
 nvgpu-$(CONFIG_TEGRA_GR_VIRTUALIZATION) += \
 	common/vgpu/ltc/ltc_vgpu.o \
 	common/vgpu/fbp/fbp_vgpu.o \

@@ -675,19 +659,39 @@ nvgpu-y += \
 nvgpu-$(CONFIG_NVGPU_HAL_NON_FUSA) += \
 	hal/ce/ce_tu104.o \
+	hal/clk/clk_gm20b.o \
 	hal/init/hal_gp10b.o \
 	hal/init/hal_gp10b_litter.o \
 	hal/init/hal_gm20b.o \
 	hal/init/hal_gm20b_litter.o \
 	hal/init/hal_tu104.o \
 	hal/init/hal_tu104_litter.o \
+	hal/fifo/engine_status_gm20b.o \
+	hal/fifo/engines_gm20b.o \
+	hal/fifo/pbdma_gm20b.o \
+	hal/gr/config/gr_config_gm20b.o \
+	hal/gr/ctxsw_prog/ctxsw_prog_gm20b.o \
+	hal/gr/ctxsw_prog/ctxsw_prog_gm20b_dbg.o \
+	hal/gr/gr/gr_gk20a.o \
+	hal/gr/gr/gr_gm20b.o \
+	hal/gr/gr/gr_gp10b.o \
+	hal/ltc/ltc_gm20b.o \
+	hal/ltc/ltc_gm20b_dbg.o \
+	hal/mc/mc_gm20b.o \
 	hal/mm/mm_gm20b.o \
 	hal/mm/mm_gk20a.o \
 	hal/mm/mm_gv100.o \
 	hal/mm/mm_tu104.o \
 	hal/mm/gmmu/gmmu_gk20a.o \
 	hal/mm/gmmu/gmmu_gm20b.o \
-	hal/falcon/falcon_gk20a.o
+	hal/falcon/falcon_gk20a.o \
+	hal/netlist/netlist_gm20b.o \
+	hal/perf/perf_gm20b.o \
+	hal/power_features/cg/gm20b_gating_reglist.o \
+	hal/priv_ring/priv_ring_gm20b.o \
+	hal/regops/regops_gm20b.o \
+	hal/therm/therm_gm20b.o \
+	hal/top/top_gm20b.o
 
 ifeq ($(CONFIG_TEGRA_GR_VIRTUALIZATION),y)
 nvgpu-$(CONFIG_NVGPU_HAL_NON_FUSA) += \

Changed file: Makefile.sources (userspace/POSIX build)

@@ -264,6 +264,7 @@ srcs += hal/init/hal_gp10b.c \
 	hal/class/class_gm20b.c \
 	hal/clk/clk_gm20b.c \
 	hal/falcon/falcon_gk20a.c \
+	hal/gr/config/gr_config_gm20b.c \
 	hal/gr/ecc/ecc_gp10b.c \
 	hal/gr/init/gr_init_gm20b.c \
 	hal/gr/init/gr_init_gp10b.c \

@@ -278,8 +279,8 @@ srcs += hal/init/hal_gp10b.c \
 	hal/ce/ce2_gk20a.c \
 	hal/therm/therm_gm20b.c \
 	hal/therm/therm_gp10b.c \
-	hal/ltc/ltc_gp10b.c \
 	hal/ltc/ltc_gm20b.c \
+	hal/ltc/ltc_gp10b.c \
 	hal/ltc/ltc_gv11b.c \
 	hal/ltc/intr/ltc_intr_gm20b.c \
 	hal/ltc/intr/ltc_intr_gp10b.c \

@@ -351,13 +352,13 @@ srcs += common/debugger.c \
 	common/regops/regops.c \
 	common/gr/hwpm_map.c \
 	common/perf/perfbuf.c \
-	hal/regops/regops_gm20b.c \
 	hal/regops/regops_gp10b.c \
 	hal/regops/regops_gv11b.c \
 	hal/regops/regops_gv100.c \
 	hal/regops/regops_tu104.c \
+	hal/gr/ctxsw_prog/ctxsw_prog_gm20b_dbg.c \
 	hal/gr/hwpm_map/hwpm_map_gv100.c \
-	hal/perf/perf_gm20b.c \
+	hal/ltc/ltc_gm20b_dbg.c \
 	hal/perf/perf_gv11b.c \
 	hal/gr/gr/gr_gk20a.c \
 	hal/gr/gr/gr_gm20b.c \

@@ -365,6 +366,10 @@ srcs += common/debugger.c \
 	hal/gr/gr/gr_gv11b.c \
 	hal/gr/gr/gr_gv100.c \
 	hal/gr/gr/gr_tu104.c
+ifeq ($(CONFIG_NVGPU_HAL_NON_FUSA),1)
+srcs += hal/regops/regops_gm20b.c \
+	hal/perf/perf_gm20b.c
+endif
 endif
 
 ifeq ($(CONFIG_NVGPU_CE),1)

@@ -409,8 +414,6 @@ ifdef NVGPU_FAULT_INJECTION_ENABLEMENT
 srcs += os/posix/posix-fault-injection.c
 endif
 
-srcs += hal/gr/config/gr_config_gm20b.c
-
 ifeq ($(CONFIG_NVGPU_LS_PMU),1)
 # Add LS PMU files which are required for normal build
 srcs += \

Changed file: hal/gr/ctxsw_prog/ctxsw_prog_gm20b_dbg.c (new file)

@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <nvgpu/gk20a.h>
+#include <nvgpu/utils.h>
+#include <nvgpu/nvgpu_mem.h>
+#include <nvgpu/safe_ops.h>
+
+#include "ctxsw_prog_gm20b.h"
+
+#include <nvgpu/hw/gm20b/hw_ctxsw_prog_gm20b.h>
+
+#ifdef CONFIG_NVGPU_DEBUGGER
+u32 gm20b_ctxsw_prog_hw_get_gpccs_header_size(void)
+{
+	return ctxsw_prog_gpccs_header_stride_v();
+}
+
+u32 gm20b_ctxsw_prog_hw_get_extended_buffer_segments_size_in_bytes(void)
+{
+	return ctxsw_prog_extended_buffer_segments_size_in_bytes_v();
+}
+
+u32 gm20b_ctxsw_prog_hw_extended_marker_size_in_bytes(void)
+{
+	return ctxsw_prog_extended_marker_size_in_bytes_v();
+}
+
+u32 gm20b_ctxsw_prog_hw_get_perf_counter_control_register_stride(void)
+{
+	return ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v();
+}
+
+u32 gm20b_ctxsw_prog_get_main_image_ctx_id(struct gk20a *g,
+	struct nvgpu_mem *ctx_mem)
+{
+	return nvgpu_mem_rd(g, ctx_mem, ctxsw_prog_main_image_context_id_o());
+}
+
+void gm20b_ctxsw_prog_set_pm_ptr(struct gk20a *g, struct nvgpu_mem *ctx_mem,
+	u64 addr)
+{
+	addr = addr >> 8;
+	nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_pm_ptr_o(),
+		u64_lo32(addr));
+}
+
+void gm20b_ctxsw_prog_set_pm_mode(struct gk20a *g,
+	struct nvgpu_mem *ctx_mem, u32 mode)
+{
+	u32 data;
+
+	data = nvgpu_mem_rd(g, ctx_mem, ctxsw_prog_main_image_pm_o());
+	data = data & ~ctxsw_prog_main_image_pm_mode_m();
+	data |= mode;
+	nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_pm_o(), data);
+}
+
+void gm20b_ctxsw_prog_set_pm_smpc_mode(struct gk20a *g,
+	struct nvgpu_mem *ctx_mem, bool enable)
+{
+	u32 data;
+
+	data = nvgpu_mem_rd(g, ctx_mem, ctxsw_prog_main_image_pm_o());
+	data = data & ~ctxsw_prog_main_image_pm_smpc_mode_m();
+	data |= enable ?
+		ctxsw_prog_main_image_pm_smpc_mode_ctxsw_f() :
+		ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f();
+	nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_pm_o(), data);
+}
+
+u32 gm20b_ctxsw_prog_hw_get_pm_mode_no_ctxsw(void)
+{
+	return ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
+}
+
+u32 gm20b_ctxsw_prog_hw_get_pm_mode_ctxsw(void)
+{
+	return ctxsw_prog_main_image_pm_mode_ctxsw_f();
+}
+
+void gm20b_ctxsw_prog_set_cde_enabled(struct gk20a *g,
+	struct nvgpu_mem *ctx_mem)
+{
+	u32 data = nvgpu_mem_rd(g, ctx_mem, ctxsw_prog_main_image_ctl_o());
+
+	data |= ctxsw_prog_main_image_ctl_cde_enabled_f();
+	nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_ctl_o(), data);
+}
+
+void gm20b_ctxsw_prog_set_pc_sampling(struct gk20a *g,
+	struct nvgpu_mem *ctx_mem, bool enable)
+{
+	u32 data = nvgpu_mem_rd(g, ctx_mem, ctxsw_prog_main_image_pm_o());
+
+	data &= ~ctxsw_prog_main_image_pm_pc_sampling_m();
+	data |= ctxsw_prog_main_image_pm_pc_sampling_f(
+		nvgpu_safe_cast_bool_to_u32(enable));
+	nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_pm_o(), data);
+}
+
+bool gm20b_ctxsw_prog_check_main_image_header_magic(u32 *context)
+{
+	u32 magic = *(context + (ctxsw_prog_main_image_magic_value_o() >> 2));
+
+	return magic == ctxsw_prog_main_image_magic_value_v_value_v();
+}
+
+bool gm20b_ctxsw_prog_check_local_header_magic(u32 *context)
+{
+	u32 magic = *(context + (ctxsw_prog_local_magic_value_o() >> 2));
+
+	return magic == ctxsw_prog_local_magic_value_v_value_v();
+}
+
+u32 gm20b_ctxsw_prog_get_num_gpcs(u32 *context)
+{
+	return *(context + (ctxsw_prog_main_image_num_gpcs_o() >> 2));
+}
+
+u32 gm20b_ctxsw_prog_get_num_tpcs(u32 *context)
+{
+	return *(context + (ctxsw_prog_local_image_num_tpcs_o() >> 2));
+}
+
+void gm20b_ctxsw_prog_get_extended_buffer_size_offset(u32 *context,
+	u32 *size, u32 *offset)
+{
+	u32 data = *(context + (ctxsw_prog_main_extended_buffer_ctl_o() >> 2));
+
+	*size = ctxsw_prog_main_extended_buffer_ctl_size_v(data);
+	*offset = ctxsw_prog_main_extended_buffer_ctl_offset_v(data);
+}
+
+void gm20b_ctxsw_prog_get_ppc_info(u32 *context, u32 *num_ppcs, u32 *ppc_mask)
+{
+	u32 data = *(context + (ctxsw_prog_local_image_ppc_info_o() >> 2));
+
+	*num_ppcs = ctxsw_prog_local_image_ppc_info_num_ppcs_v(data);
+	*ppc_mask = ctxsw_prog_local_image_ppc_info_ppc_mask_v(data);
+}
+
+u32 gm20b_ctxsw_prog_get_local_priv_register_ctl_offset(u32 *context)
+{
+	u32 data = *(context + (ctxsw_prog_local_priv_register_ctl_o() >> 2));
+
+	return ctxsw_prog_local_priv_register_ctl_offset_v(data);
+}
+#endif /* CONFIG_NVGPU_DEBUGGER */
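
Most of the setters in this new file share one read-modify-write idiom: read the context-image word, clear the target field with the generated _m() mask, OR in the generated _f() field value, and write the word back. A minimal standalone model of that idiom follows (the helpers below are stand-ins, not the generated hw_ctxsw_prog_gm20b.h functions, and the values are invented):

#include <stdio.h>

typedef unsigned int u32;

/* stand-ins for generated helpers: _m() yields a mask, _f() a field value */
static u32 pm_mode_m(void)       { return 0x7U; }
static u32 pm_mode_ctxsw_f(void) { return 0x1U; }

int main(void)
{
	u32 data = 0xabcdef08U;	/* pretend this came from nvgpu_mem_rd() */

	/* clear the field, then OR in the new value, as the HALs above do */
	data = (data & ~pm_mode_m()) | pm_mode_ctxsw_f();

	/* pretend this goes back via nvgpu_mem_wr(); prints 0xabcdef09 */
	printf("pm word: 0x%08x\n", data);
	return 0;
}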

Changed file: hal/gr/ctxsw_prog/ctxsw_prog_gm20b.c

@@ -29,146 +29,6 @@
 #include <nvgpu/hw/gm20b/hw_ctxsw_prog_gm20b.h>
 
-#ifdef CONFIG_NVGPU_DEBUGGER
-u32 gm20b_ctxsw_prog_hw_get_gpccs_header_size(void)
-{
-	return ctxsw_prog_gpccs_header_stride_v();
-}
-
-u32 gm20b_ctxsw_prog_hw_get_extended_buffer_segments_size_in_bytes(void)
-{
-	return ctxsw_prog_extended_buffer_segments_size_in_bytes_v();
-}
-
-u32 gm20b_ctxsw_prog_hw_extended_marker_size_in_bytes(void)
-{
-	return ctxsw_prog_extended_marker_size_in_bytes_v();
-}
-
-u32 gm20b_ctxsw_prog_hw_get_perf_counter_control_register_stride(void)
-{
-	return ctxsw_prog_extended_sm_dsm_perf_counter_control_register_stride_v();
-}
-
-u32 gm20b_ctxsw_prog_get_main_image_ctx_id(struct gk20a *g,
-	struct nvgpu_mem *ctx_mem)
-{
-	return nvgpu_mem_rd(g, ctx_mem, ctxsw_prog_main_image_context_id_o());
-}
-
-void gm20b_ctxsw_prog_set_pm_ptr(struct gk20a *g, struct nvgpu_mem *ctx_mem,
-	u64 addr)
-{
-	addr = addr >> 8;
-	nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_pm_ptr_o(),
-		u64_lo32(addr));
-}
-
-void gm20b_ctxsw_prog_set_pm_mode(struct gk20a *g,
-	struct nvgpu_mem *ctx_mem, u32 mode)
-{
-	u32 data;
-
-	data = nvgpu_mem_rd(g, ctx_mem, ctxsw_prog_main_image_pm_o());
-	data = data & ~ctxsw_prog_main_image_pm_mode_m();
-	data |= mode;
-	nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_pm_o(), data);
-}
-
-void gm20b_ctxsw_prog_set_pm_smpc_mode(struct gk20a *g,
-	struct nvgpu_mem *ctx_mem, bool enable)
-{
-	u32 data;
-
-	data = nvgpu_mem_rd(g, ctx_mem, ctxsw_prog_main_image_pm_o());
-	data = data & ~ctxsw_prog_main_image_pm_smpc_mode_m();
-	data |= enable ?
-		ctxsw_prog_main_image_pm_smpc_mode_ctxsw_f() :
-		ctxsw_prog_main_image_pm_smpc_mode_no_ctxsw_f();
-	nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_pm_o(), data);
-}
-
-u32 gm20b_ctxsw_prog_hw_get_pm_mode_no_ctxsw(void)
-{
-	return ctxsw_prog_main_image_pm_mode_no_ctxsw_f();
-}
-
-u32 gm20b_ctxsw_prog_hw_get_pm_mode_ctxsw(void)
-{
-	return ctxsw_prog_main_image_pm_mode_ctxsw_f();
-}
-
-void gm20b_ctxsw_prog_set_cde_enabled(struct gk20a *g,
-	struct nvgpu_mem *ctx_mem)
-{
-	u32 data = nvgpu_mem_rd(g, ctx_mem, ctxsw_prog_main_image_ctl_o());
-
-	data |= ctxsw_prog_main_image_ctl_cde_enabled_f();
-	nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_ctl_o(), data);
-}
-
-void gm20b_ctxsw_prog_set_pc_sampling(struct gk20a *g,
-	struct nvgpu_mem *ctx_mem, bool enable)
-{
-	u32 data = nvgpu_mem_rd(g, ctx_mem, ctxsw_prog_main_image_pm_o());
-
-	data &= ~ctxsw_prog_main_image_pm_pc_sampling_m();
-	data |= ctxsw_prog_main_image_pm_pc_sampling_f(
-		nvgpu_safe_cast_bool_to_u32(enable));
-	nvgpu_mem_wr(g, ctx_mem, ctxsw_prog_main_image_pm_o(), data);
-}
-
-bool gm20b_ctxsw_prog_check_main_image_header_magic(u32 *context)
-{
-	u32 magic = *(context + (ctxsw_prog_main_image_magic_value_o() >> 2));
-
-	return magic == ctxsw_prog_main_image_magic_value_v_value_v();
-}
-
-bool gm20b_ctxsw_prog_check_local_header_magic(u32 *context)
-{
-	u32 magic = *(context + (ctxsw_prog_local_magic_value_o() >> 2));
-
-	return magic == ctxsw_prog_local_magic_value_v_value_v();
-}
-
-u32 gm20b_ctxsw_prog_get_num_gpcs(u32 *context)
-{
-	return *(context + (ctxsw_prog_main_image_num_gpcs_o() >> 2));
-}
-
-u32 gm20b_ctxsw_prog_get_num_tpcs(u32 *context)
-{
-	return *(context + (ctxsw_prog_local_image_num_tpcs_o() >> 2));
-}
-
-void gm20b_ctxsw_prog_get_extended_buffer_size_offset(u32 *context,
-	u32 *size, u32 *offset)
-{
-	u32 data = *(context + (ctxsw_prog_main_extended_buffer_ctl_o() >> 2));
-
-	*size = ctxsw_prog_main_extended_buffer_ctl_size_v(data);
-	*offset = ctxsw_prog_main_extended_buffer_ctl_offset_v(data);
-}
-
-void gm20b_ctxsw_prog_get_ppc_info(u32 *context, u32 *num_ppcs, u32 *ppc_mask)
-{
-	u32 data = *(context + (ctxsw_prog_local_image_ppc_info_o() >> 2));
-
-	*num_ppcs = ctxsw_prog_local_image_ppc_info_num_ppcs_v(data);
-	*ppc_mask = ctxsw_prog_local_image_ppc_info_ppc_mask_v(data);
-}
-
-u32 gm20b_ctxsw_prog_get_local_priv_register_ctl_offset(u32 *context)
-{
-	u32 data = *(context + (ctxsw_prog_local_priv_register_ctl_o() >> 2));
-
-	return ctxsw_prog_local_priv_register_ctl_offset_v(data);
-}
-#endif /* CONFIG_NVGPU_DEBUGGER */
-
 u32 gm20b_ctxsw_prog_hw_get_fecs_header_size(void)
 {
 	return ctxsw_prog_fecs_header_v();

Changed file: hal/ltc/ltc_gm20b_dbg.c (new file)

@@ -0,0 +1,139 @@
+/*
+ * GM20B L2
+ *
+ * Copyright (c) 2014-2019 NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifdef CONFIG_NVGPU_TRACE
+#include <trace/events/gk20a.h>
+#endif
+
+#include <nvgpu/timers.h>
+#include <nvgpu/enabled.h>
+#include <nvgpu/bug.h>
+#include <nvgpu/ltc.h>
+#include <nvgpu/fbp.h>
+#include <nvgpu/io.h>
+#include <nvgpu/utils.h>
+#include <nvgpu/gk20a.h>
+#include <nvgpu/safe_ops.h>
+
+#include <nvgpu/hw/gm20b/hw_ltc_gm20b.h>
+#include <nvgpu/hw/gm20b/hw_top_gm20b.h>
+#include <nvgpu/hw/gm20b/hw_pri_ringmaster_gm20b.h>
+
+#include "ltc_gm20b.h"
+
+#ifdef CONFIG_NVGPU_DEBUGGER
+/*
+ * LTC pri addressing
+ */
+bool gm20b_ltc_pri_is_ltc_addr(struct gk20a *g, u32 addr)
+{
+	return ((addr >= ltc_pltcg_base_v()) && (addr < ltc_pltcg_extent_v()));
+}
+
+bool gm20b_ltc_is_ltcs_ltss_addr(struct gk20a *g, u32 addr)
+{
+	u32 ltc_shared_base = ltc_ltcs_ltss_v();
+	u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
+
+	if (addr >= ltc_shared_base) {
+		return (addr < nvgpu_safe_add_u32(ltc_shared_base, lts_stride));
+	}
+
+	return false;
+}
+
+bool gm20b_ltc_is_ltcn_ltss_addr(struct gk20a *g, u32 addr)
+{
+	u32 lts_shared_base = ltc_ltc0_ltss_v();
+	u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
+	u32 addr_mask = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE) - 1U;
+	u32 base_offset = lts_shared_base & addr_mask;
+	u32 end_offset = nvgpu_safe_add_u32(base_offset, lts_stride);
+
+	return (!gm20b_ltc_is_ltcs_ltss_addr(g, addr)) &&
+		((addr & addr_mask) >= base_offset) &&
+		((addr & addr_mask) < end_offset);
+}
+
+static void gm20b_ltc_update_ltc_lts_addr(struct gk20a *g, u32 addr,
+	u32 ltc_num, u32 *priv_addr_table, u32 *priv_addr_table_index)
+{
+	u32 num_ltc_slices = g->ops.top.get_max_lts_per_ltc(g);
+	u32 index = *priv_addr_table_index;
+	u32 lts_num;
+	u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
+	u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
+
+	for (lts_num = 0; lts_num < num_ltc_slices;
+			lts_num = nvgpu_safe_add_u32(lts_num, 1U)) {
+		priv_addr_table[index] = nvgpu_safe_add_u32(
+			ltc_ltc0_lts0_v(),
+			nvgpu_safe_add_u32(
+				nvgpu_safe_add_u32(
+					nvgpu_safe_mult_u32(ltc_num, ltc_stride),
+					nvgpu_safe_mult_u32(lts_num, lts_stride)),
+				(addr & nvgpu_safe_sub_u32(
+					lts_stride, 1U))));
+		index = nvgpu_safe_add_u32(index, 1U);
+	}
+
+	*priv_addr_table_index = index;
+}
+
+void gm20b_ltc_split_lts_broadcast_addr(struct gk20a *g, u32 addr,
+					u32 *priv_addr_table,
+					u32 *priv_addr_table_index)
+{
+	u32 num_ltc = g->ltc->ltc_count;
+	u32 i, start, ltc_num = 0;
+	u32 pltcg_base = ltc_pltcg_base_v();
+	u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
+
+	for (i = 0; i < num_ltc; i++) {
+		start = nvgpu_safe_add_u32(pltcg_base,
+				nvgpu_safe_mult_u32(i, ltc_stride));
+		if (addr >= start) {
+			if (addr < nvgpu_safe_add_u32(start, ltc_stride)) {
+				ltc_num = i;
+				break;
+			}
+		}
+	}
+	gm20b_ltc_update_ltc_lts_addr(g, addr, ltc_num, priv_addr_table,
+					priv_addr_table_index);
+}
+
+void gm20b_ltc_split_ltc_broadcast_addr(struct gk20a *g, u32 addr,
+					u32 *priv_addr_table,
+					u32 *priv_addr_table_index)
+{
+	u32 num_ltc = g->ltc->ltc_count;
+	u32 ltc_num;
+
+	for (ltc_num = 0; ltc_num < num_ltc; ltc_num =
+			nvgpu_safe_add_u32(ltc_num, 1U)) {
+		gm20b_ltc_update_ltc_lts_addr(g, addr, ltc_num,
+				priv_addr_table, priv_addr_table_index);
+	}
+}
+#endif /* CONFIG_NVGPU_DEBUGGER */
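
The heart of gm20b_ltc_update_ltc_lts_addr() above is one formula: a broadcast LTC/LTS register offset expands into per-LTC, per-LTS unicast offsets as base + ltc * ltc_stride + lts * lts_stride + (addr & (lts_stride - 1)). A toy standalone version of that expansion follows (the base and stride values are invented; the real ones come from ltc_ltc0_lts0_v() and the litter values, and the nvgpu_safe_* overflow-checking wrappers are dropped for brevity):

#include <stdio.h>

typedef unsigned int u32;

#define LTC0_LTS0_BASE	0x140400U	/* hypothetical ltc_ltc0_lts0_v() */
#define LTC_STRIDE	0x2000U		/* hypothetical GPU_LIT_LTC_STRIDE */
#define LTS_STRIDE	0x400U		/* hypothetical GPU_LIT_LTS_STRIDE */

int main(void)
{
	u32 addr = 0x140404U;	/* some broadcast LTS register offset */
	u32 ltc, lts;

	/* expand the broadcast offset for 2 LTCs x 2 slices */
	for (ltc = 0U; ltc < 2U; ltc++) {
		for (lts = 0U; lts < 2U; lts++) {
			u32 unicast = LTC0_LTS0_BASE +
				ltc * LTC_STRIDE + lts * LTS_STRIDE +
				(addr & (LTS_STRIDE - 1U));
			printf("LTC%u/LTS%u -> 0x%06x\n", ltc, lts, unicast);
		}
	}
	return 0;
}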

Changed file: hal/ltc/ltc_gm20b.c

@@ -42,102 +42,6 @@
 #include "ltc_gm20b.h"
 
-#ifdef CONFIG_NVGPU_DEBUGGER
-/*
- * LTC pri addressing
- */
-bool gm20b_ltc_pri_is_ltc_addr(struct gk20a *g, u32 addr)
-{
-	return ((addr >= ltc_pltcg_base_v()) && (addr < ltc_pltcg_extent_v()));
-}
-
-bool gm20b_ltc_is_ltcs_ltss_addr(struct gk20a *g, u32 addr)
-{
-	u32 ltc_shared_base = ltc_ltcs_ltss_v();
-	u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
-
-	if (addr >= ltc_shared_base) {
-		return (addr < nvgpu_safe_add_u32(ltc_shared_base, lts_stride));
-	}
-
-	return false;
-}
-
-bool gm20b_ltc_is_ltcn_ltss_addr(struct gk20a *g, u32 addr)
-{
-	u32 lts_shared_base = ltc_ltc0_ltss_v();
-	u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
-	u32 addr_mask = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE) - 1U;
-	u32 base_offset = lts_shared_base & addr_mask;
-	u32 end_offset = nvgpu_safe_add_u32(base_offset, lts_stride);
-
-	return (!gm20b_ltc_is_ltcs_ltss_addr(g, addr)) &&
-		((addr & addr_mask) >= base_offset) &&
-		((addr & addr_mask) < end_offset);
-}
-
-static void gm20b_ltc_update_ltc_lts_addr(struct gk20a *g, u32 addr,
-	u32 ltc_num, u32 *priv_addr_table, u32 *priv_addr_table_index)
-{
-	u32 num_ltc_slices = g->ops.top.get_max_lts_per_ltc(g);
-	u32 index = *priv_addr_table_index;
-	u32 lts_num;
-	u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
-	u32 lts_stride = nvgpu_get_litter_value(g, GPU_LIT_LTS_STRIDE);
-
-	for (lts_num = 0; lts_num < num_ltc_slices;
-			lts_num = nvgpu_safe_add_u32(lts_num, 1U)) {
-		priv_addr_table[index] = nvgpu_safe_add_u32(
-			ltc_ltc0_lts0_v(),
-			nvgpu_safe_add_u32(
-				nvgpu_safe_add_u32(
-					nvgpu_safe_mult_u32(ltc_num, ltc_stride),
-					nvgpu_safe_mult_u32(lts_num, lts_stride)),
-				(addr & nvgpu_safe_sub_u32(
-					lts_stride, 1U))));
-		index = nvgpu_safe_add_u32(index, 1U);
-	}
-
-	*priv_addr_table_index = index;
-}
-
-void gm20b_ltc_split_lts_broadcast_addr(struct gk20a *g, u32 addr,
-					u32 *priv_addr_table,
-					u32 *priv_addr_table_index)
-{
-	u32 num_ltc = g->ltc->ltc_count;
-	u32 i, start, ltc_num = 0;
-	u32 pltcg_base = ltc_pltcg_base_v();
-	u32 ltc_stride = nvgpu_get_litter_value(g, GPU_LIT_LTC_STRIDE);
-
-	for (i = 0; i < num_ltc; i++) {
-		start = nvgpu_safe_add_u32(pltcg_base,
-				nvgpu_safe_mult_u32(i, ltc_stride));
-		if (addr >= start) {
-			if (addr < nvgpu_safe_add_u32(start, ltc_stride)) {
-				ltc_num = i;
-				break;
-			}
-		}
-	}
-	gm20b_ltc_update_ltc_lts_addr(g, addr, ltc_num, priv_addr_table,
-					priv_addr_table_index);
-}
-
-void gm20b_ltc_split_ltc_broadcast_addr(struct gk20a *g, u32 addr,
-					u32 *priv_addr_table,
-					u32 *priv_addr_table_index)
-{
-	u32 num_ltc = g->ltc->ltc_count;
-	u32 ltc_num;
-
-	for (ltc_num = 0; ltc_num < num_ltc; ltc_num =
-			nvgpu_safe_add_u32(ltc_num, 1U)) {
-		gm20b_ltc_update_ltc_lts_addr(g, addr, ltc_num,
-				priv_addr_table, priv_addr_table_index);
-	}
-}
-#endif /* CONFIG_NVGPU_DEBUGGER */
-
 /*
  * Performs a full flush of the L2 cache.
  */