mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 09:12:24 +03:00
Remove all nvgpu_next files and move the code into corresponding nvgpu
files. Merge nvgpu-next-*.yaml into nvgpu-.yaml files.

Jira NVGPU-4771

Change-Id: I595311be3c7bbb4f6314811e68712ff01763801e
Signed-off-by: Antony Clince Alex <aalex@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2547557
Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com>
Reviewed-by: Mahantesh Kumbar <mkumbar@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
1075 lines
35 KiB
C
/*
 * GA10b GPU GR
 *
 * Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <nvgpu/dma.h>
#include <nvgpu/log.h>
#include <nvgpu/debug.h>
#include <nvgpu/enabled.h>
#include <nvgpu/fuse.h>
#include <nvgpu/debugger.h>
#include <nvgpu/error_notifier.h>
#include <nvgpu/io.h>
#include <nvgpu/utils.h>
#include <nvgpu/bitops.h>
#include <nvgpu/gk20a.h>
#include <nvgpu/regops.h>
#include <nvgpu/gr/ctx.h>
#include <nvgpu/gr/config.h>
#include <nvgpu/gr/gr.h>
#include <nvgpu/gr/gr_instances.h>
#include <nvgpu/gr/warpstate.h>
#include <nvgpu/channel.h>
#include <nvgpu/engines.h>
#include <nvgpu/engine_status.h>
#include <nvgpu/fbp.h>
#include <nvgpu/nvgpu_err.h>
#include <nvgpu/netlist.h>
#include <nvgpu/gr/obj_ctx.h>

#include "gr_ga10b.h"
#include "hal/gr/gr/gr_gk20a.h"
#include "hal/gr/gr/gr_gv11b.h"
#include "hal/gr/gr/gr_pri_gk20a.h"
#include "hal/gr/gr/gr_pri_ga10b.h"
#include "hal/gr/ctxsw_prog/ctxsw_prog_ga10b.h"
#include "common/gr/gr_priv.h"

#include <nvgpu/hw/ga10b/hw_gr_ga10b.h>
#include <nvgpu/hw/ga10b/hw_proj_ga10b.h>

#define ILLEGAL_ID ~U32(0U)

static void gr_ga10b_dump_gr_per_sm_regs(struct gk20a *g,
		struct nvgpu_debug_context *o,
		u32 gpc, u32 tpc, u32 sm, u32 offset)
{
	gk20a_debug_output(o,
		"NV_PGRAPH_PRI_GPC%d_TPC%d_SM%d_HWW_WARP_ESR: 0x%x\n",
		gpc, tpc, sm, nvgpu_readl(g,
		nvgpu_safe_add_u32(gr_gpc0_tpc0_sm0_hww_warp_esr_r(),
		offset)));

	gk20a_debug_output(o,
		"NV_PGRAPH_PRI_GPC%d_TPC%d_SM%d_HWW_WARP_ESR_REPORT_MASK: 0x%x\n",
		gpc, tpc, sm, nvgpu_readl(g,
		nvgpu_safe_add_u32(gr_gpc0_tpc0_sm0_hww_warp_esr_report_mask_r(),
		offset)));

	gk20a_debug_output(o,
		"NV_PGRAPH_PRI_GPC%d_TPC%d_SM%d_HWW_GLOBAL_ESR: 0x%x\n",
		gpc, tpc, sm, nvgpu_readl(g,
		nvgpu_safe_add_u32(gr_gpc0_tpc0_sm0_hww_global_esr_r(),
		offset)));

	gk20a_debug_output(o,
		"NV_PGRAPH_PRI_GPC%d_TPC%d_SM%d_HWW_GLOBAL_ESR_REPORT_MASK: 0x%x\n",
		gpc, tpc, sm, nvgpu_readl(g,
		nvgpu_safe_add_u32(gr_gpc0_tpc0_sm0_hww_global_esr_report_mask_r(),
		offset)));

	gk20a_debug_output(o,
		"NV_PGRAPH_PRI_GPC%d_TPC%d_SM%d_DBGR_CONTROL0: 0x%x\n",
		gpc, tpc, sm, nvgpu_readl(g,
		nvgpu_safe_add_u32(gr_gpc0_tpc0_sm0_dbgr_control0_r(),
		offset)));

	gk20a_debug_output(o,
		"NV_PGRAPH_PRI_GPC%d_TPC%d_SM%d_DBGR_STATUS0: 0x%x\n",
		gpc, tpc, sm, nvgpu_readl(g,
		nvgpu_safe_add_u32(gr_gpc0_tpc0_sm0_dbgr_status0_r(),
		offset)));
}

static void gr_ga10b_dump_gr_sm_regs(struct gk20a *g,
		struct nvgpu_debug_context *o)
{
	u32 gpc, tpc, sm, sm_per_tpc;
	u32 gpc_offset, tpc_offset, offset;
	struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);

	gk20a_debug_output(o,
		"NV_PGRAPH_PRI_GPCS_TPCS_SMS_HWW_GLOBAL_ESR_REPORT_MASK: 0x%x\n",
		nvgpu_readl(g,
			gr_gpcs_tpcs_sms_hww_global_esr_report_mask_r()));
	gk20a_debug_output(o,
		"NV_PGRAPH_PRI_GPCS_TPCS_SMS_HWW_WARP_ESR_REPORT_MASK: 0x%x\n",
		nvgpu_readl(g, gr_gpcs_tpcs_sms_hww_warp_esr_report_mask_r()));
	gk20a_debug_output(o,
		"NV_PGRAPH_PRI_GPCS_TPCS_SMS_HWW_GLOBAL_ESR: 0x%x\n",
		nvgpu_readl(g, gr_gpcs_tpcs_sms_hww_global_esr_r()));
	gk20a_debug_output(o,
		"NV_PGRAPH_PRI_GPCS_TPCS_SMS_DBGR_CONTROL0: 0x%x\n",
		nvgpu_readl(g, gr_gpcs_tpcs_sms_dbgr_control0_r()));
	gk20a_debug_output(o,
		"NV_PGRAPH_PRI_GPCS_TPCS_SMS_DBGR_STATUS0: 0x%x\n",
		nvgpu_readl(g, gr_gpcs_tpcs_sms_dbgr_status0_r()));
	gk20a_debug_output(o,
		"NV_PGRAPH_PRI_GPCS_TPCS_SMS_DBGR_BPT_PAUSE_MASK_0: 0x%x\n",
		nvgpu_readl(g, gr_gpcs_tpcs_sms_dbgr_bpt_pause_mask_0_r()));
	gk20a_debug_output(o,
		"NV_PGRAPH_PRI_GPCS_TPCS_SMS_DBGR_BPT_PAUSE_MASK_1: 0x%x\n",
		nvgpu_readl(g, gr_gpcs_tpcs_sms_dbgr_bpt_pause_mask_1_r()));

	sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC);
	for (gpc = 0U;
	     gpc < nvgpu_gr_config_get_gpc_count(gr->config); gpc++) {
		gpc_offset = nvgpu_gr_gpc_offset(g, gpc);

		for (tpc = 0U;
		     tpc < nvgpu_gr_config_get_gpc_tpc_count(gr->config, gpc);
		     tpc++) {
			tpc_offset = nvgpu_gr_tpc_offset(g, tpc);

			for (sm = 0U; sm < sm_per_tpc; sm++) {
				offset = nvgpu_safe_add_u32(
						nvgpu_safe_add_u32(gpc_offset,
							tpc_offset),
						nvgpu_gr_sm_offset(g, sm));

				gr_ga10b_dump_gr_per_sm_regs(g, o,
					gpc, tpc, sm, offset);
			}
		}
	}
}

static void gr_ga10b_dump_tpc_activity_regs(struct gk20a *g,
		struct nvgpu_debug_context *o)
{
	struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
	u32 gpc_index = 0U;
	u32 tpc_count = 0U, tpc_stride = 0U;
	u32 reg_index = 0U, offset = 0U;
	u32 i = 0U;

	if (nvgpu_gr_config_get_gpc_tpc_count_base(gr->config) == NULL) {
		return;
	}

	tpc_count = nvgpu_gr_config_get_gpc_tpc_count(gr->config, gpc_index);
	tpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);

	for (i = 0U; i < tpc_count; i++) {
		offset = nvgpu_safe_mult_u32(tpc_stride, i);
		reg_index = nvgpu_safe_add_u32(offset,
				gr_pri_gpc0_tpc0_tpccs_tpc_activity_0_r());

		gk20a_debug_output(o,
			"NV_PGRAPH_PRI_GPC0_TPC%d_TPCCS_TPC_ACTIVITY0: 0x%x\n",
			i, nvgpu_readl(g, reg_index));
	}
}

int gr_ga10b_dump_gr_status_regs(struct gk20a *g,
		struct nvgpu_debug_context *o)
{
	u32 gr_engine_id;
	struct nvgpu_engine_status_info engine_status;

	gr_engine_id = nvgpu_engine_get_gr_id(g);

	gk20a_debug_output(o, "NV_PGRAPH_STATUS: 0x%x\n",
		nvgpu_readl(g, gr_status_r()));
	gk20a_debug_output(o, "NV_PGRAPH_STATUS1: 0x%x\n",
		nvgpu_readl(g, gr_status_1_r()));
	gk20a_debug_output(o, "NV_PGRAPH_ENGINE_STATUS: 0x%x\n",
		nvgpu_readl(g, gr_engine_status_r()));
	gk20a_debug_output(o, "NV_PGRAPH_GRFIFO_STATUS : 0x%x\n",
		nvgpu_readl(g, gr_gpfifo_status_r()));
	gk20a_debug_output(o, "NV_PGRAPH_GRFIFO_CONTROL : 0x%x\n",
		nvgpu_readl(g, gr_gpfifo_ctl_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_HOST_INT_STATUS : 0x%x\n",
		nvgpu_readl(g, gr_fecs_host_int_status_r()));
	gk20a_debug_output(o, "NV_PGRAPH_EXCEPTION : 0x%x\n",
		nvgpu_readl(g, gr_exception_r()));
	gk20a_debug_output(o, "NV_PGRAPH_FECS_INTR : 0x%x\n",
		nvgpu_readl(g, gr_fecs_intr_r()));
	g->ops.engine_status.read_engine_status_info(g, gr_engine_id,
		&engine_status);
	gk20a_debug_output(o, "NV_PFIFO_ENGINE_STATUS(GR) : 0x%x\n",
		engine_status.reg_data);
	gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY0: 0x%x\n",
		nvgpu_readl(g, gr_activity_0_r()));
	gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY1: 0x%x\n",
		nvgpu_readl(g, gr_activity_1_r()));
	gk20a_debug_output(o, "NV_PGRAPH_ACTIVITY4: 0x%x\n",
		nvgpu_readl(g, gr_activity_4_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_SKED_ACTIVITY: 0x%x\n",
		nvgpu_readl(g, gr_pri_sked_activity_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY0: 0x%x\n",
		nvgpu_readl(g, gr_pri_gpc0_gpccs_gpc_activity0_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY1: 0x%x\n",
		nvgpu_readl(g, gr_pri_gpc0_gpccs_gpc_activity1_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY2: 0x%x\n",
		nvgpu_readl(g, gr_pri_gpc0_gpccs_gpc_activity2_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY3: 0x%x\n",
		nvgpu_readl(g, gr_pri_gpc0_gpccs_gpc_activity3_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_ACTIVITY4: 0x%x\n",
		nvgpu_readl(g, gr_pri_gpc0_gpccs_gpc_activity4_r()));

	gr_ga10b_dump_tpc_activity_regs(g, o);

	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY0: 0x%x\n",
		nvgpu_readl(g, gr_pri_gpcs_gpccs_gpc_activity_0_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY1: 0x%x\n",
		nvgpu_readl(g, gr_pri_gpcs_gpccs_gpc_activity_1_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY2: 0x%x\n",
		nvgpu_readl(g, gr_pri_gpcs_gpccs_gpc_activity_2_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY3: 0x%x\n",
		nvgpu_readl(g, gr_pri_gpcs_gpccs_gpc_activity_3_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_GPCCS_GPC_ACTIVITY4: 0x%x\n",
		nvgpu_readl(g, gr_pri_gpcs_gpccs_gpc_activity_4_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_TPCS_TPCCS_TPC_ACTIVITY0: 0x%x\n",
		nvgpu_readl(g, gr_pri_gpcs_tpcs_tpccs_tpc_activity_0_r()));
	if (!nvgpu_is_enabled(g, NVGPU_SUPPORT_MIG)) {
		gk20a_debug_output(o, "NV_PGRAPH_PRI_DS_MPIPE_STATUS: 0x%x\n",
			nvgpu_readl(g, gr_pri_ds_mpipe_status_r()));
	}
	gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_GO_IDLE_TIMEOUT : 0x%x\n",
		nvgpu_readl(g, gr_fe_go_idle_timeout_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_GO_IDLE_INFO : 0x%x\n",
		nvgpu_readl(g, gr_pri_fe_go_idle_info_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TEX_M_TEX_SUBUNITS_STATUS: 0x%x\n",
		nvgpu_readl(g, gr_pri_gpc0_tpc0_tex_m_tex_subunits_status_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_CWD_FS: 0x%x\n",
		nvgpu_readl(g, gr_cwd_fs_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_FE_TPC_FS(0): 0x%x\n",
		nvgpu_readl(g, gr_fe_tpc_fs_r(0)));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_CWD_GPC_TPC_ID: 0x%x\n",
		nvgpu_readl(g, gr_cwd_gpc_tpc_id_r(0)));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_CWD_SM_ID(0): 0x%x\n",
		nvgpu_readl(g, gr_cwd_sm_id_r(0)));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CTXSW_STATUS_FE_0: 0x%x\n",
		g->ops.gr.falcon.read_fecs_ctxsw_status0(g));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CTXSW_STATUS_1: 0x%x\n",
		g->ops.gr.falcon.read_fecs_ctxsw_status1(g));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_CTXSW_STATUS_GPC_0: 0x%x\n",
		nvgpu_readl(g, gr_gpc0_gpccs_ctxsw_status_gpc_0_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_CTXSW_STATUS_1: 0x%x\n",
		nvgpu_readl(g, gr_gpc0_gpccs_ctxsw_status_1_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CTXSW_IDLESTATE : 0x%x\n",
		nvgpu_readl(g, gr_fecs_ctxsw_idlestate_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_CTXSW_IDLESTATE : 0x%x\n",
		nvgpu_readl(g, gr_gpc0_gpccs_ctxsw_idlestate_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_CURRENT_CTX : 0x%x\n",
		g->ops.gr.falcon.get_current_ctx(g));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_NEW_CTX : 0x%x\n",
		nvgpu_readl(g, gr_fecs_new_ctx_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_HOST_INT_ENABLE : 0x%x\n",
		nvgpu_readl(g, gr_fecs_host_int_enable_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_FECS_HOST_INT_STATUS : 0x%x\n",
		nvgpu_readl(g, gr_fecs_host_int_status_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_ROP0_CROP_STATUS1 : 0x%x\n",
		nvgpu_readl(g, gr_pri_gpcs_rop0_crop_status1_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_ROPS_CROP_STATUS1 : 0x%x\n",
		nvgpu_readl(g, gr_pri_gpcs_rops_crop_status1_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_ROP0_ZROP_STATUS : 0x%x\n",
		nvgpu_readl(g, gr_pri_gpcs_rop0_zrop_status_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_ROP0_ZROP_STATUS2 : 0x%x\n",
		nvgpu_readl(g, gr_pri_gpcs_rop0_zrop_status2_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_ROP1_ZROP_STATUS: 0x%x\n",
		nvgpu_readl(g, nvgpu_safe_add_u32(
			gr_pri_gpcs_rop0_zrop_status_r(),
			nvgpu_get_litter_value(g, GPU_LIT_ROP_STRIDE))));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_ROP1_ZROP_STATUS2: 0x%x\n",
		nvgpu_readl(g, nvgpu_safe_add_u32(
			gr_pri_gpcs_rop0_zrop_status2_r(),
			nvgpu_get_litter_value(g, GPU_LIT_ROP_STRIDE))));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_ROPS_ZROP_STATUS : 0x%x\n",
		nvgpu_readl(g, gr_pri_gpcs_rops_zrop_status_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPCS_ROPS_ZROP_STATUS2 : 0x%x\n",
		nvgpu_readl(g, gr_pri_gpcs_rops_zrop_status2_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_EXCEPTION: 0x%x\n",
		nvgpu_readl(g, gr_pri_gpc0_gpccs_gpc_exception_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_GPCCS_GPC_EXCEPTION_EN: 0x%x\n",
		nvgpu_readl(g, gr_pri_gpc0_gpccs_gpc_exception_en_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_EXCEPTION: 0x%x\n",
		nvgpu_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_r()));
	gk20a_debug_output(o, "NV_PGRAPH_PRI_GPC0_TPC0_TPCCS_TPC_EXCEPTION_EN: 0x%x\n",
		nvgpu_readl(g, gr_pri_gpc0_tpc0_tpccs_tpc_exception_en_r()));

	gr_ga10b_dump_gr_sm_regs(g, o);

	return 0;
}

void gr_ga10b_set_circular_buffer_size(struct gk20a *g, u32 data)
{
	struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
	u32 gpc_index, ppc_index, stride, val;
	u32 cb_size_steady = data * 4U, cb_size;
	u32 attrib_cb_size = g->ops.gr.init.get_attrib_cb_size(g,
		nvgpu_gr_config_get_tpc_count(gr->config));

	nvgpu_log_fn(g, " ");

	if (cb_size_steady > attrib_cb_size) {
		cb_size_steady = attrib_cb_size;
	}
	if (nvgpu_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r()) !=
		nvgpu_readl(g,
			gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r())) {
		cb_size = cb_size_steady +
			(gr_gpc0_ppc0_cbm_beta_cb_size_v_gfxp_v() -
			 gr_gpc0_ppc0_cbm_beta_cb_size_v_default_v());
	} else {
		cb_size = cb_size_steady;
	}

	nvgpu_writel(g, gr_ds_tga_constraintlogic_beta_r(),
		(nvgpu_readl(g, gr_ds_tga_constraintlogic_beta_r()) &
		 ~gr_ds_tga_constraintlogic_beta_cbsize_f(~U32(0U))) |
		gr_ds_tga_constraintlogic_beta_cbsize_f(cb_size_steady));

	for (gpc_index = 0;
	     gpc_index < nvgpu_gr_config_get_gpc_count(gr->config);
	     gpc_index++) {
		stride = proj_gpc_stride_v() * gpc_index;

		for (ppc_index = 0;
		     ppc_index < nvgpu_gr_config_get_gpc_ppc_count(gr->config,
								gpc_index);
		     ppc_index++) {

			val = nvgpu_readl(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() +
				stride +
				proj_ppc_in_gpc_stride_v() * ppc_index);

			val = set_field(val,
				gr_gpc0_ppc0_cbm_beta_cb_size_v_m(),
				gr_gpc0_ppc0_cbm_beta_cb_size_v_f(cb_size *
					nvgpu_gr_config_get_pes_tpc_count(gr->config,
						gpc_index, ppc_index)));

			nvgpu_writel(g, gr_gpc0_ppc0_cbm_beta_cb_size_r() +
				stride +
				proj_ppc_in_gpc_stride_v() * ppc_index, val);

			nvgpu_writel(g, proj_ppc_in_gpc_stride_v() * ppc_index +
				gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_r() +
				stride,
				gr_gpc0_ppc0_cbm_beta_steady_state_cb_size_v_f(
					cb_size_steady));

			val = nvgpu_readl(g, gr_gpcs_swdx_tc_beta_cb_size_r(
				ppc_index + gpc_index));

			val = set_field(val,
				gr_gpcs_swdx_tc_beta_cb_size_v_m(),
				gr_gpcs_swdx_tc_beta_cb_size_v_f(
					cb_size_steady *
					nvgpu_gr_config_get_gpc_ppc_count(gr->config,
						gpc_index)));

			nvgpu_writel(g, gr_gpcs_swdx_tc_beta_cb_size_r(
				ppc_index + gpc_index), val);
		}
	}
}
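
/*
 * Note on the sizing above (an inference from the register value names, not
 * from driver documentation): when the live beta CB size differs from the
 * steady-state size, graphics preemption (GFXP) is configured, and the
 * gfxp_v() - default_v() delta preserves that extra GFXP headroom on top of
 * the requested steady-state size.
 */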

void ga10b_gr_set_gpcs_rops_crop_debug4(struct gk20a *g, u32 data)
{
	u32 val;

	nvgpu_log_fn(g, " ");

	val = nvgpu_readl(g, gr_pri_gpcs_rops_crop_debug4_r());
	if ((data & gr_pri_gpcs_rops_crop_debug4_clamp_fp_blend_s()) ==
			gr_pri_gpcs_rops_crop_debug4_clamp_fp_blend_to_maxval_v()) {
		val = set_field(val,
			gr_pri_gpcs_rops_crop_debug4_clamp_fp_blend_m(),
			gr_pri_gpcs_rops_crop_debug4_clamp_fp_blend_to_maxval_f());
	} else if ((data & gr_pri_gpcs_rops_crop_debug4_clamp_fp_blend_s()) ==
			gr_pri_gpcs_rops_crop_debug4_clamp_fp_blend_to_inf_v()) {
		val = set_field(val,
			gr_pri_gpcs_rops_crop_debug4_clamp_fp_blend_m(),
			gr_pri_gpcs_rops_crop_debug4_clamp_fp_blend_to_inf_f());
	} else {
		nvgpu_warn(g,
			"wrong data sent for crop_debug4: 0x%08x", data);
		return;
	}
	nvgpu_writel(g, gr_pri_gpcs_rops_crop_debug4_r(), val);
}

#ifdef CONFIG_NVGPU_DEBUGGER
bool ga10b_gr_check_warp_esr_error(struct gk20a *g, u32 warp_esr_error)
{
	u32 index = 0U;
	bool esr_err = false;

	struct warp_esr_error_table_s {
		u32 error_value;
		const char *error_name;
	};

	struct warp_esr_error_table_s warp_esr_error_table[] = {
		{ gr_gpc0_tpc0_sm0_hww_warp_esr_error_api_stack_error_f(),
				"API STACK ERROR"},
		{ gr_gpc0_tpc0_sm0_hww_warp_esr_error_misaligned_pc_f(),
				"MISALIGNED PC ERROR"},
		{ gr_gpc0_tpc0_sm0_hww_warp_esr_error_pc_overflow_f(),
				"PC OVERFLOW ERROR"},
		{ gr_gpc0_tpc0_sm0_hww_warp_esr_error_misaligned_reg_f(),
				"MISALIGNED REG ERROR"},
		{ gr_gpc0_tpc0_sm0_hww_warp_esr_error_illegal_instr_encoding_f(),
				"ILLEGAL INSTRUCTION ENCODING ERROR"},
		{ gr_gpc0_tpc0_sm0_hww_warp_esr_error_illegal_instr_param_f(),
				"ILLEGAL INSTRUCTION PARAM ERROR"},
		{ gr_gpc0_tpc0_sm0_hww_warp_esr_error_oor_reg_f(),
				"OOR REG ERROR"},
		{ gr_gpc0_tpc0_sm0_hww_warp_esr_error_oor_addr_f(),
				"OOR ADDR ERROR"},
		{ gr_gpc0_tpc0_sm0_hww_warp_esr_error_misaligned_addr_f(),
				"MISALIGNED ADDR ERROR"},
		{ gr_gpc0_tpc0_sm0_hww_warp_esr_error_invalid_addr_space_f(),
				"INVALID ADDR SPACE ERROR"},
		{ gr_gpc0_tpc0_sm0_hww_warp_esr_error_invalid_const_addr_ldc_f(),
				"INVALID ADDR LDC ERROR"},
		{ gr_gpc0_tpc0_sm0_hww_warp_esr_error_mmu_fault_f(),
				"MMU FAULT ERROR"},
		{ gr_gpc0_tpc0_sm0_hww_warp_esr_error_tex_format_f(),
				"TEX FORMAT ERROR"},
		{ gr_gpc0_tpc0_sm0_hww_warp_esr_error_tex_layout_f(),
				"TEX LAYOUT ERROR"},
		{ gr_gpc0_tpc0_sm0_hww_warp_esr_error_mmu_nack_f(),
				"MMU NACK"},
		{ gr_gpc0_tpc0_sm0_hww_warp_esr_error_arrive_f(),
				"ARRIVE ERROR"},
	};

	for (index = 0; index < ARRAY_SIZE(warp_esr_error_table); index++) {
		if (warp_esr_error_table[index].error_value == warp_esr_error) {
			esr_err = true;
			nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
				"WARP_ESR %s(0x%x)",
				warp_esr_error_table[index].error_name,
				warp_esr_error);
			break;
		}
	}

	return esr_err;
}

/*
 * The context switched registers are saved as part of the context switch
 * image.
 *
 * The regops interface writes/reads these locations within the saved context
 * switch image when the context is not resident.
 */
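
/*
 * Illustrative flow (a sketch, not verbatim driver code): a regop on priv
 * address `addr` against a non-resident context is serviced roughly as
 *
 *	g->ops.gr.decode_priv_addr(g, addr, &addr_type, ...);
 *	g->ops.gr.create_priv_addr_table(g, addr, priv_addr_table, &count);
 *	for each entry in priv_addr_table:
 *		gr_ga10b_find_priv_offset_in_buffer(g, entry,
 *			context_buffer, context_buffer_size, &priv_offset);
 *		read/write the saved image at priv_offset
 *
 * The HAL entry points named above are the ones defined in this file; the
 * surrounding loop structure is an assumption for illustration only.
 */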

/*
 * This function decodes a priv address and returns the partition type and
 * numbers.
 */
int gr_ga10b_decode_priv_addr(struct gk20a *g, u32 addr,
	enum ctxsw_addr_type *addr_type,
	u32 *gpc_num, u32 *tpc_num, u32 *ppc_num, u32 *be_num,
	u32 *broadcast_flags)
{
	u32 gpc_addr;

	/*
	 * Special handling for registers under: ctx_reg_LTS_bc
	 *
	 * Unlike the other ltc registers, which are stored as part of the
	 * pm_ctxsw buffer, these are stored in the fecs ctxsw image priv
	 * segment, regionid: NETLIST_REGIONID_CTXREG_LTS.
	 */
	if (g->ops.ltc.pri_is_ltc_addr(g, addr) &&
			g->ops.ltc.pri_is_lts_tstg_addr(g, addr)) {
		*addr_type = CTXSW_ADDR_TYPE_LTS_MAIN;
		if (g->ops.ltc.is_ltcs_ltss_addr(g, addr)) {
			*broadcast_flags |= PRI_BROADCAST_FLAGS_LTCS;
		} else if (g->ops.ltc.is_ltcn_ltss_addr(g, addr)) {
			*broadcast_flags |= PRI_BROADCAST_FLAGS_LTSS;
		}
		return 0;
	} else if (nvgpu_is_enabled(g, NVGPU_SUPPORT_ROP_IN_GPC) &&
			pri_is_gpc_addr(g, addr)) {
		gpc_addr = pri_gpccs_addr_mask(g, addr);
		if (pri_is_rop_in_gpc_addr(g, gpc_addr)) {
			if (pri_is_rop_in_gpc_addr_shared(g, gpc_addr)) {
				*broadcast_flags |= PRI_BROADCAST_FLAGS_ROP;
			}
			*addr_type = CTXSW_ADDR_TYPE_ROP;
			return 0;
		}
	}

	return gr_gv11b_decode_priv_addr(g, addr, addr_type, gpc_num,
			tpc_num, ppc_num, be_num, broadcast_flags);
}

int gr_ga10b_create_priv_addr_table(struct gk20a *g,
	u32 addr,
	u32 *priv_addr_table,
	u32 *num_registers)
{
	enum ctxsw_addr_type addr_type;
	u32 gpc_num = 0U, tpc_num = 0U, ppc_num = 0U, be_num = 0U;
	u32 broadcast_flags = 0U;
	u32 t;
	int err;

	t = 0U;
	*num_registers = 0U;

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "addr=0x%x", addr);

	err = g->ops.gr.decode_priv_addr(g, addr, &addr_type,
			&gpc_num, &tpc_num, &ppc_num, &be_num,
			&broadcast_flags);
	nvgpu_log(g, gpu_dbg_gpu_dbg, "addr_type = %d", addr_type);
	if (err != 0) {
		return err;
	}

	/*
	 * LIST_ctx_reg_LTS_bc contains broadcast registers, so convert LTS
	 * unicast addresses of the form LTCnLTSn, LTCSLTSn, or LTCnLTSS to
	 * LTCSLTSS.
	 */
	if (addr_type == CTXSW_ADDR_TYPE_LTS_MAIN) {
		if (broadcast_flags & PRI_BROADCAST_FLAGS_LTCS) {
			priv_addr_table[t++] = addr;
		} else {
			priv_addr_table[t++] =
				g->ops.ltc.pri_shared_addr(g, addr);
		}
		*num_registers = t;
		return 0;
	}

	/*
	 * At present, LIST_pm_ctx_reg_ROP contains only broadcast addresses.
	 * Hence, ROP unicast addresses are not ctxsw'ed; only broadcast
	 * addresses are ctxsw'ed. Therefore, convert all ROP unicast
	 * addresses to broadcast.
	 */
	if (addr_type == CTXSW_ADDR_TYPE_ROP) {
		if (broadcast_flags & PRI_BROADCAST_FLAGS_ROP) {
			priv_addr_table[t++] = addr;
		} else {
			priv_addr_table[t++] =
				pri_rop_in_gpc_shared_addr(g, addr);
		}
		*num_registers = t;
		return 0;
	}

	return gr_gv11b_create_priv_addr_table(g, addr, priv_addr_table,
			num_registers);
}
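
/*
 * Example (illustrative; the concrete register naming is assumed from the
 * comments above): a unicast LTS address, say one under LTC0_LTS0, does not
 * appear in the broadcast-only LIST_ctx_reg_LTS_bc, so the single table
 * entry produced above is its LTCS_LTSS alias from
 * g->ops.ltc.pri_shared_addr(); ROP unicast addresses are likewise aliased
 * through pri_rop_in_gpc_shared_addr(). The caller then resolves that one
 * broadcast address against the saved context image.
 */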

/*
 * The sys, tpc, etpc, ppc and gpc ctxsw_reg bundles are divided into compute
 * and gfx lists; LTS is the exception and still uses a single list. So, for
 * any given pri address, search first in the compute list, then in the
 * graphics list. On finding a match, this returns one of the following:
 * - FOUND_IN_CTXSWBUF_PRIV_REGLIST: legacy priv reglist.
 * - FOUND_IN_CTXSWBUF_PRIV_COMPUTE_REGLIST: new compute priv reglist.
 * - FOUND_IN_CTXSWBUF_PRIV_GFX_REGLIST: new graphics priv reglist.
 */
int gr_ga10b_process_context_buffer_priv_segment(struct gk20a *g,
	enum ctxsw_addr_type addr_type,
	u32 pri_addr,
	u32 gpc_num, u32 num_tpcs,
	u32 num_ppcs, u32 ppc_mask,
	u32 *priv_offset)
{
	u32 i;
	u32 address, base_address;
	u32 sys_offset, gpc_offset, tpc_offset, ppc_offset;
	u32 ppc_num, tpc_num, tpc_addr, gpc_addr, ppc_addr;
	struct netlist_aiv_list *list;
	struct netlist_aiv *reg;
	u32 gpc_base = nvgpu_get_litter_value(g, GPU_LIT_GPC_BASE);
	u32 gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_GPC_STRIDE);
	u32 ppc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_BASE);
	u32 ppc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_PPC_IN_GPC_STRIDE);
	u32 tpc_in_gpc_base = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_BASE);
	u32 tpc_in_gpc_stride = nvgpu_get_litter_value(g, GPU_LIT_TPC_IN_GPC_STRIDE);

	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, "pri_addr=0x%x", pri_addr);

	if (!g->netlist_valid) {
		return -EINVAL;
	}

	/* Process the SYS/BE segment. */
	if ((addr_type == CTXSW_ADDR_TYPE_SYS) ||
	    (addr_type == CTXSW_ADDR_TYPE_ROP)) {
		list = nvgpu_next_netlist_get_sys_compute_ctxsw_regs(g);
		for (i = 0; i < list->count; i++) {
			reg = &list->l[i];
			address = reg->addr;
			sys_offset = reg->index;

			if (pri_addr == address) {
				*priv_offset = sys_offset;
				return FOUND_IN_CTXSWBUF_PRIV_COMPUTE_REGLIST;
			}
		}
#ifdef CONFIG_NVGPU_GRAPHICS
		list = nvgpu_next_netlist_get_sys_gfx_ctxsw_regs(g);
		for (i = 0; i < list->count; i++) {
			reg = &list->l[i];
			address = reg->addr;
			sys_offset = reg->index;

			if (pri_addr == address) {
				*priv_offset = sys_offset;
				return FOUND_IN_CTXSWBUF_PRIV_GFX_REGLIST;
			}
		}
#endif
	}

	/* Process the LTS segment. */
	if (addr_type == CTXSW_ADDR_TYPE_LTS_MAIN) {
		list = nvgpu_next_netlist_get_lts_ctxsw_regs(g);
		for (i = 0; i < list->count; i++) {
			reg = &list->l[i];
			address = reg->addr;
			sys_offset = reg->index;

			if (pri_addr == address) {
				*priv_offset = sys_offset;
				return FOUND_IN_CTXSWBUF_PRIV_REGLIST;
			}
		}
	}

	/* Process the TPC segment. */
	if (addr_type == CTXSW_ADDR_TYPE_TPC) {
		for (tpc_num = 0; tpc_num < num_tpcs; tpc_num++) {
			list = nvgpu_next_netlist_get_tpc_compute_ctxsw_regs(g);
			for (i = 0; i < list->count; i++) {
				reg = &list->l[i];
				address = reg->addr;
				tpc_addr = pri_tpccs_addr_mask(g, address);
				base_address = gpc_base +
					(gpc_num * gpc_stride) +
					tpc_in_gpc_base +
					(tpc_num * tpc_in_gpc_stride);
				address = base_address + tpc_addr;
				tpc_offset = reg->index;

				if (pri_addr == address) {
					*priv_offset = tpc_offset;
					return FOUND_IN_CTXSWBUF_PRIV_COMPUTE_REGLIST;
				}
			}
#ifdef CONFIG_NVGPU_GRAPHICS
			list = nvgpu_next_netlist_get_tpc_gfx_ctxsw_regs(g);
			for (i = 0; i < list->count; i++) {
				reg = &list->l[i];
				address = reg->addr;
				tpc_addr = pri_tpccs_addr_mask(g, address);
				base_address = gpc_base +
					(gpc_num * gpc_stride) +
					tpc_in_gpc_base +
					(tpc_num * tpc_in_gpc_stride);
				address = base_address + tpc_addr;
				tpc_offset = reg->index;

				if (pri_addr == address) {
					*priv_offset = tpc_offset;
					return FOUND_IN_CTXSWBUF_PRIV_GFX_REGLIST;
				}
			}
#endif
		}
	} else if ((addr_type == CTXSW_ADDR_TYPE_EGPC) ||
		   (addr_type == CTXSW_ADDR_TYPE_ETPC)) {
		if (g->ops.gr.get_egpc_base == NULL) {
			return -EINVAL;
		}

		for (tpc_num = 0; tpc_num < num_tpcs; tpc_num++) {
			list = nvgpu_next_netlist_get_etpc_compute_ctxsw_regs(g);
			for (i = 0; i < list->count; i++) {
				reg = &list->l[i];
				address = reg->addr;
				tpc_addr = pri_tpccs_addr_mask(g, address);
				base_address = g->ops.gr.get_egpc_base(g) +
					(gpc_num * gpc_stride) +
					tpc_in_gpc_base +
					(tpc_num * tpc_in_gpc_stride);
				address = base_address + tpc_addr;
				/*
				 * The data for the TPCs is interleaved in the
				 * context buffer. Example with num_tpcs = 2
				 * 0    1    2    3    4    5    ...
				 * 0-0  1-0  0-1  1-1  0-2  1-2  ...
				 */
				tpc_offset = (reg->index * num_tpcs) +
						(tpc_num * 4U);

				if (pri_addr == address) {
					*priv_offset = tpc_offset;
					nvgpu_log(g,
						gpu_dbg_fn | gpu_dbg_gpu_dbg,
						"egpc/etpc compute priv_offset=0x%08x",
						*priv_offset);
					return FOUND_IN_CTXSWBUF_PRIV_COMPUTE_REGLIST;
				}
			}
#ifdef CONFIG_NVGPU_GRAPHICS
			list = nvgpu_next_netlist_get_etpc_gfx_ctxsw_regs(g);
			for (i = 0; i < list->count; i++) {
				reg = &list->l[i];
				address = reg->addr;
				tpc_addr = pri_tpccs_addr_mask(g, address);
				base_address = g->ops.gr.get_egpc_base(g) +
					(gpc_num * gpc_stride) +
					tpc_in_gpc_base +
					(tpc_num * tpc_in_gpc_stride);
				address = base_address + tpc_addr;
				/*
				 * The data for the TPCs is interleaved in the
				 * context buffer. Example with num_tpcs = 2
				 * 0    1    2    3    4    5    ...
				 * 0-0  1-0  0-1  1-1  0-2  1-2  ...
				 */
				tpc_offset = (reg->index * num_tpcs) +
						(tpc_num * 4U);

				if (pri_addr == address) {
					*priv_offset = tpc_offset;
					nvgpu_log(g,
						gpu_dbg_fn | gpu_dbg_gpu_dbg,
						"egpc/etpc gfx priv_offset=0x%08x",
						*priv_offset);
					return FOUND_IN_CTXSWBUF_PRIV_GFX_REGLIST;
				}
			}
#endif
		}
	}

	/* Process the PPC segment. */
	if (addr_type == CTXSW_ADDR_TYPE_PPC) {
		for (ppc_num = 0; ppc_num < num_ppcs; ppc_num++) {
			list = nvgpu_next_netlist_get_ppc_compute_ctxsw_regs(g);
			for (i = 0; i < list->count; i++) {
				reg = &list->l[i];
				address = reg->addr;
				ppc_addr = pri_ppccs_addr_mask(address);
				base_address = gpc_base +
					(gpc_num * gpc_stride) +
					ppc_in_gpc_base +
					(ppc_num * ppc_in_gpc_stride);
				address = base_address + ppc_addr;
				/*
				 * The data for the PPCs is interleaved in the
				 * context buffer. Example with numPpcs = 2
				 * 0    1    2    3    4    5    ...
				 * 0-0  1-0  0-1  1-1  0-2  1-2  ...
				 */
				ppc_offset = (reg->index * num_ppcs) +
						(ppc_num * 4U);

				if (pri_addr == address) {
					*priv_offset = ppc_offset;
					return FOUND_IN_CTXSWBUF_PRIV_COMPUTE_REGLIST;
				}
			}
#ifdef CONFIG_NVGPU_GRAPHICS
			list = nvgpu_next_netlist_get_ppc_gfx_ctxsw_regs(g);
			for (i = 0; i < list->count; i++) {
				reg = &list->l[i];
				address = reg->addr;
				ppc_addr = pri_ppccs_addr_mask(address);
				base_address = gpc_base +
					(gpc_num * gpc_stride) +
					ppc_in_gpc_base +
					(ppc_num * ppc_in_gpc_stride);
				address = base_address + ppc_addr;
				/*
				 * The data for the PPCs is interleaved in the
				 * context buffer. Example with numPpcs = 2
				 * 0    1    2    3    4    5    ...
				 * 0-0  1-0  0-1  1-1  0-2  1-2  ...
				 */
				ppc_offset = (reg->index * num_ppcs) +
						(ppc_num * 4U);

				if (pri_addr == address) {
					*priv_offset = ppc_offset;
					return FOUND_IN_CTXSWBUF_PRIV_GFX_REGLIST;
				}
			}
#endif
		}
	}

	/* Process the GPC segment. */
	if (addr_type == CTXSW_ADDR_TYPE_GPC) {
		list = nvgpu_next_netlist_get_gpc_compute_ctxsw_regs(g);
		for (i = 0; i < list->count; i++) {
			reg = &list->l[i];

			address = reg->addr;
			gpc_addr = pri_gpccs_addr_mask(g, address);
			gpc_offset = reg->index;

			base_address = gpc_base + (gpc_num * gpc_stride);
			address = base_address + gpc_addr;

			if (pri_addr == address) {
				*priv_offset = gpc_offset;
				return FOUND_IN_CTXSWBUF_PRIV_COMPUTE_REGLIST;
			}
		}
#ifdef CONFIG_NVGPU_GRAPHICS
		list = nvgpu_next_netlist_get_gpc_gfx_ctxsw_regs(g);
		for (i = 0; i < list->count; i++) {
			reg = &list->l[i];

			address = reg->addr;
			gpc_addr = pri_gpccs_addr_mask(g, address);
			gpc_offset = reg->index;

			base_address = gpc_base + (gpc_num * gpc_stride);
			address = base_address + gpc_addr;

			if (pri_addr == address) {
				*priv_offset = gpc_offset;
				return FOUND_IN_CTXSWBUF_PRIV_GFX_REGLIST;
			}
		}
#endif
	}
	return -EINVAL;
}
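
/*
 * Worked example for the interleaved offsets above (a sketch; the concrete
 * numbers are an assumption for illustration): with num_tpcs = 2,
 * reg->index a byte offset in a single-TPC layout, and 4-byte register
 * values, a register at reg->index = 8 lands at
 *
 *	tpc_offset = (8 * 2) + (tpc_num * 4U)
 *
 * i.e. byte 16 for TPC0 and byte 20 for TPC1, matching the
 * 0-0 1-0 0-1 1-1 ... interleave pattern in the comments above.
 */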

/*
 * Calculate the offset of a pri address within the ctxsw buffer by walking
 * the various pri save segments.
 */
int gr_ga10b_find_priv_offset_in_buffer(struct gk20a *g, u32 addr,
	u32 *context_buffer,
	u32 context_buffer_size,
	u32 *priv_offset)
{
	int err;
	enum ctxsw_addr_type addr_type;
	u32 broadcast_flags = 0U;
	u32 gpc_num, tpc_num, ppc_num, be_num;
	u32 num_gpcs, num_tpcs, num_ppcs;
	u32 offset;
	u32 ppc_mask, reg_list_ppc_count;
	u32 *context;
	u32 segoffset, compute_segoffset;
	u32 graphics_segoffset;
	u32 main_hdr_size, fecs_hdr_size, gpccs_hdr_stride;

	err = g->ops.gr.decode_priv_addr(g, addr, &addr_type,
			&gpc_num, &tpc_num, &ppc_num, &be_num,
			&broadcast_flags);
	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
			"addr=0x%x addr_type=%d, broadcast_flags: %08x",
			addr, addr_type, broadcast_flags);
	if (err != 0) {
		return err;
	}

	context = context_buffer;
	if (!g->ops.gr.ctxsw_prog.check_main_image_header_magic(context)) {
		nvgpu_err(g, "invalid main header: magic value");
		return -EINVAL;
	}

	main_hdr_size = g->ops.gr.ctxsw_prog.hw_get_main_header_size();
	fecs_hdr_size = g->ops.gr.ctxsw_prog.hw_get_fecs_header_size();
	gpccs_hdr_stride = g->ops.gr.ctxsw_prog.hw_get_gpccs_header_stride();
	num_gpcs = g->ops.gr.ctxsw_prog.get_num_gpcs(context);

	/*
	 * Check in the extended buffer segment of the ctxsw buffer. If found,
	 * return; else continue on.
	 */
	err = gr_gk20a_find_priv_offset_in_ext_buffer(g,
			addr, context_buffer,
			context_buffer_size, priv_offset);
	if (err == 0) {
		nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
			"offset found in Ext buffer");
		return err;
	}

	/* Parse the FECS local header. */
	context += (main_hdr_size >> 2);
	if (!g->ops.gr.ctxsw_prog.check_local_header_magic(context)) {
		nvgpu_err(g,
			"Invalid FECS local header: magic value");
		return -EINVAL;
	}

	if ((addr_type == CTXSW_ADDR_TYPE_SYS) ||
	    (addr_type == CTXSW_ADDR_TYPE_ROP)) {
		compute_segoffset =
			g->ops.gr.ctxsw_prog.get_compute_sysreglist_offset(context);
		graphics_segoffset =
			g->ops.gr.ctxsw_prog.get_gfx_sysreglist_offset(context);
		nvgpu_log(g, gpu_dbg_gpu_dbg, "sys_segment_offsets(0x%x, 0x%x)",
			compute_segoffset, graphics_segoffset);

		err = g->ops.gr.process_context_buffer_priv_segment(g,
				addr_type, addr, 0, 0, 0, 0, &offset);
		if (err < 0) {
			return err;
		}
		segoffset = (err == FOUND_IN_CTXSWBUF_PRIV_COMPUTE_REGLIST) ?
				compute_segoffset : graphics_segoffset;
		*priv_offset = (segoffset + offset);
		return 0;
	} else if (addr_type == CTXSW_ADDR_TYPE_LTS_MAIN) {
		segoffset = g->ops.gr.ctxsw_prog.get_ltsreglist_offset(context);
		nvgpu_log(g, gpu_dbg_gpu_dbg, "lts_segment_offset(0x%x)",
			segoffset);

		err = g->ops.gr.process_context_buffer_priv_segment(g,
				addr_type, addr, 0, 0, 0, 0, &offset);
		if (err < 0) {
			return err;
		}
		*priv_offset = (segoffset + offset);
		return 0;
	}

	if ((gpc_num + 1U) > num_gpcs) {
		nvgpu_err(g,
			"GPC %d not in this context buffer.",
			gpc_num);
		return -EINVAL;
	}

	/* Skip ahead to the relevant gpccs segment. */
	context += (fecs_hdr_size >> BYTE_TO_DW_SHIFT) +
			((gpc_num * gpccs_hdr_stride) >> BYTE_TO_DW_SHIFT);
	if (!g->ops.gr.ctxsw_prog.check_local_header_magic(context)) {
		nvgpu_err(g,
			"Invalid GPCCS header: magic value");
		return -EINVAL;
	}

	num_tpcs = g->ops.gr.ctxsw_prog.get_num_tpcs(context);
	if ((tpc_num + 1U) > num_tpcs) {
		nvgpu_err(g, "GPC %d TPC %d not in this context buffer.",
			gpc_num, tpc_num);
		return -EINVAL;
	}

	err = gr_gk20a_determine_ppc_configuration(g, context, &num_ppcs,
			&ppc_mask, &reg_list_ppc_count);
	if (err != 0) {
		nvgpu_err(g, "determine ppc configuration failed");
		return err;
	}

	if (addr_type == CTXSW_ADDR_TYPE_GPC) {
		compute_segoffset =
			g->ops.gr.ctxsw_prog.get_compute_gpcreglist_offset(context);
		graphics_segoffset =
			g->ops.gr.ctxsw_prog.get_gfx_gpcreglist_offset(context);
	} else if (addr_type == CTXSW_ADDR_TYPE_PPC) {
		compute_segoffset =
			g->ops.gr.ctxsw_prog.get_compute_ppcreglist_offset(context);
		graphics_segoffset =
			g->ops.gr.ctxsw_prog.get_gfx_ppcreglist_offset(context);
	} else if (addr_type == CTXSW_ADDR_TYPE_TPC) {
		compute_segoffset =
			g->ops.gr.ctxsw_prog.get_compute_tpcreglist_offset(context,
				tpc_num);
		graphics_segoffset =
			g->ops.gr.ctxsw_prog.get_gfx_tpcreglist_offset(context,
				tpc_num);
	} else if (addr_type == CTXSW_ADDR_TYPE_ETPC) {
		compute_segoffset =
			g->ops.gr.ctxsw_prog.get_compute_etpcreglist_offset(context);
		graphics_segoffset =
			g->ops.gr.ctxsw_prog.get_gfx_etpcreglist_offset(context);
	} else {
		nvgpu_err(g, "invalid addr_type(0x%x)", addr_type);
		return -EINVAL;
	}
	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
			"gpccs_segment_offset(0x%x, 0x%x)", compute_segoffset,
			graphics_segoffset);

	err = g->ops.gr.process_context_buffer_priv_segment(g, addr_type,
			addr, gpc_num, num_tpcs, num_ppcs, ppc_mask, &offset);
	if (err < 0) {
		return err;
	}

	segoffset = (err == FOUND_IN_CTXSWBUF_PRIV_COMPUTE_REGLIST) ?
			compute_segoffset : graphics_segoffset;
	*priv_offset = (segoffset + offset);
	return 0;
}

static const u32 hwpm_cau_init_data[] =
{
	/* This list is autogenerated. Do not edit. */
	0x00419980,
	0x00000000,
	0x00419988,
	0x00000000,
	0x0041998c,
	0x00000000,
	0x00419990,
	0x00000000,
	0x00419994,
	0x00000000,
	0x00419998,
	0x00000000,
	0x0041999c,
	0x00000000,
	0x004199a4,
	0x00000001,
};

const u32 *ga10b_gr_get_hwpm_cau_init_data(u32 *count)
{
	*count = sizeof(hwpm_cau_init_data) / sizeof(hwpm_cau_init_data[0]);
	return hwpm_cau_init_data;
}
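
/*
 * A minimal consumption sketch (an assumption for illustration; the actual
 * consumer lives elsewhere in the driver): the table is laid out as
 * (register address, value) pairs, so a caller would do roughly
 *
 *	u32 count, i;
 *	const u32 *data = ga10b_gr_get_hwpm_cau_init_data(&count);
 *
 *	for (i = 0U; (i + 1U) < count; i += 2U) {
 *		nvgpu_writel(g, data[i], data[i + 1U]);
 *	}
 */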

#endif /* CONFIG_NVGPU_DEBUGGER */

void ga10b_gr_vab_init(struct gk20a *g, u32 vab_reg)
{
	nvgpu_writel(g, gr_gpcs_mmu_vidmem_access_bit_r(), vab_reg);
}

void ga10b_gr_vab_release(struct gk20a *g, u32 vab_reg)
{
	nvgpu_writel(g, gr_gpcs_mmu_vidmem_access_bit_r(), vab_reg);
}