mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
gpu: nvgpu: log mme esr register information
Add a new HAL op to log the MME exception register information. Support is
added for Turing only. On an MME exception interrupt, read the mme_hww_esr
register and log the error based on the esr register bits.

JIRA NVGPU-1241

Change-Id: Ied3db0cc8fe6e2a82ecafc9964875e2686ca0d72
Signed-off-by: Vinod G <vinodg@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2005807
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
committed by mobile promotions
parent 0f84c9024f
commit 1b1ebb0a8d
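Before the diff, a minimal sketch of the HAL hook pattern this change relies on: the common graphics ISR always logs the raw esr/info values and then calls a per-chip log_mme_exception callback only when one is installed (Turing installs gr_tu104_log_mme_exception; older chips leave the pointer NULL). The types and the handle_mme_exception wrapper below are simplified stand-ins, not the real nvgpu definitions.

/* Simplified stand-ins; only the NULL-checked dispatch shape is the point. */
struct gk20a;

struct gr_ops {
    /* Optional per-chip hook; NULL on chips without MME ESR decoding. */
    void (*log_mme_exception)(struct gk20a *g);
};

struct gk20a {
    struct gr_ops gr;
};

/* Called from the graphics interrupt path when an MME exception is pending. */
static void handle_mme_exception(struct gk20a *g)
{
    /* The common code has already logged the raw esr/info registers. */
    if (g->gr.log_mme_exception != NULL) {
        g->gr.log_mme_exception(g);   /* chip-specific decode, Turing only */
    }
}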
@@ -5427,6 +5427,10 @@ int gk20a_gr_isr(struct gk20a *g)
     nvgpu_err(g, "mme exception: esr 0x%08x info:0x%08x",
             mme, info);
+    if (g->ops.gr.log_mme_exception != NULL) {
+        g->ops.gr.log_mme_exception(g);
+    }
+
     gk20a_writel(g, gr_mme_hww_esr_r(),
         gr_mme_hww_esr_reset_active_f());
     need_reset = true;
@@ -338,6 +338,7 @@ static const struct gpu_ops gm20b_ops = {
             gk20a_gr_get_fecs_ctx_state_store_major_rev_id,
         .init_gfxp_rtv_cb = NULL,
         .commit_gfxp_rtv_cb = NULL,
+        .log_mme_exception = NULL,
         .ctxsw_prog = {
             .hw_get_fecs_header_size =
                 gm20b_ctxsw_prog_hw_get_fecs_header_size,
@@ -376,6 +376,7 @@ static const struct gpu_ops gp10b_ops = {
             gk20a_gr_get_fecs_ctx_state_store_major_rev_id,
         .init_gfxp_rtv_cb = NULL,
         .commit_gfxp_rtv_cb = NULL,
+        .log_mme_exception = NULL,
         .ctxsw_prog = {
             .hw_get_fecs_header_size =
                 gm20b_ctxsw_prog_hw_get_fecs_header_size,
@@ -502,6 +502,7 @@ static const struct gpu_ops gv100_ops = {
             gk20a_gr_get_fecs_ctx_state_store_major_rev_id,
         .init_gfxp_rtv_cb = NULL,
         .commit_gfxp_rtv_cb = NULL,
+        .log_mme_exception = NULL,
         .ctxsw_prog = {
             .hw_get_fecs_header_size =
                 gm20b_ctxsw_prog_hw_get_fecs_header_size,
@@ -457,6 +457,7 @@ static const struct gpu_ops gv11b_ops = {
             gk20a_gr_get_fecs_ctx_state_store_major_rev_id,
         .init_gfxp_rtv_cb = NULL,
         .commit_gfxp_rtv_cb = NULL,
+        .log_mme_exception = NULL,
         .ctxsw_prog = {
             .hw_get_fecs_header_size =
                 gm20b_ctxsw_prog_hw_get_fecs_header_size,
@@ -530,6 +530,7 @@ struct gpu_ops {
                 struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm);
         void (*commit_gfxp_rtv_cb)(struct gk20a *g,
                 struct nvgpu_gr_ctx *gr_ctx, bool patch);
+        void (*log_mme_exception)(struct gk20a *g);
         struct {
             u32 (*hw_get_fecs_header_size)(void);
             u32 (*hw_get_gpccs_header_size)(void);
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -1050,6 +1050,50 @@ static inline u32 gr_mme_hww_esr_r(void)
 {
     return 0x00404490U;
 }
+static inline u32 gr_mme_hww_esr_missing_macro_data_pending_f(void)
+{
+    return 0x1U;
+}
+static inline u32 gr_mme_hww_esr_illegal_opcode_pending_f(void)
+{
+    return 0x4U;
+}
+static inline u32 gr_mme_hww_esr_branch_in_delay_pending_f(void)
+{
+    return 0x8U;
+}
+static inline u32 gr_mme_hww_esr_inst_ram_acess_pending_f(void)
+{
+    return 0x20U;
+}
+static inline u32 gr_mme_hww_esr_data_ram_access_pending_f(void)
+{
+    return 0x40U;
+}
+static inline u32 gr_mme_hww_esr_illegal_mme_method_pending_f(void)
+{
+    return 0x80U;
+}
+static inline u32 gr_mme_hww_esr_dma_dram_access_pending_f(void)
+{
+    return 0x10000U;
+}
+static inline u32 gr_mme_hww_esr_dma_read_pb_pending_f(void)
+{
+    return 0x20000U;
+}
+static inline u32 gr_mme_hww_esr_dma_illegal_fifo_pending_f(void)
+{
+    return 0x40000U;
+}
+static inline u32 gr_mme_hww_esr_dma_read_overflow_pending_f(void)
+{
+    return 0x80000U;
+}
+static inline u32 gr_mme_hww_esr_dma_fifo_resized_pending_f(void)
+{
+    return 0x100000U;
+}
 static inline u32 gr_mme_hww_esr_reset_active_f(void)
 {
     return 0x40000000U;
@@ -1062,6 +1106,22 @@ static inline u32 gr_mme_hww_esr_info_r(void)
 {
     return 0x00404494U;
 }
+static inline u32 gr_mme_hww_esr_info_pc_valid_v(u32 r)
+{
+    return (r >> 28U) & 0x1U;
+}
+static inline u32 gr_mme_hww_esr_info2_r(void)
+{
+    return 0x0040449cU;
+}
+static inline u32 gr_mme_hww_esr_info3_r(void)
+{
+    return 0x004044a8U;
+}
+static inline u32 gr_mme_hww_esr_info4_r(void)
+{
+    return 0x004044acU;
+}
 static inline u32 gr_memfmt_hww_esr_r(void)
 {
     return 0x00404600U;
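As a quick illustration of how the accessor functions added above are meant to be used, here is a small self-contained sketch that decodes a raw ESR value with the same masks. The accessor bodies are copied from the header hunks; the sample esr/info values are invented purely for the example.

#include <stdio.h>
#include <stdint.h>

typedef uint32_t u32;

/* Accessor bodies copied from the generated tu104 hw header above. */
static inline u32 gr_mme_hww_esr_illegal_opcode_pending_f(void)    { return 0x4U; }
static inline u32 gr_mme_hww_esr_dma_read_overflow_pending_f(void) { return 0x80000U; }
static inline u32 gr_mme_hww_esr_info_pc_valid_v(u32 r)            { return (r >> 28U) & 0x1U; }

int main(void)
{
    /* Sample values invented for illustration; real code reads the registers. */
    u32 esr  = 0x80004U;     /* illegal_opcode + dma_read_overflow pending */
    u32 info = 0x10000000U;  /* pc_valid bit set */

    if ((esr & gr_mme_hww_esr_illegal_opcode_pending_f()) != 0U) {
        printf("GR MME EXCEPTION: ILLEGAL_OPCODE\n");
    }
    if ((esr & gr_mme_hww_esr_dma_read_overflow_pending_f()) != 0U) {
        printf("GR MME EXCEPTION: DMA_READ_FIFOED_OVERFLOW\n");
    }
    if (gr_mme_hww_esr_info_pc_valid_v(info) == 0x1U) {
        printf("esr info PC is valid; INFO2..INFO4 are worth reading\n");
    }
    return 0;
}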
@@ -491,3 +491,78 @@ void gr_tu104_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
     *sm_dsm_perf_ctrl_regs = NULL;
     *ctrl_register_stride = 0;
 }
+
+void gr_tu104_log_mme_exception(struct gk20a *g)
+{
+    u32 mme_hww_esr = nvgpu_readl(g, gr_mme_hww_esr_r());
+    u32 mme_hww_info = nvgpu_readl(g, gr_mme_hww_esr_info_r());
+
+    if ((mme_hww_esr &
+            gr_mme_hww_esr_missing_macro_data_pending_f()) != 0U) {
+        nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
+            "GR MME EXCEPTION: MISSING_MACRO_DATA");
+    }
+
+    if ((mme_hww_esr &
+            gr_mme_hww_esr_illegal_mme_method_pending_f()) != 0U) {
+        nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
+            "GR MME EXCEPTION: ILLEGAL_MME_METHOD");
+    }
+
+    if ((mme_hww_esr &
+            gr_mme_hww_esr_dma_dram_access_pending_f()) != 0U) {
+        nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
+            "GR MME EXCEPTION: DMA_DRAM_ACCESS_OUT_OF_BOUNDS");
+    }
+
+    if ((mme_hww_esr &
+            gr_mme_hww_esr_dma_illegal_fifo_pending_f()) != 0U) {
+        nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
+            "GR MME EXCEPTION: DMA_ILLEGAL_FIFO_CONFIG");
+    }
+
+    if ((mme_hww_esr &
+            gr_mme_hww_esr_dma_read_overflow_pending_f()) != 0U) {
+        nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
+            "GR MME EXCEPTION: DMA_READ_FIFOED_OVERFLOW");
+    }
+
+    if ((mme_hww_esr &
+            gr_mme_hww_esr_dma_fifo_resized_pending_f()) != 0U) {
+        nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
+            "GR MME EXCEPTION: DMA_FIFO_RESIZED_WHEN_NONIDLE");
+    }
+
+    if ((mme_hww_esr & gr_mme_hww_esr_illegal_opcode_pending_f()) != 0U) {
+        nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
+            "GR MME EXCEPTION: ILLEGAL_OPCODE");
+    }
+
+    if ((mme_hww_esr & gr_mme_hww_esr_branch_in_delay_pending_f()) != 0U) {
+        nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
+            "GR MME EXCEPTION: BRANCH_IN_DELAY_SHOT");
+    }
+
+    if ((mme_hww_esr & gr_mme_hww_esr_inst_ram_acess_pending_f()) != 0U) {
+        nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
+            "GR MME EXCEPTION: INSTR_RAM_ACCESS_OUT_OF_BOUNDS");
+    }
+
+    if ((mme_hww_esr & gr_mme_hww_esr_data_ram_access_pending_f()) != 0U) {
+        nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
+            "GR MME EXCEPTION: DATA_RAM_ACCESS_OUT_OF_BOUNDS");
+    }
+
+    if ((mme_hww_esr & gr_mme_hww_esr_dma_read_pb_pending_f()) != 0U) {
+        nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
+            "GR MME EXCEPTION: DMA_READ_FIFOED_FROM_PB");
+    }
+
+    if (gr_mme_hww_esr_info_pc_valid_v(mme_hww_info) == 0x1U) {
+        nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
+            "GR MME EXCEPTION: INFO2 0x%x, INFO3 0x%x, INFO4 0x%x",
+            nvgpu_readl(g, gr_mme_hww_esr_info2_r()),
+            nvgpu_readl(g, gr_mme_hww_esr_info3_r()),
+            nvgpu_readl(g, gr_mme_hww_esr_info4_r()));
+    }
+}
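The new function above checks each status bit with its own if, mirroring the generated accessors one to one. Purely as an illustration of the same decode logic (a hypothetical alternative, not part of this change), the checks can also be written table-driven; the mask values are the ones defined in the hw header hunk, and printf stands in for nvgpu_log so the sketch is self-contained.

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

typedef uint32_t u32;

/* Masks copied from the tu104 hw header hunk; names follow the log strings above. */
static const struct { u32 mask; const char *name; } mme_esr_bits[] = {
    { 0x1U,      "MISSING_MACRO_DATA" },
    { 0x4U,      "ILLEGAL_OPCODE" },
    { 0x8U,      "BRANCH_IN_DELAY_SHOT" },
    { 0x20U,     "INSTR_RAM_ACCESS_OUT_OF_BOUNDS" },
    { 0x40U,     "DATA_RAM_ACCESS_OUT_OF_BOUNDS" },
    { 0x80U,     "ILLEGAL_MME_METHOD" },
    { 0x10000U,  "DMA_DRAM_ACCESS_OUT_OF_BOUNDS" },
    { 0x20000U,  "DMA_READ_FIFOED_FROM_PB" },
    { 0x40000U,  "DMA_ILLEGAL_FIFO_CONFIG" },
    { 0x80000U,  "DMA_READ_FIFOED_OVERFLOW" },
    { 0x100000U, "DMA_FIFO_RESIZED_WHEN_NONIDLE" },
};

/* Hypothetical table-driven decode; the real driver logs via nvgpu_log(). */
static void log_mme_esr_bits(u32 mme_hww_esr)
{
    size_t i;

    for (i = 0; i < sizeof(mme_esr_bits) / sizeof(mme_esr_bits[0]); i++) {
        if ((mme_hww_esr & mme_esr_bits[i].mask) != 0U) {
            printf("GR MME EXCEPTION: %s\n", mme_esr_bits[i].name);
        }
    }
}

int main(void)
{
    log_mme_esr_bits(0x100001U);  /* invented value: missing macro data + fifo resized */
    return 0;
}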
@@ -92,4 +92,5 @@ void gr_tu104_init_sm_dsm_reg_info(void);
 void gr_tu104_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
         u32 *num_sm_dsm_perf_ctrl_regs, u32 **sm_dsm_perf_ctrl_regs,
         u32 *ctrl_register_stride);
+void gr_tu104_log_mme_exception(struct gk20a *g);
 #endif /* NVGPU_GR_TU104_H */
@@ -527,6 +527,7 @@ static const struct gpu_ops tu104_ops = {
         .dump_gr_falcon_stats = gk20a_fecs_dump_falcon_stats,
         .get_fecs_ctx_state_store_major_rev_id =
             gk20a_gr_get_fecs_ctx_state_store_major_rev_id,
+        .log_mme_exception = gr_tu104_log_mme_exception,
         .ctxsw_prog = {
             .hw_get_fecs_header_size =
                 gm20b_ctxsw_prog_hw_get_fecs_header_size,