gpu: nvgpu: log mme esr register information

Add a new HAL op to log MME exception register information; the op is
implemented for Turing only. On an MME exception interrupt, read the
mme_hww_esr register and log each error indicated by the ESR status
bits. When the ESR info register reports a valid PC, also log the
info2/info3/info4 registers.

JIRA NVGPU-1241

Change-Id: Ied3db0cc8fe6e2a82ecafc9964875e2686ca0d72
Signed-off-by: Vinod G <vinodg@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2005807
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Vinod G
2019-01-28 13:04:46 -08:00
committed by mobile promotions
parent 0f84c9024f
commit 1b1ebb0a8d
10 changed files with 147 additions and 1 deletion
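
For orientation before the per-file diffs, here is a minimal, self-contained sketch of the decode-and-log pattern the new HAL op follows: read the ESR value, test each pending bit, and emit one message per error found. This is an illustrative example only, not the committed nvgpu code; the table-driven layout, the names mme_esr_bits and decode_mme_esr, and the standalone main() are assumptions made for the sketch, while the bit masks and message strings are taken from the hw_gr_tu104.h and gr_tu104.c diffs below.

/*
 * Illustrative sketch only -- not the committed nvgpu code.  It mirrors
 * the decode-and-log pattern of gr_tu104_log_mme_exception() with a
 * table-driven loop.  Bit masks and message strings come from the
 * hw_gr_tu104.h and gr_tu104.c diffs below; everything else (the table,
 * decode_mme_esr(), main()) is hypothetical.
 */
#include <stdint.h>
#include <stdio.h>

struct mme_esr_bit {
	uint32_t mask;
	const char *msg;
};

static const struct mme_esr_bit mme_esr_bits[] = {
	{ 0x1U,      "MISSING_MACRO_DATA" },
	{ 0x4U,      "ILLEGAL_OPCODE" },
	{ 0x8U,      "BRANCH_IN_DELAY_SHOT" },
	{ 0x20U,     "INSTR_RAM_ACCESS_OUT_OF_BOUNDS" },
	{ 0x40U,     "DATA_RAM_ACCESS_OUT_OF_BOUNDS" },
	{ 0x80U,     "ILLEGAL_MME_METHOD" },
	{ 0x10000U,  "DMA_DRAM_ACCESS_OUT_OF_BOUNDS" },
	{ 0x20000U,  "DMA_READ_FIFOED_FROM_PB" },
	{ 0x40000U,  "DMA_ILLEGAL_FIFO_CONFIG" },
	{ 0x80000U,  "DMA_READ_FIFOED_OVERFLOW" },
	{ 0x100000U, "DMA_FIFO_RESIZED_WHEN_NONIDLE" },
};

/* Log one line per pending error bit in the MME HWW ESR value. */
static void decode_mme_esr(uint32_t esr)
{
	size_t i;

	for (i = 0; i < sizeof(mme_esr_bits) / sizeof(mme_esr_bits[0]); i++) {
		if ((esr & mme_esr_bits[i].mask) != 0U) {
			printf("GR MME EXCEPTION: %s\n", mme_esr_bits[i].msg);
		}
	}
}

int main(void)
{
	/* Example ESR with two bits set: illegal opcode + DMA read overflow. */
	decode_mme_esr(0x4U | 0x80000U);
	return 0;
}

The committed implementation in gr_tu104.c below uses an explicit if-chain per bit and routes the messages through nvgpu_log() with the gpu_dbg_intr | gpu_dbg_gpu_dbg flags instead of printf().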

@@ -5427,6 +5427,10 @@ int gk20a_gr_isr(struct gk20a *g)
 			nvgpu_err(g, "mme exception: esr 0x%08x info:0x%08x",
 					mme, info);
+			if (g->ops.gr.log_mme_exception != NULL) {
+				g->ops.gr.log_mme_exception(g);
+			}
+
 			gk20a_writel(g, gr_mme_hww_esr_r(),
 					gr_mme_hww_esr_reset_active_f());
 			need_reset = true;

@@ -338,6 +338,7 @@ static const struct gpu_ops gm20b_ops = {
 			gk20a_gr_get_fecs_ctx_state_store_major_rev_id,
 		.init_gfxp_rtv_cb = NULL,
 		.commit_gfxp_rtv_cb = NULL,
+		.log_mme_exception = NULL,
 		.ctxsw_prog = {
 			.hw_get_fecs_header_size =
 				gm20b_ctxsw_prog_hw_get_fecs_header_size,

@@ -376,6 +376,7 @@ static const struct gpu_ops gp10b_ops = {
 			gk20a_gr_get_fecs_ctx_state_store_major_rev_id,
 		.init_gfxp_rtv_cb = NULL,
 		.commit_gfxp_rtv_cb = NULL,
+		.log_mme_exception = NULL,
 		.ctxsw_prog = {
 			.hw_get_fecs_header_size =
 				gm20b_ctxsw_prog_hw_get_fecs_header_size,

@@ -502,6 +502,7 @@ static const struct gpu_ops gv100_ops = {
 			gk20a_gr_get_fecs_ctx_state_store_major_rev_id,
 		.init_gfxp_rtv_cb = NULL,
 		.commit_gfxp_rtv_cb = NULL,
+		.log_mme_exception = NULL,
 		.ctxsw_prog = {
 			.hw_get_fecs_header_size =
 				gm20b_ctxsw_prog_hw_get_fecs_header_size,

@@ -457,6 +457,7 @@ static const struct gpu_ops gv11b_ops = {
 			gk20a_gr_get_fecs_ctx_state_store_major_rev_id,
 		.init_gfxp_rtv_cb = NULL,
 		.commit_gfxp_rtv_cb = NULL,
+		.log_mme_exception = NULL,
 		.ctxsw_prog = {
 			.hw_get_fecs_header_size =
 				gm20b_ctxsw_prog_hw_get_fecs_header_size,

@@ -530,6 +530,7 @@ struct gpu_ops {
 				struct nvgpu_gr_ctx *gr_ctx, struct vm_gk20a *vm);
 		void (*commit_gfxp_rtv_cb)(struct gk20a *g,
 				struct nvgpu_gr_ctx *gr_ctx, bool patch);
+		void (*log_mme_exception)(struct gk20a *g);
 		struct {
 			u32 (*hw_get_fecs_header_size)(void);
 			u32 (*hw_get_gpccs_header_size)(void);

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -1050,6 +1050,50 @@ static inline u32 gr_mme_hww_esr_r(void)
 {
 	return 0x00404490U;
 }
+static inline u32 gr_mme_hww_esr_missing_macro_data_pending_f(void)
+{
+	return 0x1U;
+}
+static inline u32 gr_mme_hww_esr_illegal_opcode_pending_f(void)
+{
+	return 0x4U;
+}
+static inline u32 gr_mme_hww_esr_branch_in_delay_pending_f(void)
+{
+	return 0x8U;
+}
+static inline u32 gr_mme_hww_esr_inst_ram_acess_pending_f(void)
+{
+	return 0x20U;
+}
+static inline u32 gr_mme_hww_esr_data_ram_access_pending_f(void)
+{
+	return 0x40U;
+}
+static inline u32 gr_mme_hww_esr_illegal_mme_method_pending_f(void)
+{
+	return 0x80U;
+}
+static inline u32 gr_mme_hww_esr_dma_dram_access_pending_f(void)
+{
+	return 0x10000U;
+}
+static inline u32 gr_mme_hww_esr_dma_read_pb_pending_f(void)
+{
+	return 0x20000U;
+}
+static inline u32 gr_mme_hww_esr_dma_illegal_fifo_pending_f(void)
+{
+	return 0x40000U;
+}
+static inline u32 gr_mme_hww_esr_dma_read_overflow_pending_f(void)
+{
+	return 0x80000U;
+}
+static inline u32 gr_mme_hww_esr_dma_fifo_resized_pending_f(void)
+{
+	return 0x100000U;
+}
 static inline u32 gr_mme_hww_esr_reset_active_f(void)
 {
 	return 0x40000000U;
@@ -1062,6 +1106,22 @@ static inline u32 gr_mme_hww_esr_info_r(void)
 {
 	return 0x00404494U;
 }
+static inline u32 gr_mme_hww_esr_info_pc_valid_v(u32 r)
+{
+	return (r >> 28U) & 0x1U;
+}
+static inline u32 gr_mme_hww_esr_info2_r(void)
+{
+	return 0x0040449cU;
+}
+static inline u32 gr_mme_hww_esr_info3_r(void)
+{
+	return 0x004044a8U;
+}
+static inline u32 gr_mme_hww_esr_info4_r(void)
+{
+	return 0x004044acU;
+}
 static inline u32 gr_memfmt_hww_esr_r(void)
 {
 	return 0x00404600U;

@@ -491,3 +491,78 @@ void gr_tu104_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
 	*sm_dsm_perf_ctrl_regs = NULL;
 	*ctrl_register_stride = 0;
 }
+
+void gr_tu104_log_mme_exception(struct gk20a *g)
+{
+	u32 mme_hww_esr = nvgpu_readl(g, gr_mme_hww_esr_r());
+	u32 mme_hww_info = nvgpu_readl(g, gr_mme_hww_esr_info_r());
+
+	if ((mme_hww_esr &
+			gr_mme_hww_esr_missing_macro_data_pending_f()) != 0U) {
+		nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
+			"GR MME EXCEPTION: MISSING_MACRO_DATA");
+	}
+
+	if ((mme_hww_esr &
+			gr_mme_hww_esr_illegal_mme_method_pending_f()) != 0U) {
+		nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
+			"GR MME EXCEPTION: ILLEGAL_MME_METHOD");
+	}
+
+	if ((mme_hww_esr &
+			gr_mme_hww_esr_dma_dram_access_pending_f()) != 0U) {
+		nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
+			"GR MME EXCEPTION: DMA_DRAM_ACCESS_OUT_OF_BOUNDS");
+	}
+
+	if ((mme_hww_esr &
+			gr_mme_hww_esr_dma_illegal_fifo_pending_f()) != 0U) {
+		nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
+			"GR MME EXCEPTION: DMA_ILLEGAL_FIFO_CONFIG");
+	}
+
+	if ((mme_hww_esr &
+			gr_mme_hww_esr_dma_read_overflow_pending_f()) != 0U) {
+		nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
+			"GR MME EXCEPTION: DMA_READ_FIFOED_OVERFLOW");
+	}
+
+	if ((mme_hww_esr &
+			gr_mme_hww_esr_dma_fifo_resized_pending_f()) != 0U) {
+		nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
+			"GR MME EXCEPTION: DMA_FIFO_RESIZED_WHEN_NONIDLE");
+	}
+
+	if ((mme_hww_esr & gr_mme_hww_esr_illegal_opcode_pending_f()) != 0U) {
+		nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
+			"GR MME EXCEPTION: ILLEGAL_OPCODE");
+	}
+
+	if ((mme_hww_esr & gr_mme_hww_esr_branch_in_delay_pending_f()) != 0U) {
+		nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
+			"GR MME EXCEPTION: BRANCH_IN_DELAY_SHOT");
+	}
+
+	if ((mme_hww_esr & gr_mme_hww_esr_inst_ram_acess_pending_f()) != 0U) {
+		nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
+			"GR MME EXCEPTION: INSTR_RAM_ACCESS_OUT_OF_BOUNDS");
+	}
+
+	if ((mme_hww_esr & gr_mme_hww_esr_data_ram_access_pending_f()) != 0U) {
+		nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
+			"GR MME EXCEPTION: DATA_RAM_ACCESS_OUT_OF_BOUNDS");
+	}
+
+	if ((mme_hww_esr & gr_mme_hww_esr_dma_read_pb_pending_f()) != 0U) {
+		nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
+			"GR MME EXCEPTION: DMA_READ_FIFOED_FROM_PB");
+	}
+
+	if (gr_mme_hww_esr_info_pc_valid_v(mme_hww_info) == 0x1U) {
+		nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
+			"GR MME EXCEPTION: INFO2 0x%x, INFO3 0x%x, INFO4 0x%x",
+			nvgpu_readl(g, gr_mme_hww_esr_info2_r()),
+			nvgpu_readl(g, gr_mme_hww_esr_info3_r()),
+			nvgpu_readl(g, gr_mme_hww_esr_info4_r()));
+	}
+}

@@ -92,4 +92,5 @@ void gr_tu104_init_sm_dsm_reg_info(void);
 void gr_tu104_get_sm_dsm_perf_ctrl_regs(struct gk20a *g,
 	u32 *num_sm_dsm_perf_ctrl_regs, u32 **sm_dsm_perf_ctrl_regs,
 	u32 *ctrl_register_stride);
+void gr_tu104_log_mme_exception(struct gk20a *g);
 #endif /* NVGPU_GR_TU104_H */

@@ -527,6 +527,7 @@ static const struct gpu_ops tu104_ops = {
 		.dump_gr_falcon_stats = gk20a_fecs_dump_falcon_stats,
 		.get_fecs_ctx_state_store_major_rev_id =
 			gk20a_gr_get_fecs_ctx_state_store_major_rev_id,
+		.log_mme_exception = gr_tu104_log_mme_exception,
 		.ctxsw_prog = {
 			.hw_get_fecs_header_size =
 				gm20b_ctxsw_prog_hw_get_fecs_header_size,