gpu: nvgpu: reduce code complexity in gr.intr unit

Reduce the code complexity of the following functions in the gr.intr unit:
gm20b_gr_intr_handle_exceptions(complexity : 13)
tu104_gr_intr_log_mme_exception(complexity : 13)
gv11b_gr_intr_handle_icache_exception(complexity : 17)
gv11b_gr_intr_handle_gpc_gpccs_exception(complexity : 13)
gv11b_gr_intr_handle_l1_tag_exception(complexity : 15)
gv11b_gr_intr_handle_gpc_gpcmmu_exception(complexity : 15)

Create sub-functions by moving the control-statement code out of each
function whose complexity is above 10.
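
The refactoring follows one pattern throughout: each extracted helper
checks a single exception bit, reports and clears it, and returns 1U or
0U, so the remaining top-level handler only accumulates the results with
a bitwise OR. A minimal sketch of that pattern is shown below; the
gr_some_* / gr_xxx_* names are placeholders, not real nvgpu register
accessors, and the actual helpers appear in the diff further down.

/*
 * Sketch only: "some" names are placeholders for a concrete unit
 * (fe, pd, scc, ...). Each helper handles exactly one exception bit.
 */
static u32 gr_xxx_intr_check_some_exception(struct gk20a *g, u32 exception)
{
	if ((exception & gr_exception_some_m()) != 0U) {
		u32 esr = nvgpu_readl(g, gr_some_hww_esr_r());

		nvgpu_err(g, "some exception: esr 0x%08x", esr);
		nvgpu_writel(g, gr_some_hww_esr_r(),
			gr_some_hww_esr_reset_active_f());
		return 1U;	/* this exception requests a gpc reset */
	}
	return 0U;
}

bool gr_xxx_intr_handle_exceptions(struct gk20a *g)
{
	u32 exception = nvgpu_readl(g, gr_exception_r());
	u32 gpc_reset = 0U;

	/* one such call per exception bit; the caller only ORs results */
	gpc_reset |= gr_xxx_intr_check_some_exception(g, exception);

	return (gpc_reset != 0U);
}

Keeping the helpers static and specific to one exception bit is what
brings each of them down to the complexity 2-4 range listed below.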

Create the following 8 sub-functions, one for each exception handled in
the gm20b_gr_intr_handle_exceptions function:
gr_gm20b_intr_check_gr_fe_exception(complexity : 2)
gr_gm20b_intr_check_gr_memfmt_exception(complexity : 2)
gr_gm20b_intr_check_gr_pd_exception(complexity : 2)
gr_gm20b_intr_check_gr_scc_exception(complexity : 2)
gr_gm20b_intr_check_gr_ds_exception(complexity : 2)
gr_gm20b_intr_check_gr_ssync_exception(complexity : 4)
gr_gm20b_intr_check_gr_mme_exception(complexity : 3)
gr_gm20b_intr_check_gr_sked_exception(complexity : 2)
and reduce gm20b_gr_intr_handle_exceptions complexity to 3.

Create the following 2 sub-functions from the
tu104_gr_intr_log_mme_exception function:
gr_tu104_check_dma_exception(complexity : 6)
gr_tu104_check_ram_access_exception(complexity : 3)
and reduce tu104_gr_intr_log_mme_exception complexity to 6.

Create the following 2 sub-functions for corrected and uncorrected error
reporting from the gv11b_gr_intr_handle_icache_exception function:
gv11b_gr_intr_report_icache_uncorrected_err(complexity : 5)
gv11b_gr_intr_report_icache_corrected_err(complexity : 5)
and reduce gv11b_gr_intr_handle_icache_exception complexity to 9.

Create the following 2 sub-functions for corrected and uncorrected error
reporting from the gv11b_gr_intr_handle_l1_tag_exception function:
gv11b_gr_intr_report_l1_tag_uncorrected_err(complexity : 4)
gv11b_gr_intr_report_l1_tag_corrected_err(complexity : 4)
and reduce gv11b_gr_intr_handle_l1_tag_exception complexity to 9.

Create the following sub-function for error reporting from the
gv11b_gr_intr_handle_gpc_gpccs_exception function:
gv11b_gr_intr_report_gpccs_ecc_err(complexity : 5)
and reduce gv11b_gr_intr_handle_gpc_gpccs_exception complexity to 9.

Create the following sub-function for error reporting from the
gv11b_gr_intr_handle_gpc_gpcmmu_exception function:
gv11b_gr_intr_report_gpcmmu_ecc_err(complexity : 5)
and reduce gv11b_gr_intr_handle_gpc_gpcmmu_exception complexity to 9.
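
The gv11b ECC reporting helpers follow the same extraction idea but
return void: the caller still handles the counter deltas and overflow
accounting, and the helper only maps ecc_status bits onto
nvgpu_report_ecc_err() calls. A condensed sketch is shown below; the
some_unit_*_m() masks and GPU_SOME_* error ids are placeholders, not the
real hardware defines.

/*
 * Sketch only: mask and error-id names are placeholders. The counter
 * values are computed by the caller and passed in read-only.
 */
static void gr_xxx_intr_report_some_ecc_err(struct gk20a *g,
		u32 ecc_status, u32 gpc,
		u32 corrected_err, u32 uncorrected_err)
{
	if ((ecc_status & some_unit_corrected_err_m()) != 0U) {
		(void) nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_MMU, gpc,
			GPU_SOME_ECC_CORRECTED, 0, corrected_err);
		nvgpu_log(g, gpu_dbg_intr, "corrected ecc error");
	}
	if ((ecc_status & some_unit_uncorrected_err_m()) != 0U) {
		(void) nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_MMU, gpc,
			GPU_SOME_ECC_UNCORRECTED, 0, uncorrected_err);
		nvgpu_log(g, gpu_dbg_intr, "uncorrected ecc error");
	}
}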

Jira NVGPU-3661

Change-Id: I855b9ba055f3a8578c7b62cd59e249017ec31936
Signed-off-by: Vinod G <vinodg@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2155852
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-misra <svc-mobile-misra@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>


@@ -121,75 +121,10 @@ u32 gm20b_gr_intr_read_pending_interrupts(struct gk20a *g,
return gr_intr;
}
bool gm20b_gr_intr_handle_exceptions(struct gk20a *g, bool *is_gpc_exception)
static u32 gr_gm20b_intr_check_gr_ssync_exception(struct gk20a *g,
u32 exception)
{
bool gpc_reset = false;
u32 exception = nvgpu_readl(g, gr_exception_r());
nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
"exception %08x\n", exception);
if ((exception & gr_exception_fe_m()) != 0U) {
u32 fe = nvgpu_readl(g, gr_fe_hww_esr_r());
u32 info = nvgpu_readl(g, gr_fe_hww_esr_info_r());
nvgpu_gr_intr_report_exception(g, 0,
GPU_PGRAPH_FE_EXCEPTION,
fe, 0);
nvgpu_err(g, "fe exception: esr 0x%08x, info 0x%08x",
fe, info);
nvgpu_writel(g, gr_fe_hww_esr_r(),
gr_fe_hww_esr_reset_active_f());
gpc_reset = true;
}
if ((exception & gr_exception_memfmt_m()) != 0U) {
u32 memfmt = nvgpu_readl(g, gr_memfmt_hww_esr_r());
nvgpu_gr_intr_report_exception(g, 0,
GPU_PGRAPH_MEMFMT_EXCEPTION,
memfmt, 0);
nvgpu_err(g, "memfmt exception: esr %08x", memfmt);
nvgpu_writel(g, gr_memfmt_hww_esr_r(),
gr_memfmt_hww_esr_reset_active_f());
gpc_reset = true;
}
if ((exception & gr_exception_pd_m()) != 0U) {
u32 pd = nvgpu_readl(g, gr_pd_hww_esr_r());
nvgpu_gr_intr_report_exception(g, 0,
GPU_PGRAPH_PD_EXCEPTION,
pd, 0);
nvgpu_err(g, "pd exception: esr 0x%08x", pd);
nvgpu_writel(g, gr_pd_hww_esr_r(),
gr_pd_hww_esr_reset_active_f());
gpc_reset = true;
}
if ((exception & gr_exception_scc_m()) != 0U) {
u32 scc = nvgpu_readl(g, gr_scc_hww_esr_r());
nvgpu_gr_intr_report_exception(g, 0,
GPU_PGRAPH_SCC_EXCEPTION,
scc, 0);
nvgpu_err(g, "scc exception: esr 0x%08x", scc);
nvgpu_writel(g, gr_scc_hww_esr_r(),
gr_scc_hww_esr_reset_active_f());
gpc_reset = true;
}
if ((exception & gr_exception_ds_m()) != 0U) {
u32 ds = nvgpu_readl(g, gr_ds_hww_esr_r());
nvgpu_gr_intr_report_exception(g, 0,
GPU_PGRAPH_DS_EXCEPTION,
ds, 0);
nvgpu_err(g, "ds exception: esr: 0x%08x", ds);
nvgpu_writel(g, gr_ds_hww_esr_r(),
gr_ds_hww_esr_reset_task_f());
gpc_reset = true;
}
u32 reset_gpc = 0U;
if ((exception & gr_exception_ssync_m()) != 0U) {
u32 ssync_esr = 0;
@@ -197,7 +132,7 @@ bool gm20b_gr_intr_handle_exceptions(struct gk20a *g, bool *is_gpc_exception)
if (g->ops.gr.intr.handle_ssync_hww != NULL) {
if (g->ops.gr.intr.handle_ssync_hww(g, &ssync_esr)
!= 0) {
gpc_reset = true;
reset_gpc = 1U;
}
} else {
nvgpu_err(g, "unhandled ssync exception");
@@ -206,7 +141,12 @@ bool gm20b_gr_intr_handle_exceptions(struct gk20a *g, bool *is_gpc_exception)
GPU_PGRAPH_SSYNC_EXCEPTION,
ssync_esr, 0);
}
return reset_gpc;
}
static u32 gr_gm20b_intr_check_gr_mme_exception(struct gk20a *g,
u32 exception)
{
if ((exception & gr_exception_mme_m()) != 0U) {
u32 mme = nvgpu_readl(g, gr_mme_hww_esr_r());
u32 info = nvgpu_readl(g, gr_mme_hww_esr_info_r());
@@ -222,9 +162,14 @@ bool gm20b_gr_intr_handle_exceptions(struct gk20a *g, bool *is_gpc_exception)
nvgpu_writel(g, gr_mme_hww_esr_r(),
gr_mme_hww_esr_reset_active_f());
gpc_reset = true;
return 1U;
}
return 0U;
}
static u32 gr_gm20b_intr_check_gr_sked_exception(struct gk20a *g,
u32 exception)
{
if ((exception & gr_exception_sked_m()) != 0U) {
u32 sked = nvgpu_readl(g, gr_sked_hww_esr_r());
@@ -234,15 +179,120 @@ bool gm20b_gr_intr_handle_exceptions(struct gk20a *g, bool *is_gpc_exception)
nvgpu_err(g, "sked exception: esr 0x%08x", sked);
nvgpu_writel(g, gr_sked_hww_esr_r(),
gr_sked_hww_esr_reset_active_f());
gpc_reset = true;
return 1U;
}
return 0U;
}
static u32 gr_gm20b_intr_check_gr_fe_exception(struct gk20a *g, u32 exception)
{
if ((exception & gr_exception_fe_m()) != 0U) {
u32 fe = nvgpu_readl(g, gr_fe_hww_esr_r());
u32 info = nvgpu_readl(g, gr_fe_hww_esr_info_r());
nvgpu_gr_intr_report_exception(g, 0,
GPU_PGRAPH_FE_EXCEPTION,
fe, 0);
nvgpu_err(g, "fe exception: esr 0x%08x, info 0x%08x",
fe, info);
nvgpu_writel(g, gr_fe_hww_esr_r(),
gr_fe_hww_esr_reset_active_f());
return 1U;
}
return 0U;
}
static u32 gr_gm20b_intr_check_gr_memfmt_exception(struct gk20a *g,
u32 exception)
{
if ((exception & gr_exception_memfmt_m()) != 0U) {
u32 memfmt = nvgpu_readl(g, gr_memfmt_hww_esr_r());
nvgpu_gr_intr_report_exception(g, 0,
GPU_PGRAPH_MEMFMT_EXCEPTION,
memfmt, 0);
nvgpu_err(g, "memfmt exception: esr %08x", memfmt);
nvgpu_writel(g, gr_memfmt_hww_esr_r(),
gr_memfmt_hww_esr_reset_active_f());
return 1U;
}
return 0U;
}
static u32 gr_gm20b_intr_check_gr_pd_exception(struct gk20a *g,
u32 exception)
{
if ((exception & gr_exception_pd_m()) != 0U) {
u32 pd = nvgpu_readl(g, gr_pd_hww_esr_r());
nvgpu_gr_intr_report_exception(g, 0,
GPU_PGRAPH_PD_EXCEPTION,
pd, 0);
nvgpu_err(g, "pd exception: esr 0x%08x", pd);
nvgpu_writel(g, gr_pd_hww_esr_r(),
gr_pd_hww_esr_reset_active_f());
return 1U;
}
return 0U;
}
static u32 gr_gm20b_intr_check_gr_scc_exception(struct gk20a *g,
u32 exception)
{
if ((exception & gr_exception_scc_m()) != 0U) {
u32 scc = nvgpu_readl(g, gr_scc_hww_esr_r());
nvgpu_gr_intr_report_exception(g, 0,
GPU_PGRAPH_SCC_EXCEPTION,
scc, 0);
nvgpu_err(g, "scc exception: esr 0x%08x", scc);
nvgpu_writel(g, gr_scc_hww_esr_r(),
gr_scc_hww_esr_reset_active_f());
return 1U;
}
return 0U;
}
static u32 gr_gm20b_intr_check_gr_ds_exception(struct gk20a *g,
u32 exception)
{
if ((exception & gr_exception_ds_m()) != 0U) {
u32 ds = nvgpu_readl(g, gr_ds_hww_esr_r());
nvgpu_gr_intr_report_exception(g, 0,
GPU_PGRAPH_DS_EXCEPTION,
ds, 0);
nvgpu_err(g, "ds exception: esr: 0x%08x", ds);
nvgpu_writel(g, gr_ds_hww_esr_r(),
gr_ds_hww_esr_reset_task_f());
return 1U;
}
return 0U;
}
bool gm20b_gr_intr_handle_exceptions(struct gk20a *g, bool *is_gpc_exception)
{
u32 gpc_reset = 0U;
u32 exception = nvgpu_readl(g, gr_exception_r());
nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
"exception %08x\n", exception);
gpc_reset = gr_gm20b_intr_check_gr_fe_exception(g, exception);
gpc_reset |= gr_gm20b_intr_check_gr_memfmt_exception(g, exception);
gpc_reset |= gr_gm20b_intr_check_gr_pd_exception(g, exception);
gpc_reset |= gr_gm20b_intr_check_gr_scc_exception(g, exception);
gpc_reset |= gr_gm20b_intr_check_gr_ds_exception(g, exception);
gpc_reset |= gr_gm20b_intr_check_gr_ssync_exception(g, exception);
gpc_reset |= gr_gm20b_intr_check_gr_mme_exception(g, exception);
gpc_reset |= gr_gm20b_intr_check_gr_sked_exception(g, exception);
/* check if a gpc exception has occurred */
if ((exception & gr_exception_gpc_m()) != 0U) {
*is_gpc_exception = true;
}
return gpc_reset;
return (gpc_reset != 0U)? true: false;
}
u32 gm20b_gr_intr_read_gpc_tpc_exception(u32 gpc_exception)


@@ -368,6 +368,44 @@ void gv11b_gr_intr_handle_gcc_exception(struct gk20a *g, u32 gpc,
gr_pri_gpc0_gcc_l15_ecc_status_reset_task_f());
}
static void gv11b_gr_intr_report_gpcmmu_ecc_err(struct gk20a *g,
u32 ecc_status, u32 gpc,
u32 correct_err, u32 uncorrect_err)
{
if ((ecc_status &
gr_gpc0_mmu_l1tlb_ecc_status_corrected_err_l1tlb_sa_data_m()) !=
0U) {
(void) nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_MMU, gpc,
GPU_MMU_L1TLB_SA_DATA_ECC_CORRECTED,
0, correct_err);
nvgpu_log(g, gpu_dbg_intr, "corrected ecc sa data error");
}
if ((ecc_status &
gr_gpc0_mmu_l1tlb_ecc_status_uncorrected_err_l1tlb_sa_data_m()) !=
0U) {
(void) nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_MMU, gpc,
GPU_MMU_L1TLB_SA_DATA_ECC_UNCORRECTED,
0, uncorrect_err);
nvgpu_log(g, gpu_dbg_intr, "uncorrected ecc sa data error");
}
if ((ecc_status &
gr_gpc0_mmu_l1tlb_ecc_status_corrected_err_l1tlb_fa_data_m()) !=
0U) {
(void) nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_MMU, gpc,
GPU_MMU_L1TLB_FA_DATA_ECC_CORRECTED,
0, correct_err);
nvgpu_log(g, gpu_dbg_intr, "corrected ecc fa data error");
}
if ((ecc_status &
gr_gpc0_mmu_l1tlb_ecc_status_uncorrected_err_l1tlb_fa_data_m()) !=
0U) {
(void) nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_MMU, gpc,
GPU_MMU_L1TLB_FA_DATA_ECC_UNCORRECTED,
0, uncorrect_err);
nvgpu_log(g, gpu_dbg_intr, "uncorrected ecc fa data error");
}
}
void gv11b_gr_intr_handle_gpc_gpcmmu_exception(struct gk20a *g, u32 gpc,
u32 gpc_exception, u32 *corrected_err, u32 *uncorrected_err)
{
@@ -430,10 +468,12 @@ void gv11b_gr_intr_handle_gpc_gpcmmu_exception(struct gk20a *g, u32 gpc,
if (corrected_overflow != 0U) {
corrected_delta = nvgpu_safe_add_u32(corrected_delta,
BIT32(gr_gpc0_mmu_l1tlb_ecc_corrected_err_count_total_s()));
nvgpu_info(g, "mmu l1tlb ecc counter corrected overflow!");
}
if (uncorrected_overflow != 0U) {
uncorrected_delta = nvgpu_safe_add_u32(uncorrected_delta,
BIT32(gr_gpc0_mmu_l1tlb_ecc_uncorrected_err_count_total_s()));
nvgpu_info(g, "mmu l1tlb ecc counter uncorrected overflow!");
}
*corrected_err = nvgpu_safe_add_u32(*corrected_err, corrected_delta);
@@ -443,41 +483,8 @@ void gv11b_gr_intr_handle_gpc_gpcmmu_exception(struct gk20a *g, u32 gpc,
nvgpu_log(g, gpu_dbg_intr,
"mmu l1tlb gpc:%d ecc interrupt intr: 0x%x", gpc, hww_esr);
if ((ecc_status &
gr_gpc0_mmu_l1tlb_ecc_status_corrected_err_l1tlb_sa_data_m()) !=
0U) {
(void) nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_MMU, gpc,
GPU_MMU_L1TLB_SA_DATA_ECC_CORRECTED,
0, (u32)*corrected_err);
nvgpu_log(g, gpu_dbg_intr, "corrected ecc sa data error");
}
if ((ecc_status &
gr_gpc0_mmu_l1tlb_ecc_status_uncorrected_err_l1tlb_sa_data_m()) !=
0U) {
(void) nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_MMU, gpc,
GPU_MMU_L1TLB_SA_DATA_ECC_UNCORRECTED,
0, (u32)*uncorrected_err);
nvgpu_log(g, gpu_dbg_intr, "uncorrected ecc sa data error");
}
if ((ecc_status &
gr_gpc0_mmu_l1tlb_ecc_status_corrected_err_l1tlb_fa_data_m()) !=
0U) {
(void) nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_MMU, gpc,
GPU_MMU_L1TLB_FA_DATA_ECC_CORRECTED,
0, (u32)*corrected_err);
nvgpu_log(g, gpu_dbg_intr, "corrected ecc fa data error");
}
if ((ecc_status &
gr_gpc0_mmu_l1tlb_ecc_status_uncorrected_err_l1tlb_fa_data_m()) !=
0U) {
(void) nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_MMU, gpc,
GPU_MMU_L1TLB_FA_DATA_ECC_UNCORRECTED,
0, (u32)*uncorrected_err);
nvgpu_log(g, gpu_dbg_intr, "uncorrected ecc fa data error");
}
if ((corrected_overflow != 0U) || (uncorrected_overflow != 0U)) {
nvgpu_info(g, "mmu l1tlb ecc counter overflow!");
}
gv11b_gr_intr_report_gpcmmu_ecc_err(g, ecc_status, gpc,
(u32)*corrected_err, (u32)*uncorrected_err);
nvgpu_log(g, gpu_dbg_intr,
"ecc error address: 0x%x", ecc_addr);
@@ -486,6 +493,39 @@ void gv11b_gr_intr_handle_gpc_gpcmmu_exception(struct gk20a *g, u32 gpc,
(u32)*corrected_err, (u32)*uncorrected_err);
}
static void gv11b_gr_intr_report_gpccs_ecc_err(struct gk20a *g,
u32 ecc_status, u32 ecc_addr, u32 gpc,
u32 correct_err, u32 uncorrect_err)
{
if ((ecc_status &
gr_gpc0_gpccs_falcon_ecc_status_corrected_err_imem_m()) != 0U) {
(void) nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_GPCCS,
gpc, GPU_GPCCS_FALCON_IMEM_ECC_CORRECTED,
ecc_addr, correct_err);
nvgpu_log(g, gpu_dbg_intr, "imem ecc error corrected");
}
if ((ecc_status &
gr_gpc0_gpccs_falcon_ecc_status_uncorrected_err_imem_m()) != 0U) {
(void) nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_GPCCS,
gpc, GPU_GPCCS_FALCON_IMEM_ECC_UNCORRECTED,
ecc_addr, uncorrect_err);
nvgpu_log(g, gpu_dbg_intr, "imem ecc error uncorrected");
}
if ((ecc_status &
gr_gpc0_gpccs_falcon_ecc_status_corrected_err_dmem_m()) != 0U) {
(void) nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_GPCCS,
gpc, GPU_GPCCS_FALCON_DMEM_ECC_CORRECTED,
ecc_addr, correct_err);
nvgpu_log(g, gpu_dbg_intr, "dmem ecc error corrected");
}
if ((ecc_status &
gr_gpc0_gpccs_falcon_ecc_status_uncorrected_err_dmem_m()) != 0U) {
(void) nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_GPCCS,
gpc, GPU_GPCCS_FALCON_DMEM_ECC_UNCORRECTED,
ecc_addr, uncorrect_err);
nvgpu_log(g, gpu_dbg_intr, "dmem ecc error uncorrected");
}
}
void gv11b_gr_intr_handle_gpc_gpccs_exception(struct gk20a *g, u32 gpc,
u32 gpc_exception, u32 *corrected_err, u32 *uncorrected_err)
{
@@ -553,34 +593,9 @@ void gv11b_gr_intr_handle_gpc_gpccs_exception(struct gk20a *g, u32 gpc,
nvgpu_log(g, gpu_dbg_intr,
"gppcs gpc:%d ecc interrupt intr: 0x%x", gpc, hww_esr);
if ((ecc_status &
gr_gpc0_gpccs_falcon_ecc_status_corrected_err_imem_m()) != 0U) {
(void) nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_GPCCS,
gpc, GPU_GPCCS_FALCON_IMEM_ECC_CORRECTED,
ecc_addr, (u32)*corrected_err);
nvgpu_log(g, gpu_dbg_intr, "imem ecc error corrected");
}
if ((ecc_status &
gr_gpc0_gpccs_falcon_ecc_status_uncorrected_err_imem_m()) != 0U) {
(void) nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_GPCCS,
gpc, GPU_GPCCS_FALCON_IMEM_ECC_UNCORRECTED,
ecc_addr, (u32)*uncorrected_err);
nvgpu_log(g, gpu_dbg_intr, "imem ecc error uncorrected");
}
if ((ecc_status &
gr_gpc0_gpccs_falcon_ecc_status_corrected_err_dmem_m()) != 0U) {
(void) nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_GPCCS,
gpc, GPU_GPCCS_FALCON_DMEM_ECC_CORRECTED,
ecc_addr, (u32)*corrected_err);
nvgpu_log(g, gpu_dbg_intr, "dmem ecc error corrected");
}
if ((ecc_status &
gr_gpc0_gpccs_falcon_ecc_status_uncorrected_err_dmem_m()) != 0U) {
(void) nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_GPCCS,
gpc, GPU_GPCCS_FALCON_DMEM_ECC_UNCORRECTED,
ecc_addr, (u32)*uncorrected_err);
nvgpu_log(g, gpu_dbg_intr, "dmem ecc error uncorrected");
}
gv11b_gr_intr_report_gpccs_ecc_err(g, ecc_status, ecc_addr, gpc,
(u32)*corrected_err, (u32)*uncorrected_err);
if ((corrected_overflow != 0U) || (uncorrected_overflow != 0U)) {
nvgpu_info(g, "gpccs ecc counter overflow!");
}
@@ -746,6 +761,60 @@ void gv11b_gr_intr_set_hww_esr_report_mask(struct gk20a *g)
gr_gpc0_tpc0_sm0_hww_global_esr_report_mask_multiple_warp_errors_report_f());
}
static void gv11b_gr_intr_report_l1_tag_uncorrected_err(struct gk20a *g,
u32 l1_tag_ecc_status, u32 gpc, u32 tpc)
{
if ((l1_tag_ecc_status &
(gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_uncorrected_err_el1_0_m() |
gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_uncorrected_err_el1_1_m())) != 0U) {
(void) nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_SM,
(gpc << 8) | tpc,
GPU_SM_L1_TAG_ECC_UNCORRECTED, 0,
g->ecc.gr.sm_l1_tag_ecc_uncorrected_err_count[gpc][tpc].counter);
}
if ((l1_tag_ecc_status &
gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_uncorrected_err_miss_fifo_m()) != 0U) {
(void) nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_SM,
(gpc << 8) | tpc,
GPU_SM_L1_TAG_MISS_FIFO_ECC_UNCORRECTED, 0,
g->ecc.gr.sm_l1_tag_ecc_uncorrected_err_count[gpc][tpc].counter);
}
if ((l1_tag_ecc_status &
gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_uncorrected_err_pixrpf_m()) != 0U) {
(void) nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_SM,
(gpc << 8) | tpc,
GPU_SM_L1_TAG_S2R_PIXPRF_ECC_UNCORRECTED, 0,
g->ecc.gr.sm_l1_tag_ecc_uncorrected_err_count[gpc][tpc].counter);
}
}
static void gv11b_gr_intr_report_l1_tag_corrected_err(struct gk20a *g,
u32 l1_tag_ecc_status, u32 gpc, u32 tpc)
{
if ((l1_tag_ecc_status &
(gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_corrected_err_el1_0_m() |
gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_corrected_err_el1_1_m())) != 0U) {
(void) nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_SM,
(gpc << 8) | tpc,
GPU_SM_L1_TAG_ECC_CORRECTED, 0,
g->ecc.gr.sm_l1_tag_ecc_corrected_err_count[gpc][tpc].counter);
}
if ((l1_tag_ecc_status &
gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_corrected_err_miss_fifo_m()) != 0U) {
(void) nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_SM,
(gpc << 8) | tpc,
GPU_SM_L1_TAG_MISS_FIFO_ECC_CORRECTED, 0,
g->ecc.gr.sm_l1_tag_ecc_corrected_err_count[gpc][tpc].counter);
}
if ((l1_tag_ecc_status &
gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_corrected_err_pixrpf_m()) != 0U) {
(void) nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_SM,
(gpc << 8) | tpc,
GPU_SM_L1_TAG_S2R_PIXPRF_ECC_CORRECTED, 0,
g->ecc.gr.sm_l1_tag_ecc_corrected_err_count[gpc][tpc].counter);
}
}
static void gv11b_gr_intr_handle_l1_tag_exception(struct gk20a *g, u32 gpc, u32 tpc,
bool *post_event, struct nvgpu_channel *fault_ch,
u32 *hww_global_esr)
@@ -813,28 +882,7 @@ static void gv11b_gr_intr_handle_l1_tag_exception(struct gk20a *g, u32 gpc, u32
nvgpu_safe_add_u32(
g->ecc.gr.sm_l1_tag_ecc_corrected_err_count[gpc][tpc].counter,
l1_tag_corrected_err_count_delta);
if ((l1_tag_ecc_status &
(gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_corrected_err_el1_0_m() |
gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_corrected_err_el1_1_m())) != 0U) {
(void) nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_SM,
(gpc << 8) | tpc,
GPU_SM_L1_TAG_ECC_CORRECTED, 0,
g->ecc.gr.sm_l1_tag_ecc_corrected_err_count[gpc][tpc].counter);
}
if ((l1_tag_ecc_status &
gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_corrected_err_miss_fifo_m()) != 0U) {
(void) nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_SM,
(gpc << 8) | tpc,
GPU_SM_L1_TAG_MISS_FIFO_ECC_CORRECTED, 0,
g->ecc.gr.sm_l1_tag_ecc_corrected_err_count[gpc][tpc].counter);
}
if ((l1_tag_ecc_status &
gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_corrected_err_pixrpf_m()) != 0U) {
(void) nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_SM,
(gpc << 8) | tpc,
GPU_SM_L1_TAG_S2R_PIXPRF_ECC_CORRECTED, 0,
g->ecc.gr.sm_l1_tag_ecc_corrected_err_count[gpc][tpc].counter);
}
gv11b_gr_intr_report_l1_tag_corrected_err(g, l1_tag_ecc_status, gpc, tpc);
nvgpu_writel(g, nvgpu_safe_add_u32(
gr_pri_gpc0_tpc0_sm_l1_tag_ecc_corrected_err_count_r(), offset),
0);
@@ -855,28 +903,7 @@ static void gv11b_gr_intr_handle_l1_tag_exception(struct gk20a *g, u32 gpc, u32
nvgpu_safe_add_u32(
g->ecc.gr.sm_l1_tag_ecc_uncorrected_err_count[gpc][tpc].counter,
l1_tag_uncorrected_err_count_delta);
if ((l1_tag_ecc_status &
(gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_uncorrected_err_el1_0_m() |
gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_uncorrected_err_el1_1_m())) != 0U) {
(void) nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_SM,
(gpc << 8) | tpc,
GPU_SM_L1_TAG_ECC_UNCORRECTED, 0,
g->ecc.gr.sm_l1_tag_ecc_uncorrected_err_count[gpc][tpc].counter);
}
if ((l1_tag_ecc_status &
gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_uncorrected_err_miss_fifo_m()) != 0U) {
(void) nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_SM,
(gpc << 8) | tpc,
GPU_SM_L1_TAG_MISS_FIFO_ECC_UNCORRECTED, 0,
g->ecc.gr.sm_l1_tag_ecc_uncorrected_err_count[gpc][tpc].counter);
}
if ((l1_tag_ecc_status &
gr_pri_gpc0_tpc0_sm_l1_tag_ecc_status_uncorrected_err_pixrpf_m()) != 0U) {
(void) nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_SM,
(gpc << 8) | tpc,
GPU_SM_L1_TAG_S2R_PIXPRF_ECC_UNCORRECTED, 0,
g->ecc.gr.sm_l1_tag_ecc_uncorrected_err_count[gpc][tpc].counter);
}
gv11b_gr_intr_report_l1_tag_uncorrected_err(g, l1_tag_ecc_status, gpc, tpc);
nvgpu_writel(g, nvgpu_safe_add_u32(
gr_pri_gpc0_tpc0_sm_l1_tag_ecc_uncorrected_err_count_r(), offset),
0);
@@ -1203,6 +1230,72 @@ static void gv11b_gr_intr_handle_l1_data_exception(struct gk20a *g, u32 gpc, u32
gr_pri_gpc0_tpc0_sm_l1_data_ecc_status_reset_task_f());
}
static void gv11b_gr_intr_report_icache_uncorrected_err(struct gk20a *g,
u32 icache_ecc_status, u32 gpc, u32 tpc)
{
if ((icache_ecc_status &
gr_pri_gpc0_tpc0_sm_icache_ecc_status_uncorrected_err_l0_data_m()) != 0U) {
(void) nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_SM,
(gpc << 8) | tpc,
GPU_SM_ICACHE_L0_DATA_ECC_UNCORRECTED,
0, g->ecc.gr.sm_icache_ecc_uncorrected_err_count[gpc][tpc].counter);
}
if ((icache_ecc_status &
gr_pri_gpc0_tpc0_sm_icache_ecc_status_uncorrected_err_l0_predecode_m()) != 0U) {
(void) nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_SM,
(gpc << 8) | tpc,
GPU_SM_ICACHE_L0_PREDECODE_ECC_UNCORRECTED,
0, g->ecc.gr.sm_icache_ecc_uncorrected_err_count[gpc][tpc].counter);
}
if ((icache_ecc_status &
gr_pri_gpc0_tpc0_sm_icache_ecc_status_uncorrected_err_l1_data_m()) != 0U) {
(void) nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_SM,
(gpc << 8) | tpc,
GPU_SM_ICACHE_L1_DATA_ECC_UNCORRECTED,
0, g->ecc.gr.sm_icache_ecc_uncorrected_err_count[gpc][tpc].counter);
}
if ((icache_ecc_status &
gr_pri_gpc0_tpc0_sm_icache_ecc_status_uncorrected_err_l1_predecode_m()) != 0U) {
(void) nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_SM,
(gpc << 8) | tpc,
GPU_SM_ICACHE_L1_PREDECODE_ECC_UNCORRECTED,
0, g->ecc.gr.sm_icache_ecc_uncorrected_err_count[gpc][tpc].counter);
}
}
static void gv11b_gr_intr_report_icache_corrected_err(struct gk20a *g,
u32 icache_ecc_status, u32 gpc, u32 tpc)
{
if ((icache_ecc_status &
gr_pri_gpc0_tpc0_sm_icache_ecc_status_corrected_err_l0_data_m()) != 0U) {
(void) nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_SM,
(gpc << 8) | tpc,
GPU_SM_ICACHE_L0_DATA_ECC_CORRECTED,
0, g->ecc.gr.sm_icache_ecc_corrected_err_count[gpc][tpc].counter);
}
if ((icache_ecc_status &
gr_pri_gpc0_tpc0_sm_icache_ecc_status_corrected_err_l0_predecode_m()) != 0U) {
(void) nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_SM,
(gpc << 8) | tpc,
GPU_SM_ICACHE_L0_PREDECODE_ECC_CORRECTED,
0, g->ecc.gr.sm_icache_ecc_corrected_err_count[gpc][tpc].counter);
}
if ((icache_ecc_status &
gr_pri_gpc0_tpc0_sm_icache_ecc_status_corrected_err_l1_data_m()) != 0U) {
(void) nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_SM,
(gpc << 8) | tpc,
GPU_SM_ICACHE_L1_DATA_ECC_CORRECTED,
0, g->ecc.gr.sm_icache_ecc_corrected_err_count[gpc][tpc].counter);
}
if ((icache_ecc_status &
gr_pri_gpc0_tpc0_sm_icache_ecc_status_corrected_err_l1_predecode_m()) != 0U) {
(void) nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_SM,
(gpc << 8) | tpc,
GPU_SM_ICACHE_L1_PREDECODE_ECC_CORRECTED,
0, g->ecc.gr.sm_icache_ecc_corrected_err_count[gpc][tpc].counter);
}
}
static void gv11b_gr_intr_handle_icache_exception(struct gk20a *g, u32 gpc, u32 tpc,
bool *post_event, struct nvgpu_channel *fault_ch,
u32 *hww_global_esr)
@@ -1272,34 +1365,7 @@ static void gv11b_gr_intr_handle_icache_exception(struct gk20a *g, u32 gpc, u32
nvgpu_writel(g, nvgpu_safe_add_u32(
gr_pri_gpc0_tpc0_sm_icache_ecc_corrected_err_count_r(), offset),
0);
if ((icache_ecc_status &
gr_pri_gpc0_tpc0_sm_icache_ecc_status_corrected_err_l0_data_m()) != 0U) {
(void) nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_SM,
(gpc << 8) | tpc,
GPU_SM_ICACHE_L0_DATA_ECC_CORRECTED,
0, g->ecc.gr.sm_icache_ecc_corrected_err_count[gpc][tpc].counter);
}
if ((icache_ecc_status &
gr_pri_gpc0_tpc0_sm_icache_ecc_status_corrected_err_l0_predecode_m()) != 0U) {
(void) nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_SM,
(gpc << 8) | tpc,
GPU_SM_ICACHE_L0_PREDECODE_ECC_CORRECTED,
0, g->ecc.gr.sm_icache_ecc_corrected_err_count[gpc][tpc].counter);
}
if ((icache_ecc_status &
gr_pri_gpc0_tpc0_sm_icache_ecc_status_corrected_err_l1_data_m()) != 0U) {
(void) nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_SM,
(gpc << 8) | tpc,
GPU_SM_ICACHE_L1_DATA_ECC_CORRECTED,
0, g->ecc.gr.sm_icache_ecc_corrected_err_count[gpc][tpc].counter);
}
if ((icache_ecc_status &
gr_pri_gpc0_tpc0_sm_icache_ecc_status_corrected_err_l1_predecode_m()) != 0U) {
(void) nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_SM,
(gpc << 8) | tpc,
GPU_SM_ICACHE_L1_PREDECODE_ECC_CORRECTED,
0, g->ecc.gr.sm_icache_ecc_corrected_err_count[gpc][tpc].counter);
}
gv11b_gr_intr_report_icache_corrected_err(g, icache_ecc_status, gpc, tpc);
}
if ((icache_uncorrected_err_count_delta > 0U) || is_icache_ecc_uncorrected_total_err_overflow) {
nvgpu_log(g, gpu_dbg_fn | gpu_dbg_intr,
@@ -1320,38 +1386,11 @@ static void gv11b_gr_intr_handle_icache_exception(struct gk20a *g, u32 gpc, u32
nvgpu_writel(g, nvgpu_safe_add_u32(
gr_pri_gpc0_tpc0_sm_icache_ecc_uncorrected_err_count_r(), offset),
0);
if ((icache_ecc_status &
gr_pri_gpc0_tpc0_sm_icache_ecc_status_uncorrected_err_l0_data_m()) != 0U) {
(void) nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_SM,
(gpc << 8) | tpc,
GPU_SM_ICACHE_L0_DATA_ECC_UNCORRECTED,
0, g->ecc.gr.sm_icache_ecc_uncorrected_err_count[gpc][tpc].counter);
}
if ((icache_ecc_status &
gr_pri_gpc0_tpc0_sm_icache_ecc_status_uncorrected_err_l0_predecode_m()) != 0U) {
(void) nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_SM,
(gpc << 8) | tpc,
GPU_SM_ICACHE_L0_PREDECODE_ECC_UNCORRECTED,
0, g->ecc.gr.sm_icache_ecc_uncorrected_err_count[gpc][tpc].counter);
}
if ((icache_ecc_status &
gr_pri_gpc0_tpc0_sm_icache_ecc_status_uncorrected_err_l1_data_m()) != 0U) {
(void) nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_SM,
(gpc << 8) | tpc,
GPU_SM_ICACHE_L1_DATA_ECC_UNCORRECTED,
0, g->ecc.gr.sm_icache_ecc_uncorrected_err_count[gpc][tpc].counter);
}
if ((icache_ecc_status &
gr_pri_gpc0_tpc0_sm_icache_ecc_status_uncorrected_err_l1_predecode_m()) != 0U) {
(void) nvgpu_report_ecc_err(g, NVGPU_ERR_MODULE_SM,
(gpc << 8) | tpc,
GPU_SM_ICACHE_L1_PREDECODE_ECC_UNCORRECTED,
0, g->ecc.gr.sm_icache_ecc_uncorrected_err_count[gpc][tpc].counter);
}
gv11b_gr_intr_report_icache_uncorrected_err(g, icache_ecc_status, gpc, tpc);
}
nvgpu_writel(g, nvgpu_safe_add_u32(
gr_pri_gpc0_tpc0_sm_icache_ecc_status_r(), offset),
gr_pri_gpc0_tpc0_sm_icache_ecc_status_reset_task_f());
}


@@ -158,23 +158,8 @@ void tu104_gr_intr_enable_gpc_exceptions(struct gk20a *g,
gr_gpcs_gpccs_gpc_exception_en_gpcmmu_f(1U)));
}
void tu104_gr_intr_log_mme_exception(struct gk20a *g)
static void gr_tu104_check_dma_exception(struct gk20a *g, u32 mme_hww_esr)
{
u32 mme_hww_esr = nvgpu_readl(g, gr_mme_hww_esr_r());
u32 mme_hww_info = nvgpu_readl(g, gr_mme_hww_esr_info_r());
if ((mme_hww_esr &
gr_mme_hww_esr_missing_macro_data_pending_f()) != 0U) {
nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
"GR MME EXCEPTION: MISSING_MACRO_DATA");
}
if ((mme_hww_esr &
gr_mme_hww_esr_illegal_mme_method_pending_f()) != 0U) {
nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
"GR MME EXCEPTION: ILLEGAL_MME_METHOD");
}
if ((mme_hww_esr &
gr_mme_hww_esr_dma_dram_access_pending_f()) != 0U) {
nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
@@ -199,6 +184,45 @@ void tu104_gr_intr_log_mme_exception(struct gk20a *g)
"GR MME EXCEPTION: DMA_FIFO_RESIZED_WHEN_NONIDLE");
}
if ((mme_hww_esr & gr_mme_hww_esr_dma_read_pb_pending_f()) != 0U) {
nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
"GR MME EXCEPTION: DMA_READ_FIFOED_FROM_PB");
}
}
static void gr_tu104_check_ram_access_exception(struct gk20a *g, u32 mme_hww_esr)
{
if ((mme_hww_esr & gr_mme_hww_esr_inst_ram_acess_pending_f()) != 0U) {
nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
"GR MME EXCEPTION: INSTR_RAM_ACCESS_OUT_OF_BOUNDS");
}
if ((mme_hww_esr & gr_mme_hww_esr_data_ram_access_pending_f()) != 0U) {
nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
"GR MME EXCEPTION: DATA_RAM_ACCESS_OUT_OF_BOUNDS");
}
}
void tu104_gr_intr_log_mme_exception(struct gk20a *g)
{
u32 mme_hww_esr = nvgpu_readl(g, gr_mme_hww_esr_r());
u32 mme_hww_info = nvgpu_readl(g, gr_mme_hww_esr_info_r());
gr_tu104_check_dma_exception(g, mme_hww_esr);
gr_tu104_check_ram_access_exception(g, mme_hww_esr);
if ((mme_hww_esr &
gr_mme_hww_esr_missing_macro_data_pending_f()) != 0U) {
nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
"GR MME EXCEPTION: MISSING_MACRO_DATA");
}
if ((mme_hww_esr &
gr_mme_hww_esr_illegal_mme_method_pending_f()) != 0U) {
nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
"GR MME EXCEPTION: ILLEGAL_MME_METHOD");
}
if ((mme_hww_esr & gr_mme_hww_esr_illegal_opcode_pending_f()) != 0U) {
nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
"GR MME EXCEPTION: ILLEGAL_OPCODE");
@@ -209,21 +233,6 @@ void tu104_gr_intr_log_mme_exception(struct gk20a *g)
"GR MME EXCEPTION: BRANCH_IN_DELAY_SHOT");
}
if ((mme_hww_esr & gr_mme_hww_esr_inst_ram_acess_pending_f()) != 0U) {
nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
"GR MME EXCEPTION: INSTR_RAM_ACCESS_OUT_OF_BOUNDS");
}
if ((mme_hww_esr & gr_mme_hww_esr_data_ram_access_pending_f()) != 0U) {
nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
"GR MME EXCEPTION: DATA_RAM_ACCESS_OUT_OF_BOUNDS");
}
if ((mme_hww_esr & gr_mme_hww_esr_dma_read_pb_pending_f()) != 0U) {
nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
"GR MME EXCEPTION: DMA_READ_FIFOED_FROM_PB");
}
if (gr_mme_hww_esr_info_pc_valid_v(mme_hww_info) == 0x1U) {
nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg,
"GR MME EXCEPTION: INFO2 0x%x, INFO3 0x%x, INFO4 0x%x",