From cd02e4d70f8e59d337b374449471d4ca0e263726 Mon Sep 17 00:00:00 2001 From: Vinod G Date: Mon, 20 May 2019 16:16:17 -0700 Subject: [PATCH] gpu: nvgpu: Fix CERT INT30-C errors in gr intr unit Fix CERT INT30-C error in gr interrupt units cert_violation: Unsigned integer operation may wrap. Use nvgpu_safe_ops macros for addition and subtraction. Jira NVGPU-3412 Change-Id: Id2d936e77959005616faf069aff6701789342456 Signed-off-by: Vinod G Reviewed-on: https://git-master.nvidia.com/r/2122474 Reviewed-by: svc-mobile-coverity GVS: Gerrit_Virtual_Submit Reviewed-by: Seshendra Gadagottu Tested-by: Seshendra Gadagottu Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/common/gr/gr_intr.c | 11 +-- drivers/gpu/nvgpu/hal/gr/intr/gr_intr_gm20b.c | 30 +++++--- drivers/gpu/nvgpu/hal/gr/intr/gr_intr_gp10b.c | 72 +++++++++++-------- drivers/gpu/nvgpu/hal/gr/intr/gr_intr_gv11b.c | 33 ++++++--- drivers/gpu/nvgpu/hal/gr/intr/gr_intr_tu104.c | 4 +- 5 files changed, 94 insertions(+), 56 deletions(-) diff --git a/drivers/gpu/nvgpu/common/gr/gr_intr.c b/drivers/gpu/nvgpu/common/gr/gr_intr.c index ca3d59e93..8fe8a6d91 100644 --- a/drivers/gpu/nvgpu/common/gr/gr_intr.c +++ b/drivers/gpu/nvgpu/common/gr/gr_intr.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #if defined(CONFIG_GK20A_CYCLE_STATS) @@ -68,7 +69,8 @@ static int gr_intr_handle_tpc_exception(struct gk20a *g, u32 gpc, u32 tpc, { int tmp_ret, ret = 0; struct nvgpu_gr_tpc_exception pending_tpc; - u32 offset = nvgpu_gr_gpc_offset(g, gpc) + nvgpu_gr_tpc_offset(g, tpc); + u32 offset = nvgpu_safe_add_u32(nvgpu_gr_gpc_offset(g, gpc), + nvgpu_gr_tpc_offset(g, tpc)); u32 tpc_exception = g->ops.gr.intr.get_tpc_exception(g, offset, &pending_tpc); u32 sm_per_tpc = nvgpu_get_litter_value(g, GPU_LIT_NUM_SM_PER_TPC); @@ -298,8 +300,8 @@ struct nvgpu_channel *nvgpu_gr_intr_get_channel_from_ctx(struct gk20a *g, intr->chid_tlb[intr->channel_tlb_flush_index].tsgid = tsgid; intr->channel_tlb_flush_index = - (intr->channel_tlb_flush_index + 1U) & - (GR_CHANNEL_MAP_TLB_SIZE - 1U); + (nvgpu_safe_add_u32(intr->channel_tlb_flush_index, 1U)) & + (nvgpu_safe_sub_u32(GR_CHANNEL_MAP_TLB_SIZE, 1U)); unlock: nvgpu_spinlock_release(&intr->ch_tlb_lock); @@ -374,7 +376,8 @@ int nvgpu_gr_intr_handle_sm_exception(struct gk20a *g, u32 gpc, u32 tpc, u32 sm, int ret = 0; bool do_warp_sync = false, early_exit = false, ignore_debugger = false; bool disable_sm_exceptions = true; - u32 offset = nvgpu_gr_gpc_offset(g, gpc) + nvgpu_gr_tpc_offset(g, tpc); + u32 offset = nvgpu_safe_add_u32(nvgpu_gr_gpc_offset(g, gpc), + nvgpu_gr_tpc_offset(g, tpc)); bool sm_debugger_attached; u32 global_esr, warp_esr, global_mask; u64 hww_warp_esr_pc = 0; diff --git a/drivers/gpu/nvgpu/hal/gr/intr/gr_intr_gm20b.c b/drivers/gpu/nvgpu/hal/gr/intr/gr_intr_gm20b.c index 025bf6898..e0dda3b03 100644 --- a/drivers/gpu/nvgpu/hal/gr/intr/gr_intr_gm20b.c +++ b/drivers/gpu/nvgpu/hal/gr/intr/gr_intr_gm20b.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -345,7 +346,10 @@ u32 gm20b_gr_intr_read_gpc_exception(struct gk20a *g, u32 gpc) { u32 gpc_offset = nvgpu_gr_gpc_offset(g, gpc); - return nvgpu_readl(g, gr_gpc0_gpccs_gpc_exception_r() + gpc_offset); + return nvgpu_readl(g, + nvgpu_safe_add_u32( + gr_gpc0_gpccs_gpc_exception_r(), + gpc_offset)); } u32 gm20b_gr_intr_read_exception1(struct gk20a *g) @@ -397,18 +401,21 @@ u32 gm20b_gr_intr_get_tpc_exception(struct gk20a *g, u32 offset, void gm20b_gr_intr_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc) { - u32 offset = nvgpu_gr_gpc_offset(g, gpc) + nvgpu_gr_tpc_offset(g, tpc); + u32 offset = nvgpu_safe_add_u32( + nvgpu_gr_gpc_offset(g, gpc), + nvgpu_gr_tpc_offset(g, tpc)); u32 esr; nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg, " "); esr = nvgpu_readl(g, - gr_gpc0_tpc0_tex_m_hww_esr_r() + offset); + nvgpu_safe_add_u32( + gr_gpc0_tpc0_tex_m_hww_esr_r(), offset)); nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, "0x%08x", esr); nvgpu_writel(g, - gr_gpc0_tpc0_tex_m_hww_esr_r() + offset, - esr); + nvgpu_safe_add_u32( + gr_gpc0_tpc0_tex_m_hww_esr_r(), offset), esr); } void gm20b_gr_intr_enable_hww_exceptions(struct gk20a *g) @@ -455,21 +462,22 @@ void gm20b_gr_intr_enable_gpc_exceptions(struct gk20a *g, tpc_mask_calc = (u32)BIT32( nvgpu_gr_config_get_max_tpc_per_gpc_count(gr_config)); - tpc_mask = gr_gpcs_gpccs_gpc_exception_en_tpc_f(tpc_mask_calc - 1U); + tpc_mask = gr_gpcs_gpccs_gpc_exception_en_tpc_f( + nvgpu_safe_sub_u32(tpc_mask_calc, 1U)); nvgpu_writel(g, gr_gpcs_gpccs_gpc_exception_en_r(), tpc_mask); } void gm20ab_gr_intr_tpc_exception_sm_disable(struct gk20a *g, u32 offset) { - u32 tpc_exception_en = nvgpu_readl(g, - gr_gpc0_tpc0_tpccs_tpc_exception_en_r() + - offset); + u32 tpc_exception_en = nvgpu_readl(g, nvgpu_safe_add_u32( + gr_gpc0_tpc0_tpccs_tpc_exception_en_r(), + offset)); tpc_exception_en &= ~gr_gpc0_tpc0_tpccs_tpc_exception_en_sm_enabled_f(); - nvgpu_writel(g, - gr_gpc0_tpc0_tpccs_tpc_exception_en_r() + offset, + nvgpu_writel(g, nvgpu_safe_add_u32( + gr_gpc0_tpc0_tpccs_tpc_exception_en_r(), offset), tpc_exception_en); } diff --git a/drivers/gpu/nvgpu/hal/gr/intr/gr_intr_gp10b.c b/drivers/gpu/nvgpu/hal/gr/intr/gr_intr_gp10b.c index c7094b23b..1368cfd42 100644 --- a/drivers/gpu/nvgpu/hal/gr/intr/gr_intr_gp10b.c +++ b/drivers/gpu/nvgpu/hal/gr/intr/gr_intr_gp10b.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -290,7 +291,8 @@ int gp10b_gr_intr_handle_sm_exception(struct gk20a *g, u32 *hww_global_esr) { int ret = 0; - u32 offset = nvgpu_gr_gpc_offset(g, gpc) + nvgpu_gr_tpc_offset(g, tpc); + u32 offset = nvgpu_safe_add_u32(nvgpu_gr_gpc_offset(g, gpc), + nvgpu_gr_tpc_offset(g, tpc)); u32 lrf_ecc_status, lrf_ecc_sed_status, lrf_ecc_ded_status; u32 lrf_single_count_delta, lrf_double_count_delta; u32 shm_ecc_status; @@ -300,7 +302,9 @@ int gp10b_gr_intr_handle_sm_exception(struct gk20a *g, /* Check for LRF ECC errors. */ lrf_ecc_status = nvgpu_readl(g, - gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset); + nvgpu_safe_add_u32( + gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r(), + offset)); lrf_ecc_sed_status = lrf_ecc_status & (gr_pri_gpc0_tpc0_sm_lrf_ecc_status_single_err_detected_qrfdp0_pending_f() | @@ -314,17 +318,17 @@ int gp10b_gr_intr_handle_sm_exception(struct gk20a *g, gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp2_pending_f() | gr_pri_gpc0_tpc0_sm_lrf_ecc_status_double_err_detected_qrfdp3_pending_f()); lrf_single_count_delta = - nvgpu_readl(g, - gr_pri_gpc0_tpc0_sm_lrf_ecc_single_err_count_r() + - offset); + nvgpu_readl(g, nvgpu_safe_add_u32( + gr_pri_gpc0_tpc0_sm_lrf_ecc_single_err_count_r(), + offset)); lrf_double_count_delta = - nvgpu_readl(g, - gr_pri_gpc0_tpc0_sm_lrf_ecc_double_err_count_r() + - offset); - nvgpu_writel(g, - gr_pri_gpc0_tpc0_sm_lrf_ecc_single_err_count_r() + offset, 0); - nvgpu_writel(g, - gr_pri_gpc0_tpc0_sm_lrf_ecc_double_err_count_r() + offset, 0); + nvgpu_readl(g, nvgpu_safe_add_u32( + gr_pri_gpc0_tpc0_sm_lrf_ecc_double_err_count_r(), + offset)); + nvgpu_writel(g, nvgpu_safe_add_u32( + gr_pri_gpc0_tpc0_sm_lrf_ecc_single_err_count_r(), offset), 0); + nvgpu_writel(g, nvgpu_safe_add_u32( + gr_pri_gpc0_tpc0_sm_lrf_ecc_double_err_count_r(), offset), 0); if (lrf_ecc_sed_status != 0U) { nvgpu_log(g, gpu_dbg_fn | gpu_dbg_intr, "Single bit error detected in SM LRF!"); @@ -349,12 +353,14 @@ int gp10b_gr_intr_handle_sm_exception(struct gk20a *g, g->ecc.gr.sm_lrf_ecc_double_err_count[gpc][tpc].counter += lrf_double_count_delta; } - nvgpu_writel(g, gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r() + offset, + nvgpu_writel(g, nvgpu_safe_add_u32( + gr_pri_gpc0_tpc0_sm_lrf_ecc_status_r(), offset), lrf_ecc_status); /* Check for SHM ECC errors. */ - shm_ecc_status = nvgpu_readl(g, - gr_pri_gpc0_tpc0_sm_shm_ecc_status_r() + offset); + shm_ecc_status = nvgpu_readl(g, nvgpu_safe_add_u32( + gr_pri_gpc0_tpc0_sm_shm_ecc_status_r(), + offset)); if ((shm_ecc_status & gr_pri_gpc0_tpc0_sm_shm_ecc_status_single_err_corrected_shm0_pending_f()) != 0U || (shm_ecc_status & @@ -369,17 +375,18 @@ int gp10b_gr_intr_handle_sm_exception(struct gk20a *g, "Single bit error detected in SM SHM!"); ecc_stats_reg_val = - nvgpu_readl(g, - gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset); + nvgpu_readl(g, nvgpu_safe_add_u32( + gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r(), + offset)); g->ecc.gr.sm_shm_ecc_sec_count[gpc][tpc].counter += gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_corrected_v(ecc_stats_reg_val); g->ecc.gr.sm_shm_ecc_sed_count[gpc][tpc].counter += gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_detected_v(ecc_stats_reg_val); ecc_stats_reg_val &= ~(gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_corrected_m() | gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_single_detected_m()); - nvgpu_writel(g, - gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset, - ecc_stats_reg_val); + nvgpu_writel(g, nvgpu_safe_add_u32( + gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r(), + offset), ecc_stats_reg_val); } if ((shm_ecc_status & gr_pri_gpc0_tpc0_sm_shm_ecc_status_double_err_detected_shm0_pending_f()) != 0U || @@ -391,17 +398,19 @@ int gp10b_gr_intr_handle_sm_exception(struct gk20a *g, "Double bit error detected in SM SHM!"); ecc_stats_reg_val = - nvgpu_readl(g, - gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset); + nvgpu_readl(g, nvgpu_safe_add_u32( + gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r(), + offset)); g->ecc.gr.sm_shm_ecc_ded_count[gpc][tpc].counter += gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_double_detected_v(ecc_stats_reg_val); ecc_stats_reg_val &= ~(gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_double_detected_m()); - nvgpu_writel(g, - gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r() + offset, - ecc_stats_reg_val); + nvgpu_writel(g, nvgpu_safe_add_u32( + gr_pri_gpc0_tpc0_sm_shm_ecc_err_count_r(), + offset), ecc_stats_reg_val); } - nvgpu_writel(g, gr_pri_gpc0_tpc0_sm_shm_ecc_status_r() + offset, - shm_ecc_status); + nvgpu_writel(g, nvgpu_safe_add_u32( + gr_pri_gpc0_tpc0_sm_shm_ecc_status_r(), + offset), shm_ecc_status); return ret; @@ -539,8 +548,9 @@ void gp10b_gr_intr_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc) ecc_stats_reg_val); ecc_stats_reg_val &= ~gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_ded_m(); - nvgpu_writel(g, - gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r() + offset, + nvgpu_writel(g, nvgpu_safe_add_u32( + gr_pri_gpc0_tpc0_tex_m_ecc_cnt_unique_r(), + offset), ecc_stats_reg_val); @@ -549,8 +559,8 @@ void gp10b_gr_intr_handle_tex_exception(struct gk20a *g, u32 gpc, u32 tpc) gr_pri_gpc0_tpc0_tex_m_routing_sel_default_f()); } - nvgpu_writel(g, - gr_gpc0_tpc0_tex_m_hww_esr_r() + offset, + nvgpu_writel(g, nvgpu_safe_add_u32( + gr_gpc0_tpc0_tex_m_hww_esr_r(), offset), esr | gr_gpc0_tpc0_tex_m_hww_esr_reset_active_f()); } diff --git a/drivers/gpu/nvgpu/hal/gr/intr/gr_intr_gv11b.c b/drivers/gpu/nvgpu/hal/gr/intr/gr_intr_gv11b.c index dca04c552..abb621463 100644 --- a/drivers/gpu/nvgpu/hal/gr/intr/gr_intr_gv11b.c +++ b/drivers/gpu/nvgpu/hal/gr/intr/gr_intr_gv11b.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -271,7 +272,8 @@ void gv11b_gr_intr_handle_gcc_exception(struct gk20a *g, u32 gpc, /* Check for gcc l15 ECC errors. */ gcc_l15_ecc_status = nvgpu_readl(g, - gr_pri_gpc0_gcc_l15_ecc_status_r() + offset); + nvgpu_safe_add_u32( + gr_pri_gpc0_gcc_l15_ecc_status_r(), offset)); gcc_l15_ecc_corrected_err_status = gcc_l15_ecc_status & (gr_pri_gpc0_gcc_l15_ecc_status_corrected_err_bank0_m() | gr_pri_gpc0_gcc_l15_ecc_status_corrected_err_bank1_m()); @@ -364,7 +366,9 @@ void gv11b_gr_intr_handle_gpc_gpcmmu_exception(struct gk20a *g, u32 gpc, return; } - hww_esr = nvgpu_readl(g, gr_gpc0_mmu_gpcmmu_global_esr_r() + offset); + hww_esr = nvgpu_readl(g, + nvgpu_safe_add_u32(gr_gpc0_mmu_gpcmmu_global_esr_r(), + offset)); if ((hww_esr & (gr_gpc0_mmu_gpcmmu_global_esr_ecc_corrected_m() | gr_gpc0_mmu_gpcmmu_global_esr_ecc_uncorrected_m())) == 0U) { @@ -478,7 +482,9 @@ void gv11b_gr_intr_handle_gpc_gpccs_exception(struct gk20a *g, u32 gpc, return; } - hww_esr = nvgpu_readl(g, gr_gpc0_gpccs_hww_esr_r() + offset); + hww_esr = nvgpu_readl(g, + nvgpu_safe_add_u32(gr_gpc0_gpccs_hww_esr_r(), + offset)); if ((hww_esr & (gr_gpc0_gpccs_hww_esr_ecc_uncorrected_m() | gr_gpc0_gpccs_hww_esr_ecc_corrected_m())) == 0U) { @@ -572,21 +578,28 @@ void gv11b_gr_intr_handle_gpc_gpccs_exception(struct gk20a *g, u32 gpc, void gv11b_gr_intr_handle_tpc_mpc_exception(struct gk20a *g, u32 gpc, u32 tpc) { u32 esr; - u32 offset = nvgpu_gr_gpc_offset(g, gpc) + nvgpu_gr_tpc_offset(g, tpc); + u32 offset = nvgpu_safe_add_u32(nvgpu_gr_gpc_offset(g, gpc), + nvgpu_gr_tpc_offset(g, tpc)); - esr = nvgpu_readl(g, gr_gpc0_tpc0_mpc_hww_esr_r() + offset); + esr = nvgpu_readl(g, + nvgpu_safe_add_u32(gr_gpc0_tpc0_mpc_hww_esr_r(), + offset)); nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, "mpc hww esr 0x%08x", esr); nvgpu_gr_intr_report_exception(g, ((gpc << 8U) | tpc), GPU_PGRAPH_MPC_EXCEPTION, esr); - esr = nvgpu_readl(g, gr_gpc0_tpc0_mpc_hww_esr_info_r() + offset); + esr = nvgpu_readl(g, + nvgpu_safe_add_u32(gr_gpc0_tpc0_mpc_hww_esr_info_r(), + offset)); nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, "mpc hww esr info: veid 0x%08x", gr_gpc0_tpc0_mpc_hww_esr_info_veid_v(esr)); - nvgpu_writel(g, gr_gpc0_tpc0_mpc_hww_esr_r() + offset, + nvgpu_writel(g, + nvgpu_safe_add_u32(gr_gpc0_tpc0_mpc_hww_esr_r(), + offset), gr_gpc0_tpc0_mpc_hww_esr_reset_trigger_f()); } @@ -644,7 +657,8 @@ void gv11b_gr_intr_enable_exceptions(struct gk20a *g, nvgpu_writel(g, gr_exception2_en_r(), 0x0U); /* BE not enabled */ reg_val = (u32)BIT32(nvgpu_gr_config_get_gpc_count(gr_config)); - nvgpu_writel(g, gr_exception1_en_r(), (reg_val - 1U)); + nvgpu_writel(g, gr_exception1_en_r(), + nvgpu_safe_sub_u32(reg_val, 1U)); reg_val = gr_exception_en_fe_enabled_f() | gr_exception_en_memfmt_enabled_f() | @@ -673,7 +687,8 @@ void gv11b_gr_intr_enable_gpc_exceptions(struct gk20a *g, tpc_mask_calc = (u32)BIT32( nvgpu_gr_config_get_max_tpc_per_gpc_count(gr_config)); tpc_mask = - gr_gpcs_gpccs_gpc_exception_en_tpc_f(tpc_mask_calc - 1U); + gr_gpcs_gpccs_gpc_exception_en_tpc_f( + nvgpu_safe_sub_u32(tpc_mask_calc, 1U)); nvgpu_writel(g, gr_gpcs_gpccs_gpc_exception_en_r(), (tpc_mask | gr_gpcs_gpccs_gpc_exception_en_gcc_f(1U) | diff --git a/drivers/gpu/nvgpu/hal/gr/intr/gr_intr_tu104.c b/drivers/gpu/nvgpu/hal/gr/intr/gr_intr_tu104.c index cd10c2e8f..7b3f4b496 100644 --- a/drivers/gpu/nvgpu/hal/gr/intr/gr_intr_tu104.c +++ b/drivers/gpu/nvgpu/hal/gr/intr/gr_intr_tu104.c @@ -23,6 +23,7 @@ #include #include #include +#include #include @@ -146,7 +147,8 @@ void tu104_gr_intr_enable_gpc_exceptions(struct gk20a *g, tpc_mask_calc = (u32)BIT32( nvgpu_gr_config_get_max_tpc_per_gpc_count(gr_config)); tpc_mask = - gr_gpcs_gpccs_gpc_exception_en_tpc_f(tpc_mask_calc - 1U); + gr_gpcs_gpccs_gpc_exception_en_tpc_f( + nvgpu_safe_sub_u32(tpc_mask_calc, 1U)); nvgpu_writel(g, gr_gpcs_gpccs_gpc_exception_en_r(), (tpc_mask | gr_gpcs_gpccs_gpc_exception_en_gcc_f(1U) |