From dcec7f184e148347ada2367c6382d12d3e6ba4e4 Mon Sep 17 00:00:00 2001 From: Divya Date: Thu, 19 May 2022 12:13:29 +0000 Subject: [PATCH] gpu: nvgpu: disable elpg earlier in recovery path When an MMU fault happens with id_type = 1, the fault happened in a TSG. In that path we set the error notifier and let userspace know about the faulty channel. While doing so, we check whether a debugger is attached by reading the gr_gpc0_tpc0_sm0_dbgr_control0_r() register. ELPG is still enabled at that point, so this read causes an ELPG IDLE SNAP error. To resolve this, move the CG/PG disable call earlier in the fifo recover code path. This ensures that ELPG is disabled before any GR register is read. Bug 3660592 Change-Id: Ie5d01b7ccf00167b58f260e9142aa5deb2a08be4 Signed-off-by: Divya (cherry picked from commit f09e429f2d142c20529bedc05acf193805e1bb25) Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2720655 Reviewed-by: svc-mobile-coverity Reviewed-by: svc-mobile-cert Reviewed-by: Mahantesh Kumbar GVS: Gerrit_Virtual_Submit --- drivers/gpu/nvgpu/hal/rc/rc_gv11b.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/nvgpu/hal/rc/rc_gv11b.c b/drivers/gpu/nvgpu/hal/rc/rc_gv11b.c index 93648384b..45728cb52 100644 --- a/drivers/gpu/nvgpu/hal/rc/rc_gv11b.c +++ b/drivers/gpu/nvgpu/hal/rc/rc_gv11b.c @@ -165,6 +165,20 @@ void gv11b_fifo_recover(struct gk20a *g, u32 act_eng_bitmask, rec_dbg(g, " rc_type = %s", nvgpu_rc_type_to_str(rc_type)); rec_dbg(g, " Engine bitmask: 0x%x", act_eng_bitmask); + /* + * Recovery path accesses many GR registers. + * Any access to GR registers with CG/PG enabled + * in recovery path will cause errors like pri timeout + * idle snap etc. So disable CG/PG before we start + * the recovery process to avoid such errors. 
+ */ +#ifdef CONFIG_NVGPU_NON_FUSA + rec_dbg(g, "Disabling CG/PG now"); + if (nvgpu_cg_pg_disable(g) != 0) { + nvgpu_warn(g, "fail to disable power mgmt"); + } +#endif + nvgpu_swprofile_begin_sample(prof); rec_dbg(g, "Acquiring engines_reset_mutex"); @@ -256,13 +270,6 @@ void gv11b_fifo_recover(struct gk20a *g, u32 act_eng_bitmask, nvgpu_swprofile_snapshot(prof, PROF_RECOVERY_DISABLE_RL); -#ifdef CONFIG_NVGPU_NON_FUSA - rec_dbg(g, "Disabling CG/PG now"); - if (nvgpu_cg_pg_disable(g) != 0) { - nvgpu_warn(g, "fail to disable power mgmt"); - } -#endif - if (rc_type == RC_TYPE_MMU_FAULT) { if (!nvgpu_swprofile_is_enabled(prof)) { gk20a_debug_dump(g);