gpu: nvgpu: disable elpg earlier in recovery path

When an MMU fault happens and id_type = 1, the fault
happened in a TSG. In that path we set the error
notifier to let userspace know about the faulty channel.
While doing so, we check whether a debugger is attached by
reading the gr_gpc0_tpc0_sm0_dbgr_control0_r() register.
ELPG is still enabled at this point, so this read causes
an IDLE SNAP error for ELPG.

To resolve this, move the CG/PG disable function call
earlier in the fifo recovery code path. This ensures that
ELPG is disabled before any GR register is read.

Bug 3660592

Change-Id: Ie5d01b7ccf00167b58f260e9142aa5deb2a08be4
Signed-off-by: Divya <dsinghatwari@nvidia.com>
(cherry picked from commit f09e429f2d142c20529bedc05acf193805e1bb25)
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2720655
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: Mahantesh Kumbar <mkumbar@nvidia.com>
GVS: Gerrit_Virtual_Submit
This commit is contained in:
Divya
2022-05-19 12:13:29 +00:00
committed by mobile promotions
parent cb78bca971
commit dcec7f184e

View File

@@ -165,6 +165,20 @@ void gv11b_fifo_recover(struct gk20a *g, u32 act_eng_bitmask,
rec_dbg(g, " rc_type = %s", nvgpu_rc_type_to_str(rc_type)); rec_dbg(g, " rc_type = %s", nvgpu_rc_type_to_str(rc_type));
rec_dbg(g, " Engine bitmask: 0x%x", act_eng_bitmask); rec_dbg(g, " Engine bitmask: 0x%x", act_eng_bitmask);
/*
* Recovery path accesses many GR registers.
* Any access to GR registers with CG/PG enabled
* in the recovery path will cause errors like pri timeout,
* idle snap, etc. So disable CG/PG before we start
* the recovery process to avoid such errors.
*/
#ifdef CONFIG_NVGPU_NON_FUSA
rec_dbg(g, "Disabling CG/PG now");
if (nvgpu_cg_pg_disable(g) != 0) {
nvgpu_warn(g, "fail to disable power mgmt");
}
#endif
nvgpu_swprofile_begin_sample(prof); nvgpu_swprofile_begin_sample(prof);
rec_dbg(g, "Acquiring engines_reset_mutex"); rec_dbg(g, "Acquiring engines_reset_mutex");
@@ -256,13 +270,6 @@ void gv11b_fifo_recover(struct gk20a *g, u32 act_eng_bitmask,
nvgpu_swprofile_snapshot(prof, PROF_RECOVERY_DISABLE_RL); nvgpu_swprofile_snapshot(prof, PROF_RECOVERY_DISABLE_RL);
#ifdef CONFIG_NVGPU_NON_FUSA
rec_dbg(g, "Disabling CG/PG now");
if (nvgpu_cg_pg_disable(g) != 0) {
nvgpu_warn(g, "fail to disable power mgmt");
}
#endif
if (rc_type == RC_TYPE_MMU_FAULT) { if (rc_type == RC_TYPE_MMU_FAULT) {
if (!nvgpu_swprofile_is_enabled(prof)) { if (!nvgpu_swprofile_is_enabled(prof)) {
gk20a_debug_dump(g); gk20a_debug_dump(g);