From 59bf3919e2d66e97e426f6d18a634ec29b75008f Mon Sep 17 00:00:00 2001 From: Seema Khowala Date: Tue, 9 Apr 2019 19:38:12 -0700 Subject: [PATCH] gpu: nvgpu: move defer reset functions to engines and channel Renamed and moved from fifo_gk20a.c to common/fifo/engines.c gk20a_fifo_should_defer_engine_reset -> nvgpu_engine_should_defer_reset Renamed and moved from fifo_gk20a.c to common/fifo/channel.c gk20a_fifo_deferred_reset -> nvgpu_channel_deferred_reset_engines JIRA NVGPU-1314 Change-Id: Ifc32ff4dde398143b83c2c1b6fab896142574240 Signed-off-by: Seema Khowala Reviewed-on: https://git-master.nvidia.com/r/2093910 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/common/fifo/channel.c | 67 +++++++++++++- drivers/gpu/nvgpu/common/fifo/engines.c | 41 +++++++++ drivers/gpu/nvgpu/gk20a/fifo_gk20a.c | 105 +--------------------- drivers/gpu/nvgpu/gk20a/fifo_gk20a.h | 5 -- drivers/gpu/nvgpu/gv11b/fifo_gv11b.c | 2 +- drivers/gpu/nvgpu/include/nvgpu/channel.h | 3 + drivers/gpu/nvgpu/include/nvgpu/engines.h | 2 + 7 files changed, 114 insertions(+), 111 deletions(-) diff --git a/drivers/gpu/nvgpu/common/fifo/channel.c b/drivers/gpu/nvgpu/common/fifo/channel.c index f211de6e3..28f55e162 100644 --- a/drivers/gpu/nvgpu/common/fifo/channel.c +++ b/drivers/gpu/nvgpu/common/fifo/channel.c @@ -397,7 +397,7 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force) nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was" " deferred, running now"); nvgpu_mutex_acquire(&g->fifo.engines_reset_mutex); - gk20a_fifo_deferred_reset(g, ch); + nvgpu_channel_deferred_reset_engines(g, ch); nvgpu_mutex_release(&g->fifo.engines_reset_mutex); } @@ -2765,3 +2765,68 @@ void nvgpu_channel_debug_dump_all(struct gk20a *g, nvgpu_kfree(g, infos); } + +int nvgpu_channel_deferred_reset_engines(struct gk20a *g, + struct channel_gk20a *ch) +{ + unsigned long engine_id, engines = 0U; + struct tsg_gk20a *tsg; + bool deferred_reset_pending; + struct fifo_gk20a *f 
= &g->fifo; + int err = 0; + + nvgpu_mutex_acquire(&g->dbg_sessions_lock); + + nvgpu_mutex_acquire(&f->deferred_reset_mutex); + deferred_reset_pending = g->fifo.deferred_reset_pending; + nvgpu_mutex_release(&f->deferred_reset_mutex); + + if (!deferred_reset_pending) { + nvgpu_mutex_release(&g->dbg_sessions_lock); + return 0; + } + + err = g->ops.gr.falcon.disable_ctxsw(g, g->gr.falcon); + if (err != 0) { + nvgpu_err(g, "failed to disable ctxsw"); + goto fail; + } + + tsg = tsg_gk20a_from_ch(ch); + if (tsg != NULL) { + engines = g->ops.engine.get_mask_on_id(g, + tsg->tsgid, true); + } else { + nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid); + } + + if (engines == 0U) { + goto clean_up; + } + + /* + * If deferred reset is set for an engine, and channel is running + * on that engine, reset it + */ + + for_each_set_bit(engine_id, &g->fifo.deferred_fault_engines, 32UL) { + if ((BIT64(engine_id) & engines) != 0ULL) { + nvgpu_engine_reset(g, (u32)engine_id); + } + } + + nvgpu_mutex_acquire(&f->deferred_reset_mutex); + g->fifo.deferred_fault_engines = 0; + g->fifo.deferred_reset_pending = false; + nvgpu_mutex_release(&f->deferred_reset_mutex); + +clean_up: + err = g->ops.gr.falcon.enable_ctxsw(g, g->gr.falcon); + if (err != 0) { + nvgpu_err(g, "failed to enable ctxsw"); + } +fail: + nvgpu_mutex_release(&g->dbg_sessions_lock); + + return err; +} diff --git a/drivers/gpu/nvgpu/common/fifo/engines.c b/drivers/gpu/nvgpu/common/fifo/engines.c index f0b0cafd4..d67f00189 100644 --- a/drivers/gpu/nvgpu/common/fifo/engines.c +++ b/drivers/gpu/nvgpu/common/fifo/engines.c @@ -901,3 +901,44 @@ u32 nvgpu_engine_get_runlist_busy_engines(struct gk20a *g, u32 runlist_id) return eng_bitmask; } + +bool nvgpu_engine_should_defer_reset(struct gk20a *g, u32 engine_id, + u32 engine_subid, bool fake_fault) +{ + enum nvgpu_fifo_engine engine_enum = NVGPU_ENGINE_INVAL_GK20A; + struct fifo_engine_info_gk20a *engine_info; + + if (g == NULL) { + return false; + } + + engine_info = 
nvgpu_engine_get_active_eng_info(g, engine_id); + + if (engine_info != NULL) { + engine_enum = engine_info->engine_enum; + } + + if (engine_enum == NVGPU_ENGINE_INVAL_GK20A) { + return false; + } + + /* + * channel recovery is only deferred if an sm debugger + * is attached and MMU debug mode is enabled + */ + if (!g->ops.gr.sm_debugger_attached(g) || + !g->ops.fb.is_debug_mode_enabled(g)) { + return false; + } + + /* if this fault is fake (due to RC recovery), don't defer recovery */ + if (fake_fault) { + return false; + } + + if (engine_enum != NVGPU_ENGINE_GR_GK20A) { + return false; + } + + return g->ops.engine.is_fault_engine_subid_gpc(g, engine_subid); +} diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index 2129d5aa9..06051f93a 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c @@ -119,109 +119,6 @@ int gk20a_init_fifo_setup_hw(struct gk20a *g) return 0; } -bool gk20a_fifo_should_defer_engine_reset(struct gk20a *g, u32 engine_id, - u32 engine_subid, bool fake_fault) -{ - enum nvgpu_fifo_engine engine_enum = NVGPU_ENGINE_INVAL_GK20A; - struct fifo_engine_info_gk20a *engine_info; - - if (g == NULL) { - return false; - } - - engine_info = nvgpu_engine_get_active_eng_info(g, engine_id); - - if (engine_info != NULL) { - engine_enum = engine_info->engine_enum; - } - - if (engine_enum == NVGPU_ENGINE_INVAL_GK20A) { - return false; - } - - /* channel recovery is only deferred if an sm debugger - is attached and has MMU debug mode is enabled */ - if (!g->ops.gr.sm_debugger_attached(g) || - !g->ops.fb.is_debug_mode_enabled(g)) { - return false; - } - - /* if this fault is fake (due to RC recovery), don't defer recovery */ - if (fake_fault) { - return false; - } - - if (engine_enum != NVGPU_ENGINE_GR_GK20A) { - return false; - } - - return g->ops.engine.is_fault_engine_subid_gpc(g, engine_subid); -} - -int gk20a_fifo_deferred_reset(struct gk20a *g, struct channel_gk20a *ch) -{ - 
unsigned long engine_id, engines = 0U; - struct tsg_gk20a *tsg; - bool deferred_reset_pending; - struct fifo_gk20a *f = &g->fifo; - int err = 0; - - nvgpu_mutex_acquire(&g->dbg_sessions_lock); - - nvgpu_mutex_acquire(&f->deferred_reset_mutex); - deferred_reset_pending = g->fifo.deferred_reset_pending; - nvgpu_mutex_release(&f->deferred_reset_mutex); - - if (!deferred_reset_pending) { - nvgpu_mutex_release(&g->dbg_sessions_lock); - return 0; - } - - err = g->ops.gr.falcon.disable_ctxsw(g, g->gr.falcon); - if (err != 0) { - nvgpu_err(g, "failed to disable ctxsw"); - goto fail; - } - - tsg = tsg_gk20a_from_ch(ch); - if (tsg != NULL) { - engines = g->ops.engine.get_mask_on_id(g, - tsg->tsgid, true); - } else { - nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid); - } - - if (engines == 0U) { - goto clean_up; - } - - /* - * If deferred reset is set for an engine, and channel is running - * on that engine, reset it - */ - - for_each_set_bit(engine_id, &g->fifo.deferred_fault_engines, 32UL) { - if ((BIT64(engine_id) & engines) != 0ULL) { - nvgpu_engine_reset(g, (u32)engine_id); - } - } - - nvgpu_mutex_acquire(&f->deferred_reset_mutex); - g->fifo.deferred_fault_engines = 0; - g->fifo.deferred_reset_pending = false; - nvgpu_mutex_release(&f->deferred_reset_mutex); - -clean_up: - err = g->ops.gr.falcon.enable_ctxsw(g, g->gr.falcon); - if (err != 0) { - nvgpu_err(g, "failed to enable ctxsw"); - } -fail: - nvgpu_mutex_release(&g->dbg_sessions_lock); - - return err; -} - static bool gk20a_fifo_handle_mmu_fault_locked( struct gk20a *g, u32 mmu_fault_engines, /* queried from HW if 0 */ @@ -325,7 +222,7 @@ static bool gk20a_fifo_handle_mmu_fault_locked( /* check if engine reset should be deferred */ if (engine_id != FIFO_INVAL_ENGINE_ID) { - bool defer = gk20a_fifo_should_defer_engine_reset(g, + bool defer = nvgpu_engine_should_defer_reset(g, engine_id, mmfault_info.client_type, fake_fault); if (((ch != NULL) || (tsg != NULL)) && defer) { diff --git 
a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h index 55e2c67fa..6c7a5d855 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h @@ -230,8 +230,6 @@ void fifo_gk20a_finish_mmu_fault_handling(struct gk20a *g, void gk20a_fifo_issue_preempt(struct gk20a *g, u32 id, bool is_tsg); int gk20a_fifo_tsg_set_timeslice(struct tsg_gk20a *tsg, u32 timeslice); -int gk20a_fifo_deferred_reset(struct gk20a *g, struct channel_gk20a *ch); - #ifdef CONFIG_DEBUG_FS struct fifo_profile_gk20a *gk20a_fifo_profile_acquire(struct gk20a *g); void gk20a_fifo_profile_release(struct gk20a *g, @@ -257,9 +255,6 @@ int gk20a_fifo_is_preempt_pending(struct gk20a *g, u32 id, unsigned int id_type); int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg); -bool gk20a_fifo_should_defer_engine_reset(struct gk20a *g, u32 engine_id, - u32 engine_subid, bool fake_fault); - void gk20a_fifo_teardown_ch_tsg(struct gk20a *g, u32 __engine_ids, u32 hw_id, unsigned int id_type, unsigned int rc_type, struct mmu_fault_info *mmfault); diff --git a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c index 473a3b290..25f8e09fc 100644 --- a/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c +++ b/drivers/gpu/nvgpu/gv11b/fifo_gv11b.c @@ -813,7 +813,7 @@ void gv11b_fifo_teardown_ch_tsg(struct gk20a *g, u32 act_eng_bitmask, g->fifo.max_engines) { engine_id = U32(bit); if ((tsg != NULL) && - gk20a_fifo_should_defer_engine_reset(g, + nvgpu_engine_should_defer_reset(g, engine_id, client_type, false)) { g->fifo.deferred_fault_engines |= diff --git a/drivers/gpu/nvgpu/include/nvgpu/channel.h b/drivers/gpu/nvgpu/include/nvgpu/channel.h index f3ce55e20..7a3d89e87 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/channel.h +++ b/drivers/gpu/nvgpu/include/nvgpu/channel.h @@ -547,4 +547,7 @@ struct channel_gk20a *nvgpu_channel_refch_from_inst_ptr(struct gk20a *g, u64 inst_ptr); void nvgpu_channel_debug_dump_all(struct gk20a *g, struct 
gk20a_debug_output *o); +int nvgpu_channel_deferred_reset_engines(struct gk20a *g, + struct channel_gk20a *ch); + #endif diff --git a/drivers/gpu/nvgpu/include/nvgpu/engines.h b/drivers/gpu/nvgpu/include/nvgpu/engines.h index 03d2d5a39..144f351eb 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/engines.h +++ b/drivers/gpu/nvgpu/include/nvgpu/engines.h @@ -81,4 +81,6 @@ u32 nvgpu_engine_find_busy_doing_ctxsw(struct gk20a *g, u32 *id_ptr, bool *is_tsg_ptr); u32 nvgpu_engine_get_runlist_busy_engines(struct gk20a *g, u32 runlist_id); +bool nvgpu_engine_should_defer_reset(struct gk20a *g, u32 engine_id, + u32 engine_subid, bool fake_fault); #endif /*NVGPU_ENGINE_H*/