gpu: nvgpu: move defer reset functions to engines and channel

Renamed and moved from fifo_gk20a.c to common/fifo/engines.c
gk20a_fifo_should_defer_engine_reset -> nvgpu_engine_should_defer_reset

Renamed and moved from fifo_gk20a.c to common/fifo/channel.c
gk20a_fifo_deferred_reset -> nvgpu_channel_deferred_reset_engines

JIRA NVGPU-1314

Change-Id: Ifc32ff4dde398143b83c2c1b6fab896142574240
Signed-off-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2093910
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Seema Khowala
2019-04-09 19:38:12 -07:00
committed by mobile promotions
parent ca628dfd6e
commit 59bf3919e2
7 changed files with 114 additions and 111 deletions

View File

@@ -397,7 +397,7 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force)
nvgpu_log(g, gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was"
" deferred, running now");
nvgpu_mutex_acquire(&g->fifo.engines_reset_mutex);
gk20a_fifo_deferred_reset(g, ch);
nvgpu_channel_deferred_reset_engines(g, ch);
nvgpu_mutex_release(&g->fifo.engines_reset_mutex);
}
@@ -2765,3 +2765,68 @@ void nvgpu_channel_debug_dump_all(struct gk20a *g,
nvgpu_kfree(g, infos);
}
int nvgpu_channel_deferred_reset_engines(struct gk20a *g,
struct channel_gk20a *ch)
{
unsigned long engine_id, engines = 0U;
struct tsg_gk20a *tsg;
bool deferred_reset_pending;
struct fifo_gk20a *f = &g->fifo;
int err = 0;
nvgpu_mutex_acquire(&g->dbg_sessions_lock);
nvgpu_mutex_acquire(&f->deferred_reset_mutex);
deferred_reset_pending = g->fifo.deferred_reset_pending;
nvgpu_mutex_release(&f->deferred_reset_mutex);
if (!deferred_reset_pending) {
nvgpu_mutex_release(&g->dbg_sessions_lock);
return 0;
}
err = g->ops.gr.falcon.disable_ctxsw(g, g->gr.falcon);
if (err != 0) {
nvgpu_err(g, "failed to disable ctxsw");
goto fail;
}
tsg = tsg_gk20a_from_ch(ch);
if (tsg != NULL) {
engines = g->ops.engine.get_mask_on_id(g,
tsg->tsgid, true);
} else {
nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid);
}
if (engines == 0U) {
goto clean_up;
}
/*
* If deferred reset is set for an engine, and channel is running
* on that engine, reset it
*/
for_each_set_bit(engine_id, &g->fifo.deferred_fault_engines, 32UL) {
if ((BIT64(engine_id) & engines) != 0ULL) {
nvgpu_engine_reset(g, (u32)engine_id);
}
}
nvgpu_mutex_acquire(&f->deferred_reset_mutex);
g->fifo.deferred_fault_engines = 0;
g->fifo.deferred_reset_pending = false;
nvgpu_mutex_release(&f->deferred_reset_mutex);
clean_up:
err = g->ops.gr.falcon.enable_ctxsw(g, g->gr.falcon);
if (err != 0) {
nvgpu_err(g, "failed to enable ctxsw");
}
fail:
nvgpu_mutex_release(&g->dbg_sessions_lock);
return err;
}

View File

@@ -901,3 +901,44 @@ u32 nvgpu_engine_get_runlist_busy_engines(struct gk20a *g, u32 runlist_id)
return eng_bitmask;
}
bool nvgpu_engine_should_defer_reset(struct gk20a *g, u32 engine_id,
u32 engine_subid, bool fake_fault)
{
enum nvgpu_fifo_engine engine_enum = NVGPU_ENGINE_INVAL_GK20A;
struct fifo_engine_info_gk20a *engine_info;
if (g == NULL) {
return false;
}
engine_info = nvgpu_engine_get_active_eng_info(g, engine_id);
if (engine_info != NULL) {
engine_enum = engine_info->engine_enum;
}
if (engine_enum == NVGPU_ENGINE_INVAL_GK20A) {
return false;
}
/*
* channel recovery is only deferred if an sm debugger
* is attached and has MMU debug mode is enabled
*/
if (!g->ops.gr.sm_debugger_attached(g) ||
!g->ops.fb.is_debug_mode_enabled(g)) {
return false;
}
/* if this fault is fake (due to RC recovery), don't defer recovery */
if (fake_fault) {
return false;
}
if (engine_enum != NVGPU_ENGINE_GR_GK20A) {
return false;
}
return g->ops.engine.is_fault_engine_subid_gpc(g, engine_subid);
}

View File

@@ -119,109 +119,6 @@ int gk20a_init_fifo_setup_hw(struct gk20a *g)
return 0;
}
bool gk20a_fifo_should_defer_engine_reset(struct gk20a *g, u32 engine_id,
u32 engine_subid, bool fake_fault)
{
enum nvgpu_fifo_engine engine_enum = NVGPU_ENGINE_INVAL_GK20A;
struct fifo_engine_info_gk20a *engine_info;
if (g == NULL) {
return false;
}
engine_info = nvgpu_engine_get_active_eng_info(g, engine_id);
if (engine_info != NULL) {
engine_enum = engine_info->engine_enum;
}
if (engine_enum == NVGPU_ENGINE_INVAL_GK20A) {
return false;
}
/* channel recovery is only deferred if an sm debugger
is attached and has MMU debug mode is enabled */
if (!g->ops.gr.sm_debugger_attached(g) ||
!g->ops.fb.is_debug_mode_enabled(g)) {
return false;
}
/* if this fault is fake (due to RC recovery), don't defer recovery */
if (fake_fault) {
return false;
}
if (engine_enum != NVGPU_ENGINE_GR_GK20A) {
return false;
}
return g->ops.engine.is_fault_engine_subid_gpc(g, engine_subid);
}
int gk20a_fifo_deferred_reset(struct gk20a *g, struct channel_gk20a *ch)
{
unsigned long engine_id, engines = 0U;
struct tsg_gk20a *tsg;
bool deferred_reset_pending;
struct fifo_gk20a *f = &g->fifo;
int err = 0;
nvgpu_mutex_acquire(&g->dbg_sessions_lock);
nvgpu_mutex_acquire(&f->deferred_reset_mutex);
deferred_reset_pending = g->fifo.deferred_reset_pending;
nvgpu_mutex_release(&f->deferred_reset_mutex);
if (!deferred_reset_pending) {
nvgpu_mutex_release(&g->dbg_sessions_lock);
return 0;
}
err = g->ops.gr.falcon.disable_ctxsw(g, g->gr.falcon);
if (err != 0) {
nvgpu_err(g, "failed to disable ctxsw");
goto fail;
}
tsg = tsg_gk20a_from_ch(ch);
if (tsg != NULL) {
engines = g->ops.engine.get_mask_on_id(g,
tsg->tsgid, true);
} else {
nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid);
}
if (engines == 0U) {
goto clean_up;
}
/*
* If deferred reset is set for an engine, and channel is running
* on that engine, reset it
*/
for_each_set_bit(engine_id, &g->fifo.deferred_fault_engines, 32UL) {
if ((BIT64(engine_id) & engines) != 0ULL) {
nvgpu_engine_reset(g, (u32)engine_id);
}
}
nvgpu_mutex_acquire(&f->deferred_reset_mutex);
g->fifo.deferred_fault_engines = 0;
g->fifo.deferred_reset_pending = false;
nvgpu_mutex_release(&f->deferred_reset_mutex);
clean_up:
err = g->ops.gr.falcon.enable_ctxsw(g, g->gr.falcon);
if (err != 0) {
nvgpu_err(g, "failed to enable ctxsw");
}
fail:
nvgpu_mutex_release(&g->dbg_sessions_lock);
return err;
}
static bool gk20a_fifo_handle_mmu_fault_locked(
struct gk20a *g,
u32 mmu_fault_engines, /* queried from HW if 0 */
@@ -325,7 +222,7 @@ static bool gk20a_fifo_handle_mmu_fault_locked(
/* check if engine reset should be deferred */
if (engine_id != FIFO_INVAL_ENGINE_ID) {
bool defer = gk20a_fifo_should_defer_engine_reset(g,
bool defer = nvgpu_engine_should_defer_reset(g,
engine_id, mmfault_info.client_type,
fake_fault);
if (((ch != NULL) || (tsg != NULL)) && defer) {

View File

@@ -230,8 +230,6 @@ void fifo_gk20a_finish_mmu_fault_handling(struct gk20a *g,
void gk20a_fifo_issue_preempt(struct gk20a *g, u32 id, bool is_tsg);
int gk20a_fifo_tsg_set_timeslice(struct tsg_gk20a *tsg, u32 timeslice);
int gk20a_fifo_deferred_reset(struct gk20a *g, struct channel_gk20a *ch);
#ifdef CONFIG_DEBUG_FS
struct fifo_profile_gk20a *gk20a_fifo_profile_acquire(struct gk20a *g);
void gk20a_fifo_profile_release(struct gk20a *g,
@@ -257,9 +255,6 @@ int gk20a_fifo_is_preempt_pending(struct gk20a *g, u32 id,
unsigned int id_type);
int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg);
bool gk20a_fifo_should_defer_engine_reset(struct gk20a *g, u32 engine_id,
u32 engine_subid, bool fake_fault);
void gk20a_fifo_teardown_ch_tsg(struct gk20a *g, u32 __engine_ids,
u32 hw_id, unsigned int id_type, unsigned int rc_type,
struct mmu_fault_info *mmfault);

View File

@@ -813,7 +813,7 @@ void gv11b_fifo_teardown_ch_tsg(struct gk20a *g, u32 act_eng_bitmask,
g->fifo.max_engines) {
engine_id = U32(bit);
if ((tsg != NULL) &&
gk20a_fifo_should_defer_engine_reset(g,
nvgpu_engine_should_defer_reset(g,
engine_id, client_type, false)) {
g->fifo.deferred_fault_engines |=

View File

@@ -547,4 +547,7 @@ struct channel_gk20a *nvgpu_channel_refch_from_inst_ptr(struct gk20a *g,
u64 inst_ptr);
void nvgpu_channel_debug_dump_all(struct gk20a *g,
struct gk20a_debug_output *o);
int nvgpu_channel_deferred_reset_engines(struct gk20a *g,
struct channel_gk20a *ch);
#endif

View File

@@ -81,4 +81,6 @@ u32 nvgpu_engine_find_busy_doing_ctxsw(struct gk20a *g,
u32 *id_ptr, bool *is_tsg_ptr);
u32 nvgpu_engine_get_runlist_busy_engines(struct gk20a *g, u32 runlist_id);
bool nvgpu_engine_should_defer_reset(struct gk20a *g, u32 engine_id,
u32 engine_subid, bool fake_fault);
#endif /*NVGPU_ENGINE_H*/