gpu: nvgpu: move preempt timeout rc from fifo to rc

Move the preempt timeout recovery handling from fifo to common/rc.
Remove nvgpu_channel_recover, since bare channels (not bound to a TSG)
are no longer recovered; on a preempt timeout, recover the TSG that the
channel is bound to.

JIRA NVGPU-1314

Change-Id: Ic1f94b321d0404eea86dd6d6d990529b2f3a8d57
Signed-off-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2093682
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
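
For context, a minimal sketch of the recovery path after this change, using only helpers visible in the hunks below (tsg_gk20a_from_ch, nvgpu_rc_preempt_timeout, nvgpu_err); the wrapper function name is hypothetical and the snippet assumes the usual nvgpu channel/TSG types:

/* Sketch: how a preempt timeout on a channel is handled after this change. */
static void example_channel_preempt_timeout(struct gk20a *g,
		struct channel_gk20a *ch)
{
	struct tsg_gk20a *tsg = tsg_gk20a_from_ch(ch);

	if (tsg != NULL) {
		/* Common recovery path: sets the idle-timeout error
		 * notifier on the TSG and triggers TSG recovery. */
		nvgpu_rc_preempt_timeout(g, tsg);
	} else {
		/* Bare channels are no longer recovered; only report. */
		nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid);
	}
}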


@@ -1474,42 +1474,6 @@ bool nvgpu_channel_update_and_check_ctxsw_timeout(struct channel_gk20a *ch,
 		ch->ctxsw_timeout_accumulated_ms > ch->ctxsw_timeout_max_ms;
 }
 
-void nvgpu_channel_recover(struct gk20a *g, struct channel_gk20a *ch,
-			bool verbose, u32 rc_type)
-{
-	u32 engines;
-	int err;
-
-	/* stop context switching to prevent engine assignments from
-	   changing until channel is recovered */
-	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
-	err = g->ops.gr.falcon.disable_ctxsw(g, g->gr.falcon);
-	if (err != 0) {
-		nvgpu_err(g, "failed to disable ctxsw");
-		goto fail;
-	}
-
-	engines = g->ops.engine.get_mask_on_id(g, ch->chid, false);
-
-	if (engines != 0U) {
-		gk20a_fifo_recover(g, engines, ch->chid, false, true, verbose,
-				rc_type);
-	} else {
-		gk20a_channel_abort(ch, false);
-
-		if (nvgpu_channel_mark_error(g, ch)) {
-			gk20a_debug_dump(g);
-		}
-	}
-
-	err = g->ops.gr.falcon.enable_ctxsw(g, g->gr.falcon);
-	if (err != 0) {
-		nvgpu_err(g, "failed to enable ctxsw");
-	}
-
-fail:
-	nvgpu_mutex_release(&g->dbg_sessions_lock);
-}
-
 u32 nvgpu_get_gp_free_count(struct channel_gk20a *c)
 {
 	update_gp_get(c->g, c);


@@ -95,3 +95,11 @@ void nvgpu_rc_runlist_update(struct gk20a *g, u32 runlist_id)
 				RC_TYPE_RUNLIST_UPDATE_TIMEOUT);
 	}
 }
+
+void nvgpu_rc_preempt_timeout(struct gk20a *g, struct tsg_gk20a *tsg)
+{
+	nvgpu_tsg_set_error_notifier(g, tsg,
+		NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
+
+	nvgpu_tsg_recover(g, tsg, true, RC_TYPE_PREEMPT_TIMEOUT);
+}
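
This helper replaces the per-channel loop of the removed gk20a_fifo_preempt_timeout_rc_tsg: the error notifier is now set once at TSG level via nvgpu_tsg_set_error_notifier instead of iterating the TSG's channel list. A minimal, hypothetical caller (the actual call sites are in the fifo hunks below) would reduce to roughly:

/* Hypothetical caller: a TSG preempt request that timed out. */
static void example_tsg_preempt_timeout(struct gk20a *g, struct tsg_gk20a *tsg)
{
	nvgpu_err(g, "preempt TSG %d timeout", tsg->tsgid);
	nvgpu_rc_preempt_timeout(g, tsg);
}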


@@ -688,35 +688,6 @@ int gk20a_fifo_is_preempt_pending(struct gk20a *g, u32 id,
 	return ret;
 }
 
-void gk20a_fifo_preempt_timeout_rc_tsg(struct gk20a *g, struct tsg_gk20a *tsg)
-{
-	struct channel_gk20a *ch = NULL;
-
-	nvgpu_err(g, "preempt TSG %d timeout", tsg->tsgid);
-
-	nvgpu_rwsem_down_read(&tsg->ch_list_lock);
-	nvgpu_list_for_each_entry(ch, &tsg->ch_list,
-			channel_gk20a, ch_entry) {
-		if (gk20a_channel_get(ch) == NULL) {
-			continue;
-		}
-		g->ops.channel.set_error_notifier(ch,
-			NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
-		gk20a_channel_put(ch);
-	}
-	nvgpu_rwsem_up_read(&tsg->ch_list_lock);
-
-	nvgpu_tsg_recover(g, tsg, true, RC_TYPE_PREEMPT_TIMEOUT);
-}
-
-void gk20a_fifo_preempt_timeout_rc(struct gk20a *g, struct channel_gk20a *ch)
-{
-	nvgpu_err(g, "preempt channel %d timeout", ch->chid);
-	g->ops.channel.set_error_notifier(ch,
-		NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
-	nvgpu_channel_recover(g, ch, true, RC_TYPE_PREEMPT_TIMEOUT);
-}
 
 int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg)
 {
 	int ret;
@@ -763,12 +734,20 @@ int gk20a_fifo_preempt_channel(struct gk20a *g, struct channel_gk20a *ch)
 				"ctxsw timeout will trigger recovery if needed",
 				ch->chid);
 		} else {
-			gk20a_fifo_preempt_timeout_rc(g, ch);
+			struct tsg_gk20a *tsg;
+
+			nvgpu_err(g, "preempt channel %d timeout", ch->chid);
+			tsg = tsg_gk20a_from_ch(ch);
+			if (tsg != NULL) {
+				nvgpu_rc_preempt_timeout(g, tsg);
+			} else {
+				nvgpu_err(g, "chid: %d is not bound to tsg",
+					ch->chid);
+			}
 		}
 	}
 
 	return ret;
 }
@@ -800,7 +779,8 @@ int gk20a_fifo_preempt_tsg(struct gk20a *g, struct tsg_gk20a *tsg)
 				"ctxsw timeout will trigger recovery if needed",
 				tsg->tsgid);
 		} else {
-			gk20a_fifo_preempt_timeout_rc_tsg(g, tsg);
+			nvgpu_err(g, "preempt TSG %d timeout", tsg->tsgid);
+			nvgpu_rc_preempt_timeout(g, tsg);
 		}
 	}


@@ -266,8 +266,6 @@ static inline void gk20a_fifo_profile_snapshot(
 int gk20a_fifo_is_preempt_pending(struct gk20a *g, u32 id,
 		unsigned int id_type);
 int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg);
-void gk20a_fifo_preempt_timeout_rc_tsg(struct gk20a *g, struct tsg_gk20a *tsg);
-void gk20a_fifo_preempt_timeout_rc(struct gk20a *g, struct channel_gk20a *ch);
 u32 gk20a_fifo_runlist_busy_engines(struct gk20a *g, u32 runlist_id);
 bool gk20a_fifo_should_defer_engine_reset(struct gk20a *g, u32 engine_id,


@@ -499,7 +499,7 @@ int gv11b_fifo_preempt_tsg(struct gk20a *g, struct tsg_gk20a *tsg)
 		nvgpu_err(g, "preempt timed out for tsgid: %u, "
 		"ctxsw timeout will trigger recovery if needed", tsg->tsgid);
 	} else {
-		gk20a_fifo_preempt_timeout_rc_tsg(g, tsg);
+		nvgpu_rc_preempt_timeout(g, tsg);
 	}
 }


@@ -49,4 +49,5 @@ void nvgpu_rc_pbdma_fault(struct gk20a *g, struct fifo_gk20a *f,
 void nvgpu_rc_runlist_update(struct gk20a *g, u32 runlist_id);
+void nvgpu_rc_preempt_timeout(struct gk20a *g, struct tsg_gk20a *tsg);
 
 #endif /* NVGPU_RC_H */