Mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git (synced 2025-12-24 10:34:43 +03:00)
gpu: nvgpu: move preempt timeout rc from fifo to rc
Move preempt timeout recovery related function to common/rc.
Remove nvgpu_channel_recover as bare channels are not recovered.
Recover channels bound to tsg.

JIRA NVGPU-1314

Change-Id: Ic1f94b321d0404eea86dd6d6d990529b2f3a8d57
Signed-off-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2093682
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
committed by mobile promotions
parent 1882a7413d
commit 2f00275584
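For orientation before reading the hunks, here is a condensed sketch of the preempt timeout recovery path after this change. It is not part of the commit; the identifiers are taken from the diff below, and the wrapper function preempt_timeout_on_channel() exists only for illustration (the real call sites are gk20a_fifo_preempt_channel(), gk20a_fifo_preempt_tsg() and gv11b_fifo_preempt_tsg()).

/*
 * Illustrative sketch only (not from the commit): how fifo code reaches the
 * new common/rc helper after a preempt timeout. A channel that is not bound
 * to a TSG is only logged, never recovered, which is why
 * nvgpu_channel_recover() can be deleted.
 */
static void preempt_timeout_on_channel(struct gk20a *g, struct channel_gk20a *ch)
{
	struct tsg_gk20a *tsg = tsg_gk20a_from_ch(ch);

	if (tsg != NULL) {
		/* common/rc: sets NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT
		 * on the TSG and triggers nvgpu_tsg_recover(). */
		nvgpu_rc_preempt_timeout(g, tsg);
	} else {
		nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid);
	}
}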
@@ -1474,42 +1474,6 @@ bool nvgpu_channel_update_and_check_ctxsw_timeout(struct channel_gk20a *ch,
 		ch->ctxsw_timeout_accumulated_ms > ch->ctxsw_timeout_max_ms;
 }
 
-void nvgpu_channel_recover(struct gk20a *g, struct channel_gk20a *ch,
-			bool verbose, u32 rc_type)
-{
-	u32 engines;
-	int err;
-
-	/* stop context switching to prevent engine assignments from
-	   changing until channel is recovered */
-	nvgpu_mutex_acquire(&g->dbg_sessions_lock);
-	err = g->ops.gr.falcon.disable_ctxsw(g, g->gr.falcon);
-	if (err != 0) {
-		nvgpu_err(g, "failed to disable ctxsw");
-		goto fail;
-	}
-
-	engines = g->ops.engine.get_mask_on_id(g, ch->chid, false);
-
-	if (engines != 0U) {
-		gk20a_fifo_recover(g, engines, ch->chid, false, true, verbose,
-				rc_type);
-	} else {
-		gk20a_channel_abort(ch, false);
-
-		if (nvgpu_channel_mark_error(g, ch)) {
-			gk20a_debug_dump(g);
-		}
-	}
-
-	err = g->ops.gr.falcon.enable_ctxsw(g, g->gr.falcon);
-	if (err != 0) {
-		nvgpu_err(g, "failed to enable ctxsw");
-	}
-fail:
-	nvgpu_mutex_release(&g->dbg_sessions_lock);
-}
-
 u32 nvgpu_get_gp_free_count(struct channel_gk20a *c)
 {
 	update_gp_get(c->g, c);
@@ -95,3 +95,11 @@ void nvgpu_rc_runlist_update(struct gk20a *g, u32 runlist_id)
 				RC_TYPE_RUNLIST_UPDATE_TIMEOUT);
 	}
 }
+
+void nvgpu_rc_preempt_timeout(struct gk20a *g, struct tsg_gk20a *tsg)
+{
+	nvgpu_tsg_set_error_notifier(g, tsg,
+		NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
+
+	nvgpu_tsg_recover(g, tsg, true, RC_TYPE_PREEMPT_TIMEOUT);
+}
@@ -688,35 +688,6 @@ int gk20a_fifo_is_preempt_pending(struct gk20a *g, u32 id,
 	return ret;
 }
 
-void gk20a_fifo_preempt_timeout_rc_tsg(struct gk20a *g, struct tsg_gk20a *tsg)
-{
-	struct channel_gk20a *ch = NULL;
-
-	nvgpu_err(g, "preempt TSG %d timeout", tsg->tsgid);
-
-	nvgpu_rwsem_down_read(&tsg->ch_list_lock);
-	nvgpu_list_for_each_entry(ch, &tsg->ch_list,
-			channel_gk20a, ch_entry) {
-		if (gk20a_channel_get(ch) == NULL) {
-			continue;
-		}
-		g->ops.channel.set_error_notifier(ch,
-			NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
-		gk20a_channel_put(ch);
-	}
-	nvgpu_rwsem_up_read(&tsg->ch_list_lock);
-	nvgpu_tsg_recover(g, tsg, true, RC_TYPE_PREEMPT_TIMEOUT);
-}
-
-void gk20a_fifo_preempt_timeout_rc(struct gk20a *g, struct channel_gk20a *ch)
-{
-	nvgpu_err(g, "preempt channel %d timeout", ch->chid);
-
-	g->ops.channel.set_error_notifier(ch,
-		NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
-	nvgpu_channel_recover(g, ch, true, RC_TYPE_PREEMPT_TIMEOUT);
-}
-
 int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg)
 {
 	int ret;
@@ -763,12 +734,20 @@ int gk20a_fifo_preempt_channel(struct gk20a *g, struct channel_gk20a *ch)
 			"ctxsw timeout will trigger recovery if needed",
 			ch->chid);
 		} else {
-			gk20a_fifo_preempt_timeout_rc(g, ch);
+			struct tsg_gk20a *tsg;
+
+			nvgpu_err(g, "preempt channel %d timeout", ch->chid);
+			tsg = tsg_gk20a_from_ch(ch);
+			if (tsg != NULL) {
+				nvgpu_rc_preempt_timeout(g, tsg);
+			} else {
+				nvgpu_err(g, "chid: %d is not bound to tsg",
+					ch->chid);
+			}
+
 		}
 	}
 
 	return ret;
 }
@@ -800,7 +779,8 @@ int gk20a_fifo_preempt_tsg(struct gk20a *g, struct tsg_gk20a *tsg)
 			"ctxsw timeout will trigger recovery if needed",
 			tsg->tsgid);
 		} else {
-			gk20a_fifo_preempt_timeout_rc_tsg(g, tsg);
+			nvgpu_err(g, "preempt TSG %d timeout", tsg->tsgid);
+			nvgpu_rc_preempt_timeout(g, tsg);
 		}
 	}
 
@@ -266,8 +266,6 @@ static inline void gk20a_fifo_profile_snapshot(
 int gk20a_fifo_is_preempt_pending(struct gk20a *g, u32 id,
 		unsigned int id_type);
 int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg);
-void gk20a_fifo_preempt_timeout_rc_tsg(struct gk20a *g, struct tsg_gk20a *tsg);
-void gk20a_fifo_preempt_timeout_rc(struct gk20a *g, struct channel_gk20a *ch);
 
 u32 gk20a_fifo_runlist_busy_engines(struct gk20a *g, u32 runlist_id);
 bool gk20a_fifo_should_defer_engine_reset(struct gk20a *g, u32 engine_id,
@@ -499,7 +499,7 @@ int gv11b_fifo_preempt_tsg(struct gk20a *g, struct tsg_gk20a *tsg)
 			nvgpu_err(g, "preempt timed out for tsgid: %u, "
			"ctxsw timeout will trigger recovery if needed", tsg->tsgid);
 		} else {
-			gk20a_fifo_preempt_timeout_rc_tsg(g, tsg);
+			nvgpu_rc_preempt_timeout(g, tsg);
 		}
 	}
 
@@ -49,4 +49,5 @@ void nvgpu_rc_pbdma_fault(struct gk20a *g, struct fifo_gk20a *f,
 
 void nvgpu_rc_runlist_update(struct gk20a *g, u32 runlist_id);
 
+void nvgpu_rc_preempt_timeout(struct gk20a *g, struct tsg_gk20a *tsg);
 #endif /* NVGPU_RC_H */