mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 17:36:20 +03:00
gpu: nvgpu: rename timeout_* of channel struct
The following members of the channel struct and one function are renamed:

- timeout_ms_max is renamed as ctxsw_timeout_max_ms
- timeout_debug_dump is renamed as ctxsw_timeout_debug_dump
- timeout_accumulated_ms is renamed as ctxsw_timeout_accumulated_ms
- timeout_gpfifo_get is renamed as ctxsw_timeout_gpfifo_get
- gk20a_channel_update_and_check_timeout is renamed as nvgpu_channel_update_and_check_ctxsw_timeout

JIRA NVGPU-1312

Change-Id: Ib5c8829c76df95817e9809e451e8c9671faba726
Signed-off-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2076847
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
9a0b8c0234
commit
737de7eac5
@@ -738,11 +738,11 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g,
|
||||
ch->tsgid = NVGPU_INVALID_TSG_ID;
|
||||
|
||||
/* clear ctxsw timeout counter and update timestamp */
|
||||
ch->timeout_accumulated_ms = 0;
|
||||
ch->timeout_gpfifo_get = 0;
|
||||
ch->ctxsw_timeout_accumulated_ms = 0;
|
||||
ch->ctxsw_timeout_gpfifo_get = 0;
|
||||
/* set gr host default timeout */
|
||||
ch->timeout_ms_max = gk20a_get_gr_idle_timeout(g);
|
||||
ch->timeout_debug_dump = true;
|
||||
ch->ctxsw_timeout_max_ms = gk20a_get_gr_idle_timeout(g);
|
||||
ch->ctxsw_timeout_debug_dump = true;
|
||||
ch->unserviceable = false;
|
||||
|
||||
/* init kernel watchdog timeout */
|
||||
@@ -1384,13 +1384,13 @@ u32 nvgpu_gp_free_count(struct channel_gk20a *c)
|
||||
c->gpfifo.entry_num;
|
||||
}
|
||||
|
||||
static bool nvgpu_channel_timeout_debug_dump_state(struct gk20a *g,
|
||||
static bool nvgpu_channel_ctxsw_timeout_debug_dump_state(struct gk20a *g,
|
||||
struct channel_gk20a *ch)
|
||||
{
|
||||
bool verbose = false;
|
||||
if (nvgpu_is_error_notifier_set(ch,
|
||||
NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT)) {
|
||||
verbose = ch->timeout_debug_dump;
|
||||
verbose = ch->ctxsw_timeout_debug_dump;
|
||||
}
|
||||
|
||||
return verbose;
|
||||
@@ -1411,7 +1411,7 @@ bool nvgpu_channel_mark_error(struct gk20a *g, struct channel_gk20a *ch)
|
||||
{
|
||||
bool verbose;
|
||||
|
||||
verbose = nvgpu_channel_timeout_debug_dump_state(g, ch);
|
||||
verbose = nvgpu_channel_ctxsw_timeout_debug_dump_state(g, ch);
|
||||
nvgpu_channel_set_has_timedout_and_wakeup_wqs(g, ch);
|
||||
|
||||
return verbose;
|
||||
@@ -1432,26 +1432,25 @@ void nvgpu_channel_set_ctx_mmu_error(struct gk20a *g,
|
||||
NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT);
|
||||
}
|
||||
|
||||
bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
|
||||
bool nvgpu_channel_update_and_check_ctxsw_timeout(struct channel_gk20a *ch,
|
||||
u32 timeout_delta_ms, bool *progress)
|
||||
{
|
||||
u32 gpfifo_get = update_gp_get(ch->g, ch);
|
||||
|
||||
/* Count consequent timeout isr */
|
||||
if (gpfifo_get == ch->timeout_gpfifo_get) {
|
||||
/* we didn't advance since previous channel timeout check */
|
||||
ch->timeout_accumulated_ms += timeout_delta_ms;
|
||||
if (gpfifo_get == ch->ctxsw_timeout_gpfifo_get) {
|
||||
/* didn't advance since previous ctxsw timeout check */
|
||||
ch->ctxsw_timeout_accumulated_ms += timeout_delta_ms;
|
||||
*progress = false;
|
||||
} else {
|
||||
/* first timeout isr encountered */
|
||||
ch->timeout_accumulated_ms = timeout_delta_ms;
|
||||
/* first ctxsw timeout isr encountered */
|
||||
ch->ctxsw_timeout_accumulated_ms = timeout_delta_ms;
|
||||
*progress = true;
|
||||
}
|
||||
|
||||
ch->timeout_gpfifo_get = gpfifo_get;
|
||||
ch->ctxsw_timeout_gpfifo_get = gpfifo_get;
|
||||
|
||||
return nvgpu_is_timeouts_enabled(ch->g) &&
|
||||
ch->timeout_accumulated_ms > ch->timeout_ms_max;
|
||||
ch->ctxsw_timeout_accumulated_ms > ch->ctxsw_timeout_max_ms;
|
||||
}
|
||||
|
||||
bool nvgpu_channel_check_ctxsw_timeout(struct channel_gk20a *ch,
|
||||
@@ -1461,11 +1460,11 @@ bool nvgpu_channel_check_ctxsw_timeout(struct channel_gk20a *ch,
|
||||
bool progress = false;
|
||||
struct gk20a *g = ch->g;
|
||||
|
||||
recover = gk20a_channel_update_and_check_timeout(ch,
|
||||
recover = nvgpu_channel_update_and_check_ctxsw_timeout(ch,
|
||||
g->fifo_eng_timeout_us / 1000U,
|
||||
&progress);
|
||||
*verbose = ch->timeout_debug_dump;
|
||||
*ms = ch->timeout_accumulated_ms;
|
||||
*verbose = ch->ctxsw_timeout_debug_dump;
|
||||
*ms = ch->ctxsw_timeout_accumulated_ms;
|
||||
if (recover) {
|
||||
nvgpu_channel_set_error_notifier(g, ch,
|
||||
NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
|
||||
|
||||
@@ -355,21 +355,21 @@ bool nvgpu_tsg_mark_error(struct gk20a *g,
|
||||
|
||||
}
|
||||
|
||||
void nvgpu_tsg_set_timeout_accumulated_ms(struct tsg_gk20a *tsg, u32 ms)
|
||||
void nvgpu_tsg_set_ctxsw_timeout_accumulated_ms(struct tsg_gk20a *tsg, u32 ms)
|
||||
{
|
||||
struct channel_gk20a *ch = NULL;
|
||||
|
||||
nvgpu_rwsem_down_read(&tsg->ch_list_lock);
|
||||
nvgpu_list_for_each_entry(ch, &tsg->ch_list, channel_gk20a, ch_entry) {
|
||||
if (gk20a_channel_get(ch) != NULL) {
|
||||
ch->timeout_accumulated_ms = ms;
|
||||
ch->ctxsw_timeout_accumulated_ms = ms;
|
||||
gk20a_channel_put(ch);
|
||||
}
|
||||
}
|
||||
nvgpu_rwsem_up_read(&tsg->ch_list_lock);
|
||||
}
|
||||
|
||||
bool nvgpu_tsg_timeout_debug_dump_state(struct tsg_gk20a *tsg)
|
||||
bool nvgpu_tsg_ctxsw_timeout_debug_dump_state(struct tsg_gk20a *tsg)
|
||||
{
|
||||
struct channel_gk20a *ch = NULL;
|
||||
bool verbose = false;
|
||||
@@ -377,7 +377,7 @@ bool nvgpu_tsg_timeout_debug_dump_state(struct tsg_gk20a *tsg)
|
||||
nvgpu_rwsem_down_read(&tsg->ch_list_lock);
|
||||
nvgpu_list_for_each_entry(ch, &tsg->ch_list, channel_gk20a, ch_entry) {
|
||||
if (gk20a_channel_get(ch) != NULL) {
|
||||
if (ch->timeout_debug_dump) {
|
||||
if (ch->ctxsw_timeout_debug_dump) {
|
||||
verbose = true;
|
||||
}
|
||||
gk20a_channel_put(ch);
|
||||
@@ -430,7 +430,7 @@ bool nvgpu_tsg_check_ctxsw_timeout(struct tsg_gk20a *tsg,
|
||||
*/
|
||||
nvgpu_list_for_each_entry(ch, &tsg->ch_list, channel_gk20a, ch_entry) {
|
||||
if (gk20a_channel_get(ch) != NULL) {
|
||||
recover = gk20a_channel_update_and_check_timeout(ch,
|
||||
recover = nvgpu_channel_update_and_check_ctxsw_timeout(ch,
|
||||
*ms, &progress);
|
||||
if (progress || recover) {
|
||||
break;
|
||||
@@ -443,33 +443,33 @@ bool nvgpu_tsg_check_ctxsw_timeout(struct tsg_gk20a *tsg,
|
||||
/*
|
||||
* if one channel is presumed dead (no progress for too long),
|
||||
* then fifo recovery is needed. we can't really figure out
|
||||
* which channel caused the problem, so set timeout error
|
||||
* which channel caused the problem, so set ctxsw timeout error
|
||||
* notifier for all channels.
|
||||
*/
|
||||
nvgpu_log_info(g, "timeout on tsg=%d ch=%d",
|
||||
tsg->tsgid, ch->chid);
|
||||
*ms = ch->timeout_accumulated_ms;
|
||||
*ms = ch->ctxsw_timeout_accumulated_ms;
|
||||
gk20a_channel_put(ch);
|
||||
nvgpu_tsg_set_error_notifier(g, tsg,
|
||||
NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
|
||||
*verbose = nvgpu_tsg_timeout_debug_dump_state(tsg);
|
||||
*verbose = nvgpu_tsg_ctxsw_timeout_debug_dump_state(tsg);
|
||||
} else if (progress) {
|
||||
/*
|
||||
* if at least one channel in the TSG made some progress, reset
|
||||
* accumulated timeout for all channels in the TSG. In
|
||||
* particular, this resets timeout for channels that already
|
||||
* completed their work
|
||||
* ctxsw_timeout_accumulated_ms for all channels in the TSG. In
|
||||
* particular, this resets ctxsw_timeout_accumulated_ms timeout
|
||||
* for channels that already completed their work.
|
||||
*/
|
||||
nvgpu_log_info(g, "progress on tsg=%d ch=%d",
|
||||
tsg->tsgid, ch->chid);
|
||||
gk20a_channel_put(ch);
|
||||
*ms = g->fifo_eng_timeout_us / 1000U;
|
||||
nvgpu_tsg_set_timeout_accumulated_ms(tsg, *ms);
|
||||
nvgpu_tsg_set_ctxsw_timeout_accumulated_ms(tsg, *ms);
|
||||
}
|
||||
|
||||
/* if we could not detect progress on any of the channel, but none
|
||||
* of them has reached the timeout, there is nothing more to do:
|
||||
* timeout_accumulated_ms has been updated for all of them.
|
||||
* ctxsw_timeout_accumulated_ms has been updated for all of them.
|
||||
*/
|
||||
nvgpu_rwsem_up_read(&tsg->ch_list_lock);
|
||||
return recover;
|
||||
|
||||
@@ -312,13 +312,14 @@ struct channel_gk20a {
|
||||
|
||||
u32 obj_class; /* we support only one obj per channel */
|
||||
|
||||
u32 timeout_accumulated_ms;
|
||||
u32 timeout_gpfifo_get;
|
||||
u32 ctxsw_timeout_accumulated_ms;
|
||||
u32 ctxsw_timeout_gpfifo_get;
|
||||
u32 ctxsw_timeout_max_ms;
|
||||
bool ctxsw_timeout_debug_dump;
|
||||
|
||||
u32 subctx_id;
|
||||
u32 runqueue_sel;
|
||||
|
||||
u32 timeout_ms_max;
|
||||
u32 runlist_id;
|
||||
|
||||
bool mmu_nack_handled;
|
||||
@@ -329,7 +330,6 @@ struct channel_gk20a {
|
||||
bool deterministic_railgate_allowed;
|
||||
bool cde;
|
||||
bool usermode_submit_enabled;
|
||||
bool timeout_debug_dump;
|
||||
bool has_os_fence_framework_support;
|
||||
|
||||
bool is_privileged_channel;
|
||||
@@ -373,7 +373,7 @@ void nvgpu_channel_set_ctx_mmu_error(struct gk20a *g,
|
||||
struct channel_gk20a *ch);
|
||||
bool nvgpu_channel_mark_error(struct gk20a *g, struct channel_gk20a *ch);
|
||||
|
||||
bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
|
||||
bool nvgpu_channel_update_and_check_ctxsw_timeout(struct channel_gk20a *ch,
|
||||
u32 timeout_delta_ms, bool *progress);
|
||||
bool nvgpu_channel_check_ctxsw_timeout(struct channel_gk20a *ch,
|
||||
bool *verbose, u32 *ms);
|
||||
|
||||
@@ -147,6 +147,7 @@ gk20a_event_id_data_from_event_id_node(struct nvgpu_list_node *node)
|
||||
|
||||
void nvgpu_tsg_set_error_notifier(struct gk20a *g, struct tsg_gk20a *tsg,
|
||||
u32 error_notifier);
|
||||
bool nvgpu_tsg_timeout_debug_dump_state(struct tsg_gk20a *tsg);
|
||||
void nvgpu_tsg_set_timeout_accumulated_ms(struct tsg_gk20a *tsg, u32 ms);
|
||||
bool nvgpu_tsg_ctxsw_timeout_debug_dump_state(struct tsg_gk20a *tsg);
|
||||
void nvgpu_tsg_set_ctxsw_timeout_accumulated_ms(struct tsg_gk20a *tsg, u32 ms);
|
||||
|
||||
#endif /* TSG_GK20A_H */
|
||||
|
||||
@@ -93,7 +93,7 @@ static int gk20a_fifo_sched_debugfs_seq_show(
|
||||
ch->tsgid,
|
||||
ch->tgid,
|
||||
tsg->timeslice_us,
|
||||
ch->timeout_ms_max,
|
||||
ch->ctxsw_timeout_max_ms,
|
||||
tsg->interleave_level,
|
||||
tsg->gr_ctx->graphics_preempt_mode,
|
||||
tsg->gr_ctx->compute_preempt_mode);
|
||||
|
||||
@@ -89,7 +89,7 @@ static void gk20a_channel_trace_sched_param(
|
||||
|
||||
(trace)(ch->chid, ch->tsgid, ch->pid,
|
||||
tsg_gk20a_from_ch(ch)->timeslice_us,
|
||||
ch->timeout_ms_max,
|
||||
ch->ctxsw_timeout_max_ms,
|
||||
gk20a_fifo_interleave_level_name(tsg->interleave_level),
|
||||
gr_gk20a_graphics_preempt_mode_name(
|
||||
tsg->gr_ctx->graphics_preempt_mode),
|
||||
@@ -1242,7 +1242,7 @@ long gk20a_channel_ioctl(struct file *filp,
|
||||
(u32)((struct nvgpu_set_timeout_args *)buf)->timeout;
|
||||
nvgpu_log(g, gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
|
||||
timeout, ch->chid);
|
||||
ch->timeout_ms_max = timeout;
|
||||
ch->ctxsw_timeout_max_ms = timeout;
|
||||
gk20a_channel_trace_sched_param(
|
||||
trace_gk20a_channel_set_timeout, ch);
|
||||
break;
|
||||
@@ -1251,13 +1251,13 @@ long gk20a_channel_ioctl(struct file *filp,
|
||||
{
|
||||
u32 timeout =
|
||||
(u32)((struct nvgpu_set_timeout_args *)buf)->timeout;
|
||||
bool timeout_debug_dump = !((u32)
|
||||
bool ctxsw_timeout_debug_dump = !((u32)
|
||||
((struct nvgpu_set_timeout_ex_args *)buf)->flags &
|
||||
(1 << NVGPU_TIMEOUT_FLAG_DISABLE_DUMP));
|
||||
nvgpu_log(g, gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
|
||||
timeout, ch->chid);
|
||||
ch->timeout_ms_max = timeout;
|
||||
ch->timeout_debug_dump = timeout_debug_dump;
|
||||
ch->ctxsw_timeout_max_ms = timeout;
|
||||
ch->ctxsw_timeout_debug_dump = ctxsw_timeout_debug_dump;
|
||||
gk20a_channel_trace_sched_param(
|
||||
trace_gk20a_channel_set_timeout, ch);
|
||||
break;
|
||||
|
||||
Reference in New Issue
Block a user