gpu: nvgpu: rename timeout_* of channel struct

timeout_ms_max is renamed to ctxsw_timeout_max_ms
timeout_debug_dump is renamed to ctxsw_timeout_debug_dump
timeout_accumulated_ms is renamed to ctxsw_timeout_accumulated_ms
timeout_gpfifo_get is renamed to ctxsw_timeout_gpfifo_get

gk20a_channel_update_and_check_timeout is renamed to
nvgpu_channel_update_and_check_ctxsw_timeout

JIRA NVGPU-1312

Change-Id: Ib5c8829c76df95817e9809e451e8c9671faba726
Signed-off-by: Seema Khowala <seemaj@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2076847
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Seema Khowala authored on 2019-03-20 10:58:04 -07:00; committed by mobile promotions
parent 9a0b8c0234
commit 737de7eac5
6 changed files with 45 additions and 45 deletions


@@ -738,11 +738,11 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g,
 	ch->tsgid = NVGPU_INVALID_TSG_ID;

 	/* clear ctxsw timeout counter and update timestamp */
-	ch->timeout_accumulated_ms = 0;
-	ch->timeout_gpfifo_get = 0;
+	ch->ctxsw_timeout_accumulated_ms = 0;
+	ch->ctxsw_timeout_gpfifo_get = 0;
 	/* set gr host default timeout */
-	ch->timeout_ms_max = gk20a_get_gr_idle_timeout(g);
-	ch->timeout_debug_dump = true;
+	ch->ctxsw_timeout_max_ms = gk20a_get_gr_idle_timeout(g);
+	ch->ctxsw_timeout_debug_dump = true;
 	ch->unserviceable = false;

 	/* init kernel watchdog timeout */
@@ -1384,13 +1384,13 @@ u32 nvgpu_gp_free_count(struct channel_gk20a *c)
 		c->gpfifo.entry_num;
 }

-static bool nvgpu_channel_timeout_debug_dump_state(struct gk20a *g,
+static bool nvgpu_channel_ctxsw_timeout_debug_dump_state(struct gk20a *g,
 		struct channel_gk20a *ch)
 {
 	bool verbose = false;

 	if (nvgpu_is_error_notifier_set(ch,
 			NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT)) {
-		verbose = ch->timeout_debug_dump;
+		verbose = ch->ctxsw_timeout_debug_dump;
 	}
 	return verbose;
@@ -1411,7 +1411,7 @@ bool nvgpu_channel_mark_error(struct gk20a *g, struct channel_gk20a *ch)
 {
 	bool verbose;

-	verbose = nvgpu_channel_timeout_debug_dump_state(g, ch);
+	verbose = nvgpu_channel_ctxsw_timeout_debug_dump_state(g, ch);
 	nvgpu_channel_set_has_timedout_and_wakeup_wqs(g, ch);

 	return verbose;
@@ -1432,26 +1432,25 @@ void nvgpu_channel_set_ctx_mmu_error(struct gk20a *g,
 			NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT);
 }

-bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
+bool nvgpu_channel_update_and_check_ctxsw_timeout(struct channel_gk20a *ch,
 		u32 timeout_delta_ms, bool *progress)
 {
 	u32 gpfifo_get = update_gp_get(ch->g, ch);

-	/* Count consequent timeout isr */
-	if (gpfifo_get == ch->timeout_gpfifo_get) {
-		/* we didn't advance since previous channel timeout check */
-		ch->timeout_accumulated_ms += timeout_delta_ms;
+	if (gpfifo_get == ch->ctxsw_timeout_gpfifo_get) {
+		/* didn't advance since previous ctxsw timeout check */
+		ch->ctxsw_timeout_accumulated_ms += timeout_delta_ms;
 		*progress = false;
 	} else {
-		/* first timeout isr encountered */
-		ch->timeout_accumulated_ms = timeout_delta_ms;
+		/* first ctxsw timeout isr encountered */
+		ch->ctxsw_timeout_accumulated_ms = timeout_delta_ms;
 		*progress = true;
 	}

-	ch->timeout_gpfifo_get = gpfifo_get;
+	ch->ctxsw_timeout_gpfifo_get = gpfifo_get;

 	return nvgpu_is_timeouts_enabled(ch->g) &&
-		ch->timeout_accumulated_ms > ch->timeout_ms_max;
+		ch->ctxsw_timeout_accumulated_ms > ch->ctxsw_timeout_max_ms;
 }

 bool nvgpu_channel_check_ctxsw_timeout(struct channel_gk20a *ch,
@@ -1461,11 +1460,11 @@ bool nvgpu_channel_check_ctxsw_timeout(struct channel_gk20a *ch,
 	bool progress = false;
 	struct gk20a *g = ch->g;

-	recover = gk20a_channel_update_and_check_timeout(ch,
+	recover = nvgpu_channel_update_and_check_ctxsw_timeout(ch,
 			g->fifo_eng_timeout_us / 1000U,
 			&progress);
-	*verbose = ch->timeout_debug_dump;
-	*ms = ch->timeout_accumulated_ms;
+	*verbose = ch->ctxsw_timeout_debug_dump;
+	*ms = ch->ctxsw_timeout_accumulated_ms;
 	if (recover) {
 		nvgpu_channel_set_error_notifier(g, ch,
 			NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
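As context for the rename, the accounting done by nvgpu_channel_update_and_check_ctxsw_timeout() can be read as the minimal standalone sketch below. It is illustrative only (struct toy_channel and the function name are not from this tree), and it omits the nvgpu_is_timeouts_enabled() gate that the real function applies before reporting a recoverable stall.

/* Illustrative sketch only -- not driver code. */
#include <stdbool.h>
#include <stdint.h>

struct toy_channel {
	uint32_t ctxsw_timeout_gpfifo_get;     /* GP_GET seen at the last check */
	uint32_t ctxsw_timeout_accumulated_ms; /* time spent with no progress */
	uint32_t ctxsw_timeout_max_ms;         /* per-channel recovery threshold */
};

/* gpfifo_get is the current GP_GET value read back from hardware */
static bool toy_update_and_check_ctxsw_timeout(struct toy_channel *ch,
		uint32_t gpfifo_get, uint32_t timeout_delta_ms, bool *progress)
{
	if (gpfifo_get == ch->ctxsw_timeout_gpfifo_get) {
		/* no gpfifo entries consumed since last check: keep accumulating */
		ch->ctxsw_timeout_accumulated_ms += timeout_delta_ms;
		*progress = false;
	} else {
		/* channel advanced: restart the accumulation window */
		ch->ctxsw_timeout_accumulated_ms = timeout_delta_ms;
		*progress = true;
	}
	ch->ctxsw_timeout_gpfifo_get = gpfifo_get;

	/* recover once the stall outlasts the per-channel maximum */
	return ch->ctxsw_timeout_accumulated_ms > ch->ctxsw_timeout_max_ms;
}

Each ctxsw timeout interrupt feeds another g->fifo_eng_timeout_us / 1000U millisecond slice into this accounting, so recovery only triggers after consecutive interrupts with no GP_GET movement.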


@@ -355,21 +355,21 @@ bool nvgpu_tsg_mark_error(struct gk20a *g,
 }

-void nvgpu_tsg_set_timeout_accumulated_ms(struct tsg_gk20a *tsg, u32 ms)
+void nvgpu_tsg_set_ctxsw_timeout_accumulated_ms(struct tsg_gk20a *tsg, u32 ms)
 {
 	struct channel_gk20a *ch = NULL;

 	nvgpu_rwsem_down_read(&tsg->ch_list_lock);
 	nvgpu_list_for_each_entry(ch, &tsg->ch_list, channel_gk20a, ch_entry) {
 		if (gk20a_channel_get(ch) != NULL) {
-			ch->timeout_accumulated_ms = ms;
+			ch->ctxsw_timeout_accumulated_ms = ms;
 			gk20a_channel_put(ch);
 		}
 	}
 	nvgpu_rwsem_up_read(&tsg->ch_list_lock);
 }

-bool nvgpu_tsg_timeout_debug_dump_state(struct tsg_gk20a *tsg)
+bool nvgpu_tsg_ctxsw_timeout_debug_dump_state(struct tsg_gk20a *tsg)
 {
 	struct channel_gk20a *ch = NULL;
 	bool verbose = false;
@@ -377,7 +377,7 @@ bool nvgpu_tsg_timeout_debug_dump_state(struct tsg_gk20a *tsg)
 	nvgpu_rwsem_down_read(&tsg->ch_list_lock);
 	nvgpu_list_for_each_entry(ch, &tsg->ch_list, channel_gk20a, ch_entry) {
 		if (gk20a_channel_get(ch) != NULL) {
-			if (ch->timeout_debug_dump) {
+			if (ch->ctxsw_timeout_debug_dump) {
 				verbose = true;
 			}
 			gk20a_channel_put(ch);
@@ -430,7 +430,7 @@ bool nvgpu_tsg_check_ctxsw_timeout(struct tsg_gk20a *tsg,
 	 */
 	nvgpu_list_for_each_entry(ch, &tsg->ch_list, channel_gk20a, ch_entry) {
 		if (gk20a_channel_get(ch) != NULL) {
-			recover = gk20a_channel_update_and_check_timeout(ch,
+			recover = nvgpu_channel_update_and_check_ctxsw_timeout(ch,
 					*ms, &progress);
 			if (progress || recover) {
 				break;
@@ -443,33 +443,33 @@ bool nvgpu_tsg_check_ctxsw_timeout(struct tsg_gk20a *tsg,
 		/*
 		 * if one channel is presumed dead (no progress for too long),
 		 * then fifo recovery is needed. we can't really figure out
-		 * which channel caused the problem, so set timeout error
+		 * which channel caused the problem, so set ctxsw timeout error
 		 * notifier for all channels.
 		 */
 		nvgpu_log_info(g, "timeout on tsg=%d ch=%d",
 				tsg->tsgid, ch->chid);
-		*ms = ch->timeout_accumulated_ms;
+		*ms = ch->ctxsw_timeout_accumulated_ms;
 		gk20a_channel_put(ch);
 		nvgpu_tsg_set_error_notifier(g, tsg,
 			NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
-		*verbose = nvgpu_tsg_timeout_debug_dump_state(tsg);
+		*verbose = nvgpu_tsg_ctxsw_timeout_debug_dump_state(tsg);
 	} else if (progress) {
 		/*
 		 * if at least one channel in the TSG made some progress, reset
-		 * accumulated timeout for all channels in the TSG. In
-		 * particular, this resets timeout for channels that already
-		 * completed their work
+		 * ctxsw_timeout_accumulated_ms for all channels in the TSG. In
+		 * particular, this resets ctxsw_timeout_accumulated_ms timeout
+		 * for channels that already completed their work.
 		 */
 		nvgpu_log_info(g, "progress on tsg=%d ch=%d",
 				tsg->tsgid, ch->chid);
 		gk20a_channel_put(ch);
 		*ms = g->fifo_eng_timeout_us / 1000U;
-		nvgpu_tsg_set_timeout_accumulated_ms(tsg, *ms);
+		nvgpu_tsg_set_ctxsw_timeout_accumulated_ms(tsg, *ms);
 	}

 	/* if we could not detect progress on any of the channel, but none
 	 * of them has reached the timeout, there is nothing more to do:
-	 * timeout_accumulated_ms has been updated for all of them.
+	 * ctxsw_timeout_accumulated_ms has been updated for all of them.
 	 */
 	nvgpu_rwsem_up_read(&tsg->ch_list_lock);

 	return recover;
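The TSG path reuses that per-channel accounting; a minimal standalone sketch of the group policy follows. Names (toy_ch, the array-based channel list) are illustrative, and locking, channel refcounting, and the error-notifier call are omitted.

/* Illustrative sketch only -- not driver code. */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

struct toy_ch {
	uint32_t hw_gpfifo_get;                /* latest GP_GET read from hardware */
	uint32_t ctxsw_timeout_gpfifo_get;
	uint32_t ctxsw_timeout_accumulated_ms;
	uint32_t ctxsw_timeout_max_ms;
	bool ctxsw_timeout_debug_dump;
};

/* same per-channel accounting as sketched after the channel code hunks */
static bool toy_ch_update_and_check(struct toy_ch *ch, uint32_t delta_ms,
		bool *progress)
{
	if (ch->hw_gpfifo_get == ch->ctxsw_timeout_gpfifo_get) {
		ch->ctxsw_timeout_accumulated_ms += delta_ms;
		*progress = false;
	} else {
		ch->ctxsw_timeout_accumulated_ms = delta_ms;
		*progress = true;
	}
	ch->ctxsw_timeout_gpfifo_get = ch->hw_gpfifo_get;
	return ch->ctxsw_timeout_accumulated_ms > ch->ctxsw_timeout_max_ms;
}

/* returns true when the whole TSG should be recovered */
static bool toy_tsg_check_ctxsw_timeout(struct toy_ch *chs, size_t nr,
		uint32_t poll_ms, bool *verbose, uint32_t *ms)
{
	bool recover = false, progress = false;
	size_t i;

	/* stop at the first channel that either advanced or timed out */
	for (i = 0; i < nr; i++) {
		recover = toy_ch_update_and_check(&chs[i], poll_ms, &progress);
		if (progress || recover)
			break;
	}

	if (recover) {
		/* one stalled channel condemns the whole group */
		*ms = chs[i].ctxsw_timeout_accumulated_ms;
		*verbose = chs[i].ctxsw_timeout_debug_dump;
	} else if (progress) {
		/* any progress resets the accounting for every channel */
		*ms = poll_ms;
		for (i = 0; i < nr; i++)
			chs[i].ctxsw_timeout_accumulated_ms = *ms;
	}
	return recover;
}

As in nvgpu_tsg_check_ctxsw_timeout(), a recover verdict applies the idle-timeout error notifier to every channel in the TSG, while progress on any one channel forgives the rest.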


@@ -312,13 +312,14 @@ struct channel_gk20a {
 	u32 obj_class;	/* we support only one obj per channel */

-	u32 timeout_accumulated_ms;
-	u32 timeout_gpfifo_get;
+	u32 ctxsw_timeout_accumulated_ms;
+	u32 ctxsw_timeout_gpfifo_get;
+	u32 ctxsw_timeout_max_ms;
+	bool ctxsw_timeout_debug_dump;

 	u32 subctx_id;
 	u32 runqueue_sel;

-	u32 timeout_ms_max;
 	u32 runlist_id;

 	bool mmu_nack_handled;
@@ -329,7 +330,6 @@ struct channel_gk20a {
 	bool deterministic_railgate_allowed;
 	bool cde;
 	bool usermode_submit_enabled;
-	bool timeout_debug_dump;
 	bool has_os_fence_framework_support;

 	bool is_privileged_channel;
@@ -373,7 +373,7 @@ void nvgpu_channel_set_ctx_mmu_error(struct gk20a *g,
 		struct channel_gk20a *ch);
 bool nvgpu_channel_mark_error(struct gk20a *g, struct channel_gk20a *ch);
-bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch,
+bool nvgpu_channel_update_and_check_ctxsw_timeout(struct channel_gk20a *ch,
 		u32 timeout_delta_ms, bool *progress);
 bool nvgpu_channel_check_ctxsw_timeout(struct channel_gk20a *ch,
 		bool *verbose, u32 *ms);


@@ -147,6 +147,7 @@ gk20a_event_id_data_from_event_id_node(struct nvgpu_list_node *node)
 void nvgpu_tsg_set_error_notifier(struct gk20a *g, struct tsg_gk20a *tsg,
 		u32 error_notifier);
-bool nvgpu_tsg_timeout_debug_dump_state(struct tsg_gk20a *tsg);
-void nvgpu_tsg_set_timeout_accumulated_ms(struct tsg_gk20a *tsg, u32 ms);
+bool nvgpu_tsg_ctxsw_timeout_debug_dump_state(struct tsg_gk20a *tsg);
+void nvgpu_tsg_set_ctxsw_timeout_accumulated_ms(struct tsg_gk20a *tsg, u32 ms);

 #endif /* TSG_GK20A_H */


@@ -93,7 +93,7 @@ static int gk20a_fifo_sched_debugfs_seq_show(
 			ch->tsgid,
 			ch->tgid,
 			tsg->timeslice_us,
-			ch->timeout_ms_max,
+			ch->ctxsw_timeout_max_ms,
 			tsg->interleave_level,
 			tsg->gr_ctx->graphics_preempt_mode,
 			tsg->gr_ctx->compute_preempt_mode);


@@ -89,7 +89,7 @@ static void gk20a_channel_trace_sched_param(
 	(trace)(ch->chid, ch->tsgid, ch->pid,
 		tsg_gk20a_from_ch(ch)->timeslice_us,
-		ch->timeout_ms_max,
+		ch->ctxsw_timeout_max_ms,
 		gk20a_fifo_interleave_level_name(tsg->interleave_level),
 		gr_gk20a_graphics_preempt_mode_name(
 			tsg->gr_ctx->graphics_preempt_mode),
@@ -1242,7 +1242,7 @@ long gk20a_channel_ioctl(struct file *filp,
 			(u32)((struct nvgpu_set_timeout_args *)buf)->timeout;
 		nvgpu_log(g, gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
 			   timeout, ch->chid);
-		ch->timeout_ms_max = timeout;
+		ch->ctxsw_timeout_max_ms = timeout;
 		gk20a_channel_trace_sched_param(
 			trace_gk20a_channel_set_timeout, ch);
 		break;
@@ -1251,13 +1251,13 @@ long gk20a_channel_ioctl(struct file *filp,
 	{
 		u32 timeout =
 			(u32)((struct nvgpu_set_timeout_args *)buf)->timeout;
-		bool timeout_debug_dump = !((u32)
+		bool ctxsw_timeout_debug_dump = !((u32)
 			((struct nvgpu_set_timeout_ex_args *)buf)->flags &
 			  (1 << NVGPU_TIMEOUT_FLAG_DISABLE_DUMP));
 		nvgpu_log(g, gpu_dbg_gpu_dbg, "setting timeout (%d ms) for chid %d",
 			   timeout, ch->chid);
-		ch->timeout_ms_max = timeout;
-		ch->timeout_debug_dump = timeout_debug_dump;
+		ch->ctxsw_timeout_max_ms = timeout;
+		ch->ctxsw_timeout_debug_dump = ctxsw_timeout_debug_dump;
 		gk20a_channel_trace_sched_param(
 			trace_gk20a_channel_set_timeout, ch);
 		break;