diff --git a/drivers/gpu/nvgpu/common/fifo/channel.c b/drivers/gpu/nvgpu/common/fifo/channel.c
index 27e02ca22..e709ac4e7 100644
--- a/drivers/gpu/nvgpu/common/fifo/channel.c
+++ b/drivers/gpu/nvgpu/common/fifo/channel.c
@@ -1397,12 +1397,18 @@ bool nvgpu_channel_mark_error(struct gk20a *g, struct channel_gk20a *ch)
 	return verbose;
 }
 
+void nvgpu_channel_set_error_notifier(struct gk20a *g, struct channel_gk20a *ch,
+		u32 error_notifier)
+{
+	g->ops.fifo.set_error_notifier(ch, error_notifier);
+}
+
 void nvgpu_channel_set_ctx_mmu_error(struct gk20a *g,
 		struct channel_gk20a *ch)
 {
 	nvgpu_err(g,
 		"channel %d generated a mmu fault", ch->chid);
-	g->ops.fifo.set_error_notifier(ch,
+	nvgpu_channel_set_error_notifier(g, ch,
 		NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT);
 }
 
@@ -1441,7 +1447,7 @@ bool nvgpu_channel_check_ctxsw_timeout(struct channel_gk20a *ch,
 	*verbose = ch->timeout_debug_dump;
 	*ms = ch->timeout_accumulated_ms;
 	if (recover) {
-		g->ops.fifo.set_error_notifier(ch,
+		nvgpu_channel_set_error_notifier(g, ch,
 			NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
 	}
 
diff --git a/drivers/gpu/nvgpu/common/fifo/tsg.c b/drivers/gpu/nvgpu/common/fifo/tsg.c
index 0c27a2765..e27fbca37 100644
--- a/drivers/gpu/nvgpu/common/fifo/tsg.c
+++ b/drivers/gpu/nvgpu/common/fifo/tsg.c
@@ -354,22 +354,60 @@ bool nvgpu_tsg_mark_error(struct gk20a *g,
 }
 
-void nvgpu_tsg_set_ctx_mmu_error(struct gk20a *g,
-		struct tsg_gk20a *tsg)
+void nvgpu_tsg_set_timeout_accumulated_ms(struct tsg_gk20a *tsg, u32 ms)
 {
 	struct channel_gk20a *ch = NULL;
 
-	nvgpu_err(g, "TSG %d generated a mmu fault", tsg->tsgid);
-
 	nvgpu_rwsem_down_read(&tsg->ch_list_lock);
 	nvgpu_list_for_each_entry(ch, &tsg->ch_list, channel_gk20a, ch_entry) {
 		if (gk20a_channel_get(ch) != NULL) {
-			nvgpu_channel_set_ctx_mmu_error(g, ch);
+			ch->timeout_accumulated_ms = ms;
+			gk20a_channel_put(ch);
+		}
+	}
+	nvgpu_rwsem_up_read(&tsg->ch_list_lock);
+}
+
+bool nvgpu_tsg_timeout_debug_dump_state(struct tsg_gk20a *tsg)
+{
+	struct channel_gk20a *ch = NULL;
+	bool verbose = false;
+
+	nvgpu_rwsem_down_read(&tsg->ch_list_lock);
+	nvgpu_list_for_each_entry(ch, &tsg->ch_list, channel_gk20a, ch_entry) {
+		if (gk20a_channel_get(ch) != NULL) {
+			if (ch->timeout_debug_dump) {
+				verbose = true;
+			}
 			gk20a_channel_put(ch);
 		}
 	}
 	nvgpu_rwsem_up_read(&tsg->ch_list_lock);
+	return verbose;
+}
+
+void nvgpu_tsg_set_error_notifier(struct gk20a *g, struct tsg_gk20a *tsg,
+		u32 error_notifier)
+{
+	struct channel_gk20a *ch = NULL;
+
+	nvgpu_rwsem_down_read(&tsg->ch_list_lock);
+	nvgpu_list_for_each_entry(ch, &tsg->ch_list, channel_gk20a, ch_entry) {
+		if (gk20a_channel_get(ch) != NULL) {
+			nvgpu_channel_set_error_notifier(g, ch, error_notifier);
+			gk20a_channel_put(ch);
+		}
+	}
+	nvgpu_rwsem_up_read(&tsg->ch_list_lock);
+}
+
+void nvgpu_tsg_set_ctx_mmu_error(struct gk20a *g, struct tsg_gk20a *tsg)
+{
+	nvgpu_err(g, "TSG %d generated a mmu fault", tsg->tsgid);
+
+	nvgpu_tsg_set_error_notifier(g, tsg,
+			NVGPU_ERR_NOTIFIER_FIFO_ERROR_MMU_ERR_FLT);
 }
 
 bool nvgpu_tsg_check_ctxsw_timeout(struct tsg_gk20a *tsg,
@@ -411,17 +449,9 @@ bool nvgpu_tsg_check_ctxsw_timeout(struct tsg_gk20a *tsg,
 				 tsg->tsgid, ch->chid);
 			*ms = ch->timeout_accumulated_ms;
 			gk20a_channel_put(ch);
-			nvgpu_list_for_each_entry(ch, &tsg->ch_list,
-					channel_gk20a, ch_entry) {
-				if (gk20a_channel_get(ch) != NULL) {
-					ch->g->ops.fifo.set_error_notifier(ch,
-						NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
-					if (ch->timeout_debug_dump) {
-						*verbose = true;
-					}
-					gk20a_channel_put(ch);
-				}
-			}
+			nvgpu_tsg_set_error_notifier(g, tsg,
+				NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
+			*verbose = nvgpu_tsg_timeout_debug_dump_state(tsg);
 		} else if (progress) {
 			/*
 			 * if at least one channel in the TSG made some progress, reset
@@ -433,13 +463,7 @@ bool nvgpu_tsg_check_ctxsw_timeout(struct tsg_gk20a *tsg,
 				 tsg->tsgid, ch->chid);
 			gk20a_channel_put(ch);
 			*ms = g->fifo_eng_timeout_us / 1000U;
-			nvgpu_list_for_each_entry(ch, &tsg->ch_list,
-					channel_gk20a, ch_entry) {
-				if (gk20a_channel_get(ch) != NULL) {
-					ch->timeout_accumulated_ms = *ms;
-					gk20a_channel_put(ch);
-				}
-			}
+			nvgpu_tsg_set_timeout_accumulated_ms(tsg, *ms);
 		}
 
 	/* if we could not detect progress on any of the channel, but none
diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
index f4c60f9b7..5de181e3c 100644
--- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c
@@ -2938,7 +2938,6 @@ static void gk20a_gr_set_error_notifier(struct gk20a *g,
 {
 	struct channel_gk20a *ch;
 	struct tsg_gk20a *tsg;
-	struct channel_gk20a *ch_tsg;
 
 	ch = isr_data->ch;
 
@@ -2948,16 +2947,7 @@ static void gk20a_gr_set_error_notifier(struct gk20a *g,
 
 	tsg = tsg_gk20a_from_ch(ch);
 	if (tsg != NULL) {
-		nvgpu_rwsem_down_read(&tsg->ch_list_lock);
-		nvgpu_list_for_each_entry(ch_tsg, &tsg->ch_list,
-				channel_gk20a, ch_entry) {
-			if (gk20a_channel_get(ch_tsg) != NULL) {
-				g->ops.fifo.set_error_notifier(ch_tsg,
-						error_notifier);
-				gk20a_channel_put(ch_tsg);
-			}
-		}
-		nvgpu_rwsem_up_read(&tsg->ch_list_lock);
+		nvgpu_tsg_set_error_notifier(g, tsg, error_notifier);
 	} else {
 		nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid);
 	}
diff --git a/drivers/gpu/nvgpu/include/nvgpu/channel.h b/drivers/gpu/nvgpu/include/nvgpu/channel.h
index 845b665c9..0ba588da9 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/channel.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/channel.h
@@ -496,4 +496,7 @@ static inline u64 gk20a_channel_userd_gpu_va(struct channel_gk20a *c)
 	struct nvgpu_mem *mem = c->userd_mem;
 	return (mem->gpu_va != 0ULL) ? mem->gpu_va + c->userd_offset : 0ULL;
 }
+
+void nvgpu_channel_set_error_notifier(struct gk20a *g, struct channel_gk20a *ch,
+		u32 error_notifier);
 #endif
diff --git a/drivers/gpu/nvgpu/include/nvgpu/tsg.h b/drivers/gpu/nvgpu/include/nvgpu/tsg.h
index 212b892b6..3e22386e9 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/tsg.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/tsg.h
@@ -145,4 +145,8 @@ gk20a_event_id_data_from_event_id_node(struct nvgpu_list_node *node)
 	((uintptr_t)node - offsetof(struct gk20a_event_id_data, event_id_node));
 };
 
+void nvgpu_tsg_set_error_notifier(struct gk20a *g, struct tsg_gk20a *tsg,
+		u32 error_notifier);
+bool nvgpu_tsg_timeout_debug_dump_state(struct tsg_gk20a *tsg);
+void nvgpu_tsg_set_timeout_accumulated_ms(struct tsg_gk20a *tsg, u32 ms);
 #endif /* TSG_GK20A_H */
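
Note on the refactor: the patch collapses three duplicated channel-walk loops (error-notifier broadcast, timeout-accumulator reset, debug-dump polling) into named TSG helpers, and gk20a_gr_set_error_notifier() drops its open-coded copy. The standalone C sketch below illustrates the iteration pattern those helpers share: walk the TSG's channel list under the read side of the list lock, take a reference on each channel before touching it, and drop it afterwards. All types and names in the sketch are simplified stand-ins for illustration, not nvgpu code; a plain array stands in for tsg->ch_list and a boolean for the channel refcount.

/*
 * Sketch only: mirrors the shape of the new tsg.c helpers with stand-in
 * types, so the shared get/put walk is visible outside the driver.
 */
#include <stdbool.h>
#include <stdio.h>

#define MAX_CH 4U

struct channel {
	unsigned int chid;
	unsigned int timeout_accumulated_ms;
	bool timeout_debug_dump;
	bool alive;			/* stand-in for the channel refcount */
};

struct tsg {
	struct channel ch[MAX_CH];	/* stand-in for tsg->ch_list */
	/* the real code guards the list with tsg->ch_list_lock (a rwsem) */
};

/* stand-ins for gk20a_channel_get()/gk20a_channel_put() */
static struct channel *channel_get(struct channel *ch)
{
	return ch->alive ? ch : NULL;
}

static void channel_put(struct channel *ch)
{
	(void)ch;
}

/* mirrors nvgpu_tsg_set_timeout_accumulated_ms(): one loop, one job */
static void tsg_set_timeout_accumulated_ms(struct tsg *tsg, unsigned int ms)
{
	/* nvgpu_rwsem_down_read(&tsg->ch_list_lock) would bracket this loop */
	for (unsigned int i = 0U; i < MAX_CH; i++) {
		struct channel *ch = &tsg->ch[i];

		if (channel_get(ch) != NULL) {
			ch->timeout_accumulated_ms = ms;
			channel_put(ch);
		}
	}
}

/* mirrors nvgpu_tsg_timeout_debug_dump_state(): OR of per-channel flags */
static bool tsg_timeout_debug_dump_state(struct tsg *tsg)
{
	bool verbose = false;

	for (unsigned int i = 0U; i < MAX_CH; i++) {
		struct channel *ch = &tsg->ch[i];

		if (channel_get(ch) != NULL) {
			if (ch->timeout_debug_dump) {
				verbose = true;
			}
			channel_put(ch);
		}
	}
	return verbose;
}

int main(void)
{
	struct tsg tsg = { .ch = {
		{ .chid = 0U, .alive = true, .timeout_debug_dump = true },
		{ .chid = 1U, .alive = true },
	} };

	tsg_set_timeout_accumulated_ms(&tsg, 3000U);
	printf("ch0 ms=%u, verbose=%d\n",
	       tsg.ch[0].timeout_accumulated_ms,
	       (int)tsg_timeout_debug_dump_state(&tsg));
	return 0;
}

Keeping the get/put pairing and the locking inside each helper is what lets the call sites in nvgpu_tsg_check_ctxsw_timeout() shrink to single lines without changing reference-counting or locking behavior.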