diff --git a/drivers/gpu/nvgpu/common/fifo/runlist.c b/drivers/gpu/nvgpu/common/fifo/runlist.c
index d48849e64..d3b2b645f 100644
--- a/drivers/gpu/nvgpu/common/fifo/runlist.c
+++ b/drivers/gpu/nvgpu/common/fifo/runlist.c
@@ -134,7 +134,7 @@ static u32 nvgpu_runlist_append_prio(struct fifo_gk20a *f,
 	nvgpu_log_fn(f->g, " ");
 
 	for_each_set_bit(tsgid, runlist->active_tsgs, f->num_channels) {
-		struct tsg_gk20a *tsg = &f->tsg[tsgid];
+		struct tsg_gk20a *tsg = nvgpu_tsg_get_from_id(f->g, tsgid);
 		u32 entries;
 
 		if (tsg->interleave_level == interleave_level) {
@@ -177,7 +177,7 @@ static u32 nvgpu_runlist_append_med(struct fifo_gk20a *f,
 	nvgpu_log_fn(f->g, " ");
 
 	for_each_set_bit(tsgid, runlist->active_tsgs, f->num_channels) {
-		struct tsg_gk20a *tsg = &f->tsg[tsgid];
+		struct tsg_gk20a *tsg = nvgpu_tsg_get_from_id(f->g, tsgid);
 		u32 entries;
 
 		if (tsg->interleave_level !=
@@ -216,7 +216,7 @@ static u32 nvgpu_runlist_append_low(struct fifo_gk20a *f,
 	nvgpu_log_fn(f->g, " ");
 
 	for_each_set_bit(tsgid, runlist->active_tsgs, f->num_channels) {
-		struct tsg_gk20a *tsg = &f->tsg[tsgid];
+		struct tsg_gk20a *tsg = nvgpu_tsg_get_from_id(f->g, tsgid);
 		u32 entries;
 
 		if (tsg->interleave_level !=
diff --git a/drivers/gpu/nvgpu/common/fifo/tsg.c b/drivers/gpu/nvgpu/common/fifo/tsg.c
index 750246d41..759354b18 100644
--- a/drivers/gpu/nvgpu/common/fifo/tsg.c
+++ b/drivers/gpu/nvgpu/common/fifo/tsg.c
@@ -48,6 +48,24 @@ void nvgpu_tsg_disable(struct tsg_gk20a *tsg)
 	nvgpu_rwsem_up_read(&tsg->ch_list_lock);
 }
 
+struct tsg_gk20a *nvgpu_tsg_check_and_get_from_id(struct gk20a *g, u32 tsgid)
+{
+	if (tsgid == NVGPU_INVALID_TSG_ID) {
+		return NULL;
+	}
+
+	return nvgpu_tsg_get_from_id(g, tsgid);
+}
+
+
+struct tsg_gk20a *nvgpu_tsg_get_from_id(struct gk20a *g, u32 tsgid)
+{
+	struct fifo_gk20a *f = &g->fifo;
+
+	return &f->tsg[tsgid];
+}
+
+
 static bool gk20a_is_channel_active(struct gk20a *g, struct channel_gk20a *ch)
 {
 	struct fifo_gk20a *f = &g->fifo;
diff --git a/drivers/gpu/nvgpu/common/rc/rc.c b/drivers/gpu/nvgpu/common/rc/rc.c
index dc1159f90..d359ba0ae 100644
--- a/drivers/gpu/nvgpu/common/rc/rc.c
+++ b/drivers/gpu/nvgpu/common/rc/rc.c
@@ -85,7 +85,7 @@ void nvgpu_rc_pbdma_fault(struct gk20a *g, struct fifo_gk20a *f,
 	/* Remove channel from runlist */
 	id = pbdma_status.id;
 	if (pbdma_status.id_type == PBDMA_STATUS_ID_TYPE_TSGID) {
-		struct tsg_gk20a *tsg = &f->tsg[id];
+		struct tsg_gk20a *tsg = nvgpu_tsg_get_from_id(g, id);
 
 		nvgpu_tsg_set_error_notifier(g, tsg, error_notifier);
 		nvgpu_rc_tsg_and_related_engines(g, tsg, true,
diff --git a/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c b/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c
index 5b59582b7..ca6883352 100644
--- a/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c
+++ b/drivers/gpu/nvgpu/common/vgpu/gr/gr_vgpu.c
@@ -1114,7 +1114,7 @@ void vgpu_gr_handle_sm_esr_event(struct gk20a *g,
 		return;
 	}
 
-	tsg = &g->fifo.tsg[info->tsg_id];
+	tsg = nvgpu_tsg_check_and_get_from_id(g, info->tsg_id);
 	if (tsg == NULL) {
 		nvgpu_err(g, "invalid tsg");
 		return;
diff --git a/drivers/gpu/nvgpu/common/vgpu/vgpu.c b/drivers/gpu/nvgpu/common/vgpu/vgpu.c
index a915fb9a3..d2e37f4d3 100644
--- a/drivers/gpu/nvgpu/common/vgpu/vgpu.c
+++ b/drivers/gpu/nvgpu/common/vgpu/vgpu.c
@@ -117,9 +117,10 @@ static void vgpu_handle_channel_event(struct gk20a *g,
 		return;
 	}
 
-	tsg = &g->fifo.tsg[info->id];
-
-	nvgpu_tsg_post_event_id(tsg, info->event_id);
+	tsg = nvgpu_tsg_check_and_get_from_id(g, info->id);
+	if (tsg != NULL) {
+		nvgpu_tsg_post_event_id(tsg, info->event_id);
+	}
 }
 
 static void vgpu_channel_abort_cleanup(struct gk20a *g, u32 chid)
diff --git a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
index 66b78d62c..aab68a7bf 100644
--- a/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
+++ b/drivers/gpu/nvgpu/gv11b/gr_gv11b.c
@@ -1146,12 +1146,16 @@ static int gr_gv11b_handle_all_warp_esr_errors(struct gk20a *g,
 	is_esr_error = gr_gv11b_check_warp_esr_error(g, warp_esr_error);
 	if (!is_esr_error) {
 		nvgpu_log(g, gpu_dbg_fn | gpu_dbg_gpu_dbg,
-			"No ESR error, Skip RC recovery and Trigeer CILP");
+			"No ESR error, Skip RC recovery and Trigger CILP");
 		return 0;
 	}
 
 	if (fault_ch != NULL) {
-		tsg = &g->fifo.tsg[fault_ch->tsgid];
+		tsg = nvgpu_tsg_check_and_get_from_id(g, fault_ch->tsgid);
+		if (tsg == NULL) {
+			nvgpu_err(g, "fault ch %u not found", fault_ch->chid);
+			goto clear_intr;
+		}
 
 		/*
 		 * Check SET_EXCEPTION_TYPE_MASK is being set.
@@ -1172,6 +1176,7 @@ static int gr_gv11b_handle_all_warp_esr_errors(struct gk20a *g,
 				NVGPU_ERR_NOTIFIER_GR_EXCEPTION);
 	}
 
+clear_intr:
 	/* clear interrupt */
 	offset = nvgpu_gr_gpc_offset(g, gpc) +
 		 nvgpu_gr_tpc_offset(g, tpc) +
diff --git a/drivers/gpu/nvgpu/hal/fb/fb_gv11b.c b/drivers/gpu/nvgpu/hal/fb/fb_gv11b.c
index 9abbcd3a2..5e4bdd409 100644
--- a/drivers/gpu/nvgpu/hal/fb/fb_gv11b.c
+++ b/drivers/gpu/nvgpu/hal/fb/fb_gv11b.c
@@ -652,9 +652,9 @@ static void gv11b_fb_handle_mmu_fault_common(struct gk20a *g,
 		/* CE page faults are not reported as replayable */
 		nvgpu_log(g, gpu_dbg_intr, "CE Faulted");
 		err = gv11b_fb_fix_page_fault(g, mmfault);
-
-		if (mmfault->refch != NULL) {
-			tsg = tsg_gk20a_from_ch(mmfault->refch);
+		if ((mmfault->refch != NULL) &&
+			((u32)mmfault->refch->tsgid != FIFO_INVAL_TSG_ID)) {
+			tsg = nvgpu_tsg_get_from_id(g, mmfault->refch->tsgid);
 			nvgpu_tsg_reset_faulted_eng_pbdma(g, tsg, true, true);
 		}
 		if (err == 0) {
diff --git a/drivers/gpu/nvgpu/hal/fifo/ctxsw_timeout_gk20a.c b/drivers/gpu/nvgpu/hal/fifo/ctxsw_timeout_gk20a.c
index 8920d0b1b..c251babed 100644
--- a/drivers/gpu/nvgpu/hal/fifo/ctxsw_timeout_gk20a.c
+++ b/drivers/gpu/nvgpu/hal/fifo/ctxsw_timeout_gk20a.c
@@ -107,7 +107,7 @@ bool gk20a_fifo_handle_ctxsw_timeout(struct gk20a *g)
 		}
 
 		if (is_tsg) {
-			tsg = &f->tsg[id];
+			tsg = nvgpu_tsg_check_and_get_from_id(g, id);
 		} else {
 			ch = gk20a_channel_from_id(g, id);
 			if (ch != NULL) {
diff --git a/drivers/gpu/nvgpu/hal/fifo/ctxsw_timeout_gv11b.c b/drivers/gpu/nvgpu/hal/fifo/ctxsw_timeout_gv11b.c
index b3a1dc7af..a29b7d8fd 100644
--- a/drivers/gpu/nvgpu/hal/fifo/ctxsw_timeout_gv11b.c
+++ b/drivers/gpu/nvgpu/hal/fifo/ctxsw_timeout_gv11b.c
@@ -200,19 +200,16 @@ bool gv11b_fifo_handle_ctxsw_timeout(struct gk20a *g)
 		if ((ctxsw_timeout_engines &
 			fifo_intr_ctxsw_timeout_engine_pending_f(
 				active_eng_id)) != 0U) {
-
-			struct fifo_gk20a *f = &g->fifo;
 			u32 ms = 0;
 			bool debug_dump = false;
 
 			tsgid = gv11b_fifo_ctxsw_timeout_info(g, active_eng_id,
						&info_status);
-
-			if (tsgid == FIFO_INVAL_TSG_ID) {
+			tsg = nvgpu_tsg_check_and_get_from_id(g, tsgid);
+			if (tsg == NULL) {
 				continue;
 			}
 
-			tsg = &f->tsg[tsgid];
 			recover = g->ops.tsg.check_ctxsw_timeout(tsg,
 					&debug_dump, &ms);
 			if (recover) {
diff --git a/drivers/gpu/nvgpu/hal/fifo/mmu_fault_gk20a.c b/drivers/gpu/nvgpu/hal/fifo/mmu_fault_gk20a.c
index 8ee0a84a5..85158a367 100644
--- a/drivers/gpu/nvgpu/hal/fifo/mmu_fault_gk20a.c
+++ b/drivers/gpu/nvgpu/hal/fifo/mmu_fault_gk20a.c
@@ -315,7 +315,7 @@ bool gk20a_fifo_handle_mmu_fault_locked(
 		}
 
 		if (type == ENGINE_STATUS_CTX_ID_TYPE_TSGID) {
-			tsg = &g->fifo.tsg[id];
+			tsg = nvgpu_tsg_get_from_id(g, id);
 		} else if (type == ENGINE_STATUS_CTX_ID_TYPE_CHID) {
 			ch = &g->fifo.channel[id];
 			refch = gk20a_channel_get(ch);
diff --git a/drivers/gpu/nvgpu/include/nvgpu/tsg.h b/drivers/gpu/nvgpu/include/nvgpu/tsg.h
index 10ac55b91..bf1386a55 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/tsg.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/tsg.h
@@ -105,6 +105,8 @@ struct tsg_gk20a *tsg_gk20a_from_ch(struct channel_gk20a *ch);
 void nvgpu_tsg_disable(struct tsg_gk20a *tsg);
 int nvgpu_tsg_bind_channel(struct tsg_gk20a *tsg,
 		struct channel_gk20a *ch);
+struct tsg_gk20a *nvgpu_tsg_get_from_id(struct gk20a *g, u32 tsgid);
+struct tsg_gk20a *nvgpu_tsg_check_and_get_from_id(struct gk20a *g, u32 tsgid);
 int nvgpu_tsg_unbind_channel(struct tsg_gk20a *tsg, struct channel_gk20a *ch);
 int nvgpu_tsg_unbind_channel_common(struct tsg_gk20a *tsg,
 		struct channel_gk20a *ch);
diff --git a/drivers/gpu/nvgpu/os/linux/sched.c b/drivers/gpu/nvgpu/os/linux/sched.c
index 41e132154..f7a272409 100644
--- a/drivers/gpu/nvgpu/os/linux/sched.c
+++ b/drivers/gpu/nvgpu/os/linux/sched.c
@@ -167,9 +167,10 @@ static int gk20a_sched_dev_ioctl_get_tsgs_by_pid(struct gk20a_sched_ctrl *sched,
 	nvgpu_mutex_acquire(&sched->status_lock);
 	for (tsgid = 0; tsgid < f->num_channels; tsgid++) {
 		if (NVGPU_SCHED_ISSET(tsgid, sched->active_tsg_bitmap)) {
-			tsg = &f->tsg[tsgid];
-			if (tsg->tgid == tgid)
+			tsg = nvgpu_tsg_get_from_id(g, tsgid);
+			if (tsg->tgid == tgid) {
 				NVGPU_SCHED_SET(tsgid, bitmap);
+			}
 		}
 	}
 	nvgpu_mutex_release(&sched->status_lock);
@@ -198,7 +199,7 @@ static int gk20a_sched_dev_ioctl_get_params(struct gk20a_sched_ctrl *sched,
 
 	nvgpu_speculation_barrier();
 
-	tsg = &f->tsg[tsgid];
+	tsg = nvgpu_tsg_get_from_id(g, tsgid);
 	if (!nvgpu_ref_get_unless_zero(&tsg->refcount))
 		return -ENXIO;
 
@@ -233,7 +234,7 @@ static int gk20a_sched_dev_ioctl_tsg_set_timeslice(
 
 	nvgpu_speculation_barrier();
 
-	tsg = &f->tsg[tsgid];
+	tsg = nvgpu_tsg_get_from_id(g, tsgid);
 	if (!nvgpu_ref_get_unless_zero(&tsg->refcount))
 		return -ENXIO;
 
@@ -268,7 +269,7 @@ static int gk20a_sched_dev_ioctl_tsg_set_runlist_interleave(
 
 	nvgpu_speculation_barrier();
 
-	tsg = &f->tsg[tsgid];
+	tsg = nvgpu_tsg_get_from_id(g, tsgid);
 	if (!nvgpu_ref_get_unless_zero(&tsg->refcount))
 		return -ENXIO;
 
@@ -336,7 +337,7 @@ static int gk20a_sched_dev_ioctl_get_tsg(struct gk20a_sched_ctrl *sched,
 
 	nvgpu_speculation_barrier();
 
-	tsg = &f->tsg[tsgid];
+	tsg = nvgpu_tsg_get_from_id(g, tsgid);
 	if (!nvgpu_ref_get_unless_zero(&tsg->refcount))
 		return -ENXIO;
 
@@ -382,7 +383,7 @@ static int gk20a_sched_dev_ioctl_put_tsg(struct gk20a_sched_ctrl *sched,
 	NVGPU_SCHED_CLR(tsgid, sched->ref_tsg_bitmap);
 	nvgpu_mutex_release(&sched->status_lock);
 
-	tsg = &f->tsg[tsgid];
+	tsg = nvgpu_tsg_get_from_id(g, tsgid);
 	nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release);
 
 	return 0;
@@ -527,7 +528,7 @@ int gk20a_sched_dev_release(struct inode *inode, struct file *filp)
 	/* release any reference to TSGs */
 	for (tsgid = 0; tsgid < f->num_channels; tsgid++) {
 		if (NVGPU_SCHED_ISSET(tsgid, sched->ref_tsg_bitmap)) {
-			tsg = &f->tsg[tsgid];
+			tsg = nvgpu_tsg_get_from_id(g, tsgid);
 			nvgpu_ref_put(&tsg->refcount, nvgpu_ioctl_tsg_release);
 		}
 	}
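
The two helpers added in common/fifo/tsg.c split the lookup into an unchecked and a checked form: nvgpu_tsg_get_from_id() simply indexes the fifo's TSG table and is meant for IDs already known to be valid, while nvgpu_tsg_check_and_get_from_id() returns NULL for NVGPU_INVALID_TSG_ID so callers holding an ID reported by hardware or by a guest can bail out before touching the table. The sketch below shows the intended calling pattern; the handler name and its error path are illustrative only, not part of this patch, and assume the declarations added to include/nvgpu/tsg.h above.

/*
 * Sketch only: handle_reported_tsg_fault() is a hypothetical caller,
 * not code from this patch. It illustrates how the checked accessor
 * is meant to be used for IDs read back from hardware status.
 */
static void handle_reported_tsg_fault(struct gk20a *g, u32 tsgid,
		u32 error_notifier)
{
	/* The ID comes from a status register and may be invalid. */
	struct tsg_gk20a *tsg = nvgpu_tsg_check_and_get_from_id(g, tsgid);

	if (tsg == NULL) {
		nvgpu_err(g, "no TSG for id %u, skipping recovery", tsgid);
		return;
	}

	/*
	 * From here the ID is known to be valid, which is the situation
	 * in which other call sites use nvgpu_tsg_get_from_id() directly.
	 */
	nvgpu_tsg_set_error_notifier(g, tsg, error_notifier);
}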