diff --git a/drivers/gpu/nvgpu/common/rc/rc.c b/drivers/gpu/nvgpu/common/rc/rc.c index 7052c64ec..7eb809fcb 100644 --- a/drivers/gpu/nvgpu/common/rc/rc.c +++ b/drivers/gpu/nvgpu/common/rc/rc.c @@ -101,10 +101,13 @@ int nvgpu_rc_pbdma_fault(struct gk20a *g, u32 pbdma_id, u32 error_notifier, u32 id_type = PBDMA_STATUS_ID_TYPE_INVALID; int err = 0; u32 id; + struct nvgpu_tsg *tsg = NULL; + struct nvgpu_channel *ch = NULL; if (error_notifier >= NVGPU_ERR_NOTIFIER_INVAL) { nvgpu_err(g, "Invalid error notifier %u", error_notifier); err = -EINVAL; + nvgpu_sw_quiesce(g); goto out; } @@ -126,45 +129,51 @@ int nvgpu_rc_pbdma_fault(struct gk20a *g, u32 pbdma_id, u32 error_notifier, } else { nvgpu_err(g, "pbdma status not valid"); err = -EINVAL; + nvgpu_sw_quiesce(g); goto out; } - if (id_type == PBDMA_STATUS_ID_TYPE_TSGID) { - struct nvgpu_tsg *tsg = nvgpu_tsg_get_from_id(g, id); + switch (id_type) { + case PBDMA_STATUS_ID_TYPE_TSGID: + { + tsg = nvgpu_tsg_get_from_id(g, id); nvgpu_tsg_set_error_notifier(g, tsg, error_notifier); - nvgpu_rc_tsg_and_related_engines(g, tsg, true, - RC_TYPE_PBDMA_FAULT); - } else if(id_type == PBDMA_STATUS_ID_TYPE_CHID) { - struct nvgpu_channel *ch = nvgpu_channel_from_id(g, id); - struct nvgpu_tsg *tsg; + nvgpu_rc_tsg_and_related_engines(g, tsg, true, RC_TYPE_PBDMA_FAULT); + break; + } + case PBDMA_STATUS_ID_TYPE_CHID: + { + ch = nvgpu_channel_from_id(g, id); if (ch == NULL) { nvgpu_err(g, "channel is not referenceable"); err = -EINVAL; - goto out; + nvgpu_sw_quiesce(g); + break; } tsg = nvgpu_tsg_from_ch(ch); - if (tsg != NULL) { - nvgpu_tsg_set_error_notifier(g, tsg, error_notifier); - nvgpu_rc_tsg_and_related_engines(g, tsg, true, - RC_TYPE_PBDMA_FAULT); - } else { + if (tsg == NULL) { nvgpu_err(g, "chid: %d is not bound to tsg", ch->chid); + nvgpu_channel_put(ch); err = -EINVAL; + nvgpu_sw_quiesce(g); + break; } + nvgpu_tsg_set_error_notifier(g, tsg, error_notifier); + nvgpu_rc_tsg_and_related_engines(g, tsg, true, RC_TYPE_PBDMA_FAULT); 
nvgpu_channel_put(ch); - } else { + break; + } + default: nvgpu_err(g, "Invalid pbdma_status id_type or next_id_type"); err = -EINVAL; + nvgpu_sw_quiesce(g); + break; } out: - if (err != 0) { - nvgpu_sw_quiesce(g); - } - return err; } diff --git a/drivers/gpu/nvgpu/include/nvgpu/rc.h b/drivers/gpu/nvgpu/include/nvgpu/rc.h index 66919b996..e2c0422a0 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/rc.h +++ b/drivers/gpu/nvgpu/include/nvgpu/rc.h @@ -190,7 +190,8 @@ void nvgpu_rc_ctxsw_timeout(struct gk20a *g, u32 eng_bitmask, * Do PBDMA fault recovery. Set error notifier as per \a error_notifier and call * \ref nvgpu_rc_tsg_and_related_engines to do the recovery. Steps involved are * - If \a error_notifier is >= \ref NVGPU_ERR_NOTIFIER_INVAL, set error variable to - * -EINVAL and jump to label \a out. + * -EINVAL, trigger quiesce \ref nvgpu_sw_quiesce "nvgpu_sw_quiesce(g)" and + * jump to label \a out. * - If \ref nvgpu_pbdma_status_is_chsw_valid * "nvgpu_pbdma_status_is_chsw_valid(pbdma_status)" or * \ref nvgpu_pbdma_status_is_chsw_save @@ -207,7 +208,7 @@ void nvgpu_rc_ctxsw_timeout(struct gk20a *g, u32 eng_bitmask, * "nvgpu_pbdma_status_ch_not_loaded(pbdma_status)" returns true, log message * but don't set error variable. * - Else log error message and set error variable to -EINVAL and trigger - * quiesce. + * quiesce \ref nvgpu_sw_quiesce "nvgpu_sw_quiesce(g)". * - If id_type set in above steps matches with \ref PBDMA_STATUS_ID_TYPE_TSGID, * call \ref nvgpu_tsg_get_from_id to get * pointer to struct \ref nvgpu_tsg and store in variable tsg, then call @@ -220,20 +221,22 @@ void nvgpu_rc_ctxsw_timeout(struct gk20a *g, u32 eng_bitmask, * - If id_type set in above steps matches with \ref PBDMA_STATUS_ID_TYPE_CHID, * call \ref nvgpu_channel_from_id to get * pointer to struct \ref nvgpu_channel and store in variable ch. 
If ch is NULL - * log error, set error variable to -EINVAL and jump to label \a out else get + * log error, set error variable to -EINVAL, trigger quiesce + * \ref nvgpu_sw_quiesce "nvgpu_sw_quiesce(g)" and jump to label \a out else get * pointer to struct \ref nvgpu_tsg using API \ref nvgpu_tsg_from_ch * "nvgpu_tsg_from_ch(ch)" and store in variable tsg. If tsg is NULL log - * error, set error variable to -EINVAL and jump to label \a out else set - * error notifier buffer by calling \ref nvgpu_tsg_set_error_notifier + * error, put the channel reference by calling \ref nvgpu_channel_put + * "nvgpu_channel_put(ch)", set error variable to -EINVAL, trigger quiesce + * \ref nvgpu_sw_quiesce "nvgpu_sw_quiesce(g)" and jump to label \a out else + * set error notifier buffer by calling \ref nvgpu_tsg_set_error_notifier * "nvgpu_tsg_set_error_notifier(g, tsg, error_notifier)" followed by doing * recovery by calling \ref nvgpu_rc_tsg_and_related_engines * "nvgpu_rc_tsg_and_related_engines(g, tsg, true, RC_TYPE_PBDMA_FAULT)". - * Finally put the channel reference by calling \ref nvgpu_channel_put + * Put the channel reference by calling \ref nvgpu_channel_put * "nvgpu_channel_put(ch)". * - If id_type is not set to any of \ref PBMDA_STATUS_ID_TYPE_TSGID or - * \ref PBMDA_STATUS_ID_TYPE_CHID, log error and set error variable to -EINVAL. - * - At \a out label, if error variable is set, call - * \ref nvgpu_sw_quiesce "nvgpu_sw_quiesce(g)". + * \ref PBDMA_STATUS_ID_TYPE_CHID, log error and set error variable to -EINVAL, + * trigger quiesce \ref nvgpu_sw_quiesce "nvgpu_sw_quiesce(g)". * - Return error variable. * * @return 0 in case of success, < 0 in case of failure.