From 1fc9a427e0c3242c92219a81ea0d3ad5c5317f07 Mon Sep 17 00:00:00 2001 From: Thomas Fleury Date: Tue, 26 Nov 2019 11:28:11 -0500 Subject: [PATCH] gpu: nvgpu: tear down TSG on unbind HAL failure Currently nvgpu_tsg_unbind_channel ignores the return code from g->ops.tsg.unbind_channel. For consistency, tear down TSG in case an error occurs in the unbind HAL. Also make sure to restore valid ops for fifo.preempt_tsg in test_gr_setup_free_obj_ctx, to avoid unbind failure. Jira NVGPU-4387 Change-Id: I27a9c0daa365d05684149fc4bb17874d60ae1fde Signed-off-by: Thomas Fleury Reviewed-on: https://git-master.nvidia.com/r/2248159 Reviewed-by: svc-mobile-coverity Reviewed-by: svc-mobile-misra Reviewed-by: svc-mobile-cert Reviewed-by: Deepak Nibade GVS: Gerrit_Virtual_Submit Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/common/fifo/tsg.c | 45 ++++++++++++++++------- userspace/units/gr/setup/nvgpu-gr-setup.c | 6 +-- 2 files changed, 34 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/nvgpu/common/fifo/tsg.c b/drivers/gpu/nvgpu/common/fifo/tsg.c index 88036f8aa..8c11cd279 100644 --- a/drivers/gpu/nvgpu/common/fifo/tsg.c +++ b/drivers/gpu/nvgpu/common/fifo/tsg.c @@ -233,29 +233,46 @@ int nvgpu_tsg_unbind_channel(struct nvgpu_tsg *tsg, struct nvgpu_channel *ch) err = nvgpu_tsg_unbind_channel_common(tsg, ch); if (err != 0) { - nvgpu_err(g, "Channel %d unbind failed, tearing down TSG %d", - ch->chid, tsg->tsgid); - - nvgpu_tsg_abort(g, tsg, true); - /* If channel unbind fails, channel is still part of runlist */ - if (nvgpu_channel_update_runlist(ch, false) != 0) { - nvgpu_err(g, - "remove ch %u from runlist failed", ch->chid); - } - - nvgpu_rwsem_down_write(&tsg->ch_list_lock); - nvgpu_list_del(&ch->ch_entry); - ch->tsgid = NVGPU_INVALID_TSG_ID; - nvgpu_rwsem_up_write(&tsg->ch_list_lock); + nvgpu_err(g, "unbind common failed, err=%d", err); + goto fail; } if (g->ops.tsg.unbind_channel != NULL) { err = g->ops.tsg.unbind_channel(tsg, ch); + if (err != 0) { + 
/* + * ch already removed from TSG's list. + * mark error explicitly. + */ + (void) nvgpu_channel_mark_error(g, ch); + nvgpu_err(g, "unbind hal failed, err=%d", err); + goto fail; + } } nvgpu_ref_put(&tsg->refcount, nvgpu_tsg_release); return 0; + +fail: + nvgpu_err(g, "Channel %d unbind failed, tearing down TSG %d", + ch->chid, tsg->tsgid); + + nvgpu_tsg_abort(g, tsg, true); + /* If channel unbind fails, channel is still part of runlist */ + if (nvgpu_channel_update_runlist(ch, false) != 0) { + nvgpu_err(g, "remove ch %u from runlist failed", ch->chid); + } + + nvgpu_rwsem_down_write(&tsg->ch_list_lock); + nvgpu_list_del(&ch->ch_entry); + ch->tsgid = NVGPU_INVALID_TSG_ID; + nvgpu_rwsem_up_write(&tsg->ch_list_lock); + + nvgpu_ref_put(&tsg->refcount, nvgpu_tsg_release); + + return err; + } int nvgpu_tsg_unbind_channel_check_hw_state(struct nvgpu_tsg *tsg, diff --git a/userspace/units/gr/setup/nvgpu-gr-setup.c b/userspace/units/gr/setup/nvgpu-gr-setup.c index 7eb2195b8..23dd4ca9b 100644 --- a/userspace/units/gr/setup/nvgpu-gr-setup.c +++ b/userspace/units/gr/setup/nvgpu-gr-setup.c @@ -602,13 +602,13 @@ int test_gr_setup_free_obj_ctx(struct unit_module *m, { int err = 0; + /* Restore valid ops for negative tests */ + gr_setup_restore_valid_ops(g); + err = gr_test_setup_unbind_tsg(m, g); gr_test_setup_cleanup_ch_tsg(m, g); - /* Restore valid ops for negative tests */ - gr_setup_restore_valid_ops(g); - return (err == 0) ? UNIT_SUCCESS: UNIT_FAIL; }