From cf287a4ef592e7329f813c076ec8bdad18dc5933 Mon Sep 17 00:00:00 2001 From: Sagar Kamble Date: Sun, 10 Jan 2021 22:35:00 +0530 Subject: [PATCH] gpu: nvgpu: retry tsg unbind if NEXT is set The NEXT bit can remain set for the channel if the timeslice expires before the scheduler clears it. Due to this, nvgpu fails the TSG unbind and in turn nvrm_gpu fails the channel close. In this case, checking the channel hw state again after some time gives the scheduler a chance to clear the NEXT bit. Re-enable the TSG and return -EAGAIN to nvrm_gpu so that it can retry the unbind. Bug 3144960 Change-Id: I35f417f02270e371a4e632986b73a00f8a4f921a Signed-off-by: Sagar Kamble Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2468391 Reviewed-by: svc-mobile-cert Reviewed-by: Deepak Nibade Reviewed-by: svc-mobile-coverity Reviewed-by: mobile promotions Tested-by: mobile promotions GVS: Gerrit_Virtual_Submit --- arch/nvgpu-hal-new.yaml | 9 ++-- drivers/gpu/nvgpu/Makefile | 1 + drivers/gpu/nvgpu/Makefile.sources | 1 + drivers/gpu/nvgpu/common/fifo/channel.c | 13 ++--- drivers/gpu/nvgpu/common/fifo/tsg.c | 34 ++++++++----- drivers/gpu/nvgpu/hal/fifo/tsg_gk20a.h | 10 +++- drivers/gpu/nvgpu/hal/fifo/tsg_gk20a_fusa.c | 51 +++++++++++++++++++ drivers/gpu/nvgpu/hal/init/hal_gm20b.c | 3 +- drivers/gpu/nvgpu/hal/init/hal_gp10b.c | 3 +- drivers/gpu/nvgpu/hal/init/hal_gv11b.c | 4 +- drivers/gpu/nvgpu/hal/init/hal_tu104.c | 4 +- drivers/gpu/nvgpu/include/nvgpu/gops/tsg.h | 4 +- drivers/gpu/nvgpu/include/nvgpu/tsg.h | 26 +++++++++- drivers/gpu/nvgpu/os/linux/ioctl_tsg.c | 7 ++- libs/dgpu/libnvgpu-drv-dgpu_safe.export | 2 +- libs/igpu/libnvgpu-drv-igpu_safe.export | 2 +- userspace/units/fifo/channel/nvgpu-channel.c | 14 ++--- .../fifo/preempt/gv11b/nvgpu-preempt-gv11b.c | 6 +-- userspace/units/fifo/runlist/nvgpu-runlist.c | 8 +-- userspace/units/fifo/tsg/nvgpu-tsg.c | 8 +-- userspace/units/fifo/tsg/nvgpu-tsg.h | 6 +-- userspace/units/gr/intr/nvgpu-gr-intr.c | 4 +- userspace/units/gr/setup/nvgpu-gr-setup.c | 4 +- 
userspace/units/gr/setup/nvgpu-gr-setup.h | 4 +- userspace/units/rc/nvgpu-rc.c | 4 +- 25 files changed, 169 insertions(+), 63 deletions(-) create mode 100644 drivers/gpu/nvgpu/hal/fifo/tsg_gk20a_fusa.c diff --git a/arch/nvgpu-hal-new.yaml b/arch/nvgpu-hal-new.yaml index 7708fdf61..d5964bc44 100644 --- a/arch/nvgpu-hal-new.yaml +++ b/arch/nvgpu-hal-new.yaml @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. All Rights Reserved. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. All Rights Reserved. # # HAL units. These are the units that have access to HW. # @@ -240,13 +240,14 @@ fifo: hal/fifo/channel_gv100.h ] tsg_fusa: safe: yes - sources: [ hal/fifo/tsg_gv11b.h, + sources: [ hal/fifo/tsg_gk20a.h, + hal/fifo/tsg_gk20a_fusa.c, + hal/fifo/tsg_gv11b.h, hal/fifo/tsg_gv11b_fusa.c ] tsg: safe: no - sources: [ hal/fifo/tsg_gk20a.h, - hal/fifo/tsg_gk20a.c ] + sources: [ hal/fifo/tsg_gk20a.c ] fifo_fusa: safe: yes diff --git a/drivers/gpu/nvgpu/Makefile b/drivers/gpu/nvgpu/Makefile index 81f67fdeb..ddcb95e5e 100644 --- a/drivers/gpu/nvgpu/Makefile +++ b/drivers/gpu/nvgpu/Makefile @@ -673,6 +673,7 @@ nvgpu-y += \ hal/fifo/runlist_fifo_gk20a_fusa.o \ hal/fifo/runlist_fifo_gv11b_fusa.o \ hal/fifo/runlist_ram_gv11b_fusa.o \ + hal/fifo/tsg_gk20a_fusa.o \ hal/fifo/tsg_gv11b_fusa.o \ hal/fifo/usermode_gv11b_fusa.o \ hal/fuse/fuse_gm20b_fusa.o \ diff --git a/drivers/gpu/nvgpu/Makefile.sources b/drivers/gpu/nvgpu/Makefile.sources index abc81c68e..96f7c84a5 100644 --- a/drivers/gpu/nvgpu/Makefile.sources +++ b/drivers/gpu/nvgpu/Makefile.sources @@ -214,6 +214,7 @@ srcs += hal/mm/mm_gv11b_fusa.c \ hal/fifo/runlist_fifo_gk20a_fusa.c \ hal/fifo/runlist_fifo_gv11b_fusa.c \ hal/fifo/runlist_ram_gv11b_fusa.c \ + hal/fifo/tsg_gk20a_fusa.c \ hal/fifo/tsg_gv11b_fusa.c \ hal/fifo/usermode_gv11b_fusa.c \ hal/fuse/fuse_gm20b_fusa.c \ diff --git a/drivers/gpu/nvgpu/common/fifo/channel.c b/drivers/gpu/nvgpu/common/fifo/channel.c index 0cfe88c3a..7a7fc4d5d 100644 --- 
a/drivers/gpu/nvgpu/common/fifo/channel.c +++ b/drivers/gpu/nvgpu/common/fifo/channel.c @@ -1,7 +1,7 @@ /* * GK20A Graphics channel * - * Copyright (c) 2011-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -790,7 +790,7 @@ static void channel_free_invoke_unbind(struct nvgpu_channel *ch) * have an open channel fd anymore to use for the unbind * ioctl. */ - err = nvgpu_tsg_unbind_channel(tsg, ch); + err = nvgpu_tsg_force_unbind_channel(tsg, ch); if (err != 0) { nvgpu_err(g, "failed to unbind channel %d from TSG", @@ -944,10 +944,11 @@ static void channel_free(struct nvgpu_channel *ch, bool force) /* * OS channel close may require that syncpoint should be set to some - * safe value before it is called. nvgpu_tsg_unbind_channel(above) is - * internally doing that by calling nvgpu_nvhost_syncpt_set_safe_state - * deep down in the stack. Otherwise os_channel close may block if the - * app is killed abruptly (which was going to do the syncpoint signal). + * safe value before it is called. nvgpu_tsg_force_unbind_channel(above) + * is internally doing that by calling nvgpu_nvhost_syncpt_set_safe_- + * state deep down in the stack. Otherwise os_channel close may block if + * the app is killed abruptly (which was going to do the syncpoint + * signal). */ if (g->os_channel.close != NULL) { g->os_channel.close(ch, force); diff --git a/drivers/gpu/nvgpu/common/fifo/tsg.c b/drivers/gpu/nvgpu/common/fifo/tsg.c index 071f63b39..ccfc23ddd 100644 --- a/drivers/gpu/nvgpu/common/fifo/tsg.c +++ b/drivers/gpu/nvgpu/common/fifo/tsg.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -193,6 +193,10 @@ static int nvgpu_tsg_unbind_channel_common(struct nvgpu_tsg *tsg, if (nvgpu_tsg_is_multi_channel(tsg) && !tsg_timedout && (g->ops.tsg.unbind_channel_check_hw_state != NULL)) { err = g->ops.tsg.unbind_channel_check_hw_state(tsg, ch); + if (err == -EAGAIN) { + goto fail_enable_tsg; + } + if (err != 0) { nvgpu_err(g, "invalid hw_state for ch %u", ch->chid); goto fail_enable_tsg; @@ -257,7 +261,8 @@ fail_enable_tsg: } /* The caller must ensure that channel belongs to a tsg */ -int nvgpu_tsg_unbind_channel(struct nvgpu_tsg *tsg, struct nvgpu_channel *ch) +int nvgpu_tsg_unbind_channel(struct nvgpu_tsg *tsg, struct nvgpu_channel *ch, + bool force) { struct gk20a *g = ch->g; int err; @@ -265,6 +270,10 @@ int nvgpu_tsg_unbind_channel(struct nvgpu_tsg *tsg, struct nvgpu_channel *ch) nvgpu_log_fn(g, "unbind tsg:%u ch:%u\n", tsg->tsgid, ch->chid); err = nvgpu_tsg_unbind_channel_common(tsg, ch); + if (!force && err == -EAGAIN) { + return err; + } + if (err != 0) { nvgpu_err(g, "unbind common failed, err=%d", err); goto fail_common; @@ -315,26 +324,27 @@ fail: } +int nvgpu_tsg_force_unbind_channel(struct nvgpu_tsg *tsg, + struct nvgpu_channel *ch) +{ + return nvgpu_tsg_unbind_channel(tsg, ch, true); +} + int nvgpu_tsg_unbind_channel_check_hw_state(struct nvgpu_tsg *tsg, struct nvgpu_channel *ch) { struct gk20a *g = ch->g; struct nvgpu_channel_hw_state hw_state; + int err; nvgpu_rwsem_down_read(&tsg->ch_list_lock); g->ops.channel.read_state(g, ch, &hw_state); nvgpu_rwsem_up_read(&tsg->ch_list_lock); - if (hw_state.next) { - if (g->ops.channel.clear != NULL) { - nvgpu_log_info(g, "Channel %d to be removed " - "from TSG %d has NEXT set!", - ch->chid, ch->tsgid); - } else { - nvgpu_err(g, "Channel %d to be removed " - "from TSG %d has NEXT set!", - ch->chid, ch->tsgid); - return -EINVAL; + if 
(g->ops.tsg.unbind_channel_check_hw_next != NULL) { + err = g->ops.tsg.unbind_channel_check_hw_next(ch, &hw_state); + if (err != 0) { + return err; } } diff --git a/drivers/gpu/nvgpu/hal/fifo/tsg_gk20a.h b/drivers/gpu/nvgpu/hal/fifo/tsg_gk20a.h index bf9077b59..7edeb2482 100644 --- a/drivers/gpu/nvgpu/hal/fifo/tsg_gk20a.h +++ b/drivers/gpu/nvgpu/hal/fifo/tsg_gk20a.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011-2019, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2011-2021, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -23,8 +23,16 @@ #ifndef NVGPU_TSG_GK20A_H #define NVGPU_TSG_GK20A_H +struct nvgpu_channel; +struct nvgpu_channel_hw_state; + +#ifdef CONFIG_NVGPU_HAL_NON_FUSA struct nvgpu_tsg; void gk20a_tsg_enable(struct nvgpu_tsg *tsg); +#endif + +int gk20a_tsg_unbind_channel_check_hw_next(struct nvgpu_channel *ch, + struct nvgpu_channel_hw_state *hw_state); #endif /* NVGPU_TSG_GK20A_H */ diff --git a/drivers/gpu/nvgpu/hal/fifo/tsg_gk20a_fusa.c b/drivers/gpu/nvgpu/hal/fifo/tsg_gk20a_fusa.c new file mode 100644 index 000000000..f5bf3ca31 --- /dev/null +++ b/drivers/gpu/nvgpu/hal/fifo/tsg_gk20a_fusa.c @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include + +#include "hal/fifo/tsg_gk20a.h" + +int gk20a_tsg_unbind_channel_check_hw_next(struct nvgpu_channel *ch, + struct nvgpu_channel_hw_state *hw_state) +{ + if (hw_state->next) { + /* + * There is a possibility that the user sees the channel + * has finished all the work and invokes channel removal + * before the scheduler marks it idle (clears NEXT bit). + * Scheduler can miss marking the channel idle if the + * timeslice expires just after the work finishes. + * + * nvgpu will then see NEXT bit set even though the + * channel has no work left. To catch this case, + * reenable the tsg and check the hw state again + * to see if the channel is truly idle. + */ + nvgpu_log_info(ch->g, "Channel %d to be removed " + "from TSG %d has NEXT set!", + ch->chid, ch->tsgid); + return -EAGAIN; + } + + return 0; +} diff --git a/drivers/gpu/nvgpu/hal/init/hal_gm20b.c b/drivers/gpu/nvgpu/hal/init/hal_gm20b.c index 37ee980ea..d9665e577 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gm20b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gm20b.c @@ -1,7 +1,7 @@ /* * GM20B Graphics * - * Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -733,6 +733,7 @@ static const struct gops_tsg gm20b_ops_tsg = { .bind_channel = NULL, .unbind_channel = NULL, .unbind_channel_check_hw_state = nvgpu_tsg_unbind_channel_check_hw_state, + .unbind_channel_check_hw_next = gk20a_tsg_unbind_channel_check_hw_next, .unbind_channel_check_ctx_reload = nvgpu_tsg_unbind_channel_check_ctx_reload, .unbind_channel_check_eng_faulted = NULL, #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT diff --git a/drivers/gpu/nvgpu/hal/init/hal_gp10b.c b/drivers/gpu/nvgpu/hal/init/hal_gp10b.c index 43c825a1e..a877e7cb2 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gp10b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gp10b.c @@ -1,7 +1,7 @@ /* * GP10B Tegra HAL interface * - * Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -821,6 +821,7 @@ static const struct gops_tsg gp10b_ops_tsg = { .bind_channel = NULL, .unbind_channel = NULL, .unbind_channel_check_hw_state = nvgpu_tsg_unbind_channel_check_hw_state, + .unbind_channel_check_hw_next = gk20a_tsg_unbind_channel_check_hw_next, .unbind_channel_check_ctx_reload = nvgpu_tsg_unbind_channel_check_ctx_reload, .unbind_channel_check_eng_faulted = NULL, #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT diff --git a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c index de96e5e18..1cec3ebc6 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c +++ b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c @@ -1,7 +1,7 @@ /* * GV11B Tegra HAL interface * - * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2016-2021, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -114,6 +114,7 @@ #include "hal/fifo/runlist_ram_gv11b.h" #include "hal/fifo/runlist_fifo_gk20a.h" #include "hal/fifo/runlist_fifo_gv11b.h" +#include "hal/fifo/tsg_gk20a.h" #include "hal/fifo/tsg_gv11b.h" #include "hal/fifo/userd_gk20a.h" #include "hal/fifo/userd_gv11b.h" @@ -995,6 +996,7 @@ static const struct gops_tsg gv11b_ops_tsg = { .bind_channel_eng_method_buffers = gv11b_tsg_bind_channel_eng_method_buffers, .unbind_channel = NULL, .unbind_channel_check_hw_state = nvgpu_tsg_unbind_channel_check_hw_state, + .unbind_channel_check_hw_next = gk20a_tsg_unbind_channel_check_hw_next, .unbind_channel_check_ctx_reload = nvgpu_tsg_unbind_channel_check_ctx_reload, .unbind_channel_check_eng_faulted = gv11b_tsg_unbind_channel_check_eng_faulted, #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT diff --git a/drivers/gpu/nvgpu/hal/init/hal_tu104.c b/drivers/gpu/nvgpu/hal/init/hal_tu104.c index 95d58dd82..d27266a0d 100644 --- a/drivers/gpu/nvgpu/hal/init/hal_tu104.c +++ b/drivers/gpu/nvgpu/hal/init/hal_tu104.c @@ -1,7 +1,7 @@ /* * TU104 Tegra HAL interface * - * Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -109,6 +109,7 @@ #include "hal/fifo/runlist_fifo_gk20a.h" #include "hal/fifo/runlist_fifo_gv11b.h" #include "hal/fifo/runlist_fifo_tu104.h" +#include "hal/fifo/tsg_gk20a.h" #include "hal/fifo/tsg_gv11b.h" #include "hal/fifo/userd_gk20a.h" #include "hal/fifo/userd_gv11b.h" @@ -1046,6 +1047,7 @@ static const struct gops_tsg tu104_ops_tsg = { .bind_channel_eng_method_buffers = gv11b_tsg_bind_channel_eng_method_buffers, .unbind_channel = NULL, .unbind_channel_check_hw_state = nvgpu_tsg_unbind_channel_check_hw_state, + .unbind_channel_check_hw_next = gk20a_tsg_unbind_channel_check_hw_next, .unbind_channel_check_ctx_reload = nvgpu_tsg_unbind_channel_check_ctx_reload, .unbind_channel_check_eng_faulted = gv11b_tsg_unbind_channel_check_eng_faulted, #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT diff --git a/drivers/gpu/nvgpu/include/nvgpu/gops/tsg.h b/drivers/gpu/nvgpu/include/nvgpu/gops/tsg.h index 7b106b088..0f9bace4d 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gops/tsg.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gops/tsg.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -67,6 +67,8 @@ struct gops_tsg { struct nvgpu_channel *ch); int (*unbind_channel_check_hw_state)(struct nvgpu_tsg *tsg, struct nvgpu_channel *ch); + int (*unbind_channel_check_hw_next)(struct nvgpu_channel *ch, + struct nvgpu_channel_hw_state *state); void (*unbind_channel_check_ctx_reload)(struct nvgpu_tsg *tsg, struct nvgpu_channel *ch, struct nvgpu_channel_hw_state *state); diff --git a/drivers/gpu/nvgpu/include/nvgpu/tsg.h b/drivers/gpu/nvgpu/include/nvgpu/tsg.h index 16f930b86..962ce924e 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/tsg.h +++ b/drivers/gpu/nvgpu/include/nvgpu/tsg.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -373,12 +373,18 @@ struct nvgpu_tsg *nvgpu_tsg_check_and_get_from_id(struct gk20a *g, u32 tsgid); * * @param tsg [in] Pointer to TSG struct. * @param ch [in] Pointer to Channel struct. + * @param force [in] If set, unbind proceeds even if the channel + * is busy. * * Unbind channel from TSG: * - Check if channel being unbound has become unserviceable. * - Disable TSG. * - Preempt TSG. * - Check hw state of the channel. + * - If NEXT bit is set and force is set to true, perform error + * handling steps given next. + * - If NEXT bit is set and force is set to false, caller will + * have to retry unbind. * - Remove channel from its runlist. * - Remove channel from TSG's channel list. * - Set tsgid of the channel to #NVGPU_INVALID_TSG_ID. 
@@ -402,7 +408,23 @@ struct nvgpu_tsg *nvgpu_tsg_check_and_get_from_id(struct gk20a *g, u32 tsgid); * @note Caller of this function must make sure that channel requested to be * unbound from the TSG is bound to the TSG. */ -int nvgpu_tsg_unbind_channel(struct nvgpu_tsg *tsg, struct nvgpu_channel *ch); +int nvgpu_tsg_unbind_channel(struct nvgpu_tsg *tsg, struct nvgpu_channel *ch, + bool force); + + +/** + * @brief Unbind a channel from the TSG it is bound to. + * + * @param tsg [in] Pointer to TSG struct. + * @param ch [in] Pointer to Channel struct. + * + * Call #nvgpu_tsg_unbind_channel with argument force set. Thus, if the + * channel has work, it still gets unbound. + * + * @return return value by #nvgpu_tsg_unbind_channel + */ +int nvgpu_tsg_force_unbind_channel(struct nvgpu_tsg *tsg, + struct nvgpu_channel *ch); /** * @brief Check h/w channel status before unbinding Channel. diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c b/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c index 047ab50e4..4c8a1e20d 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_tsg.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2021, NVIDIA CORPORATION. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -174,7 +174,10 @@ static int nvgpu_tsg_unbind_channel_fd(struct nvgpu_tsg *tsg, int ch_fd) goto out; } - err = nvgpu_tsg_unbind_channel(tsg, ch); + err = nvgpu_tsg_unbind_channel(tsg, ch, false); + if (err == -EAGAIN) { + goto out; + } /* * Mark the channel unserviceable since channel unbound from TSG diff --git a/libs/dgpu/libnvgpu-drv-dgpu_safe.export b/libs/dgpu/libnvgpu-drv-dgpu_safe.export index a606aebe2..71b052a84 100644 --- a/libs/dgpu/libnvgpu-drv-dgpu_safe.export +++ b/libs/dgpu/libnvgpu-drv-dgpu_safe.export @@ -700,7 +700,7 @@ nvgpu_tsg_reset_faulted_eng_pbdma nvgpu_tsg_set_ctx_mmu_error nvgpu_tsg_set_error_notifier nvgpu_tsg_setup_sw -nvgpu_tsg_unbind_channel +nvgpu_tsg_force_unbind_channel nvgpu_tsg_unbind_channel_check_hw_state nvgpu_tsg_unbind_channel_check_ctx_reload nvgpu_set_bit diff --git a/libs/igpu/libnvgpu-drv-igpu_safe.export b/libs/igpu/libnvgpu-drv-igpu_safe.export index e4077b45d..d025e3203 100644 --- a/libs/igpu/libnvgpu-drv-igpu_safe.export +++ b/libs/igpu/libnvgpu-drv-igpu_safe.export @@ -715,7 +715,7 @@ nvgpu_tsg_reset_faulted_eng_pbdma nvgpu_tsg_set_ctx_mmu_error nvgpu_tsg_set_error_notifier nvgpu_tsg_setup_sw -nvgpu_tsg_unbind_channel +nvgpu_tsg_force_unbind_channel nvgpu_tsg_unbind_channel_check_hw_state nvgpu_tsg_unbind_channel_check_ctx_reload nvgpu_set_bit diff --git a/userspace/units/fifo/channel/nvgpu-channel.c b/userspace/units/fifo/channel/nvgpu-channel.c index 267ddf1d2..db2377ff7 100644 --- a/userspace/units/fifo/channel/nvgpu-channel.c +++ b/userspace/units/fifo/channel/nvgpu-channel.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -344,7 +344,7 @@ done: #define F_CHANNEL_CLOSE_AS_BOUND BIT(14) #define F_CHANNEL_CLOSE_LAST BIT(15) -/* nvgpu_tsg_unbind_channel always return 0 */ +/* nvgpu_tsg_force_unbind_channel always return 0 */ static const char *f_channel_close[] = { "already_freed", @@ -390,7 +390,7 @@ static bool channel_close_pruned(u32 branches, u32 final) return true; } - /* TODO: nvgpu_tsg_unbind_channel always returns 0 */ + /* TODO: nvgpu_tsg_force_unbind_channel always returns 0 */ branches &= ~F_CHANNEL_CLOSE_TSG_UNBIND_FAIL; @@ -1129,7 +1129,7 @@ int test_channel_enable_disable_tsg(struct unit_module *m, subtest_setup(branches); - err = nvgpu_tsg_unbind_channel(tsg, ch); + err = nvgpu_tsg_force_unbind_channel(tsg, ch); unit_assert(err == 0, goto done); err = nvgpu_channel_enable_tsg(g, ch); @@ -1530,7 +1530,7 @@ done: f_channel_suspend_resume)); } if (ch != NULL) { - nvgpu_tsg_unbind_channel(tsg, ch); + nvgpu_tsg_force_unbind_channel(tsg, ch); nvgpu_channel_close(ch); } if (tsg != NULL) { @@ -1629,7 +1629,7 @@ done: branches_str(branches, f_channel_debug_dump)); } if (ch != NULL) { - nvgpu_tsg_unbind_channel(tsg, ch); + nvgpu_tsg_force_unbind_channel(tsg, ch); nvgpu_channel_close(ch); } if (tsg != NULL) { @@ -1888,7 +1888,7 @@ int test_channel_abort_cleanup(struct unit_module *m, struct gk20a *g, err = nvgpu_tsg_bind_channel(tsg, ch); unit_assert(err == 0, goto done); - err = nvgpu_tsg_unbind_channel(tsg, ch); + err = nvgpu_tsg_force_unbind_channel(tsg, ch); unit_assert(err == 0, goto done); nvgpu_channel_close(ch); diff --git a/userspace/units/fifo/preempt/gv11b/nvgpu-preempt-gv11b.c b/userspace/units/fifo/preempt/gv11b/nvgpu-preempt-gv11b.c index db9b3a0c0..70aa6487e 100644 --- a/userspace/units/fifo/preempt/gv11b/nvgpu-preempt-gv11b.c +++ b/userspace/units/fifo/preempt/gv11b/nvgpu-preempt-gv11b.c @@ -1,5 +1,5 @@ /* - * 
Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -294,7 +294,7 @@ done: } g->ops.fifo.is_preempt_pending = stub_fifo_is_preempt_pending_pass; - err = nvgpu_tsg_unbind_channel(tsg, ch); + err = nvgpu_tsg_force_unbind_channel(tsg, ch); if (err != 0) { unit_err(m, "Cannot unbind channel\n"); } @@ -505,7 +505,7 @@ done: } g->ops.fifo.is_preempt_pending = stub_fifo_is_preempt_pending_pass; - err = nvgpu_tsg_unbind_channel(tsg, ch); + err = nvgpu_tsg_force_unbind_channel(tsg, ch); if (err != 0) { unit_err(m, "Cannot unbind channel\n"); } diff --git a/userspace/units/fifo/runlist/nvgpu-runlist.c b/userspace/units/fifo/runlist/nvgpu-runlist.c index 99b822c23..1bba77691 100644 --- a/userspace/units/fifo/runlist/nvgpu-runlist.c +++ b/userspace/units/fifo/runlist/nvgpu-runlist.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -1274,7 +1274,7 @@ int test_runlist_update_locked(struct unit_module *m, struct gk20a *g, 0U, chA, false, false); unit_assert(err == 0, goto done); - err = nvgpu_tsg_unbind_channel(tsg, chA); + err = nvgpu_tsg_force_unbind_channel(tsg, chA); if (err != 0) { unit_err(m, "Cannot unbind channel A\n"); } @@ -1307,7 +1307,7 @@ done: branches_str(branches, f_runlist_update)); } - err = nvgpu_tsg_unbind_channel(tsg, ch); + err = nvgpu_tsg_force_unbind_channel(tsg, ch); if (err != 0) { unit_err(m, "Cannot unbind channel\n"); } @@ -1355,7 +1355,7 @@ done: unit_err(m, "%s failed\n", __func__); } - err = nvgpu_tsg_unbind_channel(tsg, ch); + err = nvgpu_tsg_force_unbind_channel(tsg, ch); if (err != 0) { unit_err(m, "Cannot unbind channel\n"); } diff --git a/userspace/units/fifo/tsg/nvgpu-tsg.c b/userspace/units/fifo/tsg/nvgpu-tsg.c index 639390f2a..9966440dc 100644 --- a/userspace/units/fifo/tsg/nvgpu-tsg.c +++ b/userspace/units/fifo/tsg/nvgpu-tsg.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -379,7 +379,7 @@ int test_tsg_bind_channel(struct unit_module *m, goto done); unit_assert(nvgpu_tsg_from_ch(ch) == tsg, goto done); - err = nvgpu_tsg_unbind_channel(tsg, ch); + err = nvgpu_tsg_force_unbind_channel(tsg, ch); unit_assert(err == 0, goto done); unit_assert(ch->tsgid == NVGPU_INVALID_TSG_ID, goto done); @@ -564,7 +564,7 @@ int test_tsg_unbind_channel(struct unit_module *m, branches & F_TSG_UNBIND_CHANNEL_ABORT_CLEAN_UP_NULL ? 
NULL : gops.channel.abort_clean_up; - err = nvgpu_tsg_unbind_channel(tsg, chA); + err = nvgpu_tsg_force_unbind_channel(tsg, chA); if (branches & fail) { /* check that TSG has been torn down */ @@ -914,7 +914,7 @@ int test_tsg_unbind_channel_check_ctx_reload(struct unit_module *m, if ((branches & F_UNBIND_CHANNEL_CHECK_CTX_RELOAD_SET) && (branches & F_UNBIND_CHANNEL_CHECK_CTX_RELOAD_CHID_MATCH)) { - nvgpu_tsg_unbind_channel(tsg, chB); + nvgpu_tsg_force_unbind_channel(tsg, chB); unit_assert(stub[0].chid == chB->chid, goto done); } } diff --git a/userspace/units/fifo/tsg/nvgpu-tsg.h b/userspace/units/fifo/tsg/nvgpu-tsg.h index 23635e451..633ea42ed 100644 --- a/userspace/units/fifo/tsg/nvgpu-tsg.h +++ b/userspace/units/fifo/tsg/nvgpu-tsg.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -91,7 +91,7 @@ int test_tsg_open(struct unit_module *m, * - Allocate channel with nvgpu_channel_open_new. * - Check that nvgpu_tsg_bind_channel returns 0. * - Check that TSG's list of channel is not empty. - * - Unbind channel with nvgpu_tsg_unbind_channel. + * - Unbind channel with nvgpu_tsg_force_unbind_channel. * - Check that ch->tsgid is now invalid. * - Check that tsg can be retrieved from ch using nvgpu_tsg_from_ch. 
* - Check TSG bind failure cases: @@ -121,7 +121,7 @@ int test_tsg_bind_channel(struct unit_module *m, * * Test Type: Feature * - * Targets: nvgpu_tsg_unbind_channel + * Targets: nvgpu_tsg_force_unbind_channel * * Input: test_fifo_init_support() run for this GPU * diff --git a/userspace/units/gr/intr/nvgpu-gr-intr.c b/userspace/units/gr/intr/nvgpu-gr-intr.c index 313bec4aa..34cd823b6 100644 --- a/userspace/units/gr/intr/nvgpu-gr-intr.c +++ b/userspace/units/gr/intr/nvgpu-gr-intr.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -325,7 +325,7 @@ static int gr_test_intr_allocate_ch_tsg(struct unit_module *m, ch->notifier_wq.initialized = notify_init; tsg_unbind: - err = nvgpu_tsg_unbind_channel(tsg, ch); + err = nvgpu_tsg_force_unbind_channel(tsg, ch); if (err != 0) { unit_err(m, "failed tsg channel unbind\n"); } diff --git a/userspace/units/gr/setup/nvgpu-gr-setup.c b/userspace/units/gr/setup/nvgpu-gr-setup.c index 36c8fc9a7..256a44257 100644 --- a/userspace/units/gr/setup/nvgpu-gr-setup.c +++ b/userspace/units/gr/setup/nvgpu-gr-setup.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -111,7 +111,7 @@ static int gr_test_setup_unbind_tsg(struct unit_module *m, struct gk20a *g) goto unbind_tsg; } - err = nvgpu_tsg_unbind_channel(gr_setup_tsg, gr_setup_ch); + err = nvgpu_tsg_force_unbind_channel(gr_setup_tsg, gr_setup_ch); if (err != 0) { unit_err(m, "failed tsg channel unbind\n"); } diff --git a/userspace/units/gr/setup/nvgpu-gr-setup.h b/userspace/units/gr/setup/nvgpu-gr-setup.h index f808c408e..9845260ef 100644 --- a/userspace/units/gr/setup/nvgpu-gr-setup.h +++ b/userspace/units/gr/setup/nvgpu-gr-setup.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -137,7 +137,7 @@ int test_gr_setup_set_preemption_mode(struct unit_module *m, * must have been executed successfully. * * Steps: - * - Call nvgpu_tsg_unbind_channel. + * - Call nvgpu_tsg_force_unbind_channel. * - Call nvgpu_channel_close. * - Call nvgpu_tsg_release. * diff --git a/userspace/units/rc/nvgpu-rc.c b/userspace/units/rc/nvgpu-rc.c index 6fe968073..dac8a72f5 100644 --- a/userspace/units/rc/nvgpu-rc.c +++ b/userspace/units/rc/nvgpu-rc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -152,7 +152,7 @@ clear_tsg: int test_rc_deinit(struct unit_module *m, struct gk20a *g, void *args) { struct nvgpu_posix_channel *posix_channel = ch->os_priv; - int ret = nvgpu_tsg_unbind_channel(tsg, ch); + int ret = nvgpu_tsg_force_unbind_channel(tsg, ch); if (ret != 0) { ret = UNIT_FAIL; unit_err(m , "channel already unbound");