From 3e6d61b17797fa6828460f95c578e54fa8900b22 Mon Sep 17 00:00:00 2001 From: Divya Date: Wed, 26 Oct 2022 06:11:48 +0000 Subject: [PATCH] gpu: nvgpu: wake up gr wait wq in rmmod path - The pmu_pg_task thread remains alive in the background during railgate and rail-ungate. - During rail-ungate, the PG task thread starts again and executes PG-related tasks. - It comes in pmu_pg_init_powergating() and waits for GR initialization. Here it waits for gr to be initialized. - In parallel, the main GPU thread works on rmmod (from gpu_module_reload test). - By this time, the main gpu thread has started rmmod and gr->initialized can be set to false, thus causing an uninterruptible wait for pmu_pg_task thread. - To solve this, wake gr wait wq in rmmod path when NVGPU_DRIVER_IS_DYING and NVGPU_KERNEL_IS_DYING flags are set. Bug 3806514 Change-Id: Id78d92f30b75aba1aee22398cc86a3acebd50ef6 Signed-off-by: Divya Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2798003 (cherry picked from commit d9345065bcb6d9ff497c127fa4cd52077f4ecfa4) Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2807245 Reviewed-by: svc-mobile-coverity Reviewed-by: svc-mobile-cert Reviewed-by: svc-mobile-misra Reviewed-by: Vijayakumar Subbu GVS: Gerrit_Virtual_Submit --- drivers/gpu/nvgpu/common/gr/gr.c | 6 ++++-- drivers/gpu/nvgpu/os/linux/module.c | 26 ++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/nvgpu/common/gr/gr.c b/drivers/gpu/nvgpu/common/gr/gr.c index eaddc3cce..696db7faf 100644 --- a/drivers/gpu/nvgpu/common/gr/gr.c +++ b/drivers/gpu/nvgpu/common/gr/gr.c @@ -1184,12 +1184,14 @@ void nvgpu_gr_sw_ready(struct gk20a *g, bool enable) } #ifdef CONFIG_NVGPU_NON_FUSA -/* Wait until GR is initialized */ void nvgpu_gr_wait_initialized(struct gk20a *g) { struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g); - NVGPU_COND_WAIT(&gr->init_wq, gr->initialized, 0U); + /* Wait until GR is initialized */ + NVGPU_COND_WAIT(&gr->init_wq, 
gr->initialized || + (nvgpu_is_enabled(g, NVGPU_KERNEL_IS_DYING) || + nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)), 0U); } #endif diff --git a/drivers/gpu/nvgpu/os/linux/module.c b/drivers/gpu/nvgpu/os/linux/module.c index c2e32e7b8..aefdfb210 100644 --- a/drivers/gpu/nvgpu/os/linux/module.c +++ b/drivers/gpu/nvgpu/os/linux/module.c @@ -61,6 +61,7 @@ #include #include #include +#include #include #include #include @@ -71,6 +72,7 @@ #include #include +#include "common/gr/gr_priv.h" #include "platform_gk20a.h" #include "sysfs.h" #include "vgpu/vgpu_linux.h" @@ -147,8 +149,21 @@ static int nvgpu_kernel_shutdown_notification(struct notifier_block *nb, struct nvgpu_os_linux *l = container_of(nb, struct nvgpu_os_linux, nvgpu_reboot_nb); struct gk20a *g = &l->g; + struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g); nvgpu_set_enabled(g, NVGPU_KERNEL_IS_DYING, true); + + /* + * In rmmod path, when the kernel or GPU driver is + * dying signal the gr wait queue so that the wait + * queue wakes up and further processing happens. + * This is needed to prevent other threads, like + * pmu_pg_task, to go into un-interruptible state. + */ + if (gr != NULL) { + nvgpu_cond_signal(&gr->init_wq); + } + return NOTIFY_DONE; } @@ -1590,10 +1605,21 @@ static int gk20a_pm_deinit(struct device *dev) void nvgpu_start_gpu_idle(struct gk20a *g) { + struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g); struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); down_write(&l->busy_lock); nvgpu_set_enabled(g, NVGPU_DRIVER_IS_DYING, true); + /* + * In rmmod path, when the kernel or GPU driver is + * dying signal the gr wait queue so that the wait + * queue wakes up and further processing happens. + * This is needed to prevent other threads, like + * pmu_pg_task, to go into un-interruptible state. + */ + if (gr != NULL) { + nvgpu_cond_signal(&gr->init_wq); + } /* * GR SW ready needs to be invalidated at this time with the busy lock * held to prevent a racing condition on the gr/mm code