gpu: nvgpu: wake up gr wait wq in rmmod path

- The pmu_pg_task thread remains alive in the background
  during railgate and rail-ungate.
- During rail-ungate, the PG task thread starts again and
  executes PG-related tasks.
- It enters pmu_pg_init_powergating() and waits there for GR to
  be initialized.
- In parallel, the main GPU thread works on rmmod (from
  gpu_module_reload test).
- By this time, the main GPU thread may already have started rmmod and
  set gr->initialized to false, leaving the pmu_pg_task thread stuck in
  an uninterruptible wait.
- To solve this, wake the gr wait wq in the rmmod path when either the
  NVGPU_DRIVER_IS_DYING or the NVGPU_KERNEL_IS_DYING flag is set.

Bug 3806514

Change-Id: Id78d92f30b75aba1aee22398cc86a3acebd50ef6
Signed-off-by: Divya <dsinghatwari@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2798003
(cherry picked from commit d9345065bcb6d9ff497c127fa4cd52077f4ecfa4)
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2807245
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: svc-mobile-misra <svc-mobile-misra@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
This commit is contained in:
Divya
2022-10-26 06:11:48 +00:00
committed by mobile promotions
parent ea9aebb358
commit 3e6d61b177
2 changed files with 30 additions and 2 deletions

View File

@@ -1184,12 +1184,14 @@ void nvgpu_gr_sw_ready(struct gk20a *g, bool enable)
} }
#ifdef CONFIG_NVGPU_NON_FUSA #ifdef CONFIG_NVGPU_NON_FUSA
/* Wait until GR is initialized */
void nvgpu_gr_wait_initialized(struct gk20a *g) void nvgpu_gr_wait_initialized(struct gk20a *g)
{ {
struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g); struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
NVGPU_COND_WAIT(&gr->init_wq, gr->initialized, 0U); /* Wait until GR is initialized */
NVGPU_COND_WAIT(&gr->init_wq, gr->initialized ||
(nvgpu_is_enabled(g, NVGPU_KERNEL_IS_DYING) ||
nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)), 0U);
} }
#endif #endif

View File

@@ -61,6 +61,7 @@
#include <nvgpu/engines.h> #include <nvgpu/engines.h>
#include <nvgpu/channel.h> #include <nvgpu/channel.h>
#include <nvgpu/gr/gr.h> #include <nvgpu/gr/gr.h>
#include <nvgpu/gr/gr_instances.h>
#include <nvgpu/gr/gr_utils.h> #include <nvgpu/gr/gr_utils.h>
#include <nvgpu/pmu/pmu_pstate.h> #include <nvgpu/pmu/pmu_pstate.h>
#include <nvgpu/cyclestats_snapshot.h> #include <nvgpu/cyclestats_snapshot.h>
@@ -71,6 +72,7 @@
#include <nvgpu/fb.h> #include <nvgpu/fb.h>
#include <nvgpu/nvs.h> #include <nvgpu/nvs.h>
#include "common/gr/gr_priv.h"
#include "platform_gk20a.h" #include "platform_gk20a.h"
#include "sysfs.h" #include "sysfs.h"
#include "vgpu/vgpu_linux.h" #include "vgpu/vgpu_linux.h"
@@ -147,8 +149,21 @@ static int nvgpu_kernel_shutdown_notification(struct notifier_block *nb,
struct nvgpu_os_linux *l = container_of(nb, struct nvgpu_os_linux, struct nvgpu_os_linux *l = container_of(nb, struct nvgpu_os_linux,
nvgpu_reboot_nb); nvgpu_reboot_nb);
struct gk20a *g = &l->g; struct gk20a *g = &l->g;
struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
nvgpu_set_enabled(g, NVGPU_KERNEL_IS_DYING, true); nvgpu_set_enabled(g, NVGPU_KERNEL_IS_DYING, true);
/*
* In rmmod path, when the kernel or GPU driver is
* dying signal the gr wait queue so that the wait
* queue wakes up and further processing happens.
* This is needed to prevent other threads, like
* pmu_pg_task, to go into un-interruptible state.
*/
if (gr != NULL) {
nvgpu_cond_signal(&gr->init_wq);
}
return NOTIFY_DONE; return NOTIFY_DONE;
} }
@@ -1590,10 +1605,21 @@ static int gk20a_pm_deinit(struct device *dev)
void nvgpu_start_gpu_idle(struct gk20a *g) void nvgpu_start_gpu_idle(struct gk20a *g)
{ {
struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
down_write(&l->busy_lock); down_write(&l->busy_lock);
nvgpu_set_enabled(g, NVGPU_DRIVER_IS_DYING, true); nvgpu_set_enabled(g, NVGPU_DRIVER_IS_DYING, true);
/*
* In rmmod path, when the kernel or GPU driver is
* dying signal the gr wait queue so that the wait
* queue wakes up and further processing happens.
* This is needed to prevent other threads, like
* pmu_pg_task, to go into un-interruptible state.
*/
if (gr != NULL) {
nvgpu_cond_signal(&gr->init_wq);
}
/* /*
* GR SW ready needs to be invalidated at this time with the busy lock * GR SW ready needs to be invalidated at this time with the busy lock
* held to prevent a racing condition on the gr/mm code * held to prevent a racing condition on the gr/mm code