gpu: nvgpu: wake up gr wait wq in rmmod path

- The pmu_pg_task thread remains alive in the background
  during railgate and rail-ungate.
- During rail-ungate, the PG task thread starts again and
  executes PG-related tasks.
- It enters pmu_pg_init_powergating() and waits there for GR
  to be initialized.
- In parallel, the main GPU thread works on rmmod (from
  gpu_module_reload test).
- By this time, the main GPU thread has started rmmod and
  gr->initialized may have been set to false, leaving the
  pmu_pg_task thread stuck in an uninterruptible wait.
- To solve this, wake the gr wait wq in the rmmod path when the
  NVGPU_DRIVER_IS_DYING and NVGPU_KERNEL_IS_DYING flags are set.

Bug 3806514
Bug 3756912

Change-Id: Id78d92f30b75aba1aee22398cc86a3acebd50ef6
Signed-off-by: Divya <dsinghatwari@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2798003
(cherry picked from commit d9345065bcb6d9ff497c127fa4cd52077f4ecfa4)
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2819084
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Divya
2022-10-26 06:11:48 +00:00
committed by mobile promotions
parent 26423cad95
commit 27f3fc61a3
2 changed files with 13 additions and 1 deletions

View File

@@ -1180,7 +1180,9 @@ void nvgpu_gr_wait_initialized(struct gk20a *g)
{
struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
NVGPU_COND_WAIT(&gr->init_wq, gr->initialized, 0U);
NVGPU_COND_WAIT(&gr->init_wq, gr->initialized ||
(nvgpu_is_enabled(g, NVGPU_KERNEL_IS_DYING) ||
nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)), 0U);
}
#endif

View File

@@ -61,6 +61,7 @@
#include <nvgpu/engines.h>
#include <nvgpu/channel.h>
#include <nvgpu/gr/gr.h>
#include <nvgpu/gr/gr_instances.h>
#include <nvgpu/gr/gr_utils.h>
#include <nvgpu/pmu/pmu_pstate.h>
#include <nvgpu/cyclestats_snapshot.h>
@@ -72,6 +73,7 @@
#include <nvgpu/nvs.h>
#include <nvgpu/l1ss_err_reporting.h>
#include "common/gr/gr_priv.h"
#include "platform_gk20a.h"
#include "sysfs.h"
#include "vgpu/vgpu_linux.h"
@@ -148,8 +150,12 @@ static int nvgpu_kernel_shutdown_notification(struct notifier_block *nb,
struct nvgpu_os_linux *l = container_of(nb, struct nvgpu_os_linux,
nvgpu_reboot_nb);
struct gk20a *g = &l->g;
struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
nvgpu_set_enabled(g, NVGPU_KERNEL_IS_DYING, true);
/* signal the gr wait */
nvgpu_cond_signal(&gr->init_wq);
return NOTIFY_DONE;
}
@@ -1578,10 +1584,14 @@ static int gk20a_pm_deinit(struct device *dev)
void nvgpu_start_gpu_idle(struct gk20a *g)
{
struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
down_write(&l->busy_lock);
nvgpu_set_enabled(g, NVGPU_DRIVER_IS_DYING, true);
/* signal the gr wait */
nvgpu_cond_signal(&gr->init_wq);
/*
* GR SW ready needs to be invalidated at this time with the busy lock
* held to prevent a racing condition on the gr/mm code