gpu: nvgpu: Fix crash on reboot

A kernel panic is sometimes observed on reboot. The crash occurs
in the nvgpu_kernel_shutdown_notification() function when calling
nvgpu_cond_signal(). Fix this by checking that the 'gr' pointer is not
NULL before calling nvgpu_cond_signal(), both there and in
nvgpu_start_gpu_idle().

Bug 3943885

Change-Id: I81e5e1b1128f22832daf01b880fac2a5e38f2a7a
Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2846761
Reviewed-by: Sagar Kamble <skamble@nvidia.com>
Reviewed-by: Alex Waterman <alexw@nvidia.com>
GVS: Gerrit_Virtual_Submit <buildbot_gerritrpt@nvidia.com>
This commit is contained in:
Jon Hunter
2023-01-20 09:34:07 +00:00
committed by mobile promotions
parent 82b95758d1
commit e914561b6e

View File

@@ -1,7 +1,7 @@
/* /*
* GK20A Graphics * GK20A Graphics
* *
* Copyright (c) 2011-2022, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2011-2023, NVIDIA CORPORATION. All rights reserved.
* *
* This program is free software; you can redistribute it and/or modify it * This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License, * under the terms and conditions of the GNU General Public License,
@@ -153,8 +153,17 @@ static int nvgpu_kernel_shutdown_notification(struct notifier_block *nb,
struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g); struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g);
nvgpu_set_enabled(g, NVGPU_KERNEL_IS_DYING, true); nvgpu_set_enabled(g, NVGPU_KERNEL_IS_DYING, true);
/* signal the gr wait */
/*
* In rmmod path, when the kernel or GPU driver is
* dying signal the gr wait queue so that the wait
* queue wakes up and further processing happens.
* This is needed to prevent other threads, like
* pmu_pg_task, from entering an uninterruptible state.
*/
if (gr != NULL) {
nvgpu_cond_signal(&gr->init_wq); nvgpu_cond_signal(&gr->init_wq);
}
return NOTIFY_DONE; return NOTIFY_DONE;
} }
@@ -1589,9 +1598,16 @@ void nvgpu_start_gpu_idle(struct gk20a *g)
down_write(&l->busy_lock); down_write(&l->busy_lock);
nvgpu_set_enabled(g, NVGPU_DRIVER_IS_DYING, true); nvgpu_set_enabled(g, NVGPU_DRIVER_IS_DYING, true);
/*
/* signal the gr wait */ * In rmmod path, when the kernel or GPU driver is
* dying signal the gr wait queue so that the wait
* queue wakes up and further processing happens.
* This is needed to prevent other threads, like
* pmu_pg_task, from entering an uninterruptible state.
*/
if (gr != NULL) {
nvgpu_cond_signal(&gr->init_wq); nvgpu_cond_signal(&gr->init_wq);
}
/* /*
* GR SW ready needs to be invalidated at this time with the busy lock * GR SW ready needs to be invalidated at this time with the busy lock
* held to prevent a racing condition on the gr/mm code * held to prevent a racing condition on the gr/mm code