From e914561b6e60a2d88a3950211c95847c610c51fc Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Fri, 20 Jan 2023 09:34:07 +0000 Subject: [PATCH] gpu: nvgpu: Fix crash on reboot A kernel panic has been observed sometimes on reboot. The crash occurs in the nvgpu_kernel_shutdown_notification() function when calling nvgpu_cond_signal(). Fix this by checking that the 'gr' pointer is valid before calling nvgpu_cond_signal(). Bug 3943885 Change-Id: I81e5e1b1128f22832daf01b880fac2a5e38f2a7a Signed-off-by: Jon Hunter Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2846761 Reviewed-by: Sagar Kamble Reviewed-by: Alex Waterman GVS: Gerrit_Virtual_Submit --- drivers/gpu/nvgpu/os/linux/module.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/nvgpu/os/linux/module.c b/drivers/gpu/nvgpu/os/linux/module.c index 1f94bd99d..1e322344d 100644 --- a/drivers/gpu/nvgpu/os/linux/module.c +++ b/drivers/gpu/nvgpu/os/linux/module.c @@ -1,7 +1,7 @@ /* * GK20A Graphics * - * Copyright (c) 2011-2022, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2011-2023, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -153,8 +153,17 @@ static int nvgpu_kernel_shutdown_notification(struct notifier_block *nb, struct nvgpu_gr *gr = nvgpu_gr_get_cur_instance_ptr(g); nvgpu_set_enabled(g, NVGPU_KERNEL_IS_DYING, true); - /* signal the gr wait */ - nvgpu_cond_signal(&gr->init_wq); + + /* + * In the rmmod path, when the kernel or GPU driver is + * dying, signal the gr wait queue so that the wait + * queue wakes up and further processing happens. + * This is needed to prevent other threads, like + * pmu_pg_task, from going into an uninterruptible state.
+ */ + if (gr != NULL) { + nvgpu_cond_signal(&gr->init_wq); + } return NOTIFY_DONE; } @@ -1589,9 +1598,16 @@ void nvgpu_start_gpu_idle(struct gk20a *g) down_write(&l->busy_lock); nvgpu_set_enabled(g, NVGPU_DRIVER_IS_DYING, true); - - /* signal the gr wait */ - nvgpu_cond_signal(&gr->init_wq); + /* + * In the rmmod path, when the kernel or GPU driver is + * dying, signal the gr wait queue so that the wait + * queue wakes up and further processing happens. + * This is needed to prevent other threads, like + * pmu_pg_task, from going into an uninterruptible state. + */ + if (gr != NULL) { + nvgpu_cond_signal(&gr->init_wq); + } /* * GR SW ready needs to be invalidated at this time with the busy lock * held to prevent a racing condition on the gr/mm code