From 5b1b9eeab17a659a0d4cd301e23223f17f225105 Mon Sep 17 00:00:00 2001 From: Kary Jin Date: Mon, 15 Oct 2018 15:57:52 +0800 Subject: [PATCH] gpu: nvgpu: Add reboot handler Add a reboot handler to make sure that nvgpu does not try to busy the GPU if the system is going down. If the system is going down then any number of subsystems nvgpu depends on may already have been deinitialized. Bug 200333709 Bug 200454316 Change-Id: I2ceaf7ca4fb88643310874b5b26937ef44c6e3dd Signed-off-by: Kary Jin Reviewed-on: https://git-master.nvidia.com/r/1927018 (cherry picked from commit 9d2e50de426ac6362d66f9ccb29a0415322e467f) Reviewed-on: https://git-master.nvidia.com/r/1927030 Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/gpu/nvgpu/common/init/nvgpu_init.c | 11 ++++++--- drivers/gpu/nvgpu/include/nvgpu/enabled.h | 1 + drivers/gpu/nvgpu/include/nvgpu/gk20a.h | 2 +- drivers/gpu/nvgpu/os/linux/module.c | 26 ++++++++++++++++++++-- drivers/gpu/nvgpu/os/linux/os_linux.h | 3 +++ 5 files changed, 37 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/nvgpu/common/init/nvgpu_init.c b/drivers/gpu/nvgpu/common/init/nvgpu_init.c index 592a6fd79..29bfe113f 100644 --- a/drivers/gpu/nvgpu/common/init/nvgpu_init.c +++ b/drivers/gpu/nvgpu/common/init/nvgpu_init.c @@ -438,12 +438,17 @@ done: * Check if the device can go busy. Basically if the driver is currently * in the process of dying then do not let new places make the driver busy. */ -int gk20a_can_busy(struct gk20a *g) +int nvgpu_can_busy(struct gk20a *g) { - if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) { + /* Can't do anything if the system is rebooting/shutting down + * or the driver is restarting + */ + if (nvgpu_is_enabled(g, NVGPU_KERNEL_IS_DYING) || + nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) { return 0; + } else { + return 1; } - return 1; } int gk20a_wait_for_idle(struct gk20a *g) diff --git a/drivers/gpu/nvgpu/include/nvgpu/enabled.h b/drivers/gpu/nvgpu/include/nvgpu/enabled.h index eb5cf9be3..27047ef74 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/enabled.h +++ b/drivers/gpu/nvgpu/include/nvgpu/enabled.h @@ -36,6 +36,7 @@ struct gk20a; #define NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP 3 #define NVGPU_FECS_TRACE_VA 4 #define NVGPU_CAN_RAILGATE 5 +#define NVGPU_KERNEL_IS_DYING 6 /* * ECC flags diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h index 09e199deb..196d56e0d 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h +++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h @@ -1894,7 +1894,7 @@ void gk20a_idle(struct gk20a *g); int __gk20a_do_idle(struct gk20a *g, bool force_reset); int __gk20a_do_unidle(struct gk20a *g); -int gk20a_can_busy(struct gk20a *g); +int nvgpu_can_busy(struct gk20a *g); int gk20a_wait_for_idle(struct gk20a *g); #define NVGPU_GPU_ARCHITECTURE_SHIFT 4U diff --git a/drivers/gpu/nvgpu/os/linux/module.c b/drivers/gpu/nvgpu/os/linux/module.c index cf71bdbe6..940ba1b43 100644 --- a/drivers/gpu/nvgpu/os/linux/module.c +++ b/drivers/gpu/nvgpu/os/linux/module.c @@ -24,6 +24,8 @@ #include #include #include +#include +#include #include #include @@ -77,6 +79,17 @@ #define CREATE_TRACE_POINTS #include +static int nvgpu_kernel_shutdown_notification(struct notifier_block *nb, + unsigned long event, void *unused) +{ + struct nvgpu_os_linux *l = container_of(nb, struct nvgpu_os_linux, + nvgpu_reboot_nb); + struct gk20a *g = &l->g; + + __nvgpu_set_enabled(g, NVGPU_KERNEL_IS_DYING, true); + return NOTIFY_DONE; +} + struct device_node *nvgpu_get_node(struct gk20a *g) { struct device *dev = dev_from_gk20a(g); @@ -111,7 +124,7 @@ int gk20a_busy(struct gk20a *g) down_read(&l->busy_lock); - if (!gk20a_can_busy(g)) { + if (!nvgpu_can_busy(g)) { ret = -ENODEV; atomic_dec(&g->usage_count.atomic_var); goto fail; @@ -158,7 +171,7 @@ void gk20a_idle(struct gk20a *g) dev = dev_from_gk20a(g); - if (!(dev && gk20a_can_busy(g))) + if (!(dev && nvgpu_can_busy(g))) return; if (pm_runtime_enabled(dev)) { @@ -1370,6 +1383,12 @@ static int gk20a_probe(struct platform_device *dev) goto return_err; } + l->nvgpu_reboot_nb.notifier_call = + nvgpu_kernel_shutdown_notification; + err = register_reboot_notifier(&l->nvgpu_reboot_nb); + if (err) + goto return_err; + return 0; return_err: @@ -1443,12 +1462,15 @@ static int __exit gk20a_remove(struct platform_device *pdev) int err; struct device *dev = &pdev->dev; struct gk20a *g = get_gk20a(dev); + struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); if (gk20a_gpu_is_virtual(dev)) return vgpu_remove(pdev); err = nvgpu_remove(dev, &nvgpu_class); + unregister_reboot_notifier(&l->nvgpu_reboot_nb); + set_gk20a(pdev, NULL); gk20a_put(g); diff --git a/drivers/gpu/nvgpu/os/linux/os_linux.h b/drivers/gpu/nvgpu/os/linux/os_linux.h index 393180327..900879919 100644 --- a/drivers/gpu/nvgpu/os/linux/os_linux.h +++ b/drivers/gpu/nvgpu/os/linux/os_linux.h @@ -20,6 +20,7 @@ #include #include #include +#include #include @@ -136,6 +137,8 @@ struct nvgpu_os_linux { struct nvgpu_os_linux_ops ops; + struct notifier_block nvgpu_reboot_nb; + #ifdef CONFIG_DEBUG_FS struct dentry *debugfs; struct dentry *debugfs_alias;