gpu: nvgpu: Add reboot handler

Add a reboot handler to make sure that nvgpu does not try to busy
the GPU while the system is going down. Once the system is going
down, any number of the subsystems nvgpu depends on may already
have been deinitialized.

Bug 200333709
Bug 200454316

Change-Id: I2ceaf7ca4fb88643310874b5b26937ef44c6e3dd
Signed-off-by: Kary Jin <karyj@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1927018
(cherry picked from commit 9d2e50de42)
Reviewed-on: https://git-master.nvidia.com/r/1927030
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Kary Jin
2018-10-15 15:57:52 +08:00
committed by mobile promotions
parent 0188b93e30
commit 5b1b9eeab1
5 changed files with 37 additions and 6 deletions

View File

@@ -438,12 +438,17 @@ done:
* Check if the device can go busy. Basically if the driver is currently * Check if the device can go busy. Basically if the driver is currently
* in the process of dying then do not let new places make the driver busy. * in the process of dying then do not let new places make the driver busy.
*/ */
int gk20a_can_busy(struct gk20a *g) int nvgpu_can_busy(struct gk20a *g)
{ {
if (nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) { /* Can't do anything if the system is rebooting/shutting down
* or the driver is restarting
*/
if (nvgpu_is_enabled(g, NVGPU_KERNEL_IS_DYING) ||
nvgpu_is_enabled(g, NVGPU_DRIVER_IS_DYING)) {
return 0; return 0;
} else {
return 1;
} }
return 1;
} }
int gk20a_wait_for_idle(struct gk20a *g) int gk20a_wait_for_idle(struct gk20a *g)

View File

@@ -36,6 +36,7 @@ struct gk20a;
#define NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP 3 #define NVGPU_GR_USE_DMA_FOR_FW_BOOTSTRAP 3
#define NVGPU_FECS_TRACE_VA 4 #define NVGPU_FECS_TRACE_VA 4
#define NVGPU_CAN_RAILGATE 5 #define NVGPU_CAN_RAILGATE 5
#define NVGPU_KERNEL_IS_DYING 6
/* /*
* ECC flags * ECC flags

View File

@@ -1894,7 +1894,7 @@ void gk20a_idle(struct gk20a *g);
int __gk20a_do_idle(struct gk20a *g, bool force_reset); int __gk20a_do_idle(struct gk20a *g, bool force_reset);
int __gk20a_do_unidle(struct gk20a *g); int __gk20a_do_unidle(struct gk20a *g);
int gk20a_can_busy(struct gk20a *g); int nvgpu_can_busy(struct gk20a *g);
int gk20a_wait_for_idle(struct gk20a *g); int gk20a_wait_for_idle(struct gk20a *g);
#define NVGPU_GPU_ARCHITECTURE_SHIFT 4U #define NVGPU_GPU_ARCHITECTURE_SHIFT 4U

View File

@@ -24,6 +24,8 @@
#include <linux/interrupt.h> #include <linux/interrupt.h>
#include <linux/pm_runtime.h> #include <linux/pm_runtime.h>
#include <linux/reset.h> #include <linux/reset.h>
#include <linux/reboot.h>
#include <linux/notifier.h>
#include <linux/platform/tegra/common.h> #include <linux/platform/tegra/common.h>
#include <linux/pci.h> #include <linux/pci.h>
@@ -77,6 +79,17 @@
#define CREATE_TRACE_POINTS #define CREATE_TRACE_POINTS
#include <trace/events/gk20a.h> #include <trace/events/gk20a.h>
/*
 * Reboot-notifier callback.
 *
 * Marks the GPU instance that embeds this notifier block with the
 * NVGPU_KERNEL_IS_DYING flag. The event code and the data pointer are
 * ignored: every notification is handled the same way.
 *
 * Always returns NOTIFY_DONE.
 */
static int nvgpu_kernel_shutdown_notification(struct notifier_block *nb,
					unsigned long event, void *unused)
{
	struct nvgpu_os_linux *os_linux;

	/* nb is the nvgpu_reboot_nb member embedded in struct nvgpu_os_linux. */
	os_linux = container_of(nb, struct nvgpu_os_linux, nvgpu_reboot_nb);

	__nvgpu_set_enabled(&os_linux->g, NVGPU_KERNEL_IS_DYING, true);

	return NOTIFY_DONE;
}
struct device_node *nvgpu_get_node(struct gk20a *g) struct device_node *nvgpu_get_node(struct gk20a *g)
{ {
struct device *dev = dev_from_gk20a(g); struct device *dev = dev_from_gk20a(g);
@@ -111,7 +124,7 @@ int gk20a_busy(struct gk20a *g)
down_read(&l->busy_lock); down_read(&l->busy_lock);
if (!gk20a_can_busy(g)) { if (!nvgpu_can_busy(g)) {
ret = -ENODEV; ret = -ENODEV;
atomic_dec(&g->usage_count.atomic_var); atomic_dec(&g->usage_count.atomic_var);
goto fail; goto fail;
@@ -158,7 +171,7 @@ void gk20a_idle(struct gk20a *g)
dev = dev_from_gk20a(g); dev = dev_from_gk20a(g);
if (!(dev && gk20a_can_busy(g))) if (!(dev && nvgpu_can_busy(g)))
return; return;
if (pm_runtime_enabled(dev)) { if (pm_runtime_enabled(dev)) {
@@ -1370,6 +1383,12 @@ static int gk20a_probe(struct platform_device *dev)
goto return_err; goto return_err;
} }
l->nvgpu_reboot_nb.notifier_call =
nvgpu_kernel_shutdown_notification;
err = register_reboot_notifier(&l->nvgpu_reboot_nb);
if (err)
goto return_err;
return 0; return 0;
return_err: return_err:
@@ -1443,12 +1462,15 @@ static int __exit gk20a_remove(struct platform_device *pdev)
int err; int err;
struct device *dev = &pdev->dev; struct device *dev = &pdev->dev;
struct gk20a *g = get_gk20a(dev); struct gk20a *g = get_gk20a(dev);
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
if (gk20a_gpu_is_virtual(dev)) if (gk20a_gpu_is_virtual(dev))
return vgpu_remove(pdev); return vgpu_remove(pdev);
err = nvgpu_remove(dev, &nvgpu_class); err = nvgpu_remove(dev, &nvgpu_class);
unregister_reboot_notifier(&l->nvgpu_reboot_nb);
set_gk20a(pdev, NULL); set_gk20a(pdev, NULL);
gk20a_put(g); gk20a_put(g);

View File

@@ -20,6 +20,7 @@
#include <linux/cdev.h> #include <linux/cdev.h>
#include <linux/iommu.h> #include <linux/iommu.h>
#include <linux/hashtable.h> #include <linux/hashtable.h>
#include <linux/notifier.h>
#include <nvgpu/gk20a.h> #include <nvgpu/gk20a.h>
@@ -136,6 +137,8 @@ struct nvgpu_os_linux {
struct nvgpu_os_linux_ops ops; struct nvgpu_os_linux_ops ops;
struct notifier_block nvgpu_reboot_nb;
#ifdef CONFIG_DEBUG_FS #ifdef CONFIG_DEBUG_FS
struct dentry *debugfs; struct dentry *debugfs;
struct dentry *debugfs_alias; struct dentry *debugfs_alias;