mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-22 17:36:20 +03:00
gpu: nvgpu: Handle driver shutdown more gracefully
Handle possible asynchronous GPU driver shutdown more gracefully. This occurs when the GPU disappears from the PCI bus, for example, if it overheats or detects an overcurrent event. Also add a preprocessor check to make sure that gk20a_channel_cancel_pending_sema_waits() is always defined. In some builds CONFIG_SYNC is disabled, but the gk20a_remove_support() code does not check for this. Bug 1816516 Bug 1807277 Change-Id: I932e312291c5c6a6ac5e13525ce8ca56a1be3652 Signed-off-by: Alex Waterman <alexw@nvidia.com> Reviewed-on: http://git-master/r/1250028 (cherry picked from commit 337810f8c478238a38d8553c1492622d5fa9aafa) Reviewed-on: http://git-master/r/1274476 GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
9e2f7d98d4
commit
c116522b10
@@ -103,6 +103,13 @@ struct gk20a_channel_sync {
|
||||
void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync);
|
||||
struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c);
|
||||
bool gk20a_channel_sync_needs_sync_framework(struct channel_gk20a *c);
|
||||
|
||||
/*
 * gk20a_channel_cancel_pending_sema_waits() is called from
 * gk20a_remove_support() without any CONFIG_SYNC guard, so a definition
 * must be visible in every build configuration.
 */
#ifdef CONFIG_SYNC
|
||||
void gk20a_channel_cancel_pending_sema_waits(struct gk20a *g);
|
||||
#else
|
||||
/* No-op stand-in used when CONFIG_SYNC is not defined. */
static inline void gk20a_channel_cancel_pending_sema_waits(struct gk20a *g)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -47,6 +47,8 @@
|
||||
#include "nvgpu_common.h"
|
||||
#include "debug_gk20a.h"
|
||||
#include "ctrl_gk20a.h"
|
||||
#include "channel_sync_gk20a.h"
|
||||
|
||||
#include "hw_mc_gk20a.h"
|
||||
#include "hw_timer_gk20a.h"
|
||||
#include "hw_bus_gk20a.h"
|
||||
@@ -66,6 +68,7 @@
|
||||
#include "pstate/pstate.h"
|
||||
#endif
|
||||
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include <trace/events/gk20a.h>
|
||||
|
||||
@@ -708,6 +711,10 @@ void gk20a_remove_support(struct device *dev)
|
||||
if (g->dbg_regops_tmp_buf)
|
||||
kfree(g->dbg_regops_tmp_buf);
|
||||
|
||||
nvgpu_wait_for_deferred_interrupts(g);
|
||||
|
||||
gk20a_channel_cancel_pending_sema_waits(g);
|
||||
|
||||
if (g->pmu.remove_support)
|
||||
g->pmu.remove_support(&g->pmu);
|
||||
|
||||
@@ -1740,6 +1747,7 @@ void gk20a_busy_noresume(struct device *dev)
|
||||
*/
|
||||
void gk20a_driver_start_unload(struct gk20a *g)
|
||||
{
|
||||
/* Trace the start of teardown under the gpu_dbg_shutdown debug category. */
gk20a_dbg(gpu_dbg_shutdown, "Driver is now going down!\n");
|
||||
/*
 * Flag the driver as dying. NOTE(review): presumably other code paths
 * test this flag to stop starting new work -- confirm against its readers.
 */
g->driver_is_dying = 1;
|
||||
}
|
||||
|
||||
|
||||
@@ -1081,6 +1081,7 @@ enum gk20a_dbg_categories {
|
||||
gpu_dbg_sema_v = BIT(16), /* verbose semaphore debugging */
|
||||
gpu_dbg_pmu_pstate = BIT(17), /* p state controlled by pmu */
|
||||
gpu_dbg_xv = BIT(18), /* XVE debugging */
|
||||
gpu_dbg_shutdown = BIT(19), /* GPU shutdown tracing */
|
||||
gpu_dbg_mem = BIT(31), /* memory accesses, very verbose */
|
||||
};
|
||||
|
||||
|
||||
@@ -356,10 +356,25 @@ static void nvgpu_pci_remove(struct pci_dev *pdev)
|
||||
struct gk20a_platform *platform = gk20a_get_platform(&pdev->dev);
|
||||
struct gk20a *g = get_gk20a(&pdev->dev);
|
||||
|
||||
if (g->remove_support)
|
||||
g->remove_support(g->dev);
|
||||
gk20a_dbg(gpu_dbg_shutdown, "Removing nvgpu driver!\n");
|
||||
gk20a_driver_start_unload(g);
|
||||
|
||||
disable_irq(g->irq_stall);
|
||||
devm_free_irq(&pdev->dev, g->irq_stall, g);
|
||||
gk20a_dbg(gpu_dbg_shutdown, "IRQs disabled.\n");
|
||||
|
||||
/*
|
||||
* Wait for the driver to finish up all the IOCTLs it's working on
|
||||
* before cleaning up the driver's data structures.
|
||||
*/
|
||||
gk20a_wait_for_idle(&pdev->dev);
|
||||
gk20a_dbg(gpu_dbg_shutdown, "Driver idle.\n");
|
||||
|
||||
gk20a_user_deinit(g->dev, &nvgpu_pci_class);
|
||||
gk20a_dbg(gpu_dbg_shutdown, "User de-init done.\n");
|
||||
|
||||
if (g->remove_support)
|
||||
g->remove_support(g->dev);
|
||||
|
||||
debugfs_remove_recursive(platform->debugfs);
|
||||
debugfs_remove_recursive(platform->debugfs_alias);
|
||||
@@ -368,6 +383,7 @@ static void nvgpu_pci_remove(struct pci_dev *pdev)
|
||||
|
||||
if (platform->remove)
|
||||
platform->remove(g->dev);
|
||||
gk20a_dbg(gpu_dbg_shutdown, "Platform remove done.\n");
|
||||
|
||||
kfree(g);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user