gpu: nvgpu: Handle driver shutdown more gracefully

Handle possible asynchronous GPU driver shutdown more gracefully.
This can occur when the GPU disappears from the PCI bus, for example
if it overheats or detects an overcurrent event.

Also add a preprocessor check to make sure that

  gk20a_channel_cancel_pending_sema_waits()

is always defined. In some builds CONFIG_SYNC is disabled, but
gk20a_remove_support() does not check for this.

Bug 1816516
Bug 1807277

Change-Id: I932e312291c5c6a6ac5e13525ce8ca56a1be3652
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1250028
(cherry picked from commit 337810f8c478238a38d8553c1492622d5fa9aafa)
Reviewed-on: http://git-master/r/1274476
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
This commit is contained in:
Alex Waterman
2016-11-08 11:31:05 -08:00
committed by mobile promotions
parent 9e2f7d98d4
commit c116522b10
4 changed files with 34 additions and 2 deletions

View File

@@ -103,6 +103,13 @@ struct gk20a_channel_sync {
void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync); void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync);
struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c); struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c);
bool gk20a_channel_sync_needs_sync_framework(struct channel_gk20a *c); bool gk20a_channel_sync_needs_sync_framework(struct channel_gk20a *c);
#ifdef CONFIG_SYNC
void gk20a_channel_cancel_pending_sema_waits(struct gk20a *g); void gk20a_channel_cancel_pending_sema_waits(struct gk20a *g);
#else
/*
 * No-op stand-in used when CONFIG_SYNC is not defined, so that callers can
 * invoke this function without adding their own preprocessor guard.
 */
static inline void gk20a_channel_cancel_pending_sema_waits(struct gk20a *g)
{
}
#endif
#endif #endif

View File

@@ -47,6 +47,8 @@
#include "nvgpu_common.h" #include "nvgpu_common.h"
#include "debug_gk20a.h" #include "debug_gk20a.h"
#include "ctrl_gk20a.h" #include "ctrl_gk20a.h"
#include "channel_sync_gk20a.h"
#include "hw_mc_gk20a.h" #include "hw_mc_gk20a.h"
#include "hw_timer_gk20a.h" #include "hw_timer_gk20a.h"
#include "hw_bus_gk20a.h" #include "hw_bus_gk20a.h"
@@ -66,6 +68,7 @@
#include "pstate/pstate.h" #include "pstate/pstate.h"
#endif #endif
#define CREATE_TRACE_POINTS #define CREATE_TRACE_POINTS
#include <trace/events/gk20a.h> #include <trace/events/gk20a.h>
@@ -708,6 +711,10 @@ void gk20a_remove_support(struct device *dev)
if (g->dbg_regops_tmp_buf) if (g->dbg_regops_tmp_buf)
kfree(g->dbg_regops_tmp_buf); kfree(g->dbg_regops_tmp_buf);
nvgpu_wait_for_deferred_interrupts(g);
gk20a_channel_cancel_pending_sema_waits(g);
if (g->pmu.remove_support) if (g->pmu.remove_support)
g->pmu.remove_support(&g->pmu); g->pmu.remove_support(&g->pmu);
@@ -1740,6 +1747,7 @@ void gk20a_busy_noresume(struct device *dev)
*/ */
void gk20a_driver_start_unload(struct gk20a *g) void gk20a_driver_start_unload(struct gk20a *g)
{ {
gk20a_dbg(gpu_dbg_shutdown, "Driver is now going down!\n");
g->driver_is_dying = 1; g->driver_is_dying = 1;
} }

View File

@@ -1081,6 +1081,7 @@ enum gk20a_dbg_categories {
gpu_dbg_sema_v = BIT(16), /* verbose semaphore debugging */ gpu_dbg_sema_v = BIT(16), /* verbose semaphore debugging */
gpu_dbg_pmu_pstate = BIT(17), /* p state controlled by pmu */ gpu_dbg_pmu_pstate = BIT(17), /* p state controlled by pmu */
gpu_dbg_xv = BIT(18), /* XVE debugging */ gpu_dbg_xv = BIT(18), /* XVE debugging */
gpu_dbg_shutdown = BIT(19), /* GPU shutdown tracing */
gpu_dbg_mem = BIT(31), /* memory accesses, very verbose */ gpu_dbg_mem = BIT(31), /* memory accesses, very verbose */
}; };

View File

@@ -356,10 +356,25 @@ static void nvgpu_pci_remove(struct pci_dev *pdev)
struct gk20a_platform *platform = gk20a_get_platform(&pdev->dev); struct gk20a_platform *platform = gk20a_get_platform(&pdev->dev);
struct gk20a *g = get_gk20a(&pdev->dev); struct gk20a *g = get_gk20a(&pdev->dev);
if (g->remove_support) gk20a_dbg(gpu_dbg_shutdown, "Removing nvgpu driver!\n");
g->remove_support(g->dev); gk20a_driver_start_unload(g);
disable_irq(g->irq_stall);
devm_free_irq(&pdev->dev, g->irq_stall, g);
gk20a_dbg(gpu_dbg_shutdown, "IRQs disabled.\n");
/*
* Wait for the driver to finish up all the IOCTLs it's working on
* before cleaning up the driver's data structures.
*/
gk20a_wait_for_idle(&pdev->dev);
gk20a_dbg(gpu_dbg_shutdown, "Driver idle.\n");
gk20a_user_deinit(g->dev, &nvgpu_pci_class); gk20a_user_deinit(g->dev, &nvgpu_pci_class);
gk20a_dbg(gpu_dbg_shutdown, "User de-init done.\b");
if (g->remove_support)
g->remove_support(g->dev);
debugfs_remove_recursive(platform->debugfs); debugfs_remove_recursive(platform->debugfs);
debugfs_remove_recursive(platform->debugfs_alias); debugfs_remove_recursive(platform->debugfs_alias);
@@ -368,6 +383,7 @@ static void nvgpu_pci_remove(struct pci_dev *pdev)
if (platform->remove) if (platform->remove)
platform->remove(g->dev); platform->remove(g->dev);
gk20a_dbg(gpu_dbg_shutdown, "Platform remove done.\b");
kfree(g); kfree(g);
} }