mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-23 01:50:07 +03:00
gpu: nvgpu: Unify remove/shutdown codepaths
The following changes are part of the porting of the bind/unbind functionality. These changes reuse the shutdown codepaths in iGPU and dGPU and fix a locking issue in gk20a_busy() where the usage count can lead to a deadlock during driver shutdown. They also fix a race condition with the gr/mm code by invalidating the SW ready flag while holding the busy lock. JIRA: EVLR-1739 Change-Id: I62ce47378436b21f447f4cd93388759ed3f9bad1 Signed-off-by: David Nieto <dmartineznie@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1554959 Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com> Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com> GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
980bf96bf2
commit
ef6ea3475c
@@ -489,9 +489,8 @@ void nvgpu_clk_arb_cleanup_arbiter(struct gk20a *g)
|
||||
nvgpu_kfree(g, arb->vf_table_pool[index].gpc2clk_points);
|
||||
nvgpu_kfree(g, arb->vf_table_pool[index].mclk_points);
|
||||
}
|
||||
nvgpu_mutex_destroy(&g->clk_arb->pstate_lock);
|
||||
}
|
||||
|
||||
nvgpu_mutex_destroy(&g->clk_arb->pstate_lock);
|
||||
nvgpu_kfree(g, g->clk_arb);
|
||||
g->clk_arb = NULL;
|
||||
}
|
||||
|
||||
@@ -640,6 +640,48 @@ static int gk20a_pm_unrailgate(struct device *dev)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Idle the GPU in preparation of shutdown/remove.
|
||||
* gk20a_driver_start_unload() does not idle the GPU, but instead changes the SW
|
||||
* state to prevent further activity on the driver SW side.
|
||||
* On driver removal quiesce() should be called after start_unload()
|
||||
*/
|
||||
int nvgpu_quiesce(struct gk20a *g)
|
||||
{
|
||||
int err;
|
||||
struct device *dev = dev_from_gk20a(g);
|
||||
|
||||
err = gk20a_wait_for_idle(g);
|
||||
if (err) {
|
||||
nvgpu_err(g, "failed to idle GPU, err=%d", err);
|
||||
return err;
|
||||
}
|
||||
|
||||
err = gk20a_fifo_disable_all_engine_activity(g, true);
|
||||
if (err) {
|
||||
nvgpu_err(g, "failed to disable engine activity, err=%d",
|
||||
err);
|
||||
return err;
|
||||
}
|
||||
|
||||
err = gk20a_fifo_wait_engine_idle(g);
|
||||
if (err) {
|
||||
nvgpu_err(g, "failed to idle engines, err=%d",
|
||||
err);
|
||||
return err;
|
||||
}
|
||||
|
||||
if (gk20a_gpu_is_virtual(dev))
|
||||
err = vgpu_pm_prepare_poweroff(dev);
|
||||
else
|
||||
err = gk20a_pm_prepare_poweroff(dev);
|
||||
|
||||
if (err)
|
||||
nvgpu_err(g, "failed to prepare for poweroff, err=%d",
|
||||
err);
|
||||
return err;
|
||||
}
|
||||
|
||||
static void gk20a_pm_shutdown(struct platform_device *pdev)
|
||||
{
|
||||
struct gk20a_platform *platform = platform_get_drvdata(pdev);
|
||||
@@ -668,35 +710,9 @@ static void gk20a_pm_shutdown(struct platform_device *pdev)
|
||||
/* Prevent more requests by disabling Runtime PM */
|
||||
__pm_runtime_disable(&pdev->dev, false);
|
||||
|
||||
err = gk20a_wait_for_idle(g);
|
||||
if (err) {
|
||||
nvgpu_err(g, "failed to idle GPU, err=%d", err);
|
||||
err = nvgpu_quiesce(g);
|
||||
if (err)
|
||||
goto finish;
|
||||
}
|
||||
|
||||
err = gk20a_fifo_disable_all_engine_activity(g, true);
|
||||
if (err) {
|
||||
nvgpu_err(g, "failed to disable engine activity, err=%d",
|
||||
err);
|
||||
goto finish;
|
||||
}
|
||||
|
||||
err = gk20a_fifo_wait_engine_idle(g);
|
||||
if (err) {
|
||||
nvgpu_err(g, "failed to idle engines, err=%d",
|
||||
err);
|
||||
goto finish;
|
||||
}
|
||||
|
||||
if (gk20a_gpu_is_virtual(&pdev->dev))
|
||||
err = vgpu_pm_prepare_poweroff(&pdev->dev);
|
||||
else
|
||||
err = gk20a_pm_prepare_poweroff(&pdev->dev);
|
||||
if (err) {
|
||||
nvgpu_err(g, "failed to prepare for poweroff, err=%d",
|
||||
err);
|
||||
goto finish;
|
||||
}
|
||||
|
||||
err = gk20a_pm_railgate(&pdev->dev);
|
||||
if (err)
|
||||
@@ -854,6 +870,9 @@ void gk20a_driver_start_unload(struct gk20a *g)
|
||||
|
||||
down_write(&g->busy_lock);
|
||||
__nvgpu_set_enabled(g, NVGPU_DRIVER_IS_DYING, true);
|
||||
/* GR SW ready needs to be invalidated at this time with the busy lock
|
||||
* held to prevent a racing condition on the gr/mm code */
|
||||
g->gr.sw_ready = false;
|
||||
up_write(&g->busy_lock);
|
||||
|
||||
if (g->is_virtual)
|
||||
@@ -979,18 +998,14 @@ static int gk20a_probe(struct platform_device *dev)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __exit gk20a_remove(struct platform_device *pdev)
|
||||
int nvgpu_remove(struct device *dev, struct class *class)
|
||||
{
|
||||
struct device *dev = &pdev->dev;
|
||||
struct gk20a *g = get_gk20a(dev);
|
||||
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
|
||||
struct gk20a_platform *platform = gk20a_get_platform(dev);
|
||||
|
||||
gk20a_dbg_fn("");
|
||||
|
||||
if (gk20a_gpu_is_virtual(dev))
|
||||
return vgpu_remove(pdev);
|
||||
|
||||
if (platform->has_cde)
|
||||
gk20a_cde_destroy(l);
|
||||
|
||||
@@ -1001,16 +1016,11 @@ static int __exit gk20a_remove(struct platform_device *pdev)
|
||||
if (IS_ENABLED(CONFIG_GK20A_DEVFREQ))
|
||||
gk20a_scale_exit(dev);
|
||||
|
||||
if (g->remove_support)
|
||||
g->remove_support(g);
|
||||
|
||||
gk20a_ce_destroy(g);
|
||||
|
||||
#ifdef CONFIG_ARCH_TEGRA_18x_SOC
|
||||
nvgpu_clk_arb_cleanup_arbiter(g);
|
||||
#endif
|
||||
|
||||
gk20a_user_deinit(dev, &nvgpu_class);
|
||||
gk20a_user_deinit(dev, class);
|
||||
|
||||
gk20a_debug_deinit(g);
|
||||
|
||||
@@ -1026,14 +1036,28 @@ static int __exit gk20a_remove(struct platform_device *pdev)
|
||||
if (platform->remove)
|
||||
platform->remove(dev);
|
||||
|
||||
set_gk20a(pdev, NULL);
|
||||
gk20a_put(g);
|
||||
|
||||
gk20a_dbg_fn("removed");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Platform-driver remove callback.
 *
 * Virtual GPUs are handed off entirely to vgpu_remove(). For every other
 * device the common nvgpu_remove() path is run against nvgpu_class, after
 * which the platform device's gk20a pointer is cleared and the reference
 * taken from get_gk20a()'s result is dropped with gk20a_put().
 *
 * Returns the result of vgpu_remove() or nvgpu_remove(), respectively.
 */
static int __exit gk20a_remove(struct platform_device *pdev)
{
	struct device *dev = &pdev->dev;
	struct gk20a *gpu = get_gk20a(dev);
	int status;

	if (!gk20a_gpu_is_virtual(dev)) {
		status = nvgpu_remove(dev, &nvgpu_class);

		/* Detach the driver data only after the common teardown ran. */
		set_gk20a(pdev, NULL);
		gk20a_put(gpu);
	} else {
		status = vgpu_remove(pdev);
	}

	return status;
}
|
||||
|
||||
static struct platform_driver gk20a_driver = {
|
||||
.probe = gk20a_probe,
|
||||
.remove = __exit_p(gk20a_remove),
|
||||
|
||||
@@ -19,6 +19,8 @@ struct device;
|
||||
int gk20a_pm_finalize_poweron(struct device *dev);
|
||||
void gk20a_remove_support(struct gk20a *g);
|
||||
void gk20a_driver_start_unload(struct gk20a *g);
|
||||
int nvgpu_quiesce(struct gk20a *g);
|
||||
int nvgpu_remove(struct device *dev, struct class *class);
|
||||
|
||||
extern struct class nvgpu_class;
|
||||
|
||||
|
||||
@@ -513,52 +513,34 @@ static int nvgpu_pci_probe(struct pci_dev *pdev,
|
||||
|
||||
static void nvgpu_pci_remove(struct pci_dev *pdev)
|
||||
{
|
||||
struct gk20a_platform *platform = gk20a_get_platform(&pdev->dev);
|
||||
struct gk20a *g = get_gk20a(&pdev->dev);
|
||||
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
|
||||
struct device *dev = dev_from_gk20a(g);
|
||||
int err;
|
||||
|
||||
gk20a_dbg(gpu_dbg_shutdown, "Removing nvgpu driver!\n");
|
||||
/* no support yet for unbind if DGPU is in VGPU mode */
|
||||
if (gk20a_gpu_is_virtual(dev))
|
||||
return;
|
||||
|
||||
if (g->irqs_enabled)
|
||||
disable_irq(g->irq_stall);
|
||||
/* only idle the GPU if the GPU is powered on */
|
||||
if (g->power_on) {
|
||||
gk20a_driver_start_unload(g);
|
||||
err = nvgpu_quiesce(g);
|
||||
/* TODO: handle failure to idle */
|
||||
WARN(err, "gpu failed to idle during driver removal");
|
||||
}
|
||||
|
||||
devm_free_irq(&pdev->dev, g->irq_stall, g);
|
||||
nvgpu_remove(dev, &nvgpu_pci_class);
|
||||
|
||||
#if defined(CONFIG_PCI_MSI)
|
||||
if (g->msi_enabled) {
|
||||
if (g->msi_enabled)
|
||||
pci_disable_msi(pdev);
|
||||
g->msi_enabled = false;
|
||||
else {
|
||||
/* IRQ does not need to be enabled in MSI as the line is not
|
||||
* shared
|
||||
*/
|
||||
enable_irq(g->irq_stall);
|
||||
}
|
||||
#endif
|
||||
gk20a_dbg(gpu_dbg_shutdown, "IRQs disabled.\n");
|
||||
|
||||
/*
|
||||
* Wait for the driver to finish up all the IOCTLs it's working on
|
||||
* before cleaning up the driver's data structures.
|
||||
*/
|
||||
gk20a_driver_start_unload(g);
|
||||
gk20a_dbg(gpu_dbg_shutdown, "Driver idle.\n");
|
||||
|
||||
#ifdef CONFIG_ARCH_TEGRA_18x_SOC
|
||||
nvgpu_clk_arb_cleanup_arbiter(g);
|
||||
#endif
|
||||
|
||||
gk20a_user_deinit(dev_from_gk20a(g), &nvgpu_pci_class);
|
||||
gk20a_dbg(gpu_dbg_shutdown, "User de-init done.\b");
|
||||
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
debugfs_remove_recursive(l->debugfs);
|
||||
debugfs_remove_recursive(l->debugfs_alias);
|
||||
#endif
|
||||
|
||||
nvgpu_remove_sysfs(dev_from_gk20a(g));
|
||||
|
||||
if (platform->remove)
|
||||
platform->remove(dev_from_gk20a(g));
|
||||
gk20a_dbg(gpu_dbg_shutdown, "Platform remove done.\b");
|
||||
|
||||
enable_irq(g->irq_stall);
|
||||
|
||||
gk20a_get_platform(&pdev->dev)->g = NULL;
|
||||
gk20a_put(g);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user