mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-23 09:57:08 +03:00
gpu: nvgpu: Unify remove/shutdown codepaths
The following changes are part of the porting of the bind/unbind functionality. These changes reuse the shutdown codepaths in iGPU and dGPU and fix a locking issue in gk20a_busy() where the usage count can lead to a deadlock during driver shutdown. They also fix a race condition with the gr/mm code by invalidating the sw ready flag while holding the busy lock. JIRA: EVLR-1739 Change-Id: I62ce47378436b21f447f4cd93388759ed3f9bad1 Signed-off-by: David Nieto <dmartineznie@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1554959 Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com> Reviewed-by: Automatic_Commit_Validation_User Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com> GVS: Gerrit_Virtual_Submit Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
This commit is contained in:
committed by
mobile promotions
parent
980bf96bf2
commit
ef6ea3475c
@@ -489,9 +489,8 @@ void nvgpu_clk_arb_cleanup_arbiter(struct gk20a *g)
|
|||||||
nvgpu_kfree(g, arb->vf_table_pool[index].gpc2clk_points);
|
nvgpu_kfree(g, arb->vf_table_pool[index].gpc2clk_points);
|
||||||
nvgpu_kfree(g, arb->vf_table_pool[index].mclk_points);
|
nvgpu_kfree(g, arb->vf_table_pool[index].mclk_points);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
nvgpu_mutex_destroy(&g->clk_arb->pstate_lock);
|
nvgpu_mutex_destroy(&g->clk_arb->pstate_lock);
|
||||||
|
}
|
||||||
nvgpu_kfree(g, g->clk_arb);
|
nvgpu_kfree(g, g->clk_arb);
|
||||||
g->clk_arb = NULL;
|
g->clk_arb = NULL;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -640,6 +640,48 @@ static int gk20a_pm_unrailgate(struct device *dev)
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Idle the GPU in preparation of shutdown/remove.
|
||||||
|
* gk20a_driver_start_unload() does not idle the GPU, but instead changes the SW
|
||||||
|
* state to prevent further activity on the driver SW side.
|
||||||
|
* On driver removal quiesce() should be called after start_unload()
|
||||||
|
*/
|
||||||
|
int nvgpu_quiesce(struct gk20a *g)
|
||||||
|
{
|
||||||
|
int err;
|
||||||
|
struct device *dev = dev_from_gk20a(g);
|
||||||
|
|
||||||
|
err = gk20a_wait_for_idle(g);
|
||||||
|
if (err) {
|
||||||
|
nvgpu_err(g, "failed to idle GPU, err=%d", err);
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = gk20a_fifo_disable_all_engine_activity(g, true);
|
||||||
|
if (err) {
|
||||||
|
nvgpu_err(g, "failed to disable engine activity, err=%d",
|
||||||
|
err);
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = gk20a_fifo_wait_engine_idle(g);
|
||||||
|
if (err) {
|
||||||
|
nvgpu_err(g, "failed to idle engines, err=%d",
|
||||||
|
err);
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (gk20a_gpu_is_virtual(dev))
|
||||||
|
err = vgpu_pm_prepare_poweroff(dev);
|
||||||
|
else
|
||||||
|
err = gk20a_pm_prepare_poweroff(dev);
|
||||||
|
|
||||||
|
if (err)
|
||||||
|
nvgpu_err(g, "failed to prepare for poweroff, err=%d",
|
||||||
|
err);
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
static void gk20a_pm_shutdown(struct platform_device *pdev)
|
static void gk20a_pm_shutdown(struct platform_device *pdev)
|
||||||
{
|
{
|
||||||
struct gk20a_platform *platform = platform_get_drvdata(pdev);
|
struct gk20a_platform *platform = platform_get_drvdata(pdev);
|
||||||
@@ -668,35 +710,9 @@ static void gk20a_pm_shutdown(struct platform_device *pdev)
|
|||||||
/* Prevent more requests by disabling Runtime PM */
|
/* Prevent more requests by disabling Runtime PM */
|
||||||
__pm_runtime_disable(&pdev->dev, false);
|
__pm_runtime_disable(&pdev->dev, false);
|
||||||
|
|
||||||
err = gk20a_wait_for_idle(g);
|
err = nvgpu_quiesce(g);
|
||||||
if (err) {
|
if (err)
|
||||||
nvgpu_err(g, "failed to idle GPU, err=%d", err);
|
|
||||||
goto finish;
|
goto finish;
|
||||||
}
|
|
||||||
|
|
||||||
err = gk20a_fifo_disable_all_engine_activity(g, true);
|
|
||||||
if (err) {
|
|
||||||
nvgpu_err(g, "failed to disable engine activity, err=%d",
|
|
||||||
err);
|
|
||||||
goto finish;
|
|
||||||
}
|
|
||||||
|
|
||||||
err = gk20a_fifo_wait_engine_idle(g);
|
|
||||||
if (err) {
|
|
||||||
nvgpu_err(g, "failed to idle engines, err=%d",
|
|
||||||
err);
|
|
||||||
goto finish;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (gk20a_gpu_is_virtual(&pdev->dev))
|
|
||||||
err = vgpu_pm_prepare_poweroff(&pdev->dev);
|
|
||||||
else
|
|
||||||
err = gk20a_pm_prepare_poweroff(&pdev->dev);
|
|
||||||
if (err) {
|
|
||||||
nvgpu_err(g, "failed to prepare for poweroff, err=%d",
|
|
||||||
err);
|
|
||||||
goto finish;
|
|
||||||
}
|
|
||||||
|
|
||||||
err = gk20a_pm_railgate(&pdev->dev);
|
err = gk20a_pm_railgate(&pdev->dev);
|
||||||
if (err)
|
if (err)
|
||||||
@@ -854,6 +870,9 @@ void gk20a_driver_start_unload(struct gk20a *g)
|
|||||||
|
|
||||||
down_write(&g->busy_lock);
|
down_write(&g->busy_lock);
|
||||||
__nvgpu_set_enabled(g, NVGPU_DRIVER_IS_DYING, true);
|
__nvgpu_set_enabled(g, NVGPU_DRIVER_IS_DYING, true);
|
||||||
|
/* GR SW ready needs to be invalidated at this time with the busy lock
|
||||||
|
* held to prevent a racing condition on the gr/mm code */
|
||||||
|
g->gr.sw_ready = false;
|
||||||
up_write(&g->busy_lock);
|
up_write(&g->busy_lock);
|
||||||
|
|
||||||
if (g->is_virtual)
|
if (g->is_virtual)
|
||||||
@@ -979,18 +998,14 @@ static int gk20a_probe(struct platform_device *dev)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int __exit gk20a_remove(struct platform_device *pdev)
|
int nvgpu_remove(struct device *dev, struct class *class)
|
||||||
{
|
{
|
||||||
struct device *dev = &pdev->dev;
|
|
||||||
struct gk20a *g = get_gk20a(dev);
|
struct gk20a *g = get_gk20a(dev);
|
||||||
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
|
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
|
||||||
struct gk20a_platform *platform = gk20a_get_platform(dev);
|
struct gk20a_platform *platform = gk20a_get_platform(dev);
|
||||||
|
|
||||||
gk20a_dbg_fn("");
|
gk20a_dbg_fn("");
|
||||||
|
|
||||||
if (gk20a_gpu_is_virtual(dev))
|
|
||||||
return vgpu_remove(pdev);
|
|
||||||
|
|
||||||
if (platform->has_cde)
|
if (platform->has_cde)
|
||||||
gk20a_cde_destroy(l);
|
gk20a_cde_destroy(l);
|
||||||
|
|
||||||
@@ -1001,16 +1016,11 @@ static int __exit gk20a_remove(struct platform_device *pdev)
|
|||||||
if (IS_ENABLED(CONFIG_GK20A_DEVFREQ))
|
if (IS_ENABLED(CONFIG_GK20A_DEVFREQ))
|
||||||
gk20a_scale_exit(dev);
|
gk20a_scale_exit(dev);
|
||||||
|
|
||||||
if (g->remove_support)
|
|
||||||
g->remove_support(g);
|
|
||||||
|
|
||||||
gk20a_ce_destroy(g);
|
|
||||||
|
|
||||||
#ifdef CONFIG_ARCH_TEGRA_18x_SOC
|
#ifdef CONFIG_ARCH_TEGRA_18x_SOC
|
||||||
nvgpu_clk_arb_cleanup_arbiter(g);
|
nvgpu_clk_arb_cleanup_arbiter(g);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
gk20a_user_deinit(dev, &nvgpu_class);
|
gk20a_user_deinit(dev, class);
|
||||||
|
|
||||||
gk20a_debug_deinit(g);
|
gk20a_debug_deinit(g);
|
||||||
|
|
||||||
@@ -1026,14 +1036,28 @@ static int __exit gk20a_remove(struct platform_device *pdev)
|
|||||||
if (platform->remove)
|
if (platform->remove)
|
||||||
platform->remove(dev);
|
platform->remove(dev);
|
||||||
|
|
||||||
set_gk20a(pdev, NULL);
|
|
||||||
gk20a_put(g);
|
|
||||||
|
|
||||||
gk20a_dbg_fn("removed");
|
gk20a_dbg_fn("removed");
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int __exit gk20a_remove(struct platform_device *pdev)
|
||||||
|
{
|
||||||
|
int err;
|
||||||
|
struct device *dev = &pdev->dev;
|
||||||
|
struct gk20a *g = get_gk20a(dev);
|
||||||
|
|
||||||
|
if (gk20a_gpu_is_virtual(dev))
|
||||||
|
return vgpu_remove(pdev);
|
||||||
|
|
||||||
|
err = nvgpu_remove(dev, &nvgpu_class);
|
||||||
|
|
||||||
|
set_gk20a(pdev, NULL);
|
||||||
|
gk20a_put(g);
|
||||||
|
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
static struct platform_driver gk20a_driver = {
|
static struct platform_driver gk20a_driver = {
|
||||||
.probe = gk20a_probe,
|
.probe = gk20a_probe,
|
||||||
.remove = __exit_p(gk20a_remove),
|
.remove = __exit_p(gk20a_remove),
|
||||||
|
|||||||
@@ -19,6 +19,8 @@ struct device;
|
|||||||
int gk20a_pm_finalize_poweron(struct device *dev);
|
int gk20a_pm_finalize_poweron(struct device *dev);
|
||||||
void gk20a_remove_support(struct gk20a *g);
|
void gk20a_remove_support(struct gk20a *g);
|
||||||
void gk20a_driver_start_unload(struct gk20a *g);
|
void gk20a_driver_start_unload(struct gk20a *g);
|
||||||
|
int nvgpu_quiesce(struct gk20a *g);
|
||||||
|
int nvgpu_remove(struct device *dev, struct class *class);
|
||||||
|
|
||||||
extern struct class nvgpu_class;
|
extern struct class nvgpu_class;
|
||||||
|
|
||||||
|
|||||||
@@ -513,52 +513,34 @@ static int nvgpu_pci_probe(struct pci_dev *pdev,
|
|||||||
|
|
||||||
static void nvgpu_pci_remove(struct pci_dev *pdev)
|
static void nvgpu_pci_remove(struct pci_dev *pdev)
|
||||||
{
|
{
|
||||||
struct gk20a_platform *platform = gk20a_get_platform(&pdev->dev);
|
|
||||||
struct gk20a *g = get_gk20a(&pdev->dev);
|
struct gk20a *g = get_gk20a(&pdev->dev);
|
||||||
struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
|
struct device *dev = dev_from_gk20a(g);
|
||||||
|
int err;
|
||||||
|
|
||||||
gk20a_dbg(gpu_dbg_shutdown, "Removing nvgpu driver!\n");
|
/* no support yet for unbind if DGPU is in VGPU mode */
|
||||||
|
if (gk20a_gpu_is_virtual(dev))
|
||||||
|
return;
|
||||||
|
|
||||||
if (g->irqs_enabled)
|
/* only idle the GPU if the GPU is powered on */
|
||||||
disable_irq(g->irq_stall);
|
if (g->power_on) {
|
||||||
|
gk20a_driver_start_unload(g);
|
||||||
|
err = nvgpu_quiesce(g);
|
||||||
|
/* TODO: handle failure to idle */
|
||||||
|
WARN(err, "gpu failed to idle during driver removal");
|
||||||
|
}
|
||||||
|
|
||||||
devm_free_irq(&pdev->dev, g->irq_stall, g);
|
nvgpu_remove(dev, &nvgpu_pci_class);
|
||||||
|
|
||||||
#if defined(CONFIG_PCI_MSI)
|
#if defined(CONFIG_PCI_MSI)
|
||||||
if (g->msi_enabled) {
|
if (g->msi_enabled)
|
||||||
pci_disable_msi(pdev);
|
pci_disable_msi(pdev);
|
||||||
g->msi_enabled = false;
|
else {
|
||||||
|
/* IRQ does not need to be enabled in MSI as the line is not
|
||||||
|
* shared
|
||||||
|
*/
|
||||||
|
enable_irq(g->irq_stall);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
gk20a_dbg(gpu_dbg_shutdown, "IRQs disabled.\n");
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Wait for the driver to finish up all the IOCTLs it's working on
|
|
||||||
* before cleaning up the driver's data structures.
|
|
||||||
*/
|
|
||||||
gk20a_driver_start_unload(g);
|
|
||||||
gk20a_dbg(gpu_dbg_shutdown, "Driver idle.\n");
|
|
||||||
|
|
||||||
#ifdef CONFIG_ARCH_TEGRA_18x_SOC
|
|
||||||
nvgpu_clk_arb_cleanup_arbiter(g);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
gk20a_user_deinit(dev_from_gk20a(g), &nvgpu_pci_class);
|
|
||||||
gk20a_dbg(gpu_dbg_shutdown, "User de-init done.\b");
|
|
||||||
|
|
||||||
#ifdef CONFIG_DEBUG_FS
|
|
||||||
debugfs_remove_recursive(l->debugfs);
|
|
||||||
debugfs_remove_recursive(l->debugfs_alias);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
nvgpu_remove_sysfs(dev_from_gk20a(g));
|
|
||||||
|
|
||||||
if (platform->remove)
|
|
||||||
platform->remove(dev_from_gk20a(g));
|
|
||||||
gk20a_dbg(gpu_dbg_shutdown, "Platform remove done.\b");
|
|
||||||
|
|
||||||
enable_irq(g->irq_stall);
|
|
||||||
|
|
||||||
gk20a_get_platform(&pdev->dev)->g = NULL;
|
gk20a_get_platform(&pdev->dev)->g = NULL;
|
||||||
gk20a_put(g);
|
gk20a_put(g);
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user