gpu: nvgpu: wait for engine idle in shutdown

In gk20a_pm_shutdown(), we do not check the return value of gk20a_pm_prepare_poweroff().

In some cases it is possible that gk20a_pm_prepare_poweroff() returns -EBUSY (this could happen if engines are busy), so we don't clean up s/w state and directly trigger GPU railgate.

In case some interrupt is triggered simultaneously, we try to access a register while the GPU is already railgated. This leads to a hard hang in the nvgpu shutdown path.

Make the below changes in the shutdown sequence to fix this:
- check return value of gk20a_wait_for_idle()
- disable activity on all engines with gk20a_fifo_disable_all_engine_activity()
- ensure engines are idle with gk20a_fifo_wait_engine_idle()
- check return value of gk20a_pm_prepare_poweroff()
- check return value of gk20a_pm_railgate()

Add a print when we bail out early in case the GPU is already railgated.

Move to use the new nvgpu_info/err() log messages instead of dev_*() messages.

Bug 200281010

Change-Id: I2856f9be6cd2de9b0d3ae12955cb1f0a2b6c29be
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1454658
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
2025-12-22 09:12:24 +03:00 · 2017-04-11 12:10:30 +05:30
parent a100422df5
commit 6de456f840
1 changed files with 36 additions and 8 deletions
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -732,32 +732,60 @@ static int gk20a_pm_unrailgate(struct device *dev)
 static void gk20a_pm_shutdown(struct platform_device *pdev)
 {
 	struct gk20a_platform *platform = platform_get_drvdata(pdev);
 	struct gk20a *g = platform->g;
 	int err;
-	dev_info(&pdev->dev, "shutting down");
+	nvgpu_info(g, "shutting down");
-	gk20a_driver_start_unload(platform->g);
+	gk20a_driver_start_unload(g);
 	/* If GPU is already railgated,
 	 * just prevent more requests, and return */
 	if (platform->is_railgated && platform->is_railgated(&pdev->dev)) {
 		__pm_runtime_disable(&pdev->dev, false);
 		nvgpu_info(g, "already railgated, shut down complete");
 		return;
 	}
 	/* Prevent more requests by disabling Runtime PM */
 	__pm_runtime_disable(&pdev->dev, false);
-	gk20a_wait_for_idle(&pdev->dev);
+	err = gk20a_wait_for_idle(&pdev->dev);
 	if (err) {
 		nvgpu_err(g, "failed to idle GPU, err=%d", err);
 		goto finish;
 	}
 	err = gk20a_fifo_disable_all_engine_activity(g, true);
 	if (err) {
 		nvgpu_err(g, "failed to disable engine activity, err=%d",
 			err);
 		goto finish;
 	}
 	err = gk20a_fifo_wait_engine_idle(g);
 	if (err) {
 		nvgpu_err(g, "failed to idle engines, err=%d",
 			err);
 		goto finish;
 	}
 	/* Be ready for rail-gate after this point */
 	if (gk20a_gpu_is_virtual(&pdev->dev))
-		vgpu_pm_prepare_poweroff(&pdev->dev);
+		err = vgpu_pm_prepare_poweroff(&pdev->dev);
 	else
-		gk20a_pm_prepare_poweroff(&pdev->dev);
+		err = gk20a_pm_prepare_poweroff(&pdev->dev);
 	if (err) {
 		nvgpu_err(g, "failed to prepare for poweroff, err=%d",
 			err);
 		goto finish;
 	}
-	gk20a_pm_railgate(&pdev->dev);
+	err = gk20a_pm_railgate(&pdev->dev);
 	if (err)
 		nvgpu_err(g, "failed to railgate, err=%d", err);
-	dev_info(&pdev->dev, "shut down complete\n");
+finish:
 	nvgpu_info(g, "shut down complete\n");
 }
 #ifdef CONFIG_PM