gpu: nvgpu: fix race between do_idle() and unrailgate()

While we are executing the do_idle() API, it is possible that unrailgate() gets invoked in the midst of idling the GPU, and this can result in failure of do_idle().

To prevent simultaneous execution of these methods, add a mutex, railgate_lock, and acquire it in the do_idle() and unrailgate() APIs.

Also, keep this lock held if do_idle() is successful. On success, the lock will be released in do_unidle(); otherwise, release this lock before returning.

Note that this lock should not be held in the railgate() API, since we do not want it to be blocked during do_idle().

bug 1529160

Change-Id: I87114b5367eaa217376455a2699c0d21c451c889
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/434190
(cherry picked from commit 561dc8e0933ff2d72573292968b893a52f5f783a)
Reviewed-on: http://git-master/r/435131
Reviewed-by: Arto Merilainen <amerilainen@nvidia.com>
Reviewed-by: Sachin Nikam <snikam@nvidia.com>
2025-12-22 17:36:20 +03:00 · 2014-07-03 17:59:39 +05:30
parent d608aa53ee
commit 0b1f9e4272
2 changed files with 21 additions and 3 deletions
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -1281,8 +1281,13 @@ static int _gk20a_pm_unrailgate(struct platform_device *pdev)
 {
 	struct gk20a_platform *platform = platform_get_drvdata(pdev);
 	int ret = 0;
-	if (platform->unrailgate)
+
 	if (platform->unrailgate) {
 		mutex_lock(&platform->railgate_lock);
 		ret = platform->unrailgate(pdev);
 		mutex_unlock(&platform->railgate_lock);
 	}
 	return ret;
 }
@@ -1362,6 +1367,8 @@ static int gk20a_pm_init(struct platform_device *dev)
 	gk20a_dbg_fn("");
 	mutex_init(&platform->railgate_lock);
 	/* Initialise pm runtime */
 	if (platform->clockgate_delay) {
 		pm_runtime_set_autosuspend_delay(&dev->dev,
@@ -1739,6 +1746,13 @@ int gk20a_do_idle(void)
 	/* acquire busy lock to block other busy() calls */
 	down_write(&g->busy_lock);
 	/* acquire railgate lock to prevent unrailgate in midst of do_idle() */
 	mutex_lock(&platform->railgate_lock);
 	/* check if it is already railgated ? */
 	if (platform->is_railgated(pdev))
 		return 0;
 	/* prevent suspend by incrementing usage counter */
 	pm_runtime_get_noresume(&pdev->dev);
@@ -1776,11 +1790,12 @@ int gk20a_do_idle(void)
 	}
 	/* GPU is not rail gated by now, return error */
-	up_write(&g->busy_lock);
+	goto fail_timeout;
 	return -EBUSY;
 fail:
 	pm_runtime_put_noidle(&pdev->dev);
 fail_timeout:
 	mutex_unlock(&platform->railgate_lock);
 	up_write(&g->busy_lock);
 	return -EBUSY;
 }
@@ -1794,8 +1809,10 @@ int gk20a_do_unidle(void)
 		bus_find_device_by_name(&platform_bus_type,
 		NULL, "gk20a.0"));
 	struct gk20a *g = get_gk20a(pdev);
 	struct gk20a_platform *platform = dev_get_drvdata(&pdev->dev);
 	/* release the lock and open up all other busy() calls */
 	mutex_unlock(&platform->railgate_lock);
 	up_write(&g->busy_lock);
 	return 0;
--- a/drivers/gpu/nvgpu/gk20a/platform_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/platform_gk20a.h
@@ -118,6 +118,7 @@ struct gk20a_platform {
 	/* Called to turn on the device */
 	int (*unrailgate)(struct platform_device *dev);
 	struct mutex railgate_lock;
 	/* Called to check state of device */
 	bool (*is_railgated)(struct platform_device *dev);