gpu: nvgpu: enable runtime PM before secure_alloc init during probe

With genpd-based runtime PM, device railgating is managed by the PM core and nvgpu manages only the clocks. To suspend/resume the device for idling/unidling while initializing secure alloc, runtime PM must be enabled during probe. The nvgpu platform railgate handlers will manage only the clocks.

During probe, the nvgpu driver poweroff/poweron sequences must not be invoked as part of the driver runtime suspend/resume; hence a probe state (probe_done) is added. After platform probe initializes the clocks, an explicit runtime resume of the device is required so that it can be sanely suspended during gk20a_do_idle.

The runtime PM configuration differs based on the NVGPU_CAN_RAILGATE capability; hence runtime PM is ("truly") enabled only for the duration of nvgpu_probe, and the state is then reverted at the beginning of gk20a_pm_late_init.

Bug 200602747
JIRA NVGPU-5356

Change-Id: I1fbd03d3f49da07ccbee9714387e00ffc688864e
Signed-off-by: Sagar Kamble <skamble@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2375939
GVS: Gerrit_Virtual_Submit
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: Sachin Nikam <snikam@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
2025-12-22 09:12:24 +03:00 · 2020-07-15 13:07:30 +05:30
parent 5a5f082d24
commit 4012a97640
3 changed files with 85 additions and 5 deletions
--- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
@@ -285,6 +285,9 @@ struct gk20a {
 	/** Is the GPU ready to be used? */
 	u32 power_on_state;

+	/** Is the GPU probe complete? */
+	bool probe_done;
+
 #ifdef CONFIG_NVGPU_DGPU
 	bool gpu_reset_done;
 #endif
--- a/drivers/gpu/nvgpu/os/linux/driver_common.c
+++ b/drivers/gpu/nvgpu/os/linux/driver_common.c
@@ -18,6 +18,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
+#include <linux/pm_runtime.h>
 #include <uapi/linux/nvgpu.h>

 #include <nvgpu/defaults.h>
@@ -292,6 +293,13 @@ int nvgpu_probe(struct gk20a *g,
 	if (err)
 		return err;

+	/*
+	 * Note that for runtime suspend to work the clocks have to be setup
+	 * which happens in the probe call above. Hence the driver resume
+	 * is done here and not in gk20a_pm_init.
+	 */
+	pm_runtime_get_sync(dev);
+
 	if (platform->late_probe) {
 		err = platform->late_probe(dev);
 		if (err) {
@@ -300,6 +308,8 @@ int nvgpu_probe(struct gk20a *g,
 		}
 	}

+	pm_runtime_put_sync_autosuspend(dev);
+
 	nvgpu_create_sysfs(dev);
 	gk20a_debug_init(g, debugfs_symlink);

@@ -324,6 +334,8 @@ static void nvgpu_free_gk20a(struct gk20a *g)
 {
 	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);

+	g->probe_done = false;
+
 	kfree(l);
 }

--- a/drivers/gpu/nvgpu/os/linux/module.c
+++ b/drivers/gpu/nvgpu/os/linux/module.c
@@ -764,6 +764,22 @@ int gk20a_do_idle_impl(struct gk20a *g, bool force_reset)
 	bool is_railgated;
 	int err = 0;

+	if (!g->probe_done) {
+		/*
+		 * Note that autosuspend delay is 0 at this point hence the device
+		 * will suspend immediately. Deterministic channels, gk20a_busy and
+		 * unrailgate don't intervene during probe so no need to hold the
+		 * locks below.
+		 */
+		pm_runtime_put_sync_autosuspend(dev);
+		if (pm_runtime_status_suspended(dev)) {
+			return 0;
+		} else {
+			nvgpu_err(g, "failed to idle");
+			return -EBUSY;
+		}
+	}
+
 	/*
 	 * Hold back deterministic submits and changes to deterministic
 	 * channels - this must be outside the power busy locks.
@@ -905,6 +921,16 @@ int gk20a_do_unidle_impl(struct gk20a *g)
 	struct gk20a_platform *platform = dev_get_drvdata(dev);
 	int err;

+	if (!g->probe_done) {
+		pm_runtime_get_sync(dev);
+		if (pm_runtime_active(dev)) {
+			return 0;
+		} else {
+			nvgpu_err(g, "failed to unidle");
+			return -EBUSY;
+		}
+	}
+
 	if (g->forced_reset) {
 		/*
 		 * If we did a forced-reset/railgate
@@ -1291,12 +1317,17 @@ finish:
 #ifdef CONFIG_PM
 static int gk20a_pm_runtime_resume(struct device *dev)
 {
+	struct gk20a *g = get_gk20a(dev);
 	int err = 0;

 	err = gk20a_pm_unrailgate(dev);
 	if (err)
 		goto fail;

+	if (!g->probe_done) {
+		return 0;
+	}
+
 	if (gk20a_gpu_is_virtual(dev))
 		err = vgpu_pm_finalize_poweron(dev);
 	else
@@ -1320,6 +1351,13 @@ static int gk20a_pm_runtime_suspend(struct device *dev)
 	if (!g)
 		return 0;

+	if (!g->probe_done) {
+		err = gk20a_pm_railgate(dev);
+		if (err)
+			pm_runtime_mark_last_busy(dev);
+		return err;
+	}
+
 	if (gk20a_gpu_is_virtual(dev))
 		err = vgpu_pm_prepare_poweroff(dev);
 	else
@@ -1452,9 +1490,27 @@ static int gk20a_pm_init(struct device *dev)
 	nvgpu_log_fn(g, " ");

 	/*
-	 * Initialise pm runtime. For railgate disable
-	 * case, set autosuspend delay to negative which
-	 * will suspend runtime pm
+	 * runtime PM is enabled here. Irrespective of the device power state,
+	 * it is resumed and suspended as part of nvgpu_probe due to dependency
+	 * on clocks setup. From there onwards runtime PM is truly enabled.
+	 */
+	pm_runtime_enable(dev);
+
+	return err;
+}
+
+static int gk20a_pm_late_init(struct device *dev)
+{
+	struct gk20a *g = get_gk20a(dev);
+	int err = 0;
+
+	nvgpu_log_fn(g, " ");
+
+	pm_runtime_disable(dev);
+
+	/*
+	 * For railgate disable case, set autosuspend delay to negative which
+	 * will avoid runtime pm suspend.
 	 */
 	if (g->railgate_delay && nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE))
 		pm_runtime_set_autosuspend_delay(dev,
@@ -1624,6 +1680,7 @@ static int gk20a_probe(struct platform_device *dev)

 	nvgpu_init_gk20a(gk20a);
 	set_gk20a(dev, gk20a);
+	gk20a->probe_done = false;
 	l->dev = &dev->dev;
 	gk20a->log_mask = NVGPU_DEFAULT_DBG_MASK;

@@ -1711,13 +1768,19 @@ static int gk20a_probe(struct platform_device *dev)
 		platform->reset_control = NULL;
 #endif

+	err = gk20a_pm_init(&dev->dev);
+	if (err) {
+		dev_err(&dev->dev, "pm init failed");
+		goto return_err;
+	}
+
 	err = nvgpu_probe(gk20a, "gpu.0");
 	if (err)
 		goto return_err;

-	err = gk20a_pm_init(&dev->dev);
+	err = gk20a_pm_late_init(&dev->dev);
 	if (err) {
-		dev_err(&dev->dev, "pm init failed");
+		dev_err(&dev->dev, "pm late_init failed");
 		goto return_err;
 	}

@@ -1730,6 +1793,8 @@ static int gk20a_probe(struct platform_device *dev)
 	nvgpu_mutex_init(&l->dmabuf_priv_list_lock);
 	nvgpu_init_list_node(&l->dmabuf_priv_list);

+	gk20a->probe_done = true;
+
 	return 0;

 return_err: