From 608decf1e610a889b4009bd148f815c8055898f6 Mon Sep 17 00:00:00 2001
From: Debarshi Dutta <ddutta@nvidia.com>
Date: Wed, 18 Aug 2021 11:02:13 +0530
Subject: [PATCH] gpu: nvgpu: add support for powering off gpu

Add support for powering off the IGPU when switching between
legacy and SMC mode (in either direction) or when changing the
SMC configuration. The power-off can be issued as follows:

echo 0 > /dev/nvgpu/igpu0/power

The following steps are done during a poweroff:
1) Idle the deterministic channels.
2) Acquire the write lock on the l->busy_lock semaphore.
3) Wait until power_usage decrements to indicate 0 active jobs.
4) Invoke pm_runtime_put_sync_suspend().
5) Invoke nvgpu_gr_remove_support() to clear existing GR memory.
6) Release the write lock on l->busy_lock.
7) Unidle the deterministic channels.

Part of the sequence matches that of the gk20a_do_idle code.
The common parts are extracted into new functions
gk20a_block_new_jobs_and_idle() and gk20a_unblock_jobs()

For the joint-rail case, the current implementation railgates
and then sets the autosuspend delay to -1 via
pm_runtime_set_autosuspend_delay() to disable regular runtime
resume/suspend.

Remove clearing of NVGPU_SUPPORT_MIG status during state change
as it leads to inconsistencies.

Jira NVGPU-6920

Change-Id: I0b3eb3278176122ac061c1e8a94ebfb3c17c3925
Signed-off-by: Debarshi Dutta <ddutta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2578501
Reviewed-by: svc_kernel_abi <svc_kernel_abi@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: Antony Clince Alex <aalex@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
---
 drivers/gpu/nvgpu/common/init/nvgpu_init.c   |  13 ++
 drivers/gpu/nvgpu/hal/grmgr/grmgr_ga10b.c    |   1 -
 drivers/gpu/nvgpu/include/nvgpu/nvgpu_init.h |   8 +
 drivers/gpu/nvgpu/os/linux/driver_common.c   |   3 +-
 drivers/gpu/nvgpu/os/linux/module.c          | 151 +++++++++++++++++--
 drivers/gpu/nvgpu/os/linux/module.h          |   9 +-
 drivers/gpu/nvgpu/os/linux/pci.c             |   6 +-
 drivers/gpu/nvgpu/os/linux/power_ops.c       |  16 +-
 drivers/gpu/nvgpu/os/linux/sysfs.c           |   2 +
 9 files changed, 185 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/nvgpu/common/init/nvgpu_init.c b/drivers/gpu/nvgpu/common/init/nvgpu_init.c
index ab01433b6..9cd497c53 100644
--- a/drivers/gpu/nvgpu/common/init/nvgpu_init.c
+++ b/drivers/gpu/nvgpu/common/init/nvgpu_init.c
@@ -1173,6 +1173,19 @@ const char *nvgpu_get_power_state(struct gk20a *g)
 	return str;
 }
 
+bool nvgpu_poweron_started(struct gk20a *g)
+{
+	unsigned long flags = 0U;
+	u32 power_on;
+
+	nvgpu_spinlock_irqsave(&g->power_spinlock, flags);
+	power_on = g->power_on_state;
+	nvgpu_spinunlock_irqrestore(&g->power_spinlock, flags);
+
+	return (power_on == NVGPU_STATE_POWERED_ON ||
+		power_on == NVGPU_STATE_POWERING_ON);
+}
+
 bool nvgpu_is_powered_on(struct gk20a *g)
 {
 	unsigned long flags = 0U;
diff --git a/drivers/gpu/nvgpu/hal/grmgr/grmgr_ga10b.c b/drivers/gpu/nvgpu/hal/grmgr/grmgr_ga10b.c
index 5bc5e1d95..d8c175de0 100644
--- a/drivers/gpu/nvgpu/hal/grmgr/grmgr_ga10b.c
+++ b/drivers/gpu/nvgpu/hal/grmgr/grmgr_ga10b.c
@@ -653,7 +653,6 @@ static void ga10b_grmgr_set_smc_state(struct gk20a *g, bool enable)
 				smcarb_sys_pipe_info_mode_legacy_v());
 		}
 		nvgpu_writel(g, smcarb_sys_pipe_info_r(), smc_state);
-		nvgpu_set_enabled(g, NVGPU_SUPPORT_MIG, enable);
 		nvgpu_log(g, gpu_dbg_mig, "MIG boot reg_val[%x] enable[%d]",
 			smc_state, enable);
 	}
diff --git a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_init.h b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_init.h
index 7f1bbf563..5e0c11b9d 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_init.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_init.h
@@ -272,6 +272,14 @@ void nvgpu_set_power_state(struct gk20a *g, u32 state);
  */
 const char *nvgpu_get_power_state(struct gk20a *g);
 
+/**
+ * @brief Get whether power state is
+ *  NVGPU_STATE_POWERING_ON or NVGPU_STATE_POWERED_ON
+ *
+ * @param g [in] The GPU
+ */
+bool nvgpu_poweron_started(struct gk20a *g);
+
 /**
  * @brief Return the nvgpu power-on state
  *
diff --git a/drivers/gpu/nvgpu/os/linux/driver_common.c b/drivers/gpu/nvgpu/os/linux/driver_common.c
index 60d9efc21..f55caf833 100644
--- a/drivers/gpu/nvgpu/os/linux/driver_common.c
+++ b/drivers/gpu/nvgpu/os/linux/driver_common.c
@@ -185,7 +185,8 @@ static void nvgpu_init_pm_vars(struct gk20a *g)
 #endif
 	g->ptimer_src_freq = platform->ptimer_src_freq;
 
-	nvgpu_set_enabled(g, NVGPU_CAN_RAILGATE, platform->can_railgate_init);
+	nvgpu_set_enabled(g, NVGPU_CAN_RAILGATE,
+		nvgpu_platform_is_simulation(g)? true : platform->can_railgate_init);
 	g->can_tpc_powergate = platform->can_tpc_powergate;
 
 	for (i = 0; i < MAX_TPC_PG_CONFIGS; i++)
diff --git a/drivers/gpu/nvgpu/os/linux/module.c b/drivers/gpu/nvgpu/os/linux/module.c
index ea0a06e17..813ad86e5 100644
--- a/drivers/gpu/nvgpu/os/linux/module.c
+++ b/drivers/gpu/nvgpu/os/linux/module.c
@@ -688,18 +688,18 @@ static struct of_device_id tegra_gk20a_of_match[] = {
 MODULE_DEVICE_TABLE(of, tegra_gk20a_of_match);
 
 #ifdef CONFIG_PM
-/**
- * gk20a_do_idle() - force the GPU to idle and railgate
- *
- * In success, this call MUST be balanced by caller with gk20a_do_unidle()
- *
- * Acquires two locks : &l->busy_lock and &platform->railgate_lock
- * In success, we hold these locks and return
- * In failure, we release these locks and return
+/* Caller of this API can assume the following return values
+ *  1) -EBUSY indicates failure of the API, no locks are held. (Failure)
+ *  2) 1 indicates pm_runtime_status_suspended without any locks held
+ *     and g->probe_done = false. (Success)
+ *  3) 0 indicates function successfully idles the driver and prevents
+ *     further jobs. Following steps are executed,
+ *	  a) Hold back Deterministic Submits
+ *	  b) Down-Write Busy lock
+ *	  c) Acquire platform->railgate lock.
  */
-int gk20a_do_idle(void *_g)
+int gk20a_block_new_jobs_and_idle(struct gk20a *g)
 {
-	struct gk20a *g = (struct gk20a *)_g;
 	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
 	struct device *dev = dev_from_gk20a(g);
 	struct gk20a_platform *platform = dev_get_drvdata(dev);
@@ -716,7 +716,7 @@ int gk20a_do_idle(void *_g)
 		 */
 		pm_runtime_put_sync_autosuspend(dev);
 		if (pm_runtime_status_suspended(dev)) {
-			return 0;
+			return 1;
 		} else {
 			nvgpu_err(g, "failed to idle");
 			return -EBUSY;
@@ -779,6 +779,92 @@ int gk20a_do_idle(void *_g)
 		return -EBUSY;
 	}
 
+	return 0;
+}
+
+int gk20a_block_new_jobs_and_poweroff(struct gk20a *g)
+{
+	struct device *dev = dev_from_gk20a(g);
+	struct gk20a_platform *platform = dev_get_drvdata(dev);
+	int ret;
+
+	ret = gk20a_block_new_jobs_and_idle(g);
+	if (ret == -EBUSY) {
+		return ret;
+	}
+
+	if (ret == 1) {
+		return 0;
+	}
+
+	/* check if it is already railgated ? */
+	if (platform->is_railgated(dev)) {
+		nvgpu_mutex_release(&platform->railgate_lock);
+		return 0;
+	}
+
+	nvgpu_mutex_release(&platform->railgate_lock);
+
+	/* For joint_xpu_rail platforms, This will decrement the
+	 * extra refcount taken by us.
+	 */
+	if (!nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE)) {
+		pm_runtime_dont_use_autosuspend(dev);
+	}
+
+	pm_runtime_put_sync_suspend(dev);
+
+	nvgpu_log_info(g, "power usage_count = %d", atomic_read(&dev->power.usage_count));
+
+	return 0;
+}
+
+static void gk20a_unblock_jobs(struct gk20a *g)
+{
+	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
+	struct device *dev = dev_from_gk20a(g);
+
+	/*  For joint_xpu_rail, its unsafe to keep the rail gated. */
+	if (!nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE)) {
+		pm_runtime_set_autosuspend_delay(dev, -1);
+		pm_runtime_use_autosuspend(dev);
+	}
+
+	/* release the lock and open up all other busy() calls */
+	up_write(&l->busy_lock);
+
+	nvgpu_channel_deterministic_unidle(g);
+}
+
+/**
+ * gk20a_do_idle() - force the GPU to idle and railgate
+ *
+ * In success, this call MUST be balanced by caller with gk20a_do_unidle()
+ *
+ * Acquires two locks : &l->busy_lock and &platform->railgate_lock
+ * In success, we hold these locks and return
+ * In failure, we release these locks and return
+ */
+int gk20a_do_idle(void *_g)
+{
+	struct gk20a *g = (struct gk20a *)_g;
+	struct device *dev = dev_from_gk20a(g);
+	struct gk20a_platform *platform = dev_get_drvdata(dev);
+	int ret;
+
+	ret = gk20a_block_new_jobs_and_idle(g);
+	if (ret == -EBUSY) {
+		return ret;
+	}
+
+	if (ret == 1) {
+		return 0;
+	}
+
+	/* check if it is already railgated ? */
+	if (platform->is_railgated(dev)) {
+		return 0;
+	}
 	/*
 	 * If railgating is enabled, autosuspend delay will be > 0. Set it to
 	 * 0 to suspend immediately. If railgating is disabled setting it to
@@ -800,6 +886,8 @@ int gk20a_do_idle(void *_g)
 		(void) gk20a_do_unidle(g);
 		return -EBUSY;
 	}
+
+	return 0;
 }
 
 /**
@@ -829,8 +917,7 @@ int gk20a_do_unidle(void *_g)
 	nvgpu_mutex_release(&platform->railgate_lock);
 
 	if (g->railgate_delay && nvgpu_is_enabled(g, NVGPU_CAN_RAILGATE))
-		pm_runtime_set_autosuspend_delay(dev,
-				 g->railgate_delay);
+		pm_runtime_set_autosuspend_delay(dev, g->railgate_delay);
 	else
 		pm_runtime_set_autosuspend_delay(dev, -1);
 
@@ -1720,6 +1807,44 @@ return_err_platform:
 	return err;
 }
 
+int gk20a_driver_force_power_off(struct gk20a *g)
+{
+	struct device *dev = dev_from_gk20a(g);
+	struct gk20a_platform *platform = gk20a_get_platform(dev);
+	int err = 0;
+
+#ifdef CONFIG_NVGPU_DGPU
+	if (g->pci_class) {
+		nvgpu_err(g, "Poweroff is not supported for device yet.");
+		return -EINVAL;
+	}
+#endif
+
+	err = gk20a_block_new_jobs_and_poweroff(g);
+	if (err != 0)
+		goto done;
+
+	nvgpu_gr_remove_support(g);
+
+	/*
+	 * This is a WAR.
+	 * For T210, powernode must not allow device nodes to be powered off
+	 * even during a force poweroff. Once the WAR for T210 is removed,
+	 * this will hold true for all chips.
+	 */
+	if (platform->platform_chip_id != TEGRA_210)
+		gk20a_user_nodes_deinit(dev);
+
+	gk20a_unblock_jobs(g);
+
+done:
+	if (err != 0) {
+		nvgpu_err(g, "failed to poweroff");
+	}
+
+	return err;
+}
+
 int nvgpu_remove(struct device *dev)
 {
 	struct gk20a *g = get_gk20a(dev);
diff --git a/drivers/gpu/nvgpu/os/linux/module.h b/drivers/gpu/nvgpu/os/linux/module.h
index b822989ab..a1dfcc50c 100644
--- a/drivers/gpu/nvgpu/os/linux/module.h
+++ b/drivers/gpu/nvgpu/os/linux/module.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011-2018, NVIDIA CORPORATION.  All rights reserved.
+ * Copyright (c) 2011-2021, NVIDIA CORPORATION.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -21,6 +21,13 @@ struct nvgpu_os_linux;
 int gk20a_pm_finalize_poweron(struct device *dev);
 int nvgpu_finalize_poweron_linux(struct nvgpu_os_linux *l);
 void gk20a_remove_support(struct gk20a *g);
+/*
+ * This method is currently only supported to allow changing
+ * MIG configurations. As such only GR state and device nodes
+ * are freed as part of this. Any future functionality update
+ * can be made by adding more to this.
+ */
+int gk20a_driver_force_power_off(struct gk20a *g);
 void gk20a_driver_start_unload(struct gk20a *g);
 int nvgpu_quiesce(struct gk20a *g);
 int nvgpu_remove(struct device *dev);
diff --git a/drivers/gpu/nvgpu/os/linux/pci.c b/drivers/gpu/nvgpu/os/linux/pci.c
index 813f1d5c6..2084fdd0b 100644
--- a/drivers/gpu/nvgpu/os/linux/pci.c
+++ b/drivers/gpu/nvgpu/os/linux/pci.c
@@ -696,6 +696,8 @@ static int nvgpu_pci_probe(struct pci_dev *pdev,
 	nvgpu_mutex_init(&l->dmabuf_priv_list_lock);
 	nvgpu_init_list_node(&l->dmabuf_priv_list);
 
+	g->probe_done = true;
+
 	return 0;
 
 err_free_irq:
@@ -760,10 +762,6 @@ static void nvgpu_pci_remove(struct pci_dev *pdev)
 		nvgpu_thermal_deinit(g);
 	}
 
-	err = nvgpu_quiesce(g);
-	/* TODO: handle failure to idle */
-	WARN(err, "gpu failed to idle during driver removal");
-
 	nvgpu_free_irq(g);
 
 	nvgpu_remove(dev);
diff --git a/drivers/gpu/nvgpu/os/linux/power_ops.c b/drivers/gpu/nvgpu/os/linux/power_ops.c
index ab0c7ed90..0a44b0750 100644
--- a/drivers/gpu/nvgpu/os/linux/power_ops.c
+++ b/drivers/gpu/nvgpu/os/linux/power_ops.c
@@ -34,8 +34,10 @@
 
 #include "platform_gk20a.h"
 #include "os_linux.h"
+#include "module.h"
 
 #define NVGPU_DRIVER_POWER_ON_NEEDED	1
+#define NVGPU_DRIVER_POWER_OFF_NEEDED	0
 
 int gk20a_power_open(struct inode *inode, struct file *filp)
 {
@@ -117,9 +119,8 @@ int gk20a_power_write(struct file *filp, const char __user *buf,
 	}
 
 	if (power_status == NVGPU_DRIVER_POWER_ON_NEEDED) {
-		if ((g->power_on_state == NVGPU_STATE_POWERING_ON) ||
-				(g->power_on_state == NVGPU_STATE_POWERED_ON)) {
-					goto free_input;
+		if (nvgpu_poweron_started(g)) {
+			goto free_input;
 		}
 
 		err = gk20a_busy(g);
@@ -130,8 +131,15 @@ int gk20a_power_write(struct file *filp, const char __user *buf,
 		}
 
 		gk20a_idle(g);
+	} else if (power_status == NVGPU_DRIVER_POWER_OFF_NEEDED) {
+		err = gk20a_driver_force_power_off(g);
+		if (err) {
+			nvgpu_err(g, "power_node_write failed at busy");
+			kfree(userinput);
+			return -EINVAL;
+		}
 	} else {
-		nvgpu_err(g, "1 is the valid value to power-on the GPU");
+		nvgpu_err(g, "1/0 are the valid values to power-on the GPU");
 		kfree(userinput);
 		return -EINVAL;
 	}
diff --git a/drivers/gpu/nvgpu/os/linux/sysfs.c b/drivers/gpu/nvgpu/os/linux/sysfs.c
index f776902bc..2ff12982b 100644
--- a/drivers/gpu/nvgpu/os/linux/sysfs.c
+++ b/drivers/gpu/nvgpu/os/linux/sysfs.c
@@ -306,6 +306,8 @@ static ssize_t railgate_enable_store(struct device *dev,
 		return -EINVAL;
 	}
 
+	nvgpu_log_info(g, "railgating is enabled %ld", railgate_enable);
+
 	if (railgate_enable) {
 		nvgpu_set_enabled(g, NVGPU_CAN_RAILGATE, true);
 		pm_runtime_set_autosuspend_delay(dev, g->railgate_delay);