From 95bb19827e251568b6e82bc5d3d159f6f49a6db3 Mon Sep 17 00:00:00 2001
From: Thomas Fleury <tfleury@nvidia.com>
Date: Tue, 20 Aug 2019 15:59:08 -0400
Subject: [PATCH] gpu: nvgpu: add sw quiesce

For safety builds, the nvgpu driver should enter the SW quiesce state
when an uncorrectable error has occurred. In this state, any activity
on the GPU should be prevented, without powering off the GPU. Also,
only a minimal set of operations should be used to enter the SW
quiesce state.

Entering SW quiesce state does the following:
- set sw_quiesce_pending: when this flag is set, interrupt
  handlers exit after masking interrupts. This should help mitigate
  an interrupt storm.
- wake up thread to complete quiescing.

The thread performs the following:
- set NVGPU_DRIVER_IS_DYING to prevent allocation of new resources
- disable interrupts
- disable fifo scheduling
- preempt all runlists
- set error notifier for all active channels

Note: for channels with usermode submit enabled, userspace can
still ring doorbell, but this will not trigger any work on
engines since fifo scheduling is disabled.

Jira NVGPU-3493

Change-Id: I639a32da754d8833f54dcec1fa23135721d8d89a
Signed-off-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2172391
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/common/fifo/channel.c      | 19 ++++
 drivers/gpu/nvgpu/common/fifo/fifo.c         | 21 +++++
 drivers/gpu/nvgpu/common/init/nvgpu_init.c   | 97 ++++++++++++++++++++
 drivers/gpu/nvgpu/include/nvgpu/channel.h    | 15 +++
 drivers/gpu/nvgpu/include/nvgpu/fifo.h       |  3 +
 drivers/gpu/nvgpu/include/nvgpu/gk20a.h      |  6 ++
 drivers/gpu/nvgpu/include/nvgpu/nvgpu_init.h | 46 ++++++++++
 drivers/gpu/nvgpu/libnvgpu-drv_safe.export   |  1 +
 drivers/gpu/nvgpu/os/linux/intr.c            | 13 +++
 drivers/gpu/nvgpu/os/linux/module.c          | 41 ++++-----
 drivers/gpu/nvgpu/os/linux/module.h          |  1 -
 drivers/gpu/nvgpu/os/linux/pci_power.c       |  7 +-
 drivers/gpu/nvgpu/os/posix/nvgpu.c           | 10 ++
 13 files changed, 252 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/nvgpu/common/fifo/channel.c b/drivers/gpu/nvgpu/common/fifo/channel.c
index ecc5606a8..709fc5fdc 100644
--- a/drivers/gpu/nvgpu/common/fifo/channel.c
+++ b/drivers/gpu/nvgpu/common/fifo/channel.c
@@ -2296,6 +2296,33 @@ void nvgpu_channel_set_error_notifier(struct gk20a *g, struct nvgpu_channel *ch,
 	g->ops.channel.set_error_notifier(ch, error_notifier);
 }
 
+#ifndef CONFIG_NVGPU_RECOVERY
+/*
+ * SW quiesce, channel side: walk every channel slot and, for each channel a
+ * reference can be taken on, post the idle-timeout error notifier, mark the
+ * channel as timed out, wake its wait queues and drop the reference again.
+ */
+void nvgpu_channel_sw_quiesce(struct gk20a *g)
+{
+	struct nvgpu_fifo *fifo = &g->fifo;
+	u32 i;
+
+	for (i = 0; i < fifo->num_channels; i++) {
+		struct nvgpu_channel *c = nvgpu_channel_get(&fifo->channel[i]);
+
+		/* No reference handed out for this slot: nothing to notify. */
+		if (c == NULL) {
+			continue;
+		}
+
+		nvgpu_channel_set_error_notifier(g, c,
+			NVGPU_ERR_NOTIFIER_FIFO_ERROR_IDLE_TIMEOUT);
+		nvgpu_channel_set_has_timedout_and_wakeup_wqs(g, c);
+		nvgpu_channel_put(c);
+	}
+}
+#endif
+
 /*
  * Stop deterministic channel activity for do_idle() when power needs to go off
  * momentarily but deterministic channels keep power refs for potentially a
diff --git a/drivers/gpu/nvgpu/common/fifo/fifo.c b/drivers/gpu/nvgpu/common/fifo/fifo.c
index 5a4c19441..dc9582dee 100644
--- a/drivers/gpu/nvgpu/common/fifo/fifo.c
+++ b/drivers/gpu/nvgpu/common/fifo/fifo.c
@@ -263,3 +263,30 @@ int nvgpu_fifo_suspend(struct gk20a *g)
 	nvgpu_log_fn(g, "done");
 	return 0;
 }
+
+#ifndef CONFIG_NVGPU_RECOVERY
+/*
+ * SW quiesce, fifo side. With the active runlists locked: disable every
+ * runlist, preempt them all, then run the channel-level quiesce.
+ */
+void nvgpu_fifo_sw_quiesce(struct gk20a *g)
+{
+	u32 all_runlists;
+
+	nvgpu_runlist_lock_active_runlists(g);
+
+	/* The same mask is used for both the disable and the preempt step. */
+	all_runlists = nvgpu_runlist_get_runlists_mask(g,
+			0U, ID_TYPE_UNKNOWN, 0U, 0U);
+
+	/* Disable all runlists */
+	g->ops.runlist.write_state(g, all_runlists, RUNLIST_DISABLED);
+
+	/* Preempt all runlists (runlist->reset_eng_bitmask will be ignored) */
+	g->ops.fifo.preempt_runlists_for_rc(g, all_runlists);
+
+	nvgpu_channel_sw_quiesce(g);
+
+	nvgpu_runlist_unlock_active_runlists(g);
+}
+#endif
diff --git a/drivers/gpu/nvgpu/common/init/nvgpu_init.c b/drivers/gpu/nvgpu/common/init/nvgpu_init.c
index c5e4b71b3..587a3f311 100644
--- a/drivers/gpu/nvgpu/common/init/nvgpu_init.c
+++ b/drivers/gpu/nvgpu/common/init/nvgpu_init.c
@@ -73,6 +73,122 @@ static void gk20a_mask_interrupts(struct gk20a *g)
 	}
 }
 
+#ifndef CONFIG_NVGPU_RECOVERY
+/*
+ * Body of the "sw-quiesce" thread.
+ *
+ * Blocks until nvgpu_sw_quiesce() has set sw_quiesce_pending or the thread
+ * is asked to stop. On a quiesce request it takes power_lock, marks the
+ * driver as dying, disables and masks interrupts and quiesces the fifo.
+ * The thread exits afterwards; it serves a single request.
+ *
+ * Returns 0 when quiesce completed or the thread was stopped before any
+ * request, -EINVAL when the GPU is not powered on or is virtual.
+ */
+static int nvgpu_sw_quiesce_thread(void *data)
+{
+	struct gk20a *g = data;
+	int err = 0;
+
+	/* wait until SW quiesce is requested */
+	NVGPU_COND_WAIT(&g->sw_quiesce_cond,
+		g->sw_quiesce_pending ||
+		nvgpu_thread_should_stop(&g->sw_quiesce_thread), 0U);
+
+	if (nvgpu_thread_should_stop(&g->sw_quiesce_thread)) {
+		goto done;
+	}
+
+	nvgpu_err(g, "sw quiesce in progress");
+
+	nvgpu_mutex_acquire(&g->power_lock);
+
+	if (!g->power_on || g->is_virtual) {
+		err = -EINVAL;
+		goto idle;
+	}
+
+	nvgpu_start_gpu_idle(g);
+	nvgpu_disable_irqs(g);
+	gk20a_mask_interrupts(g);
+	nvgpu_fifo_sw_quiesce(g);
+
+idle:
+	nvgpu_mutex_release(&g->power_lock);
+	nvgpu_err(g, "sw quiesce done, err=%d", err);
+
+done:
+	nvgpu_log_info(g, "done");
+	return err;
+}
+
+/*
+ * Set up the condition variable and start the thread used for SW quiesce.
+ *
+ * Returns 0 on success, else the error reported by nvgpu_cond_init() or
+ * nvgpu_thread_create().
+ *
+ * NOTE(review): called from nvgpu_finalize_poweron(), while the teardown
+ * only runs from gk20a_free_cb(). If poweron can run more than once for a
+ * device, this re-initializes the cond and starts another thread - confirm
+ * and guard against re-entry if so.
+ */
+static int nvgpu_sw_quiesce_init_support(struct gk20a *g)
+{
+	int err;
+
+	err = nvgpu_cond_init(&g->sw_quiesce_cond);
+	if (err != 0) {
+		return err;
+	}
+
+	g->sw_quiesce_pending = false;
+
+	return nvgpu_thread_create(&g->sw_quiesce_thread, g,
+			nvgpu_sw_quiesce_thread, "sw-quiesce");
+}
+
+/*
+ * Tear down SW quiesce support: stop the thread, then destroy the cond.
+ *
+ * NOTE(review): the call added to gk20a_free_cb() does not check whether
+ * nvgpu_sw_quiesce_init_support() ever ran or succeeded - confirm that
+ * nvgpu_thread_stop() and nvgpu_cond_destroy() tolerate that.
+ */
+static void nvgpu_sw_quiesce_remove_support(struct gk20a *g)
+{
+	nvgpu_thread_stop(&g->sw_quiesce_thread);
+	nvgpu_cond_destroy(&g->sw_quiesce_cond);
+}
+#endif
+
+/*
+ * Request SW quiesce; the full sequence is described in nvgpu_init.h.
+ *
+ * Only flags the request and wakes the sw-quiesce thread, which does the
+ * actual work. On virtual GPUs, and when CONFIG_NVGPU_RECOVERY is set, it
+ * just logs that SW quiesce is not supported.
+ */
+void nvgpu_sw_quiesce(struct gk20a *g)
+{
+#ifndef CONFIG_NVGPU_RECOVERY
+	if (!g->is_virtual) {
+		nvgpu_err(g, "SW quiesce requested");
+
+		/*
+		 * When this flag is set, interrupt handlers should
+		 * exit after masking interrupts. This should mitigate
+		 * interrupt storm cases.
+		 */
+		g->sw_quiesce_pending = true;
+
+		nvgpu_cond_signal(&g->sw_quiesce_cond);
+		return;
+	}
+#endif
+	nvgpu_err(g, "sw quiesce not supported");
+}
+
 int nvgpu_prepare_poweroff(struct gk20a *g)
 {
 	int tmp_ret, ret = 0;
@@ -164,6 +280,14 @@ int nvgpu_finalize_poweron(struct gk20a *g)
 
 	g->power_on = true;
 
+#ifndef CONFIG_NVGPU_RECOVERY
+	err = nvgpu_sw_quiesce_init_support(g);
+	if (err != 0) {
+		nvgpu_err(g, "failed to init sw-quiesce support");
+		goto done;
+	}
+#endif
+
 #ifdef CONFIG_NVGPU_DGPU
 	/*
 	 * Before probing the GPU make sure the GPU's state is cleared. This is
@@ -668,6 +792,10 @@ static void gk20a_free_cb(struct nvgpu_ref *refcount)
 		g->ops.ltc.ltc_remove_support(g);
 	}
 
+#ifndef CONFIG_NVGPU_RECOVERY
+	nvgpu_sw_quiesce_remove_support(g);
+#endif
+
 	if (g->gfree != NULL) {
 		g->gfree(g);
 	}
diff --git a/drivers/gpu/nvgpu/include/nvgpu/channel.h b/drivers/gpu/nvgpu/include/nvgpu/channel.h
index f2dc1e13f..b53e5d1e9 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/channel.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/channel.h
@@ -754,6 +754,21 @@ int nvgpu_channel_setup_sw(struct gk20a *g);
  */
 void nvgpu_channel_cleanup_sw(struct gk20a *g);
 
+#ifndef CONFIG_NVGPU_RECOVERY
+/**
+ * @brief Emergency quiescing of channels
+ *
+ * @param g[in]		Pointer to GPU driver struct.
+ *
+ * The driver has encountered an uncorrectable error and is entering
+ * the SW quiesce state. For each channel that can be referenced:
+ * - set error notifier
+ * - mark channel as unserviceable
+ * - signal on wait queues (notify_wq and semaphore_wq)
+ */
+void nvgpu_channel_sw_quiesce(struct gk20a *g);
+#endif
+
 /**
  * @brief Close channel
  *
diff --git a/drivers/gpu/nvgpu/include/nvgpu/fifo.h b/drivers/gpu/nvgpu/include/nvgpu/fifo.h
index 9e3073301..b17ba9f6a 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/fifo.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/fifo.h
@@ -325,5 +325,16 @@ void nvgpu_fifo_cleanup_sw_common(struct gk20a *g);
 
 const char *nvgpu_fifo_decode_pbdma_ch_eng_status(u32 index);
 int nvgpu_fifo_suspend(struct gk20a *g);
+#ifndef CONFIG_NVGPU_RECOVERY
+/**
+ * @brief Emergency quiescing of the fifo
+ *
+ * @param g[in]		Pointer to GPU driver struct.
+ *
+ * With the active runlists locked: disable all runlists, preempt all
+ * runlists, then quiesce the channels with nvgpu_channel_sw_quiesce().
+ */
+void nvgpu_fifo_sw_quiesce(struct gk20a *g);
+#endif
 
 #endif /* NVGPU_FIFO_COMMON_H */
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
index d822c13a5..f24b2649e 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gk20a.h
@@ -2000,6 +2000,12 @@ struct gk20a {
 	bool suspended;
 	bool sw_ready;
 
+#ifndef CONFIG_NVGPU_RECOVERY
+	bool sw_quiesce_pending;
+	struct nvgpu_cond sw_quiesce_cond;
+	struct nvgpu_thread sw_quiesce_thread;
+#endif
+
 	u64 log_mask;
 	u32 log_trace;
 
diff --git a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_init.h b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_init.h
index 46502bf2e..ca20af9fa 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/nvgpu_init.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/nvgpu_init.h
@@ -23,6 +23,9 @@
 #ifndef NVGPU_INIT_H
 #define NVGPU_INIT_H
 
+struct gk20a;
+struct nvgpu_ref;
+
 /**
  * @file
  * @page unit-init Unit Init
@@ -99,6 +102,49 @@ int nvgpu_finalize_poweron(struct gk20a *g);
  */
 int nvgpu_prepare_poweroff(struct gk20a *g);
 
+/**
+ * @brief Enter SW Quiesce state
+ *
+ * @param g [in] The GPU
+ *
+ * Enters SW quiesce state:
+ * - set sw_quiesce_pending: When this flag is set, interrupt
+ *   handlers exit after masking interrupts. This should help mitigate
+ *   an interrupt storm.
+ * - wake up thread to complete quiescing.
+ *
+ * The thread performs the following:
+ * - set NVGPU_DRIVER_IS_DYING to prevent allocation of new resources
+ * - disable interrupts
+ * - disable fifo scheduling
+ * - preempt all runlists
+ * - set error notifier for all active channels
+ *
+ * @note: For channels with usermode submit enabled, userspace can
+ * still ring doorbell, but this will not trigger any work on
+ * engines since fifo scheduling is disabled.
+ */
+void nvgpu_sw_quiesce(struct gk20a *g);
+
+/**
+ * @brief Start GPU idle
+ *
+ * @param g [in] The GPU
+ *
+ * Set #NVGPU_DRIVER_IS_DYING to prevent allocation of new resources.
+ * User API calls fail once this flag is set, because gk20a_busy() fails.
+ */
+void nvgpu_start_gpu_idle(struct gk20a *g);
+
+/**
+ * @brief Disable interrupt handlers
+ *
+ * @param g [in] The GPU
+ *
+ * Disable interrupt handlers.
+ */
+void nvgpu_disable_irqs(struct gk20a *g);
+
 /**
  * @brief Check if the device can go busy
  *
diff --git a/drivers/gpu/nvgpu/libnvgpu-drv_safe.export b/drivers/gpu/nvgpu/libnvgpu-drv_safe.export
index 25603204e..8c46709df 100644
--- a/drivers/gpu/nvgpu/libnvgpu-drv_safe.export
+++ b/drivers/gpu/nvgpu/libnvgpu-drv_safe.export
@@ -254,6 +254,7 @@ nvgpu_sgt_ipa_to_pa
 nvgpu_spinlock_acquire
 nvgpu_spinlock_init
 nvgpu_spinlock_release
+nvgpu_sw_quiesce
 nvgpu_userd_init_slabs
 nvgpu_usermode_writel
 nvgpu_vfree_impl
diff --git a/drivers/gpu/nvgpu/os/linux/intr.c b/drivers/gpu/nvgpu/os/linux/intr.c
index 9ca5d4307..609aec057 100644
--- a/drivers/gpu/nvgpu/os/linux/intr.c
+++ b/drivers/gpu/nvgpu/os/linux/intr.c
@@ -20,6 +20,9 @@
 
 #include <nvgpu/atomic.h>
 #include <nvgpu/unit.h>
+#ifndef CONFIG_NVGPU_RECOVERY
+#include <nvgpu/nvgpu_init.h>
+#endif
 #include "os_linux.h"
 
 irqreturn_t nvgpu_intr_stall(struct gk20a *g)
@@ -39,6 +42,11 @@ irqreturn_t nvgpu_intr_stall(struct gk20a *g)
 		return IRQ_NONE;
 
 	g->ops.mc.intr_stall_pause(g);
+#ifndef CONFIG_NVGPU_RECOVERY
+	if (g->sw_quiesce_pending) {
+		return IRQ_NONE;
+	}
+#endif
 
 	nvgpu_atomic_inc(&g->hw_irq_stall_count);
 
@@ -90,6 +98,11 @@ irqreturn_t nvgpu_intr_nonstall(struct gk20a *g)
 		return IRQ_NONE;
 
 	g->ops.mc.intr_nonstall_pause(g);
+#ifndef CONFIG_NVGPU_RECOVERY
+	if (g->sw_quiesce_pending) {
+		return IRQ_NONE;
+	}
+#endif
 
 	ops = g->ops.mc.isr_nonstall(g);
 	if (ops) {
diff --git a/drivers/gpu/nvgpu/os/linux/module.c b/drivers/gpu/nvgpu/os/linux/module.c
index 22593f817..061c7e3d7 100644
--- a/drivers/gpu/nvgpu/os/linux/module.c
+++ b/drivers/gpu/nvgpu/os/linux/module.c
@@ -534,6 +534,16 @@ static int gk20a_lockout_registers(struct gk20a *g)
 	return 0;
 }
 
+void nvgpu_disable_irqs(struct gk20a *g)
+{
+	if (!g->irqs_enabled)	/* IRQs not enabled: nothing to do */
+		return;
+	disable_irq(g->irq_stall);
+	if (g->irq_nonstall != g->irq_stall)	/* skip a shared line */
+		disable_irq(g->irq_nonstall);
+	g->irqs_enabled = 0;
+}
+
 static int gk20a_pm_prepare_poweroff(struct device *dev)
 {
 	struct gk20a *g = get_gk20a(dev);
@@ -553,12 +563,7 @@ static int gk20a_pm_prepare_poweroff(struct device *dev)
 
 	/* disable IRQs and wait for completion */
 	irqs_enabled = g->irqs_enabled;
-	if (irqs_enabled) {
-		disable_irq(g->irq_stall);
-		if (g->irq_stall != g->irq_nonstall)
-			disable_irq(g->irq_nonstall);
-		g->irqs_enabled = 0;
-	}
+	nvgpu_disable_irqs(g);
 
 	gk20a_scale_suspend(dev);
 
@@ -1319,21 +1324,19 @@ static int gk20a_pm_deinit(struct device *dev)
 	return 0;
 }
 
-int nvgpu_start_gpu_idle(struct gk20a *g)
+void nvgpu_start_gpu_idle(struct gk20a *g)
 {
 	struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g);
 
 	down_write(&l->busy_lock);
-
-	/*
-	 * Set NVGPU_DRIVER_IS_DYING to avoid gpu being marked
-	 * busy to submit new work to gpu.
-	 */
 	nvgpu_set_enabled(g, NVGPU_DRIVER_IS_DYING, true);
-
+	/*
+	 * GR SW ready needs to be invalidated at this time with the busy lock
+	 * held to prevent a race condition in the gr/mm code.
+	 */
+	nvgpu_gr_sw_ready(g, false);
+	g->sw_ready = false;
 	up_write(&l->busy_lock);
-
-	return 0;
 }
 
 int nvgpu_wait_for_gpu_idle(struct gk20a *g)
@@ -1360,13 +1363,7 @@ void gk20a_driver_start_unload(struct gk20a *g)
 
 	nvgpu_log(g, gpu_dbg_shutdown, "Driver is now going down!\n");
 
-	down_write(&l->busy_lock);
-	nvgpu_set_enabled(g, NVGPU_DRIVER_IS_DYING, true);
-	/* GR SW ready needs to be invalidated at this time with the busy lock
-	 * held to prevent a racing condition on the gr/mm code */
-	nvgpu_gr_sw_ready(g, false);
-	g->sw_ready = false;
-	up_write(&l->busy_lock);
+	nvgpu_start_gpu_idle(g);
 
 	if (g->is_virtual)
 		return;
diff --git a/drivers/gpu/nvgpu/os/linux/module.h b/drivers/gpu/nvgpu/os/linux/module.h
index 218d33044..83c5bbf52 100644
--- a/drivers/gpu/nvgpu/os/linux/module.h
+++ b/drivers/gpu/nvgpu/os/linux/module.h
@@ -24,7 +24,6 @@ void gk20a_remove_support(struct gk20a *g);
 void gk20a_driver_start_unload(struct gk20a *g);
 int nvgpu_quiesce(struct gk20a *g);
 int nvgpu_remove(struct device *dev, struct class *class);
-int nvgpu_start_gpu_idle(struct gk20a *g);
 int nvgpu_wait_for_gpu_idle(struct gk20a *g);
 void nvgpu_free_irq(struct gk20a *g);
 struct device_node *nvgpu_get_node(struct gk20a *g);
diff --git a/drivers/gpu/nvgpu/os/linux/pci_power.c b/drivers/gpu/nvgpu/os/linux/pci_power.c
index a8ebc55d0..ba48cfade 100644
--- a/drivers/gpu/nvgpu/os/linux/pci_power.c
+++ b/drivers/gpu/nvgpu/os/linux/pci_power.c
@@ -24,6 +24,7 @@
 #include <linux/debugfs.h>
 
 #include <nvgpu/lock.h>
+#include <nvgpu/nvgpu_init.h>
 
 #include "module.h"
 #include "platform_gk20a.h"
@@ -527,11 +528,7 @@ static int nvgpu_pci_gpu_power_off(char *dev_name)
 	g = get_gk20a(dev);
 	pgpios = &pp->gpios;
 
-	ret = nvgpu_start_gpu_idle(g);
-	if (ret) {
-		pr_err("nvgpu: start gpu idle failed\n");
-		goto out;
-	}
+	nvgpu_start_gpu_idle(g);
 
 	ret = nvgpu_wait_for_gpu_idle(g);
 	if (ret) {
diff --git a/drivers/gpu/nvgpu/os/posix/nvgpu.c b/drivers/gpu/nvgpu/os/posix/nvgpu.c
index 94ba24a42..b67924f24 100644
--- a/drivers/gpu/nvgpu/os/posix/nvgpu.c
+++ b/drivers/gpu/nvgpu/os/posix/nvgpu.c
@@ -28,6 +28,7 @@
 #include <nvgpu/types.h>
 #include <nvgpu/atomic.h>
 #include <nvgpu/nvgpu_common.h>
+#include <nvgpu/nvgpu_init.h>
 #include <nvgpu/os_sched.h>
 #include <nvgpu/gk20a.h>
 #include <nvgpu/enabled.h>
@@ -44,6 +45,18 @@ void nvgpu_kernel_restart(void *cmd)
 	BUG();
 }
 
+/* Posix flavor: flagging the driver as dying is all that is done here. */
+void nvgpu_start_gpu_idle(struct gk20a *g)
+{
+	nvgpu_set_enabled(g, NVGPU_DRIVER_IS_DYING, true);
+}
+
+/* Intentionally empty in the posix build. */
+void nvgpu_disable_irqs(struct gk20a *g)
+{
+	(void)g;
+}
+
 /*
  * We have no runtime PM stuff in userspace so these are really just noops.
  */