gpu: nvgpu: prevent crash during unbind

This change fixes crashes during unbind that were introduced in the driver during the OS unification refactoring due to lack of coverage of the remove() function. The fixes during remove are: (1) Prevent NULL dereference on GPUs with secure boot (2) Prevent NULL dereferences when fecs_trace is not enabled (3) Add a PRAMIN blocker during driver removal if HW is no longer accessible (4) Prevent double free of debugfs nodes, as they are already handled by the debugfs_remove_recursive() call (5) Allow quiesce() to be called without first checking whether the HW accessible flag is set (6) Add a function to free the IRQs so no IRQ association is left on the driver after it is removed (7) Prevent NULL dereference in nvgpu_thread_stop() if the thread is already stopped JIRA: EVLR-1739 Change-Id: I787d38f202d5267a6b34815f23e1bc88110e8455 Signed-off-by: David Nieto <dmartineznie@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/1563005 Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com> Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
2025-12-23 01:50:07 +03:00 · 2017-09-18 20:31:28 -07:00
parent f6fcecfc6f
commit 7134e9e852
12 changed files with 98 additions and 65 deletions
--- a/drivers/gpu/nvgpu/common/linux/module.c
+++ b/drivers/gpu/nvgpu/common/linux/module.c
@@ -226,9 +226,12 @@ static int gk20a_pm_prepare_poweroff(struct device *dev)
 	 * After this point, gk20a interrupts should not get
 	 * serviced.
 	 */
-	disable_irq(g->irq_stall);
-	if (g->irq_stall != g->irq_nonstall)
-		disable_irq(g->irq_nonstall);
+	if (g->irqs_enabled) {
+		disable_irq(g->irq_stall);
+		if (g->irq_stall != g->irq_nonstall)
+			disable_irq(g->irq_nonstall);
+		g->irqs_enabled = 0;
+	}

 	/* Decrement platform power refcount */
 	if (platform->idle)
@@ -640,6 +643,18 @@ static int gk20a_pm_unrailgate(struct device *dev)
 	return ret;
 }

+/*
+ * Detach the driver from its OS IRQs: stall, plus nonstall when distinct.
+ */
+void nvgpu_free_irq(struct gk20a *g)
+{
+	struct device *dev = dev_from_gk20a(g);
+
+	devm_free_irq(dev, g->irq_stall, g);
+	if (g->irq_stall != g->irq_nonstall)
+		devm_free_irq(dev, g->irq_nonstall, g);
+}
+
 /*
 * Idle the GPU in preparation of shutdown/remove.
 * gk20a_driver_start_unload() does not idle the GPU, but instead changes the SW
@@ -651,24 +666,27 @@ int nvgpu_quiesce(struct gk20a *g)
 	int err;
 	struct device *dev = dev_from_gk20a(g);

-	err = gk20a_wait_for_idle(g);
-	if (err) {
-		nvgpu_err(g, "failed to idle GPU, err=%d", err);
-		return err;
-	}
+	if (g->power_on) {
+		err = gk20a_wait_for_idle(g);
+		if (err) {
+			nvgpu_err(g, "failed to idle GPU, err=%d", err);
+			return err;
+		}

-	err = gk20a_fifo_disable_all_engine_activity(g, true);
-	if (err) {
-		nvgpu_err(g, "failed to disable engine activity, err=%d",
-			err);
+		err = gk20a_fifo_disable_all_engine_activity(g, true);
+		if (err) {
+			nvgpu_err(g,
+				"failed to disable engine activity, err=%d",
+				err);
 		return err;
-	}
+		}

-	err = gk20a_fifo_wait_engine_idle(g);
-	if (err) {
-		nvgpu_err(g, "failed to idle engines, err=%d",
-			err);
-		return err;
+		err = gk20a_fifo_wait_engine_idle(g);
+		if (err) {
+			nvgpu_err(g, "failed to idle engines, err=%d",
+				err);
+			return err;
+		}
 	}

 	if (gk20a_gpu_is_virtual(dev))
@@ -679,6 +697,7 @@ int nvgpu_quiesce(struct gk20a *g)
 	if (err)
 		nvgpu_err(g, "failed to prepare for poweroff, err=%d",
 			err);
+
 	return err;
 }