gpu: nvgpu: refactor teardown to support unbind

This change refactors the teardown in remove to ensure that it is possible to unload the driver while leaving fds open. This is achieved by making sure that the SW state is kept alive until all fds are closed and by checking that ioctl calls made after the teardown fail. Normally, this would be achieved by calls into gk20a_busy(), but in kickoff we don't call into that, to reduce latency, so we need to check the driver status directly, and also in some of the functions, as we need to make sure the ioctl does not dereference the device or platform struct. Bug 200277762 JIRA: EVLR-1023 Change-Id: I163e47a08c29d4d5b3ab79f0eb531ef234f40bde Signed-off-by: David Nieto <dmartineznie@nvidia.com> Reviewed-on: http://git-master/r/1320219 Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com> Reviewed-by: svccoveritychecker <svccoveritychecker@nvidia.com> Reviewed-by: Shreshtha Sahu <ssahu@nvidia.com> Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com> (cherry picked from commit e0f2afe5eb) Reviewed-on: http://git-master/r/1327755 GVS: Gerrit_Virtual_Submit Reviewed-by: Sumeet Gupta <sumeetg@nvidia.com>
2025-12-25 11:04:51 +03:00 · 2017-03-13 20:23:03 -07:00
parent ccccde66e7
commit abee92ab92
10 changed files with 91 additions and 61 deletions
--- a/drivers/gpu/nvgpu/gk20a/gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.c
@@ -697,26 +697,14 @@ static irqreturn_t gk20a_intr_thread_stall(int irq, void *dev_id)
 	return g->ops.mc.isr_thread_stall(g);
 }

-void gk20a_remove_support(struct device *dev)
+void gk20a_remove_support(struct gk20a *g)
 {
-	struct gk20a *g = get_gk20a(dev);
-
 #ifdef CONFIG_TEGRA_COMMON
 	tegra_unregister_idle_unidle();
 #endif
 	if (g->dbg_regops_tmp_buf)
 		kfree(g->dbg_regops_tmp_buf);

-	nvgpu_wait_for_deferred_interrupts(g);
-
-	gk20a_channel_cancel_pending_sema_waits(g);
-
-	if (g->nonstall_work_queue) {
-		cancel_work_sync(&g->nonstall_fn_work);
-		destroy_workqueue(g->nonstall_work_queue);
-		g->nonstall_work_queue = NULL;
-	}
-
 	if (g->pmu.remove_support)
 		g->pmu.remove_support(&g->pmu);

@@ -1636,6 +1624,11 @@ static int gk20a_probe(struct platform_device *dev)
 	if (gk20a->irq_stall != gk20a->irq_nonstall)
 		disable_irq(gk20a->irq_nonstall);

+	/*
+	 * is_fmodel needs to be in gk20a struct for deferred teardown
+	 */
+	gk20a->is_fmodel = platform->is_fmodel;
+
 	err = gk20a_init_support(dev);
 	if (err)
 		return err;
@@ -1682,11 +1675,6 @@ static int __exit gk20a_remove(struct platform_device *pdev)
 	if (IS_ENABLED(CONFIG_GK20A_DEVFREQ))
 		gk20a_scale_exit(dev);

-	if (g->remove_support)
-		g->remove_support(dev);
-
-	gk20a_ce_destroy(g);
-
 #ifdef CONFIG_ARCH_TEGRA_18x_SOC
 	nvgpu_clk_arb_cleanup_arbiter(g);
 #endif
@@ -1775,7 +1763,21 @@ void gk20a_busy_noresume(struct device *dev)
 void gk20a_driver_start_unload(struct gk20a *g)
 {
 	gk20a_dbg(gpu_dbg_shutdown, "Driver is now going down!\n");
+
+	down_write(&g->busy_lock);
 	g->driver_is_dying = 1;
+	up_write(&g->busy_lock);
+
+	gk20a_wait_for_idle(g->dev);
+
+	nvgpu_wait_for_deferred_interrupts(g);
+	gk20a_channel_cancel_pending_sema_waits(g);
+
+	if (g->nonstall_work_queue) {
+		cancel_work_sync(&g->nonstall_fn_work);
+		destroy_workqueue(g->nonstall_work_queue);
+		g->nonstall_work_queue = NULL;
+	}
 }

 int gk20a_wait_for_idle(struct device *dev)
@@ -2283,6 +2285,12 @@ static void gk20a_free_cb(struct kref *refcount)
 		struct gk20a, refcount);

 	gk20a_dbg(gpu_dbg_shutdown, "Freeing GK20A struct!");
+
+	gk20a_ce_destroy(g);
+
+	if (g->remove_support)
+		g->remove_support(g);
+
 	kfree(g);
 }