gpu: nvgpu: stop nvs thread during unload

The nvs worker thread is created on each resume and deinitialized on
every suspend. nvgpu can be resumed while the owning process is being
killed, and thread creation can fail in that situation, which leads to
a driver resume failure.

To avoid the issue above, do not stop the nvs worker thread during
suspend; instead, let the thread created on the first resume always
handle the nvs work. Deinitialize the nvs worker thread only during
nvgpu unload.

Also, log the error returned by nvgpu_thread_create in the function
nvgpu_worker_start.

bug 3480192

Change-Id: I8d5d9e7716a950b162cc3c2d9fcfde07c4edfcf6
Signed-off-by: Sagar Kamble <skamble@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2646218
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: svcacv <svcacv@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-misra <svc-mobile-misra@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
This commit is contained in:
Sagar Kamble
2021-12-23 11:56:48 +05:30
committed by mobile promotions
parent 4fd0f11e9c
commit d424598b7b
4 changed files with 9 additions and 20 deletions

View File

@@ -339,10 +339,6 @@ int nvgpu_prepare_poweroff(struct gk20a *g)
if (tmp_ret != 0) {
ret = tmp_ret;
}
tmp_ret = nvgpu_nvs_suspend(g);
if (tmp_ret != 0) {
ret = tmp_ret;
}
tmp_ret = g->ops.fifo.fifo_suspend(g);
if (tmp_ret != 0) {
ret = tmp_ret;

View File

@@ -161,6 +161,8 @@ static void nvgpu_nvs_worker_deinit(struct gk20a *g)
struct nvgpu_worker *worker = &g->scheduler->worker.worker;
nvgpu_worker_deinit(worker);
nvs_dbg(g, "NVS worker suspended");
}
int nvgpu_nvs_init(struct gk20a *g)
@@ -198,6 +200,8 @@ void nvgpu_nvs_remove_support(struct gk20a *g)
return;
}
nvgpu_nvs_worker_deinit(g);
nvs_domain_for_each(sched->sched, nvs_dom) {
struct nvgpu_nvs_domain *nvgpu_dom = nvs_dom->priv;
if (nvgpu_dom->ref != 1U) {
@@ -217,14 +221,6 @@ void nvgpu_nvs_remove_support(struct gk20a *g)
nvgpu_mutex_destroy(&g->sched_mutex);
}
int nvgpu_nvs_suspend(struct gk20a *g)
{
nvgpu_nvs_worker_deinit(g);
nvs_dbg(g, "NVS worker suspended");
return 0;
}
int nvgpu_nvs_open(struct gk20a *g)
{
int err = 0;
@@ -235,8 +231,6 @@ int nvgpu_nvs_open(struct gk20a *g)
if (g->scheduler != NULL) {
/* resuming from railgate */
err = nvgpu_nvs_worker_init(g);
nvs_dbg(g, "NVS worker resume, err=%d", err);
goto unlock;
}

View File

@@ -199,6 +199,11 @@ static int nvgpu_worker_start(struct nvgpu_worker *worker)
err = nvgpu_thread_create(&worker->poll_task, worker,
nvgpu_worker_poll_work, worker->thread_name);
if (err != 0) {
nvgpu_err(worker->g,
"failed to create worker poller thread %s err %d",
worker->thread_name, err);
}
nvgpu_mutex_release(&worker->start_lock);
return err;

View File

@@ -84,7 +84,6 @@ struct nvgpu_nvs_scheduler {
int nvgpu_nvs_init(struct gk20a *g);
int nvgpu_nvs_open(struct gk20a *g);
void nvgpu_nvs_remove_support(struct gk20a *g);
int nvgpu_nvs_suspend(struct gk20a *g);
void nvgpu_nvs_get_log(struct gk20a *g, s64 *timestamp, const char **msg);
u32 nvgpu_nvs_domain_count(struct gk20a *g);
int nvgpu_nvs_del_domain(struct gk20a *g, u64 dom_id);
@@ -113,11 +112,6 @@ static inline void nvgpu_nvs_remove_support(struct gk20a *g)
{
}
static inline int nvgpu_nvs_suspend(struct gk20a *g)
{
return 0;
}
static inline struct nvgpu_nvs_domain *
nvgpu_nvs_domain_by_name(struct gk20a *g, const char *name)
{