gpu: nvgpu: stop nvs thread during unload

The nvs worker thread is created on each resume and deinitialized on
every suspend. nvgpu can be resumed while the owning process is being
killed, and thread creation can fail in that situation, which leads to
a driver resume failure.

To avoid the issue above, do not stop the nvs worker thread during
suspend; instead, let the thread created on the first resume always
handle the nvs work. Deinitialize the nvs worker thread only during
nvgpu unload.

Also, log the error returned by nvgpu_thread_create in the function
nvgpu_worker_start.

bug 3480192

Change-Id: I8d5d9e7716a950b162cc3c2d9fcfde07c4edfcf6
Signed-off-by: Sagar Kamble <skamble@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2646218
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: svcacv <svcacv@nvidia.com>
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
Reviewed-by: svc-mobile-misra <svc-mobile-misra@nvidia.com>
Reviewed-by: svc-mobile-cert <svc-mobile-cert@nvidia.com>
Reviewed-by: Vijayakumar Subbu <vsubbu@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
GVS: Gerrit_Virtual_Submit
This commit is contained in:
Sagar Kamble
2021-12-23 11:56:48 +05:30
committed by mobile promotions
parent 4fd0f11e9c
commit d424598b7b
4 changed files with 9 additions and 20 deletions

View File

@@ -339,10 +339,6 @@ int nvgpu_prepare_poweroff(struct gk20a *g)
if (tmp_ret != 0) {
ret = tmp_ret;
}
tmp_ret = nvgpu_nvs_suspend(g);
if (tmp_ret != 0) {
ret = tmp_ret;
}
tmp_ret = g->ops.fifo.fifo_suspend(g);
if (tmp_ret != 0) {
ret = tmp_ret;

View File

@@ -161,6 +161,8 @@ static void nvgpu_nvs_worker_deinit(struct gk20a *g)
struct nvgpu_worker *worker = &g->scheduler->worker.worker;
nvgpu_worker_deinit(worker);
nvs_dbg(g, "NVS worker suspended");
}
int nvgpu_nvs_init(struct gk20a *g)
@@ -198,6 +200,8 @@ void nvgpu_nvs_remove_support(struct gk20a *g)
return;
}
nvgpu_nvs_worker_deinit(g);
nvs_domain_for_each(sched->sched, nvs_dom) {
struct nvgpu_nvs_domain *nvgpu_dom = nvs_dom->priv;
if (nvgpu_dom->ref != 1U) {
@@ -217,14 +221,6 @@ void nvgpu_nvs_remove_support(struct gk20a *g)
nvgpu_mutex_destroy(&g->sched_mutex);
}
int nvgpu_nvs_suspend(struct gk20a *g)
{
nvgpu_nvs_worker_deinit(g);
nvs_dbg(g, "NVS worker suspended");
return 0;
}
int nvgpu_nvs_open(struct gk20a *g)
{
int err = 0;
@@ -235,8 +231,6 @@ int nvgpu_nvs_open(struct gk20a *g)
if (g->scheduler != NULL) {
/* resuming from railgate */
err = nvgpu_nvs_worker_init(g);
nvs_dbg(g, "NVS worker resume, err=%d", err);
goto unlock;
}

View File

@@ -199,6 +199,11 @@ static int nvgpu_worker_start(struct nvgpu_worker *worker)
err = nvgpu_thread_create(&worker->poll_task, worker,
nvgpu_worker_poll_work, worker->thread_name);
if (err != 0) {
nvgpu_err(worker->g,
"failed to create worker poller thread %s err %d",
worker->thread_name, err);
}
nvgpu_mutex_release(&worker->start_lock);
return err;

View File

@@ -84,7 +84,6 @@ struct nvgpu_nvs_scheduler {
int nvgpu_nvs_init(struct gk20a *g);
int nvgpu_nvs_open(struct gk20a *g);
void nvgpu_nvs_remove_support(struct gk20a *g);
int nvgpu_nvs_suspend(struct gk20a *g);
void nvgpu_nvs_get_log(struct gk20a *g, s64 *timestamp, const char **msg);
u32 nvgpu_nvs_domain_count(struct gk20a *g);
int nvgpu_nvs_del_domain(struct gk20a *g, u64 dom_id);
@@ -113,11 +112,6 @@ static inline void nvgpu_nvs_remove_support(struct gk20a *g)
{
}
static inline int nvgpu_nvs_suspend(struct gk20a *g)
{
return 0;
}
static inline struct nvgpu_nvs_domain *
nvgpu_nvs_domain_by_name(struct gk20a *g, const char *name)
{