gpu: nvgpu: fix race for nvgpu_thread_stop

The pmu init thread typically returns immediately
without calling nvgpu_thread_should_stop().

pmu_pg_kill_task() checks if the thread is running, and
if it is, calls nvgpu_thread_stop().

However, there's a race condition where the init thread could
have exited between the time that kill_task() checked the
running flag and the time we actually stop the thread, leading
to a kernel crash.

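Fix this by making the running flag in the nvgpu_thread struct
atomic. Both the thread proxy function and the thread_stop()
function will atomically exchange the flag with false, and each
uses the previous value to tell whether the other side has
already cleared it.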

In the case of nvgpu_thread_proxy(), if the flag is already false,
then nvgpu_thread_stop() has already reset it, at which point we
just wait for nvgpu_thread_should_stop() to return true.

In the case of nvgpu_thread_stop(), if the flag is already false,
then the thread proxy function has already exited, and there is
nothing more to do.

Bug 2591298

Change-Id: I9ba6b63c30a5c3e1df11e790094836b44373122b
Signed-off-by: Peter Daifuku <pdaifuku@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2230358
GVS: Gerrit_Virtual_Submit
Reviewed-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Peter Daifuku
2019-11-01 15:28:08 -07:00
committed by Alex Waterman
parent 44a28012de
commit c58029ad24
2 changed files with 26 additions and 7 deletions

View File

@@ -1,5 +1,5 @@
/*
* Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -17,11 +17,13 @@
#ifndef __NVGPU_THREAD_LINUX_H__
#define __NVGPU_THREAD_LINUX_H__
#include <nvgpu/atomic.h>
struct task_struct;
struct nvgpu_thread {
struct task_struct *task;
bool running;
nvgpu_atomic_t running;
int (*fn)(void *);
void *data;
};

View File

@@ -23,8 +23,19 @@ int nvgpu_thread_proxy(void *threaddata)
{
struct nvgpu_thread *thread = threaddata;
int ret = thread->fn(thread->data);
bool was_running;
thread->running = false;
was_running = nvgpu_atomic_xchg(&thread->running, false);
/* if the thread was not running, then nvgpu_thread_stop() was
* called, so just wait until we get the notification that we should
* stop.
*/
if (!was_running) {
while (!nvgpu_thread_should_stop(thread)) {
nvgpu_usleep_range(5000, 5100);
}
}
return ret;
}
@@ -40,15 +51,21 @@ int nvgpu_thread_create(struct nvgpu_thread *thread,
thread->task = task;
thread->fn = threadfn;
thread->data = data;
thread->running = true;
nvgpu_atomic_set(&thread->running, true);
wake_up_process(task);
return 0;
};
/*
 * Stop the thread described by @thread and forget its task pointer.
 *
 * Nothing is done when thread->task is NULL. Otherwise the running flag is
 * atomically swapped to false, and kthread_stop() is issued only when the
 * previous value was true. A previous value of false means the flag had
 * already been cleared elsewhere (nvgpu_thread_proxy() clears it once the
 * thread function returns), so there is no live thread left to stop.
 * thread->task is reset to NULL in both cases.
 */
void nvgpu_thread_stop(struct nvgpu_thread *thread)
{
	if (thread->task == NULL) {
		return;
	}

	/* Only the caller that flips running from true to false stops the task. */
	if (nvgpu_atomic_xchg(&thread->running, false)) {
		kthread_stop(thread->task);
	}

	thread->task = NULL;
}
@@ -72,12 +89,12 @@ bool nvgpu_thread_should_stop(struct nvgpu_thread *thread)
bool nvgpu_thread_is_running(struct nvgpu_thread *thread)
{
return ACCESS_ONCE(thread->running);
return nvgpu_atomic_read(&thread->running);
};
void nvgpu_thread_join(struct nvgpu_thread *thread)
{
while (ACCESS_ONCE(thread->running)) {
while (nvgpu_atomic_read(&thread->running)) {
nvgpu_msleep(10);
}
};