gpu: nvgpu: fix race for nvgpu_thread_stop

The pmu init thread typically returns immediately
without calling nvgpu_thread_should_stop().

pmu_pg_kill_task() checks if the thread is running, and
if it is, calls nvgpu_thread_stop().

However, there's a race condition where the init thread could
have exited between the time that kill_task() checked the
running flag and the time we actually stop the thread, leading
to a kernel crash.

Fix this by making the running flag in the nvgpu_thread struct
atomic. Both the thread proxy function and the thread_stop()
function will set the flag to false.

In the case of nvgpu_thread_proxy(), if the flag is already false,
then nvgpu_thread_stop() has already reset it, at which point we
just wait for nvgpu_thread_should_stop() to return true.

In the case of nvgpu_thread_stop(), if the flag is already false,
then the thread proxy function has already exited, and there is
nothing more to do.

Bug 2591298

Change-Id: I9ba6b63c30a5c3e1df11e790094836b44373122b
Signed-off-by: Peter Daifuku <pdaifuku@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2230358
GVS: Gerrit_Virtual_Submit
Reviewed-by: Thomas Fleury <tfleury@nvidia.com>
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Peter Daifuku
2019-11-01 15:28:08 -07:00
committed by Alex Waterman
parent 44a28012de
commit c58029ad24
2 changed files with 26 additions and 7 deletions

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
* *
* This program is free software; you can redistribute it and/or modify it * This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License, * under the terms and conditions of the GNU General Public License,
@@ -17,11 +17,13 @@
#ifndef __NVGPU_THREAD_LINUX_H__ #ifndef __NVGPU_THREAD_LINUX_H__
#define __NVGPU_THREAD_LINUX_H__ #define __NVGPU_THREAD_LINUX_H__
#include <nvgpu/atomic.h>
struct task_struct; struct task_struct;
struct nvgpu_thread { struct nvgpu_thread {
struct task_struct *task; struct task_struct *task;
bool running; nvgpu_atomic_t running;
int (*fn)(void *); int (*fn)(void *);
void *data; void *data;
}; };

View File

@@ -23,8 +23,19 @@ int nvgpu_thread_proxy(void *threaddata)
{ {
struct nvgpu_thread *thread = threaddata; struct nvgpu_thread *thread = threaddata;
int ret = thread->fn(thread->data); int ret = thread->fn(thread->data);
bool was_running;
thread->running = false; was_running = nvgpu_atomic_xchg(&thread->running, false);
/* if the thread was not running, then nvgpu_thread_stop() was
* called, so just wait until we get the notification that we should
* stop.
*/
if (!was_running) {
while (!nvgpu_thread_should_stop(thread)) {
nvgpu_usleep_range(5000, 5100);
}
}
return ret; return ret;
} }
@@ -40,15 +51,21 @@ int nvgpu_thread_create(struct nvgpu_thread *thread,
thread->task = task; thread->task = task;
thread->fn = threadfn; thread->fn = threadfn;
thread->data = data; thread->data = data;
thread->running = true; nvgpu_atomic_set(&thread->running, true);
wake_up_process(task); wake_up_process(task);
return 0; return 0;
}; };
void nvgpu_thread_stop(struct nvgpu_thread *thread) void nvgpu_thread_stop(struct nvgpu_thread *thread)
{ {
bool was_running;
if (thread->task) { if (thread->task) {
kthread_stop(thread->task); was_running = nvgpu_atomic_xchg(&thread->running, false);
if (was_running) {
kthread_stop(thread->task);
}
thread->task = NULL; thread->task = NULL;
} }
}; };
@@ -72,12 +89,12 @@ bool nvgpu_thread_should_stop(struct nvgpu_thread *thread)
bool nvgpu_thread_is_running(struct nvgpu_thread *thread) bool nvgpu_thread_is_running(struct nvgpu_thread *thread)
{ {
return ACCESS_ONCE(thread->running); return nvgpu_atomic_read(&thread->running);
}; };
void nvgpu_thread_join(struct nvgpu_thread *thread) void nvgpu_thread_join(struct nvgpu_thread *thread)
{ {
while (ACCESS_ONCE(thread->running)) { while (nvgpu_atomic_read(&thread->running)) {
nvgpu_msleep(10); nvgpu_msleep(10);
} }
}; };