From c58029ad24442d323ce8f33551eb3abf9bcb2da7 Mon Sep 17 00:00:00 2001 From: Peter Daifuku Date: Fri, 1 Nov 2019 15:28:08 -0700 Subject: [PATCH] gpu: nvgpu: fix race for nvgpu_thread_stop The pmu init thread typically returns immediately without calling nvgpu_thread_should_stop(). pmu_pg_kill_task() checks if the thread is running, and if it is, calls nvgpu_thread_stop(). However, there's a race condition where the init thread could have exited between the time that kill_task() checked the running flag and the time we actually stop the thread, leading to a kernel crash. Fix this by making the running flag in the nvgpu_thread struct atomic. Both the thread proxy function and the thread_stop() function will set the flag to false. In the case of nvgpu_thread_proxy(), if the flag is already false, then nvgpu_thread_stop() has already reset it, at which point we just wait for nvgpu_thread_should_stop() to return true. In the case of nvgpu_thread_stop(), if the flag is already false, then the thread proxy function has already exited, and there is nothing more to do. Bug 2591298 Change-Id: I9ba6b63c30a5c3e1df11e790094836b44373122b Signed-off-by: Peter Daifuku Reviewed-on: https://git-master.nvidia.com/r/2230358 GVS: Gerrit_Virtual_Submit Reviewed-by: Thomas Fleury Reviewed-by: Alex Waterman Reviewed-by: mobile promotions Tested-by: mobile promotions --- .../gpu/nvgpu/include/nvgpu/linux/thread.h | 6 +++-- drivers/gpu/nvgpu/os/linux/thread.c | 27 +++++++++++++++---- 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/nvgpu/include/nvgpu/linux/thread.h b/drivers/gpu/nvgpu/include/nvgpu/linux/thread.h index 1355319c5..9cc6bf547 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/linux/thread.h +++ b/drivers/gpu/nvgpu/include/nvgpu/linux/thread.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -17,11 +17,13 @@ #ifndef __NVGPU_THREAD_LINUX_H__ #define __NVGPU_THREAD_LINUX_H__ +#include + struct task_struct; struct nvgpu_thread { struct task_struct *task; - bool running; + nvgpu_atomic_t running; int (*fn)(void *); void *data; }; diff --git a/drivers/gpu/nvgpu/os/linux/thread.c b/drivers/gpu/nvgpu/os/linux/thread.c index 951b31323..07d3beedb 100644 --- a/drivers/gpu/nvgpu/os/linux/thread.c +++ b/drivers/gpu/nvgpu/os/linux/thread.c @@ -23,8 +23,19 @@ int nvgpu_thread_proxy(void *threaddata) { struct nvgpu_thread *thread = threaddata; int ret = thread->fn(thread->data); + bool was_running; - thread->running = false; + was_running = nvgpu_atomic_xchg(&thread->running, false); + + /* if the thread was not running, then nvgpu_thread_stop() was + * called, so just wait until we get the notification that we should + * stop. + */ + if (!was_running) { + while (!nvgpu_thread_should_stop(thread)) { + nvgpu_usleep_range(5000, 5100); + } + } return ret; } @@ -40,15 +51,21 @@ int nvgpu_thread_create(struct nvgpu_thread *thread, thread->task = task; thread->fn = threadfn; thread->data = data; - thread->running = true; + nvgpu_atomic_set(&thread->running, true); wake_up_process(task); return 0; }; void nvgpu_thread_stop(struct nvgpu_thread *thread) { + bool was_running; + if (thread->task) { - kthread_stop(thread->task); + was_running = nvgpu_atomic_xchg(&thread->running, false); + + if (was_running) { + kthread_stop(thread->task); + } thread->task = NULL; } }; @@ -72,12 +89,12 @@ bool nvgpu_thread_should_stop(struct nvgpu_thread *thread) bool nvgpu_thread_is_running(struct nvgpu_thread *thread) { - return ACCESS_ONCE(thread->running); + return nvgpu_atomic_read(&thread->running); }; void nvgpu_thread_join(struct nvgpu_thread *thread) { - while (ACCESS_ONCE(thread->running)) { + while (nvgpu_atomic_read(&thread->running)) { nvgpu_msleep(10); } };