diff --git a/drivers/gpu/nvgpu/common/nvs/nvs_sched.c b/drivers/gpu/nvgpu/common/nvs/nvs_sched.c
index ca5c4bc3d..5065f9a09 100644
--- a/drivers/gpu/nvgpu/common/nvs/nvs_sched.c
+++ b/drivers/gpu/nvgpu/common/nvs/nvs_sched.c
@@ -28,6 +28,7 @@
 #include
 #include
 #include
+#include <nvgpu/kref.h>
 
 static struct nvs_sched_ops nvgpu_nvs_ops = {
 	.preempt = NULL,
@@ -55,6 +56,7 @@ struct nvgpu_nvs_worker_item {
 	bool wait_for_finish;
 	bool locked;
 	int status;
+	struct nvgpu_ref ref;
 	struct nvgpu_list_node list;
 	nvgpu_atomic_t state;
 };
@@ -77,6 +79,13 @@ nvgpu_nvs_worker_from_worker(struct nvgpu_worker *worker)
 		((uintptr_t)worker - offsetof(struct nvgpu_nvs_worker, worker));
 };
 
+static inline struct nvgpu_nvs_worker_item *
+nvgpu_nvs_worker_item_from_ref(struct nvgpu_ref *ref_node)
+{
+	return (struct nvgpu_nvs_worker_item *)
+		((uintptr_t)ref_node - offsetof(struct nvgpu_nvs_worker_item, ref));
+};
+
 static void nvgpu_nvs_worker_poll_init(struct nvgpu_worker *worker)
 {
 	struct nvgpu_nvs_worker *nvs_worker =
@@ -152,6 +161,16 @@ static u64 nvgpu_nvs_tick(struct gk20a *g)
 	return timeslice;
 }
 
+static void nvgpu_nvs_worker_item_release(struct nvgpu_ref *ref)
+{
+	struct nvgpu_nvs_worker_item *work =
+		nvgpu_nvs_worker_item_from_ref(ref);
+	struct gk20a *g = work->g;
+
+	nvgpu_cond_destroy(&work->cond);
+	nvgpu_kfree(g, work);
+}
+
 static void nvgpu_nvs_worker_wakeup_process_item(struct nvgpu_list_node *work_item)
 {
 	struct nvgpu_nvs_worker_item *work =
@@ -195,9 +214,14 @@ static void nvgpu_nvs_worker_wakeup_process_item(struct nvgpu_list_node *work_it
 done:
	nvgpu_mutex_release(&g->sched_mutex);
 	work->status = ret;
-	(void)nvgpu_atomic_xchg(&work->state, 1);
+	nvgpu_atomic_set(&work->state, 1);
+
+	nvgpu_smp_mb();
 
 	/* Wakeup threads waiting on runlist submit */
 	nvgpu_cond_signal(&work->cond);
+
+	/* This reference was taken as part of nvgpu_nvs_worker_submit() */
+	nvgpu_ref_put(&work->ref, nvgpu_nvs_worker_item_release);
 }
 
 static int nvgpu_nvs_worker_submit(struct gk20a *g, struct nvgpu_runlist *rl,
@@ -228,21 +252,29 @@ static int nvgpu_nvs_worker_submit(struct gk20a *g, struct nvgpu_runlist *rl,
 	nvgpu_init_list_node(&work->list);
 	work->wait_for_finish = wait_for_finish;
 	nvgpu_atomic_set(&work->state, 0);
+	nvgpu_ref_init(&work->ref);
 
 	nvs_dbg(g, " enqueueing runlist submit");
 
+	/* Add a barrier here to ensure all reads and writes have happened before
+	 * enqueuing the job in the worker thread.
+	 */
+	nvgpu_smp_mb();
+
+	/* The matching nvgpu_ref_put() is in nvgpu_nvs_worker_wakeup_process_item() */
+	nvgpu_ref_get(&work->ref);
 	ret = nvgpu_worker_enqueue(&worker->worker, &work->list);
 	if (ret != 0) {
+		/* Refcount is decremented here as no additional job is enqueued */
+		nvgpu_ref_put(&work->ref, nvgpu_nvs_worker_item_release);
 		goto fail;
 	}
 
-	/* Add a barrier here to ensure that worker thread is interrupted
-	 * before waiting on the condition below
-	 */
-	nvgpu_mb();
-
 	ret = NVGPU_COND_WAIT(&work->cond, nvgpu_atomic_read(&work->state) == 1, 0U);
 	if (ret != 0) {
+		/* The refcount is not decremented here: even though this thread has
+		 * been unblocked, the job may still be queued in the worker.
+		 */
 		nvgpu_err(g, "Runlist submit interrupted while waiting for submit");
 		goto fail;
 	}
@@ -252,8 +284,7 @@ static int nvgpu_nvs_worker_submit(struct gk20a *g, struct nvgpu_runlist *rl,
 	ret = work->status;
 
 fail:
-	nvgpu_cond_destroy(&work->cond);
-	nvgpu_kfree(g, work);
+	nvgpu_ref_put(&work->ref, nvgpu_nvs_worker_item_release);
 
 free_domain:
diff --git a/drivers/gpu/nvgpu/common/utils/worker.c b/drivers/gpu/nvgpu/common/utils/worker.c
index c1fc2be69..e9a22d715 100644
--- a/drivers/gpu/nvgpu/common/utils/worker.c
+++ b/drivers/gpu/nvgpu/common/utils/worker.c
@@ -25,6 +25,7 @@
 #include
 #include
 #include
+#include <nvgpu/barrier.h>
 
 static void nvgpu_worker_pre_process(struct nvgpu_worker *worker)
 {
@@ -331,6 +332,11 @@ static void nvgpu_worker_init_common(struct gk20a *g,
 	nvgpu_mutex_init(&worker->start_lock);
 
 	worker->ops = worker_ops;
+
+	/* Ensure initialization is complete before actually invoking the thread.
+	 * The corresponding read barrier lies in nvgpu_thread_proxy().
+	 */
+	nvgpu_smp_wmb();
 }
 
 int nvgpu_worker_init(struct gk20a *g, struct nvgpu_worker *worker,
diff --git a/drivers/gpu/nvgpu/os/linux/thread.c b/drivers/gpu/nvgpu/os/linux/thread.c
index 9c818f1e2..3e9af90cc 100644
--- a/drivers/gpu/nvgpu/os/linux/thread.c
+++ b/drivers/gpu/nvgpu/os/linux/thread.c
@@ -18,14 +18,22 @@
 
 #include
 #include
+#include <nvgpu/barrier.h>
 #include
 #include
 
 int nvgpu_thread_proxy(void *threaddata)
 {
 	struct nvgpu_thread *thread = threaddata;
-	int ret = thread->fn(thread->data);
 	bool was_running;
+	int ret;
+
+	/* Ensure any initialization required for this thread is completed.
+	 * The corresponding write barrier lies at the end of nvgpu_worker_init_common().
+	 */
+	nvgpu_smp_rmb();
+
+	ret = thread->fn(thread->data);
 
 	was_running = nvgpu_atomic_xchg(&thread->running, false);
diff --git a/drivers/gpu/nvgpu/os/posix/cond.c b/drivers/gpu/nvgpu/os/posix/cond.c
index 7a56b1f77..8e98ac5d1 100644
--- a/drivers/gpu/nvgpu/os/posix/cond.c
+++ b/drivers/gpu/nvgpu/os/posix/cond.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2018-2022, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -155,13 +155,16 @@ void nvgpu_cond_destroy(struct nvgpu_cond *cond)
 	if (cond == NULL) {
 		BUG();
 	}
+
+	nvgpu_mutex_acquire(&cond->mutex);
 	err = pthread_cond_destroy(&cond->cond);
 	nvgpu_assert(err == 0);
-	nvgpu_mutex_destroy(&cond->mutex);
 	err = pthread_condattr_destroy(&cond->attr);
+	nvgpu_mutex_release(&cond->mutex);
 	if (err != 0) {
 		nvgpu_info(NULL, "Cond attr destroy error");
 	}
+	nvgpu_mutex_destroy(&cond->mutex);
 	cond->initialized = false;
 }
diff --git a/drivers/gpu/nvgpu/os/posix/thread.c b/drivers/gpu/nvgpu/os/posix/thread.c
index e971f4221..be3a0d97a 100644
--- a/drivers/gpu/nvgpu/os/posix/thread.c
+++ b/drivers/gpu/nvgpu/os/posix/thread.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2018-2022, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -22,6 +22,7 @@
 #include
 #include
+#include <nvgpu/barrier.h>
 #include
 #ifdef NVGPU_UNITTEST_FAULT_INJECTION_ENABLEMENT
 #include
 #endif
@@ -74,6 +75,8 @@ static void *nvgpu_posix_thread_wrapper(void *data)
 	nvgpu_posix_init_fault_injection(nvgpu->fi_container);
 #endif
 
+	nvgpu_smp_rmb();
+
 	ret = nvgpu->fn(nvgpu->data);
 
 	if (ret != 0L) {
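
For context, here is a minimal, self-contained sketch (not part of the patch; the names work_item, item_put and worker_fn are hypothetical) of the two-reference lifecycle the change introduces, written with C11 atomics and pthreads: the submitter holds one reference and the worker holds a second one taken at enqueue time, so whichever side finishes last frees the item, and an interrupted wait on the submitter side can no longer free memory the worker still uses.

/* sketch.c: illustrative only; build with: cc -pthread sketch.c */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct work_item {
	atomic_int refcount;		/* one reference for the submitter, one for the worker */
	atomic_int state;		/* 0 = pending, 1 = processed */
	pthread_mutex_t lock;
	pthread_cond_t cond;
	int status;
};

static void item_put(struct work_item *w)
{
	/* Whichever side drops the last reference frees the item. */
	if (atomic_fetch_sub(&w->refcount, 1) == 1) {
		pthread_cond_destroy(&w->cond);
		pthread_mutex_destroy(&w->lock);
		free(w);
	}
}

static void *worker_fn(void *arg)
{
	struct work_item *w = arg;

	w->status = 0;			/* pretend the runlist update succeeded */
	atomic_store(&w->state, 1);	/* seq_cst store publishes status before state */

	pthread_mutex_lock(&w->lock);
	pthread_cond_signal(&w->cond);	/* wake the submitter if it is still waiting */
	pthread_mutex_unlock(&w->lock);

	item_put(w);			/* drop the reference handed over at enqueue time */
	return NULL;
}

int main(void)
{
	struct work_item *w = calloc(1, sizeof(*w));
	pthread_t worker;

	if (w == NULL)
		return 1;

	atomic_init(&w->refcount, 2);	/* submitter's reference + worker's reference */
	atomic_init(&w->state, 0);
	pthread_mutex_init(&w->lock, NULL);
	pthread_cond_init(&w->cond, NULL);

	pthread_create(&worker, NULL, worker_fn, w);

	/*
	 * Even if this wait were interrupted, the submitter would only drop its
	 * own reference below, so the worker's later accesses stay valid.
	 */
	pthread_mutex_lock(&w->lock);
	while (atomic_load(&w->state) != 1)
		pthread_cond_wait(&w->cond, &w->lock);
	pthread_mutex_unlock(&w->lock);

	printf("submit status: %d\n", w->status);

	pthread_join(worker, NULL);
	item_put(w);			/* submitter's reference; may be the last one */
	return 0;
}

The nvgpu_smp_mb()/nvgpu_smp_wmb()/nvgpu_smp_rmb() calls added by the patch serve the same purpose the sequentially consistent atomics serve in this sketch: work->status must be visible before work->state, and worker/thread initialization must be visible before the newly started thread reads it.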