diff --git a/drivers/gpu/nvgpu/common/nvs/nvs_sched.c b/drivers/gpu/nvgpu/common/nvs/nvs_sched.c
index ca5c4bc3d..5065f9a09 100644
--- a/drivers/gpu/nvgpu/common/nvs/nvs_sched.c
+++ b/drivers/gpu/nvgpu/common/nvs/nvs_sched.c
@@ -28,6 +28,7 @@
 #include
 #include
 #include
+#include <nvgpu/kref.h>
 
 static struct nvs_sched_ops nvgpu_nvs_ops = {
 	.preempt = NULL,
@@ -55,6 +56,7 @@ struct nvgpu_nvs_worker_item {
 	bool wait_for_finish;
 	bool locked;
 	int status;
+	struct nvgpu_ref ref;
 	struct nvgpu_list_node list;
 	nvgpu_atomic_t state;
 };
@@ -77,6 +79,13 @@ nvgpu_nvs_worker_from_worker(struct nvgpu_worker *worker)
 		((uintptr_t)worker - offsetof(struct nvgpu_nvs_worker, worker));
 };
 
+static inline struct nvgpu_nvs_worker_item *
+nvgpu_nvs_worker_item_from_ref(struct nvgpu_ref *ref_node)
+{
+	return (struct nvgpu_nvs_worker_item *)
+		((uintptr_t)ref_node - offsetof(struct nvgpu_nvs_worker_item, ref));
+};
+
 static void nvgpu_nvs_worker_poll_init(struct nvgpu_worker *worker)
 {
 	struct nvgpu_nvs_worker *nvs_worker =
@@ -152,6 +161,16 @@ static u64 nvgpu_nvs_tick(struct gk20a *g)
 	return timeslice;
 }
 
+static void nvgpu_nvs_worker_item_release(struct nvgpu_ref *ref)
+{
+	struct nvgpu_nvs_worker_item *work =
+		nvgpu_nvs_worker_item_from_ref(ref);
+	struct gk20a *g = work->g;
+
+	nvgpu_cond_destroy(&work->cond);
+	nvgpu_kfree(g, work);
+}
+
 static void nvgpu_nvs_worker_wakeup_process_item(struct nvgpu_list_node *work_item)
 {
 	struct nvgpu_nvs_worker_item *work =
@@ -195,9 +214,14 @@ static void nvgpu_nvs_worker_wakeup_process_item(struct nvgpu_list_node *work_it
 done:
	nvgpu_mutex_release(&g->sched_mutex);
 	work->status = ret;
-	(void)nvgpu_atomic_xchg(&work->state, 1);
+	nvgpu_atomic_set(&work->state, 1);
+
+	nvgpu_smp_mb();
 
 	/* Wakeup threads waiting on runlist submit */
 	nvgpu_cond_signal(&work->cond);
+
+	/* This reference was taken as part of nvgpu_nvs_worker_submit() */
+	nvgpu_ref_put(&work->ref, nvgpu_nvs_worker_item_release);
 }
 
 static int nvgpu_nvs_worker_submit(struct gk20a *g, struct nvgpu_runlist *rl,
@@ -228,21 +252,29 @@ static int nvgpu_nvs_worker_submit(struct gk20a *g, struct nvgpu_runlist *rl,
 	nvgpu_init_list_node(&work->list);
 	work->wait_for_finish = wait_for_finish;
 	nvgpu_atomic_set(&work->state, 0);
+	nvgpu_ref_init(&work->ref);
 
 	nvs_dbg(g, " enqueueing runlist submit");
 
+	/* Add a barrier here to ensure all reads and writes have happened before
+	 * enqueuing the job in the worker thread.
+	 */
+	nvgpu_smp_mb();
+
+	/* The matching nvgpu_ref_put() is in nvgpu_nvs_worker_wakeup_process_item() */
+	nvgpu_ref_get(&work->ref);
 	ret = nvgpu_worker_enqueue(&worker->worker, &work->list);
 	if (ret != 0) {
+		/* Refcount is decremented here as no additional job is enqueued */
+		nvgpu_ref_put(&work->ref, nvgpu_nvs_worker_item_release);
 		goto fail;
 	}
 
-	/* Add a barrier here to ensure that worker thread is interrupted
-	 * before waiting on the condition below
-	 */
-	nvgpu_mb();
-
 	ret = NVGPU_COND_WAIT(&work->cond, nvgpu_atomic_read(&work->state) == 1, 0U);
 	if (ret != 0) {
+		/* The refcount is not decremented here: even though this thread has
+		 * been unblocked, the job may still be queued in the worker.
+		 */
 		nvgpu_err(g, "Runlist submit interrupted while waiting for submit");
 		goto fail;
 	}
@@ -252,8 +284,7 @@ static int nvgpu_nvs_worker_submit(struct gk20a *g, struct nvgpu_runlist *rl,
 	ret = work->status;
 
 fail:
-	nvgpu_cond_destroy(&work->cond);
-	nvgpu_kfree(g, work);
+	nvgpu_ref_put(&work->ref, nvgpu_nvs_worker_item_release);
 
 free_domain:
diff --git a/drivers/gpu/nvgpu/common/utils/worker.c b/drivers/gpu/nvgpu/common/utils/worker.c
index c1fc2be69..e9a22d715 100644
--- a/drivers/gpu/nvgpu/common/utils/worker.c
+++ b/drivers/gpu/nvgpu/common/utils/worker.c
@@ -25,6 +25,7 @@
 #include
 #include
 #include
+#include <nvgpu/barrier.h>
 
 static void nvgpu_worker_pre_process(struct nvgpu_worker *worker)
 {
@@ -331,6 +332,11 @@ static void nvgpu_worker_init_common(struct gk20a *g,
 	nvgpu_mutex_init(&worker->start_lock);
 
 	worker->ops = worker_ops;
+
+	/* Ensure initialization is complete before actually invoking the thread.
+	 * The corresponding read barrier lies in nvgpu_thread_proxy().
+	 */
+	nvgpu_smp_wmb();
 }
 
 int nvgpu_worker_init(struct gk20a *g, struct nvgpu_worker *worker,
diff --git a/drivers/gpu/nvgpu/os/linux/thread.c b/drivers/gpu/nvgpu/os/linux/thread.c
index 9c818f1e2..3e9af90cc 100644
--- a/drivers/gpu/nvgpu/os/linux/thread.c
+++ b/drivers/gpu/nvgpu/os/linux/thread.c
@@ -18,14 +18,22 @@
 
 #include
 #include
+#include <nvgpu/barrier.h>
 #include
 #include
 
 int nvgpu_thread_proxy(void *threaddata)
 {
 	struct nvgpu_thread *thread = threaddata;
-	int ret = thread->fn(thread->data);
 	bool was_running;
+	int ret;
+
+	/* Ensure any initialization required for this thread is completed.
+	 * The corresponding write barrier lies at the end of nvgpu_worker_init_common().
+	 */
+	nvgpu_smp_rmb();
+
+	ret = thread->fn(thread->data);
 
 	was_running = nvgpu_atomic_xchg(&thread->running, false);
diff --git a/drivers/gpu/nvgpu/os/posix/cond.c b/drivers/gpu/nvgpu/os/posix/cond.c
index 7a56b1f77..8e98ac5d1 100644
--- a/drivers/gpu/nvgpu/os/posix/cond.c
+++ b/drivers/gpu/nvgpu/os/posix/cond.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2018-2022, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -155,13 +155,16 @@ void nvgpu_cond_destroy(struct nvgpu_cond *cond)
 	if (cond == NULL) {
 		BUG();
 	}
+
+	nvgpu_mutex_acquire(&cond->mutex);
 	err = pthread_cond_destroy(&cond->cond);
 	nvgpu_assert(err == 0);
-	nvgpu_mutex_destroy(&cond->mutex);
 	err = pthread_condattr_destroy(&cond->attr);
+	nvgpu_mutex_release(&cond->mutex);
 	if (err != 0) {
 		nvgpu_info(NULL, "Cond attr destroy error");
 	}
+	nvgpu_mutex_destroy(&cond->mutex);
 	cond->initialized = false;
 }
diff --git a/drivers/gpu/nvgpu/os/posix/thread.c b/drivers/gpu/nvgpu/os/posix/thread.c
index e971f4221..be3a0d97a 100644
--- a/drivers/gpu/nvgpu/os/posix/thread.c
+++ b/drivers/gpu/nvgpu/os/posix/thread.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2018-2022, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -22,6 +22,7 @@
 #include
 #include
+#include <nvgpu/barrier.h>
 #include
 #ifdef NVGPU_UNITTEST_FAULT_INJECTION_ENABLEMENT
 #include
 #endif
@@ -74,6 +75,8 @@ static void *nvgpu_posix_thread_wrapper(void *data)
 	nvgpu_posix_init_fault_injection(nvgpu->fi_container);
 #endif
 
+	nvgpu_smp_rmb();
+
 	ret = nvgpu->fn(nvgpu->data);
 
 	if (ret != 0L) {
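
For context, here is a minimal, self-contained sketch (not part of the patch; the names work_item, item_put and worker_fn are hypothetical) of the two-reference lifecycle the change introduces, written with C11 atomics and pthreads: the submitter holds one reference and the worker holds a second one taken at enqueue time, so whichever side finishes last frees the item, and an interrupted wait on the submitter side can no longer free memory the worker still uses.

/* sketch.c: illustrative only; build with: cc -pthread sketch.c */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct work_item {
	atomic_int refcount;		/* one reference for the submitter, one for the worker */
	atomic_int state;		/* 0 = pending, 1 = processed */
	pthread_mutex_t lock;
	pthread_cond_t cond;
	int status;
};

static void item_put(struct work_item *w)
{
	/* Whichever side drops the last reference frees the item. */
	if (atomic_fetch_sub(&w->refcount, 1) == 1) {
		pthread_cond_destroy(&w->cond);
		pthread_mutex_destroy(&w->lock);
		free(w);
	}
}

static void *worker_fn(void *arg)
{
	struct work_item *w = arg;

	w->status = 0;			/* pretend the runlist update succeeded */
	atomic_store(&w->state, 1);	/* seq_cst store publishes status before state */

	pthread_mutex_lock(&w->lock);
	pthread_cond_signal(&w->cond);	/* wake the submitter if it is still waiting */
	pthread_mutex_unlock(&w->lock);

	item_put(w);			/* drop the reference handed over at enqueue time */
	return NULL;
}

int main(void)
{
	struct work_item *w = calloc(1, sizeof(*w));
	pthread_t worker;

	if (w == NULL)
		return 1;

	atomic_init(&w->refcount, 2);	/* submitter's reference + worker's reference */
	atomic_init(&w->state, 0);
	pthread_mutex_init(&w->lock, NULL);
	pthread_cond_init(&w->cond, NULL);

	pthread_create(&worker, NULL, worker_fn, w);

	/*
	 * Even if this wait were interrupted, the submitter would only drop its
	 * own reference below, so the worker's later accesses stay valid.
	 */
	pthread_mutex_lock(&w->lock);
	while (atomic_load(&w->state) != 1)
		pthread_cond_wait(&w->cond, &w->lock);
	pthread_mutex_unlock(&w->lock);

	printf("submit status: %d\n", w->status);

	pthread_join(worker, NULL);
	item_put(w);			/* submitter's reference; may be the last one */
	return 0;
}

The nvgpu_smp_mb()/nvgpu_smp_wmb()/nvgpu_smp_rmb() calls added by the patch serve the same purpose the sequentially consistent atomics serve in this sketch: work->status must be visible before work->state, and worker/thread initialization must be visible before the newly started thread reads it.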