diff --git a/drivers/gpu/nvgpu/common/linux/clk_arb.c b/drivers/gpu/nvgpu/common/linux/clk_arb.c
index bb0fd6289..2a6278e80 100644
--- a/drivers/gpu/nvgpu/common/linux/clk_arb.c
+++ b/drivers/gpu/nvgpu/common/linux/clk_arb.c
@@ -397,17 +397,14 @@ exit_vf_table:
 	if (status < 0)
 		nvgpu_clk_arb_set_global_alarm(g,
 			EVENT(ALARM_VF_TABLE_UPDATE_FAILED));

-	if (arb->update_work_queue)
-		queue_work(arb->update_work_queue, &arb->update_fn_work);
+	nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item);

 	return status;
 }

-static void nvgpu_clk_arb_run_vf_table_cb(struct work_struct *work)
+static void nvgpu_clk_arb_run_vf_table_cb(struct nvgpu_clk_arb *arb)
 {
-	struct nvgpu_clk_arb *arb =
-		container_of(work, struct nvgpu_clk_arb, vf_table_fn_work);
 	struct gk20a *g = arb->g;
 	u32 err;
@@ -417,9 +414,7 @@ static void nvgpu_clk_arb_run_vf_table_cb(struct work_struct *work)
 		nvgpu_err(g, "failed to cache VF table");
 		nvgpu_clk_arb_set_global_alarm(g,
 			EVENT(ALARM_VF_TABLE_UPDATE_FAILED));

-		if (arb->update_work_queue)
-			queue_work(arb->update_work_queue,
-				&arb->update_fn_work);
+		nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item);

 		return;
 	}
@@ -725,10 +720,8 @@ static void nvgpu_clk_arb_clear_global_alarm(struct gk20a *g, u32 alarm)
 				current_mask, new_mask)));
 }

-static void nvgpu_clk_arb_run_arbiter_cb(struct work_struct *work)
+static void nvgpu_clk_arb_run_arbiter_cb(struct nvgpu_clk_arb *arb)
 {
-	struct nvgpu_clk_arb *arb =
-		container_of(work, struct nvgpu_clk_arb, update_fn_work);
 	struct nvgpu_clk_session *session;
 	struct nvgpu_clk_dev *dev;
 	struct nvgpu_clk_dev *tmp;
@@ -1027,6 +1020,205 @@ exit_arb:
 			~EVENT(ALARM_GPU_LOST));
 }

+/*
+ * Process one scheduled work item.
+ */
+static void nvgpu_clk_arb_worker_process_item(
+		struct nvgpu_clk_arb_work_item *work_item)
+{
+	nvgpu_log(work_item->arb->g, gpu_dbg_fn | gpu_dbg_clk_arb, " ");
+
+	if (work_item->item_type == CLK_ARB_WORK_UPDATE_VF_TABLE)
+		nvgpu_clk_arb_run_vf_table_cb(work_item->arb);
+	else if (work_item->item_type == CLK_ARB_WORK_UPDATE_ARB)
+		nvgpu_clk_arb_run_arbiter_cb(work_item->arb);
+}
+
+/**
+ * Tell the worker that one more work item needs to be done.
+ *
+ * Increase the work counter to synchronize the worker with the new work. Wake
+ * up the worker. If the worker was already running, it will handle this work
+ * before going to sleep.
+ */
+static int nvgpu_clk_arb_worker_wakeup(struct gk20a *g)
+{
+	int put;
+
+	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_clk_arb, " ");
+
+	put = nvgpu_atomic_inc_return(&g->clk_arb_worker.put);
+	nvgpu_cond_signal_interruptible(&g->clk_arb_worker.wq);
+
+	return put;
+}
+
+/**
+ * Test if there is some work pending.
+ *
+ * This is the counterpart of nvgpu_clk_arb_worker_wakeup, called from the
+ * worker. The worker has an internal work counter which is incremented once
+ * per finished work item. This is compared with the number of queued jobs.
+ */
+static bool nvgpu_clk_arb_worker_pending(struct gk20a *g, int get)
+{
+	bool pending = nvgpu_atomic_read(&g->clk_arb_worker.put) != get;
+
+	/* We don't need barriers because they are implicit in locking */
+	return pending;
+}
+
+/**
+ * Process the queued work items for the worker thread serially.
+ *
+ * Flush all the work items in the queue one by one. This may block timeout
+ * handling for a short while, as these are serialized.
+ */
+static void nvgpu_clk_arb_worker_process(struct gk20a *g, int *get)
+{
+
+	while (nvgpu_clk_arb_worker_pending(g, *get)) {
+		struct nvgpu_clk_arb_work_item *work_item = NULL;
+
+		nvgpu_spinlock_acquire(&g->clk_arb_worker.items_lock);
+		if (!nvgpu_list_empty(&g->clk_arb_worker.items)) {
+			work_item = nvgpu_list_first_entry(&g->clk_arb_worker.items,
+				nvgpu_clk_arb_work_item, worker_item);
+			nvgpu_list_del(&work_item->worker_item);
+		}
+		nvgpu_spinlock_release(&g->clk_arb_worker.items_lock);
+
+		if (!work_item) {
+			/*
+			 * Woke up for some other reason, but currently the
+			 * only wake-up reason is a work item being added to
+			 * the list, so warn and ack the message.
+			 */
+			nvgpu_warn(g, "Spurious worker event!");
+			++*get;
+			break;
+		}
+
+		nvgpu_clk_arb_worker_process_item(work_item);
+		++*get;
+	}
+}
+
+/*
+ * Process all work items found in the clk arbiter work queue.
+ */
+static int nvgpu_clk_arb_poll_worker(void *arg)
+{
+	struct gk20a *g = (struct gk20a *)arg;
+	struct gk20a_worker *worker = &g->clk_arb_worker;
+	int get = 0;
+
+	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_clk_arb, " ");
+
+	while (!nvgpu_thread_should_stop(&worker->poll_task)) {
+		int ret;
+
+		ret = NVGPU_COND_WAIT_INTERRUPTIBLE(
+				&worker->wq,
+				nvgpu_clk_arb_worker_pending(g, get), 0);
+
+		if (ret == 0)
+			nvgpu_clk_arb_worker_process(g, &get);
+	}
+	return 0;
+}
+
+static int __nvgpu_clk_arb_worker_start(struct gk20a *g)
+{
+	char thread_name[64];
+	int err = 0;
+
+	if (nvgpu_thread_is_running(&g->clk_arb_worker.poll_task))
+		return err;
+
+	nvgpu_mutex_acquire(&g->clk_arb_worker.start_lock);
+
+	/*
+	 * Mutexes have implicit barriers, so there is no risk of a thread
+	 * having a stale copy of the poll_task variable, as the call to
+	 * thread_is_running is volatile.
+	 */
+
+	if (nvgpu_thread_is_running(&g->clk_arb_worker.poll_task)) {
+		nvgpu_mutex_release(&g->clk_arb_worker.start_lock);
+		return err;
+	}
+
+	snprintf(thread_name, sizeof(thread_name),
+			"nvgpu_clk_arb_poll_%s", g->name);
+
+	err = nvgpu_thread_create(&g->clk_arb_worker.poll_task, g,
+			nvgpu_clk_arb_poll_worker, thread_name);
+
+	nvgpu_mutex_release(&g->clk_arb_worker.start_lock);
+	return err;
+}
+
+/**
+ * Append a work item to the worker's list.
+ *
+ * This adds the work item to the end of the list and wakes the worker up
+ * immediately. If the work item already exists in the list, it is not added
+ * again, because in that case it has already been scheduled but not yet
+ * processed.
+ */
+void nvgpu_clk_arb_worker_enqueue(struct gk20a *g,
+		struct nvgpu_clk_arb_work_item *work_item)
+{
+	nvgpu_log(g, gpu_dbg_fn | gpu_dbg_clk_arb, " ");
+
+	/*
+	 * Warn if the worker thread cannot run.
+	 */
+	if (WARN_ON(__nvgpu_clk_arb_worker_start(g))) {
+		nvgpu_warn(g, "clk arb worker cannot run!");
+		return;
+	}
+
+	nvgpu_spinlock_acquire(&g->clk_arb_worker.items_lock);
+	if (!nvgpu_list_empty(&work_item->worker_item)) {
+		/*
+		 * Already queued, so will get processed eventually.
+		 * The worker is probably awake already.
+		 */
+		nvgpu_spinlock_release(&g->clk_arb_worker.items_lock);
+		return;
+	}
+	nvgpu_list_add_tail(&work_item->worker_item, &g->clk_arb_worker.items);
+	nvgpu_spinlock_release(&g->clk_arb_worker.items_lock);
+
+	nvgpu_clk_arb_worker_wakeup(g);
+}
+
+/**
+ * Initialize the clk arb worker's metadata and start the background thread.
+ */
+int nvgpu_clk_arb_worker_init(struct gk20a *g)
+{
+	int err;
+
+	nvgpu_atomic_set(&g->clk_arb_worker.put, 0);
+	nvgpu_cond_init(&g->clk_arb_worker.wq);
+	nvgpu_init_list_node(&g->clk_arb_worker.items);
+	nvgpu_spinlock_init(&g->clk_arb_worker.items_lock);
+	err = nvgpu_mutex_init(&g->clk_arb_worker.start_lock);
+	if (err)
+		goto error_check;
+
+	err = __nvgpu_clk_arb_worker_start(g);
+error_check:
+	if (err) {
+		nvgpu_err(g, "failed to start clk arb poller thread");
+		return err;
+	}
+	return 0;
+}
+
 int nvgpu_clk_arb_init_arbiter(struct gk20a *g)
 {
 	struct nvgpu_clk_arb *arb;
@@ -1120,15 +1312,17 @@ int nvgpu_clk_arb_init_arbiter(struct gk20a *g)
 	nvgpu_init_list_node(&arb->requests);

 	nvgpu_cond_init(&arb->request_wq);

-	arb->vf_table_work_queue = alloc_workqueue("%s", WQ_HIGHPRI, 1,
-		"vf_table_update");
-	arb->update_work_queue = alloc_workqueue("%s", WQ_HIGHPRI, 1,
-		"arbiter_update");
+	nvgpu_init_list_node(&arb->update_vf_table_work_item.worker_item);
+	nvgpu_init_list_node(&arb->update_arb_work_item.worker_item);
+	arb->update_vf_table_work_item.arb = arb;
+	arb->update_arb_work_item.arb = arb;
+	arb->update_vf_table_work_item.item_type = CLK_ARB_WORK_UPDATE_VF_TABLE;
+	arb->update_arb_work_item.item_type = CLK_ARB_WORK_UPDATE_ARB;

-	INIT_WORK(&arb->vf_table_fn_work, nvgpu_clk_arb_run_vf_table_cb);
-
-	INIT_WORK(&arb->update_fn_work, nvgpu_clk_arb_run_arbiter_cb);
+	err = nvgpu_clk_arb_worker_init(g);
+	if (err < 0)
+		goto init_fail;

 #ifdef CONFIG_DEBUG_FS
 	arb->debug = &arb->debug_pool[0];
@@ -1183,8 +1377,14 @@ void nvgpu_clk_arb_schedule_alarm(struct gk20a *g, u32 alarm)
 	struct nvgpu_clk_arb *arb = g->clk_arb;

 	nvgpu_clk_arb_set_global_alarm(g, alarm);
-	if (arb->update_work_queue)
-		queue_work(arb->update_work_queue, &arb->update_fn_work);
+	nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item);
+}
+
+void nvgpu_clk_arb_worker_deinit(struct gk20a *g)
+{
+	nvgpu_mutex_acquire(&g->clk_arb_worker.start_lock);
+	nvgpu_thread_stop(&g->clk_arb_worker.poll_task);
+	nvgpu_mutex_release(&g->clk_arb_worker.start_lock);
 }

 void nvgpu_clk_arb_cleanup_arbiter(struct gk20a *g)
@@ -1193,13 +1393,7 @@ void nvgpu_clk_arb_cleanup_arbiter(struct gk20a *g)
 	int index;

 	if (arb) {
-		cancel_work_sync(&arb->vf_table_fn_work);
-		destroy_workqueue(arb->vf_table_work_queue);
-		arb->vf_table_work_queue = NULL;
-
-		cancel_work_sync(&arb->update_fn_work);
-		destroy_workqueue(arb->update_work_queue);
-		arb->update_work_queue = NULL;
+		nvgpu_clk_arb_worker_deinit(g);

 		nvgpu_kfree(g, arb->gpc2clk_f_points);
 		nvgpu_kfree(g, arb->mclk_f_points);
@@ -1298,16 +1492,15 @@ void nvgpu_clk_arb_release_session(struct gk20a *g,
 	session->zombie = true;
 	nvgpu_ref_put(&session->refcount, nvgpu_clk_arb_free_session);
-	if (arb && arb->update_work_queue)
-		queue_work(arb->update_work_queue, &arb->update_fn_work);
+	if (arb)
+		nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item);
 }

 void nvgpu_clk_arb_schedule_vf_table_update(struct gk20a *g)
 {
 	struct nvgpu_clk_arb *arb = g->clk_arb;

-	if (arb->vf_table_work_queue)
-		queue_work(arb->vf_table_work_queue, &arb->vf_table_fn_work);
+	nvgpu_clk_arb_worker_enqueue(g, &arb->update_vf_table_work_item);
 }

 /* This function is inherently unsafe to call while arbiter is running
diff --git a/drivers/gpu/nvgpu/common/linux/clk_arb_linux.h b/drivers/gpu/nvgpu/common/linux/clk_arb_linux.h
index e5ada25dc..464590d58 100644
--- a/drivers/gpu/nvgpu/common/linux/clk_arb_linux.h
+++ b/drivers/gpu/nvgpu/common/linux/clk_arb_linux.h
@@ -39,6 +39,18 @@
  * The defines here should finally move to clk_arb.h, once these are
  * refactored to be free of Linux fields.
  */
+
+enum clk_arb_work_item_type {
+	CLK_ARB_WORK_UPDATE_VF_TABLE,
+	CLK_ARB_WORK_UPDATE_ARB
+};
+
+struct nvgpu_clk_arb_work_item {
+	enum clk_arb_work_item_type item_type;
+	struct nvgpu_clk_arb *arb;
+	struct nvgpu_list_node worker_item;
+};
+
 struct nvgpu_clk_arb {
 	struct nvgpu_spinlock sessions_lock;
 	struct nvgpu_spinlock users_lock;
@@ -62,10 +74,8 @@ struct nvgpu_clk_arb {
 	u16 gpc2clk_min, gpc2clk_max;
 	u16 mclk_min, mclk_max;

-	struct work_struct update_fn_work;
-	struct workqueue_struct *update_work_queue;
-	struct work_struct vf_table_fn_work;
-	struct workqueue_struct *vf_table_work_queue;
+	struct nvgpu_clk_arb_work_item update_vf_table_work_item;
+	struct nvgpu_clk_arb_work_item update_arb_work_item;

 	struct nvgpu_cond request_wq;

@@ -140,5 +150,14 @@ nvgpu_clk_dev_from_link(struct nvgpu_list_node *node)
 		((uintptr_t)node - offsetof(struct nvgpu_clk_dev, link));
 };

+static inline struct nvgpu_clk_arb_work_item *
+nvgpu_clk_arb_work_item_from_worker_item(struct nvgpu_list_node *node)
+{
+	return (struct nvgpu_clk_arb_work_item *)
+		((uintptr_t)node - offsetof(struct nvgpu_clk_arb_work_item, worker_item));
+};
+
+void nvgpu_clk_arb_worker_enqueue(struct gk20a *g,
+		struct nvgpu_clk_arb_work_item *work_item);

 #endif /* __NVGPU_CLK_ARB_LINUX_H__ */
diff --git a/drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c b/drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c
index 6d09b4b8f..039f65f82 100644
--- a/drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c
+++ b/drivers/gpu/nvgpu/common/linux/ioctl_clk_arb.c
@@ -424,8 +424,7 @@ int nvgpu_clk_arb_commit_request_fd(struct gk20a *g,
 	nvgpu_spinlock_acquire(&session->session_lock);
 	nvgpu_list_add(&dev->node, &session->targets);
 	nvgpu_spinlock_release(&session->session_lock);
-	if (arb->update_work_queue)
-		queue_work(arb->update_work_queue, &arb->update_fn_work);
+	nvgpu_clk_arb_worker_enqueue(g, &arb->update_arb_work_item);

 fdput_fd:
 	fdput(fd);
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index a7a08b5a8..e65ed2785 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -1591,7 +1591,7 @@ static void gk20a_channel_worker_process(struct gk20a *g, int *get)
 static int gk20a_channel_poll_worker(void *arg)
 {
 	struct gk20a *g = (struct gk20a *)arg;
-	struct gk20a_channel_worker *worker = &g->channel_worker;
+	struct gk20a_worker *worker = &g->channel_worker;
 	unsigned long watchdog_interval = 100; /* milliseconds */
 	struct nvgpu_timeout timeout;
 	int get = 0;
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index 75357a82f..03cfe285d 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -1406,14 +1406,14 @@ struct gk20a {
 	u32 ltc_count;
 	u32 ltc_streamid;

-	struct gk20a_channel_worker {
+	struct gk20a_worker {
 		struct nvgpu_thread poll_task;
 		nvgpu_atomic_t put;
 		struct nvgpu_cond wq;
 		struct nvgpu_list_node items;
 		struct nvgpu_spinlock items_lock;
 		struct nvgpu_mutex start_lock;
-	} channel_worker;
+	} channel_worker, clk_arb_worker;

 	struct {
 		void (*open)(struct channel_gk20a *ch);
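
Note for reviewers (not part of the patch): the standalone sketch below illustrates the put/get counter scheme that the clk arb worker now shares with the channel worker. POSIX threads stand in for nvgpu_thread/nvgpu_cond/nvgpu_spinlock, and all demo_* names are made up for illustration. Enqueue appends an item, bumps `put` and signals the condition variable; the poller thread compares `put` against its private `get` and drains the list serially. Build with something like `cc -pthread demo_worker.c` (file name hypothetical).

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

/* Pre-allocated work item, analogous to struct nvgpu_clk_arb_work_item. */
struct demo_work_item {
	const char *name;
	struct demo_work_item *next;
};

/* Analogous to struct gk20a_worker: put counter, wait queue, item list. */
static struct demo_worker {
	atomic_int put;			/* bumped once per enqueued item */
	pthread_mutex_t items_lock;	/* protects the item list */
	pthread_cond_t wq;
	struct demo_work_item *head;	/* LIFO here for brevity; the real list is FIFO */
} worker = {
	.items_lock = PTHREAD_MUTEX_INITIALIZER,
	.wq = PTHREAD_COND_INITIALIZER,
};

/* Like nvgpu_clk_arb_worker_enqueue(): add the item, bump put, wake the worker. */
static void demo_worker_enqueue(struct demo_work_item *item)
{
	pthread_mutex_lock(&worker.items_lock);
	item->next = worker.head;
	worker.head = item;
	atomic_fetch_add(&worker.put, 1);
	pthread_cond_signal(&worker.wq);
	pthread_mutex_unlock(&worker.items_lock);
}

/* Like nvgpu_clk_arb_poll_worker(): sleep until put != get, then drain serially. */
static void *demo_poll_worker(void *arg)
{
	int get = 0;

	(void)arg;
	pthread_mutex_lock(&worker.items_lock);
	for (;;) {
		while (atomic_load(&worker.put) == get)
			pthread_cond_wait(&worker.wq, &worker.items_lock);

		while (worker.head) {
			struct demo_work_item *item = worker.head;

			worker.head = item->next;
			printf("processed %s\n", item->name);
			get++;	/* one finished item, like ++*get in the patch */
		}
	}
	return NULL;
}

int main(void)
{
	pthread_t poll_task;
	struct demo_work_item update_arb = { .name = "update_arb" };
	struct demo_work_item update_vf_table = { .name = "update_vf_table" };

	pthread_create(&poll_task, NULL, demo_poll_worker, NULL);
	demo_worker_enqueue(&update_arb);
	demo_worker_enqueue(&update_vf_table);
	sleep(1);	/* give the worker time to drain, then let the process exit */
	return 0;
}

The sketch keeps the same division of labour as the patch: producers only touch the list, the counter and the wait queue, while the single poller thread owns its private `get` counter, so no work item is ever run concurrently with another.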