@@ -20,6 +20,7 @@
 #include <linux/list.h>
 #include <linux/delay.h>
 #include <linux/highmem.h> /* need for nvmap.h*/
+#include <linux/kthread.h>
 #include <trace/events/gk20a.h>
 #include <linux/scatterlist.h>
 #include <linux/file.h>
@@ -91,8 +92,6 @@ static u32 gk20a_get_channel_watchdog_timeout(struct channel_gk20a *ch);
 
 static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
 				bool clean_all);
-static void gk20a_channel_cancel_job_clean_up(struct channel_gk20a *c,
-				bool wait_for_completion);
 
 /* allocate GPU channel */
 static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f)
@@ -491,7 +490,8 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
 	bool released_job_semaphore = false;
 	bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(ch);
 
-	gk20a_channel_cancel_job_clean_up(ch, true);
+	/* synchronize with actual job cleanup */
+	nvgpu_mutex_acquire(&ch->joblist.cleanup_lock);
 
 	/* ensure no fences are pending */
 	nvgpu_mutex_acquire(&ch->sync_lock);
@@ -533,10 +533,16 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
 	}
 	channel_gk20a_joblist_unlock(ch);
 
+	nvgpu_mutex_release(&ch->joblist.cleanup_lock);
+
 	if (released_job_semaphore)
 		wake_up_interruptible_all(&ch->semaphore_wq);
 
-	gk20a_channel_update(ch, 0);
+	/*
+	 * When closing the channel, this scheduled update holds one ref which
+	 * is waited for before advancing with freeing.
+	 */
+	gk20a_channel_update(ch);
 }
 
 void gk20a_channel_abort(struct channel_gk20a *ch, bool channel_preempt)
@@ -1016,8 +1022,6 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force)
 	ch->update_fn_data = NULL;
 	nvgpu_spinlock_release(&ch->update_fn_lock);
 	cancel_work_sync(&ch->update_fn_work);
-	cancel_delayed_work_sync(&ch->clean_up.wq);
-	cancel_delayed_work_sync(&ch->timeout.wq);
 
 	/* make sure we don't have deferred interrupts pending that
 	 * could still touch the channel */
@@ -1345,7 +1349,6 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g,
 	ch->has_timedout = false;
 	ch->wdt_enabled = true;
 	ch->obj_class = 0;
-	ch->clean_up.scheduled = false;
 	ch->interleave_level = NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW;
 	ch->timeslice_us = g->timeslice_low_priority_us;
 
@@ -2075,6 +2078,30 @@ static void trace_write_pushbuffer_range(struct channel_gk20a *c,
 	nvgpu_kfree(g);
 }
 
+static void __gk20a_channel_timeout_start(struct channel_gk20a *ch)
+{
+	ch->timeout.gp_get = gk20a_userd_gp_get(ch->g, ch);
+	ch->timeout.running = true;
+	nvgpu_timeout_init(ch->g, &ch->timeout.timer,
+			gk20a_get_channel_watchdog_timeout(ch),
+			NVGPU_TIMER_CPU_TIMER);
+}
+
+/**
+ * Start a timeout counter (watchdog) on this channel.
+ *
+ * Trigger a watchdog to recover the channel after the per-platform timeout
+ * duration (but strictly no earlier) if the channel hasn't advanced within
+ * that time.
+ *
+ * If the timeout is already running, do nothing. This should be called when
+ * new jobs are submitted. The timeout will stop when the last tracked job
+ * finishes, making the channel idle.
+ *
+ * The channel's gpfifo read pointer will be used to determine if the job has
+ * actually stuck at that time. After the timeout duration has expired, a
+ * worker thread will consider the channel stuck and recover it if stuck.
+ */
 static void gk20a_channel_timeout_start(struct channel_gk20a *ch)
 {
 	struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
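A minimal standalone sketch of what arming the watchdog amounts to in the
hunk above: snapshot the gpfifo read pointer and set a deadline. Plain C and
wall-clock time stand in for nvgpu_timeout and the spinlock-protected channel
state; struct wdt_state and the wdt_* names are illustrative, not the
driver's own.

#include <stdbool.h>
#include <stdint.h>
#include <time.h>

struct wdt_state {
	bool running;      /* is the watchdog armed? */
	uint32_t gp_get;   /* gpfifo read pointer snapshotted at arm time */
	time_t deadline;   /* stands in for the nvgpu CPU timer */
};

/* Arm: record current progress and when to check back on it. */
static void wdt_start(struct wdt_state *w, uint32_t current_gp_get,
		unsigned int timeout_sec)
{
	w->gp_get = current_gp_get;
	w->deadline = time(NULL) + timeout_sec;
	w->running = true;
}

/* True only if armed and the deadline has passed. */
static bool wdt_expired(const struct wdt_state *w)
{
	return w->running && time(NULL) >= w->deadline;
}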
@@ -2087,94 +2114,108 @@ static void gk20a_channel_timeout_start(struct channel_gk20a *ch)
 
 	nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
 
-	if (ch->timeout.initialized) {
+	if (ch->timeout.running) {
 		nvgpu_raw_spinlock_release(&ch->timeout.lock);
 		return;
 	}
-
-	ch->timeout.gp_get = gk20a_userd_gp_get(ch->g, ch);
-	ch->timeout.initialized = true;
+	__gk20a_channel_timeout_start(ch);
 	nvgpu_raw_spinlock_release(&ch->timeout.lock);
-
-	schedule_delayed_work(&ch->timeout.wq,
-		msecs_to_jiffies(gk20a_get_channel_watchdog_timeout(ch)));
 }
 
-static void gk20a_channel_timeout_stop(struct channel_gk20a *ch)
+/**
+ * Stop a running timeout counter (watchdog) on this channel.
+ *
+ * Make the watchdog consider the channel not running, so that it won't get
+ * recovered even if no progress is detected. Progress is not tracked if the
+ * watchdog is turned off.
+ *
+ * No guarantees are made about concurrent execution of the timeout handler.
+ * (This should be called from an update handler running in the same thread
+ * with the watchdog.)
+ */
+static bool gk20a_channel_timeout_stop(struct channel_gk20a *ch)
 {
+	bool was_running;
+
+	nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
+	was_running = ch->timeout.running;
+	ch->timeout.running = false;
+	nvgpu_raw_spinlock_release(&ch->timeout.lock);
+	return was_running;
+}
+
+/**
+ * Continue a previously stopped timeout
+ *
+ * Enable the timeout again but don't reinitialize its timer.
+ *
+ * No guarantees are made about concurrent execution of the timeout handler.
+ * (This should be called from an update handler running in the same thread
+ * with the watchdog.)
+ */
+static void gk20a_channel_timeout_continue(struct channel_gk20a *ch)
+{
 	nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
-	if (!ch->timeout.initialized) {
-		nvgpu_raw_spinlock_release(&ch->timeout.lock);
-		return;
-	}
-	nvgpu_raw_spinlock_release(&ch->timeout.lock);
-
-	cancel_delayed_work_sync(&ch->timeout.wq);
-
-	nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
-	ch->timeout.initialized = false;
+	ch->timeout.running = true;
 	nvgpu_raw_spinlock_release(&ch->timeout.lock);
 }
 
+/**
+ * Rewind the timeout on each non-dormant channel.
+ *
+ * Reschedule the timeout of each active channel for which timeouts are running
+ * as if something had happened on each channel right now. This should be
+ * called when a global hang is detected that could cause a false positive on
+ * other innocent channels.
+ */
 void gk20a_channel_timeout_restart_all_channels(struct gk20a *g)
 {
-	u32 chid;
 	struct fifo_gk20a *f = &g->fifo;
+	u32 chid;
 
 	for (chid = 0; chid < f->num_channels; chid++) {
 		struct channel_gk20a *ch = &f->channel[chid];
 
-		if (gk20a_channel_get(ch)) {
-			nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
-			if (!ch->timeout.initialized) {
-				nvgpu_raw_spinlock_release(&ch->timeout.lock);
-				gk20a_channel_put(ch);
-				continue;
-			}
-			nvgpu_raw_spinlock_release(&ch->timeout.lock);
+		if (!gk20a_channel_get(ch))
+			continue;
 
-			cancel_delayed_work_sync(&ch->timeout.wq);
-			if (!ch->has_timedout)
-				schedule_delayed_work(&ch->timeout.wq,
-					msecs_to_jiffies(
-					gk20a_get_channel_watchdog_timeout(ch)));
+		nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
+		if (ch->timeout.running)
+			__gk20a_channel_timeout_start(ch);
+		nvgpu_raw_spinlock_release(&ch->timeout.lock);
 
-			gk20a_channel_put(ch);
-		}
+		gk20a_channel_put(ch);
 	}
 }
 
-static void gk20a_channel_timeout_handler(struct work_struct *work)
+/**
+ * Check if a timed out channel has hung and recover it if it has.
+ *
+ * Test if this channel has really got stuck at this point (should be called
+ * when the watchdog timer has expired) by checking if its gp_get has advanced
+ * or not. If no gp_get action happened since when the watchdog was started,
+ * force-reset the channel.
+ *
+ * The gpu is implicitly on at this point, because the watchdog can only run on
+ * channels that have submitted jobs pending for cleanup.
+ */
+static void gk20a_channel_timeout_handler(struct channel_gk20a *ch)
 {
+	struct gk20a *g = ch->g;
 	u32 gp_get;
-	struct gk20a *g;
-	struct channel_gk20a *ch;
-
-	ch = container_of(to_delayed_work(work), struct channel_gk20a,
-			timeout.wq);
-	ch = gk20a_channel_get(ch);
-	if (!ch)
-		return;
 
 	gk20a_dbg_fn("");
 
-	g = ch->g;
-
-	if (gk20a_busy(dev_from_gk20a(g))) {
-		gk20a_channel_put(ch);
-		return;
-	}
-
-	/* Need global lock since multiple channels can timeout at a time */
-	nvgpu_mutex_acquire(&g->ch_wdt_lock);
-
-	/* Get timed out job and reset the timer */
+	/* Get status and clear the timer */
 	nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
 	gp_get = ch->timeout.gp_get;
-	ch->timeout.initialized = false;
+	ch->timeout.running = false;
 	nvgpu_raw_spinlock_release(&ch->timeout.lock);
 
 	if (gk20a_userd_gp_get(ch->g, ch) != gp_get) {
 		/* Channel has advanced, reschedule */
 		gk20a_channel_timeout_start(ch);
-		goto fail_unlock;
+		return;
 	}
 
 	gk20a_err(dev_from_gk20a(g), "Job on channel %d timed out",
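The reworked handler reduces to one decision: has gp_get moved since the
watchdog was armed? A hedged standalone model of that check follows; the
wdt_verdict and wdt_check_progress names are invented for illustration and
do not exist in the driver.

#include <stdint.h>

enum wdt_verdict { WDT_CHANNEL_ADVANCED, WDT_CHANNEL_STUCK };

static enum wdt_verdict wdt_check_progress(uint32_t armed_gp_get,
		uint32_t current_gp_get)
{
	/*
	 * Any movement of the read pointer since arming means the channel
	 * made progress; the caller re-arms instead of recovering.
	 */
	if (current_gp_get != armed_gp_get)
		return WDT_CHANNEL_ADVANCED;

	/* No progress for a whole timeout period: recover the channel. */
	return WDT_CHANNEL_STUCK;
}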
@@ -2185,11 +2226,262 @@ static void gk20a_channel_timeout_handler(struct work_struct *work)
 		g->ops.fifo.force_reset_ch(ch,
 			NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT, true);
 	}
 
-fail_unlock:
-	nvgpu_mutex_release(&g->ch_wdt_lock);
+/**
+ * Test if the per-channel timeout is expired and handle the timeout in that case.
+ *
+ * Each channel has an expiration time based watchdog. The timer is
+ * (re)initialized in two situations: when a new job is submitted on an idle
+ * channel and when the timeout is checked but progress is detected.
+ *
+ * Watchdog timeout does not yet necessarily mean a stuck channel so this may
+ * or may not cause recovery.
+ *
+ * The timeout is stopped (disabled) after the last job in a row finishes
+ * making the channel idle.
+ */
+static void gk20a_channel_timeout_check(struct channel_gk20a *ch)
+{
+	bool timed_out;
+
+	nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
+	timed_out = ch->timeout.running &&
+		nvgpu_timeout_expired(&ch->timeout.timer);
+	nvgpu_raw_spinlock_release(&ch->timeout.lock);
+
+	if (timed_out)
+		gk20a_channel_timeout_handler(ch);
+}
+
+/**
+ * Loop over every living channel, check timeouts and handle stuck channels.
+ */
+static void gk20a_channel_poll_timeouts(struct gk20a *g)
+{
+	unsigned int chid;
+
+	gk20a_dbg_fn("");
+
+	for (chid = 0; chid < g->fifo.num_channels; chid++) {
+		struct channel_gk20a *ch = &g->fifo.channel[chid];
+
+		if (gk20a_channel_get(ch)) {
+			gk20a_channel_timeout_check(ch);
+			gk20a_channel_put(ch);
+		}
+	}
+}
+
+/*
+ * Process one scheduled work item for this channel. Currently, the only thing
+ * the worker does is job cleanup handling.
+ */
+static void gk20a_channel_worker_process_ch(struct channel_gk20a *ch)
+{
+	gk20a_dbg_fn("");
+
+	gk20a_channel_clean_up_jobs(ch, true);
+
+	/* ref taken when enqueued */
 	gk20a_channel_put(ch);
-	gk20a_idle(dev_from_gk20a(g));
 }
 
+/**
+ * Tell the worker that one more work needs to be done.
+ *
+ * Increase the work counter to synchronize the worker with the new work. Wake
+ * up the worker. If the worker was already running, it will handle this work
+ * before going to sleep.
+ */
+static int __gk20a_channel_worker_wakeup(struct gk20a *g)
+{
+	int put;
+
+	gk20a_dbg_fn("");
+
+	/*
+	 * Currently, the only work type is associated with a lock, which deals
+	 * with any necessary barriers. If a work type with no locking were
+	 * added, a wmb() would be needed here. See ..worker_pending() for a
+	 * pair.
+	 */
+
+	put = atomic_inc_return(&g->channel_worker.put);
+	wake_up(&g->channel_worker.wq);
+
+	return put;
+}
+
+/**
+ * Test if there is some work pending.
+ *
+ * This is a pair for __gk20a_channel_worker_wakeup to be called from the
+ * worker. The worker has an internal work counter which is incremented once
+ * per finished work item. This is compared with the number of queued jobs,
+ * which may be channels on the items list or any other types of work.
+ */
+static bool __gk20a_channel_worker_pending(struct gk20a *g, int get)
+{
+	bool pending = atomic_read(&g->channel_worker.put) != get;
+
+	/*
+	 * This would be the place for a rmb() pairing a wmb() for a wakeup
+	 * if we had any work with no implicit barriers caused by locking.
+	 */
+
+	return pending;
+}
+
+/**
+ * Process the queued works for the worker thread serially.
+ *
+ * Flush all the work items in the queue one by one. This may block timeout
+ * handling for a short while, as these are serialized.
+ */
+static void gk20a_channel_worker_process(struct gk20a *g, int *get)
+{
+	gk20a_dbg_fn("");
+
+	while (__gk20a_channel_worker_pending(g, *get)) {
+		struct channel_gk20a *ch;
+
+		/*
+		 * If a channel is on the list, it's guaranteed to be handled
+		 * eventually just once. However, the opposite is not true. A
+		 * channel may be being processed whether or not it's on the
+		 * list.
+		 *
+		 * With this, processing channel works should be conservative
+		 * as follows: it's always safe to look at a channel found in
+		 * the list, and if someone enqueues the channel, it will be
+		 * handled eventually, even if it's being handled at the same
+		 * time. A channel is on the list only once; multiple calls to
+		 * enqueue are harmless.
+		 */
+		nvgpu_spinlock_acquire(&g->channel_worker.items_lock);
+		ch = list_first_entry_or_null(&g->channel_worker.items,
+				struct channel_gk20a,
+				worker_item);
+		if (ch)
+			list_del_init(&ch->worker_item);
+		nvgpu_spinlock_release(&g->channel_worker.items_lock);
+
+		if (!ch) {
+			/*
+			 * Woke up for some other reason, but there are no
+			 * other reasons than a channel added in the items list
+			 * currently, so warn and ack the message.
+			 */
+			gk20a_warn(g->dev, "Spurious worker event!");
+			++*get;
+			break;
+		}
+
+		gk20a_channel_worker_process_ch(ch);
+		++*get;
+	}
+}
+
+/*
+ * Look at channel states periodically, until canceled. Abort timed out
+ * channels serially. Process all work items found in the queue.
+ */
+static int gk20a_channel_poll_worker(void *arg)
+{
+	struct gk20a *g = (struct gk20a *)arg;
+	struct gk20a_channel_worker *worker = &g->channel_worker;
+	unsigned long start_wait;
+	/* event timeout for also polling the watchdog */
+	unsigned long timeout = msecs_to_jiffies(100);
+	int get = 0;
+
+	gk20a_dbg_fn("");
+
+	start_wait = jiffies;
+	while (!kthread_should_stop()) {
+		bool got_events;
+
+		got_events = wait_event_timeout(
+				worker->wq,
+				__gk20a_channel_worker_pending(g, get),
+				timeout) > 0;
+
+		if (got_events)
+			gk20a_channel_worker_process(g, &get);
+
+		if (jiffies - start_wait >= timeout) {
+			gk20a_channel_poll_timeouts(g);
+			start_wait = jiffies;
+		}
+	}
+	return 0;
+}
+
+/**
+ * Initialize the channel worker's metadata and start the background thread.
+ */
+int nvgpu_channel_worker_init(struct gk20a *g)
+{
+	struct task_struct *task;
+
+	atomic_set(&g->channel_worker.put, 0);
+	init_waitqueue_head(&g->channel_worker.wq);
+	INIT_LIST_HEAD(&g->channel_worker.items);
+	nvgpu_spinlock_init(&g->channel_worker.items_lock);
+	task = kthread_run(gk20a_channel_poll_worker, g,
+			"nvgpu_channel_poll_%s", dev_name(g->dev));
+	if (IS_ERR(task)) {
+		gk20a_err(g->dev, "failed to start channel poller thread");
+		return PTR_ERR(task);
+	}
+	g->channel_worker.poll_task = task;
+
+	return 0;
+}
+
+void nvgpu_channel_worker_deinit(struct gk20a *g)
+{
+	kthread_stop(g->channel_worker.poll_task);
+}
+
+/**
+ * Append a channel to the worker's list, if not there already.
+ *
+ * The worker thread processes work items (channels in its work list) and polls
+ * for other things. This adds @ch to the end of the list and wakes the worker
+ * up immediately. If the channel already existed in the list, it's not added,
+ * because in that case it has been scheduled already but has not yet been
+ * processed.
+ */
+void gk20a_channel_worker_enqueue(struct channel_gk20a *ch)
+{
+	struct gk20a *g = ch->g;
+
+	gk20a_dbg_fn("");
+
+	/*
+	 * Ref released when this item gets processed. The caller should hold
+	 * one ref already, so can't fail.
+	 */
+	if (WARN_ON(!gk20a_channel_get(ch))) {
+		gk20a_warn(g->dev, "cannot get ch ref for worker!");
+		return;
+	}
+
+	nvgpu_spinlock_acquire(&g->channel_worker.items_lock);
+	if (!list_empty(&ch->worker_item)) {
+		/*
+		 * Already queued, so will get processed eventually.
+		 * The worker is probably awake already.
+		 */
+		nvgpu_spinlock_release(&g->channel_worker.items_lock);
+		gk20a_channel_put(ch);
+		return;
+	}
+	list_add_tail(&ch->worker_item, &g->channel_worker.items);
+	nvgpu_spinlock_release(&g->channel_worker.items_lock);
+
+	__gk20a_channel_worker_wakeup(g);
+}
 
 int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e)
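The __gk20a_channel_worker_wakeup()/__gk20a_channel_worker_pending() pair
added above is a produced/consumed counter handshake: producers bump "put"
once per item and wake the worker; the worker keeps a private "get" and has
work pending while the two differ. Below is a userspace approximation using
C11 atomics, with a pthread condition variable standing in for the kernel
waitqueue; every name in it is a stand-in, not driver code.

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

struct worker_sync {
	atomic_int put;          /* bumped once per queued work item */
	pthread_mutex_t lock;
	pthread_cond_t wake;     /* stands in for wake_up()/wait_event() */
};

static void worker_sync_init(struct worker_sync *w)
{
	atomic_init(&w->put, 0);
	pthread_mutex_init(&w->lock, NULL);
	pthread_cond_init(&w->wake, NULL);
}

/* Producer side: account for one more item, then wake the worker. */
static void worker_wakeup(struct worker_sync *w)
{
	atomic_fetch_add(&w->put, 1);
	pthread_mutex_lock(&w->lock);
	pthread_cond_signal(&w->wake);
	pthread_mutex_unlock(&w->lock);
}

/* Worker side: is there an item it has not yet accounted for? */
static bool worker_pending(struct worker_sync *w, int get)
{
	return atomic_load(&w->put) != get;
}

/* Worker side: sleep until something is pending (no poll timeout here). */
static void worker_wait(struct worker_sync *w, int get)
{
	pthread_mutex_lock(&w->lock);
	while (!worker_pending(w, get))
		pthread_cond_wait(&w->wake, &w->lock);
	pthread_mutex_unlock(&w->lock);
}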
@@ -2214,32 +2506,6 @@ int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e)
 	return 0;
 }
 
-static void gk20a_channel_schedule_job_clean_up(struct channel_gk20a *c)
-{
-	nvgpu_mutex_acquire(&c->clean_up.lock);
-
-	if (c->clean_up.scheduled) {
-		nvgpu_mutex_release(&c->clean_up.lock);
-		return;
-	}
-
-	c->clean_up.scheduled = true;
-	schedule_delayed_work(&c->clean_up.wq, 1);
-
-	nvgpu_mutex_release(&c->clean_up.lock);
-}
-
-static void gk20a_channel_cancel_job_clean_up(struct channel_gk20a *c,
-				bool wait_for_completion)
-{
-	if (wait_for_completion)
-		cancel_delayed_work_sync(&c->clean_up.wq);
-
-	nvgpu_mutex_acquire(&c->clean_up.lock);
-	c->clean_up.scheduled = false;
-	nvgpu_mutex_release(&c->clean_up.lock);
-}
-
 static int gk20a_channel_add_job(struct channel_gk20a *c,
 				struct channel_gk20a_job *job,
 				bool skip_buffer_refcounting)
@@ -2256,7 +2522,10 @@ static int gk20a_channel_add_job(struct channel_gk20a *c,
 		return err;
 	}
 
-	/* put() is done in gk20a_channel_update() when the job is done */
+	/*
+	 * Ref to hold the channel open during the job lifetime. This is
+	 * released by job cleanup launched via syncpt or sema interrupt.
+	 */
 	c = gk20a_channel_get(c);
 
 	if (c) {
@@ -2291,14 +2560,16 @@ err_put_buffers:
 	return err;
 }
 
-static void gk20a_channel_clean_up_runcb_fn(struct work_struct *work)
-{
-	struct channel_gk20a *c = container_of(to_delayed_work(work),
-			struct channel_gk20a, clean_up.wq);
-
-	gk20a_channel_clean_up_jobs(c, true);
-}
-
+/**
+ * Clean up job resources for further jobs to use.
+ * @clean_all: If true, process as many jobs as possible, otherwise just one.
+ *
+ * Loop all jobs from the joblist until a pending job is found, or just one if
+ * clean_all is not set. Pending jobs are detected from the job's post fence,
+ * so this is only done for jobs that have job tracking resources. Free all
+ * per-job memory for completed jobs; in case of preallocated resources, this
+ * opens up slots for new jobs to be submitted.
+ */
 static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
 					bool clean_all)
 {
@@ -2307,6 +2578,7 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
 	struct gk20a_platform *platform;
 	struct gk20a *g;
 	int job_finished = 0;
+	bool watchdog_on = false;
 
 	c = gk20a_channel_get(c);
 	if (!c)
@@ -2321,13 +2593,25 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
 	g = c->g;
 	platform = gk20a_get_platform(g->dev);
 
-	gk20a_channel_cancel_job_clean_up(c, false);
+	/*
+	 * If !clean_all, we're in a condition where watchdog isn't supported
+	 * anyway (this would be a no-op).
+	 */
+	if (clean_all)
+		watchdog_on = gk20a_channel_timeout_stop(c);
 
+	/* Synchronize with abort cleanup that needs the jobs. */
+	nvgpu_mutex_acquire(&c->joblist.cleanup_lock);
+
 	while (1) {
 		bool completed;
 
 		channel_gk20a_joblist_lock(c);
 		if (channel_gk20a_joblist_is_empty(c)) {
+			/*
+			 * No jobs in flight, timeout will remain stopped until
+			 * new jobs are submitted.
+			 */
 			channel_gk20a_joblist_unlock(c);
 			break;
 		}
@@ -2343,7 +2627,15 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
 
 		completed = gk20a_fence_is_expired(job->post_fence);
 		if (!completed) {
-			gk20a_channel_timeout_start(c);
+			/*
+			 * The watchdog eventually sees an updated gp_get if
+			 * something happened in this loop. A new job can have
+			 * been submitted between the above call to stop and
+			 * this - in that case, this is a no-op and the new
+			 * later timeout is still used.
+			 */
+			if (clean_all && watchdog_on)
+				gk20a_channel_timeout_continue(c);
 			break;
 		}
 
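The two cleanup hunks above bracket the job loop with stop/continue so the
watchdog is resumed only when it was actually armed before cleanup began
(watchdog_on). A minimal sketch of that contract, with struct wdt_state and
the wdt_* names again as invented stand-ins for the gk20a_channel_timeout_*
calls:

#include <stdbool.h>

struct wdt_state {
	bool running;
};

/* Disarm; report whether it was armed so only that case is resumed. */
static bool wdt_stop(struct wdt_state *w)
{
	bool was_running = w->running;

	w->running = false;
	return was_running;
}

/* Resume without touching the deadline, like timeout_continue(). */
static void wdt_continue(struct wdt_state *w)
{
	w->running = true;
}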
@@ -2394,32 +2686,38 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
 		job_finished = 1;
 		gk20a_idle(g->dev);
 
-		if (!clean_all)
+		if (!clean_all) {
+			/* Timeout isn't supported here so don't touch it. */
 			break;
+		}
 	}
 
+	nvgpu_mutex_release(&c->joblist.cleanup_lock);
+
 	if (job_finished && c->update_fn)
 		schedule_work(&c->update_fn_work);
 
 	gk20a_channel_put(c);
 }
 
-void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
+/**
+ * Schedule a job cleanup work on this channel to free resources and to signal
+ * about completion.
+ *
+ * Call this when there has been an interrupt about finished jobs, or when job
+ * cleanup needs to be performed, e.g., when closing a channel. This is always
+ * safe to call even if there is nothing to clean up. Any visible actions on
+ * jobs just before calling this are guaranteed to be processed.
+ */
+void gk20a_channel_update(struct channel_gk20a *c)
 {
 	c = gk20a_channel_get(c);
 	if (!c)
 		return;
 
 	if (!c->g->power_on) { /* shutdown case */
 		gk20a_channel_put(c);
 		return;
 	}
 
 	trace_gk20a_channel_update(c->hw_chid);
-	gk20a_channel_timeout_stop(c);
-	gk20a_channel_schedule_job_clean_up(c);
-
-	gk20a_channel_put(c);
+	/* A queued channel is always checked for job cleanup. */
+	gk20a_channel_worker_enqueue(c);
 }
@@ -2809,7 +3107,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
 	if (c->deterministic && need_deferred_cleanup)
 		return -EINVAL;
 
-	/* gk20a_channel_update releases this ref. */
+	/* released by job cleanup via syncpt or sema interrupt */
 	err = gk20a_busy(g->dev);
 	if (err) {
 		gk20a_err(d, "failed to host gk20a to submit gpfifo, process %s",
@@ -2929,13 +3227,12 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid)
 #endif
 	nvgpu_mutex_init(&c->ioctl_lock);
 	nvgpu_mutex_init(&c->error_notifier_mutex);
+	nvgpu_mutex_init(&c->joblist.cleanup_lock);
 	nvgpu_spinlock_init(&c->joblist.dynamic.lock);
 	nvgpu_mutex_init(&c->joblist.pre_alloc.read_lock);
 	nvgpu_raw_spinlock_init(&c->timeout.lock);
 	nvgpu_mutex_init(&c->sync_lock);
-	INIT_DELAYED_WORK(&c->timeout.wq, gk20a_channel_timeout_handler);
-	INIT_DELAYED_WORK(&c->clean_up.wq, gk20a_channel_clean_up_runcb_fn);
-	nvgpu_mutex_init(&c->clean_up.lock);
 
 	INIT_LIST_HEAD(&c->joblist.dynamic.jobs);
 #if defined(CONFIG_GK20A_CYCLE_STATS)
 	nvgpu_mutex_init(&c->cyclestate.cyclestate_buffer_mutex);
@@ -2947,6 +3244,8 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid)
 	nvgpu_mutex_init(&c->dbg_s_lock);
 	list_add(&c->free_chs, &g->fifo.free_chs);
 
+	INIT_LIST_HEAD(&c->worker_item);
+
 	return 0;
 }
@@ -3384,8 +3683,6 @@ int gk20a_channel_suspend(struct gk20a *g)
 		gk20a_disable_channel_tsg(g, ch);
 		/* preempt the channel */
 		gk20a_fifo_preempt(g, ch);
-		gk20a_channel_timeout_stop(ch);
-		gk20a_channel_cancel_job_clean_up(ch, true);
 		/* wait for channel update notifiers */
 		if (ch->update_fn)
 			cancel_work_sync(&ch->update_fn_work);
@@ -3481,7 +3778,7 @@ void gk20a_channel_semaphore_wakeup(struct gk20a *g, bool post_events)
 			 * semaphore.
 			 */
 			if (!c->deterministic)
-				gk20a_channel_update(c, 0);
+				gk20a_channel_update(c);
 		}
 		gk20a_channel_put(c);
 	}
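One idiom worth calling out from the worker_enqueue hunk: INIT_LIST_HEAD on
worker_item makes the node self-linked, so list_empty() on the node itself
answers "is this channel already queued?", which is what makes repeated
enqueues harmless. A simplified standalone model of that intrusive-list
trick; the helpers below are reduced stand-ins for <linux/list.h>, not the
kernel's implementation.

#include <stdbool.h>

struct list_node {
	struct list_node *prev, *next;
};

static void list_node_init(struct list_node *n)
{
	n->prev = n->next = n;  /* self-linked means "not queued" */
}

static bool list_node_unqueued(const struct list_node *n)
{
	return n->next == n;
}

static void list_append(struct list_node *head, struct list_node *n)
{
	n->prev = head->prev;
	n->next = head;
	head->prev->next = n;
	head->prev = n;
}

/* Enqueue at most once; calling again while queued is a harmless no-op. */
static bool enqueue_once(struct list_node *head, struct list_node *item)
{
	if (!list_node_unqueued(item))
		return false;  /* already pending; the worker will see it */
	list_append(head, item);
	return true;
}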