diff --git a/drivers/gpu/nvgpu/common/nvgpu_common.c b/drivers/gpu/nvgpu/common/nvgpu_common.c
index 521ccd9d6..16640aa6d 100644
--- a/drivers/gpu/nvgpu/common/nvgpu_common.c
+++ b/drivers/gpu/nvgpu/common/nvgpu_common.c
@@ -39,7 +39,6 @@ static void nvgpu_init_vars(struct gk20a *g)
         nvgpu_mutex_init(&platform->railgate_lock);
         nvgpu_mutex_init(&g->dbg_sessions_lock);
         nvgpu_mutex_init(&g->client_lock);
-        nvgpu_mutex_init(&g->ch_wdt_lock);
         nvgpu_mutex_init(&g->poweroff_lock);

         g->regs_saved = g->regs;
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index ef8a3e7d9..6eb1cb068 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -20,6 +20,7 @@
 #include
 #include
 #include /* need for nvmap.h*/
+#include
 #include
 #include
 #include
@@ -91,8 +92,6 @@ static u32 gk20a_get_channel_watchdog_timeout(struct channel_gk20a *ch);
 static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
                                         bool clean_all);
-static void gk20a_channel_cancel_job_clean_up(struct channel_gk20a *c,
-                                        bool wait_for_completion);

 /* allocate GPU channel */
 static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f)
@@ -491,7 +490,8 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
         bool released_job_semaphore = false;
         bool pre_alloc_enabled = channel_gk20a_is_prealloc_enabled(ch);

-        gk20a_channel_cancel_job_clean_up(ch, true);
+        /* synchronize with actual job cleanup */
+        nvgpu_mutex_acquire(&ch->joblist.cleanup_lock);

         /* ensure no fences are pending */
         nvgpu_mutex_acquire(&ch->sync_lock);
@@ -533,10 +533,16 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
         }
         channel_gk20a_joblist_unlock(ch);

+        nvgpu_mutex_release(&ch->joblist.cleanup_lock);
+
         if (released_job_semaphore)
                 wake_up_interruptible_all(&ch->semaphore_wq);

-        gk20a_channel_update(ch, 0);
+        /*
+         * When closing the channel, this scheduled update holds one ref which
+         * is waited for before the free can proceed.
+         */
+        gk20a_channel_update(ch);
 }

 void gk20a_channel_abort(struct channel_gk20a *ch, bool channel_preempt)
@@ -1016,8 +1022,6 @@ static void gk20a_free_channel(struct channel_gk20a *ch, bool force)
         ch->update_fn_data = NULL;
         nvgpu_spinlock_release(&ch->update_fn_lock);
         cancel_work_sync(&ch->update_fn_work);
-        cancel_delayed_work_sync(&ch->clean_up.wq);
-        cancel_delayed_work_sync(&ch->timeout.wq);

         /* make sure we don't have deferred interrupts pending that
          * could still touch the channel */
@@ -1345,7 +1349,6 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g,
         ch->has_timedout = false;
         ch->wdt_enabled = true;
         ch->obj_class = 0;
-        ch->clean_up.scheduled = false;
         ch->interleave_level = NVGPU_RUNLIST_INTERLEAVE_LEVEL_LOW;
         ch->timeslice_us = g->timeslice_low_priority_us;

@@ -2075,6 +2078,30 @@ static void trace_write_pushbuffer_range(struct channel_gk20a *c,
         nvgpu_kfree(g);
 }

+static void __gk20a_channel_timeout_start(struct channel_gk20a *ch)
+{
+        ch->timeout.gp_get = gk20a_userd_gp_get(ch->g, ch);
+        ch->timeout.running = true;
+        nvgpu_timeout_init(ch->g, &ch->timeout.timer,
+                        gk20a_get_channel_watchdog_timeout(ch),
+                        NVGPU_TIMER_CPU_TIMER);
+}
+
+/**
+ * Start a timeout counter (watchdog) on this channel.
+ *
+ * Trigger a watchdog to recover the channel after the per-platform timeout
+ * duration (but strictly no earlier) if the channel hasn't advanced within
+ * that time.
+ *
+ * If the timeout is already running, do nothing. This should be called when
+ * new jobs are submitted. The timeout will stop when the last tracked job
+ * finishes, making the channel idle.
+ *
+ * The channel's gpfifo read pointer will be used to determine whether the
+ * job is actually stuck at that point. After the timeout duration has
+ * expired, a worker thread will consider the channel stuck and recover it if
+ * no progress is detected.
+ */
 static void gk20a_channel_timeout_start(struct channel_gk20a *ch)
 {
         struct gk20a_platform *platform = gk20a_get_platform(ch->g->dev);
@@ -2087,94 +2114,108 @@ static void gk20a_channel_timeout_start(struct channel_gk20a *ch)

         nvgpu_raw_spinlock_acquire(&ch->timeout.lock);

-        if (ch->timeout.initialized) {
+        if (ch->timeout.running) {
                 nvgpu_raw_spinlock_release(&ch->timeout.lock);
                 return;
         }
-
-        ch->timeout.gp_get = gk20a_userd_gp_get(ch->g, ch);
-        ch->timeout.initialized = true;
+        __gk20a_channel_timeout_start(ch);
         nvgpu_raw_spinlock_release(&ch->timeout.lock);
-
-        schedule_delayed_work(&ch->timeout.wq,
-                msecs_to_jiffies(gk20a_get_channel_watchdog_timeout(ch)));
 }

-static void gk20a_channel_timeout_stop(struct channel_gk20a *ch)
+/**
+ * Stop a running timeout counter (watchdog) on this channel.
+ *
+ * Make the watchdog consider the channel not running, so that it won't get
+ * recovered even if no progress is detected. Progress is not tracked if the
+ * watchdog is turned off.
+ *
+ * No guarantees are made about concurrent execution of the timeout handler.
+ * (This should be called from an update handler running in the same thread
+ * as the watchdog.)
+ */
+static bool gk20a_channel_timeout_stop(struct channel_gk20a *ch)
+{
+        bool was_running;
+
+        nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
+        was_running = ch->timeout.running;
+        ch->timeout.running = false;
+        nvgpu_raw_spinlock_release(&ch->timeout.lock);
+        return was_running;
+}
+
+/**
+ * Continue a previously stopped timeout.
+ *
+ * Enable the timeout again but don't reinitialize its timer.
+ *
+ * No guarantees are made about concurrent execution of the timeout handler.
+ * (This should be called from an update handler running in the same thread
+ * as the watchdog.)
+ */
+static void gk20a_channel_timeout_continue(struct channel_gk20a *ch)
 {
         nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
-        if (!ch->timeout.initialized) {
-                nvgpu_raw_spinlock_release(&ch->timeout.lock);
-                return;
-        }
-        nvgpu_raw_spinlock_release(&ch->timeout.lock);
-
-        cancel_delayed_work_sync(&ch->timeout.wq);
-
-        nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
-        ch->timeout.initialized = false;
+        ch->timeout.running = true;
         nvgpu_raw_spinlock_release(&ch->timeout.lock);
 }
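The stop/continue pair above deliberately reports the previous state so that job cleanup can restore exactly what it found. The pattern is easy to exercise outside the kernel; a minimal userspace C sketch, with a pthread mutex standing in for nvgpu's raw spinlock and all names invented for illustration:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct watchdog {
        pthread_mutex_t lock;
        bool running;
        unsigned long deadline; /* untouched by stop/continue */
};

static bool watchdog_stop(struct watchdog *w)
{
        bool was_running;

        pthread_mutex_lock(&w->lock);
        was_running = w->running;
        w->running = false;
        pthread_mutex_unlock(&w->lock);
        return was_running;
}

static void watchdog_continue(struct watchdog *w)
{
        pthread_mutex_lock(&w->lock);
        w->running = true; /* re-arm without resetting the deadline */
        pthread_mutex_unlock(&w->lock);
}

int main(void)
{
        struct watchdog w = { PTHREAD_MUTEX_INITIALIZER, true, 1000 };
        bool was_on = watchdog_stop(&w);

        /* ... process completed jobs without racing the watchdog ... */

        if (was_on) /* only re-arm if we were the ones to pause it */
                watchdog_continue(&w);
        printf("watchdog running again: %d\n", w.running);
        return 0;
}

Returning the old state from stop() is what lets a later "continue" be conditional, as the cleanup path below does with watchdog_on.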

+/**
+ * Rewind the timeout on each non-dormant channel.
+ *
+ * Reschedule the timeout of each active channel for which timeouts are
+ * running, as if something had just happened on each channel. This should be
+ * called when a global hang is detected that could cause a false positive on
+ * other innocent channels.
+ */
 void gk20a_channel_timeout_restart_all_channels(struct gk20a *g)
 {
-        u32 chid;
         struct fifo_gk20a *f = &g->fifo;
+        u32 chid;

         for (chid = 0; chid < f->num_channels; chid++) {
                 struct channel_gk20a *ch = &f->channel[chid];

-                if (gk20a_channel_get(ch)) {
-                        nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
-                        if (!ch->timeout.initialized) {
-                                nvgpu_raw_spinlock_release(&ch->timeout.lock);
-                                gk20a_channel_put(ch);
-                                continue;
-                        }
-                        nvgpu_raw_spinlock_release(&ch->timeout.lock);
+                if (!gk20a_channel_get(ch))
+                        continue;

-                        cancel_delayed_work_sync(&ch->timeout.wq);
-                        if (!ch->has_timedout)
-                                schedule_delayed_work(&ch->timeout.wq,
-                                        msecs_to_jiffies(
-                                        gk20a_get_channel_watchdog_timeout(ch)));
+                nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
+                if (ch->timeout.running)
+                        __gk20a_channel_timeout_start(ch);
+                nvgpu_raw_spinlock_release(&ch->timeout.lock);

-                        gk20a_channel_put(ch);
-                }
+                gk20a_channel_put(ch);
         }
 }

-static void gk20a_channel_timeout_handler(struct work_struct *work)
+/**
+ * Check if a timed out channel has hung and recover it if it has.
+ *
+ * Test whether this channel has really gotten stuck at this point (this
+ * should be called when the watchdog timer has expired) by checking whether
+ * its gp_get has advanced. If gp_get has not moved since the watchdog was
+ * started, force-reset the channel.
+ *
+ * The GPU is implicitly powered on at this point, because the watchdog can
+ * only run on channels that have submitted jobs pending for cleanup.
+ */
+static void gk20a_channel_timeout_handler(struct channel_gk20a *ch)
 {
+        struct gk20a *g = ch->g;
         u32 gp_get;
-        struct gk20a *g;
-        struct channel_gk20a *ch;

-        ch = container_of(to_delayed_work(work), struct channel_gk20a,
-                        timeout.wq);
-        ch = gk20a_channel_get(ch);
-        if (!ch)
-                return;
+        gk20a_dbg_fn("");

-        g = ch->g;
-
-        if (gk20a_busy(dev_from_gk20a(g))) {
-                gk20a_channel_put(ch);
-                return;
-        }
-
-        /* Need global lock since multiple channels can timeout at a time */
-        nvgpu_mutex_acquire(&g->ch_wdt_lock);
-
-        /* Get timed out job and reset the timer */
+        /* Get status and clear the timer */
         nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
         gp_get = ch->timeout.gp_get;
-        ch->timeout.initialized = false;
+        ch->timeout.running = false;
         nvgpu_raw_spinlock_release(&ch->timeout.lock);

         if (gk20a_userd_gp_get(ch->g, ch) != gp_get) {
+                /* Channel has advanced, reschedule */
                 gk20a_channel_timeout_start(ch);
-                goto fail_unlock;
+                return;
         }

         gk20a_err(dev_from_gk20a(g), "Job on channel %d timed out",
@@ -2185,11 +2226,262 @@ static void gk20a_channel_timeout_handler(struct work_struct *work)

         g->ops.fifo.force_reset_ch(ch, NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT, true);
+}

-fail_unlock:
-        nvgpu_mutex_release(&g->ch_wdt_lock);
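The handler's hung-channel test reduces to comparing a snapshot of the gpfifo read pointer against its live value: if the pointer moved, the channel made progress and the watchdog simply re-arms. A self-contained C sketch of that progress check, using a made-up channel_sim type rather than the driver's real structures:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct channel_sim {
        uint32_t gp_get;       /* live read pointer */
        uint32_t wdt_snapshot; /* value when the watchdog was armed */
};

static void wdt_arm(struct channel_sim *ch)
{
        ch->wdt_snapshot = ch->gp_get;
}

/* Returns true if the channel must be recovered. */
static bool wdt_expired_check(struct channel_sim *ch)
{
        if (ch->gp_get != ch->wdt_snapshot) {
                wdt_arm(ch); /* progress was made: re-arm, don't recover */
                return false;
        }
        return true; /* no progress for a full timeout period: hung */
}

int main(void)
{
        struct channel_sim ch = { .gp_get = 10 };

        wdt_arm(&ch);
        ch.gp_get = 12; /* hardware consumed two entries */
        printf("recover? %d\n", wdt_expired_check(&ch)); /* 0 */
        printf("recover? %d\n", wdt_expired_check(&ch)); /* 1: stalled */
        return 0;
}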
+/**
+ * Test if the per-channel timeout has expired and handle it in that case.
+ *
+ * Each channel has an expiration-time-based watchdog. The timer is
+ * (re)initialized in two situations: when a new job is submitted on an idle
+ * channel and when the timeout is checked but progress is detected.
+ *
+ * A watchdog timeout does not yet necessarily mean a stuck channel, so this
+ * may or may not cause recovery.
+ *
+ * The timeout is stopped (disabled) after the last job in a row finishes,
+ * making the channel idle.
+ */
+static void gk20a_channel_timeout_check(struct channel_gk20a *ch)
+{
+        bool timed_out;
+
+        nvgpu_raw_spinlock_acquire(&ch->timeout.lock);
+        timed_out = ch->timeout.running &&
+                nvgpu_timeout_expired(&ch->timeout.timer);
+        nvgpu_raw_spinlock_release(&ch->timeout.lock);
+
+        if (timed_out)
+                gk20a_channel_timeout_handler(ch);
+}
+
+/**
+ * Loop over every living channel, check timeouts and handle stuck channels.
+ */
+static void gk20a_channel_poll_timeouts(struct gk20a *g)
+{
+        unsigned int chid;
+
+        gk20a_dbg_fn("");
+
+        for (chid = 0; chid < g->fifo.num_channels; chid++) {
+                struct channel_gk20a *ch = &g->fifo.channel[chid];
+
+                if (gk20a_channel_get(ch)) {
+                        gk20a_channel_timeout_check(ch);
+                        gk20a_channel_put(ch);
+                }
+        }
+}
+
+/*
+ * Process one scheduled work item for this channel. Currently, the only thing
+ * the worker does is job cleanup handling.
+ */
+static void gk20a_channel_worker_process_ch(struct channel_gk20a *ch)
+{
+        gk20a_dbg_fn("");
+
+        gk20a_channel_clean_up_jobs(ch, true);
+
+        /* ref taken when enqueued */
         gk20a_channel_put(ch);
-        gk20a_idle(dev_from_gk20a(g));
+}
+
+/**
+ * Tell the worker that one more work item needs to be done.
+ *
+ * Increase the work counter to synchronize the worker with the new work. Wake
+ * up the worker. If the worker was already running, it will handle this work
+ * before going to sleep.
+ */
+static int __gk20a_channel_worker_wakeup(struct gk20a *g)
+{
+        int put;
+
+        gk20a_dbg_fn("");
+
+        /*
+         * Currently, the only work type is associated with a lock, which deals
+         * with any necessary barriers. If a work type with no locking were
+         * added, a wmb() would be needed here. See ..worker_pending() for a
+         * pair.
+         */
+
+        put = atomic_inc_return(&g->channel_worker.put);
+        wake_up(&g->channel_worker.wq);
+
+        return put;
+}
+
+/**
+ * Test if there is some work pending.
+ *
+ * This is the pair of __gk20a_channel_worker_wakeup; it is called from the
+ * worker. The worker has an internal work counter which is incremented once
+ * per finished work item. This is compared with the number of queued jobs,
+ * which may be channels on the items list or any other types of work.
+ */
+static bool __gk20a_channel_worker_pending(struct gk20a *g, int get)
+{
+        bool pending = atomic_read(&g->channel_worker.put) != get;
+
+        /*
+         * This would be the place for a rmb() pairing a wmb() for a wakeup
+         * if we had any work with no implicit barriers caused by locking.
+         */
+
+        return pending;
+}
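The put/get counter described in the two comments above is a small event count: the producer side only increments an atomic "put", and the consumer keeps a private "get" of how much it has acknowledged; work is pending while the two differ. A runnable userspace C11 analogue, with a pthread condvar in place of the kernel wait queue (illustrative, not the driver's actual code):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int put;
static pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;

static void worker_wakeup(void)
{
        atomic_fetch_add(&put, 1); /* publish one more unit of work */
        pthread_mutex_lock(&mtx);
        pthread_cond_signal(&cond);
        pthread_mutex_unlock(&mtx);
}

static void *worker(void *arg)
{
        int get = 0;

        pthread_mutex_lock(&mtx);
        while (get < 3) { /* stop condition for the demo */
                while (atomic_load(&put) == get)
                        pthread_cond_wait(&cond, &mtx);
                ++get; /* one unit of work acknowledged */
                printf("processed item %d\n", get);
        }
        pthread_mutex_unlock(&mtx);
        return arg;
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, worker, NULL);
        for (int i = 0; i < 3; i++)
                worker_wakeup();
        pthread_join(t, NULL);
        return 0;
}

Because the worker re-checks put against get before sleeping, a wakeup that arrives while it is busy is never lost - the same property wait_event_timeout() gives the kernel version.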

+/**
+ * Process the queued work items for the worker thread serially.
+ *
+ * Flush all the work items in the queue one by one. This may block timeout
+ * handling for a short while, as these are serialized.
+ */
+static void gk20a_channel_worker_process(struct gk20a *g, int *get)
+{
+        gk20a_dbg_fn("");
+
+        while (__gk20a_channel_worker_pending(g, *get)) {
+                struct channel_gk20a *ch;
+
+                /*
+                 * If a channel is on the list, it's guaranteed to be handled
+                 * eventually just once. However, the opposite is not true: a
+                 * channel may be under processing whether or not it is on the
+                 * list.
+                 *
+                 * With this, code that processes channel work items should be
+                 * conservative as follows: it's always safe to look at a
+                 * channel found in the list, and if someone enqueues the
+                 * channel, it will be handled eventually, even if it's being
+                 * handled at the same time. A channel is on the list only
+                 * once; multiple calls to enqueue are harmless.
+                 */
+                nvgpu_spinlock_acquire(&g->channel_worker.items_lock);
+                ch = list_first_entry_or_null(&g->channel_worker.items,
+                                struct channel_gk20a,
+                                worker_item);
+                if (ch)
+                        list_del_init(&ch->worker_item);
+                nvgpu_spinlock_release(&g->channel_worker.items_lock);
+
+                if (!ch) {
+                        /*
+                         * Woke up for some other reason, but there are no
+                         * other reasons than a channel added in the items list
+                         * currently, so warn and ack the message.
+                         */
+                        gk20a_warn(g->dev, "Spurious worker event!");
+                        ++*get;
+                        break;
+                }
+
+                gk20a_channel_worker_process_ch(ch);
+                ++*get;
+        }
+}
+
+/*
+ * Look at channel states periodically, until canceled. Abort timed-out
+ * channels serially. Process all work items found in the queue.
+ */
+static int gk20a_channel_poll_worker(void *arg)
+{
+        struct gk20a *g = (struct gk20a *)arg;
+        struct gk20a_channel_worker *worker = &g->channel_worker;
+        unsigned long start_wait;
+        /* event timeout for also polling the watchdog */
+        unsigned long timeout = msecs_to_jiffies(100);
+        int get = 0;
+
+        gk20a_dbg_fn("");
+
+        start_wait = jiffies;
+        while (!kthread_should_stop()) {
+                bool got_events;
+
+                got_events = wait_event_timeout(
+                                worker->wq,
+                                __gk20a_channel_worker_pending(g, get),
+                                timeout) > 0;
+
+                if (got_events)
+                        gk20a_channel_worker_process(g, &get);
+
+                if (jiffies - start_wait >= timeout) {
+                        gk20a_channel_poll_timeouts(g);
+                        start_wait = jiffies;
+                }
+        }
+        return 0;
+}
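Note the "jiffies - start_wait >= timeout" form in the poll loop: unsigned subtraction keeps the comparison correct across tick-counter wraparound, where the naive "jiffies >= start_wait + timeout" would not. A standalone demonstration with a 32-bit counter (illustrative only; the kernel's jiffies is unsigned long):

#include <stdint.h>
#include <stdio.h>

static int period_elapsed(uint32_t now, uint32_t start, uint32_t period)
{
        /* Wrap-safe: unsigned arithmetic is well defined modulo 2^32. */
        return now - start >= period;
}

int main(void)
{
        uint32_t start = UINT32_MAX - 5; /* armed just before wraparound */

        printf("%d\n", period_elapsed(start + 3, start, 10));  /* 0 */
        printf("%d\n", period_elapsed(start + 15, start, 10)); /* 1, despite wrap */
        return 0;
}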

+/**
+ * Initialize the channel worker's metadata and start the background thread.
+ */
+int nvgpu_channel_worker_init(struct gk20a *g)
+{
+        struct task_struct *task;
+
+        atomic_set(&g->channel_worker.put, 0);
+        init_waitqueue_head(&g->channel_worker.wq);
+        INIT_LIST_HEAD(&g->channel_worker.items);
+        nvgpu_spinlock_init(&g->channel_worker.items_lock);
+        task = kthread_run(gk20a_channel_poll_worker, g,
+                        "nvgpu_channel_poll_%s", dev_name(g->dev));
+        if (IS_ERR(task)) {
+                gk20a_err(g->dev, "failed to start channel poller thread");
+                return PTR_ERR(task);
+        }
+        g->channel_worker.poll_task = task;
+
+        return 0;
+}
+
+void nvgpu_channel_worker_deinit(struct gk20a *g)
+{
+        kthread_stop(g->channel_worker.poll_task);
+}
+
+/**
+ * Append a channel to the worker's list, if not there already.
+ *
+ * The worker thread processes work items (channels in its work list) and
+ * periodically polls the channel watchdogs. This adds @ch to the end of the
+ * list and wakes the worker up immediately. If the channel is already on the
+ * list, it's not added again, because in that case it has been scheduled
+ * already but has not yet been processed.
+ */
+void gk20a_channel_worker_enqueue(struct channel_gk20a *ch)
+{
+        struct gk20a *g = ch->g;
+
+        gk20a_dbg_fn("");
+
+        /*
+         * Ref released when this item gets processed. The caller should hold
+         * one ref already, so this can't fail.
+         */
+        if (WARN_ON(!gk20a_channel_get(ch))) {
+                gk20a_warn(g->dev, "cannot get ch ref for worker!");
+                return;
+        }
+
+        nvgpu_spinlock_acquire(&g->channel_worker.items_lock);
+        if (!list_empty(&ch->worker_item)) {
+                /*
+                 * Already queued, so will get processed eventually.
+                 * The worker is probably awake already.
+                 */
+                nvgpu_spinlock_release(&g->channel_worker.items_lock);
+                gk20a_channel_put(ch);
+                return;
+        }
+        list_add_tail(&ch->worker_item, &g->channel_worker.items);
+        nvgpu_spinlock_release(&g->channel_worker.items_lock);
+
+        __gk20a_channel_worker_wakeup(g);
 }

 int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e)
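The enqueue-once test above works because list_del_init() leaves a removed node pointing at itself, so list_empty() on the node itself answers "am I queued?" in O(1). A single-threaded sketch of the same idiom with a hand-rolled intrusive list (the kernel's list.h provides the real one):

#include <stdbool.h>
#include <stdio.h>

struct list_node {
        struct list_node *prev, *next;
};

static void list_init(struct list_node *n) { n->prev = n->next = n; }
static bool list_unqueued(struct list_node *n) { return n->next == n; }

static void list_add_tail_(struct list_node *n, struct list_node *head)
{
        n->prev = head->prev;
        n->next = head;
        head->prev->next = n;
        head->prev = n;
}

static void list_del_init_(struct list_node *n)
{
        n->prev->next = n->next;
        n->next->prev = n->prev;
        list_init(n); /* key detail: the node points at itself again */
}

static bool enqueue_once(struct list_node *item, struct list_node *queue)
{
        if (!list_unqueued(item))
                return false; /* already scheduled, nothing to do */
        list_add_tail_(item, queue);
        return true;
}

int main(void)
{
        struct list_node queue, ch;

        list_init(&queue);
        list_init(&ch);
        printf("first enqueue:  %d\n", enqueue_once(&ch, &queue)); /* 1 */
        printf("second enqueue: %d\n", enqueue_once(&ch, &queue)); /* 0 */
        list_del_init_(&ch); /* dequeued: eligible to be queued again */
        printf("after dequeue:  %d\n", enqueue_once(&ch, &queue)); /* 1 */
        return 0;
}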
@@ -2214,32 +2506,6 @@ int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e)
         return 0;
 }

-static void gk20a_channel_schedule_job_clean_up(struct channel_gk20a *c)
-{
-        nvgpu_mutex_acquire(&c->clean_up.lock);
-
-        if (c->clean_up.scheduled) {
-                nvgpu_mutex_release(&c->clean_up.lock);
-                return;
-        }
-
-        c->clean_up.scheduled = true;
-        schedule_delayed_work(&c->clean_up.wq, 1);
-
-        nvgpu_mutex_release(&c->clean_up.lock);
-}
-
-static void gk20a_channel_cancel_job_clean_up(struct channel_gk20a *c,
-                                bool wait_for_completion)
-{
-        if (wait_for_completion)
-                cancel_delayed_work_sync(&c->clean_up.wq);
-
-        nvgpu_mutex_acquire(&c->clean_up.lock);
-        c->clean_up.scheduled = false;
-        nvgpu_mutex_release(&c->clean_up.lock);
-}
-
 static int gk20a_channel_add_job(struct channel_gk20a *c,
                                  struct channel_gk20a_job *job,
                                  bool skip_buffer_refcounting)
@@ -2256,7 +2522,10 @@ static int gk20a_channel_add_job(struct channel_gk20a *c,
                 return err;
         }

-        /* put() is done in gk20a_channel_update() when the job is done */
+        /*
+         * Ref to hold the channel open during the job lifetime. This is
+         * released by job cleanup launched via syncpt or sema interrupt.
+         */
         c = gk20a_channel_get(c);

         if (c) {
@@ -2291,14 +2560,16 @@ err_put_buffers:
         return err;
 }

-static void gk20a_channel_clean_up_runcb_fn(struct work_struct *work)
-{
-        struct channel_gk20a *c = container_of(to_delayed_work(work),
-                        struct channel_gk20a, clean_up.wq);
-
-        gk20a_channel_clean_up_jobs(c, true);
-}
-
+/**
+ * Clean up job resources for further jobs to use.
+ * @clean_all: If true, process as many jobs as possible, otherwise just one.
+ *
+ * Loop over all jobs from the joblist until a pending job is found, or just
+ * one if clean_all is not set. Pending jobs are detected from the job's post
+ * fence, so this is only done for jobs that have job tracking resources. Free
+ * all per-job memory for completed jobs; in case of preallocated resources,
+ * this opens up slots for new jobs to be submitted.
+ */
 static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
                                         bool clean_all)
 {
@@ -2307,6 +2578,7 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
         struct gk20a_platform *platform;
         struct gk20a *g;
         int job_finished = 0;
+        bool watchdog_on = false;

         c = gk20a_channel_get(c);
         if (!c)
@@ -2321,13 +2593,25 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
         g = c->g;
         platform = gk20a_get_platform(g->dev);

-        gk20a_channel_cancel_job_clean_up(c, false);
+        /*
+         * If !clean_all, we're in a condition where the watchdog isn't
+         * supported anyway (this would be a no-op).
+         */
+        if (clean_all)
+                watchdog_on = gk20a_channel_timeout_stop(c);
+
+        /* Synchronize with abort cleanup that needs the jobs. */
+        nvgpu_mutex_acquire(&c->joblist.cleanup_lock);

         while (1) {
                 bool completed;

                 channel_gk20a_joblist_lock(c);
                 if (channel_gk20a_joblist_is_empty(c)) {
+                        /*
+                         * No jobs in flight, timeout will remain stopped until
+                         * new jobs are submitted.
+                         */
                         channel_gk20a_joblist_unlock(c);
                         break;
                 }
@@ -2343,7 +2627,15 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
                 completed = gk20a_fence_is_expired(job->post_fence);
                 if (!completed) {
-                        gk20a_channel_timeout_start(c);
+                        /*
+                         * The watchdog eventually sees an updated gp_get if
+                         * something happened in this loop. A new job can have
+                         * been submitted between the above call to stop and
+                         * this point - in that case, this is a no-op and the
+                         * newer timeout is still used.
+                         */
+                        if (clean_all && watchdog_on)
+                                gk20a_channel_timeout_continue(c);
                         break;
                 }

@@ -2394,32 +2686,38 @@ static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
                 job_finished = 1;
                 gk20a_idle(g->dev);

-                if (!clean_all)
+                if (!clean_all) {
+                        /* Timeout isn't supported here so don't touch it. */
                         break;
+                }
         }

+        nvgpu_mutex_release(&c->joblist.cleanup_lock);
+
         if (job_finished && c->update_fn)
                 schedule_work(&c->update_fn_work);

         gk20a_channel_put(c);
 }

-void gk20a_channel_update(struct channel_gk20a *c, int nr_completed)
+/**
+ * Schedule a job cleanup work on this channel to free resources and to signal
+ * about completion.
+ *
+ * Call this when there has been an interrupt about finished jobs, or when job
+ * cleanup needs to be performed, e.g., when closing a channel. This is always
+ * safe to call even if there is nothing to clean up. Any visible actions on
+ * jobs just before calling this are guaranteed to be processed.
+ */
+void gk20a_channel_update(struct channel_gk20a *c)
 {
-        c = gk20a_channel_get(c);
-        if (!c)
-                return;
-
         if (!c->g->power_on) { /* shutdown case */
-                gk20a_channel_put(c);
                 return;
         }

         trace_gk20a_channel_update(c->hw_chid);
-        gk20a_channel_timeout_stop(c);
-        gk20a_channel_schedule_job_clean_up(c);
-
-        gk20a_channel_put(c);
+        /* A queued channel is always checked for job cleanup. */
+        gk20a_channel_worker_enqueue(c);
 }

 static void gk20a_submit_append_priv_cmdbuf(struct channel_gk20a *c,
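The reference discipline running through add_job, update and cleanup is: whoever queues asynchronous work takes a channel ref, and the code that eventually runs the work drops it. A userspace C11 sketch of that lifetime rule, with invented names and a simplified get that never fails (the driver's channel_get also refuses dying channels):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct object {
        atomic_int refs;
};

static bool object_get(struct object *o)
{
        atomic_fetch_add(&o->refs, 1);
        return true;
}

static void object_put(struct object *o)
{
        if (atomic_fetch_sub(&o->refs, 1) == 1) {
                printf("last ref dropped, freeing\n");
                free(o);
        }
}

static void process_work(struct object *o)
{
        printf("processing\n");
        object_put(o); /* pairs with the get taken at enqueue time */
}

int main(void)
{
        struct object *o = malloc(sizeof(*o));

        atomic_init(&o->refs, 1); /* caller's own reference */
        object_get(o);            /* enqueue: work holds the object alive */
        object_put(o);            /* caller closes its handle */
        process_work(o);          /* deferred work runs later, frees last ref */
        return 0;
}

This is why gk20a_channel_update() no longer takes its own ref: the worker enqueue takes one, and gk20a_channel_worker_process_ch() drops it when the cleanup has run.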
@@ -2809,7 +3107,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
         if (c->deterministic && need_deferred_cleanup)
                 return -EINVAL;

-        /* gk20a_channel_update releases this ref. */
+        /* released by job cleanup via syncpt or sema interrupt */
         err = gk20a_busy(g->dev);
         if (err) {
                 gk20a_err(d, "failed to host gk20a to submit gpfifo, process %s",
@@ -2929,13 +3227,12 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid)
 #endif
         nvgpu_mutex_init(&c->ioctl_lock);
         nvgpu_mutex_init(&c->error_notifier_mutex);
+        nvgpu_mutex_init(&c->joblist.cleanup_lock);
         nvgpu_spinlock_init(&c->joblist.dynamic.lock);
         nvgpu_mutex_init(&c->joblist.pre_alloc.read_lock);
         nvgpu_raw_spinlock_init(&c->timeout.lock);
         nvgpu_mutex_init(&c->sync_lock);
-        INIT_DELAYED_WORK(&c->timeout.wq, gk20a_channel_timeout_handler);
-        INIT_DELAYED_WORK(&c->clean_up.wq, gk20a_channel_clean_up_runcb_fn);
-        nvgpu_mutex_init(&c->clean_up.lock);
+        INIT_LIST_HEAD(&c->joblist.dynamic.jobs);
 #if defined(CONFIG_GK20A_CYCLE_STATS)
         nvgpu_mutex_init(&c->cyclestate.cyclestate_buffer_mutex);
@@ -2947,6 +3244,8 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid)
         nvgpu_mutex_init(&c->dbg_s_lock);
         list_add(&c->free_chs, &g->fifo.free_chs);

+        INIT_LIST_HEAD(&c->worker_item);
+
         return 0;
 }

@@ -3384,8 +3683,6 @@ int gk20a_channel_suspend(struct gk20a *g)
                         gk20a_disable_channel_tsg(g, ch);
                         /* preempt the channel */
                         gk20a_fifo_preempt(g, ch);
-                        gk20a_channel_timeout_stop(ch);
-                        gk20a_channel_cancel_job_clean_up(ch, true);
                         /* wait for channel update notifiers */
                         if (ch->update_fn)
                                 cancel_work_sync(&ch->update_fn_work);
@@ -3481,7 +3778,7 @@ void gk20a_channel_semaphore_wakeup(struct gk20a *g, bool post_events)
                          * semaphore.
                          */
                         if (!c->deterministic)
-                                gk20a_channel_update(c, 0);
+                                gk20a_channel_update(c);
                 }
                 gk20a_channel_put(c);
         }
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
index 14ee9f69e..d9913cd7b 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h
@@ -27,6 +27,7 @@

 #include
 #include
+#include

 struct gk20a;
 struct gr_gk20a;
@@ -87,12 +88,19 @@ struct channel_gk20a_joblist {
                 struct list_head jobs;
                 struct nvgpu_spinlock lock;
         } dynamic;
+
+        /*
+         * Synchronize abort cleanup (when closing a channel) and job cleanup
+         * (asynchronously from the worker) - protect from concurrent access
+         * when job resources are being freed.
+         */
+        struct nvgpu_mutex cleanup_lock;
 };

 struct channel_gk20a_timeout {
-        struct delayed_work wq;
         struct nvgpu_raw_spinlock lock;
-        bool initialized;
+        struct nvgpu_timeout timer;
+        bool running;
         u32 gp_get;
 };

@@ -110,12 +118,6 @@ struct gk20a_event_id_data {
         struct list_head event_id_node;
 };

-struct channel_gk20a_clean_up {
-        struct nvgpu_mutex lock;
-        bool scheduled;
-        struct delayed_work wq;
-};
-
 /*
  * Track refcount actions, saving their stack traces. This number specifies how
  * many most recent actions are stored in a buffer. Set to 0 to disable. 128
@@ -214,7 +216,8 @@ struct channel_gk20a {
         u32 timeout_gpfifo_get;

         struct channel_gk20a_timeout timeout;
-        struct channel_gk20a_clean_up clean_up;
+        /* for job cleanup handling in the background worker */
+        struct list_head worker_item;

 #if defined(CONFIG_GK20A_CYCLE_STATS)
         struct {
@@ -250,8 +253,11 @@ struct channel_gk20a {
         u64 virt_ctx;
 #endif

-        /* signal channel owner via a callback, if set, in gk20a_channel_update
-         * via schedule_work */
+        /*
+         * Signal channel owner via a callback, if set, in job cleanup with
+         * schedule_work. Means that something finished on the channel (perhaps
+         * more than one job).
+         */
         void (*update_fn)(struct channel_gk20a *, void *);
         void *update_fn_data;
         struct nvgpu_spinlock update_fn_lock; /* make access to the two above atomic */
@@ -293,6 +299,9 @@ int gk20a_disable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch);
 int gk20a_channel_suspend(struct gk20a *g);
 int gk20a_channel_resume(struct gk20a *g);

+int nvgpu_channel_worker_init(struct gk20a *g);
+void nvgpu_channel_worker_deinit(struct gk20a *g);
+
 /* Channel file operations */
 int gk20a_channel_open(struct inode *inode, struct file *filp);
 int gk20a_channel_open_ioctl(struct gk20a *g,
@@ -302,7 +311,7 @@ long gk20a_channel_ioctl(struct file *filp,
                          unsigned long arg);
 int gk20a_channel_release(struct inode *inode, struct file *filp);
 struct channel_gk20a *gk20a_get_channel_from_file(int fd);
-void gk20a_channel_update(struct channel_gk20a *c, int nr_completed);
+void gk20a_channel_update(struct channel_gk20a *c);

 void gk20a_init_channel(struct gpu_ops *gops);

diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index 097635a7a..0aa202c5e 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -179,7 +179,7 @@ static void gk20a_channel_syncpt_update(void *priv, int nr_completed)
 {
         struct channel_gk20a *ch = priv;

-        gk20a_channel_update(ch, nr_completed);
+        gk20a_channel_update(ch);

         /* note: channel_get() is in __gk20a_channel_syncpt_incr() */
         gk20a_channel_put(ch);
diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
index 95351a43e..e9eab5513 100644
--- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c
@@ -502,6 +502,7 @@ static void gk20a_remove_fifo_support(struct fifo_gk20a *f)

         gk20a_dbg_fn("");

+        nvgpu_channel_worker_deinit(g);
         /*
          * Make sure all channels are closed before deleting them.
          */
@@ -900,6 +901,9 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g)
         }
         nvgpu_mutex_init(&f->tsg_inuse_mutex);

+        err = nvgpu_channel_worker_init(g);
+        if (err)
+                goto clean_up;
         f->remove_support = gk20a_remove_fifo_support;

         f->deferred_reset_pending = false;
diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h
index c30a8eaf6..c79cc6c87 100644
--- a/drivers/gpu/nvgpu/gk20a/gk20a.h
+++ b/drivers/gpu/nvgpu/gk20a/gk20a.h
@@ -874,8 +874,6 @@ struct gk20a {
         bool timeouts_enabled;
 #endif

-        struct nvgpu_mutex ch_wdt_lock;
-
         struct nvgpu_mutex poweroff_lock;

         /* Channel priorities */
@@ -1008,6 +1006,14 @@ struct gk20a {
         atomic_t sw_irq_nonstall_last_handled;
         wait_queue_head_t sw_irq_nonstall_last_handled_wq;

+        struct gk20a_channel_worker {
+                struct task_struct *poll_task;
+                atomic_t put;
+                wait_queue_head_t wq;
+                struct list_head items;
+                struct nvgpu_spinlock items_lock;
+        } channel_worker;
+
         struct devfreq *devfreq;
         struct gk20a_scale_profile *scale_profile;

diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
index 7b6ed3226..027a92fca 100644
--- a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c
@@ -308,6 +308,10 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g)
         }
         nvgpu_mutex_init(&f->tsg_inuse_mutex);

+        err = nvgpu_channel_worker_init(g);
+        if (err)
+                goto clean_up;
+
         f->deferred_reset_pending = false;
         nvgpu_mutex_init(&f->deferred_reset_mutex);

diff --git a/drivers/gpu/nvgpu/vgpu/vgpu.c b/drivers/gpu/nvgpu/vgpu/vgpu.c
index 4c88ab967..c8ab23f1a 100644
--- a/drivers/gpu/nvgpu/vgpu/vgpu.c
+++ b/drivers/gpu/nvgpu/vgpu/vgpu.c
@@ -252,7 +252,6 @@ static int vgpu_init_support(struct platform_device *pdev)

         nvgpu_mutex_init(&g->dbg_sessions_lock);
         nvgpu_mutex_init(&g->client_lock);
-        nvgpu_mutex_init(&g->ch_wdt_lock);

         g->dbg_regops_tmp_buf = kzalloc(SZ_4K, GFP_KERNEL);
         if (!g->dbg_regops_tmp_buf) {
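The error handling in nvgpu_channel_worker_init() follows the kernel convention that kthread_run() reports failure through a pointer-encoded errno (it never returns NULL), checked with IS_ERR() and decoded with PTR_ERR(). A freestanding imitation of that convention, for reference only - the real macros live in the kernel's err.h:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_ERRNO 4095

static void *ERR_PTR(long err) { return (void *)err; }
static long PTR_ERR(const void *p) { return (long)p; }
static int IS_ERR(const void *p)
{
        /* Error values live in the top page of the address space. */
        return (uintptr_t)p >= (uintptr_t)-MAX_ERRNO;
}

static void *start_thread(int fail)
{
        static int dummy_task;

        return fail ? ERR_PTR(-ENOMEM) : &dummy_task;
}

int main(void)
{
        void *task = start_thread(1);

        if (IS_ERR(task)) {
                printf("failed to start thread: %ld\n", PTR_ERR(task));
                return (int)-PTR_ERR(task);
        }
        return 0;
}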