diff --git a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c index 4a3076b5f..b4fdfb44e 100644 --- a/drivers/gpu/nvgpu/gk20a/cde_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/cde_gk20a.c @@ -1,7 +1,7 @@ /* * Color decompression engine support * - * Copyright (c) 2014, NVIDIA Corporation. All rights reserved. + * Copyright (c) 2014-2015, NVIDIA Corporation. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -74,7 +74,7 @@ __must_hold(&cde_app->mutex) trace_gk20a_cde_remove_ctx(cde_ctx); /* free the channel */ - gk20a_free_channel(cde_ctx->ch, true); + gk20a_channel_close(ch); /* ..then release mapped memory */ gk20a_deinit_cde_img(cde_ctx); diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index c12f196d0..5a71e8746 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -42,8 +42,8 @@ #define NVMAP_HANDLE_PARAM_SIZE 1 -static struct channel_gk20a *acquire_unused_channel(struct fifo_gk20a *f); -static void release_used_channel(struct fifo_gk20a *f, struct channel_gk20a *c); +static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f); +static void free_channel(struct fifo_gk20a *f, struct channel_gk20a *c); static void free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e); @@ -61,29 +61,33 @@ static int channel_gk20a_update_runlist(struct channel_gk20a *c, bool add); static void gk20a_free_error_notifiers(struct channel_gk20a *ch); -static struct channel_gk20a *acquire_unused_channel(struct fifo_gk20a *f) +/* allocate GPU channel */ +static struct channel_gk20a *allocate_channel(struct fifo_gk20a *f) { struct channel_gk20a *ch = NULL; - int chid; - mutex_lock(&f->ch_inuse_mutex); - for (chid = 0; chid < f->num_channels; chid++) { - if (!f->channel[chid].in_use) { - f->channel[chid].in_use = true; - ch = &f->channel[chid]; - break; - } + mutex_lock(&f->free_chs_mutex); + if (!list_empty(&f->free_chs)) { + ch = list_first_entry(&f->free_chs, struct channel_gk20a, + free_chs); + list_del(&ch->free_chs); + WARN_ON(atomic_read(&ch->ref_count)); + WARN_ON(ch->referenceable); } - mutex_unlock(&f->ch_inuse_mutex); + mutex_unlock(&f->free_chs_mutex); return ch; } -static void release_used_channel(struct fifo_gk20a *f, struct channel_gk20a *c) +static void free_channel(struct fifo_gk20a *f, + struct channel_gk20a *ch) { - mutex_lock(&f->ch_inuse_mutex); - f->channel[c->hw_chid].in_use = false; - mutex_unlock(&f->ch_inuse_mutex); + trace_gk20a_release_used_channel(ch->hw_chid); + /* refcount is zero here and channel is in a freed/dead state */ + mutex_lock(&f->free_chs_mutex); + /* add to head to increase visibility of timing-related bugs */ + list_add(&ch->free_chs, &f->free_chs); + mutex_unlock(&f->free_chs_mutex); } int channel_gk20a_commit_va(struct channel_gk20a *c) @@ -361,6 +365,11 @@ void gk20a_channel_abort(struct channel_gk20a *ch) struct channel_gk20a_job *job, *n; bool released_job_semaphore = false; + gk20a_dbg_fn(""); + + /* make sure new kickoffs are prevented */ + ch->has_timedout = true; + /* ensure no fences are pending */ mutex_lock(&ch->submit_lock); if (ch->sync) @@ -416,6 +425,8 @@ void gk20a_disable_channel(struct channel_gk20a *ch, bool finish, unsigned long finish_timeout) { + gk20a_dbg_fn(""); + if (finish) { int err = gk20a_channel_finish(ch, finish_timeout); WARN_ON(err); @@ -627,8 +638,9 @@ void gk20a_set_error_notifier(struct 
channel_gk20a *ch, __u32 error) (u32)(nsec >> 32); ch->error_notifier->info32 = error; ch->error_notifier->status = 0xffff; + gk20a_err(dev_from_gk20a(ch->g), - "error notifier set to %d for ch %d\n", error, ch->hw_chid); + "error notifier set to %d for ch %d", error, ch->hw_chid); } } @@ -643,7 +655,53 @@ static void gk20a_free_error_notifiers(struct channel_gk20a *ch) } } -void gk20a_free_channel(struct channel_gk20a *ch, bool finish) +/* Returns delta of cyclic integers a and b. If a is ahead of b, delta + * is positive */ +static int cyclic_delta(int a, int b) +{ + return a - b; +} + +static void gk20a_wait_for_deferred_interrupts(struct gk20a *g) +{ + int stall_irq_threshold = atomic_read(&g->hw_irq_stall_count); + int nonstall_irq_threshold = atomic_read(&g->hw_irq_nonstall_count); + + /* wait until all stalling irqs are handled */ + wait_event(g->sw_irq_stall_last_handled_wq, + cyclic_delta(stall_irq_threshold, + atomic_read(&g->sw_irq_stall_last_handled)) + <= 0); + + /* wait until all non-stalling irqs are handled */ + wait_event(g->sw_irq_nonstall_last_handled_wq, + cyclic_delta(nonstall_irq_threshold, + atomic_read(&g->sw_irq_nonstall_last_handled)) + <= 0); +} + +static void gk20a_wait_until_counter_is_N( + struct channel_gk20a *ch, atomic_t *counter, int wait_value, + wait_queue_head_t *wq, const char *caller, const char *counter_name) +{ + while (true) { + if (wait_event_timeout( + *wq, + atomic_read(counter) == wait_value, + msecs_to_jiffies(5000)) > 0) + break; + + gk20a_warn(dev_from_gk20a(ch->g), + "%s: channel %d, still waiting, %s left: %d, waiting for: %d", + caller, ch->hw_chid, counter_name, + atomic_read(counter), wait_value); + } +} + + + +/* call ONLY when no references to the channel exist: after the last put */ +static void gk20a_free_channel(struct channel_gk20a *ch) { struct gk20a *g = ch->g; struct fifo_gk20a *f = &g->fifo; @@ -654,13 +712,50 @@ void gk20a_free_channel(struct channel_gk20a *ch, bool finish) gk20a_dbg_fn(""); + WARN_ON(ch->g == NULL); + + trace_gk20a_free_channel(ch->hw_chid); + + /* prevent new kickoffs */ + ch->has_timedout = true; + wmb(); + + /* wait until there's only our ref to the channel */ + gk20a_wait_until_counter_is_N( + ch, &ch->ref_count, 1, &ch->ref_count_dec_wq, + __func__, "references"); + + /* wait until all pending interrupts for recently completed + * jobs are handled */ + gk20a_wait_for_deferred_interrupts(g); + + /* prevent new refs */ + spin_lock(&ch->ref_obtain_lock); + if (!ch->referenceable) { + spin_unlock(&ch->ref_obtain_lock); + gk20a_err(dev_from_gk20a(ch->g), + "Extra %s() called to channel %u", + __func__, ch->hw_chid); + return; + } + ch->referenceable = false; + spin_unlock(&ch->ref_obtain_lock); + + /* matches with the initial reference in gk20a_open_new_channel() */ + atomic_dec(&ch->ref_count); + + /* wait until no more refs to the channel */ + gk20a_wait_until_counter_is_N( + ch, &ch->ref_count, 0, &ch->ref_count_dec_wq, + __func__, "references"); + /* if engine reset was deferred, perform it now */ mutex_lock(&f->deferred_reset_mutex); if (g->fifo.deferred_reset_pending) { gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "engine reset was" " deferred, running now"); - gk20a_fifo_reset_engine(g, g->fifo.mmu_fault_engines); - g->fifo.mmu_fault_engines = 0; + gk20a_fifo_reset_engine(g, g->fifo.deferred_fault_engines); + g->fifo.deferred_fault_engines = 0; g->fifo.deferred_reset_pending = false; } mutex_unlock(&f->deferred_reset_mutex); @@ -674,7 +769,7 @@ void gk20a_free_channel(struct channel_gk20a *ch, bool 
finish) gk20a_dbg_info("freeing bound channel context, timeout=%ld", timeout); - gk20a_disable_channel(ch, finish && !ch->has_timedout, timeout); + gk20a_disable_channel(ch, !ch->has_timedout, timeout); gk20a_free_error_notifiers(ch); @@ -714,6 +809,10 @@ void gk20a_free_channel(struct channel_gk20a *ch, bool finish) spin_unlock(&ch->update_fn_lock); cancel_work_sync(&ch->update_fn_work); + /* make sure we don't have deferred interrupts pending that + * could still touch the channel */ + gk20a_wait_for_deferred_interrupts(g); + unbind: if (gk20a_is_channel_marked_as_tsg(ch)) gk20a_tsg_unbind_channel(ch); @@ -743,8 +842,66 @@ unbind: mutex_unlock(&ch->dbg_s_lock); release: + /* make sure we catch accesses of unopened channels in case + * there's non-refcounted channel pointers hanging around */ + ch->g = NULL; + wmb(); + /* ALWAYS last */ - release_used_channel(f, ch); + free_channel(f, ch); +} + +/* Try to get a reference to the channel. Return nonzero on success. If fails, + * the channel is dead or being freed elsewhere and you must not touch it. + * + * Always when a channel_gk20a pointer is seen and about to be used, a + * reference must be held to it - either by you or the caller, which should be + * documented well or otherwise clearly seen. This usually boils down to the + * file from ioctls directly, or an explicit get in exception handlers when the + * channel is found by a hw_chid. + * + * Most global functions in this file require a reference to be held by the + * caller. + */ +struct channel_gk20a *_gk20a_channel_get(struct channel_gk20a *ch, + const char *caller) { + struct channel_gk20a *ret; + + spin_lock(&ch->ref_obtain_lock); + + if (likely(ch->referenceable)) { + atomic_inc(&ch->ref_count); + ret = ch; + } else + ret = NULL; + + spin_unlock(&ch->ref_obtain_lock); + + if (ret) + trace_gk20a_channel_get(ch->hw_chid, caller); + + return ret; +} + +void _gk20a_channel_put(struct channel_gk20a *ch, const char *caller) +{ + trace_gk20a_channel_put(ch->hw_chid, caller); + atomic_dec(&ch->ref_count); + wake_up_all(&ch->ref_count_dec_wq); + + /* More puts than gets. Channel is probably going to get + * stuck. */ + WARN_ON(atomic_read(&ch->ref_count) < 0); + + /* Also, more puts than gets. ref_count can go to 0 only if + * the channel is closing. Channel is probably going to get + * stuck. 
*/ + WARN_ON(atomic_read(&ch->ref_count) == 0 && ch->referenceable); +} + +void gk20a_channel_close(struct channel_gk20a *ch) +{ + gk20a_free_channel(ch); } int gk20a_channel_release(struct inode *inode, struct file *filp) @@ -758,14 +915,14 @@ int gk20a_channel_release(struct inode *inode, struct file *filp) trace_gk20a_channel_release(dev_name(&g->dev->dev)); - err = gk20a_busy(ch->g->dev); + err = gk20a_busy(g->dev); if (err) { gk20a_err(dev_from_gk20a(g), "failed to release channel %d", ch->hw_chid); return err; } - gk20a_free_channel(ch, true); - gk20a_idle(ch->g->dev); + gk20a_channel_close(ch); + gk20a_idle(g->dev); filp->private_data = NULL; return 0; @@ -808,22 +965,31 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g) struct fifo_gk20a *f = &g->fifo; struct channel_gk20a *ch; - ch = acquire_unused_channel(f); + gk20a_dbg_fn(""); + + ch = allocate_channel(f); if (ch == NULL) { /* TBD: we want to make this virtualizable */ gk20a_err(dev_from_gk20a(g), "out of hw chids"); return NULL; } + trace_gk20a_open_new_channel(ch->hw_chid); + + BUG_ON(ch->g); ch->g = g; if (g->ops.fifo.alloc_inst(g, ch)) { - ch->in_use = false; + ch->g = NULL; + free_channel(f, ch); gk20a_err(dev_from_gk20a(g), "failed to open gk20a channel, out of inst mem"); - return NULL; } + + /* now the channel is in a limbo out of the free list but not marked as + * alive and used (i.e. get-able) yet */ + ch->pid = current->pid; /* By default, channel is regular (non-TSG) channel */ @@ -854,6 +1020,13 @@ struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g) spin_lock_init(&ch->update_fn_lock); INIT_WORK(&ch->update_fn_work, gk20a_channel_update_runcb_fn); + /* Mark the channel alive, get-able, with 1 initial use + * references. The initial reference will be decreased in + * gk20a_free_channel() */ + ch->referenceable = true; + atomic_set(&ch->ref_count, 1); + wmb(); + return ch; } @@ -1379,7 +1552,7 @@ static int gk20a_channel_add_job(struct channel_gk20a *c, struct mapped_buffer_node **mapped_buffers = NULL; int err = 0, num_mapped_buffers; - /* job needs reference to this vm */ + /* job needs reference to this vm (released in channel_update) */ gk20a_vm_get(vm); err = gk20a_vm_get_buffers(vm, &mapped_buffers, &num_mapped_buffers); @@ -1395,14 +1568,21 @@ static int gk20a_channel_add_job(struct channel_gk20a *c, return -ENOMEM; } - job->num_mapped_buffers = num_mapped_buffers; - job->mapped_buffers = mapped_buffers; - job->pre_fence = gk20a_fence_get(pre_fence); - job->post_fence = gk20a_fence_get(post_fence); + /* put() is done in gk20a_channel_update() when the job is done */ + c = gk20a_channel_get(c); - mutex_lock(&c->jobs_lock); - list_add_tail(&job->list, &c->jobs); - mutex_unlock(&c->jobs_lock); + if (c) { + job->num_mapped_buffers = num_mapped_buffers; + job->mapped_buffers = mapped_buffers; + job->pre_fence = gk20a_fence_get(pre_fence); + job->post_fence = gk20a_fence_get(post_fence); + + mutex_lock(&c->jobs_lock); + list_add_tail(&job->list, &c->jobs); + mutex_unlock(&c->jobs_lock); + } else { + return -ETIMEDOUT; + } return 0; } @@ -1412,13 +1592,15 @@ void gk20a_channel_update(struct channel_gk20a *c, int nr_completed) struct vm_gk20a *vm = c->vm; struct channel_gk20a_job *job, *n; - trace_gk20a_channel_update(c); + trace_gk20a_channel_update(c->hw_chid); wake_up(&c->submit_wq); mutex_lock(&c->submit_lock); mutex_lock(&c->jobs_lock); list_for_each_entry_safe(job, n, &c->jobs, list) { + struct gk20a *g = c->g; + bool completed = gk20a_fence_is_expired(job->post_fence); if (!completed) 
break; @@ -1434,12 +1616,15 @@ void gk20a_channel_update(struct channel_gk20a *c, int nr_completed) gk20a_fence_put(job->pre_fence); gk20a_fence_put(job->post_fence); - /* job is done. release its reference to vm */ + /* job is done. release its vm reference (taken in add_job) */ gk20a_vm_put(vm); + /* another bookkeeping taken in add_job. caller must hold a ref + * so this wouldn't get freed here. */ + gk20a_channel_put(c); list_del_init(&job->list); kfree(job); - gk20a_idle(c->g->dev); + gk20a_idle(g->dev); } /* @@ -1719,10 +1904,13 @@ clean_up: int gk20a_init_channel_support(struct gk20a *g, u32 chid) { struct channel_gk20a *c = g->fifo.channel+chid; - c->g = g; - c->in_use = false; + c->g = NULL; c->hw_chid = chid; c->bound = false; + spin_lock_init(&c->ref_obtain_lock); + atomic_set(&c->ref_count, 0); + c->referenceable = false; + init_waitqueue_head(&c->ref_count_dec_wq); mutex_init(&c->ioctl_lock); mutex_init(&c->jobs_lock); mutex_init(&c->submit_lock); @@ -1733,6 +1921,7 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid) #endif INIT_LIST_HEAD(&c->dbg_s_list); mutex_init(&c->dbg_s_lock); + list_add(&c->free_chs, &g->fifo.free_chs); return 0; } @@ -2066,8 +2255,7 @@ int gk20a_channel_suspend(struct gk20a *g) for (chid = 0; chid < f->num_channels; chid++) { struct channel_gk20a *ch = &f->channel[chid]; - if (ch->in_use) { - + if (gk20a_channel_get(ch)) { gk20a_dbg_info("suspend channel %d", chid); /* disable channel */ g->ops.fifo.disable_channel(ch); @@ -2079,6 +2267,8 @@ int gk20a_channel_suspend(struct gk20a *g) flush_work(&ch->update_fn_work); channels_in_use = true; + + gk20a_channel_put(ch); } } @@ -2086,8 +2276,10 @@ int gk20a_channel_suspend(struct gk20a *g) g->ops.fifo.update_runlist(g, 0, ~0, false, true); for (chid = 0; chid < f->num_channels; chid++) { - if (f->channel[chid].in_use) + if (gk20a_channel_get(&f->channel[chid])) { g->ops.fifo.unbind_channel(&f->channel[chid]); + gk20a_channel_put(&f->channel[chid]); + } } } @@ -2095,8 +2287,6 @@ int gk20a_channel_suspend(struct gk20a *g) return 0; } -/* in this context the "channel" is the host1x channel which - * maps to *all* gk20a channels */ int gk20a_channel_resume(struct gk20a *g) { struct fifo_gk20a *f = &g->fifo; @@ -2106,10 +2296,11 @@ int gk20a_channel_resume(struct gk20a *g) gk20a_dbg_fn(""); for (chid = 0; chid < f->num_channels; chid++) { - if (f->channel[chid].in_use) { + if (gk20a_channel_get(&f->channel[chid])) { gk20a_dbg_info("resume channel %d", chid); g->ops.fifo.bind_channel(&f->channel[chid]); channels_in_use = true; + gk20a_channel_put(&f->channel[chid]); } } @@ -2129,10 +2320,11 @@ void gk20a_channel_semaphore_wakeup(struct gk20a *g) for (chid = 0; chid < f->num_channels; chid++) { struct channel_gk20a *c = g->fifo.channel+chid; - if (c->in_use) { + if (gk20a_channel_get(c)) { gk20a_channel_event(c); wake_up_interruptible_all(&c->semaphore_wq); gk20a_channel_update(c, 0); + gk20a_channel_put(c); } } } @@ -2225,10 +2417,18 @@ long gk20a_channel_ioctl(struct file *filp, return -EFAULT; } + /* take a ref or return timeout if channel refs can't be taken */ + ch = gk20a_channel_get(ch); + if (!ch) + return -ETIMEDOUT; + /* protect our sanity for threaded userspace - most of the channel is * not thread safe */ mutex_lock(&ch->ioctl_lock); + /* this ioctl call keeps a ref to the file which keeps a ref to the + * channel */ + switch (cmd) { case NVGPU_IOCTL_CHANNEL_OPEN: err = gk20a_channel_open_ioctl(ch->g, @@ -2449,9 +2649,11 @@ long gk20a_channel_ioctl(struct file *filp, if ((err == 0) && 
(_IOC_DIR(cmd) & _IOC_READ)) err = copy_to_user((void __user *)arg, buf, _IOC_SIZE(cmd)); - gk20a_dbg_fn("end"); - mutex_unlock(&ch->ioctl_lock); + gk20a_channel_put(ch); + + gk20a_dbg_fn("end"); + return err; } diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h index f022fe36d..2ea5b4beb 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h @@ -19,12 +19,13 @@ #define CHANNEL_GK20A_H #include -#include -#include #include -#include #include +#include +#include #include +#include +#include struct gk20a; struct gr_gk20a; @@ -77,8 +78,15 @@ struct channel_gk20a_poll_events { /* this is the priv element of struct nvhost_channel */ struct channel_gk20a { - struct gk20a *g; - bool in_use; + struct gk20a *g; /* set only when channel is active */ + + struct list_head free_chs; + + spinlock_t ref_obtain_lock; + bool referenceable; + atomic_t ref_count; + wait_queue_head_t ref_count_dec_wq; + int hw_chid; bool bound; bool first_init; @@ -171,7 +179,10 @@ static inline bool gk20a_channel_as_bound(struct channel_gk20a *ch) } int channel_gk20a_commit_va(struct channel_gk20a *c); int gk20a_init_channel_support(struct gk20a *, u32 chid); -void gk20a_free_channel(struct channel_gk20a *ch, bool finish); + +/* must be inside gk20a_busy()..gk20a_idle() */ +void gk20a_channel_close(struct channel_gk20a *ch); + bool gk20a_channel_update_and_check_timeout(struct channel_gk20a *ch, u32 timeout_delta_ms); void gk20a_disable_channel(struct channel_gk20a *ch, @@ -202,6 +213,15 @@ void gk20a_channel_event(struct channel_gk20a *ch); void gk20a_init_channel(struct gpu_ops *gops); +/* returns ch if reference was obtained */ +struct channel_gk20a *__must_check _gk20a_channel_get(struct channel_gk20a *ch, + const char *caller); +#define gk20a_channel_get(ch) _gk20a_channel_get(ch, __func__) + + +void _gk20a_channel_put(struct channel_gk20a *ch, const char *caller); +#define gk20a_channel_put(ch) _gk20a_channel_put(ch, __func__) + int gk20a_wait_channel_idle(struct channel_gk20a *ch); struct channel_gk20a *gk20a_open_new_channel(struct gk20a *g); struct channel_gk20a *gk20a_open_new_channel_with_cb(struct gk20a *g, diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c index 8cc852c71..7a707fbdd 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c @@ -154,8 +154,23 @@ static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd, static void gk20a_channel_syncpt_update(void *priv, int nr_completed) { - struct channel_gk20a *ch20a = priv; - gk20a_channel_update(ch20a, nr_completed); + struct channel_gk20a *ch = priv; + struct gk20a *g = ch->g; + + /* need busy for possible channel deletion */ + if (gk20a_busy(ch->g->dev)) { + gk20a_err(dev_from_gk20a(ch->g), + "failed to busy while syncpt update"); + /* Last gk20a_idle()s are in channel_update, so we shouldn't + * get here. 
If we do, the channel is badly broken now */ + return; + } + + /* note: channel_get() is in __gk20a_channel_syncpt_incr() */ + gk20a_channel_update(ch, nr_completed); + gk20a_channel_put(ch); + + gk20a_idle(g->dev); } static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s, @@ -209,14 +224,37 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s, thresh = nvhost_syncpt_incr_max_ext(sp->host1x_pdev, sp->id, 2); if (register_irq) { - err = nvhost_intr_register_notifier(sp->host1x_pdev, - sp->id, thresh, - gk20a_channel_syncpt_update, c); + err = gk20a_busy(c->g->dev); + if (err) + gk20a_err(dev_from_gk20a(c->g), + "failed to add syncpt interrupt notifier for channel %d", + c->hw_chid); + else { + struct channel_gk20a *referenced = gk20a_channel_get(c); - /* Adding interrupt action should never fail. A proper error - * handling here would require us to decrement the syncpt max - * back to its original value. */ - WARN(err, "failed to set submit complete interrupt"); + WARN_ON(!referenced); + gk20a_idle(c->g->dev); + + if (referenced) { + /* note: channel_put() is in + * gk20a_channel_syncpt_update() */ + + err = nvhost_intr_register_notifier( + sp->host1x_pdev, + sp->id, thresh, + gk20a_channel_syncpt_update, c); + if (err) + gk20a_channel_put(referenced); + + /* Adding interrupt action should + * never fail. A proper error handling + * here would require us to decrement + * the syncpt max back to its original + * value. */ + WARN(err, + "failed to set submit complete interrupt"); + } + } } *fence = gk20a_fence_from_syncpt(sp->host1x_pdev, sp->id, thresh, diff --git a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c index 0f1c31ddf..bda0dab03 100644 --- a/drivers/gpu/nvgpu/gk20a/debug_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/debug_gk20a.c @@ -36,6 +36,7 @@ static struct platform_device *gk20a_device; struct ch_state { int pid; + int refs; u8 inst_block[0]; }; @@ -118,9 +119,10 @@ static void gk20a_debug_show_channel(struct gk20a *g, syncpointa = gk20a_mem_rd32(inst_ptr, ram_fc_syncpointa_w()); syncpointb = gk20a_mem_rd32(inst_ptr, ram_fc_syncpointb_w()); - gk20a_debug_output(o, "%d-%s, pid %d: ", hw_chid, + gk20a_debug_output(o, "%d-%s, pid %d, refs: %d: ", hw_chid, g->dev->name, - ch_state->pid); + ch_state->pid, + ch_state->refs); gk20a_debug_output(o, "%s in use %s %s\n", ccsr_channel_enable_v(channel) ? 
"" : "not", ccsr_chan_status_str[status], @@ -231,16 +233,30 @@ void gk20a_debug_show_dump(struct gk20a *g, struct gk20a_debug_output *o) } for (chid = 0; chid < f->num_channels; chid++) { - if (f->channel[chid].in_use) - ch_state[chid] = kmalloc(sizeof(struct ch_state) + ram_in_alloc_size_v(), GFP_KERNEL); + struct channel_gk20a *ch = &f->channel[chid]; + if (gk20a_channel_get(ch)) { + ch_state[chid] = + kmalloc(sizeof(struct ch_state) + + ram_in_alloc_size_v(), GFP_KERNEL); + /* ref taken stays to below loop with + * successful allocs */ + if (!ch_state[chid]) + gk20a_channel_put(ch); + } } for (chid = 0; chid < f->num_channels; chid++) { - if (ch_state[chid] && f->channel[chid].inst_block.cpu_va) { - ch_state[chid]->pid = f->channel[chid].pid; - memcpy(&ch_state[chid]->inst_block[0], - f->channel[chid].inst_block.cpu_va, - ram_in_alloc_size_v()); + struct channel_gk20a *ch = &f->channel[chid]; + if (ch_state[chid]) { + if (ch->inst_block.cpu_va) { + ch_state[chid]->pid = ch->pid; + ch_state[chid]->refs = + atomic_read(&ch->ref_count); + memcpy(&ch_state[chid]->inst_block[0], + ch->inst_block.cpu_va, + ram_in_alloc_size_v()); + } + gk20a_channel_put(ch); } } for (chid = 0; chid < f->num_channels; chid++) { diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c index 56b954a98..4ef310b23 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.c @@ -515,6 +515,9 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g) init_runlist(g, f); + INIT_LIST_HEAD(&f->free_chs); + mutex_init(&f->free_chs_mutex); + for (chid = 0; chid < f->num_channels; chid++) { f->channel[chid].userd_cpu_va = f->userd.cpu_va + chid * f->userd_entry_size; @@ -527,7 +530,6 @@ static int gk20a_init_fifo_setup_sw(struct gk20a *g) gk20a_init_channel_support(g, chid); gk20a_init_tsg_support(g, chid); } - mutex_init(&f->ch_inuse_mutex); mutex_init(&f->tsg_inuse_mutex); f->remove_support = gk20a_remove_fifo_support; @@ -637,6 +639,7 @@ int gk20a_init_fifo_support(struct gk20a *g) return err; } +/* return with a reference to the channel, caller must put it back */ static struct channel_gk20a * channel_from_inst_ptr(struct fifo_gk20a *f, u64 inst_ptr) { @@ -644,10 +647,16 @@ channel_from_inst_ptr(struct fifo_gk20a *f, u64 inst_ptr) if (unlikely(!f->channel)) return NULL; for (ci = 0; ci < f->num_channels; ci++) { - struct channel_gk20a *c = f->channel+ci; - if (c->inst_block.cpu_va && - (inst_ptr == gk20a_mem_phys(&c->inst_block))) - return f->channel+ci; + struct channel_gk20a *ch = gk20a_channel_get(&f->channel[ci]); + /* only alive channels are searched */ + if (!ch) + continue; + + if (ch->inst_block.cpu_va && + (inst_ptr == gk20a_mem_phys(&ch->inst_block))) + return ch; + + gk20a_channel_put(ch); } return NULL; } @@ -803,6 +812,7 @@ static bool gk20a_fifo_should_defer_engine_reset(struct gk20a *g, u32 engine_id, return true; } +/* caller must hold a channel reference */ static bool gk20a_fifo_set_ctx_mmu_error(struct gk20a *g, struct channel_gk20a *ch) { @@ -854,14 +864,38 @@ static bool gk20a_fifo_set_ctx_mmu_error_tsg(struct gk20a *g, "TSG %d generated a mmu fault", tsg->tsgid); mutex_lock(&tsg->ch_list_lock); - list_for_each_entry(ch, &tsg->ch_list, ch_entry) - ret = gk20a_fifo_set_ctx_mmu_error(g, ch); + list_for_each_entry(ch, &tsg->ch_list, ch_entry) { + if (gk20a_channel_get(ch)) { + if (!gk20a_fifo_set_ctx_mmu_error(g, ch)) + ret = false; + gk20a_channel_put(ch); + } + } mutex_unlock(&tsg->ch_list_lock); return ret; } -static bool 
gk20a_fifo_handle_mmu_fault(struct gk20a *g) +static void gk20a_fifo_abort_tsg(struct gk20a *g, u32 tsgid) +{ + struct tsg_gk20a *tsg = &g->fifo.tsg[tsgid]; + struct channel_gk20a *ch; + + mutex_lock(&tsg->ch_list_lock); + list_for_each_entry(ch, &tsg->ch_list, ch_entry) { + if (gk20a_channel_get(ch)) { + gk20a_channel_abort(ch); + gk20a_channel_put(ch); + } + } + mutex_unlock(&tsg->ch_list_lock); +} + +static bool gk20a_fifo_handle_mmu_fault( + struct gk20a *g, + u32 mmu_fault_engines, /* queried from HW if 0 */ + u32 hw_id, /* queried from HW if ~(u32)0 OR mmu_fault_engines == 0*/ + bool id_is_tsg) { bool fake_fault; unsigned long fault_id; @@ -894,10 +928,8 @@ static bool gk20a_fifo_handle_mmu_fault(struct gk20a *g) grfifo_ctl | gr_gpfifo_ctl_access_f(0) | gr_gpfifo_ctl_semaphore_access_f(0)); - /* If we have recovery in progress, MMU fault id is invalid */ - if (g->fifo.mmu_fault_engines) { - fault_id = g->fifo.mmu_fault_engines; - g->fifo.mmu_fault_engines = 0; + if (mmu_fault_engines) { + fault_id = mmu_fault_engines; fake_fault = true; } else { fault_id = gk20a_readl(g, fifo_intr_mmu_fault_id_r()); @@ -914,6 +946,7 @@ static bool gk20a_fifo_handle_mmu_fault(struct gk20a *g) struct fifo_mmu_fault_info_gk20a f; struct channel_gk20a *ch = NULL; struct tsg_gk20a *tsg = NULL; + struct channel_gk20a *referenced_channel = 0; /* read and parse engine status */ u32 status = gk20a_readl(g, fifo_engine_status_r(engine_id)); u32 ctx_status = fifo_engine_status_ctx_status_v(status); @@ -953,22 +986,34 @@ static bool gk20a_fifo_handle_mmu_fault(struct gk20a *g) /* get the channel/TSG */ if (fake_fault) { /* use next_id if context load is failing */ - u32 id = (ctx_status == - fifo_engine_status_ctx_status_ctxsw_load_v()) ? - fifo_engine_status_next_id_v(status) : - fifo_engine_status_id_v(status); - u32 type = (ctx_status == - fifo_engine_status_ctx_status_ctxsw_load_v()) ? - fifo_engine_status_next_id_type_v(status) : - fifo_engine_status_id_type_v(status); + u32 id, type; + + if (hw_id == ~(u32)0) { + id = (ctx_status == + fifo_engine_status_ctx_status_ctxsw_load_v()) ? + fifo_engine_status_next_id_v(status) : + fifo_engine_status_id_v(status); + type = (ctx_status == + fifo_engine_status_ctx_status_ctxsw_load_v()) ? + fifo_engine_status_next_id_type_v(status) : + fifo_engine_status_id_type_v(status); + } else { + id = hw_id; + type = id_is_tsg ? 
+ fifo_engine_status_id_type_tsgid_v() : + fifo_engine_status_id_type_chid_v(); + } if (type == fifo_engine_status_id_type_tsgid_v()) tsg = &g->fifo.tsg[id]; - else if (type == fifo_engine_status_id_type_chid_v()) + else if (type == fifo_engine_status_id_type_chid_v()) { ch = &g->fifo.channel[id]; + referenced_channel = gk20a_channel_get(ch); + } } else { /* read channel based on instruction pointer */ ch = channel_from_inst_ptr(&g->fifo, f.inst_ptr); + referenced_channel = ch; } if (ch && gk20a_is_channel_marked_as_tsg(ch)) @@ -977,7 +1022,7 @@ static bool gk20a_fifo_handle_mmu_fault(struct gk20a *g) /* check if engine reset should be deferred */ if ((ch || tsg) && gk20a_fifo_should_defer_engine_reset(g, engine_id, &f, fake_fault)) { - g->fifo.mmu_fault_engines = fault_id; + g->fifo.deferred_fault_engines = fault_id; /* handled during channel free */ g->fifo.deferred_reset_pending = true; @@ -988,19 +1033,31 @@ static bool gk20a_fifo_handle_mmu_fault(struct gk20a *g) * syncpoints */ if (tsg) { - struct channel_gk20a *ch = NULL; if (!g->fifo.deferred_reset_pending) verbose = gk20a_fifo_set_ctx_mmu_error_tsg(g, tsg); - mutex_lock(&tsg->ch_list_lock); - list_for_each_entry(ch, &tsg->ch_list, ch_entry) - gk20a_channel_abort(ch); - mutex_unlock(&tsg->ch_list_lock); + + gk20a_fifo_abort_tsg(g, ch->tsgid); + + /* put back the ref taken early above */ + if (referenced_channel) { + gk20a_channel_put(ch); + } else { + gk20a_err(dev_from_gk20a(g), + "mmu error in freed tsg channel %d on tsgid %d", + ch->hw_chid, ch->tsgid); + } } else if (ch) { - if (!g->fifo.deferred_reset_pending) - verbose = - gk20a_fifo_set_ctx_mmu_error_ch(g, ch); - gk20a_channel_abort(ch); + if (referenced_channel) { + if (!g->fifo.deferred_reset_pending) + verbose = gk20a_fifo_set_ctx_mmu_error_ch(g, ch); + gk20a_channel_abort(ch); + gk20a_channel_put(ch); + } else { + gk20a_err(dev_from_gk20a(g), + "mmu error in freed channel %d", + ch->hw_chid); + } } else if (f.inst_ptr == gk20a_mem_phys(&g->mm.bar1.inst_block)) { gk20a_err(dev_from_gk20a(g), "mmu fault from bar1"); @@ -1133,46 +1190,69 @@ static u32 gk20a_fifo_engines_on_id(struct gk20a *g, u32 id, bool is_tsg) void gk20a_fifo_recover_ch(struct gk20a *g, u32 hw_chid, bool verbose) { - u32 engines = gk20a_fifo_engines_on_id(g, hw_chid, false); + u32 engines; + + /* stop context switching to prevent engine assignments from + changing until channel is recovered */ + mutex_lock(&g->dbg_sessions_lock); + gr_gk20a_disable_ctxsw(g); + + engines = gk20a_fifo_engines_on_id(g, hw_chid, false); + if (engines) - gk20a_fifo_recover(g, engines, verbose); + gk20a_fifo_recover(g, engines, hw_chid, false, verbose); else { - struct channel_gk20a *ch = - g->fifo.channel + hw_chid; + struct channel_gk20a *ch = &g->fifo.channel[hw_chid]; - gk20a_channel_abort(ch); + if (gk20a_channel_get(ch)) { + gk20a_channel_abort(ch); - if (gk20a_fifo_set_ctx_mmu_error_ch(g, ch)) - gk20a_debug_dump(g->dev); + if (gk20a_fifo_set_ctx_mmu_error_ch(g, ch)) + gk20a_debug_dump(g->dev); + + gk20a_channel_put(ch); + } } + + gr_gk20a_enable_ctxsw(g); + mutex_unlock(&g->dbg_sessions_lock); } void gk20a_fifo_recover_tsg(struct gk20a *g, u32 tsgid, bool verbose) { - u32 engines = gk20a_fifo_engines_on_id(g, tsgid, true); + u32 engines; + + /* stop context switching to prevent engine assignments from + changing until TSG is recovered */ + mutex_lock(&g->dbg_sessions_lock); + gr_gk20a_disable_ctxsw(g); + + engines = gk20a_fifo_engines_on_id(g, tsgid, true); + if (engines) - gk20a_fifo_recover(g, engines, verbose); + 
gk20a_fifo_recover(g, engines, tsgid, true, verbose); else { struct tsg_gk20a *tsg = &g->fifo.tsg[tsgid]; - struct channel_gk20a *ch; if (gk20a_fifo_set_ctx_mmu_error_tsg(g, tsg)) gk20a_debug_dump(g->dev); - mutex_lock(&tsg->ch_list_lock); - list_for_each_entry(ch, &tsg->ch_list, ch_entry) - gk20a_channel_abort(ch); - mutex_unlock(&tsg->ch_list_lock); + gk20a_fifo_abort_tsg(g, tsgid); } + + gr_gk20a_enable_ctxsw(g); + mutex_unlock(&g->dbg_sessions_lock); } void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids, - bool verbose) + u32 hw_id, bool id_is_tsg, + bool verbose) { unsigned long engine_id, i; unsigned long _engine_ids = __engine_ids; unsigned long engine_ids = 0; u32 val; + u32 mmu_fault_engines = 0; if (verbose) gk20a_debug_dump(g->dev); @@ -1181,7 +1261,6 @@ void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids, g->ops.ltc.flush(g); /* store faulted engines in advance */ - g->fifo.mmu_fault_engines = 0; for_each_set_bit(engine_id, &_engine_ids, 32) { u32 ref_type; u32 ref_id; @@ -1196,11 +1275,10 @@ void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids, gk20a_fifo_get_faulty_id_type(g, i, &id, &type); if (ref_type == type && ref_id == id) { engine_ids |= BIT(i); - g->fifo.mmu_fault_engines |= + mmu_fault_engines |= BIT(gk20a_engine_id_to_mmu_id(i)); } } - } /* @@ -1214,7 +1292,7 @@ void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids, fifo_intr_0_sched_error_reset_f()); g->ops.fifo.trigger_mmu_fault(g, engine_ids); - gk20a_fifo_handle_mmu_fault(g); + gk20a_fifo_handle_mmu_fault(g, engine_ids, hw_id, id_is_tsg); val = gk20a_readl(g, fifo_intr_en_0_r()); val |= fifo_intr_en_0_mmu_fault_f(1) @@ -1222,25 +1300,32 @@ void gk20a_fifo_recover(struct gk20a *g, u32 __engine_ids, gk20a_writel(g, fifo_intr_en_0_r(), val); } +/* force reset channel and tsg (if it's part of one) */ int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch, bool verbose) { struct tsg_gk20a *tsg = NULL; struct channel_gk20a *ch_tsg = NULL; + struct gk20a *g = ch->g; if (gk20a_is_channel_marked_as_tsg(ch)) { - tsg = &ch->g->fifo.tsg[ch->hw_chid]; + tsg = &g->fifo.tsg[ch->hw_chid]; mutex_lock(&tsg->ch_list_lock); + list_for_each_entry(ch_tsg, &tsg->ch_list, ch_entry) { - gk20a_set_error_notifier(ch_tsg, - NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR); + if (gk20a_channel_get(ch_tsg)) { + gk20a_set_error_notifier(ch_tsg, + NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR); + gk20a_channel_put(ch_tsg); + } } + mutex_unlock(&tsg->ch_list_lock); - gk20a_fifo_recover_tsg(ch->g, ch->tsgid, verbose); + gk20a_fifo_recover_tsg(g, ch->tsgid, verbose); } else { gk20a_set_error_notifier(ch, NVGPU_CHANNEL_RESETCHANNEL_VERIF_ERROR); - gk20a_fifo_recover_ch(ch->g, ch->hw_chid, verbose); + gk20a_fifo_recover_ch(g, ch->hw_chid, verbose); } return 0; @@ -1300,11 +1385,14 @@ static bool gk20a_fifo_handle_sched_error(struct gk20a *g) struct channel_gk20a *ch = &f->channel[id]; if (non_chid) { - gk20a_fifo_recover(g, BIT(engine_id), true); + gk20a_fifo_recover(g, BIT(engine_id), id, true, true); ret = true; goto err; } + if (!gk20a_channel_get(ch)) + goto err; + if (gk20a_channel_update_and_check_timeout(ch, GRFIFO_TIMEOUT_CHECK_PERIOD_US / 1000)) { gk20a_set_error_notifier(ch, @@ -1313,7 +1401,7 @@ static bool gk20a_fifo_handle_sched_error(struct gk20a *g) "fifo sched ctxsw timeout error:" "engine = %u, ch = %d", engine_id, id); gk20a_gr_debug_dump(g->dev); - gk20a_fifo_recover(g, BIT(engine_id), + gk20a_fifo_recover(g, BIT(engine_id), id, false, ch->timeout_debug_dump); ret = true; } else { @@ -1324,6 +1412,7 @@ static bool 
gk20a_fifo_handle_sched_error(struct gk20a *g) id); ret = false; } + gk20a_channel_put(ch); return ret; } @@ -1336,7 +1425,7 @@ err: static u32 fifo_error_isr(struct gk20a *g, u32 fifo_intr) { - bool print_channel_reset_log = false, reset_engine = false; + bool print_channel_reset_log = false; struct device *dev = dev_from_gk20a(g); u32 handled = 0; @@ -1367,8 +1456,8 @@ static u32 fifo_error_isr(struct gk20a *g, u32 fifo_intr) } if (fifo_intr & fifo_intr_0_mmu_fault_pending_f()) { - print_channel_reset_log = gk20a_fifo_handle_mmu_fault(g); - reset_engine = true; + print_channel_reset_log = + gk20a_fifo_handle_mmu_fault(g, 0, ~(u32)0, false); handled |= fifo_intr_0_mmu_fault_pending_f(); } @@ -1452,9 +1541,12 @@ static u32 gk20a_fifo_handle_pbdma_intr(struct device *dev, == fifo_pbdma_status_id_type_chid_v()) { struct channel_gk20a *ch = &f->channel[id]; - gk20a_set_error_notifier(ch, - NVGPU_CHANNEL_PBDMA_ERROR); - gk20a_fifo_recover_ch(g, id, true); + if (gk20a_channel_get(ch)) { + gk20a_set_error_notifier(ch, + NVGPU_CHANNEL_PBDMA_ERROR); + gk20a_fifo_recover_ch(g, id, true); + gk20a_channel_put(ch); + } } else if (fifo_pbdma_status_id_type_v(status) == fifo_pbdma_status_id_type_tsgid_v()) { struct tsg_gk20a *tsg = &f->tsg[id]; @@ -1462,8 +1554,11 @@ static u32 gk20a_fifo_handle_pbdma_intr(struct device *dev, mutex_lock(&tsg->ch_list_lock); list_for_each_entry(ch, &tsg->ch_list, ch_entry) { - gk20a_set_error_notifier(ch, - NVGPU_CHANNEL_PBDMA_ERROR); + if (gk20a_channel_get(ch)) { + gk20a_set_error_notifier(ch, + NVGPU_CHANNEL_PBDMA_ERROR); + gk20a_channel_put(ch); + } } mutex_unlock(&tsg->ch_list_lock); gk20a_fifo_recover_tsg(g, id, true); @@ -1559,6 +1654,8 @@ static int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg) + msecs_to_jiffies(gk20a_get_gr_idle_timeout(g)); u32 ret = 0; + gk20a_dbg_fn("%d", id); + /* issue preempt */ if (is_tsg) gk20a_writel(g, fifo_preempt_r(), @@ -1569,6 +1666,7 @@ static int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg) fifo_preempt_chid_f(id) | fifo_preempt_type_channel_f()); + gk20a_dbg_fn("%d", id); /* wait for preempt */ ret = -EBUSY; do { @@ -1583,6 +1681,7 @@ static int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg) } while (time_before(jiffies, end_jiffies) || !tegra_platform_is_silicon()); + gk20a_dbg_fn("%d", id); if (ret) { if (is_tsg) { struct tsg_gk20a *tsg = &g->fifo.tsg[id]; @@ -1593,8 +1692,11 @@ static int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg) mutex_lock(&tsg->ch_list_lock); list_for_each_entry(ch, &tsg->ch_list, ch_entry) { + if (!gk20a_channel_get(ch)) + continue; gk20a_set_error_notifier(ch, NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT); + gk20a_channel_put(ch); } mutex_unlock(&tsg->ch_list_lock); gk20a_fifo_recover_tsg(g, id, true); @@ -1604,9 +1706,12 @@ static int __locked_fifo_preempt(struct gk20a *g, u32 id, bool is_tsg) gk20a_err(dev_from_gk20a(g), "preempt channel %d timeout\n", id); - gk20a_set_error_notifier(ch, - NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT); - gk20a_fifo_recover_ch(g, id, true); + if (gk20a_channel_get(ch)) { + gk20a_set_error_notifier(ch, + NVGPU_CHANNEL_FIFO_ERROR_IDLE_TIMEOUT); + gk20a_fifo_recover_ch(g, id, true); + gk20a_channel_put(ch); + } } } @@ -1790,7 +1895,9 @@ static void gk20a_fifo_runlist_reset_engines(struct gk20a *g, u32 runlist_id) (f->engine_info[i].runlist_id == runlist_id)) engines |= BIT(i); } - gk20a_fifo_recover(g, engines, true); + + if (engines) + gk20a_fifo_recover(g, engines, ~(u32)0, false, true); } static int 
gk20a_fifo_runlist_wait_pending(struct gk20a *g, u32 runlist_id) @@ -1994,6 +2101,8 @@ int gk20a_fifo_update_runlist(struct gk20a *g, u32 runlist_id, u32 hw_chid, u32 mutex_ret; u32 ret = 0; + gk20a_dbg_fn(""); + runlist = &f->runlist_info[runlist_id]; mutex_lock(&runlist->mutex); diff --git a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h index dd320ae1e..fdf843d2f 100644 --- a/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/fifo_gk20a.h @@ -3,7 +3,7 @@ * * GK20A graphics fifo (gr host) * - * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -106,7 +106,9 @@ struct fifo_gk20a { u32 userd_entry_size; struct channel_gk20a *channel; - struct mutex ch_inuse_mutex; /* protect unused chid look up */ + /* zero-kref'd channels here */ + struct list_head free_chs; + struct mutex free_chs_mutex; struct tsg_gk20a *tsg; struct mutex tsg_inuse_mutex; @@ -130,7 +132,7 @@ struct fifo_gk20a { } intr; - u32 mmu_fault_engines; + u32 deferred_fault_engines; bool deferred_reset_pending; struct mutex deferred_reset_mutex; }; @@ -157,7 +159,12 @@ int gk20a_fifo_update_runlist(struct gk20a *g, u32 engine_id, u32 hw_chid, int gk20a_fifo_suspend(struct gk20a *g); bool gk20a_fifo_mmu_fault_pending(struct gk20a *g); -void gk20a_fifo_recover(struct gk20a *g, u32 engine_ids, bool verbose); + +void gk20a_fifo_recover(struct gk20a *g, + u32 engine_ids, /* if zero, will be queried from HW */ + u32 hw_id, /* if ~0, will be queried from HW */ + bool hw_id_is_tsg, /* ignored if hw_id == ~0 */ + bool verbose); void gk20a_fifo_recover_ch(struct gk20a *g, u32 hw_chid, bool verbose); void gk20a_fifo_recover_tsg(struct gk20a *g, u32 tsgid, bool verbose); int gk20a_fifo_force_reset_ch(struct channel_gk20a *ch, bool verbose); diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.c b/drivers/gpu/nvgpu/gk20a/gk20a.c index 9c201f32a..498de7e78 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gk20a.c @@ -1388,6 +1388,9 @@ static int gk20a_probe(struct platform_device *dev) return -ENOMEM; } + init_waitqueue_head(&gk20a->sw_irq_stall_last_handled_wq); + init_waitqueue_head(&gk20a->sw_irq_nonstall_last_handled_wq); + #ifdef CONFIG_PM_GENERIC_DOMAINS_OF gk20a_domain = container_of(dev_to_genpd(&dev->dev), struct gk20a_domain_data, gpd); diff --git a/drivers/gpu/nvgpu/gk20a/gk20a.h b/drivers/gpu/nvgpu/gk20a/gk20a.h index a52d97f36..d8e3586f4 100644 --- a/drivers/gpu/nvgpu/gk20a/gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/gk20a.h @@ -538,6 +538,15 @@ struct gk20a { u32 max_ltc_count; u32 ltc_count; + atomic_t hw_irq_stall_count; + atomic_t hw_irq_nonstall_count; + + atomic_t sw_irq_stall_last_handled; + wait_queue_head_t sw_irq_stall_last_handled_wq; + + atomic_t sw_irq_nonstall_last_handled; + wait_queue_head_t sw_irq_nonstall_last_handled_wq; + struct devfreq *devfreq; struct gk20a_scale_profile *scale_profile; diff --git a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c index b2fea5b82..edd4c6c86 100644 --- a/drivers/gpu/nvgpu/gk20a/gr_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/gr_gk20a.c @@ -5138,22 +5138,25 @@ static int gk20a_gr_handle_notify_pending(struct gk20a *g, * Also used by regops to translate current ctx to chid and tsgid. * For performance, we don't want to go through 128 channels every time. 
* curr_ctx should be the value read from gr_fecs_current_ctx_r(). - * A small tlb is used here to cache translation */ -static int gk20a_gr_get_chid_from_ctx(struct gk20a *g, u32 curr_ctx, - int *curr_tsgid) + * A small tlb is used here to cache translation. + * + * Returned channel must be freed with gk20a_channel_put() */ +static struct channel_gk20a *gk20a_gr_get_channel_from_ctx( + struct gk20a *g, u32 curr_ctx, int *curr_tsgid) { struct fifo_gk20a *f = &g->fifo; struct gr_gk20a *gr = &g->gr; u32 chid = -1; int tsgid = NVGPU_INVALID_TSG_ID; u32 i; + struct channel_gk20a *ret = NULL; /* when contexts are unloaded from GR, the valid bit is reset * but the instance pointer information remains intact. So the * valid bit must be checked to be absolutely certain that a * valid context is currently resident. */ if (!gr_fecs_current_ctx_valid_v(curr_ctx)) - return -1; + return NULL; spin_lock(&gr->ch_tlb_lock); @@ -5162,25 +5165,30 @@ static int gk20a_gr_get_chid_from_ctx(struct gk20a *g, u32 curr_ctx, if (gr->chid_tlb[i].curr_ctx == curr_ctx) { chid = gr->chid_tlb[i].hw_chid; tsgid = gr->chid_tlb[i].tsgid; + ret = gk20a_channel_get(&f->channel[chid]); goto unlock; } } /* slow path */ - for (chid = 0; chid < f->num_channels; chid++) - if (f->channel[chid].in_use) { - if ((u32)(gk20a_mem_phys(&f->channel[chid].inst_block) >> - ram_in_base_shift_v()) == + for (chid = 0; chid < f->num_channels; chid++) { + struct channel_gk20a *ch = &f->channel[chid]; + if (!gk20a_channel_get(ch)) + continue; + + if ((u32)(gk20a_mem_phys(&ch->inst_block) >> + ram_in_base_shift_v()) == gr_fecs_current_ctx_ptr_v(curr_ctx)) { - tsgid = f->channel[chid].tsgid; - break; - } + tsgid = ch->tsgid; + /* found it */ + ret = ch; + break; + } + gk20a_channel_put(ch); } - if (chid >= f->num_channels) { - chid = -1; + if (!ret) goto unlock; - } /* add to free tlb entry */ for (i = 0; i < GR_CHANNEL_MAP_TLB_SIZE; i++) { @@ -5205,7 +5213,7 @@ unlock: spin_unlock(&gr->ch_tlb_lock); if (curr_tsgid) *curr_tsgid = tsgid; - return chid; + return ret; } int gk20a_gr_lock_down_sm(struct gk20a *g, @@ -5399,6 +5407,7 @@ int gk20a_gr_isr(struct gk20a *g) u32 obj_table; int need_reset = 0; u32 gr_intr = gk20a_readl(g, gr_intr_r()); + struct channel_gk20a *ch = NULL; gk20a_dbg_fn(""); gk20a_dbg(gpu_dbg_intr, "pgraph intr %08x", gr_intr); @@ -5424,13 +5433,13 @@ int gk20a_gr_isr(struct gk20a *g) gr_fe_object_table_r(isr_data.sub_chan)) : 0; isr_data.class_num = gr_fe_object_table_nvclass_v(obj_table); - isr_data.chid = - gk20a_gr_get_chid_from_ctx(g, isr_data.curr_ctx, NULL); - if (isr_data.chid == -1) { + ch = gk20a_gr_get_channel_from_ctx(g, isr_data.curr_ctx, NULL); + if (!ch) { gk20a_err(dev_from_gk20a(g), "invalid channel ctx 0x%08x", isr_data.curr_ctx); goto clean_up; } + isr_data.chid = ch->hw_chid; gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "channel %d: addr 0x%08x, " @@ -5512,8 +5521,6 @@ int gk20a_gr_isr(struct gk20a *g) if (gr_intr & gr_intr_exception_pending_f()) { u32 exception = gk20a_readl(g, gr_exception_r()); - struct fifo_gk20a *f = &g->fifo; - struct channel_gk20a *ch = &f->channel[isr_data.chid]; gk20a_dbg(gpu_dbg_intr | gpu_dbg_gpu_dbg, "exception %08x\n", exception); @@ -5572,9 +5579,20 @@ int gk20a_gr_isr(struct gk20a *g) } if (need_reset) - gk20a_fifo_recover(g, BIT(ENGINE_GR_GK20A), true); + gk20a_fifo_recover(g, BIT(ENGINE_GR_GK20A), + ~(u32)0, false, true); clean_up: + if (gr_intr && !ch) { + /* Clear interrupts for unused channel. 
This is + probably an interrupt during gk20a_free_channel() */ + gk20a_err(dev_from_gk20a(g), + "unhandled gr interrupt 0x%08x for unreferenceable channel, clearing", + gr_intr); + gk20a_writel(g, gr_intr_r(), gr_intr); + gr_intr = 0; + } + gk20a_writel(g, gr_gpfifo_ctl_r(), grfifo_ctl | gr_gpfifo_ctl_access_f(1) | gr_gpfifo_ctl_semaphore_access_f(1)); @@ -5583,6 +5601,9 @@ clean_up: gk20a_err(dev_from_gk20a(g), "unhandled gr interrupt 0x%08x", gr_intr); + if (ch) + gk20a_channel_put(ch); + return 0; } @@ -6670,28 +6691,34 @@ static int gr_gk20a_find_priv_offset_in_buffer(struct gk20a *g, bool gk20a_is_channel_ctx_resident(struct channel_gk20a *ch) { - int curr_gr_chid, curr_gr_ctx, curr_gr_tsgid; + int curr_gr_ctx, curr_gr_tsgid; struct gk20a *g = ch->g; + struct channel_gk20a *curr_ch; + bool ret = false; curr_gr_ctx = gk20a_readl(g, gr_fecs_current_ctx_r()); - curr_gr_chid = gk20a_gr_get_chid_from_ctx(g, curr_gr_ctx, - &curr_gr_tsgid); + curr_ch = gk20a_gr_get_channel_from_ctx(g, curr_gr_ctx, + &curr_gr_tsgid); gk20a_dbg(gpu_dbg_fn | gpu_dbg_gpu_dbg, - "curr_gr_chid=%d curr_tsgid=%d, ch->tsgid=%d" - " ch->hw_chid=%d", curr_gr_chid, - curr_gr_tsgid, ch->tsgid, ch->hw_chid); + "curr_gr_chid=%d curr_tsgid=%d, ch->tsgid=%d" + " ch->hw_chid=%d", + curr_ch ? curr_ch->hw_chid : -1, + curr_gr_tsgid, + ch->tsgid, + ch->hw_chid); - if (curr_gr_chid == -1) + if (!curr_ch) return false; - if (ch->hw_chid == curr_gr_chid) - return true; + if (ch->hw_chid == curr_ch->hw_chid) + ret = true; if (gk20a_is_channel_marked_as_tsg(ch) && (ch->tsgid == curr_gr_tsgid)) - return true; + ret = true; - return false; + gk20a_channel_put(curr_ch); + return ret; } int gr_gk20a_exec_ctx_ops(struct channel_gk20a *ch, diff --git a/drivers/gpu/nvgpu/gk20a/mc_gk20a.c b/drivers/gpu/nvgpu/gk20a/mc_gk20a.c index 06b00a25c..0a773d10d 100644 --- a/drivers/gpu/nvgpu/gk20a/mc_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/mc_gk20a.c @@ -40,6 +40,8 @@ irqreturn_t mc_gk20a_isr_stall(struct gk20a *g) /* flush previous write */ gk20a_readl(g, mc_intr_en_0_r()); + atomic_inc(&g->hw_irq_stall_count); + trace_mc_gk20a_intr_stall_done(g->dev->name); return IRQ_WAKE_THREAD; @@ -63,18 +65,22 @@ irqreturn_t mc_gk20a_isr_nonstall(struct gk20a *g) /* flush previous write */ gk20a_readl(g, mc_intr_en_1_r()); + atomic_inc(&g->hw_irq_nonstall_count); + return IRQ_WAKE_THREAD; } irqreturn_t mc_gk20a_intr_thread_stall(struct gk20a *g) { u32 mc_intr_0; + int hw_irq_count; gk20a_dbg(gpu_dbg_intr, "interrupt thread launched"); trace_mc_gk20a_intr_thread_stall(g->dev->name); mc_intr_0 = gk20a_readl(g, mc_intr_0_r()); + hw_irq_count = atomic_read(&g->hw_irq_stall_count); gk20a_dbg(gpu_dbg_intr, "stall intr %08x\n", mc_intr_0); @@ -94,12 +100,17 @@ irqreturn_t mc_gk20a_intr_thread_stall(struct gk20a *g) if (mc_intr_0 & mc_intr_0_pbus_pending_f()) gk20a_pbus_isr(g); + /* sync handled irq counter before re-enabling interrupts */ + atomic_set(&g->sw_irq_stall_last_handled, hw_irq_count); + gk20a_writel(g, mc_intr_en_0_r(), mc_intr_en_0_inta_hardware_f()); /* flush previous write */ gk20a_readl(g, mc_intr_en_0_r()); + wake_up_all(&g->sw_irq_stall_last_handled_wq); + trace_mc_gk20a_intr_thread_stall_done(g->dev->name); return IRQ_HANDLED; @@ -108,10 +119,12 @@ irqreturn_t mc_gk20a_intr_thread_stall(struct gk20a *g) irqreturn_t mc_gk20a_intr_thread_nonstall(struct gk20a *g) { u32 mc_intr_1; + int hw_irq_count; gk20a_dbg(gpu_dbg_intr, "interrupt thread launched"); mc_intr_1 = gk20a_readl(g, mc_intr_1_r()); + hw_irq_count = atomic_read(&g->hw_irq_nonstall_count); 
gk20a_dbg(gpu_dbg_intr, "non-stall intr %08x\n", mc_intr_1); @@ -125,12 +138,17 @@ irqreturn_t mc_gk20a_intr_thread_nonstall(struct gk20a *g) && g->ops.ce2.isr_nonstall) g->ops.ce2.isr_nonstall(g); + /* sync handled irq counter before re-enabling interrupts */ + atomic_set(&g->sw_irq_nonstall_last_handled, hw_irq_count); + gk20a_writel(g, mc_intr_en_1_r(), mc_intr_en_1_inta_hardware_f()); /* flush previous write */ gk20a_readl(g, mc_intr_en_1_r()); + wake_up_all(&g->sw_irq_stall_last_handled_wq); + return IRQ_HANDLED; } diff --git a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c index 68a31ecac..23ff86778 100644 --- a/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c +++ b/drivers/gpu/nvgpu/vgpu/fifo_vgpu.c @@ -283,6 +283,9 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g) init_runlist(g, f); + INIT_LIST_HEAD(&f->free_chs); + mutex_init(&f->free_chs_mutex); + for (chid = 0; chid < f->num_channels; chid++) { f->channel[chid].userd_cpu_va = f->userd.cpu_va + chid * f->userd_entry_size; @@ -294,7 +297,6 @@ static int vgpu_init_fifo_setup_sw(struct gk20a *g) gk20a_init_channel_support(g, chid); } - mutex_init(&f->ch_inuse_mutex); f->deferred_reset_pending = false; mutex_init(&f->deferred_reset_mutex); diff --git a/include/trace/events/gk20a.h b/include/trace/events/gk20a.h index ad738f437..461ff6e8c 100644 --- a/include/trace/events/gk20a.h +++ b/include/trace/events/gk20a.h @@ -140,12 +140,54 @@ DEFINE_EVENT(gk20a, gk20a_mm_g_elpg_flush_locked_done, TP_ARGS(name) ); -TRACE_EVENT(gk20a_channel_update, - TP_PROTO(const void *channel), +DECLARE_EVENT_CLASS(gk20a_channel, + TP_PROTO(int channel), TP_ARGS(channel), - TP_STRUCT__entry(__field(const void *, channel)), + TP_STRUCT__entry(__field(int, channel)), TP_fast_assign(__entry->channel = channel;), - TP_printk("channel=%p", __entry->channel) + TP_printk("ch id %d", __entry->channel) +); +DEFINE_EVENT(gk20a_channel, gk20a_channel_update, + TP_PROTO(int channel), + TP_ARGS(channel) +); +DEFINE_EVENT(gk20a_channel, gk20a_free_channel, + TP_PROTO(int channel), + TP_ARGS(channel) +); +DEFINE_EVENT(gk20a_channel, gk20a_open_new_channel, + TP_PROTO(int channel), + TP_ARGS(channel) +); +DEFINE_EVENT(gk20a_channel, gk20a_release_used_channel, + TP_PROTO(int channel), + TP_ARGS(channel) +); + +DECLARE_EVENT_CLASS(gk20a_channel_getput, + TP_PROTO(int channel, const char *caller), + TP_ARGS(channel, caller), + TP_STRUCT__entry( + __field(int, channel) + __field(const char *, caller) + ), + TP_fast_assign( + __entry->channel = channel; + __entry->caller = caller; + ), + TP_printk("channel %d caller %s", __entry->channel, __entry->caller) +); +DEFINE_EVENT(gk20a_channel_getput, gk20a_channel_get, + TP_PROTO(int channel, const char *caller), + TP_ARGS(channel, caller) +); +DEFINE_EVENT(gk20a_channel_getput, gk20a_channel_put, + TP_PROTO(int channel, const char *caller), + TP_ARGS(channel, caller) +); +DEFINE_EVENT(gk20a_channel_getput, gk20a_channel_put_nofree, + TP_PROTO(int channel, const char *caller), + TP_ARGS(channel, caller) ); TRACE_EVENT(gk20a_push_cmdbuf,
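Reviewer note, not part of the patch: the core change in this series is replacing the per-channel in_use flag with a free list plus a reference count (referenceable, ref_count, ref_obtain_lock). Any code that picks a struct channel_gk20a out of f->channel[] is now expected to take a reference before touching it and to drop it when done; gk20a_free_channel() waits for the count to drain before tearing the channel down. The sketch below is only an illustration of that access pattern — the helper name example_poke_channels() is hypothetical, everything else is taken from the diff above — and mirrors what gk20a_channel_suspend() and gk20a_channel_semaphore_wakeup() do after this change.

/* Illustrative sketch only -- hypothetical helper, not part of the patch. */
static void example_poke_channels(struct gk20a *g)
{
	struct fifo_gk20a *f = &g->fifo;
	struct channel_gk20a *ch;
	u32 chid;

	for (chid = 0; chid < f->num_channels; chid++) {
		/* returns NULL if the channel is dead or being freed */
		ch = gk20a_channel_get(&f->channel[chid]);
		if (!ch)
			continue;

		/* the reference keeps gk20a_free_channel() from completing,
		 * so dereferencing ch is safe until the matching put */
		gk20a_dbg_info("channel %d holds %d references",
			       ch->hw_chid, atomic_read(&ch->ref_count));

		/* every successful get is paired with exactly one put */
		gk20a_channel_put(ch);
	}
}

Note the related design choice visible in free_channel(): freed channels are pushed onto the head of f->free_chs rather than the tail, and ch->g is cleared, so (per the comments in the patch) a stale, non-refcounted channel pointer is more likely to hit a reused or obviously dead channel and make timing-related lifetime bugs visible sooner.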