diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 68a303924..651ea08c7 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -33,6 +33,7 @@ #include "gk20a.h" #include "dbg_gpu_gk20a.h" +#include "semaphore_gk20a.h" #include "hw_ram_gk20a.h" #include "hw_fifo_gk20a.h" @@ -340,7 +341,7 @@ static void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a) * resource at this point * if not, then it will be destroyed at channel_free() */ - if (ch_gk20a->sync && ch_gk20a->sync->syncpt_aggressive_destroy) { + if (ch_gk20a->sync && ch_gk20a->sync->aggressive_destroy) { ch_gk20a->sync->destroy(ch_gk20a->sync); ch_gk20a->sync = NULL; } @@ -657,6 +658,8 @@ unbind: ch->vpr = false; ch->vm = NULL; + gk20a_channel_fence_close(&ch->last_submit.pre_fence); + gk20a_channel_fence_close(&ch->last_submit.post_fence); if (ch->sync) { ch->sync->destroy(ch->sync); ch->sync = NULL; @@ -1089,7 +1092,8 @@ static int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c, ch_vm = c->vm; c->cmds_pending = false; - c->last_submit_fence.valid = false; + gk20a_channel_fence_close(&c->last_submit.pre_fence); + gk20a_channel_fence_close(&c->last_submit.post_fence); c->ramfc.offset = 0; c->ramfc.size = ram_in_ramfc_s() / 8; @@ -1272,13 +1276,16 @@ static int gk20a_channel_submit_wfi(struct channel_gk20a *c) } } - err = c->sync->incr_wfi(c->sync, &cmd, &c->last_submit_fence); + gk20a_channel_fence_close(&c->last_submit.pre_fence); + gk20a_channel_fence_close(&c->last_submit.post_fence); + + err = c->sync->incr_wfi(c->sync, &cmd, &c->last_submit.post_fence); if (unlikely(err)) { mutex_unlock(&c->submit_lock); return err; } - WARN_ON(!c->last_submit_fence.wfi); + WARN_ON(!c->last_submit.post_fence.wfi); c->gpfifo.cpu_va[c->gpfifo.put].entry0 = u64_lo32(cmd->gva); c->gpfifo.cpu_va[c->gpfifo.put].entry1 = u64_hi32(cmd->gva) | @@ -1344,7 +1351,8 @@ static void trace_write_pushbuffer(struct channel_gk20a *c, struct gpfifo *g) } static int gk20a_channel_add_job(struct channel_gk20a *c, - struct gk20a_channel_fence *fence) + struct gk20a_channel_fence *pre_fence, + struct gk20a_channel_fence *post_fence) { struct vm_gk20a *vm = c->vm; struct channel_gk20a_job *job = NULL; @@ -1369,7 +1377,8 @@ static int gk20a_channel_add_job(struct channel_gk20a *c, job->num_mapped_buffers = num_mapped_buffers; job->mapped_buffers = mapped_buffers; - job->fence = *fence; + gk20a_channel_fence_dup(pre_fence, &job->pre_fence); + gk20a_channel_fence_dup(post_fence, &job->post_fence); mutex_lock(&c->jobs_lock); list_add_tail(&job->list, &c->jobs); @@ -1391,13 +1400,18 @@ void gk20a_channel_update(struct channel_gk20a *c, int nr_completed) mutex_lock(&c->jobs_lock); list_for_each_entry_safe(job, n, &c->jobs, list) { bool completed = WARN_ON(!c->sync) || - c->sync->is_expired(c->sync, &job->fence); + c->sync->is_expired(c->sync, &job->post_fence); if (!completed) break; gk20a_vm_put_buffers(vm, job->mapped_buffers, job->num_mapped_buffers); + /* Close the fences (this will unref the semaphores and release + * them to the pool). */ + gk20a_channel_fence_close(&job->pre_fence); + gk20a_channel_fence_close(&job->post_fence); + /* job is done. 
release its reference to vm */ gk20a_vm_put(vm); @@ -1413,8 +1427,8 @@ void gk20a_channel_update(struct channel_gk20a *c, int nr_completed) * the sync resource */ if (list_empty(&c->jobs)) { - if (c->sync && c->sync->syncpt_aggressive_destroy && - c->sync->is_expired(c->sync, &c->last_submit_fence)) { + if (c->sync && c->sync->aggressive_destroy && + c->sync->is_expired(c->sync, &c->last_submit.post_fence)) { c->sync->destroy(c->sync); c->sync = NULL; } @@ -1448,8 +1462,11 @@ static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, struct device *d = dev_from_gk20a(g); int err = 0; int i; + int wait_fence_fd = -1; struct priv_cmd_entry *wait_cmd = NULL; struct priv_cmd_entry *incr_cmd = NULL; + struct gk20a_channel_fence pre_fence = { 0 }; + struct gk20a_channel_fence post_fence = { 0 }; /* we might need two extra gpfifo entries - one for pre fence * and one for post fence. */ const int extra_entries = 2; @@ -1534,12 +1551,14 @@ static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, * keep running some tests which trigger this condition */ if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) { - if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) - err = c->sync->wait_fd(c->sync, fence->syncpt_id, - &wait_cmd); - else + if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) { + wait_fence_fd = fence->syncpt_id; + err = c->sync->wait_fd(c->sync, wait_fence_fd, + &wait_cmd, &pre_fence); + } else { err = c->sync->wait_syncpt(c->sync, fence->syncpt_id, - fence->value, &wait_cmd); + fence->value, &wait_cmd, &pre_fence); + } } if (err) { mutex_unlock(&c->submit_lock); @@ -1551,19 +1570,19 @@ static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, to keep track of method completion for idle railgating */ if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET && flags & NVHOST_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) - err = c->sync->incr_user_fd(c->sync, &incr_cmd, - &c->last_submit_fence, + err = c->sync->incr_user_fd(c->sync, wait_fence_fd, &incr_cmd, + &post_fence, need_wfi, &fence->syncpt_id); else if (flags & NVHOST_SUBMIT_GPFIFO_FLAGS_FENCE_GET) err = c->sync->incr_user_syncpt(c->sync, &incr_cmd, - &c->last_submit_fence, + &post_fence, need_wfi, &fence->syncpt_id, &fence->value); else err = c->sync->incr(c->sync, &incr_cmd, - &c->last_submit_fence); + &post_fence); if (err) { mutex_unlock(&c->submit_lock); goto clean_up; @@ -1611,8 +1630,13 @@ static int gk20a_submit_channel_gpfifo(struct channel_gk20a *c, incr_cmd->gp_put = c->gpfifo.put; } + gk20a_channel_fence_close(&c->last_submit.pre_fence); + gk20a_channel_fence_close(&c->last_submit.post_fence); + c->last_submit.pre_fence = pre_fence; + c->last_submit.post_fence = post_fence; + /* TODO! Check for errors... 
*/ - gk20a_channel_add_job(c, &c->last_submit_fence); + gk20a_channel_add_job(c, &pre_fence, &post_fence); c->cmds_pending = true; gk20a_bar1_writel(g, @@ -1637,6 +1661,8 @@ clean_up: gk20a_err(d, "fail"); free_priv_cmdbuf(c, wait_cmd); free_priv_cmdbuf(c, incr_cmd); + gk20a_channel_fence_close(&pre_fence); + gk20a_channel_fence_close(&post_fence); gk20a_idle(g->dev); return err; } @@ -1669,6 +1695,7 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid) int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout) { int err = 0; + struct gk20a_channel_fence *fence = &ch->last_submit.post_fence; if (!ch->cmds_pending) return 0; @@ -1677,21 +1704,20 @@ int gk20a_channel_finish(struct channel_gk20a *ch, unsigned long timeout) if (ch->has_timedout) return -ETIMEDOUT; - if (!(ch->last_submit_fence.valid && ch->last_submit_fence.wfi)) { + if (!(fence->valid && fence->wfi)) { gk20a_dbg_fn("issuing wfi, incr to finish the channel"); err = gk20a_channel_submit_wfi(ch); } if (err) return err; - BUG_ON(!(ch->last_submit_fence.valid && ch->last_submit_fence.wfi)); + BUG_ON(!(fence->valid && fence->wfi)); - gk20a_dbg_fn("waiting for channel to finish thresh:%d", - ch->last_submit_fence.thresh); + gk20a_dbg_fn("waiting for channel to finish thresh:%d sema:%p", + fence->thresh, fence->semaphore); if (ch->sync) { - err = ch->sync->wait_cpu(ch->sync, &ch->last_submit_fence, - timeout); + err = ch->sync->wait_cpu(ch->sync, fence, timeout); if (WARN_ON(err)) dev_warn(dev_from_gk20a(ch->g), "timed out waiting for gk20a channel to finish"); @@ -1900,7 +1926,8 @@ int gk20a_channel_suspend(struct gk20a *g) if (c->sync) c->sync->wait_cpu(c->sync, - &c->last_submit_fence, 500000); + &c->last_submit.post_fence, + 500000); break; } } diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h index dd0197d6f..84983cc65 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.h @@ -68,7 +68,8 @@ struct channel_ctx_gk20a { struct channel_gk20a_job { struct mapped_buffer_node **mapped_buffers; int num_mapped_buffers; - struct gk20a_channel_fence fence; + struct gk20a_channel_fence pre_fence; + struct gk20a_channel_fence post_fence; struct list_head list; }; @@ -112,7 +113,10 @@ struct channel_gk20a { u32 timeout_gpfifo_get; bool cmds_pending; - struct gk20a_channel_fence last_submit_fence; + struct { + struct gk20a_channel_fence pre_fence; + struct gk20a_channel_fence post_fence; + } last_submit; void (*remove_support)(struct channel_gk20a *); #if defined(CONFIG_GK20A_CYCLE_STATS) diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c index f91dd52d6..677c4b493 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c @@ -19,6 +19,9 @@ #include "channel_sync_gk20a.h" #include "gk20a.h" +#include "semaphore_gk20a.h" +#include "sync_gk20a.h" +#include "mm_gk20a.h" #ifdef CONFIG_SYNC #include "../../../staging/android/sync.h" @@ -74,7 +77,8 @@ bool gk20a_channel_syncpt_is_expired(struct gk20a_channel_sync *s, } int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s, u32 id, - u32 thresh, struct priv_cmd_entry **entry) + u32 thresh, struct priv_cmd_entry **entry, + struct gk20a_channel_fence *fence) { struct gk20a_channel_syncpt *sp = container_of(s, struct gk20a_channel_syncpt, ops); @@ -99,11 +103,13 @@ int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s, u32 id, add_wait_cmd(&wait_cmd->ptr[0], id, 
thresh); *entry = wait_cmd; + fence->valid = false; return 0; } int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd, - struct priv_cmd_entry **entry) + struct priv_cmd_entry **entry, + struct gk20a_channel_fence *fence) { #ifdef CONFIG_SYNC int i; @@ -158,6 +164,7 @@ int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd, sync_fence_put(sync_fence); *entry = wait_cmd; + fence->valid = false; return 0; #else return -ENODEV; @@ -301,6 +308,7 @@ int gk20a_channel_syncpt_incr_user_syncpt(struct gk20a_channel_sync *s, } int gk20a_channel_syncpt_incr_user_fd(struct gk20a_channel_sync *s, + int wait_fence_fd, struct priv_cmd_entry **entry, struct gk20a_channel_fence *fence, bool wfi, @@ -366,18 +374,424 @@ gk20a_channel_syncpt_create(struct channel_gk20a *c) sp->ops.set_min_eq_max = gk20a_channel_syncpt_set_min_eq_max; sp->ops.destroy = gk20a_channel_syncpt_destroy; - sp->ops.syncpt_aggressive_destroy = true; + sp->ops.aggressive_destroy = true; return &sp->ops; } #endif /* CONFIG_TEGRA_GK20A */ +struct gk20a_channel_semaphore { + struct gk20a_channel_sync ops; + struct channel_gk20a *c; + + /* A semaphore pool owned by this channel. */ + struct gk20a_semaphore_pool *pool; + + /* A sync timeline that advances when gpu completes work. */ + struct sync_timeline *timeline; +}; + +#ifdef CONFIG_SYNC +struct wait_fence_work { + struct sync_fence_waiter waiter; + struct channel_gk20a *ch; + struct gk20a_semaphore *sema; +}; + +static void gk20a_channel_semaphore_launcher( + struct sync_fence *fence, + struct sync_fence_waiter *waiter) +{ + int err; + struct wait_fence_work *w = + container_of(waiter, struct wait_fence_work, waiter); + struct gk20a *g = w->ch->g; + + gk20a_dbg_info("waiting for pre fence %p '%s'", + fence, fence->name); + err = sync_fence_wait(fence, -1); + if (err < 0) + dev_err(&g->dev->dev, "error waiting pre-fence: %d\n", err); + + gk20a_dbg_info( + "wait completed (%d) for fence %p '%s', triggering gpu work", + err, fence, fence->name); + sync_fence_put(fence); + gk20a_semaphore_release(w->sema); + gk20a_semaphore_put(w->sema); + kfree(w); +} +#endif + +static int add_sema_cmd(u32 *ptr, u64 sema, u32 payload, + bool acquire, bool wfi) +{ + int i = 0; + /* semaphore_a */ + ptr[i++] = 0x20010004; + /* offset_upper */ + ptr[i++] = (sema >> 32) & 0xff; + /* semaphore_b */ + ptr[i++] = 0x20010005; + /* offset */ + ptr[i++] = sema & 0xffffffff; + /* semaphore_c */ + ptr[i++] = 0x20010006; + /* payload */ + ptr[i++] = payload; + if (acquire) { + /* semaphore_d */ + ptr[i++] = 0x20010007; + /* operation: acq_geq, switch_en */ + ptr[i++] = 0x4 | (0x1 << 12); + } else { + /* semaphore_d */ + ptr[i++] = 0x20010007; + /* operation: release, wfi */ + ptr[i++] = 0x2 | ((wfi ? 
0x0 : 0x1) << 20);
+		/* non_stall_int */
+		ptr[i++] = 0x20010008;
+		/* ignored */
+		ptr[i++] = 0;
+	}
+	return i;
+}
+
+static int gk20a_channel_semaphore_wait_cpu(
+		struct gk20a_channel_sync *s,
+		struct gk20a_channel_fence *fence,
+		int timeout)
+{
+	int remain;
+	struct gk20a_channel_semaphore *sp =
+		container_of(s, struct gk20a_channel_semaphore, ops);
+	if (!fence->valid || WARN_ON(!fence->semaphore))
+		return 0;
+
+	remain = wait_event_interruptible_timeout(
+			sp->c->semaphore_wq,
+			!gk20a_semaphore_is_acquired(fence->semaphore),
+			timeout);
+	if (remain == 0 && gk20a_semaphore_is_acquired(fence->semaphore))
+		return -ETIMEDOUT;
+	else if (remain < 0)
+		return remain;
+	return 0;
+}
+
+static bool gk20a_channel_semaphore_is_expired(
+		struct gk20a_channel_sync *s,
+		struct gk20a_channel_fence *fence)
+{
+	bool expired;
+	struct gk20a_channel_semaphore *sp =
+		container_of(s, struct gk20a_channel_semaphore, ops);
+	if (!fence->valid || WARN_ON(!fence->semaphore))
+		return true;
+
+	expired = !gk20a_semaphore_is_acquired(fence->semaphore);
+	if (expired)
+		gk20a_sync_timeline_signal(sp->timeline);
+	return expired;
+}
+
+static int gk20a_channel_semaphore_wait_syncpt(
+		struct gk20a_channel_sync *s, u32 id,
+		u32 thresh, struct priv_cmd_entry **entry,
+		struct gk20a_channel_fence *fence)
+{
+	struct gk20a_channel_semaphore *sema =
+		container_of(s, struct gk20a_channel_semaphore, ops);
+	struct device *dev = dev_from_gk20a(sema->c->g);
+	gk20a_err(dev, "trying to use syncpoint synchronization");
+	return -ENODEV;
+}
+
+static int gk20a_channel_semaphore_wait_fd(
+		struct gk20a_channel_sync *s, int fd,
+		struct priv_cmd_entry **entry,
+		struct gk20a_channel_fence *fence)
+{
+	struct gk20a_channel_semaphore *sema =
+		container_of(s, struct gk20a_channel_semaphore, ops);
+	struct channel_gk20a *c = sema->c;
+#ifdef CONFIG_SYNC
+	struct sync_fence *sync_fence;
+	struct priv_cmd_entry *wait_cmd = NULL;
+	struct wait_fence_work *w;
+	int written;
+	int err;
+	u64 va;
+
+	sync_fence = gk20a_sync_fence_fdget(fd);
+	if (!sync_fence)
+		return -EINVAL;
+
+	w = kzalloc(sizeof(*w), GFP_KERNEL);
+	if (!w) {
+		err = -ENOMEM;
+		goto fail;
+	}
+	sync_fence_waiter_init(&w->waiter, gk20a_channel_semaphore_launcher);
+	w->ch = c;
+	w->sema = gk20a_semaphore_alloc(sema->pool);
+	if (!w->sema) {
+		gk20a_err(dev_from_gk20a(c->g), "ran out of semaphores");
+		err = -EAGAIN;
+		goto fail;
+	}
+
+	gk20a_channel_alloc_priv_cmdbuf(c, 8, &wait_cmd);
+	if (wait_cmd == NULL) {
+		gk20a_err(dev_from_gk20a(c->g),
+			"not enough priv cmd buffer space");
+		err = -EAGAIN;
+		goto fail;
+	}
+
+	va = gk20a_semaphore_gpu_va(w->sema, c->vm);
+	/* GPU unblocked when the semaphore value becomes 1. 
*/ + written = add_sema_cmd(wait_cmd->ptr, va, 1, true, false); + WARN_ON(written != wait_cmd->size); + sync_fence_wait_async(sync_fence, &w->waiter); + + *entry = wait_cmd; + return 0; +fail: + if (w && w->sema) + gk20a_semaphore_put(w->sema); + kfree(w); + sync_fence_put(sync_fence); + return err; +#else + gk20a_err(dev_from_gk20a(c->g), + "trying to use sync fds with CONFIG_SYNC disabled"); + return -ENODEV; +#endif +} + +static int __gk20a_channel_semaphore_incr( + struct gk20a_channel_sync *s, bool wfi_cmd, + struct priv_cmd_entry **entry, + struct gk20a_channel_fence *fence) +{ + u64 va; + int incr_cmd_size; + int written; + struct priv_cmd_entry *incr_cmd = NULL; + struct gk20a_channel_semaphore *sp = + container_of(s, struct gk20a_channel_semaphore, ops); + struct channel_gk20a *c = sp->c; + struct gk20a_semaphore *semaphore; + + semaphore = gk20a_semaphore_alloc(sp->pool); + if (!semaphore) { + gk20a_err(dev_from_gk20a(c->g), + "ran out of semaphores"); + return -EAGAIN; + } + + incr_cmd_size = 10; + gk20a_channel_alloc_priv_cmdbuf(c, incr_cmd_size, &incr_cmd); + if (incr_cmd == NULL) { + gk20a_err(dev_from_gk20a(c->g), + "not enough priv cmd buffer space"); + gk20a_semaphore_put(semaphore); + return -EAGAIN; + } + + /* Release the completion semaphore. */ + va = gk20a_semaphore_gpu_va(semaphore, c->vm); + written = add_sema_cmd(incr_cmd->ptr, va, 1, false, wfi_cmd); + WARN_ON(written != incr_cmd_size); + + fence->valid = true; + fence->wfi = wfi_cmd; + fence->semaphore = semaphore; + *entry = incr_cmd; + return 0; +} + +static int gk20a_channel_semaphore_incr_wfi( + struct gk20a_channel_sync *s, + struct priv_cmd_entry **entry, + struct gk20a_channel_fence *fence) +{ + return __gk20a_channel_semaphore_incr(s, + true /* wfi */, + entry, fence); +} + +static int gk20a_channel_semaphore_incr( + struct gk20a_channel_sync *s, + struct priv_cmd_entry **entry, + struct gk20a_channel_fence *fence) +{ + /* Don't put wfi cmd to this one since we're not returning + * a fence to user space. */ + return __gk20a_channel_semaphore_incr(s, false /* no wfi */, + entry, fence); +} + +static int gk20a_channel_semaphore_incr_user_syncpt( + struct gk20a_channel_sync *s, + struct priv_cmd_entry **entry, + struct gk20a_channel_fence *fence, + bool wfi, + u32 *id, u32 *thresh) +{ + struct gk20a_channel_semaphore *sema = + container_of(s, struct gk20a_channel_semaphore, ops); + struct device *dev = dev_from_gk20a(sema->c->g); + gk20a_err(dev, "trying to use syncpoint synchronization"); + return -ENODEV; +} + +static int gk20a_channel_semaphore_incr_user_fd( + struct gk20a_channel_sync *s, + int wait_fence_fd, + struct priv_cmd_entry **entry, + struct gk20a_channel_fence *fence, + bool wfi, + int *fd) +{ + struct gk20a_channel_semaphore *sema = + container_of(s, struct gk20a_channel_semaphore, ops); +#ifdef CONFIG_SYNC + struct sync_fence *dependency = NULL; + int err; + + err = __gk20a_channel_semaphore_incr(s, wfi, + entry, fence); + if (err) + return err; + + if (wait_fence_fd >= 0) { + dependency = gk20a_sync_fence_fdget(wait_fence_fd); + if (!dependency) + return -EINVAL; + } + + *fd = gk20a_sync_fence_create(sema->timeline, fence->semaphore, + dependency, "fence"); + if (*fd < 0) { + if (dependency) + sync_fence_put(dependency); + return *fd; + } + return 0; +#else + gk20a_err(dev_from_gk20a(sema->c->g), + "trying to use sync fds with CONFIG_SYNC disabled"); + return -ENODEV; +#endif +} + +static void gk20a_channel_semaphore_set_min_eq_max(struct gk20a_channel_sync *s) +{ + /* Nothing to do. 
*/ +} + +static void gk20a_channel_semaphore_destroy(struct gk20a_channel_sync *s) +{ + struct gk20a_channel_semaphore *sema = + container_of(s, struct gk20a_channel_semaphore, ops); + if (sema->timeline) + gk20a_sync_timeline_destroy(sema->timeline); + if (sema->pool) { + gk20a_semaphore_pool_unmap(sema->pool, sema->c->vm); + gk20a_semaphore_pool_put(sema->pool); + } + kfree(sema); +} + +static struct gk20a_channel_sync * +gk20a_channel_semaphore_create(struct channel_gk20a *c) +{ + int err; + int asid = -1; + struct gk20a_channel_semaphore *sema; + char pool_name[20]; + + if (WARN_ON(!c->vm)) + return NULL; + + sema = kzalloc(sizeof(*sema), GFP_KERNEL); + if (!sema) + return NULL; + sema->c = c; + + if (c->vm->as_share) + asid = c->vm->as_share->id; + + /* A pool of 256 semaphores fits into one 4k page. */ + sprintf(pool_name, "semaphore_pool-%d", c->hw_chid); + sema->pool = gk20a_semaphore_pool_alloc(dev_from_gk20a(c->g), + pool_name, 256); + if (!sema->pool) + goto clean_up; + + /* Map the semaphore pool to the channel vm. Map as read-write to the + * owner channel (all other channels should map as read only!). */ + err = gk20a_semaphore_pool_map(sema->pool, c->vm, gk20a_mem_flag_none); + if (err) + goto clean_up; + +#ifdef CONFIG_SYNC + sema->timeline = gk20a_sync_timeline_create( + "gk20a_ch%d_as%d", c->hw_chid, asid); + if (!sema->timeline) + goto clean_up; +#endif + sema->ops.wait_cpu = gk20a_channel_semaphore_wait_cpu; + sema->ops.is_expired = gk20a_channel_semaphore_is_expired; + sema->ops.wait_syncpt = gk20a_channel_semaphore_wait_syncpt; + sema->ops.wait_fd = gk20a_channel_semaphore_wait_fd; + sema->ops.incr = gk20a_channel_semaphore_incr; + sema->ops.incr_wfi = gk20a_channel_semaphore_incr_wfi; + sema->ops.incr_user_syncpt = gk20a_channel_semaphore_incr_user_syncpt; + sema->ops.incr_user_fd = gk20a_channel_semaphore_incr_user_fd; + sema->ops.set_min_eq_max = gk20a_channel_semaphore_set_min_eq_max; + sema->ops.destroy = gk20a_channel_semaphore_destroy; + + /* Aggressively destroying the semaphore sync would cause overhead + * since the pool needs to be mapped to GMMU. 
*/ + sema->ops.aggressive_destroy = false; + + return &sema->ops; +clean_up: + gk20a_channel_semaphore_destroy(&sema->ops); + return NULL; +} + struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c) { #ifdef CONFIG_TEGRA_GK20A if (gk20a_platform_has_syncpoints(c->g->dev)) return gk20a_channel_syncpt_create(c); #endif - WARN_ON(1); - return NULL; + return gk20a_channel_semaphore_create(c); +} + +static inline bool gk20a_channel_fence_is_closed(struct gk20a_channel_fence *f) +{ + if (f->valid || f->semaphore) + return false; + return true; +} + +void gk20a_channel_fence_close(struct gk20a_channel_fence *f) +{ + if (f->semaphore) + gk20a_semaphore_put(f->semaphore); + memset(f, 0, sizeof(*f)); +} + +void gk20a_channel_fence_dup(struct gk20a_channel_fence *from, + struct gk20a_channel_fence *to) +{ + WARN_ON(!gk20a_channel_fence_is_closed(to)); + *to = *from; + if (to->semaphore) + gk20a_semaphore_get(to->semaphore); } diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h index 90b61bfd7..baa4a151a 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h @@ -23,11 +23,13 @@ struct gk20a_channel_sync; struct priv_cmd_entry; struct channel_gk20a; +struct gk20a_semaphore; struct gk20a_channel_fence { bool valid; bool wfi; /* was issued with preceding wfi */ - u32 thresh; /* either semaphore or syncpoint value */ + u32 thresh; /* syncpoint fences only */ + struct gk20a_semaphore *semaphore; /* semaphore fences only */ }; struct gk20a_channel_sync { @@ -43,11 +45,13 @@ struct gk20a_channel_sync { /* Generate a gpu wait cmdbuf from syncpoint. */ int (*wait_syncpt)(struct gk20a_channel_sync *s, u32 id, u32 thresh, - struct priv_cmd_entry **entry); + struct priv_cmd_entry **entry, + struct gk20a_channel_fence *fence); /* Generate a gpu wait cmdbuf from sync fd. */ int (*wait_fd)(struct gk20a_channel_sync *s, int fd, - struct priv_cmd_entry **entry); + struct priv_cmd_entry **entry, + struct gk20a_channel_fence *fence); /* Increment syncpoint/semaphore. * Returns @@ -88,6 +92,7 @@ struct gk20a_channel_sync { * - a sync fd that can be returned to user space. */ int (*incr_user_fd)(struct gk20a_channel_sync *s, + int wait_fence_fd, struct priv_cmd_entry **entry, struct gk20a_channel_fence *fence, bool wfi, @@ -96,12 +101,16 @@ struct gk20a_channel_sync { /* Reset the channel syncpoint/semaphore. */ void (*set_min_eq_max)(struct gk20a_channel_sync *s); - /* flag to set syncpt destroy aggressiveness */ - bool syncpt_aggressive_destroy; + /* flag to set sync destroy aggressiveness */ + bool aggressive_destroy; /* Free the resources allocated by gk20a_channel_sync_create. */ void (*destroy)(struct gk20a_channel_sync *s); }; struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c); + +void gk20a_channel_fence_close(struct gk20a_channel_fence *f); +void gk20a_channel_fence_dup(struct gk20a_channel_fence *from, + struct gk20a_channel_fence *to); #endif
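
A note for reviewers on the fence lifecycle this patch introduces: each submit now produces a pre_fence/post_fence pair whose semaphore reference is shared between the per-channel job list (via gk20a_channel_fence_dup(), which takes an extra semaphore reference) and channel->last_submit (which takes over the submit path's original references without a dup). gk20a_channel_update() drops the job's references once the post fence expires, and the next submit (or channel teardown) drops last_submit's. The program below is a minimal sketch of that refcounting discipline only; it is not driver code, the toy_* names are invented for illustration, and it assumes gk20a_semaphore_alloc() hands back exactly one reference that the pool reclaims when the count reaches zero.

/* Toy model of the gk20a_channel_fence ownership rules in the patch
 * above. The semaphore pool is replaced by a plain reference counter;
 * comments name the driver calls each step stands in for. */
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct toy_semaphore { int refcount; };

struct toy_fence {
	bool valid;
	struct toy_semaphore *semaphore;
};

/* Stands in for gk20a_channel_fence_close(): drop the semaphore
 * reference (gk20a_semaphore_put) and invalidate the fence. */
static void toy_fence_close(struct toy_fence *f)
{
	if (f->semaphore)
		f->semaphore->refcount--;
	memset(f, 0, sizeof(*f));
}

/* Stands in for gk20a_channel_fence_dup(): the destination must be
 * closed, and the copy takes its own reference (gk20a_semaphore_get). */
static void toy_fence_dup(struct toy_fence *from, struct toy_fence *to)
{
	assert(!to->valid && !to->semaphore);
	*to = *from;
	if (to->semaphore)
		to->semaphore->refcount++;
}

int main(void)
{
	struct toy_semaphore sema = { .refcount = 1 };	/* pool alloc */
	struct toy_fence post = { .valid = true, .semaphore = &sema };
	struct toy_fence job_copy = { 0 };
	struct toy_fence last_submit = { 0 };

	/* Submit path: the job gets a dup'd copy, last_submit takes
	 * over the original reference (plain assignment, no dup). */
	toy_fence_dup(&post, &job_copy);
	last_submit = post;

	/* gk20a_channel_update(): post fence expired, close the job's
	 * copy; the next submit (or teardown) closes last_submit's. */
	toy_fence_close(&job_copy);
	toy_fence_close(&last_submit);

	printf("refcount back to %d\n", sema.refcount);
	return 0;
}

If the references balance, the program prints "refcount back to 0", mirroring the point at which the real semaphore would be released back to its pool.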