gpu: nvgpu: use inplace allocation in sync framework

This change is the first of a series of changes to
support the use of pre-allocated job tracking resources
in the submit path. With this change, we still maintain a
dynamically-allocated joblist, but make the necessary changes
in the channel_sync & fence frameworks to use in-place
allocations. Specifically, we:

1) Update channel sync framework routines to take in
pre-allocated priv_cmd_entry(s) & gk20a_fence(s) rather
than dynamically allocating them internally

2) Move allocation of priv_cmd_entry(s) & gk20a_fence(s)
to gk20a_submit_prepare_syncs

3) Modify fence framework to have separate allocation
and init APIs. We expose allocation as a separate API, so
the client can allocate the object before passing it into
the channel sync framework (a condensed sketch of the
resulting flow follows this list).

4) Fix clean_up logic in channel sync framework
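
To illustrate, a condensed sketch of the incr side of the new flow,
distilled from gk20a_submit_prepare_syncs() in the diff below. Error
paths are abbreviated and the surrounding function is hypothetical;
c and need_sync_fence are as in the real submit code:

    struct channel_gk20a_job *job;
    int err;

    /* job tracking struct is still dynamically allocated for now */
    job = channel_gk20a_alloc_job(c);
    if (!job)
            return -ENOMEM;

    /* pre-allocate tracking objects before calling into channel_sync */
    job->incr_cmd = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
    job->post_fence = gk20a_alloc_fence(c);  /* allocation only, no init */
    if (!job->incr_cmd || !job->post_fence) {
            err = -ENOMEM;
            goto clean_up;
    }

    /* channel_sync fills the pre-allocated entry & fence in place;
     * gk20a_fence_from_syncpt()/_from_semaphore() now init the given
     * fence and publish it via f->valid + wmb() instead of allocating */
    err = c->sync->incr(c->sync, job->incr_cmd, job->post_fence,
                        need_sync_fence);
    if (err)
            goto clean_up;

    return 0;

    clean_up:
    /* the caller owns the objects, so cleanup stays with the caller */
    gk20a_free_priv_cmdbuf(c, job->incr_cmd);
    gk20a_fence_put(job->post_fence);
    channel_gk20a_free_job(c, job);
    return err;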

Bug 1795076

Change-Id: I96db457683cd207fd029c31c45f548f98055e844
Signed-off-by: Sachit Kadle <skadle@nvidia.com>
Reviewed-on: http://git-master/r/1206725
(cherry picked from commit 9d196fd10db6c2f934c2a53b1fc0500eb4626624)
Reviewed-on: http://git-master/r/1223933
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>

@@ -1376,16 +1376,20 @@ static void channel_gk20a_free_priv_cmdbuf(struct channel_gk20a *c)
/* allocate a cmd buffer with given size. size is number of u32 entries */
int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
struct priv_cmd_entry **entry)
struct priv_cmd_entry *e)
{
struct priv_cmd_queue *q = &c->priv_cmd_q;
struct priv_cmd_entry *e;
u32 free_count;
u32 size = orig_size;
gk20a_dbg_fn("size %d", orig_size);
*entry = NULL;
if (!e) {
gk20a_err(dev_from_gk20a(c->g),
"ch %d: priv cmd entry is null",
c->hw_chid);
return -EINVAL;
}
/* if free space in the end is less than requested, increase the size
* to make the real allocated space start from beginning. */
@@ -1400,14 +1404,6 @@ int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
if (size > free_count)
return -EAGAIN;
e = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
if (!e) {
gk20a_err(dev_from_gk20a(c->g),
"ch %d: fail to allocate priv cmd entry",
c->hw_chid);
return -ENOMEM;
}
e->size = orig_size;
e->mem = &q->mem;
@@ -1426,8 +1422,10 @@ int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 orig_size,
/* we already handled q->put + size > q->size so BUG_ON this */
BUG_ON(q->put > q->size);
*entry = e;
/* commit the previous writes before making the entry valid */
wmb();
e->valid = true;
gk20a_dbg_fn("done");
return 0;
@@ -1441,6 +1439,21 @@ static void free_priv_cmdbuf(struct channel_gk20a *c,
kfree(e);
}
static struct channel_gk20a_job *channel_gk20a_alloc_job(
struct channel_gk20a *c)
{
struct channel_gk20a_job *job = NULL;
job = kzalloc(sizeof(*job), GFP_KERNEL);
return job;
}
static void channel_gk20a_free_job(struct channel_gk20a *c,
struct channel_gk20a_job *job)
{
kfree(job);
}
int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
struct nvgpu_alloc_gpfifo_args *args)
{
@@ -1818,10 +1831,15 @@ int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e)
if (!e)
return 0;
if ((q->get != e->off) && e->off != 0)
gk20a_err(d, "requests out-of-order, ch=%d\n", c->hw_chid);
if (e->valid) {
/* read the entry's valid flag before reading its contents */
rmb();
if ((q->get != e->off) && e->off != 0)
gk20a_err(d, "requests out-of-order, ch=%d\n",
c->hw_chid);
q->get = e->off + e->size;
}
q->get = e->off + e->size;
free_priv_cmdbuf(c, e);
return 0;
@@ -1854,14 +1872,10 @@ static void gk20a_channel_cancel_job_clean_up(struct channel_gk20a *c,
}
static int gk20a_channel_add_job(struct channel_gk20a *c,
struct gk20a_fence *pre_fence,
struct gk20a_fence *post_fence,
struct priv_cmd_entry *wait_cmd,
struct priv_cmd_entry *incr_cmd,
struct channel_gk20a_job *job,
bool skip_buffer_refcounting)
{
struct vm_gk20a *vm = c->vm;
struct channel_gk20a_job *job = NULL;
struct mapped_buffer_node **mapped_buffers = NULL;
int err = 0, num_mapped_buffers = 0;
@@ -1875,22 +1889,12 @@ static int gk20a_channel_add_job(struct channel_gk20a *c,
goto err_put_vm;
}
job = kzalloc(sizeof(*job), GFP_KERNEL);
if (!job) {
err = -ENOMEM;
goto err_put_buffers;
}
/* put() is done in gk20a_channel_update() when the job is done */
c = gk20a_channel_get(c);
if (c) {
job->num_mapped_buffers = num_mapped_buffers;
job->mapped_buffers = mapped_buffers;
job->pre_fence = pre_fence;
job->post_fence = post_fence;
job->wait_cmd = wait_cmd;
job->incr_cmd = incr_cmd;
gk20a_channel_timeout_start(c, job);
@@ -1899,13 +1903,11 @@ static int gk20a_channel_add_job(struct channel_gk20a *c,
spin_unlock(&c->jobs_lock);
} else {
err = -ETIMEDOUT;
goto err_free_job;
goto err_put_buffers;
}
return 0;
err_free_job:
kfree(job);
err_put_buffers:
gk20a_vm_put_buffers(vm, mapped_buffers, num_mapped_buffers);
err_put_vm:
@@ -2000,7 +2002,7 @@ static void gk20a_channel_clean_up_jobs(struct work_struct *work)
list_del_init(&job->list);
spin_unlock(&c->jobs_lock);
kfree(job);
channel_gk20a_free_job(c, job);
job_finished = 1;
gk20a_idle(g->dev);
}
@@ -2143,6 +2145,7 @@ out:
*/
static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
struct nvgpu_fence *fence,
struct channel_gk20a_job *job,
struct priv_cmd_entry **wait_cmd,
struct priv_cmd_entry **incr_cmd,
struct gk20a_fence **pre_fence,
@@ -2194,18 +2197,32 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
* this condition.
*/
if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) {
job->wait_cmd = kzalloc(sizeof(struct priv_cmd_entry),
GFP_KERNEL);
job->pre_fence = gk20a_alloc_fence(c);
if (!job->wait_cmd || !job->pre_fence) {
err = -ENOMEM;
goto clean_up_pre_fence;
}
if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
wait_fence_fd = fence->id;
err = c->sync->wait_fd(c->sync, wait_fence_fd,
wait_cmd, pre_fence);
job->wait_cmd, job->pre_fence);
} else {
err = c->sync->wait_syncpt(c->sync, fence->id,
fence->value, wait_cmd,
pre_fence);
fence->value, job->wait_cmd,
job->pre_fence);
}
if (!err) {
if (job->wait_cmd->valid)
*wait_cmd = job->wait_cmd;
*pre_fence = job->pre_fence;
} else
goto clean_up_pre_fence;
}
if (err)
goto fail;
if ((flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) &&
(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE))
@@ -2216,22 +2233,41 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
* is used to keep track of method completion for idle railgating. The
* sync_pt/semaphore PB is added to the GPFIFO later on in submit.
*/
job->incr_cmd = kzalloc(sizeof(struct priv_cmd_entry), GFP_KERNEL);
job->post_fence = gk20a_alloc_fence(c);
if (!job->incr_cmd || !job->post_fence) {
err = -ENOMEM;
goto clean_up_post_fence;
}
if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
err = c->sync->incr_user(c->sync, wait_fence_fd, incr_cmd,
post_fence, need_wfi, need_sync_fence);
err = c->sync->incr_user(c->sync, wait_fence_fd, job->incr_cmd,
job->post_fence, need_wfi, need_sync_fence);
else
err = c->sync->incr(c->sync, incr_cmd,
post_fence, need_sync_fence);
if (err)
goto fail;
err = c->sync->incr(c->sync, job->incr_cmd,
job->post_fence, need_sync_fence);
if (!err) {
*incr_cmd = job->incr_cmd;
*post_fence = job->post_fence;
} else
goto clean_up_post_fence;
return 0;
clean_up_post_fence:
gk20a_free_priv_cmdbuf(c, job->incr_cmd);
gk20a_fence_put(job->post_fence);
job->incr_cmd = NULL;
job->post_fence = NULL;
clean_up_pre_fence:
gk20a_free_priv_cmdbuf(c, job->wait_cmd);
gk20a_fence_put(job->pre_fence);
job->wait_cmd = NULL;
job->pre_fence = NULL;
*wait_cmd = NULL;
*pre_fence = NULL;
fail:
/*
* Cleanup is handled by gk20a_submit_channel_gpfifo() since it is the
* real owner of the objects we make here.
*/
return err;
}
@@ -2250,6 +2286,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
struct priv_cmd_entry *incr_cmd = NULL;
struct gk20a_fence *pre_fence = NULL;
struct gk20a_fence *post_fence = NULL;
struct channel_gk20a_job *job = NULL;
/* we might need two extra gpfifo entries - one for pre fence
* and one for post fence. */
const int extra_entries = 2;
@@ -2351,11 +2388,18 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
}
if (need_job_tracking) {
err = gk20a_submit_prepare_syncs(c, fence, &wait_cmd, &incr_cmd,
job = channel_gk20a_alloc_job(c);
if (!job) {
err = -ENOMEM;
goto clean_up;
}
err = gk20a_submit_prepare_syncs(c, fence, job,
&wait_cmd, &incr_cmd,
&pre_fence, &post_fence,
force_need_sync_fence, flags);
if (err)
goto clean_up;
goto clean_up_job;
}
if (wait_cmd)
@@ -2365,7 +2409,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
err = gk20a_submit_append_gpfifo(c, gpfifo, user_gpfifo,
num_entries);
if (err)
goto clean_up;
goto clean_up_job;
/*
* And here's where we add the incr_cmd we generated earlier. It should
@@ -2379,9 +2423,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
if (need_job_tracking)
/* TODO! Check for errors... */
gk20a_channel_add_job(c, pre_fence, post_fence,
wait_cmd, incr_cmd,
skip_buffer_refcounting);
gk20a_channel_add_job(c, job, skip_buffer_refcounting);
g->ops.fifo.userd_gp_put(g, c);
@@ -2398,10 +2440,10 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
gk20a_dbg_fn("done");
return err;
clean_up_job:
channel_gk20a_free_job(c, job);
clean_up:
gk20a_dbg_fn("fail");
free_priv_cmdbuf(c, wait_cmd);
free_priv_cmdbuf(c, incr_cmd);
gk20a_fence_put(pre_fence);
gk20a_fence_put(post_fence);
if (need_job_tracking)

@@ -218,7 +218,7 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch);
void gk20a_set_error_notifier(struct channel_gk20a *ch, __u32 error);
void gk20a_channel_semaphore_wakeup(struct gk20a *g, bool post_events);
int gk20a_channel_alloc_priv_cmdbuf(struct channel_gk20a *c, u32 size,
struct priv_cmd_entry **entry);
struct priv_cmd_entry *entry);
int gk20a_free_priv_cmdbuf(struct channel_gk20a *c, struct priv_cmd_entry *e);
int gk20a_enable_channel_tsg(struct gk20a *g, struct channel_gk20a *ch);

@@ -57,12 +57,11 @@ static void add_wait_cmd(struct gk20a *g, struct priv_cmd_entry *cmd, u32 off,
}
static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s,
u32 id, u32 thresh, struct priv_cmd_entry **entry,
struct gk20a_fence **fence)
u32 id, u32 thresh, struct priv_cmd_entry *wait_cmd,
struct gk20a_fence *fence)
{
struct gk20a_channel_syncpt *sp =
container_of(s, struct gk20a_channel_syncpt, ops);
struct priv_cmd_entry *wait_cmd = NULL;
struct channel_gk20a *c = sp->c;
int err = 0;
@@ -75,7 +74,7 @@ static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s,
if (nvhost_syncpt_is_expired_ext(sp->host1x_pdev, id, thresh))
return 0;
err = gk20a_channel_alloc_priv_cmdbuf(c, 4, &wait_cmd);
err = gk20a_channel_alloc_priv_cmdbuf(c, 4, wait_cmd);
if (err) {
gk20a_err(dev_from_gk20a(c->g),
"not enough priv cmd buffer space");
@@ -84,21 +83,18 @@ static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s,
add_wait_cmd(c->g, wait_cmd, 0, id, thresh);
*entry = wait_cmd;
*fence = NULL;
return 0;
}
static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd,
struct priv_cmd_entry **entry,
struct gk20a_fence **fence)
struct priv_cmd_entry *wait_cmd,
struct gk20a_fence *fence)
{
#ifdef CONFIG_SYNC
int i;
int num_wait_cmds;
struct sync_fence *sync_fence;
struct sync_pt *pt;
struct priv_cmd_entry *wait_cmd = NULL;
struct gk20a_channel_syncpt *sp =
container_of(s, struct gk20a_channel_syncpt, ops);
struct channel_gk20a *c = sp->c;
@@ -134,7 +130,7 @@ static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd,
return 0;
}
err = gk20a_channel_alloc_priv_cmdbuf(c, 4 * num_wait_cmds, &wait_cmd);
err = gk20a_channel_alloc_priv_cmdbuf(c, 4 * num_wait_cmds, wait_cmd);
if (err) {
gk20a_err(dev_from_gk20a(c->g),
"not enough priv cmd buffer space");
@@ -172,8 +168,6 @@ static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd,
WARN_ON(i != num_wait_cmds);
sync_fence_put(sync_fence);
*entry = wait_cmd;
*fence = NULL;
return 0;
#else
return -ENODEV;
@@ -193,15 +187,14 @@ static void gk20a_channel_syncpt_update(void *priv, int nr_completed)
static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
bool wfi_cmd,
bool register_irq,
struct priv_cmd_entry **entry,
struct gk20a_fence **fence,
struct priv_cmd_entry *incr_cmd,
struct gk20a_fence *fence,
bool need_sync_fence)
{
u32 thresh;
int incr_cmd_size;
int off;
int err;
struct priv_cmd_entry *incr_cmd = NULL;
struct gk20a_channel_syncpt *sp =
container_of(s, struct gk20a_channel_syncpt, ops);
struct channel_gk20a *c = sp->c;
@@ -210,7 +203,7 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
if (wfi_cmd)
incr_cmd_size += 2;
err = gk20a_channel_alloc_priv_cmdbuf(c, incr_cmd_size, &incr_cmd);
err = gk20a_channel_alloc_priv_cmdbuf(c, incr_cmd_size, incr_cmd);
if (err)
return err;
@@ -267,15 +260,21 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
}
}
*fence = gk20a_fence_from_syncpt(sp->host1x_pdev, sp->id, thresh,
err = gk20a_fence_from_syncpt(fence, sp->host1x_pdev, sp->id, thresh,
wfi_cmd, need_sync_fence);
*entry = incr_cmd;
if (err)
goto clean_up_priv_cmd;
return 0;
clean_up_priv_cmd:
gk20a_free_priv_cmdbuf(c, incr_cmd);
return err;
}
static int gk20a_channel_syncpt_incr_wfi(struct gk20a_channel_sync *s,
struct priv_cmd_entry **entry,
struct gk20a_fence **fence)
struct priv_cmd_entry *entry,
struct gk20a_fence *fence)
{
return __gk20a_channel_syncpt_incr(s,
true /* wfi */,
@@ -284,8 +283,8 @@ static int gk20a_channel_syncpt_incr_wfi(struct gk20a_channel_sync *s,
}
static int gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
struct priv_cmd_entry **entry,
struct gk20a_fence **fence,
struct priv_cmd_entry *entry,
struct gk20a_fence *fence,
bool need_sync_fence)
{
/* Don't put wfi cmd to this one since we're not returning
@@ -298,8 +297,8 @@ static int gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
static int gk20a_channel_syncpt_incr_user(struct gk20a_channel_sync *s,
int wait_fence_fd,
struct priv_cmd_entry **entry,
struct gk20a_fence **fence,
struct priv_cmd_entry *entry,
struct gk20a_fence *fence,
bool wfi,
bool need_sync_fence)
{
@@ -500,8 +499,8 @@ static void add_sema_cmd(struct gk20a *g, struct channel_gk20a *c,
static int gk20a_channel_semaphore_wait_syncpt(
struct gk20a_channel_sync *s, u32 id,
u32 thresh, struct priv_cmd_entry **entry,
struct gk20a_fence **fence)
u32 thresh, struct priv_cmd_entry *entry,
struct gk20a_fence *fence)
{
struct gk20a_channel_semaphore *sema =
container_of(s, struct gk20a_channel_semaphore, ops);
@@ -525,7 +524,7 @@ static int gk20a_channel_semaphore_wait_syncpt(
*/
static int __semaphore_wait_fd_fast_path(struct channel_gk20a *c,
struct sync_fence *fence,
struct priv_cmd_entry **wait_cmd,
struct priv_cmd_entry *wait_cmd,
struct gk20a_semaphore **fp_sema)
{
struct gk20a_semaphore *sema;
@@ -551,7 +550,7 @@ static int __semaphore_wait_fd_fast_path(struct channel_gk20a *c,
gk20a_semaphore_get(sema);
BUG_ON(!atomic_read(&sema->value));
add_sema_cmd(c->g, c, sema, *wait_cmd, 8, true, false);
add_sema_cmd(c->g, c, sema, wait_cmd, 8, true, false);
/*
* Make sure that gk20a_channel_semaphore_wait_fd() can create another
@@ -565,8 +564,8 @@ static int __semaphore_wait_fd_fast_path(struct channel_gk20a *c,
static int gk20a_channel_semaphore_wait_fd(
struct gk20a_channel_sync *s, int fd,
struct priv_cmd_entry **entry,
struct gk20a_fence **fence)
struct priv_cmd_entry *entry,
struct gk20a_fence *fence)
{
struct gk20a_channel_semaphore *sema =
container_of(s, struct gk20a_channel_semaphore, ops);
@@ -574,7 +573,7 @@ static int gk20a_channel_semaphore_wait_fd(
#ifdef CONFIG_SYNC
struct gk20a_semaphore *fp_sema;
struct sync_fence *sync_fence;
struct priv_cmd_entry *wait_cmd = NULL;
struct priv_cmd_entry *wait_cmd = entry;
struct wait_fence_work *w = NULL;
int err, ret, status;
@@ -582,19 +581,24 @@ static int gk20a_channel_semaphore_wait_fd(
if (!sync_fence)
return -EINVAL;
ret = __semaphore_wait_fd_fast_path(c, sync_fence, &wait_cmd, &fp_sema);
ret = __semaphore_wait_fd_fast_path(c, sync_fence, wait_cmd, &fp_sema);
if (ret == 0) {
if (fp_sema)
*fence = gk20a_fence_from_semaphore(sema->timeline,
fp_sema,
&c->semaphore_wq,
NULL, false, false);
else
if (fp_sema) {
err = gk20a_fence_from_semaphore(fence,
sema->timeline,
fp_sema,
&c->semaphore_wq,
NULL, false, false);
if (err) {
gk20a_semaphore_put(fp_sema);
goto clean_up_priv_cmd;
}
} else
/*
* Allocate an empty fence. It will instantly return
* Init an empty fence. It will instantly return
* from gk20a_fence_wait().
*/
*fence = gk20a_alloc_fence(NULL, NULL, false);
gk20a_init_fence(fence, NULL, NULL, false);
sync_fence_put(sync_fence);
goto skip_slow_path;
@@ -611,18 +615,17 @@ static int gk20a_channel_semaphore_wait_fd(
goto skip_slow_path;
}
err = gk20a_channel_alloc_priv_cmdbuf(c, 8, &wait_cmd);
err = gk20a_channel_alloc_priv_cmdbuf(c, 8, wait_cmd);
if (err) {
gk20a_err(dev_from_gk20a(c->g),
"not enough priv cmd buffer space");
sync_fence_put(sync_fence);
return -ENOMEM;
goto clean_up_sync_fence;
}
w = kzalloc(sizeof(*w), GFP_KERNEL);
if (!w) {
err = -ENOMEM;
goto fail_free_cmdbuf;
goto clean_up_priv_cmd;
}
sync_fence_waiter_init(&w->waiter, gk20a_channel_semaphore_launcher);
@@ -631,7 +634,7 @@ static int gk20a_channel_semaphore_wait_fd(
if (!w->sema) {
gk20a_err(dev_from_gk20a(c->g), "ran out of semaphores");
err = -ENOMEM;
goto fail_free_worker;
goto clean_up_worker;
}
/* worker takes one reference */
@@ -641,6 +644,16 @@ static int gk20a_channel_semaphore_wait_fd(
/* GPU unblocked when the semaphore value increments. */
add_sema_cmd(c->g, c, w->sema, wait_cmd, 8, true, false);
/*
* We need to create the fence before adding the waiter to ensure
* that we properly clean up in the event the sync_fence has
* already signaled
*/
err = gk20a_fence_from_semaphore(fence, sema->timeline, w->sema,
&c->semaphore_wq, NULL, false, false);
if (err)
goto clean_up_sema;
ret = sync_fence_wait_async(sync_fence, &w->waiter);
/*
@@ -655,24 +668,22 @@ static int gk20a_channel_semaphore_wait_fd(
gk20a_semaphore_put(w->sema);
}
/* XXX - this fixes an actual bug, we need to hold a ref to this
semaphore while the job is in flight. */
*fence = gk20a_fence_from_semaphore(sema->timeline, w->sema,
&c->semaphore_wq,
NULL, false, false);
skip_slow_path:
*entry = wait_cmd;
return 0;
fail_free_worker:
if (w && w->sema)
gk20a_semaphore_put(w->sema);
clean_up_sema:
/*
* Release the refs to the semaphore, including
* the one for the worker since it will never run.
*/
gk20a_semaphore_put(w->sema);
gk20a_semaphore_put(w->sema);
clean_up_worker:
kfree(w);
clean_up_priv_cmd:
gk20a_free_priv_cmdbuf(c, entry);
clean_up_sync_fence:
sync_fence_put(sync_fence);
fail_free_cmdbuf:
if (wait_cmd)
gk20a_free_priv_cmdbuf(c, wait_cmd);
return err;
#else
gk20a_err(dev_from_gk20a(c->g),
@@ -684,12 +695,11 @@ fail_free_cmdbuf:
static int __gk20a_channel_semaphore_incr(
struct gk20a_channel_sync *s, bool wfi_cmd,
struct sync_fence *dependency,
struct priv_cmd_entry **entry,
struct gk20a_fence **fence,
struct priv_cmd_entry *incr_cmd,
struct gk20a_fence *fence,
bool need_sync_fence)
{
int incr_cmd_size;
struct priv_cmd_entry *incr_cmd = NULL;
struct gk20a_channel_semaphore *sp =
container_of(s, struct gk20a_channel_semaphore, ops);
struct channel_gk20a *c = sp->c;
@@ -704,29 +714,37 @@ static int __gk20a_channel_semaphore_incr(
}
incr_cmd_size = 10;
err = gk20a_channel_alloc_priv_cmdbuf(c, incr_cmd_size, &incr_cmd);
err = gk20a_channel_alloc_priv_cmdbuf(c, incr_cmd_size, incr_cmd);
if (err) {
gk20a_err(dev_from_gk20a(c->g),
"not enough priv cmd buffer space");
gk20a_semaphore_put(semaphore);
return err;
goto clean_up_sema;
}
/* Release the completion semaphore. */
add_sema_cmd(c->g, c, semaphore, incr_cmd, 14, false, wfi_cmd);
*fence = gk20a_fence_from_semaphore(sp->timeline, semaphore,
&c->semaphore_wq,
dependency, wfi_cmd,
need_sync_fence);
*entry = incr_cmd;
err = gk20a_fence_from_semaphore(fence,
sp->timeline, semaphore,
&c->semaphore_wq,
dependency, wfi_cmd,
need_sync_fence);
if (err)
goto clean_up_priv_cmd;
return 0;
clean_up_priv_cmd:
gk20a_free_priv_cmdbuf(c, incr_cmd);
clean_up_sema:
gk20a_semaphore_put(semaphore);
return err;
}
static int gk20a_channel_semaphore_incr_wfi(
struct gk20a_channel_sync *s,
struct priv_cmd_entry **entry,
struct gk20a_fence **fence)
struct priv_cmd_entry *entry,
struct gk20a_fence *fence)
{
return __gk20a_channel_semaphore_incr(s,
true /* wfi */,
@@ -736,8 +754,8 @@ static int gk20a_channel_semaphore_incr_wfi(
static int gk20a_channel_semaphore_incr(
struct gk20a_channel_sync *s,
struct priv_cmd_entry **entry,
struct gk20a_fence **fence,
struct priv_cmd_entry *entry,
struct gk20a_fence *fence,
bool need_sync_fence)
{
/* Don't put wfi cmd to this one since we're not returning
@@ -751,8 +769,8 @@ static int gk20a_channel_semaphore_incr(
static int gk20a_channel_semaphore_incr_user(
struct gk20a_channel_sync *s,
int wait_fence_fd,
struct priv_cmd_entry **entry,
struct gk20a_fence **fence,
struct priv_cmd_entry *entry,
struct gk20a_fence *fence,
bool wfi,
bool need_sync_fence)
{

@@ -36,8 +36,8 @@ struct gk20a_channel_sync {
* cmdbuf is executed.
*/
int (*wait_syncpt)(struct gk20a_channel_sync *s, u32 id, u32 thresh,
struct priv_cmd_entry **entry,
struct gk20a_fence **fence);
struct priv_cmd_entry *entry,
struct gk20a_fence *fence);
/* Generate a gpu wait cmdbuf from sync fd.
* Returns
@@ -46,8 +46,8 @@ struct gk20a_channel_sync {
* cmdbuf is executed.
*/
int (*wait_fd)(struct gk20a_channel_sync *s, int fd,
struct priv_cmd_entry **entry,
struct gk20a_fence **fence);
struct priv_cmd_entry *entry,
struct gk20a_fence *fence);
/* Increment syncpoint/semaphore.
* Returns
@@ -55,8 +55,8 @@ struct gk20a_channel_sync {
* - a fence that can be passed to wait_cpu() and is_expired().
*/
int (*incr)(struct gk20a_channel_sync *s,
struct priv_cmd_entry **entry,
struct gk20a_fence **fence,
struct priv_cmd_entry *entry,
struct gk20a_fence *fence,
bool need_sync_fence);
/* Increment syncpoint/semaphore, preceded by a wfi.
@@ -65,8 +65,8 @@ struct gk20a_channel_sync {
* - a fence that can be passed to wait_cpu() and is_expired().
*/
int (*incr_wfi)(struct gk20a_channel_sync *s,
struct priv_cmd_entry **entry,
struct gk20a_fence **fence);
struct priv_cmd_entry *entry,
struct gk20a_fence *fence);
/* Increment syncpoint/semaphore, so that the returned fence represents
* work completion (may need wfi) and can be returned to user space.
@@ -77,8 +77,8 @@ struct gk20a_channel_sync {
*/
int (*incr_user)(struct gk20a_channel_sync *s,
int wait_fence_fd,
struct priv_cmd_entry **entry,
struct gk20a_fence **fence,
struct priv_cmd_entry *entry,
struct gk20a_fence *fence,
bool wfi,
bool need_sync_fence);

@@ -63,16 +63,27 @@ struct gk20a_fence *gk20a_fence_get(struct gk20a_fence *f)
return f;
}
static inline bool gk20a_fence_is_valid(struct gk20a_fence *f)
{
bool valid = f->valid;
rmb();
return valid;
}
int gk20a_fence_wait(struct gk20a_fence *f, int timeout)
{
if (!tegra_platform_is_silicon())
timeout = (u32)MAX_SCHEDULE_TIMEOUT;
return f->ops->wait(f, timeout);
if (f && gk20a_fence_is_valid(f)) {
if (!tegra_platform_is_silicon())
timeout = (u32)MAX_SCHEDULE_TIMEOUT;
return f->ops->wait(f, timeout);
}
return 0;
}
bool gk20a_fence_is_expired(struct gk20a_fence *f)
{
if (f && f->ops)
if (f && gk20a_fence_is_valid(f) && f->ops)
return f->ops->is_expired(f);
else
return true;
@@ -83,7 +94,7 @@ int gk20a_fence_install_fd(struct gk20a_fence *f)
#ifdef CONFIG_SYNC
int fd;
if (!f->sync_fence)
if (!f || !gk20a_fence_is_valid(f) || !f->sync_fence)
return -EINVAL;
fd = get_unused_fd_flags(O_RDWR);
@@ -98,18 +109,28 @@ int gk20a_fence_install_fd(struct gk20a_fence *f)
#endif
}
struct gk20a_fence *gk20a_alloc_fence(const struct gk20a_fence_ops *ops,
struct sync_fence *sync_fence, bool wfi)
struct gk20a_fence *gk20a_alloc_fence(struct channel_gk20a *c)
{
struct gk20a_fence *f = kzalloc(sizeof(*f), GFP_KERNEL);
if (!f)
struct gk20a_fence *fence;
fence = kzalloc(sizeof(struct gk20a_fence), GFP_KERNEL);
if (!fence)
return NULL;
kref_init(&f->ref);
kref_init(&fence->ref);
return fence;
}
void gk20a_init_fence(struct gk20a_fence *f,
const struct gk20a_fence_ops *ops,
struct sync_fence *sync_fence, bool wfi)
{
if (!f)
return;
f->ops = ops;
f->sync_fence = sync_fence;
f->wfi = wfi;
f->syncpt_id = -1;
return f;
}
/* Fences that are backed by GPU semaphores: */
@@ -143,14 +164,15 @@ static const struct gk20a_fence_ops gk20a_semaphore_fence_ops = {
};
/* This function takes ownership of the semaphore */
struct gk20a_fence *gk20a_fence_from_semaphore(
int gk20a_fence_from_semaphore(
struct gk20a_fence *fence_out,
struct sync_timeline *timeline,
struct gk20a_semaphore *semaphore,
wait_queue_head_t *semaphore_wq,
struct sync_fence *dependency,
bool wfi, bool need_sync_fence)
{
struct gk20a_fence *f;
struct gk20a_fence *f = fence_out;
struct sync_fence *sync_fence = NULL;
#ifdef CONFIG_SYNC
@@ -159,21 +181,26 @@ struct gk20a_fence *gk20a_fence_from_semaphore(
dependency, "f-gk20a-0x%04x",
gk20a_semaphore_gpu_ro_va(semaphore));
if (!sync_fence)
return NULL;
return -1;
}
#endif
f = gk20a_alloc_fence(&gk20a_semaphore_fence_ops, sync_fence, wfi);
gk20a_init_fence(f, &gk20a_semaphore_fence_ops, sync_fence, wfi);
if (!f) {
#ifdef CONFIG_SYNC
sync_fence_put(sync_fence);
#endif
return NULL;
return -EINVAL;
}
f->semaphore = semaphore;
f->semaphore_wq = semaphore_wq;
return f;
/* commit previous writes before setting the valid flag */
wmb();
f->valid = true;
return 0;
}
#ifdef CONFIG_TEGRA_GK20A
@@ -197,11 +224,13 @@ static const struct gk20a_fence_ops gk20a_syncpt_fence_ops = {
.is_expired = &gk20a_syncpt_fence_is_expired,
};
struct gk20a_fence *gk20a_fence_from_syncpt(struct platform_device *host1x_pdev,
u32 id, u32 value, bool wfi,
bool need_sync_fence)
int gk20a_fence_from_syncpt(
struct gk20a_fence *fence_out,
struct platform_device *host1x_pdev,
u32 id, u32 value, bool wfi,
bool need_sync_fence)
{
struct gk20a_fence *f;
struct gk20a_fence *f = fence_out;
struct sync_fence *sync_fence = NULL;
#ifdef CONFIG_SYNC
@@ -214,27 +243,32 @@ struct gk20a_fence *gk20a_fence_from_syncpt(struct platform_device *host1x_pdev,
sync_fence = nvhost_sync_create_fence(host1x_pdev, &pt, 1,
"fence");
if (IS_ERR(sync_fence))
return NULL;
return -1;
}
#endif
f = gk20a_alloc_fence(&gk20a_syncpt_fence_ops, sync_fence, wfi);
gk20a_init_fence(f, &gk20a_syncpt_fence_ops, sync_fence, wfi);
if (!f) {
#ifdef CONFIG_SYNC
if (sync_fence)
sync_fence_put(sync_fence);
#endif
return NULL;
return -EINVAL;
}
f->host1x_pdev = host1x_pdev;
f->syncpt_id = id;
f->syncpt_value = value;
return f;
/* commit previous writes before setting the valid flag */
wmb();
f->valid = true;
return 0;
}
#else
struct gk20a_fence *gk20a_fence_from_syncpt(struct platform_device *host1x_pdev,
int gk20a_fence_from_syncpt(struct platform_device *host1x_pdev,
u32 id, u32 value, bool wfi)
{
return NULL;
return -EINVAL;
}
#endif

@@ -31,6 +31,7 @@ struct gk20a_fence_ops;
struct gk20a_fence {
/* Valid for all fence types: */
bool valid;
struct kref ref;
bool wfi;
struct sync_fence *sync_fence;
@@ -47,21 +48,25 @@ struct gk20a_fence {
};
/* Fences can be created from semaphores or syncpoint (id, value) pairs */
struct gk20a_fence *gk20a_fence_from_semaphore(
int gk20a_fence_from_semaphore(
struct gk20a_fence *fence_out,
struct sync_timeline *timeline,
struct gk20a_semaphore *semaphore,
wait_queue_head_t *semaphore_wq,
struct sync_fence *dependency,
bool wfi, bool need_sync_fence);
struct gk20a_fence *gk20a_fence_from_syncpt(
int gk20a_fence_from_syncpt(
struct gk20a_fence *fence_out,
struct platform_device *host1x_pdev,
u32 id, u32 value, bool wfi,
bool need_sync_fence);
struct gk20a_fence *gk20a_alloc_fence(const struct gk20a_fence_ops *ops,
struct sync_fence *sync_fence,
bool wfi);
struct gk20a_fence *gk20a_alloc_fence(struct channel_gk20a *c);
void gk20a_init_fence(struct gk20a_fence *f,
const struct gk20a_fence_ops *ops,
struct sync_fence *sync_fence, bool wfi);
/* Fence operations */
void gk20a_fence_put(struct gk20a_fence *f);

@@ -198,6 +198,7 @@ struct priv_cmd_queue {
};
struct priv_cmd_entry {
bool valid;
struct mem_desc *mem;
u32 off; /* offset in mem, in u32 entries */
u64 gva;