gpu: nvgpu: implement sync refcounting

We currently free the sync object when we find the job list empty.
If aggressive_sync_destroy is set to true, we also try to free the
sync during the channel_unbind() call.

But we rarely end up freeing the sync from channel_unbind(), since
freeing it when the job list is empty is already aggressive enough.

Hence, remove the sync-free code from channel_unbind().

Implement refcounting for the sync object (sketched below):
- get a reference while submitting a job (and
  allocate the sync if it is not allocated already)
- put the reference while freeing the job
- if the refcount drops to zero and
  aggressive_sync_destroy is set, free the sync
- if aggressive_sync_destroy is not set, free the
  sync at channel close time
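
A minimal userspace sketch of this get/put scheme, to make the
intended sync lifetime concrete. The names (sync_obj, job_submit,
job_cleanup, channel_close) are made up for illustration and the
locking the driver takes around these steps is omitted; this is not
the nvgpu implementation itself.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdlib.h>

struct sync_obj {
	atomic_int refcount;
};

static void sync_destroy(struct sync_obj *s)
{
	free(s);
}

/* submit: allocate the sync lazily, then take a reference */
static void job_submit(struct sync_obj **sync)
{
	if (!*sync) {
		*sync = calloc(1, sizeof(**sync));
		atomic_init(&(*sync)->refcount, 0);
	}
	atomic_fetch_add(&(*sync)->refcount, 1);
}

/* job cleanup: drop the reference; destroy eagerly only if asked to */
static void job_cleanup(struct sync_obj **sync, bool aggressive_destroy)
{
	if (atomic_fetch_sub(&(*sync)->refcount, 1) == 1 &&
	    aggressive_destroy) {
		sync_destroy(*sync);
		*sync = NULL;
	}
}

/* channel close: anything that survived eager destruction is freed here */
static void channel_close(struct sync_obj **sync)
{
	if (*sync) {
		sync_destroy(*sync);
		*sync = NULL;
	}
}

int main(void)
{
	struct sync_obj *sync = NULL;

	job_submit(&sync);
	job_cleanup(&sync, false);	/* refcount hits 0 but sync is kept */
	channel_close(&sync);		/* freed at close time instead */
	return 0;
}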

Bug 200187553

Change-Id: I74e24adb15dc26a375ebca1fdd017b3ad6d57b61
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/1120410
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
Deepak Nibade authored 2016-04-18 15:46:10 +05:30; committed by Terje Bergstrom
parent 1c96bc6942, commit e0c9da1fe9
4 changed files with 23 additions and 45 deletions


@@ -370,7 +370,6 @@ static void channel_gk20a_bind(struct channel_gk20a *c)
void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a)
{
struct gk20a *g = ch_gk20a->g;
struct gk20a_platform *platform = gk20a_get_platform(g->dev);
gk20a_dbg_fn("");
@@ -380,18 +379,6 @@ void channel_gk20a_unbind(struct channel_gk20a *ch_gk20a)
ccsr_channel_inst_bind_false_f());
ch_gk20a->bound = false;
/*
* if we are agrressive then we can destroy the syncpt
* resource at this point
* if not, then it will be destroyed at channel_free()
*/
mutex_lock(&ch_gk20a->sync_lock);
if (ch_gk20a->sync && platform->aggressive_sync_destroy) {
ch_gk20a->sync->destroy(ch_gk20a->sync);
ch_gk20a->sync = NULL;
}
mutex_unlock(&ch_gk20a->sync_lock);
}
int channel_gk20a_alloc_inst(struct gk20a *g, struct channel_gk20a *ch)
@@ -954,7 +941,7 @@ static void gk20a_free_channel(struct channel_gk20a *ch)
/* sync must be destroyed before releasing channel vm */
mutex_lock(&ch->sync_lock);
if (ch->sync) {
ch->sync->destroy(ch->sync);
gk20a_channel_sync_destroy(ch->sync);
ch->sync = NULL;
}
mutex_unlock(&ch->sync_lock);
@@ -1922,8 +1909,18 @@ static void gk20a_channel_clean_up_jobs(struct work_struct *work)
gk20a_channel_timeout_stop(c);
if (c->sync)
mutex_lock(&c->sync_lock);
if (c->sync) {
c->sync->signal_timeline(c->sync);
if (atomic_dec_and_test(&c->sync->refcount) &&
platform->aggressive_sync_destroy) {
gk20a_channel_sync_destroy(c->sync);
c->sync = NULL;
}
} else {
WARN_ON(1);
}
mutex_unlock(&c->sync_lock);
if (job->num_mapped_buffers)
gk20a_vm_put_buffers(vm, job->mapped_buffers,
@@ -1950,23 +1947,6 @@ static void gk20a_channel_clean_up_jobs(struct work_struct *work)
gk20a_idle(g->dev);
}
/*
* If job list is empty then channel is idle and we can free
* the syncpt here (given aggressive_destroy flag is set)
* Note: check if last submit is complete before destroying
* the sync resource
*/
if (list_empty(&c->jobs)) {
mutex_lock(&c->sync_lock);
mutex_lock(&c->last_submit.fence_lock);
if (c->sync && platform->aggressive_sync_destroy &&
gk20a_fence_is_expired(c->last_submit.post_fence)) {
c->sync->destroy(c->sync);
c->sync = NULL;
}
mutex_unlock(&c->last_submit.fence_lock);
mutex_unlock(&c->sync_lock);
}
mutex_unlock(&c->jobs_lock);
mutex_unlock(&c->submit_lock);
@@ -2121,6 +2101,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
}
new_sync_created = true;
}
atomic_inc(&c->sync->refcount);
mutex_unlock(&c->sync_lock);
if (g->ops.fifo.resetup_ramfc && new_sync_created) {


@@ -356,6 +356,7 @@ gk20a_channel_syncpt_create(struct channel_gk20a *c)
nvhost_syncpt_set_min_eq_max_ext(sp->host1x_pdev, sp->id);
atomic_set(&sp->ops.refcount, 0);
sp->ops.wait_syncpt = gk20a_channel_syncpt_wait_syncpt;
sp->ops.wait_fd = gk20a_channel_syncpt_wait_fd;
sp->ops.incr = gk20a_channel_syncpt_incr;
@@ -711,6 +712,7 @@ gk20a_channel_semaphore_create(struct channel_gk20a *c)
if (!sema->timeline)
goto clean_up;
#endif
atomic_set(&sema->ops.refcount, 0);
sema->ops.wait_syncpt = gk20a_channel_semaphore_wait_syncpt;
sema->ops.wait_fd = gk20a_channel_semaphore_wait_fd;
sema->ops.incr = gk20a_channel_semaphore_incr;
@@ -727,6 +729,11 @@ clean_up:
return NULL;
}
void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync)
{
sync->destroy(sync);
}
struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c)
{
#ifdef CONFIG_TEGRA_GK20A


@@ -27,6 +27,8 @@ struct gk20a_semaphore;
struct gk20a_fence;
struct gk20a_channel_sync {
atomic_t refcount;
/* Generate a gpu wait cmdbuf from syncpoint.
* Returns
* - a gpu cmdbuf that performs the wait when executed,
@@ -91,13 +93,11 @@ struct gk20a_channel_sync {
/* Returns the sync point id or negative number if no syncpt*/
int (*syncpt_id)(struct gk20a_channel_sync *s);
/* flag to set sync destroy aggressiveness */
bool aggressive_destroy;
/* Free the resources allocated by gk20a_channel_sync_create. */
void (*destroy)(struct gk20a_channel_sync *s);
};
void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync);
struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c);
#endif


@@ -59,16 +59,6 @@ static void vgpu_channel_unbind(struct channel_gk20a *ch)
}
ch->bound = false;
/*
* if we are agrressive then we can destroy the syncpt
* resource at this point
* if not, then it will be destroyed at channel_free()
*/
if (ch->sync && platform->aggressive_sync_destroy) {
ch->sync->destroy(ch->sync);
ch->sync = NULL;
}
}
static int vgpu_channel_alloc_inst(struct gk20a *g, struct channel_gk20a *ch)