gpu: nvgpu: make deferred clean-up conditional

This change makes the invocation of the deferred job clean-up
mechanism conditional. For submissions that require job tracking,
deferred clean-up is required only if at least one of the following
conditions holds:

1) The channel's deterministic flag is not set
2) Rail-gating is enabled
3) The channel watchdog (WDT) is enabled
4) Buffer refcounting is enabled
5) The submit depends on the Sync Framework

If deferred clean-up is not needed, we instead clean up a single
job-tracking resource in the submit path. For deterministic channels,
we do not allow deferred clean-up to occur and fail any submit that
would require it (see the sketch following the sign-offs below).

Bug 1795076

Change-Id: I4021dffe8a71aa58f12db6b58518d3f4021f3313
Signed-off-by: Sachit Kadle <skadle@nvidia.com>
Reviewed-on: http://git-master/r/1220920
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
(cherry picked from commit b09f7589d5ad3c496e7350f1ed583a4fe2db574a)
Reviewed-on: http://git-master/r/1223941
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
6 changed files with 119 additions and 26 deletions
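
For reference, the clean-up decision described above can be modeled in
isolation. The following is a minimal stand-alone sketch, not driver
code: the struct, harness, and example values are hypothetical, with
field names mirroring the channel/platform state consulted in the diff
below.

#include <stdbool.h>
#include <stdio.h>

/*
 * Hypothetical stand-in for the channel/platform state consulted in
 * gk20a_submit_channel_gpfifo(); field names mirror the diff below.
 */
struct submit_state {
        bool deterministic;             /* ..._EX_FLAGS_DETERMINISTIC set */
        bool need_sync_framework;       /* fences depend on Sync Framework */
        bool wdt_enabled;               /* channel watchdog enabled */
        bool can_railgate;              /* platform may rail-gate the GPU */
        bool skip_buffer_refcounting;   /* ..._SKIP_BUFFER_REFCOUNTING passed */
};

/* Deferred clean-up is needed if any one of the five conditions holds. */
static bool need_deferred_cleanup(const struct submit_state *s)
{
        return !s->deterministic ||
               s->need_sync_framework ||
               s->wdt_enabled ||
               s->can_railgate ||
               !s->skip_buffer_refcounting;
}

int main(void)
{
        struct submit_state s = {
                .deterministic = true,
                .need_sync_framework = false,
                .wdt_enabled = false,
                .can_railgate = false,
                .skip_buffer_refcounting = true,
        };

        /*
         * A deterministic channel whose submit would still need deferred
         * clean-up is rejected with -EINVAL; the configuration above
         * instead cleans up a single job inline in the submit path.
         */
        printf("deferred clean-up needed: %s\n",
               need_deferred_cleanup(&s) ? "yes" : "no");
        return 0;
}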


@@ -76,7 +76,8 @@ static void gk20a_free_error_notifiers(struct channel_gk20a *ch);
 static u32 gk20a_get_channel_watchdog_timeout(struct channel_gk20a *ch);
-static void gk20a_channel_clean_up_jobs(struct work_struct *work);
+static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
+                                bool clean_all);
 static void gk20a_channel_cancel_job_clean_up(struct channel_gk20a *c,
                                 bool wait_for_completion);
@@ -1029,6 +1030,7 @@ unbind:
         g->ops.fifo.free_inst(g, ch);
         ch->vpr = false;
+        ch->deterministic = false;
         ch->vm = NULL;
 
         WARN_ON(ch->sync);
@@ -1703,9 +1705,12 @@ int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
         gpfifo_size = args->num_entries;
 
-        if (args->flags & NVGPU_ALLOC_GPFIFO_FLAGS_VPR_ENABLED)
+        if (args->flags & NVGPU_ALLOC_GPFIFO_EX_FLAGS_VPR_ENABLED)
                 c->vpr = true;
 
+        if (args->flags & NVGPU_ALLOC_GPFIFO_EX_FLAGS_DETERMINISTIC)
+                c->deterministic = true;
+
         /* an address space needs to have been bound at this point. */
         if (!gk20a_channel_as_bound(c)) {
                 gk20a_err(d,
@@ -2173,10 +2178,17 @@ err_put_vm:
         return err;
 }
 
-static void gk20a_channel_clean_up_jobs(struct work_struct *work)
+static void gk20a_channel_clean_up_runcb_fn(struct work_struct *work)
+{
+        struct channel_gk20a *c = container_of(to_delayed_work(work),
+                        struct channel_gk20a, clean_up.wq);
+
+        gk20a_channel_clean_up_jobs(c, true);
+}
+
+static void gk20a_channel_clean_up_jobs(struct channel_gk20a *c,
+                                        bool clean_all)
 {
         struct vm_gk20a *vm;
         struct channel_gk20a_job *job;
         struct gk20a_platform *platform;
@@ -2273,6 +2285,9 @@ static void gk20a_channel_clean_up_jobs(struct work_struct *work)
                 channel_gk20a_free_job(c, job);
                 job_finished = 1;
                 gk20a_idle(g->dev);
+
+                if (!clean_all)
+                        break;
         }
 
         if (job_finished && c->update_fn)
@@ -2419,6 +2434,7 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
                                       struct gk20a_fence **pre_fence,
                                       struct gk20a_fence **post_fence,
                                       bool force_need_sync_fence,
+                                      bool register_irq,
                                       u32 flags)
 {
         struct gk20a *g = c->g;
@@ -2515,10 +2531,12 @@ static int gk20a_submit_prepare_syncs(struct channel_gk20a *c,
         if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
                 err = c->sync->incr_user(c->sync, wait_fence_fd, job->incr_cmd,
-                                job->post_fence, need_wfi, need_sync_fence);
+                                job->post_fence, need_wfi, need_sync_fence,
+                                register_irq);
         else
                 err = c->sync->incr(c->sync, job->incr_cmd,
-                                job->post_fence, need_sync_fence);
+                                job->post_fence, need_sync_fence,
+                                register_irq);
 
         if (!err) {
                 *incr_cmd = job->incr_cmd;
                 *post_fence = job->post_fence;
@@ -2568,6 +2586,7 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
                         NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING);
         int err = 0;
         bool need_job_tracking;
+        bool need_deferred_cleanup = false;
         struct nvgpu_gpfifo __user *user_gpfifo = args ?
                         (struct nvgpu_gpfifo __user *)(uintptr_t)args->gpfifo : NULL;
         struct gk20a_platform *platform = gk20a_get_platform(d);
@@ -2626,13 +2645,48 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
                         !skip_buffer_refcounting;
 
         if (need_job_tracking) {
+                bool need_sync_framework = false;
+
                 /*
-                 * If the submit is to have deterministic latency and
+                 * If the channel is to have deterministic latency and
                  * job tracking is required, the channel must have
                  * pre-allocated resources. Otherwise, we fail the submit here
                  */
-                if ((flags & NVGPU_SUBMIT_GPFIFO_FLAGS_DETERMINISTIC) &&
-                                !channel_gk20a_is_prealloc_enabled(c))
+                if (c->deterministic && !channel_gk20a_is_prealloc_enabled(c))
                         return -EINVAL;
 
+                need_sync_framework = force_need_sync_fence ||
+                        gk20a_channel_sync_needs_sync_framework(c) ||
+                        (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE &&
+                         (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT ||
+                          flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET));
+
+                /*
+                 * Deferred clean-up is necessary for any of the following
+                 * conditions:
+                 * - channel's deterministic flag is not set
+                 * - dependency on sync framework, which could make the
+                 *   behavior of the clean-up operation non-deterministic
+                 *   (should not be performed in the submit path)
+                 * - channel wdt
+                 * - GPU rail-gating
+                 * - buffer refcounting
+                 *
+                 * If none of the conditions are met, then deferred clean-up
+                 * is not required, and we clean-up one job-tracking
+                 * resource in the submit path.
+                 */
+                need_deferred_cleanup = !c->deterministic ||
+                                        need_sync_framework ||
+                                        c->wdt_enabled ||
+                                        platform->can_railgate ||
+                                        !skip_buffer_refcounting;
+
+                /*
+                 * For deterministic channels, we don't allow deferred clean_up
+                 * processing to occur. In cases we hit this, we fail the submit
+                 */
+                if (c->deterministic && need_deferred_cleanup)
+                        return -EINVAL;
+
                 /* gk20a_channel_update releases this ref. */
@@ -2641,6 +2695,11 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
                         gk20a_err(d, "failed to host gk20a to submit gpfifo");
                         return err;
                 }
+
+                if (!need_deferred_cleanup) {
+                        /* clean up a single job */
+                        gk20a_channel_clean_up_jobs(c, false);
+                }
         }
 
         trace_gk20a_channel_submit_gpfifo(dev_name(c->g->dev),
@@ -2678,7 +2737,9 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
                 err = gk20a_submit_prepare_syncs(c, fence, job,
                                                  &wait_cmd, &incr_cmd,
                                                  &pre_fence, &post_fence,
-                                                 force_need_sync_fence, flags);
+                                                 force_need_sync_fence,
+                                                 need_deferred_cleanup,
+                                                 flags);
                 if (err)
                         goto clean_up_job;
         }
@@ -2727,7 +2788,7 @@ clean_up:
         gk20a_dbg_fn("fail");
         gk20a_fence_put(pre_fence);
         gk20a_fence_put(post_fence);
-        if (need_job_tracking)
+        if (need_deferred_cleanup)
                 gk20a_idle(g->dev);
         return err;
 }
@@ -2749,7 +2810,7 @@ int gk20a_init_channel_support(struct gk20a *g, u32 chid)
         raw_spin_lock_init(&c->timeout.lock);
         mutex_init(&c->sync_lock);
         INIT_DELAYED_WORK(&c->timeout.wq, gk20a_channel_timeout_handler);
-        INIT_DELAYED_WORK(&c->clean_up.wq, gk20a_channel_clean_up_jobs);
+        INIT_DELAYED_WORK(&c->clean_up.wq, gk20a_channel_clean_up_runcb_fn);
         mutex_init(&c->clean_up.lock);
         INIT_LIST_HEAD(&c->joblist.dynamic.jobs);
 #if defined(CONFIG_GK20A_CYCLE_STATS)
@@ -3416,10 +3477,10 @@ long gk20a_channel_ioctl(struct file *filp,
                 if (!is_power_of_2(alloc_gpfifo_ex_args->num_entries)) {
                         err = -EINVAL;
                         gk20a_idle(dev);
                         break;
                 }
-                err = gk20a_alloc_channel_gpfifo(ch,
-                                (struct nvgpu_alloc_gpfifo_ex_args *)buf);
+                err = gk20a_alloc_channel_gpfifo(ch, alloc_gpfifo_ex_args);
                 gk20a_idle(dev);
                 break;
         }


@@ -131,7 +131,7 @@ struct channel_gk20a {
         bool bound;
         bool first_init;
         bool vpr;
-        bool no_block;
+        bool deterministic;
         bool cde;
         pid_t pid;
         pid_t tgid;


@@ -285,13 +285,14 @@ static int gk20a_channel_syncpt_incr_wfi(struct gk20a_channel_sync *s,
 static int gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
                                      struct priv_cmd_entry *entry,
                                      struct gk20a_fence *fence,
-                                     bool need_sync_fence)
+                                     bool need_sync_fence,
+                                     bool register_irq)
 {
         /* Don't put wfi cmd to this one since we're not returning
          * a fence to user space. */
         return __gk20a_channel_syncpt_incr(s,
                         false /* no wfi */,
-                        true /* register irq */,
+                        register_irq /* register irq */,
                         entry, fence, need_sync_fence);
 }
@@ -300,13 +301,14 @@ static int gk20a_channel_syncpt_incr_user(struct gk20a_channel_sync *s,
                                           struct priv_cmd_entry *entry,
                                           struct gk20a_fence *fence,
                                           bool wfi,
-                                          bool need_sync_fence)
+                                          bool need_sync_fence,
+                                          bool register_irq)
 {
         /* Need to do 'wfi + host incr' since we return the fence
          * to user space. */
         return __gk20a_channel_syncpt_incr(s,
                         wfi,
-                        true /* register irq */,
+                        register_irq /* register irq */,
                         entry, fence, need_sync_fence);
 }
@@ -756,7 +758,8 @@ static int gk20a_channel_semaphore_incr(
                 struct gk20a_channel_sync *s,
                 struct priv_cmd_entry *entry,
                 struct gk20a_fence *fence,
-                bool need_sync_fence)
+                bool need_sync_fence,
+                bool register_irq)
 {
         /* Don't put wfi cmd to this one since we're not returning
          * a fence to user space. */
@@ -772,7 +775,8 @@ static int gk20a_channel_semaphore_incr_user(
                 struct priv_cmd_entry *entry,
                 struct gk20a_fence *fence,
                 bool wfi,
-                bool need_sync_fence)
+                bool need_sync_fence,
+                bool register_irq)
 {
 #ifdef CONFIG_SYNC
         struct sync_fence *dependency = NULL;
@@ -889,3 +893,12 @@ struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c)
 #endif
         return gk20a_channel_semaphore_create(c);
 }
+
+bool gk20a_channel_sync_needs_sync_framework(struct channel_gk20a *c)
+{
+#ifdef CONFIG_TEGRA_GK20A
+        if (gk20a_platform_has_syncpoints(c->g->dev))
+                return false;
+#endif
+        return true;
+}


@@ -57,7 +57,8 @@ struct gk20a_channel_sync {
         int (*incr)(struct gk20a_channel_sync *s,
                     struct priv_cmd_entry *entry,
                     struct gk20a_fence *fence,
-                    bool need_sync_fence);
+                    bool need_sync_fence,
+                    bool register_irq);
 
         /* Increment syncpoint/semaphore, preceded by a wfi.
          * Returns
@@ -80,7 +81,8 @@ struct gk20a_channel_sync {
                          struct priv_cmd_entry *entry,
                          struct gk20a_fence *fence,
                          bool wfi,
-                         bool need_sync_fence);
+                         bool need_sync_fence,
+                         bool register_irq);
 
         /* Reset the channel syncpoint/semaphore. */
         void (*set_min_eq_max)(struct gk20a_channel_sync *s);
@@ -99,5 +101,6 @@ struct gk20a_channel_sync {
 void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync);
 struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c);
+bool gk20a_channel_sync_needs_sync_framework(struct channel_gk20a *c);
 
 #endif


@@ -272,8 +272,24 @@ static int gk20a_syncpt_fence_wait(struct gk20a_fence *f, long timeout)
 static bool gk20a_syncpt_fence_is_expired(struct gk20a_fence *f)
 {
-        return nvhost_syncpt_is_expired_ext(f->host1x_pdev, f->syncpt_id,
-                                            f->syncpt_value);
+
+        /*
+         * In cases we don't register a notifier, we can't expect the
+         * syncpt value to be updated. For this case, we force a read
+         * of the value from HW, and then check for expiration.
+         */
+        if (!nvhost_syncpt_is_expired_ext(f->host1x_pdev, f->syncpt_id,
+                                f->syncpt_value)) {
+                u32 val;
+
+                if (!nvhost_syncpt_read_ext_check(f->host1x_pdev,
+                                f->syncpt_id, &val)) {
+                        return nvhost_syncpt_is_expired_ext(f->host1x_pdev,
+                                        f->syncpt_id, f->syncpt_value);
+                }
+        }
+
+        return true;
 }
 
 static const struct gk20a_fence_ops gk20a_syncpt_fence_ops = {


@@ -968,6 +968,8 @@ struct nvgpu_alloc_gpfifo_args {
 struct nvgpu_alloc_gpfifo_ex_args {
         __u32 num_entries;
         __u32 num_inflight_jobs;
+#define NVGPU_ALLOC_GPFIFO_EX_FLAGS_VPR_ENABLED         (1 << 0) /* set owner channel of this gpfifo as a vpr channel */
+#define NVGPU_ALLOC_GPFIFO_EX_FLAGS_DETERMINISTIC       (1 << 1) /* channel shall exhibit deterministic behavior in the submit path */
         __u32 flags;
         __u32 reserved[5];
 };
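
For context, user space opts a channel into deterministic mode at
gpfifo allocation time using the new flag above. The following is a
sketch only: it assumes an already-open channel fd, assumes the
NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO_EX ioctl (defined elsewhere in this
header, not shown in the diff) takes this struct, and uses arbitrary
example entry counts.

#include <string.h>
#include <sys/ioctl.h>
#include <linux/nvgpu.h>

/* Sketch: request a deterministic channel on an open channel fd. */
static int alloc_deterministic_gpfifo(int ch_fd)
{
        struct nvgpu_alloc_gpfifo_ex_args args;

        memset(&args, 0, sizeof(args));
        args.num_entries = 128;         /* must be a power of 2 */
        args.num_inflight_jobs = 16;    /* pre-allocates the job-tracking
                                         * resources that deterministic
                                         * submits require */
        args.flags = NVGPU_ALLOC_GPFIFO_EX_FLAGS_DETERMINISTIC;

        return ioctl(ch_fd, NVGPU_IOCTL_CHANNEL_ALLOC_GPFIFO_EX, &args);
}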
@@ -994,8 +996,6 @@ struct nvgpu_fence {
 #define NVGPU_SUBMIT_GPFIFO_FLAGS_SUPPRESS_WFI          (1 << 4)
 /* skip buffer refcounting during submit */
 #define NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING       (1 << 5)
-/* submit should exhibit deterministic latency */
-#define NVGPU_SUBMIT_GPFIFO_FLAGS_DETERMINISTIC (1 << 6)
 
 struct nvgpu_submit_gpfifo_args {
         __u64 gpfifo;