gpu: nvgpu: create sync_fence only if needed

Currently, we create a sync_fence (via nvhost_sync_create_fence())
for every submit, but not all submits request a sync_fence.

Also, the nvhost_sync_create_fence() call accounts for about one third
of the total submit path.

Hence, as an optimization, allocate the sync_fence only when the user
explicitly asks for it by setting both
NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET and
NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE.
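
In code, the gating check added to the submit path amounts to the
following (a minimal sketch; both flags are the real UAPI flags, and
need_sync_fence is the local variable this change introduces):

    if ((flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) &&
        (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE))
            need_sync_fence = true;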

Also, in the CDE path from gk20a_prepare_compressible_read(), we reuse
the existing fence stored in "state", which can result in no
sync_fence_fd being returned even though the user asked for one.
Hence, force allocation of the sync_fence when the job submission
comes from the CDE path.
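
Concretely, the CDE caller then pins the new parameter to true (a
sketch mirroring the CDE hunk below; all identifiers are the ones
already used in gk20a_cde_execute_buffer()):

    return gk20a_submit_channel_gpfifo(cde_ctx->ch, gpfifo, NULL,
                num_entries, flags, fence, fence_out,
                true /* force_need_sync_fence */);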

Bug 200141116

Change-Id: Ia921701bf0e2432d6b8a5e8b7d91160e7f52db1e
Signed-off-by: Deepak Nibade <dnibade@nvidia.com>
Reviewed-on: http://git-master/r/812845
(cherry picked from commit 5fd47015eeed00352cc8473eff969a66c94fee98)
Reviewed-on: http://git-master/r/837662
Reviewed-by: Automatic_Commit_Validation_User
GVS: Gerrit_Virtual_Submit
Reviewed-by: Sachin Nikam <snikam@nvidia.com>
Author:       Deepak Nibade
Date:         2015-10-07 16:20:07 +05:30
Committed by: Sachin Nikam
Parent:       937de14907
Commit:       52753b51f1
7 changed files with 54 additions and 29 deletions

drivers/gpu/nvgpu/gk20a/cde_gk20a.c

@@ -724,7 +724,7 @@ static int gk20a_cde_execute_buffer(struct gk20a_cde_ctx *cde_ctx,
        }

        return gk20a_submit_channel_gpfifo(cde_ctx->ch, gpfifo, NULL,
-                       num_entries, flags, fence, fence_out);
+                       num_entries, flags, fence, fence_out, true);
 }

 static void gk20a_cde_ctx_release(struct gk20a_cde_ctx *cde_ctx)

drivers/gpu/nvgpu/gk20a/channel_gk20a.c

@@ -1830,7 +1830,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
                                u32 num_entries,
                                u32 flags,
                                struct nvgpu_fence *fence,
-                               struct gk20a_fence **fence_out)
+                               struct gk20a_fence **fence_out,
+                               bool force_need_sync_fence)
 {
        struct gk20a *g = c->g;
        struct device *d = dev_from_gk20a(g);
@@ -1848,6 +1849,14 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
        struct nvgpu_gpfifo *gpfifo_mem = c->gpfifo.mem.cpu_va;
        bool skip_buffer_refcounting = (flags &
                        NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING);
+       bool need_sync_fence = false;
+
+       /*
+        * If user wants to allocate sync_fence_fd always, then respect that;
+        * otherwise, allocate sync_fence_fd based on user flags only
+        */
+       if (force_need_sync_fence)
+               need_sync_fence = true;

        if (c->has_timedout)
                return -ETIMEDOUT;
@@ -1970,15 +1979,18 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
                goto clean_up;
        }

+       if ((flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) &&
+                       (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE))
+               need_sync_fence = true;
+
        /* always insert syncpt increment at end of gpfifo submission
           to keep track of method completion for idle railgating */
        if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
                err = c->sync->incr_user(c->sync, wait_fence_fd, &incr_cmd,
-                               &post_fence, need_wfi);
+                               &post_fence, need_wfi, need_sync_fence);
        else
                err = c->sync->incr(c->sync, &incr_cmd,
-                               &post_fence);
+                               &post_fence, need_sync_fence);
        if (err) {
                mutex_unlock(&c->submit_lock);
                goto clean_up;
@@ -2578,7 +2590,7 @@ static int gk20a_ioctl_channel_submit_gpfifo(
        ret = gk20a_submit_channel_gpfifo(ch, NULL, args, args->num_entries,
                        args->flags, &args->fence,
-                       &fence_out);
+                       &fence_out, false);
        if (ret)
                goto clean_up;

drivers/gpu/nvgpu/gk20a/channel_gk20a.h

@@ -244,7 +244,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
                        u32 num_entries,
                        u32 flags,
                        struct nvgpu_fence *fence,
-                       struct gk20a_fence **fence_out);
+                       struct gk20a_fence **fence_out,
+                       bool force_need_sync_fence);

 int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
                struct nvgpu_alloc_gpfifo_args *args);

drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c

@@ -166,7 +166,8 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
                bool wfi_cmd,
                bool register_irq,
                struct priv_cmd_entry **entry,
-               struct gk20a_fence **fence)
+               struct gk20a_fence **fence,
+               bool need_sync_fence)
 {
        u32 thresh;
        int incr_cmd_size;
@@ -239,7 +240,7 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
        }

        *fence = gk20a_fence_from_syncpt(sp->host1x_pdev, sp->id, thresh,
-                       wfi_cmd);
+                       wfi_cmd, need_sync_fence);
        *entry = incr_cmd;
        return 0;
 }
@@ -251,33 +252,35 @@ static int gk20a_channel_syncpt_incr_wfi(struct gk20a_channel_sync *s,
        return __gk20a_channel_syncpt_incr(s,
                        true /* wfi */,
                        false /* no irq handler */,
-                       entry, fence);
+                       entry, fence, true);
 }

 static int gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
                        struct priv_cmd_entry **entry,
-                       struct gk20a_fence **fence)
+                       struct gk20a_fence **fence,
+                       bool need_sync_fence)
 {
        /* Don't put wfi cmd to this one since we're not returning
         * a fence to user space. */
        return __gk20a_channel_syncpt_incr(s,
                        false /* no wfi */,
                        true /* register irq */,
-                       entry, fence);
+                       entry, fence, need_sync_fence);
 }

 static int gk20a_channel_syncpt_incr_user(struct gk20a_channel_sync *s,
                        int wait_fence_fd,
                        struct priv_cmd_entry **entry,
                        struct gk20a_fence **fence,
-                       bool wfi)
+                       bool wfi,
+                       bool need_sync_fence)
 {
        /* Need to do 'wfi + host incr' since we return the fence
         * to user space. */
        return __gk20a_channel_syncpt_incr(s,
                        wfi,
                        true /* register irq */,
-                       entry, fence);
+                       entry, fence, need_sync_fence);
 }

 static void gk20a_channel_syncpt_set_min_eq_max(struct gk20a_channel_sync *s)
@@ -513,7 +516,8 @@ static int __gk20a_channel_semaphore_incr(
                struct gk20a_channel_sync *s, bool wfi_cmd,
                struct sync_fence *dependency,
                struct priv_cmd_entry **entry,
-               struct gk20a_fence **fence)
+               struct gk20a_fence **fence,
+               bool need_sync_fence)
 {
        u64 va;
        int incr_cmd_size;
@@ -560,18 +564,19 @@ static int gk20a_channel_semaphore_incr_wfi(
        return __gk20a_channel_semaphore_incr(s,
                        true /* wfi */,
                        NULL,
-                       entry, fence);
+                       entry, fence, true);
 }

 static int gk20a_channel_semaphore_incr(
                struct gk20a_channel_sync *s,
                struct priv_cmd_entry **entry,
-               struct gk20a_fence **fence)
+               struct gk20a_fence **fence,
+               bool need_sync_fence)
 {
        /* Don't put wfi cmd to this one since we're not returning
         * a fence to user space. */
        return __gk20a_channel_semaphore_incr(s, false /* no wfi */,
-                       NULL, entry, fence);
+                       NULL, entry, fence, need_sync_fence);
 }

 static int gk20a_channel_semaphore_incr_user(
@@ -579,7 +584,8 @@ static int gk20a_channel_semaphore_incr_user(
                int wait_fence_fd,
                struct priv_cmd_entry **entry,
                struct gk20a_fence **fence,
-               bool wfi)
+               bool wfi,
+               bool need_sync_fence)
 {
 #ifdef CONFIG_SYNC
        struct sync_fence *dependency = NULL;
@@ -592,7 +598,7 @@ static int gk20a_channel_semaphore_incr_user(
        }

        err = __gk20a_channel_semaphore_incr(s, wfi, dependency,
-                       entry, fence);
+                       entry, fence, need_sync_fence);
        if (err) {
                if (dependency)
                        sync_fence_put(dependency);

drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.h

@@ -3,7 +3,7 @@
  *
  * GK20A Channel Synchronization Abstraction
  *
- * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -54,7 +54,8 @@ struct gk20a_channel_sync {
         */
        int (*incr)(struct gk20a_channel_sync *s,
                    struct priv_cmd_entry **entry,
-                   struct gk20a_fence **fence);
+                   struct gk20a_fence **fence,
+                   bool need_sync_fence);

        /* Increment syncpoint/semaphore, preceded by a wfi.
         * Returns
@@ -76,7 +77,8 @@ struct gk20a_channel_sync {
                         int wait_fence_fd,
                         struct priv_cmd_entry **entry,
                         struct gk20a_fence **fence,
-                        bool wfi);
+                        bool wfi,
+                        bool need_sync_fence);

        /* Reset the channel syncpoint/semaphore. */
        void (*set_min_eq_max)(struct gk20a_channel_sync *s);

drivers/gpu/nvgpu/gk20a/fence_gk20a.c

@@ -194,7 +194,8 @@ static const struct gk20a_fence_ops gk20a_syncpt_fence_ops = {
 };

 struct gk20a_fence *gk20a_fence_from_syncpt(struct platform_device *host1x_pdev,
-               u32 id, u32 value, bool wfi)
+               u32 id, u32 value, bool wfi,
+               bool need_sync_fence)
 {
        struct gk20a_fence *f;
        struct sync_fence *sync_fence = NULL;
@@ -205,10 +206,12 @@ struct gk20a_fence *gk20a_fence_from_syncpt(struct platform_device *host1x_pdev,
                .thresh = value
        };

-       sync_fence = nvhost_sync_create_fence(host1x_pdev, &pt, 1,
-                       "fence");
-       if (IS_ERR(sync_fence))
-               return NULL;
+       if (need_sync_fence) {
+               sync_fence = nvhost_sync_create_fence(host1x_pdev, &pt, 1,
+                               "fence");
+               if (IS_ERR(sync_fence))
+                       return NULL;
+       }
 #endif

        f = alloc_fence(&gk20a_syncpt_fence_ops, sync_fence, wfi);

drivers/gpu/nvgpu/gk20a/fence_gk20a.h

@@ -3,7 +3,7 @@
  *
  * GK20A Fences
  *
- * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -56,7 +56,8 @@ struct gk20a_fence *gk20a_fence_from_semaphore(

 struct gk20a_fence *gk20a_fence_from_syncpt(
                struct platform_device *host1x_pdev,
-               u32 id, u32 value, bool wfi);
+               u32 id, u32 value, bool wfi,
+               bool need_sync_fence);

 /* Fence operations */
 void gk20a_fence_put(struct gk20a_fence *f);