mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-23 09:57:08 +03:00
gpu: nvgpu: create sync_fence only if needed
Currently, we create a sync_fence (from nvhost_sync_create_fence()) for every submit, but not all submits request a sync_fence. Also, the nvhost_sync_create_fence() API takes about 1/3rd of the total submit path. Hence, to optimize, we allocate a sync_fence only when the user explicitly asks for it using (NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET && NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE). Also, in the CDE path from gk20a_prepare_compressible_read(), we reuse the existing fence stored in "state", which can result in not returning a sync_fence_fd when the user asked for it. Hence, force allocation of a sync_fence when the job submission comes from the CDE path. Bug 200141116 Change-Id: Ia921701bf0e2432d6b8a5e8b7d91160e7f52db1e Signed-off-by: Deepak Nibade <dnibade@nvidia.com> Reviewed-on: http://git-master/r/812845 (cherry picked from commit 5fd47015eeed00352cc8473eff969a66c94fee98) Reviewed-on: http://git-master/r/837662 Reviewed-by: Automatic_Commit_Validation_User GVS: Gerrit_Virtual_Submit Reviewed-by: Sachin Nikam <snikam@nvidia.com>
This commit is contained in:
committed by
Sachin Nikam
parent
937de14907
commit
52753b51f1
@@ -724,7 +724,7 @@ static int gk20a_cde_execute_buffer(struct gk20a_cde_ctx *cde_ctx,
|
|||||||
}
|
}
|
||||||
|
|
||||||
return gk20a_submit_channel_gpfifo(cde_ctx->ch, gpfifo, NULL,
|
return gk20a_submit_channel_gpfifo(cde_ctx->ch, gpfifo, NULL,
|
||||||
num_entries, flags, fence, fence_out);
|
num_entries, flags, fence, fence_out, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void gk20a_cde_ctx_release(struct gk20a_cde_ctx *cde_ctx)
|
static void gk20a_cde_ctx_release(struct gk20a_cde_ctx *cde_ctx)
|
||||||
|
|||||||
@@ -1830,7 +1830,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
|
|||||||
u32 num_entries,
|
u32 num_entries,
|
||||||
u32 flags,
|
u32 flags,
|
||||||
struct nvgpu_fence *fence,
|
struct nvgpu_fence *fence,
|
||||||
struct gk20a_fence **fence_out)
|
struct gk20a_fence **fence_out,
|
||||||
|
bool force_need_sync_fence)
|
||||||
{
|
{
|
||||||
struct gk20a *g = c->g;
|
struct gk20a *g = c->g;
|
||||||
struct device *d = dev_from_gk20a(g);
|
struct device *d = dev_from_gk20a(g);
|
||||||
@@ -1848,6 +1849,14 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
|
|||||||
struct nvgpu_gpfifo *gpfifo_mem = c->gpfifo.mem.cpu_va;
|
struct nvgpu_gpfifo *gpfifo_mem = c->gpfifo.mem.cpu_va;
|
||||||
bool skip_buffer_refcounting = (flags &
|
bool skip_buffer_refcounting = (flags &
|
||||||
NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING);
|
NVGPU_SUBMIT_GPFIFO_FLAGS_SKIP_BUFFER_REFCOUNTING);
|
||||||
|
bool need_sync_fence = false;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If user wants to allocate sync_fence_fd always, then respect that;
|
||||||
|
* otherwise, allocate sync_fence_fd based on user flags only
|
||||||
|
*/
|
||||||
|
if (force_need_sync_fence)
|
||||||
|
need_sync_fence = true;
|
||||||
|
|
||||||
if (c->has_timedout)
|
if (c->has_timedout)
|
||||||
return -ETIMEDOUT;
|
return -ETIMEDOUT;
|
||||||
@@ -1970,15 +1979,18 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
|
|||||||
goto clean_up;
|
goto clean_up;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ((flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) &&
|
||||||
|
(flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE))
|
||||||
|
need_sync_fence = true;
|
||||||
|
|
||||||
/* always insert syncpt increment at end of gpfifo submission
|
/* always insert syncpt increment at end of gpfifo submission
|
||||||
to keep track of method completion for idle railgating */
|
to keep track of method completion for idle railgating */
|
||||||
if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
|
if (flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
|
||||||
err = c->sync->incr_user(c->sync, wait_fence_fd, &incr_cmd,
|
err = c->sync->incr_user(c->sync, wait_fence_fd, &incr_cmd,
|
||||||
&post_fence, need_wfi);
|
&post_fence, need_wfi, need_sync_fence);
|
||||||
else
|
else
|
||||||
err = c->sync->incr(c->sync, &incr_cmd,
|
err = c->sync->incr(c->sync, &incr_cmd,
|
||||||
&post_fence);
|
&post_fence, need_sync_fence);
|
||||||
if (err) {
|
if (err) {
|
||||||
mutex_unlock(&c->submit_lock);
|
mutex_unlock(&c->submit_lock);
|
||||||
goto clean_up;
|
goto clean_up;
|
||||||
@@ -2578,7 +2590,7 @@ static int gk20a_ioctl_channel_submit_gpfifo(
|
|||||||
|
|
||||||
ret = gk20a_submit_channel_gpfifo(ch, NULL, args, args->num_entries,
|
ret = gk20a_submit_channel_gpfifo(ch, NULL, args, args->num_entries,
|
||||||
args->flags, &args->fence,
|
args->flags, &args->fence,
|
||||||
&fence_out);
|
&fence_out, false);
|
||||||
|
|
||||||
if (ret)
|
if (ret)
|
||||||
goto clean_up;
|
goto clean_up;
|
||||||
|
|||||||
@@ -244,7 +244,8 @@ int gk20a_submit_channel_gpfifo(struct channel_gk20a *c,
|
|||||||
u32 num_entries,
|
u32 num_entries,
|
||||||
u32 flags,
|
u32 flags,
|
||||||
struct nvgpu_fence *fence,
|
struct nvgpu_fence *fence,
|
||||||
struct gk20a_fence **fence_out);
|
struct gk20a_fence **fence_out,
|
||||||
|
bool force_need_sync_fence);
|
||||||
|
|
||||||
int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
|
int gk20a_alloc_channel_gpfifo(struct channel_gk20a *c,
|
||||||
struct nvgpu_alloc_gpfifo_args *args);
|
struct nvgpu_alloc_gpfifo_args *args);
|
||||||
|
|||||||
@@ -166,7 +166,8 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
|
|||||||
bool wfi_cmd,
|
bool wfi_cmd,
|
||||||
bool register_irq,
|
bool register_irq,
|
||||||
struct priv_cmd_entry **entry,
|
struct priv_cmd_entry **entry,
|
||||||
struct gk20a_fence **fence)
|
struct gk20a_fence **fence,
|
||||||
|
bool need_sync_fence)
|
||||||
{
|
{
|
||||||
u32 thresh;
|
u32 thresh;
|
||||||
int incr_cmd_size;
|
int incr_cmd_size;
|
||||||
@@ -239,7 +240,7 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
|
|||||||
}
|
}
|
||||||
|
|
||||||
*fence = gk20a_fence_from_syncpt(sp->host1x_pdev, sp->id, thresh,
|
*fence = gk20a_fence_from_syncpt(sp->host1x_pdev, sp->id, thresh,
|
||||||
wfi_cmd);
|
wfi_cmd, need_sync_fence);
|
||||||
*entry = incr_cmd;
|
*entry = incr_cmd;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@@ -251,33 +252,35 @@ static int gk20a_channel_syncpt_incr_wfi(struct gk20a_channel_sync *s,
|
|||||||
return __gk20a_channel_syncpt_incr(s,
|
return __gk20a_channel_syncpt_incr(s,
|
||||||
true /* wfi */,
|
true /* wfi */,
|
||||||
false /* no irq handler */,
|
false /* no irq handler */,
|
||||||
entry, fence);
|
entry, fence, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
|
static int gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
|
||||||
struct priv_cmd_entry **entry,
|
struct priv_cmd_entry **entry,
|
||||||
struct gk20a_fence **fence)
|
struct gk20a_fence **fence,
|
||||||
|
bool need_sync_fence)
|
||||||
{
|
{
|
||||||
/* Don't put wfi cmd to this one since we're not returning
|
/* Don't put wfi cmd to this one since we're not returning
|
||||||
* a fence to user space. */
|
* a fence to user space. */
|
||||||
return __gk20a_channel_syncpt_incr(s,
|
return __gk20a_channel_syncpt_incr(s,
|
||||||
false /* no wfi */,
|
false /* no wfi */,
|
||||||
true /* register irq */,
|
true /* register irq */,
|
||||||
entry, fence);
|
entry, fence, need_sync_fence);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int gk20a_channel_syncpt_incr_user(struct gk20a_channel_sync *s,
|
static int gk20a_channel_syncpt_incr_user(struct gk20a_channel_sync *s,
|
||||||
int wait_fence_fd,
|
int wait_fence_fd,
|
||||||
struct priv_cmd_entry **entry,
|
struct priv_cmd_entry **entry,
|
||||||
struct gk20a_fence **fence,
|
struct gk20a_fence **fence,
|
||||||
bool wfi)
|
bool wfi,
|
||||||
|
bool need_sync_fence)
|
||||||
{
|
{
|
||||||
/* Need to do 'wfi + host incr' since we return the fence
|
/* Need to do 'wfi + host incr' since we return the fence
|
||||||
* to user space. */
|
* to user space. */
|
||||||
return __gk20a_channel_syncpt_incr(s,
|
return __gk20a_channel_syncpt_incr(s,
|
||||||
wfi,
|
wfi,
|
||||||
true /* register irq */,
|
true /* register irq */,
|
||||||
entry, fence);
|
entry, fence, need_sync_fence);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void gk20a_channel_syncpt_set_min_eq_max(struct gk20a_channel_sync *s)
|
static void gk20a_channel_syncpt_set_min_eq_max(struct gk20a_channel_sync *s)
|
||||||
@@ -513,7 +516,8 @@ static int __gk20a_channel_semaphore_incr(
|
|||||||
struct gk20a_channel_sync *s, bool wfi_cmd,
|
struct gk20a_channel_sync *s, bool wfi_cmd,
|
||||||
struct sync_fence *dependency,
|
struct sync_fence *dependency,
|
||||||
struct priv_cmd_entry **entry,
|
struct priv_cmd_entry **entry,
|
||||||
struct gk20a_fence **fence)
|
struct gk20a_fence **fence,
|
||||||
|
bool need_sync_fence)
|
||||||
{
|
{
|
||||||
u64 va;
|
u64 va;
|
||||||
int incr_cmd_size;
|
int incr_cmd_size;
|
||||||
@@ -560,18 +564,19 @@ static int gk20a_channel_semaphore_incr_wfi(
|
|||||||
return __gk20a_channel_semaphore_incr(s,
|
return __gk20a_channel_semaphore_incr(s,
|
||||||
true /* wfi */,
|
true /* wfi */,
|
||||||
NULL,
|
NULL,
|
||||||
entry, fence);
|
entry, fence, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int gk20a_channel_semaphore_incr(
|
static int gk20a_channel_semaphore_incr(
|
||||||
struct gk20a_channel_sync *s,
|
struct gk20a_channel_sync *s,
|
||||||
struct priv_cmd_entry **entry,
|
struct priv_cmd_entry **entry,
|
||||||
struct gk20a_fence **fence)
|
struct gk20a_fence **fence,
|
||||||
|
bool need_sync_fence)
|
||||||
{
|
{
|
||||||
/* Don't put wfi cmd to this one since we're not returning
|
/* Don't put wfi cmd to this one since we're not returning
|
||||||
* a fence to user space. */
|
* a fence to user space. */
|
||||||
return __gk20a_channel_semaphore_incr(s, false /* no wfi */,
|
return __gk20a_channel_semaphore_incr(s, false /* no wfi */,
|
||||||
NULL, entry, fence);
|
NULL, entry, fence, need_sync_fence);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int gk20a_channel_semaphore_incr_user(
|
static int gk20a_channel_semaphore_incr_user(
|
||||||
@@ -579,7 +584,8 @@ static int gk20a_channel_semaphore_incr_user(
|
|||||||
int wait_fence_fd,
|
int wait_fence_fd,
|
||||||
struct priv_cmd_entry **entry,
|
struct priv_cmd_entry **entry,
|
||||||
struct gk20a_fence **fence,
|
struct gk20a_fence **fence,
|
||||||
bool wfi)
|
bool wfi,
|
||||||
|
bool need_sync_fence)
|
||||||
{
|
{
|
||||||
#ifdef CONFIG_SYNC
|
#ifdef CONFIG_SYNC
|
||||||
struct sync_fence *dependency = NULL;
|
struct sync_fence *dependency = NULL;
|
||||||
@@ -592,7 +598,7 @@ static int gk20a_channel_semaphore_incr_user(
|
|||||||
}
|
}
|
||||||
|
|
||||||
err = __gk20a_channel_semaphore_incr(s, wfi, dependency,
|
err = __gk20a_channel_semaphore_incr(s, wfi, dependency,
|
||||||
entry, fence);
|
entry, fence, need_sync_fence);
|
||||||
if (err) {
|
if (err) {
|
||||||
if (dependency)
|
if (dependency)
|
||||||
sync_fence_put(dependency);
|
sync_fence_put(dependency);
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
*
|
*
|
||||||
* GK20A Channel Synchronization Abstraction
|
* GK20A Channel Synchronization Abstraction
|
||||||
*
|
*
|
||||||
* Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
|
* Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved.
|
||||||
*
|
*
|
||||||
* This program is free software; you can redistribute it and/or modify it
|
* This program is free software; you can redistribute it and/or modify it
|
||||||
* under the terms and conditions of the GNU General Public License,
|
* under the terms and conditions of the GNU General Public License,
|
||||||
@@ -54,7 +54,8 @@ struct gk20a_channel_sync {
|
|||||||
*/
|
*/
|
||||||
int (*incr)(struct gk20a_channel_sync *s,
|
int (*incr)(struct gk20a_channel_sync *s,
|
||||||
struct priv_cmd_entry **entry,
|
struct priv_cmd_entry **entry,
|
||||||
struct gk20a_fence **fence);
|
struct gk20a_fence **fence,
|
||||||
|
bool need_sync_fence);
|
||||||
|
|
||||||
/* Increment syncpoint/semaphore, preceded by a wfi.
|
/* Increment syncpoint/semaphore, preceded by a wfi.
|
||||||
* Returns
|
* Returns
|
||||||
@@ -76,7 +77,8 @@ struct gk20a_channel_sync {
|
|||||||
int wait_fence_fd,
|
int wait_fence_fd,
|
||||||
struct priv_cmd_entry **entry,
|
struct priv_cmd_entry **entry,
|
||||||
struct gk20a_fence **fence,
|
struct gk20a_fence **fence,
|
||||||
bool wfi);
|
bool wfi,
|
||||||
|
bool need_sync_fence);
|
||||||
|
|
||||||
/* Reset the channel syncpoint/semaphore. */
|
/* Reset the channel syncpoint/semaphore. */
|
||||||
void (*set_min_eq_max)(struct gk20a_channel_sync *s);
|
void (*set_min_eq_max)(struct gk20a_channel_sync *s);
|
||||||
|
|||||||
@@ -194,7 +194,8 @@ static const struct gk20a_fence_ops gk20a_syncpt_fence_ops = {
|
|||||||
};
|
};
|
||||||
|
|
||||||
struct gk20a_fence *gk20a_fence_from_syncpt(struct platform_device *host1x_pdev,
|
struct gk20a_fence *gk20a_fence_from_syncpt(struct platform_device *host1x_pdev,
|
||||||
u32 id, u32 value, bool wfi)
|
u32 id, u32 value, bool wfi,
|
||||||
|
bool need_sync_fence)
|
||||||
{
|
{
|
||||||
struct gk20a_fence *f;
|
struct gk20a_fence *f;
|
||||||
struct sync_fence *sync_fence = NULL;
|
struct sync_fence *sync_fence = NULL;
|
||||||
@@ -205,10 +206,12 @@ struct gk20a_fence *gk20a_fence_from_syncpt(struct platform_device *host1x_pdev,
|
|||||||
.thresh = value
|
.thresh = value
|
||||||
};
|
};
|
||||||
|
|
||||||
sync_fence = nvhost_sync_create_fence(host1x_pdev, &pt, 1,
|
if (need_sync_fence) {
|
||||||
"fence");
|
sync_fence = nvhost_sync_create_fence(host1x_pdev, &pt, 1,
|
||||||
if (IS_ERR(sync_fence))
|
"fence");
|
||||||
return NULL;
|
if (IS_ERR(sync_fence))
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
f = alloc_fence(&gk20a_syncpt_fence_ops, sync_fence, wfi);
|
f = alloc_fence(&gk20a_syncpt_fence_ops, sync_fence, wfi);
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
*
|
*
|
||||||
* GK20A Fences
|
* GK20A Fences
|
||||||
*
|
*
|
||||||
* Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
|
* Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved.
|
||||||
*
|
*
|
||||||
* This program is free software; you can redistribute it and/or modify it
|
* This program is free software; you can redistribute it and/or modify it
|
||||||
* under the terms and conditions of the GNU General Public License,
|
* under the terms and conditions of the GNU General Public License,
|
||||||
@@ -56,7 +56,8 @@ struct gk20a_fence *gk20a_fence_from_semaphore(
|
|||||||
|
|
||||||
struct gk20a_fence *gk20a_fence_from_syncpt(
|
struct gk20a_fence *gk20a_fence_from_syncpt(
|
||||||
struct platform_device *host1x_pdev,
|
struct platform_device *host1x_pdev,
|
||||||
u32 id, u32 value, bool wfi);
|
u32 id, u32 value, bool wfi,
|
||||||
|
bool need_sync_fence);
|
||||||
|
|
||||||
/* Fence operations */
|
/* Fence operations */
|
||||||
void gk20a_fence_put(struct gk20a_fence *f);
|
void gk20a_fence_put(struct gk20a_fence *f);
|
||||||
|
|||||||
Reference in New Issue
Block a user