mirror of
git://nv-tegra.nvidia.com/linux-nvgpu.git
synced 2025-12-23 09:57:08 +03:00
Revamp the semaphore support in the nvgpu driver.

The original problem with nvgpu's semaphore support was that it required a SW-based wait for every semaphore release: gk20a_channel_semaphore_wait_fd() created a new semaphore for every fence it waited on, and that semaphore was then released by SW when the fence signaled. Every release therefore required a sync_fence_wait_async() call, which could block. The latency of this SW wait was enough to cause massive degradation in performance.

To fix this a fast path was implemented. When a fence passed to gk20a_channel_semaphore_wait_fd() is backed by a GPU semaphore, a semaphore acquire is used directly to block the GPU. No sync_fence_wait_async() is performed and no extra semaphore is created.

To implement this fast path the semaphore memory had to be shared between channels. Previously, since a new semaphore was created on every pass through gk20a_channel_semaphore_wait_fd(), the address space a semaphore was mapped into was irrelevant. When using the fast path, however, a semaphore may be released in one address space but acquired in another.

Sharing the semaphore memory was done by making a fixed GPU mapping in all channels. This mapping points to the semaphore memory (the so-called semaphore sea). The global fixed mapping is read-only, to make sure no semaphore can be incremented (i.e. released) by a malicious channel. Each channel then gets a RW mapping of its own semaphore. This way a channel may only acquire other channels' semaphores but may both acquire and release its own.

The gk20a fence code was updated to allow introspection of GPU-backed fences. This allows detection of when the fast path can be taken. If the fast path cannot be used (for example, when a fence is sync-pt backed) the original slow path is still present. It gets used when the GPU needs to wait on an event from something which only understands sync-pts.

Bug 1732449
JIRA DNVGPU-12

Change-Id: Ic0fea74994da5819a771deac726bb0d47a33c2de
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1133792
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
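In outline, the wait_fd decision looks like this (a sketch only; emit_gpu_sema_acquire() and release_waiter are illustrative names, not functions in this file, while the two query helpers do appear in the code below):

    /* Sketch: how gk20a_channel_semaphore_wait_fd() picks a path. */
    if (gk20a_is_sema_backed_sync_fence(fence)) {
            /* Fast path: the GPU acquires the fence's own backing
             * semaphore; no SW wait, no extra semaphore. */
            emit_gpu_sema_acquire(c, sema_from_sync_fence(fence));
    } else {
            /* Slow path: SW waits on the fence and releases a freshly
             * allocated semaphore from the waiter callback. */
            sync_fence_wait_async(fence, &release_waiter);
    }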
874 lines
23 KiB
C
/*
 * drivers/video/tegra/host/gk20a/channel_sync_gk20a.c
 *
 * GK20A Channel Synchronization Abstraction
 *
 * Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */

#include <linux/gk20a.h>
#include <linux/version.h>

#include "channel_sync_gk20a.h"
#include "gk20a.h"
#include "fence_gk20a.h"
#include "semaphore_gk20a.h"
#include "sync_gk20a.h"
#include "mm_gk20a.h"

#ifdef CONFIG_SYNC
#include "../drivers/staging/android/sync.h"
#endif

#ifdef CONFIG_TEGRA_GK20A
#include <linux/nvhost.h>
#endif

#ifdef CONFIG_TEGRA_GK20A
struct gk20a_channel_syncpt {
	struct gk20a_channel_sync ops;
	struct channel_gk20a *c;
	struct platform_device *host1x_pdev;
	u32 id;
};

static void add_wait_cmd(struct gk20a *g, struct priv_cmd_entry *cmd, u32 off,
		u32 id, u32 thresh)
{
	off = cmd->off + off;
	/* syncpoint_a */
	gk20a_mem_wr32(g, cmd->mem, off++, 0x2001001C);
	/* payload */
	gk20a_mem_wr32(g, cmd->mem, off++, thresh);
	/* syncpoint_b */
	gk20a_mem_wr32(g, cmd->mem, off++, 0x2001001D);
	/* syncpt_id, switch_en, wait */
	gk20a_mem_wr32(g, cmd->mem, off++, (id << 8) | 0x10);
}
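/*
 * Note: every host1x wait emitted by add_wait_cmd() is exactly four method
 * words (syncpoint_a + payload + syncpoint_b + id/wait), which is why the
 * callers below size their priv cmd buffers in multiples of 4.
 */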
static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s,
		u32 id, u32 thresh, struct priv_cmd_entry **entry,
		struct gk20a_fence **fence)
{
	struct gk20a_channel_syncpt *sp =
		container_of(s, struct gk20a_channel_syncpt, ops);
	struct priv_cmd_entry *wait_cmd = NULL;
	struct channel_gk20a *c = sp->c;
	int err = 0;

	if (!nvhost_syncpt_is_valid_pt_ext(sp->host1x_pdev, id)) {
		dev_warn(dev_from_gk20a(c->g),
				"invalid wait id in gpfifo submit, elided");
		return 0;
	}

	if (nvhost_syncpt_is_expired_ext(sp->host1x_pdev, id, thresh))
		return 0;

	err = gk20a_channel_alloc_priv_cmdbuf(c, 4, &wait_cmd);
	if (err) {
		gk20a_err(dev_from_gk20a(c->g),
				"not enough priv cmd buffer space");
		return err;
	}

	add_wait_cmd(c->g, wait_cmd, 0, id, thresh);

	*entry = wait_cmd;
	*fence = NULL;
	return 0;
}
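/*
 * Wait on all sync points of a sync_fence fd. Points that have already
 * expired are turned into NOPs by zeroing their 4-word slot in the wait
 * buffer; live points get a real host wait via add_wait_cmd().
 */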
static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd,
		struct priv_cmd_entry **entry,
		struct gk20a_fence **fence)
{
#ifdef CONFIG_SYNC
	int i;
	int num_wait_cmds;
	struct sync_fence *sync_fence;
	struct sync_pt *pt;
	struct priv_cmd_entry *wait_cmd = NULL;
	struct gk20a_channel_syncpt *sp =
		container_of(s, struct gk20a_channel_syncpt, ops);
	struct channel_gk20a *c = sp->c;
	u32 wait_id;
	int err = 0;

	sync_fence = nvhost_sync_fdget(fd);
	if (!sync_fence)
		return -EINVAL;

	/* validate syncpt ids */
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
	list_for_each_entry(pt, &sync_fence->pt_list_head, pt_list) {
#else
	for (i = 0; i < sync_fence->num_fences; i++) {
		pt = sync_pt_from_fence(sync_fence->cbs[i].sync_pt);
#endif
		wait_id = nvhost_sync_pt_id(pt);
		if (!wait_id || !nvhost_syncpt_is_valid_pt_ext(sp->host1x_pdev,
					wait_id)) {
			sync_fence_put(sync_fence);
			return -EINVAL;
		}
	}

	num_wait_cmds = nvhost_sync_num_pts(sync_fence);
	if (num_wait_cmds == 0) {
		sync_fence_put(sync_fence);
		return 0;
	}

	err = gk20a_channel_alloc_priv_cmdbuf(c, 4 * num_wait_cmds, &wait_cmd);
	if (err) {
		gk20a_err(dev_from_gk20a(c->g),
				"not enough priv cmd buffer space");
		sync_fence_put(sync_fence);
		return err;
	}

	i = 0;
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
	list_for_each_entry(pt, &sync_fence->pt_list_head, pt_list) {
#else
	for (i = 0; i < sync_fence->num_fences; i++) {
		struct fence *f = sync_fence->cbs[i].sync_pt;
		struct sync_pt *pt = sync_pt_from_fence(f);
#endif
		u32 wait_id = nvhost_sync_pt_id(pt);
		u32 wait_value = nvhost_sync_pt_thresh(pt);

		if (nvhost_syncpt_is_expired_ext(sp->host1x_pdev,
				wait_id, wait_value)) {
			/* each wait_cmd is 4 u32s */
			gk20a_memset(c->g, wait_cmd->mem,
					(wait_cmd->off + i * 4) * sizeof(u32),
					0, 4 * sizeof(u32));
		} else
			add_wait_cmd(c->g, wait_cmd, i * 4, wait_id,
					wait_value);
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
		i++;
	}
#else
	}
#endif

	WARN_ON(i != num_wait_cmds);
	sync_fence_put(sync_fence);

	*entry = wait_cmd;
	*fence = NULL;
	return 0;
#else
	return -ENODEV;
#endif
}
static void gk20a_channel_syncpt_update(void *priv, int nr_completed)
{
	struct channel_gk20a *ch = priv;

	gk20a_channel_update(ch, nr_completed);

	/* note: channel_get() is in __gk20a_channel_syncpt_incr() */
	gk20a_channel_put(ch);
}
static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
				       bool wfi_cmd,
				       bool register_irq,
				       struct priv_cmd_entry **entry,
				       struct gk20a_fence **fence,
				       bool need_sync_fence)
{
	u32 thresh;
	int incr_cmd_size;
	int off;
	int err;
	struct priv_cmd_entry *incr_cmd = NULL;
	struct gk20a_channel_syncpt *sp =
		container_of(s, struct gk20a_channel_syncpt, ops);
	struct channel_gk20a *c = sp->c;

	incr_cmd_size = 6;
	if (wfi_cmd)
		incr_cmd_size += 2;

	err = gk20a_channel_alloc_priv_cmdbuf(c, incr_cmd_size, &incr_cmd);
	if (err)
		return err;

	off = incr_cmd->off;

	/* WAR for hw bug 1491360: syncpt needs to be incremented twice */

	if (wfi_cmd) {
		/* wfi */
		gk20a_mem_wr32(c->g, incr_cmd->mem, off++, 0x2001001E);
		/* handle, ignored */
		gk20a_mem_wr32(c->g, incr_cmd->mem, off++, 0x00000000);
	}
	/* syncpoint_a */
	gk20a_mem_wr32(c->g, incr_cmd->mem, off++, 0x2001001C);
	/* payload, ignored */
	gk20a_mem_wr32(c->g, incr_cmd->mem, off++, 0);
	/* syncpoint_b */
	gk20a_mem_wr32(c->g, incr_cmd->mem, off++, 0x2001001D);
	/* syncpt_id, incr */
	gk20a_mem_wr32(c->g, incr_cmd->mem, off++, (sp->id << 8) | 0x1);
	/* syncpoint_b */
	gk20a_mem_wr32(c->g, incr_cmd->mem, off++, 0x2001001D);
	/* syncpt_id, incr */
	gk20a_mem_wr32(c->g, incr_cmd->mem, off++, (sp->id << 8) | 0x1);

	WARN_ON(off - incr_cmd->off != incr_cmd_size);

	thresh = nvhost_syncpt_incr_max_ext(sp->host1x_pdev, sp->id, 2);

	if (register_irq) {
		struct channel_gk20a *referenced = gk20a_channel_get(c);

		WARN_ON(!referenced);

		if (referenced) {
			/* note: channel_put() is in
			 * gk20a_channel_syncpt_update() */

			err = nvhost_intr_register_notifier(
					sp->host1x_pdev,
					sp->id, thresh,
					gk20a_channel_syncpt_update, c);
			if (err)
				gk20a_channel_put(referenced);

			/* Adding an interrupt action should
			 * never fail. Proper error handling
			 * here would require us to decrement
			 * the syncpt max back to its original
			 * value. */
			WARN(err,
			     "failed to set submit complete interrupt");
		}
	}

	*fence = gk20a_fence_from_syncpt(sp->host1x_pdev, sp->id, thresh,
					 wfi_cmd, need_sync_fence);
	*entry = incr_cmd;
	return 0;
}
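/*
 * For reference, the increment sequence above emits six words without wfi:
 *   0x2001001C, 0              (syncpoint_a, payload ignored)
 *   0x2001001D, (id << 8) | 1  (syncpoint_b, incr)
 *   0x2001001D, (id << 8) | 1  (syncpoint_b, incr again, bug 1491360 WAR)
 * and a wfi prepends 0x2001001E, 0 for a total of eight.
 */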
static int gk20a_channel_syncpt_incr_wfi(struct gk20a_channel_sync *s,
				struct priv_cmd_entry **entry,
				struct gk20a_fence **fence)
{
	return __gk20a_channel_syncpt_incr(s,
			true /* wfi */,
			false /* no irq handler */,
			entry, fence, true);
}

static int gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
				struct priv_cmd_entry **entry,
				struct gk20a_fence **fence,
				bool need_sync_fence)
{
	/* Don't put a wfi cmd into this one since we're not returning
	 * a fence to user space. */
	return __gk20a_channel_syncpt_incr(s,
			false /* no wfi */,
			true /* register irq */,
			entry, fence, need_sync_fence);
}
static int gk20a_channel_syncpt_incr_user(struct gk20a_channel_sync *s,
				int wait_fence_fd,
				struct priv_cmd_entry **entry,
				struct gk20a_fence **fence,
				bool wfi,
				bool need_sync_fence)
{
	/* Need to do 'wfi + host incr' since we return the fence
	 * to user space. */
	return __gk20a_channel_syncpt_incr(s,
			wfi,
			true /* register irq */,
			entry, fence, need_sync_fence);
}
static void gk20a_channel_syncpt_set_min_eq_max(struct gk20a_channel_sync *s)
{
	struct gk20a_channel_syncpt *sp =
		container_of(s, struct gk20a_channel_syncpt, ops);
	nvhost_syncpt_set_min_eq_max_ext(sp->host1x_pdev, sp->id);
}

static void gk20a_channel_syncpt_signal_timeline(
		struct gk20a_channel_sync *s)
{
	/* Nothing to do. */
}

static int gk20a_channel_syncpt_id(struct gk20a_channel_sync *s)
{
	struct gk20a_channel_syncpt *sp =
		container_of(s, struct gk20a_channel_syncpt, ops);
	return sp->id;
}

static void gk20a_channel_syncpt_destroy(struct gk20a_channel_sync *s)
{
	struct gk20a_channel_syncpt *sp =
		container_of(s, struct gk20a_channel_syncpt, ops);
	nvhost_syncpt_set_min_eq_max_ext(sp->host1x_pdev, sp->id);
	nvhost_syncpt_put_ref_ext(sp->host1x_pdev, sp->id);
	kfree(sp);
}
static struct gk20a_channel_sync *
gk20a_channel_syncpt_create(struct channel_gk20a *c)
{
	struct gk20a_channel_syncpt *sp;
	char syncpt_name[32];

	sp = kzalloc(sizeof(*sp), GFP_KERNEL);
	if (!sp)
		return NULL;

	sp->c = c;
	sp->host1x_pdev = c->g->host1x_dev;

	snprintf(syncpt_name, sizeof(syncpt_name),
		 "%s_%d", dev_name(c->g->dev), c->hw_chid);

	sp->id = nvhost_get_syncpt_host_managed(sp->host1x_pdev,
						c->hw_chid, syncpt_name);
	if (!sp->id) {
		kfree(sp);
		gk20a_err(c->g->dev, "failed to get free syncpt");
		return NULL;
	}

	nvhost_syncpt_set_min_eq_max_ext(sp->host1x_pdev, sp->id);

	atomic_set(&sp->ops.refcount, 0);
	sp->ops.wait_syncpt = gk20a_channel_syncpt_wait_syncpt;
	sp->ops.wait_fd = gk20a_channel_syncpt_wait_fd;
	sp->ops.incr = gk20a_channel_syncpt_incr;
	sp->ops.incr_wfi = gk20a_channel_syncpt_incr_wfi;
	sp->ops.incr_user = gk20a_channel_syncpt_incr_user;
	sp->ops.set_min_eq_max = gk20a_channel_syncpt_set_min_eq_max;
	sp->ops.signal_timeline = gk20a_channel_syncpt_signal_timeline;
	sp->ops.syncpt_id = gk20a_channel_syncpt_id;
	sp->ops.destroy = gk20a_channel_syncpt_destroy;

	return &sp->ops;
}
#endif /* CONFIG_TEGRA_GK20A */
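/*
 * Semaphore-based synchronization, used when the platform has no host1x
 * syncpoints. Per the semaphore revamp, semaphores live in a per-VM pool
 * (the "semaphore sea") that is mapped read-only into every channel's
 * address space, with a RW mapping only of a channel's own semaphore, so
 * any channel can acquire a semaphore but only the owner can release it.
 */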
struct gk20a_channel_semaphore {
	struct gk20a_channel_sync ops;
	struct channel_gk20a *c;

	/* The semaphore pool used by this channel (the VM's shared pool). */
	struct gk20a_semaphore_pool *pool;

	/* A sync timeline that advances when gpu completes work. */
	struct sync_timeline *timeline;
};
#ifdef CONFIG_SYNC
struct wait_fence_work {
	struct sync_fence_waiter waiter;
	struct channel_gk20a *ch;
	struct gk20a_semaphore *sema;
};

static void gk20a_channel_semaphore_launcher(
		struct sync_fence *fence,
		struct sync_fence_waiter *waiter)
{
	int err;
	struct wait_fence_work *w =
		container_of(waiter, struct wait_fence_work, waiter);
	struct gk20a *g = w->ch->g;

	gk20a_dbg_info("waiting for pre fence %p '%s'",
			fence, fence->name);
	err = sync_fence_wait(fence, -1);
	if (err < 0)
		dev_err(g->dev, "error waiting pre-fence: %d\n", err);

	gk20a_dbg_info(
		  "wait completed (%d) for fence %p '%s', triggering gpu work",
		  err, fence, fence->name);
	sync_fence_put(fence);
	gk20a_semaphore_release(w->sema);
	gk20a_semaphore_put(w->sema);
	kfree(w);
}
#endif
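/*
 * Emit a HOST semaphore method sequence into a priv cmd buffer. An acquire
 * writes 8 words (the semaphore_a/b/c/d method pairs); a release writes 10
 * (the same four pairs plus a non_stall_int pair). These match the sizes
 * the callers below allocate. The cmd_size argument itself is not consumed
 * by this function.
 */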
static void add_sema_cmd(struct gk20a *g, struct channel_gk20a *c,
			 struct gk20a_semaphore *s, struct priv_cmd_entry *cmd,
			 int cmd_size, bool acquire, bool wfi)
{
	u32 off = cmd->off;
	u64 va;

	/*
	 * RO for acquire (since we just need to read the mem) and RW for
	 * release since we will need to write back to the semaphore memory.
	 */
	va = acquire ? gk20a_semaphore_gpu_ro_va(s) :
		       gk20a_semaphore_gpu_rw_va(s);

	/*
	 * If the op is not an acquire (and therefore a release) we should
	 * incr the underlying sema next_value.
	 */
	if (!acquire)
		gk20a_semaphore_incr(s);

	/* semaphore_a */
	gk20a_mem_wr32(g, cmd->mem, off++, 0x20010004);
	/* offset_upper */
	gk20a_mem_wr32(g, cmd->mem, off++, (va >> 32) & 0xff);
	/* semaphore_b */
	gk20a_mem_wr32(g, cmd->mem, off++, 0x20010005);
	/* offset */
	gk20a_mem_wr32(g, cmd->mem, off++, va & 0xffffffff);

	if (acquire) {
		/* semaphore_c */
		gk20a_mem_wr32(g, cmd->mem, off++, 0x20010006);
		/* payload */
		gk20a_mem_wr32(g, cmd->mem, off++,
			       gk20a_semaphore_get_value(s));
		/* semaphore_d */
		gk20a_mem_wr32(g, cmd->mem, off++, 0x20010007);
		/* operation: acq_geq, switch_en */
		gk20a_mem_wr32(g, cmd->mem, off++, 0x4 | (0x1 << 12));
	} else {
		/* semaphore_c */
		gk20a_mem_wr32(g, cmd->mem, off++, 0x20010006);
		/* payload */
		gk20a_mem_wr32(g, cmd->mem, off++,
			       gk20a_semaphore_get_value(s));
		/* semaphore_d */
		gk20a_mem_wr32(g, cmd->mem, off++, 0x20010007);
		/* operation: release, wfi */
		gk20a_mem_wr32(g, cmd->mem, off++,
			       0x2 | ((wfi ? 0x0 : 0x1) << 20));
		/* non_stall_int */
		gk20a_mem_wr32(g, cmd->mem, off++, 0x20010008);
		/* ignored */
		gk20a_mem_wr32(g, cmd->mem, off++, 0);
	}
}
static int gk20a_channel_semaphore_wait_syncpt(
		struct gk20a_channel_sync *s, u32 id,
		u32 thresh, struct priv_cmd_entry **entry,
		struct gk20a_fence **fence)
{
	struct gk20a_channel_semaphore *sema =
		container_of(s, struct gk20a_channel_semaphore, ops);
	struct device *dev = dev_from_gk20a(sema->c->g);

	gk20a_err(dev, "trying to use syncpoint synchronization");
	return -ENODEV;
}
/*
 * UGHHH - the sync_fence underlying implementation changes from 3.10 to 3.18.
 * But since there's no API for getting the underlying sync_pts we have to do
 * some conditional compilation.
 */
#ifdef CONFIG_SYNC
static struct gk20a_semaphore *sema_from_sync_fence(struct sync_fence *f)
{
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
	struct sync_pt *pt;

	pt = list_first_entry(&f->pt_list_head, struct sync_pt, pt_list);
	return gk20a_sync_pt_inst_get_sema(pt);
#else
	return gk20a_sync_pt_inst_get_sema(f->cbs[0].sync_pt);
#endif
}

/*
 * Attempt a fast path for waiting on a sync_fence. Basically if the passed
 * sync_fence is backed by a gk20a_semaphore then there's no reason to go
 * through the rigmarole of setting up a separate semaphore which waits on an
 * interrupt from the GPU and then triggers a worker thread to execute a SW
 * based semaphore release. Instead just have the GPU wait on the same
 * semaphore that is going to be incremented by the GPU.
 *
 * Returns 0 on success, -ENODEV when the fast path cannot be taken because
 * the fence is not backed by a GPU semaphore, or a negative error if the
 * priv cmd buffer allocation fails.
 */
static int __semaphore_wait_fd_fast_path(struct channel_gk20a *c,
					 struct sync_fence *fence,
					 struct priv_cmd_entry **wait_cmd,
					 struct gk20a_semaphore **fp_sema)
{
	struct gk20a_semaphore *sema;
	int err;

	if (!gk20a_is_sema_backed_sync_fence(fence))
		return -ENODEV;

	sema = sema_from_sync_fence(fence);

	/*
	 * If there's no underlying sema then that means the underlying sema
	 * has already signaled.
	 */
	if (!sema) {
		*fp_sema = NULL;
		return 0;
	}

	err = gk20a_channel_alloc_priv_cmdbuf(c, 8, wait_cmd);
	if (err)
		return err;

	gk20a_semaphore_get(sema);
	BUG_ON(!atomic_read(&sema->value));
	add_sema_cmd(c->g, c, sema, *wait_cmd, 8, true, false);

	/*
	 * Make sure that gk20a_channel_semaphore_wait_fd() can create another
	 * fence with the underlying semaphore.
	 */
	*fp_sema = sema;

	return 0;
}
#endif
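/*
 * wait_fd, semaphore flavor: try the fast path above first; fall back to
 * the SW wait (sync_fence_wait_async() plus a semaphore release from the
 * waiter callback) only for fences not backed by a GPU semaphore.
 */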
static int gk20a_channel_semaphore_wait_fd(
		struct gk20a_channel_sync *s, int fd,
		struct priv_cmd_entry **entry,
		struct gk20a_fence **fence)
{
	struct gk20a_channel_semaphore *sema =
		container_of(s, struct gk20a_channel_semaphore, ops);
	struct channel_gk20a *c = sema->c;
#ifdef CONFIG_SYNC
	struct gk20a_semaphore *fp_sema;
	struct sync_fence *sync_fence;
	struct priv_cmd_entry *wait_cmd = NULL;
	struct wait_fence_work *w = NULL;
	int err, ret, status;

	sync_fence = gk20a_sync_fence_fdget(fd);
	if (!sync_fence)
		return -EINVAL;

	ret = __semaphore_wait_fd_fast_path(c, sync_fence, &wait_cmd, &fp_sema);
	if (ret == 0) {
		if (fp_sema)
			*fence = gk20a_fence_from_semaphore(sema->timeline,
							    fp_sema,
							    &c->semaphore_wq,
							    NULL, false);
		else
			/*
			 * Allocate an empty fence. It will instantly return
			 * from gk20a_fence_wait().
			 */
			*fence = gk20a_alloc_fence(NULL, NULL, false);

		sync_fence_put(sync_fence);
		goto skip_slow_path;
	}

	/* If the fence has signaled there is no reason to wait on it. */
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
	status = sync_fence->status;
#else
	status = atomic_read(&sync_fence->status);
#endif
	if (status) {
		sync_fence_put(sync_fence);
		goto skip_slow_path;
	}

	err = gk20a_channel_alloc_priv_cmdbuf(c, 8, &wait_cmd);
	if (err) {
		gk20a_err(dev_from_gk20a(c->g),
			  "not enough priv cmd buffer space");
		sync_fence_put(sync_fence);
		return -ENOMEM;
	}

	w = kzalloc(sizeof(*w), GFP_KERNEL);
	if (!w) {
		err = -ENOMEM;
		goto fail_free_worker;
	}

	sync_fence_waiter_init(&w->waiter, gk20a_channel_semaphore_launcher);
	w->ch = c;
	w->sema = gk20a_semaphore_alloc(c);
	if (!w->sema) {
		gk20a_err(dev_from_gk20a(c->g), "ran out of semaphores");
		err = -ENOMEM;
		goto fail_free_worker;
	}

	/* worker takes one reference */
	gk20a_semaphore_get(w->sema);
	gk20a_semaphore_incr(w->sema);

	/* GPU unblocked when the semaphore value increments. */
	add_sema_cmd(c->g, c, w->sema, wait_cmd, 8, true, false);

	ret = sync_fence_wait_async(sync_fence, &w->waiter);

	/*
	 * If the sync_fence has already signaled then the above async_wait
	 * will never trigger. This causes the semaphore release op to never
	 * happen which, in turn, hangs the GPU. That's bad. So let's just
	 * do the gk20a_semaphore_release() right now.
	 */
	if (ret == 1) {
		sync_fence_put(sync_fence);
		gk20a_semaphore_release(w->sema);
		gk20a_semaphore_put(w->sema);
	}

	/* XXX - this fixes an actual bug, we need to hold a ref to this
	   semaphore while the job is in flight. */
	*fence = gk20a_fence_from_semaphore(sema->timeline, w->sema,
					    &c->semaphore_wq,
					    NULL, false);

skip_slow_path:
	*entry = wait_cmd;
	return 0;

fail_free_worker:
	if (w && w->sema)
		gk20a_semaphore_put(w->sema);
	kfree(w);
	sync_fence_put(sync_fence);
	if (wait_cmd)
		gk20a_free_priv_cmdbuf(c, wait_cmd);
	return err;
#else
	gk20a_err(dev_from_gk20a(c->g),
		  "trying to use sync fds with CONFIG_SYNC disabled");
	return -ENODEV;
#endif
}
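/*
 * Completion path: allocate a fresh semaphore, emit a release of it for
 * the end of the job, and wrap it in a gk20a_fence so the CPU and other
 * channels can wait on the job's completion.
 */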
static int __gk20a_channel_semaphore_incr(
		struct gk20a_channel_sync *s, bool wfi_cmd,
		struct sync_fence *dependency,
		struct priv_cmd_entry **entry,
		struct gk20a_fence **fence,
		bool need_sync_fence)
{
	int incr_cmd_size;
	struct priv_cmd_entry *incr_cmd = NULL;
	struct gk20a_channel_semaphore *sp =
		container_of(s, struct gk20a_channel_semaphore, ops);
	struct channel_gk20a *c = sp->c;
	struct gk20a_semaphore *semaphore;
	int err = 0;

	semaphore = gk20a_semaphore_alloc(c);
	if (!semaphore) {
		gk20a_err(dev_from_gk20a(c->g),
			  "ran out of semaphores");
		return -ENOMEM;
	}

	incr_cmd_size = 10;
	err = gk20a_channel_alloc_priv_cmdbuf(c, incr_cmd_size, &incr_cmd);
	if (err) {
		gk20a_err(dev_from_gk20a(c->g),
			  "not enough priv cmd buffer space");
		gk20a_semaphore_put(semaphore);
		return err;
	}

	/* Release the completion semaphore. */
	add_sema_cmd(c->g, c, semaphore, incr_cmd, incr_cmd_size, false,
		     wfi_cmd);

	*fence = gk20a_fence_from_semaphore(sp->timeline, semaphore,
					    &c->semaphore_wq,
					    dependency, wfi_cmd);
	*entry = incr_cmd;
	return 0;
}
static int gk20a_channel_semaphore_incr_wfi(
		struct gk20a_channel_sync *s,
		struct priv_cmd_entry **entry,
		struct gk20a_fence **fence)
{
	return __gk20a_channel_semaphore_incr(s,
			true /* wfi */,
			NULL,
			entry, fence, true);
}

static int gk20a_channel_semaphore_incr(
		struct gk20a_channel_sync *s,
		struct priv_cmd_entry **entry,
		struct gk20a_fence **fence,
		bool need_sync_fence)
{
	/* Don't put a wfi cmd into this one since we're not returning
	 * a fence to user space. */
	return __gk20a_channel_semaphore_incr(s,
			false /* no wfi */,
			NULL,
			entry, fence, need_sync_fence);
}
static int gk20a_channel_semaphore_incr_user(
		struct gk20a_channel_sync *s,
		int wait_fence_fd,
		struct priv_cmd_entry **entry,
		struct gk20a_fence **fence,
		bool wfi,
		bool need_sync_fence)
{
#ifdef CONFIG_SYNC
	struct sync_fence *dependency = NULL;
	int err;

	if (wait_fence_fd >= 0) {
		dependency = gk20a_sync_fence_fdget(wait_fence_fd);
		if (!dependency)
			return -EINVAL;
	}

	err = __gk20a_channel_semaphore_incr(s, wfi, dependency,
					     entry, fence, need_sync_fence);
	if (err) {
		if (dependency)
			sync_fence_put(dependency);
		return err;
	}

	return 0;
#else
	struct gk20a_channel_semaphore *sema =
		container_of(s, struct gk20a_channel_semaphore, ops);
	gk20a_err(dev_from_gk20a(sema->c->g),
		  "trying to use sync fds with CONFIG_SYNC disabled");
	return -ENODEV;
#endif
}
static void gk20a_channel_semaphore_set_min_eq_max(struct gk20a_channel_sync *s)
{
	/* Nothing to do. */
}

static void gk20a_channel_semaphore_signal_timeline(
		struct gk20a_channel_sync *s)
{
	struct gk20a_channel_semaphore *sp =
		container_of(s, struct gk20a_channel_semaphore, ops);
	gk20a_sync_timeline_signal(sp->timeline);
}

static int gk20a_channel_semaphore_syncpt_id(struct gk20a_channel_sync *s)
{
	return -EINVAL;
}

static void gk20a_channel_semaphore_destroy(struct gk20a_channel_sync *s)
{
	struct gk20a_channel_semaphore *sema =
		container_of(s, struct gk20a_channel_semaphore, ops);
	if (sema->timeline)
		gk20a_sync_timeline_destroy(sema->timeline);

	/* The sema pool is cleaned up by the VM destroy. */
	sema->pool = NULL;

	kfree(sema);
}
static struct gk20a_channel_sync *
gk20a_channel_semaphore_create(struct channel_gk20a *c)
{
	int asid = -1;
	struct gk20a_channel_semaphore *sema;
	char pool_name[20];

	if (WARN_ON(!c->vm))
		return NULL;

	sema = kzalloc(sizeof(*sema), GFP_KERNEL);
	if (!sema)
		return NULL;
	sema->c = c;

	if (c->vm->as_share)
		asid = c->vm->as_share->id;

	sprintf(pool_name, "semaphore_pool-%d", c->hw_chid);
	sema->pool = c->vm->sema_pool;

#ifdef CONFIG_SYNC
	sema->timeline = gk20a_sync_timeline_create(
			"gk20a_ch%d_as%d", c->hw_chid, asid);
	if (!sema->timeline) {
		gk20a_channel_semaphore_destroy(&sema->ops);
		return NULL;
	}
#endif
	atomic_set(&sema->ops.refcount, 0);
	sema->ops.wait_syncpt = gk20a_channel_semaphore_wait_syncpt;
	sema->ops.wait_fd = gk20a_channel_semaphore_wait_fd;
	sema->ops.incr = gk20a_channel_semaphore_incr;
	sema->ops.incr_wfi = gk20a_channel_semaphore_incr_wfi;
	sema->ops.incr_user = gk20a_channel_semaphore_incr_user;
	sema->ops.set_min_eq_max = gk20a_channel_semaphore_set_min_eq_max;
	sema->ops.signal_timeline = gk20a_channel_semaphore_signal_timeline;
	sema->ops.syncpt_id = gk20a_channel_semaphore_syncpt_id;
	sema->ops.destroy = gk20a_channel_semaphore_destroy;

	return &sema->ops;
}
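/*
 * Public entry points. gk20a_channel_sync_create() picks the syncpoint
 * backend when the platform has host1x syncpoints and falls back to
 * semaphores otherwise; gk20a_channel_sync_destroy() tears down either
 * kind through the ops vtable.
 */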
void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync)
{
	sync->destroy(sync);
}

struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c)
{
#ifdef CONFIG_TEGRA_GK20A
	if (gk20a_platform_has_syncpoints(c->g->dev))
		return gk20a_channel_syncpt_create(c);
#endif
	return gk20a_channel_semaphore_create(c);
}