/*
 * drivers/video/tegra/host/gk20a/channel_sync_gk20a.c
 *
 * GK20A Channel Synchronization Abstraction
 *
 * Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */

#include <linux/gk20a.h>
#include <linux/version.h>

#include "channel_sync_gk20a.h"
#include "gk20a.h"
#include "fence_gk20a.h"
#include "semaphore_gk20a.h"
#include "sync_gk20a.h"
#include "mm_gk20a.h"

#ifdef CONFIG_SYNC
#include "../drivers/staging/android/sync.h"
#endif

#ifdef CONFIG_TEGRA_GK20A
#include <linux/nvhost.h>
#endif

#ifdef CONFIG_TEGRA_GK20A

struct gk20a_channel_syncpt {
	struct gk20a_channel_sync ops;
	struct channel_gk20a *c;
	struct platform_device *host1x_pdev;
	u32 id;
};

static void add_wait_cmd(struct gk20a *g, struct priv_cmd_entry *cmd, u32 off,
		u32 id, u32 thresh)
{
	off = cmd->off + off;
	/* syncpoint_a */
	gk20a_mem_wr32(g, cmd->mem, off++, 0x2001001C);
	/* payload */
	gk20a_mem_wr32(g, cmd->mem, off++, thresh);
	/* syncpoint_b */
	gk20a_mem_wr32(g, cmd->mem, off++, 0x2001001D);
	/* syncpt_id, switch_en, wait */
	gk20a_mem_wr32(g, cmd->mem, off++, (id << 8) | 0x10);
}
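
/*
 * Illustrative note (added by the editor, not from the original sources):
 * each wait built by add_wait_cmd() above occupies exactly four words, which
 * is why the callers below size their priv cmd buffer allocations as 4 for a
 * single wait or 4 * num_wait_cmds for a fence with several sync points,
 * e.g.:
 *
 *	err = gk20a_channel_alloc_priv_cmdbuf(c, 4, &wait_cmd);
 *	if (!err)
 *		add_wait_cmd(c->g, wait_cmd, 0, id, thresh);
 *
 * This mirrors gk20a_channel_syncpt_wait_syncpt() and is only a sketch of
 * how the helper pairs with the allocator.
 */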

static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s,
		u32 id, u32 thresh, struct priv_cmd_entry **entry,
		struct gk20a_fence **fence)
{
	struct gk20a_channel_syncpt *sp =
		container_of(s, struct gk20a_channel_syncpt, ops);
	struct priv_cmd_entry *wait_cmd = NULL;
	struct channel_gk20a *c = sp->c;
	int err = 0;

	if (!nvhost_syncpt_is_valid_pt_ext(sp->host1x_pdev, id)) {
		dev_warn(dev_from_gk20a(c->g),
				"invalid wait id in gpfifo submit, elided");
		return 0;
	}

	if (nvhost_syncpt_is_expired_ext(sp->host1x_pdev, id, thresh))
		return 0;

	err = gk20a_channel_alloc_priv_cmdbuf(c, 4, &wait_cmd);
	if (err) {
		gk20a_err(dev_from_gk20a(c->g),
				"not enough priv cmd buffer space");
		return err;
	}

	add_wait_cmd(c->g, wait_cmd, 0, id, thresh);

	*entry = wait_cmd;
	*fence = NULL;
	return 0;
}

static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd,
		struct priv_cmd_entry **entry,
		struct gk20a_fence **fence)
{
#ifdef CONFIG_SYNC
	int i;
	int num_wait_cmds;
	struct sync_fence *sync_fence;
	struct sync_pt *pt;
	struct priv_cmd_entry *wait_cmd = NULL;
	struct gk20a_channel_syncpt *sp =
		container_of(s, struct gk20a_channel_syncpt, ops);
	struct channel_gk20a *c = sp->c;
	u32 wait_id;
	int err = 0;

	sync_fence = nvhost_sync_fdget(fd);
	if (!sync_fence)
		return -EINVAL;

	/* validate syncpt ids */
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
	list_for_each_entry(pt, &sync_fence->pt_list_head, pt_list) {
#else
	for (i = 0; i < sync_fence->num_fences; i++) {
		pt = sync_pt_from_fence(sync_fence->cbs[i].sync_pt);
#endif
		wait_id = nvhost_sync_pt_id(pt);
		if (!wait_id || !nvhost_syncpt_is_valid_pt_ext(sp->host1x_pdev,
				wait_id)) {
			sync_fence_put(sync_fence);
			return -EINVAL;
		}
#if !(LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0))
	}
#else
	}
#endif

	num_wait_cmds = nvhost_sync_num_pts(sync_fence);
	if (num_wait_cmds == 0) {
		sync_fence_put(sync_fence);
		return 0;
	}

	err = gk20a_channel_alloc_priv_cmdbuf(c, 4 * num_wait_cmds, &wait_cmd);
	if (err) {
		gk20a_err(dev_from_gk20a(c->g),
				"not enough priv cmd buffer space");
		sync_fence_put(sync_fence);
		return err;
	}

	i = 0;
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
	list_for_each_entry(pt, &sync_fence->pt_list_head, pt_list) {
#else
	for (i = 0; i < sync_fence->num_fences; i++) {
		struct fence *f = sync_fence->cbs[i].sync_pt;
		struct sync_pt *pt = sync_pt_from_fence(f);
#endif
		u32 wait_id = nvhost_sync_pt_id(pt);
		u32 wait_value = nvhost_sync_pt_thresh(pt);

		if (nvhost_syncpt_is_expired_ext(sp->host1x_pdev,
				wait_id, wait_value)) {
			/* each wait_cmd is 4 u32s */
			gk20a_memset(c->g, wait_cmd->mem,
					(wait_cmd->off + i * 4) * sizeof(u32),
					0, 4 * sizeof(u32));
		} else
			add_wait_cmd(c->g, wait_cmd, i * 4, wait_id,
					wait_value);
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
		i++;
	}
#else
	}
#endif

	WARN_ON(i != num_wait_cmds);
	sync_fence_put(sync_fence);

	*entry = wait_cmd;
	*fence = NULL;
	return 0;
#else
	return -ENODEV;
#endif
}

static void gk20a_channel_syncpt_update(void *priv, int nr_completed)
{
	struct channel_gk20a *ch = priv;

	gk20a_channel_update(ch, nr_completed);

	/* note: channel_get() is in __gk20a_channel_syncpt_incr() */
	gk20a_channel_put(ch);
}

static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
		bool wfi_cmd,
		bool register_irq,
		struct priv_cmd_entry **entry,
		struct gk20a_fence **fence,
		bool need_sync_fence)
{
	u32 thresh;
	int incr_cmd_size;
	int off;
	int err;
	struct priv_cmd_entry *incr_cmd = NULL;
	struct gk20a_channel_syncpt *sp =
		container_of(s, struct gk20a_channel_syncpt, ops);
	struct channel_gk20a *c = sp->c;

	incr_cmd_size = 6;
	if (wfi_cmd)
		incr_cmd_size += 2;

	err = gk20a_channel_alloc_priv_cmdbuf(c, incr_cmd_size, &incr_cmd);
	if (err)
		return err;

	off = incr_cmd->off;

	/* WAR for hw bug 1491360: syncpt needs to be incremented twice */

	if (wfi_cmd) {
		/* wfi */
		gk20a_mem_wr32(c->g, incr_cmd->mem, off++, 0x2001001E);
		/* handle, ignored */
		gk20a_mem_wr32(c->g, incr_cmd->mem, off++, 0x00000000);
	}
	/* syncpoint_a */
	gk20a_mem_wr32(c->g, incr_cmd->mem, off++, 0x2001001C);
	/* payload, ignored */
	gk20a_mem_wr32(c->g, incr_cmd->mem, off++, 0);
	/* syncpoint_b */
	gk20a_mem_wr32(c->g, incr_cmd->mem, off++, 0x2001001D);
	/* syncpt_id, incr */
	gk20a_mem_wr32(c->g, incr_cmd->mem, off++, (sp->id << 8) | 0x1);
	/* syncpoint_b */
	gk20a_mem_wr32(c->g, incr_cmd->mem, off++, 0x2001001D);
	/* syncpt_id, incr */
	gk20a_mem_wr32(c->g, incr_cmd->mem, off++, (sp->id << 8) | 0x1);

	WARN_ON(off - incr_cmd->off != incr_cmd_size);

	thresh = nvhost_syncpt_incr_max_ext(sp->host1x_pdev, sp->id, 2);

	if (register_irq) {
		struct channel_gk20a *referenced = gk20a_channel_get(c);

		WARN_ON(!referenced);

		if (referenced) {
			/* note: channel_put() is in
			 * gk20a_channel_syncpt_update() */

			err = nvhost_intr_register_notifier(
					sp->host1x_pdev,
					sp->id, thresh,
					gk20a_channel_syncpt_update, c);
			if (err)
				gk20a_channel_put(referenced);

			/* Adding interrupt action should
			 * never fail. A proper error handling
			 * here would require us to decrement
			 * the syncpt max back to its original
			 * value. */
			WARN(err,
				"failed to set submit complete interrupt");
		}
	}

	*fence = gk20a_fence_from_syncpt(sp->host1x_pdev, sp->id, thresh,
			wfi_cmd, need_sync_fence);
	*entry = incr_cmd;
	return 0;
}
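
/*
 * Reference note (added by the editor, derived from the code above): because
 * of the hw bug 1491360 WAR the pushbuffer increments the syncpoint twice,
 * so the increment sequence is 6 words, plus 2 more when a WFI precedes it,
 * and nvhost_syncpt_incr_max_ext() advances the syncpoint max by 2 to match;
 * the threshold it returns is what the returned fence waits on.
 */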

static int gk20a_channel_syncpt_incr_wfi(struct gk20a_channel_sync *s,
		struct priv_cmd_entry **entry,
		struct gk20a_fence **fence)
{
	return __gk20a_channel_syncpt_incr(s,
			true /* wfi */,
			false /* no irq handler */,
			entry, fence, true);
}

static int gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
		struct priv_cmd_entry **entry,
		struct gk20a_fence **fence,
		bool need_sync_fence)
{
	/* Don't put wfi cmd to this one since we're not returning
	 * a fence to user space. */
	return __gk20a_channel_syncpt_incr(s,
			false /* no wfi */,
			true /* register irq */,
			entry, fence, need_sync_fence);
}

static int gk20a_channel_syncpt_incr_user(struct gk20a_channel_sync *s,
		int wait_fence_fd,
		struct priv_cmd_entry **entry,
		struct gk20a_fence **fence,
		bool wfi,
		bool need_sync_fence)
{
	/* Need to do 'wfi + host incr' since we return the fence
	 * to user space. */
	return __gk20a_channel_syncpt_incr(s,
			wfi,
			true /* register irq */,
			entry, fence, need_sync_fence);
}

static void gk20a_channel_syncpt_set_min_eq_max(struct gk20a_channel_sync *s)
{
	struct gk20a_channel_syncpt *sp =
		container_of(s, struct gk20a_channel_syncpt, ops);
	nvhost_syncpt_set_min_eq_max_ext(sp->host1x_pdev, sp->id);
}

static void gk20a_channel_syncpt_signal_timeline(
		struct gk20a_channel_sync *s)
{
	/* Nothing to do. */
}

static int gk20a_channel_syncpt_id(struct gk20a_channel_sync *s)
{
	struct gk20a_channel_syncpt *sp =
		container_of(s, struct gk20a_channel_syncpt, ops);
	return sp->id;
}

static void gk20a_channel_syncpt_destroy(struct gk20a_channel_sync *s)
{
	struct gk20a_channel_syncpt *sp =
		container_of(s, struct gk20a_channel_syncpt, ops);
	nvhost_syncpt_set_min_eq_max_ext(sp->host1x_pdev, sp->id);
	nvhost_syncpt_put_ref_ext(sp->host1x_pdev, sp->id);
	kfree(sp);
}

static struct gk20a_channel_sync *
gk20a_channel_syncpt_create(struct channel_gk20a *c)
{
	struct gk20a_channel_syncpt *sp;
	char syncpt_name[32];

	sp = kzalloc(sizeof(*sp), GFP_KERNEL);
	if (!sp)
		return NULL;

	sp->c = c;
	sp->host1x_pdev = c->g->host1x_dev;

	snprintf(syncpt_name, sizeof(syncpt_name),
		"%s_%d", dev_name(c->g->dev), c->hw_chid);

	sp->id = nvhost_get_syncpt_host_managed(sp->host1x_pdev,
			c->hw_chid, syncpt_name);
	if (!sp->id) {
		kfree(sp);
		gk20a_err(c->g->dev, "failed to get free syncpt");
		return NULL;
	}

	nvhost_syncpt_set_min_eq_max_ext(sp->host1x_pdev, sp->id);

	atomic_set(&sp->ops.refcount, 0);
	sp->ops.wait_syncpt = gk20a_channel_syncpt_wait_syncpt;
	sp->ops.wait_fd = gk20a_channel_syncpt_wait_fd;
	sp->ops.incr = gk20a_channel_syncpt_incr;
	sp->ops.incr_wfi = gk20a_channel_syncpt_incr_wfi;
	sp->ops.incr_user = gk20a_channel_syncpt_incr_user;
	sp->ops.set_min_eq_max = gk20a_channel_syncpt_set_min_eq_max;
	sp->ops.signal_timeline = gk20a_channel_syncpt_signal_timeline;
	sp->ops.syncpt_id = gk20a_channel_syncpt_id;
	sp->ops.destroy = gk20a_channel_syncpt_destroy;

	return &sp->ops;
}
#endif /* CONFIG_TEGRA_GK20A */

struct gk20a_channel_semaphore {
	struct gk20a_channel_sync ops;
	struct channel_gk20a *c;

	/* A semaphore pool owned by this channel. */
	struct gk20a_semaphore_pool *pool;

	/* A sync timeline that advances when gpu completes work. */
	struct sync_timeline *timeline;
};

#ifdef CONFIG_SYNC
struct wait_fence_work {
	struct sync_fence_waiter waiter;
	struct channel_gk20a *ch;
	struct gk20a_semaphore *sema;
};

static void gk20a_channel_semaphore_launcher(
		struct sync_fence *fence,
		struct sync_fence_waiter *waiter)
{
	int err;
	struct wait_fence_work *w =
		container_of(waiter, struct wait_fence_work, waiter);
	struct gk20a *g = w->ch->g;

	gk20a_dbg_info("waiting for pre fence %p '%s'",
			fence, fence->name);
	err = sync_fence_wait(fence, -1);
	if (err < 0)
		dev_err(g->dev, "error waiting pre-fence: %d\n", err);

	gk20a_dbg_info(
		"wait completed (%d) for fence %p '%s', triggering gpu work",
		err, fence, fence->name);
	sync_fence_put(fence);
	gk20a_semaphore_release(w->sema);
	gk20a_semaphore_put(w->sema);
	kfree(w);
}
#endif

static void add_sema_cmd(struct gk20a *g, struct channel_gk20a *c,
		struct gk20a_semaphore *s, struct priv_cmd_entry *cmd,
		int cmd_size, bool acquire, bool wfi)
{
	u32 off = cmd->off;
	u64 va;

	/*
	 * RO for acquire (since we just need to read the mem) and RW for
	 * release since we will need to write back to the semaphore memory.
	 */
	va = acquire ? gk20a_semaphore_gpu_ro_va(s) :
		gk20a_semaphore_gpu_rw_va(s);

	/*
	 * If the op is not an acquire (and therefore a release) we should
	 * incr the underlying sema next_value.
	 */
	if (!acquire)
		gk20a_semaphore_incr(s);

	/* semaphore_a */
	gk20a_mem_wr32(g, cmd->mem, off++, 0x20010004);
	/* offset_upper */
	gk20a_mem_wr32(g, cmd->mem, off++, (va >> 32) & 0xff);
	/* semaphore_b */
	gk20a_mem_wr32(g, cmd->mem, off++, 0x20010005);
	/* offset */
	gk20a_mem_wr32(g, cmd->mem, off++, va & 0xffffffff);

	if (acquire) {
		/* semaphore_c */
		gk20a_mem_wr32(g, cmd->mem, off++, 0x20010006);
		/* payload */
		gk20a_mem_wr32(g, cmd->mem, off++,
				gk20a_semaphore_get_value(s));
		/* semaphore_d */
		gk20a_mem_wr32(g, cmd->mem, off++, 0x20010007);
		/* operation: acq_geq, switch_en */
		gk20a_mem_wr32(g, cmd->mem, off++, 0x4 | (0x1 << 12));
	} else {
		/* semaphore_c */
		gk20a_mem_wr32(g, cmd->mem, off++, 0x20010006);
		/* payload */
		gk20a_mem_wr32(g, cmd->mem, off++,
				gk20a_semaphore_get_value(s));
		/* semaphore_d */
		gk20a_mem_wr32(g, cmd->mem, off++, 0x20010007);
		/* operation: release, wfi */
		gk20a_mem_wr32(g, cmd->mem, off++,
				0x2 | ((wfi ? 0x0 : 0x1) << 20));
		/* non_stall_int */
		gk20a_mem_wr32(g, cmd->mem, off++, 0x20010008);
		/* ignored */
		gk20a_mem_wr32(g, cmd->mem, off++, 0);
	}
}
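
/*
 * Sizing note (added by the editor, derived from the writes above): an
 * acquire emits 8 words (4 for the semaphore address setup, 4 for the
 * acq_geq operation) and a release emits 10 words (the extra 2 being the
 * non_stall_int). The callers below allocate their priv cmd buffers
 * accordingly (8 for semaphore waits, 10 for the completion increment);
 * the cmd_size argument is not referenced in the body itself.
 */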

static int gk20a_channel_semaphore_wait_syncpt(
		struct gk20a_channel_sync *s, u32 id,
		u32 thresh, struct priv_cmd_entry **entry,
		struct gk20a_fence **fence)
{
	struct gk20a_channel_semaphore *sema =
		container_of(s, struct gk20a_channel_semaphore, ops);
	struct device *dev = dev_from_gk20a(sema->c->g);

	gk20a_err(dev, "trying to use syncpoint synchronization");
	return -ENODEV;
}

/*
 * UGHHH - the sync_fence underlying implementation changes from 3.10 to 3.18.
 * But since there's no API for getting the underlying sync_pts we have to do
 * some conditional compilation.
 */
#ifdef CONFIG_SYNC
static struct gk20a_semaphore *sema_from_sync_fence(struct sync_fence *f)
{
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
	struct sync_pt *pt;

	pt = list_first_entry(&f->pt_list_head, struct sync_pt, pt_list);
	return gk20a_sync_pt_inst_get_sema(pt);
#else
	return gk20a_sync_pt_inst_get_sema(f->cbs[0].sync_pt);
#endif
}

/*
 * Attempt a fast path for waiting on a sync_fence. Basically if the passed
 * sync_fence is backed by a gk20a_semaphore then there's no reason to go
 * through the rigmarole of setting up a separate semaphore which waits on an
 * interrupt from the GPU and then triggers a worker thread to execute a SW
 * based semaphore release. Instead just have the GPU wait on the same
 * semaphore that is going to be incremented by the GPU.
 *
 * Returns 0 on success (fast path taken, or the fence has already signaled),
 * -ENODEV when the fast path cannot be taken because the fence is not backed
 * by a GPU semaphore, or a negative error if allocating the wait command
 * fails.
 */
static int __semaphore_wait_fd_fast_path(struct channel_gk20a *c,
		struct sync_fence *fence,
		struct priv_cmd_entry **wait_cmd,
		struct gk20a_semaphore **fp_sema)
{
	struct gk20a_semaphore *sema;
	int err;

	if (!gk20a_is_sema_backed_sync_fence(fence))
		return -ENODEV;

	sema = sema_from_sync_fence(fence);

	/*
	 * If there's no underlying sema then that means the underlying sema
	 * has already signaled.
	 */
	if (!sema) {
		*fp_sema = NULL;
		return 0;
	}

	err = gk20a_channel_alloc_priv_cmdbuf(c, 8, wait_cmd);
	if (err)
		return err;

	gk20a_semaphore_get(sema);
	BUG_ON(!atomic_read(&sema->value));
	add_sema_cmd(c->g, c, sema, *wait_cmd, 8, true, false);

	/*
	 * Make sure that gk20a_channel_semaphore_wait_fd() can create another
	 * fence with the underlying semaphore.
	 */
	*fp_sema = sema;

	return 0;
}
#endif

static int gk20a_channel_semaphore_wait_fd(
		struct gk20a_channel_sync *s, int fd,
		struct priv_cmd_entry **entry,
		struct gk20a_fence **fence)
{
	struct gk20a_channel_semaphore *sema =
		container_of(s, struct gk20a_channel_semaphore, ops);
	struct channel_gk20a *c = sema->c;
#ifdef CONFIG_SYNC
	struct gk20a_semaphore *fp_sema;
	struct sync_fence *sync_fence;
	struct priv_cmd_entry *wait_cmd = NULL;
	struct wait_fence_work *w = NULL;
	int err, ret, status;

	sync_fence = gk20a_sync_fence_fdget(fd);
	if (!sync_fence)
		return -EINVAL;

	ret = __semaphore_wait_fd_fast_path(c, sync_fence, &wait_cmd, &fp_sema);
	if (ret == 0) {
		if (fp_sema)
			*fence = gk20a_fence_from_semaphore(sema->timeline,
					fp_sema,
					&c->semaphore_wq,
					NULL, false);
		else
			/*
			 * Allocate an empty fence. It will instantly return
			 * from gk20a_fence_wait().
			 */
			*fence = gk20a_alloc_fence(NULL, NULL, false);

		sync_fence_put(sync_fence);
		goto skip_slow_path;
	}

	/* If the fence has signaled there is no reason to wait on it. */
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
	status = sync_fence->status;
#else
	status = atomic_read(&sync_fence->status);
#endif
	if (status) {
		sync_fence_put(sync_fence);
		goto skip_slow_path;
	}

	err = gk20a_channel_alloc_priv_cmdbuf(c, 8, &wait_cmd);
	if (err) {
		gk20a_err(dev_from_gk20a(c->g),
				"not enough priv cmd buffer space");
		sync_fence_put(sync_fence);
		return -ENOMEM;
	}

	w = kzalloc(sizeof(*w), GFP_KERNEL);
	if (!w) {
		err = -ENOMEM;
		/* drop the reference taken by gk20a_sync_fence_fdget() */
		sync_fence_put(sync_fence);
		goto fail_free_cmdbuf;
	}

	sync_fence_waiter_init(&w->waiter, gk20a_channel_semaphore_launcher);
	w->ch = c;
	w->sema = gk20a_semaphore_alloc(c);
	if (!w->sema) {
		gk20a_err(dev_from_gk20a(c->g), "ran out of semaphores");
		err = -ENOMEM;
		goto fail_free_worker;
	}

	/* worker takes one reference */
	gk20a_semaphore_get(w->sema);
	gk20a_semaphore_incr(w->sema);

	/* GPU unblocked when the semaphore value increments. */
	add_sema_cmd(c->g, c, w->sema, wait_cmd, 8, true, false);

	ret = sync_fence_wait_async(sync_fence, &w->waiter);

	/*
	 * If the sync_fence has already signaled then the above async_wait
	 * will never trigger. This causes the semaphore release op to never
	 * happen which, in turn, hangs the GPU. That's bad. So let's just
	 * do the gk20a_semaphore_release() right now.
	 */
	if (ret == 1) {
		sync_fence_put(sync_fence);
		gk20a_semaphore_release(w->sema);
		gk20a_semaphore_put(w->sema);
	}

	/* XXX - this fixes an actual bug, we need to hold a ref to this
	 * semaphore while the job is in flight. */
	*fence = gk20a_fence_from_semaphore(sema->timeline, w->sema,
			&c->semaphore_wq,
			NULL, false);

skip_slow_path:
	*entry = wait_cmd;
	return 0;

fail_free_worker:
	if (w && w->sema)
		gk20a_semaphore_put(w->sema);
	kfree(w);
	sync_fence_put(sync_fence);
fail_free_cmdbuf:
	if (wait_cmd)
		gk20a_free_priv_cmdbuf(c, wait_cmd);
	return err;
#else
	gk20a_err(dev_from_gk20a(c->g),
			"trying to use sync fds with CONFIG_SYNC disabled");
	return -ENODEV;
#endif
}

static int __gk20a_channel_semaphore_incr(
		struct gk20a_channel_sync *s, bool wfi_cmd,
		struct sync_fence *dependency,
		struct priv_cmd_entry **entry,
		struct gk20a_fence **fence,
		bool need_sync_fence)
{
	int incr_cmd_size;
	struct priv_cmd_entry *incr_cmd = NULL;
	struct gk20a_channel_semaphore *sp =
		container_of(s, struct gk20a_channel_semaphore, ops);
	struct channel_gk20a *c = sp->c;
	struct gk20a_semaphore *semaphore;
	int err = 0;

	semaphore = gk20a_semaphore_alloc(c);
	if (!semaphore) {
		gk20a_err(dev_from_gk20a(c->g),
				"ran out of semaphores");
		return -ENOMEM;
	}

	incr_cmd_size = 10;
	err = gk20a_channel_alloc_priv_cmdbuf(c, incr_cmd_size, &incr_cmd);
	if (err) {
		gk20a_err(dev_from_gk20a(c->g),
				"not enough priv cmd buffer space");
		gk20a_semaphore_put(semaphore);
		return err;
	}

	/* Release the completion semaphore. */
	add_sema_cmd(c->g, c, semaphore, incr_cmd, 14, false, wfi_cmd);

	*fence = gk20a_fence_from_semaphore(sp->timeline, semaphore,
			&c->semaphore_wq,
			dependency, wfi_cmd);
	*entry = incr_cmd;
	return 0;
}

static int gk20a_channel_semaphore_incr_wfi(
		struct gk20a_channel_sync *s,
		struct priv_cmd_entry **entry,
		struct gk20a_fence **fence)
{
	return __gk20a_channel_semaphore_incr(s,
			true /* wfi */,
			NULL,
			entry, fence, true);
}

static int gk20a_channel_semaphore_incr(
		struct gk20a_channel_sync *s,
		struct priv_cmd_entry **entry,
		struct gk20a_fence **fence,
		bool need_sync_fence)
{
	/* Don't put wfi cmd to this one since we're not returning
	 * a fence to user space. */
	return __gk20a_channel_semaphore_incr(s,
			false /* no wfi */,
			NULL,
			entry, fence, need_sync_fence);
}

static int gk20a_channel_semaphore_incr_user(
		struct gk20a_channel_sync *s,
		int wait_fence_fd,
		struct priv_cmd_entry **entry,
		struct gk20a_fence **fence,
		bool wfi,
		bool need_sync_fence)
{
#ifdef CONFIG_SYNC
	struct sync_fence *dependency = NULL;
	int err;

	if (wait_fence_fd >= 0) {
		dependency = gk20a_sync_fence_fdget(wait_fence_fd);
		if (!dependency)
			return -EINVAL;
	}

	err = __gk20a_channel_semaphore_incr(s, wfi, dependency,
			entry, fence, need_sync_fence);
	if (err) {
		if (dependency)
			sync_fence_put(dependency);
		return err;
	}

	return 0;
#else
	struct gk20a_channel_semaphore *sema =
		container_of(s, struct gk20a_channel_semaphore, ops);
	gk20a_err(dev_from_gk20a(sema->c->g),
			"trying to use sync fds with CONFIG_SYNC disabled");
	return -ENODEV;
#endif
}

static void gk20a_channel_semaphore_set_min_eq_max(struct gk20a_channel_sync *s)
{
	/* Nothing to do. */
}

static void gk20a_channel_semaphore_signal_timeline(
		struct gk20a_channel_sync *s)
{
	struct gk20a_channel_semaphore *sp =
		container_of(s, struct gk20a_channel_semaphore, ops);
	gk20a_sync_timeline_signal(sp->timeline);
}

static int gk20a_channel_semaphore_syncpt_id(struct gk20a_channel_sync *s)
{
	return -EINVAL;
}

static void gk20a_channel_semaphore_destroy(struct gk20a_channel_sync *s)
{
	struct gk20a_channel_semaphore *sema =
		container_of(s, struct gk20a_channel_semaphore, ops);
	if (sema->timeline)
		gk20a_sync_timeline_destroy(sema->timeline);

	/* The sema pool is cleaned up by the VM destroy. */
	sema->pool = NULL;

	kfree(sema);
}

static struct gk20a_channel_sync *
gk20a_channel_semaphore_create(struct channel_gk20a *c)
{
	int asid = -1;
	struct gk20a_channel_semaphore *sema;
	char pool_name[20];

	if (WARN_ON(!c->vm))
		return NULL;

	sema = kzalloc(sizeof(*sema), GFP_KERNEL);
	if (!sema)
		return NULL;
	sema->c = c;

	if (c->vm->as_share)
		asid = c->vm->as_share->id;

	sprintf(pool_name, "semaphore_pool-%d", c->hw_chid);
	sema->pool = c->vm->sema_pool;

#ifdef CONFIG_SYNC
	sema->timeline = gk20a_sync_timeline_create(
			"gk20a_ch%d_as%d", c->hw_chid, asid);
	if (!sema->timeline) {
		gk20a_channel_semaphore_destroy(&sema->ops);
		return NULL;
	}
#endif
	atomic_set(&sema->ops.refcount, 0);
	sema->ops.wait_syncpt = gk20a_channel_semaphore_wait_syncpt;
	sema->ops.wait_fd = gk20a_channel_semaphore_wait_fd;
	sema->ops.incr = gk20a_channel_semaphore_incr;
	sema->ops.incr_wfi = gk20a_channel_semaphore_incr_wfi;
	sema->ops.incr_user = gk20a_channel_semaphore_incr_user;
	sema->ops.set_min_eq_max = gk20a_channel_semaphore_set_min_eq_max;
	sema->ops.signal_timeline = gk20a_channel_semaphore_signal_timeline;
	sema->ops.syncpt_id = gk20a_channel_semaphore_syncpt_id;
	sema->ops.destroy = gk20a_channel_semaphore_destroy;

	return &sema->ops;
}

void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync)
{
	sync->destroy(sync);
}

struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c)
{
#ifdef CONFIG_TEGRA_GK20A
	if (gk20a_platform_has_syncpoints(c->g->dev))
		return gk20a_channel_syncpt_create(c);
#endif
	return gk20a_channel_semaphore_create(c);
}
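
/*
 * Illustrative usage sketch (added by the editor, not part of this file; the
 * real call sites live in the channel submit path). A caller typically
 * creates the sync object once per channel, brackets the gpfifo entries it
 * submits with a pre-fence wait and a completion increment, and destroys the
 * object when the channel goes away. The names pre_cmd/post_cmd and
 * pre_fence/post_fence are placeholders:
 *
 *	struct gk20a_channel_sync *sync = gk20a_channel_sync_create(c);
 *	struct priv_cmd_entry *pre_cmd, *post_cmd;
 *	struct gk20a_fence *pre_fence, *post_fence;
 *
 *	sync->wait_fd(sync, fd, &pre_cmd, &pre_fence);
 *	sync->incr_user(sync, -1, &post_cmd, &post_fence, false, true);
 *	...
 *	gk20a_channel_sync_destroy(sync);
 */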