mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
The pre-fence wait for semaphores in the submit path has supported a fast path for fences that have only one underlying semaphore. The fast path just inserts the wait on this sema into the pushbuffer directly. For other fences, the path has been using a CPU wait indirection, signaling another semaphore when we get the CPU-side callback.

Instead of only supporting prefences with one sema, unroll all the individual semaphores and insert waits for each into the pushbuffer, like we've already been doing with syncpoints. Now all sema-backed syncs get the fast path. This simplifies the logic and makes it more explicit that only foreign fences need the CPU wait.

There is no need to hold references to the sync fence or the semas inside: this submitted job only needs the global read-only sema mapping that is guaranteed to stay alive while the VM of this channel stays alive, and the job does not outlive this channel.

Jira NVGPU-43
Jira NVGPU-66
Jira NVGPU-513

Change-Id: I7cfbb510001d998a864aed8d6afd1582b9adb80d
Signed-off-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1636345
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
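To illustrate the idea described above with a standalone sketch (this is not driver code: the struct and function names here are hypothetical, and the command words simply mirror the acquire encoding used by add_sema_cmd() further down in this file), the change walks every sync point backing the pre-fence and emits one GPU semaphore acquire per point directly into the pushbuffer, padding already-signaled entries with zeros instead of falling back to a CPU wait:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Hypothetical stand-ins for the driver's fence/semaphore objects. */
struct fake_sema {
	uint64_t gpu_va;   /* GPU VA of the semaphore payload */
	uint32_t payload;  /* value the GPU must wait for */
	int signaled;      /* already expired on the CPU side? */
};

struct fake_fence {
	int num_pts;
	const struct fake_sema *pts[8];
};

#define WAIT_CMD_WORDS 8U /* words emitted per semaphore acquire */

/* Emit one acquire per underlying semaphore; expired ones become no-ops. */
static void emit_unrolled_waits(const struct fake_fence *f, uint32_t *pushbuf)
{
	int i;

	for (i = 0; i < f->num_pts; i++) {
		uint32_t *cmd = pushbuf + i * WAIT_CMD_WORDS;
		const struct fake_sema *s = f->pts[i];

		if (s->signaled) {
			/* Nothing to wait for: pad the slot with zeros. */
			memset(cmd, 0, WAIT_CMD_WORDS * sizeof(*cmd));
			continue;
		}

		/* Simplified ACQ_GEQ encoding, as in add_sema_cmd() below. */
		cmd[0] = 0x20010004;                  /* semaphore_a */
		cmd[1] = (uint32_t)(s->gpu_va >> 32); /* offset_upper */
		cmd[2] = 0x20010005;                  /* semaphore_b */
		cmd[3] = (uint32_t)s->gpu_va;         /* offset */
		cmd[4] = 0x20010006;                  /* semaphore_c */
		cmd[5] = s->payload;                  /* payload */
		cmd[6] = 0x20010007;                  /* semaphore_d */
		cmd[7] = 0x4 | (0x1 << 12);           /* acq_geq, switch_en */
	}
}

int main(void)
{
	struct fake_sema a = { 0x100000, 7, 0 }, b = { 0x100010, 3, 1 };
	struct fake_fence f = { 2, { &a, &b } };
	uint32_t pushbuf[2 * WAIT_CMD_WORDS];

	emit_unrolled_waits(&f, pushbuf);
	printf("entry 0 starts with 0x%08x (wait), entry 1 with 0x%08x (padded)\n",
	       (unsigned int)pushbuf[0], (unsigned int)pushbuf[WAIT_CMD_WORDS]);
	return 0;
}

The in-tree counterpart of this loop is semaphore_wait_fd_native() in the file below; the CPU-wait path (semaphore_wait_fd_proxy()) remains only as a fallback for foreign fences that are not backed by nvgpu semaphores.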
956 lines
26 KiB
C
/*
 * GK20A Channel Synchronization Abstraction
 *
 * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include <nvgpu/semaphore.h>
#include <nvgpu/kmem.h>
#include <nvgpu/log.h>
#include <nvgpu/atomic.h>
#include <nvgpu/bug.h>
#include <nvgpu/list.h>
#include <nvgpu/nvhost.h>

#include "channel_sync_gk20a.h"
#include "gk20a.h"
#include "fence_gk20a.h"
#include "sync_gk20a.h"
#include "mm_gk20a.h"

#ifdef CONFIG_SYNC
#include "../drivers/staging/android/sync.h"
#endif

#ifdef CONFIG_TEGRA_GK20A_NVHOST

struct gk20a_channel_syncpt {
	struct gk20a_channel_sync ops;
	struct channel_gk20a *c;
	struct nvgpu_nvhost_dev *nvhost_dev;
	u32 id;
	struct nvgpu_mem syncpt_buf;
};

static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s,
		u32 id, u32 thresh, struct priv_cmd_entry *wait_cmd,
		struct gk20a_fence *fence)
{
	struct gk20a_channel_syncpt *sp =
		container_of(s, struct gk20a_channel_syncpt, ops);
	struct channel_gk20a *c = sp->c;
	int err = 0;

	if (!nvgpu_nvhost_syncpt_is_valid_pt_ext(sp->nvhost_dev, id)) {
		nvgpu_warn(c->g, "invalid wait id in gpfifo submit, elided");
		return 0;
	}

	if (nvgpu_nvhost_syncpt_is_expired_ext(sp->nvhost_dev, id, thresh))
		return 0;

	err = gk20a_channel_alloc_priv_cmdbuf(c,
			c->g->ops.fifo.get_syncpt_wait_cmd_size(), wait_cmd);
	if (err) {
		nvgpu_err(c->g,
				"not enough priv cmd buffer space");
		return err;
	}

	nvgpu_log(c->g, gpu_dbg_info, "sp->id %d gpu va %llx",
			id, sp->c->vm->syncpt_ro_map_gpu_va);
	c->g->ops.fifo.add_syncpt_wait_cmd(c->g, wait_cmd, 0, id,
			thresh, c->vm->syncpt_ro_map_gpu_va);

	return 0;
}

static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd,
		struct priv_cmd_entry *wait_cmd,
		struct gk20a_fence *fence)
{
#ifdef CONFIG_SYNC
	int i;
	int num_wait_cmds;
	struct sync_fence *sync_fence;
	struct sync_pt *pt;
	struct gk20a_channel_syncpt *sp =
		container_of(s, struct gk20a_channel_syncpt, ops);
	struct channel_gk20a *c = sp->c;
	u32 wait_id;
	int err = 0;
	u32 wait_cmd_size = 0;

	sync_fence = nvgpu_nvhost_sync_fdget(fd);
	if (!sync_fence)
		return -EINVAL;

	/* validate syncpt ids */
	for (i = 0; i < sync_fence->num_fences; i++) {
		pt = sync_pt_from_fence(sync_fence->cbs[i].sync_pt);
		wait_id = nvgpu_nvhost_sync_pt_id(pt);
		if (!wait_id || !nvgpu_nvhost_syncpt_is_valid_pt_ext(
					sp->nvhost_dev, wait_id)) {
			sync_fence_put(sync_fence);
			return -EINVAL;
		}
	}

	num_wait_cmds = nvgpu_nvhost_sync_num_pts(sync_fence);
	if (num_wait_cmds == 0) {
		sync_fence_put(sync_fence);
		return 0;
	}
	wait_cmd_size = c->g->ops.fifo.get_syncpt_wait_cmd_size();
	err = gk20a_channel_alloc_priv_cmdbuf(c,
			wait_cmd_size * num_wait_cmds,
			wait_cmd);
	if (err) {
		nvgpu_err(c->g,
				"not enough priv cmd buffer space");
		sync_fence_put(sync_fence);
		return err;
	}

	i = 0;
	for (i = 0; i < sync_fence->num_fences; i++) {
		struct fence *f = sync_fence->cbs[i].sync_pt;
		struct sync_pt *pt = sync_pt_from_fence(f);
		u32 wait_id = nvgpu_nvhost_sync_pt_id(pt);
		u32 wait_value = nvgpu_nvhost_sync_pt_thresh(pt);

		if (nvgpu_nvhost_syncpt_is_expired_ext(sp->nvhost_dev,
				wait_id, wait_value)) {
			nvgpu_memset(c->g, wait_cmd->mem,
				(wait_cmd->off + i * wait_cmd_size) * sizeof(u32),
				0, wait_cmd_size * sizeof(u32));
		} else {
			nvgpu_log(c->g, gpu_dbg_info, "sp->id %d gpu va %llx",
				wait_id, sp->syncpt_buf.gpu_va);
			c->g->ops.fifo.add_syncpt_wait_cmd(c->g, wait_cmd,
				i * wait_cmd_size, wait_id, wait_value,
				c->vm->syncpt_ro_map_gpu_va);
		}
	}

	WARN_ON(i != num_wait_cmds);
	sync_fence_put(sync_fence);

	return 0;
#else
	return -ENODEV;
#endif
}

static void gk20a_channel_syncpt_update(void *priv, int nr_completed)
{
	struct channel_gk20a *ch = priv;

	gk20a_channel_update(ch);

	/* note: channel_get() is in __gk20a_channel_syncpt_incr() */
	gk20a_channel_put(ch);
}

static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
				       bool wfi_cmd,
				       bool register_irq,
				       struct priv_cmd_entry *incr_cmd,
				       struct gk20a_fence *fence,
				       bool need_sync_fence)
{
	u32 thresh;
	int err;
	struct gk20a_channel_syncpt *sp =
		container_of(s, struct gk20a_channel_syncpt, ops);
	struct channel_gk20a *c = sp->c;

	err = gk20a_channel_alloc_priv_cmdbuf(c,
			c->g->ops.fifo.get_syncpt_incr_cmd_size(wfi_cmd),
			incr_cmd);
	if (err)
		return err;

	nvgpu_log(c->g, gpu_dbg_info, "sp->id %d gpu va %llx",
			sp->id, sp->syncpt_buf.gpu_va);
	c->g->ops.fifo.add_syncpt_incr_cmd(c->g, wfi_cmd,
			incr_cmd, sp->id, sp->syncpt_buf.gpu_va);

	thresh = nvgpu_nvhost_syncpt_incr_max_ext(sp->nvhost_dev, sp->id,
			c->g->ops.fifo.get_syncpt_incr_per_release());

	if (register_irq) {
		struct channel_gk20a *referenced = gk20a_channel_get(c);

		WARN_ON(!referenced);

		if (referenced) {
			/* note: channel_put() is in
			 * gk20a_channel_syncpt_update() */

			err = nvgpu_nvhost_intr_register_notifier(
				sp->nvhost_dev,
				sp->id, thresh,
				gk20a_channel_syncpt_update, c);
			if (err)
				gk20a_channel_put(referenced);

			/* Adding interrupt action should
			 * never fail. A proper error handling
			 * here would require us to decrement
			 * the syncpt max back to its original
			 * value. */
			WARN(err,
			     "failed to set submit complete interrupt");
		}
	}

	err = gk20a_fence_from_syncpt(fence, sp->nvhost_dev, sp->id, thresh,
				      need_sync_fence);
	if (err)
		goto clean_up_priv_cmd;

	return 0;

clean_up_priv_cmd:
	gk20a_free_priv_cmdbuf(c, incr_cmd);
	return err;
}

static int gk20a_channel_syncpt_incr_wfi(struct gk20a_channel_sync *s,
		struct priv_cmd_entry *entry,
		struct gk20a_fence *fence)
{
	return __gk20a_channel_syncpt_incr(s,
			true /* wfi */,
			false /* no irq handler */,
			entry, fence, true);
}

static int gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
		struct priv_cmd_entry *entry,
		struct gk20a_fence *fence,
		bool need_sync_fence,
		bool register_irq)
{
	/* Don't put wfi cmd to this one since we're not returning
	 * a fence to user space. */
	return __gk20a_channel_syncpt_incr(s,
			false /* no wfi */,
			register_irq /* register irq */,
			entry, fence, need_sync_fence);
}

static int gk20a_channel_syncpt_incr_user(struct gk20a_channel_sync *s,
		int wait_fence_fd,
		struct priv_cmd_entry *entry,
		struct gk20a_fence *fence,
		bool wfi,
		bool need_sync_fence,
		bool register_irq)
{
	/* Need to do 'wfi + host incr' since we return the fence
	 * to user space. */
	return __gk20a_channel_syncpt_incr(s,
			wfi,
			register_irq /* register irq */,
			entry, fence, need_sync_fence);
}

static void gk20a_channel_syncpt_set_min_eq_max(struct gk20a_channel_sync *s)
{
	struct gk20a_channel_syncpt *sp =
		container_of(s, struct gk20a_channel_syncpt, ops);
	nvgpu_nvhost_syncpt_set_min_eq_max_ext(sp->nvhost_dev, sp->id);
}

static void gk20a_channel_syncpt_signal_timeline(
		struct gk20a_channel_sync *s)
{
	/* Nothing to do. */
}

static int gk20a_channel_syncpt_id(struct gk20a_channel_sync *s)
{
	struct gk20a_channel_syncpt *sp =
		container_of(s, struct gk20a_channel_syncpt, ops);
	return sp->id;
}

static u64 gk20a_channel_syncpt_address(struct gk20a_channel_sync *s)
{
	struct gk20a_channel_syncpt *sp =
		container_of(s, struct gk20a_channel_syncpt, ops);
	return sp->syncpt_buf.gpu_va;
}

static void gk20a_channel_syncpt_destroy(struct gk20a_channel_sync *s)
{
	struct gk20a_channel_syncpt *sp =
		container_of(s, struct gk20a_channel_syncpt, ops);


	sp->c->g->ops.fifo.free_syncpt_buf(sp->c, &sp->syncpt_buf);

	nvgpu_nvhost_syncpt_set_min_eq_max_ext(sp->nvhost_dev, sp->id);
	nvgpu_nvhost_syncpt_put_ref_ext(sp->nvhost_dev, sp->id);
	nvgpu_kfree(sp->c->g, sp);
}

static struct gk20a_channel_sync *
gk20a_channel_syncpt_create(struct channel_gk20a *c, bool user_managed)
{
	struct gk20a_channel_syncpt *sp;
	char syncpt_name[32];

	sp = nvgpu_kzalloc(c->g, sizeof(*sp));
	if (!sp)
		return NULL;

	sp->c = c;
	sp->nvhost_dev = c->g->nvhost_dev;

	if (user_managed) {
		snprintf(syncpt_name, sizeof(syncpt_name),
			"%s_%d_user", c->g->name, c->chid);

		sp->id = nvgpu_nvhost_get_syncpt_client_managed(sp->nvhost_dev,
						syncpt_name);
	} else {
		snprintf(syncpt_name, sizeof(syncpt_name),
			"%s_%d", c->g->name, c->chid);

		sp->id = nvgpu_nvhost_get_syncpt_host_managed(sp->nvhost_dev,
						c->chid, syncpt_name);
	}
	if (!sp->id) {
		nvgpu_kfree(c->g, sp);
		nvgpu_err(c->g, "failed to get free syncpt");
		return NULL;
	}

	sp->c->g->ops.fifo.alloc_syncpt_buf(sp->c, sp->id,
				&sp->syncpt_buf);

	nvgpu_nvhost_syncpt_set_min_eq_max_ext(sp->nvhost_dev, sp->id);

	nvgpu_atomic_set(&sp->ops.refcount, 0);
	sp->ops.wait_syncpt = gk20a_channel_syncpt_wait_syncpt;
	sp->ops.wait_fd = gk20a_channel_syncpt_wait_fd;
	sp->ops.incr = gk20a_channel_syncpt_incr;
	sp->ops.incr_wfi = gk20a_channel_syncpt_incr_wfi;
	sp->ops.incr_user = gk20a_channel_syncpt_incr_user;
	sp->ops.set_min_eq_max = gk20a_channel_syncpt_set_min_eq_max;
	sp->ops.signal_timeline = gk20a_channel_syncpt_signal_timeline;
	sp->ops.syncpt_id = gk20a_channel_syncpt_id;
	sp->ops.syncpt_address = gk20a_channel_syncpt_address;
	sp->ops.destroy = gk20a_channel_syncpt_destroy;

	return &sp->ops;
}
#endif /* CONFIG_TEGRA_GK20A_NVHOST */

struct gk20a_channel_semaphore {
	struct gk20a_channel_sync ops;
	struct channel_gk20a *c;

	/* A semaphore pool owned by this channel. */
	struct nvgpu_semaphore_pool *pool;

	/* A sync timeline that advances when gpu completes work. */
	struct sync_timeline *timeline;
};

#ifdef CONFIG_SYNC
struct wait_fence_work {
	struct sync_fence_waiter waiter;
	struct sync_fence *fence;
	struct channel_gk20a *ch;
	struct nvgpu_semaphore *sema;
	struct gk20a *g;
	struct nvgpu_list_node entry;
};

static inline struct wait_fence_work *
wait_fence_work_from_entry(struct nvgpu_list_node *node)
{
	return (struct wait_fence_work *)
		((uintptr_t)node - offsetof(struct wait_fence_work, entry));
};

/*
 * Keep track of all the pending waits on semaphores that exist for a GPU. This
 * has to be done because the waits on fences backed by semaphores are
 * asynchronous so it's impossible to otherwise know when they will fire. During
 * driver cleanup this list can be checked and all existing waits can be
 * canceled.
 */
static void gk20a_add_pending_sema_wait(struct gk20a *g,
					struct wait_fence_work *work)
{
	nvgpu_raw_spinlock_acquire(&g->pending_sema_waits_lock);
	nvgpu_list_add(&work->entry, &g->pending_sema_waits);
	nvgpu_raw_spinlock_release(&g->pending_sema_waits_lock);
}

/*
 * Copy the list head from the pending wait list to the passed list and
 * then delete the entire pending list.
 */
static void gk20a_start_sema_wait_cancel(struct gk20a *g,
					 struct nvgpu_list_node *list)
{
	nvgpu_raw_spinlock_acquire(&g->pending_sema_waits_lock);
	nvgpu_list_replace_init(&g->pending_sema_waits, list);
	nvgpu_raw_spinlock_release(&g->pending_sema_waits_lock);
}

/*
 * During shutdown this should be called to make sure that any pending sema
 * waits are canceled. This is a fairly delicate and tricky bit of code. Here's
 * how it works.
 *
 * Every time a semaphore wait is initiated in SW the wait_fence_work struct is
 * added to the pending_sema_waits list. When the semaphore launcher code runs
 * it checks the pending_sema_waits list. If this list is non-empty that means
 * that the wait_fence_work struct must be present and can be removed.
 *
 * When the driver shuts down one of the steps is to cancel pending sema waits.
 * To do this the entire list of pending sema waits is removed (and stored in a
 * separate local list). So now, if the semaphore launcher code runs it will see
 * that the pending_sema_waits list is empty and knows that it no longer owns
 * the wait_fence_work struct.
 */
void gk20a_channel_cancel_pending_sema_waits(struct gk20a *g)
{
	struct wait_fence_work *work;
	struct nvgpu_list_node local_pending_sema_waits;

	gk20a_start_sema_wait_cancel(g, &local_pending_sema_waits);

	while (!nvgpu_list_empty(&local_pending_sema_waits)) {
		int ret;

		work = nvgpu_list_first_entry(&local_pending_sema_waits,
					      wait_fence_work,
					      entry);

		nvgpu_list_del(&work->entry);

		/*
		 * Only nvgpu_kfree() work if the cancel is successful.
		 * Otherwise it's in use by the
		 * gk20a_channel_semaphore_launcher() code.
		 */
		ret = sync_fence_cancel_async(work->fence, &work->waiter);
		if (ret == 0)
			nvgpu_kfree(g, work);
	}
}

static void gk20a_channel_semaphore_launcher(
		struct sync_fence *fence,
		struct sync_fence_waiter *waiter)
{
	int err;
	struct wait_fence_work *w =
		container_of(waiter, struct wait_fence_work, waiter);
	struct gk20a *g = w->g;

	/*
	 * This spinlock must protect a _very_ small critical section -
	 * otherwise it's possible that the deterministic submit path suffers.
	 */
	nvgpu_raw_spinlock_acquire(&g->pending_sema_waits_lock);
	if (!nvgpu_list_empty(&g->pending_sema_waits))
		nvgpu_list_del(&w->entry);
	nvgpu_raw_spinlock_release(&g->pending_sema_waits_lock);

	gk20a_dbg_info("waiting for pre fence %p '%s'",
			fence, fence->name);
	err = sync_fence_wait(fence, -1);
	if (err < 0)
		nvgpu_err(g, "error waiting pre-fence: %d", err);

	gk20a_dbg_info(
		  "wait completed (%d) for fence %p '%s', triggering gpu work",
		  err, fence, fence->name);
	sync_fence_put(fence);
	nvgpu_semaphore_release(w->sema, w->ch->hw_sema);
	nvgpu_semaphore_put(w->sema);
	nvgpu_kfree(g, w);
}
#endif

static void add_sema_cmd(struct gk20a *g, struct channel_gk20a *c,
			 struct nvgpu_semaphore *s, struct priv_cmd_entry *cmd,
			 u32 offset, bool acquire, bool wfi)
{
	int ch = c->chid;
	u32 ob, off = cmd->off + offset;
	u64 va;

	ob = off;

	/*
	 * RO for acquire (since we just need to read the mem) and RW for
	 * release since we will need to write back to the semaphore memory.
	 */
	va = acquire ? nvgpu_semaphore_gpu_ro_va(s) :
		       nvgpu_semaphore_gpu_rw_va(s);

	/*
	 * If the op is not an acquire (so therefore a release) we should
	 * incr the underlying sema next_value.
	 */
	if (!acquire)
		nvgpu_semaphore_incr(s, c->hw_sema);

	/* semaphore_a */
	nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010004);
	/* offset_upper */
	nvgpu_mem_wr32(g, cmd->mem, off++, (va >> 32) & 0xff);
	/* semaphore_b */
	nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010005);
	/* offset */
	nvgpu_mem_wr32(g, cmd->mem, off++, va & 0xffffffff);

	if (acquire) {
		/* semaphore_c */
		nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010006);
		/* payload */
		nvgpu_mem_wr32(g, cmd->mem, off++,
			       nvgpu_semaphore_get_value(s));
		/* semaphore_d */
		nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010007);
		/* operation: acq_geq, switch_en */
		nvgpu_mem_wr32(g, cmd->mem, off++, 0x4 | (0x1 << 12));
	} else {
		/* semaphore_c */
		nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010006);
		/* payload */
		nvgpu_mem_wr32(g, cmd->mem, off++,
			       nvgpu_semaphore_get_value(s));
		/* semaphore_d */
		nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010007);
		/* operation: release, wfi */
		nvgpu_mem_wr32(g, cmd->mem, off++,
			       0x2 | ((wfi ? 0x0 : 0x1) << 20));
		/* non_stall_int */
		nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010008);
		/* ignored */
		nvgpu_mem_wr32(g, cmd->mem, off++, 0);
	}

	if (acquire)
		gpu_sema_verbose_dbg(g, "(A) c=%d ACQ_GE %-4u pool=%-3d"
				     "va=0x%llx cmd_mem=0x%llx b=0x%llx off=%u",
				     ch, nvgpu_semaphore_get_value(s),
				     s->location.pool->page_idx, va, cmd->gva,
				     cmd->mem->gpu_va, ob);
	else
		gpu_sema_verbose_dbg(g, "(R) c=%d INCR %u (%u) pool=%-3d"
				     "va=0x%llx cmd_mem=0x%llx b=0x%llx off=%u",
				     ch, nvgpu_semaphore_get_value(s),
				     nvgpu_semaphore_read(s),
				     s->location.pool->page_idx,
				     va, cmd->gva, cmd->mem->gpu_va, ob);
}

static int gk20a_channel_semaphore_wait_syncpt(
		struct gk20a_channel_sync *s, u32 id,
		u32 thresh, struct priv_cmd_entry *entry,
		struct gk20a_fence *fence)
{
	struct gk20a_channel_semaphore *sema =
		container_of(s, struct gk20a_channel_semaphore, ops);
	struct gk20a *g = sema->c->g;
	nvgpu_err(g, "trying to use syncpoint synchronization");
	return -ENODEV;
}

#ifdef CONFIG_SYNC
static int semaphore_wait_fd_native(struct channel_gk20a *c, int fd,
		struct priv_cmd_entry *wait_cmd)
{
	struct sync_fence *sync_fence;
	int err;
	const int wait_cmd_size = 8;
	int num_wait_cmds;
	int i;

	sync_fence = gk20a_sync_fence_fdget(fd);
	if (!sync_fence)
		return -EINVAL;

	num_wait_cmds = sync_fence->num_fences;
	if (num_wait_cmds == 0) {
		err = 0;
		goto put_fence;
	}

	err = gk20a_channel_alloc_priv_cmdbuf(c,
			wait_cmd_size * num_wait_cmds,
			wait_cmd);
	if (err) {
		nvgpu_err(c->g, "not enough priv cmd buffer space");
		goto put_fence;
	}

	for (i = 0; i < sync_fence->num_fences; i++) {
		struct fence *f = sync_fence->cbs[i].sync_pt;
		struct sync_pt *pt = sync_pt_from_fence(f);
		struct nvgpu_semaphore *sema;

		sema = gk20a_sync_pt_sema(pt);
		if (!sema) {
			/* expired */
			nvgpu_memset(c->g, wait_cmd->mem,
				(wait_cmd->off + i * wait_cmd_size) * sizeof(u32),
				0, wait_cmd_size * sizeof(u32));
		} else {
			WARN_ON(!sema->incremented);
			add_sema_cmd(c->g, c, sema, wait_cmd,
					i * wait_cmd_size, true, false);
			nvgpu_semaphore_put(sema);
		}
	}

put_fence:
	sync_fence_put(sync_fence);
	return err;
}

static int semaphore_wait_fd_proxy(struct channel_gk20a *c, int fd,
		struct priv_cmd_entry *wait_cmd,
		struct gk20a_fence *fence_out,
		struct sync_timeline *timeline)
{
	const int wait_cmd_size = 8;
	struct sync_fence *sync_fence;
	struct wait_fence_work *w = NULL;
	int err, status;

	sync_fence = sync_fence_fdget(fd);
	if (!sync_fence)
		return -EINVAL;

	/* If the fence has signaled there is no reason to wait on it. */
	status = atomic_read(&sync_fence->status);
	if (status == 0) {
		sync_fence_put(sync_fence);
		return 0;
	}

	err = gk20a_channel_alloc_priv_cmdbuf(c, wait_cmd_size, wait_cmd);
	if (err) {
		nvgpu_err(c->g,
				"not enough priv cmd buffer space");
		goto clean_up_sync_fence;
	}

	w = nvgpu_kzalloc(c->g, sizeof(*w));
	if (!w) {
		err = -ENOMEM;
		goto clean_up_priv_cmd;
	}

	sync_fence_waiter_init(&w->waiter, gk20a_channel_semaphore_launcher);
	w->fence = sync_fence;
	w->g = c->g;
	w->ch = c;
	w->sema = nvgpu_semaphore_alloc(c);
	if (!w->sema) {
		nvgpu_err(c->g, "ran out of semaphores");
		err = -ENOMEM;
		goto clean_up_worker;
	}

	/* worker takes one reference */
	nvgpu_semaphore_get(w->sema);
	nvgpu_semaphore_incr(w->sema, c->hw_sema);

	/* GPU unblocked when the semaphore value increments. */
	add_sema_cmd(c->g, c, w->sema, wait_cmd, 0, true, false);

	/*
	 * We need to create the fence before adding the waiter to ensure
	 * that we properly clean up in the event the sync_fence has
	 * already signaled.
	 */
	err = gk20a_fence_from_semaphore(c->g, fence_out, timeline,
			w->sema, &c->semaphore_wq, false);
	if (err)
		goto clean_up_sema;

	err = sync_fence_wait_async(sync_fence, &w->waiter);
	gk20a_add_pending_sema_wait(c->g, w);

	/*
	 * If the sync_fence has already signaled then the above wait_async
	 * will not get scheduled; the fence completed just after doing the
	 * status check above before allocs and waiter init, and won the race.
	 * This causes the waiter to be skipped, so let's release the semaphore
	 * here and put the refs taken for the worker.
	 */
	if (err == 1) {
		sync_fence_put(sync_fence);
		nvgpu_semaphore_release(w->sema, c->hw_sema);
		nvgpu_semaphore_put(w->sema);
	}

	return 0;

clean_up_sema:
	/*
	 * Release the refs to the semaphore, including
	 * the one for the worker since it will never run.
	 */
	nvgpu_semaphore_put(w->sema);
	nvgpu_semaphore_put(w->sema);
clean_up_worker:
	nvgpu_kfree(c->g, w);
clean_up_priv_cmd:
	gk20a_free_priv_cmdbuf(c, wait_cmd);
clean_up_sync_fence:
	sync_fence_put(sync_fence);
	return err;
}
#endif

static int gk20a_channel_semaphore_wait_fd(
		struct gk20a_channel_sync *s, int fd,
		struct priv_cmd_entry *entry,
		struct gk20a_fence *fence)
{
	struct gk20a_channel_semaphore *sema =
		container_of(s, struct gk20a_channel_semaphore, ops);
	struct channel_gk20a *c = sema->c;
#ifdef CONFIG_SYNC
	int err;

	err = semaphore_wait_fd_native(c, fd, entry);
	if (err)
		err = semaphore_wait_fd_proxy(c, fd, entry, fence, sema->timeline);
	return err;
#else
	nvgpu_err(c->g,
		  "trying to use sync fds with CONFIG_SYNC disabled");
	return -ENODEV;
#endif
}

static int __gk20a_channel_semaphore_incr(
		struct gk20a_channel_sync *s, bool wfi_cmd,
		struct priv_cmd_entry *incr_cmd,
		struct gk20a_fence *fence,
		bool need_sync_fence)
{
	int incr_cmd_size;
	struct gk20a_channel_semaphore *sp =
		container_of(s, struct gk20a_channel_semaphore, ops);
	struct channel_gk20a *c = sp->c;
	struct nvgpu_semaphore *semaphore;
	int err = 0;

	semaphore = nvgpu_semaphore_alloc(c);
	if (!semaphore) {
		nvgpu_err(c->g,
				"ran out of semaphores");
		return -ENOMEM;
	}

	incr_cmd_size = 10;
	err = gk20a_channel_alloc_priv_cmdbuf(c, incr_cmd_size, incr_cmd);
	if (err) {
		nvgpu_err(c->g,
				"not enough priv cmd buffer space");
		goto clean_up_sema;
	}

	/* Release the completion semaphore. */
	add_sema_cmd(c->g, c, semaphore, incr_cmd, 0, false, wfi_cmd);

	err = gk20a_fence_from_semaphore(c->g, fence,
					 sp->timeline, semaphore,
					 &c->semaphore_wq,
					 need_sync_fence);
	if (err)
		goto clean_up_sema;

	return 0;

clean_up_sema:
	nvgpu_semaphore_put(semaphore);
	return err;
}

static int gk20a_channel_semaphore_incr_wfi(
		struct gk20a_channel_sync *s,
		struct priv_cmd_entry *entry,
		struct gk20a_fence *fence)
{
	return __gk20a_channel_semaphore_incr(s,
			true /* wfi */,
			entry, fence, true);
}

static int gk20a_channel_semaphore_incr(
		struct gk20a_channel_sync *s,
		struct priv_cmd_entry *entry,
		struct gk20a_fence *fence,
		bool need_sync_fence,
		bool register_irq)
{
	/* Don't put wfi cmd to this one since we're not returning
	 * a fence to user space. */
	return __gk20a_channel_semaphore_incr(s,
			false /* no wfi */,
			entry, fence, need_sync_fence);
}

static int gk20a_channel_semaphore_incr_user(
		struct gk20a_channel_sync *s,
		int wait_fence_fd,
		struct priv_cmd_entry *entry,
		struct gk20a_fence *fence,
		bool wfi,
		bool need_sync_fence,
		bool register_irq)
{
#ifdef CONFIG_SYNC
	int err;

	err = __gk20a_channel_semaphore_incr(s, wfi, entry, fence,
					     need_sync_fence);
	if (err)
		return err;

	return 0;
#else
	struct gk20a_channel_semaphore *sema =
		container_of(s, struct gk20a_channel_semaphore, ops);
	nvgpu_err(sema->c->g,
		  "trying to use sync fds with CONFIG_SYNC disabled");
	return -ENODEV;
#endif
}

static void gk20a_channel_semaphore_set_min_eq_max(struct gk20a_channel_sync *s)
{
	/* Nothing to do. */
}

static void gk20a_channel_semaphore_signal_timeline(
		struct gk20a_channel_sync *s)
{
	struct gk20a_channel_semaphore *sp =
		container_of(s, struct gk20a_channel_semaphore, ops);
	gk20a_sync_timeline_signal(sp->timeline);
}

static int gk20a_channel_semaphore_syncpt_id(struct gk20a_channel_sync *s)
{
	return -EINVAL;
}

static u64 gk20a_channel_semaphore_syncpt_address(struct gk20a_channel_sync *s)
{
	return 0;
}

static void gk20a_channel_semaphore_destroy(struct gk20a_channel_sync *s)
{
	struct gk20a_channel_semaphore *sema =
		container_of(s, struct gk20a_channel_semaphore, ops);
	if (sema->timeline)
		gk20a_sync_timeline_destroy(sema->timeline);

	/* The sema pool is cleaned up by the VM destroy. */
	sema->pool = NULL;

	nvgpu_kfree(sema->c->g, sema);
}

static struct gk20a_channel_sync *
gk20a_channel_semaphore_create(struct channel_gk20a *c, bool user_managed)
{
	int asid = -1;
	struct gk20a_channel_semaphore *sema;
	char pool_name[20];

	if (WARN_ON(!c->vm))
		return NULL;

	sema = nvgpu_kzalloc(c->g, sizeof(*sema));
	if (!sema)
		return NULL;
	sema->c = c;

	if (c->vm->as_share)
		asid = c->vm->as_share->id;

	sprintf(pool_name, "semaphore_pool-%d", c->chid);
	sema->pool = c->vm->sema_pool;

#ifdef CONFIG_SYNC
	sema->timeline = gk20a_sync_timeline_create(
			"gk20a_ch%d_as%d", c->chid, asid);
	if (!sema->timeline) {
		gk20a_channel_semaphore_destroy(&sema->ops);
		return NULL;
	}
#endif
	nvgpu_atomic_set(&sema->ops.refcount, 0);
	sema->ops.wait_syncpt = gk20a_channel_semaphore_wait_syncpt;
	sema->ops.wait_fd = gk20a_channel_semaphore_wait_fd;
	sema->ops.incr = gk20a_channel_semaphore_incr;
	sema->ops.incr_wfi = gk20a_channel_semaphore_incr_wfi;
	sema->ops.incr_user = gk20a_channel_semaphore_incr_user;
	sema->ops.set_min_eq_max = gk20a_channel_semaphore_set_min_eq_max;
	sema->ops.signal_timeline = gk20a_channel_semaphore_signal_timeline;
	sema->ops.syncpt_id = gk20a_channel_semaphore_syncpt_id;
	sema->ops.syncpt_address = gk20a_channel_semaphore_syncpt_address;
	sema->ops.destroy = gk20a_channel_semaphore_destroy;

	return &sema->ops;
}

void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync)
{
	sync->destroy(sync);
}

struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c,
	bool user_managed)
{
#ifdef CONFIG_TEGRA_GK20A_NVHOST
	if (gk20a_platform_has_syncpoints(c->g))
		return gk20a_channel_syncpt_create(c, user_managed);
#endif
	return gk20a_channel_semaphore_create(c, user_managed);
}

bool gk20a_channel_sync_needs_sync_framework(struct gk20a *g)
{
	return !gk20a_platform_has_syncpoints(g);
}