mirror of git://nv-tegra.nvidia.com/linux-nvgpu.git
Revamp the support the nvgpu driver has for semaphores.

The original problem with nvgpu's semaphore support was that it required a SW based wait for every semaphore release: for every fence that gk20a_channel_semaphore_wait_fd() waited on, a new semaphore was created, and that semaphore was released by SW when the fence signaled. This meant every release involved a sync_fence_wait_async() call which could block. The latency of this SW wait was enough to cause massive degradation in performance.

To fix this, a fast path was implemented. When a fence backed by a GPU semaphore is passed to gk20a_channel_semaphore_wait_fd(), a semaphore acquire is used directly to block the GPU. No sync_fence_wait_async() call is made and no extra semaphore is created.

To implement this fast path the semaphore memory had to be shared between channels. Previously, since a new semaphore was created on every pass through gk20a_channel_semaphore_wait_fd(), the address space a semaphore was mapped into was irrelevant. With the fast path, however, a semaphore may be released in one address space but acquired in another.

Sharing the semaphore memory was done by making a fixed GPU mapping in all channels. This mapping points to the semaphore memory (the so called semaphore sea). The global fixed mapping is read-only to make sure no semaphores can be incremented (i.e. released) by a malicious channel. Each channel then gets a RW mapping of its own semaphore. This way a channel may only acquire other channels' semaphores but may both acquire and release its own semaphore.

The gk20a fence code was updated to allow introspection of GPU backed fences. This allows detecting when the fast path can be taken. If the fast path cannot be used (for example when a fence is sync-pt backed) the original slow path is still present. It gets used when the GPU needs to wait on an event from something which only understands sync-pts.

Bug 1732449
JIRA DNVGPU-12

Change-Id: Ic0fea74994da5819a771deac726bb0d47a33c2de
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: http://git-master/r/1133792
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Tested-by: Terje Bergstrom <tbergstrom@nvidia.com>
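Below is a minimal sketch of the fast path described above, not the actual driver code: gk20a_sync_fence_get_sema() and push_semaphore_acquire() are hypothetical stand-ins for the driver's real introspection and pushbuffer helpers; only gk20a_semaphore_gpu_ro_va() appears in the file shown here.

/*
 * Hypothetical sketch: if the incoming fence is backed by a GPU
 * semaphore, emit a semaphore acquire for the GPU instead of blocking
 * in SW via sync_fence_wait_async().
 */
static int semaphore_wait_fd_fast_path(struct channel_gk20a *c,
                                       struct sync_fence *fence)
{
        struct gk20a_semaphore *sema;

        /* Introspect the fence: NULL means it is not semaphore backed
         * (e.g. sync-pt backed), so the caller must fall back to the
         * original slow path. */
        sema = gk20a_sync_fence_get_sema(fence);        /* assumed helper */
        if (!sema)
                return -ENODEV;

        /* Acquire through the global read-only mapping of the
         * semaphore sea, which is valid in every channel's VM. */
        push_semaphore_acquire(c, gk20a_semaphore_gpu_ro_va(sema));     /* assumed helper */
        return 0;
}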
239 lines · 5.3 KiB · C
/*
 * Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */

#include "fence_gk20a.h"
|
|
|
|
#include <linux/gk20a.h>
|
|
#include <linux/file.h>
|
|
#include <linux/version.h>
|
|
|
|
#include "gk20a.h"
|
|
#include "semaphore_gk20a.h"
|
|
#include "channel_gk20a.h"
|
|
#include "sync_gk20a.h"
|
|
|
|
#ifdef CONFIG_SYNC
|
|
#include "../drivers/staging/android/sync.h"
|
|
#endif
|
|
|
|
#ifdef CONFIG_TEGRA_GK20A
|
|
#include <linux/nvhost.h>
|
|
#include <linux/nvhost_ioctl.h>
|
|
#endif
|
|
|
|
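/*
 * Each fence backing (GPU semaphore or host1x syncpoint) plugs its
 * wait and expiry-check implementations into this vtable.
 */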
struct gk20a_fence_ops {
        int (*wait)(struct gk20a_fence *, long timeout);
        bool (*is_expired)(struct gk20a_fence *);
        void (*free)(struct kref *);
};

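/* kref release callback: drop the sync_fence and semaphore references,
 * if any, then free the fence itself. */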
static void gk20a_fence_free(struct kref *ref)
{
        struct gk20a_fence *f =
                container_of(ref, struct gk20a_fence, ref);
#ifdef CONFIG_SYNC
        if (f->sync_fence)
                sync_fence_put(f->sync_fence);
#endif
        if (f->semaphore)
                gk20a_semaphore_put(f->semaphore);
        kfree(f);
}

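/* Refcounting helpers; both tolerate a NULL fence. */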
void gk20a_fence_put(struct gk20a_fence *f)
{
        if (f)
                kref_put(&f->ref, gk20a_fence_free);
}

struct gk20a_fence *gk20a_fence_get(struct gk20a_fence *f)
{
        if (f)
                kref_get(&f->ref);
        return f;
}

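/* Wait for the fence to expire. Pre-silicon platforms (simulation,
 * emulation) run much slower than real HW, so the caller's timeout is
 * replaced with the maximum there. */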
int gk20a_fence_wait(struct gk20a_fence *f, int timeout)
{
        if (!tegra_platform_is_silicon())
                timeout = (u32)MAX_SCHEDULE_TIMEOUT;
        return f->ops->wait(f, timeout);
}

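/* A fence with no ops (or no fence at all) counts as expired. */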
bool gk20a_fence_is_expired(struct gk20a_fence *f)
{
        if (f && f->ops)
                return f->ops->is_expired(f);
        else
                return true;
}

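/* Export the fence's sync_fence to userspace as a file descriptor.
 * Takes an extra reference that the installed fd owns. Returns -EINVAL
 * if the fence has no sync_fence and -ENODEV without CONFIG_SYNC. */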
int gk20a_fence_install_fd(struct gk20a_fence *f)
{
#ifdef CONFIG_SYNC
        int fd;

        if (!f->sync_fence)
                return -EINVAL;

        fd = get_unused_fd_flags(O_RDWR);
        if (fd < 0)
                return fd;

        sync_fence_get(f->sync_fence);
        sync_fence_install(f->sync_fence, fd);
        return fd;
#else
        return -ENODEV;
#endif
}

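/* Common constructor; the backing-specific fields (semaphore or
 * syncpoint) are filled in by the callers below. */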
struct gk20a_fence *gk20a_alloc_fence(const struct gk20a_fence_ops *ops,
                                      struct sync_fence *sync_fence, bool wfi)
{
        struct gk20a_fence *f = kzalloc(sizeof(*f), GFP_KERNEL);
        if (!f)
                return NULL;
        kref_init(&f->ref);
        f->ops = ops;
        f->sync_fence = sync_fence;
        f->wfi = wfi;
        f->syncpt_id = -1;
        return f;
}

/* Fences that are backed by GPU semaphores: */

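/* Check the semaphore first so an already-released semaphore never
 * sleeps; otherwise wait on the channel's semaphore wait queue until
 * the semaphore is released or the timeout lapses. */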
static int gk20a_semaphore_fence_wait(struct gk20a_fence *f, long timeout)
{
        long remain;

        if (!gk20a_semaphore_is_acquired(f->semaphore))
                return 0;

        remain = wait_event_interruptible_timeout(
                *f->semaphore_wq,
                !gk20a_semaphore_is_acquired(f->semaphore),
                timeout);
        if (remain == 0 && gk20a_semaphore_is_acquired(f->semaphore))
                return -ETIMEDOUT;
        else if (remain < 0)
                return remain;
        return 0;
}

static bool gk20a_semaphore_fence_is_expired(struct gk20a_fence *f)
{
        return !gk20a_semaphore_is_acquired(f->semaphore);
}

static const struct gk20a_fence_ops gk20a_semaphore_fence_ops = {
        .wait = &gk20a_semaphore_fence_wait,
        .is_expired = &gk20a_semaphore_fence_is_expired,
};

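/* Build a fence on top of a GPU semaphore. With CONFIG_SYNC, a
 * sync_fence is created on the channel's timeline so the result can
 * later be exported to userspace as an fd. */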
/* This function takes ownership of the semaphore */
struct gk20a_fence *gk20a_fence_from_semaphore(
                struct sync_timeline *timeline,
                struct gk20a_semaphore *semaphore,
                wait_queue_head_t *semaphore_wq,
                struct sync_fence *dependency,
                bool wfi)
{
        struct gk20a_fence *f;
        struct sync_fence *sync_fence = NULL;

#ifdef CONFIG_SYNC
        sync_fence = gk20a_sync_fence_create(timeline, semaphore,
                                             dependency, "f-gk20a-0x%04x",
                                             gk20a_semaphore_gpu_ro_va(semaphore));
        if (!sync_fence)
                return NULL;
#endif

        f = gk20a_alloc_fence(&gk20a_semaphore_fence_ops, sync_fence, wfi);
        if (!f) {
#ifdef CONFIG_SYNC
                sync_fence_put(sync_fence);
#endif
                return NULL;
        }

        f->semaphore = semaphore;
        f->semaphore_wq = semaphore_wq;
        return f;
}

#ifdef CONFIG_TEGRA_GK20A
/* Fences that are backed by host1x syncpoints: */

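/* Waiting and expiry checks are delegated to the host1x driver. */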
static int gk20a_syncpt_fence_wait(struct gk20a_fence *f, long timeout)
{
        return nvhost_syncpt_wait_timeout_ext(
                        f->host1x_pdev, f->syncpt_id, f->syncpt_value,
                        (u32)timeout, NULL, NULL);
}

static bool gk20a_syncpt_fence_is_expired(struct gk20a_fence *f)
{
        return nvhost_syncpt_is_expired_ext(f->host1x_pdev, f->syncpt_id,
                                            f->syncpt_value);
}

static const struct gk20a_fence_ops gk20a_syncpt_fence_ops = {
        .wait = &gk20a_syncpt_fence_wait,
        .is_expired = &gk20a_syncpt_fence_is_expired,
};

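/* Wrap an existing host1x syncpoint (id, threshold) in a gk20a_fence.
 * A sync_fence is only created when the caller asks for one via
 * need_sync_fence. */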
struct gk20a_fence *gk20a_fence_from_syncpt(struct platform_device *host1x_pdev,
                                            u32 id, u32 value, bool wfi,
                                            bool need_sync_fence)
{
        struct gk20a_fence *f;
        struct sync_fence *sync_fence = NULL;

#ifdef CONFIG_SYNC
        struct nvhost_ctrl_sync_fence_info pt = {
                .id = id,
                .thresh = value
        };

        if (need_sync_fence) {
                sync_fence = nvhost_sync_create_fence(host1x_pdev, &pt, 1,
                                                      "fence");
                if (IS_ERR(sync_fence))
                        return NULL;
        }
#endif

        f = gk20a_alloc_fence(&gk20a_syncpt_fence_ops, sync_fence, wfi);
        if (!f) {
#ifdef CONFIG_SYNC
                if (sync_fence)
                        sync_fence_put(sync_fence);
#endif
                return NULL;
        }
        f->host1x_pdev = host1x_pdev;
        f->syncpt_id = id;
        f->syncpt_value = value;
        return f;
}
#else
/* Stub kept signature-compatible with the CONFIG_TEGRA_GK20A version. */
struct gk20a_fence *gk20a_fence_from_syncpt(struct platform_device *host1x_pdev,
                                            u32 id, u32 value, bool wfi,
                                            bool need_sync_fence)
{
        return NULL;
}
#endif