gpu: nvgpu: adapt gk20a_channel_syncpt to use os_fence

This patch adapts gk20a_channel_syncpt to use os_fence for both the
post-fence and the pre-fence (wait) use cases. A short usage sketch of the
resulting flow follows the change statistics below.

Jira NVGPU-66

Change-Id: I49627d1f88d52a53511a02f5de60fed6df8350de
Signed-off-by: Debarshi Dutta <ddutta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1676631
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Author:       Debarshi Dutta
Date:         2018-04-23 17:26:51 +05:30
Committed by: mobile promotions
Parent:       4dfd6e43cf
Commit:       70e69e2686

7 changed files with 213 additions and 103 deletions
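
For context, the sketch below pieces together the call flow this change introduces, on both the pre-fence (wait) and post-fence paths. All types, helpers, and signatures are taken from the hunks that follow; the two wrapper functions (example_wait_path(), example_post_fence_path()) are hypothetical names used only for illustration, and the sketch omits the priv_cmd buffer management and clean-up that the real gk20a_channel_syncpt code performs.

/*
 * Illustrative only: a minimal sketch of the os_fence flow added by this
 * change. The example_* wrappers are hypothetical; everything else comes
 * from the hunks below and the nvgpu headers they touch.
 */
#include <nvgpu/os_fence.h>
#include "gk20a/channel_gk20a.h"

/* Pre-fence (wait): turn a user-supplied fd into wait commands. */
static int example_wait_path(struct channel_gk20a *c, int fd,
                struct priv_cmd_entry *wait_cmd, int max_wait_cmds)
{
        struct nvgpu_os_fence os_fence = {0};
        int err;

        /* Picks the syncpt backend when CONFIG_TEGRA_GK20A_NVHOST is set,
         * otherwise falls back to the semaphore backend. */
        err = nvgpu_os_fence_fdget(&os_fence, c, fd);
        if (err)
                return -EINVAL;

        /* The backend programs its own wait commands into wait_cmd. */
        err = os_fence.ops->program_waits(&os_fence, wait_cmd, c,
                        max_wait_cmds);

        os_fence.ops->drop_ref(&os_fence);
        return err;
}

/* Post-fence: wrap a syncpt id/threshold in an os_fence so that a
 * sync_fence can be handed back to userspace. */
static int example_post_fence_path(struct channel_gk20a *c,
                struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 thresh,
                struct nvgpu_os_fence *os_fence)
{
        return nvgpu_os_fence_syncpt_create(os_fence, c, nvhost_dev,
                        id, thresh);
}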


@@ -164,6 +164,10 @@ nvgpu-$(CONFIG_SYNC) += gk20a/sync_gk20a.o \
 	common/linux/os_fence_android.o \
 	common/linux/os_fence_android_sema.o
+
+ifeq ($(CONFIG_TEGRA_GK20A_NVHOST), y)
+nvgpu-$(CONFIG_SYNC) += common/linux/os_fence_android_syncpt.o
+endif
 
 nvgpu-$(CONFIG_GK20A_PCI) += common/linux/pci.o \
 	common/linux/pci_usermode.o \


@@ -55,15 +55,15 @@ void nvgpu_os_fence_android_drop_ref(struct nvgpu_os_fence *s)
 int nvgpu_os_fence_fdget(struct nvgpu_os_fence *fence_out,
 	struct channel_gk20a *c, int fd)
 {
-	int err;
+	int err = -ENOSYS;
 
-	err = nvgpu_os_fence_sema_fdget(fence_out, c, fd);
+#ifdef CONFIG_TEGRA_GK20A_NVHOST
+	err = nvgpu_os_fence_syncpt_fdget(fence_out, c, fd);
+#endif
+
+	if (err)
+		err = nvgpu_os_fence_sema_fdget(fence_out, c, fd);
 
-	/* TO-DO
-	 * check if fence is empty and if CONFIG_TEGRA_GK20A_NVHOST
-	 * is enabled, try to get a sync_fence using
-	 * corresponding nvhost method.
-	 */
-
 	if (err)
 		nvgpu_err(c->g, "error obtaining fence from fd %d", fd);


@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <nvgpu/errno.h>
+
+#include <nvgpu/types.h>
+#include <nvgpu/os_fence.h>
+#include <nvgpu/linux/os_fence_android.h>
+#include <nvgpu/nvhost.h>
+#include <nvgpu/atomic.h>
+
+#include "gk20a/gk20a.h"
+#include "gk20a/channel_gk20a.h"
+#include "gk20a/sync_gk20a.h"
+#include "gk20a/channel_sync_gk20a.h"
+#include "gk20a/mm_gk20a.h"
+
+#include "../drivers/staging/android/sync.h"
+
+int nvgpu_os_fence_syncpt_wait_gen_cmd(struct nvgpu_os_fence *s,
+	struct priv_cmd_entry *wait_cmd,
+	struct channel_gk20a *c,
+	int max_wait_cmds)
+{
+	int err;
+	int wait_cmd_size;
+	int num_wait_cmds;
+	int i;
+	u32 wait_id;
+	struct sync_pt *pt;
+
+	struct sync_fence *sync_fence = (struct sync_fence *)s->priv;
+
+	if (max_wait_cmds && sync_fence->num_fences > max_wait_cmds)
+		return -EINVAL;
+
+	/* validate syncpt ids */
+	for (i = 0; i < sync_fence->num_fences; i++) {
+		pt = sync_pt_from_fence(sync_fence->cbs[i].sync_pt);
+		wait_id = nvgpu_nvhost_sync_pt_id(pt);
+		if (!wait_id || !nvgpu_nvhost_syncpt_is_valid_pt_ext(
+				c->g->nvhost_dev, wait_id)) {
+			return -EINVAL;
+		}
+	}
+
+	num_wait_cmds = nvgpu_nvhost_sync_num_pts(sync_fence);
+	if (num_wait_cmds == 0)
+		return 0;
+
+	wait_cmd_size = c->g->ops.fifo.get_syncpt_wait_cmd_size();
+
+	err = gk20a_channel_alloc_priv_cmdbuf(c,
+		wait_cmd_size * num_wait_cmds, wait_cmd);
+	if (err) {
+		nvgpu_err(c->g,
+			"not enough priv cmd buffer space");
+		return err;
+	}
+
+	for (i = 0; i < sync_fence->num_fences; i++) {
+		struct fence *f = sync_fence->cbs[i].sync_pt;
+		struct sync_pt *pt = sync_pt_from_fence(f);
+		u32 wait_id = nvgpu_nvhost_sync_pt_id(pt);
+		u32 wait_value = nvgpu_nvhost_sync_pt_thresh(pt);
+
+		err = gk20a_channel_gen_syncpt_wait_cmd(c, wait_id, wait_value,
+			wait_cmd, wait_cmd_size, i, true);
+	}
+
+	WARN_ON(i != num_wait_cmds);
+
+	return 0;
+}
+
+static const struct nvgpu_os_fence_ops syncpt_ops = {
+	.program_waits = nvgpu_os_fence_syncpt_wait_gen_cmd,
+	.drop_ref = nvgpu_os_fence_android_drop_ref,
+};
+
+int nvgpu_os_fence_syncpt_create(
+	struct nvgpu_os_fence *fence_out, struct channel_gk20a *c,
+	struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 thresh)
+{
+	struct sync_fence *fence = nvgpu_nvhost_sync_create_fence(
+		nvhost_dev, id, thresh, "fence");
+
+	if (!fence) {
+		nvgpu_err(c->g, "error constructing fence %s", "fence");
+		return -ENOMEM;
+	}
+
+	nvgpu_os_fence_init(fence_out, c->g, &syncpt_ops, fence);
+
+	return 0;
+}
+
+int nvgpu_os_fence_syncpt_fdget(struct nvgpu_os_fence *fence_out,
+	struct channel_gk20a *c, int fd)
+{
+	struct sync_fence *fence = nvgpu_nvhost_sync_fdget(fd);
+
+	if (!fence)
+		return -ENOMEM;
+
+	nvgpu_os_fence_init(fence_out, c->g, &syncpt_ops, fence);
+
+	return 0;
+}


@@ -51,6 +51,39 @@ struct gk20a_channel_syncpt {
 	struct nvgpu_mem syncpt_buf;
 };
 
+int gk20a_channel_gen_syncpt_wait_cmd(struct channel_gk20a *c,
+	u32 id, u32 thresh, struct priv_cmd_entry *wait_cmd,
+	u32 wait_cmd_size, int pos, bool preallocated)
+{
+	int err = 0;
+	bool is_expired = nvgpu_nvhost_syncpt_is_expired_ext(
+		c->g->nvhost_dev, id, thresh);
+
+	if (is_expired) {
+		if (preallocated) {
+			nvgpu_memset(c->g, wait_cmd->mem,
+			(wait_cmd->off + pos * wait_cmd_size) * sizeof(u32),
+				0, wait_cmd_size * sizeof(u32));
+		}
+	} else {
+		if (!preallocated) {
+			err = gk20a_channel_alloc_priv_cmdbuf(c,
+				c->g->ops.fifo.get_syncpt_wait_cmd_size(), wait_cmd);
+			if (err) {
+				nvgpu_err(c->g, "not enough priv cmd buffer space");
+				return err;
+			}
+		}
+		nvgpu_log(c->g, gpu_dbg_info, "sp->id %d gpu va %llx",
+			id, c->vm->syncpt_ro_map_gpu_va);
+		c->g->ops.fifo.add_syncpt_wait_cmd(c->g, wait_cmd,
+			pos * wait_cmd_size, id, thresh,
+			c->vm->syncpt_ro_map_gpu_va);
+	}
+
+	return 0;
+}
+
 static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s,
 		u32 id, u32 thresh, struct priv_cmd_entry *wait_cmd)
 {
@@ -58,108 +91,36 @@ static int gk20a_channel_syncpt_wait_syncpt(struct gk20a_channel_sync *s,
 		container_of(s, struct gk20a_channel_syncpt, ops);
 	struct channel_gk20a *c = sp->c;
 	int err = 0;
+	u32 wait_cmd_size = c->g->ops.fifo.get_syncpt_wait_cmd_size();
 
 	if (!nvgpu_nvhost_syncpt_is_valid_pt_ext(sp->nvhost_dev, id))
 		return -EINVAL;
 
 	if (nvgpu_nvhost_syncpt_is_expired_ext(sp->nvhost_dev, id, thresh))
 		return 0;
 
+	err = gk20a_channel_gen_syncpt_wait_cmd(c, id, thresh,
+		wait_cmd, wait_cmd_size, 0, false);
-	err = gk20a_channel_alloc_priv_cmdbuf(c,
-			c->g->ops.fifo.get_syncpt_wait_cmd_size(), wait_cmd);
-	if (err) {
-		nvgpu_err(c->g,
-			"not enough priv cmd buffer space");
-		return err;
-	}
-
-	nvgpu_log(c->g, gpu_dbg_info, "sp->id %d gpu va %llx",
-			id, sp->c->vm->syncpt_ro_map_gpu_va);
-	c->g->ops.fifo.add_syncpt_wait_cmd(c->g, wait_cmd, 0, id,
-			thresh, c->vm->syncpt_ro_map_gpu_va);
 
-	return 0;
+	return err;
 }
 
 static int gk20a_channel_syncpt_wait_fd(struct gk20a_channel_sync *s, int fd,
-		       struct priv_cmd_entry *wait_cmd, int max_wait_cmds)
+	struct priv_cmd_entry *wait_cmd, int max_wait_cmds)
 {
-#ifdef CONFIG_SYNC
-	int i;
-	int num_wait_cmds;
-	struct sync_fence *sync_fence;
-	struct sync_pt *pt;
+	struct nvgpu_os_fence os_fence = {0};
 	struct gk20a_channel_syncpt *sp =
 		container_of(s, struct gk20a_channel_syncpt, ops);
 	struct channel_gk20a *c = sp->c;
-	u32 wait_id;
 	int err = 0;
-	u32 wait_cmd_size = 0;
 
-	sync_fence = nvgpu_nvhost_sync_fdget(fd);
-	if (!sync_fence)
+	err = nvgpu_os_fence_fdget(&os_fence, c, fd);
+	if (err)
 		return -EINVAL;
 
-	if (max_wait_cmds && sync_fence->num_fences > max_wait_cmds) {
-		sync_fence_put(sync_fence);
-		return -EINVAL;
-	}
+	err = os_fence.ops->program_waits(&os_fence,
+		wait_cmd, c, max_wait_cmds);
 
-	/* validate syncpt ids */
-	for (i = 0; i < sync_fence->num_fences; i++) {
-		pt = sync_pt_from_fence(sync_fence->cbs[i].sync_pt);
-		wait_id = nvgpu_nvhost_sync_pt_id(pt);
-		if (!wait_id || !nvgpu_nvhost_syncpt_is_valid_pt_ext(
-				sp->nvhost_dev, wait_id)) {
-			sync_fence_put(sync_fence);
-			return -EINVAL;
-		}
-	}
+	os_fence.ops->drop_ref(&os_fence);
 
-	num_wait_cmds = nvgpu_nvhost_sync_num_pts(sync_fence);
-	if (num_wait_cmds == 0) {
-		sync_fence_put(sync_fence);
-		return 0;
-	}
-
-	wait_cmd_size = c->g->ops.fifo.get_syncpt_wait_cmd_size();
-	err = gk20a_channel_alloc_priv_cmdbuf(c,
-			wait_cmd_size * num_wait_cmds,
-			wait_cmd);
-	if (err) {
-		nvgpu_err(c->g,
-			"not enough priv cmd buffer space");
-		sync_fence_put(sync_fence);
-		return err;
-	}
-
-	i = 0;
-	for (i = 0; i < sync_fence->num_fences; i++) {
-		struct fence *f = sync_fence->cbs[i].sync_pt;
-		struct sync_pt *pt = sync_pt_from_fence(f);
-		u32 wait_id = nvgpu_nvhost_sync_pt_id(pt);
-		u32 wait_value = nvgpu_nvhost_sync_pt_thresh(pt);
-
-		if (nvgpu_nvhost_syncpt_is_expired_ext(sp->nvhost_dev,
-				wait_id, wait_value)) {
-			nvgpu_memset(c->g, wait_cmd->mem,
-			(wait_cmd->off + i * wait_cmd_size) * sizeof(u32),
-				0, wait_cmd_size * sizeof(u32));
-		} else {
-			nvgpu_log(c->g, gpu_dbg_info, "sp->id %d gpu va %llx",
-				wait_id, sp->syncpt_buf.gpu_va);
-			c->g->ops.fifo.add_syncpt_wait_cmd(c->g, wait_cmd,
-				i * wait_cmd_size, wait_id, wait_value,
-				c->vm->syncpt_ro_map_gpu_va);
-		}
-	}
-
-	WARN_ON(i != num_wait_cmds);
-	sync_fence_put(sync_fence);
-
-	return 0;
-#else
-	return -ENODEV;
-#endif
+	return err;
 }
 
 static void gk20a_channel_syncpt_update(void *priv, int nr_completed)
@@ -185,6 +146,7 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
 		container_of(s, struct gk20a_channel_syncpt, ops);
 	struct channel_gk20a *c = sp->c;
 	struct sync_fence *sync_fence = NULL;
+	struct nvgpu_os_fence os_fence = {0};
 
 	err = gk20a_channel_alloc_priv_cmdbuf(c,
 		c->g->ops.fifo.get_syncpt_incr_cmd_size(wfi_cmd),
@@ -226,26 +188,22 @@ static int __gk20a_channel_syncpt_incr(struct gk20a_channel_sync *s,
 		}
 	}
 
 #ifdef CONFIG_SYNC
 	if (need_sync_fence) {
-		sync_fence = nvgpu_nvhost_sync_create_fence(sp->nvhost_dev,
-			sp->id, thresh, "fence");
+		err = nvgpu_os_fence_syncpt_create(&os_fence, c, sp->nvhost_dev,
+			sp->id, thresh);
 
-		if (IS_ERR(sync_fence)) {
-			err = PTR_ERR(sync_fence);
+		if (err)
 			goto clean_up_priv_cmd;
-		}
+
+		sync_fence = (struct sync_fence *)os_fence.priv;
 	}
 #endif
 
 	err = gk20a_fence_from_syncpt(fence, sp->nvhost_dev,
 		sp->id, thresh, sync_fence);
 	if (err) {
-#ifdef CONFIG_SYNC
-		if (sync_fence)
-			sync_fence_put(sync_fence);
-#endif
+		if (nvgpu_os_fence_is_initialized(&os_fence))
+			os_fence.ops->drop_ref(&os_fence);
 		goto clean_up_priv_cmd;
 	}


@@ -108,6 +108,10 @@ void gk20a_channel_gen_sema_wait_cmd(struct channel_gk20a *c,
 	struct nvgpu_semaphore *sema, struct priv_cmd_entry *wait_cmd,
 	u32 wait_cmd_size, int pos);
 
+int gk20a_channel_gen_syncpt_wait_cmd(struct channel_gk20a *c,
+	u32 id, u32 thresh, struct priv_cmd_entry *wait_cmd,
+	u32 wait_cmd_size, int pos, bool preallocated);
+
 void gk20a_channel_sync_destroy(struct gk20a_channel_sync *sync,
 	bool set_safe_state);
 struct gk20a_channel_sync *gk20a_channel_sync_create(struct channel_gk20a *c,


@@ -39,4 +39,8 @@ void nvgpu_os_fence_init(struct nvgpu_os_fence *fence_out,
 	struct gk20a *g, const struct nvgpu_os_fence_ops *fops,
 	struct sync_fence *fence);
 #endif
+
+int nvgpu_os_fence_syncpt_fdget(
+	struct nvgpu_os_fence *fence_out,
+	struct channel_gk20a *c, int fd);
 
 #endif /* __NVGPU_OS_FENCE_ANDROID_H__ */


@@ -28,6 +28,7 @@
 struct nvgpu_semaphore;
 struct channel_gk20a;
 struct priv_cmd_entry;
+struct nvgpu_nvhost_dev;
 
 /*
  * struct nvgpu_os_fence adds an abstraction to the earlier Android Sync
@@ -108,4 +109,22 @@ static inline int nvgpu_os_fence_fdget(
 #endif /* CONFIG_SYNC */
 
+#if defined(CONFIG_TEGRA_GK20A_NVHOST) && defined(CONFIG_SYNC)
+
+int nvgpu_os_fence_syncpt_create(struct nvgpu_os_fence *fence_out,
+	struct channel_gk20a *c, struct nvgpu_nvhost_dev *nvhost_dev,
+	u32 id, u32 thresh);
+
+#else
+
+static inline int nvgpu_os_fence_syncpt_create(
+	struct nvgpu_os_fence *fence_out, struct channel_gk20a *c,
+	struct nvgpu_nvhost_dev *nvhost_dev,
+	u32 id, u32 thresh)
+{
+	return -ENOSYS;
+}
+
+#endif /* CONFIG_TEGRA_GK20A_NVHOST && CONFIG_SYNC */
+
 #endif /* __NVGPU_OS_FENCE__ */