diff --git a/arch/nvgpu-common.yaml b/arch/nvgpu-common.yaml index 3f57422bc..eacdc2639 100644 --- a/arch/nvgpu-common.yaml +++ b/arch/nvgpu-common.yaml @@ -84,7 +84,8 @@ fence: safe: no owner: Seema K sources: [ common/fence/fence.c, - include/nvgpu/fence.h ] + include/nvgpu/fence.h, + include/nvgpu/user_fence.h ] io: safe: yes diff --git a/drivers/gpu/nvgpu/common/fence/fence.c b/drivers/gpu/nvgpu/common/fence/fence.c index 3762b5ab4..76f35ca06 100644 --- a/drivers/gpu/nvgpu/common/fence/fence.c +++ b/drivers/gpu/nvgpu/common/fence/fence.c @@ -30,6 +30,7 @@ #include #include #include +#include static struct nvgpu_fence_type *nvgpu_fence_from_ref(struct nvgpu_ref *ref) { @@ -75,9 +76,23 @@ struct nvgpu_fence_type *nvgpu_fence_get(struct nvgpu_fence_type *f) return f; } -int nvgpu_fence_install_fd(struct nvgpu_fence_type *f, int fd) +struct nvgpu_user_fence nvgpu_fence_extract_user(struct nvgpu_fence_type *f) { - return f->os_fence.ops->install_fence(&f->os_fence, fd); + struct nvgpu_user_fence uf = (struct nvgpu_user_fence) { + .syncpt_id = f->syncpt_id, + .syncpt_value = f->syncpt_value, + .os_fence = f->os_fence, /* plain struct copy of f's os fence; presumably the dup() call below takes the reference that this copy owns - confirm against the os_fence ops */ + }; + + /* + * Keep our ref to the os fence for now so that the user fence can be + * extracted multiple times (for cde). 
+ */ + if (nvgpu_os_fence_is_initialized(&f->os_fence)) { + f->os_fence.ops->dup(&f->os_fence); + } + + return uf; } int nvgpu_fence_wait(struct gk20a *g, struct nvgpu_fence_type *f, diff --git a/drivers/gpu/nvgpu/common/fifo/submit.c b/drivers/gpu/nvgpu/common/fifo/submit.c index fe315df29..17be7de00 100644 --- a/drivers/gpu/nvgpu/common/fifo/submit.c +++ b/drivers/gpu/nvgpu/common/fifo/submit.c @@ -38,6 +38,7 @@ #include #include #include +#include #include @@ -806,11 +807,19 @@ int nvgpu_submit_channel_gpfifo_user(struct nvgpu_channel *c, u32 num_entries, u32 flags, struct nvgpu_channel_fence *fence, - struct nvgpu_fence_type **fence_out, + struct nvgpu_user_fence *fence_out, struct nvgpu_swprofiler *profiler) { - return nvgpu_submit_channel_gpfifo(c, NULL, userdata, num_entries, - flags, fence, fence_out, profiler); + struct nvgpu_fence_type *fence_internal = NULL; + int err; + + err = nvgpu_submit_channel_gpfifo(c, NULL, userdata, num_entries, + flags, fence, &fence_internal, profiler); + if (err == 0 && fence_internal != NULL) { /* otherwise *fence_out is left exactly as the caller initialized it */ + *fence_out = nvgpu_fence_extract_user(fence_internal); + nvgpu_fence_put(fence_internal); /* the internal fence is put here; only the extracted user fence is handed to the caller */ + } + return err; } int nvgpu_submit_channel_gpfifo_kernel(struct nvgpu_channel *c, diff --git a/drivers/gpu/nvgpu/include/nvgpu/channel.h b/drivers/gpu/nvgpu/include/nvgpu/channel.h index fca50b5ff..0186e4898 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/channel.h +++ b/drivers/gpu/nvgpu/include/nvgpu/channel.h @@ -49,6 +49,7 @@ struct nvgpu_debug_context; struct priv_cmd_queue; struct priv_cmd_entry; struct nvgpu_channel_wdt; +struct nvgpu_user_fence; /** * S/W defined invalid channel identifier. 
@@ -575,7 +576,7 @@ int nvgpu_submit_channel_gpfifo_user(struct nvgpu_channel *c, u32 num_entries, u32 flags, struct nvgpu_channel_fence *fence, - struct nvgpu_fence_type **fence_out, + struct nvgpu_user_fence *fence_out, struct nvgpu_swprofiler *profiler); int nvgpu_submit_channel_gpfifo_kernel(struct nvgpu_channel *c, diff --git a/drivers/gpu/nvgpu/include/nvgpu/fence.h b/drivers/gpu/nvgpu/include/nvgpu/fence.h index a58488364..c71d2a321 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/fence.h +++ b/drivers/gpu/nvgpu/include/nvgpu/fence.h @@ -1,7 +1,5 @@ /* - * Fences - * - * Copyright (c) 2014-2019, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2020, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -35,6 +33,7 @@ struct platform_device; struct nvgpu_semaphore; #endif struct nvgpu_os_fence; +struct nvgpu_user_fence; struct nvgpu_fence_type { struct gk20a *g; @@ -98,6 +97,6 @@ void nvgpu_fence_put(struct nvgpu_fence_type *f); struct nvgpu_fence_type *nvgpu_fence_get(struct nvgpu_fence_type *f); int nvgpu_fence_wait(struct gk20a *g, struct nvgpu_fence_type *f, u32 timeout); bool nvgpu_fence_is_expired(struct nvgpu_fence_type *f); -int nvgpu_fence_install_fd(struct nvgpu_fence_type *f, int fd); +struct nvgpu_user_fence nvgpu_fence_extract_user(struct nvgpu_fence_type *f); #endif /* NVGPU_FENCE_H */ diff --git a/drivers/gpu/nvgpu/include/nvgpu/user_fence.h b/drivers/gpu/nvgpu/include/nvgpu/user_fence.h new file mode 100644 index 000000000..beed0f74d --- /dev/null +++ b/drivers/gpu/nvgpu/include/nvgpu/user_fence.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2020, NVIDIA Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef NVGPU_USER_FENCE_H +#define NVGPU_USER_FENCE_H + +#include +#include + +/* + * A post-submit fence to be given to userspace. Either the syncpt id and value + * pair is valid or the os fence is valid; this depends on the flags that were + * used: NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE implies os fence. + */ +struct nvgpu_user_fence { + u32 syncpt_id, syncpt_value; + struct nvgpu_os_fence os_fence; +}; + +static inline struct nvgpu_user_fence nvgpu_user_fence_init(void) +{ + return (struct nvgpu_user_fence) { + .syncpt_id = NVGPU_INVALID_SYNCPT_ID, /* every other member, os_fence included, is zero-initialized by the compound literal */ + }; +} + +static inline void nvgpu_user_fence_release(struct nvgpu_user_fence *fence) +{ + if (nvgpu_os_fence_is_initialized(&fence->os_fence)) { /* nothing to release when no os fence is attached */ + fence->os_fence.ops->drop_ref(&fence->os_fence); + } +} + +#endif /* NVGPU_USER_FENCE_H */ diff --git a/drivers/gpu/nvgpu/os/linux/cde.c b/drivers/gpu/nvgpu/os/linux/cde.c index 4b1f5bba9..7214fa4cc 100644 --- a/drivers/gpu/nvgpu/os/linux/cde.c +++ b/drivers/gpu/nvgpu/os/linux/cde.c @@ -40,6 +40,7 @@ #include #include #include +#include #include @@ -1695,7 +1696,7 @@ int gk20a_prepare_compressible_read( u32 width, u32 height, u32 block_height_log2, u32 submit_flags, struct nvgpu_channel_fence *fence, u32 *valid_compbits, u32 *zbc_color, - struct nvgpu_fence_type **fence_out) + struct nvgpu_user_fence *fence_out) { struct gk20a *g = &l->g; int err = 0; @@ -1743,14 +1744,12 @@ int gk20a_prepare_compressible_read( } } - if (state->fence && fence_out) - *fence_out = 
nvgpu_fence_get(state->fence); + if (submit_flags & NVGPU_SUBMIT_FLAGS_FENCE_GET && state->fence != NULL) { + *fence_out = 
nvgpu_fence_extract_user(state->fence); + } - if (valid_compbits) - *valid_compbits = state->valid_compbits; - - if (zbc_color) - *zbc_color = state->zbc_color; + *valid_compbits = state->valid_compbits; + *zbc_color = state->zbc_color; /* NOTE(review): the NULL guards on valid_compbits and zbc_color (and on fence_out above) were dropped by this change; every caller must now pass valid pointers - confirm */ out: nvgpu_mutex_release(&state->lock); diff --git a/drivers/gpu/nvgpu/os/linux/cde.h b/drivers/gpu/nvgpu/os/linux/cde.h index 2d7259d3b..2645526c0 100644 --- a/drivers/gpu/nvgpu/os/linux/cde.h +++ b/drivers/gpu/nvgpu/os/linux/cde.h @@ -48,6 +48,7 @@ struct nvgpu_channel_fence; struct nvgpu_channel; struct vm_gk20a; struct nvgpu_gpfifo_entry; +struct nvgpu_user_fence; /* * this element defines a buffer that is allocated and mapped into gpu address @@ -317,7 +318,7 @@ int gk20a_prepare_compressible_read( u32 width, u32 height, u32 block_height_log2, u32 submit_flags, struct nvgpu_channel_fence *fence, u32 *valid_compbits, u32 *zbc_color, - struct nvgpu_fence_type **fence_out); + struct nvgpu_user_fence *fence_out); int gk20a_mark_compressible_write( struct gk20a *g, u32 buffer_fd, u32 valid_compbits, u64 offset, u32 zbc_color); diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c index c8288fb47..e83b53b05 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c @@ -48,6 +48,7 @@ #include #include #include +#include #include @@ -793,13 +794,13 @@ static int gk20a_ioctl_channel_submit_gpfifo( struct nvgpu_submit_gpfifo_args *args) { struct nvgpu_channel_fence fence; - struct nvgpu_fence_type *fence_out; + struct nvgpu_user_fence fence_out = nvgpu_user_fence_init(); u32 submit_flags = 0; int fd = -1; struct gk20a *g = ch->g; struct nvgpu_fifo *f = &g->fifo; struct nvgpu_swprofiler *kickoff_profiler = &f->kickoff_profiler; - struct nvgpu_gpfifo_userdata userdata; + struct nvgpu_gpfifo_userdata userdata = { NULL, NULL }; bool flag_fence_wait = (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) != 0U; bool flag_fence_get = (args->flags & @@ 
-860,17 +861,18 @@ static int gk20a_ioctl_channel_submit_gpfifo( /* Convert fence_out to something we can pass back to user space. */ if (flag_fence_get) { if (flag_sync_fence) { - ret = nvgpu_fence_install_fd(fence_out, fd); + ret = fence_out.os_fence.ops->install_fence( + &fence_out.os_fence, fd); /* NOTE(review): unlike gk20a_ctrl_prepare_compressible_read, os_fence.ops is dereferenced here without an nvgpu_os_fence_is_initialized() check - confirm a successful sync-fence submit always yields an initialized os fence */ if (ret) put_unused_fd(fd); else args->fence.id = fd; } else { - args->fence.id = fence_out->syncpt_id; - args->fence.value = fence_out->syncpt_value; + args->fence.id = fence_out.syncpt_id; + args->fence.value = fence_out.syncpt_value; } + nvgpu_user_fence_release(&fence_out); /* NOTE(review): the release now runs only under flag_fence_get, whereas the removed nvgpu_fence_put() was unconditional - confirm the submit never produces a fence without that flag */ } - nvgpu_fence_put(fence_out); nvgpu_swprofile_snapshot(kickoff_profiler, PROF_KICKOFF_IOCTL_EXIT); diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c index f1071a2b4..b0890bef8 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_ctrl.c @@ -52,6 +52,7 @@ #include #include #include +#include #include "ioctl_ctrl.h" #include "ioctl_dbg.h" @@ -442,7 +443,7 @@ static int gk20a_ctrl_prepare_compressible_read( #ifdef CONFIG_NVGPU_SUPPORT_CDE struct nvgpu_os_linux *l = nvgpu_os_linux_from_gk20a(g); struct nvgpu_channel_fence fence; - struct nvgpu_fence_type *fence_out = NULL; + struct nvgpu_user_fence fence_out = nvgpu_user_fence_init(); int submit_flags = nvgpu_submit_gpfifo_user_flags_to_common_flags( args->submit_flags); int fd = -1; @@ -472,31 +473,31 @@ static int gk20a_ctrl_prepare_compressible_read( return ret; } - /* Convert fence_out to something we can pass back to user space. */ + /* + * Convert fence_out, if any, to something we can pass back to user + * space. Even if successful, the fence may not exist if there was + * nothing to be done (no compbits requested); that's not an error. 
+ */ if (submit_flags & NVGPU_SUBMIT_FLAGS_FENCE_GET) { if (submit_flags & NVGPU_SUBMIT_FLAGS_SYNC_FENCE) { - if (fence_out) { - ret = nvgpu_fence_install_fd(fence_out, fd); - if (ret) + if (nvgpu_os_fence_is_initialized(&fence_out.os_fence)) { + ret = fence_out.os_fence.ops->install_fence( + &fence_out.os_fence, fd); + if (ret) { put_unused_fd(fd); - else - args->fence.fd = fd; + fd = -1; + } } else { - args->fence.fd = -1; put_unused_fd(fd); + fd = -1; } + args->fence.fd = fd; /* fd is -1 here whenever no fence fd ended up installed (install failure or no os fence) */ } else { - if (fence_out) { - args->fence.syncpt_id = fence_out->syncpt_id; - args->fence.syncpt_value = - fence_out->syncpt_value; - } else { - args->fence.syncpt_id = NVGPU_INVALID_SYNCPT_ID; - args->fence.syncpt_value = 0; - } + args->fence.syncpt_id = fence_out.syncpt_id; + args->fence.syncpt_value = fence_out.syncpt_value; /* without a fence these keep the nvgpu_user_fence_init() values: NVGPU_INVALID_SYNCPT_ID and a zero-initialized value */ } + nvgpu_user_fence_release(&fence_out); } - nvgpu_fence_put(fence_out); #endif return ret;