gpu: nvgpu: require os fence when only supported

If os fences are the only kind of fence that is supported, fail a submit
when the user wants fences but doesn't explicitly request sync fences
(and thus implicitly expects syncpoints). Syncpoint support is advertised
to userspace in the gpu characteristics, so userspace already has the
knowledge to request the correct sync type.

Do this check at the ioctl level. The in-kernel stuff that needs submits
(cde, copyengine) can work without syncpoints and sync fences are used
only in userspace.

Also fail a submit if CONFIG_SYNC is not set and sync fences are
requested. Lack of kernel support doesn't guarantee that userspace
wouldn't still wrongly request them.

Clarify the deferred cleanup requirements. The sync framework is needed
only for post sync fences, but deferred cleanup is still always needed
with semaphores because the internal tracking is done with dynamically
allocated (although small) objects.

Jira NVGPU-4548

Change-Id: I2e5a6554930cb413b2bb46ddfe388e41390bc7e4
Signed-off-by: Konsta Hölttä <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2321715
(cherry picked from commit d870956170906eae1088846ec05266c859669771)
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2318157
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Konsta Hölttä
2020-03-25 11:06:46 +02:00
committed by Alex Waterman
parent df896cd3c7
commit b813adbf49
2 changed files with 43 additions and 24 deletions

View File

@@ -33,6 +33,7 @@
#include <nvgpu/profile.h> #include <nvgpu/profile.h>
#include <nvgpu/vpr.h> #include <nvgpu/vpr.h>
#include <nvgpu/trace.h> #include <nvgpu/trace.h>
#include <nvgpu/nvhost.h>
/* /*
* Handle the submit synchronization - pre-fences and post-fences. * Handle the submit synchronization - pre-fences and post-fences.
@@ -441,8 +442,6 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c,
#endif #endif
if (need_job_tracking) { if (need_job_tracking) {
bool need_sync_framework = false;
/* /*
* If the channel is to have deterministic latency and * If the channel is to have deterministic latency and
* job tracking is required, the channel must have * job tracking is required, the channel must have
@@ -453,26 +452,26 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c,
return -EINVAL; return -EINVAL;
} }
need_sync_framework =
(nvgpu_channel_sync_needs_os_fence_framework(g) ||
(flag_sync_fence && flag_fence_get));
/* /*
* Deferred clean-up is necessary for any of the following * Deferred clean-up is necessary for any of the following
* conditions: * conditions that could make clean-up behaviour
* - channel's deterministic flag is not set * non-deterministic and as such not suitable for the submit
* - dependency on sync framework, which could make the * path:
* behavior of the clean-up operation non-deterministic * - channel's deterministic flag is not set (job tracking is
* (should not be performed in the submit path) * dynamically allocated)
* - channel wdt * - no syncpt support (struct nvgpu_semaphore is dynamically
* - buffer refcounting * allocated, not pooled)
* - dependency on sync framework for post fences
* - buffer refcounting, which is O(n)
* - channel wdt, which needs periodic async cleanup
* *
* If none of the conditions are met, then deferred clean-up * If none of the conditions are met, then deferred clean-up
* is not required, and we clean-up one job-tracking * is not required, and we clean-up one job-tracking
* resource in the submit path. * resource in the submit path.
*/ */
need_deferred_cleanup = !nvgpu_channel_is_deterministic(c) || need_deferred_cleanup = !nvgpu_channel_is_deterministic(c) ||
need_sync_framework || !nvgpu_has_syncpoints(g) ||
(flag_sync_fence && flag_fence_get) ||
!skip_buffer_refcounting; !skip_buffer_refcounting;
#ifdef CONFIG_NVGPU_CHANNEL_WDT #ifdef CONFIG_NVGPU_CHANNEL_WDT

View File

@@ -795,6 +795,12 @@ static int gk20a_ioctl_channel_submit_gpfifo(
int fd = -1; int fd = -1;
struct gk20a *g = ch->g; struct gk20a *g = ch->g;
struct nvgpu_gpfifo_userdata userdata; struct nvgpu_gpfifo_userdata userdata;
bool flag_fence_wait = (args->flags &
NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) != 0U;
bool flag_fence_get = (args->flags &
NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) != 0U;
bool flag_sync_fence = (args->flags &
NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) != 0U;
int ret = 0; int ret = 0;
nvgpu_log_fn(g, " "); nvgpu_log_fn(g, " ");
@@ -806,13 +812,23 @@ static int gk20a_ioctl_channel_submit_gpfifo(
return -ETIMEDOUT; return -ETIMEDOUT;
} }
nvgpu_get_fence_args(&args->fence, &fence); #ifndef CONFIG_SYNC
submit_flags = if (flag_sync_fence) {
nvgpu_submit_gpfifo_user_flags_to_common_flags(args->flags); return -EINVAL;
}
#endif
/*
* In case we need the sync framework, require that the user requests
* it too for any fences. That's advertised in the gpu characteristics.
*/
if (nvgpu_channel_sync_needs_os_fence_framework(g) &&
(flag_fence_wait || flag_fence_get) && !flag_sync_fence) {
return -EINVAL;
}
/* Try and allocate an fd here*/ /* Try and allocate an fd here*/
if ((args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) if (flag_fence_get && flag_sync_fence) {
&& (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)) {
fd = get_unused_fd_flags(O_RDWR); fd = get_unused_fd_flags(O_RDWR);
if (fd < 0) if (fd < 0)
return fd; return fd;
@@ -822,6 +838,10 @@ static int gk20a_ioctl_channel_submit_gpfifo(
(uintptr_t)args->gpfifo; (uintptr_t)args->gpfifo;
userdata.context = NULL; userdata.context = NULL;
nvgpu_get_fence_args(&args->fence, &fence);
submit_flags =
nvgpu_submit_gpfifo_user_flags_to_common_flags(args->flags);
ret = nvgpu_submit_channel_gpfifo_user(ch, ret = nvgpu_submit_channel_gpfifo_user(ch,
userdata, args->num_entries, userdata, args->num_entries,
submit_flags, &fence, &fence_out, profile); submit_flags, &fence, &fence_out, profile);
@@ -833,8 +853,8 @@ static int gk20a_ioctl_channel_submit_gpfifo(
} }
/* Convert fence_out to something we can pass back to user space. */ /* Convert fence_out to something we can pass back to user space. */
if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) { if (flag_fence_get) {
if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) { if (flag_sync_fence) {
ret = nvgpu_fence_install_fd(fence_out, fd); ret = nvgpu_fence_install_fd(fence_out, fd);
if (ret) if (ret)
put_unused_fd(fd); put_unused_fd(fd);