From b813adbf497a42bd6b49ea32b888ce6da1ccfde8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Konsta=20H=C3=B6ltt=C3=A4?= Date: Wed, 25 Mar 2020 11:06:46 +0200 Subject: [PATCH] gpu: nvgpu: require os fence when only supported MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the os fence is the only kind that's supported, fail a submit if the user wants fences but doesn't explicitly request sync fences, expecting syncpoints. Syncpoint support is advertised to userspace in the gpu characteristics, so userspace already has the knowledge to request the correct sync type. Do this check at the ioctl level. The in-kernel stuff that needs submits (cde, copyengine) can work without syncpoints and sync fences are used only in userspace. Fail a submit also if CONFIG_SYNC is not set and sync fences are requested. Lack of kernel support doesn't guarantee that userspace would still wrongly want that. Clarify the deferred cleanup requirements. The sync framework is needed only for post sync fences, but deferred cleanup is still always needed with semaphores because the internal tracking is done with dynamically allocated (although small) objects. Jira NVGPU-4548 Change-Id: I2e5a6554930cb413b2bb46ddfe388e41390bc7e4 Signed-off-by: Konsta Hölttä Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2321715 (cherry picked from commit d870956170906eae1088846ec05266c859669771) Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2318157 Tested-by: mobile promotions Reviewed-by: mobile promotions --- drivers/gpu/nvgpu/common/fifo/submit.c | 27 +++++++-------- drivers/gpu/nvgpu/os/linux/ioctl_channel.c | 40 ++++++++++++++++------ 2 files changed, 43 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/nvgpu/common/fifo/submit.c b/drivers/gpu/nvgpu/common/fifo/submit.c index dd3705398..938f4904a 100644 --- a/drivers/gpu/nvgpu/common/fifo/submit.c +++ b/drivers/gpu/nvgpu/common/fifo/submit.c @@ -33,6 +33,7 @@ #include #include #include +#include /* * Handle the submit synchronization - pre-fences and post-fences. @@ -441,8 +442,6 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c, #endif if (need_job_tracking) { - bool need_sync_framework = false; - /* * If the channel is to have deterministic latency and * job tracking is required, the channel must have @@ -453,26 +452,26 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c, return -EINVAL; } - need_sync_framework = - (nvgpu_channel_sync_needs_os_fence_framework(g) || - (flag_sync_fence && flag_fence_get)); - /* * Deferred clean-up is necessary for any of the following - * conditions: - * - channel's deterministic flag is not set - * - dependency on sync framework, which could make the - * behavior of the clean-up operation non-deterministic - * (should not be performed in the submit path) - * - channel wdt - * - buffer refcounting + * conditions that could make clean-up behaviour + * non-deterministic and as such not suitable for the submit + * path: + * - channel's deterministic flag is not set (job tracking is + * dynamically allocated) + * - no syncpt support (struct nvgpu_semaphore is dynamically + * allocated, not pooled) + * - dependency on sync framework for post fences + * - buffer refcounting, which is O(n) + * - channel wdt, which needs periodic async cleanup * * If none of the conditions are met, then deferred clean-up * is not required, and we clean-up one job-tracking * resource in the submit path. */ need_deferred_cleanup = !nvgpu_channel_is_deterministic(c) || - need_sync_framework || + !nvgpu_has_syncpoints(g) || + (flag_sync_fence && flag_fence_get) || !skip_buffer_refcounting; #ifdef CONFIG_NVGPU_CHANNEL_WDT diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c index 315bb45cc..1dd894cb3 100644 --- a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c +++ b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c @@ -795,6 +795,12 @@ static int gk20a_ioctl_channel_submit_gpfifo( int fd = -1; struct gk20a *g = ch->g; struct nvgpu_gpfifo_userdata userdata; + bool flag_fence_wait = (args->flags & + NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) != 0U; + bool flag_fence_get = (args->flags & + NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) != 0U; + bool flag_sync_fence = (args->flags & + NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) != 0U; int ret = 0; nvgpu_log_fn(g, " "); @@ -806,22 +812,36 @@ static int gk20a_ioctl_channel_submit_gpfifo( return -ETIMEDOUT; } - nvgpu_get_fence_args(&args->fence, &fence); - submit_flags = - nvgpu_submit_gpfifo_user_flags_to_common_flags(args->flags); +#ifndef CONFIG_SYNC + if (flag_sync_fence) { + return -EINVAL; + } +#endif + + /* + * In case we need the sync framework, require that the user requests + * it too for any fences. That's advertised in the gpu characteristics. + */ + if (nvgpu_channel_sync_needs_os_fence_framework(g) && + (flag_fence_wait || flag_fence_get) && !flag_sync_fence) { + return -EINVAL; + } /* Try and allocate an fd here*/ - if ((args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) - && (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)) { - fd = get_unused_fd_flags(O_RDWR); - if (fd < 0) - return fd; + if (flag_fence_get && flag_sync_fence) { + fd = get_unused_fd_flags(O_RDWR); + if (fd < 0) + return fd; } userdata.entries = (struct nvgpu_gpfifo_entry __user *) (uintptr_t)args->gpfifo; userdata.context = NULL; + nvgpu_get_fence_args(&args->fence, &fence); + submit_flags = + nvgpu_submit_gpfifo_user_flags_to_common_flags(args->flags); + ret = nvgpu_submit_channel_gpfifo_user(ch, userdata, args->num_entries, submit_flags, &fence, &fence_out, profile); @@ -833,8 +853,8 @@ static int gk20a_ioctl_channel_submit_gpfifo( } /* Convert fence_out to something we can pass back to user space. */ - if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) { - if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) { + if (flag_fence_get) { + if (flag_sync_fence) { ret = nvgpu_fence_install_fd(fence_out, fd); if (ret) put_unused_fd(fd);