gpu: nvgpu: require os fence when only supported

If the os fence is the only kind that's supported, fail a submit if the user wants fences but doesn't explicitly request sync fences, expecting syncpoints. Syncpoint support is advertised to userspace in the gpu characteristics, so userspace already has the knowledge to request the correct sync type. Do this check at the ioctl level. The in-kernel stuff that needs submits (cde, copyengine) can work without syncpoints and sync fences are used only in userspace. Fail a submit also if CONFIG_SYNC is not set and sync fences are requested. Lack of kernel support doesn't guarantee that userspace would still wrongly want that. Clarify the deferred cleanup requirements. The sync framework is needed only for post sync fences, but deferred cleanup is still always needed with semaphores because the internal tracking is done with dynamically allocated (although small) objects. Jira NVGPU-4548 Change-Id: I2e5a6554930cb413b2bb46ddfe388e41390bc7e4 Signed-off-by: Konsta Hölttä <kholtta@nvidia.com> Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2321715 (cherry picked from commit d870956170906eae1088846ec05266c859669771) Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2318157 Tested-by: mobile promotions <svcmobile_promotions@nvidia.com> Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
2025-12-22 17:36:20 +03:00 · 2020-03-25 11:06:46 +02:00
parent df896cd3c7
commit b813adbf49
2 changed files with 43 additions and 24 deletions
--- a/drivers/gpu/nvgpu/common/fifo/submit.c
+++ b/drivers/gpu/nvgpu/common/fifo/submit.c
@@ -33,6 +33,7 @@
 #include <nvgpu/profile.h>
 #include <nvgpu/vpr.h>
 #include <nvgpu/trace.h>
+#include <nvgpu/nvhost.h>

 /*
 * Handle the submit synchronization - pre-fences and post-fences.
@@ -441,8 +442,6 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c,
 #endif

 	if (need_job_tracking) {
-		bool need_sync_framework = false;
-
 		/*
 		 * If the channel is to have deterministic latency and
 		 * job tracking is required, the channel must have
@@ -453,26 +452,26 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c,
 			return -EINVAL;
 		}

-		need_sync_framework =
-			(nvgpu_channel_sync_needs_os_fence_framework(g) ||
-			 (flag_sync_fence && flag_fence_get));
-
 		/*
 		 * Deferred clean-up is necessary for any of the following
-		 * conditions:
-		 * - channel's deterministic flag is not set
-		 * - dependency on sync framework, which could make the
-		 *   behavior of the clean-up operation non-deterministic
-		 *   (should not be performed in the submit path)
-		 * - channel wdt
-		 * - buffer refcounting
+		 * conditions that could make clean-up behaviour
+		 * non-deterministic and as such not suitable for the submit
+		 * path:
+		 * - channel's deterministic flag is not set (job tracking is
+		 *   dynamically allocated)
+		 * - no syncpt support (struct nvgpu_semaphore is dynamically
+		 *   allocated, not pooled)
+		 * - dependency on sync framework for post fences
+		 * - buffer refcounting, which is O(n)
+		 * - channel wdt, which needs periodic async cleanup
 		 *
 		 * If none of the conditions are met, then deferred clean-up
 		 * is not required, and we clean-up one job-tracking
 		 * resource in the submit path.
 		 */
 		need_deferred_cleanup = !nvgpu_channel_is_deterministic(c) ||
-					need_sync_framework ||
+					!nvgpu_has_syncpoints(g) ||
+					(flag_sync_fence && flag_fence_get) ||
 					!skip_buffer_refcounting;

 #ifdef CONFIG_NVGPU_CHANNEL_WDT
--- a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c
@@ -795,6 +795,12 @@ static int gk20a_ioctl_channel_submit_gpfifo(
 	int fd = -1;
 	struct gk20a *g = ch->g;
 	struct nvgpu_gpfifo_userdata userdata;
+	bool flag_fence_wait = (args->flags &
+			NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) != 0U;
+	bool flag_fence_get = (args->flags &
+			NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) != 0U;
+	bool flag_sync_fence = (args->flags &
+			NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) != 0U;

 	int ret = 0;
 	nvgpu_log_fn(g, " ");
@@ -806,22 +812,36 @@ static int gk20a_ioctl_channel_submit_gpfifo(
 		return -ETIMEDOUT;
 	}

-	nvgpu_get_fence_args(&args->fence, &fence);
-	submit_flags =
-		nvgpu_submit_gpfifo_user_flags_to_common_flags(args->flags);
+#ifndef CONFIG_SYNC
+	if (flag_sync_fence) {
+		return -EINVAL;
+	}
+#endif
+
+	/*
+	 * In case we need the sync framework, require that the user requests
+	 * it too for any fences. That's advertised in the gpu characteristics.
+	 */
+	if (nvgpu_channel_sync_needs_os_fence_framework(g) &&
+		(flag_fence_wait || flag_fence_get) && !flag_sync_fence) {
+		return -EINVAL;
+	}

 	/* Try and allocate an fd here*/
-	if ((args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
-		&& (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)) {
-			fd = get_unused_fd_flags(O_RDWR);
-			if (fd < 0)
-				return fd;
+	if (flag_fence_get && flag_sync_fence) {
+		fd = get_unused_fd_flags(O_RDWR);
+		if (fd < 0)
+			return fd;
 	}

 	userdata.entries = (struct nvgpu_gpfifo_entry __user *)
 		(uintptr_t)args->gpfifo;
 	userdata.context = NULL;

+	nvgpu_get_fence_args(&args->fence, &fence);
+	submit_flags =
+		nvgpu_submit_gpfifo_user_flags_to_common_flags(args->flags);
+
 	ret = nvgpu_submit_channel_gpfifo_user(ch,
 			userdata, args->num_entries,
 			submit_flags, &fence, &fence_out, profile);
@@ -833,8 +853,8 @@ static int gk20a_ioctl_channel_submit_gpfifo(
 	}

 	/* Convert fence_out to something we can pass back to user space. */
-	if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) {
-		if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
+	if (flag_fence_get) {
+		if (flag_sync_fence) {
 			ret = nvgpu_fence_install_fd(fence_out, fd);
 			if (ret)
 				put_unused_fd(fd);