From b813adbf497a42bd6b49ea32b888ce6da1ccfde8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Konsta=20H=C3=B6ltt=C3=A4?= <kholtta@nvidia.com>
Date: Wed, 25 Mar 2020 11:06:46 +0200
Subject: [PATCH] gpu: nvgpu: require os fence when only supported
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If the os fence is the only kind that's supported, fail a submit if the
user wants fences but doesn't explicitly request sync fences, expecting
syncpoints. Syncpoint support is advertised to userspace in the gpu
characteristics, so userspace already has the knowledge to request the
correct sync type.

Do this check at the ioctl level. The in-kernel stuff that needs submits
(cde, copyengine) can work without syncpoints and sync fences are used
only in userspace.

Fail a submit also if CONFIG_SYNC is not set and sync fences are
requested. Lack of kernel support doesn't guarantee that userspace would
still wrongly want that.

Clarify the deferred cleanup requirements. The sync framework is needed
only for post sync fences, but deferred cleanup is still always needed
with semaphores because the internal tracking is done with dynamically
allocated (although small) objects.

Jira NVGPU-4548

Change-Id: I2e5a6554930cb413b2bb46ddfe388e41390bc7e4
Signed-off-by: Konsta Hölttä <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2321715
(cherry picked from commit d870956170906eae1088846ec05266c859669771)
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2318157
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 drivers/gpu/nvgpu/common/fifo/submit.c     | 27 +++++++--------
 drivers/gpu/nvgpu/os/linux/ioctl_channel.c | 40 ++++++++++++++++------
 2 files changed, 43 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/nvgpu/common/fifo/submit.c b/drivers/gpu/nvgpu/common/fifo/submit.c
index dd3705398..938f4904a 100644
--- a/drivers/gpu/nvgpu/common/fifo/submit.c
+++ b/drivers/gpu/nvgpu/common/fifo/submit.c
@@ -33,6 +33,7 @@
 #include <nvgpu/profile.h>
 #include <nvgpu/vpr.h>
 #include <nvgpu/trace.h>
+#include <nvgpu/nvhost.h>
 
 /*
  * Handle the submit synchronization - pre-fences and post-fences.
@@ -441,8 +442,6 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c,
 #endif
 
 	if (need_job_tracking) {
-		bool need_sync_framework = false;
-
 		/*
 		 * If the channel is to have deterministic latency and
 		 * job tracking is required, the channel must have
@@ -453,26 +452,26 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c,
 			return -EINVAL;
 		}
 
-		need_sync_framework =
-			(nvgpu_channel_sync_needs_os_fence_framework(g) ||
-			 (flag_sync_fence && flag_fence_get));
-
 		/*
 		 * Deferred clean-up is necessary for any of the following
-		 * conditions:
-		 * - channel's deterministic flag is not set
-		 * - dependency on sync framework, which could make the
-		 *   behavior of the clean-up operation non-deterministic
-		 *   (should not be performed in the submit path)
-		 * - channel wdt
-		 * - buffer refcounting
+		 * conditions that could make clean-up behaviour
+		 * non-deterministic and as such not suitable for the submit
+		 * path:
+		 * - channel's deterministic flag is not set (job tracking is
+		 *   dynamically allocated)
+		 * - no syncpt support (struct nvgpu_semaphore is dynamically
+		 *   allocated, not pooled)
+		 * - dependency on sync framework for post fences
+		 * - buffer refcounting, which is O(n)
+		 * - channel wdt, which needs periodic async cleanup
 		 *
 		 * If none of the conditions are met, then deferred clean-up
 		 * is not required, and we clean-up one job-tracking
 		 * resource in the submit path.
 		 */
 		need_deferred_cleanup = !nvgpu_channel_is_deterministic(c) ||
-					need_sync_framework ||
+					!nvgpu_has_syncpoints(g) ||
+					(flag_sync_fence && flag_fence_get) ||
 					!skip_buffer_refcounting;
 
 #ifdef CONFIG_NVGPU_CHANNEL_WDT
diff --git a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c
index 315bb45cc..1dd894cb3 100644
--- a/drivers/gpu/nvgpu/os/linux/ioctl_channel.c
+++ b/drivers/gpu/nvgpu/os/linux/ioctl_channel.c
@@ -795,6 +795,12 @@ static int gk20a_ioctl_channel_submit_gpfifo(
 	int fd = -1;
 	struct gk20a *g = ch->g;
 	struct nvgpu_gpfifo_userdata userdata;
+	bool flag_fence_wait = (args->flags &
+			NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_WAIT) != 0U;
+	bool flag_fence_get = (args->flags &
+			NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) != 0U;
+	bool flag_sync_fence = (args->flags &
+			NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) != 0U;
 
 	int ret = 0;
 	nvgpu_log_fn(g, " ");
@@ -806,22 +812,36 @@ static int gk20a_ioctl_channel_submit_gpfifo(
 		return -ETIMEDOUT;
 	}
 
-	nvgpu_get_fence_args(&args->fence, &fence);
-	submit_flags =
-		nvgpu_submit_gpfifo_user_flags_to_common_flags(args->flags);
+#ifndef CONFIG_SYNC
+	if (flag_sync_fence) {
+		return -EINVAL;
+	}
+#endif
+
+	/*
+	 * In case we need the sync framework, require that the user requests
+	 * it too for any fences. That's advertised in the gpu characteristics.
+	 */
+	if (nvgpu_channel_sync_needs_os_fence_framework(g) &&
+		(flag_fence_wait || flag_fence_get) && !flag_sync_fence) {
+		return -EINVAL;
+	}
 
 	/* Try and allocate an fd here*/
-	if ((args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET)
-		&& (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE)) {
-			fd = get_unused_fd_flags(O_RDWR);
-			if (fd < 0)
-				return fd;
+	if (flag_fence_get && flag_sync_fence) {
+		fd = get_unused_fd_flags(O_RDWR);
+		if (fd < 0)
+			return fd;
 	}
 
 	userdata.entries = (struct nvgpu_gpfifo_entry __user *)
 		(uintptr_t)args->gpfifo;
 	userdata.context = NULL;
 
+	nvgpu_get_fence_args(&args->fence, &fence);
+	submit_flags =
+		nvgpu_submit_gpfifo_user_flags_to_common_flags(args->flags);
+
 	ret = nvgpu_submit_channel_gpfifo_user(ch,
 			userdata, args->num_entries,
 			submit_flags, &fence, &fence_out, profile);
@@ -833,8 +853,8 @@ static int gk20a_ioctl_channel_submit_gpfifo(
 	}
 
 	/* Convert fence_out to something we can pass back to user space. */
-	if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_FENCE_GET) {
-		if (args->flags & NVGPU_SUBMIT_GPFIFO_FLAGS_SYNC_FENCE) {
+	if (flag_fence_get) {
+		if (flag_sync_fence) {
 			ret = nvgpu_fence_install_fd(fence_out, fd);
 			if (ret)
 				put_unused_fd(fd);