From 52835c39aeec34904b9e1c316fd3607277df2ae5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Konsta=20H=C3=B6ltt=C3=A4?=
Date: Wed, 1 Apr 2020 09:24:15 +0300
Subject: [PATCH] gpu: nvgpu: do not skip completed syncpt prefences
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A corner case has existed since ancient times for syncpoint-backed
prefences to not cause a gpu wait if the fence is found to be completed
in the submit path. This adds some unnecessary complexity, so don't
check for completion in software. Let the gpu "wait" for these
known-to-be-trivial waits too. Necessary priv cmdbuf space has been
allocated anyway.

Originally nvhost had 16-bit fences which would wrap around relatively
quickly, so waiting for an old fence could have looked like waiting for
a fence that will expire long in the future. With 32-bit thresholds,
this hasn't been the case for several Tegra generations anymore, and
nvhost doesn't ignore waits like this either.

The wait priv cmdbuf in the submit path can still be missing even with
a prefence supplied because the Android sync framework supports sync
fds that contain zero fences inside; this can happen at least when
merging fences that have all expired. In such conditions the wait
cmdbuf wouldn't even get allocated.

[this is squashed with commit 8b3b0cb12d118 (gpu: nvgpu: allow no wait
cmd with valid input fence) from
https://git-master.nvidia.com/r/c/linux-nvgpu/+/2325677]

Jira NVGPU-4548

Change-Id: Ie81fd8735c2614d0fedb7242dc9869d0961610eb
Signed-off-by: Konsta Hölttä
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2321762
(cherry picked from commit 8f3dac44934eb727b1bf4fb853f019cf4c15a5cd)
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2324254
Reviewed-by: automaticguardword
Reviewed-by: svc-mobile-coverity
Reviewed-by: svc-mobile-misra
Reviewed-by: svc-mobile-cert
Reviewed-by: Alex Waterman
Reviewed-by: mobile promotions
GVS: Gerrit_Virtual_Submit
Tested-by: mobile promotions
---
 drivers/gpu/nvgpu/common/fifo/submit.c        |  5 +--
 .../nvgpu/common/sync/channel_sync_syncpt.c   | 34 +++++++------------
 2 files changed, 15 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/nvgpu/common/fifo/submit.c b/drivers/gpu/nvgpu/common/fifo/submit.c
index 162cc19b5..1c47aa5f6 100644
--- a/drivers/gpu/nvgpu/common/fifo/submit.c
+++ b/drivers/gpu/nvgpu/common/fifo/submit.c
@@ -573,8 +573,9 @@ static int nvgpu_submit_channel_gpfifo(struct nvgpu_channel *c,
 	nvgpu_profile_snapshot(profile, PROFILE_JOB_TRACKING);
 
 	/*
-	 * wait_cmd can be unset even if flag_fence_wait exists. See
-	 * the expiration check in channel_sync_syncpt_gen_wait_cmd.
+	 * wait_cmd can be unset even if flag_fence_wait exists; the
+	 * android sync framework for example can provide entirely
+	 * empty fences that act like trivially expired waits.
 	 */
 	if (wait_cmd != NULL) {
 		nvgpu_submit_append_priv_cmdbuf(c, wait_cmd);
diff --git a/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c b/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c
index feefd3bf7..adcdd6026 100644
--- a/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c
+++ b/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c
@@ -60,31 +60,21 @@ static int channel_sync_syncpt_gen_wait_cmd(struct nvgpu_channel *c,
 		u32 wait_cmd_size, u32 pos, bool preallocated)
 {
 	int err = 0;
-	bool is_expired = nvgpu_nvhost_syncpt_is_expired_ext(
-			c->g->nvhost, id, thresh);
 
-	if (is_expired) {
-		if (preallocated) {
-			nvgpu_memset(c->g, wait_cmd->mem,
-			(wait_cmd->off + pos * wait_cmd_size) * (u32)sizeof(u32),
-				0, wait_cmd_size * (u32)sizeof(u32));
+	if (!preallocated) {
+		err = nvgpu_channel_alloc_priv_cmdbuf(c,
+			c->g->ops.sync.syncpt.get_wait_cmd_size(),
+			wait_cmd);
+		if (err != 0) {
+			nvgpu_err(c->g, "not enough priv cmd buffer space");
+			return err;
 		}
-	} else {
-		if (!preallocated) {
-			err = nvgpu_channel_alloc_priv_cmdbuf(c,
-				c->g->ops.sync.syncpt.get_wait_cmd_size(),
-				wait_cmd);
-			if (err != 0) {
-				nvgpu_err(c->g, "not enough priv cmd buffer space");
-				return err;
-			}
-		}
-		nvgpu_log(c->g, gpu_dbg_info, "sp->id %d gpu va %llx",
-			id, c->vm->syncpt_ro_map_gpu_va);
-		c->g->ops.sync.syncpt.add_wait_cmd(c->g, wait_cmd,
-			pos * wait_cmd_size, id, thresh,
-			c->vm->syncpt_ro_map_gpu_va);
 	}
+	nvgpu_log(c->g, gpu_dbg_info, "sp->id %d gpu va %llx",
+		id, c->vm->syncpt_ro_map_gpu_va);
+	c->g->ops.sync.syncpt.add_wait_cmd(c->g, wait_cmd,
+		pos * wait_cmd_size, id, thresh,
+		c->vm->syncpt_ro_map_gpu_va);
 	return 0;
 }
 
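The 16-bit wraparound argument in the commit message can be made concrete with a
small standalone sketch. This is illustrative C only and is not nvgpu or nvhost
code; the expired16()/expired32() helpers are hypothetical names for the usual
serial-number-style comparison assumed here.

/*
 * Illustrative sketch only (not nvgpu/nvhost code): a serial-number
 * style "has this fence expired?" check, assuming the comparison is
 * done modulo the counter width.
 */
#include <stdint.h>
#include <stdio.h>

static int expired16(uint16_t current, uint16_t thresh)
{
	/* expired once current has passed thresh, modulo 2^16 */
	return (int16_t)(uint16_t)(current - thresh) >= 0;
}

static int expired32(uint32_t current, uint32_t thresh)
{
	/* same check, modulo 2^32 */
	return (int32_t)(current - thresh) >= 0;
}

int main(void)
{
	/* Old threshold 100; the counter has since advanced to 40100. */
	printf("16-bit: expired=%d\n", expired16(40100, 100)); /* 0: looks far in the future */
	printf("32-bit: expired=%d\n", expired32(40100, 100)); /* 1: clearly expired */
	return 0;
}

Under this assumption, once a 16-bit counter advances more than half the wrap
range past an old threshold, the old fence looks like a wait far in the future,
which is why a software-side completion check originally mattered; with 32-bit
thresholds the same situation would take about two billion increments, matching
the commit message's rationale for dropping the check.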