From d916e85171344607a6f3a5408eea7b9828c6ffb0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Konsta=20H=C3=B6ltt=C3=A4?=
Date: Wed, 6 May 2020 14:23:49 +0300
Subject: [PATCH] gpu: nvgpu: incr sync once submit is ready to go
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Split the max value increment and syncpt interrupt registration out of
nvgpu_channel_sync_incr*(). This API is called in the submit path to
prepare buffers and tracking resources, but errors can still occur
later in the submit path, in which case the increment would not happen
(unless artificially forced by SW).

The increment and IRQ registration cannot easily be undone, and it
makes more sense to do them at the moment when the prepared job is
finally ready, so add a new nvgpu_channel_sync_mark_progress() API to
be called later in the submit path to signal that progress will
eventually happen on the sync. Without this, the max value would stay
too large after an unsuccessful submit until the channel gets closed.

The sync object (syncpt or semaphore) is always exclusively owned by
the channel that allocated it, so non-atomically reading the max value
first in sync_incr() and incrementing it later in mark_progress() is
race-free; all submits per channel are serialized.

Change the channel syncpoint from host managed to client managed so
that nvhost-exported sync fences behave correctly during the temporary
state where the fence threshold is above the max value. Ideally we'd
always track nvgpu-owned syncpts' max values internally, but this is
enough for now.

Jira NVGPU-5491

Change-Id: Idf0bda7ac93d7f2f114cdeb497fe6b5369d21c95
Signed-off-by: Konsta Hölttä
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2340465
Reviewed-by: automaticguardword
Reviewed-by: svc-mobile-coverity
Reviewed-by: Alex Waterman
Reviewed-by: mobile promotions
Tested-by: mobile promotions
---
(Two illustrative sketches of the new incr/mark_progress call ordering
follow the diff below.)

 drivers/gpu/nvgpu/common/fifo/submit.c        |  17 ++-
 .../gpu/nvgpu/common/semaphore/semaphore.c    |   5 +-
 .../gpu/nvgpu/common/semaphore/semaphore_hw.c |   8 +-
 drivers/gpu/nvgpu/common/sync/channel_sync.c  |  15 ++-
 .../gpu/nvgpu/common/sync/channel_sync_priv.h |   9 +-
 .../common/sync/channel_sync_semaphore.c      |  22 +++-
 .../nvgpu/common/sync/channel_sync_syncpt.c   | 101 ++++++++++--------
 .../gpu/nvgpu/include/nvgpu/channel_sync.h    |  16 ++-
 .../nvgpu/include/nvgpu/posix/posix-nvhost.h  |   3 +
 drivers/gpu/nvgpu/os/posix/posix-nvhost.c     |   6 ++
 10 files changed, 130 insertions(+), 72 deletions(-)

diff --git a/drivers/gpu/nvgpu/common/fifo/submit.c b/drivers/gpu/nvgpu/common/fifo/submit.c
index 0ae9e38d1..e153982c3 100644
--- a/drivers/gpu/nvgpu/common/fifo/submit.c
+++ b/drivers/gpu/nvgpu/common/fifo/submit.c
@@ -80,7 +80,7 @@ static int nvgpu_submit_create_wait_cmd(struct nvgpu_channel *c,
 static int nvgpu_submit_create_incr_cmd(struct nvgpu_channel *c,
 		struct priv_cmd_entry **incr_cmd,
 		struct nvgpu_fence_type **post_fence, bool flag_fence_get,
-		bool need_wfi, bool need_sync_fence, bool register_irq)
+		bool need_wfi, bool need_sync_fence)
 {
 	int err;
 
@@ -91,11 +91,10 @@ static int nvgpu_submit_create_incr_cmd(struct nvgpu_channel *c,
 
 	if (flag_fence_get) {
 		err = nvgpu_channel_sync_incr_user(c->sync, incr_cmd,
-				*post_fence, need_wfi, need_sync_fence,
-				register_irq);
+				*post_fence, need_wfi, need_sync_fence);
 	} else {
 		err = nvgpu_channel_sync_incr(c->sync, incr_cmd,
-				*post_fence, need_sync_fence, register_irq);
+				*post_fence, need_sync_fence);
 	}
 
 	if (err != 0) {
@@ -112,7 +111,7 @@
static int nvgpu_submit_prepare_syncs(struct nvgpu_channel *c, struct nvgpu_channel_fence *fence, struct nvgpu_channel_job *job, - bool register_irq, u32 flags) + u32 flags) { struct gk20a *g = c->g; bool need_sync_fence; @@ -163,8 +162,7 @@ static int nvgpu_submit_prepare_syncs(struct nvgpu_channel *c, * if not requested by user. */ err = nvgpu_submit_create_incr_cmd(c, &job->incr_cmd, &job->post_fence, - flag_fence_get, need_wfi, need_sync_fence, - register_irq); + flag_fence_get, need_wfi, need_sync_fence); if (err != 0) { goto clean_up_wait_cmd; } @@ -354,8 +352,7 @@ static int nvgpu_submit_prepare_gpfifo_track(struct nvgpu_channel *c, return err; } - err = nvgpu_submit_prepare_syncs(c, fence, job, need_deferred_cleanup, - flags); + err = nvgpu_submit_prepare_syncs(c, fence, job, flags); if (err != 0) { goto clean_up_job; } @@ -383,6 +380,8 @@ static int nvgpu_submit_prepare_gpfifo_track(struct nvgpu_channel *c, goto clean_up_gpfifo_incr; } + nvgpu_channel_sync_mark_progress(c->sync, need_deferred_cleanup); + if (fence_out != NULL) { *fence_out = nvgpu_fence_get(job->post_fence); } diff --git a/drivers/gpu/nvgpu/common/semaphore/semaphore.c b/drivers/gpu/nvgpu/common/semaphore/semaphore.c index 27383650a..e0b349c5a 100644 --- a/drivers/gpu/nvgpu/common/semaphore/semaphore.c +++ b/drivers/gpu/nvgpu/common/semaphore/semaphore.c @@ -155,7 +155,8 @@ bool nvgpu_semaphore_can_wait(struct nvgpu_semaphore *s) void nvgpu_semaphore_prepare(struct nvgpu_semaphore *s, struct nvgpu_hw_semaphore *hw_sema) { - int next = nvgpu_hw_semaphore_update_next(hw_sema); + /* One submission increments the next value by one. */ + int next = nvgpu_hw_semaphore_read_next(hw_sema) + 1; /* "s" should be an uninitialized sema. */ WARN_ON(s->ready_to_wait); @@ -163,7 +164,7 @@ void nvgpu_semaphore_prepare(struct nvgpu_semaphore *s, nvgpu_atomic_set(&s->value, next); s->ready_to_wait = true; - gpu_sema_verbose_dbg(s->g, "INCR sema for c=%d (%u)", + gpu_sema_verbose_dbg(s->g, "PREP sema for c=%d (%u)", hw_sema->chid, next); } diff --git a/drivers/gpu/nvgpu/common/semaphore/semaphore_hw.c b/drivers/gpu/nvgpu/common/semaphore/semaphore_hw.c index 9d8a299a1..fed3e06c9 100644 --- a/drivers/gpu/nvgpu/common/semaphore/semaphore_hw.c +++ b/drivers/gpu/nvgpu/common/semaphore/semaphore_hw.c @@ -158,5 +158,11 @@ int nvgpu_hw_semaphore_read_next(struct nvgpu_hw_semaphore *hw_sema) int nvgpu_hw_semaphore_update_next(struct nvgpu_hw_semaphore *hw_sema) { - return nvgpu_atomic_add_return(1, &hw_sema->next_value); + int next = nvgpu_atomic_add_return(1, &hw_sema->next_value); + struct nvgpu_semaphore_pool *p = hw_sema->location.pool; + struct gk20a *g = p->sema_sea->gk20a; + + gpu_sema_verbose_dbg(g, "INCR sema for c=%d (%u)", + hw_sema->chid, next); + return next; } diff --git a/drivers/gpu/nvgpu/common/sync/channel_sync.c b/drivers/gpu/nvgpu/common/sync/channel_sync.c index 0d49052d1..205b5e02b 100644 --- a/drivers/gpu/nvgpu/common/sync/channel_sync.c +++ b/drivers/gpu/nvgpu/common/sync/channel_sync.c @@ -65,17 +65,22 @@ int nvgpu_channel_sync_wait_fence_fd(struct nvgpu_channel_sync *s, int fd, int nvgpu_channel_sync_incr(struct nvgpu_channel_sync *s, struct priv_cmd_entry **entry, struct nvgpu_fence_type *fence, - bool need_sync_fence, bool register_irq) + bool need_sync_fence) { - return s->ops->incr(s, entry, fence, need_sync_fence, register_irq); + return s->ops->incr(s, entry, fence, need_sync_fence); } int nvgpu_channel_sync_incr_user(struct nvgpu_channel_sync *s, struct priv_cmd_entry **entry, struct nvgpu_fence_type *fence, 
- bool wfi, bool need_sync_fence, bool register_irq) + bool wfi, bool need_sync_fence) { - return s->ops->incr_user(s, entry, fence, wfi, need_sync_fence, - register_irq); + return s->ops->incr_user(s, entry, fence, wfi, need_sync_fence); +} + +void nvgpu_channel_sync_mark_progress(struct nvgpu_channel_sync *s, + bool register_irq) +{ + s->ops->mark_progress(s, register_irq); } void nvgpu_channel_sync_set_min_eq_max(struct nvgpu_channel_sync *s) diff --git a/drivers/gpu/nvgpu/common/sync/channel_sync_priv.h b/drivers/gpu/nvgpu/common/sync/channel_sync_priv.h index 77966bcdb..4916b5ba5 100644 --- a/drivers/gpu/nvgpu/common/sync/channel_sync_priv.h +++ b/drivers/gpu/nvgpu/common/sync/channel_sync_priv.h @@ -62,15 +62,16 @@ struct nvgpu_channel_sync_ops { int (*incr)(struct nvgpu_channel_sync *s, struct priv_cmd_entry **entry, struct nvgpu_fence_type *fence, - bool need_sync_fence, - bool register_irq); + bool need_sync_fence); int (*incr_user)(struct nvgpu_channel_sync *s, struct priv_cmd_entry **entry, struct nvgpu_fence_type *fence, bool wfi, - bool need_sync_fence, - bool register_irq); + bool need_sync_fence); + + void (*mark_progress)(struct nvgpu_channel_sync *s, + bool register_irq); void (*set_min_eq_max)(struct nvgpu_channel_sync *s); diff --git a/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c b/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c index c236e1766..acfab9fa3 100644 --- a/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c +++ b/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c @@ -100,7 +100,7 @@ static void add_sema_incr_cmd(struct gk20a *g, struct nvgpu_channel *c, /* release will need to write back to the semaphore memory. */ va = nvgpu_semaphore_gpu_rw_va(s); - /* incr the underlying sema next_value (like syncpt's max). */ + /* find the right sema next_value to write (like syncpt's max). */ nvgpu_semaphore_prepare(s, hw_sema); g->ops.sync.sema.add_incr_cmd(g, cmd, s, va, wfi); @@ -237,8 +237,7 @@ static int channel_sync_semaphore_incr( struct nvgpu_channel_sync *s, struct priv_cmd_entry **entry, struct nvgpu_fence_type *fence, - bool need_sync_fence, - bool register_irq) + bool need_sync_fence) { /* Don't put wfi cmd to this one since we're not returning * a fence to user space. */ @@ -252,8 +251,7 @@ static int channel_sync_semaphore_incr_user( struct priv_cmd_entry **entry, struct nvgpu_fence_type *fence, bool wfi, - bool need_sync_fence, - bool register_irq) + bool need_sync_fence) { #ifndef CONFIG_NVGPU_SYNCFD_NONE int err; @@ -275,6 +273,19 @@ static int channel_sync_semaphore_incr_user( #endif } +static void channel_sync_semaphore_mark_progress(struct nvgpu_channel_sync *s, + bool register_irq) +{ + struct nvgpu_channel_sync_semaphore *sp = + nvgpu_channel_sync_semaphore_from_base(s); + + (void)nvgpu_hw_semaphore_update_next(sp->hw_sema); + /* + * register_irq is ignored: there is only one semaphore interrupt that + * triggers nvgpu_channel_update() and it's always active. 
+ */ +} + static void channel_sync_semaphore_set_min_eq_max(struct nvgpu_channel_sync *s) { struct nvgpu_channel_sync_semaphore *sp = @@ -310,6 +321,7 @@ static const struct nvgpu_channel_sync_ops channel_sync_semaphore_ops = { .wait_fence_fd = channel_sync_semaphore_wait_fd, .incr = channel_sync_semaphore_incr, .incr_user = channel_sync_semaphore_incr_user, + .mark_progress = channel_sync_semaphore_mark_progress, .set_min_eq_max = channel_sync_semaphore_set_min_eq_max, .destroy = channel_sync_semaphore_destroy, }; diff --git a/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c b/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c index 29a72a130..50ee638f5 100644 --- a/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c +++ b/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c @@ -159,13 +159,12 @@ static void channel_sync_syncpt_update(void *priv, int nr_completed) nvgpu_channel_update(ch); - /* note: channel_get() is in channel_sync_syncpt_incr_common() */ + /* note: channel_get() is in channel_sync_syncpt_mark_progress() */ nvgpu_channel_put(ch); } static int channel_sync_syncpt_incr_common(struct nvgpu_channel_sync *s, bool wfi_cmd, - bool register_irq, struct priv_cmd_entry **incr_cmd, struct nvgpu_fence_type *fence, bool need_sync_fence) @@ -189,36 +188,10 @@ static int channel_sync_syncpt_incr_common(struct nvgpu_channel_sync *s, c->g->ops.sync.syncpt.add_incr_cmd(c->g, *incr_cmd, sp->id, sp->syncpt_buf.gpu_va, wfi_cmd); - thresh = nvgpu_nvhost_syncpt_incr_max_ext(sp->nvhost, sp->id, + thresh = nvgpu_wrapping_add_u32( + nvgpu_nvhost_syncpt_read_maxval(sp->nvhost, sp->id), c->g->ops.sync.syncpt.get_incr_per_release()); - if (register_irq) { - struct nvgpu_channel *referenced = nvgpu_channel_get(c); - - WARN_ON(!referenced); - - if (referenced) { - /* note: channel_put() is in - * channel_sync_syncpt_update() */ - - err = nvgpu_nvhost_intr_register_notifier( - sp->nvhost, - sp->id, thresh, - channel_sync_syncpt_update, c); - if (err != 0) { - nvgpu_channel_put(referenced); - } - - /* Adding interrupt action should - * never fail. A proper error handling - * here would require us to decrement - * the syncpt max back to its original - * value. */ - WARN(err, - "failed to set submit complete interrupt"); - } - } - if (need_sync_fence) { err = nvgpu_os_fence_syncpt_create(&os_fence, c, sp->nvhost, sp->id, thresh); @@ -248,30 +221,69 @@ clean_up_priv_cmd: static int channel_sync_syncpt_incr(struct nvgpu_channel_sync *s, struct priv_cmd_entry **entry, struct nvgpu_fence_type *fence, - bool need_sync_fence, - bool register_irq) + bool need_sync_fence) { /* Don't put wfi cmd to this one since we're not returning * a fence to user space. */ - return channel_sync_syncpt_incr_common(s, - false /* no wfi */, - register_irq /* register irq */, - entry, fence, need_sync_fence); + return channel_sync_syncpt_incr_common(s, false, entry, fence, + need_sync_fence); } static int channel_sync_syncpt_incr_user(struct nvgpu_channel_sync *s, struct priv_cmd_entry **entry, struct nvgpu_fence_type *fence, bool wfi, - bool need_sync_fence, - bool register_irq) + bool need_sync_fence) { /* Need to do 'wfi + host incr' since we return the fence * to user space. 
*/ - return channel_sync_syncpt_incr_common(s, - wfi, - register_irq /* register irq */, - entry, fence, need_sync_fence); + return channel_sync_syncpt_incr_common(s, wfi, entry, fence, + need_sync_fence); +} + +static void channel_sync_syncpt_mark_progress(struct nvgpu_channel_sync *s, + bool register_irq) +{ + struct nvgpu_channel_sync_syncpt *sp = + nvgpu_channel_sync_syncpt_from_base(s); + struct nvgpu_channel *c = sp->c; + struct gk20a *g = c->g; + u32 thresh; + + thresh = nvgpu_nvhost_syncpt_incr_max_ext(sp->nvhost, sp->id, + g->ops.sync.syncpt.get_incr_per_release()); + + if (register_irq) { + struct nvgpu_channel *referenced = nvgpu_channel_get(c); + + WARN_ON(referenced == NULL); + + if (referenced != NULL) { + /* + * note: the matching channel_put() is in + * channel_sync_syncpt_update() that gets called when + * the job completes. + */ + + int err = nvgpu_nvhost_intr_register_notifier( + sp->nvhost, + sp->id, thresh, + channel_sync_syncpt_update, c); + if (err != 0) { + nvgpu_channel_put(referenced); + } + + /* + * This never fails in practice. If it does, we won't + * be getting a completion signal to free the job + * resources, but maybe this succeeds on a possible + * subsequent submit, and the channel closure path will + * eventually mark everything completed anyway. + */ + WARN(err != 0, + "failed to set submit complete interrupt"); + } + } } int nvgpu_channel_sync_wait_syncpt(struct nvgpu_channel_sync_syncpt *s, @@ -314,6 +326,7 @@ static const struct nvgpu_channel_sync_ops channel_sync_syncpt_ops = { .wait_fence_fd = channel_sync_syncpt_wait_fd, .incr = channel_sync_syncpt_incr, .incr_user = channel_sync_syncpt_incr_user, + .mark_progress = channel_sync_syncpt_mark_progress, .set_min_eq_max = channel_sync_syncpt_set_min_eq_max, .destroy = channel_sync_syncpt_destroy, }; @@ -348,8 +361,8 @@ nvgpu_channel_sync_syncpt_create(struct nvgpu_channel *c) snprintf(syncpt_name, sizeof(syncpt_name), "%s_%d", c->g->name, c->chid); - sp->id = nvgpu_nvhost_get_syncpt_host_managed(sp->nvhost, - c->chid, syncpt_name); + sp->id = nvgpu_nvhost_get_syncpt_client_managed(sp->nvhost, + syncpt_name); /** * This is a WAR to handle invalid value of a syncpt. diff --git a/drivers/gpu/nvgpu/include/nvgpu/channel_sync.h b/drivers/gpu/nvgpu/include/nvgpu/channel_sync.h index e19d36e39..067079614 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/channel_sync.h +++ b/drivers/gpu/nvgpu/include/nvgpu/channel_sync.h @@ -53,7 +53,7 @@ int nvgpu_channel_sync_wait_fence_fd(struct nvgpu_channel_sync *s, int fd, */ int nvgpu_channel_sync_incr(struct nvgpu_channel_sync *s, struct priv_cmd_entry **entry, struct nvgpu_fence_type *fence, - bool need_sync_fence, bool register_irq); + bool need_sync_fence); /* * Increment syncpoint/semaphore, so that the returned fence represents @@ -65,7 +65,19 @@ int nvgpu_channel_sync_incr(struct nvgpu_channel_sync *s, */ int nvgpu_channel_sync_incr_user(struct nvgpu_channel_sync *s, struct priv_cmd_entry **entry, struct nvgpu_fence_type *fence, - bool wfi, bool need_sync_fence, bool register_irq); + bool wfi, bool need_sync_fence); + +/* + * Tell the sync that some progress will eventually happen on it: increase the + * tracked max value of the underlying syncpoint/semaphore and maybe register + * an interrupt notifier to be called if needed so that the channel gets a + * job completion signal. + * + * @param register_irq [in] Register an interrupt for the increment. 
+ */ +void nvgpu_channel_sync_mark_progress(struct nvgpu_channel_sync *s, + bool register_irq); + /* * Reset the channel syncpoint/semaphore. Syncpoint increments generally * wrap around the range of integer values. Current max value encompasses diff --git a/drivers/gpu/nvgpu/include/nvgpu/posix/posix-nvhost.h b/drivers/gpu/nvgpu/include/nvgpu/posix/posix-nvhost.h index 39817cd3e..fe705d09d 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/posix/posix-nvhost.h +++ b/drivers/gpu/nvgpu/include/nvgpu/posix/posix-nvhost.h @@ -85,4 +85,7 @@ int nvgpu_nvhost_syncpt_wait_timeout_ext( int nvgpu_nvhost_syncpt_read_ext_check( struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 *val); +u32 nvgpu_nvhost_syncpt_read_maxval( + struct nvgpu_nvhost_dev *nvhost_dev, u32 id); + #endif diff --git a/drivers/gpu/nvgpu/os/posix/posix-nvhost.c b/drivers/gpu/nvgpu/os/posix/posix-nvhost.c index 9375b0ca3..3500df743 100644 --- a/drivers/gpu/nvgpu/os/posix/posix-nvhost.c +++ b/drivers/gpu/nvgpu/os/posix/posix-nvhost.c @@ -178,6 +178,12 @@ int nvgpu_nvhost_syncpt_read_ext_check( return -ENOSYS; } +u32 nvgpu_nvhost_syncpt_read_maxval( + struct nvgpu_nvhost_dev *nvhost_dev, u32 id) +{ + return 0U; +} + int nvgpu_nvhost_syncpt_wait_timeout_ext( struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 thresh, u32 timeout, u32 waiter_index)
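
For illustration, a minimal sketch of the call ordering that the reworked
submit path is meant to follow, using nvgpu_channel_sync_incr() and
nvgpu_channel_sync_mark_progress() as declared in channel_sync.h above. The
example_*() helpers, the include paths, and the simplified error handling are
placeholders for this sketch only, not actual nvgpu code:

#include <nvgpu/channel.h>
#include <nvgpu/channel_sync.h>

/* Placeholder for the remaining submit work that may still fail. */
static int example_append_gpfifo(struct nvgpu_channel *c)
{
	(void)c;
	return 0;
}

/* Placeholder cleanup; the real path frees the job and priv cmdbufs. */
static void example_free_job(struct nvgpu_channel *c,
		struct priv_cmd_entry *incr_cmd,
		struct nvgpu_fence_type *post_fence)
{
	(void)c;
	(void)incr_cmd;
	(void)post_fence;
}

static int example_submit(struct nvgpu_channel *c,
		struct nvgpu_fence_type *post_fence,
		bool need_deferred_cleanup)
{
	struct priv_cmd_entry *incr_cmd = NULL;
	int err;

	/*
	 * Prepare the increment commands and the post fence. With this
	 * patch, this no longer bumps the syncpt/sema max value and no
	 * longer registers the completion interrupt.
	 */
	err = nvgpu_channel_sync_incr(c->sync, &incr_cmd, post_fence,
			false /* need_sync_fence */);
	if (err != 0) {
		return err;
	}

	err = example_append_gpfifo(c);
	if (err != 0) {
		/*
		 * Bailing out here is now harmless: the max value was never
		 * incremented, so nothing on the sync object needs undoing.
		 */
		example_free_job(c, incr_cmd, post_fence);
		return err;
	}

	/*
	 * The job can no longer fail: commit to it by bumping the max value
	 * and, for syncpoints, registering the completion interrupt when
	 * deferred cleanup is needed.
	 */
	nvgpu_channel_sync_mark_progress(c->sync, need_deferred_cleanup);
	return 0;
}

In the real path (nvgpu_submit_prepare_gpfifo_track() in the diff above),
mark_progress() is called right after the gpfifo entries have been committed,
with need_deferred_cleanup deciding whether the syncpoint interrupt is
registered.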
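
The serialization argument in the commit message (the max value is read
non-atomically in sync_incr() and only committed in mark_progress(), which is
safe because the sync object is channel-owned and submits per channel are
serialized) can be shown with a small self-contained model. All toy_* names
are invented for the example and are not nvgpu code:

#include <assert.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Toy model of a channel-owned syncpoint; only the max value is tracked. */
struct toy_syncpt {
	uint32_t max;	/* highest threshold the GPU has been told to reach */
};

/* Like sync_incr(): compute the job's threshold without committing it. */
static uint32_t toy_incr(const struct toy_syncpt *sp, uint32_t per_release)
{
	return sp->max + per_release;	/* wrapping add, as in the patch */
}

/* Like mark_progress(): commit the increment once the job cannot fail. */
static void toy_mark_progress(struct toy_syncpt *sp, uint32_t per_release)
{
	sp->max += per_release;
}

int main(void)
{
	struct toy_syncpt sp = { .max = 100U };
	const uint32_t per_release = 2U;
	uint32_t thresh;

	/* Failed submit: a threshold was computed but never committed. */
	thresh = toy_incr(&sp, per_release);
	assert(thresh == 102U && sp.max == 100U);

	/* Successful submit: the same threshold is computed and committed. */
	thresh = toy_incr(&sp, per_release);
	toy_mark_progress(&sp, per_release);
	assert(thresh == sp.max);

	/*
	 * Submits on one channel are serialized, so no second submit can run
	 * toy_incr()/toy_mark_progress() on this syncpoint in between; the
	 * split read/commit therefore needs no atomics.
	 */
	printf("max=%" PRIu32 " thresh=%" PRIu32 "\n", sp.max, thresh);
	return 0;
}

A failed submit leaves max untouched, which is exactly the "max value would
stay too large" problem the patch avoids.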