diff --git a/drivers/gpu/nvgpu/common/fifo/submit.c b/drivers/gpu/nvgpu/common/fifo/submit.c
index 0ae9e38d1..e153982c3 100644
--- a/drivers/gpu/nvgpu/common/fifo/submit.c
+++ b/drivers/gpu/nvgpu/common/fifo/submit.c
@@ -80,7 +80,7 @@ static int nvgpu_submit_create_wait_cmd(struct nvgpu_channel *c,
 static int nvgpu_submit_create_incr_cmd(struct nvgpu_channel *c,
 		struct priv_cmd_entry **incr_cmd,
 		struct nvgpu_fence_type **post_fence, bool flag_fence_get,
-		bool need_wfi, bool need_sync_fence, bool register_irq)
+		bool need_wfi, bool need_sync_fence)
 {
 	int err;
 
@@ -91,11 +91,10 @@ static int nvgpu_submit_create_incr_cmd(struct nvgpu_channel *c,
 
 	if (flag_fence_get) {
 		err = nvgpu_channel_sync_incr_user(c->sync, incr_cmd,
-				*post_fence, need_wfi, need_sync_fence,
-				register_irq);
+				*post_fence, need_wfi, need_sync_fence);
 	} else {
 		err = nvgpu_channel_sync_incr(c->sync, incr_cmd,
-				*post_fence, need_sync_fence, register_irq);
+				*post_fence, need_sync_fence);
 	}
 
 	if (err != 0) {
@@ -112,7 +111,7 @@ static int nvgpu_submit_create_incr_cmd(struct nvgpu_channel *c,
 static int nvgpu_submit_prepare_syncs(struct nvgpu_channel *c,
 				      struct nvgpu_channel_fence *fence,
 				      struct nvgpu_channel_job *job,
-				      bool register_irq, u32 flags)
+				      u32 flags)
 {
 	struct gk20a *g = c->g;
 	bool need_sync_fence;
@@ -163,8 +162,7 @@ static int nvgpu_submit_prepare_syncs(struct nvgpu_channel *c,
 	 * if not requested by user.
 	 */
 	err = nvgpu_submit_create_incr_cmd(c, &job->incr_cmd, &job->post_fence,
-			flag_fence_get, need_wfi, need_sync_fence,
-			register_irq);
+			flag_fence_get, need_wfi, need_sync_fence);
 	if (err != 0) {
 		goto clean_up_wait_cmd;
 	}
@@ -354,8 +352,7 @@ static int nvgpu_submit_prepare_gpfifo_track(struct nvgpu_channel *c,
 		return err;
 	}
 
-	err = nvgpu_submit_prepare_syncs(c, fence, job, need_deferred_cleanup,
-			flags);
+	err = nvgpu_submit_prepare_syncs(c, fence, job, flags);
 	if (err != 0) {
 		goto clean_up_job;
 	}
@@ -383,6 +380,8 @@ static int nvgpu_submit_prepare_gpfifo_track(struct nvgpu_channel *c,
 		goto clean_up_gpfifo_incr;
 	}
 
+	nvgpu_channel_sync_mark_progress(c->sync, need_deferred_cleanup);
+
 	if (fence_out != NULL) {
 		*fence_out = nvgpu_fence_get(job->post_fence);
 	}
diff --git a/drivers/gpu/nvgpu/common/semaphore/semaphore.c b/drivers/gpu/nvgpu/common/semaphore/semaphore.c
index 27383650a..e0b349c5a 100644
--- a/drivers/gpu/nvgpu/common/semaphore/semaphore.c
+++ b/drivers/gpu/nvgpu/common/semaphore/semaphore.c
@@ -155,7 +155,8 @@ bool nvgpu_semaphore_can_wait(struct nvgpu_semaphore *s)
 void nvgpu_semaphore_prepare(struct nvgpu_semaphore *s,
 		struct nvgpu_hw_semaphore *hw_sema)
 {
-	int next = nvgpu_hw_semaphore_update_next(hw_sema);
+	/* One submission increments the next value by one. */
+	int next = nvgpu_hw_semaphore_read_next(hw_sema) + 1;
 
 	/* "s" should be an uninitialized sema. */
 	WARN_ON(s->ready_to_wait);
@@ -163,7 +164,7 @@ void nvgpu_semaphore_prepare(struct nvgpu_semaphore *s,
 	nvgpu_atomic_set(&s->value, next);
 	s->ready_to_wait = true;
 
-	gpu_sema_verbose_dbg(s->g, "INCR sema for c=%d (%u)",
+	gpu_sema_verbose_dbg(s->g, "PREP sema for c=%d (%u)",
 			     hw_sema->chid, next);
 }
 
diff --git a/drivers/gpu/nvgpu/common/semaphore/semaphore_hw.c b/drivers/gpu/nvgpu/common/semaphore/semaphore_hw.c
index 9d8a299a1..fed3e06c9 100644
--- a/drivers/gpu/nvgpu/common/semaphore/semaphore_hw.c
+++ b/drivers/gpu/nvgpu/common/semaphore/semaphore_hw.c
@@ -158,5 +158,11 @@ int nvgpu_hw_semaphore_read_next(struct nvgpu_hw_semaphore *hw_sema)
 
 int nvgpu_hw_semaphore_update_next(struct nvgpu_hw_semaphore *hw_sema)
 {
-	return nvgpu_atomic_add_return(1, &hw_sema->next_value);
+	int next = nvgpu_atomic_add_return(1, &hw_sema->next_value);
+	struct nvgpu_semaphore_pool *p = hw_sema->location.pool;
+	struct gk20a *g = p->sema_sea->gk20a;
+
+	gpu_sema_verbose_dbg(g, "INCR sema for c=%d (%u)",
+			     hw_sema->chid, next);
+	return next;
 }
diff --git a/drivers/gpu/nvgpu/common/sync/channel_sync.c b/drivers/gpu/nvgpu/common/sync/channel_sync.c
index 0d49052d1..205b5e02b 100644
--- a/drivers/gpu/nvgpu/common/sync/channel_sync.c
+++ b/drivers/gpu/nvgpu/common/sync/channel_sync.c
@@ -65,17 +65,22 @@ int nvgpu_channel_sync_wait_fence_fd(struct nvgpu_channel_sync *s, int fd,
 
 int nvgpu_channel_sync_incr(struct nvgpu_channel_sync *s,
 	struct priv_cmd_entry **entry, struct nvgpu_fence_type *fence,
-	bool need_sync_fence, bool register_irq)
+	bool need_sync_fence)
 {
-	return s->ops->incr(s, entry, fence, need_sync_fence, register_irq);
+	return s->ops->incr(s, entry, fence, need_sync_fence);
 }
 
 int nvgpu_channel_sync_incr_user(struct nvgpu_channel_sync *s,
 	struct priv_cmd_entry **entry, struct nvgpu_fence_type *fence,
-	bool wfi, bool need_sync_fence, bool register_irq)
+	bool wfi, bool need_sync_fence)
 {
-	return s->ops->incr_user(s, entry, fence, wfi, need_sync_fence,
-			register_irq);
+	return s->ops->incr_user(s, entry, fence, wfi, need_sync_fence);
+}
+
+void nvgpu_channel_sync_mark_progress(struct nvgpu_channel_sync *s,
+	bool register_irq)
+{
+	s->ops->mark_progress(s, register_irq);
 }
 
 void nvgpu_channel_sync_set_min_eq_max(struct nvgpu_channel_sync *s)
diff --git a/drivers/gpu/nvgpu/common/sync/channel_sync_priv.h b/drivers/gpu/nvgpu/common/sync/channel_sync_priv.h
index 77966bcdb..4916b5ba5 100644
--- a/drivers/gpu/nvgpu/common/sync/channel_sync_priv.h
+++ b/drivers/gpu/nvgpu/common/sync/channel_sync_priv.h
@@ -62,15 +62,16 @@ struct nvgpu_channel_sync_ops {
 	int (*incr)(struct nvgpu_channel_sync *s,
 		    struct priv_cmd_entry **entry,
 		    struct nvgpu_fence_type *fence,
-		    bool need_sync_fence,
-		    bool register_irq);
+		    bool need_sync_fence);
 
 	int (*incr_user)(struct nvgpu_channel_sync *s,
 			 struct priv_cmd_entry **entry,
 			 struct nvgpu_fence_type *fence,
 			 bool wfi,
-			 bool need_sync_fence,
-			 bool register_irq);
+			 bool need_sync_fence);
+
+	void (*mark_progress)(struct nvgpu_channel_sync *s,
+			     bool register_irq);
 
 	void (*set_min_eq_max)(struct nvgpu_channel_sync *s);
 
diff --git a/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c b/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c
index c236e1766..acfab9fa3 100644
--- a/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c
+++ b/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c
@@ -100,7 +100,7 @@ static void add_sema_incr_cmd(struct gk20a *g, struct nvgpu_channel *c,
 	/* release will need to write back to the semaphore memory. */
 	va = nvgpu_semaphore_gpu_rw_va(s);
 
-	/* incr the underlying sema next_value (like syncpt's max). */
+	/* find the right sema next_value to write (like syncpt's max). */
 	nvgpu_semaphore_prepare(s, hw_sema);
 
 	g->ops.sync.sema.add_incr_cmd(g, cmd, s, va, wfi);
@@ -237,8 +237,7 @@ static int channel_sync_semaphore_incr(
 		struct nvgpu_channel_sync *s,
 		struct priv_cmd_entry **entry,
 		struct nvgpu_fence_type *fence,
-		bool need_sync_fence,
-		bool register_irq)
+		bool need_sync_fence)
 {
 	/* Don't put wfi cmd to this one since we're not returning
 	 * a fence to user space. */
@@ -252,8 +251,7 @@ static int channel_sync_semaphore_incr_user(
 		struct priv_cmd_entry **entry,
 		struct nvgpu_fence_type *fence,
 		bool wfi,
-		bool need_sync_fence,
-		bool register_irq)
+		bool need_sync_fence)
 {
 #ifndef CONFIG_NVGPU_SYNCFD_NONE
 	int err;
@@ -275,6 +273,19 @@ static int channel_sync_semaphore_incr_user(
 #endif
 }
 
+static void channel_sync_semaphore_mark_progress(struct nvgpu_channel_sync *s,
+		bool register_irq)
+{
+	struct nvgpu_channel_sync_semaphore *sp =
+		nvgpu_channel_sync_semaphore_from_base(s);
+
+	(void)nvgpu_hw_semaphore_update_next(sp->hw_sema);
+	/*
+	 * register_irq is ignored: there is only one semaphore interrupt that
+	 * triggers nvgpu_channel_update() and it's always active.
+	 */
+}
+
 static void channel_sync_semaphore_set_min_eq_max(struct nvgpu_channel_sync *s)
 {
 	struct nvgpu_channel_sync_semaphore *sp =
@@ -310,6 +321,7 @@ static const struct nvgpu_channel_sync_ops channel_sync_semaphore_ops = {
 	.wait_fence_fd		= channel_sync_semaphore_wait_fd,
 	.incr			= channel_sync_semaphore_incr,
 	.incr_user		= channel_sync_semaphore_incr_user,
+	.mark_progress		= channel_sync_semaphore_mark_progress,
 	.set_min_eq_max		= channel_sync_semaphore_set_min_eq_max,
 	.destroy		= channel_sync_semaphore_destroy,
 };
diff --git a/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c b/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c
index 29a72a130..50ee638f5 100644
--- a/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c
+++ b/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c
@@ -159,13 +159,12 @@ static void channel_sync_syncpt_update(void *priv, int nr_completed)
 
 	nvgpu_channel_update(ch);
 
-	/* note: channel_get() is in channel_sync_syncpt_incr_common() */
+	/* note: channel_get() is in channel_sync_syncpt_mark_progress() */
 	nvgpu_channel_put(ch);
 }
 
 static int channel_sync_syncpt_incr_common(struct nvgpu_channel_sync *s,
 				       bool wfi_cmd,
-				       bool register_irq,
 				       struct priv_cmd_entry **incr_cmd,
 				       struct nvgpu_fence_type *fence,
 				       bool need_sync_fence)
@@ -189,36 +188,10 @@ static int channel_sync_syncpt_incr_common(struct nvgpu_channel_sync *s,
 	c->g->ops.sync.syncpt.add_incr_cmd(c->g, *incr_cmd,
 			sp->id, sp->syncpt_buf.gpu_va, wfi_cmd);
 
-	thresh = nvgpu_nvhost_syncpt_incr_max_ext(sp->nvhost, sp->id,
+	thresh = nvgpu_wrapping_add_u32(
+			nvgpu_nvhost_syncpt_read_maxval(sp->nvhost, sp->id),
 			c->g->ops.sync.syncpt.get_incr_per_release());
 
-	if (register_irq) {
-		struct nvgpu_channel *referenced = nvgpu_channel_get(c);
-
-		WARN_ON(!referenced);
-
-		if (referenced) {
-			/* note: channel_put() is in
-			 * channel_sync_syncpt_update() */
-
-			err = nvgpu_nvhost_intr_register_notifier(
-				sp->nvhost,
-				sp->id, thresh,
-				channel_sync_syncpt_update, c);
-			if (err != 0) {
-				nvgpu_channel_put(referenced);
-			}
-
-			/* Adding interrupt action should
-			 * never fail. A proper error handling
-			 * here would require us to decrement
-			 * the syncpt max back to its original
-			 * value. */
-			WARN(err,
-			     "failed to set submit complete interrupt");
-		}
-	}
-
 	if (need_sync_fence) {
 		err = nvgpu_os_fence_syncpt_create(&os_fence, c, sp->nvhost,
 			sp->id, thresh);
@@ -248,30 +221,69 @@ clean_up_priv_cmd:
 static int channel_sync_syncpt_incr(struct nvgpu_channel_sync *s,
 			      struct priv_cmd_entry **entry,
 			      struct nvgpu_fence_type *fence,
-			      bool need_sync_fence,
-			      bool register_irq)
+			      bool need_sync_fence)
 {
 	/* Don't put wfi cmd to this one since we're not returning
 	 * a fence to user space. */
-	return channel_sync_syncpt_incr_common(s,
-			false /* no wfi */,
-			register_irq /* register irq */,
-			entry, fence, need_sync_fence);
+	return channel_sync_syncpt_incr_common(s, false, entry, fence,
+			need_sync_fence);
 }
 
 static int channel_sync_syncpt_incr_user(struct nvgpu_channel_sync *s,
 				   struct priv_cmd_entry **entry,
 				   struct nvgpu_fence_type *fence,
 				   bool wfi,
-				   bool need_sync_fence,
-				   bool register_irq)
+				   bool need_sync_fence)
 {
 	/* Need to do 'wfi + host incr' since we return the fence
 	 * to user space. */
-	return channel_sync_syncpt_incr_common(s,
-			wfi,
-			register_irq /* register irq */,
-			entry, fence, need_sync_fence);
+	return channel_sync_syncpt_incr_common(s, wfi, entry, fence,
+			need_sync_fence);
+}
+
+static void channel_sync_syncpt_mark_progress(struct nvgpu_channel_sync *s,
+				   bool register_irq)
+{
+	struct nvgpu_channel_sync_syncpt *sp =
+		nvgpu_channel_sync_syncpt_from_base(s);
+	struct nvgpu_channel *c = sp->c;
+	struct gk20a *g = c->g;
+	u32 thresh;
+
+	thresh = nvgpu_nvhost_syncpt_incr_max_ext(sp->nvhost, sp->id,
+			g->ops.sync.syncpt.get_incr_per_release());
+
+	if (register_irq) {
+		struct nvgpu_channel *referenced = nvgpu_channel_get(c);
+
+		WARN_ON(referenced == NULL);
+
+		if (referenced != NULL) {
+			/*
+			 * note: the matching channel_put() is in
+			 * channel_sync_syncpt_update() that gets called when
+			 * the job completes.
+			 */
+
+			int err = nvgpu_nvhost_intr_register_notifier(
+				sp->nvhost,
+				sp->id, thresh,
+				channel_sync_syncpt_update, c);
+			if (err != 0) {
+				nvgpu_channel_put(referenced);
+			}
+
+			/*
+			 * This never fails in practice. If it does, we won't
+			 * be getting a completion signal to free the job
+			 * resources, but maybe this succeeds on a possible
+			 * subsequent submit, and the channel closure path will
+			 * eventually mark everything completed anyway.
+			 */
+			WARN(err != 0,
+			     "failed to set submit complete interrupt");
+		}
+	}
 }
 
 int nvgpu_channel_sync_wait_syncpt(struct nvgpu_channel_sync_syncpt *s,
@@ -314,6 +326,7 @@ static const struct nvgpu_channel_sync_ops channel_sync_syncpt_ops = {
 	.wait_fence_fd		= channel_sync_syncpt_wait_fd,
 	.incr			= channel_sync_syncpt_incr,
 	.incr_user		= channel_sync_syncpt_incr_user,
+	.mark_progress		= channel_sync_syncpt_mark_progress,
 	.set_min_eq_max		= channel_sync_syncpt_set_min_eq_max,
 	.destroy		= channel_sync_syncpt_destroy,
 };
@@ -348,8 +361,8 @@ nvgpu_channel_sync_syncpt_create(struct nvgpu_channel *c)
 	snprintf(syncpt_name, sizeof(syncpt_name),
 		"%s_%d", c->g->name, c->chid);
 
-	sp->id = nvgpu_nvhost_get_syncpt_host_managed(sp->nvhost,
-					c->chid, syncpt_name);
+	sp->id = nvgpu_nvhost_get_syncpt_client_managed(sp->nvhost,
+					syncpt_name);
 
 	/**
 	 * This is a WAR to handle invalid value of a syncpt.
diff --git a/drivers/gpu/nvgpu/include/nvgpu/channel_sync.h b/drivers/gpu/nvgpu/include/nvgpu/channel_sync.h
index e19d36e39..067079614 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/channel_sync.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/channel_sync.h
@@ -53,7 +53,7 @@ int nvgpu_channel_sync_wait_fence_fd(struct nvgpu_channel_sync *s, int fd,
  */
 int nvgpu_channel_sync_incr(struct nvgpu_channel_sync *s,
 	struct priv_cmd_entry **entry, struct nvgpu_fence_type *fence,
-	bool need_sync_fence, bool register_irq);
+	bool need_sync_fence);
 
 /*
  * Increment syncpoint/semaphore, so that the returned fence represents
@@ -65,7 +65,19 @@ int nvgpu_channel_sync_incr(struct nvgpu_channel_sync *s,
  */
 int nvgpu_channel_sync_incr_user(struct nvgpu_channel_sync *s,
 	struct priv_cmd_entry **entry, struct nvgpu_fence_type *fence,
-	bool wfi, bool need_sync_fence, bool register_irq);
+	bool wfi, bool need_sync_fence);
+
+/*
+ * Tell the sync that progress will eventually happen on it: advance the
+ * tracked max value of the underlying syncpoint/semaphore and, if requested
+ * and needed by the backend, register an interrupt notifier so that the
+ * channel gets a job completion signal.
+ *
+ * @param register_irq [in] Register an interrupt for the increment.
+ */
+void nvgpu_channel_sync_mark_progress(struct nvgpu_channel_sync *s,
+	bool register_irq);
+
 /*
  * Reset the channel syncpoint/semaphore. Syncpoint increments generally
  * wrap around the range of integer values. Current max value encompasses
diff --git a/drivers/gpu/nvgpu/include/nvgpu/posix/posix-nvhost.h b/drivers/gpu/nvgpu/include/nvgpu/posix/posix-nvhost.h
index 39817cd3e..fe705d09d 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/posix/posix-nvhost.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/posix/posix-nvhost.h
@@ -85,4 +85,7 @@ int nvgpu_nvhost_syncpt_wait_timeout_ext(
 int nvgpu_nvhost_syncpt_read_ext_check(
 	struct nvgpu_nvhost_dev *nvhost_dev, u32 id, u32 *val);
 
+u32 nvgpu_nvhost_syncpt_read_maxval(
+	struct nvgpu_nvhost_dev *nvhost_dev, u32 id);
+
 #endif
diff --git a/drivers/gpu/nvgpu/os/posix/posix-nvhost.c b/drivers/gpu/nvgpu/os/posix/posix-nvhost.c
index 9375b0ca3..3500df743 100644
--- a/drivers/gpu/nvgpu/os/posix/posix-nvhost.c
+++ b/drivers/gpu/nvgpu/os/posix/posix-nvhost.c
@@ -178,6 +178,12 @@ int nvgpu_nvhost_syncpt_read_ext_check(
 	return -ENOSYS;
 }
 
+u32 nvgpu_nvhost_syncpt_read_maxval(
+	struct nvgpu_nvhost_dev *nvhost_dev, u32 id)
+{
+	return 0U;
+}
+
 int nvgpu_nvhost_syncpt_wait_timeout_ext(
 	struct nvgpu_nvhost_dev *nvhost_dev, u32 id,
 	u32 thresh, u32 timeout, u32 waiter_index)