From 6202ead057a582d0bb49a47e138652fe9a8e9263 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Konsta=20H=C3=B6ltt=C3=A4?= <kholtta@nvidia.com>
Date: Fri, 3 Apr 2020 14:04:02 +0300
Subject: [PATCH] gpu: nvgpu: split sema sync hal to wait and incr
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Instead of one HAL op with a boolean flag to decide whether to do one
thing or another entirely different thing, use two separate HAL ops for
filling priv cmd bufs with semaphore wait and semaphore increment
commands. It's already two ops for syncpoints, and explicit commands are
more readable than boolean flags.

Change the offset passed to the sema wait HAL to be relative to the priv
cmd entry, so that the HAL itself adds the entry's internal offset to it.

While at it, modify the syncpoint cmdbuf HAL ops' prototypes to be
consistent.

Jira NVGPU-4548

Change-Id: Ibac1fc5fe2ef113e4e16b56358ecfa8904464c82
Signed-off-by: Konsta Hölttä <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2323319
(cherry picked from commit 08c1fa38c0fe4effe6ff7a992af55f46e03e77d0)
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2328409
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
---
 .../common/sync/channel_sync_semaphore.c      | 11 +--
 .../nvgpu/common/sync/channel_sync_syncpt.c   |  4 +-
 drivers/gpu/nvgpu/hal/init/hal_gm20b.c        |  3 +-
 drivers/gpu/nvgpu/hal/init/hal_gp10b.c        |  3 +-
 drivers/gpu/nvgpu/hal/init/hal_gv11b.c        |  3 +-
 drivers/gpu/nvgpu/hal/init/hal_tu104.c        |  3 +-
 .../gpu/nvgpu/hal/sync/sema_cmdbuf_gk20a.c    | 83 ++++++++++++-------
 .../gpu/nvgpu/hal/sync/sema_cmdbuf_gk20a.h    | 10 ++-
 .../gpu/nvgpu/hal/sync/sema_cmdbuf_gv11b.c    | 59 ++++++++-----
 .../gpu/nvgpu/hal/sync/sema_cmdbuf_gv11b.h    |  9 +-
 .../gpu/nvgpu/hal/sync/syncpt_cmdbuf_gk20a.c  |  8 +-
 .../gpu/nvgpu/hal/sync/syncpt_cmdbuf_gk20a.h  | 14 ++--
 .../gpu/nvgpu/hal/sync/syncpt_cmdbuf_gv11b.c  |  6 +-
 .../gpu/nvgpu/hal/sync/syncpt_cmdbuf_gv11b.h  |  8 +-
 .../gpu/nvgpu/hal/vgpu/init/vgpu_hal_gp10b.c  |  3 +-
 .../gpu/nvgpu/hal/vgpu/init/vgpu_hal_gv11b.c  |  3 +-
 drivers/gpu/nvgpu/include/nvgpu/gops_sync.h   | 15 ++--
 17 files changed, 149 insertions(+), 96 deletions(-)

diff --git a/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c b/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c
index ee2314844..9fa79c2e6 100644
--- a/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c
+++ b/drivers/gpu/nvgpu/common/sync/channel_sync_semaphore.c
@@ -61,11 +61,8 @@ static void add_sema_cmd(struct gk20a *g, struct nvgpu_channel *c,
 			 u32 offset, bool acquire, bool wfi)
 {
 	int ch = c->chid;
-	u32 ob, off = cmd->off + offset;
 	u64 va;
 
-	ob = off;
-
 	/*
 	 * RO for acquire (since we just need to read the mem) and RW for
 	 * release since we will need to write back to the semaphore memory.
@@ -81,21 +78,21 @@ static void add_sema_cmd(struct gk20a *g, struct nvgpu_channel *c,
 		nvgpu_semaphore_prepare(s, c->hw_sema);
 	}
 
-	g->ops.sync.sema.add_cmd(g, s, va, cmd, off, acquire, wfi);
-
 	if (acquire) {
+		g->ops.sync.sema.add_wait_cmd(g, cmd, offset, s, va);
 		gpu_sema_verbose_dbg(g, "(A) c=%d ACQ_GE %-4u pool=%-3llu"
 				     "va=0x%llx cmd_mem=0x%llx b=0x%llx off=%u",
 				     ch, nvgpu_semaphore_get_value(s),
 				     nvgpu_semaphore_get_hw_pool_page_idx(s),
-				     va, cmd->gva, cmd->mem->gpu_va, ob);
+				     va, cmd->gva, cmd->mem->gpu_va, offset);
 	} else {
+		g->ops.sync.sema.add_incr_cmd(g, cmd, s, va, wfi);
 		gpu_sema_verbose_dbg(g, "(R) c=%d INCR %u (%u) pool=%-3llu"
 				     "va=0x%llx cmd_mem=0x%llx b=0x%llx off=%u",
 				     ch, nvgpu_semaphore_get_value(s),
 				     nvgpu_semaphore_read(s),
 				     nvgpu_semaphore_get_hw_pool_page_idx(s),
-				     va, cmd->gva, cmd->mem->gpu_va, ob);
+				     va, cmd->gva, cmd->mem->gpu_va, offset);
 	}
 }
 
diff --git a/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c b/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c
index 5d0e68222..f228a10ed 100644
--- a/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c
+++ b/drivers/gpu/nvgpu/common/sync/channel_sync_syncpt.c
@@ -195,8 +195,8 @@ static int channel_sync_syncpt_incr_common(struct nvgpu_channel_sync *s,
 
 	nvgpu_log(c->g, gpu_dbg_info, "sp->id %d gpu va %llx",
 				sp->id, sp->syncpt_buf.gpu_va);
-	c->g->ops.sync.syncpt.add_incr_cmd(c->g, wfi_cmd,
-			incr_cmd, sp->id, sp->syncpt_buf.gpu_va);
+	c->g->ops.sync.syncpt.add_incr_cmd(c->g, incr_cmd,
+			sp->id, sp->syncpt_buf.gpu_va, wfi_cmd);
 
 	thresh = nvgpu_nvhost_syncpt_incr_max_ext(sp->nvhost, sp->id,
 			c->g->ops.sync.syncpt.get_incr_per_release());
diff --git a/drivers/gpu/nvgpu/hal/init/hal_gm20b.c b/drivers/gpu/nvgpu/hal/init/hal_gm20b.c
index ba1c57800..a64c131d3 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_gm20b.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_gm20b.c
@@ -739,9 +739,10 @@ static const struct gpu_ops gm20b_ops = {
 #endif /* CONFIG_TEGRA_GK20A_NVHOST */
 #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
 		.sema = {
+			.add_wait_cmd = gk20a_sema_add_wait_cmd,
 			.get_wait_cmd_size = gk20a_sema_get_wait_cmd_size,
+			.add_incr_cmd = gk20a_sema_add_incr_cmd,
 			.get_incr_cmd_size = gk20a_sema_get_incr_cmd_size,
-			.add_cmd = gk20a_sema_add_cmd,
 		},
 #endif
 	},
diff --git a/drivers/gpu/nvgpu/hal/init/hal_gp10b.c b/drivers/gpu/nvgpu/hal/init/hal_gp10b.c
index 0a361ec0a..c72bd12d6 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_gp10b.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_gp10b.c
@@ -837,9 +837,10 @@ static const struct gpu_ops gp10b_ops = {
 #endif /* CONFIG_TEGRA_GK20A_NVHOST */
 #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
 		.sema = {
+			.add_wait_cmd = gk20a_sema_add_wait_cmd,
 			.get_wait_cmd_size = gk20a_sema_get_wait_cmd_size,
+			.add_incr_cmd = gk20a_sema_add_incr_cmd,
 			.get_incr_cmd_size = gk20a_sema_get_incr_cmd_size,
-			.add_cmd = gk20a_sema_add_cmd,
 		},
 #endif
 	},
diff --git a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c
index 2b1a53011..87363538f 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_gv11b.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_gv11b.c
@@ -1041,9 +1041,10 @@ NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 8_7))
 #endif /* CONFIG_TEGRA_GK20A_NVHOST */
 #ifdef CONFIG_NVGPU_SW_SEMAPHORE
 		.sema = {
+			.add_wait_cmd = gv11b_sema_add_wait_cmd,
 			.get_wait_cmd_size = gv11b_sema_get_wait_cmd_size,
+			.add_incr_cmd = gv11b_sema_add_incr_cmd,
 			.get_incr_cmd_size = gv11b_sema_get_incr_cmd_size,
-			.add_cmd = gv11b_sema_add_cmd,
 		},
 #endif
 	},
diff --git a/drivers/gpu/nvgpu/hal/init/hal_tu104.c b/drivers/gpu/nvgpu/hal/init/hal_tu104.c
index dcd2a77d2..90f8db7af 100644
--- a/drivers/gpu/nvgpu/hal/init/hal_tu104.c
+++ b/drivers/gpu/nvgpu/hal/init/hal_tu104.c
@@ -1067,9 +1067,10 @@ static const struct gpu_ops tu104_ops = {
 #if defined(CONFIG_NVGPU_KERNEL_MODE_SUBMIT) && \
 	defined(CONFIG_NVGPU_SW_SEMAPHORE)
 		.sema = {
+			.add_wait_cmd = gv11b_sema_add_wait_cmd,
 			.get_wait_cmd_size = gv11b_sema_get_wait_cmd_size,
+			.add_incr_cmd = gv11b_sema_add_incr_cmd,
 			.get_incr_cmd_size = gv11b_sema_get_incr_cmd_size,
-			.add_cmd = gv11b_sema_add_cmd,
 		},
 #endif
 	},
diff --git a/drivers/gpu/nvgpu/hal/sync/sema_cmdbuf_gk20a.c b/drivers/gpu/nvgpu/hal/sync/sema_cmdbuf_gk20a.c
index 5c3da8e4e..f5b3db27f 100644
--- a/drivers/gpu/nvgpu/hal/sync/sema_cmdbuf_gk20a.c
+++ b/drivers/gpu/nvgpu/hal/sync/sema_cmdbuf_gk20a.c
@@ -40,12 +40,10 @@ u32 gk20a_sema_get_incr_cmd_size(void)
 	return 10U;
 }
 
-void gk20a_sema_add_cmd(struct gk20a *g, struct nvgpu_semaphore *s,
-		u64 sema_va, struct priv_cmd_entry *cmd,
-		u32 off, bool acquire, bool wfi)
+static u32 gk20a_sema_add_header(struct gk20a *g,
+		struct priv_cmd_entry *cmd, u32 off,
+		u64 sema_va)
 {
-	nvgpu_log_fn(g, " ");
-
 	/* semaphore_a */
 	nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010004U);
 	/* offset_upper */
@@ -55,30 +53,53 @@ void gk20a_sema_add_cmd(struct gk20a *g, struct nvgpu_semaphore *s,
 	/* offset */
 	nvgpu_mem_wr32(g, cmd->mem, off++, (u32)sema_va & 0xffffffff);
 
-	if (acquire) {
-		/* semaphore_c */
-		nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010006U);
-		/* payload */
-		nvgpu_mem_wr32(g, cmd->mem, off++,
-			       nvgpu_semaphore_get_value(s));
-		/* semaphore_d */
-		nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010007U);
-		/* operation: acq_geq, switch_en */
-		nvgpu_mem_wr32(g, cmd->mem, off++, 0x4U | BIT32(12));
-	} else {
-		/* semaphore_c */
-		nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010006U);
-		/* payload */
-		nvgpu_mem_wr32(g, cmd->mem, off++,
-			       nvgpu_semaphore_get_value(s));
-		/* semaphore_d */
-		nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010007U);
-		/* operation: release, wfi */
-		nvgpu_mem_wr32(g, cmd->mem, off++,
-				0x2UL | ((wfi ? 0x0UL : 0x1UL) << 20));
-		/* non_stall_int */
-		nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010008U);
-		/* ignored */
-		nvgpu_mem_wr32(g, cmd->mem, off++, 0U);
-	}
+	return off;
+}
+
+void gk20a_sema_add_wait_cmd(struct gk20a *g,
+		struct priv_cmd_entry *cmd, u32 off,
+		struct nvgpu_semaphore *s, u64 sema_va)
+{
+	nvgpu_log_fn(g, " ");
+	/* Acquire cmd; off is relative to this entry's start (cmd->off). */
+	off = cmd->off + off;
+	off = gk20a_sema_add_header(g, cmd, off, sema_va);
+
+	/* semaphore_c */
+	nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010006U);
+	/* payload: current value of s, from nvgpu_semaphore_get_value() */
+	nvgpu_mem_wr32(g, cmd->mem, off++,
+		       nvgpu_semaphore_get_value(s));
+	/* semaphore_d */
+	nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010007U);
+	/* operation: acq_geq (0x4), switch_en (bit 12) */
+	nvgpu_mem_wr32(g, cmd->mem, off++, 0x4U | BIT32(12));
+}
+
+void gk20a_sema_add_incr_cmd(struct gk20a *g,
+		struct priv_cmd_entry *cmd,
+		struct nvgpu_semaphore *s, u64 sema_va,
+		bool wfi)
+
+{
+	u32 off = cmd->off;
+
+	nvgpu_log_fn(g, " ");
+	/* Release cmd, written from the start of this entry (cmd->off). */
+	off = gk20a_sema_add_header(g, cmd, off, sema_va);
+
+	/* semaphore_c */
+	nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010006U);
+	/* payload: current value of s, from nvgpu_semaphore_get_value() */
+	nvgpu_mem_wr32(g, cmd->mem, off++,
+		       nvgpu_semaphore_get_value(s));
+	/* semaphore_d */
+	nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010007U);
+	/* operation: release (0x2); bit 20 is set only when wfi is false */
+	nvgpu_mem_wr32(g, cmd->mem, off++,
+			0x2UL | ((wfi ? 0x0UL : 0x1UL) << 20));
+	/* non_stall_int */
+	nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010008U);
+	/* data word of non_stall_int, ignored */
+	nvgpu_mem_wr32(g, cmd->mem, off++, 0U);
 }
diff --git a/drivers/gpu/nvgpu/hal/sync/sema_cmdbuf_gk20a.h b/drivers/gpu/nvgpu/hal/sync/sema_cmdbuf_gk20a.h
index b9a4aa3f7..1e80d7821 100644
--- a/drivers/gpu/nvgpu/hal/sync/sema_cmdbuf_gk20a.h
+++ b/drivers/gpu/nvgpu/hal/sync/sema_cmdbuf_gk20a.h
@@ -30,8 +30,12 @@ struct nvgpu_semaphore;
 
 u32 gk20a_sema_get_wait_cmd_size(void);
 u32 gk20a_sema_get_incr_cmd_size(void);
-void gk20a_sema_add_cmd(struct gk20a *g, struct nvgpu_semaphore *s,
-		u64 sema_va, struct priv_cmd_entry *cmd,
-		u32 off, bool acquire, bool wfi);
+void gk20a_sema_add_wait_cmd(struct gk20a *g,
+		struct priv_cmd_entry *cmd, u32 off,
+		struct nvgpu_semaphore *s, u64 sema_va);
+void gk20a_sema_add_incr_cmd(struct gk20a *g,
+		struct priv_cmd_entry *cmd,
+		struct nvgpu_semaphore *s, u64 sema_va,
+		bool wfi);
 
 #endif /* NVGPU_SYNC_SEMA_CMDBUF_GK20A_H */
diff --git a/drivers/gpu/nvgpu/hal/sync/sema_cmdbuf_gv11b.c b/drivers/gpu/nvgpu/hal/sync/sema_cmdbuf_gv11b.c
index f6cc5b1a0..01c4dcbb2 100644
--- a/drivers/gpu/nvgpu/hal/sync/sema_cmdbuf_gv11b.c
+++ b/drivers/gpu/nvgpu/hal/sync/sema_cmdbuf_gv11b.c
@@ -40,13 +40,10 @@ u32 gv11b_sema_get_incr_cmd_size(void)
 	return 12U;
 }
 
-void gv11b_sema_add_cmd(struct gk20a *g,
-		struct nvgpu_semaphore *s, u64 sema_va,
-		struct priv_cmd_entry *cmd,
-		u32 off, bool acquire, bool wfi)
+static u32 gv11b_sema_add_header(struct gk20a *g,
+		struct priv_cmd_entry *cmd, u32 off,
+		struct nvgpu_semaphore *s, u64 sema_va)
 {
-	nvgpu_log_fn(g, " ");
-
 	/* sema_addr_lo */
 	nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010017);
 	nvgpu_mem_wr32(g, cmd->mem, off++, sema_va & 0xffffffffULL);
@@ -63,18 +60,40 @@ void gv11b_sema_add_cmd(struct gk20a *g,
 	nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001a);
 	nvgpu_mem_wr32(g, cmd->mem, off++, 0);
 
-	if (acquire) {
-		/* sema_execute : acq_strict_geq | switch_en | 32bit */
-		nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001b);
-		nvgpu_mem_wr32(g, cmd->mem, off++, U32(0x2) | BIT32(12));
-	} else {
-		/* sema_execute : release | wfi | 32bit */
-		nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001b);
-		nvgpu_mem_wr32(g, cmd->mem, off++,
-			U32(0x1) | ((wfi ? U32(0x1) : U32(0x0)) << 20U));
-
-		/* non_stall_int : payload is ignored */
-		nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010008);
-		nvgpu_mem_wr32(g, cmd->mem, off++, 0);
-	}
+	return off;
+}
+
+void gv11b_sema_add_wait_cmd(struct gk20a *g,
+		struct priv_cmd_entry *cmd, u32 off,
+		struct nvgpu_semaphore *s, u64 sema_va)
+{
+	nvgpu_log_fn(g, " ");
+	/* Acquire cmd; off is relative to this entry's start (cmd->off). */
+	off = cmd->off + off;
+	off = gv11b_sema_add_header(g, cmd, off, s, sema_va);
+
+	/* sema_execute : acq_strict_geq (0x2) | switch_en (bit 12) | 32bit */
+	nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001b);
+	nvgpu_mem_wr32(g, cmd->mem, off++, U32(0x2) | BIT32(12));
+}
+
+void gv11b_sema_add_incr_cmd(struct gk20a *g,
+		struct priv_cmd_entry *cmd,
+		struct nvgpu_semaphore *s, u64 sema_va,
+		bool wfi)
+{
+	u32 off = cmd->off;
+
+	nvgpu_log_fn(g, " ");
+	/* Release cmd, written from the start of this entry (cmd->off). */
+	off = gv11b_sema_add_header(g, cmd, off, s, sema_va);
+
+	/* sema_execute : release (0x1) | wfi (bit 20, set if wfi) | 32bit */
+	nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001b);
+	nvgpu_mem_wr32(g, cmd->mem, off++,
+		U32(0x1) | ((wfi ? U32(0x1) : U32(0x0)) << 20U));
+
+	/* non_stall_int : payload is ignored */
+	nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010008);
+	nvgpu_mem_wr32(g, cmd->mem, off++, 0);
 }
diff --git a/drivers/gpu/nvgpu/hal/sync/sema_cmdbuf_gv11b.h b/drivers/gpu/nvgpu/hal/sync/sema_cmdbuf_gv11b.h
index d38fcc4d6..d7a1ee56a 100644
--- a/drivers/gpu/nvgpu/hal/sync/sema_cmdbuf_gv11b.h
+++ b/drivers/gpu/nvgpu/hal/sync/sema_cmdbuf_gv11b.h
@@ -30,9 +30,12 @@ struct nvgpu_semaphore;
 
 u32 gv11b_sema_get_wait_cmd_size(void);
 u32 gv11b_sema_get_incr_cmd_size(void);
-void gv11b_sema_add_cmd(struct gk20a *g,
-		struct nvgpu_semaphore *s, u64 sema_va,
+void gv11b_sema_add_wait_cmd(struct gk20a *g,
+		struct priv_cmd_entry *cmd, u32 off,
+		struct nvgpu_semaphore *s, u64 sema_va);
+void gv11b_sema_add_incr_cmd(struct gk20a *g,
 		struct priv_cmd_entry *cmd,
-		u32 off, bool acquire, bool wfi);
+		struct nvgpu_semaphore *s, u64 sema_va,
+		bool wfi);
 
 #endif /* NVGPU_SYNC_SEMA_CMDBUF_GV11B_H */
diff --git a/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gk20a.c b/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gk20a.c
index 34f65fc6d..07783510a 100644
--- a/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gk20a.c
+++ b/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gk20a.c
@@ -32,7 +32,7 @@
 #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
 void gk20a_syncpt_add_wait_cmd(struct gk20a *g,
 		struct priv_cmd_entry *cmd, u32 off,
-		u32 id, u32 thresh, u64 gpu_va)
+		u32 id, u32 thresh, u64 gpu_va_base)
 {
 	nvgpu_log_fn(g, " ");
 
@@ -58,13 +58,13 @@ u32 gk20a_syncpt_get_incr_per_release(void)
 }
 
 void gk20a_syncpt_add_incr_cmd(struct gk20a *g,
-		bool wfi_cmd, struct priv_cmd_entry *cmd,
-		u32 id, u64 gpu_va)
+		struct priv_cmd_entry *cmd,
+		u32 id, u64 gpu_va, bool wfi)
 {
 	u32 off = cmd->off;
 
 	nvgpu_log_fn(g, " ");
-	if (wfi_cmd) {
+	if (wfi) {
 		/* wfi */
 		nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001EU);
 		/* handle, ignored */
diff --git a/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gk20a.h b/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gk20a.h
index 190bc8cfa..b4c46ca45 100644
--- a/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gk20a.h
+++ b/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gk20a.h
@@ -33,12 +33,12 @@ struct nvgpu_mem;
 #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
 void gk20a_syncpt_add_wait_cmd(struct gk20a *g,
 		struct priv_cmd_entry *cmd, u32 off,
-		u32 id, u32 thresh, u64 gpu_va);
+		u32 id, u32 thresh, u64 gpu_va_base);
 u32 gk20a_syncpt_get_wait_cmd_size(void);
 u32 gk20a_syncpt_get_incr_per_release(void);
 void gk20a_syncpt_add_incr_cmd(struct gk20a *g,
-		bool wfi_cmd, struct priv_cmd_entry *cmd,
-		u32 id, u64 gpu_va);
+		struct priv_cmd_entry *cmd,
+		u32 id, u64 gpu_va, bool wfi);
 u32 gk20a_syncpt_get_incr_cmd_size(bool wfi_cmd);
 #endif
 
@@ -53,7 +53,7 @@ int gk20a_syncpt_alloc_buf(struct nvgpu_channel *c,
 #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
 static inline void gk20a_syncpt_add_wait_cmd(struct gk20a *g,
 		struct priv_cmd_entry *cmd, u32 off,
-		u32 id, u32 thresh, u64 gpu_va)
+		u32 id, u32 thresh, u64 gpu_va_base)
 {
 }
 static inline u32 gk20a_syncpt_get_wait_cmd_size(void)
@@ -65,8 +65,8 @@ static inline u32 gk20a_syncpt_get_incr_per_release(void)
 	return 0U;
 }
 static inline void gk20a_syncpt_add_incr_cmd(struct gk20a *g,
-		bool wfi_cmd, struct priv_cmd_entry *cmd,
-		u32 id, u64 gpu_va)
+		struct priv_cmd_entry *cmd,
+		u32 id, u64 gpu_va, bool wfi)
 {
 }
 static inline u32 gk20a_syncpt_get_incr_cmd_size(bool wfi_cmd)
@@ -87,4 +87,4 @@ static inline int gk20a_syncpt_alloc_buf(struct nvgpu_channel *c,
 
 #endif
 
-#endif /* NVGPU_SYNC_SYNCPT_CMDBUF_GK20A_H */
\ No newline at end of file
+#endif /* NVGPU_SYNC_SYNCPT_CMDBUF_GK20A_H */
diff --git a/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gv11b.c b/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gv11b.c
index 69cfc0198..3f9104533 100644
--- a/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gv11b.c
+++ b/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gv11b.c
@@ -81,8 +81,8 @@ u32 gv11b_syncpt_get_incr_per_release(void)
 }
 
 void gv11b_syncpt_add_incr_cmd(struct gk20a *g,
-		bool wfi_cmd, struct priv_cmd_entry *cmd,
-		u32 id, u64 gpu_va)
+		struct priv_cmd_entry *cmd,
+		u32 id, u64 gpu_va, bool wfi)
 {
 	u32 off = cmd->off;
 
@@ -109,7 +109,7 @@ void gv11b_syncpt_add_incr_cmd(struct gk20a *g,
 	/* sema_execute : release | wfi | 32bit */
 	nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001b);
 	nvgpu_mem_wr32(g, cmd->mem, off, (0x1U |
-					((u32)(wfi_cmd ? 0x1U : 0x0U) << 20U)));
+					((u32)(wfi ? 0x1U : 0x0U) << 20U)));
 }
 
 u32 gv11b_syncpt_get_incr_cmd_size(bool wfi_cmd)
diff --git a/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gv11b.h b/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gv11b.h
index 0163c1a44..dc71f9cfd 100644
--- a/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gv11b.h
+++ b/drivers/gpu/nvgpu/hal/sync/syncpt_cmdbuf_gv11b.h
@@ -40,8 +40,8 @@ void gv11b_syncpt_add_wait_cmd(struct gk20a *g,
 u32 gv11b_syncpt_get_wait_cmd_size(void);
 u32 gv11b_syncpt_get_incr_per_release(void);
 void gv11b_syncpt_add_incr_cmd(struct gk20a *g,
-		bool wfi_cmd, struct priv_cmd_entry *cmd,
-		u32 id, u64 gpu_va);
+		struct priv_cmd_entry *cmd,
+		u32 id, u64 gpu_va, bool wfi);
 u32 gv11b_syncpt_get_incr_cmd_size(bool wfi_cmd);
 #endif /* CONFIG_NVGPU_KERNEL_MODE_SUBMIT */
 
@@ -71,8 +71,8 @@ static inline u32 gv11b_syncpt_get_incr_per_release(void)
 	return 0U;
 }
 static inline void gv11b_syncpt_add_incr_cmd(struct gk20a *g,
-		bool wfi_cmd, struct priv_cmd_entry *cmd,
-		u32 id, u64 gpu_va)
+		struct priv_cmd_entry *cmd,
+		u32 id, u64 gpu_va, bool wfi)
 {
 }
 static inline u32 gv11b_syncpt_get_incr_cmd_size(bool wfi_cmd)
diff --git a/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_gp10b.c b/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_gp10b.c
index 9237220af..db2cd21cf 100644
--- a/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_gp10b.c
+++ b/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_gp10b.c
@@ -541,9 +541,10 @@ static const struct gpu_ops vgpu_gp10b_ops = {
 #endif /* CONFIG_TEGRA_GK20A_NVHOST */
 #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
 		.sema = {
+			.add_wait_cmd = gk20a_sema_add_wait_cmd,
 			.get_wait_cmd_size = gk20a_sema_get_wait_cmd_size,
+			.add_incr_cmd = gk20a_sema_add_incr_cmd,
 			.get_incr_cmd_size = gk20a_sema_get_incr_cmd_size,
-			.add_cmd = gk20a_sema_add_cmd,
 		},
 #endif
 	},
diff --git a/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_gv11b.c b/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_gv11b.c
index 879e3d9eb..b513d89db 100644
--- a/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_gv11b.c
+++ b/drivers/gpu/nvgpu/hal/vgpu/init/vgpu_hal_gv11b.c
@@ -661,9 +661,10 @@ static const struct gpu_ops vgpu_gv11b_ops = {
 #endif
 #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
 		.sema = {
+			.add_wait_cmd = gv11b_sema_add_wait_cmd,
 			.get_wait_cmd_size = gv11b_sema_get_wait_cmd_size,
+			.add_incr_cmd = gv11b_sema_add_incr_cmd,
 			.get_incr_cmd_size = gv11b_sema_get_incr_cmd_size,
-			.add_cmd = gv11b_sema_add_cmd,
 		},
 #endif
 	},
diff --git a/drivers/gpu/nvgpu/include/nvgpu/gops_sync.h b/drivers/gpu/nvgpu/include/nvgpu/gops_sync.h
index fbe919281..8331bec38 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/gops_sync.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/gops_sync.h
@@ -77,12 +77,12 @@ struct gops_sync {
 #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
 		void (*add_wait_cmd)(struct gk20a *g,
 				struct priv_cmd_entry *cmd, u32 off,
-				u32 id, u32 thresh, u64 gpu_va);
+				u32 id, u32 thresh, u64 gpu_va_base);
 		u32 (*get_wait_cmd_size)(void);
 		void (*add_incr_cmd)(struct gk20a *g,
-				bool wfi_cmd,
 				struct priv_cmd_entry *cmd,
-				u32 id, u64 gpu_va);
+				u32 id, u64 gpu_va,
+				bool wfi);
 		u32 (*get_incr_cmd_size)(bool wfi_cmd);
 		u32 (*get_incr_per_release)(void);
 #endif
@@ -96,10 +96,13 @@ struct gops_sync {
 	struct gops_sync_sema {
 		u32 (*get_wait_cmd_size)(void);
 		u32 (*get_incr_cmd_size)(void);
-		void (*add_cmd)(struct gk20a *g,
-			struct nvgpu_semaphore *s, u64 sema_va,
+		void (*add_wait_cmd)(struct gk20a *g,
+			struct priv_cmd_entry *cmd, u32 off,
+			struct nvgpu_semaphore *s, u64 sema_va);
+		void (*add_incr_cmd)(struct gk20a *g,
 			struct priv_cmd_entry *cmd,
-			u32 off, bool acquire, bool wfi);
+			struct nvgpu_semaphore *s, u64 sema_va,
+			bool wfi);
 	} sema;
 /** @endcond DOXYGEN_SHOULD_SKIP_THIS */
 #endif