gpu: nvgpu: split sema sync hal to wait and incr

Instead of one HAL op with a boolean flag to decide whether to do one
thing or another entirely different thing, use two separate HAL ops for
filling priv cmd bufs with semaphore wait and semaphore increment
commands. It's already two ops for syncpoints, and explicit commands are
more readable than boolean flags.
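
For reference, the resulting pair of ops in gops_sync_sema looks like
this (signatures copied from the interface hunk at the end of this
diff):

  void (*add_wait_cmd)(struct gk20a *g,
  		struct priv_cmd_entry *cmd, u32 off,
  		struct nvgpu_semaphore *s, u64 sema_va);
  void (*add_incr_cmd)(struct gk20a *g,
  		struct priv_cmd_entry *cmd,
  		struct nvgpu_semaphore *s, u64 sema_va,
  		bool wfi);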

Change the offset passed into the sema wait HAL to be relative to the
cmdbuf entry, so that the HAL itself adds the cmdbuf internal offset
(cmd->off) to it.
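
A minimal sketch of the new convention, taken from the gk20a wait HAL
below (callers no longer pre-add cmd->off themselves):

  /* in gk20a_sema_add_wait_cmd(): off arrives relative to the entry */
  off = cmd->off + off;
  off = gk20a_sema_add_header(g, cmd, off, sema_va);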

While at it, modify the syncpoint cmdbuf HAL ops' prototypes to be
consistent.
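
Concretely, the syncpt incr op moves its wfi flag to the end so that
the syncpt and sema prototypes line up (before/after taken from the
gops_sync hunk below):

  /* before */
  void (*add_incr_cmd)(struct gk20a *g, bool wfi_cmd,
  		struct priv_cmd_entry *cmd, u32 id, u64 gpu_va);
  /* after */
  void (*add_incr_cmd)(struct gk20a *g, struct priv_cmd_entry *cmd,
  		u32 id, u64 gpu_va, bool wfi);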

Jira NVGPU-4548

Change-Id: Ibac1fc5fe2ef113e4e16b56358ecfa8904464c82
Signed-off-by: Konsta Hölttä <kholtta@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2323319
(cherry picked from commit 08c1fa38c0fe4effe6ff7a992af55f46e03e77d0)
Reviewed-on: https://git-master.nvidia.com/r/c/linux-nvgpu/+/2328409
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Author:    Konsta Hölttä
Date:      2020-04-03 14:04:02 +03:00
Committer: Alex Waterman
Commit:    6202ead057 (parent 6a7bf6cdc0)

17 changed files with 149 additions and 96 deletions

@@ -61,11 +61,8 @@ static void add_sema_cmd(struct gk20a *g, struct nvgpu_channel *c,
 		u32 offset, bool acquire, bool wfi)
 {
 	int ch = c->chid;
-	u32 ob, off = cmd->off + offset;
 	u64 va;
 
-	ob = off;
-
 	/*
 	 * RO for acquire (since we just need to read the mem) and RW for
 	 * release since we will need to write back to the semaphore memory.
@@ -81,21 +78,21 @@ static void add_sema_cmd(struct gk20a *g, struct nvgpu_channel *c,
 		nvgpu_semaphore_prepare(s, c->hw_sema);
 	}
 
-	g->ops.sync.sema.add_cmd(g, s, va, cmd, off, acquire, wfi);
-
 	if (acquire) {
+		g->ops.sync.sema.add_wait_cmd(g, cmd, offset, s, va);
 		gpu_sema_verbose_dbg(g, "(A) c=%d ACQ_GE %-4u pool=%-3llu"
 				"va=0x%llx cmd_mem=0x%llx b=0x%llx off=%u",
 				ch, nvgpu_semaphore_get_value(s),
 				nvgpu_semaphore_get_hw_pool_page_idx(s),
-				va, cmd->gva, cmd->mem->gpu_va, ob);
+				va, cmd->gva, cmd->mem->gpu_va, offset);
 	} else {
+		g->ops.sync.sema.add_incr_cmd(g, cmd, s, va, wfi);
 		gpu_sema_verbose_dbg(g, "(R) c=%d INCR %u (%u) pool=%-3llu"
 				"va=0x%llx cmd_mem=0x%llx b=0x%llx off=%u",
 				ch, nvgpu_semaphore_get_value(s),
 				nvgpu_semaphore_read(s),
 				nvgpu_semaphore_get_hw_pool_page_idx(s),
-				va, cmd->gva, cmd->mem->gpu_va, ob);
+				va, cmd->gva, cmd->mem->gpu_va, offset);
 	}
 }

@@ -195,8 +195,8 @@ static int channel_sync_syncpt_incr_common(struct nvgpu_channel_sync *s,
 	nvgpu_log(c->g, gpu_dbg_info, "sp->id %d gpu va %llx",
 		sp->id, sp->syncpt_buf.gpu_va);
 
-	c->g->ops.sync.syncpt.add_incr_cmd(c->g, wfi_cmd,
-			incr_cmd, sp->id, sp->syncpt_buf.gpu_va);
+	c->g->ops.sync.syncpt.add_incr_cmd(c->g, incr_cmd,
+			sp->id, sp->syncpt_buf.gpu_va, wfi_cmd);
 
 	thresh = nvgpu_nvhost_syncpt_incr_max_ext(sp->nvhost, sp->id,
 			c->g->ops.sync.syncpt.get_incr_per_release());

@@ -739,9 +739,10 @@ static const struct gpu_ops gm20b_ops = {
 #endif /* CONFIG_TEGRA_GK20A_NVHOST */
 #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
 			.sema = {
+				.add_wait_cmd = gk20a_sema_add_wait_cmd,
 				.get_wait_cmd_size = gk20a_sema_get_wait_cmd_size,
+				.add_incr_cmd = gk20a_sema_add_incr_cmd,
 				.get_incr_cmd_size = gk20a_sema_get_incr_cmd_size,
-				.add_cmd = gk20a_sema_add_cmd,
 			},
 #endif
 		},

@@ -837,9 +837,10 @@ static const struct gpu_ops gp10b_ops = {
 #endif /* CONFIG_TEGRA_GK20A_NVHOST */
 #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
 			.sema = {
+				.add_wait_cmd = gk20a_sema_add_wait_cmd,
 				.get_wait_cmd_size = gk20a_sema_get_wait_cmd_size,
+				.add_incr_cmd = gk20a_sema_add_incr_cmd,
 				.get_incr_cmd_size = gk20a_sema_get_incr_cmd_size,
-				.add_cmd = gk20a_sema_add_cmd,
 			},
 #endif
 		},

@@ -1041,9 +1041,10 @@ NVGPU_COV_WHITELIST_BLOCK_END(NVGPU_MISRA(Rule, 8_7))
 #endif /* CONFIG_TEGRA_GK20A_NVHOST */
 #ifdef CONFIG_NVGPU_SW_SEMAPHORE
 			.sema = {
+				.add_wait_cmd = gv11b_sema_add_wait_cmd,
 				.get_wait_cmd_size = gv11b_sema_get_wait_cmd_size,
+				.add_incr_cmd = gv11b_sema_add_incr_cmd,
 				.get_incr_cmd_size = gv11b_sema_get_incr_cmd_size,
-				.add_cmd = gv11b_sema_add_cmd,
 			},
 #endif
 		},

@@ -1067,9 +1067,10 @@ static const struct gpu_ops tu104_ops = {
 #if defined(CONFIG_NVGPU_KERNEL_MODE_SUBMIT) && \
 	defined(CONFIG_NVGPU_SW_SEMAPHORE)
 			.sema = {
+				.add_wait_cmd = gv11b_sema_add_wait_cmd,
 				.get_wait_cmd_size = gv11b_sema_get_wait_cmd_size,
+				.add_incr_cmd = gv11b_sema_add_incr_cmd,
 				.get_incr_cmd_size = gv11b_sema_get_incr_cmd_size,
-				.add_cmd = gv11b_sema_add_cmd,
 			},
 #endif
 		},

@@ -40,12 +40,10 @@ u32 gk20a_sema_get_incr_cmd_size(void)
 	return 10U;
 }
 
-void gk20a_sema_add_cmd(struct gk20a *g, struct nvgpu_semaphore *s,
-		u64 sema_va, struct priv_cmd_entry *cmd,
-		u32 off, bool acquire, bool wfi)
+static u32 gk20a_sema_add_header(struct gk20a *g,
+		struct priv_cmd_entry *cmd, u32 off,
+		u64 sema_va)
 {
-	nvgpu_log_fn(g, " ");
-
 	/* semaphore_a */
 	nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010004U);
 	/* offset_upper */
@@ -55,30 +53,53 @@ void gk20a_sema_add_cmd(struct gk20a *g, struct nvgpu_semaphore *s,
 	/* offset */
 	nvgpu_mem_wr32(g, cmd->mem, off++, (u32)sema_va & 0xffffffff);
 
-	if (acquire) {
-		/* semaphore_c */
-		nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010006U);
-		/* payload */
-		nvgpu_mem_wr32(g, cmd->mem, off++,
-			nvgpu_semaphore_get_value(s));
-		/* semaphore_d */
-		nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010007U);
-		/* operation: acq_geq, switch_en */
-		nvgpu_mem_wr32(g, cmd->mem, off++, 0x4U | BIT32(12));
-	} else {
-		/* semaphore_c */
-		nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010006U);
-		/* payload */
-		nvgpu_mem_wr32(g, cmd->mem, off++,
-			nvgpu_semaphore_get_value(s));
-		/* semaphore_d */
-		nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010007U);
-		/* operation: release, wfi */
-		nvgpu_mem_wr32(g, cmd->mem, off++,
-			0x2UL | ((wfi ? 0x0UL : 0x1UL) << 20));
-		/* non_stall_int */
-		nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010008U);
-		/* ignored */
-		nvgpu_mem_wr32(g, cmd->mem, off++, 0U);
-	}
+	return off;
+}
+
+void gk20a_sema_add_wait_cmd(struct gk20a *g,
+		struct priv_cmd_entry *cmd, u32 off,
+		struct nvgpu_semaphore *s, u64 sema_va)
+{
+	nvgpu_log_fn(g, " ");
+
+	off = cmd->off + off;
+	off = gk20a_sema_add_header(g, cmd, off, sema_va);
+
+	/* semaphore_c */
+	nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010006U);
+	/* payload */
+	nvgpu_mem_wr32(g, cmd->mem, off++,
+		nvgpu_semaphore_get_value(s));
+	/* semaphore_d */
+	nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010007U);
+	/* operation: acq_geq, switch_en */
+	nvgpu_mem_wr32(g, cmd->mem, off++, 0x4U | BIT32(12));
+}
+
+void gk20a_sema_add_incr_cmd(struct gk20a *g,
+		struct priv_cmd_entry *cmd,
+		struct nvgpu_semaphore *s, u64 sema_va,
+		bool wfi)
+{
+	u32 off = cmd->off;
+
+	nvgpu_log_fn(g, " ");
+
+	off = gk20a_sema_add_header(g, cmd, off, sema_va);
+
+	/* semaphore_c */
+	nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010006U);
+	/* payload */
+	nvgpu_mem_wr32(g, cmd->mem, off++,
+		nvgpu_semaphore_get_value(s));
+	/* semaphore_d */
+	nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010007U);
+	/* operation: release, wfi */
+	nvgpu_mem_wr32(g, cmd->mem, off++,
+		0x2UL | ((wfi ? 0x0UL : 0x1UL) << 20));
+	/* non_stall_int */
+	nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010008U);
+	/* ignored */
+	nvgpu_mem_wr32(g, cmd->mem, off++, 0U);
+}

@@ -30,8 +30,12 @@ struct nvgpu_semaphore;
 
 u32 gk20a_sema_get_wait_cmd_size(void);
 u32 gk20a_sema_get_incr_cmd_size(void);
-void gk20a_sema_add_cmd(struct gk20a *g, struct nvgpu_semaphore *s,
-		u64 sema_va, struct priv_cmd_entry *cmd,
-		u32 off, bool acquire, bool wfi);
+void gk20a_sema_add_wait_cmd(struct gk20a *g,
+		struct priv_cmd_entry *cmd, u32 off,
+		struct nvgpu_semaphore *s, u64 sema_va);
+void gk20a_sema_add_incr_cmd(struct gk20a *g,
+		struct priv_cmd_entry *cmd,
+		struct nvgpu_semaphore *s, u64 sema_va,
+		bool wfi);
 
 #endif /* NVGPU_SYNC_SEMA_CMDBUF_GK20A_H */

@@ -40,13 +40,10 @@ u32 gv11b_sema_get_incr_cmd_size(void)
 	return 12U;
 }
 
-void gv11b_sema_add_cmd(struct gk20a *g,
-		struct nvgpu_semaphore *s, u64 sema_va,
-		struct priv_cmd_entry *cmd,
-		u32 off, bool acquire, bool wfi)
+static u32 gv11b_sema_add_header(struct gk20a *g,
+		struct priv_cmd_entry *cmd, u32 off,
+		struct nvgpu_semaphore *s, u64 sema_va)
 {
-	nvgpu_log_fn(g, " ");
-
 	/* sema_addr_lo */
 	nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010017);
 	nvgpu_mem_wr32(g, cmd->mem, off++, sema_va & 0xffffffffULL);
@@ -63,18 +60,40 @@ void gv11b_sema_add_cmd(struct gk20a *g,
 	/* sema_payload_hi : ignored */
 	nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001a);
 	nvgpu_mem_wr32(g, cmd->mem, off++, 0);
 
-	if (acquire) {
-		/* sema_execute : acq_strict_geq | switch_en | 32bit */
-		nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001b);
-		nvgpu_mem_wr32(g, cmd->mem, off++, U32(0x2) | BIT32(12));
-	} else {
-		/* sema_execute : release | wfi | 32bit */
-		nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001b);
-		nvgpu_mem_wr32(g, cmd->mem, off++,
-			U32(0x1) | ((wfi ? U32(0x1) : U32(0x0)) << 20U));
-		/* non_stall_int : payload is ignored */
-		nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010008);
-		nvgpu_mem_wr32(g, cmd->mem, off++, 0);
-	}
+	return off;
+}
+
+void gv11b_sema_add_wait_cmd(struct gk20a *g,
+		struct priv_cmd_entry *cmd, u32 off,
+		struct nvgpu_semaphore *s, u64 sema_va)
+{
+	nvgpu_log_fn(g, " ");
+
+	off = cmd->off + off;
+	off = gv11b_sema_add_header(g, cmd, off, s, sema_va);
+
+	/* sema_execute : acq_strict_geq | switch_en | 32bit */
+	nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001b);
+	nvgpu_mem_wr32(g, cmd->mem, off++, U32(0x2) | BIT32(12));
+}
+
+void gv11b_sema_add_incr_cmd(struct gk20a *g,
+		struct priv_cmd_entry *cmd,
+		struct nvgpu_semaphore *s, u64 sema_va,
+		bool wfi)
+{
+	u32 off = cmd->off;
+
+	nvgpu_log_fn(g, " ");
+
+	off = gv11b_sema_add_header(g, cmd, off, s, sema_va);
+
+	/* sema_execute : release | wfi | 32bit */
+	nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001b);
+	nvgpu_mem_wr32(g, cmd->mem, off++,
+		U32(0x1) | ((wfi ? U32(0x1) : U32(0x0)) << 20U));
+	/* non_stall_int : payload is ignored */
+	nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010008);
+	nvgpu_mem_wr32(g, cmd->mem, off++, 0);
+}

@@ -30,9 +30,12 @@ struct nvgpu_semaphore;
 
 u32 gv11b_sema_get_wait_cmd_size(void);
 u32 gv11b_sema_get_incr_cmd_size(void);
-void gv11b_sema_add_cmd(struct gk20a *g,
-		struct nvgpu_semaphore *s, u64 sema_va,
-		struct priv_cmd_entry *cmd,
-		u32 off, bool acquire, bool wfi);
+void gv11b_sema_add_wait_cmd(struct gk20a *g,
+		struct priv_cmd_entry *cmd, u32 off,
+		struct nvgpu_semaphore *s, u64 sema_va);
+void gv11b_sema_add_incr_cmd(struct gk20a *g,
+		struct priv_cmd_entry *cmd,
+		struct nvgpu_semaphore *s, u64 sema_va,
+		bool wfi);
 
 #endif /* NVGPU_SYNC_SEMA_CMDBUF_GV11B_H */

@@ -32,7 +32,7 @@
 #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
 void gk20a_syncpt_add_wait_cmd(struct gk20a *g,
 		struct priv_cmd_entry *cmd, u32 off,
-		u32 id, u32 thresh, u64 gpu_va)
+		u32 id, u32 thresh, u64 gpu_va_base)
 {
 	nvgpu_log_fn(g, " ");
 
@@ -58,13 +58,13 @@ u32 gk20a_syncpt_get_incr_per_release(void)
 }
 
 void gk20a_syncpt_add_incr_cmd(struct gk20a *g,
-		bool wfi_cmd, struct priv_cmd_entry *cmd,
-		u32 id, u64 gpu_va)
+		struct priv_cmd_entry *cmd,
+		u32 id, u64 gpu_va, bool wfi)
 {
 	u32 off = cmd->off;
 
 	nvgpu_log_fn(g, " ");
 
-	if (wfi_cmd) {
+	if (wfi) {
 		/* wfi */
 		nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001EU);
 		/* handle, ignored */

@@ -33,12 +33,12 @@ struct nvgpu_mem;
 
 #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
 void gk20a_syncpt_add_wait_cmd(struct gk20a *g,
 		struct priv_cmd_entry *cmd, u32 off,
-		u32 id, u32 thresh, u64 gpu_va);
+		u32 id, u32 thresh, u64 gpu_va_base);
 u32 gk20a_syncpt_get_wait_cmd_size(void);
 u32 gk20a_syncpt_get_incr_per_release(void);
 void gk20a_syncpt_add_incr_cmd(struct gk20a *g,
-		bool wfi_cmd, struct priv_cmd_entry *cmd,
-		u32 id, u64 gpu_va);
+		struct priv_cmd_entry *cmd,
+		u32 id, u64 gpu_va, bool wfi);
 u32 gk20a_syncpt_get_incr_cmd_size(bool wfi_cmd);
 #endif
@@ -53,7 +53,7 @@ int gk20a_syncpt_alloc_buf(struct nvgpu_channel *c,
 #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
 static inline void gk20a_syncpt_add_wait_cmd(struct gk20a *g,
 		struct priv_cmd_entry *cmd, u32 off,
-		u32 id, u32 thresh, u64 gpu_va)
+		u32 id, u32 thresh, u64 gpu_va_base)
 {
 }
@@ -65,8 +65,8 @@ static inline u32 gk20a_syncpt_get_incr_per_release(void)
 	return 0U;
 }
 
 static inline void gk20a_syncpt_add_incr_cmd(struct gk20a *g,
-		bool wfi_cmd, struct priv_cmd_entry *cmd,
-		u32 id, u64 gpu_va)
+		struct priv_cmd_entry *cmd,
+		u32 id, u64 gpu_va, bool wfi)
 {
 }
@@ -87,4 +87,4 @@ static inline int gk20a_syncpt_alloc_buf(struct nvgpu_channel *c,
 #endif
 
-#endif /* NVGPU_SYNC_SYNCPT_CMDBUF_GK20A_H */
+#endif /* NVGPU_SYNC_SYNCPT_CMDBUF_GK20A_H */

@@ -81,8 +81,8 @@ u32 gv11b_syncpt_get_incr_per_release(void)
 }
 
 void gv11b_syncpt_add_incr_cmd(struct gk20a *g,
-		bool wfi_cmd, struct priv_cmd_entry *cmd,
-		u32 id, u64 gpu_va)
+		struct priv_cmd_entry *cmd,
+		u32 id, u64 gpu_va, bool wfi)
 {
 	u32 off = cmd->off;
 
@@ -109,7 +109,7 @@ void gv11b_syncpt_add_incr_cmd(struct gk20a *g,
 	/* sema_execute : release | wfi | 32bit */
 	nvgpu_mem_wr32(g, cmd->mem, off++, 0x2001001b);
 	nvgpu_mem_wr32(g, cmd->mem, off, (0x1U |
-		((u32)(wfi_cmd ? 0x1U : 0x0U) << 20U)));
+		((u32)(wfi ? 0x1U : 0x0U) << 20U)));
 }
 
 u32 gv11b_syncpt_get_incr_cmd_size(bool wfi_cmd)

@@ -40,8 +40,8 @@ void gv11b_syncpt_add_wait_cmd(struct gk20a *g,
 u32 gv11b_syncpt_get_wait_cmd_size(void);
 u32 gv11b_syncpt_get_incr_per_release(void);
 void gv11b_syncpt_add_incr_cmd(struct gk20a *g,
-		bool wfi_cmd, struct priv_cmd_entry *cmd,
-		u32 id, u64 gpu_va);
+		struct priv_cmd_entry *cmd,
+		u32 id, u64 gpu_va, bool wfi);
 u32 gv11b_syncpt_get_incr_cmd_size(bool wfi_cmd);
 #endif /* CONFIG_NVGPU_KERNEL_MODE_SUBMIT */
@@ -71,8 +71,8 @@ static inline u32 gv11b_syncpt_get_incr_per_release(void)
 	return 0U;
 }
 
 static inline void gv11b_syncpt_add_incr_cmd(struct gk20a *g,
-		bool wfi_cmd, struct priv_cmd_entry *cmd,
-		u32 id, u64 gpu_va)
+		struct priv_cmd_entry *cmd,
+		u32 id, u64 gpu_va, bool wfi)
 {
 }
 
 static inline u32 gv11b_syncpt_get_incr_cmd_size(bool wfi_cmd)

@@ -541,9 +541,10 @@ static const struct gpu_ops vgpu_gp10b_ops = {
 #endif /* CONFIG_TEGRA_GK20A_NVHOST */
 #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
 			.sema = {
+				.add_wait_cmd = gk20a_sema_add_wait_cmd,
 				.get_wait_cmd_size = gk20a_sema_get_wait_cmd_size,
+				.add_incr_cmd = gk20a_sema_add_incr_cmd,
 				.get_incr_cmd_size = gk20a_sema_get_incr_cmd_size,
-				.add_cmd = gk20a_sema_add_cmd,
 			},
 #endif
 		},

@@ -661,9 +661,10 @@ static const struct gpu_ops vgpu_gv11b_ops = {
 #endif
 #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
 			.sema = {
+				.add_wait_cmd = gv11b_sema_add_wait_cmd,
 				.get_wait_cmd_size = gv11b_sema_get_wait_cmd_size,
+				.add_incr_cmd = gv11b_sema_add_incr_cmd,
 				.get_incr_cmd_size = gv11b_sema_get_incr_cmd_size,
-				.add_cmd = gv11b_sema_add_cmd,
 			},
 #endif
 		},

@@ -77,12 +77,12 @@ struct gops_sync {
 #ifdef CONFIG_NVGPU_KERNEL_MODE_SUBMIT
 		void (*add_wait_cmd)(struct gk20a *g,
 				struct priv_cmd_entry *cmd, u32 off,
-				u32 id, u32 thresh, u64 gpu_va);
+				u32 id, u32 thresh, u64 gpu_va_base);
 		u32 (*get_wait_cmd_size)(void);
 		void (*add_incr_cmd)(struct gk20a *g,
-				bool wfi_cmd,
 				struct priv_cmd_entry *cmd,
-				u32 id, u64 gpu_va);
+				u32 id, u64 gpu_va,
+				bool wfi);
 		u32 (*get_incr_cmd_size)(bool wfi_cmd);
 		u32 (*get_incr_per_release)(void);
 #endif
@@ -96,10 +96,13 @@ struct gops_sync {
 	struct gops_sync_sema {
 		u32 (*get_wait_cmd_size)(void);
 		u32 (*get_incr_cmd_size)(void);
-		void (*add_cmd)(struct gk20a *g,
-				struct nvgpu_semaphore *s, u64 sema_va,
+		void (*add_wait_cmd)(struct gk20a *g,
+				struct priv_cmd_entry *cmd, u32 off,
+				struct nvgpu_semaphore *s, u64 sema_va);
+		void (*add_incr_cmd)(struct gk20a *g,
 				struct priv_cmd_entry *cmd,
-				u32 off, bool acquire, bool wfi);
+				struct nvgpu_semaphore *s, u64 sema_va,
+				bool wfi);
 	} sema;
 /** @endcond DOXYGEN_SHOULD_SKIP_THIS */
 #endif