diff --git a/drivers/gpu/nvgpu/common/semaphore.c b/drivers/gpu/nvgpu/common/semaphore.c
index dfed35883..b2238bb73 100644
--- a/drivers/gpu/nvgpu/common/semaphore.c
+++ b/drivers/gpu/nvgpu/common/semaphore.c
@@ -376,10 +376,10 @@ static int __nvgpu_init_hw_sema(struct channel_gk20a *ch)
 
 	ch->hw_sema = hw_sema;
 	hw_sema->ch = ch;
-	hw_sema->p = p;
-	hw_sema->idx = hw_sema_idx;
-	hw_sema->offset = SEMAPHORE_SIZE * hw_sema_idx;
-	current_value = nvgpu_mem_rd(ch->g, &p->rw_mem, hw_sema->offset);
+	hw_sema->location.pool = p;
+	hw_sema->location.offset = SEMAPHORE_SIZE * hw_sema_idx;
+	current_value = nvgpu_mem_rd(ch->g, &p->rw_mem,
+			hw_sema->location.offset);
 	nvgpu_atomic_set(&hw_sema->next_value, current_value);
 
 	nvgpu_mutex_release(&p->pool_lock);
@@ -399,15 +399,16 @@ fail:
 void nvgpu_semaphore_free_hw_sema(struct channel_gk20a *ch)
 {
 	struct nvgpu_semaphore_pool *p = ch->vm->sema_pool;
+	struct nvgpu_semaphore_int *hw_sema = ch->hw_sema;
+	int idx = hw_sema->location.offset / SEMAPHORE_SIZE;
 
 	BUG_ON(!p);
 
 	nvgpu_mutex_acquire(&p->pool_lock);
 
-	clear_bit(ch->hw_sema->idx, p->semas_alloced);
+	clear_bit(idx, p->semas_alloced);
 
-	/* Make sure that when the ch is re-opened it will get a new HW sema. */
-	nvgpu_kfree(ch->g, ch->hw_sema);
+	nvgpu_kfree(ch->g, hw_sema);
 	ch->hw_sema = NULL;
 
 	nvgpu_mutex_release(&p->pool_lock);
@@ -435,14 +436,15 @@ struct nvgpu_semaphore *nvgpu_semaphore_alloc(struct channel_gk20a *ch)
 		return NULL;
 
 	nvgpu_ref_init(&s->ref);
-	s->hw_sema = ch->hw_sema;
+	s->g = ch->g;
+	s->location = ch->hw_sema->location;
 	nvgpu_atomic_set(&s->value, 0);
 
 	/*
 	 * Take a ref on the pool so that we can keep this pool alive for
 	 * as long as this semaphore is alive.
 	 */
-	nvgpu_semaphore_pool_get(s->hw_sema->p);
+	nvgpu_semaphore_pool_get(s->location.pool);
 
 	gpu_sema_dbg(ch->g, "Allocated semaphore (c=%d)", ch->chid);
 
@@ -454,9 +456,9 @@ static void nvgpu_semaphore_free(struct nvgpu_ref *ref)
 	struct nvgpu_semaphore *s =
 		container_of(ref, struct nvgpu_semaphore, ref);
 
-	nvgpu_semaphore_pool_put(s->hw_sema->p);
+	nvgpu_semaphore_pool_put(s->location.pool);
 
-	nvgpu_kfree(s->hw_sema->ch->g, s);
+	nvgpu_kfree(s->g, s);
 }
 
 void nvgpu_semaphore_put(struct nvgpu_semaphore *s)
diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
index 5cd7223f0..fb7406da3 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c
@@ -215,7 +215,8 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
 			job = &ch->joblist.pre_alloc.jobs[tmp_get];
 			if (job->post_fence->semaphore) {
 				__nvgpu_semaphore_release(
-					job->post_fence->semaphore, true);
+					job->post_fence->semaphore, true,
+					ch->hw_sema);
 				released_job_semaphore = true;
 			}
 			tmp_get = (tmp_get + 1) % ch->joblist.pre_alloc.length;
@@ -226,7 +227,8 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch)
 				channel_gk20a_job, list) {
 			if (job->post_fence->semaphore) {
 				__nvgpu_semaphore_release(
-					job->post_fence->semaphore, true);
+					job->post_fence->semaphore, true,
+					ch->hw_sema);
 				released_job_semaphore = true;
 			}
 		}
diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
index 45d9ae9c8..4b1be8b91 100644
--- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c
@@ -494,7 +494,7 @@ static void gk20a_channel_semaphore_launcher(
 		  "wait completed (%d) for fence %p '%s', triggering gpu work",
 		  err, fence, fence->name);
 	sync_fence_put(fence);
-	nvgpu_semaphore_release(w->sema);
+	nvgpu_semaphore_release(w->sema, w->ch->hw_sema);
 	nvgpu_semaphore_put(w->sema);
 	nvgpu_kfree(g, w);
 }
@@ -522,7 +522,7 @@ static void add_sema_cmd(struct gk20a *g, struct channel_gk20a *c,
 	 * incr the underlying sema next_value.
 	 */
 	if (!acquire)
-		nvgpu_semaphore_incr(s);
+		nvgpu_semaphore_incr(s, c->hw_sema);
 
 	/* semaphore_a */
 	nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010004);
@@ -561,17 +561,18 @@ static void add_sema_cmd(struct gk20a *g, struct channel_gk20a *c,
 	}
 
 	if (acquire)
-		gpu_sema_verbose_dbg(g, "(A) c=%d ACQ_GE %-4u owner=%-3d"
+		gpu_sema_verbose_dbg(g, "(A) c=%d ACQ_GE %-4u pool=%-3d "
 				     "va=0x%llx cmd_mem=0x%llx b=0x%llx off=%u",
 				     ch, nvgpu_semaphore_get_value(s),
-				     s->hw_sema->ch->chid, va, cmd->gva,
+				     s->location.pool->page_idx, va, cmd->gva,
 				     cmd->mem->gpu_va, ob);
 	else
-		gpu_sema_verbose_dbg(g, "(R) c=%d INCR %u (%u) va=0x%llx "
-				     "cmd_mem=0x%llx b=0x%llx off=%u",
+		gpu_sema_verbose_dbg(g, "(R) c=%d INCR %u (%u) pool=%-3d "
+				     "va=0x%llx cmd_mem=0x%llx b=0x%llx off=%u",
 				     ch, nvgpu_semaphore_get_value(s),
-				     nvgpu_semaphore_read(s), va, cmd->gva,
-				     cmd->mem->gpu_va, ob);
+				     nvgpu_semaphore_read(s),
+				     s->location.pool->page_idx,
+				     va, cmd->gva, cmd->mem->gpu_va, ob);
 }
 
 static int gk20a_channel_semaphore_wait_syncpt(
@@ -714,7 +715,7 @@ static int gk20a_channel_semaphore_wait_fd(
 
 	/* worker takes one reference */
 	nvgpu_semaphore_get(w->sema);
-	nvgpu_semaphore_incr(w->sema);
+	nvgpu_semaphore_incr(w->sema, c->hw_sema);
 
 	/* GPU unblocked when the semaphore value increments. */
 	add_sema_cmd(c->g, c, w->sema, wait_cmd, 8, true, false);
@@ -740,7 +741,7 @@ static int gk20a_channel_semaphore_wait_fd(
 	 */
 	if (ret == 1) {
 		sync_fence_put(sync_fence);
-		nvgpu_semaphore_release(w->sema);
+		nvgpu_semaphore_release(w->sema, c->hw_sema);
 		nvgpu_semaphore_put(w->sema);
 	}
 
diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
index cd0c094fe..f6d16b902 100644
--- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
+++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c
@@ -338,8 +338,8 @@ static void gk20a_sync_pt_value_str_for_sema(struct gk20a_sync_pt *pt,
 {
 	struct nvgpu_semaphore *s = pt->sema;
 
-	snprintf(str, size, "S: c=%d [v=%u,r_v=%u]",
-		 s->hw_sema->ch->chid,
+	snprintf(str, size, "S: pool=%d [v=%u,r_v=%u]",
+		 s->location.pool->page_idx,
 		 nvgpu_semaphore_get_value(s),
 		 nvgpu_semaphore_read(s));
 }
diff --git a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
index 9c74d3008..e66b21881 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
@@ -50,15 +50,18 @@
 
 struct nvgpu_semaphore_sea;
 
+struct nvgpu_semaphore_loc {
+	struct nvgpu_semaphore_pool *pool; /* Pool that owns this sema. */
+	u32 offset;			   /* Byte offset into the pool. */
+};
+
 /*
  * Underlying semaphore data structure. This semaphore can be shared amongst
  * other semaphore instances.
  */
 struct nvgpu_semaphore_int {
-	int idx;			/* Semaphore index. */
-	u32 offset;			/* Offset into the pool. */
+	struct nvgpu_semaphore_loc location;
 	nvgpu_atomic_t next_value;	/* Next available value. */
-	struct nvgpu_semaphore_pool *p;	/* Pool that owns this sema. */
 	struct channel_gk20a *ch;	/* Channel that owns this sema. */
 };
 
@@ -68,7 +71,8 @@ struct nvgpu_semaphore_int {
  * semaphore to be shared among an essentially infinite number of submits.
  */
 struct nvgpu_semaphore {
-	struct nvgpu_semaphore_int *hw_sema;
+	struct gk20a *g;
+	struct nvgpu_semaphore_loc location;
 
 	nvgpu_atomic_t value;
 	int incremented;
@@ -195,8 +199,8 @@ void nvgpu_semaphore_free_hw_sema(struct channel_gk20a *ch);
  */
 static inline u64 nvgpu_semaphore_gpu_rw_va(struct nvgpu_semaphore *s)
 {
-	return __nvgpu_semaphore_pool_gpu_va(s->hw_sema->p, false) +
-		s->hw_sema->offset;
+	return __nvgpu_semaphore_pool_gpu_va(s->location.pool, false) +
+		s->location.offset;
 }
 
 /*
@@ -205,20 +209,20 @@ static inline u64 nvgpu_semaphore_gpu_rw_va(struct nvgpu_semaphore *s)
  */
 static inline u64 nvgpu_semaphore_gpu_ro_va(struct nvgpu_semaphore *s)
 {
-	return __nvgpu_semaphore_pool_gpu_va(s->hw_sema->p, true) +
-		s->hw_sema->offset;
+	return __nvgpu_semaphore_pool_gpu_va(s->location.pool, true) +
+		s->location.offset;
 }
 
 static inline u64 nvgpu_hw_sema_addr(struct nvgpu_semaphore_int *hw_sema)
 {
-	return __nvgpu_semaphore_pool_gpu_va(hw_sema->p, true) +
-		hw_sema->offset;
+	return __nvgpu_semaphore_pool_gpu_va(hw_sema->location.pool, true) +
+		hw_sema->location.offset;
 }
 
 static inline u32 __nvgpu_semaphore_read(struct nvgpu_semaphore_int *hw_sema)
 {
-	return nvgpu_mem_rd(hw_sema->ch->g,
-			    &hw_sema->p->rw_mem, hw_sema->offset);
+	return nvgpu_mem_rd(hw_sema->ch->g, &hw_sema->location.pool->rw_mem,
+			hw_sema->location.offset);
 }
 
 /*
@@ -226,7 +230,8 @@ static inline u32 __nvgpu_semaphore_read(struct nvgpu_semaphore_int *hw_sema)
  */
 static inline u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s)
 {
-	return __nvgpu_semaphore_read(s->hw_sema);
+	return nvgpu_mem_rd(s->g, &s->location.pool->rw_mem,
+			s->location.offset);
 }
 
 /*
@@ -270,19 +275,14 @@ static inline bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s)
 	return !nvgpu_semaphore_is_released(s);
 }
 
-static inline u32 nvgpu_semaphore_next_value(struct nvgpu_semaphore *s)
-{
-	return (u32)nvgpu_atomic_read(&s->hw_sema->next_value);
-}
-
 /*
  * If @force is set then this will not wait for the underlying semaphore to
  * catch up to the passed semaphore threshold.
  */
 static inline void __nvgpu_semaphore_release(struct nvgpu_semaphore *s,
-					     bool force)
+					     bool force,
+					     struct nvgpu_semaphore_int *hw_sema)
 {
-	struct nvgpu_semaphore_int *hw_sema = s->hw_sema;
 	u32 current_val;
 	u32 threshold = nvgpu_semaphore_get_value(s);
 	int attempts = 0;
@@ -312,16 +312,17 @@ static inline void __nvgpu_semaphore_release(struct nvgpu_semaphore *s,
 	if (__nvgpu_semaphore_value_released(threshold, current_val))
 		return;
 
-	nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->p->rw_mem, hw_sema->offset,
-			threshold);
+	nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->location.pool->rw_mem,
+			hw_sema->location.offset, threshold);
 
-	gpu_sema_verbose_dbg(hw_sema->p->sema_sea->gk20a,
+	gpu_sema_verbose_dbg(hw_sema->location.pool->sema_sea->gk20a,
 			     "(c=%d) WRITE %u", hw_sema->ch->chid, threshold);
 }
 
-static inline void nvgpu_semaphore_release(struct nvgpu_semaphore *s)
+static inline void nvgpu_semaphore_release(struct nvgpu_semaphore *s,
+					   struct nvgpu_semaphore_int *hw_sema)
 {
-	__nvgpu_semaphore_release(s, false);
+	__nvgpu_semaphore_release(s, false, hw_sema);
 }
 
 /*
@@ -333,16 +334,17 @@ static inline void nvgpu_semaphore_release(struct nvgpu_semaphore *s)
  *
  * Also used to prep a semaphore for an INCR by the GPU.
  */
-static inline void nvgpu_semaphore_incr(struct nvgpu_semaphore *s)
+static inline void nvgpu_semaphore_incr(struct nvgpu_semaphore *s,
+		struct nvgpu_semaphore_int *hw_sema)
 {
 	BUG_ON(s->incremented);
 
-	nvgpu_atomic_set(&s->value, nvgpu_atomic_add_return(1, &s->hw_sema->next_value));
+	nvgpu_atomic_set(&s->value, nvgpu_atomic_add_return(1, &hw_sema->next_value));
 	s->incremented = 1;
 
-	gpu_sema_verbose_dbg(s->hw_sema->p->sema_sea->gk20a,
+	gpu_sema_verbose_dbg(s->g,
 			     "INCR sema for c=%d (%u)",
-			     s->hw_sema->ch->chid,
-			     nvgpu_semaphore_next_value(s));
+			     hw_sema->ch->chid,
+			     (u32)nvgpu_atomic_read(&hw_sema->next_value));
 }
 #endif