diff --git a/drivers/gpu/nvgpu/common/semaphore.c b/drivers/gpu/nvgpu/common/semaphore.c index dfed35883..b2238bb73 100644 --- a/drivers/gpu/nvgpu/common/semaphore.c +++ b/drivers/gpu/nvgpu/common/semaphore.c @@ -376,10 +376,10 @@ static int __nvgpu_init_hw_sema(struct channel_gk20a *ch) ch->hw_sema = hw_sema; hw_sema->ch = ch; - hw_sema->p = p; - hw_sema->idx = hw_sema_idx; - hw_sema->offset = SEMAPHORE_SIZE * hw_sema_idx; - current_value = nvgpu_mem_rd(ch->g, &p->rw_mem, hw_sema->offset); + hw_sema->location.pool = p; + hw_sema->location.offset = SEMAPHORE_SIZE * hw_sema_idx; + current_value = nvgpu_mem_rd(ch->g, &p->rw_mem, + hw_sema->location.offset); nvgpu_atomic_set(&hw_sema->next_value, current_value); nvgpu_mutex_release(&p->pool_lock); @@ -399,15 +399,16 @@ fail: void nvgpu_semaphore_free_hw_sema(struct channel_gk20a *ch) { struct nvgpu_semaphore_pool *p = ch->vm->sema_pool; + struct nvgpu_semaphore_int *hw_sema = ch->hw_sema; + int idx = hw_sema->location.offset / SEMAPHORE_SIZE; BUG_ON(!p); nvgpu_mutex_acquire(&p->pool_lock); - clear_bit(ch->hw_sema->idx, p->semas_alloced); + clear_bit(idx, p->semas_alloced); - /* Make sure that when the ch is re-opened it will get a new HW sema. */ - nvgpu_kfree(ch->g, ch->hw_sema); + nvgpu_kfree(ch->g, hw_sema); ch->hw_sema = NULL; nvgpu_mutex_release(&p->pool_lock); @@ -435,14 +436,15 @@ struct nvgpu_semaphore *nvgpu_semaphore_alloc(struct channel_gk20a *ch) return NULL; nvgpu_ref_init(&s->ref); - s->hw_sema = ch->hw_sema; + s->g = ch->g; + s->location = ch->hw_sema->location; nvgpu_atomic_set(&s->value, 0); /* * Take a ref on the pool so that we can keep this pool alive for * as long as this semaphore is alive. */ - nvgpu_semaphore_pool_get(s->hw_sema->p); + nvgpu_semaphore_pool_get(s->location.pool); gpu_sema_dbg(ch->g, "Allocated semaphore (c=%d)", ch->chid); @@ -454,9 +456,9 @@ static void nvgpu_semaphore_free(struct nvgpu_ref *ref) struct nvgpu_semaphore *s = container_of(ref, struct nvgpu_semaphore, ref); - nvgpu_semaphore_pool_put(s->hw_sema->p); + nvgpu_semaphore_pool_put(s->location.pool); - nvgpu_kfree(s->hw_sema->ch->g, s); + nvgpu_kfree(s->g, s); } void nvgpu_semaphore_put(struct nvgpu_semaphore *s) diff --git a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c index 5cd7223f0..fb7406da3 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_gk20a.c @@ -215,7 +215,8 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch) job = &ch->joblist.pre_alloc.jobs[tmp_get]; if (job->post_fence->semaphore) { __nvgpu_semaphore_release( - job->post_fence->semaphore, true); + job->post_fence->semaphore, true, + ch->hw_sema); released_job_semaphore = true; } tmp_get = (tmp_get + 1) % ch->joblist.pre_alloc.length; @@ -226,7 +227,8 @@ void gk20a_channel_abort_clean_up(struct channel_gk20a *ch) channel_gk20a_job, list) { if (job->post_fence->semaphore) { __nvgpu_semaphore_release( - job->post_fence->semaphore, true); + job->post_fence->semaphore, true, + ch->hw_sema); released_job_semaphore = true; } } diff --git a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c index 45d9ae9c8..4b1be8b91 100644 --- a/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/channel_sync_gk20a.c @@ -494,7 +494,7 @@ static void gk20a_channel_semaphore_launcher( "wait completed (%d) for fence %p '%s', triggering gpu work", err, fence, fence->name); sync_fence_put(fence); - nvgpu_semaphore_release(w->sema); + nvgpu_semaphore_release(w->sema, w->ch->hw_sema); nvgpu_semaphore_put(w->sema); nvgpu_kfree(g, w); } @@ -522,7 +522,7 @@ static void add_sema_cmd(struct gk20a *g, struct channel_gk20a *c, * incr the underlying sema next_value. */ if (!acquire) - nvgpu_semaphore_incr(s); + nvgpu_semaphore_incr(s, c->hw_sema); /* semaphore_a */ nvgpu_mem_wr32(g, cmd->mem, off++, 0x20010004); @@ -561,17 +561,18 @@ static void add_sema_cmd(struct gk20a *g, struct channel_gk20a *c, } if (acquire) - gpu_sema_verbose_dbg(g, "(A) c=%d ACQ_GE %-4u owner=%-3d" + gpu_sema_verbose_dbg(g, "(A) c=%d ACQ_GE %-4u pool=%-3d" "va=0x%llx cmd_mem=0x%llx b=0x%llx off=%u", ch, nvgpu_semaphore_get_value(s), - s->hw_sema->ch->chid, va, cmd->gva, + s->location.pool->page_idx, va, cmd->gva, cmd->mem->gpu_va, ob); else - gpu_sema_verbose_dbg(g, "(R) c=%d INCR %u (%u) va=0x%llx " - "cmd_mem=0x%llx b=0x%llx off=%u", + gpu_sema_verbose_dbg(g, "(R) c=%d INCR %u (%u) pool=%-3d" + "va=0x%llx cmd_mem=0x%llx b=0x%llx off=%u", ch, nvgpu_semaphore_get_value(s), - nvgpu_semaphore_read(s), va, cmd->gva, - cmd->mem->gpu_va, ob); + nvgpu_semaphore_read(s), + s->location.pool->page_idx, + va, cmd->gva, cmd->mem->gpu_va, ob); } static int gk20a_channel_semaphore_wait_syncpt( @@ -714,7 +715,7 @@ static int gk20a_channel_semaphore_wait_fd( /* worker takes one reference */ nvgpu_semaphore_get(w->sema); - nvgpu_semaphore_incr(w->sema); + nvgpu_semaphore_incr(w->sema, c->hw_sema); /* GPU unblocked when the semaphore value increments. */ add_sema_cmd(c->g, c, w->sema, wait_cmd, 8, true, false); @@ -740,7 +741,7 @@ static int gk20a_channel_semaphore_wait_fd( */ if (ret == 1) { sync_fence_put(sync_fence); - nvgpu_semaphore_release(w->sema); + nvgpu_semaphore_release(w->sema, c->hw_sema); nvgpu_semaphore_put(w->sema); } diff --git a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c index cd0c094fe..f6d16b902 100644 --- a/drivers/gpu/nvgpu/gk20a/sync_gk20a.c +++ b/drivers/gpu/nvgpu/gk20a/sync_gk20a.c @@ -338,8 +338,8 @@ static void gk20a_sync_pt_value_str_for_sema(struct gk20a_sync_pt *pt, { struct nvgpu_semaphore *s = pt->sema; - snprintf(str, size, "S: c=%d [v=%u,r_v=%u]", - s->hw_sema->ch->chid, + snprintf(str, size, "S: pool=%d [v=%u,r_v=%u]", + s->location.pool->page_idx, nvgpu_semaphore_get_value(s), nvgpu_semaphore_read(s)); } diff --git a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h index 9c74d3008..e66b21881 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h +++ b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h @@ -50,15 +50,18 @@ struct nvgpu_semaphore_sea; +struct nvgpu_semaphore_loc { + struct nvgpu_semaphore_pool *pool; /* Pool that owns this sema. */ + u32 offset; /* Byte offset into the pool. */ +}; + /* * Underlying semaphore data structure. This semaphore can be shared amongst * other semaphore instances. */ struct nvgpu_semaphore_int { - int idx; /* Semaphore index. */ - u32 offset; /* Offset into the pool. */ + struct nvgpu_semaphore_loc location; nvgpu_atomic_t next_value; /* Next available value. */ - struct nvgpu_semaphore_pool *p; /* Pool that owns this sema. */ struct channel_gk20a *ch; /* Channel that owns this sema. */ }; @@ -68,7 +71,8 @@ struct nvgpu_semaphore_int { * semaphore to be shared among an essentially infinite number of submits. */ struct nvgpu_semaphore { - struct nvgpu_semaphore_int *hw_sema; + struct gk20a *g; + struct nvgpu_semaphore_loc location; nvgpu_atomic_t value; int incremented; @@ -195,8 +199,8 @@ void nvgpu_semaphore_free_hw_sema(struct channel_gk20a *ch); */ static inline u64 nvgpu_semaphore_gpu_rw_va(struct nvgpu_semaphore *s) { - return __nvgpu_semaphore_pool_gpu_va(s->hw_sema->p, false) + - s->hw_sema->offset; + return __nvgpu_semaphore_pool_gpu_va(s->location.pool, false) + + s->location.offset; } /* @@ -205,20 +209,20 @@ static inline u64 nvgpu_semaphore_gpu_rw_va(struct nvgpu_semaphore *s) */ static inline u64 nvgpu_semaphore_gpu_ro_va(struct nvgpu_semaphore *s) { - return __nvgpu_semaphore_pool_gpu_va(s->hw_sema->p, true) + - s->hw_sema->offset; + return __nvgpu_semaphore_pool_gpu_va(s->location.pool, true) + + s->location.offset; } static inline u64 nvgpu_hw_sema_addr(struct nvgpu_semaphore_int *hw_sema) { - return __nvgpu_semaphore_pool_gpu_va(hw_sema->p, true) + - hw_sema->offset; + return __nvgpu_semaphore_pool_gpu_va(hw_sema->location.pool, true) + + hw_sema->location.offset; } static inline u32 __nvgpu_semaphore_read(struct nvgpu_semaphore_int *hw_sema) { - return nvgpu_mem_rd(hw_sema->ch->g, - &hw_sema->p->rw_mem, hw_sema->offset); + return nvgpu_mem_rd(hw_sema->ch->g, &hw_sema->location.pool->rw_mem, + hw_sema->location.offset); } /* @@ -226,7 +230,8 @@ static inline u32 __nvgpu_semaphore_read(struct nvgpu_semaphore_int *hw_sema) */ static inline u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s) { - return __nvgpu_semaphore_read(s->hw_sema); + return nvgpu_mem_rd(s->g, &s->location.pool->rw_mem, + s->location.offset); } /* @@ -270,19 +275,14 @@ static inline bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s) return !nvgpu_semaphore_is_released(s); } -static inline u32 nvgpu_semaphore_next_value(struct nvgpu_semaphore *s) -{ - return (u32)nvgpu_atomic_read(&s->hw_sema->next_value); -} - /* * If @force is set then this will not wait for the underlying semaphore to * catch up to the passed semaphore threshold. */ static inline void __nvgpu_semaphore_release(struct nvgpu_semaphore *s, - bool force) + bool force, + struct nvgpu_semaphore_int *hw_sema) { - struct nvgpu_semaphore_int *hw_sema = s->hw_sema; u32 current_val; u32 threshold = nvgpu_semaphore_get_value(s); int attempts = 0; @@ -312,16 +312,17 @@ static inline void __nvgpu_semaphore_release(struct nvgpu_semaphore *s, if (__nvgpu_semaphore_value_released(threshold, current_val)) return; - nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->p->rw_mem, hw_sema->offset, - threshold); + nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->location.pool->rw_mem, + hw_sema->location.offset, threshold); - gpu_sema_verbose_dbg(hw_sema->p->sema_sea->gk20a, + gpu_sema_verbose_dbg(hw_sema->location.pool->sema_sea->gk20a, "(c=%d) WRITE %u", hw_sema->ch->chid, threshold); } -static inline void nvgpu_semaphore_release(struct nvgpu_semaphore *s) +static inline void nvgpu_semaphore_release(struct nvgpu_semaphore *s, + struct nvgpu_semaphore_int *hw_sema) { - __nvgpu_semaphore_release(s, false); + __nvgpu_semaphore_release(s, false, hw_sema); } /* @@ -333,16 +334,17 @@ static inline void nvgpu_semaphore_release(struct nvgpu_semaphore *s) * * Also used to prep a semaphore for an INCR by the GPU. */ -static inline void nvgpu_semaphore_incr(struct nvgpu_semaphore *s) +static inline void nvgpu_semaphore_incr(struct nvgpu_semaphore *s, + struct nvgpu_semaphore_int *hw_sema) { BUG_ON(s->incremented); - nvgpu_atomic_set(&s->value, nvgpu_atomic_add_return(1, &s->hw_sema->next_value)); + nvgpu_atomic_set(&s->value, nvgpu_atomic_add_return(1, &hw_sema->next_value)); s->incremented = 1; - gpu_sema_verbose_dbg(s->hw_sema->p->sema_sea->gk20a, + gpu_sema_verbose_dbg(s->g, "INCR sema for c=%d (%u)", - s->hw_sema->ch->chid, - nvgpu_semaphore_next_value(s)); + hw_sema->ch->chid, + nvgpu_atomic_read(&hw_sema->next_value)); } #endif