diff --git a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h index a4af1ca33..9c74d3008 100644 --- a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h +++ b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h @@ -230,23 +230,26 @@ static inline u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s) } /* - * TODO: handle wrap around... Hmm, how to do this? + * Check if "racer" is over "goal" with wraparound handling. */ -static inline bool nvgpu_semaphore_is_released(struct nvgpu_semaphore *s) +static inline bool __nvgpu_semaphore_value_released(u32 goal, u32 racer) { - u32 sema_val = nvgpu_semaphore_read(s); - /* - * If the underlying semaphore value is greater than or equal to - * the value of the semaphore then the semaphore has been signaled - * (a.k.a. released). + * Handle wraparound with the same heuristic as the hardware does: + * although the integer will eventually wrap around, consider a sema + * released against a threshold if its value has passed that threshold + * but has not wrapped over half of the u32 range over that threshold; + * such wrapping is unlikely to happen during a sema lifetime. + * + * Values for [goal, goal + 0x7fffffff] are considered signaled; that's + * precisely half of the 32-bit space. If racer == goal + 0x80000000, + * then it needs 0x80000000 increments to wrap again and signal. + * + * Unsigned arithmetic is used because it's well-defined. This is + * effectively the same as: signed_racer - signed_goal > 0. */ - return (int)sema_val >= nvgpu_atomic_read(&s->value); -} -static inline bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s) -{ - return !nvgpu_semaphore_is_released(s); + return racer - goal < 0x80000000; } static inline u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s) @@ -254,6 +257,19 @@ static inline u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s) return (u32)nvgpu_atomic_read(&s->value); } +static inline bool nvgpu_semaphore_is_released(struct nvgpu_semaphore *s) +{ + u32 sema_val = nvgpu_semaphore_read(s); + u32 wait_payload = nvgpu_semaphore_get_value(s); + + return __nvgpu_semaphore_value_released(wait_payload, sema_val); +} + +static inline bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s) +{ + return !nvgpu_semaphore_is_released(s); +} + static inline u32 nvgpu_semaphore_next_value(struct nvgpu_semaphore *s) { return (u32)nvgpu_atomic_read(&s->hw_sema->next_value); @@ -261,14 +277,14 @@ static inline u32 nvgpu_semaphore_next_value(struct nvgpu_semaphore *s) /* * If @force is set then this will not wait for the underlying semaphore to - * catch up to the passed semaphore. + * catch up to the passed semaphore threshold. */ static inline void __nvgpu_semaphore_release(struct nvgpu_semaphore *s, bool force) { struct nvgpu_semaphore_int *hw_sema = s->hw_sema; u32 current_val; - u32 val = nvgpu_semaphore_get_value(s); + u32 threshold = nvgpu_semaphore_get_value(s); int attempts = 0; /* @@ -277,7 +293,8 @@ static inline void __nvgpu_semaphore_release(struct nvgpu_semaphore *s, * * TODO: tune the wait a little better. */ - while ((current_val = nvgpu_semaphore_read(s)) < (val - 1)) { + while (!__nvgpu_semaphore_value_released(threshold - 1, + current_val = nvgpu_semaphore_read(s))) { if (force) break; nvgpu_msleep(100); @@ -292,13 +309,14 @@ static inline void __nvgpu_semaphore_release(struct nvgpu_semaphore *s, * If the semaphore has already passed the value we would write then * this is really just a NO-OP. */ - if (current_val >= val) + if (__nvgpu_semaphore_value_released(threshold, current_val)) return; - nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->p->rw_mem, hw_sema->offset, val); + nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->p->rw_mem, hw_sema->offset, + threshold); gpu_sema_verbose_dbg(hw_sema->p->sema_sea->gk20a, - "(c=%d) WRITE %u", hw_sema->ch->chid, val); + "(c=%d) WRITE %u", hw_sema->ch->chid, threshold); } static inline void nvgpu_semaphore_release(struct nvgpu_semaphore *s)