diff --git a/drivers/gpu/nvgpu/common/semaphore.c b/drivers/gpu/nvgpu/common/semaphore.c
index 25bd3be33..5814a7370 100644
--- a/drivers/gpu/nvgpu/common/semaphore.c
+++ b/drivers/gpu/nvgpu/common/semaphore.c
@@ -28,6 +28,7 @@
 #include
 #include
 #include
+#include
 
 #include "gk20a/gk20a.h"
 #include "gk20a/mm_gk20a.h"
@@ -485,3 +486,141 @@ void nvgpu_semaphore_get(struct nvgpu_semaphore *s)
 {
 	nvgpu_ref_get(&s->ref);
 }
+
+/*
+ * Return the address of a specific semaphore.
+ *
+ * Don't call this on a semaphore you don't own - the VA returned will make no
+ * sense in your specific channel's VM.
+ */
+u64 nvgpu_semaphore_gpu_rw_va(struct nvgpu_semaphore *s)
+{
+	return __nvgpu_semaphore_pool_gpu_va(s->location.pool, false) +
+		s->location.offset;
+}
+
+/*
+ * Get the global RO address for the semaphore. Can be called on any semaphore
+ * regardless of whether you own it.
+ */
+u64 nvgpu_semaphore_gpu_ro_va(struct nvgpu_semaphore *s)
+{
+	return __nvgpu_semaphore_pool_gpu_va(s->location.pool, true) +
+		s->location.offset;
+}
+
+u64 nvgpu_hw_sema_addr(struct nvgpu_semaphore_int *hw_sema)
+{
+	return __nvgpu_semaphore_pool_gpu_va(hw_sema->location.pool, true) +
+		hw_sema->location.offset;
+}
+
+u32 __nvgpu_semaphore_read(struct nvgpu_semaphore_int *hw_sema)
+{
+	return nvgpu_mem_rd(hw_sema->ch->g, &hw_sema->location.pool->rw_mem,
+			hw_sema->location.offset);
+}
+
+/*
+ * Read the underlying value from a semaphore.
+ */
+u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s)
+{
+	return nvgpu_mem_rd(s->g, &s->location.pool->rw_mem,
+			s->location.offset);
+}
+
+/*
+ * Check if "racer" is over "goal" with wraparound handling.
+ */
+static bool __nvgpu_semaphore_value_released(u32 goal, u32 racer)
+{
+	/*
+	 * Handle wraparound with the same heuristic as the hardware does:
+	 * although the integer will eventually wrap around, consider a sema
+	 * released against a threshold if its value has passed that threshold
+	 * but has not wrapped over half of the u32 range over that threshold;
+	 * such wrapping is unlikely to happen during a sema lifetime.
+	 *
+	 * Values for [goal, goal + 0x7fffffff] are considered signaled; that's
+	 * precisely half of the 32-bit space. If racer == goal + 0x80000000,
+	 * then it needs 0x80000000 increments to wrap again and signal.
+	 *
+	 * Unsigned arithmetic is used because it's well-defined. This is
+	 * effectively the same as: signed_racer - signed_goal > 0.
+	 */
+
+	return racer - goal < 0x80000000;
+}
+
+u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s)
+{
+	return (u32)nvgpu_atomic_read(&s->value);
+}
+
+bool nvgpu_semaphore_is_released(struct nvgpu_semaphore *s)
+{
+	u32 sema_val = nvgpu_semaphore_read(s);
+	u32 wait_payload = nvgpu_semaphore_get_value(s);
+
+	return __nvgpu_semaphore_value_released(wait_payload, sema_val);
+}
+
+bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s)
+{
+	return !nvgpu_semaphore_is_released(s);
+}
+
+/*
+ * Fast-forward the hw sema to its tracked max value.
+ *
+ * Return true if the sema wasn't at the max value and needed updating, false
+ * otherwise.
+ */
+bool nvgpu_semaphore_reset(struct nvgpu_semaphore_int *hw_sema)
+{
+	u32 threshold = (u32)nvgpu_atomic_read(&hw_sema->next_value);
+	u32 current_val = __nvgpu_semaphore_read(hw_sema);
+
+	/*
+	 * If the semaphore has already reached the value we would write then
+	 * this is really just a NO-OP. However, the sema value shouldn't be
+	 * more than what we expect to be the max.
+	 */
+
+	if (WARN_ON(__nvgpu_semaphore_value_released(threshold + 1,
+			current_val)))
+		return false;
+
+	if (current_val == threshold)
+		return false;
+
+	nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->location.pool->rw_mem,
+			hw_sema->location.offset, threshold);
+
+	gpu_sema_verbose_dbg(hw_sema->ch->g, "(c=%d) RESET %u -> %u",
+			hw_sema->ch->chid, current_val, threshold);
+
+	return true;
+}
+
+/*
+ * Update nvgpu-tracked shadow of the value in "hw_sema" and mark the threshold
+ * value to "s" which represents the increment that the caller must write in a
+ * pushbuf. The same nvgpu_semaphore will also represent an output fence; when
+ * nvgpu_semaphore_is_released(s) == true, the gpu is done with this increment.
+ */
+void nvgpu_semaphore_prepare(struct nvgpu_semaphore *s,
+		struct nvgpu_semaphore_int *hw_sema)
+{
+	int next = nvgpu_atomic_add_return(1, &hw_sema->next_value);
+
+	/* "s" should be an uninitialized sema. */
+	WARN_ON(s->incremented);
+
+	nvgpu_atomic_set(&s->value, next);
+	s->incremented = 1;
+
+	gpu_sema_verbose_dbg(s->g, "INCR sema for c=%d (%u)",
+			hw_sema->ch->chid, next);
+}
diff --git a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
index 9ab6cc674..a7bd79cb0 100644
--- a/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
+++ b/drivers/gpu/nvgpu/include/nvgpu/semaphore.h
@@ -24,16 +24,13 @@
 #define SEMAPHORE_GK20A_H
 
 #include
-#include
 #include
-#include
 #include
 #include
 #include
 
 #include "gk20a/gk20a.h"
 #include "gk20a/mm_gk20a.h"
-#include "gk20a/channel_gk20a.h"
 
 #define gpu_sema_dbg(g, fmt, args...) \
 	nvgpu_log(g, gpu_dbg_sema, fmt, ##args)
@@ -191,141 +188,18 @@ void nvgpu_semaphore_put(struct nvgpu_semaphore *s);
 void nvgpu_semaphore_get(struct nvgpu_semaphore *s);
 void nvgpu_semaphore_free_hw_sema(struct channel_gk20a *ch);
 
-/*
- * Return the address of a specific semaphore.
- *
- * Don't call this on a semaphore you don't own - the VA returned will make no
- * sense in your specific channel's VM.
- */
-static inline u64 nvgpu_semaphore_gpu_rw_va(struct nvgpu_semaphore *s)
-{
-	return __nvgpu_semaphore_pool_gpu_va(s->location.pool, false) +
-		s->location.offset;
-}
+u64 nvgpu_semaphore_gpu_rw_va(struct nvgpu_semaphore *s);
+u64 nvgpu_semaphore_gpu_ro_va(struct nvgpu_semaphore *s);
+u64 nvgpu_hw_sema_addr(struct nvgpu_semaphore_int *hw_sema);
 
-/*
- * Get the global RO address for the semaphore. Can be called on any semaphore
- * regardless of whether you own it.
- */
-static inline u64 nvgpu_semaphore_gpu_ro_va(struct nvgpu_semaphore *s)
-{
-	return __nvgpu_semaphore_pool_gpu_va(s->location.pool, true) +
-		s->location.offset;
-}
+u32 __nvgpu_semaphore_read(struct nvgpu_semaphore_int *hw_sema);
+u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s);
+u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s);
+bool nvgpu_semaphore_is_released(struct nvgpu_semaphore *s);
+bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s);
 
-static inline u64 nvgpu_hw_sema_addr(struct nvgpu_semaphore_int *hw_sema)
-{
-	return __nvgpu_semaphore_pool_gpu_va(hw_sema->location.pool, true) +
-		hw_sema->location.offset;
-}
+bool nvgpu_semaphore_reset(struct nvgpu_semaphore_int *hw_sema);
+void nvgpu_semaphore_prepare(struct nvgpu_semaphore *s,
+		struct nvgpu_semaphore_int *hw_sema);
 
-static inline u32 __nvgpu_semaphore_read(struct nvgpu_semaphore_int *hw_sema)
-{
-	return nvgpu_mem_rd(hw_sema->ch->g, &hw_sema->location.pool->rw_mem,
-			hw_sema->location.offset);
-}
-
-/*
- * Read the underlying value from a semaphore.
- */
-static inline u32 nvgpu_semaphore_read(struct nvgpu_semaphore *s)
-{
-	return nvgpu_mem_rd(s->g, &s->location.pool->rw_mem,
-			s->location.offset);
-}
-
-/*
- * Check if "racer" is over "goal" with wraparound handling.
- */
-static inline bool __nvgpu_semaphore_value_released(u32 goal, u32 racer)
-{
-	/*
-	 * Handle wraparound with the same heuristic as the hardware does:
-	 * although the integer will eventually wrap around, consider a sema
-	 * released against a threshold if its value has passed that threshold
-	 * but has not wrapped over half of the u32 range over that threshold;
-	 * such wrapping is unlikely to happen during a sema lifetime.
-	 *
-	 * Values for [goal, goal + 0x7fffffff] are considered signaled; that's
-	 * precisely half of the 32-bit space. If racer == goal + 0x80000000,
-	 * then it needs 0x80000000 increments to wrap again and signal.
-	 *
-	 * Unsigned arithmetic is used because it's well-defined. This is
-	 * effectively the same as: signed_racer - signed_goal > 0.
-	 */
-
-	return racer - goal < 0x80000000;
-}
-
-static inline u32 nvgpu_semaphore_get_value(struct nvgpu_semaphore *s)
-{
-	return (u32)nvgpu_atomic_read(&s->value);
-}
-
-static inline bool nvgpu_semaphore_is_released(struct nvgpu_semaphore *s)
-{
-	u32 sema_val = nvgpu_semaphore_read(s);
-	u32 wait_payload = nvgpu_semaphore_get_value(s);
-
-	return __nvgpu_semaphore_value_released(wait_payload, sema_val);
-}
-
-static inline bool nvgpu_semaphore_is_acquired(struct nvgpu_semaphore *s)
-{
-	return !nvgpu_semaphore_is_released(s);
-}
-
-/*
- * Fast-forward the hw sema to its tracked max value.
- *
- * Return true if the sema wasn't at the max value and needed updating, false
- * otherwise.
- */
-static inline bool nvgpu_semaphore_reset(struct nvgpu_semaphore_int *hw_sema)
-{
-	u32 threshold = (u32)nvgpu_atomic_read(&hw_sema->next_value);
-	u32 current_val = __nvgpu_semaphore_read(hw_sema);
-
-	/*
-	 * If the semaphore has already reached the value we would write then
-	 * this is really just a NO-OP. However, the sema value shouldn't be
-	 * more than what we expect to be the max.
-	 */
-
-	if (WARN_ON(__nvgpu_semaphore_value_released(threshold + 1,
-			current_val)))
-		return false;
-
-	if (current_val == threshold)
-		return false;
-
-	nvgpu_mem_wr(hw_sema->ch->g, &hw_sema->location.pool->rw_mem,
-			hw_sema->location.offset, threshold);
-
-	gpu_sema_verbose_dbg(hw_sema->ch->g, "(c=%d) RESET %u -> %u",
-			hw_sema->ch->chid, current_val, threshold);
-
-	return true;
-}
-
-/*
- * Update nvgpu-tracked shadow of the value in "hw_sema" and mark the threshold
- * value to "s" which represents the increment that the caller must write in a
- * pushbuf. The same nvgpu_semaphore will also represent an output fence; when
- * nvgpu_semaphore_is_released(s) == true, the gpu is done with this increment.
- */
-static inline void nvgpu_semaphore_prepare(struct nvgpu_semaphore *s,
-		struct nvgpu_semaphore_int *hw_sema)
-{
-	int next = nvgpu_atomic_add_return(1, &hw_sema->next_value);
-
-	/* "s" should be an uninitialized sema. */
-	WARN_ON(s->incremented);
-
-	nvgpu_atomic_set(&s->value, next);
-	s->incremented = 1;
-
-	gpu_sema_verbose_dbg(s->g, "INCR sema for c=%d (%u)",
-			hw_sema->ch->chid, next);
-}
-
 #endif