gpu: nvgpu: posix: Use GCC builtins for ffs(), fls()

These intrinsics will be fast for the given platform that the
compiler is targeting. This also reduces complexity in the
code.

Change-Id: I6cfb761d6f881056446fa9a5de53dca50ed93c34
Signed-off-by: Alex Waterman <alexw@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/1727383
Reviewed-by: Konsta Holtta <kholtta@nvidia.com>
Reviewed-by: Richard Zhao <rizhao@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Terje Bergstrom <tbergstrom@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
This commit is contained in:
Alex Waterman
2018-05-22 11:24:22 -07:00
committed by mobile promotions
parent a3a9ed3b12
commit 64b6918ea9

View File

@@ -31,65 +31,12 @@
unsigned long __nvgpu_posix_fls(unsigned long word)
{
int num = BITS_PER_LONG - 1;
#if BITS_PER_LONG == 64
if (!(word & (~0ul << 32))) {
num -= 32;
word <<= 32;
}
#endif
if (!(word & (~0ul << (BITS_PER_LONG-16)))) {
num -= 16;
word <<= 16;
}
if (!(word & (~0ul << (BITS_PER_LONG-8)))) {
num -= 8;
word <<= 8;
}
if (!(word & (~0ul << (BITS_PER_LONG-4)))) {
num -= 4;
word <<= 4;
}
if (!(word & (~0ul << (BITS_PER_LONG-2)))) {
num -= 2;
word <<= 2;
}
if (!(word & (~0ul << (BITS_PER_LONG-1))))
num -= 1;
return num;
return __builtin_clzl(word);
}
unsigned long __nvgpu_posix_ffs(unsigned long word)
{
int num = 0;
#if BITS_PER_LONG == 64
if ((word & 0xffffffff) == 0) {
num += 32;
word >>= 32;
}
#endif
if ((word & 0xffff) == 0) {
num += 16;
word >>= 16;
}
if ((word & 0xff) == 0) {
num += 8;
word >>= 8;
}
if ((word & 0xf) == 0) {
num += 4;
word >>= 4;
}
if ((word & 0x3) == 0) {
num += 2;
word >>= 2;
}
if ((word & 0x1) == 0)
num += 1;
return num;
return __builtin_ffsl(word);
}
static unsigned long __find_next_bit(const unsigned long *addr,