xref: /src/sys/contrib/zstd/lib/common/bits.h (revision c0d9a07101a1e72769ee0619a583f63a078fb391)
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */
107e509d50SXin LI 
117e509d50SXin LI #ifndef ZSTD_BITS_H
127e509d50SXin LI #define ZSTD_BITS_H
137e509d50SXin LI 
147e509d50SXin LI #include "mem.h"
157e509d50SXin LI 
ZSTD_countTrailingZeros32_fallback(U32 val)167e509d50SXin LI MEM_STATIC unsigned ZSTD_countTrailingZeros32_fallback(U32 val)
177e509d50SXin LI {
187e509d50SXin LI     assert(val != 0);
197e509d50SXin LI     {
207e509d50SXin LI         static const U32 DeBruijnBytePos[32] = {0, 1, 28, 2, 29, 14, 24, 3,
217e509d50SXin LI                                                 30, 22, 20, 15, 25, 17, 4, 8,
227e509d50SXin LI                                                 31, 27, 13, 23, 21, 19, 16, 7,
237e509d50SXin LI                                                 26, 12, 18, 6, 11, 5, 10, 9};
247e509d50SXin LI         return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >> 27];
257e509d50SXin LI     }
267e509d50SXin LI }
277e509d50SXin LI 
ZSTD_countTrailingZeros32(U32 val)287e509d50SXin LI MEM_STATIC unsigned ZSTD_countTrailingZeros32(U32 val)
297e509d50SXin LI {
307e509d50SXin LI     assert(val != 0);
317e509d50SXin LI #if defined(_MSC_VER)
327e509d50SXin LI #  if STATIC_BMI2
337e509d50SXin LI     return (unsigned)_tzcnt_u32(val);
347e509d50SXin LI #  else
357e509d50SXin LI     if (val != 0) {
367e509d50SXin LI         unsigned long r;
377e509d50SXin LI         _BitScanForward(&r, val);
387e509d50SXin LI         return (unsigned)r;
397e509d50SXin LI     } else {
407e509d50SXin LI         __assume(0); /* Should not reach this code path */
417e509d50SXin LI     }
427e509d50SXin LI #  endif
437e509d50SXin LI #elif defined(__GNUC__) && (__GNUC__ >= 4)
447e509d50SXin LI     return (unsigned)__builtin_ctz(val);
457e509d50SXin LI #elif defined(__ICCARM__)
467e509d50SXin LI     return (unsigned)__builtin_ctz(val);
477e509d50SXin LI #else
487e509d50SXin LI     return ZSTD_countTrailingZeros32_fallback(val);
497e509d50SXin LI #endif
507e509d50SXin LI }
517e509d50SXin LI 
ZSTD_countLeadingZeros32_fallback(U32 val)527e509d50SXin LI MEM_STATIC unsigned ZSTD_countLeadingZeros32_fallback(U32 val)
537e509d50SXin LI {
547e509d50SXin LI     assert(val != 0);
557e509d50SXin LI     {
567e509d50SXin LI         static const U32 DeBruijnClz[32] = {0, 9, 1, 10, 13, 21, 2, 29,
577e509d50SXin LI                                             11, 14, 16, 18, 22, 25, 3, 30,
587e509d50SXin LI                                             8, 12, 20, 28, 15, 17, 24, 7,
597e509d50SXin LI                                             19, 27, 23, 6, 26, 5, 4, 31};
607e509d50SXin LI         val |= val >> 1;
617e509d50SXin LI         val |= val >> 2;
627e509d50SXin LI         val |= val >> 4;
637e509d50SXin LI         val |= val >> 8;
647e509d50SXin LI         val |= val >> 16;
657e509d50SXin LI         return 31 - DeBruijnClz[(val * 0x07C4ACDDU) >> 27];
667e509d50SXin LI     }
677e509d50SXin LI }
687e509d50SXin LI 
ZSTD_countLeadingZeros32(U32 val)697e509d50SXin LI MEM_STATIC unsigned ZSTD_countLeadingZeros32(U32 val)
707e509d50SXin LI {
717e509d50SXin LI     assert(val != 0);
727e509d50SXin LI #if defined(_MSC_VER)
737e509d50SXin LI #  if STATIC_BMI2
747e509d50SXin LI     return (unsigned)_lzcnt_u32(val);
757e509d50SXin LI #  else
767e509d50SXin LI     if (val != 0) {
777e509d50SXin LI         unsigned long r;
787e509d50SXin LI         _BitScanReverse(&r, val);
797e509d50SXin LI         return (unsigned)(31 - r);
807e509d50SXin LI     } else {
817e509d50SXin LI         __assume(0); /* Should not reach this code path */
827e509d50SXin LI     }
837e509d50SXin LI #  endif
847e509d50SXin LI #elif defined(__GNUC__) && (__GNUC__ >= 4)
857e509d50SXin LI     return (unsigned)__builtin_clz(val);
867e509d50SXin LI #elif defined(__ICCARM__)
877e509d50SXin LI     return (unsigned)__builtin_clz(val);
887e509d50SXin LI #else
897e509d50SXin LI     return ZSTD_countLeadingZeros32_fallback(val);
907e509d50SXin LI #endif
917e509d50SXin LI }
927e509d50SXin LI 
ZSTD_countTrailingZeros64(U64 val)937e509d50SXin LI MEM_STATIC unsigned ZSTD_countTrailingZeros64(U64 val)
947e509d50SXin LI {
957e509d50SXin LI     assert(val != 0);
967e509d50SXin LI #if defined(_MSC_VER) && defined(_WIN64)
977e509d50SXin LI #  if STATIC_BMI2
987e509d50SXin LI     return (unsigned)_tzcnt_u64(val);
997e509d50SXin LI #  else
1007e509d50SXin LI     if (val != 0) {
1017e509d50SXin LI         unsigned long r;
1027e509d50SXin LI         _BitScanForward64(&r, val);
1037e509d50SXin LI         return (unsigned)r;
1047e509d50SXin LI     } else {
1057e509d50SXin LI         __assume(0); /* Should not reach this code path */
1067e509d50SXin LI     }
1077e509d50SXin LI #  endif
1087e509d50SXin LI #elif defined(__GNUC__) && (__GNUC__ >= 4) && defined(__LP64__)
1097e509d50SXin LI     return (unsigned)__builtin_ctzll(val);
1107e509d50SXin LI #elif defined(__ICCARM__)
1117e509d50SXin LI     return (unsigned)__builtin_ctzll(val);
1127e509d50SXin LI #else
1137e509d50SXin LI     {
1147e509d50SXin LI         U32 mostSignificantWord = (U32)(val >> 32);
1157e509d50SXin LI         U32 leastSignificantWord = (U32)val;
1167e509d50SXin LI         if (leastSignificantWord == 0) {
1177e509d50SXin LI             return 32 + ZSTD_countTrailingZeros32(mostSignificantWord);
1187e509d50SXin LI         } else {
1197e509d50SXin LI             return ZSTD_countTrailingZeros32(leastSignificantWord);
1207e509d50SXin LI         }
1217e509d50SXin LI     }
1227e509d50SXin LI #endif
1237e509d50SXin LI }
1247e509d50SXin LI 
ZSTD_countLeadingZeros64(U64 val)1257e509d50SXin LI MEM_STATIC unsigned ZSTD_countLeadingZeros64(U64 val)
1267e509d50SXin LI {
1277e509d50SXin LI     assert(val != 0);
1287e509d50SXin LI #if defined(_MSC_VER) && defined(_WIN64)
1297e509d50SXin LI #  if STATIC_BMI2
1307e509d50SXin LI     return (unsigned)_lzcnt_u64(val);
1317e509d50SXin LI #  else
1327e509d50SXin LI     if (val != 0) {
1337e509d50SXin LI         unsigned long r;
1347e509d50SXin LI         _BitScanReverse64(&r, val);
1357e509d50SXin LI         return (unsigned)(63 - r);
1367e509d50SXin LI     } else {
1377e509d50SXin LI         __assume(0); /* Should not reach this code path */
1387e509d50SXin LI     }
1397e509d50SXin LI #  endif
1407e509d50SXin LI #elif defined(__GNUC__) && (__GNUC__ >= 4)
1417e509d50SXin LI     return (unsigned)(__builtin_clzll(val));
1427e509d50SXin LI #elif defined(__ICCARM__)
1437e509d50SXin LI     return (unsigned)(__builtin_clzll(val));
1447e509d50SXin LI #else
1457e509d50SXin LI     {
1467e509d50SXin LI         U32 mostSignificantWord = (U32)(val >> 32);
1477e509d50SXin LI         U32 leastSignificantWord = (U32)val;
1487e509d50SXin LI         if (mostSignificantWord == 0) {
1497e509d50SXin LI             return 32 + ZSTD_countLeadingZeros32(leastSignificantWord);
1507e509d50SXin LI         } else {
1517e509d50SXin LI             return ZSTD_countLeadingZeros32(mostSignificantWord);
1527e509d50SXin LI         }
1537e509d50SXin LI     }
1547e509d50SXin LI #endif
1557e509d50SXin LI }
1567e509d50SXin LI 
ZSTD_NbCommonBytes(size_t val)1577e509d50SXin LI MEM_STATIC unsigned ZSTD_NbCommonBytes(size_t val)
1587e509d50SXin LI {
1597e509d50SXin LI     if (MEM_isLittleEndian()) {
1607e509d50SXin LI         if (MEM_64bits()) {
1617e509d50SXin LI             return ZSTD_countTrailingZeros64((U64)val) >> 3;
1627e509d50SXin LI         } else {
1637e509d50SXin LI             return ZSTD_countTrailingZeros32((U32)val) >> 3;
1647e509d50SXin LI         }
1657e509d50SXin LI     } else {  /* Big Endian CPU */
1667e509d50SXin LI         if (MEM_64bits()) {
1677e509d50SXin LI             return ZSTD_countLeadingZeros64((U64)val) >> 3;
1687e509d50SXin LI         } else {
1697e509d50SXin LI             return ZSTD_countLeadingZeros32((U32)val) >> 3;
1707e509d50SXin LI         }
1717e509d50SXin LI     }
1727e509d50SXin LI }
1737e509d50SXin LI 
ZSTD_highbit32(U32 val)1747e509d50SXin LI MEM_STATIC unsigned ZSTD_highbit32(U32 val)   /* compress, dictBuilder, decodeCorpus */
1757e509d50SXin LI {
1767e509d50SXin LI     assert(val != 0);
1777e509d50SXin LI     return 31 - ZSTD_countLeadingZeros32(val);
1787e509d50SXin LI }
1797e509d50SXin LI 
/* ZSTD_rotateRight_*():
 * Rotates a bitfield to the right by "count" bits.
 * https://en.wikipedia.org/w/index.php?title=Circular_shift&oldid=991635599#Implementing_circular_shifts
 */
1847e509d50SXin LI MEM_STATIC
ZSTD_rotateRight_U64(U64 const value,U32 count)1857e509d50SXin LI U64 ZSTD_rotateRight_U64(U64 const value, U32 count) {
1867e509d50SXin LI     assert(count < 64);
1877e509d50SXin LI     count &= 0x3F; /* for fickle pattern recognition */
1887e509d50SXin LI     return (value >> count) | (U64)(value << ((0U - count) & 0x3F));
1897e509d50SXin LI }
1907e509d50SXin LI 
1917e509d50SXin LI MEM_STATIC
ZSTD_rotateRight_U32(U32 const value,U32 count)1927e509d50SXin LI U32 ZSTD_rotateRight_U32(U32 const value, U32 count) {
1937e509d50SXin LI     assert(count < 32);
1947e509d50SXin LI     count &= 0x1F; /* for fickle pattern recognition */
1957e509d50SXin LI     return (value >> count) | (U32)(value << ((0U - count) & 0x1F));
1967e509d50SXin LI }
1977e509d50SXin LI 
1987e509d50SXin LI MEM_STATIC
ZSTD_rotateRight_U16(U16 const value,U32 count)1997e509d50SXin LI U16 ZSTD_rotateRight_U16(U16 const value, U32 count) {
2007e509d50SXin LI     assert(count < 16);
2017e509d50SXin LI     count &= 0x0F; /* for fickle pattern recognition */
2027e509d50SXin LI     return (value >> count) | (U16)(value << ((0U - count) & 0x0F));
2037e509d50SXin LI }
2047e509d50SXin LI 
2057e509d50SXin LI #endif /* ZSTD_BITS_H */
206