/* Bit and byte utilities.

   Byte swap functions that reverse the order of bytes:

   - _Py_bswap16(uint16_t)
   - _Py_bswap32(uint32_t)
   - _Py_bswap64(uint64_t)
*/
#ifndef Py_INTERNAL_BITUTILS_H
#define Py_INTERNAL_BITUTILS_H
#ifdef __cplusplus
extern "C" {
#endif

#ifndef Py_BUILD_CORE
#  error "this header requires Py_BUILD_CORE define"
#endif

#if defined(__GNUC__) \
    && ((__GNUC__ >= 5) || (__GNUC__ == 4) && (__GNUC_MINOR__ >= 8))
   /* __builtin_bswap16() is available since GCC 4.8,
      __builtin_bswap32() is available since GCC 4.3,
      __builtin_bswap64() is available since GCC 4.3. */
#  define _PY_HAVE_BUILTIN_BSWAP
#endif

#ifdef _MSC_VER
   /* Get _byteswap_ushort(), _byteswap_ulong(), _byteswap_uint64() */
#  include <intrin.h>
#endif

static inline uint16_t
_Py_bswap16(uint16_t word)
{
#if defined(_PY_HAVE_BUILTIN_BSWAP) || _Py__has_builtin(__builtin_bswap16)
    return __builtin_bswap16(word);
#elif defined(_MSC_VER)
    Py_BUILD_ASSERT(sizeof(word) == sizeof(unsigned short));
    return _byteswap_ushort(word);
#else
    // Portable implementation which doesn't rely on circular bit shift
    return ( ((word & UINT16_C(0x00FF)) << 8)
           | ((word & UINT16_C(0xFF00)) >> 8));
#endif
}

static inline uint32_t
_Py_bswap32(uint32_t word)
{
#if defined(_PY_HAVE_BUILTIN_BSWAP) || _Py__has_builtin(__builtin_bswap32)
    return __builtin_bswap32(word);
#elif defined(_MSC_VER)
    Py_BUILD_ASSERT(sizeof(word) == sizeof(unsigned long));
    return _byteswap_ulong(word);
#else
    // Portable implementation which doesn't rely on circular bit shift
    return ( ((word & UINT32_C(0x000000FF)) << 24)
           | ((word & UINT32_C(0x0000FF00)) <<  8)
           | ((word & UINT32_C(0x00FF0000)) >>  8)
           | ((word & UINT32_C(0xFF000000)) >> 24));
#endif
}

static inline uint64_t
_Py_bswap64(uint64_t word)
{
#if defined(_PY_HAVE_BUILTIN_BSWAP) || _Py__has_builtin(__builtin_bswap64)
    return __builtin_bswap64(word);
#elif defined(_MSC_VER)
    return _byteswap_uint64(word);
#else
    // Portable implementation which doesn't rely on circular bit shift
    return ( ((word & UINT64_C(0x00000000000000FF)) << 56)
           | ((word & UINT64_C(0x000000000000FF00)) << 40)
           | ((word & UINT64_C(0x0000000000FF0000)) << 24)
           | ((word & UINT64_C(0x00000000FF000000)) <<  8)
           | ((word & UINT64_C(0x000000FF00000000)) >>  8)
           | ((word & UINT64_C(0x0000FF0000000000)) >> 24)
           | ((word & UINT64_C(0x00FF000000000000)) >> 40)
           | ((word & UINT64_C(0xFF00000000000000)) >> 56));
#endif
}
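
// For illustration: byte swapping reverses the order of the bytes of a
// fixed-width integer, so for example
//
//     _Py_bswap16(UINT16_C(0x1234))             == 0x3412
//     _Py_bswap32(UINT32_C(0x12345678))         == 0x78563412
//     _Py_bswap64(UINT64_C(0x0102030405060708)) == 0x0807060504030201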

// Population count: count the number of 1's in 'x'
// (number of bits set to 1), also known as the Hamming weight.
//
// Implementation note: to keep the implementation simple, CPUID is not used
// to test whether the x86 POPCNT instruction can be used. For example, the
// Visual Studio __popcnt() intrinsic is not used for this reason. The clang
// and GCC builtin functions can use the x86 POPCNT instruction if the target
// architecture has SSE4a or newer.
static inline int
_Py_popcount32(uint32_t x)
{
#if (defined(__clang__) || defined(__GNUC__))

#if SIZEOF_INT >= 4
    Py_BUILD_ASSERT(sizeof(x) <= sizeof(unsigned int));
    return __builtin_popcount(x);
#else
    // The C standard guarantees that unsigned long will always be big enough
    // to hold a uint32_t value without losing information.
    Py_BUILD_ASSERT(sizeof(x) <= sizeof(unsigned long));
    return __builtin_popcountl(x);
#endif

#else
    // 32-bit SWAR (SIMD Within A Register) popcount

    // Binary: 0 1 0 1 ...
    const uint32_t M1 = 0x55555555;
    // Binary: 00 11 00 11 ...
    const uint32_t M2 = 0x33333333;
    // Binary: 0000 1111 0000 1111 ...
    const uint32_t M4 = 0x0F0F0F0F;

    // Put count of each 2 bits into those 2 bits
    x = x - ((x >> 1) & M1);
    // Put count of each 4 bits into those 4 bits
    x = (x & M2) + ((x >> 2) & M2);
    // Put count of each 8 bits into those 8 bits
    x = (x + (x >> 4)) & M4;

    // Sum of the 4 byte counts.
    //
    // Take care when considering changes to the next line. Portability and
    // correctness are delicate here, thanks to C's "integer promotions" (C99
    // §6.3.1.1p2). On machines where the `int` type has width greater than 32
    // bits, `x` will be promoted to an `int`, and following C's "usual
    // arithmetic conversions" (C99 §6.3.1.8), the multiplication will be
    // performed as a multiplication of two `unsigned int` operands. In this
    // case it's critical that we cast back to `uint32_t` in order to keep only
    // the least significant 32 bits. On machines where the `int` type has
    // width no greater than 32, the multiplication is of two 32-bit unsigned
    // integer types, and the (uint32_t) cast is a no-op. In both cases, we
    // avoid the risk of undefined behaviour due to overflow of a
    // multiplication of signed integer types.
    return (uint32_t)(x * 0x01010101U) >> 24;
#endif
}
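
// For illustration: in the SWAR fallback above, after the three masking steps
// each byte of 'x' holds the bit count of that byte (at most 8), so the
// multiply by 0x01010101 accumulates the sum of the four byte counts into the
// most significant byte, which ">> 24" then extracts. For example:
//
//     _Py_popcount32(0x00000000) == 0
//     _Py_popcount32(0x0000000F) == 4
//     _Py_popcount32(0xFFFFFFFF) == 32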

// Return the bit length of 'x': the 1-based index of its most significant
// 1 bit. This is the smallest integer k such that x < 2**k. Equivalent to
// floor(log2(x)) + 1 for x != 0, and 0 for x == 0.
static inline int
_Py_bit_length(unsigned long x)
{
#if (defined(__clang__) || defined(__GNUC__))
    if (x != 0) {
        // __builtin_clzl() is available since GCC 3.4.
        // Undefined behavior for x == 0.
        return (int)sizeof(unsigned long) * 8 - __builtin_clzl(x);
    }
    else {
        return 0;
    }
#elif defined(_MSC_VER)
    // _BitScanReverse() is documented to search 32 bits.
    Py_BUILD_ASSERT(sizeof(unsigned long) <= 4);

    unsigned long msb;
    if (_BitScanReverse(&msb, x)) {
        return (int)msb + 1;
    }
    else {
        return 0;
    }
#else
    const int BIT_LENGTH_TABLE[32] = {
        0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
    };
    int msb = 0;
    while (x >= 32) {
        msb += 6;
        x >>= 6;
    }
    msb += BIT_LENGTH_TABLE[x];
    return msb;
#endif
}
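
// For illustration:
//
//     _Py_bit_length(0)   == 0
//     _Py_bit_length(1)   == 1
//     _Py_bit_length(255) == 8
//     _Py_bit_length(256) == 9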

#ifdef __cplusplus
}
#endif

#endif /* !Py_INTERNAL_BITUTILS_H */