// bitops.h — bit-manipulation utilities (masks, rotates, reversals, bit scans).
  1. #pragma once
  2. #include "ylimits.h"
  3. #include "typelist.h"
  4. #include <util/system/compiler.h>
  5. #include <util/system/yassert.h>
  6. #ifdef _MSC_VER
  7. #include <intrin.h>
  8. #endif
  9. namespace NBitOps {
  10. namespace NPrivate {
// Recursive helper for FastClp2: "smears" the highest set bit of (t - 1)
// into every lower position by OR-ing in successively larger shifts.
// Instantiated with N = half the bit width of the unsigned type; the
// recursion bottoms out at N == 0, which contributes t - 1. The caller
// adds 1 afterwards to obtain the next power of two.
template <unsigned N, typename T>
struct TClp2Helper {
    static Y_FORCE_INLINE T Calc(T t) noexcept {
        // Unwind order applies the smaller shifts first (1, 2, ..., N/2),
        // then this level's shift N.
        const T prev = TClp2Helper<N / 2, T>::Calc(t);
        return prev | (prev >> N);
    }
};

// Recursion terminator: start smearing from t - 1 so that exact powers of
// two map onto themselves after the final "+ 1".
template <typename T>
struct TClp2Helper<0u, T> {
    static Y_FORCE_INLINE T Calc(T t) noexcept {
        return t - 1;
    }
};
// Precomputed mask tables defined in the accompanying .cpp:
// WORD_MASK[i] has the i lowest bits set; INVERSE_WORD_MASK[i] is its
// complement. Presumably sized for indices 0..64 — confirm against the
// definitions before indexing with larger values.
extern const ui64 WORD_MASK[];
extern const ui64 INVERSE_WORD_MASK[];
  26. // see http://www-graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
  27. Y_FORCE_INLINE ui64 SwapOddEvenBits(ui64 v) {
  28. return ((v >> 1ULL) & 0x5555555555555555ULL) | ((v & 0x5555555555555555ULL) << 1ULL);
  29. }
  30. Y_FORCE_INLINE ui64 SwapBitPairs(ui64 v) {
  31. return ((v >> 2ULL) & 0x3333333333333333ULL) | ((v & 0x3333333333333333ULL) << 2ULL);
  32. }
  33. Y_FORCE_INLINE ui64 SwapNibbles(ui64 v) {
  34. return ((v >> 4ULL) & 0x0F0F0F0F0F0F0F0FULL) | ((v & 0x0F0F0F0F0F0F0F0FULL) << 4ULL);
  35. }
  36. Y_FORCE_INLINE ui64 SwapOddEvenBytes(ui64 v) {
  37. return ((v >> 8ULL) & 0x00FF00FF00FF00FFULL) | ((v & 0x00FF00FF00FF00FFULL) << 8ULL);
  38. }
  39. Y_FORCE_INLINE ui64 SwapBytePairs(ui64 v) {
  40. return ((v >> 16ULL) & 0x0000FFFF0000FFFFULL) | ((v & 0x0000FFFF0000FFFFULL) << 16ULL);
  41. }
  42. Y_FORCE_INLINE ui64 SwapByteQuads(ui64 v) {
  43. return (v >> 32ULL) | (v << 32ULL);
  44. }
#if defined(__GNUC__)
// Number of significant bits = bit width minus the count of leading zeros;
// one overload per builtin so the correct __builtin_clz* variant is chosen.
inline unsigned GetValueBitCountImpl(unsigned int value) noexcept {
    Y_ASSERT(value); // because __builtin_clz* have undefined result for zero.
    return std::numeric_limits<unsigned int>::digits - __builtin_clz(value);
}
inline unsigned GetValueBitCountImpl(unsigned long value) noexcept {
    Y_ASSERT(value); // because __builtin_clz* have undefined result for zero.
    return std::numeric_limits<unsigned long>::digits - __builtin_clzl(value);
}
inline unsigned GetValueBitCountImpl(unsigned long long value) noexcept {
    Y_ASSERT(value); // because __builtin_clz* have undefined result for zero.
    return std::numeric_limits<unsigned long long>::digits - __builtin_clzll(value);
}
#else
/// Stupid realization for non-GCC. Can use BSR from x86 instructions set.
template <typename T>
inline unsigned GetValueBitCountImpl(T value) noexcept {
    Y_ASSERT(value); // because __builtin_clz* have undefined result for zero.
    unsigned result = 1; // result == 0 - impossible value, see Y_ASSERT().
    // Halve until exhausted: the shift count plus one is the 1-based index
    // of the most significant set bit.
    value >>= 1;
    while (value) {
        value >>= 1;
        ++result;
    }
    return result;
}
#endif
#if defined(__GNUC__)
// Trailing-zero count via the matching __builtin_ctz* builtin; one overload
// per width. All are undefined for zero input, hence the asserts.
inline unsigned CountTrailingZeroBitsImpl(unsigned int value) noexcept {
    Y_ASSERT(value); // because __builtin_ctz* have undefined result for zero.
    return __builtin_ctz(value);
}
inline unsigned CountTrailingZeroBitsImpl(unsigned long value) noexcept {
    Y_ASSERT(value); // because __builtin_ctz* have undefined result for zero.
    return __builtin_ctzl(value);
}
inline unsigned CountTrailingZeroBitsImpl(unsigned long long value) noexcept {
    Y_ASSERT(value); // because __builtin_ctz* have undefined result for zero.
    return __builtin_ctzll(value);
}
#else
/// Stupid realization for non-GCC. Can use BSF from x86 instructions set.
template <typename T>
inline unsigned CountTrailingZeroBitsImpl(T value) noexcept {
    Y_ASSERT(value); // because __builtin_ctz* have undefined result for zero.
    unsigned result = 0;
    // Shift right until the lowest set bit reaches position 0.
    while (!(value & 1)) {
        value >>= 1;
        ++result;
    }
    return result;
}
#endif
  98. template <typename T>
  99. Y_FORCE_INLINE T RotateBitsLeftImpl(T value, const ui8 shift) noexcept {
  100. constexpr ui8 bits = sizeof(T) * 8;
  101. constexpr ui8 mask = bits - 1;
  102. Y_ASSERT(shift <= mask);
  103. // do trick with mask to avoid undefined behaviour
  104. return (value << shift) | (value >> ((-shift) & mask));
  105. }
  106. template <typename T>
  107. Y_FORCE_INLINE T RotateBitsRightImpl(T value, const ui8 shift) noexcept {
  108. constexpr ui8 bits = sizeof(T) * 8;
  109. constexpr ui8 mask = bits - 1;
  110. Y_ASSERT(shift <= mask);
  111. // do trick with mask to avoid undefined behaviour
  112. return (value >> shift) | (value << ((-shift) & mask));
  113. }
#if defined(_x86_) && defined(__GNUC__)
// Hand-written rotate instructions for x86 (overload resolution prefers
// these exact-width overloads over the template above). In each asm block
// the "0" constraint ties the input to the output register and "c" forces
// the shift count into CL, as the ror/rol instructions require.
Y_FORCE_INLINE ui8 RotateBitsRightImpl(ui8 value, ui8 shift) noexcept {
    __asm__("rorb %%cl, %0" // rotate byte right
            : "=r"(value)
            : "0"(value), "c"(shift));
    return value;
}
Y_FORCE_INLINE ui16 RotateBitsRightImpl(ui16 value, ui8 shift) noexcept {
    __asm__("rorw %%cl, %0" // rotate 16-bit word right
            : "=r"(value)
            : "0"(value), "c"(shift));
    return value;
}
Y_FORCE_INLINE ui32 RotateBitsRightImpl(ui32 value, ui8 shift) noexcept {
    __asm__("rorl %%cl, %0" // rotate 32-bit word right
            : "=r"(value)
            : "0"(value), "c"(shift));
    return value;
}
Y_FORCE_INLINE ui8 RotateBitsLeftImpl(ui8 value, ui8 shift) noexcept {
    __asm__("rolb %%cl, %0" // rotate byte left
            : "=r"(value)
            : "0"(value), "c"(shift));
    return value;
}
Y_FORCE_INLINE ui16 RotateBitsLeftImpl(ui16 value, ui8 shift) noexcept {
    __asm__("rolw %%cl, %0" // rotate 16-bit word left
            : "=r"(value)
            : "0"(value), "c"(shift));
    return value;
}
Y_FORCE_INLINE ui32 RotateBitsLeftImpl(ui32 value, ui8 shift) noexcept {
    __asm__("roll %%cl, %0" // rotate 32-bit word left
            : "=r"(value)
            : "0"(value), "c"(shift));
    return value;
}
#if defined(_x86_64_)
// 64-bit rotates are only available on x86-64.
Y_FORCE_INLINE ui64 RotateBitsRightImpl(ui64 value, ui8 shift) noexcept {
    __asm__("rorq %%cl, %0" // rotate quadword right
            : "=r"(value)
            : "0"(value), "c"(shift));
    return value;
}
Y_FORCE_INLINE ui64 RotateBitsLeftImpl(ui64 value, ui8 shift) noexcept {
    __asm__("rolq %%cl, %0" // rotate quadword left
            : "=r"(value)
            : "0"(value), "c"(shift));
    return value;
}
#endif
#endif
  166. } // namespace NPrivate
  167. } // namespace NBitOps
/**
 * Computes the next power of 2 higher or equal to the integer parameter `t`.
 * If `t` is a power of 2 will return `t`.
 * Result is undefined for `t == 0`.
 */
template <typename T>
static inline T FastClp2(T t) noexcept {
    Y_ASSERT(t > 0);
    // Select the unsigned integer type of the same size as T so the bit
    // smearing operates on a well-defined unsigned representation.
    using TCvt = typename ::TUnsignedInts::template TSelectBy<TSizeOfPredicate<sizeof(T)>::template TResult>::type;
    // sizeof(TCvt) * 4 is half the bit width — the largest shift the
    // recursive smear in TClp2Helper needs; the +1 completes clp2.
    return 1 + ::NBitOps::NPrivate::TClp2Helper<sizeof(TCvt) * 4, T>::Calc(static_cast<TCvt>(t));
}
/**
 * Check if integer is a power of 2. Zero and negative values return false.
 * Note: the `v > 0` test must be evaluated first — it short-circuits so that
 * `v - 1` is never computed for negative signed values.
 */
template <typename T>
Y_CONST_FUNCTION constexpr bool IsPowerOf2(T v) noexcept {
    return v > 0 && (v & (v - 1)) == 0;
}
/**
 * Returns the number of significant bits in `value` — i.e. the 1-based
 * position of the most significant set bit (bit width minus the number of
 * leading zeros). The old wording "number of leading 0-bits" was incorrect.
 * `value` must be positive; the result for 0 is undefined.
 */
template <typename T>
static inline unsigned GetValueBitCount(T value) noexcept {
    Y_ASSERT(value > 0);
    // Convert to the same-sized unsigned type so the bit scan is well defined.
    using TCvt = typename ::TUnsignedInts::template TSelectBy<TSizeOfPredicate<sizeof(T)>::template TResult>::type;
    return ::NBitOps::NPrivate::GetValueBitCountImpl(static_cast<TCvt>(value));
}
/**
 * Returns the number of trailing 0-bits in `value`, starting at the least
 * significant bit position. `value` must be positive; the result for 0 is
 * undefined (the implementations assert on zero).
 */
template <typename T>
static inline unsigned CountTrailingZeroBits(T value) noexcept {
    Y_ASSERT(value > 0);
    // Convert to the same-sized unsigned type so the bit scan is well defined.
    using TCvt = typename ::TUnsignedInts::template TSelectBy<TSizeOfPredicate<sizeof(T)>::template TResult>::type;
    return ::NBitOps::NPrivate::CountTrailingZeroBitsImpl(static_cast<TCvt>(value));
}
/*
 * Returns 64-bit mask with `bits` lower bits set.
 * Indexes a precomputed table; presumably valid for bits in [0, 64] —
 * confirm against the WORD_MASK definition in the .cpp.
 */
Y_FORCE_INLINE ui64 MaskLowerBits(ui64 bits) {
    return ::NBitOps::NPrivate::WORD_MASK[bits];
}
  210. /*
  211. * Return 64-bit mask with `bits` set starting from `skipbits`.
  212. */
  213. Y_FORCE_INLINE ui64 MaskLowerBits(ui64 bits, ui64 skipbits) {
  214. return MaskLowerBits(bits) << skipbits;
  215. }
/*
 * Return 64-bit mask with all bits set except for `bits` lower bits.
 * Table-driven counterpart of ~MaskLowerBits(bits); same presumed index
 * range [0, 64] — confirm against the INVERSE_WORD_MASK definition.
 */
Y_FORCE_INLINE ui64 InverseMaskLowerBits(ui64 bits) {
    return ::NBitOps::NPrivate::INVERSE_WORD_MASK[bits];
}
  222. /*
  223. * Return 64-bit mask with all bits set except for `bits` bitst starting from `skipbits`.
  224. */
  225. Y_FORCE_INLINE ui64 InverseMaskLowerBits(ui64 bits, ui64 skipbits) {
  226. return ~MaskLowerBits(bits, skipbits);
  227. }
/*
 * Returns 0-based position of the most significant bit that is set. 0 for 0.
 */
Y_FORCE_INLINE ui64 MostSignificantBit(ui64 v) {
#ifdef __GNUC__
    // 63 - clz yields the index of the highest set bit; __builtin_clzll(0)
    // is undefined, hence the explicit zero check.
    ui64 res = v ? (63 - __builtin_clzll(v)) : 0;
#elif defined(_MSC_VER) && defined(_64_)
    // _BitScanReverse64 leaves res untouched when v == 0, so it is
    // pre-initialized to 0.
    unsigned long res = 0;
    if (v) {
        _BitScanReverse64(&res, v);
    }
#else
    // Portable fallback: count how many times v can be halved.
    ui64 res = 0;
    if (v) {
        while (v >>= 1) {
            ++res;
        }
    }
#endif
    return res;
}
/**
 * Returns 0-based position of the least significant bit that is set. 0 for 0.
 */
Y_FORCE_INLINE ui64 LeastSignificantBit(ui64 v) {
#ifdef __GNUC__
    // __builtin_ffsll returns the 1-based index of the lowest set bit
    // (0 for 0), so subtract 1; the zero check keeps the result at 0.
    ui64 res = v ? __builtin_ffsll(v) - 1 : 0;
#elif defined(_MSC_VER) && defined(_64_)
    // _BitScanForward64 leaves res untouched when v == 0.
    unsigned long res = 0;
    if (v) {
        _BitScanForward64(&res, v);
    }
#else
    // Portable fallback: shift right until the lowest set bit reaches
    // position 0, counting the shifts.
    ui64 res = 0;
    if (v) {
        while (!(v & 1)) {
            ++res;
            v >>= 1;
        }
    }
#endif
    return res;
}
  271. /*
  272. * Returns 0 - based position of the most significant bit (compile time)
  273. * 0 for 0.
  274. */
  275. constexpr ui64 MostSignificantBitCT(ui64 x) {
  276. return x > 1 ? 1 + MostSignificantBitCT(x >> 1) : 0;
  277. }
/*
 * Return rounded up binary logarithm of `x`.
 * NOTE(review): edge cases look intentional but verify with callers —
 * CeilLog2(1) yields 1 (not 0) since MostSignificantBit(0) is 0, and x == 0
 * wraps to x - 1 == ~0ULL giving 64.
 */
Y_FORCE_INLINE ui8 CeilLog2(ui64 x) {
    return static_cast<ui8>(MostSignificantBit(x - 1)) + 1;
}
// Byte reversal of a single byte is the identity.
Y_FORCE_INLINE ui8 ReverseBytes(ui8 t) {
    return t;
}
  287. Y_FORCE_INLINE ui16 ReverseBytes(ui16 t) {
  288. return static_cast<ui16>(::NBitOps::NPrivate::SwapOddEvenBytes(t));
  289. }
  290. Y_FORCE_INLINE ui32 ReverseBytes(ui32 t) {
  291. return static_cast<ui32>(::NBitOps::NPrivate::SwapBytePairs(
  292. ::NBitOps::NPrivate::SwapOddEvenBytes(t)));
  293. }
  294. Y_FORCE_INLINE ui64 ReverseBytes(ui64 t) {
  295. return ::NBitOps::NPrivate::SwapByteQuads((::NBitOps::NPrivate::SwapOddEvenBytes(t)));
  296. }
  297. Y_FORCE_INLINE ui8 ReverseBits(ui8 t) {
  298. return static_cast<ui8>(::NBitOps::NPrivate::SwapNibbles(
  299. ::NBitOps::NPrivate::SwapBitPairs(
  300. ::NBitOps::NPrivate::SwapOddEvenBits(t))));
  301. }
  302. Y_FORCE_INLINE ui16 ReverseBits(ui16 t) {
  303. return static_cast<ui16>(::NBitOps::NPrivate::SwapOddEvenBytes(
  304. ::NBitOps::NPrivate::SwapNibbles(
  305. ::NBitOps::NPrivate::SwapBitPairs(
  306. ::NBitOps::NPrivate::SwapOddEvenBits(t)))));
  307. }
  308. Y_FORCE_INLINE ui32 ReverseBits(ui32 t) {
  309. return static_cast<ui32>(::NBitOps::NPrivate::SwapBytePairs(
  310. ::NBitOps::NPrivate::SwapOddEvenBytes(
  311. ::NBitOps::NPrivate::SwapNibbles(
  312. ::NBitOps::NPrivate::SwapBitPairs(
  313. ::NBitOps::NPrivate::SwapOddEvenBits(t))))));
  314. }
  315. Y_FORCE_INLINE ui64 ReverseBits(ui64 t) {
  316. return ::NBitOps::NPrivate::SwapByteQuads(
  317. ::NBitOps::NPrivate::SwapBytePairs(
  318. ::NBitOps::NPrivate::SwapOddEvenBytes(
  319. ::NBitOps::NPrivate::SwapNibbles(
  320. ::NBitOps::NPrivate::SwapBitPairs(
  321. ::NBitOps::NPrivate::SwapOddEvenBits(t))))));
  322. }
/*
 * Reverse first `bits` bits, leaving the higher bits untouched.
 * 1000111000111000 , bits = 6 => 1000111000000111
 * `bits` is presumably in [0, bit width of T]; bits == 0 returns v
 * unchanged (the guard also avoids an out-of-range shift below).
 */
template <typename T>
Y_FORCE_INLINE T ReverseBits(T v, ui64 bits) {
    // Reverse the isolated low field across the full width of T, shift the
    // result back down by (width - bits), and OR in the preserved high part.
    return bits ? (T(v & ::InverseMaskLowerBits(bits)) | T(ReverseBits(T(v & ::MaskLowerBits(bits)))) >> ((ui64{sizeof(T)} << ui64{3}) - bits)) : v;
}
/*
 * Reverse first `bits` bits starting from `skipbits` bits.
 * 1000111000111000 , bits = 4, skipbits = 2 => 1000111000011100
 */
template <typename T>
Y_FORCE_INLINE T ReverseBits(T v, ui64 bits, ui64 skipbits) {
    // Shift the window down to bit 0, reverse it with the two-argument
    // overload, shift it back, then restore the skipped low bits.
    return (T(ReverseBits((v >> skipbits), bits)) << skipbits) | T(v & MaskLowerBits(skipbits));
}
  339. /* Rotate bits left. Also known as left circular shift.
  340. */
  341. template <typename T>
  342. Y_FORCE_INLINE T RotateBitsLeft(T value, const ui8 shift) noexcept {
  343. static_assert(std::is_unsigned<T>::value, "must be unsigned arithmetic type");
  344. return ::NBitOps::NPrivate::RotateBitsLeftImpl((TFixedWidthUnsignedInt<T>)value, shift);
  345. }
  346. /* Rotate bits right. Also known as right circular shift.
  347. */
  348. template <typename T>
  349. Y_FORCE_INLINE T RotateBitsRight(T value, const ui8 shift) noexcept {
  350. static_assert(std::is_unsigned<T>::value, "must be unsigned arithmetic type");
  351. return ::NBitOps::NPrivate::RotateBitsRightImpl((TFixedWidthUnsignedInt<T>)value, shift);
  352. }
  353. /* Rotate bits left. Also known as left circular shift.
  354. */
  355. template <typename T>
  356. constexpr T RotateBitsLeftCT(T value, const ui8 shift) noexcept {
  357. static_assert(std::is_unsigned<T>::value, "must be unsigned arithmetic type");
  358. // do trick with mask to avoid undefined behaviour
  359. return (value << shift) | (value >> ((-shift) & (sizeof(T) * 8 - 1)));
  360. }
  361. /* Rotate bits right. Also known as right circular shift.
  362. */
  363. template <typename T>
  364. constexpr T RotateBitsRightCT(T value, const ui8 shift) noexcept {
  365. static_assert(std::is_unsigned<T>::value, "must be unsigned arithmetic type");
  366. // do trick with mask to avoid undefined behaviour
  367. return (value >> shift) | (value << ((-shift) & (sizeof(T) * 8 - 1)));
  368. }
/* Extracts the `Size`-bit field of `value` that starts at bit `Offset`,
 * shifted down so the field occupies the low bits of the result.
 * Size and Offset must each be less than the bit width of T.
 */
template <size_t Offset, size_t Size, class T>
Y_FORCE_INLINE T SelectBits(T value) {
    static_assert(Size < sizeof(T) * 8, "violated: Size < sizeof(T) * 8");
    static_assert(Offset < sizeof(T) * 8, "violated: Offset < sizeof(T) * 8");
    // (id << Size) - id is a mask of Size low bits, computed in T itself to
    // sidestep integer-promotion surprises on narrow types.
    T id = 1;
    return (value >> Offset) & ((id << Size) - id);
}
/* Writes `bits` into the `Size`-bit field of `value` at bit `Offset`.
 * Requires that bits <= (1 << Size) - 1; Size and Offset must each be less
 * than the bit width of T.
 */
template <size_t Offset, size_t Size, class T>
void SetBits(T& value, T bits) {
    static_assert(Size < sizeof(T) * 8, "violated: Size < sizeof(T) * 8");
    static_assert(Offset < sizeof(T) * 8, "violated: Offset < sizeof(T) * 8");
    T id = 1;
    T maxValue = ((id << Size) - id); // mask of Size low bits
    Y_ASSERT(bits <= maxValue);
    // Clear the target field first, then OR the new bits into place.
    value &= ~(maxValue << Offset);
    value |= bits << Offset;
}
  392. inline constexpr ui64 NthBit64(int bit) {
  393. return ui64(1) << bit;
  394. }
  395. inline constexpr ui64 Mask64(int bits) {
  396. return NthBit64(bits) - 1;
  397. }