pcg_uint128.hpp 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748
  1. /*
  2. * PCG Random Number Generation for C++
  3. *
  4. * Copyright 2014-2017 Melissa O'Neill <oneill@pcg-random.org>,
  5. * and the PCG Project contributors.
  6. *
  7. * SPDX-License-Identifier: (Apache-2.0 OR MIT)
  8. *
  9. * Licensed under the Apache License, Version 2.0 (provided in
  10. * LICENSE-APACHE.txt and at http://www.apache.org/licenses/LICENSE-2.0)
  11. * or under the MIT license (provided in LICENSE-MIT.txt and at
  12. * http://opensource.org/licenses/MIT), at your option. This file may not
  13. * be copied, modified, or distributed except according to those terms.
  14. *
  15. * Distributed on an "AS IS" BASIS, WITHOUT WARRANTY OF ANY KIND, either
  16. * express or implied. See your chosen license for details.
  17. *
  18. * For additional information about the PCG random number generation scheme,
  19. * visit http://www.pcg-random.org/.
  20. */
  21. /*
  22. * This code provides a a C++ class that can provide 128-bit (or higher)
  23. * integers. To produce 2K-bit integers, it uses two K-bit integers,
  24. * placed in a union that allows the code to also see them as four K/2 bit
  25. * integers (and access them either directly name, or by index).
  26. *
  27. * It may seem like we're reinventing the wheel here, because several
  28. * libraries already exist that support large integers, but most existing
  29. * libraries provide a very generic multiprecision code, but here we're
  30. * operating at a fixed size. Also, most other libraries are fairly
  31. * heavyweight. So we use a direct implementation. Sadly, it's much slower
  32. * than hand-coded assembly or direct CPU support.
  33. */
  34. #ifndef PCG_UINT128_HPP_INCLUDED
  35. #define PCG_UINT128_HPP_INCLUDED 1
  36. #include <cstdint>
  37. #include <cstdio>
  38. #include <cassert>
  39. #include <climits>
  40. #include <utility>
  41. #include <initializer_list>
  42. #include <type_traits>
  43. /*
  44. * We want to lay the type out the same way that a native type would be laid
  45. * out, which means we must know the machine's endian, at compile time.
  46. * This ugliness attempts to do so.
  47. */
  48. #ifndef PCG_LITTLE_ENDIAN
  49. #if defined(__BYTE_ORDER__)
  50. #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  51. #define PCG_LITTLE_ENDIAN 1
  52. #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
  53. #define PCG_LITTLE_ENDIAN 0
  54. #else
  55. #error __BYTE_ORDER__ does not match a standard endian, pick a side
  56. #endif
  57. #elif __LITTLE_ENDIAN__ || _LITTLE_ENDIAN
  58. #define PCG_LITTLE_ENDIAN 1
  59. #elif __BIG_ENDIAN__ || _BIG_ENDIAN
  60. #define PCG_LITTLE_ENDIAN 0
  61. #elif __x86_64 || __x86_64__ || _M_X64 || __i386 || __i386__ || _M_IX86
  62. #define PCG_LITTLE_ENDIAN 1
  63. #elif __powerpc__ || __POWERPC__ || __ppc__ || __PPC__ \
  64. || __m68k__ || __mc68000__
  65. #define PCG_LITTLE_ENDIAN 0
  66. #else
  67. #error Unable to determine target endianness
  68. #endif
  69. #endif
  70. namespace pcg_extras {
  71. // Recent versions of GCC have intrinsics we can use to quickly calculate
  72. // the number of leading and trailing zeros in a number. If possible, we
  73. // use them, otherwise we fall back to old-fashioned bit twiddling to figure
  74. // them out.
  75. #ifndef PCG_BITCOUNT_T
  76. typedef uint8_t bitcount_t;
  77. #else
  78. typedef PCG_BITCOUNT_T bitcount_t;
  79. #endif
  80. /*
  81. * Provide some useful helper functions
  82. * * flog2 floor(log2(x))
  83. * * trailingzeros number of trailing zero bits
  84. */
  85. #ifdef __GNUC__ // Any GNU-compatible compiler supporting C++11 has
  86. // some useful intrinsics we can use.
  87. inline bitcount_t flog2(uint32_t v)
  88. {
  89. return 31 - __builtin_clz(v);
  90. }
  91. inline bitcount_t trailingzeros(uint32_t v)
  92. {
  93. return __builtin_ctz(v);
  94. }
  95. inline bitcount_t flog2(uint64_t v)
  96. {
  97. #if UINT64_MAX == ULONG_MAX
  98. return 63 - __builtin_clzl(v);
  99. #elif UINT64_MAX == ULLONG_MAX
  100. return 63 - __builtin_clzll(v);
  101. #else
  102. #error Cannot find a function for uint64_t
  103. #endif
  104. }
  105. inline bitcount_t trailingzeros(uint64_t v)
  106. {
  107. #if UINT64_MAX == ULONG_MAX
  108. return __builtin_ctzl(v);
  109. #elif UINT64_MAX == ULLONG_MAX
  110. return __builtin_ctzll(v);
  111. #else
  112. #error Cannot find a function for uint64_t
  113. #endif
  114. }
  115. #else // Otherwise, we fall back to bit twiddling
  116. // implementations
  117. inline bitcount_t flog2(uint32_t v)
  118. {
  119. // Based on code by Eric Cole and Mark Dickinson, which appears at
  120. // https://graphics.stanford.edu/~seander/bithacks.html#IntegerLogDeBruijn
  121. static const uint8_t multiplyDeBruijnBitPos[32] = {
  122. 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30,
  123. 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31
  124. };
  125. v |= v >> 1; // first round down to one less than a power of 2
  126. v |= v >> 2;
  127. v |= v >> 4;
  128. v |= v >> 8;
  129. v |= v >> 16;
  130. return multiplyDeBruijnBitPos[(uint32_t)(v * 0x07C4ACDDU) >> 27];
  131. }
  132. inline bitcount_t trailingzeros(uint32_t v)
  133. {
  134. static const uint8_t multiplyDeBruijnBitPos[32] = {
  135. 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
  136. 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
  137. };
  138. return multiplyDeBruijnBitPos[((uint32_t)((v & -v) * 0x077CB531U)) >> 27];
  139. }
  140. inline bitcount_t flog2(uint64_t v)
  141. {
  142. uint32_t high = v >> 32;
  143. uint32_t low = uint32_t(v);
  144. return high ? 32+flog2(high) : flog2(low);
  145. }
  146. inline bitcount_t trailingzeros(uint64_t v)
  147. {
  148. uint32_t high = v >> 32;
  149. uint32_t low = uint32_t(v);
  150. return low ? trailingzeros(low) : trailingzeros(high)+32;
  151. }
  152. #endif
  153. template <typename UInt>
  154. inline bitcount_t clog2(UInt v)
  155. {
  156. return flog2(v) + ((v & (-v)) != v);
  157. }
  158. template <typename UInt>
  159. inline UInt addwithcarry(UInt x, UInt y, bool carryin, bool* carryout)
  160. {
  161. UInt half_result = y + carryin;
  162. UInt result = x + half_result;
  163. *carryout = (half_result < y) || (result < x);
  164. return result;
  165. }
  166. template <typename UInt>
  167. inline UInt subwithcarry(UInt x, UInt y, bool carryin, bool* carryout)
  168. {
  169. UInt half_result = y + carryin;
  170. UInt result = x - half_result;
  171. *carryout = (half_result < y) || (result > x);
  172. return result;
  173. }
  174. template <typename UInt, typename UIntX2>
  175. class uint_x4 {
  176. // private:
  177. public:
  178. union {
  179. #if PCG_LITTLE_ENDIAN
  180. struct {
  181. UInt v0, v1, v2, v3;
  182. } w;
  183. struct {
  184. UIntX2 v01, v23;
  185. } d;
  186. #else
  187. struct {
  188. UInt v3, v2, v1, v0;
  189. } w;
  190. struct {
  191. UIntX2 v23, v01;
  192. } d;
  193. #endif
  194. // For the array access versions, the code that uses the array
  195. // must handle endian itself. Yuck.
  196. UInt wa[4];
  197. UIntX2 da[2];
  198. };
  199. public:
  200. uint_x4() = default;
  201. constexpr uint_x4(UInt v3, UInt v2, UInt v1, UInt v0)
  202. #if PCG_LITTLE_ENDIAN
  203. : w{v0, v1, v2, v3}
  204. #else
  205. : w{v3, v2, v1, v0}
  206. #endif
  207. {
  208. // Nothing (else) to do
  209. }
  210. constexpr uint_x4(UIntX2 v23, UIntX2 v01)
  211. #if PCG_LITTLE_ENDIAN
  212. : d{v01,v23}
  213. #else
  214. : d{v23,v01}
  215. #endif
  216. {
  217. // Nothing (else) to do
  218. }
  219. template<class Integral,
  220. typename std::enable_if<(std::is_integral<Integral>::value
  221. && sizeof(Integral) <= sizeof(UIntX2))
  222. >::type* = nullptr>
  223. constexpr uint_x4(Integral v01)
  224. #if PCG_LITTLE_ENDIAN
  225. : d{UIntX2(v01),0UL}
  226. #else
  227. : d{0UL,UIntX2(v01)}
  228. #endif
  229. {
  230. // Nothing (else) to do
  231. }
  232. explicit constexpr operator uint64_t() const
  233. {
  234. return d.v01;
  235. }
  236. explicit constexpr operator uint32_t() const
  237. {
  238. return w.v0;
  239. }
  240. explicit constexpr operator int() const
  241. {
  242. return w.v0;
  243. }
  244. explicit constexpr operator uint16_t() const
  245. {
  246. return w.v0;
  247. }
  248. explicit constexpr operator uint8_t() const
  249. {
  250. return w.v0;
  251. }
  252. typedef typename std::conditional<std::is_same<uint64_t,
  253. unsigned long>::value,
  254. unsigned long long,
  255. unsigned long>::type
  256. uint_missing_t;
  257. explicit constexpr operator uint_missing_t() const
  258. {
  259. return d.v01;
  260. }
  261. explicit constexpr operator bool() const
  262. {
  263. return d.v01 || d.v23;
  264. }
  265. template<typename U, typename V>
  266. friend uint_x4<U,V> operator*(const uint_x4<U,V>&, const uint_x4<U,V>&);
  267. template<typename U, typename V>
  268. friend std::pair< uint_x4<U,V>,uint_x4<U,V> >
  269. divmod(const uint_x4<U,V>&, const uint_x4<U,V>&);
  270. template<typename U, typename V>
  271. friend uint_x4<U,V> operator+(const uint_x4<U,V>&, const uint_x4<U,V>&);
  272. template<typename U, typename V>
  273. friend uint_x4<U,V> operator-(const uint_x4<U,V>&, const uint_x4<U,V>&);
  274. template<typename U, typename V>
  275. friend uint_x4<U,V> operator<<(const uint_x4<U,V>&, const uint_x4<U,V>&);
  276. template<typename U, typename V>
  277. friend uint_x4<U,V> operator>>(const uint_x4<U,V>&, const uint_x4<U,V>&);
  278. template<typename U, typename V>
  279. friend uint_x4<U,V> operator&(const uint_x4<U,V>&, const uint_x4<U,V>&);
  280. template<typename U, typename V>
  281. friend uint_x4<U,V> operator|(const uint_x4<U,V>&, const uint_x4<U,V>&);
  282. template<typename U, typename V>
  283. friend uint_x4<U,V> operator^(const uint_x4<U,V>&, const uint_x4<U,V>&);
  284. template<typename U, typename V>
  285. friend bool operator==(const uint_x4<U,V>&, const uint_x4<U,V>&);
  286. template<typename U, typename V>
  287. friend bool operator!=(const uint_x4<U,V>&, const uint_x4<U,V>&);
  288. template<typename U, typename V>
  289. friend bool operator<(const uint_x4<U,V>&, const uint_x4<U,V>&);
  290. template<typename U, typename V>
  291. friend bool operator<=(const uint_x4<U,V>&, const uint_x4<U,V>&);
  292. template<typename U, typename V>
  293. friend bool operator>(const uint_x4<U,V>&, const uint_x4<U,V>&);
  294. template<typename U, typename V>
  295. friend bool operator>=(const uint_x4<U,V>&, const uint_x4<U,V>&);
  296. template<typename U, typename V>
  297. friend uint_x4<U,V> operator~(const uint_x4<U,V>&);
  298. template<typename U, typename V>
  299. friend uint_x4<U,V> operator-(const uint_x4<U,V>&);
  300. template<typename U, typename V>
  301. friend bitcount_t flog2(const uint_x4<U,V>&);
  302. template<typename U, typename V>
  303. friend bitcount_t trailingzeros(const uint_x4<U,V>&);
  304. uint_x4& operator*=(const uint_x4& rhs)
  305. {
  306. uint_x4 result = *this * rhs;
  307. return *this = result;
  308. }
  309. uint_x4& operator/=(const uint_x4& rhs)
  310. {
  311. uint_x4 result = *this / rhs;
  312. return *this = result;
  313. }
  314. uint_x4& operator%=(const uint_x4& rhs)
  315. {
  316. uint_x4 result = *this % rhs;
  317. return *this = result;
  318. }
  319. uint_x4& operator+=(const uint_x4& rhs)
  320. {
  321. uint_x4 result = *this + rhs;
  322. return *this = result;
  323. }
  324. uint_x4& operator-=(const uint_x4& rhs)
  325. {
  326. uint_x4 result = *this - rhs;
  327. return *this = result;
  328. }
  329. uint_x4& operator&=(const uint_x4& rhs)
  330. {
  331. uint_x4 result = *this & rhs;
  332. return *this = result;
  333. }
  334. uint_x4& operator|=(const uint_x4& rhs)
  335. {
  336. uint_x4 result = *this | rhs;
  337. return *this = result;
  338. }
  339. uint_x4& operator^=(const uint_x4& rhs)
  340. {
  341. uint_x4 result = *this ^ rhs;
  342. return *this = result;
  343. }
  344. uint_x4& operator>>=(bitcount_t shift)
  345. {
  346. uint_x4 result = *this >> shift;
  347. return *this = result;
  348. }
  349. uint_x4& operator<<=(bitcount_t shift)
  350. {
  351. uint_x4 result = *this << shift;
  352. return *this = result;
  353. }
  354. };
  355. template<typename U, typename V>
  356. bitcount_t flog2(const uint_x4<U,V>& v)
  357. {
  358. #if PCG_LITTLE_ENDIAN
  359. for (uint8_t i = 4; i !=0; /* dec in loop */) {
  360. --i;
  361. #else
  362. for (uint8_t i = 0; i < 4; ++i) {
  363. #endif
  364. if (v.wa[i] == 0)
  365. continue;
  366. return flog2(v.wa[i]) + (sizeof(U)*CHAR_BIT)*i;
  367. }
  368. abort();
  369. }
  370. template<typename U, typename V>
  371. bitcount_t trailingzeros(const uint_x4<U,V>& v)
  372. {
  373. #if PCG_LITTLE_ENDIAN
  374. for (uint8_t i = 0; i < 4; ++i) {
  375. #else
  376. for (uint8_t i = 4; i !=0; /* dec in loop */) {
  377. --i;
  378. #endif
  379. if (v.wa[i] != 0)
  380. return trailingzeros(v.wa[i]) + (sizeof(U)*CHAR_BIT)*i;
  381. }
  382. return (sizeof(U)*CHAR_BIT)*4;
  383. }
  384. template <typename UInt, typename UIntX2>
  385. std::pair< uint_x4<UInt,UIntX2>, uint_x4<UInt,UIntX2> >
  386. divmod(const uint_x4<UInt,UIntX2>& orig_dividend,
  387. const uint_x4<UInt,UIntX2>& divisor)
  388. {
  389. // If the dividend is less than the divisor, the answer is always zero.
  390. // This takes care of boundary cases like 0/x (which would otherwise be
  391. // problematic because we can't take the log of zero. (The boundary case
  392. // of division by zero is undefined.)
  393. if (orig_dividend < divisor)
  394. return { uint_x4<UInt,UIntX2>(0UL), orig_dividend };
  395. auto dividend = orig_dividend;
  396. auto log2_divisor = flog2(divisor);
  397. auto log2_dividend = flog2(dividend);
  398. // assert(log2_dividend >= log2_divisor);
  399. bitcount_t logdiff = log2_dividend - log2_divisor;
  400. constexpr uint_x4<UInt,UIntX2> ONE(1UL);
  401. if (logdiff == 0)
  402. return { ONE, dividend - divisor };
  403. // Now we change the log difference to
  404. // floor(log2(divisor)) - ceil(log2(dividend))
  405. // to ensure that we *underestimate* the result.
  406. logdiff -= 1;
  407. uint_x4<UInt,UIntX2> quotient(0UL);
  408. auto qfactor = ONE << logdiff;
  409. auto factor = divisor << logdiff;
  410. do {
  411. dividend -= factor;
  412. quotient += qfactor;
  413. while (dividend < factor) {
  414. factor >>= 1;
  415. qfactor >>= 1;
  416. }
  417. } while (dividend >= divisor);
  418. return { quotient, dividend };
  419. }
  420. template <typename UInt, typename UIntX2>
  421. uint_x4<UInt,UIntX2> operator/(const uint_x4<UInt,UIntX2>& dividend,
  422. const uint_x4<UInt,UIntX2>& divisor)
  423. {
  424. return divmod(dividend, divisor).first;
  425. }
  426. template <typename UInt, typename UIntX2>
  427. uint_x4<UInt,UIntX2> operator%(const uint_x4<UInt,UIntX2>& dividend,
  428. const uint_x4<UInt,UIntX2>& divisor)
  429. {
  430. return divmod(dividend, divisor).second;
  431. }
  432. template <typename UInt, typename UIntX2>
  433. uint_x4<UInt,UIntX2> operator*(const uint_x4<UInt,UIntX2>& a,
  434. const uint_x4<UInt,UIntX2>& b)
  435. {
  436. uint_x4<UInt,UIntX2> r = {0U, 0U, 0U, 0U};
  437. bool carryin = false;
  438. bool carryout;
  439. UIntX2 a0b0 = UIntX2(a.w.v0) * UIntX2(b.w.v0);
  440. r.w.v0 = UInt(a0b0);
  441. r.w.v1 = UInt(a0b0 >> 32);
  442. UIntX2 a1b0 = UIntX2(a.w.v1) * UIntX2(b.w.v0);
  443. r.w.v2 = UInt(a1b0 >> 32);
  444. r.w.v1 = addwithcarry(r.w.v1, UInt(a1b0), carryin, &carryout);
  445. carryin = carryout;
  446. r.w.v2 = addwithcarry(r.w.v2, UInt(0U), carryin, &carryout);
  447. carryin = carryout;
  448. r.w.v3 = addwithcarry(r.w.v3, UInt(0U), carryin, &carryout);
  449. UIntX2 a0b1 = UIntX2(a.w.v0) * UIntX2(b.w.v1);
  450. carryin = false;
  451. r.w.v2 = addwithcarry(r.w.v2, UInt(a0b1 >> 32), carryin, &carryout);
  452. carryin = carryout;
  453. r.w.v3 = addwithcarry(r.w.v3, UInt(0U), carryin, &carryout);
  454. carryin = false;
  455. r.w.v1 = addwithcarry(r.w.v1, UInt(a0b1), carryin, &carryout);
  456. carryin = carryout;
  457. r.w.v2 = addwithcarry(r.w.v2, UInt(0U), carryin, &carryout);
  458. carryin = carryout;
  459. r.w.v3 = addwithcarry(r.w.v3, UInt(0U), carryin, &carryout);
  460. UIntX2 a1b1 = UIntX2(a.w.v1) * UIntX2(b.w.v1);
  461. carryin = false;
  462. r.w.v2 = addwithcarry(r.w.v2, UInt(a1b1), carryin, &carryout);
  463. carryin = carryout;
  464. r.w.v3 = addwithcarry(r.w.v3, UInt(a1b1 >> 32), carryin, &carryout);
  465. r.d.v23 += a.d.v01 * b.d.v23 + a.d.v23 * b.d.v01;
  466. return r;
  467. }
  468. template <typename UInt, typename UIntX2>
  469. uint_x4<UInt,UIntX2> operator+(const uint_x4<UInt,UIntX2>& a,
  470. const uint_x4<UInt,UIntX2>& b)
  471. {
  472. uint_x4<UInt,UIntX2> r = {0U, 0U, 0U, 0U};
  473. bool carryin = false;
  474. bool carryout;
  475. r.w.v0 = addwithcarry(a.w.v0, b.w.v0, carryin, &carryout);
  476. carryin = carryout;
  477. r.w.v1 = addwithcarry(a.w.v1, b.w.v1, carryin, &carryout);
  478. carryin = carryout;
  479. r.w.v2 = addwithcarry(a.w.v2, b.w.v2, carryin, &carryout);
  480. carryin = carryout;
  481. r.w.v3 = addwithcarry(a.w.v3, b.w.v3, carryin, &carryout);
  482. return r;
  483. }
  484. template <typename UInt, typename UIntX2>
  485. uint_x4<UInt,UIntX2> operator-(const uint_x4<UInt,UIntX2>& a,
  486. const uint_x4<UInt,UIntX2>& b)
  487. {
  488. uint_x4<UInt,UIntX2> r = {0U, 0U, 0U, 0U};
  489. bool carryin = false;
  490. bool carryout;
  491. r.w.v0 = subwithcarry(a.w.v0, b.w.v0, carryin, &carryout);
  492. carryin = carryout;
  493. r.w.v1 = subwithcarry(a.w.v1, b.w.v1, carryin, &carryout);
  494. carryin = carryout;
  495. r.w.v2 = subwithcarry(a.w.v2, b.w.v2, carryin, &carryout);
  496. carryin = carryout;
  497. r.w.v3 = subwithcarry(a.w.v3, b.w.v3, carryin, &carryout);
  498. return r;
  499. }
  500. template <typename UInt, typename UIntX2>
  501. uint_x4<UInt,UIntX2> operator&(const uint_x4<UInt,UIntX2>& a,
  502. const uint_x4<UInt,UIntX2>& b)
  503. {
  504. return uint_x4<UInt,UIntX2>(a.d.v23 & b.d.v23, a.d.v01 & b.d.v01);
  505. }
  506. template <typename UInt, typename UIntX2>
  507. uint_x4<UInt,UIntX2> operator|(const uint_x4<UInt,UIntX2>& a,
  508. const uint_x4<UInt,UIntX2>& b)
  509. {
  510. return uint_x4<UInt,UIntX2>(a.d.v23 | b.d.v23, a.d.v01 | b.d.v01);
  511. }
  512. template <typename UInt, typename UIntX2>
  513. uint_x4<UInt,UIntX2> operator^(const uint_x4<UInt,UIntX2>& a,
  514. const uint_x4<UInt,UIntX2>& b)
  515. {
  516. return uint_x4<UInt,UIntX2>(a.d.v23 ^ b.d.v23, a.d.v01 ^ b.d.v01);
  517. }
  518. template <typename UInt, typename UIntX2>
  519. uint_x4<UInt,UIntX2> operator~(const uint_x4<UInt,UIntX2>& v)
  520. {
  521. return uint_x4<UInt,UIntX2>(~v.d.v23, ~v.d.v01);
  522. }
  523. template <typename UInt, typename UIntX2>
  524. uint_x4<UInt,UIntX2> operator-(const uint_x4<UInt,UIntX2>& v)
  525. {
  526. return uint_x4<UInt,UIntX2>(0UL,0UL) - v;
  527. }
  528. template <typename UInt, typename UIntX2>
  529. bool operator==(const uint_x4<UInt,UIntX2>& a, const uint_x4<UInt,UIntX2>& b)
  530. {
  531. return (a.d.v01 == b.d.v01) && (a.d.v23 == b.d.v23);
  532. }
  533. template <typename UInt, typename UIntX2>
  534. bool operator!=(const uint_x4<UInt,UIntX2>& a, const uint_x4<UInt,UIntX2>& b)
  535. {
  536. return !operator==(a,b);
  537. }
  538. template <typename UInt, typename UIntX2>
  539. bool operator<(const uint_x4<UInt,UIntX2>& a, const uint_x4<UInt,UIntX2>& b)
  540. {
  541. return (a.d.v23 < b.d.v23)
  542. || ((a.d.v23 == b.d.v23) && (a.d.v01 < b.d.v01));
  543. }
  544. template <typename UInt, typename UIntX2>
  545. bool operator>(const uint_x4<UInt,UIntX2>& a, const uint_x4<UInt,UIntX2>& b)
  546. {
  547. return operator<(b,a);
  548. }
  549. template <typename UInt, typename UIntX2>
  550. bool operator<=(const uint_x4<UInt,UIntX2>& a, const uint_x4<UInt,UIntX2>& b)
  551. {
  552. return !(operator<(b,a));
  553. }
  554. template <typename UInt, typename UIntX2>
  555. bool operator>=(const uint_x4<UInt,UIntX2>& a, const uint_x4<UInt,UIntX2>& b)
  556. {
  557. return !(operator<(a,b));
  558. }
  559. template <typename UInt, typename UIntX2>
  560. uint_x4<UInt,UIntX2> operator<<(const uint_x4<UInt,UIntX2>& v,
  561. const bitcount_t shift)
  562. {
  563. uint_x4<UInt,UIntX2> r = {0U, 0U, 0U, 0U};
  564. const bitcount_t bits = sizeof(UInt) * CHAR_BIT;
  565. const bitcount_t bitmask = bits - 1;
  566. const bitcount_t shiftdiv = shift / bits;
  567. const bitcount_t shiftmod = shift & bitmask;
  568. if (shiftmod) {
  569. UInt carryover = 0;
  570. #if PCG_LITTLE_ENDIAN
  571. for (uint8_t out = shiftdiv, in = 0; out < 4; ++out, ++in) {
  572. #else
  573. for (uint8_t out = 4-shiftdiv, in = 4; out != 0; /* dec in loop */) {
  574. --out, --in;
  575. #endif
  576. r.wa[out] = (v.wa[in] << shiftmod) | carryover;
  577. carryover = (v.wa[in] >> (bits - shiftmod));
  578. }
  579. } else {
  580. #if PCG_LITTLE_ENDIAN
  581. for (uint8_t out = shiftdiv, in = 0; out < 4; ++out, ++in) {
  582. #else
  583. for (uint8_t out = 4-shiftdiv, in = 4; out != 0; /* dec in loop */) {
  584. --out, --in;
  585. #endif
  586. r.wa[out] = v.wa[in];
  587. }
  588. }
  589. return r;
  590. }
  591. template <typename UInt, typename UIntX2>
  592. uint_x4<UInt,UIntX2> operator>>(const uint_x4<UInt,UIntX2>& v,
  593. const bitcount_t shift)
  594. {
  595. uint_x4<UInt,UIntX2> r = {0U, 0U, 0U, 0U};
  596. const bitcount_t bits = sizeof(UInt) * CHAR_BIT;
  597. const bitcount_t bitmask = bits - 1;
  598. const bitcount_t shiftdiv = shift / bits;
  599. const bitcount_t shiftmod = shift & bitmask;
  600. if (shiftmod) {
  601. UInt carryover = 0;
  602. #if PCG_LITTLE_ENDIAN
  603. for (uint8_t out = 4-shiftdiv, in = 4; out != 0; /* dec in loop */) {
  604. --out, --in;
  605. #else
  606. for (uint8_t out = shiftdiv, in = 0; out < 4; ++out, ++in) {
  607. #endif
  608. r.wa[out] = (v.wa[in] >> shiftmod) | carryover;
  609. carryover = (v.wa[in] << (bits - shiftmod));
  610. }
  611. } else {
  612. #if PCG_LITTLE_ENDIAN
  613. for (uint8_t out = 4-shiftdiv, in = 4; out != 0; /* dec in loop */) {
  614. --out, --in;
  615. #else
  616. for (uint8_t out = shiftdiv, in = 0; out < 4; ++out, ++in) {
  617. #endif
  618. r.wa[out] = v.wa[in];
  619. }
  620. }
  621. return r;
  622. }
  623. } // namespace pcg_extras
  624. #endif // PCG_UINT128_HPP_INCLUDED