123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748 |
- /*
- * PCG Random Number Generation for C++
- *
- * Copyright 2014-2017 Melissa O'Neill <oneill@pcg-random.org>,
- * and the PCG Project contributors.
- *
- * SPDX-License-Identifier: (Apache-2.0 OR MIT)
- *
- * Licensed under the Apache License, Version 2.0 (provided in
- * LICENSE-APACHE.txt and at http://www.apache.org/licenses/LICENSE-2.0)
- * or under the MIT license (provided in LICENSE-MIT.txt and at
- * http://opensource.org/licenses/MIT), at your option. This file may not
- * be copied, modified, or distributed except according to those terms.
- *
- * Distributed on an "AS IS" BASIS, WITHOUT WARRANTY OF ANY KIND, either
- * express or implied. See your chosen license for details.
- *
- * For additional information about the PCG random number generation scheme,
- * visit http://www.pcg-random.org/.
- */
- /*
- * This code provides a a C++ class that can provide 128-bit (or higher)
- * integers. To produce 2K-bit integers, it uses two K-bit integers,
- * placed in a union that allows the code to also see them as four K/2 bit
- * integers (and access them either directly name, or by index).
- *
- * It may seem like we're reinventing the wheel here, because several
- * libraries already exist that support large integers, but most existing
- * libraries provide a very generic multiprecision code, but here we're
- * operating at a fixed size. Also, most other libraries are fairly
- * heavyweight. So we use a direct implementation. Sadly, it's much slower
- * than hand-coded assembly or direct CPU support.
- */
- #ifndef PCG_UINT128_HPP_INCLUDED
- #define PCG_UINT128_HPP_INCLUDED 1
- #include <cstdint>
- #include <cstdio>
- #include <cassert>
- #include <climits>
- #include <utility>
- #include <initializer_list>
- #include <type_traits>
- /*
- * We want to lay the type out the same way that a native type would be laid
- * out, which means we must know the machine's endian, at compile time.
- * This ugliness attempts to do so.
- */
- #ifndef PCG_LITTLE_ENDIAN
- #if defined(__BYTE_ORDER__)
- #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
- #define PCG_LITTLE_ENDIAN 1
- #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
- #define PCG_LITTLE_ENDIAN 0
- #else
- #error __BYTE_ORDER__ does not match a standard endian, pick a side
- #endif
- #elif __LITTLE_ENDIAN__ || _LITTLE_ENDIAN
- #define PCG_LITTLE_ENDIAN 1
- #elif __BIG_ENDIAN__ || _BIG_ENDIAN
- #define PCG_LITTLE_ENDIAN 0
- #elif __x86_64 || __x86_64__ || _M_X64 || __i386 || __i386__ || _M_IX86
- #define PCG_LITTLE_ENDIAN 1
- #elif __powerpc__ || __POWERPC__ || __ppc__ || __PPC__ \
- || __m68k__ || __mc68000__
- #define PCG_LITTLE_ENDIAN 0
- #else
- #error Unable to determine target endianness
- #endif
- #endif
- namespace pcg_extras {
- // Recent versions of GCC have intrinsics we can use to quickly calculate
- // the number of leading and trailing zeros in a number. If possible, we
- // use them, otherwise we fall back to old-fashioned bit twiddling to figure
- // them out.
- #ifndef PCG_BITCOUNT_T
- typedef uint8_t bitcount_t;
- #else
- typedef PCG_BITCOUNT_T bitcount_t;
- #endif
- /*
- * Provide some useful helper functions
- * * flog2 floor(log2(x))
- * * trailingzeros number of trailing zero bits
- */
- #ifdef __GNUC__ // Any GNU-compatible compiler supporting C++11 has
- // some useful intrinsics we can use.
- inline bitcount_t flog2(uint32_t v)
- {
- return 31 - __builtin_clz(v);
- }
- inline bitcount_t trailingzeros(uint32_t v)
- {
- return __builtin_ctz(v);
- }
- inline bitcount_t flog2(uint64_t v)
- {
- #if UINT64_MAX == ULONG_MAX
- return 63 - __builtin_clzl(v);
- #elif UINT64_MAX == ULLONG_MAX
- return 63 - __builtin_clzll(v);
- #else
- #error Cannot find a function for uint64_t
- #endif
- }
- inline bitcount_t trailingzeros(uint64_t v)
- {
- #if UINT64_MAX == ULONG_MAX
- return __builtin_ctzl(v);
- #elif UINT64_MAX == ULLONG_MAX
- return __builtin_ctzll(v);
- #else
- #error Cannot find a function for uint64_t
- #endif
- }
- #else // Otherwise, we fall back to bit twiddling
- // implementations
- inline bitcount_t flog2(uint32_t v)
- {
- // Based on code by Eric Cole and Mark Dickinson, which appears at
- // https://graphics.stanford.edu/~seander/bithacks.html#IntegerLogDeBruijn
- static const uint8_t multiplyDeBruijnBitPos[32] = {
- 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30,
- 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31
- };
- v |= v >> 1; // first round down to one less than a power of 2
- v |= v >> 2;
- v |= v >> 4;
- v |= v >> 8;
- v |= v >> 16;
- return multiplyDeBruijnBitPos[(uint32_t)(v * 0x07C4ACDDU) >> 27];
- }
- inline bitcount_t trailingzeros(uint32_t v)
- {
- static const uint8_t multiplyDeBruijnBitPos[32] = {
- 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
- 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
- };
- return multiplyDeBruijnBitPos[((uint32_t)((v & -v) * 0x077CB531U)) >> 27];
- }
- inline bitcount_t flog2(uint64_t v)
- {
- uint32_t high = v >> 32;
- uint32_t low = uint32_t(v);
- return high ? 32+flog2(high) : flog2(low);
- }
- inline bitcount_t trailingzeros(uint64_t v)
- {
- uint32_t high = v >> 32;
- uint32_t low = uint32_t(v);
- return low ? trailingzeros(low) : trailingzeros(high)+32;
- }
- #endif
- template <typename UInt>
- inline bitcount_t clog2(UInt v)
- {
- return flog2(v) + ((v & (-v)) != v);
- }
- template <typename UInt>
- inline UInt addwithcarry(UInt x, UInt y, bool carryin, bool* carryout)
- {
- UInt half_result = y + carryin;
- UInt result = x + half_result;
- *carryout = (half_result < y) || (result < x);
- return result;
- }
- template <typename UInt>
- inline UInt subwithcarry(UInt x, UInt y, bool carryin, bool* carryout)
- {
- UInt half_result = y + carryin;
- UInt result = x - half_result;
- *carryout = (half_result < y) || (result > x);
- return result;
- }
- template <typename UInt, typename UIntX2>
- class uint_x4 {
- // private:
- public:
- union {
- #if PCG_LITTLE_ENDIAN
- struct {
- UInt v0, v1, v2, v3;
- } w;
- struct {
- UIntX2 v01, v23;
- } d;
- #else
- struct {
- UInt v3, v2, v1, v0;
- } w;
- struct {
- UIntX2 v23, v01;
- } d;
- #endif
- // For the array access versions, the code that uses the array
- // must handle endian itself. Yuck.
- UInt wa[4];
- UIntX2 da[2];
- };
- public:
- uint_x4() = default;
- constexpr uint_x4(UInt v3, UInt v2, UInt v1, UInt v0)
- #if PCG_LITTLE_ENDIAN
- : w{v0, v1, v2, v3}
- #else
- : w{v3, v2, v1, v0}
- #endif
- {
- // Nothing (else) to do
- }
- constexpr uint_x4(UIntX2 v23, UIntX2 v01)
- #if PCG_LITTLE_ENDIAN
- : d{v01,v23}
- #else
- : d{v23,v01}
- #endif
- {
- // Nothing (else) to do
- }
- template<class Integral,
- typename std::enable_if<(std::is_integral<Integral>::value
- && sizeof(Integral) <= sizeof(UIntX2))
- >::type* = nullptr>
- constexpr uint_x4(Integral v01)
- #if PCG_LITTLE_ENDIAN
- : d{UIntX2(v01),0UL}
- #else
- : d{0UL,UIntX2(v01)}
- #endif
- {
- // Nothing (else) to do
- }
- explicit constexpr operator uint64_t() const
- {
- return d.v01;
- }
- explicit constexpr operator uint32_t() const
- {
- return w.v0;
- }
- explicit constexpr operator int() const
- {
- return w.v0;
- }
- explicit constexpr operator uint16_t() const
- {
- return w.v0;
- }
- explicit constexpr operator uint8_t() const
- {
- return w.v0;
- }
- typedef typename std::conditional<std::is_same<uint64_t,
- unsigned long>::value,
- unsigned long long,
- unsigned long>::type
- uint_missing_t;
- explicit constexpr operator uint_missing_t() const
- {
- return d.v01;
- }
- explicit constexpr operator bool() const
- {
- return d.v01 || d.v23;
- }
- template<typename U, typename V>
- friend uint_x4<U,V> operator*(const uint_x4<U,V>&, const uint_x4<U,V>&);
- template<typename U, typename V>
- friend std::pair< uint_x4<U,V>,uint_x4<U,V> >
- divmod(const uint_x4<U,V>&, const uint_x4<U,V>&);
- template<typename U, typename V>
- friend uint_x4<U,V> operator+(const uint_x4<U,V>&, const uint_x4<U,V>&);
- template<typename U, typename V>
- friend uint_x4<U,V> operator-(const uint_x4<U,V>&, const uint_x4<U,V>&);
- template<typename U, typename V>
- friend uint_x4<U,V> operator<<(const uint_x4<U,V>&, const uint_x4<U,V>&);
- template<typename U, typename V>
- friend uint_x4<U,V> operator>>(const uint_x4<U,V>&, const uint_x4<U,V>&);
- template<typename U, typename V>
- friend uint_x4<U,V> operator&(const uint_x4<U,V>&, const uint_x4<U,V>&);
- template<typename U, typename V>
- friend uint_x4<U,V> operator|(const uint_x4<U,V>&, const uint_x4<U,V>&);
- template<typename U, typename V>
- friend uint_x4<U,V> operator^(const uint_x4<U,V>&, const uint_x4<U,V>&);
- template<typename U, typename V>
- friend bool operator==(const uint_x4<U,V>&, const uint_x4<U,V>&);
- template<typename U, typename V>
- friend bool operator!=(const uint_x4<U,V>&, const uint_x4<U,V>&);
- template<typename U, typename V>
- friend bool operator<(const uint_x4<U,V>&, const uint_x4<U,V>&);
- template<typename U, typename V>
- friend bool operator<=(const uint_x4<U,V>&, const uint_x4<U,V>&);
- template<typename U, typename V>
- friend bool operator>(const uint_x4<U,V>&, const uint_x4<U,V>&);
- template<typename U, typename V>
- friend bool operator>=(const uint_x4<U,V>&, const uint_x4<U,V>&);
- template<typename U, typename V>
- friend uint_x4<U,V> operator~(const uint_x4<U,V>&);
- template<typename U, typename V>
- friend uint_x4<U,V> operator-(const uint_x4<U,V>&);
- template<typename U, typename V>
- friend bitcount_t flog2(const uint_x4<U,V>&);
- template<typename U, typename V>
- friend bitcount_t trailingzeros(const uint_x4<U,V>&);
- uint_x4& operator*=(const uint_x4& rhs)
- {
- uint_x4 result = *this * rhs;
- return *this = result;
- }
- uint_x4& operator/=(const uint_x4& rhs)
- {
- uint_x4 result = *this / rhs;
- return *this = result;
- }
- uint_x4& operator%=(const uint_x4& rhs)
- {
- uint_x4 result = *this % rhs;
- return *this = result;
- }
- uint_x4& operator+=(const uint_x4& rhs)
- {
- uint_x4 result = *this + rhs;
- return *this = result;
- }
- uint_x4& operator-=(const uint_x4& rhs)
- {
- uint_x4 result = *this - rhs;
- return *this = result;
- }
- uint_x4& operator&=(const uint_x4& rhs)
- {
- uint_x4 result = *this & rhs;
- return *this = result;
- }
- uint_x4& operator|=(const uint_x4& rhs)
- {
- uint_x4 result = *this | rhs;
- return *this = result;
- }
- uint_x4& operator^=(const uint_x4& rhs)
- {
- uint_x4 result = *this ^ rhs;
- return *this = result;
- }
- uint_x4& operator>>=(bitcount_t shift)
- {
- uint_x4 result = *this >> shift;
- return *this = result;
- }
- uint_x4& operator<<=(bitcount_t shift)
- {
- uint_x4 result = *this << shift;
- return *this = result;
- }
- };
- template<typename U, typename V>
- bitcount_t flog2(const uint_x4<U,V>& v)
- {
- #if PCG_LITTLE_ENDIAN
- for (uint8_t i = 4; i !=0; /* dec in loop */) {
- --i;
- #else
- for (uint8_t i = 0; i < 4; ++i) {
- #endif
- if (v.wa[i] == 0)
- continue;
- return flog2(v.wa[i]) + (sizeof(U)*CHAR_BIT)*i;
- }
- abort();
- }
- template<typename U, typename V>
- bitcount_t trailingzeros(const uint_x4<U,V>& v)
- {
- #if PCG_LITTLE_ENDIAN
- for (uint8_t i = 0; i < 4; ++i) {
- #else
- for (uint8_t i = 4; i !=0; /* dec in loop */) {
- --i;
- #endif
- if (v.wa[i] != 0)
- return trailingzeros(v.wa[i]) + (sizeof(U)*CHAR_BIT)*i;
- }
- return (sizeof(U)*CHAR_BIT)*4;
- }
- template <typename UInt, typename UIntX2>
- std::pair< uint_x4<UInt,UIntX2>, uint_x4<UInt,UIntX2> >
- divmod(const uint_x4<UInt,UIntX2>& orig_dividend,
- const uint_x4<UInt,UIntX2>& divisor)
- {
- // If the dividend is less than the divisor, the answer is always zero.
- // This takes care of boundary cases like 0/x (which would otherwise be
- // problematic because we can't take the log of zero. (The boundary case
- // of division by zero is undefined.)
- if (orig_dividend < divisor)
- return { uint_x4<UInt,UIntX2>(0UL), orig_dividend };
- auto dividend = orig_dividend;
- auto log2_divisor = flog2(divisor);
- auto log2_dividend = flog2(dividend);
- // assert(log2_dividend >= log2_divisor);
- bitcount_t logdiff = log2_dividend - log2_divisor;
- constexpr uint_x4<UInt,UIntX2> ONE(1UL);
- if (logdiff == 0)
- return { ONE, dividend - divisor };
- // Now we change the log difference to
- // floor(log2(divisor)) - ceil(log2(dividend))
- // to ensure that we *underestimate* the result.
- logdiff -= 1;
- uint_x4<UInt,UIntX2> quotient(0UL);
- auto qfactor = ONE << logdiff;
- auto factor = divisor << logdiff;
- do {
- dividend -= factor;
- quotient += qfactor;
- while (dividend < factor) {
- factor >>= 1;
- qfactor >>= 1;
- }
- } while (dividend >= divisor);
- return { quotient, dividend };
- }
- template <typename UInt, typename UIntX2>
- uint_x4<UInt,UIntX2> operator/(const uint_x4<UInt,UIntX2>& dividend,
- const uint_x4<UInt,UIntX2>& divisor)
- {
- return divmod(dividend, divisor).first;
- }
- template <typename UInt, typename UIntX2>
- uint_x4<UInt,UIntX2> operator%(const uint_x4<UInt,UIntX2>& dividend,
- const uint_x4<UInt,UIntX2>& divisor)
- {
- return divmod(dividend, divisor).second;
- }
- template <typename UInt, typename UIntX2>
- uint_x4<UInt,UIntX2> operator*(const uint_x4<UInt,UIntX2>& a,
- const uint_x4<UInt,UIntX2>& b)
- {
- uint_x4<UInt,UIntX2> r = {0U, 0U, 0U, 0U};
- bool carryin = false;
- bool carryout;
- UIntX2 a0b0 = UIntX2(a.w.v0) * UIntX2(b.w.v0);
- r.w.v0 = UInt(a0b0);
- r.w.v1 = UInt(a0b0 >> 32);
- UIntX2 a1b0 = UIntX2(a.w.v1) * UIntX2(b.w.v0);
- r.w.v2 = UInt(a1b0 >> 32);
- r.w.v1 = addwithcarry(r.w.v1, UInt(a1b0), carryin, &carryout);
- carryin = carryout;
- r.w.v2 = addwithcarry(r.w.v2, UInt(0U), carryin, &carryout);
- carryin = carryout;
- r.w.v3 = addwithcarry(r.w.v3, UInt(0U), carryin, &carryout);
- UIntX2 a0b1 = UIntX2(a.w.v0) * UIntX2(b.w.v1);
- carryin = false;
- r.w.v2 = addwithcarry(r.w.v2, UInt(a0b1 >> 32), carryin, &carryout);
- carryin = carryout;
- r.w.v3 = addwithcarry(r.w.v3, UInt(0U), carryin, &carryout);
- carryin = false;
- r.w.v1 = addwithcarry(r.w.v1, UInt(a0b1), carryin, &carryout);
- carryin = carryout;
- r.w.v2 = addwithcarry(r.w.v2, UInt(0U), carryin, &carryout);
- carryin = carryout;
- r.w.v3 = addwithcarry(r.w.v3, UInt(0U), carryin, &carryout);
- UIntX2 a1b1 = UIntX2(a.w.v1) * UIntX2(b.w.v1);
- carryin = false;
- r.w.v2 = addwithcarry(r.w.v2, UInt(a1b1), carryin, &carryout);
- carryin = carryout;
- r.w.v3 = addwithcarry(r.w.v3, UInt(a1b1 >> 32), carryin, &carryout);
- r.d.v23 += a.d.v01 * b.d.v23 + a.d.v23 * b.d.v01;
- return r;
- }
- template <typename UInt, typename UIntX2>
- uint_x4<UInt,UIntX2> operator+(const uint_x4<UInt,UIntX2>& a,
- const uint_x4<UInt,UIntX2>& b)
- {
- uint_x4<UInt,UIntX2> r = {0U, 0U, 0U, 0U};
- bool carryin = false;
- bool carryout;
- r.w.v0 = addwithcarry(a.w.v0, b.w.v0, carryin, &carryout);
- carryin = carryout;
- r.w.v1 = addwithcarry(a.w.v1, b.w.v1, carryin, &carryout);
- carryin = carryout;
- r.w.v2 = addwithcarry(a.w.v2, b.w.v2, carryin, &carryout);
- carryin = carryout;
- r.w.v3 = addwithcarry(a.w.v3, b.w.v3, carryin, &carryout);
- return r;
- }
- template <typename UInt, typename UIntX2>
- uint_x4<UInt,UIntX2> operator-(const uint_x4<UInt,UIntX2>& a,
- const uint_x4<UInt,UIntX2>& b)
- {
- uint_x4<UInt,UIntX2> r = {0U, 0U, 0U, 0U};
- bool carryin = false;
- bool carryout;
- r.w.v0 = subwithcarry(a.w.v0, b.w.v0, carryin, &carryout);
- carryin = carryout;
- r.w.v1 = subwithcarry(a.w.v1, b.w.v1, carryin, &carryout);
- carryin = carryout;
- r.w.v2 = subwithcarry(a.w.v2, b.w.v2, carryin, &carryout);
- carryin = carryout;
- r.w.v3 = subwithcarry(a.w.v3, b.w.v3, carryin, &carryout);
- return r;
- }
- template <typename UInt, typename UIntX2>
- uint_x4<UInt,UIntX2> operator&(const uint_x4<UInt,UIntX2>& a,
- const uint_x4<UInt,UIntX2>& b)
- {
- return uint_x4<UInt,UIntX2>(a.d.v23 & b.d.v23, a.d.v01 & b.d.v01);
- }
- template <typename UInt, typename UIntX2>
- uint_x4<UInt,UIntX2> operator|(const uint_x4<UInt,UIntX2>& a,
- const uint_x4<UInt,UIntX2>& b)
- {
- return uint_x4<UInt,UIntX2>(a.d.v23 | b.d.v23, a.d.v01 | b.d.v01);
- }
- template <typename UInt, typename UIntX2>
- uint_x4<UInt,UIntX2> operator^(const uint_x4<UInt,UIntX2>& a,
- const uint_x4<UInt,UIntX2>& b)
- {
- return uint_x4<UInt,UIntX2>(a.d.v23 ^ b.d.v23, a.d.v01 ^ b.d.v01);
- }
- template <typename UInt, typename UIntX2>
- uint_x4<UInt,UIntX2> operator~(const uint_x4<UInt,UIntX2>& v)
- {
- return uint_x4<UInt,UIntX2>(~v.d.v23, ~v.d.v01);
- }
- template <typename UInt, typename UIntX2>
- uint_x4<UInt,UIntX2> operator-(const uint_x4<UInt,UIntX2>& v)
- {
- return uint_x4<UInt,UIntX2>(0UL,0UL) - v;
- }
- template <typename UInt, typename UIntX2>
- bool operator==(const uint_x4<UInt,UIntX2>& a, const uint_x4<UInt,UIntX2>& b)
- {
- return (a.d.v01 == b.d.v01) && (a.d.v23 == b.d.v23);
- }
- template <typename UInt, typename UIntX2>
- bool operator!=(const uint_x4<UInt,UIntX2>& a, const uint_x4<UInt,UIntX2>& b)
- {
- return !operator==(a,b);
- }
- template <typename UInt, typename UIntX2>
- bool operator<(const uint_x4<UInt,UIntX2>& a, const uint_x4<UInt,UIntX2>& b)
- {
- return (a.d.v23 < b.d.v23)
- || ((a.d.v23 == b.d.v23) && (a.d.v01 < b.d.v01));
- }
- template <typename UInt, typename UIntX2>
- bool operator>(const uint_x4<UInt,UIntX2>& a, const uint_x4<UInt,UIntX2>& b)
- {
- return operator<(b,a);
- }
- template <typename UInt, typename UIntX2>
- bool operator<=(const uint_x4<UInt,UIntX2>& a, const uint_x4<UInt,UIntX2>& b)
- {
- return !(operator<(b,a));
- }
- template <typename UInt, typename UIntX2>
- bool operator>=(const uint_x4<UInt,UIntX2>& a, const uint_x4<UInt,UIntX2>& b)
- {
- return !(operator<(a,b));
- }
- template <typename UInt, typename UIntX2>
- uint_x4<UInt,UIntX2> operator<<(const uint_x4<UInt,UIntX2>& v,
- const bitcount_t shift)
- {
- uint_x4<UInt,UIntX2> r = {0U, 0U, 0U, 0U};
- const bitcount_t bits = sizeof(UInt) * CHAR_BIT;
- const bitcount_t bitmask = bits - 1;
- const bitcount_t shiftdiv = shift / bits;
- const bitcount_t shiftmod = shift & bitmask;
- if (shiftmod) {
- UInt carryover = 0;
- #if PCG_LITTLE_ENDIAN
- for (uint8_t out = shiftdiv, in = 0; out < 4; ++out, ++in) {
- #else
- for (uint8_t out = 4-shiftdiv, in = 4; out != 0; /* dec in loop */) {
- --out, --in;
- #endif
- r.wa[out] = (v.wa[in] << shiftmod) | carryover;
- carryover = (v.wa[in] >> (bits - shiftmod));
- }
- } else {
- #if PCG_LITTLE_ENDIAN
- for (uint8_t out = shiftdiv, in = 0; out < 4; ++out, ++in) {
- #else
- for (uint8_t out = 4-shiftdiv, in = 4; out != 0; /* dec in loop */) {
- --out, --in;
- #endif
- r.wa[out] = v.wa[in];
- }
- }
- return r;
- }
- template <typename UInt, typename UIntX2>
- uint_x4<UInt,UIntX2> operator>>(const uint_x4<UInt,UIntX2>& v,
- const bitcount_t shift)
- {
- uint_x4<UInt,UIntX2> r = {0U, 0U, 0U, 0U};
- const bitcount_t bits = sizeof(UInt) * CHAR_BIT;
- const bitcount_t bitmask = bits - 1;
- const bitcount_t shiftdiv = shift / bits;
- const bitcount_t shiftmod = shift & bitmask;
- if (shiftmod) {
- UInt carryover = 0;
- #if PCG_LITTLE_ENDIAN
- for (uint8_t out = 4-shiftdiv, in = 4; out != 0; /* dec in loop */) {
- --out, --in;
- #else
- for (uint8_t out = shiftdiv, in = 0; out < 4; ++out, ++in) {
- #endif
- r.wa[out] = (v.wa[in] >> shiftmod) | carryover;
- carryover = (v.wa[in] << (bits - shiftmod));
- }
- } else {
- #if PCG_LITTLE_ENDIAN
- for (uint8_t out = 4-shiftdiv, in = 4; out != 0; /* dec in loop */) {
- --out, --in;
- #else
- for (uint8_t out = shiftdiv, in = 0; out < 4; ++out, ++in) {
- #endif
- r.wa[out] = v.wa[in];
- }
- }
- return r;
- }
- } // namespace pcg_extras
- #endif // PCG_UINT128_HPP_INCLUDED
|