pcg_extras.hpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558
  1. /*
  2. * PCG Random Number Generation for C++
  3. *
  4. * Copyright 2014-2017 Melissa O'Neill <oneill@pcg-random.org>,
  5. * and the PCG Project contributors.
  6. *
  7. * SPDX-License-Identifier: (Apache-2.0 OR MIT)
  8. *
  9. * Licensed under the Apache License, Version 2.0 (provided in
  10. * LICENSE-APACHE.txt and at http://www.apache.org/licenses/LICENSE-2.0)
  11. * or under the MIT license (provided in LICENSE-MIT.txt and at
  12. * http://opensource.org/licenses/MIT), at your option. This file may not
  13. * be copied, modified, or distributed except according to those terms.
  14. *
  15. * Distributed on an "AS IS" BASIS, WITHOUT WARRANTY OF ANY KIND, either
  16. * express or implied. See your chosen license for details.
  17. *
  18. * For additional information about the PCG random number generation scheme,
  19. * visit http://www.pcg-random.org/.
  20. */
  21. /*
  22. * This file provides support code that is useful for random-number generation
  23. * but not specific to the PCG generation scheme, including:
  24. * - 128-bit int support for platforms where it isn't available natively
  25. * - bit twiddling operations
  26. * - I/O of 128-bit and 8-bit integers
  27. * - Handling the evilness of SeedSeq
  28. * - Support for efficiently producing random numbers less than a given
  29. * bound
  30. */
  31. #ifndef PCG_EXTRAS_HPP_INCLUDED
  32. #define PCG_EXTRAS_HPP_INCLUDED 1
  33. #include <cinttypes>
  34. #include <cstddef>
  35. #include <cstdlib>
  36. #include <cstring>
  37. #include <cassert>
  38. #include <limits>
  39. #include <iostream>
  40. #include <type_traits>
  41. #include <utility>
  42. #include <locale>
  43. #include <iterator>
  44. #ifdef __GNUC__
  45. #include <cxxabi.h>
  46. #endif
  47. // NOLINTBEGIN(readability-identifier-naming, modernize-use-using, bugprone-macro-parentheses, google-explicit-constructor)
  48. /*
  49. * Abstractions for compiler-specific directives
  50. */
  51. #ifdef __GNUC__
  52. #define PCG_NOINLINE __attribute__((noinline))
  53. #else
  54. #define PCG_NOINLINE
  55. #endif
  56. /*
  57. * Some members of the PCG library use 128-bit math. When compiling on 64-bit
  58. * platforms, both GCC and Clang provide 128-bit integer types that are ideal
  59. * for the job.
  60. *
  61. * On 32-bit platforms (or with other compilers), we fall back to a C++
  62. * class that provides 128-bit unsigned integers instead. It may seem
  63. * like we're reinventing the wheel here, because libraries already exist
  64. * that support large integers, but most existing libraries provide a very
  65. * generic multiprecision code, but here we're operating at a fixed size.
  66. * Also, most other libraries are fairly heavyweight. So we use a direct
  67. * implementation. Sadly, it's much slower than hand-coded assembly or
  68. * direct CPU support.
  69. *
  70. */
  71. #if __SIZEOF_INT128__
  72. namespace pcg_extras {
  73. typedef __uint128_t pcg128_t;
  74. }
  75. #define PCG_128BIT_CONSTANT(high,low) \
  76. ((pcg128_t(high) << 64) + low)
  77. #else
  78. #include "pcg_uint128.hpp"
  79. namespace pcg_extras {
  80. typedef pcg_extras::uint_x4<uint32_t,uint64_t> pcg128_t;
  81. }
  82. #define PCG_128BIT_CONSTANT(high,low) \
  83. pcg128_t(high,low)
  84. #define PCG_EMULATED_128BIT_MATH 1
  85. #endif
  86. namespace pcg_extras {
  87. /*
  88. * We often need to represent a "number of bits". When used normally, these
  89. * numbers are never greater than 128, so an unsigned char is plenty.
  90. * If you're using a nonstandard generator of a larger size, you can set
  91. * PCG_BITCOUNT_T to have it define it as a larger size. (Some compilers
  92. * might produce faster code if you set it to an unsigned int.)
  93. */
  94. #ifndef PCG_BITCOUNT_T
  95. typedef uint8_t bitcount_t;
  96. #else
  97. typedef PCG_BITCOUNT_T bitcount_t;
  98. #endif
  99. /*
  100. * C++ requires us to be able to serialize RNG state by printing or reading
  101. * it from a stream. Because we use 128-bit ints, we also need to be able
  102. * or print them, so here is code to do so.
  103. *
  104. * This code provides enough functionality to print 128-bit ints in decimal
  105. * and zero-padded in hex. It's not a full-featured implementation.
  106. */
  107. template <typename CharT, typename Traits>
  108. std::basic_ostream<CharT,Traits>&
  109. operator<<(std::basic_ostream<CharT,Traits>& out, pcg128_t value)
  110. {
  111. auto desired_base = out.flags() & out.basefield;
  112. bool want_hex = desired_base == out.hex;
  113. if (want_hex) {
  114. uint64_t highpart = uint64_t(value >> 64);
  115. uint64_t lowpart = uint64_t(value);
  116. auto desired_width = out.width();
  117. if (desired_width > 16) {
  118. out.width(desired_width - 16);
  119. }
  120. if (highpart != 0 || desired_width > 16)
  121. out << highpart;
  122. CharT oldfill = '\0';
  123. if (highpart != 0) {
  124. out.width(16);
  125. oldfill = out.fill('0');
  126. }
  127. auto oldflags = out.setf(decltype(desired_base){}, out.showbase);
  128. out << lowpart;
  129. out.setf(oldflags);
  130. if (highpart != 0) {
  131. out.fill(oldfill);
  132. }
  133. return out;
  134. }
  135. constexpr size_t MAX_CHARS_128BIT = 40;
  136. char buffer[MAX_CHARS_128BIT];
  137. char* pos = buffer+sizeof(buffer);
  138. *(--pos) = '\0';
  139. constexpr auto BASE = pcg128_t(10ULL);
  140. do {
  141. auto div = value / BASE;
  142. auto mod = uint32_t(value - (div * BASE));
  143. *(--pos) = '0' + char(mod);
  144. value = div;
  145. } while(value != pcg128_t(0ULL));
  146. return out << pos;
  147. }
  148. template <typename CharT, typename Traits>
  149. std::basic_istream<CharT,Traits>&
  150. operator>>(std::basic_istream<CharT,Traits>& in, pcg128_t& value)
  151. {
  152. typename std::basic_istream<CharT,Traits>::sentry s(in);
  153. if (!s)
  154. return in;
  155. constexpr auto BASE = pcg128_t(10ULL);
  156. pcg128_t current(0ULL);
  157. bool did_nothing = true;
  158. bool overflow = false;
  159. for(;;) {
  160. CharT wide_ch = in.get();
  161. if (!in.good())
  162. break;
  163. auto ch = in.narrow(wide_ch, '\0');
  164. if (ch < '0' || ch > '9') {
  165. in.unget();
  166. break;
  167. }
  168. did_nothing = false;
  169. pcg128_t digit(uint32_t(ch - '0'));
  170. pcg128_t timesbase = current*BASE;
  171. overflow = overflow || timesbase < current;
  172. current = timesbase + digit;
  173. overflow = overflow || current < digit;
  174. }
  175. if (did_nothing || overflow) {
  176. in.setstate(std::ios::failbit);
  177. if (overflow)
  178. current = ~pcg128_t(0ULL);
  179. }
  180. value = current;
  181. return in;
  182. }
  183. /*
  184. * Likewise, if people use tiny rngs, we'll be serializing uint8_t.
  185. * If we just used the provided IO operators, they'd read/write chars,
  186. * not ints, so we need to define our own. We *can* redefine this operator
  187. * here because we're in our own namespace.
  188. */
  189. template <typename CharT, typename Traits>
  190. std::basic_ostream<CharT,Traits>&
  191. operator<<(std::basic_ostream<CharT,Traits>&out, uint8_t value)
  192. {
  193. return out << uint32_t(value);
  194. }
  195. template <typename CharT, typename Traits>
  196. std::basic_istream<CharT,Traits>&
  197. operator>>(std::basic_istream<CharT,Traits>& in, uint8_t& target)
  198. {
  199. uint32_t value = 0xdecea5edU;
  200. in >> value;
  201. if (!in && value == 0xdecea5edU)
  202. return in;
  203. if (value > uint8_t(~0)) {
  204. in.setstate(std::ios::failbit);
  205. value = ~0U;
  206. }
  207. target = uint8_t(value);
  208. return in;
  209. }
  210. /* Unfortunately, the above functions don't get found in preference to the
  211. * built in ones, so we create some more specific overloads that will.
  212. * Ugh.
  213. */
  214. inline std::ostream& operator<<(std::ostream& out, uint8_t value)
  215. {
  216. return pcg_extras::operator<< <char>(out, value);
  217. }
  218. inline std::istream& operator>>(std::istream& in, uint8_t& value)
  219. {
  220. return pcg_extras::operator>> <char>(in, value);
  221. }
  222. /*
  223. * Useful bitwise operations.
  224. */
  225. /*
  226. * XorShifts are invertable, but they are something of a pain to invert.
  227. * This function backs them out. It's used by the whacky "inside out"
  228. * generator defined later.
  229. */
  230. template <typename itype>
  231. inline itype unxorshift(itype x, bitcount_t bits, bitcount_t shift)
  232. {
  233. if (2*shift >= bits) {
  234. return x ^ (x >> shift);
  235. }
  236. itype lowmask1 = (itype(1U) << (bits - shift*2)) - 1;
  237. itype highmask1 = ~lowmask1;
  238. itype top1 = x;
  239. itype bottom1 = x & lowmask1;
  240. top1 ^= top1 >> shift;
  241. top1 &= highmask1;
  242. x = top1 | bottom1;
  243. itype lowmask2 = (itype(1U) << (bits - shift)) - 1;
  244. itype bottom2 = x & lowmask2;
  245. bottom2 = unxorshift(bottom2, bits - shift, shift);
  246. bottom2 &= lowmask1;
  247. return top1 | bottom2;
  248. }
  249. /*
  250. * Rotate left and right.
  251. *
  252. * In ideal world, compilers would spot idiomatic rotate code and convert it
  253. * to a rotate instruction. Of course, opinions vary on what the correct
  254. * idiom is and how to spot it. For clang, sometimes it generates better
  255. * (but still crappy) code if you define PCG_USE_ZEROCHECK_ROTATE_IDIOM.
  256. */
  257. template <typename itype>
  258. inline itype rotl(itype value, bitcount_t rot)
  259. {
  260. constexpr bitcount_t bits = sizeof(itype) * 8;
  261. constexpr bitcount_t mask = bits - 1;
  262. #if defined(PCG_USE_ZEROCHECK_ROTATE_IDIOM)
  263. return rot ? (value << rot) | (value >> (bits - rot)) : value;
  264. #else
  265. return (value << rot) | (value >> ((- rot) & mask));
  266. #endif
  267. }
  268. template <typename itype>
  269. inline itype rotr(itype value, bitcount_t rot)
  270. {
  271. constexpr bitcount_t bits = sizeof(itype) * 8;
  272. constexpr bitcount_t mask = bits - 1;
  273. #if defined(PCG_USE_ZEROCHECK_ROTATE_IDIOM)
  274. return rot ? (value >> rot) | (value << (bits - rot)) : value;
  275. #else
  276. return (value >> rot) | (value << ((- rot) & mask));
  277. #endif
  278. }
  279. /* Unfortunately, both Clang and GCC sometimes perform poorly when it comes
  280. * to properly recognizing idiomatic rotate code, so for we also provide
  281. * assembler directives (enabled with PCG_USE_INLINE_ASM). Boo, hiss.
  282. * (I hope that these compilers get better so that this code can die.)
  283. *
  284. * These overloads will be preferred over the general template code above.
  285. */
  286. #if defined(PCG_USE_INLINE_ASM) && __GNUC__ && (__x86_64__ || __i386__)
  287. inline uint8_t rotr(uint8_t value, bitcount_t rot)
  288. {
  289. asm ("rorb %%cl, %0" : "=r" (value) : "0" (value), "c" (rot));
  290. return value;
  291. }
  292. inline uint16_t rotr(uint16_t value, bitcount_t rot)
  293. {
  294. asm ("rorw %%cl, %0" : "=r" (value) : "0" (value), "c" (rot));
  295. return value;
  296. }
  297. inline uint32_t rotr(uint32_t value, bitcount_t rot)
  298. {
  299. asm ("rorl %%cl, %0" : "=r" (value) : "0" (value), "c" (rot));
  300. return value;
  301. }
  302. #if __x86_64__
  303. inline uint64_t rotr(uint64_t value, bitcount_t rot)
  304. {
  305. asm ("rorq %%cl, %0" : "=r" (value) : "0" (value), "c" (rot));
  306. return value;
  307. }
  308. #endif // __x86_64__
  309. #endif // PCG_USE_INLINE_ASM
  310. /*
  311. * The C++ SeedSeq concept (modelled by seed_seq) can fill an array of
  312. * 32-bit integers with seed data, but sometimes we want to produce
  313. * larger or smaller integers.
  314. *
  315. * The following code handles this annoyance.
  316. *
  317. * uneven_copy will copy an array of 32-bit ints to an array of larger or
  318. * smaller ints (actually, the code is general it only needing forward
  319. * iterators). The copy is identical to the one that would be performed if
  320. * we just did memcpy on a standard little-endian machine, but works
  321. * regardless of the endian of the machine (or the weirdness of the ints
  322. * involved).
  323. *
  324. * generate_to initializes an array of integers using a SeedSeq
  325. * object. It is given the size as a static constant at compile time and
  326. * tries to avoid memory allocation. If we're filling in 32-bit constants
  327. * we just do it directly. If we need a separate buffer and it's small,
  328. * we allocate it on the stack. Otherwise, we fall back to heap allocation.
  329. * Ugh.
  330. *
  331. * generate_one produces a single value of some integral type using a
  332. * SeedSeq object.
  333. */
  334. /* uneven_copy helper, case where destination ints are less than 32 bit. */
  335. template<class SrcIter, class DestIter>
  336. SrcIter uneven_copy_impl(
  337. SrcIter src_first, DestIter dest_first, DestIter dest_last,
  338. std::true_type)
  339. {
  340. typedef typename std::iterator_traits<SrcIter>::value_type src_t;
  341. typedef typename std::iterator_traits<DestIter>::value_type dest_t;
  342. constexpr bitcount_t SRC_SIZE = sizeof(src_t);
  343. constexpr bitcount_t DEST_SIZE = sizeof(dest_t);
  344. constexpr bitcount_t DEST_BITS = DEST_SIZE * 8;
  345. constexpr bitcount_t SCALE = SRC_SIZE / DEST_SIZE;
  346. size_t count = 0;
  347. src_t value = 0;
  348. while (dest_first != dest_last) {
  349. if ((count++ % SCALE) == 0)
  350. value = *src_first++; // Get more bits
  351. else
  352. value >>= DEST_BITS; // Move down bits
  353. *dest_first++ = dest_t(value); // Truncates, ignores high bits.
  354. }
  355. return src_first;
  356. }
  357. /* uneven_copy helper, case where destination ints are more than 32 bit. */
  358. template<class SrcIter, class DestIter>
  359. SrcIter uneven_copy_impl(
  360. SrcIter src_first, DestIter dest_first, DestIter dest_last,
  361. std::false_type)
  362. {
  363. typedef typename std::iterator_traits<SrcIter>::value_type src_t;
  364. typedef typename std::iterator_traits<DestIter>::value_type dest_t;
  365. constexpr auto SRC_SIZE = sizeof(src_t);
  366. constexpr auto SRC_BITS = SRC_SIZE * 8;
  367. constexpr auto DEST_SIZE = sizeof(dest_t);
  368. constexpr auto SCALE = (DEST_SIZE+SRC_SIZE-1) / SRC_SIZE;
  369. while (dest_first != dest_last) {
  370. dest_t value(0UL);
  371. unsigned int shift = 0;
  372. for (size_t i = 0; i < SCALE; ++i) {
  373. value |= dest_t(*src_first++) << shift;
  374. shift += SRC_BITS;
  375. }
  376. *dest_first++ = value;
  377. }
  378. return src_first;
  379. }
  380. /* uneven_copy, call the right code for larger vs. smaller */
  381. template<class SrcIter, class DestIter>
  382. inline SrcIter uneven_copy(SrcIter src_first,
  383. DestIter dest_first, DestIter dest_last)
  384. {
  385. typedef typename std::iterator_traits<SrcIter>::value_type src_t;
  386. typedef typename std::iterator_traits<DestIter>::value_type dest_t;
  387. constexpr bool DEST_IS_SMALLER = sizeof(dest_t) < sizeof(src_t);
  388. return uneven_copy_impl(src_first, dest_first, dest_last,
  389. std::integral_constant<bool, DEST_IS_SMALLER>{});
  390. }
  391. template <typename RngType>
  392. auto bounded_rand(RngType& rng, typename RngType::result_type upper_bound)
  393. -> typename RngType::result_type
  394. {
  395. typedef typename RngType::result_type rtype;
  396. rtype threshold = (RngType::max() - RngType::min() + rtype(1) - upper_bound)
  397. % upper_bound;
  398. for (;;) {
  399. rtype r = rng() - RngType::min();
  400. if (r >= threshold)
  401. return r % upper_bound;
  402. }
  403. }
  404. template <typename Iter, typename RandType>
  405. void shuffle(Iter from, Iter to, RandType&& rng)
  406. {
  407. typedef typename std::iterator_traits<Iter>::difference_type delta_t;
  408. typedef typename std::remove_reference<RandType>::type::result_type result_t;
  409. auto count = to - from;
  410. while (count > 1) {
  411. delta_t chosen = delta_t(bounded_rand(rng, result_t(count)));
  412. --count;
  413. --to;
  414. using std::swap;
  415. swap(*(from + chosen), *to);
  416. }
  417. }
  418. /*
  419. * Although std::seed_seq is useful, it isn't everything. Often we want to
  420. * initialize a random-number generator some other way, such as from a random
  421. * device.
  422. *
  423. * Technically, it does not meet the requirements of a SeedSequence because
  424. * it lacks some of the rarely-used member functions (some of which would
  425. * be impossible to provide). However the C++ standard is quite specific
  426. * that actual engines only called the generate method, so it ought not to be
  427. * a problem in practice.
  428. */
  429. template <typename RngType>
  430. class seed_seq_from {
  431. private:
  432. RngType rng_;
  433. typedef uint_least32_t result_type;
  434. public:
  435. template<typename... Args>
  436. seed_seq_from(Args&&... args) :
  437. rng_(std::forward<Args>(args)...)
  438. {
  439. // Nothing (else) to do...
  440. }
  441. template<typename Iter>
  442. void generate(Iter start, Iter finish)
  443. {
  444. for (auto i = start; i != finish; ++i)
  445. *i = result_type(rng_());
  446. }
  447. constexpr size_t size() const
  448. {
  449. return (sizeof(typename RngType::result_type) > sizeof(result_type)
  450. && RngType::max() > ~size_t(0UL))
  451. ? ~size_t(0UL)
  452. : size_t(RngType::max());
  453. }
  454. };
  455. // Sometimes, when debugging or testing, it's handy to be able print the name
  456. // of a (in human-readable form). This code allows the idiom:
  457. //
  458. // cout << printable_typename<my_foo_type_t>()
  459. //
  460. // to print out my_foo_type_t (or its concrete type if it is a synonym)
  461. #if __cpp_rtti || __GXX_RTTI
  462. template <typename T>
  463. struct printable_typename {};
  464. template <typename T>
  465. std::ostream& operator<<(std::ostream& out, printable_typename<T>) {
  466. const char *implementation_typename = typeid(T).name();
  467. #ifdef __GNUC__
  468. int status;
  469. char* pretty_name =
  470. abi::__cxa_demangle(implementation_typename, nullptr, nullptr, &status);
  471. if (status == 0)
  472. out << pretty_name;
  473. free(static_cast<void*>(pretty_name));
  474. if (status == 0)
  475. return out;
  476. #endif
  477. out << implementation_typename;
  478. return out;
  479. }
  480. #endif // __cpp_rtti || __GXX_RTTI
  481. } // namespace pcg_extras
  482. // NOLINTEND(readability-identifier-naming, modernize-use-using, bugprone-macro-parentheses, google-explicit-constructor)
  483. #endif // PCG_EXTRAS_HPP_INCLUDED