random.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339
  1. /* ----------------------------------------------------------------------------
  2. Copyright (c) 2019-2021, Microsoft Research, Daan Leijen
  3. This is free software; you can redistribute it and/or modify it under the
  4. terms of the MIT license. A copy of the license can be found in the file
  5. "LICENSE" at the root of this distribution.
  6. -----------------------------------------------------------------------------*/
  7. #include "mimalloc.h"
  8. #include "mimalloc-internal.h"
  9. #include <string.h> // memset
  10. /* ----------------------------------------------------------------------------
  11. We use our own PRNG to keep predictable performance of random number generation
  12. and to avoid implementations that use a lock. We only use the OS provided
  13. random source to initialize the initial seeds. Since we do not need ultimate
  14. performance but we do rely on the security (for secret cookies in secure mode)
  15. we use a cryptographically secure generator (chacha20).
  16. -----------------------------------------------------------------------------*/
  17. #define MI_CHACHA_ROUNDS (20) // perhaps use 12 for better performance?
  18. /* ----------------------------------------------------------------------------
  19. Chacha20 implementation as the original algorithm with a 64-bit nonce
  20. and counter: https://en.wikipedia.org/wiki/Salsa20
  21. The input matrix has sixteen 32-bit values:
  22. Position 0 to 3: constant key
  23. Position 4 to 11: the key
  24. Position 12 to 13: the counter.
  25. Position 14 to 15: the nonce.
  26. The implementation uses regular C code which compiles very well on modern compilers.
  27. (gcc x64 has no register spills, and clang 6+ uses SSE instructions)
  28. -----------------------------------------------------------------------------*/
  // Rotate the 32-bit value `x` left by `shift` bit positions.
  // Both shift amounts are reduced modulo 32 so that a rotation by 0 (or by any
  // multiple of 32) never shifts by the full width of the type, which would be
  // undefined behavior in C. For the amounts used by the quarter round
  // (16, 12, 8 and 7) the result is the same as the plain expression.
  static inline uint32_t rotl(uint32_t x, uint32_t shift) {
    return (x << (shift & 31)) | (x >> ((32 - shift) & 31));
  }
  // Quarter round on the four state words x[a], x[b], x[c] and x[d].
  // It performs four add / xor / rotate-left steps, using the rotation
  // amounts 16, 12, 8 and 7 in that order, and updates `x` in place.
  static inline void qround(uint32_t x[16], size_t a, size_t b, size_t c, size_t d) {
    x[a] += x[b];
    x[d] ^= x[a];
    x[d] = rotl(x[d], 16);

    x[c] += x[d];
    x[b] ^= x[c];
    x[b] = rotl(x[b], 12);

    x[a] += x[b];
    x[d] ^= x[a];
    x[d] = rotl(x[d], 8);

    x[c] += x[d];
    x[b] ^= x[c];
    x[b] = rotl(x[b], 7);
  }
  38. static void chacha_block(mi_random_ctx_t* ctx)
  39. {
  40. // scramble into `x`
  41. uint32_t x[16];
  42. for (size_t i = 0; i < 16; i++) {
  43. x[i] = ctx->input[i];
  44. }
  45. for (size_t i = 0; i < MI_CHACHA_ROUNDS; i += 2) {
  46. qround(x, 0, 4, 8, 12);
  47. qround(x, 1, 5, 9, 13);
  48. qround(x, 2, 6, 10, 14);
  49. qround(x, 3, 7, 11, 15);
  50. qround(x, 0, 5, 10, 15);
  51. qround(x, 1, 6, 11, 12);
  52. qround(x, 2, 7, 8, 13);
  53. qround(x, 3, 4, 9, 14);
  54. }
  55. // add scrambled data to the initial state
  56. for (size_t i = 0; i < 16; i++) {
  57. ctx->output[i] = x[i] + ctx->input[i];
  58. }
  59. ctx->output_available = 16;
  60. // increment the counter for the next round
  61. ctx->input[12] += 1;
  62. if (ctx->input[12] == 0) {
  63. ctx->input[13] += 1;
  64. if (ctx->input[13] == 0) { // and keep increasing into the nonce
  65. ctx->input[14] += 1;
  66. }
  67. }
  68. }
  69. static uint32_t chacha_next32(mi_random_ctx_t* ctx) {
  70. if (ctx->output_available <= 0) {
  71. chacha_block(ctx);
  72. ctx->output_available = 16; // (assign again to suppress static analysis warning)
  73. }
  74. const uint32_t x = ctx->output[16 - ctx->output_available];
  75. ctx->output[16 - ctx->output_available] = 0; // reset once the data is handed out
  76. ctx->output_available--;
  77. return x;
  78. }
  // Read the `idx32`-th 32-bit word from the byte array `p`, interpreting the
  // four bytes at offset 4*idx32 as a little-endian value (lowest byte first).
  static inline uint32_t read32(const uint8_t* p, size_t idx32) {
    const uint8_t* const bytes = p + 4*idx32;
    uint32_t value = 0;
    // fold in the bytes from most significant (offset 3) down to offset 0
    for (size_t k = 4; k > 0; k--) {
      value = (value << 8) | bytes[k - 1];
    }
    return value;
  }
  83. static void chacha_init(mi_random_ctx_t* ctx, const uint8_t key[32], uint64_t nonce)
  84. {
  85. // since we only use chacha for randomness (and not encryption) we
  86. // do not _need_ to read 32-bit values as little endian but we do anyways
  87. // just for being compatible :-)
  88. memset(ctx, 0, sizeof(*ctx));
  89. for (size_t i = 0; i < 4; i++) {
  90. const uint8_t* sigma = (uint8_t*)"expand 32-byte k";
  91. ctx->input[i] = read32(sigma,i);
  92. }
  93. for (size_t i = 0; i < 8; i++) {
  94. ctx->input[i + 4] = read32(key,i);
  95. }
  96. ctx->input[12] = 0;
  97. ctx->input[13] = 0;
  98. ctx->input[14] = (uint32_t)nonce;
  99. ctx->input[15] = (uint32_t)(nonce >> 32);
  100. }
  101. static void chacha_split(mi_random_ctx_t* ctx, uint64_t nonce, mi_random_ctx_t* ctx_new) {
  102. memset(ctx_new, 0, sizeof(*ctx_new));
  103. _mi_memcpy(ctx_new->input, ctx->input, sizeof(ctx_new->input));
  104. ctx_new->input[12] = 0;
  105. ctx_new->input[13] = 0;
  106. ctx_new->input[14] = (uint32_t)nonce;
  107. ctx_new->input[15] = (uint32_t)(nonce >> 32);
  108. mi_assert_internal(ctx->input[14] != ctx_new->input[14] || ctx->input[15] != ctx_new->input[15]); // do not reuse nonces!
  109. chacha_block(ctx_new);
  110. }
  111. /* ----------------------------------------------------------------------------
  112. Random interface
  113. -----------------------------------------------------------------------------*/
  #if MI_DEBUG>1
  // Debug-only helper for assertions: a context counts as initialized when it
  // is non-NULL and the first word of its input matrix is non-zero.
  static bool mi_random_is_initialized(mi_random_ctx_t* ctx) {
    if (ctx == NULL) {
      return false;
    }
    return (ctx->input[0] != 0);
  }
  #endif
  119. void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* ctx_new) {
  120. mi_assert_internal(mi_random_is_initialized(ctx));
  121. mi_assert_internal(ctx != ctx_new);
  122. chacha_split(ctx, (uintptr_t)ctx_new /*nonce*/, ctx_new);
  123. }
  124. uintptr_t _mi_random_next(mi_random_ctx_t* ctx) {
  125. mi_assert_internal(mi_random_is_initialized(ctx));
  126. #if MI_INTPTR_SIZE <= 4
  127. return chacha_next32(ctx);
  128. #elif MI_INTPTR_SIZE == 8
  129. return (((uintptr_t)chacha_next32(ctx) << 32) | chacha_next32(ctx));
  130. #else
  131. # error "define mi_random_next for this platform"
  132. #endif
  133. }
  134. /* ----------------------------------------------------------------------------
  135. To initialize a fresh random context we rely on the OS:
  136. - Windows : BCryptGenRandom (or RtlGenRandom)
  137. - osX,bsd,wasi: arc4random_buf
  138. - Linux : getrandom,/dev/urandom
  139. If we cannot get good randomness, we fall back to weak randomness based on a timer and ASLR.
  140. -----------------------------------------------------------------------------*/
  141. #if defined(_WIN32)
  142. #if !defined(MI_USE_RTLGENRANDOM)
  143. // We prefer BCryptGenRandom over RtlGenRandom
  144. #pragma comment (lib,"bcrypt.lib")
  145. #include <bcrypt.h>
  146. static bool os_random_buf(void* buf, size_t buf_len) {
  147. return (BCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0);
  148. }
  149. #else
  150. // Use (unofficial) RtlGenRandom
  151. #pragma comment (lib,"advapi32.lib")
  152. #define RtlGenRandom SystemFunction036
  153. #ifdef __cplusplus
  154. extern "C" {
  155. #endif
  156. BOOLEAN NTAPI RtlGenRandom(PVOID RandomBuffer, ULONG RandomBufferLength);
  157. #ifdef __cplusplus
  158. }
  159. #endif
  160. static bool os_random_buf(void* buf, size_t buf_len) {
  161. return (RtlGenRandom(buf, (ULONG)buf_len) != 0);
  162. }
  163. #endif
  164. #elif defined(ANDROID) || defined(XP_DARWIN) || defined(__APPLE__) || defined(__DragonFly__) || \
  165. defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
  166. defined(__sun) || defined(__wasi__)
  167. #include <stdlib.h>
  // Fill `buf` with `buf_len` random bytes using arc4random_buf.
  // arc4random_buf returns no status, so this variant always reports success.
  static bool os_random_buf(void* buf, size_t buf_len)
  {
    arc4random_buf(buf, buf_len);
    return true;
  }
  172. #elif defined(__linux__)
  173. #include <sys/syscall.h>
  174. #include <unistd.h>
  175. #include <sys/types.h>
  176. #include <sys/stat.h>
  177. #include <fcntl.h>
  178. #include <errno.h>
  // Fill `buf` with `buf_len` random bytes from the Linux kernel.
  // Returns true only when the full buffer was filled.
  //
  // Modern Linux provides `getrandom` but different distributions either use `sys/random.h` or `linux/random.h`
  // and for the latter the actual `getrandom` call is not always defined.
  // (see <https://stackoverflow.com/questions/45237324/why-doesnt-getrandom-compile>)
  // We therefore use a syscall directly and fall back dynamically to /dev/urandom when needed.
  static bool os_random_buf(void* buf, size_t buf_len) {
  #ifdef SYS_getrandom
    #ifndef GRND_NONBLOCK
    #define GRND_NONBLOCK (1)
    #endif
    static _Atomic(uintptr_t) no_getrandom; // = 0
    if (mi_atomic_load_acquire(&no_getrandom)==0) {
      ssize_t ret = syscall(SYS_getrandom, buf, buf_len, GRND_NONBLOCK);
      if (ret >= 0) return (buf_len == (size_t)ret);
      // `syscall` signals failure by returning -1 with the cause in `errno`,
      // so the error code must be read from `errno` and not from `ret`.
      if (errno != ENOSYS) return false;
      mi_atomic_store_release(&no_getrandom, 1UL); // don't call again, and fall back to /dev/urandom
    }
  #endif
    int flags = O_RDONLY;
  #if defined(O_CLOEXEC)
    flags |= O_CLOEXEC;
  #endif
    int fd = open("/dev/urandom", flags, 0);
    if (fd < 0) return false;
    size_t count = 0;
    while (count < buf_len) {
      ssize_t ret = read(fd, (char*)buf + count, buf_len - count);
      if (ret > 0) {
        count += (size_t)ret;
      }
      else if (ret == 0) {
        // end of file: `errno` is not set by `read` here, so do not consult
        // a stale value (which could make the loop retry forever)
        break;
      }
      else if (errno != EAGAIN && errno != EINTR) {
        break;  // a real error; only EAGAIN and EINTR are retried
      }
    }
    close(fd);
    return (count==buf_len);
  }
  215. #else
  // Fallback for platforms without a known OS random source: the buffer is
  // left untouched and failure is reported so the caller can use weak randomness.
  static bool os_random_buf(void* buf, size_t buf_len) {
    (void)buf;
    (void)buf_len;
    return false;
  }
  219. #endif
  220. #if defined(_WIN32)
  221. #include <windows.h>
  222. #elif defined(__APPLE__)
  223. #include <mach/mach_time.h>
  224. #else
  225. #include <time.h>
  226. #endif
  227. uintptr_t _os_random_weak(uintptr_t extra_seed) {
  228. uintptr_t x = (uintptr_t)&_os_random_weak ^ extra_seed; // ASLR makes the address random
  229. #if defined(_WIN32)
  230. LARGE_INTEGER pcount;
  231. QueryPerformanceCounter(&pcount);
  232. x ^= (uintptr_t)(pcount.QuadPart);
  233. #elif defined(__APPLE__)
  234. x ^= (uintptr_t)mach_absolute_time();
  235. #else
  236. struct timespec time;
  237. clock_gettime(CLOCK_MONOTONIC, &time);
  238. x ^= (uintptr_t)time.tv_sec;
  239. x ^= (uintptr_t)time.tv_nsec;
  240. #endif
  241. // and do a few randomization steps
  242. uintptr_t max = ((x ^ (x >> 17)) & 0x0F) + 1;
  243. for (uintptr_t i = 0; i < max; i++) {
  244. x = _mi_random_shuffle(x);
  245. }
  246. mi_assert_internal(x != 0);
  247. return x;
  248. }
  249. void _mi_random_init(mi_random_ctx_t* ctx) {
  250. uint8_t key[32];
  251. if (!os_random_buf(key, sizeof(key))) {
  252. // if we fail to get random data from the OS, we fall back to a
  253. // weak random source based on the current time
  254. _mi_warning_message("unable to use secure randomness\n");
  255. uintptr_t x = _os_random_weak(0);
  256. for (size_t i = 0; i < 8; i++) { // key is eight 32-bit words.
  257. x = _mi_random_shuffle(x);
  258. ((uint32_t*)key)[i] = (uint32_t)x;
  259. }
  260. }
  261. chacha_init(ctx, key, (uintptr_t)ctx /*nonce*/ );
  262. }
  263. /* --------------------------------------------------------
  264. test vectors from <https://tools.ietf.org/html/rfc8439>
  265. ----------------------------------------------------------- */
  266. /*
  267. static bool array_equals(uint32_t* x, uint32_t* y, size_t n) {
  268. for (size_t i = 0; i < n; i++) {
  269. if (x[i] != y[i]) return false;
  270. }
  271. return true;
  272. }
  273. static void chacha_test(void)
  274. {
  275. uint32_t x[4] = { 0x11111111, 0x01020304, 0x9b8d6f43, 0x01234567 };
  276. uint32_t x_out[4] = { 0xea2a92f4, 0xcb1cf8ce, 0x4581472e, 0x5881c4bb };
  277. qround(x, 0, 1, 2, 3);
  278. mi_assert_internal(array_equals(x, x_out, 4));
  279. uint32_t y[16] = {
  280. 0x879531e0, 0xc5ecf37d, 0x516461b1, 0xc9a62f8a,
  281. 0x44c20ef3, 0x3390af7f, 0xd9fc690b, 0x2a5f714c,
  282. 0x53372767, 0xb00a5631, 0x974c541a, 0x359e9963,
  283. 0x5c971061, 0x3d631689, 0x2098d9d6, 0x91dbd320 };
  284. uint32_t y_out[16] = {
  285. 0x879531e0, 0xc5ecf37d, 0xbdb886dc, 0xc9a62f8a,
  286. 0x44c20ef3, 0x3390af7f, 0xd9fc690b, 0xcfacafd2,
  287. 0xe46bea80, 0xb00a5631, 0x974c541a, 0x359e9963,
  288. 0x5c971061, 0xccc07c79, 0x2098d9d6, 0x91dbd320 };
  289. qround(y, 2, 7, 8, 13);
  290. mi_assert_internal(array_equals(y, y_out, 16));
  291. mi_random_ctx_t r = {
  292. { 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574,
  293. 0x03020100, 0x07060504, 0x0b0a0908, 0x0f0e0d0c,
  294. 0x13121110, 0x17161514, 0x1b1a1918, 0x1f1e1d1c,
  295. 0x00000001, 0x09000000, 0x4a000000, 0x00000000 },
  296. {0},
  297. 0
  298. };
  299. uint32_t r_out[16] = {
  300. 0xe4e7f110, 0x15593bd1, 0x1fdd0f50, 0xc47120a3,
  301. 0xc7f4d1c7, 0x0368c033, 0x9aaa2204, 0x4e6cd4c3,
  302. 0x466482d2, 0x09aa9f07, 0x05d7c214, 0xa2028bd9,
  303. 0xd19c12b5, 0xb94e16de, 0xe883d0cb, 0x4e3c50a2 };
  304. chacha_block(&r);
  305. mi_assert_internal(array_equals(r.output, r_out, 16));
  306. }
  307. */