/* ----------------------------------------------------------------------------
Copyright (c) 2018-2021 Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
#pragma once
#ifndef MIMALLOC_ATOMIC_H
#define MIMALLOC_ATOMIC_H

// --------------------------------------------------------------------------------------------
// Atomics
// We need to be portable between C, C++, and MSVC.
// We base the primitives on the C/C++ atomics and create a minimal wrapper for MSVC in C compilation mode.
// This is why we try to use only `uintptr_t` and `<type>*` as atomic types.
// To gain better insight into the range of atomics used, we use explicitly named memory order operations
// instead of passing the memory order as a parameter.
// -----------------------------------------------------------------------------------------------
#if defined(__cplusplus)
// Use C++ atomics
#include <atomic>
#define _Atomic(tp)           std::atomic<tp>
#define mi_atomic(name)       std::atomic_##name
#define mi_memory_order(name) std::memory_order_##name
#elif defined(_MSC_VER)
// Use MSVC C wrapper for C11 atomics
#define _Atomic(tp)           tp
#define ATOMIC_VAR_INIT(x)    x
#define mi_atomic(name)       mi_atomic_##name
#define mi_memory_order(name) mi_memory_order_##name
#else
// Use C11 atomics
#include <stdatomic.h>
#define mi_atomic(name)       atomic_##name
#define mi_memory_order(name) memory_order_##name
#endif

// Various defines for all used memory orders in mimalloc
#define mi_atomic_cas_weak(p,expected,desired,mem_success,mem_fail)  \
  mi_atomic(compare_exchange_weak_explicit)(p,expected,desired,mem_success,mem_fail)
#define mi_atomic_cas_strong(p,expected,desired,mem_success,mem_fail)  \
  mi_atomic(compare_exchange_strong_explicit)(p,expected,desired,mem_success,mem_fail)

#define mi_atomic_load_acquire(p)                mi_atomic(load_explicit)(p,mi_memory_order(acquire))
#define mi_atomic_load_relaxed(p)                mi_atomic(load_explicit)(p,mi_memory_order(relaxed))
#define mi_atomic_store_release(p,x)             mi_atomic(store_explicit)(p,x,mi_memory_order(release))
#define mi_atomic_store_relaxed(p,x)             mi_atomic(store_explicit)(p,x,mi_memory_order(relaxed))
#define mi_atomic_exchange_release(p,x)          mi_atomic(exchange_explicit)(p,x,mi_memory_order(release))
#define mi_atomic_exchange_acq_rel(p,x)          mi_atomic(exchange_explicit)(p,x,mi_memory_order(acq_rel))
#define mi_atomic_cas_weak_release(p,exp,des)    mi_atomic_cas_weak(p,exp,des,mi_memory_order(release),mi_memory_order(relaxed))
#define mi_atomic_cas_weak_acq_rel(p,exp,des)    mi_atomic_cas_weak(p,exp,des,mi_memory_order(acq_rel),mi_memory_order(acquire))
#define mi_atomic_cas_strong_release(p,exp,des)  mi_atomic_cas_strong(p,exp,des,mi_memory_order(release),mi_memory_order(relaxed))
#define mi_atomic_cas_strong_acq_rel(p,exp,des)  mi_atomic_cas_strong(p,exp,des,mi_memory_order(acq_rel),mi_memory_order(acquire))

#define mi_atomic_add_relaxed(p,x)               mi_atomic(fetch_add_explicit)(p,x,mi_memory_order(relaxed))
#define mi_atomic_sub_relaxed(p,x)               mi_atomic(fetch_sub_explicit)(p,x,mi_memory_order(relaxed))
#define mi_atomic_add_acq_rel(p,x)               mi_atomic(fetch_add_explicit)(p,x,mi_memory_order(acq_rel))
#define mi_atomic_sub_acq_rel(p,x)               mi_atomic(fetch_sub_explicit)(p,x,mi_memory_order(acq_rel))
#define mi_atomic_and_acq_rel(p,x)               mi_atomic(fetch_and_explicit)(p,x,mi_memory_order(acq_rel))
#define mi_atomic_or_acq_rel(p,x)                mi_atomic(fetch_or_explicit)(p,x,mi_memory_order(acq_rel))

#define mi_atomic_increment_relaxed(p)           mi_atomic_add_relaxed(p,(uintptr_t)1)
#define mi_atomic_decrement_relaxed(p)           mi_atomic_sub_relaxed(p,(uintptr_t)1)
#define mi_atomic_increment_acq_rel(p)           mi_atomic_add_acq_rel(p,(uintptr_t)1)
#define mi_atomic_decrement_acq_rel(p)           mi_atomic_sub_acq_rel(p,(uintptr_t)1)
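
// Illustrative sketch (hypothetical `counter`, not part of this header): the macros
// above are typically used in a compare-and-swap retry loop, where the CAS updates
// `expected` with the observed value on failure:
//
//   static _Atomic(uintptr_t) counter;
//   uintptr_t expected = mi_atomic_load_relaxed(&counter);
//   while (!mi_atomic_cas_weak_acq_rel(&counter, &expected, expected + 1)) { /* retry with refreshed `expected` */ }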

static inline void mi_atomic_yield(void);
static inline intptr_t mi_atomic_addi(_Atomic(intptr_t)*p, intptr_t add);
static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub);

#if defined(__cplusplus) || !defined(_MSC_VER)

// In C++/C11 atomics we have polymorphic atomics so we can use the typed `ptr` variants (where `tp` is the type of the atomic value).
// We use these macros so we can provide a typed wrapper in MSVC in C compilation mode as well.
#define mi_atomic_load_ptr_acquire(tp,p)                mi_atomic_load_acquire(p)
#define mi_atomic_load_ptr_relaxed(tp,p)                mi_atomic_load_relaxed(p)

// In C++ we need to add casts to help resolve templates if NULL is passed
#if defined(__cplusplus)
#define mi_atomic_store_ptr_release(tp,p,x)             mi_atomic_store_release(p,(tp*)x)
#define mi_atomic_store_ptr_relaxed(tp,p,x)             mi_atomic_store_relaxed(p,(tp*)x)
#define mi_atomic_cas_ptr_weak_release(tp,p,exp,des)    mi_atomic_cas_weak_release(p,exp,(tp*)des)
#define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des)    mi_atomic_cas_weak_acq_rel(p,exp,(tp*)des)
#define mi_atomic_cas_ptr_strong_release(tp,p,exp,des)  mi_atomic_cas_strong_release(p,exp,(tp*)des)
#define mi_atomic_exchange_ptr_release(tp,p,x)          mi_atomic_exchange_release(p,(tp*)x)
#define mi_atomic_exchange_ptr_acq_rel(tp,p,x)          mi_atomic_exchange_acq_rel(p,(tp*)x)
#else
#define mi_atomic_store_ptr_release(tp,p,x)             mi_atomic_store_release(p,x)
#define mi_atomic_store_ptr_relaxed(tp,p,x)             mi_atomic_store_relaxed(p,x)
#define mi_atomic_cas_ptr_weak_release(tp,p,exp,des)    mi_atomic_cas_weak_release(p,exp,des)
#define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des)    mi_atomic_cas_weak_acq_rel(p,exp,des)
#define mi_atomic_cas_ptr_strong_release(tp,p,exp,des)  mi_atomic_cas_strong_release(p,exp,des)
#define mi_atomic_exchange_ptr_release(tp,p,x)          mi_atomic_exchange_release(p,x)
#define mi_atomic_exchange_ptr_acq_rel(tp,p,x)          mi_atomic_exchange_acq_rel(p,x)
#endif
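
// Illustrative sketch (placeholder names `free_list` and `mi_block_t`): the typed
// `ptr` macros take the pointed-to type as their first argument so the MSVC C
// wrapper (and the C++ casts) can stay typed:
//
//   static _Atomic(mi_block_t*) free_list;
//   mi_block_t* head = mi_atomic_load_ptr_acquire(mi_block_t, &free_list);
//   mi_atomic_store_ptr_release(mi_block_t, &free_list, NULL);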

// These are used by the statistics
static inline int64_t mi_atomic_addi64_relaxed(volatile int64_t* p, int64_t add) {
  return mi_atomic(fetch_add_explicit)((_Atomic(int64_t)*)p, add, mi_memory_order(relaxed));
}
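// Lock-free monotonic maximum: retry the CAS until either `*p >= x` already holds
// or the store of `x` succeeds.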
static inline void mi_atomic_maxi64_relaxed(volatile int64_t* p, int64_t x) {
  int64_t current = mi_atomic_load_relaxed((_Atomic(int64_t)*)p);
  while (current < x && !mi_atomic_cas_weak_release((_Atomic(int64_t)*)p, &current, x)) { /* nothing */ };
}

// Used by timers
#define mi_atomic_loadi64_acquire(p)     mi_atomic(load_explicit)(p,mi_memory_order(acquire))
#define mi_atomic_loadi64_relaxed(p)     mi_atomic(load_explicit)(p,mi_memory_order(relaxed))
#define mi_atomic_storei64_release(p,x)  mi_atomic(store_explicit)(p,x,mi_memory_order(release))
#define mi_atomic_storei64_relaxed(p,x)  mi_atomic(store_explicit)(p,x,mi_memory_order(relaxed))

#elif defined(_MSC_VER)

// MSVC C compilation wrapper that uses Interlocked operations to model C11 atomics.
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#include <intrin.h>
#ifdef _WIN64
typedef LONG64   msc_intptr_t;
#define MI_64(f) f##64
#else
typedef LONG     msc_intptr_t;
#define MI_64(f) f
#endif

typedef enum mi_memory_order_e {
  mi_memory_order_relaxed,
  mi_memory_order_consume,
  mi_memory_order_acquire,
  mi_memory_order_release,
  mi_memory_order_acq_rel,
  mi_memory_order_seq_cst
} mi_memory_order;
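
// The Interlocked intrinsics act as full barriers, so the requested memory order
// is accepted but effectively ignored (`(void)(mo)`) in the wrappers below.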
static inline uintptr_t mi_atomic_fetch_add_explicit(_Atomic(uintptr_t)*p, uintptr_t add, mi_memory_order mo) {
  (void)(mo);
  return (uintptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, (msc_intptr_t)add);
}
static inline uintptr_t mi_atomic_fetch_sub_explicit(_Atomic(uintptr_t)*p, uintptr_t sub, mi_memory_order mo) {
  (void)(mo);
  return (uintptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, -((msc_intptr_t)sub));
}
static inline uintptr_t mi_atomic_fetch_and_explicit(_Atomic(uintptr_t)*p, uintptr_t x, mi_memory_order mo) {
  (void)(mo);
  return (uintptr_t)MI_64(_InterlockedAnd)((volatile msc_intptr_t*)p, (msc_intptr_t)x);
}
static inline uintptr_t mi_atomic_fetch_or_explicit(_Atomic(uintptr_t)*p, uintptr_t x, mi_memory_order mo) {
  (void)(mo);
  return (uintptr_t)MI_64(_InterlockedOr)((volatile msc_intptr_t*)p, (msc_intptr_t)x);
}
static inline bool mi_atomic_compare_exchange_strong_explicit(_Atomic(uintptr_t)*p, uintptr_t* expected, uintptr_t desired, mi_memory_order mo1, mi_memory_order mo2) {
  (void)(mo1); (void)(mo2);
  uintptr_t read = (uintptr_t)MI_64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)(*expected));
  if (read == *expected) {
    return true;
  }
  else {
    *expected = read;
    return false;
  }
}
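// The Interlocked compare-exchange never fails spuriously, so the weak variant
// simply forwards to the strong one.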
static inline bool mi_atomic_compare_exchange_weak_explicit(_Atomic(uintptr_t)*p, uintptr_t* expected, uintptr_t desired, mi_memory_order mo1, mi_memory_order mo2) {
  return mi_atomic_compare_exchange_strong_explicit(p, expected, desired, mo1, mo2);
}
static inline uintptr_t mi_atomic_exchange_explicit(_Atomic(uintptr_t)*p, uintptr_t exchange, mi_memory_order mo) {
  (void)(mo);
  return (uintptr_t)MI_64(_InterlockedExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange);
}
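// Model a memory fence as a full-barrier exchange on a local dummy variable.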
static inline void mi_atomic_thread_fence(mi_memory_order mo) {
  (void)(mo);
  _Atomic(uintptr_t) x = 0;
  mi_atomic_exchange_explicit(&x, 1, mo);
}
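// On x86/x64 an aligned `uintptr_t` load or store is atomic at the hardware level,
// so the plain access suffices; other targets fall back to an interlocked operation
// to obtain the requested ordering.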
static inline uintptr_t mi_atomic_load_explicit(_Atomic(uintptr_t) const* p, mi_memory_order mo) {
  (void)(mo);
#if defined(_M_IX86) || defined(_M_X64)
  return *p;
#else
  uintptr_t x = *p;
  if (mo > mi_memory_order_relaxed) {
    while (!mi_atomic_compare_exchange_weak_explicit((_Atomic(uintptr_t)*)p, &x, x, mo, mi_memory_order_relaxed)) { /* nothing */ };
  }
  return x;
#endif
}
static inline void mi_atomic_store_explicit(_Atomic(uintptr_t)*p, uintptr_t x, mi_memory_order mo) {
  (void)(mo);
#if defined(_M_IX86) || defined(_M_X64)
  *p = x;
#else
  mi_atomic_exchange_explicit(p, x, mo);
#endif
}
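// A plain 64-bit load or store is only atomic on 64-bit targets; the 32-bit build
// goes through InterlockedCompareExchange64 / InterlockedExchange64 instead.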
static inline int64_t mi_atomic_loadi64_explicit(_Atomic(int64_t)*p, mi_memory_order mo) {
  (void)(mo);
#if defined(_M_X64)
  return *p;
#else
  int64_t old = *p;
  int64_t x = old;
  while ((old = InterlockedCompareExchange64(p, x, old)) != x) {
    x = old;
  }
  return x;
#endif
}
static inline void mi_atomic_storei64_explicit(_Atomic(int64_t)*p, int64_t x, mi_memory_order mo) {
  (void)(mo);
#if defined(x_M_IX86) || defined(_M_X64)
  *p = x;
#else
  InterlockedExchange64(p, x);
#endif
}

// These are used by the statistics
static inline int64_t mi_atomic_addi64_relaxed(volatile _Atomic(int64_t)*p, int64_t add) {
#ifdef _WIN64
  return (int64_t)mi_atomic_addi((int64_t*)p, add);
#else
  int64_t current;
  int64_t sum;
  do {
    current = *p;
    sum = current + add;
  } while (_InterlockedCompareExchange64(p, sum, current) != current);
  return current;
#endif
}
static inline void mi_atomic_maxi64_relaxed(volatile _Atomic(int64_t)*p, int64_t x) {
  int64_t current;
  do {
    current = *p;
  } while (current < x && _InterlockedCompareExchange64(p, x, current) != current);
}

// The pointer macros cast to `uintptr_t`.
#define mi_atomic_load_ptr_acquire(tp,p)                (tp*)mi_atomic_load_acquire((_Atomic(uintptr_t)*)(p))
#define mi_atomic_load_ptr_relaxed(tp,p)                (tp*)mi_atomic_load_relaxed((_Atomic(uintptr_t)*)(p))
#define mi_atomic_store_ptr_release(tp,p,x)             mi_atomic_store_release((_Atomic(uintptr_t)*)(p),(uintptr_t)(x))
#define mi_atomic_store_ptr_relaxed(tp,p,x)             mi_atomic_store_relaxed((_Atomic(uintptr_t)*)(p),(uintptr_t)(x))
#define mi_atomic_cas_ptr_weak_release(tp,p,exp,des)    mi_atomic_cas_weak_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des)
#define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des)    mi_atomic_cas_weak_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des)
#define mi_atomic_cas_ptr_strong_release(tp,p,exp,des)  mi_atomic_cas_strong_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des)
#define mi_atomic_exchange_ptr_release(tp,p,x)          (tp*)mi_atomic_exchange_release((_Atomic(uintptr_t)*)(p),(uintptr_t)x)
#define mi_atomic_exchange_ptr_acq_rel(tp,p,x)          (tp*)mi_atomic_exchange_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t)x)
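
// Used by timers (MSVC variants of the `i64` load/store macros)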
#define mi_atomic_loadi64_acquire(p)     mi_atomic(loadi64_explicit)(p,mi_memory_order(acquire))
#define mi_atomic_loadi64_relaxed(p)     mi_atomic(loadi64_explicit)(p,mi_memory_order(relaxed))
#define mi_atomic_storei64_release(p,x)  mi_atomic(storei64_explicit)(p,x,mi_memory_order(release))
#define mi_atomic_storei64_relaxed(p,x)  mi_atomic(storei64_explicit)(p,x,mi_memory_order(relaxed))

#endif

// Atomically add a signed value; returns the previous value.
static inline intptr_t mi_atomic_addi(_Atomic(intptr_t)*p, intptr_t add) {
  return (intptr_t)mi_atomic_add_acq_rel((_Atomic(uintptr_t)*)p, (uintptr_t)add);
}

// Atomically subtract a signed value; returns the previous value.
static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub) {
  return (intptr_t)mi_atomic_addi(p, -sub);
}
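
// Illustrative sketch (hypothetical `in_use` counter): signed adjustments map onto
// the unsigned fetch-add via two's-complement arithmetic.
//
//   static _Atomic(intptr_t) in_use;
//   mi_atomic_addi(&in_use, 1);   // returns the previous value
//   mi_atomic_subi(&in_use, 1);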

// Yield
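// mi_atomic_yield is used in spin/back-off loops; each variant below issues the
// cheapest processor or scheduler hint available on the target platform.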
#if defined(__cplusplus)
#include <thread>
static inline void mi_atomic_yield(void) {
  std::this_thread::yield();
}
#elif defined(_WIN32)
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
static inline void mi_atomic_yield(void) {
  YieldProcessor();
}
#elif defined(__SSE2__)
#include <emmintrin.h>
static inline void mi_atomic_yield(void) {
  _mm_pause();
}
#elif (defined(__GNUC__) || defined(__clang__)) && \
      (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__armel__) || defined(__ARMEL__) || \
       defined(__aarch64__) || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__))
#if defined(__x86_64__) || defined(__i386__)
static inline void mi_atomic_yield(void) {
  __asm__ volatile ("pause" ::: "memory");
}
#elif defined(__aarch64__)
static inline void mi_atomic_yield(void) {
  __asm__ volatile("wfe");
}
#elif (defined(__arm__) && __ARM_ARCH__ >= 7)
static inline void mi_atomic_yield(void) {
  __asm__ volatile("yield" ::: "memory");
}
#elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__)
static inline void mi_atomic_yield(void) {
  __asm__ __volatile__ ("or 27,27,27" ::: "memory");
}
#elif defined(__armel__) || defined(__ARMEL__)
static inline void mi_atomic_yield(void) {
  __asm__ volatile ("nop" ::: "memory");
}
#endif
#elif defined(__sun)
// Fallback for other archs
#include <synch.h>
static inline void mi_atomic_yield(void) {
  smt_pause();
}
#elif defined(__wasi__)
#include <sched.h>
static inline void mi_atomic_yield(void) {
  sched_yield();
}
#else
#include <unistd.h>
static inline void mi_atomic_yield(void) {
  sleep(0);
}
#endif

#endif // MIMALLOC_ATOMIC_H