pycore_atomic.h

#ifndef Py_ATOMIC_H
#define Py_ATOMIC_H
#ifdef __cplusplus
extern "C" {
#endif

#ifndef Py_BUILD_CORE
#  error "this header requires Py_BUILD_CORE define"
#endif

#include "dynamic_annotations.h"   /* _Py_ANNOTATE_MEMORY_ORDER */
#include "pyconfig.h"

#ifdef HAVE_STD_ATOMIC
#  include <stdatomic.h>
#endif

#if defined(_MSC_VER)
#  include <intrin.h>
#  if defined(_M_IX86) || defined(_M_X64)
#    include <immintrin.h>
#  endif
#endif

/* This is modeled after the atomics interface from C1x, according to
 * the draft at
 * http://www.open-std.org/JTC1/SC22/wg14/www/docs/n1425.pdf.
 * Operations and types are named the same except with a _Py_ prefix
 * and have the same semantics.
 *
 * Beware, the implementations here are deep magic.
 */
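
/* For example, where C11 code operating on an atomic_int named `flag`
 * (the name is purely illustrative, not something defined here) would write
 *
 *     atomic_store_explicit(&flag, 1, memory_order_release);
 *
 * code using this header declares a _Py_atomic_int and writes
 *
 *     _Py_atomic_store_explicit(&flag, 1, _Py_memory_order_release);
 */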

#if defined(HAVE_STD_ATOMIC)

typedef enum _Py_memory_order {
    _Py_memory_order_relaxed = memory_order_relaxed,
    _Py_memory_order_acquire = memory_order_acquire,
    _Py_memory_order_release = memory_order_release,
    _Py_memory_order_acq_rel = memory_order_acq_rel,
    _Py_memory_order_seq_cst = memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    atomic_uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    atomic_int _value;
} _Py_atomic_int;

#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) \
    atomic_signal_fence(ORDER)

#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) \
    atomic_thread_fence(ORDER)

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    atomic_store_explicit(&((ATOMIC_VAL)->_value), NEW_VAL, ORDER)

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    atomic_load_explicit(&((ATOMIC_VAL)->_value), ORDER)

// Use builtin atomic operations in GCC >= 4.7 and clang
#elif defined(HAVE_BUILTIN_ATOMIC)

typedef enum _Py_memory_order {
    _Py_memory_order_relaxed = __ATOMIC_RELAXED,
    _Py_memory_order_acquire = __ATOMIC_ACQUIRE,
    _Py_memory_order_release = __ATOMIC_RELEASE,
    _Py_memory_order_acq_rel = __ATOMIC_ACQ_REL,
    _Py_memory_order_seq_cst = __ATOMIC_SEQ_CST
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;

#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) \
    __atomic_signal_fence(ORDER)

#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) \
    __atomic_thread_fence(ORDER)

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    (assert((ORDER) == __ATOMIC_RELAXED                       \
            || (ORDER) == __ATOMIC_SEQ_CST                    \
            || (ORDER) == __ATOMIC_RELEASE),                  \
     __atomic_store_n(&((ATOMIC_VAL)->_value), NEW_VAL, ORDER))

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER)           \
    (assert((ORDER) == __ATOMIC_RELAXED                       \
            || (ORDER) == __ATOMIC_SEQ_CST                    \
            || (ORDER) == __ATOMIC_ACQUIRE                    \
            || (ORDER) == __ATOMIC_CONSUME),                  \
     __atomic_load_n(&((ATOMIC_VAL)->_value), ORDER))
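
/* The asserts above mirror the C11 rules: a store may not use an acquire,
 * consume or acq_rel ordering, and a load may not use a release or acq_rel
 * ordering, so such misuse is caught in debug builds rather than silently
 * accepted. */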

/* Only support GCC (for expression statements) and x86 (for simple
 * atomic semantics) and MSVC x86/x64/ARM */
#elif defined(__GNUC__) && (defined(__i386__) || defined(__amd64))

typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;

static __inline__ void
_Py_atomic_signal_fence(_Py_memory_order order)
{
    if (order != _Py_memory_order_relaxed)
        __asm__ volatile("":::"memory");
}

static __inline__ void
_Py_atomic_thread_fence(_Py_memory_order order)
{
    if (order != _Py_memory_order_relaxed)
        __asm__ volatile("mfence":::"memory");
}

/* Tell the race checker about this operation's effects. */
static __inline__ void
_Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order)
{
    (void)address;              /* shut up -Wunused-parameter */
    switch(order) {
    case _Py_memory_order_release:
    case _Py_memory_order_acq_rel:
    case _Py_memory_order_seq_cst:
        _Py_ANNOTATE_HAPPENS_BEFORE(address);
        break;
    case _Py_memory_order_relaxed:
    case _Py_memory_order_acquire:
        break;
    }
    switch(order) {
    case _Py_memory_order_acquire:
    case _Py_memory_order_acq_rel:
    case _Py_memory_order_seq_cst:
        _Py_ANNOTATE_HAPPENS_AFTER(address);
        break;
    case _Py_memory_order_relaxed:
    case _Py_memory_order_release:
        break;
    }
}

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    __extension__ ({ \
        __typeof__(ATOMIC_VAL) atomic_val = ATOMIC_VAL; \
        __typeof__(atomic_val->_value) new_val = NEW_VAL;\
        volatile __typeof__(new_val) *volatile_data = &atomic_val->_value; \
        _Py_memory_order order = ORDER; \
        _Py_ANNOTATE_MEMORY_ORDER(atomic_val, order); \
        \
        /* Perform the operation. */ \
        _Py_ANNOTATE_IGNORE_WRITES_BEGIN(); \
        switch(order) { \
        case _Py_memory_order_release: \
            _Py_atomic_signal_fence(_Py_memory_order_release); \
            /* fallthrough */ \
        case _Py_memory_order_relaxed: \
            *volatile_data = new_val; \
            break; \
        \
        case _Py_memory_order_acquire: \
        case _Py_memory_order_acq_rel: \
        case _Py_memory_order_seq_cst: \
            __asm__ volatile("xchg %0, %1" \
                             : "+r"(new_val) \
                             : "m"(atomic_val->_value) \
                             : "memory"); \
            break; \
        } \
        _Py_ANNOTATE_IGNORE_WRITES_END(); \
    })

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    __extension__ ({ \
        __typeof__(ATOMIC_VAL) atomic_val = ATOMIC_VAL; \
        __typeof__(atomic_val->_value) result; \
        volatile __typeof__(result) *volatile_data = &atomic_val->_value; \
        _Py_memory_order order = ORDER; \
        _Py_ANNOTATE_MEMORY_ORDER(atomic_val, order); \
        \
        /* Perform the operation. */ \
        _Py_ANNOTATE_IGNORE_READS_BEGIN(); \
        switch(order) { \
        case _Py_memory_order_release: \
        case _Py_memory_order_acq_rel: \
        case _Py_memory_order_seq_cst: \
            /* Loads on x86 are not releases by default, so need a */ \
            /* thread fence. */ \
            _Py_atomic_thread_fence(_Py_memory_order_release); \
            break; \
        default: \
            /* No fence */ \
            break; \
        } \
        result = *volatile_data; \
        switch(order) { \
        case _Py_memory_order_acquire: \
        case _Py_memory_order_acq_rel: \
        case _Py_memory_order_seq_cst: \
            /* Loads on x86 are automatically acquire operations so */ \
            /* can get by with just a compiler fence. */ \
            _Py_atomic_signal_fence(_Py_memory_order_acquire); \
            break; \
        default: \
            /* No fence */ \
            break; \
        } \
        _Py_ANNOTATE_IGNORE_READS_END(); \
        result; \
    })

#elif defined(_MSC_VER)
/*  _Interlocked* functions provide a full memory barrier and are therefore
    enough for acq_rel and seq_cst. If the HLE variants aren't available
    in hardware they will fall back to a full memory barrier as well.

    This might affect performance, but likely only in some very specific and
    hard-to-measure scenarios.
*/
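
/* For example, a sequentially consistent store through the macros below,
 * such as _Py_atomic_store_explicit(&val, 1, _Py_memory_order_seq_cst) on a
 * 32-bit field (`val` is purely illustrative), ends up as an
 * _InterlockedExchange() on that field, whose implied full barrier supplies
 * the required ordering. */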

#if defined(_M_IX86) || defined(_M_X64)

typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    volatile uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    volatile int _value;
} _Py_atomic_int;

#if defined(_M_X64)
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \
  switch (ORDER) { \
  case _Py_memory_order_acquire: \
    _InterlockedExchange64_HLEAcquire((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)(NEW_VAL)); \
    break; \
  case _Py_memory_order_release: \
    _InterlockedExchange64_HLERelease((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)(NEW_VAL)); \
    break; \
  default: \
    _InterlockedExchange64((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)(NEW_VAL)); \
    break; \
  }
#else
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0);
#endif

#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \
  switch (ORDER) { \
  case _Py_memory_order_acquire: \
    _InterlockedExchange_HLEAcquire((volatile long*)&((ATOMIC_VAL)->_value), (int)(NEW_VAL)); \
    break; \
  case _Py_memory_order_release: \
    _InterlockedExchange_HLERelease((volatile long*)&((ATOMIC_VAL)->_value), (int)(NEW_VAL)); \
    break; \
  default: \
    _InterlockedExchange((volatile long*)&((ATOMIC_VAL)->_value), (int)(NEW_VAL)); \
    break; \
  }

#if defined(_M_X64)
/* This has to be an intptr_t for now.
   gil_created() uses -1 as a sentinel value; if this returned a uintptr_t,
   it would do an unsigned compare and crash.
*/
inline intptr_t _Py_atomic_load_64bit_impl(volatile uintptr_t* value, int order) {
    __int64 old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_HLEAcquire((volatile __int64*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_HLERelease((volatile __int64*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64((volatile __int64*)value, old, old) != old);
      break;
    }
    }
    return old;
}

#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) \
    _Py_atomic_load_64bit_impl((volatile uintptr_t*)&((ATOMIC_VAL)->_value), (ORDER))

#else
#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) ((ATOMIC_VAL)->_value)
#endif

inline int _Py_atomic_load_32bit_impl(volatile int* value, int order) {
    long old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_HLEAcquire((volatile long*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_HLERelease((volatile long*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange((volatile long*)value, old, old) != old);
      break;
    }
    }
    return old;
}

#define _Py_atomic_load_32bit(ATOMIC_VAL, ORDER) \
    _Py_atomic_load_32bit_impl((volatile int*)&((ATOMIC_VAL)->_value), (ORDER))

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
  if (sizeof((ATOMIC_VAL)->_value) == 8) { \
    _Py_atomic_store_64bit((ATOMIC_VAL), NEW_VAL, ORDER) } else { \
    _Py_atomic_store_32bit((ATOMIC_VAL), NEW_VAL, ORDER) }

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
  ( \
    sizeof((ATOMIC_VAL)->_value) == 8 ? \
    _Py_atomic_load_64bit((ATOMIC_VAL), ORDER) : \
    _Py_atomic_load_32bit((ATOMIC_VAL), ORDER) \
  )

#elif defined(_M_ARM) || defined(_M_ARM64)

typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    volatile uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    volatile int _value;
} _Py_atomic_int;

#if defined(_M_ARM64)
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \
  switch (ORDER) { \
  case _Py_memory_order_acquire: \
    _InterlockedExchange64_acq((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)NEW_VAL); \
    break; \
  case _Py_memory_order_release: \
    _InterlockedExchange64_rel((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)NEW_VAL); \
    break; \
  default: \
    _InterlockedExchange64((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)NEW_VAL); \
    break; \
  }
#else
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0);
#endif

#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \
  switch (ORDER) { \
  case _Py_memory_order_acquire: \
    _InterlockedExchange_acq((volatile long*)&((ATOMIC_VAL)->_value), (int)NEW_VAL); \
    break; \
  case _Py_memory_order_release: \
    _InterlockedExchange_rel((volatile long*)&((ATOMIC_VAL)->_value), (int)NEW_VAL); \
    break; \
  default: \
    _InterlockedExchange((volatile long*)&((ATOMIC_VAL)->_value), (int)NEW_VAL); \
    break; \
  }

#if defined(_M_ARM64)
/* This has to be an intptr_t for now.
   gil_created() uses -1 as a sentinel value; if this returned a uintptr_t,
   it would do an unsigned compare and crash.
*/
inline intptr_t _Py_atomic_load_64bit_impl(volatile uintptr_t* value, int order) {
    uintptr_t old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_acq(value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_rel(value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64(value, old, old) != old);
      break;
    }
    }
    return old;
}

#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) \
    _Py_atomic_load_64bit_impl((volatile uintptr_t*)&((ATOMIC_VAL)->_value), (ORDER))

#else
#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) ((ATOMIC_VAL)->_value)
#endif

inline int _Py_atomic_load_32bit_impl(volatile int* value, int order) {
    int old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_acq(value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_rel(value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange(value, old, old) != old);
      break;
    }
    }
    return old;
}

#define _Py_atomic_load_32bit(ATOMIC_VAL, ORDER) \
    _Py_atomic_load_32bit_impl((volatile int*)&((ATOMIC_VAL)->_value), (ORDER))

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
  if (sizeof((ATOMIC_VAL)->_value) == 8) { \
    _Py_atomic_store_64bit((ATOMIC_VAL), (NEW_VAL), (ORDER)) } else { \
    _Py_atomic_store_32bit((ATOMIC_VAL), (NEW_VAL), (ORDER)) }

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
  ( \
    sizeof((ATOMIC_VAL)->_value) == 8 ? \
    _Py_atomic_load_64bit((ATOMIC_VAL), (ORDER)) : \
    _Py_atomic_load_32bit((ATOMIC_VAL), (ORDER)) \
  )
#endif

#else  /* !gcc x86  !_msc_ver */

typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;

/* For all other compilers and processors, fall back to assuming that simple
   volatile accesses are atomic.  This is false, so people should port
   this. */
#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) ((void)0)
#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) ((void)0)
#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    ((ATOMIC_VAL)->_value = NEW_VAL)
#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    ((ATOMIC_VAL)->_value)

#endif

/* Standardized shortcuts. */
#define _Py_atomic_store(ATOMIC_VAL, NEW_VAL) \
    _Py_atomic_store_explicit((ATOMIC_VAL), (NEW_VAL), _Py_memory_order_seq_cst)
#define _Py_atomic_load(ATOMIC_VAL) \
    _Py_atomic_load_explicit((ATOMIC_VAL), _Py_memory_order_seq_cst)

/* Python-local extensions */
#define _Py_atomic_store_relaxed(ATOMIC_VAL, NEW_VAL) \
    _Py_atomic_store_explicit((ATOMIC_VAL), (NEW_VAL), _Py_memory_order_relaxed)
#define _Py_atomic_load_relaxed(ATOMIC_VAL) \
    _Py_atomic_load_explicit((ATOMIC_VAL), _Py_memory_order_relaxed)
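
/* Illustrative usage sketch (the variable `run_flag` below is hypothetical,
   not something defined by this header):

       static _Py_atomic_int run_flag;

       // Publish a value with sequentially consistent ordering.
       _Py_atomic_store(&run_flag, 1);

       // Read it back where no ordering is required.
       int v = _Py_atomic_load_relaxed(&run_flag);
*/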

#ifdef __cplusplus
}
#endif

#endif  /* Py_ATOMIC_H */