// emutls.c — compiler-rt implementation of __emutls_get_address (emulated TLS).
  1. //===---------- emutls.c - Implements __emutls_get_address ---------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. #include <stdint.h>
  9. #include <stdlib.h>
  10. #include <string.h>
  11. #include "int_lib.h"
  12. #ifdef __BIONIC__
  13. // There are 4 pthread key cleanup rounds on Bionic. Delay emutls deallocation
  14. // to round 2. We need to delay deallocation because:
  15. // - Android versions older than M lack __cxa_thread_atexit_impl, so apps
  16. // use a pthread key destructor to call C++ destructors.
  17. // - Apps might use __thread/thread_local variables in pthread destructors.
  18. // We can't wait until the final two rounds, because jemalloc needs two rounds
  19. // after the final malloc/free call to free its thread-specific data (see
  20. // https://reviews.llvm.org/D46978#1107507).
  21. #define EMUTLS_SKIP_DESTRUCTOR_ROUNDS 1
  22. #else
  23. #define EMUTLS_SKIP_DESTRUCTOR_ROUNDS 0
  24. #endif
  25. #if defined(_MSC_VER) && !defined(__clang__)
  26. // MSVC raises a warning about a nonstandard extension being used for the 0
  27. // sized element in this array. Disable this for warn-as-error builds.
  28. #pragma warning(push)
  29. #pragma warning(disable : 4200)
  30. #endif
// Per-thread array mapping each emulated TLS variable (by its 1-based index)
// to that thread's heap-allocated copy of the variable.
typedef struct emutls_address_array {
  uintptr_t skip_destructor_rounds; // pthread-key destructor rounds still to defer
  uintptr_t size;                   // number of elements in the 'data' array
  void *data[];                     // object addresses; variable with index i lives in data[i-1]
} emutls_address_array;
  36. #if defined(_MSC_VER) && !defined(__clang__)
  37. #pragma warning(pop)
  38. #endif
  39. static void emutls_shutdown(emutls_address_array *array);
  40. #ifndef _WIN32
  41. #include <pthread.h>
  42. static pthread_mutex_t emutls_mutex = PTHREAD_MUTEX_INITIALIZER;
  43. static pthread_key_t emutls_pthread_key;
  44. static bool emutls_key_created = false;
  45. typedef unsigned int gcc_word __attribute__((mode(word)));
  46. typedef unsigned int gcc_pointer __attribute__((mode(pointer)));
  47. // Default is not to use posix_memalign, so systems like Android
  48. // can use thread local data without heavier POSIX memory allocators.
  49. #ifndef EMUTLS_USE_POSIX_MEMALIGN
  50. #define EMUTLS_USE_POSIX_MEMALIGN 0
  51. #endif
  52. static __inline void *emutls_memalign_alloc(size_t align, size_t size) {
  53. void *base;
  54. #if EMUTLS_USE_POSIX_MEMALIGN
  55. if (posix_memalign(&base, align, size) != 0)
  56. abort();
  57. #else
  58. #define EXTRA_ALIGN_PTR_BYTES (align - 1 + sizeof(void *))
  59. char *object;
  60. if ((object = (char *)malloc(EXTRA_ALIGN_PTR_BYTES + size)) == NULL)
  61. abort();
  62. base = (void *)(((uintptr_t)(object + EXTRA_ALIGN_PTR_BYTES)) &
  63. ~(uintptr_t)(align - 1));
  64. ((void **)base)[-1] = object;
  65. #endif
  66. return base;
  67. }
  68. static __inline void emutls_memalign_free(void *base) {
  69. #if EMUTLS_USE_POSIX_MEMALIGN
  70. free(base);
  71. #else
  72. // The mallocated address is in ((void**)base)[-1]
  73. free(((void **)base)[-1]);
  74. #endif
  75. }
// Install 'value' as this thread's emutls array in the process-wide pthread
// key. The pthread_setspecific return value is deliberately ignored.
static __inline void emutls_setspecific(emutls_address_array *value) {
  pthread_setspecific(emutls_pthread_key, (void *)value);
}
// Return this thread's emutls array, or NULL if none has been installed yet.
static __inline emutls_address_array *emutls_getspecific(void) {
  return (emutls_address_array *)pthread_getspecific(emutls_pthread_key);
}
  82. static void emutls_key_destructor(void *ptr) {
  83. emutls_address_array *array = (emutls_address_array *)ptr;
  84. if (array->skip_destructor_rounds > 0) {
  85. // emutls is deallocated using a pthread key destructor. These
  86. // destructors are called in several rounds to accommodate destructor
  87. // functions that (re)initialize key values with pthread_setspecific.
  88. // Delay the emutls deallocation to accommodate other end-of-thread
  89. // cleanup tasks like calling thread_local destructors (e.g. the
  90. // __cxa_thread_atexit fallback in libc++abi).
  91. array->skip_destructor_rounds--;
  92. emutls_setspecific(array);
  93. } else {
  94. emutls_shutdown(array);
  95. free(ptr);
  96. }
  97. }
// Create the process-wide pthread key whose per-thread value is the
// emutls_address_array; emutls_key_destructor runs at each thread's exit.
// Aborts if the key cannot be created.
static __inline void emutls_init(void) {
  if (pthread_key_create(&emutls_pthread_key, emutls_key_destructor) != 0)
    abort();
  // Recorded so __emutls_unregister_key (Bionic) knows there is a key to delete.
  emutls_key_created = true;
}
// Run emutls_init exactly once per process, safe against concurrent callers.
static __inline void emutls_init_once(void) {
  static pthread_once_t once = PTHREAD_ONCE_INIT;
  pthread_once(&once, emutls_init);
}
// Serializes index assignment in emutls_get_index.
static __inline void emutls_lock(void) { pthread_mutex_lock(&emutls_mutex); }
// Releases the mutex taken by emutls_lock.
static __inline void emutls_unlock(void) { pthread_mutex_unlock(&emutls_mutex); }
  109. #else // _WIN32
  110. #include <assert.h>
  111. #include <malloc.h>
  112. #include <stdio.h>
  113. #include <windows.h>
  114. static LPCRITICAL_SECTION emutls_mutex;
  115. static DWORD emutls_tls_index = TLS_OUT_OF_INDEXES;
  116. typedef uintptr_t gcc_word;
  117. typedef void *gcc_pointer;
// Print a human-readable description of Windows error 'last_err' to stderr,
// falling back to the caller-supplied 'hint' if no message can be produced.
static void win_error(DWORD last_err, const char *hint) {
  char *buffer = NULL;
  // FORMAT_MESSAGE_ALLOCATE_BUFFER makes Windows allocate 'buffer' itself; the
  // nSize argument (1) is then a minimum character count, not a buffer size.
  if (FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER |
                         FORMAT_MESSAGE_FROM_SYSTEM |
                         FORMAT_MESSAGE_MAX_WIDTH_MASK,
                     NULL, last_err, 0, (LPSTR)&buffer, 1, NULL)) {
    fprintf(stderr, "Windows error: %s\n", buffer);
  } else {
    fprintf(stderr, "Unknown Windows error: %s\n", hint);
  }
  // LocalFree(NULL) is a no-op, so this is safe on the fallback path.
  LocalFree(buffer);
}
// Report a Windows error via win_error, then terminate the process.
static __inline void win_abort(DWORD last_err, const char *hint) {
  win_error(last_err, hint);
  abort();
}
// Allocate 'size' bytes aligned to 'align'; aborts instead of returning NULL.
// Must be released with emutls_memalign_free (_aligned_free).
static __inline void *emutls_memalign_alloc(size_t align, size_t size) {
  void *base = _aligned_malloc(size, align);
  if (!base)
    win_abort(GetLastError(), "_aligned_malloc");
  return base;
}
// _aligned_malloc memory must be released with _aligned_free, not free().
static __inline void emutls_memalign_free(void *base) { _aligned_free(base); }
// atexit handler: tear down the mutex and TLS slot created by emutls_init.
// Also invoked on emutls_init's own failure path, so each step first checks
// whether its resource was actually created.
static void emutls_exit(void) {
  if (emutls_mutex) {
    DeleteCriticalSection(emutls_mutex);
    _aligned_free(emutls_mutex);
    emutls_mutex = NULL;
  }
  if (emutls_tls_index != TLS_OUT_OF_INDEXES) {
    // Frees the TLS objects of the thread running atexit (TlsGetValue reads
    // the calling thread's slot).
    emutls_shutdown((emutls_address_array *)TlsGetValue(emutls_tls_index));
    TlsFree(emutls_tls_index);
    emutls_tls_index = TLS_OUT_OF_INDEXES;
  }
}
  153. static BOOL CALLBACK emutls_init(PINIT_ONCE p0, PVOID p1, PVOID *p2) {
  154. (void)p0;
  155. (void)p1;
  156. (void)p2;
  157. emutls_mutex =
  158. (LPCRITICAL_SECTION)_aligned_malloc(sizeof(CRITICAL_SECTION), 16);
  159. if (!emutls_mutex) {
  160. win_error(GetLastError(), "_aligned_malloc");
  161. return FALSE;
  162. }
  163. InitializeCriticalSection(emutls_mutex);
  164. emutls_tls_index = TlsAlloc();
  165. if (emutls_tls_index == TLS_OUT_OF_INDEXES) {
  166. emutls_exit();
  167. win_error(GetLastError(), "TlsAlloc");
  168. return FALSE;
  169. }
  170. atexit(&emutls_exit);
  171. return TRUE;
  172. }
// Run emutls_init exactly once per process (thread-safe). A FALSE result from
// emutls_init is not checked here; subsequent Tls* calls will abort instead.
static __inline void emutls_init_once(void) {
  static INIT_ONCE once;
  InitOnceExecuteOnce(&once, emutls_init, NULL, NULL);
}
// Serializes index assignment in emutls_get_index (mirrors the pthread build).
static __inline void emutls_lock(void) { EnterCriticalSection(emutls_mutex); }
// Releases the critical section taken by emutls_lock.
static __inline void emutls_unlock(void) { LeaveCriticalSection(emutls_mutex); }
// Install 'value' as this thread's emutls array; aborts if TlsSetValue fails.
static __inline void emutls_setspecific(emutls_address_array *value) {
  if (TlsSetValue(emutls_tls_index, (LPVOID)value) == 0)
    win_abort(GetLastError(), "TlsSetValue");
}
// Return this thread's emutls array, or NULL if none has been set.
// TlsGetValue returns NULL both for "slot holds NULL" and for failure; the
// two cases are distinguished by checking GetLastError() immediately after.
static __inline emutls_address_array *emutls_getspecific(void) {
  LPVOID value = TlsGetValue(emutls_tls_index);
  if (value == NULL) {
    const DWORD err = GetLastError();
    if (err != ERROR_SUCCESS)
      win_abort(err, "TlsGetValue");
  }
  return (emutls_address_array *)value;
}
  192. // Provide atomic load/store functions for emutls_get_index if built with MSVC.
  193. #if !defined(__ATOMIC_RELEASE)
  194. #include <intrin.h>
  195. enum { __ATOMIC_ACQUIRE = 2, __ATOMIC_RELEASE = 3 };
// Acquire-load shim for MSVC builds that lack the GCC/Clang __atomic builtins.
// The Interlocked* intrinsics are full barriers, which is at least as strong
// as the acquire ordering asserted here.
static __inline uintptr_t __atomic_load_n(void *ptr, unsigned type) {
  assert(type == __ATOMIC_ACQUIRE);
  // These return the previous value - but since we do an OR with 0,
  // it's equivalent to a plain load.
#ifdef _WIN64
  return InterlockedOr64(ptr, 0);
#else
  return InterlockedOr(ptr, 0);
#endif
}
// Release-store counterpart: InterlockedExchangePointer is a full barrier,
// at least as strong as the release ordering asserted here.
static __inline void __atomic_store_n(void *ptr, uintptr_t val, unsigned type) {
  assert(type == __ATOMIC_RELEASE);
  InterlockedExchangePointer((void *volatile *)ptr, (void *)val);
}
  210. #endif // __ATOMIC_RELEASE
  211. #endif // _WIN32
  212. static size_t emutls_num_object = 0; // number of allocated TLS objects
  213. // Free the allocated TLS data
  214. static void emutls_shutdown(emutls_address_array *array) {
  215. if (array) {
  216. uintptr_t i;
  217. for (i = 0; i < array->size; ++i) {
  218. if (array->data[i])
  219. emutls_memalign_free(array->data[i]);
  220. }
  221. }
  222. }
// For every TLS variable xyz,
// there is one __emutls_control variable named __emutls_v.xyz.
// If xyz has non-zero initial value, __emutls_v.xyz's "value"
// will point to __emutls_t.xyz, which has the initial value.
typedef struct __emutls_control {
  // Must use gcc_word here, instead of size_t, to match GCC. When
  // gcc_word is larger than size_t, the upper extra bits are all
  // zeros. We can use variables of size_t to operate on size and
  // align.
  gcc_word size;  // size of the object in bytes
  gcc_word align; // alignment of the object in bytes
  union {
    uintptr_t index; // data[index-1] is the object address; 0 = not yet assigned
    void *address;   // object address, when in single thread env
  } object;
  void *value; // null or non-zero initial value for the object
} __emutls_control;
  240. // Emulated TLS objects are always allocated at run-time.
  241. static __inline void *emutls_allocate_object(__emutls_control *control) {
  242. // Use standard C types, check with gcc's emutls.o.
  243. COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(gcc_pointer));
  244. COMPILE_TIME_ASSERT(sizeof(uintptr_t) == sizeof(void *));
  245. size_t size = control->size;
  246. size_t align = control->align;
  247. void *base;
  248. if (align < sizeof(void *))
  249. align = sizeof(void *);
  250. // Make sure that align is power of 2.
  251. if ((align & (align - 1)) != 0)
  252. abort();
  253. base = emutls_memalign_alloc(align, size);
  254. if (control->value)
  255. memcpy(base, control->value, size);
  256. else
  257. memset(base, 0, size);
  258. return base;
  259. }
// Returns control->object.index; set index if not allocated yet.
// Double-checked locking: an acquire load outside the lock for the fast path,
// then a re-check and release store under emutls_mutex for first use.
static __inline uintptr_t emutls_get_index(__emutls_control *control) {
  uintptr_t index = __atomic_load_n(&control->object.index, __ATOMIC_ACQUIRE);
  if (!index) {
    emutls_init_once();
    emutls_lock();
    // Re-check under the lock: another thread may have assigned the index
    // between the load above and acquiring the mutex.
    index = control->object.index;
    if (!index) {
      // Indices are 1-based so that 0 can mean "not yet assigned".
      index = ++emutls_num_object;
      // Release pairs with the acquire load above, so a thread that sees the
      // index also sees the initialization done under the lock.
      __atomic_store_n(&control->object.index, index, __ATOMIC_RELEASE);
    }
    emutls_unlock();
  }
  return index;
}
  275. // Updates newly allocated thread local emutls_address_array.
  276. static __inline void emutls_check_array_set_size(emutls_address_array *array,
  277. uintptr_t size) {
  278. if (array == NULL)
  279. abort();
  280. array->size = size;
  281. emutls_setspecific(array);
  282. }
  283. // Returns the new 'data' array size, number of elements,
  284. // which must be no smaller than the given index.
  285. static __inline uintptr_t emutls_new_data_array_size(uintptr_t index) {
  286. // Need to allocate emutls_address_array with extra slots
  287. // to store the header.
  288. // Round up the emutls_address_array size to multiple of 16.
  289. uintptr_t header_words = sizeof(emutls_address_array) / sizeof(void *);
  290. return ((index + header_words + 15) & ~((uintptr_t)15)) - header_words;
  291. }
  292. // Returns the size in bytes required for an emutls_address_array with
  293. // N number of elements for data field.
  294. static __inline uintptr_t emutls_asize(uintptr_t N) {
  295. return N * sizeof(void *) + sizeof(emutls_address_array);
  296. }
// Returns the thread local emutls_address_array.
// Extends its size if necessary to hold address at index.
static __inline emutls_address_array *
emutls_get_address_array(uintptr_t index) {
  emutls_address_array *array = emutls_getspecific();
  if (array == NULL) {
    // First emulated-TLS access on this thread: allocate a fresh array and
    // zero its data slots (the header fields are set explicitly below).
    uintptr_t new_size = emutls_new_data_array_size(index);
    array = (emutls_address_array *)malloc(emutls_asize(new_size));
    if (array) {
      memset(array->data, 0, new_size * sizeof(void *));
      array->skip_destructor_rounds = EMUTLS_SKIP_DESTRUCTOR_ROUNDS;
    }
    // Aborts if the malloc above returned NULL.
    emutls_check_array_set_size(array, new_size);
  } else if (index > array->size) {
    // Grow the existing array and zero only the newly added slots.
    uintptr_t orig_size = array->size;
    uintptr_t new_size = emutls_new_data_array_size(index);
    // Overwriting 'array' on realloc is safe here despite the usual leak
    // hazard: a NULL result is fatal (emutls_check_array_set_size aborts),
    // so the old block never needs to be recovered.
    array = (emutls_address_array *)realloc(array, emutls_asize(new_size));
    if (array)
      memset(array->data + orig_size, 0,
             (new_size - orig_size) * sizeof(void *));
    emutls_check_array_set_size(array, new_size);
  }
  return array;
}
  321. #ifndef _WIN32
  322. // Our emulated TLS implementation relies on local state (e.g. for the pthread
  323. // key), and if we duplicate this state across different shared libraries,
  324. // accesses to the same TLS variable from different shared libraries will yield
  325. // different results (see https://github.com/android/ndk/issues/1551 for an
  326. // example). __emutls_get_address is the only external entry point for emulated
  327. // TLS, and by making it default visibility and weak, we can rely on the dynamic
  328. // linker to coalesce multiple copies at runtime and ensure a single unique copy
  329. // of TLS state. This is a best effort; it won't work if the user is linking
  330. // with -Bsymbolic or -Bsymbolic-functions, and it also won't work on Windows,
  331. // where the dynamic linker has no notion of coalescing weak symbols at runtime.
  332. // A more robust solution would be to create a separate shared library for
  333. // emulated TLS, to ensure a single copy of its state.
  334. __attribute__((visibility("default"), weak))
  335. #endif
  336. void *__emutls_get_address(__emutls_control *control) {
  337. uintptr_t index = emutls_get_index(control);
  338. emutls_address_array *array = emutls_get_address_array(index--);
  339. if (array->data[index] == NULL)
  340. array->data[index] = emutls_allocate_object(control);
  341. return array->data[index];
  342. }
  343. #ifdef __BIONIC__
  344. // Called by Bionic on dlclose to delete the emutls pthread key.
  345. __attribute__((visibility("hidden"))) void __emutls_unregister_key(void) {
  346. if (emutls_key_created) {
  347. pthread_key_delete(emutls_pthread_key);
  348. emutls_key_created = false;
  349. }
  350. }
  351. #endif