tsd.c

#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/assert.h"
#include "jemalloc/internal/san.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/rtree.h"

/******************************************************************************/
/* Data. */

/* TSD_INITIALIZER triggers "-Wmissing-field-initializer" */
JEMALLOC_DIAGNOSTIC_PUSH
JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS

#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP
JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls = TSD_INITIALIZER;
JEMALLOC_TSD_TYPE_ATTR(bool) JEMALLOC_TLS_MODEL tsd_initialized = false;
bool tsd_booted = false;
#elif (defined(JEMALLOC_TLS))
JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls = TSD_INITIALIZER;
pthread_key_t tsd_tsd;
bool tsd_booted = false;
#elif (defined(_WIN32))
DWORD tsd_tsd;
tsd_wrapper_t tsd_boot_wrapper = {false, TSD_INITIALIZER};
bool tsd_booted = false;
#else

/*
 * This contains a mutex, but it's pretty convenient to allow the mutex code to
 * have a dependency on tsd. So we define the struct here, and only refer to it
 * by pointer in the header.
 */
struct tsd_init_head_s {
	ql_head(tsd_init_block_t) blocks;
	malloc_mutex_t lock;
};

pthread_key_t tsd_tsd;
tsd_init_head_t tsd_init_head = {
	ql_head_initializer(blocks),
	MALLOC_MUTEX_INITIALIZER
};

tsd_wrapper_t tsd_boot_wrapper = {
	false,
	TSD_INITIALIZER
};
bool tsd_booted = false;
#endif

JEMALLOC_DIAGNOSTIC_POP

/******************************************************************************/

/* A list of all the tsds in the nominal state. */
typedef ql_head(tsd_t) tsd_list_t;
static tsd_list_t tsd_nominal_tsds = ql_head_initializer(tsd_nominal_tsds);
static malloc_mutex_t tsd_nominal_tsds_lock;

/* How many slow-path-enabling features are turned on. */
static atomic_u32_t tsd_global_slow_count = ATOMIC_INIT(0);

static bool
tsd_in_nominal_list(tsd_t *tsd) {
	tsd_t *tsd_list;
	bool found = false;
	/*
	 * We don't know that tsd is nominal; it might not be safe to get data
	 * out of it here.
	 */
	malloc_mutex_lock(TSDN_NULL, &tsd_nominal_tsds_lock);
	ql_foreach(tsd_list, &tsd_nominal_tsds, TSD_MANGLE(tsd_link)) {
		if (tsd == tsd_list) {
			found = true;
			break;
		}
	}
	malloc_mutex_unlock(TSDN_NULL, &tsd_nominal_tsds_lock);
	return found;
}
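
/*
 * Link tsd into the global nominal list. Only threads in a nominal state are
 * kept on the list, so that tsd_force_recompute() can reach them.
 */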
static void
tsd_add_nominal(tsd_t *tsd) {
	assert(!tsd_in_nominal_list(tsd));
	assert(tsd_state_get(tsd) <= tsd_state_nominal_max);
	ql_elm_new(tsd, TSD_MANGLE(tsd_link));
	malloc_mutex_lock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
	ql_tail_insert(&tsd_nominal_tsds, tsd, TSD_MANGLE(tsd_link));
	malloc_mutex_unlock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
}

static void
tsd_remove_nominal(tsd_t *tsd) {
	assert(tsd_in_nominal_list(tsd));
	assert(tsd_state_get(tsd) <= tsd_state_nominal_max);
	malloc_mutex_lock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
	ql_remove(&tsd_nominal_tsds, tsd, TSD_MANGLE(tsd_link));
	malloc_mutex_unlock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
}

static void
tsd_force_recompute(tsdn_t *tsdn) {
	/*
	 * The stores to tsd->state here need to synchronize with the exchange
	 * in tsd_slow_update.
	 */
	atomic_fence(ATOMIC_RELEASE);
	malloc_mutex_lock(tsdn, &tsd_nominal_tsds_lock);
	tsd_t *remote_tsd;
	ql_foreach(remote_tsd, &tsd_nominal_tsds, TSD_MANGLE(tsd_link)) {
		assert(tsd_atomic_load(&remote_tsd->state, ATOMIC_RELAXED)
		    <= tsd_state_nominal_max);
		tsd_atomic_store(&remote_tsd->state,
		    tsd_state_nominal_recompute, ATOMIC_RELAXED);
		/* See comments in te_recompute_fast_threshold(). */
		atomic_fence(ATOMIC_SEQ_CST);
		te_next_event_fast_set_non_nominal(remote_tsd);
	}
	malloc_mutex_unlock(tsdn, &tsd_nominal_tsds_lock);
}

void
tsd_global_slow_inc(tsdn_t *tsdn) {
	atomic_fetch_add_u32(&tsd_global_slow_count, 1, ATOMIC_RELAXED);
	/*
	 * We unconditionally force a recompute, even if the global slow count
	 * was already positive. If we didn't, then it would be possible for us
	 * to return to the user, have the user synchronize externally with some
	 * other thread, and then have that other thread not have picked up the
	 * update yet (since the original incrementing thread might still be
	 * making its way through the tsd list).
	 */
	tsd_force_recompute(tsdn);
}

void tsd_global_slow_dec(tsdn_t *tsdn) {
	atomic_fetch_sub_u32(&tsd_global_slow_count, 1, ATOMIC_RELAXED);
	/* See the note in ..._inc(). */
	tsd_force_recompute(tsdn);
}
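
/*
 * Whether this thread (tsd_local_slow) or any thread at all (tsd_global_slow)
 * currently needs the allocation slow path.
 */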
static bool
tsd_local_slow(tsd_t *tsd) {
	return !tsd_tcache_enabled_get(tsd)
	    || tsd_reentrancy_level_get(tsd) > 0;
}

bool
tsd_global_slow() {
	return atomic_load_u32(&tsd_global_slow_count, ATOMIC_RELAXED) > 0;
}

/******************************************************************************/

static uint8_t
tsd_state_compute(tsd_t *tsd) {
	if (!tsd_nominal(tsd)) {
		return tsd_state_get(tsd);
	}
	/* We're in *a* nominal state; but which one? */
	if (malloc_slow || tsd_local_slow(tsd) || tsd_global_slow()) {
		return tsd_state_nominal_slow;
	} else {
		return tsd_state_nominal;
	}
}
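
/*
 * Recompute this thread's state from the current slow-path conditions, and
 * keep retrying as long as the exchange observes a concurrent recompute
 * request.
 */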
void
tsd_slow_update(tsd_t *tsd) {
	uint8_t old_state;
	do {
		uint8_t new_state = tsd_state_compute(tsd);
		old_state = tsd_atomic_exchange(&tsd->state, new_state,
		    ATOMIC_ACQUIRE);
	} while (old_state == tsd_state_nominal_recompute);

	te_recompute_fast_threshold(tsd);
}

void
tsd_state_set(tsd_t *tsd, uint8_t new_state) {
	/* Only the tsd module can change the state *to* recompute. */
	assert(new_state != tsd_state_nominal_recompute);
	uint8_t old_state = tsd_atomic_load(&tsd->state, ATOMIC_RELAXED);
	if (old_state > tsd_state_nominal_max) {
		/*
		 * Not currently in the nominal list, but it might need to be
		 * inserted there.
		 */
		assert(!tsd_in_nominal_list(tsd));
		tsd_atomic_store(&tsd->state, new_state, ATOMIC_RELAXED);
		if (new_state <= tsd_state_nominal_max) {
			tsd_add_nominal(tsd);
		}
	} else {
		/*
		 * We're currently nominal. If the new state is non-nominal,
		 * great; we take ourselves off the list and just enter the new
		 * state.
		 */
		assert(tsd_in_nominal_list(tsd));
		if (new_state > tsd_state_nominal_max) {
			tsd_remove_nominal(tsd);
			tsd_atomic_store(&tsd->state, new_state,
			    ATOMIC_RELAXED);
		} else {
			/*
			 * This is the tricky case. We're transitioning from
			 * one nominal state to another. The caller can't know
			 * about any races that are occurring at the same time,
			 * so we always have to recompute no matter what.
			 */
			tsd_slow_update(tsd);
		}
	}
	te_recompute_fast_threshold(tsd);
}

static void
tsd_prng_state_init(tsd_t *tsd) {
	/*
	 * A nondeterministic seed based on the address of tsd reduces
	 * the likelihood of lockstep non-uniform cache index
	 * utilization among identical concurrent processes, but at the
	 * cost of test repeatability. For debug builds, instead use a
	 * deterministic seed.
	 */
	*tsd_prng_statep_get(tsd) = config_debug ? 0 :
	    (uint64_t)(uintptr_t)tsd;
}

static bool
tsd_data_init(tsd_t *tsd) {
	/*
	 * We initialize the rtree context first (before the tcache), since the
	 * tcache initialization depends on it.
	 */
	rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd));
	tsd_prng_state_init(tsd);
	tsd_te_init(tsd); /* event_init may use the prng state above. */
	tsd_san_init(tsd);
	return tsd_tcache_enabled_data_init(tsd);
}

static void
assert_tsd_data_cleanup_done(tsd_t *tsd) {
	assert(!tsd_nominal(tsd));
	assert(!tsd_in_nominal_list(tsd));
	assert(*tsd_arenap_get_unsafe(tsd) == NULL);
	assert(*tsd_iarenap_get_unsafe(tsd) == NULL);
	assert(*tsd_tcache_enabledp_get_unsafe(tsd) == false);
	assert(*tsd_prof_tdatap_get_unsafe(tsd) == NULL);
}

static bool
tsd_data_init_nocleanup(tsd_t *tsd) {
	assert(tsd_state_get(tsd) == tsd_state_reincarnated ||
	    tsd_state_get(tsd) == tsd_state_minimal_initialized);
	/*
	 * During reincarnation, there is no guarantee that the cleanup function
	 * will be called (deallocation may happen after all tsd destructors).
	 * We set up tsd in a way that no cleanup is needed.
	 */
	rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd));
	*tsd_tcache_enabledp_get_unsafe(tsd) = false;
	*tsd_reentrancy_levelp_get(tsd) = 1;
	tsd_prng_state_init(tsd);
	tsd_te_init(tsd); /* event_init may use the prng state above. */
	tsd_san_init(tsd);
	assert_tsd_data_cleanup_done(tsd);
	return false;
}

tsd_t *
tsd_fetch_slow(tsd_t *tsd, bool minimal) {
	assert(!tsd_fast(tsd));

	if (tsd_state_get(tsd) == tsd_state_nominal_slow) {
		/*
		 * On slow path but no work needed. Note that we can't
		 * necessarily *assert* that we're slow, because we might be
		 * slow because of an asynchronous modification to global state,
		 * which might be asynchronously modified *back*.
		 */
	} else if (tsd_state_get(tsd) == tsd_state_nominal_recompute) {
		tsd_slow_update(tsd);
	} else if (tsd_state_get(tsd) == tsd_state_uninitialized) {
		if (!minimal) {
			if (tsd_booted) {
				tsd_state_set(tsd, tsd_state_nominal);
				tsd_slow_update(tsd);
				/* Trigger cleanup handler registration. */
				tsd_set(tsd);
				tsd_data_init(tsd);
			}
		} else {
			tsd_state_set(tsd, tsd_state_minimal_initialized);
			tsd_set(tsd);
			tsd_data_init_nocleanup(tsd);
		}
	} else if (tsd_state_get(tsd) == tsd_state_minimal_initialized) {
		if (!minimal) {
			/* Switch to fully initialized. */
			tsd_state_set(tsd, tsd_state_nominal);
			assert(*tsd_reentrancy_levelp_get(tsd) >= 1);
			(*tsd_reentrancy_levelp_get(tsd))--;
			tsd_slow_update(tsd);
			tsd_data_init(tsd);
		} else {
			assert_tsd_data_cleanup_done(tsd);
		}
	} else if (tsd_state_get(tsd) == tsd_state_purgatory) {
		tsd_state_set(tsd, tsd_state_reincarnated);
		tsd_set(tsd);
		tsd_data_init_nocleanup(tsd);
	} else {
		assert(tsd_state_get(tsd) == tsd_state_reincarnated);
	}

	return tsd;
}
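
/*
 * Allocation wrappers used for TSD bookkeeping structures, backed by the
 * bootstrap arena (a0) so that they work before TSD itself is usable.
 */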
void *
malloc_tsd_malloc(size_t size) {
	return a0malloc(CACHELINE_CEILING(size));
}

void
malloc_tsd_dalloc(void *wrapper) {
	a0dalloc(wrapper);
}

#if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32)
static unsigned ncleanups;
static malloc_tsd_cleanup_t cleanups[MALLOC_TSD_CLEANUPS_MAX];
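
/*
 * Run every registered cleanup until none of them reports pending work; one
 * cleanup may generate work for another, hence the retry loop.
 */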
#ifndef _WIN32
JEMALLOC_EXPORT
#endif
void
_malloc_thread_cleanup(void) {
	bool pending[MALLOC_TSD_CLEANUPS_MAX], again;
	unsigned i;

	for (i = 0; i < ncleanups; i++) {
		pending[i] = true;
	}

	do {
		again = false;
		for (i = 0; i < ncleanups; i++) {
			if (pending[i]) {
				pending[i] = cleanups[i]();
				if (pending[i]) {
					again = true;
				}
			}
		}
	} while (again);
}
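
/*
 * Register a cleanup to run at thread exit; capacity is bounded by
 * MALLOC_TSD_CLEANUPS_MAX.
 */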
#ifndef _WIN32
JEMALLOC_EXPORT
#endif
void
_malloc_tsd_cleanup_register(bool (*f)(void)) {
	assert(ncleanups < MALLOC_TSD_CLEANUPS_MAX);
	cleanups[ncleanups] = f;
	ncleanups++;
}
#endif
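
/*
 * Release this thread's allocator data (profiling tdata, arenas, tcache,
 * witnesses) and leave the reentrancy level at 1 so that any allocation
 * triggered during teardown stays on the slow path.
 */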
static void
tsd_do_data_cleanup(tsd_t *tsd) {
	prof_tdata_cleanup(tsd);
	iarena_cleanup(tsd);
	arena_cleanup(tsd);
	tcache_cleanup(tsd);
	witnesses_cleanup(tsd_witness_tsdp_get_unsafe(tsd));
	*tsd_reentrancy_levelp_get(tsd) = 1;
}

void
tsd_cleanup(void *arg) {
	tsd_t *tsd = (tsd_t *)arg;

	switch (tsd_state_get(tsd)) {
	case tsd_state_uninitialized:
		/* Do nothing. */
		break;
	case tsd_state_minimal_initialized:
		/* This implies the thread only did free() in its life time. */
		/* Fall through. */
	case tsd_state_reincarnated:
		/*
		 * Reincarnated means another destructor deallocated memory
		 * after the destructor was called. Cleanup isn't required but
		 * is still called for testing and completeness.
		 */
		assert_tsd_data_cleanup_done(tsd);
		JEMALLOC_FALLTHROUGH;
	case tsd_state_nominal:
	case tsd_state_nominal_slow:
		tsd_do_data_cleanup(tsd);
		tsd_state_set(tsd, tsd_state_purgatory);
		tsd_set(tsd);
		break;
	case tsd_state_purgatory:
		/*
		 * The previous time this destructor was called, we set the
		 * state to tsd_state_purgatory so that other destructors
		 * wouldn't cause re-creation of the tsd. This time, do
		 * nothing, and do not request another callback.
		 */
		break;
	default:
		not_reached();
	}
#ifdef JEMALLOC_JET
	test_callback_t test_callback = *tsd_test_callbackp_get_unsafe(tsd);
	int *data = tsd_test_datap_get_unsafe(tsd);
	if (test_callback != NULL) {
		test_callback(data);
	}
#endif
}
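
/*
 * First stage of TSD bootstrapping: initialize the nominal-list lock and the
 * thread-specific-data backend, then fetch a tsd for the bootstrapping thread.
 */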
tsd_t *
malloc_tsd_boot0(void) {
	tsd_t *tsd;

#if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32)
	ncleanups = 0;
#endif
	if (malloc_mutex_init(&tsd_nominal_tsds_lock, "tsd_nominal_tsds_lock",
	    WITNESS_RANK_OMIT, malloc_mutex_rank_exclusive)) {
		return NULL;
	}
	if (tsd_boot0()) {
		return NULL;
	}
	tsd = tsd_fetch();
	return tsd;
}

void
malloc_tsd_boot1(void) {
	tsd_boot1();
	tsd_t *tsd = tsd_fetch();
	/* malloc_slow has been set properly. Update tsd_slow. */
	tsd_slow_update(tsd);
}

#ifdef _WIN32
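
/*
 * TLS callback invoked by the Windows loader: run the registered cleanups on
 * thread detach, and flip isthreaded on thread attach when lazy locking is
 * enabled.
 */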
static BOOL WINAPI
_tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) {
	switch (fdwReason) {
#ifdef JEMALLOC_LAZY_LOCK
	case DLL_THREAD_ATTACH:
		isthreaded = true;
		break;
#endif
	case DLL_THREAD_DETACH:
		_malloc_thread_cleanup();
		break;
	default:
		break;
	}
	return true;
}

/*
 * We need to be able to say "read" here (in the "pragma section"), but have
 * hooked "read". We won't read for the rest of the file, so we can get away
 * with unhooking.
 */
#ifdef read
#  undef read
#endif

#ifdef _MSC_VER
#  ifdef _M_IX86
#    pragma comment(linker, "/INCLUDE:__tls_used")
#    pragma comment(linker, "/INCLUDE:_tls_callback")
#  else
#    pragma comment(linker, "/INCLUDE:_tls_used")
#    pragma comment(linker, "/INCLUDE:" STRINGIFY(tls_callback) )
#  endif
#  pragma section(".CRT$XLY",long,read)
#endif
JEMALLOC_SECTION(".CRT$XLY") JEMALLOC_ATTR(used)
BOOL (WINAPI *const tls_callback)(HINSTANCE hinstDLL,
    DWORD fdwReason, LPVOID lpvReserved) = _tls_callback;
#endif

#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \
    !defined(_WIN32))
void *
tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) {
	pthread_t self = pthread_self();
	tsd_init_block_t *iter;

	/* Check whether this thread has already inserted into the list. */
	malloc_mutex_lock(TSDN_NULL, &head->lock);
	ql_foreach(iter, &head->blocks, link) {
		if (iter->thread == self) {
			malloc_mutex_unlock(TSDN_NULL, &head->lock);
			return iter->data;
		}
	}
	/* Insert block into list. */
	ql_elm_new(block, link);
	block->thread = self;
	ql_tail_insert(&head->blocks, block, link);
	malloc_mutex_unlock(TSDN_NULL, &head->lock);
	return NULL;
}

void
tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block) {
	malloc_mutex_lock(TSDN_NULL, &head->lock);
	ql_remove(&head->blocks, block, link);
	malloc_mutex_unlock(TSDN_NULL, &head->lock);
}
#endif
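
/*
 * Fork handlers for the nominal-list lock. The child re-initializes the list:
 * only the forking thread survives fork(), so it is the sole nominal tsd left.
 */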
void
tsd_prefork(tsd_t *tsd) {
	malloc_mutex_prefork(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
}

void
tsd_postfork_parent(tsd_t *tsd) {
	malloc_mutex_postfork_parent(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
}

void
tsd_postfork_child(tsd_t *tsd) {
	malloc_mutex_postfork_child(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
	ql_new(&tsd_nominal_tsds);

	if (tsd_state_get(tsd) <= tsd_state_nominal_max) {
		tsd_add_nominal(tsd);
	}
}