sec.c

#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/sec.h"

static edata_t *sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size,
    size_t alignment, bool zero, bool guarded, bool frequent_reuse,
    bool *deferred_work_generated);
static bool sec_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata,
    size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated);
static bool sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata,
    size_t old_size, size_t new_size, bool *deferred_work_generated);
static void sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata,
    bool *deferred_work_generated);

static void
sec_bin_init(sec_bin_t *bin) {
	bin->being_batch_filled = false;
	bin->bytes_cur = 0;
	edata_list_active_init(&bin->freelist);
}

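/*
 * The shard array and every shard's bin array come out of one base
 * allocation: the sec_shard_t objects are laid out first, immediately
 * followed by nshards * npsizes sec_bin_t objects.  The asserts after the
 * loop below check that the carving-up consumes exactly sz_alloc bytes.
 */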
bool
sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, pai_t *fallback,
    const sec_opts_t *opts) {
	assert(opts->max_alloc >= PAGE);

	size_t max_alloc = PAGE_FLOOR(opts->max_alloc);
	pszind_t npsizes = sz_psz2ind(max_alloc) + 1;

	size_t sz_shards = opts->nshards * sizeof(sec_shard_t);
	size_t sz_bins = opts->nshards * (size_t)npsizes * sizeof(sec_bin_t);
	size_t sz_alloc = sz_shards + sz_bins;
	void *dynalloc = base_alloc(tsdn, base, sz_alloc, CACHELINE);
	if (dynalloc == NULL) {
		return true;
	}
	sec_shard_t *shard_cur = (sec_shard_t *)dynalloc;
	sec->shards = shard_cur;
	sec_bin_t *bin_cur = (sec_bin_t *)&shard_cur[opts->nshards];
	/* Just for asserts, below. */
	sec_bin_t *bin_start = bin_cur;

	for (size_t i = 0; i < opts->nshards; i++) {
		sec_shard_t *shard = shard_cur;
		shard_cur++;
		bool err = malloc_mutex_init(&shard->mtx, "sec_shard",
		    WITNESS_RANK_SEC_SHARD, malloc_mutex_rank_exclusive);
		if (err) {
			return true;
		}
		shard->enabled = true;
		shard->bins = bin_cur;
		for (pszind_t j = 0; j < npsizes; j++) {
			sec_bin_init(&shard->bins[j]);
			bin_cur++;
		}
		shard->bytes_cur = 0;
		shard->to_flush_next = 0;
	}
	/*
	 * Should have exactly matched the bin_start to the first unused byte
	 * after the shards.
	 */
	assert((void *)shard_cur == (void *)bin_start);
	/* And the last bin to use up the last bytes of the allocation. */
	assert((char *)bin_cur == ((char *)dynalloc + sz_alloc));

	sec->fallback = fallback;
	sec->opts = *opts;
	sec->npsizes = npsizes;

	/*
	 * Initialize these last so that an improper use of an SEC whose
	 * initialization failed will segfault in an easy-to-spot way.
	 */
	sec->pai.alloc = &sec_alloc;
	sec->pai.alloc_batch = &pai_alloc_batch_default;
	sec->pai.expand = &sec_expand;
	sec->pai.shrink = &sec_shrink;
	sec->pai.dalloc = &sec_dalloc;
	sec->pai.dalloc_batch = &pai_dalloc_batch_default;

	return false;
}

static sec_shard_t *
sec_shard_pick(tsdn_t *tsdn, sec_t *sec) {
	/*
	 * Eventually, we should implement affinity, tracking source shard
	 * using the edata_t's newly freed up fields.  For now, just randomly
	 * distribute across all shards.
	 */
	if (tsdn_null(tsdn)) {
		return &sec->shards[0];
	}
	tsd_t *tsd = tsdn_tsd(tsdn);
	uint8_t *idxp = tsd_sec_shardp_get(tsd);
	if (*idxp == (uint8_t)-1) {
		/*
		 * First use; initialize using the trick from Daniel Lemire's
		 * "A fast alternative to the modulo reduction."  Use a 64-bit
		 * number to store 32 bits, since we'll deliberately overflow
		 * when we multiply by the number of shards.
		 */
		uint64_t rand32 = prng_lg_range_u64(tsd_prng_statep_get(tsd),
		    32);
		uint32_t idx =
		    (uint32_t)((rand32 * (uint64_t)sec->opts.nshards) >> 32);
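		/*
		 * For example, with nshards == 3 and rand32 == 2^31 the
		 * product is 1.5 * 2^32, so idx == 1; in general the map
		 * r -> (r * n) >> 32 spreads [0, 2^32) across [0, n) without
		 * a division.
		 */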
		assert(idx < (uint32_t)sec->opts.nshards);
		*idxp = (uint8_t)idx;
	}
	return &sec->shards[*idxp];
}

/*
 * Perhaps surprisingly, this can be called on the alloc pathways; if we hit an
 * empty cache, we'll try to fill it, which can push the shard over its limit.
 */
static void
sec_flush_some_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) {
	malloc_mutex_assert_owner(tsdn, &shard->mtx);
	edata_list_active_t to_flush;
	edata_list_active_init(&to_flush);
	while (shard->bytes_cur > sec->opts.bytes_after_flush) {
		/* Pick a victim. */
		sec_bin_t *bin = &shard->bins[shard->to_flush_next];

		/* Update our victim-picking state. */
		shard->to_flush_next++;
		if (shard->to_flush_next == sec->npsizes) {
			shard->to_flush_next = 0;
		}

		assert(shard->bytes_cur >= bin->bytes_cur);
		if (bin->bytes_cur != 0) {
			shard->bytes_cur -= bin->bytes_cur;
			bin->bytes_cur = 0;
			edata_list_active_concat(&to_flush, &bin->freelist);
		}
		/*
		 * Either bin->bytes_cur was 0, in which case we didn't touch
		 * the bin list but it should be empty anyways (or else we
		 * missed a bytes_cur update on a list modification), or it
		 * *wasn't* 0 and we emptied it ourselves.  Either way, it
		 * should be empty now.
		 */
		assert(edata_list_active_empty(&bin->freelist));
	}

	malloc_mutex_unlock(tsdn, &shard->mtx);
	bool deferred_work_generated = false;
	pai_dalloc_batch(tsdn, sec->fallback, &to_flush,
	    &deferred_work_generated);
}

static edata_t *
sec_shard_alloc_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard,
    sec_bin_t *bin) {
	malloc_mutex_assert_owner(tsdn, &shard->mtx);
	if (!shard->enabled) {
		return NULL;
	}
	edata_t *edata = edata_list_active_first(&bin->freelist);
	if (edata != NULL) {
		edata_list_active_remove(&bin->freelist, edata);
		assert(edata_size_get(edata) <= bin->bytes_cur);
		bin->bytes_cur -= edata_size_get(edata);
		assert(edata_size_get(edata) <= shard->bytes_cur);
		shard->bytes_cur -= edata_size_get(edata);
	}
	return edata;
}

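/*
 * Miss path: request 1 + batch_fill_extra extents of this size from the
 * fallback in a single call, return the first one to the caller, and cache
 * the remainder in the bin (flushing some bins if that pushes the shard over
 * max_bytes).
 */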
static edata_t *
sec_batch_fill_and_alloc(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard,
    sec_bin_t *bin, size_t size) {
	malloc_mutex_assert_not_owner(tsdn, &shard->mtx);

	edata_list_active_t result;
	edata_list_active_init(&result);
	bool deferred_work_generated = false;
	size_t nalloc = pai_alloc_batch(tsdn, sec->fallback, size,
	    1 + sec->opts.batch_fill_extra, &result, &deferred_work_generated);

	edata_t *ret = edata_list_active_first(&result);
	if (ret != NULL) {
		edata_list_active_remove(&result, ret);
	}

	malloc_mutex_lock(tsdn, &shard->mtx);
	bin->being_batch_filled = false;
	/*
	 * Handle the easy case first: nothing to cache.  Note that this can
	 * only happen in case of OOM, since sec_alloc checks the expected
	 * number of allocs, and doesn't bother going down the batch_fill
	 * pathway if there won't be anything left to cache.  So to be in this
	 * code path, we must have asked for > 1 alloc, but only gotten 1 back.
	 */
	if (nalloc <= 1) {
		malloc_mutex_unlock(tsdn, &shard->mtx);
		return ret;
	}

	size_t new_cached_bytes = (nalloc - 1) * size;

	edata_list_active_concat(&bin->freelist, &result);
	bin->bytes_cur += new_cached_bytes;
	shard->bytes_cur += new_cached_bytes;

	if (shard->bytes_cur > sec->opts.max_bytes) {
		sec_flush_some_and_unlock(tsdn, sec, shard);
	} else {
		malloc_mutex_unlock(tsdn, &shard->mtx);
	}

	return ret;
}

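/*
 * Fast path for allocation: requests that are multiples of the page size,
 * need no more than page alignment, are not zeroed, and do not exceed
 * max_alloc are served from the picked shard's bin for their size class;
 * everything else (and everything when nshards == 0) goes straight to the
 * fallback PAI.
 */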
static edata_t *
sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero,
    bool guarded, bool frequent_reuse, bool *deferred_work_generated) {
	assert((size & PAGE_MASK) == 0);
	assert(!guarded);

	sec_t *sec = (sec_t *)self;

	if (zero || alignment > PAGE || sec->opts.nshards == 0
	    || size > sec->opts.max_alloc) {
		return pai_alloc(tsdn, sec->fallback, size, alignment, zero,
		    /* guarded */ false, frequent_reuse,
		    deferred_work_generated);
	}
	pszind_t pszind = sz_psz2ind(size);
	assert(pszind < sec->npsizes);

	sec_shard_t *shard = sec_shard_pick(tsdn, sec);
	sec_bin_t *bin = &shard->bins[pszind];
	bool do_batch_fill = false;

	malloc_mutex_lock(tsdn, &shard->mtx);
	edata_t *edata = sec_shard_alloc_locked(tsdn, sec, shard, bin);
	if (edata == NULL) {
		if (!bin->being_batch_filled
		    && sec->opts.batch_fill_extra > 0) {
			bin->being_batch_filled = true;
			do_batch_fill = true;
		}
	}
	malloc_mutex_unlock(tsdn, &shard->mtx);
	if (edata == NULL) {
		if (do_batch_fill) {
			edata = sec_batch_fill_and_alloc(tsdn, sec, shard, bin,
			    size);
		} else {
			edata = pai_alloc(tsdn, sec->fallback, size, alignment,
			    zero, /* guarded */ false, frequent_reuse,
			    deferred_work_generated);
		}
	}
	return edata;
}

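/*
 * Expand and shrink are pure pass-throughs: the SEC caches whole extents of
 * its supported size classes and never resizes them itself, so both calls are
 * delegated to the fallback.
 */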
static bool
sec_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,
    size_t new_size, bool zero, bool *deferred_work_generated) {
	sec_t *sec = (sec_t *)self;
	return pai_expand(tsdn, sec->fallback, edata, old_size, new_size, zero,
	    deferred_work_generated);
}

static bool
sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,
    size_t new_size, bool *deferred_work_generated) {
	sec_t *sec = (sec_t *)self;
	return pai_shrink(tsdn, sec->fallback, edata, old_size, new_size,
	    deferred_work_generated);
}

static void
sec_flush_all_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) {
	malloc_mutex_assert_owner(tsdn, &shard->mtx);
	shard->bytes_cur = 0;
	edata_list_active_t to_flush;
	edata_list_active_init(&to_flush);
	for (pszind_t i = 0; i < sec->npsizes; i++) {
		sec_bin_t *bin = &shard->bins[i];
		bin->bytes_cur = 0;
		edata_list_active_concat(&to_flush, &bin->freelist);
	}

	/*
	 * Ordinarily we would try to avoid doing the batch deallocation while
	 * holding the shard mutex, but the flush_all pathways only happen when
	 * we're disabling the HPA or resetting the arena, both of which are
	 * rare pathways.
	 */
	bool deferred_work_generated = false;
	pai_dalloc_batch(tsdn, sec->fallback, &to_flush,
	    &deferred_work_generated);
}

static void
sec_shard_dalloc_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard,
    edata_t *edata) {
	malloc_mutex_assert_owner(tsdn, &shard->mtx);
	assert(shard->bytes_cur <= sec->opts.max_bytes);

	size_t size = edata_size_get(edata);
	pszind_t pszind = sz_psz2ind(size);
	assert(pszind < sec->npsizes);
	/*
	 * Prepending here results in LIFO allocation per bin, which seems
	 * reasonable.
	 */
	sec_bin_t *bin = &shard->bins[pszind];
	edata_list_active_prepend(&bin->freelist, edata);
	bin->bytes_cur += size;
	shard->bytes_cur += size;
	if (shard->bytes_cur > sec->opts.max_bytes) {
		/*
		 * We've exceeded the shard limit.  We make a nod in the
		 * direction of fragmentation avoidance: rather than evicting
		 * from one particular bin, we flush whole bins round-robin
		 * until the shard is back under bytes_after_flush, and only
		 * then hand the flushed extents back to the fallback in a
		 * single batch, after dropping the shard lock.  This has the
		 * extra advantage of not requiring advanced cache balancing
		 * strategies.
		 */
		sec_flush_some_and_unlock(tsdn, sec, shard);
		malloc_mutex_assert_not_owner(tsdn, &shard->mtx);
	} else {
		malloc_mutex_unlock(tsdn, &shard->mtx);
	}
}

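/*
 * Deallocation tries to retain the extent in the picked shard's bin, and only
 * hands it to the fallback when the cache is not in use (nshards == 0 or the
 * shard has been disabled) or the extent is larger than max_alloc.
 */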
static void
sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata,
    bool *deferred_work_generated) {
	sec_t *sec = (sec_t *)self;
	if (sec->opts.nshards == 0
	    || edata_size_get(edata) > sec->opts.max_alloc) {
		pai_dalloc(tsdn, sec->fallback, edata,
		    deferred_work_generated);
		return;
	}
	sec_shard_t *shard = sec_shard_pick(tsdn, sec);
	malloc_mutex_lock(tsdn, &shard->mtx);
	if (shard->enabled) {
		sec_shard_dalloc_and_unlock(tsdn, sec, shard, edata);
	} else {
		malloc_mutex_unlock(tsdn, &shard->mtx);
		pai_dalloc(tsdn, sec->fallback, edata,
		    deferred_work_generated);
	}
}

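/*
 * sec_flush empties every shard; sec_disable additionally marks each shard
 * disabled, so cached extents are no longer handed out and subsequent frees
 * go straight to the fallback.
 */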
void
sec_flush(tsdn_t *tsdn, sec_t *sec) {
	for (size_t i = 0; i < sec->opts.nshards; i++) {
		malloc_mutex_lock(tsdn, &sec->shards[i].mtx);
		sec_flush_all_locked(tsdn, sec, &sec->shards[i]);
		malloc_mutex_unlock(tsdn, &sec->shards[i].mtx);
	}
}

void
sec_disable(tsdn_t *tsdn, sec_t *sec) {
	for (size_t i = 0; i < sec->opts.nshards; i++) {
		malloc_mutex_lock(tsdn, &sec->shards[i].mtx);
		sec->shards[i].enabled = false;
		sec_flush_all_locked(tsdn, sec, &sec->shards[i]);
		malloc_mutex_unlock(tsdn, &sec->shards[i].mtx);
	}
}

void
sec_stats_merge(tsdn_t *tsdn, sec_t *sec, sec_stats_t *stats) {
	size_t sum = 0;
	for (size_t i = 0; i < sec->opts.nshards; i++) {
		/*
		 * We could save these lock acquisitions by making bytes_cur
		 * atomic, but stats collection is rare anyways and we expect
		 * the number and type of stats to get more interesting.
		 */
		malloc_mutex_lock(tsdn, &sec->shards[i].mtx);
		sum += sec->shards[i].bytes_cur;
		malloc_mutex_unlock(tsdn, &sec->shards[i].mtx);
	}
	stats->bytes += sum;
}

void
sec_mutex_stats_read(tsdn_t *tsdn, sec_t *sec,
    mutex_prof_data_t *mutex_prof_data) {
	for (size_t i = 0; i < sec->opts.nshards; i++) {
		malloc_mutex_lock(tsdn, &sec->shards[i].mtx);
		malloc_mutex_prof_accum(tsdn, mutex_prof_data,
		    &sec->shards[i].mtx);
		malloc_mutex_unlock(tsdn, &sec->shards[i].mtx);
	}
}

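/*
 * Fork hooks: each shard mutex is acquired before fork and then released (or,
 * depending on the mutex implementation, reinitialized) in the parent and the
 * child, so the child never inherits a shard mutex locked by a thread that no
 * longer exists.
 */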
void
sec_prefork2(tsdn_t *tsdn, sec_t *sec) {
	for (size_t i = 0; i < sec->opts.nshards; i++) {
		malloc_mutex_prefork(tsdn, &sec->shards[i].mtx);
	}
}

void
sec_postfork_parent(tsdn_t *tsdn, sec_t *sec) {
	for (size_t i = 0; i < sec->opts.nshards; i++) {
		malloc_mutex_postfork_parent(tsdn, &sec->shards[i].mtx);
	}
}

void
sec_postfork_child(tsdn_t *tsdn, sec_t *sec) {
	for (size_t i = 0; i < sec->opts.nshards; i++) {
		malloc_mutex_postfork_child(tsdn, &sec->shards[i].mtx);
	}
}