- #include "jemalloc/internal/jemalloc_preamble.h"
- #include "jemalloc/internal/jemalloc_internal_includes.h"
- #include "jemalloc/internal/sec.h"
- static edata_t *sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size,
- size_t alignment, bool zero, bool guarded, bool frequent_reuse,
- bool *deferred_work_generated);
- static bool sec_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata,
- size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated);
- static bool sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata,
- size_t old_size, size_t new_size, bool *deferred_work_generated);
- static void sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata,
- bool *deferred_work_generated);
- static void
- sec_bin_init(sec_bin_t *bin) {
- bin->being_batch_filled = false;
- bin->bytes_cur = 0;
- edata_list_active_init(&bin->freelist);
- }
- bool
- sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, pai_t *fallback,
- const sec_opts_t *opts) {
- assert(opts->max_alloc >= PAGE);
- size_t max_alloc = PAGE_FLOOR(opts->max_alloc);
- pszind_t npsizes = sz_psz2ind(max_alloc) + 1;
- size_t sz_shards = opts->nshards * sizeof(sec_shard_t);
- size_t sz_bins = opts->nshards * (size_t)npsizes * sizeof(sec_bin_t);
- size_t sz_alloc = sz_shards + sz_bins;
- void *dynalloc = base_alloc(tsdn, base, sz_alloc, CACHELINE);
- if (dynalloc == NULL) {
- return true;
- }
- sec_shard_t *shard_cur = (sec_shard_t *)dynalloc;
- sec->shards = shard_cur;
- sec_bin_t *bin_cur = (sec_bin_t *)&shard_cur[opts->nshards];
- /* Just for asserts, below. */
- sec_bin_t *bin_start = bin_cur;
- for (size_t i = 0; i < opts->nshards; i++) {
- sec_shard_t *shard = shard_cur;
- shard_cur++;
- bool err = malloc_mutex_init(&shard->mtx, "sec_shard",
- WITNESS_RANK_SEC_SHARD, malloc_mutex_rank_exclusive);
- if (err) {
- return true;
- }
- shard->enabled = true;
- shard->bins = bin_cur;
- for (pszind_t j = 0; j < npsizes; j++) {
- sec_bin_init(&shard->bins[j]);
- bin_cur++;
- }
- shard->bytes_cur = 0;
- shard->to_flush_next = 0;
- }
- /*
- * Should have exactly matched the bin_start to the first unused byte
- * after the shards.
- */
- assert((void *)shard_cur == (void *)bin_start);
- /* And the last bin to use up the last bytes of the allocation. */
- assert((char *)bin_cur == ((char *)dynalloc + sz_alloc));
- sec->fallback = fallback;
- sec->opts = *opts;
- sec->npsizes = npsizes;
- /*
- * Initialize these last so that an improper use of an SEC whose
- * initialization failed will segfault in an easy-to-spot way.
- */
- sec->pai.alloc = &sec_alloc;
- sec->pai.alloc_batch = &pai_alloc_batch_default;
- sec->pai.expand = &sec_expand;
- sec->pai.shrink = &sec_shrink;
- sec->pai.dalloc = &sec_dalloc;
- sec->pai.dalloc_batch = &pai_dalloc_batch_default;
- return false;
- }
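/*
 * Illustrative usage sketch (editorial addition, not part of the original
 * file): roughly how a caller might wire an SEC in front of a fallback
 * pai_t.  The option values are made up for the example; the real defaults
 * live in jemalloc's option-handling code, and sec_opts_t may carry fields
 * beyond the ones this file touches.
 */
#if 0
static bool
example_sec_setup(tsdn_t *tsdn, sec_t *sec, base_t *base, pai_t *fallback) {
	sec_opts_t opts;
	/* Fields not referenced in this file are ignored for the sketch. */
	opts.nshards = 4;                   /* shards, to spread lock contention */
	opts.max_alloc = 8 * PAGE;          /* largest extent size worth caching */
	opts.max_bytes = 128 * PAGE;        /* per-shard cache capacity */
	opts.bytes_after_flush = 64 * PAGE; /* target size once a flush triggers */
	opts.batch_fill_extra = 4;          /* extra extents grabbed per refill */
	/* sec_init returns true on failure (base_alloc OOM or mutex init). */
	return sec_init(tsdn, sec, base, fallback, &opts);
}
#endif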
- static sec_shard_t *
- sec_shard_pick(tsdn_t *tsdn, sec_t *sec) {
- /*
- * Eventually, we should implement affinity, tracking source shard using
- * the edata_t's newly freed up fields. For now, just randomly
- * distribute across all shards.
- */
- if (tsdn_null(tsdn)) {
- return &sec->shards[0];
- }
- tsd_t *tsd = tsdn_tsd(tsdn);
- uint8_t *idxp = tsd_sec_shardp_get(tsd);
- if (*idxp == (uint8_t)-1) {
- /*
- * First use; initialize using the trick from Daniel Lemire's
- * "A fast alternative to the modulo reduction. Use a 64 bit
- * number to store 32 bits, since we'll deliberately overflow
- * when we multiply by the number of shards.
- */
- uint64_t rand32 = prng_lg_range_u64(tsd_prng_statep_get(tsd), 32);
- uint32_t idx =
- (uint32_t)((rand32 * (uint64_t)sec->opts.nshards) >> 32);
- assert(idx < (uint32_t)sec->opts.nshards);
- *idxp = (uint8_t)idx;
- }
- return &sec->shards[*idxp];
- }
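/*
 * Editorial sketch of the multiply-shift reduction used above (Daniel
 * Lemire, "A fast alternative to the modulo reduction"), separated from the
 * tsd/prng machinery: the high 32 bits of the 64-bit product map a uniform
 * 32-bit value into [0, nshards) without a division.
 */
#if 0
#include <stdint.h>	/* only needed if taken out of this file */

static inline uint32_t
reduce_u32(uint32_t rand32, uint32_t nshards) {
	/* The product fits in 64 bits; its high half is the reduced index. */
	return (uint32_t)(((uint64_t)rand32 * (uint64_t)nshards) >> 32);
}
/* Examples: reduce_u32(0x80000000u, 8) == 4; reduce_u32(0xffffffffu, 8) == 7. */
#endif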
- /*
- * Perhaps surprisingly, this can be called on the alloc pathways; if we hit an
- * empty cache, we'll try to fill it, which can push the shard over its limit.
- */
- static void
- sec_flush_some_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) {
- malloc_mutex_assert_owner(tsdn, &shard->mtx);
- edata_list_active_t to_flush;
- edata_list_active_init(&to_flush);
- while (shard->bytes_cur > sec->opts.bytes_after_flush) {
- /* Pick a victim. */
- sec_bin_t *bin = &shard->bins[shard->to_flush_next];
- /* Update our victim-picking state. */
- shard->to_flush_next++;
- if (shard->to_flush_next == sec->npsizes) {
- shard->to_flush_next = 0;
- }
- assert(shard->bytes_cur >= bin->bytes_cur);
- if (bin->bytes_cur != 0) {
- shard->bytes_cur -= bin->bytes_cur;
- bin->bytes_cur = 0;
- edata_list_active_concat(&to_flush, &bin->freelist);
- }
- /*
- * Either bin->bytes_cur was 0, in which case we didn't touch
- * the bin list but it should be empty anyway (or else we
- * missed a bytes_cur update on a list modification), or it
- * *wasn't* 0 and we emptied it ourselves. Either way, it should
- * be empty now.
- */
- assert(edata_list_active_empty(&bin->freelist));
- }
- malloc_mutex_unlock(tsdn, &shard->mtx);
- bool deferred_work_generated = false;
- pai_dalloc_batch(tsdn, sec->fallback, &to_flush,
- &deferred_work_generated);
- }
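/*
 * Worked example of the flush hysteresis above (illustrative numbers, not
 * defaults): with opts.max_bytes == 128 * PAGE and opts.bytes_after_flush ==
 * 64 * PAGE, a shard pushed past 128 pages of cached extents evicts whole
 * bins, round-robin via to_flush_next, until at most 64 pages remain; the
 * evicted extents are then handed back to the fallback in one
 * pai_dalloc_batch call after the shard lock is dropped.
 */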
- static edata_t *
- sec_shard_alloc_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard,
- sec_bin_t *bin) {
- malloc_mutex_assert_owner(tsdn, &shard->mtx);
- if (!shard->enabled) {
- return NULL;
- }
- edata_t *edata = edata_list_active_first(&bin->freelist);
- if (edata != NULL) {
- edata_list_active_remove(&bin->freelist, edata);
- assert(edata_size_get(edata) <= bin->bytes_cur);
- bin->bytes_cur -= edata_size_get(edata);
- assert(edata_size_get(edata) <= shard->bytes_cur);
- shard->bytes_cur -= edata_size_get(edata);
- }
- return edata;
- }
- static edata_t *
- sec_batch_fill_and_alloc(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard,
- sec_bin_t *bin, size_t size) {
- malloc_mutex_assert_not_owner(tsdn, &shard->mtx);
- edata_list_active_t result;
- edata_list_active_init(&result);
- bool deferred_work_generated = false;
- size_t nalloc = pai_alloc_batch(tsdn, sec->fallback, size,
- 1 + sec->opts.batch_fill_extra, &result, &deferred_work_generated);
- edata_t *ret = edata_list_active_first(&result);
- if (ret != NULL) {
- edata_list_active_remove(&result, ret);
- }
- malloc_mutex_lock(tsdn, &shard->mtx);
- bin->being_batch_filled = false;
- /*
- * Handle the easy case first: nothing to cache. Note that this can
- * only happen in case of OOM, since sec_alloc checks the expected
- * number of allocs, and doesn't bother going down the batch_fill
- * pathway if there won't be anything left to cache. So to be in this
- * code path, we must have asked for > 1 alloc, but gotten at most 1 back.
- */
- if (nalloc <= 1) {
- malloc_mutex_unlock(tsdn, &shard->mtx);
- return ret;
- }
- size_t new_cached_bytes = (nalloc - 1) * size;
- edata_list_active_concat(&bin->freelist, &result);
- bin->bytes_cur += new_cached_bytes;
- shard->bytes_cur += new_cached_bytes;
- if (shard->bytes_cur > sec->opts.max_bytes) {
- sec_flush_some_and_unlock(tsdn, sec, shard);
- } else {
- malloc_mutex_unlock(tsdn, &shard->mtx);
- }
- return ret;
- }
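/*
 * Worked example of the accounting above (illustrative numbers): a miss on a
 * 4 * PAGE bin with batch_fill_extra == 3 requests 4 extents from the
 * fallback.  If all 4 arrive, one is returned to the caller and the other
 * (nalloc - 1) * size == 12 * PAGE bytes are parked in the bin's freelist
 * and charged to both bin->bytes_cur and shard->bytes_cur; if that pushes
 * the shard past opts.max_bytes, the flush-some path above runs before
 * returning.
 */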
- static edata_t *
- sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero,
- bool guarded, bool frequent_reuse, bool *deferred_work_generated) {
- assert((size & PAGE_MASK) == 0);
- assert(!guarded);
- sec_t *sec = (sec_t *)self;
- if (zero || alignment > PAGE || sec->opts.nshards == 0
- || size > sec->opts.max_alloc) {
- return pai_alloc(tsdn, sec->fallback, size, alignment, zero,
- /* guarded */ false, frequent_reuse,
- deferred_work_generated);
- }
- pszind_t pszind = sz_psz2ind(size);
- assert(pszind < sec->npsizes);
- sec_shard_t *shard = sec_shard_pick(tsdn, sec);
- sec_bin_t *bin = &shard->bins[pszind];
- bool do_batch_fill = false;
- malloc_mutex_lock(tsdn, &shard->mtx);
- edata_t *edata = sec_shard_alloc_locked(tsdn, sec, shard, bin);
- if (edata == NULL) {
- if (!bin->being_batch_filled
- && sec->opts.batch_fill_extra > 0) {
- bin->being_batch_filled = true;
- do_batch_fill = true;
- }
- }
- malloc_mutex_unlock(tsdn, &shard->mtx);
- if (edata == NULL) {
- if (do_batch_fill) {
- edata = sec_batch_fill_and_alloc(tsdn, sec, shard, bin,
- size);
- } else {
- edata = pai_alloc(tsdn, sec->fallback, size, alignment,
- zero, /* guarded */ false, frequent_reuse,
- deferred_work_generated);
- }
- }
- return edata;
- }
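/*
 * Editorial restatement of the cacheability test above as a hypothetical
 * helper (not part of the original file), to make the fast-path decision
 * explicit: any of these conditions sends the request straight to the
 * fallback allocator.
 */
#if 0
static inline bool
sec_example_bypasses_cache(const sec_t *sec, size_t size, size_t alignment,
    bool zero) {
	return zero                        /* zeroed extents are not cached */
	    || alignment > PAGE            /* only page alignment is cached */
	    || sec->opts.nshards == 0      /* SEC disabled */
	    || size > sec->opts.max_alloc; /* larger than the cached classes */
}
#endif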
- static bool
- sec_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,
- size_t new_size, bool zero, bool *deferred_work_generated) {
- sec_t *sec = (sec_t *)self;
- return pai_expand(tsdn, sec->fallback, edata, old_size, new_size, zero,
- deferred_work_generated);
- }
- static bool
- sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,
- size_t new_size, bool *deferred_work_generated) {
- sec_t *sec = (sec_t *)self;
- return pai_shrink(tsdn, sec->fallback, edata, old_size, new_size,
- deferred_work_generated);
- }
- static void
- sec_flush_all_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) {
- malloc_mutex_assert_owner(tsdn, &shard->mtx);
- shard->bytes_cur = 0;
- edata_list_active_t to_flush;
- edata_list_active_init(&to_flush);
- for (pszind_t i = 0; i < sec->npsizes; i++) {
- sec_bin_t *bin = &shard->bins[i];
- bin->bytes_cur = 0;
- edata_list_active_concat(&to_flush, &bin->freelist);
- }
- /*
- * Ordinarily we would try to avoid doing the batch deallocation while
- * holding the shard mutex, but the flush_all pathways only happen when
- * we're disabling the HPA or resetting the arena, both of which
- * are rare.
- */
- bool deferred_work_generated = false;
- pai_dalloc_batch(tsdn, sec->fallback, &to_flush,
- &deferred_work_generated);
- }
- static void
- sec_shard_dalloc_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard,
- edata_t *edata) {
- malloc_mutex_assert_owner(tsdn, &shard->mtx);
- assert(shard->bytes_cur <= sec->opts.max_bytes);
- size_t size = edata_size_get(edata);
- pszind_t pszind = sz_psz2ind(size);
- assert(pszind < sec->npsizes);
- /*
- * Prepending here results in LIFO allocation per bin, which seems
- * reasonable.
- */
- sec_bin_t *bin = &shard->bins[pszind];
- edata_list_active_prepend(&bin->freelist, edata);
- bin->bytes_cur += size;
- shard->bytes_cur += size;
- if (shard->bytes_cur > sec->opts.max_bytes) {
- /*
- * We've exceeded the shard limit. We make a nod in the
- * direction of fragmentation avoidance: rather than trimming a
- * single bin, sec_flush_some_and_unlock evicts whole bins,
- * round-robin, until the shard is back down to
- * bytes_after_flush, then drops the lock and hands the evicted
- * extents to the fallback in one batch. This has the extra
- * advantage of not requiring advanced cache balancing
- * strategies.
- */
- sec_flush_some_and_unlock(tsdn, sec, shard);
- malloc_mutex_assert_not_owner(tsdn, &shard->mtx);
- } else {
- malloc_mutex_unlock(tsdn, &shard->mtx);
- }
- }
- static void
- sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata,
- bool *deferred_work_generated) {
- sec_t *sec = (sec_t *)self;
- if (sec->opts.nshards == 0
- || edata_size_get(edata) > sec->opts.max_alloc) {
- pai_dalloc(tsdn, sec->fallback, edata,
- deferred_work_generated);
- return;
- }
- sec_shard_t *shard = sec_shard_pick(tsdn, sec);
- malloc_mutex_lock(tsdn, &shard->mtx);
- if (shard->enabled) {
- sec_shard_dalloc_and_unlock(tsdn, sec, shard, edata);
- } else {
- malloc_mutex_unlock(tsdn, &shard->mtx);
- pai_dalloc(tsdn, sec->fallback, edata,
- deferred_work_generated);
- }
- }
- void
- sec_flush(tsdn_t *tsdn, sec_t *sec) {
- for (size_t i = 0; i < sec->opts.nshards; i++) {
- malloc_mutex_lock(tsdn, &sec->shards[i].mtx);
- sec_flush_all_locked(tsdn, sec, &sec->shards[i]);
- malloc_mutex_unlock(tsdn, &sec->shards[i].mtx);
- }
- }
- void
- sec_disable(tsdn_t *tsdn, sec_t *sec) {
- for (size_t i = 0; i < sec->opts.nshards; i++) {
- malloc_mutex_lock(tsdn, &sec->shards[i].mtx);
- sec->shards[i].enabled = false;
- sec_flush_all_locked(tsdn, sec, &sec->shards[i]);
- malloc_mutex_unlock(tsdn, &sec->shards[i].mtx);
- }
- }
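/*
 * Hypothetical teardown ordering (a sketch; the real call sites live
 * elsewhere in the allocator): sec_disable flushes every shard and marks it
 * disabled, so later allocations and deallocations fall through to the
 * fallback, after which the fallback itself can safely be dismantled or
 * reset.
 */
#if 0
	sec_disable(tsdn, sec);	/* flush all shards and stop caching */
	/* ... the fallback pai_t may be torn down or reset afterwards ... */
#endif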
- void
- sec_stats_merge(tsdn_t *tsdn, sec_t *sec, sec_stats_t *stats) {
- size_t sum = 0;
- for (size_t i = 0; i < sec->opts.nshards; i++) {
- /*
- * We could save these lock acquisitions by making bytes_cur
- * atomic, but stats collection is rare anyway, and we expect
- * the set of stats tracked here to grow over time.
- */
- malloc_mutex_lock(tsdn, &sec->shards[i].mtx);
- sum += sec->shards[i].bytes_cur;
- malloc_mutex_unlock(tsdn, &sec->shards[i].mtx);
- }
- stats->bytes += sum;
- }
- void
- sec_mutex_stats_read(tsdn_t *tsdn, sec_t *sec,
- mutex_prof_data_t *mutex_prof_data) {
- for (size_t i = 0; i < sec->opts.nshards; i++) {
- malloc_mutex_lock(tsdn, &sec->shards[i].mtx);
- malloc_mutex_prof_accum(tsdn, mutex_prof_data,
- &sec->shards[i].mtx);
- malloc_mutex_unlock(tsdn, &sec->shards[i].mtx);
- }
- }
- void
- sec_prefork2(tsdn_t *tsdn, sec_t *sec) {
- for (size_t i = 0; i < sec->opts.nshards; i++) {
- malloc_mutex_prefork(tsdn, &sec->shards[i].mtx);
- }
- }
- void
- sec_postfork_parent(tsdn_t *tsdn, sec_t *sec) {
- for (size_t i = 0; i < sec->opts.nshards; i++) {
- malloc_mutex_postfork_parent(tsdn, &sec->shards[i].mtx);
- }
- }
- void
- sec_postfork_child(tsdn_t *tsdn, sec_t *sec) {
- for (size_t i = 0; i < sec->opts.nshards; i++) {
- malloc_mutex_postfork_child(tsdn, &sec->shards[i].mtx);
- }
- }
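/*
 * Illustrative fork-handling order (a sketch; the real hooks are driven by
 * jemalloc's staged fork machinery, and the numeric suffix in sec_prefork2
 * reflects its slot in that sequence): every shard mutex is acquired before
 * fork() and then released in the parent or reset in the child.
 */
#if 0
	sec_prefork2(tsdn, sec);        /* pre-fork: lock all shard mutexes */
	/* fork() happens here. */
	sec_postfork_parent(tsdn, sec); /* parent side: unlock the mutexes */
	sec_postfork_child(tsdn, sec);  /* child side: reset the mutexes */
#endif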