- #pragma once
- #include <stdlib.h>
- #include <stdio.h>
- #include <stdarg.h>
- #include <library/cpp/malloc/api/malloc.h>
- #include <util/system/compat.h>
- #include <util/system/compiler.h>
- #include <util/system/types.h>
- #ifdef _MSC_VER
- #ifndef _CRT_SECURE_NO_WARNINGS
- #define _CRT_SECURE_NO_WARNINGS
- #endif
- #ifdef _M_X64
- #define _64_
- #endif
- #include <intrin.h>
- #define WIN32_LEAN_AND_MEAN
- #include <Windows.h>
- #pragma intrinsic(_InterlockedCompareExchange)
- #pragma intrinsic(_InterlockedExchangeAdd)
- #include <new>
- #include <assert.h>
- #include <errno.h>
- #define PERTHREAD __declspec(thread)
- #define _win_
- #define Y_FORCE_INLINE __forceinline
- using TAtomic = volatile long;
- static inline long AtomicAdd(TAtomic& a, long b) {
- return _InterlockedExchangeAdd(&a, b) + b;
- }
- static inline long AtomicSub(TAtomic& a, long b) {
- return AtomicAdd(a, -b);
- }
- #pragma comment(lib, "synchronization.lib")
- #ifndef NDEBUG
- #define Y_ASSERT_NOBT(x) \
- { \
- if (IsDebuggerPresent()) { \
- if (!(x)) \
- __debugbreak(); \
- } else \
- assert(x); \
- }
- #else
- #define Y_ASSERT_NOBT(x) ((void)0)
- #endif
- #else
- #include <util/system/defaults.h>
- #include <util/system/atomic.h>
- #include <util/system/yassert.h>
- #if !defined(NDEBUG) && !defined(__GCCXML__)
- #define Y_ASSERT_NOBT(a) \
- do { \
- try { \
- if (Y_UNLIKELY(!(a))) { \
- if (YaIsDebuggerPresent()) \
- __debugbreak(); \
- else { \
- assert(false && (a)); \
- } \
- } \
- } catch (...) { \
- if (YaIsDebuggerPresent()) \
- __debugbreak(); \
- else { \
- assert(false && "Exception during assert"); \
- } \
- } \
- } while (0)
- #else
- #define Y_ASSERT_NOBT(a) \
- do { \
- if (false) { \
- bool __xxx = static_cast<bool>(a); \
- Y_UNUSED(__xxx); \
- } \
- } while (0)
- #endif
- #include <pthread.h>
- #include <sys/mman.h>
- #include <stdlib.h>
- #include <memory.h>
- #include <new>
- #include <errno.h>
- #if defined(_linux_)
- #include <linux/futex.h>
- #include <sys/syscall.h>
- #if !defined(MADV_HUGEPAGE)
- #define MADV_HUGEPAGE 14
- #endif
- #if !defined(MAP_HUGETLB)
- #define MAP_HUGETLB 0x40000
- #endif
- #endif
- #define PERTHREAD __thread
- #endif
- #ifndef _darwin_
- #ifndef Y_ARRAY_SIZE
- #define Y_ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
- #endif
- #ifndef NDEBUG
- #define DBG_FILL_MEMORY
- static bool FillMemoryOnAllocation = true;
- #endif
- static bool TransparentHugePages = false; // force MADV_HUGEPAGE for large allocs
- static bool MapHugeTLB = false; // force MAP_HUGETLB for small allocs
- static bool EnableDefrag = true;
- // Buffers that are larger than this size will not be filled with 0xcf
- #ifndef DBG_FILL_MAX_SIZE
- #define DBG_FILL_MAX_SIZE 0x01000000000000ULL
- #endif
- template <class T>
- inline T* DoCas(T* volatile* target, T* exchange, T* compare) {
- #if defined(__has_builtin) && __has_builtin(__sync_val_compare_and_swap)
- return __sync_val_compare_and_swap(target, compare, exchange);
- #elif defined(_WIN32)
- #ifdef _64_
- return (T*)_InterlockedCompareExchange64((__int64*)target, (__int64)exchange, (__int64)compare);
- #else
- //return (T*)InterlockedCompareExchangePointer(targetVoidP, exchange, compare);
- return (T*)_InterlockedCompareExchange((LONG*)target, (LONG)exchange, (LONG)compare);
- #endif
- #elif defined(__i386) || defined(__x86_64__)
- union {
- T* volatile* NP;
- void* volatile* VoidP;
- } gccSucks;
- gccSucks.NP = target;
- void* volatile* targetVoidP = gccSucks.VoidP;
- __asm__ __volatile__(
- "lock\n\t"
- "cmpxchg %2,%0\n\t"
- : "+m"(*(targetVoidP)), "+a"(compare)
- : "r"(exchange)
- : "cc", "memory");
- return compare;
- #else
- #error inline_cas not defined for this platform
- #endif
- }
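- // DoCas() is a portable pointer compare-and-swap: it atomically stores `exchange`
- // into *target if *target equals `compare`, and always returns the value *target
- // held before the operation. Callers detect success by comparing the result with
- // `compare`, e.g.:
- //   if (DoCas(&head, newNode, oldHead) == oldHead) { /* we won the race */ }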
- #ifdef _64_
- const uintptr_t N_MAX_WORKSET_SIZE = 0x100000000ll * 200;
- const uintptr_t N_HUGE_AREA_FINISH = 0x700000000000ll;
- #ifndef _freebsd_
- const uintptr_t LINUX_MMAP_AREA_START = 0x100000000ll;
- static uintptr_t volatile linuxAllocPointer = LINUX_MMAP_AREA_START;
- static uintptr_t volatile linuxAllocPointerHuge = LINUX_MMAP_AREA_START + N_MAX_WORKSET_SIZE;
- #endif
- #else
- const uintptr_t N_MAX_WORKSET_SIZE = 0xffffffff;
- #endif
- #define ALLOC_START ((char*)0)
- const size_t N_CHUNK_SIZE = 1024 * 1024;
- const size_t N_CHUNKS = N_MAX_WORKSET_SIZE / N_CHUNK_SIZE;
- const size_t N_LARGE_ALLOC_SIZE = N_CHUNK_SIZE * 128;
- // map size idx to size in bytes
- #ifdef LFALLOC_YT
- const int N_SIZES = 27;
- #else
- const int N_SIZES = 25;
- #endif
- const int nSizeIdxToSize[N_SIZES] = {
- -1,
- #if defined(_64_)
- 16, 16, 32, 32, 48, 64, 96, 128,
- #else
- 8,
- 16,
- 24,
- 32,
- 48,
- 64,
- 96,
- 128,
- #endif
- 192, 256, 384, 512, 768, 1024, 1536, 2048,
- 3072, 4096, 6144, 8192, 12288, 16384, 24576, 32768,
- #ifdef LFALLOC_YT
- 49152, 65536
- #endif
- };
- #ifdef LFALLOC_YT
- const size_t N_MAX_FAST_SIZE = 65536;
- #else
- const size_t N_MAX_FAST_SIZE = 32768;
- #endif
- const unsigned char size2idxArr1[64 + 1] = {
- 1,
- #if defined(_64_)
- 2, 2, 4, 4, // 16, 16, 32, 32
- #else
- 1, 2, 3, 4, // 8, 16, 24, 32
- #endif
- 5, 5, 6, 6, // 48, 64
- 7, 7, 7, 7, 8, 8, 8, 8, // 96, 128
- 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, // 192, 256
- 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, // 384
- 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12 // 512
- };
- #ifdef LFALLOC_YT
- const unsigned char size2idxArr2[256] = {
- #else
- const unsigned char size2idxArr2[128] = {
- #endif
- 12, 12, 13, 14, // 512, 512, 768, 1024
- 15, 15, 16, 16, // 1536, 2048
- 17, 17, 17, 17, 18, 18, 18, 18, // 3072, 4096
- 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, // 6144, 8192
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, // 12288
- 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, // 16384
- 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
- 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, // 24576
- 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
- 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, // 32768
- #ifdef LFALLOC_YT
- 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
- 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
- 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
- 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, // 49152
- 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
- 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
- 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
- 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, // 65536
- #endif
- };
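- // How a request maps to a size class (worked example for the 64-bit build):
- // sizes <= 512 use size2idxArr1 at index 1 + ((size - 1) >> 3), so 100 bytes ->
- // index 13 -> size idx 8 -> nSizeIdxToSize[8] == 128; sizes in (512, N_MAX_FAST_SIZE]
- // use size2idxArr2 at index (size - 1) >> 8, so 1000 bytes -> index 3 ->
- // size idx 14 -> nSizeIdxToSize[14] == 1024.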
- // map entry number to size idx
- // special size idx's: 0 = not used, -1 = mem locked, but not allocated
- static volatile char chunkSizeIdx[N_CHUNKS];
- const int FREE_CHUNK_ARR_BUF = 0x20000; // 0x20000 chunks of 1MB each can track 128GB of free memory; should never be exhausted in practice
- static volatile uintptr_t freeChunkArr[FREE_CHUNK_ARR_BUF];
- static volatile int freeChunkCount;
- static void AddFreeChunk(uintptr_t chunkId) {
- chunkSizeIdx[chunkId] = -1;
- if (Y_UNLIKELY(freeChunkCount == FREE_CHUNK_ARR_BUF))
- NMalloc::AbortFromCorruptedAllocator("free chunks array overflowed");
- freeChunkArr[freeChunkCount++] = chunkId;
- }
- static bool GetFreeChunk(uintptr_t* res) {
- if (freeChunkCount == 0) {
- *res = 0;
- return false;
- }
- *res = freeChunkArr[--freeChunkCount];
- return true;
- }
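- // chunkSizeIdx[] is the central size map: for any small-block pointer p, the chunk
- // is (p - ALLOC_START) / N_CHUNK_SIZE and chunkSizeIdx[chunk] gives the size class
- // of every block in that chunk. This is how LFFree() and LFGetSize() recover a
- // block's size without any per-block header.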
- //////////////////////////////////////////////////////////////////////////
- enum ELFAllocCounter {
- CT_USER_ALLOC, // accumulated size requested by user code
- CT_MMAP, // accumulated mmapped size
- CT_MMAP_CNT, // number of mmapped regions
- CT_MUNMAP, // accumulated munmapped size
- CT_MUNMAP_CNT, // number of munmapped regions
- CT_SYSTEM_ALLOC, // accumulated allocated size for internal lfalloc needs
- CT_SYSTEM_FREE, // accumulated deallocated size for internal lfalloc needs
- CT_SMALL_ALLOC, // accumulated allocated size for fixed-size blocks
- CT_SMALL_FREE, // accumulated deallocated size for fixed-size blocks
- CT_LARGE_ALLOC, // accumulated allocated size for large blocks
- CT_LARGE_FREE, // accumulated deallocated size for large blocks
- CT_SLOW_ALLOC_CNT, // number of slow (not LF) allocations
- CT_DEFRAGMENT_CNT, // number of memory defragmentations
- CT_MAX
- };
- static Y_FORCE_INLINE void IncrementCounter(ELFAllocCounter counter, size_t value);
- //////////////////////////////////////////////////////////////////////////
- enum EMMapMode {
- MM_NORMAL, // memory for small allocs
- MM_HUGE // memory for large allocs
- };
- #ifndef _MSC_VER
- inline void VerifyMmapResult(void* result) {
- if (Y_UNLIKELY(result == MAP_FAILED))
- NMalloc::AbortFromCorruptedAllocator("negative size requested? or just out of mem");
- }
- #endif
- #if !defined(_MSC_VER) && !defined(_freebsd_) && defined(_64_)
- static char* AllocWithMMapLinuxImpl(uintptr_t sz, EMMapMode mode) {
- char* volatile* areaPtr;
- char* areaStart;
- uintptr_t areaFinish;
- int mapProt = PROT_READ | PROT_WRITE;
- int mapFlags = MAP_PRIVATE | MAP_ANON;
- if (mode == MM_HUGE) {
- areaPtr = reinterpret_cast<char* volatile*>(&linuxAllocPointerHuge);
- areaStart = reinterpret_cast<char*>(LINUX_MMAP_AREA_START + N_MAX_WORKSET_SIZE);
- areaFinish = N_HUGE_AREA_FINISH;
- } else {
- areaPtr = reinterpret_cast<char* volatile*>(&linuxAllocPointer);
- areaStart = reinterpret_cast<char*>(LINUX_MMAP_AREA_START);
- areaFinish = N_MAX_WORKSET_SIZE;
- if (MapHugeTLB) {
- mapFlags |= MAP_HUGETLB;
- }
- }
- bool wrapped = false;
- for (;;) {
- char* prevAllocPtr = *areaPtr;
- char* nextAllocPtr = prevAllocPtr + sz;
- if (uintptr_t(nextAllocPtr - (char*)nullptr) >= areaFinish) {
- if (Y_UNLIKELY(wrapped)) {
- NMalloc::AbortFromCorruptedAllocator("virtual memory is overly fragmented");
- }
- // wrap around once the whole area has been used
- DoCas(areaPtr, areaStart, prevAllocPtr);
- wrapped = true;
- continue;
- }
- if (DoCas(areaPtr, nextAllocPtr, prevAllocPtr) != prevAllocPtr)
- continue;
- char* largeBlock = (char*)mmap(prevAllocPtr, sz, mapProt, mapFlags, -1, 0);
- VerifyMmapResult(largeBlock);
- if (largeBlock == prevAllocPtr)
- return largeBlock;
- if (largeBlock)
- munmap(largeBlock, sz);
- if (sz < 0x80000) {
- // skip utilized area with big steps
- DoCas(areaPtr, nextAllocPtr + 0x10 * 0x10000, nextAllocPtr);
- }
- }
- }
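- // Address-space layout used above: small allocations are carved from
- // [LINUX_MMAP_AREA_START, LINUX_MMAP_AREA_START + N_MAX_WORKSET_SIZE), huge
- // allocations from the range above it up to N_HUGE_AREA_FINISH. A shared bump
- // pointer is advanced with CAS and passed to mmap() as a hint; if the kernel maps
- // the block elsewhere, the mapping is discarded and the loop retries.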
- #endif
- static char* AllocWithMMap(uintptr_t sz, EMMapMode mode) {
- (void)mode;
- #ifdef _MSC_VER
- char* largeBlock = (char*)VirtualAlloc(0, sz, MEM_RESERVE, PAGE_READWRITE);
- if (Y_UNLIKELY(largeBlock == nullptr))
- NMalloc::AbortFromCorruptedAllocator("out of memory");
- if (Y_UNLIKELY(uintptr_t(((char*)largeBlock - ALLOC_START) + sz) >= N_MAX_WORKSET_SIZE))
- NMalloc::AbortFromCorruptedAllocator("out of working set, something has broken");
- #else
- #if defined(_freebsd_) || !defined(_64_)
- char* largeBlock = (char*)mmap(0, sz, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
- VerifyMmapResult(largeBlock);
- if (Y_UNLIKELY(uintptr_t(((char*)largeBlock - ALLOC_START) + sz) >= N_MAX_WORKSET_SIZE))
- NMalloc::AbortFromCorruptedAllocator("out of working set, something has broken");
- #else
- char* largeBlock = AllocWithMMapLinuxImpl(sz, mode);
- if (TransparentHugePages) {
- madvise(largeBlock, sz, MADV_HUGEPAGE);
- }
- #endif
- #endif
- Y_ASSERT_NOBT(largeBlock);
- IncrementCounter(CT_MMAP, sz);
- IncrementCounter(CT_MMAP_CNT, 1);
- return largeBlock;
- }
- enum class ELarge : ui8 {
- Free = 0, // block in free cache
- Alloc = 1, // block is allocated
- Gone = 2, // block was unmapped
- };
- struct TLargeBlk {
- static TLargeBlk* As(void *raw) {
- return reinterpret_cast<TLargeBlk*>((char*)raw - 4096ll);
- }
- static const TLargeBlk* As(const void *raw) {
- return reinterpret_cast<const TLargeBlk*>((const char*)raw - 4096ll);
- }
- void SetSize(size_t bytes, size_t pages) {
- Pages = pages;
- Bytes = bytes;
- }
- void Mark(ELarge state) {
- const ui64 marks[] = {
- 0x8b38aa5ca4953c98, // ELarge::Free
- 0xf916d33584eb5087, // ELarge::Alloc
- 0xd33b0eca7651bc3f // ELarge::Gone
- };
- Token = size_t(marks[ui8(state)]);
- }
- size_t Pages; // Total pages allocated with mmap like call
- size_t Bytes; // Actually requested bytes by user
- size_t Token; // Block state token, see ELarge enum.
- };
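- // Large blocks keep their metadata in one extra page mapped immediately below the
- // pointer handed to the user: TLargeBlk::As(p) simply steps back 4096 bytes. The
- // Token field is stamped with a state-specific magic value; the apparent intent is
- // to make Free/Alloc/Gone states recognizable when inspecting corrupted memory.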
- static void LargeBlockUnmap(void* p, size_t pages) {
- const auto bytes = (pages + 1) * uintptr_t(4096);
- IncrementCounter(CT_MUNMAP, bytes);
- IncrementCounter(CT_MUNMAP_CNT, 1);
- #ifdef _MSC_VER
- Y_ASSERT_NOBT(0);
- #else
- TLargeBlk::As(p)->Mark(ELarge::Gone);
- munmap((char*)p - 4096ll, bytes);
- #endif
- }
- //////////////////////////////////////////////////////////////////////////
- const size_t LB_BUF_SIZE = 250;
- const size_t LB_BUF_HASH = 977;
- static int LB_LIMIT_TOTAL_SIZE = 500 * 1024 * 1024 / 4096; // do not keep more than this much memory total in lbFreePtrs[]
- static void* volatile lbFreePtrs[LB_BUF_HASH][LB_BUF_SIZE];
- static TAtomic lbFreePageCount;
- static void* LargeBlockAlloc(size_t _nSize, ELFAllocCounter counter) {
- size_t pgCount = (_nSize + 4095) / 4096;
- #ifdef _MSC_VER
- char* pRes = (char*)VirtualAlloc(0, (pgCount + 1) * 4096ll, MEM_COMMIT, PAGE_READWRITE);
- if (Y_UNLIKELY(pRes == 0)) {
- NMalloc::AbortFromCorruptedAllocator("out of memory");
- }
- #else
- IncrementCounter(counter, pgCount * 4096ll);
- IncrementCounter(CT_SYSTEM_ALLOC, 4096ll);
- int lbHash = pgCount % LB_BUF_HASH;
- for (int i = 0; i < LB_BUF_SIZE; ++i) {
- void* p = lbFreePtrs[lbHash][i];
- if (p == nullptr)
- continue;
- if (DoCas(&lbFreePtrs[lbHash][i], (void*)nullptr, p) == p) {
- size_t realPageCount = TLargeBlk::As(p)->Pages;
- if (realPageCount == pgCount) {
- AtomicAdd(lbFreePageCount, -pgCount);
- TLargeBlk::As(p)->Mark(ELarge::Alloc);
- return p;
- } else {
- if (DoCas(&lbFreePtrs[lbHash][i], p, (void*)nullptr) != (void*)nullptr) {
- // block was freed while we were busy
- AtomicAdd(lbFreePageCount, -realPageCount);
- LargeBlockUnmap(p, realPageCount);
- --i;
- }
- }
- }
- }
- char* pRes = AllocWithMMap((pgCount + 1) * 4096ll, MM_HUGE);
- #endif
- pRes += 4096ll;
- TLargeBlk::As(pRes)->SetSize(_nSize, pgCount);
- TLargeBlk::As(pRes)->Mark(ELarge::Alloc);
- return pRes;
- }
- #ifndef _MSC_VER
- static void FreeAllLargeBlockMem() {
- for (auto& lbFreePtr : lbFreePtrs) {
- for (int i = 0; i < LB_BUF_SIZE; ++i) {
- void* p = lbFreePtr[i];
- if (p == nullptr)
- continue;
- if (DoCas(&lbFreePtr[i], (void*)nullptr, p) == p) {
- int pgCount = TLargeBlk::As(p)->Pages;
- AtomicAdd(lbFreePageCount, -pgCount);
- LargeBlockUnmap(p, pgCount);
- }
- }
- }
- }
- #endif
- static void LargeBlockFree(void* p, ELFAllocCounter counter) {
- if (p == nullptr)
- return;
- #ifdef _MSC_VER
- VirtualFree((char*)p - 4096ll, 0, MEM_RELEASE);
- #else
- size_t pgCount = TLargeBlk::As(p)->Pages;
- TLargeBlk::As(p)->Mark(ELarge::Free);
- IncrementCounter(counter, pgCount * 4096ll);
- IncrementCounter(CT_SYSTEM_FREE, 4096ll);
- if (lbFreePageCount > LB_LIMIT_TOTAL_SIZE)
- FreeAllLargeBlockMem();
- int lbHash = pgCount % LB_BUF_HASH;
- for (int i = 0; i < LB_BUF_SIZE; ++i) {
- if (lbFreePtrs[lbHash][i] == nullptr) {
- if (DoCas(&lbFreePtrs[lbHash][i], p, (void*)nullptr) == nullptr) {
- AtomicAdd(lbFreePageCount, pgCount);
- return;
- }
- }
- }
- LargeBlockUnmap(p, pgCount);
- #endif
- }
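- // Freed large blocks are cached in lbFreePtrs[][], a hash table keyed by page
- // count with LB_BUF_SIZE slots per bucket. Slots are claimed and released with CAS
- // on the slot pointer, so LargeBlockAlloc() and LargeBlockFree() can race without
- // a lock; once the cache grows past LB_LIMIT_TOTAL_SIZE pages, the whole cache is
- // unmapped via FreeAllLargeBlockMem().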
- static void* SystemAlloc(size_t _nSize) {
- //HeapAlloc(GetProcessHeap(), HEAP_GENERATE_EXCEPTIONS, _nSize);
- return LargeBlockAlloc(_nSize, CT_SYSTEM_ALLOC);
- }
- static void SystemFree(void* p) {
- //HeapFree(GetProcessHeap(), 0, p);
- LargeBlockFree(p, CT_SYSTEM_FREE);
- }
- //////////////////////////////////////////////////////////////////////////
- char* const LF_LOCK_FREE = ((char*)0) + 0;
- char* const LF_LOCK_LOCKED = ((char*)0) + 1;
- char* const LF_LOCK_FUTEX_WAIT = ((char*)0) + 2;
- static bool LFHasFutex = true;
- static bool LFCheckedWinVersion = false;
- // TLFLockData has to be zero-initialized explicitly (https://en.cppreference.com/w/cpp/language/zero_initialization),
- // otherwise the TLFLockData() constructor for the global variable might run only after its first use
- struct TLFLockData
- {
- char* Pad1[15];
- char* volatile LockVar; // = LF_LOCK_FREE; // no constructor, zero-initialize manually
- char* Pad2[15];
- bool TryLock()
- {
- return (LockVar == LF_LOCK_FREE && DoCas(&LockVar, LF_LOCK_LOCKED, LF_LOCK_FREE) == LF_LOCK_FREE);
- }
- void FutexWait()
- {
- #ifdef _win_
- if (!LFCheckedWinVersion) {
- OSVERSIONINFOA ver;
- memset(&ver, 0, sizeof(ver));
- ver.dwOSVersionInfoSize = sizeof(OSVERSIONINFOA);
- GetVersionExA(&ver);
- LFHasFutex = (ver.dwMajorVersion > 6) || (ver.dwMajorVersion == 6 && ver.dwMinorVersion >= 2);
- LFCheckedWinVersion = true;
- }
- if (LFHasFutex) {
- if (LockVar == LF_LOCK_LOCKED) {
- DoCas(&LockVar, LF_LOCK_FUTEX_WAIT, LF_LOCK_LOCKED);
- }
- if (LockVar == LF_LOCK_FUTEX_WAIT) {
- char* lockedValue = LF_LOCK_FUTEX_WAIT;
- WaitOnAddress(&LockVar, &lockedValue, sizeof(LockVar), INFINITE);
- }
- } else {
- SwitchToThread();
- }
- #elif defined(_linux_)
- if (LFHasFutex) {
- if (LockVar == LF_LOCK_LOCKED) {
- DoCas(&LockVar, LF_LOCK_FUTEX_WAIT, LF_LOCK_LOCKED);
- }
- if (LockVar == LF_LOCK_FUTEX_WAIT) {
- // Linux futexes only operate on int-sized variables; here we pretend the low bits of LockVar are an int
- syscall(SYS_futex, &LockVar, FUTEX_WAIT_PRIVATE, *(int*)&LF_LOCK_FUTEX_WAIT, 0, 0, 0);
- }
- } else {
- sched_yield();
- }
- #else
- sched_yield();
- #endif
- }
- void Unlock()
- {
- Y_ASSERT_NOBT(LockVar != LF_LOCK_FREE);
- if (DoCas(&LockVar, LF_LOCK_FREE, LF_LOCK_LOCKED) != LF_LOCK_LOCKED) {
- Y_ASSERT_NOBT(LockVar == LF_LOCK_FUTEX_WAIT && LFHasFutex);
- LockVar = LF_LOCK_FREE;
- #ifdef _win_
- WakeByAddressAll((PVOID)&LockVar);
- #elif defined(_linux_)
- syscall(SYS_futex, &LockVar, FUTEX_WAKE_PRIVATE, INT_MAX, 0, 0, 0);
- #endif
- }
- }
- };
- static TLFLockData LFGlobalLock;
- class TLFLockHolder {
- TLFLockData *LockData = nullptr;
- int Attempt = 0;
- int SleepMask = 0x7f;
- public:
- TLFLockHolder() {}
- TLFLockHolder(TLFLockData *lk) {
- while (!TryLock(lk));
- }
- bool TryLock(TLFLockData *lk)
- {
- Y_ASSERT_NOBT(LockData == nullptr);
- if (lk->TryLock()) {
- LockData = lk;
- return true;
- }
- if ((++Attempt & SleepMask) == 0) {
- lk->FutexWait();
- SleepMask = (SleepMask * 2 + 1) & 0x7fff;
- } else {
- #ifdef _MSC_VER
- _mm_pause();
- #elif defined(__i386) || defined(__x86_64__)
- __asm__ __volatile__("pause");
- #endif
- }
- return false;
- }
- ~TLFLockHolder() {
- if (LockData) {
- LockData->Unlock();
- }
- }
- };
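- // Typical usage of the lock (a scope guard; the destructor unlocks):
- //   {
- //       TLFLockHolder ll(&LFGlobalLock);
- //       ... // critical section
- //   }
- // TryLock() spins with pause instructions and escalates to FutexWait() under an
- // exponentially growing sleep mask, so uncontended acquisition stays cheap.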
- //////////////////////////////////////////////////////////////////////////
- class TLFAllocFreeList {
- struct TNode {
- TNode* Next;
- };
- TNode* volatile Head;
- TNode* volatile Pending;
- TAtomic PendingToFreeListCounter;
- TAtomic AllocCount;
- void* Padding;
- static Y_FORCE_INLINE void Enqueue(TNode* volatile* headPtr, TNode* n) {
- for (;;) {
- TNode* volatile prevHead = *headPtr;
- n->Next = prevHead;
- if (DoCas(headPtr, n, prevHead) == prevHead)
- break;
- }
- }
- Y_FORCE_INLINE void* DoAlloc() {
- TNode* res;
- for (res = Head; res; res = Head) {
- TNode* keepNext = res->Next;
- if (DoCas(&Head, keepNext, res) == res) {
- //Y_VERIFY(keepNext == res->Next);
- break;
- }
- }
- return res;
- }
- void FreeList(TNode* fl) {
- if (!fl)
- return;
- TNode* flTail = fl;
- while (flTail->Next)
- flTail = flTail->Next;
- for (;;) {
- TNode* volatile prevHead = Head;
- flTail->Next = prevHead;
- if (DoCas(&Head, fl, prevHead) == prevHead)
- break;
- }
- }
- public:
- Y_FORCE_INLINE void Free(void* ptr) {
- TNode* newFree = (TNode*)ptr;
- if (AtomicAdd(AllocCount, 0) == 0)
- Enqueue(&Head, newFree);
- else
- Enqueue(&Pending, newFree);
- }
- Y_FORCE_INLINE void* Alloc() {
- TAtomic keepCounter = AtomicAdd(PendingToFreeListCounter, 0);
- TNode* fl = Pending;
- if (AtomicAdd(AllocCount, 1) == 1) {
- // No other allocs in progress.
- // If (keepCounter == PendingToFreeListCounter) then Pending was not freed by other threads.
- // Hence Pending is not used in any concurrent DoAlloc() atm and can be safely moved to FreeList
- if (fl && keepCounter == AtomicAdd(PendingToFreeListCounter, 0) && DoCas(&Pending, (TNode*)nullptr, fl) == fl) {
- // pick first element from Pending and return it
- void* res = fl;
- fl = fl->Next;
- // if there are other elements in Pending list, add them to main free list
- FreeList(fl);
- AtomicAdd(PendingToFreeListCounter, 1);
- AtomicAdd(AllocCount, -1);
- return res;
- }
- }
- void* res = DoAlloc();
- AtomicAdd(AllocCount, -1);
- return res;
- }
- void* GetWholeList() {
- TNode* res;
- for (res = Head; res; res = Head) {
- if (DoCas(&Head, (TNode*)nullptr, res) == res)
- break;
- }
- return res;
- }
- void ReturnWholeList(void* ptr) {
- while (AtomicAdd(AllocCount, 0) != 0) // theoretically can run into problems with parallel DoAlloc()
- ; //ThreadYield();
- for (;;) {
- TNode* prevHead = Head;
- if (DoCas(&Head, (TNode*)ptr, prevHead) == prevHead) {
- FreeList(prevHead);
- break;
- }
- }
- }
- };
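- // TLFAllocFreeList is a lock-free stack hardened against the ABA problem: while
- // any Alloc() is in flight (AllocCount != 0), Free() diverts nodes to Pending
- // instead of Head, so a node observed by a concurrent pop cannot be re-pushed
- // underneath it. Alloc() migrates Pending back to Head only when it is the sole
- // allocator and PendingToFreeListCounter shows no other thread drained Pending in
- // the meantime.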
- /////////////////////////////////////////////////////////////////////////
- static TLFAllocFreeList globalFreeLists[N_SIZES];
- static char* volatile globalCurrentPtr[N_SIZES];
- static TLFAllocFreeList blockFreeList;
- // globalFreeLists[] contains TFreeListGroup, each of them points up to 15 free blocks
- const int FL_GROUP_SIZE = 15;
- struct TFreeListGroup {
- TFreeListGroup* Next;
- char* Ptrs[FL_GROUP_SIZE];
- };
- #ifdef _64_
- const int FREE_LIST_GROUP_SIZEIDX = 8;
- #else
- const int FREE_LIST_GROUP_SIZEIDX = 6;
- #endif
- //////////////////////////////////////////////////////////////////////////
- // find fully free chunks and reset their size idx so they can be reused by allocations of a different size
- // does not look at blockFreeList (TFreeListGroup has the same size for all allocation sizes)
- static bool DefragmentMem() {
- if (!EnableDefrag) {
- return false;
- }
- IncrementCounter(CT_DEFRAGMENT_CNT, 1);
- int* nFreeCount = (int*)SystemAlloc(N_CHUNKS * sizeof(int));
- if (Y_UNLIKELY(!nFreeCount)) {
- //__debugbreak();
- NMalloc::AbortFromCorruptedAllocator("debugbreak");
- }
- memset(nFreeCount, 0, N_CHUNKS * sizeof(int));
- TFreeListGroup* wholeLists[N_SIZES];
- for (int nSizeIdx = 0; nSizeIdx < N_SIZES; ++nSizeIdx) {
- wholeLists[nSizeIdx] = (TFreeListGroup*)globalFreeLists[nSizeIdx].GetWholeList();
- for (TFreeListGroup* g = wholeLists[nSizeIdx]; g; g = g->Next) {
- for (auto pData : g->Ptrs) {
- if (pData) {
- uintptr_t nChunk = (pData - ALLOC_START) / N_CHUNK_SIZE;
- ++nFreeCount[nChunk];
- Y_ASSERT_NOBT(chunkSizeIdx[nChunk] == nSizeIdx);
- }
- }
- }
- }
- bool bRes = false;
- for (size_t nChunk = 0; nChunk < N_CHUNKS; ++nChunk) {
- int fc = nFreeCount[nChunk];
- nFreeCount[nChunk] = 0;
- if (chunkSizeIdx[nChunk] <= 0)
- continue;
- int nEntries = N_CHUNK_SIZE / nSizeIdxToSize[static_cast<int>(chunkSizeIdx[nChunk])];
- Y_ASSERT_NOBT(fc <= nEntries); // cannot have more free blocks than the total count
- if (fc == nEntries) {
- bRes = true;
- nFreeCount[nChunk] = 1;
- }
- }
- if (bRes) {
- for (auto& wholeList : wholeLists) {
- TFreeListGroup** ppPtr = &wholeList;
- while (*ppPtr) {
- TFreeListGroup* g = *ppPtr;
- int dst = 0;
- for (auto pData : g->Ptrs) {
- if (pData) {
- uintptr_t nChunk = (pData - ALLOC_START) / N_CHUNK_SIZE;
- if (nFreeCount[nChunk] == 0)
- g->Ptrs[dst++] = pData; // block is not freed, keep pointer
- }
- }
- if (dst == 0) {
- // no valid pointers in group, free it
- *ppPtr = g->Next;
- blockFreeList.Free(g);
- } else {
- // reset invalid pointers to 0
- for (int i = dst; i < FL_GROUP_SIZE; ++i)
- g->Ptrs[i] = nullptr;
- ppPtr = &g->Next;
- }
- }
- }
- for (uintptr_t nChunk = 0; nChunk < N_CHUNKS; ++nChunk) {
- if (!nFreeCount[nChunk])
- continue;
- char* pStart = ALLOC_START + nChunk * N_CHUNK_SIZE;
- #ifdef _win_
- VirtualFree(pStart, N_CHUNK_SIZE, MEM_DECOMMIT);
- #elif defined(_freebsd_)
- madvise(pStart, N_CHUNK_SIZE, MADV_FREE);
- #else
- madvise(pStart, N_CHUNK_SIZE, MADV_DONTNEED);
- #endif
- AddFreeChunk(nChunk);
- }
- }
- for (int nSizeIdx = 0; nSizeIdx < N_SIZES; ++nSizeIdx)
- globalFreeLists[nSizeIdx].ReturnWholeList(wholeLists[nSizeIdx]);
- SystemFree(nFreeCount);
- return bRes;
- }
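- // DefragmentMem() in brief: snapshot all global free lists, count the free blocks
- // per chunk, and for every chunk whose free count equals its total block count,
- // drop its pointers from the lists, release the pages (MEM_DECOMMIT / MADV_FREE /
- // MADV_DONTNEED) and return the chunk to the free pool so a different size class
- // can reuse it.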
- static Y_FORCE_INLINE void* LFAllocFromCurrentChunk(int nSizeIdx, int blockSize, int count) {
- char* volatile* pFreeArray = &globalCurrentPtr[nSizeIdx];
- while (char* newBlock = *pFreeArray) {
- char* nextFree = newBlock + blockSize * count;
- // check if there is space in chunk
- char* globalEndPtr = ALLOC_START + ((newBlock - ALLOC_START) & ~((uintptr_t)N_CHUNK_SIZE - 1)) + N_CHUNK_SIZE;
- if (nextFree >= globalEndPtr) {
- if (nextFree > globalEndPtr)
- break;
- nextFree = nullptr; // it was last block in chunk
- }
- if (DoCas(pFreeArray, nextFree, newBlock) == newBlock)
- return newBlock;
- }
- return nullptr;
- }
- enum EDefrag {
- MEM_DEFRAG,
- NO_MEM_DEFRAG,
- };
- static void* SlowLFAlloc(int nSizeIdx, int blockSize, EDefrag defrag) {
- IncrementCounter(CT_SLOW_ALLOC_CNT, 1);
- TLFLockHolder ls;
- for (;;) {
- bool locked = ls.TryLock(&LFGlobalLock);
- void* res = LFAllocFromCurrentChunk(nSizeIdx, blockSize, 1);
- if (res) {
- return res; // can happen when another thread has allocated a new current chunk
- }
- if (locked) {
- break;
- }
- }
- for (;;) {
- uintptr_t nChunk;
- if (GetFreeChunk(&nChunk)) {
- char* newPlace = ALLOC_START + nChunk * N_CHUNK_SIZE;
- #ifdef _MSC_VER
- void* pTest = VirtualAlloc(newPlace, N_CHUNK_SIZE, MEM_COMMIT, PAGE_READWRITE);
- Y_ASSERT_NOBT(pTest == newPlace);
- #endif
- chunkSizeIdx[nChunk] = (char)nSizeIdx;
- globalCurrentPtr[nSizeIdx] = newPlace + blockSize;
- return newPlace;
- }
- // out of luck, try to defrag
- if (defrag == MEM_DEFRAG && DefragmentMem()) {
- continue;
- }
- char* largeBlock = AllocWithMMap(N_LARGE_ALLOC_SIZE, MM_NORMAL);
- uintptr_t addr = ((largeBlock - ALLOC_START) + N_CHUNK_SIZE - 1) & (~(N_CHUNK_SIZE - 1));
- uintptr_t endAddr = ((largeBlock - ALLOC_START) + N_LARGE_ALLOC_SIZE) & (~(N_CHUNK_SIZE - 1));
- for (uintptr_t p = addr; p < endAddr; p += N_CHUNK_SIZE) {
- uintptr_t chunk = p / N_CHUNK_SIZE;
- Y_ASSERT_NOBT(chunk * N_CHUNK_SIZE == p);
- Y_ASSERT_NOBT(chunkSizeIdx[chunk] == 0);
- AddFreeChunk(chunk);
- }
- }
- return nullptr;
- }
- // allocate single block
- static Y_FORCE_INLINE void* LFAllocNoCache(int nSizeIdx, EDefrag defrag) {
- int blockSize = nSizeIdxToSize[nSizeIdx];
- void* res = LFAllocFromCurrentChunk(nSizeIdx, blockSize, 1);
- if (res)
- return res;
- return SlowLFAlloc(nSizeIdx, blockSize, defrag);
- }
- // allocate multiple blocks, returns number of blocks allocated (max FL_GROUP_SIZE)
- // buf should have space for at least FL_GROUP_SIZE elems
- static Y_FORCE_INLINE int LFAllocNoCacheMultiple(int nSizeIdx, char** buf) {
- int blockSize = nSizeIdxToSize[nSizeIdx];
- void* res = LFAllocFromCurrentChunk(nSizeIdx, blockSize, FL_GROUP_SIZE);
- if (res) {
- char* resPtr = (char*)res;
- for (int k = 0; k < FL_GROUP_SIZE; ++k) {
- buf[k] = resPtr;
- resPtr += blockSize;
- }
- return FL_GROUP_SIZE;
- }
- buf[0] = (char*)SlowLFAlloc(nSizeIdx, blockSize, MEM_DEFRAG);
- return 1;
- }
- // take several blocks from global free list (max FL_GROUP_SIZE blocks), returns number of blocks taken
- // buf should have space for at least FL_GROUP_SIZE elems
- static Y_FORCE_INLINE int TakeBlocksFromGlobalFreeList(int nSizeIdx, char** buf) {
- TLFAllocFreeList& fl = globalFreeLists[nSizeIdx];
- TFreeListGroup* g = (TFreeListGroup*)fl.Alloc();
- if (g) {
- int resCount = 0;
- for (auto& ptr : g->Ptrs) {
- if (ptr)
- buf[resCount++] = ptr;
- else
- break;
- }
- blockFreeList.Free(g);
- return resCount;
- }
- return 0;
- }
- // add several blocks to global free list
- static Y_FORCE_INLINE void PutBlocksToGlobalFreeList(ptrdiff_t nSizeIdx, char** buf, int count) {
- for (int startIdx = 0; startIdx < count;) {
- TFreeListGroup* g = (TFreeListGroup*)blockFreeList.Alloc();
- Y_ASSERT_NOBT(sizeof(TFreeListGroup) == nSizeIdxToSize[FREE_LIST_GROUP_SIZEIDX]);
- if (!g) {
- g = (TFreeListGroup*)LFAllocNoCache(FREE_LIST_GROUP_SIZEIDX, NO_MEM_DEFRAG);
- }
- int groupSize = count - startIdx;
- if (groupSize > FL_GROUP_SIZE)
- groupSize = FL_GROUP_SIZE;
- for (int i = 0; i < groupSize; ++i)
- g->Ptrs[i] = buf[startIdx + i];
- for (int i = groupSize; i < FL_GROUP_SIZE; ++i)
- g->Ptrs[i] = nullptr;
- // add free group to the global list
- TLFAllocFreeList& fl = globalFreeLists[nSizeIdx];
- fl.Free(g);
- startIdx += groupSize;
- }
- }
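- // Blocks travel between thread caches and the global lists in groups of up to
- // FL_GROUP_SIZE (15) pointers packed into a TFreeListGroup, so a single CAS on the
- // global list moves 15 blocks at once. The groups themselves are recycled through
- // blockFreeList and, when it runs dry, allocated as ordinary blocks of size class
- // FREE_LIST_GROUP_SIZEIDX.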
- //////////////////////////////////////////////////////////////////////////
- static TAtomic GlobalCounters[CT_MAX];
- const int MAX_LOCAL_UPDATES = 100;
- const intptr_t MAX_LOCAL_DELTA = 1*1024*1024;
- struct TLocalCounter {
- intptr_t Value;
- int Updates;
- TAtomic* Parent;
- Y_FORCE_INLINE void Init(TAtomic* parent) {
- Parent = parent;
- Value = 0;
- Updates = 0;
- }
- Y_FORCE_INLINE void Increment(size_t value) {
- Value += value;
- if (++Updates > MAX_LOCAL_UPDATES || Value > MAX_LOCAL_DELTA) {
- Flush();
- }
- }
- Y_FORCE_INLINE void Flush() {
- AtomicAdd(*Parent, Value);
- Value = 0;
- Updates = 0;
- }
- };
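- // Per-thread counters batch statistics updates: Increment() touches only
- // thread-local state and flushes to the shared TAtomic parent after
- // MAX_LOCAL_UPDATES updates or once the local delta exceeds MAX_LOCAL_DELTA,
- // keeping atomic traffic off the allocation fast path. GetLFAllocCounterFull()
- // compensates by summing the global value with every thread's unflushed residue.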
- ////////////////////////////////////////////////////////////////////////////////
- // DBG stuff
- ////////////////////////////////////////////////////////////////////////////////
- #if defined(LFALLOC_DBG)
- struct TPerTagAllocCounter {
- TAtomic Size;
- TAtomic Count;
- Y_FORCE_INLINE void Alloc(size_t size) {
- AtomicAdd(Size, size);
- AtomicAdd(Count, 1);
- }
- Y_FORCE_INLINE void Free(size_t size) {
- AtomicSub(Size, size);
- AtomicSub(Count, 1);
- }
- };
- struct TLocalPerTagAllocCounter {
- intptr_t Size;
- int Count;
- int Updates;
- Y_FORCE_INLINE void Init() {
- Size = 0;
- Count = 0;
- Updates = 0;
- }
- Y_FORCE_INLINE void Alloc(TPerTagAllocCounter& parent, size_t size) {
- Size += size;
- ++Count;
- if (++Updates > MAX_LOCAL_UPDATES) {
- Flush(parent);
- }
- }
- Y_FORCE_INLINE void Free(TPerTagAllocCounter& parent, size_t size) {
- Size -= size;
- --Count;
- if (++Updates > MAX_LOCAL_UPDATES) {
- Flush(parent);
- }
- }
- Y_FORCE_INLINE void Flush(TPerTagAllocCounter& parent) {
- AtomicAdd(parent.Size, Size);
- Size = 0;
- AtomicAdd(parent.Count, Count);
- Count = 0;
- Updates = 0;
- }
- };
- static const int DBG_ALLOC_MAX_TAG = 1000;
- static const int DBG_ALLOC_ALIGNED_TAG = 0xF0000000;
- static const int DBG_ALLOC_NUM_SIZES = 30;
- static TPerTagAllocCounter GlobalPerTagAllocCounters[DBG_ALLOC_MAX_TAG][DBG_ALLOC_NUM_SIZES];
- #endif // LFALLOC_DBG
- //////////////////////////////////////////////////////////////////////////
- const int THREAD_BUF = 256;
- static int borderSizes[N_SIZES];
- const int MAX_MEM_PER_SIZE_PER_THREAD = 512 * 1024;
- struct TThreadAllocInfo {
- // FreePtrs - per-size stacks of cached free block pointers for this thread
- // FreePtrIndex - index of the first occupied slot; THREAD_BUF means the stack is empty
- char* FreePtrs[N_SIZES][THREAD_BUF];
- int FreePtrIndex[N_SIZES];
- TThreadAllocInfo* pNextInfo;
- TLocalCounter LocalCounters[CT_MAX];
- #if defined(LFALLOC_DBG)
- TLocalPerTagAllocCounter LocalPerTagAllocCounters[DBG_ALLOC_MAX_TAG][DBG_ALLOC_NUM_SIZES];
- #endif
- #ifdef _win_
- HANDLE hThread;
- #endif
- void Init(TThreadAllocInfo** pHead) {
- memset(this, 0, sizeof(*this));
- for (auto& i : FreePtrIndex)
- i = THREAD_BUF;
- #ifdef _win_
- BOOL b = DuplicateHandle(
- GetCurrentProcess(), GetCurrentThread(),
- GetCurrentProcess(), &hThread,
- 0, FALSE, DUPLICATE_SAME_ACCESS);
- Y_ASSERT_NOBT(b);
- #endif
- pNextInfo = *pHead;
- *pHead = this;
- for (int k = 0; k < N_SIZES; ++k) {
- int maxCount = MAX_MEM_PER_SIZE_PER_THREAD / nSizeIdxToSize[k];
- if (maxCount > THREAD_BUF)
- maxCount = THREAD_BUF;
- borderSizes[k] = THREAD_BUF - maxCount;
- }
- for (int i = 0; i < CT_MAX; ++i) {
- LocalCounters[i].Init(&GlobalCounters[i]);
- }
- #if defined(LFALLOC_DBG)
- for (int tag = 0; tag < DBG_ALLOC_MAX_TAG; ++tag) {
- for (int sizeIdx = 0; sizeIdx < DBG_ALLOC_NUM_SIZES; ++sizeIdx) {
- auto& local = LocalPerTagAllocCounters[tag][sizeIdx];
- local.Init();
- }
- }
- #endif
- }
- void Done() {
- for (auto sizeIdx : FreePtrIndex) {
- Y_ASSERT_NOBT(sizeIdx == THREAD_BUF);
- }
- for (auto& localCounter : LocalCounters) {
- localCounter.Flush();
- }
- #if defined(LFALLOC_DBG)
- for (int tag = 0; tag < DBG_ALLOC_MAX_TAG; ++tag) {
- for (int sizeIdx = 0; sizeIdx < DBG_ALLOC_NUM_SIZES; ++sizeIdx) {
- auto& local = LocalPerTagAllocCounters[tag][sizeIdx];
- auto& global = GlobalPerTagAllocCounters[tag][sizeIdx];
- local.Flush(global);
- }
- }
- #endif
- #ifdef _win_
- if (hThread)
- CloseHandle(hThread);
- #endif
- }
- };
- PERTHREAD TThreadAllocInfo* pThreadInfo;
- static TThreadAllocInfo* pThreadInfoList;
- static TLFLockData LFLockThreadInfo;
- static Y_FORCE_INLINE void IncrementCounter(ELFAllocCounter counter, size_t value) {
- #ifdef LFALLOC_YT
- TThreadAllocInfo* thr = pThreadInfo;
- if (thr) {
- thr->LocalCounters[counter].Increment(value);
- } else {
- AtomicAdd(GlobalCounters[counter], value);
- }
- #endif
- }
- extern "C" i64 GetLFAllocCounterFast(int counter) {
- #ifdef LFALLOC_YT
- return GlobalCounters[counter];
- #else
- return 0;
- #endif
- }
- extern "C" i64 GetLFAllocCounterFull(int counter) {
- #ifdef LFALLOC_YT
- i64 ret = GlobalCounters[counter];
- {
- TLFLockHolder ll(&LFLockThreadInfo);
- for (TThreadAllocInfo** p = &pThreadInfoList; *p;) {
- TThreadAllocInfo* pInfo = *p;
- ret += pInfo->LocalCounters[counter].Value;
- p = &pInfo->pNextInfo;
- }
- }
- return ret;
- #else
- return 0;
- #endif
- }
- static void MoveSingleThreadFreeToGlobal(TThreadAllocInfo* pInfo) {
- for (int sizeIdx = 0; sizeIdx < N_SIZES; ++sizeIdx) {
- int& freePtrIdx = pInfo->FreePtrIndex[sizeIdx];
- char** freePtrs = pInfo->FreePtrs[sizeIdx];
- PutBlocksToGlobalFreeList(sizeIdx, freePtrs + freePtrIdx, THREAD_BUF - freePtrIdx);
- freePtrIdx = THREAD_BUF;
- }
- }
- #ifdef _win_
- static bool IsDeadThread(TThreadAllocInfo* pInfo) {
- DWORD dwExit;
- bool isDead = !GetExitCodeThread(pInfo->hThread, &dwExit) || dwExit != STILL_ACTIVE;
- return isDead;
- }
- static void CleanupAfterDeadThreads() {
- TLFLockHolder ll(&LFLockThreadInfo);
- for (TThreadAllocInfo** p = &pThreadInfoList; *p;) {
- TThreadAllocInfo* pInfo = *p;
- if (IsDeadThread(pInfo)) {
- MoveSingleThreadFreeToGlobal(pInfo);
- pInfo->Done();
- *p = pInfo->pNextInfo;
- SystemFree(pInfo);
- } else
- p = &pInfo->pNextInfo;
- }
- }
- #endif
- #ifndef _win_
- static pthread_key_t ThreadCacheCleaner;
- static void* volatile ThreadCacheCleanerStarted; // 0 = not started, -1 = started, -2 = is starting
- static PERTHREAD bool IsStoppingThread;
- static void FreeThreadCache(void*) {
- TThreadAllocInfo* pToDelete = nullptr;
- {
- TLFLockHolder ll(&LFLockThreadInfo);
- pToDelete = pThreadInfo;
- if (pToDelete == nullptr)
- return;
- // remove from the list
- for (TThreadAllocInfo** p = &pThreadInfoList; *p; p = &(*p)->pNextInfo) {
- if (*p == pToDelete) {
- *p = pToDelete->pNextInfo;
- break;
- }
- }
- IsStoppingThread = true;
- pThreadInfo = nullptr;
- }
- // free per thread buf
- MoveSingleThreadFreeToGlobal(pToDelete);
- pToDelete->Done();
- SystemFree(pToDelete);
- }
- #endif
- static void AllocThreadInfo() {
- #ifndef _win_
- if (DoCas(&ThreadCacheCleanerStarted, (void*)-2, (void*)nullptr) == (void*)nullptr) {
- pthread_key_create(&ThreadCacheCleaner, FreeThreadCache);
- ThreadCacheCleanerStarted = (void*)-1;
- }
- if (ThreadCacheCleanerStarted != (void*)-1)
- return; // do not use ThreadCacheCleaner until it is constructed
- {
- if (IsStoppingThread)
- return;
- TLFLockHolder ll(&LFLockThreadInfo);
- if (IsStoppingThread) // better safe than sorry
- return;
- pThreadInfo = (TThreadAllocInfo*)SystemAlloc(sizeof(TThreadAllocInfo));
- pThreadInfo->Init(&pThreadInfoList);
- }
- pthread_setspecific(ThreadCacheCleaner, (void*)-1); // the destructor is only called for keys with a non-null value
- #else
- CleanupAfterDeadThreads();
- {
- pThreadInfo = (TThreadAllocInfo*)SystemAlloc(sizeof(TThreadAllocInfo));
- TLFLockHolder ll(&LFLockThreadInfo);
- pThreadInfo->Init(&pThreadInfoList);
- }
- #endif
- }
- //////////////////////////////////////////////////////////////////////////
- // DBG stuff
- //////////////////////////////////////////////////////////////////////////
- #if defined(LFALLOC_DBG)
- struct TAllocHeader {
- uint64_t Size;
- int Tag;
- int Cookie;
- };
- // should be power of 2
- static_assert(sizeof(TAllocHeader) == 16);
- static inline void* GetAllocPtr(TAllocHeader* p) {
- return p + 1;
- }
- static inline TAllocHeader* GetAllocHeader(void* p) {
- auto* header = ((TAllocHeader*)p) - 1;
- if (header->Tag == DBG_ALLOC_ALIGNED_TAG) {
- return (TAllocHeader*)header->Size;
- }
- return header;
- }
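- // In debug builds every allocation is preceded by a 16-byte TAllocHeader. For
- // over-aligned allocations (see LFPosixMemalign below) the header right before the
- // user pointer is a stub tagged DBG_ALLOC_ALIGNED_TAG whose Size field stores the
- // address of the real header; GetAllocHeader() follows that single level of
- // indirection.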
- PERTHREAD int AllocationTag;
- extern "C" int SetThreadAllocTag(int tag) {
- int prevTag = AllocationTag;
- if (tag < DBG_ALLOC_MAX_TAG && tag >= 0) {
- AllocationTag = tag;
- }
- return prevTag;
- }
- PERTHREAD bool ProfileCurrentThread;
- extern "C" bool SetProfileCurrentThread(bool newVal) {
- bool prevVal = ProfileCurrentThread;
- ProfileCurrentThread = newVal;
- return prevVal;
- }
- static volatile bool ProfileAllThreads;
- extern "C" bool SetProfileAllThreads(bool newVal) {
- bool prevVal = ProfileAllThreads;
- ProfileAllThreads = newVal;
- return prevVal;
- }
- static volatile bool AllocationSamplingEnabled;
- extern "C" bool SetAllocationSamplingEnabled(bool newVal) {
- bool prevVal = AllocationSamplingEnabled;
- AllocationSamplingEnabled = newVal;
- return prevVal;
- }
- static size_t AllocationSampleRate = 1000;
- extern "C" size_t SetAllocationSampleRate(size_t newVal) {
- size_t prevVal = AllocationSampleRate;
- AllocationSampleRate = newVal;
- return prevVal;
- }
- static size_t AllocationSampleMaxSize = N_MAX_FAST_SIZE;
- extern "C" size_t SetAllocationSampleMaxSize(size_t newVal) {
- size_t prevVal = AllocationSampleMaxSize;
- AllocationSampleMaxSize = newVal;
- return prevVal;
- }
- using TAllocationCallback = int(int tag, size_t size, int sizeIdx);
- static TAllocationCallback* AllocationCallback;
- extern "C" TAllocationCallback* SetAllocationCallback(TAllocationCallback* newVal) {
- TAllocationCallback* prevVal = AllocationCallback;
- AllocationCallback = newVal;
- return prevVal;
- }
- using TDeallocationCallback = void(int cookie, int tag, size_t size, int sizeIdx);
- static TDeallocationCallback* DeallocationCallback;
- extern "C" TDeallocationCallback* SetDeallocationCallback(TDeallocationCallback* newVal) {
- TDeallocationCallback* prevVal = DeallocationCallback;
- DeallocationCallback = newVal;
- return prevVal;
- }
- PERTHREAD TAtomic AllocationsCount;
- PERTHREAD bool InAllocationCallback;
- static const int DBG_ALLOC_INVALID_COOKIE = -1;
- static inline int SampleAllocation(TAllocHeader* p, int sizeIdx) {
- int cookie = DBG_ALLOC_INVALID_COOKIE;
- if (AllocationSamplingEnabled && (ProfileCurrentThread || ProfileAllThreads) && !InAllocationCallback) {
- if (p->Size > AllocationSampleMaxSize || ++AllocationsCount % AllocationSampleRate == 0) {
- if (AllocationCallback) {
- InAllocationCallback = true;
- cookie = AllocationCallback(p->Tag, p->Size, sizeIdx);
- InAllocationCallback = false;
- }
- }
- }
- return cookie;
- }
- static inline void SampleDeallocation(TAllocHeader* p, int sizeIdx) {
- if (p->Cookie != DBG_ALLOC_INVALID_COOKIE && !InAllocationCallback) {
- if (DeallocationCallback) {
- InAllocationCallback = true;
- DeallocationCallback(p->Cookie, p->Tag, p->Size, sizeIdx);
- InAllocationCallback = false;
- }
- }
- }
- static inline void TrackPerTagAllocation(TAllocHeader* p, int sizeIdx) {
- if (p->Tag < DBG_ALLOC_MAX_TAG && p->Tag >= 0) {
- Y_ASSERT_NOBT(sizeIdx < DBG_ALLOC_NUM_SIZES);
- auto& global = GlobalPerTagAllocCounters[p->Tag][sizeIdx];
- TThreadAllocInfo* thr = pThreadInfo;
- if (thr) {
- auto& local = thr->LocalPerTagAllocCounters[p->Tag][sizeIdx];
- local.Alloc(global, p->Size);
- } else {
- global.Alloc(p->Size);
- }
- }
- }
- static inline void TrackPerTagDeallocation(TAllocHeader* p, int sizeIdx) {
- if (p->Tag < DBG_ALLOC_MAX_TAG && p->Tag >= 0) {
- Y_ASSERT_NOBT(sizeIdx < DBG_ALLOC_NUM_SIZES);
- auto& global = GlobalPerTagAllocCounters[p->Tag][sizeIdx];
- TThreadAllocInfo* thr = pThreadInfo;
- if (thr) {
- auto& local = thr->LocalPerTagAllocCounters[p->Tag][sizeIdx];
- local.Free(global, p->Size);
- } else {
- global.Free(p->Size);
- }
- }
- }
- static void* TrackAllocation(void* ptr, size_t size, int sizeIdx) {
- TAllocHeader* p = (TAllocHeader*)ptr;
- p->Size = size;
- p->Tag = AllocationTag;
- p->Cookie = SampleAllocation(p, sizeIdx);
- TrackPerTagAllocation(p, sizeIdx);
- return GetAllocPtr(p);
- }
- static void TrackDeallocation(void* ptr, int sizeIdx) {
- TAllocHeader* p = (TAllocHeader*)ptr;
- SampleDeallocation(p, sizeIdx);
- TrackPerTagDeallocation(p, sizeIdx);
- }
- struct TPerTagAllocInfo {
- ssize_t Count;
- ssize_t Size;
- };
- extern "C" void GetPerTagAllocInfo(
- bool flushPerThreadCounters,
- TPerTagAllocInfo* info,
- int& maxTag,
- int& numSizes) {
- maxTag = DBG_ALLOC_MAX_TAG;
- numSizes = DBG_ALLOC_NUM_SIZES;
- if (info) {
- if (flushPerThreadCounters) {
- TLFLockHolder ll(&LFLockThreadInfo);
- for (TThreadAllocInfo** p = &pThreadInfoList; *p;) {
- TThreadAllocInfo* pInfo = *p;
- for (int tag = 0; tag < DBG_ALLOC_MAX_TAG; ++tag) {
- for (int sizeIdx = 0; sizeIdx < DBG_ALLOC_NUM_SIZES; ++sizeIdx) {
- auto& local = pInfo->LocalPerTagAllocCounters[tag][sizeIdx];
- auto& global = GlobalPerTagAllocCounters[tag][sizeIdx];
- local.Flush(global);
- }
- }
- p = &pInfo->pNextInfo;
- }
- }
- for (int tag = 0; tag < DBG_ALLOC_MAX_TAG; ++tag) {
- for (int sizeIdx = 0; sizeIdx < DBG_ALLOC_NUM_SIZES; ++sizeIdx) {
- auto& global = GlobalPerTagAllocCounters[tag][sizeIdx];
- auto& res = info[tag * DBG_ALLOC_NUM_SIZES + sizeIdx];
- res.Count = global.Count;
- res.Size = global.Size;
- }
- }
- }
- }
- #endif // LFALLOC_DBG
- //////////////////////////////////////////////////////////////////////////
- static Y_FORCE_INLINE void* LFAllocImpl(size_t _nSize) {
- #if defined(LFALLOC_DBG)
- size_t size = _nSize;
- _nSize += sizeof(TAllocHeader);
- #endif
- IncrementCounter(CT_USER_ALLOC, _nSize);
- int nSizeIdx;
- if (_nSize > 512) {
- if (_nSize > N_MAX_FAST_SIZE) {
- void* ptr = LargeBlockAlloc(_nSize, CT_LARGE_ALLOC);
- #if defined(LFALLOC_DBG)
- ptr = TrackAllocation(ptr, size, N_SIZES);
- #endif
- return ptr;
- }
- nSizeIdx = size2idxArr2[(_nSize - 1) >> 8];
- } else
- nSizeIdx = size2idxArr1[1 + (((int)_nSize - 1) >> 3)];
- IncrementCounter(CT_SMALL_ALLOC, nSizeIdxToSize[nSizeIdx]);
- // check per thread buffer
- TThreadAllocInfo* thr = pThreadInfo;
- if (!thr) {
- AllocThreadInfo();
- thr = pThreadInfo;
- if (!thr) {
- void* ptr = LFAllocNoCache(nSizeIdx, MEM_DEFRAG);
- #if defined(LFALLOC_DBG)
- ptr = TrackAllocation(ptr, size, nSizeIdx);
- #endif
- return ptr;
- }
- }
- {
- int& freePtrIdx = thr->FreePtrIndex[nSizeIdx];
- if (freePtrIdx < THREAD_BUF) {
- void* ptr = thr->FreePtrs[nSizeIdx][freePtrIdx++];
- #if defined(LFALLOC_DBG)
- ptr = TrackAllocation(ptr, size, nSizeIdx);
- #endif
- return ptr;
- }
- // try to alloc from global free list
- char* buf[FL_GROUP_SIZE];
- int count = TakeBlocksFromGlobalFreeList(nSizeIdx, buf);
- if (count == 0) {
- count = LFAllocNoCacheMultiple(nSizeIdx, buf);
- if (count == 0) {
- NMalloc::AbortFromCorruptedAllocator("no way LFAllocNoCacheMultiple() can fail");
- }
- }
- char** dstBuf = thr->FreePtrs[nSizeIdx] + freePtrIdx - 1;
- for (int i = 0; i < count - 1; ++i)
- dstBuf[-i] = buf[i];
- freePtrIdx -= count - 1;
- void* ptr = buf[count - 1];
- #if defined(LFALLOC_DBG)
- ptr = TrackAllocation(ptr, size, nSizeIdx);
- #endif
- return ptr;
- }
- }
- static Y_FORCE_INLINE void* LFAlloc(size_t _nSize) {
- void* res = LFAllocImpl(_nSize);
- #ifdef DBG_FILL_MEMORY
- if (FillMemoryOnAllocation && res && (_nSize <= DBG_FILL_MAX_SIZE)) {
- memset(res, 0xcf, _nSize);
- }
- #endif
- return res;
- }
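- // In debug builds fresh allocations are filled with 0xcf and freed small blocks
- // with 0xfe (see LFFree below), so reads of uninitialized or freed memory show a
- // recognizable pattern in the debugger.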
- static Y_FORCE_INLINE void LFFree(void* p) {
- #if defined(LFALLOC_DBG)
- if (p == nullptr)
- return;
- p = GetAllocHeader(p);
- #endif
- uintptr_t chkOffset = ((char*)p - ALLOC_START) - 1ll;
- if (chkOffset >= N_MAX_WORKSET_SIZE) {
- if (p == nullptr)
- return;
- #if defined(LFALLOC_DBG)
- TrackDeallocation(p, N_SIZES);
- #endif
- LargeBlockFree(p, CT_LARGE_FREE);
- return;
- }
- uintptr_t chunk = ((char*)p - ALLOC_START) / N_CHUNK_SIZE;
- ptrdiff_t nSizeIdx = chunkSizeIdx[chunk];
- if (nSizeIdx <= 0) {
- #if defined(LFALLOC_DBG)
- TrackDeallocation(p, N_SIZES);
- #endif
- LargeBlockFree(p, CT_LARGE_FREE);
- return;
- }
- #if defined(LFALLOC_DBG)
- TrackDeallocation(p, nSizeIdx);
- #endif
- #ifdef DBG_FILL_MEMORY
- memset(p, 0xfe, nSizeIdxToSize[nSizeIdx]);
- #endif
- IncrementCounter(CT_SMALL_FREE, nSizeIdxToSize[nSizeIdx]);
- // try to store info to per thread buf
- TThreadAllocInfo* thr = pThreadInfo;
- if (thr) {
- int& freePtrIdx = thr->FreePtrIndex[nSizeIdx];
- if (freePtrIdx > borderSizes[nSizeIdx]) {
- thr->FreePtrs[nSizeIdx][--freePtrIdx] = (char*)p;
- return;
- }
- // move several pointers to global free list
- int freeCount = FL_GROUP_SIZE;
- if (freeCount > THREAD_BUF - freePtrIdx)
- freeCount = THREAD_BUF - freePtrIdx;
- char** freePtrs = thr->FreePtrs[nSizeIdx];
- PutBlocksToGlobalFreeList(nSizeIdx, freePtrs + freePtrIdx, freeCount);
- freePtrIdx += freeCount;
- freePtrs[--freePtrIdx] = (char*)p;
- } else {
- AllocThreadInfo();
- PutBlocksToGlobalFreeList(nSizeIdx, (char**)&p, 1);
- }
- }
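- // Routing recap for LFFree() above: pointers outside the small-block working set
- // (and nullptr, courtesy of the -1 offset trick) go to LargeBlockFree(); everything
- // else is classified via chunkSizeIdx[] and pushed onto the per-thread cache,
- // spilling FL_GROUP_SIZE pointers to the global list whenever the cache fills up.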
- static size_t LFGetSize(const void* p) {
- #if defined(LFALLOC_DBG)
- if (p == nullptr)
- return 0;
- return GetAllocHeader(const_cast<void*>(p))->Size;
- #endif
- if (p == nullptr)
- return 0; // match malloc_usable_size(nullptr) == 0
- uintptr_t chkOffset = ((const char*)p - ALLOC_START);
- if (chkOffset >= N_MAX_WORKSET_SIZE) {
- return TLargeBlk::As(p)->Pages * 4096ll;
- }
- uintptr_t chunk = ((const char*)p - ALLOC_START) / N_CHUNK_SIZE;
- ptrdiff_t nSizeIdx = chunkSizeIdx[chunk];
- if (nSizeIdx <= 0)
- return TLargeBlk::As(p)->Pages * 4096ll;
- return nSizeIdxToSize[nSizeIdx];
- }
- ////////////////////////////////////////////////////////////////////////////////////////////////////
- // Output mem alloc stats
- const int N_PAGE_SIZE = 4096;
- static void DebugTraceMMgr(const char* pszFormat, ...) // __cdecl
- {
- static char buff[20000];
- va_list va;
- //
- va_start(va, pszFormat);
- vsnprintf(buff, sizeof(buff), pszFormat, va);
- va_end(va);
- //
- #ifdef _win_
- OutputDebugStringA(buff);
- #else
- fputs(buff, stderr);
- #endif
- }
- struct TChunkStats {
- char *Start, *Finish;
- i64 Size;
- char* Entries;
- i64 FreeCount;
- TChunkStats(size_t chunk, i64 size, char* entries)
- : Size(size)
- , Entries(entries)
- , FreeCount(0)
- {
- Start = ALLOC_START + chunk * N_CHUNK_SIZE;
- Finish = Start + N_CHUNK_SIZE;
- }
- void CheckBlock(char* pBlock) {
- if (pBlock && pBlock >= Start && pBlock < Finish) {
- ++FreeCount;
- i64 nShift = pBlock - Start;
- i64 nOffsetInStep = nShift & (N_CHUNK_SIZE - 1);
- Entries[nOffsetInStep / Size] = 1;
- }
- }
- void SetGlobalFree(char* ptr) {
- i64 nShift = ptr - Start;
- i64 nOffsetInStep = nShift & (N_CHUNK_SIZE - 1);
- while (nOffsetInStep + Size <= N_CHUNK_SIZE) {
- ++FreeCount;
- Entries[nOffsetInStep / Size] = 1;
- nOffsetInStep += Size;
- }
- }
- };
- static void DumpMemoryBlockUtilizationLocked() {
- TFreeListGroup* wholeLists[N_SIZES];
- for (int nSizeIdx = 0; nSizeIdx < N_SIZES; ++nSizeIdx) {
- wholeLists[nSizeIdx] = (TFreeListGroup*)globalFreeLists[nSizeIdx].GetWholeList();
- }
- char* bfList = (char*)blockFreeList.GetWholeList();
- DebugTraceMMgr("memory blocks utilisation stats:\n");
- i64 nTotalAllocated = 0, nTotalFree = 0, nTotalBadPages = 0, nTotalPages = 0, nTotalUsed = 0, nTotalLocked = 0;
- i64 nTotalGroupBlocks = 0;
- char* entries;
- entries = (char*)SystemAlloc((N_CHUNK_SIZE / 4));
- for (size_t k = 0; k < N_CHUNKS; ++k) {
- if (chunkSizeIdx[k] <= 0) {
- if (chunkSizeIdx[k] == -1)
- nTotalLocked += N_CHUNK_SIZE;
- continue;
- }
- i64 nSizeIdx = chunkSizeIdx[k];
- i64 nSize = nSizeIdxToSize[nSizeIdx];
- TChunkStats cs(k, nSize, entries);
- int nEntriesTotal = N_CHUNK_SIZE / nSize;
- memset(entries, 0, nEntriesTotal);
- for (TFreeListGroup* g = wholeLists[nSizeIdx]; g; g = g->Next) {
- for (auto& ptr : g->Ptrs)
- cs.CheckBlock(ptr);
- }
- TChunkStats csGB(k, nSize, entries);
- if (nSizeIdx == FREE_LIST_GROUP_SIZEIDX) {
- for (auto g : wholeLists) {
- for (; g; g = g->Next)
- csGB.CheckBlock((char*)g);
- }
- for (char* blk = bfList; blk; blk = *(char**)blk)
- csGB.CheckBlock(blk);
- nTotalGroupBlocks += csGB.FreeCount * nSize;
- }
- if (((globalCurrentPtr[nSizeIdx] - ALLOC_START) / N_CHUNK_SIZE) == k)
- cs.SetGlobalFree(globalCurrentPtr[nSizeIdx]);
- nTotalUsed += (nEntriesTotal - cs.FreeCount - csGB.FreeCount) * nSize;
- char pages[N_CHUNK_SIZE / N_PAGE_SIZE];
- memset(pages, 0, sizeof(pages));
- for (int i = 0, nShift = 0; i < nEntriesTotal; ++i, nShift += nSize) {
- int nBit = 0;
- if (entries[i])
- nBit = 1; // free entry
- else
- nBit = 2; // used entry
- for (i64 nDelta = nSize - 1; nDelta >= 0; nDelta -= N_PAGE_SIZE)
- pages[(nShift + nDelta) / N_PAGE_SIZE] |= nBit;
- }
- i64 nBadPages = 0;
- for (auto page : pages) {
- nBadPages += page == 3;
- nTotalPages += page != 1;
- }
- DebugTraceMMgr("entry = %lld; size = %lld; free = %lld; system %lld; utilisation = %g%%, fragmentation = %g%%\n",
- k, nSize, cs.FreeCount * nSize, csGB.FreeCount * nSize,
- (N_CHUNK_SIZE - cs.FreeCount * nSize) * 100.0f / N_CHUNK_SIZE, 100.0f * nBadPages / Y_ARRAY_SIZE(pages));
- nTotalAllocated += N_CHUNK_SIZE;
- nTotalFree += cs.FreeCount * nSize;
- nTotalBadPages += nBadPages;
- }
- SystemFree(entries);
- DebugTraceMMgr("Total allocated = %llu, free = %lld, system = %lld, locked for future use %lld, utilisation = %g, fragmentation = %g\n",
- nTotalAllocated, nTotalFree, nTotalGroupBlocks, nTotalLocked,
- 100.0f * (nTotalAllocated - nTotalFree) / nTotalAllocated, 100.0f * nTotalBadPages / nTotalPages);
- DebugTraceMMgr("Total %lld bytes used, %lld bytes in used pages\n", nTotalUsed, nTotalPages * N_PAGE_SIZE);
- for (int nSizeIdx = 0; nSizeIdx < N_SIZES; ++nSizeIdx)
- globalFreeLists[nSizeIdx].ReturnWholeList(wholeLists[nSizeIdx]);
- blockFreeList.ReturnWholeList(bfList);
- }
- void FlushThreadFreeList() {
- if (pThreadInfo)
- MoveSingleThreadFreeToGlobal(pThreadInfo);
- }
- void DumpMemoryBlockUtilization() {
- // move current thread free to global lists to get better statistics
- FlushThreadFreeList();
- {
- TLFLockHolder ls(&LFGlobalLock);
- DumpMemoryBlockUtilizationLocked();
- }
- }
- //////////////////////////////////////////////////////////////////////////
- // malloc api
- static bool LFAlloc_SetParam(const char* param, const char* value) {
- if (!strcmp(param, "LB_LIMIT_TOTAL_SIZE")) {
- LB_LIMIT_TOTAL_SIZE = atoi(value);
- return true;
- }
- if (!strcmp(param, "LB_LIMIT_TOTAL_SIZE_BYTES")) {
- LB_LIMIT_TOTAL_SIZE = (atoi(value) + N_PAGE_SIZE - 1) / N_PAGE_SIZE;
- return true;
- }
- #ifdef DBG_FILL_MEMORY
- if (!strcmp(param, "FillMemoryOnAllocation")) {
- FillMemoryOnAllocation = !strcmp(value, "true");
- return true;
- }
- #endif
- if (!strcmp(param, "TransparentHugePages")) {
- TransparentHugePages = !strcmp(value, "true");
- return true;
- }
- if (!strcmp(param, "MapHugeTLB")) {
- MapHugeTLB = !strcmp(value, "true");
- return true;
- }
- if (!strcmp(param, "EnableDefrag")) {
- EnableDefrag = !strcmp(value, "true");
- return true;
- }
- return false;
- }
- static const char* LFAlloc_GetParam(const char* param) {
- struct TParam {
- const char* Name;
- const char* Value;
- };
- static const TParam Params[] = {
- {"GetLFAllocCounterFast", (const char*)&GetLFAllocCounterFast},
- {"GetLFAllocCounterFull", (const char*)&GetLFAllocCounterFull},
- #if defined(LFALLOC_DBG)
- {"SetThreadAllocTag", (const char*)&SetThreadAllocTag},
- {"SetProfileCurrentThread", (const char*)&SetProfileCurrentThread},
- {"SetProfileAllThreads", (const char*)&SetProfileAllThreads},
- {"SetAllocationSamplingEnabled", (const char*)&SetAllocationSamplingEnabled},
- {"SetAllocationSampleRate", (const char*)&SetAllocationSampleRate},
- {"SetAllocationSampleMaxSize", (const char*)&SetAllocationSampleMaxSize},
- {"SetAllocationCallback", (const char*)&SetAllocationCallback},
- {"SetDeallocationCallback", (const char*)&SetDeallocationCallback},
- {"GetPerTagAllocInfo", (const char*)&GetPerTagAllocInfo},
- #endif // LFALLOC_DBG
- };
- for (int i = 0; i < Y_ARRAY_SIZE(Params); ++i) {
- if (strcmp(param, Params[i].Name) == 0) {
- return Params[i].Value;
- }
- }
- return nullptr;
- }
- static Y_FORCE_INLINE int LFPosixMemalign(void** memptr, size_t alignment, size_t size) {
- if (Y_UNLIKELY(alignment > 4096)) {
- const char* error = "Alignments larger than 4096 are not guaranteed with this implementation\n";
- #ifdef _win_
- OutputDebugStringA(error);
- #endif
- NMalloc::AbortFromCorruptedAllocator(error);
- }
- size_t bigsize = size;
- if (bigsize <= alignment) {
- bigsize = alignment;
- } else if (bigsize < 2 * alignment) {
- bigsize = 2 * alignment;
- }
- #if defined(LFALLOC_DBG)
- if (alignment > sizeof(TAllocHeader)) {
- bigsize += alignment;
- }
- #endif
- *memptr = LFAlloc(bigsize);
- #if defined(LFALLOC_DBG)
- if (alignment > sizeof(TAllocHeader)) {
- // memptr isn't aligned due to alloc header
- const auto* header = GetAllocHeader(*memptr);
- *memptr = (void*)((const char*) (*memptr) + alignment - sizeof(TAllocHeader));
- // make fake header to retrieve original header ptr on dealloc
- auto* next = GetAllocHeader(*memptr);
- next->Tag = DBG_ALLOC_ALIGNED_TAG;
- next->Size = (uint64_t)header;
- next->Cookie = 0;
- }
- #endif
- Y_ASSERT_NOBT((intptr_t)*memptr % alignment == 0);
- return 0;
- }
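- // The alignment trick above, in the debug build: LFAlloc() returns
- // header + sizeof(TAllocHeader); the pointer is then advanced by
- // alignment - sizeof(TAllocHeader) and a stub header tagged DBG_ALLOC_ALIGNED_TAG
- // is written just below the aligned address so LFFree() can locate the real
- // header. Alignments above 4096 are rejected, presumably because blocks are only
- // naturally aligned up to their size class or page.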
- static Y_FORCE_INLINE void* LFVAlloc(size_t size) {
- const size_t pg = N_PAGE_SIZE;
- void* p = nullptr;
- #if defined(LFALLOC_DBG)
- LFPosixMemalign(&p, pg, size);
- #else
- size_t bigsize = (size + pg - 1) & (~(pg - 1));
- p = LFAlloc(bigsize);
- #endif
- Y_ASSERT_NOBT((intptr_t)p % N_PAGE_SIZE == 0);
- return p;
- }
- #endif