- #pragma once
- #include <stdlib.h>
- #include <stdio.h>
- #include <stdarg.h>
- #include <library/cpp/malloc/api/malloc.h>
- #include <util/system/compat.h>
- #include <util/system/compiler.h>
- #include <util/system/types.h>
- #ifdef _MSC_VER
- #ifndef _CRT_SECURE_NO_WARNINGS
- #define _CRT_SECURE_NO_WARNINGS
- #endif
- #ifdef _M_X64
- #define _64_
- #endif
- #include <intrin.h>
- #define WIN32_LEAN_AND_MEAN
- #include <Windows.h>
- #pragma intrinsic(_InterlockedCompareExchange)
- #pragma intrinsic(_InterlockedExchangeAdd)
- #include <new>
- #include <assert.h>
- #include <errno.h>
- #define PERTHREAD __declspec(thread)
- #define _win_
- #define Y_FORCE_INLINE __forceinline
- using TAtomic = volatile long;
- static inline long AtomicAdd(TAtomic& a, long b) {
- return _InterlockedExchangeAdd(&a, b) + b;
- }
- static inline long AtomicSub(TAtomic& a, long b) {
- return AtomicAdd(a, -b);
- }
- #pragma comment(lib, "synchronization.lib")
- #ifndef NDEBUG
- #define Y_ASSERT_NOBT(x) \
- { \
- if (IsDebuggerPresent()) { \
- if (!(x)) \
- __debugbreak(); \
- } else \
- assert(x); \
- }
- #else
- #define Y_ASSERT_NOBT(x) ((void)0)
- #endif
- #else
- #include <util/system/defaults.h>
- #include <library/cpp/deprecated/atomic/atomic.h>
- #include <util/system/yassert.h>
- #if !defined(NDEBUG) && !defined(__GCCXML__)
- #define Y_ASSERT_NOBT(a) \
- do { \
- try { \
- if (Y_UNLIKELY(!(a))) { \
- if (YaIsDebuggerPresent()) \
- __debugbreak(); \
- else { \
- assert(false && (a)); \
- } \
- } \
- } catch (...) { \
- if (YaIsDebuggerPresent()) \
- __debugbreak(); \
- else { \
- assert(false && "Exception during assert"); \
- } \
- } \
- } while (0)
- #else
- #define Y_ASSERT_NOBT(a) \
- do { \
- if (false) { \
- bool __xxx = static_cast<bool>(a); \
- Y_UNUSED(__xxx); \
- } \
- } while (0)
- #endif
- #include <pthread.h>
- #include <sys/mman.h>
- #include <stdlib.h>
- #include <memory.h>
- #include <new>
- #include <errno.h>
- #if defined(_linux_)
- #include <linux/futex.h>
- #include <sys/syscall.h>
- #if !defined(MADV_HUGEPAGE)
- #define MADV_HUGEPAGE 14
- #endif
- #if !defined(MAP_HUGETLB)
- #define MAP_HUGETLB 0x40000
- #endif
- #endif
- #define PERTHREAD __thread
- #endif
- #ifndef _darwin_
- #ifndef Y_ARRAY_SIZE
- #define Y_ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
- #endif
- #ifndef NDEBUG
- #define DBG_FILL_MEMORY
- static bool FillMemoryOnAllocation = true;
- #endif
- static bool TransparentHugePages = false; // force MADV_HUGEPAGE for large allocs
- static bool MapHugeTLB = false; // force MAP_HUGETLB for small allocs
- static bool EnableDefrag = true;
- // Buffers that are larger than this size will not be filled with 0xcf
- #ifndef DBG_FILL_MAX_SIZE
- #define DBG_FILL_MAX_SIZE 0x01000000000000ULL
- #endif
- template <class T>
- inline T* DoCas(T* volatile* target, T* exchange, T* compare) {
- #if defined(__has_builtin) && __has_builtin(__sync_val_compare_and_swap)
- return __sync_val_compare_and_swap(target, compare, exchange);
- #elif defined(_WIN32)
- #ifdef _64_
- return (T*)_InterlockedCompareExchange64((__int64*)target, (__int64)exchange, (__int64)compare);
- #else
- //return (T*)InterlockedCompareExchangePointer(targetVoidP, exchange, compare);
- return (T*)_InterlockedCompareExchange((LONG*)target, (LONG)exchange, (LONG)compare);
- #endif
- #elif defined(__i386) || defined(__x86_64__)
- union {
- T* volatile* NP;
- void* volatile* VoidP;
- } gccSucks;
- gccSucks.NP = target;
- void* volatile* targetVoidP = gccSucks.VoidP;
- __asm__ __volatile__(
- "lock\n\t"
- "cmpxchg %2,%0\n\t"
- : "+m"(*(targetVoidP)), "+a"(compare)
- : "r"(exchange)
- : "cc", "memory");
- return compare;
- #else
- #error inline_cas not defined for this platform
- #endif
- }
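- // Illustrative note (not part of the allocator): DoCas() follows the usual
- // compare-and-swap contract - it returns the previous value of *target, and the
- // swap succeeded iff that value equals `compare`:
- //   char* prev = cur;
- //   if (DoCas(&cur, next, prev) == prev) { /* next was installed */ }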
- #ifdef _64_
- const uintptr_t N_MAX_WORKSET_SIZE = 0x100000000ll * 200;
- const uintptr_t N_HUGE_AREA_FINISH = 0x700000000000ll;
- #ifndef _freebsd_
- const uintptr_t LINUX_MMAP_AREA_START = 0x100000000ll;
- static uintptr_t volatile linuxAllocPointer = LINUX_MMAP_AREA_START;
- static uintptr_t volatile linuxAllocPointerHuge = LINUX_MMAP_AREA_START + N_MAX_WORKSET_SIZE;
- #endif
- #else
- const uintptr_t N_MAX_WORKSET_SIZE = 0xffffffff;
- #endif
- #define ALLOC_START ((char*)0)
- const size_t N_CHUNK_SIZE = 1024 * 1024;
- const size_t N_CHUNKS = N_MAX_WORKSET_SIZE / N_CHUNK_SIZE;
- const size_t N_LARGE_ALLOC_SIZE = N_CHUNK_SIZE * 128;
- // map size idx to size in bytes
- #ifdef LFALLOC_YT
- const int N_SIZES = 27;
- #else
- const int N_SIZES = 25;
- #endif
- const int nSizeIdxToSize[N_SIZES] = {
- -1,
- #if defined(_64_)
- 16, 16, 32, 32, 48, 64, 96, 128,
- #else
- 8,
- 16,
- 24,
- 32,
- 48,
- 64,
- 96,
- 128,
- #endif
- 192, 256, 384, 512, 768, 1024, 1536, 2048,
- 3072, 4096, 6144, 8192, 12288, 16384, 24576, 32768,
- #ifdef LFALLOC_YT
- 49152, 65536
- #endif
- };
- #ifdef LFALLOC_YT
- const size_t N_MAX_FAST_SIZE = 65536;
- #else
- const size_t N_MAX_FAST_SIZE = 32768;
- #endif
- const unsigned char size2idxArr1[64 + 1] = {
- 1,
- #if defined(_64_)
- 2, 2, 4, 4, // 16, 16, 32, 32
- #else
- 1, 2, 3, 4, // 8, 16, 24, 32
- #endif
- 5, 5, 6, 6, // 48, 64
- 7, 7, 7, 7, 8, 8, 8, 8, // 96, 128
- 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, // 192, 256
- 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, // 384
- 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12 // 512
- };
- #ifdef LFALLOC_YT
- const unsigned char size2idxArr2[256] = {
- #else
- const unsigned char size2idxArr2[128] = {
- #endif
- 12, 12, 13, 14, // 512, 512, 768, 1024
- 15, 15, 16, 16, // 1536, 2048
- 17, 17, 17, 17, 18, 18, 18, 18, // 3072, 4096
- 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, // 6144, 8192
- 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, // 12288
- 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, // 16384
- 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
- 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, // 24576
- 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
- 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, // 32768
- #ifdef LFALLOC_YT
- 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
- 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
- 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
- 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, // 49152
- 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
- 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
- 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
- 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, // 65536
- #endif
- };
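- // Worked example of the two-level size lookup used by LFAllocImpl() below
- // (illustrative, 64-bit tables): requests of at most 512 bytes index size2idxArr1
- // at 1 + ((size - 1) >> 3); larger fast sizes index size2idxArr2 at (size - 1) >> 8.
- //   size = 100  -> size2idxArr1[1 + (99 >> 3)] = size2idxArr1[13] = 8  -> 128-byte class
- //   size = 1000 -> size2idxArr2[999 >> 8]      = size2idxArr2[3]  = 14 -> 1024-byte class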
- // map entry number to size idx
- // special size indices: 0 = not used, -1 = memory reserved but not yet allocated
- static volatile char chunkSizeIdx[N_CHUNKS];
- const int FREE_CHUNK_ARR_BUF = 0x20000; // with 1M chunks this covers 128G of free memory, which should never be exhausted in practice
- static volatile uintptr_t freeChunkArr[FREE_CHUNK_ARR_BUF];
- static volatile int freeChunkCount;
- static void AddFreeChunk(uintptr_t chunkId) {
- chunkSizeIdx[chunkId] = -1;
- if (Y_UNLIKELY(freeChunkCount == FREE_CHUNK_ARR_BUF))
- NMalloc::AbortFromCorruptedAllocator("free chunks array overflowed");
- freeChunkArr[freeChunkCount++] = chunkId;
- }
- static bool GetFreeChunk(uintptr_t* res) {
- if (freeChunkCount == 0) {
- *res = 0;
- return false;
- }
- *res = freeChunkArr[--freeChunkCount];
- return true;
- }
- //////////////////////////////////////////////////////////////////////////
- enum ELFAllocCounter {
- CT_USER_ALLOC, // accumulated size requested by user code
- CT_MMAP, // accumulated mmapped size
- CT_MMAP_CNT, // number of mmapped regions
- CT_MUNMAP, // accumulated unmmapped size
- CT_MUNMAP_CNT, // number of munmaped regions
- CT_SYSTEM_ALLOC, // accumulated allocated size for internal lfalloc needs
- CT_SYSTEM_FREE, // accumulated deallocated size for internal lfalloc needs
- CT_SMALL_ALLOC, // accumulated allocated size for fixed-size blocks
- CT_SMALL_FREE, // accumulated deallocated size for fixed-size blocks
- CT_LARGE_ALLOC, // accumulated allocated size for large blocks
- CT_LARGE_FREE, // accumulated deallocated size for large blocks
- CT_SLOW_ALLOC_CNT, // number of slow (not LF) allocations
- CT_DEGRAGMENT_CNT, // number of memory defragmentations
- CT_MAX
- };
- static Y_FORCE_INLINE void IncrementCounter(ELFAllocCounter counter, size_t value);
- //////////////////////////////////////////////////////////////////////////
- enum EMMapMode {
- MM_NORMAL, // memory for small allocs
- MM_HUGE // memory for large allocs
- };
- #ifndef _MSC_VER
- inline void VerifyMmapResult(void* result) {
- if (Y_UNLIKELY(result == MAP_FAILED))
- NMalloc::AbortFromCorruptedAllocator("negative size requested? or just out of mem");
- }
- #endif
- #if !defined(_MSC_VER) && !defined(_freebsd_) && defined(_64_)
- static char* AllocWithMMapLinuxImpl(uintptr_t sz, EMMapMode mode) {
- char* volatile* areaPtr;
- char* areaStart;
- uintptr_t areaFinish;
- int mapProt = PROT_READ | PROT_WRITE;
- int mapFlags = MAP_PRIVATE | MAP_ANON;
- if (mode == MM_HUGE) {
- areaPtr = reinterpret_cast<char* volatile*>(&linuxAllocPointerHuge);
- areaStart = reinterpret_cast<char*>(LINUX_MMAP_AREA_START + N_MAX_WORKSET_SIZE);
- areaFinish = N_HUGE_AREA_FINISH;
- } else {
- areaPtr = reinterpret_cast<char* volatile*>(&linuxAllocPointer);
- areaStart = reinterpret_cast<char*>(LINUX_MMAP_AREA_START);
- areaFinish = N_MAX_WORKSET_SIZE;
- if (MapHugeTLB) {
- mapFlags |= MAP_HUGETLB;
- }
- }
- bool wrapped = false;
- for (;;) {
- char* prevAllocPtr = *areaPtr;
- char* nextAllocPtr = prevAllocPtr + sz;
- if (uintptr_t(nextAllocPtr - (char*)nullptr) >= areaFinish) {
- if (Y_UNLIKELY(wrapped)) {
- NMalloc::AbortFromCorruptedAllocator("virtual memory is over fragmented");
- }
- // wrap around once the whole area has been used
- DoCas(areaPtr, areaStart, prevAllocPtr);
- wrapped = true;
- continue;
- }
- if (DoCas(areaPtr, nextAllocPtr, prevAllocPtr) != prevAllocPtr)
- continue;
- char* largeBlock = (char*)mmap(prevAllocPtr, sz, mapProt, mapFlags, -1, 0);
- VerifyMmapResult(largeBlock);
- if (largeBlock == prevAllocPtr)
- return largeBlock;
- if (largeBlock)
- munmap(largeBlock, sz);
- if (sz < 0x80000) {
- // skip over the utilized area in big steps
- DoCas(areaPtr, nextAllocPtr + 0x10 * 0x10000, nextAllocPtr);
- }
- }
- }
- #endif
- static char* AllocWithMMap(uintptr_t sz, EMMapMode mode) {
- (void)mode;
- #ifdef _MSC_VER
- char* largeBlock = (char*)VirtualAlloc(0, sz, MEM_RESERVE, PAGE_READWRITE);
- if (Y_UNLIKELY(largeBlock == nullptr))
- NMalloc::AbortFromCorruptedAllocator("out of memory");
- if (Y_UNLIKELY(uintptr_t(((char*)largeBlock - ALLOC_START) + sz) >= N_MAX_WORKSET_SIZE))
- NMalloc::AbortFromCorruptedAllocator("out of working set, something has broken");
- #else
- #if defined(_freebsd_) || !defined(_64_)
- char* largeBlock = (char*)mmap(0, sz, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
- VerifyMmapResult(largeBlock);
- if (Y_UNLIKELY(uintptr_t(((char*)largeBlock - ALLOC_START) + sz) >= N_MAX_WORKSET_SIZE))
- NMalloc::AbortFromCorruptedAllocator("out of working set, something has broken");
- #else
- char* largeBlock = AllocWithMMapLinuxImpl(sz, mode);
- if (TransparentHugePages) {
- madvise(largeBlock, sz, MADV_HUGEPAGE);
- }
- #endif
- #endif
- Y_ASSERT_NOBT(largeBlock);
- IncrementCounter(CT_MMAP, sz);
- IncrementCounter(CT_MMAP_CNT, 1);
- return largeBlock;
- }
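- // Address-space recap for the 64-bit non-FreeBSD path above (illustrative):
- // MM_NORMAL requests are bump-allocated from [LINUX_MMAP_AREA_START,
- // LINUX_MMAP_AREA_START + N_MAX_WORKSET_SIZE) via linuxAllocPointer, and MM_HUGE
- // requests from there up to N_HUGE_AREA_FINISH via linuxAllocPointerHuge; each
- // pointer is advanced with DoCas() and wraps around once before the allocator
- // gives up with "virtual memory is over fragmented".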
- enum class ELarge : ui8 {
- Free = 0, // block in free cache
- Alloc = 1, // block is allocated
- Gone = 2, // block was unmapped
- };
- struct TLargeBlk {
- static TLargeBlk* As(void *raw) {
- return reinterpret_cast<TLargeBlk*>((char*)raw - 4096ll);
- }
- static const TLargeBlk* As(const void *raw) {
- return reinterpret_cast<const TLargeBlk*>((const char*)raw - 4096ll);
- }
- void SetSize(size_t bytes, size_t pages) {
- Pages = pages;
- Bytes = bytes;
- }
- void Mark(ELarge state) {
- const ui64 marks[] = {
- 0x8b38aa5ca4953c98, // ELarge::Free
- 0xf916d33584eb5087, // ELarge::Alloc
- 0xd33b0eca7651bc3f // ELarge::Gone
- };
- Token = size_t(marks[ui8(state)]);
- }
- size_t Pages; // Total pages allocated with mmap like call
- size_t Bytes; // Actually requested bytes by user
- size_t Token; // Block state token, see ELarge enum.
- };
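- // Layout of a large allocation (illustrative): LargeBlockAlloc() maps Pages + 1
- // pages and returns a pointer one page past the base, so the header occupies the
- // first page of the mapping:
- //   base: TLargeBlk {Pages, Bytes, Token} | base + 4096: user data (Pages * 4096 bytes)
- // TLargeBlk::As(p) therefore simply steps back 4096 bytes from the user pointer.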
- static void LargeBlockUnmap(void* p, size_t pages) {
- const auto bytes = (pages + 1) * uintptr_t(4096);
- IncrementCounter(CT_MUNMAP, bytes);
- IncrementCounter(CT_MUNMAP_CNT, 1);
- #ifdef _MSC_VER
- Y_ASSERT_NOBT(0);
- #else
- TLargeBlk::As(p)->Mark(ELarge::Gone);
- munmap((char*)p - 4096ll, bytes);
- #endif
- }
- //////////////////////////////////////////////////////////////////////////
- const size_t LB_BUF_SIZE = 250;
- const size_t LB_BUF_HASH = 977;
- static int LB_LIMIT_TOTAL_SIZE = 500 * 1024 * 1024 / 4096; // do not keep more than this much memory in lbFreePtrs[] in total
- static void* volatile lbFreePtrs[LB_BUF_HASH][LB_BUF_SIZE];
- static TAtomic lbFreePageCount;
- static void* LargeBlockAlloc(size_t _nSize, ELFAllocCounter counter) {
- size_t pgCount = (_nSize + 4095) / 4096;
- #ifdef _MSC_VER
- char* pRes = (char*)VirtualAlloc(0, (pgCount + 1) * 4096ll, MEM_COMMIT, PAGE_READWRITE);
- if (Y_UNLIKELY(pRes == 0)) {
- NMalloc::AbortFromCorruptedAllocator("out of memory");
- }
- #else
- IncrementCounter(counter, pgCount * 4096ll);
- IncrementCounter(CT_SYSTEM_ALLOC, 4096ll);
- int lbHash = pgCount % LB_BUF_HASH;
- for (int i = 0; i < LB_BUF_SIZE; ++i) {
- void* p = lbFreePtrs[lbHash][i];
- if (p == nullptr)
- continue;
- if (DoCas(&lbFreePtrs[lbHash][i], (void*)nullptr, p) == p) {
- size_t realPageCount = TLargeBlk::As(p)->Pages;
- if (realPageCount == pgCount) {
- AtomicAdd(lbFreePageCount, -pgCount);
- TLargeBlk::As(p)->Mark(ELarge::Alloc);
- return p;
- } else {
- if (DoCas(&lbFreePtrs[lbHash][i], p, (void*)nullptr) != (void*)nullptr) {
- // block was freed while we were busy
- AtomicAdd(lbFreePageCount, -realPageCount);
- LargeBlockUnmap(p, realPageCount);
- --i;
- }
- }
- }
- }
- char* pRes = AllocWithMMap((pgCount + 1) * 4096ll, MM_HUGE);
- #endif
- pRes += 4096ll;
- TLargeBlk::As(pRes)->SetSize(_nSize, pgCount);
- TLargeBlk::As(pRes)->Mark(ELarge::Alloc);
- return pRes;
- }
- #ifndef _MSC_VER
- static void FreeAllLargeBlockMem() {
- for (auto& lbFreePtr : lbFreePtrs) {
- for (int i = 0; i < LB_BUF_SIZE; ++i) {
- void* p = lbFreePtr[i];
- if (p == nullptr)
- continue;
- if (DoCas(&lbFreePtr[i], (void*)nullptr, p) == p) {
- int pgCount = TLargeBlk::As(p)->Pages;
- AtomicAdd(lbFreePageCount, -pgCount);
- LargeBlockUnmap(p, pgCount);
- }
- }
- }
- }
- #endif
- static void LargeBlockFree(void* p, ELFAllocCounter counter) {
- if (p == nullptr)
- return;
- #ifdef _MSC_VER
- VirtualFree((char*)p - 4096ll, 0, MEM_RELEASE);
- #else
- size_t pgCount = TLargeBlk::As(p)->Pages;
- TLargeBlk::As(p)->Mark(ELarge::Free);
- IncrementCounter(counter, pgCount * 4096ll);
- IncrementCounter(CT_SYSTEM_FREE, 4096ll);
- if (lbFreePageCount > LB_LIMIT_TOTAL_SIZE)
- FreeAllLargeBlockMem();
- int lbHash = pgCount % LB_BUF_HASH;
- for (int i = 0; i < LB_BUF_SIZE; ++i) {
- if (lbFreePtrs[lbHash][i] == nullptr) {
- if (DoCas(&lbFreePtrs[lbHash][i], p, (void*)nullptr) == nullptr) {
- AtomicAdd(lbFreePageCount, pgCount);
- return;
- }
- }
- }
- LargeBlockUnmap(p, pgCount);
- #endif
- }
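- // Cache behaviour (illustrative summary): freed large blocks are parked in the
- // lbFreePtrs hash, keyed by page count with LB_BUF_SIZE slots per bucket, and are
- // reused by LargeBlockAlloc() only on an exact page-count match; once the cached
- // total exceeds LB_LIMIT_TOTAL_SIZE pages, FreeAllLargeBlockMem() unmaps the
- // entire cache.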
- static void* SystemAlloc(size_t _nSize) {
- //HeapAlloc(GetProcessHeap(), HEAP_GENERATE_EXCEPTIONS, _nSize);
- return LargeBlockAlloc(_nSize, CT_SYSTEM_ALLOC);
- }
- static void SystemFree(void* p) {
- //HeapFree(GetProcessHeap(), 0, p);
- LargeBlockFree(p, CT_SYSTEM_FREE);
- }
- //////////////////////////////////////////////////////////////////////////
- char* const LF_LOCK_FREE = ((char*)0) + 0;
- char* const LF_LOCK_LOCKED = ((char*)0) + 1;
- char* const LF_LOCK_FUTEX_WAIT = ((char*)0) + 2;
- static bool LFHasFutex = true;
- static bool LFCheckedWinVersion = false;
- // TLFLockData has to be zero-initialized explicitly https://en.cppreference.com/w/cpp/language/zero_initialization
- // otherwise the TLFLockData() constructor of the global variable might run after its first use
- struct TLFLockData
- {
- char* Pad1[15];
- char* volatile LockVar; // = LF_LOCK_FREE; // no constructor, zero-initialize manually
- char* Pad2[15];
- bool TryLock()
- {
- return (LockVar == LF_LOCK_FREE && DoCas(&LockVar, LF_LOCK_LOCKED, LF_LOCK_FREE) == LF_LOCK_FREE);
- }
- void FutexWait()
- {
- #ifdef _win_
- if (!LFCheckedWinVersion) {
- OSVERSIONINFOA ver;
- memset(&ver, 0, sizeof(ver));
- ver.dwOSVersionInfoSize = sizeof(OSVERSIONINFOA);
- GetVersionExA(&ver);
- LFHasFutex = (ver.dwMajorVersion > 6) || (ver.dwMajorVersion == 6 && ver.dwMinorVersion >= 2);
- LFCheckedWinVersion = true;
- }
- if (LFHasFutex) {
- if (LockVar == LF_LOCK_LOCKED) {
- DoCas(&LockVar, LF_LOCK_FUTEX_WAIT, LF_LOCK_LOCKED);
- }
- if (LockVar == LF_LOCK_FUTEX_WAIT) {
- char* lockedValue = LF_LOCK_FUTEX_WAIT;
- WaitOnAddress(&LockVar, &lockedValue, sizeof(LockVar), INFINITE);
- }
- } else {
- SwitchToThread();
- }
- #elif defined(_linux_)
- if (LFHasFutex) {
- if (LockVar == LF_LOCK_LOCKED) {
- DoCas(&LockVar, LF_LOCK_FUTEX_WAIT, LF_LOCK_LOCKED);
- }
- if (LockVar == LF_LOCK_FUTEX_WAIT) {
- // Linux futexes only compare int-sized values, so we treat the low bits of LockVar as an int
- syscall(SYS_futex, &LockVar, FUTEX_WAIT_PRIVATE, *(int*)&LF_LOCK_FUTEX_WAIT, 0, 0, 0);
- }
- } else {
- sched_yield();
- }
- #else
- sched_yield();
- #endif
- }
- void Unlock()
- {
- Y_ASSERT_NOBT(LockVar != LF_LOCK_FREE);
- if (DoCas(&LockVar, LF_LOCK_FREE, LF_LOCK_LOCKED) != LF_LOCK_LOCKED) {
- Y_ASSERT_NOBT(LockVar == LF_LOCK_FUTEX_WAIT && LFHasFutex);
- LockVar = LF_LOCK_FREE;
- #ifdef _win_
- WakeByAddressAll((PVOID)&LockVar);
- #elif defined(_linux_)
- syscall(SYS_futex, &LockVar, FUTEX_WAKE_PRIVATE, INT_MAX, 0, 0, 0);
- #endif
- }
- }
- };
- static TLFLockData LFGlobalLock;
- class TLFLockHolder {
- TLFLockData *LockData = nullptr;
- int Attempt = 0;
- int SleepMask = 0x7f;
- public:
- TLFLockHolder() {}
- TLFLockHolder(TLFLockData *lk) {
- while (!TryLock(lk));
- }
- bool TryLock(TLFLockData *lk)
- {
- Y_ASSERT_NOBT(LockData == nullptr);
- if (lk->TryLock()) {
- LockData = lk;
- return true;
- }
- if ((++Attempt & SleepMask) == 0) {
- lk->FutexWait();
- SleepMask = (SleepMask * 2 + 1) & 0x7fff;
- } else {
- #ifdef _MSC_VER
- _mm_pause();
- #elif defined(__i386) || defined(__x86_64__)
- __asm__ __volatile__("pause");
- #endif
- }
- return false;
- }
- ~TLFLockHolder() {
- if (LockData) {
- LockData->Unlock();
- }
- }
- };
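- // Usage sketch (mirrors DumpMemoryBlockUtilization() below): TLFLockHolder is a
- // scoped lock; constructing it from a TLFLockData spins with pause instructions,
- // falling back to a futex wait with exponential backoff, and the destructor
- // releases the lock:
- //   {
- //       TLFLockHolder lock(&LFGlobalLock);
- //       // ... critical section ...
- //   } // unlocked here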
- //////////////////////////////////////////////////////////////////////////
- class TLFAllocFreeList {
- struct TNode {
- TNode* Next;
- };
- TNode* volatile Head;
- TNode* volatile Pending;
- TAtomic PendingToFreeListCounter;
- TAtomic AllocCount;
- void* Padding;
- static Y_FORCE_INLINE void Enqueue(TNode* volatile* headPtr, TNode* n) {
- for (;;) {
- TNode* volatile prevHead = *headPtr;
- n->Next = prevHead;
- if (DoCas(headPtr, n, prevHead) == prevHead)
- break;
- }
- }
- Y_FORCE_INLINE void* DoAlloc() {
- TNode* res;
- for (res = Head; res; res = Head) {
- TNode* keepNext = res->Next;
- if (DoCas(&Head, keepNext, res) == res) {
- //Y_ABORT_UNLESS(keepNext == res->Next);
- break;
- }
- }
- return res;
- }
- void FreeList(TNode* fl) {
- if (!fl)
- return;
- TNode* flTail = fl;
- while (flTail->Next)
- flTail = flTail->Next;
- for (;;) {
- TNode* volatile prevHead = Head;
- flTail->Next = prevHead;
- if (DoCas(&Head, fl, prevHead) == prevHead)
- break;
- }
- }
- public:
- Y_FORCE_INLINE void Free(void* ptr) {
- TNode* newFree = (TNode*)ptr;
- if (AtomicAdd(AllocCount, 0) == 0)
- Enqueue(&Head, newFree);
- else
- Enqueue(&Pending, newFree);
- }
- Y_FORCE_INLINE void* Alloc() {
- TAtomic keepCounter = AtomicAdd(PendingToFreeListCounter, 0);
- TNode* fl = Pending;
- if (AtomicAdd(AllocCount, 1) == 1) {
- // No other allocs are in progress.
- // If keepCounter == PendingToFreeListCounter, then Pending was not flushed by another thread.
- // Hence Pending is not referenced by any concurrent DoAlloc() right now and can safely be moved to the free list.
- if (fl && keepCounter == AtomicAdd(PendingToFreeListCounter, 0) && DoCas(&Pending, (TNode*)nullptr, fl) == fl) {
- // pick first element from Pending and return it
- void* res = fl;
- fl = fl->Next;
- // if there are other elements in Pending list, add them to main free list
- FreeList(fl);
- AtomicAdd(PendingToFreeListCounter, 1);
- AtomicAdd(AllocCount, -1);
- return res;
- }
- }
- void* res = DoAlloc();
- AtomicAdd(AllocCount, -1);
- return res;
- }
- void* GetWholeList() {
- TNode* res;
- for (res = Head; res; res = Head) {
- if (DoCas(&Head, (TNode*)nullptr, res) == res)
- break;
- }
- return res;
- }
- void ReturnWholeList(void* ptr) {
- while (AtomicAdd(AllocCount, 0) != 0) // theoretically can run into problems with parallel DoAlloc()
- ; //ThreadYield();
- for (;;) {
- TNode* prevHead = Head;
- if (DoCas(&Head, (TNode*)ptr, prevHead) == prevHead) {
- FreeList(prevHead);
- break;
- }
- }
- }
- };
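- // Why two lists (illustrative summary): Free() appends to Pending whenever an
- // Alloc() is in flight (AllocCount != 0), so a concurrent DoAlloc() can never see
- // a node whose Next field has been recycled - the classic lock-free-stack ABA
- // hazard. Pending is drained back into Head only by the single caller that raised
- // AllocCount from 0 to 1, with PendingToFreeListCounter guarding against a racing
- // drain between the two reads.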
- /////////////////////////////////////////////////////////////////////////
- static TLFAllocFreeList globalFreeLists[N_SIZES];
- static char* volatile globalCurrentPtr[N_SIZES];
- static TLFAllocFreeList blockFreeList;
- // globalFreeLists[] contains TFreeListGroup, each of them points up to 15 free blocks
- const int FL_GROUP_SIZE = 15;
- struct TFreeListGroup {
- TFreeListGroup* Next;
- char* Ptrs[FL_GROUP_SIZE];
- };
- #ifdef _64_
- const int FREE_LIST_GROUP_SIZEIDX = 8;
- #else
- const int FREE_LIST_GROUP_SIZEIDX = 6;
- #endif
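- // Size sanity check (illustrative): a TFreeListGroup must itself fit a size class
- // so that groups can be allocated via LFAllocNoCache(). On 64-bit platforms
- // sizeof(TFreeListGroup) = 8 + 15 * 8 = 128 = nSizeIdxToSize[8]; on 32-bit it is
- // 4 + 15 * 4 = 64 = nSizeIdxToSize[6], matching FREE_LIST_GROUP_SIZEIDX above.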
- //////////////////////////////////////////////////////////////////////////
- // find free chunks and reset the chunk size so they can be reused by allocations of a different size
- // does not look at blockFreeList (TFreeListGroup has the same size for all allocations)
- static bool DefragmentMem() {
- if (!EnableDefrag) {
- return false;
- }
- IncrementCounter(CT_DEGRAGMENT_CNT, 1);
- int* nFreeCount = (int*)SystemAlloc(N_CHUNKS * sizeof(int));
- if (Y_UNLIKELY(!nFreeCount)) {
- //__debugbreak();
- NMalloc::AbortFromCorruptedAllocator("debugbreak");
- }
- memset(nFreeCount, 0, N_CHUNKS * sizeof(int));
- TFreeListGroup* wholeLists[N_SIZES];
- for (int nSizeIdx = 0; nSizeIdx < N_SIZES; ++nSizeIdx) {
- wholeLists[nSizeIdx] = (TFreeListGroup*)globalFreeLists[nSizeIdx].GetWholeList();
- for (TFreeListGroup* g = wholeLists[nSizeIdx]; g; g = g->Next) {
- for (auto pData : g->Ptrs) {
- if (pData) {
- uintptr_t nChunk = (pData - ALLOC_START) / N_CHUNK_SIZE;
- ++nFreeCount[nChunk];
- Y_ASSERT_NOBT(chunkSizeIdx[nChunk] == nSizeIdx);
- }
- }
- }
- }
- bool bRes = false;
- for (size_t nChunk = 0; nChunk < N_CHUNKS; ++nChunk) {
- int fc = nFreeCount[nChunk];
- nFreeCount[nChunk] = 0;
- if (chunkSizeIdx[nChunk] <= 0)
- continue;
- int nEntries = N_CHUNK_SIZE / nSizeIdxToSize[static_cast<int>(chunkSizeIdx[nChunk])];
- Y_ASSERT_NOBT(fc <= nEntries); // cannot have more free blocks than the total count
- if (fc == nEntries) {
- bRes = true;
- nFreeCount[nChunk] = 1;
- }
- }
- if (bRes) {
- for (auto& wholeList : wholeLists) {
- TFreeListGroup** ppPtr = &wholeList;
- while (*ppPtr) {
- TFreeListGroup* g = *ppPtr;
- int dst = 0;
- for (auto pData : g->Ptrs) {
- if (pData) {
- uintptr_t nChunk = (pData - ALLOC_START) / N_CHUNK_SIZE;
- if (nFreeCount[nChunk] == 0)
- g->Ptrs[dst++] = pData; // block is not freed, keep pointer
- }
- }
- if (dst == 0) {
- // no valid pointers in group, free it
- *ppPtr = g->Next;
- blockFreeList.Free(g);
- } else {
- // reset invalid pointers to 0
- for (int i = dst; i < FL_GROUP_SIZE; ++i)
- g->Ptrs[i] = nullptr;
- ppPtr = &g->Next;
- }
- }
- }
- for (uintptr_t nChunk = 0; nChunk < N_CHUNKS; ++nChunk) {
- if (!nFreeCount[nChunk])
- continue;
- char* pStart = ALLOC_START + nChunk * N_CHUNK_SIZE;
- #ifdef _win_
- VirtualFree(pStart, N_CHUNK_SIZE, MEM_DECOMMIT);
- #elif defined(_freebsd_)
- madvise(pStart, N_CHUNK_SIZE, MADV_FREE);
- #else
- madvise(pStart, N_CHUNK_SIZE, MADV_DONTNEED);
- #endif
- AddFreeChunk(nChunk);
- }
- }
- for (int nSizeIdx = 0; nSizeIdx < N_SIZES; ++nSizeIdx)
- globalFreeLists[nSizeIdx].ReturnWholeList(wholeLists[nSizeIdx]);
- SystemFree(nFreeCount);
- return bRes;
- }
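- // Defragmentation in a nutshell (illustrative): the pass above (1) drains every
- // global free list, (2) counts free blocks per chunk, (3) for chunks whose free
- // count equals their capacity, drops those pointers from the groups and returns
- // the pages to the OS (MEM_DECOMMIT / MADV_FREE / MADV_DONTNEED), and (4) pushes
- // the surviving groups back. Fully freed chunks re-enter the free stack through
- // AddFreeChunk() and can later serve a different size class.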
- static Y_FORCE_INLINE void* LFAllocFromCurrentChunk(int nSizeIdx, int blockSize, int count) {
- char* volatile* pFreeArray = &globalCurrentPtr[nSizeIdx];
- while (char* newBlock = *pFreeArray) {
- char* nextFree = newBlock + blockSize * count;
- // check if there is space in chunk
- char* globalEndPtr = ALLOC_START + ((newBlock - ALLOC_START) & ~((uintptr_t)N_CHUNK_SIZE - 1)) + N_CHUNK_SIZE;
- if (nextFree >= globalEndPtr) {
- if (nextFree > globalEndPtr)
- break;
- nextFree = nullptr; // it was last block in chunk
- }
- if (DoCas(pFreeArray, nextFree, newBlock) == newBlock)
- return newBlock;
- }
- return nullptr;
- }
- enum EDefrag {
- MEM_DEFRAG,
- NO_MEM_DEFRAG,
- };
- static void* SlowLFAlloc(int nSizeIdx, int blockSize, EDefrag defrag) {
- IncrementCounter(CT_SLOW_ALLOC_CNT, 1);
- TLFLockHolder ls;
- for (;;) {
- bool locked = ls.TryLock(&LFGlobalLock);
- void* res = LFAllocFromCurrentChunk(nSizeIdx, blockSize, 1);
- if (res) {
- return res; // can happen when another thread has installed a new current chunk
- }
- if (locked) {
- break;
- }
- }
- for (;;) {
- uintptr_t nChunk;
- if (GetFreeChunk(&nChunk)) {
- char* newPlace = ALLOC_START + nChunk * N_CHUNK_SIZE;
- #ifdef _MSC_VER
- void* pTest = VirtualAlloc(newPlace, N_CHUNK_SIZE, MEM_COMMIT, PAGE_READWRITE);
- Y_ASSERT_NOBT(pTest == newPlace);
- #endif
- chunkSizeIdx[nChunk] = (char)nSizeIdx;
- globalCurrentPtr[nSizeIdx] = newPlace + blockSize;
- return newPlace;
- }
- // out of luck, try to defrag
- if (defrag == MEM_DEFRAG && DefragmentMem()) {
- continue;
- }
- char* largeBlock = AllocWithMMap(N_LARGE_ALLOC_SIZE, MM_NORMAL);
- uintptr_t addr = ((largeBlock - ALLOC_START) + N_CHUNK_SIZE - 1) & (~(N_CHUNK_SIZE - 1));
- uintptr_t endAddr = ((largeBlock - ALLOC_START) + N_LARGE_ALLOC_SIZE) & (~(N_CHUNK_SIZE - 1));
- for (uintptr_t p = addr; p < endAddr; p += N_CHUNK_SIZE) {
- uintptr_t chunk = p / N_CHUNK_SIZE;
- Y_ASSERT_NOBT(chunk * N_CHUNK_SIZE == p);
- Y_ASSERT_NOBT(chunkSizeIdx[chunk] == 0);
- AddFreeChunk(chunk);
- }
- }
- return nullptr;
- }
- // allocate single block
- static Y_FORCE_INLINE void* LFAllocNoCache(int nSizeIdx, EDefrag defrag) {
- int blockSize = nSizeIdxToSize[nSizeIdx];
- void* res = LFAllocFromCurrentChunk(nSizeIdx, blockSize, 1);
- if (res)
- return res;
- return SlowLFAlloc(nSizeIdx, blockSize, defrag);
- }
- // allocate multiple blocks, returns number of blocks allocated (max FL_GROUP_SIZE)
- // buf should have space for at least FL_GROUP_SIZE elems
- static Y_FORCE_INLINE int LFAllocNoCacheMultiple(int nSizeIdx, char** buf) {
- int blockSize = nSizeIdxToSize[nSizeIdx];
- void* res = LFAllocFromCurrentChunk(nSizeIdx, blockSize, FL_GROUP_SIZE);
- if (res) {
- char* resPtr = (char*)res;
- for (int k = 0; k < FL_GROUP_SIZE; ++k) {
- buf[k] = resPtr;
- resPtr += blockSize;
- }
- return FL_GROUP_SIZE;
- }
- buf[0] = (char*)SlowLFAlloc(nSizeIdx, blockSize, MEM_DEFRAG);
- return 1;
- }
- // take several blocks from global free list (max FL_GROUP_SIZE blocks), returns number of blocks taken
- // buf should have space for at least FL_GROUP_SIZE elems
- static Y_FORCE_INLINE int TakeBlocksFromGlobalFreeList(int nSizeIdx, char** buf) {
- TLFAllocFreeList& fl = globalFreeLists[nSizeIdx];
- TFreeListGroup* g = (TFreeListGroup*)fl.Alloc();
- if (g) {
- int resCount = 0;
- for (auto& ptr : g->Ptrs) {
- if (ptr)
- buf[resCount++] = ptr;
- else
- break;
- }
- blockFreeList.Free(g);
- return resCount;
- }
- return 0;
- }
- // add several blocks to global free list
- static Y_FORCE_INLINE void PutBlocksToGlobalFreeList(ptrdiff_t nSizeIdx, char** buf, int count) {
- for (int startIdx = 0; startIdx < count;) {
- TFreeListGroup* g = (TFreeListGroup*)blockFreeList.Alloc();
- Y_ASSERT_NOBT(sizeof(TFreeListGroup) == nSizeIdxToSize[FREE_LIST_GROUP_SIZEIDX]);
- if (!g) {
- g = (TFreeListGroup*)LFAllocNoCache(FREE_LIST_GROUP_SIZEIDX, NO_MEM_DEFRAG);
- }
- int groupSize = count - startIdx;
- if (groupSize > FL_GROUP_SIZE)
- groupSize = FL_GROUP_SIZE;
- for (int i = 0; i < groupSize; ++i)
- g->Ptrs[i] = buf[startIdx + i];
- for (int i = groupSize; i < FL_GROUP_SIZE; ++i)
- g->Ptrs[i] = nullptr;
- // add free group to the global list
- TLFAllocFreeList& fl = globalFreeLists[nSizeIdx];
- fl.Free(g);
- startIdx += groupSize;
- }
- }
- //////////////////////////////////////////////////////////////////////////
- static TAtomic GlobalCounters[CT_MAX];
- const int MAX_LOCAL_UPDATES = 100;
- const intptr_t MAX_LOCAL_DELTA = 1*1024*1024;
- struct TLocalCounter {
- intptr_t Value;
- int Updates;
- TAtomic* Parent;
- Y_FORCE_INLINE void Init(TAtomic* parent) {
- Parent = parent;
- Value = 0;
- Updates = 0;
- }
- Y_FORCE_INLINE void Increment(size_t value) {
- Value += value;
- if (++Updates > MAX_LOCAL_UPDATES || Value > MAX_LOCAL_DELTA) {
- Flush();
- }
- }
- Y_FORCE_INLINE void Flush() {
- AtomicAdd(*Parent, Value);
- Value = 0;
- Updates = 0;
- }
- };
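- // Batching rationale (illustrative): each thread accumulates counter deltas
- // locally and folds them into the shared TAtomic only after MAX_LOCAL_UPDATES
- // increments or once the local delta exceeds MAX_LOCAL_DELTA, trading a bounded
- // reporting lag for far fewer contended atomic additions.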
- ////////////////////////////////////////////////////////////////////////////////
- // DBG stuff
- ////////////////////////////////////////////////////////////////////////////////
- #if defined(LFALLOC_DBG)
- struct TPerTagAllocCounter {
- TAtomic Size;
- TAtomic Count;
- Y_FORCE_INLINE void Alloc(size_t size) {
- AtomicAdd(Size, size);
- AtomicAdd(Count, 1);
- }
- Y_FORCE_INLINE void Free(size_t size) {
- AtomicSub(Size, size);
- AtomicSub(Count, 1);
- }
- };
- struct TLocalPerTagAllocCounter {
- intptr_t Size;
- int Count;
- int Updates;
- Y_FORCE_INLINE void Init() {
- Size = 0;
- Count = 0;
- Updates = 0;
- }
- Y_FORCE_INLINE void Alloc(TPerTagAllocCounter& parent, size_t size) {
- Size += size;
- ++Count;
- if (++Updates > MAX_LOCAL_UPDATES) {
- Flush(parent);
- }
- }
- Y_FORCE_INLINE void Free(TPerTagAllocCounter& parent, size_t size) {
- Size -= size;
- --Count;
- if (++Updates > MAX_LOCAL_UPDATES) {
- Flush(parent);
- }
- }
- Y_FORCE_INLINE void Flush(TPerTagAllocCounter& parent) {
- AtomicAdd(parent.Size, Size);
- Size = 0;
- AtomicAdd(parent.Count, Count);
- Count = 0;
- Updates = 0;
- }
- };
- static const int DBG_ALLOC_MAX_TAG = 1000;
- static const int DBG_ALLOC_ALIGNED_TAG = 0xF0000000;
- static const int DBG_ALLOC_NUM_SIZES = 30;
- static TPerTagAllocCounter GlobalPerTagAllocCounters[DBG_ALLOC_MAX_TAG][DBG_ALLOC_NUM_SIZES];
- #endif // LFALLOC_DBG
- //////////////////////////////////////////////////////////////////////////
- const int THREAD_BUF = 256;
- static int borderSizes[N_SIZES];
- const int MAX_MEM_PER_SIZE_PER_THREAD = 512 * 1024;
- struct TThreadAllocInfo {
- // FreePtrs - per-size stacks of cached free blocks, filled from the high end of each array
- // FreePtrIndex - index of the first occupied slot; THREAD_BUF means the stack is empty
- char* FreePtrs[N_SIZES][THREAD_BUF];
- int FreePtrIndex[N_SIZES];
- TThreadAllocInfo* pNextInfo;
- TLocalCounter LocalCounters[CT_MAX];
- #if defined(LFALLOC_DBG)
- TLocalPerTagAllocCounter LocalPerTagAllocCounters[DBG_ALLOC_MAX_TAG][DBG_ALLOC_NUM_SIZES];
- #endif
- #ifdef _win_
- HANDLE hThread;
- #endif
- void Init(TThreadAllocInfo** pHead) {
- memset(this, 0, sizeof(*this));
- for (auto& i : FreePtrIndex)
- i = THREAD_BUF;
- #ifdef _win_
- BOOL b = DuplicateHandle(
- GetCurrentProcess(), GetCurrentThread(),
- GetCurrentProcess(), &hThread,
- 0, FALSE, DUPLICATE_SAME_ACCESS);
- Y_ASSERT_NOBT(b);
- #endif
- pNextInfo = *pHead;
- *pHead = this;
- for (int k = 0; k < N_SIZES; ++k) {
- int maxCount = MAX_MEM_PER_SIZE_PER_THREAD / nSizeIdxToSize[k];
- if (maxCount > THREAD_BUF)
- maxCount = THREAD_BUF;
- borderSizes[k] = THREAD_BUF - maxCount;
- }
- for (int i = 0; i < CT_MAX; ++i) {
- LocalCounters[i].Init(&GlobalCounters[i]);
- }
- #if defined(LFALLOC_DBG)
- for (int tag = 0; tag < DBG_ALLOC_MAX_TAG; ++tag) {
- for (int sizeIdx = 0; sizeIdx < DBG_ALLOC_NUM_SIZES; ++sizeIdx) {
- auto& local = LocalPerTagAllocCounters[tag][sizeIdx];
- local.Init();
- }
- }
- #endif
- }
- void Done() {
- for (auto sizeIdx : FreePtrIndex) {
- Y_ASSERT_NOBT(sizeIdx == THREAD_BUF);
- }
- for (auto& localCounter : LocalCounters) {
- localCounter.Flush();
- }
- #if defined(LFALLOC_DBG)
- for (int tag = 0; tag < DBG_ALLOC_MAX_TAG; ++tag) {
- for (int sizeIdx = 0; sizeIdx < DBG_ALLOC_NUM_SIZES; ++sizeIdx) {
- auto& local = LocalPerTagAllocCounters[tag][sizeIdx];
- auto& global = GlobalPerTagAllocCounters[tag][sizeIdx];
- local.Flush(global);
- }
- }
- #endif
- #ifdef _win_
- if (hThread)
- CloseHandle(hThread);
- #endif
- }
- };
- PERTHREAD TThreadAllocInfo* pThreadInfo;
- static TThreadAllocInfo* pThreadInfoList;
- static TLFLockData LFLockThreadInfo;
- static Y_FORCE_INLINE void IncrementCounter(ELFAllocCounter counter, size_t value) {
- #ifdef LFALLOC_YT
- TThreadAllocInfo* thr = pThreadInfo;
- if (thr) {
- thr->LocalCounters[counter].Increment(value);
- } else {
- AtomicAdd(GlobalCounters[counter], value);
- }
- #endif
- }
- extern "C" i64 GetLFAllocCounterFast(int counter) {
- #ifdef LFALLOC_YT
- return GlobalCounters[counter];
- #else
- return 0;
- #endif
- }
- extern "C" i64 GetLFAllocCounterFull(int counter) {
- #ifdef LFALLOC_YT
- i64 ret = GlobalCounters[counter];
- {
- TLFLockHolder ll(&LFLockThreadInfo);
- for (TThreadAllocInfo** p = &pThreadInfoList; *p;) {
- TThreadAllocInfo* pInfo = *p;
- ret += pInfo->LocalCounters[counter].Value;
- p = &pInfo->pNextInfo;
- }
- }
- return ret;
- #else
- return 0;
- #endif
- }
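- // Usage sketch (illustrative): counters are queried by ELFAllocCounter index, e.g.
- //   i64 userBytes = GetLFAllocCounterFull(CT_USER_ALLOC); // exact, takes the thread-list lock
- //   i64 mmapped   = GetLFAllocCounterFast(CT_MMAP);       // cheap, may lag behind per-thread deltas
- // Both return 0 unless the allocator is built with LFALLOC_YT.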
- static void MoveSingleThreadFreeToGlobal(TThreadAllocInfo* pInfo) {
- for (int sizeIdx = 0; sizeIdx < N_SIZES; ++sizeIdx) {
- int& freePtrIdx = pInfo->FreePtrIndex[sizeIdx];
- char** freePtrs = pInfo->FreePtrs[sizeIdx];
- PutBlocksToGlobalFreeList(sizeIdx, freePtrs + freePtrIdx, THREAD_BUF - freePtrIdx);
- freePtrIdx = THREAD_BUF;
- }
- }
- #ifdef _win_
- static bool IsDeadThread(TThreadAllocInfo* pInfo) {
- DWORD dwExit;
- bool isDead = !GetExitCodeThread(pInfo->hThread, &dwExit) || dwExit != STILL_ACTIVE;
- return isDead;
- }
- static void CleanupAfterDeadThreads() {
- TLFLockHolder ll(&LFLockThreadInfo);
- for (TThreadAllocInfo** p = &pThreadInfoList; *p;) {
- TThreadAllocInfo* pInfo = *p;
- if (IsDeadThread(pInfo)) {
- MoveSingleThreadFreeToGlobal(pInfo);
- pInfo->Done();
- *p = pInfo->pNextInfo;
- SystemFree(pInfo);
- } else
- p = &pInfo->pNextInfo;
- }
- }
- #endif
- #ifndef _win_
- static pthread_key_t ThreadCacheCleaner;
- static void* volatile ThreadCacheCleanerStarted; // 0 = not started, -1 = started, -2 = is starting
- static PERTHREAD bool IsStoppingThread;
- static void FreeThreadCache(void*) {
- TThreadAllocInfo* pToDelete = nullptr;
- {
- TLFLockHolder ll(&LFLockThreadInfo);
- pToDelete = pThreadInfo;
- if (pToDelete == nullptr)
- return;
- // remove from the list
- for (TThreadAllocInfo** p = &pThreadInfoList; *p; p = &(*p)->pNextInfo) {
- if (*p == pToDelete) {
- *p = pToDelete->pNextInfo;
- break;
- }
- }
- IsStoppingThread = true;
- pThreadInfo = nullptr;
- }
- // free per thread buf
- MoveSingleThreadFreeToGlobal(pToDelete);
- pToDelete->Done();
- SystemFree(pToDelete);
- }
- #endif
- static void AllocThreadInfo() {
- #ifndef _win_
- if (DoCas(&ThreadCacheCleanerStarted, (void*)-2, (void*)nullptr) == (void*)nullptr) {
- pthread_key_create(&ThreadCacheCleaner, FreeThreadCache);
- ThreadCacheCleanerStarted = (void*)-1;
- }
- if (ThreadCacheCleanerStarted != (void*)-1)
- return; // do not use ThreadCacheCleaner until it is constructed
- {
- if (IsStoppingThread)
- return;
- TLFLockHolder ll(&LFLockThreadInfo);
- if (IsStoppingThread) // better safe than sorry
- return;
- pThreadInfo = (TThreadAllocInfo*)SystemAlloc(sizeof(TThreadAllocInfo));
- pThreadInfo->Init(&pThreadInfoList);
- }
- pthread_setspecific(ThreadCacheCleaner, (void*)-1); // the destructor only runs for non-null TLS values, so store a dummy one
- #else
- CleanupAfterDeadThreads();
- {
- pThreadInfo = (TThreadAllocInfo*)SystemAlloc(sizeof(TThreadAllocInfo));
- TLFLockHolder ll(&LFLockThreadInfo);
- pThreadInfo->Init(&pThreadInfoList);
- }
- #endif
- }
- //////////////////////////////////////////////////////////////////////////
- // DBG stuff
- //////////////////////////////////////////////////////////////////////////
- #if defined(LFALLOC_DBG)
- struct TAllocHeader {
- uint64_t Size;
- int Tag;
- int Cookie;
- };
- // should be power of 2
- static_assert(sizeof(TAllocHeader) == 16);
- static inline void* GetAllocPtr(TAllocHeader* p) {
- return p + 1;
- }
- static inline TAllocHeader* GetAllocHeader(void* p) {
- return ((TAllocHeader*)p) - 1;
- }
- // if present, uses the fake header stored by LFPosixMemalign() to retrieve the original header.
- static inline TAllocHeader* GetOrigAllocHeader(void* p) {
- auto* header = GetAllocHeader(p);
- if (header->Tag == DBG_ALLOC_ALIGNED_TAG) {
- return (TAllocHeader*)header->Size;
- }
- return header;
- }
- PERTHREAD int AllocationTag;
- extern "C" int SetThreadAllocTag(int tag) {
- int prevTag = AllocationTag;
- if (tag < DBG_ALLOC_MAX_TAG && tag >= 0) {
- AllocationTag = tag;
- }
- return prevTag;
- }
- PERTHREAD bool ProfileCurrentThread;
- extern "C" bool SetProfileCurrentThread(bool newVal) {
- bool prevVal = ProfileCurrentThread;
- ProfileCurrentThread = newVal;
- return prevVal;
- }
- static volatile bool ProfileAllThreads;
- extern "C" bool SetProfileAllThreads(bool newVal) {
- bool prevVal = ProfileAllThreads;
- ProfileAllThreads = newVal;
- return prevVal;
- }
- static volatile bool AllocationSamplingEnabled;
- extern "C" bool SetAllocationSamplingEnabled(bool newVal) {
- bool prevVal = AllocationSamplingEnabled;
- AllocationSamplingEnabled = newVal;
- return prevVal;
- }
- static size_t AllocationSampleRate = 1000;
- extern "C" size_t SetAllocationSampleRate(size_t newVal) {
- size_t prevVal = AllocationSampleRate;
- AllocationSampleRate = newVal;
- return prevVal;
- }
- static size_t AllocationSampleMaxSize = N_MAX_FAST_SIZE;
- extern "C" size_t SetAllocationSampleMaxSize(size_t newVal) {
- size_t prevVal = AllocationSampleMaxSize;
- AllocationSampleMaxSize = newVal;
- return prevVal;
- }
- using TAllocationCallback = int(int tag, size_t size, int sizeIdx);
- static TAllocationCallback* AllocationCallback;
- extern "C" TAllocationCallback* SetAllocationCallback(TAllocationCallback* newVal) {
- TAllocationCallback* prevVal = AllocationCallback;
- AllocationCallback = newVal;
- return prevVal;
- }
- using TDeallocationCallback = void(int cookie, int tag, size_t size, int sizeIdx);
- static TDeallocationCallback* DeallocationCallback;
- extern "C" TDeallocationCallback* SetDeallocationCallback(TDeallocationCallback* newVal) {
- TDeallocationCallback* prevVal = DeallocationCallback;
- DeallocationCallback = newVal;
- return prevVal;
- }
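- // Sketch of wiring up sampling in an LFALLOC_DBG build (MyAllocCb/MyFreeCb are
- // hypothetical names):
- //   static int MyAllocCb(int tag, size_t size, int sizeIdx) {
- //       return 42; // cookie, handed back on deallocation
- //   }
- //   static void MyFreeCb(int cookie, int tag, size_t size, int sizeIdx) { /* match samples */ }
- //   SetAllocationCallback(MyAllocCb);
- //   SetDeallocationCallback(MyFreeCb);
- //   SetAllocationSamplingEnabled(true);
- //   SetProfileCurrentThread(true); // or SetProfileAllThreads(true)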
- PERTHREAD TAtomic AllocationsCount;
- PERTHREAD bool InAllocationCallback;
- static const int DBG_ALLOC_INVALID_COOKIE = -1;
- static inline int SampleAllocation(TAllocHeader* p, int sizeIdx) {
- int cookie = DBG_ALLOC_INVALID_COOKIE;
- if (AllocationSamplingEnabled && (ProfileCurrentThread || ProfileAllThreads) && !InAllocationCallback) {
- if (p->Size > AllocationSampleMaxSize || ++AllocationsCount % AllocationSampleRate == 0) {
- if (AllocationCallback) {
- InAllocationCallback = true;
- cookie = AllocationCallback(p->Tag, p->Size, sizeIdx);
- InAllocationCallback = false;
- }
- }
- }
- return cookie;
- }
- static inline void SampleDeallocation(TAllocHeader* p, int sizeIdx) {
- if (p->Cookie != DBG_ALLOC_INVALID_COOKIE && !InAllocationCallback) {
- if (DeallocationCallback) {
- InAllocationCallback = true;
- DeallocationCallback(p->Cookie, p->Tag, p->Size, sizeIdx);
- InAllocationCallback = false;
- }
- }
- }
- static inline void TrackPerTagAllocation(TAllocHeader* p, int sizeIdx) {
- if (p->Tag < DBG_ALLOC_MAX_TAG && p->Tag >= 0) {
- Y_ASSERT_NOBT(sizeIdx < DBG_ALLOC_NUM_SIZES);
- auto& global = GlobalPerTagAllocCounters[p->Tag][sizeIdx];
- TThreadAllocInfo* thr = pThreadInfo;
- if (thr) {
- auto& local = thr->LocalPerTagAllocCounters[p->Tag][sizeIdx];
- local.Alloc(global, p->Size);
- } else {
- global.Alloc(p->Size);
- }
- }
- }
- static inline void TrackPerTagDeallocation(TAllocHeader* p, int sizeIdx) {
- if (p->Tag < DBG_ALLOC_MAX_TAG && p->Tag >= 0) {
- Y_ASSERT_NOBT(sizeIdx < DBG_ALLOC_NUM_SIZES);
- auto& global = GlobalPerTagAllocCounters[p->Tag][sizeIdx];
- TThreadAllocInfo* thr = pThreadInfo;
- if (thr) {
- auto& local = thr->LocalPerTagAllocCounters[p->Tag][sizeIdx];
- local.Free(global, p->Size);
- } else {
- global.Free(p->Size);
- }
- }
- }
- static void* TrackAllocation(void* ptr, size_t size, int sizeIdx) {
- TAllocHeader* p = (TAllocHeader*)ptr;
- p->Size = size;
- p->Tag = AllocationTag;
- p->Cookie = SampleAllocation(p, sizeIdx);
- TrackPerTagAllocation(p, sizeIdx);
- return GetAllocPtr(p);
- }
- static void TrackDeallocation(void* ptr, int sizeIdx) {
- TAllocHeader* p = (TAllocHeader*)ptr;
- SampleDeallocation(p, sizeIdx);
- TrackPerTagDeallocation(p, sizeIdx);
- }
- struct TPerTagAllocInfo {
- ssize_t Count;
- ssize_t Size;
- };
- extern "C" void GetPerTagAllocInfo(
- bool flushPerThreadCounters,
- TPerTagAllocInfo* info,
- int& maxTag,
- int& numSizes) {
- maxTag = DBG_ALLOC_MAX_TAG;
- numSizes = DBG_ALLOC_NUM_SIZES;
- if (info) {
- if (flushPerThreadCounters) {
- TLFLockHolder ll(&LFLockThreadInfo);
- for (TThreadAllocInfo** p = &pThreadInfoList; *p;) {
- TThreadAllocInfo* pInfo = *p;
- for (int tag = 0; tag < DBG_ALLOC_MAX_TAG; ++tag) {
- for (int sizeIdx = 0; sizeIdx < DBG_ALLOC_NUM_SIZES; ++sizeIdx) {
- auto& local = pInfo->LocalPerTagAllocCounters[tag][sizeIdx];
- auto& global = GlobalPerTagAllocCounters[tag][sizeIdx];
- local.Flush(global);
- }
- }
- p = &pInfo->pNextInfo;
- }
- }
- for (int tag = 0; tag < DBG_ALLOC_MAX_TAG; ++tag) {
- for (int sizeIdx = 0; sizeIdx < DBG_ALLOC_NUM_SIZES; ++sizeIdx) {
- auto& global = GlobalPerTagAllocCounters[tag][sizeIdx];
- auto& res = info[tag * DBG_ALLOC_NUM_SIZES + sizeIdx];
- res.Count = global.Count;
- res.Size = global.Size;
- }
- }
- }
- }
- #endif // LFALLOC_DBG
- //////////////////////////////////////////////////////////////////////////
- static Y_FORCE_INLINE void* LFAllocImpl(size_t _nSize) {
- #if defined(LFALLOC_DBG)
- size_t size = _nSize;
- _nSize += sizeof(TAllocHeader);
- #endif
- IncrementCounter(CT_USER_ALLOC, _nSize);
- int nSizeIdx;
- if (_nSize > 512) {
- if (_nSize > N_MAX_FAST_SIZE) {
- void* ptr = LargeBlockAlloc(_nSize, CT_LARGE_ALLOC);
- #if defined(LFALLOC_DBG)
- ptr = TrackAllocation(ptr, size, N_SIZES);
- #endif
- return ptr;
- }
- nSizeIdx = size2idxArr2[(_nSize - 1) >> 8];
- } else
- nSizeIdx = size2idxArr1[1 + (((int)_nSize - 1) >> 3)];
- IncrementCounter(CT_SMALL_ALLOC, nSizeIdxToSize[nSizeIdx]);
- // check per thread buffer
- TThreadAllocInfo* thr = pThreadInfo;
- if (!thr) {
- AllocThreadInfo();
- thr = pThreadInfo;
- if (!thr) {
- void* ptr = LFAllocNoCache(nSizeIdx, MEM_DEFRAG);
- #if defined(LFALLOC_DBG)
- ptr = TrackAllocation(ptr, size, nSizeIdx);
- #endif
- return ptr;
- }
- }
- {
- int& freePtrIdx = thr->FreePtrIndex[nSizeIdx];
- if (freePtrIdx < THREAD_BUF) {
- void* ptr = thr->FreePtrs[nSizeIdx][freePtrIdx++];
- #if defined(LFALLOC_DBG)
- ptr = TrackAllocation(ptr, size, nSizeIdx);
- #endif
- return ptr;
- }
- // try to alloc from global free list
- char* buf[FL_GROUP_SIZE];
- int count = TakeBlocksFromGlobalFreeList(nSizeIdx, buf);
- if (count == 0) {
- count = LFAllocNoCacheMultiple(nSizeIdx, buf);
- if (count == 0) {
- NMalloc::AbortFromCorruptedAllocator("no way LFAllocNoCacheMultiple() can fail");
- }
- }
- char** dstBuf = thr->FreePtrs[nSizeIdx] + freePtrIdx - 1;
- for (int i = 0; i < count - 1; ++i)
- dstBuf[-i] = buf[i];
- freePtrIdx -= count - 1;
- void* ptr = buf[count - 1];
- #if defined(LFALLOC_DBG)
- ptr = TrackAllocation(ptr, size, nSizeIdx);
- #endif
- return ptr;
- }
- }
- static Y_FORCE_INLINE void* LFAlloc(size_t _nSize) {
- void* res = LFAllocImpl(_nSize);
- #ifdef DBG_FILL_MEMORY
- if (FillMemoryOnAllocation && res && (_nSize <= DBG_FILL_MAX_SIZE)) {
- memset(res, 0xcf, _nSize);
- }
- #endif
- return res;
- }
- static Y_FORCE_INLINE void LFFree(void* p) {
- #if defined(LFALLOC_DBG)
- if (p == nullptr)
- return;
- p = GetOrigAllocHeader(p);
- #endif
- // subtracting 1 maps both nullptr and out-of-arena pointers into the large-block branch below
- uintptr_t chkOffset = ((char*)p - ALLOC_START) - 1ll;
- if (chkOffset >= N_MAX_WORKSET_SIZE) {
- if (p == nullptr)
- return;
- #if defined(LFALLOC_DBG)
- TrackDeallocation(p, N_SIZES);
- #endif
- LargeBlockFree(p, CT_LARGE_FREE);
- return;
- }
- uintptr_t chunk = ((char*)p - ALLOC_START) / N_CHUNK_SIZE;
- ptrdiff_t nSizeIdx = chunkSizeIdx[chunk];
- if (nSizeIdx <= 0) {
- #if defined(LFALLOC_DBG)
- TrackDeallocation(p, N_SIZES);
- #endif
- LargeBlockFree(p, CT_LARGE_FREE);
- return;
- }
- #if defined(LFALLOC_DBG)
- TrackDeallocation(p, nSizeIdx);
- #endif
- #ifdef DBG_FILL_MEMORY
- memset(p, 0xfe, nSizeIdxToSize[nSizeIdx]);
- #endif
- IncrementCounter(CT_SMALL_FREE, nSizeIdxToSize[nSizeIdx]);
- // try to cache the pointer in the per-thread buffer
- TThreadAllocInfo* thr = pThreadInfo;
- if (thr) {
- int& freePtrIdx = thr->FreePtrIndex[nSizeIdx];
- if (freePtrIdx > borderSizes[nSizeIdx]) {
- thr->FreePtrs[nSizeIdx][--freePtrIdx] = (char*)p;
- return;
- }
- // move several pointers to global free list
- int freeCount = FL_GROUP_SIZE;
- if (freeCount > THREAD_BUF - freePtrIdx)
- freeCount = THREAD_BUF - freePtrIdx;
- char** freePtrs = thr->FreePtrs[nSizeIdx];
- PutBlocksToGlobalFreeList(nSizeIdx, freePtrs + freePtrIdx, freeCount);
- freePtrIdx += freeCount;
- freePtrs[--freePtrIdx] = (char*)p;
- } else {
- AllocThreadInfo();
- PutBlocksToGlobalFreeList(nSizeIdx, (char**)&p, 1);
- }
- }
- static size_t LFGetSize(const void* p) {
- #if defined(LFALLOC_DBG)
- if (p == nullptr)
- return 0;
- return GetOrigAllocHeader(const_cast<void*>(p))->Size;
- #endif
- uintptr_t chkOffset = ((const char*)p - ALLOC_START);
- if (chkOffset >= N_MAX_WORKSET_SIZE) {
- if (p == nullptr)
- return 0;
- return TLargeBlk::As(p)->Pages * 4096ll;
- }
- uintptr_t chunk = ((const char*)p - ALLOC_START) / N_CHUNK_SIZE;
- ptrdiff_t nSizeIdx = chunkSizeIdx[chunk];
- if (nSizeIdx <= 0)
- return TLargeBlk::As(p)->Pages * 4096ll;
- return nSizeIdxToSize[nSizeIdx];
- }
- ////////////////////////////////////////////////////////////////////////////////////////////////////
- // Output mem alloc stats
- const int N_PAGE_SIZE = 4096;
- static void DebugTraceMMgr(const char* pszFormat, ...) // __cdecl
- {
- static char buff[20000];
- va_list va;
- //
- va_start(va, pszFormat);
- vsnprintf(buff, sizeof(buff), pszFormat, va);
- va_end(va);
- //
- #ifdef _win_
- OutputDebugStringA(buff);
- #else
- fputs(buff, stderr);
- #endif
- }
- struct TChunkStats {
- char *Start, *Finish;
- i64 Size;
- char* Entries;
- i64 FreeCount;
- TChunkStats(size_t chunk, i64 size, char* entries)
- : Size(size)
- , Entries(entries)
- , FreeCount(0)
- {
- Start = ALLOC_START + chunk * N_CHUNK_SIZE;
- Finish = Start + N_CHUNK_SIZE;
- }
- void CheckBlock(char* pBlock) {
- if (pBlock && pBlock >= Start && pBlock < Finish) {
- ++FreeCount;
- i64 nShift = pBlock - Start;
- i64 nOffsetInStep = nShift & (N_CHUNK_SIZE - 1);
- Entries[nOffsetInStep / Size] = 1;
- }
- }
- void SetGlobalFree(char* ptr) {
- i64 nShift = ptr - Start;
- i64 nOffsetInStep = nShift & (N_CHUNK_SIZE - 1);
- while (nOffsetInStep + Size <= N_CHUNK_SIZE) {
- ++FreeCount;
- Entries[nOffsetInStep / Size] = 1;
- nOffsetInStep += Size;
- }
- }
- };
- static void DumpMemoryBlockUtilizationLocked() {
- TFreeListGroup* wholeLists[N_SIZES];
- for (int nSizeIdx = 0; nSizeIdx < N_SIZES; ++nSizeIdx) {
- wholeLists[nSizeIdx] = (TFreeListGroup*)globalFreeLists[nSizeIdx].GetWholeList();
- }
- char* bfList = (char*)blockFreeList.GetWholeList();
- DebugTraceMMgr("memory blocks utilisation stats:\n");
- i64 nTotalAllocated = 0, nTotalFree = 0, nTotalBadPages = 0, nTotalPages = 0, nTotalUsed = 0, nTotalLocked = 0;
- i64 nTotalGroupBlocks = 0;
- char* entries;
- entries = (char*)SystemAlloc((N_CHUNK_SIZE / 4));
- for (size_t k = 0; k < N_CHUNKS; ++k) {
- if (chunkSizeIdx[k] <= 0) {
- if (chunkSizeIdx[k] == -1)
- nTotalLocked += N_CHUNK_SIZE;
- continue;
- }
- i64 nSizeIdx = chunkSizeIdx[k];
- i64 nSize = nSizeIdxToSize[nSizeIdx];
- TChunkStats cs(k, nSize, entries);
- int nEntriesTotal = N_CHUNK_SIZE / nSize;
- memset(entries, 0, nEntriesTotal);
- for (TFreeListGroup* g = wholeLists[nSizeIdx]; g; g = g->Next) {
- for (auto& ptr : g->Ptrs)
- cs.CheckBlock(ptr);
- }
- TChunkStats csGB(k, nSize, entries);
- if (nSizeIdx == FREE_LIST_GROUP_SIZEIDX) {
- for (auto g : wholeLists) {
- for (; g; g = g->Next)
- csGB.CheckBlock((char*)g);
- }
- for (char* blk = bfList; blk; blk = *(char**)blk)
- csGB.CheckBlock(blk);
- nTotalGroupBlocks += csGB.FreeCount * nSize;
- }
- if (((globalCurrentPtr[nSizeIdx] - ALLOC_START) / N_CHUNK_SIZE) == k)
- cs.SetGlobalFree(globalCurrentPtr[nSizeIdx]);
- nTotalUsed += (nEntriesTotal - cs.FreeCount - csGB.FreeCount) * nSize;
- char pages[N_CHUNK_SIZE / N_PAGE_SIZE];
- memset(pages, 0, sizeof(pages));
- for (int i = 0, nShift = 0; i < nEntriesTotal; ++i, nShift += nSize) {
- int nBit = 0;
- if (entries[i])
- nBit = 1; // free entry
- else
- nBit = 2; // used entry
- for (i64 nDelta = nSize - 1; nDelta >= 0; nDelta -= N_PAGE_SIZE)
- pages[(nShift + nDelta) / N_PAGE_SIZE] |= nBit;
- }
- i64 nBadPages = 0;
- for (auto page : pages) {
- nBadPages += page == 3;
- nTotalPages += page != 1;
- }
- DebugTraceMMgr("entry = %lld; size = %lld; free = %lld; system %lld; utilisation = %g%%, fragmentation = %g%%\n",
- k, nSize, cs.FreeCount * nSize, csGB.FreeCount * nSize,
- (N_CHUNK_SIZE - cs.FreeCount * nSize) * 100.0f / N_CHUNK_SIZE, 100.0f * nBadPages / Y_ARRAY_SIZE(pages));
- nTotalAllocated += N_CHUNK_SIZE;
- nTotalFree += cs.FreeCount * nSize;
- nTotalBadPages += nBadPages;
- }
- SystemFree(entries);
- DebugTraceMMgr("Total allocated = %llu, free = %lld, system = %lld, locked for future use %lld, utilisation = %g, fragmentation = %g\n",
- nTotalAllocated, nTotalFree, nTotalGroupBlocks, nTotalLocked,
- 100.0f * (nTotalAllocated - nTotalFree) / nTotalAllocated, 100.0f * nTotalBadPages / nTotalPages);
- DebugTraceMMgr("Total %lld bytes used, %lld bytes in used pages\n", nTotalUsed, nTotalPages * N_PAGE_SIZE);
- for (int nSizeIdx = 0; nSizeIdx < N_SIZES; ++nSizeIdx)
- globalFreeLists[nSizeIdx].ReturnWholeList(wholeLists[nSizeIdx]);
- blockFreeList.ReturnWholeList(bfList);
- }
- void FlushThreadFreeList() {
- if (pThreadInfo)
- MoveSingleThreadFreeToGlobal(pThreadInfo);
- }
- void DumpMemoryBlockUtilization() {
- // move current thread free to global lists to get better statistics
- FlushThreadFreeList();
- {
- TLFLockHolder ls(&LFGlobalLock);
- DumpMemoryBlockUtilizationLocked();
- }
- }
- //////////////////////////////////////////////////////////////////////////
- // malloc api
- static bool LFAlloc_SetParam(const char* param, const char* value) {
- if (!strcmp(param, "LB_LIMIT_TOTAL_SIZE")) {
- LB_LIMIT_TOTAL_SIZE = atoi(value);
- return true;
- }
- if (!strcmp(param, "LB_LIMIT_TOTAL_SIZE_BYTES")) {
- LB_LIMIT_TOTAL_SIZE = (atoi(value) + N_PAGE_SIZE - 1) / N_PAGE_SIZE;
- return true;
- }
- #ifdef DBG_FILL_MEMORY
- if (!strcmp(param, "FillMemoryOnAllocation")) {
- FillMemoryOnAllocation = !strcmp(value, "true");
- return true;
- }
- #endif
- if (!strcmp(param, "TransparentHugePages")) {
- TransparentHugePages = !strcmp(value, "true");
- return true;
- }
- if (!strcmp(param, "MapHugeTLB")) {
- MapHugeTLB = !strcmp(value, "true");
- return true;
- }
- if (!strcmp(param, "EnableDefrag")) {
- EnableDefrag = !strcmp(value, "true");
- return true;
- }
- return false;
- }
- static const char* LFAlloc_GetParam(const char* param) {
- struct TParam {
- const char* Name;
- const char* Value;
- };
- static const TParam Params[] = {
- {"GetLFAllocCounterFast", (const char*)&GetLFAllocCounterFast},
- {"GetLFAllocCounterFull", (const char*)&GetLFAllocCounterFull},
- #if defined(LFALLOC_DBG)
- {"SetThreadAllocTag", (const char*)&SetThreadAllocTag},
- {"SetProfileCurrentThread", (const char*)&SetProfileCurrentThread},
- {"SetProfileAllThreads", (const char*)&SetProfileAllThreads},
- {"SetAllocationSamplingEnabled", (const char*)&SetAllocationSamplingEnabled},
- {"SetAllocationSampleRate", (const char*)&SetAllocationSampleRate},
- {"SetAllocationSampleMaxSize", (const char*)&SetAllocationSampleMaxSize},
- {"SetAllocationCallback", (const char*)&SetAllocationCallback},
- {"SetDeallocationCallback", (const char*)&SetDeallocationCallback},
- {"GetPerTagAllocInfo", (const char*)&GetPerTagAllocInfo},
- #endif // LFALLOC_DBG
- };
- for (size_t i = 0; i < Y_ARRAY_SIZE(Params); ++i) {
- if (strcmp(param, Params[i].Name) == 0) {
- return Params[i].Value;
- }
- }
- return nullptr;
- }
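- // Usage sketch (illustrative): GetParam exposes function pointers by name, so a
- // caller without link-time access to the symbols can do:
- //   using TSetTag = int (*)(int);
- //   auto setTag = reinterpret_cast<TSetTag>(LFAlloc_GetParam("SetThreadAllocTag"));
- //   if (setTag) setTag(5);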
- static Y_FORCE_INLINE int LFPosixMemalign(void** memptr, size_t alignment, size_t size) {
- if (Y_UNLIKELY(alignment > 4096)) {
- const char* error = "Larger alignment are not guaranteed with this implementation\n";
- #ifdef _win_
- OutputDebugStringA(error);
- #endif
- NMalloc::AbortFromCorruptedAllocator(error);
- }
- size_t bigsize = size;
- if (bigsize <= alignment) {
- bigsize = alignment;
- } else if (bigsize < 2 * alignment) {
- bigsize = 2 * alignment;
- }
- #if defined(LFALLOC_DBG)
- if (alignment > sizeof(TAllocHeader)) {
- bigsize += alignment;
- }
- #endif
- *memptr = LFAlloc(bigsize);
- #if defined(LFALLOC_DBG)
- if (alignment > sizeof(TAllocHeader)) {
- // memptr isn't aligned due to alloc header
- const auto* header = GetAllocHeader(*memptr);
- *memptr = (void*)((const char*) (*memptr) + alignment - sizeof(TAllocHeader));
- // make fake header to retrieve original header ptr on dealloc
- auto* next = GetAllocHeader(*memptr);
- next->Tag = DBG_ALLOC_ALIGNED_TAG;
- next->Size = (uint64_t)header;
- next->Cookie = 0;
- }
- #endif
- Y_ASSERT_NOBT((intptr_t)*memptr % alignment == 0);
- return 0;
- }
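- // Usage sketch (illustrative): LFPosixMemalign() follows the posix_memalign()
- // contract for alignments up to one page:
- //   void* p = nullptr;
- //   if (LFPosixMemalign(&p, 64, 1000) == 0) {
- //       // p is 64-byte aligned; release it with LFFree(p)
- //       LFFree(p);
- //   }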
- static Y_FORCE_INLINE void* LFVAlloc(size_t size) {
- const size_t pg = N_PAGE_SIZE;
- void* p = nullptr;
- #if defined(LFALLOC_DBG)
- LFPosixMemalign(&p, pg, size);
- #else
- size_t bigsize = (size + pg - 1) & (~(pg - 1));
- p = LFAlloc(bigsize);
- #endif
- Y_ASSERT_NOBT((intptr_t)p % N_PAGE_SIZE == 0);
- return p;
- }
- #endif
|