#pragma once #include #include #include #include #include #include namespace NYT::NYTAlloc { //////////////////////////////////////////////////////////////////////////////// // Macros #if defined(_linux_) && \ !defined(_asan_enabled_) && \ !defined(_msan_enabled_) && \ !defined(_tsan_enabled_) #define YT_ALLOC_ENABLED #endif //////////////////////////////////////////////////////////////////////////////// // Constants constexpr int SmallRankCount = 23; constexpr int MinLargeRank = 15; constexpr int LargeRankCount = 30; constexpr size_t LargeAllocationSizeThreshold = 32_KB; constexpr size_t HugeAllocationSizeThreshold = 1ULL << (LargeRankCount - 1); constexpr size_t MaxAllocationSize = 1_TB; constexpr size_t PageSize = 4_KB; constexpr size_t RightReadableAreaSize = 16; //////////////////////////////////////////////////////////////////////////////// // Allocation API // Allocates a chunk of memory of (at least) #size bytes. // The returned pointer is guaranteed to be 16-byte aligned. // Moreover, it is guaranteed that #RightReadableAreaSize bytes immediately following // the allocated chunk are readable (but may belong to another allocated chunk). // This enables eliminating some nasty corner cases in SIMD memory manipulations. void* Allocate(size_t size); // Allocates a chunk of memory of (at least) #size bytes. // The returned pointer is guaranteed to be 4K-byte aligned. // #size, however, need not be divisible by page size (but internally it will be rounded up). void* AllocatePageAligned(size_t size); // An optimized version of #Allocate with #Size being known at compile-time. template void* AllocateConstSize(); // Frees a chunk of memory previously allocated via Allocate functions. // Does nothing if #ptr is null. void Free(void* ptr); // Similar to #Free but assumes that #ptr is not null. void FreeNonNull(void* ptr); // Returns the size of the chunk pointed to by #ptr. 
// This size is not guaranteed to be exactly equal to #size passed to allocation functions // due to rounding; the returned size, however, is never less than the latter size. // If #ptr is null or we are unable to determine the allocation size, then 0 is returned. size_t GetAllocationSize(const void* ptr); // Returns the size of the chunk that will actually be allocated // when requesting an allocation of given #size. This is never less than #size. size_t GetAllocationSize(size_t size); //////////////////////////////////////////////////////////////////////////////// // Memory zone API // // Each allocation is either in the "normal zone" or "undumpable zone". // The latter indicates that this memory region will be excluded from a coredump // should it happen. // // The current zone used for allocations is stored in TLS. // Memory zone is used to pass a hint to the allocator. DEFINE_ENUM(EMemoryZone, ((Unknown) (-1)) // not a valid zone ((Normal) ( 0)) // default memory type ((Undumpable) ( 1)) // memory is omitted from the core dump ); // Updates the current zone in TLS. void SetCurrentMemoryZone(EMemoryZone zone); // Returns the current zone from TLS. EMemoryZone GetCurrentMemoryZone(); // Returns the zone where #ptr resides; // EMemoryZone::Unknown indicates that #ptr is outside of any recognized memory zone. EMemoryZone GetAllocationMemoryZone(const void* ptr); //////////////////////////////////////////////////////////////////////////////// // When a "timing event" (hiccup) occurs during an allocation, // YTAlloc records this event and captures the current fiber id. // The latter is provided externally by calling SetCurrentFiberId. // // This may be helpful to correlate various application-level timings // with internal events in YTAlloc. // // The current fiber id is stored in TLS. using TFiberId = ui64; // Updates the current fiber id in TLS. void SetCurrentFiberId(TFiberId id); // Returns the currently assigned fiber id from TLS. 
TFiberId GetCurrentFiberId(); //////////////////////////////////////////////////////////////////////////////// // Logging DEFINE_ENUM(ELogEventSeverity, (Debug) (Info) (Warning) (Error) ); struct TLogEvent { ELogEventSeverity Severity; TStringBuf Message; }; using TLogHandler = void(*)(const TLogEvent& event); // Sets the handler to be invoked for each log event produced by YTAlloc. // Can be called multiple times (but calls to the previous incarnations of the handler // are racy). void EnableLogging(TLogHandler logHandler); //////////////////////////////////////////////////////////////////////////////// // Backtraces using TBacktraceProvider = int(*)(void** frames, int maxFrames, int skipFrames); // Sets the provider used for collecting backtraces when allocation profiling // is turned ON. Can be called multiple times (but calls to the previous // incarnations of the provider are racy). void SetBacktraceProvider(TBacktraceProvider provider); using TBacktraceFormatter = TString(*)(const void* const* frames, int frameCount); // Sets the callback used for formatting backtraces during large arena mmap calls // to help detect memory leaks. Can be called multiple times (but calls to the // previous incarnations of the formatter are racy). void SetBacktraceFormatter(TBacktraceFormatter provider); //////////////////////////////////////////////////////////////////////////////// // Misc //! Tries to mlock all opened file mappings of the current process. //! Typically invoked on application startup to lock all binaries in memory //! and prevent executable code and static data from being paged out //! causing latency spikes. void MlockFileMappings(bool populate = true); //////////////////////////////////////////////////////////////////////////////// // Configuration API // Calling this function enables periodic calls to madvise(ADV_STOCKPILE); // cf. https://st.yandex-team.ru/KERNEL-186 void EnableStockpile(); // Sets the interval between madvise(ADV_STOCKPILE) calls. 
// Only makes sense if stockpile was enabled. void SetStockpileInterval(TDuration value); // Sets the number of threads to be invoking madvise(ADV_STOCKPILE). // This call should be made before calling #EnableStockpile. void SetStockpileThreadCount(int value); // Sets the size passed to madvise(ADV_STOCKPILE) calls. // Only makes sense if stockpile was enabled. void SetStockpileSize(size_t value); // For large blobs, YTAlloc keeps at least // LargeUnreclaimableCoeff * TotalLargeBytesUsed clamped to range // [MinLargeUnreclaimableBytes, MaxLargeUnreclaimableBytes] // bytes of pooled (unreclaimable) memory. void SetLargeUnreclaimableCoeff(double value); void SetMinLargeUnreclaimableBytes(size_t value); void SetMaxLargeUnreclaimableBytes(size_t value); // When a syscall (mmap, munmap, or madvise) or an internal lock acquisition // takes longer than the configured time, a "timing event" is recorded. void SetTimingEventThreshold(TDuration value); // Toggles the global allocation profiling knob (OFF by default). // For profiled allocations, YTAlloc collects (see #SetBacktraceProvider) and aggregates their // backtraces. void SetAllocationProfilingEnabled(bool value); // Determines the fraction of allocations to be sampled for profiling. void SetAllocationProfilingSamplingRate(double rate); // Controls if small allocations of a given rank are profiled (OFF by default). void SetSmallArenaAllocationProfilingEnabled(size_t rank, bool value); // Controls if large allocations of a given rank are profiled (OFF by default). void SetLargeArenaAllocationProfilingEnabled(size_t rank, bool value); // Controls the depth of the backtraces to collect. Deeper backtraces // take more time and affect the program performance. void SetProfilingBacktraceDepth(int depth); // Controls the minimum number of bytes a certain backtrace must // allocate to appear in profiling reports. 
void SetMinProfilingBytesUsedToReport(size_t size); // If set to true (default), YTAlloc uses madvise with MADV_DONTNEED to release unused large blob pages // (slower but leads to more predictable RSS values); // if false then MADV_FREE is used instead, if available // (faster but RSS may get stuck arbitrarily higher than the actual usage as long // as no memory pressure is applied). void SetEnableEagerMemoryRelease(bool value); // If set to true, YTAlloc uses madvise with MADV_POPULATE to prefault freshly acquired pages. // Otherwise (this is the default), these pages are prefaulted with linear memory access. // See https://st.yandex-team.ru/KERNEL-185. void SetEnableMadvisePopulate(bool value); //////////////////////////////////////////////////////////////////////////////// // Statistics API DEFINE_ENUM(EBasicCounter, (BytesAllocated) (BytesFreed) (BytesUsed) ); using ESystemCounter = EBasicCounter; using ESmallCounter = EBasicCounter; using ELargeCounter = EBasicCounter; using EUndumpableCounter = EBasicCounter; DEFINE_ENUM(ESmallArenaCounter, (PagesMapped) (BytesMapped) (PagesCommitted) (BytesCommitted) ); DEFINE_ENUM(ELargeArenaCounter, (BytesSpare) (BytesOverhead) (BlobsAllocated) (BlobsFreed) (BlobsUsed) (BytesAllocated) (BytesFreed) (BytesUsed) (ExtentsAllocated) (PagesMapped) (BytesMapped) (PagesPopulated) (BytesPopulated) (PagesReleased) (BytesReleased) (PagesCommitted) (BytesCommitted) (OverheadBytesReclaimed) (SpareBytesReclaimed) ); DEFINE_ENUM(EHugeCounter, (BytesAllocated) (BytesFreed) (BytesUsed) (BlobsAllocated) (BlobsFreed) (BlobsUsed) ); DEFINE_ENUM(ETotalCounter, (BytesAllocated) (BytesFreed) (BytesUsed) (BytesCommitted) (BytesUnaccounted) ); // Returns statistics for all user allocations. TEnumIndexedArray GetTotalAllocationCounters(); // Returns statistics for small allocations; these are included into total statistics. 
TEnumIndexedArray GetSmallAllocationCounters(); // Returns statistics for large allocations; these are included into total statistics. TEnumIndexedArray GetLargeAllocationCounters(); // Returns per-arena statistics for small allocations; these are included into total statistics. std::array, SmallRankCount> GetSmallArenaAllocationCounters(); // Returns per-arena statistics for large allocations; these are included into total statistics. std::array, LargeRankCount> GetLargeArenaAllocationCounters(); // Returns statistics for huge allocations; these are included into total statistics. TEnumIndexedArray GetHugeAllocationCounters(); // Returns statistics for all system allocations; these are not included into total statistics. TEnumIndexedArray GetSystemAllocationCounters(); // Returns statistics for undumpable allocations. TEnumIndexedArray GetUndumpableAllocationCounters(); DEFINE_ENUM(ETimingEventType, (Mmap) (Munmap) (MadvisePopulate) (MadviseFree) (MadviseDontNeed) (Locking) (Prefault) (FilePrefault) ); struct TTimingEventCounters { // Number of events that happened since start. size_t Count = 0; // Total size of memory blocks involved in these events (if applicable). size_t Size = 0; }; // Returns statistics for timing events that happened since start. // See SetTimingEventThreshold. TEnumIndexedArray GetTimingEventCounters(); //////////////////////////////////////////////////////////////////////////////// // We never collect backtraces deeper than this limit. constexpr int MaxAllocationProfilingBacktraceDepth = 16; struct TBacktrace { int FrameCount; std::array Frames; }; struct TProfiledAllocation { TBacktrace Backtrace; TEnumIndexedArray Counters; }; // Returns statistics for profiled allocations (available when allocation // profiling is ON). Allocations are grouped by backtrace; for each backtrace // we provide the counters indicating the number of allocated, freed, and used bytes. 
// To appear here, the used bytes counter must be at least the value configured // via SetMinProfilingBytesUsedToReport. std::vector GetProfiledAllocationStatistics(); //////////////////////////////////////////////////////////////////////////////// } // namespace NYT::NYTAlloc #define YT_ALLOC_INL_H_ #include "ytalloc-inl.h" #undef YT_ALLOC_INL_H_