#pragma once

#include <stddef.h>

#include <array>
#include <vector>

#include <library/cpp/yt/misc/enum.h>

#include <util/system/types.h>
#include <util/generic/size_literals.h>
#include <util/datetime/base.h>

namespace NYT::NYTAlloc {

////////////////////////////////////////////////////////////////////////////////
// Macros

#if defined(_linux_) && \
    !defined(_asan_enabled_) && \
    !defined(_msan_enabled_) && \
    !defined(_tsan_enabled_)
#define YT_ALLOC_ENABLED
#endif

////////////////////////////////////////////////////////////////////////////////
// Constants

constexpr int SmallRankCount = 23;
constexpr int MinLargeRank = 15;
constexpr int LargeRankCount = 30;

constexpr size_t LargeAllocationSizeThreshold = 32_KB;
constexpr size_t HugeAllocationSizeThreshold = 1ULL << (LargeRankCount - 1);
constexpr size_t MaxAllocationSize = 1_TB;

constexpr size_t PageSize = 4_KB;

constexpr size_t RightReadableAreaSize = 16;

////////////////////////////////////////////////////////////////////////////////
// Allocation API

// Allocates a chunk of memory of (at least) #size bytes.
// The returned pointer is guaranteed to be 16-byte aligned.
// Moreover, it is guaranteed that #RightReadableAreaSize bytes immediately following
// the allocated chunk are readable (but may belong to another allocated chunk).
// This enables eliminating some nasty corner cases in SIMD memory manipulations.
void* Allocate(size_t size);

// Allocates a chunk of memory of (at least) #size bytes.
// The returned pointer is guaranteed to be 4K-byte aligned.
// #size, however, need not be divisible by the page size (internally it is rounded up).
void* AllocatePageAligned(size_t size);

// An optimized version of #Allocate with #Size known at compile time.
template <size_t Size>
void* AllocateConstSize();

// Frees a chunk of memory previously allocated via Allocate functions.
// Does nothing if #ptr is null.
void Free(void* ptr);

// Similar to #Free but assumes that #ptr is not null.
void FreeNonNull(void* ptr);

// Returns the size of the chunk pointed to by #ptr.
// This size is not guaranteed to be exactly equal to #size passed to allocation functions
// due to rounding; the returned size, however, is never less than the latter size.
// If #ptr is null or we are unable to determine the allocation size, then 0 is returned.
size_t GetAllocationSize(const void* ptr);

// Returns the size of the chunk that will actually be allocated
// when requesting an allocation of given #size. This is never less than #size.
size_t GetAllocationSize(size_t size);
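
// Example (illustrative usage sketch, not part of the API above):
// a typical allocation round-trip with the functions declared in this section.
//
//   void* ptr = Allocate(100);
//   size_t actualSize = GetAllocationSize(ptr); // >= 100 due to rounding
//   Free(ptr);
//
//   // Compile-time-sized variant.
//   void* buffer = AllocateConstSize<256>();
//   Free(buffer);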

////////////////////////////////////////////////////////////////////////////////
// Memory zone API
//
// Each allocation is either in the "normal zone" or the "undumpable zone".
// The latter indicates that this memory region will be excluded from a coredump
// should it happen.
//
// The current zone used for allocations is stored in TLS.
// The memory zone is used to pass a hint to the allocator.

DEFINE_ENUM(EMemoryZone,
    ((Unknown)    (-1)) // not a valid zone
    ((Normal)     ( 0)) // default memory type
    ((Undumpable) ( 1)) // memory is omitted from the core dump
);

// Updates the current zone in TLS.
void SetCurrentMemoryZone(EMemoryZone zone);

// Returns the current zone from TLS.
EMemoryZone GetCurrentMemoryZone();

// Returns the zone where #ptr resides;
// EMemoryZone::Unknown indicates that #ptr is outside of any recognized memory zone.
EMemoryZone GetAllocationMemoryZone(const void* ptr);
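
// Example (illustrative usage sketch, not part of the API above):
// temporarily switching to the undumpable zone for a single allocation.
//
//   auto previousZone = GetCurrentMemoryZone();
//   SetCurrentMemoryZone(EMemoryZone::Undumpable);
//   void* ptr = Allocate(1_MB); // excluded from core dumps
//   SetCurrentMemoryZone(previousZone);
//
//   Y_ASSERT(GetAllocationMemoryZone(ptr) == EMemoryZone::Undumpable);
//   Free(ptr);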

////////////////////////////////////////////////////////////////////////////////
// When a "timing event" (hiccup) occurs during an allocation,
// YTAlloc records this event and captures the current fiber id.
// The latter is provided externally by calling SetCurrentFiberId.
//
// This may be helpful to correlate various application-level timings
// with internal events in YTAlloc.
//
// The current fiber id is stored in TLS.

using TFiberId = ui64;

// Updates the current fiber id in TLS.
void SetCurrentFiberId(TFiberId id);

// Returns the currently assigned fiber id from TLS.
TFiberId GetCurrentFiberId();
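
// Example (illustrative sketch, not part of the API above): a fiber scheduler
// would typically publish the id of the fiber it is about to run so that
// YTAlloc timing events can be attributed to it. RunFiber and the zero
// "no fiber" value are hypothetical.
//
//   void RunFiber(TFiberId id)
//   {
//       SetCurrentFiberId(id);
//       // ... execute the fiber's callback ...
//       SetCurrentFiberId(0); // reset once the fiber yields or finishes
//   }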

////////////////////////////////////////////////////////////////////////////////
// Logging

DEFINE_ENUM(ELogEventSeverity,
    (Debug)
    (Info)
    (Warning)
    (Error)
);

struct TLogEvent
{
    ELogEventSeverity Severity;
    TStringBuf Message;
};

using TLogHandler = void(*)(const TLogEvent& event);

// Sets the handler to be invoked for each log event produced by YTAlloc.
// Can be called multiple times (but calls to the previous incarnations of the handler
// are racy).
void EnableLogging(TLogHandler logHandler);
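
// Example (illustrative sketch, not part of the API above): forwarding YTAlloc
// log events to stderr; a real application would route them into its own
// logging subsystem instead.
//
//   void HandleYTAllocLogEvent(const TLogEvent& event)
//   {
//       // The severity could also be rendered symbolically via the enum traits.
//       Cerr << "YTAlloc [" << static_cast<int>(event.Severity) << "] "
//            << event.Message << Endl;
//   }
//
//   EnableLogging(HandleYTAllocLogEvent);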

////////////////////////////////////////////////////////////////////////////////
// Backtraces

using TBacktraceProvider = int(*)(void** frames, int maxFrames, int skipFrames);

// Sets the provider used for collecting backtraces when allocation profiling
// is turned ON. Can be called multiple times (but calls to the previous
// incarnations of the provider are racy).
void SetBacktraceProvider(TBacktraceProvider provider);

using TBacktraceFormatter = TString(*)(const void* const* frames, int frameCount);

// Sets the callback used for formatting backtraces during large arena mmap calls
// to help detect memory leaks. Can be called multiple times (but calls to the
// previous incarnations of the formatter are racy).
void SetBacktraceFormatter(TBacktraceFormatter formatter);
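
// Example (illustrative sketch, not part of the API above): a provider built on
// glibc's backtrace() from <execinfo.h>. This is only a sketch: backtrace() may
// itself allocate on first use, so production code usually plugs in a dedicated
// unwinder instead.
//
//   #include <execinfo.h>
//
//   int CollectBacktrace(void** frames, int maxFrames, int skipFrames)
//   {
//       int count = ::backtrace(frames, maxFrames);
//       if (count <= skipFrames) {
//           return 0;
//       }
//       // Drop the innermost frames the caller asked to skip.
//       for (int index = skipFrames; index < count; ++index) {
//           frames[index - skipFrames] = frames[index];
//       }
//       return count - skipFrames;
//   }
//
//   SetBacktraceProvider(CollectBacktrace);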

////////////////////////////////////////////////////////////////////////////////
// Misc

//! Tries to mlock all opened file mappings of the current process.
//! Typically invoked on application startup to lock all binaries in memory
//! and prevent executable code and static data from being paged out,
//! which would cause latency spikes.
void MlockFileMappings(bool populate = true);
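
// Example (illustrative sketch, not part of the API above): locking the
// binary's mappings early in main(), before the process starts serving
// latency-sensitive requests.
//
//   int main()
//   {
//       NYT::NYTAlloc::MlockFileMappings(/*populate*/ true);
//       // ... regular application startup continues here ...
//       return 0;
//   }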

////////////////////////////////////////////////////////////////////////////////
// Configuration API

// Calling this function enables periodic calls to madvise(ADV_STOCKPILE);
// cf. https://st.yandex-team.ru/KERNEL-186
void EnableStockpile();

// Sets the interval between madvise(ADV_STOCKPILE) calls.
// Only makes sense if stockpile was enabled.
void SetStockpileInterval(TDuration value);

// Sets the number of threads invoking madvise(ADV_STOCKPILE).
// This call should be made before calling #EnableStockpile.
void SetStockpileThreadCount(int value);

// Sets the size passed to madvise(ADV_STOCKPILE) calls.
// Only makes sense if stockpile was enabled.
void SetStockpileSize(size_t value);
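
// Example (illustrative sketch, not part of the API above): enabling stockpiling
// with explicitly tuned parameters. The concrete values are placeholders, not
// recommendations; the thread count must be set before enabling.
//
//   SetStockpileThreadCount(2);
//   SetStockpileInterval(TDuration::MilliSeconds(100));
//   SetStockpileSize(1_GB);
//   EnableStockpile();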

// For large blobs, YTAlloc keeps at least
// LargeUnreclaimableCoeff * TotalLargeBytesUsed, clamped to the range
// [MinLargeUnreclaimableBytes, MaxLargeUnreclaimableBytes],
// bytes of pooled (unreclaimable) memory.
void SetLargeUnreclaimableCoeff(double value);
void SetMinLargeUnreclaimableBytes(size_t value);
void SetMaxLargeUnreclaimableBytes(size_t value);

// When a syscall (mmap, munmap, or madvise) or an internal lock acquisition
// takes longer than the configured time, a "timing event" is recorded.
void SetTimingEventThreshold(TDuration value);

// Toggles the global allocation profiling knob (OFF by default).
// For profiled allocations, YTAlloc collects (see #SetBacktraceProvider)
// and aggregates their backtraces.
void SetAllocationProfilingEnabled(bool value);

// Determines the fraction of allocations to be sampled for profiling.
void SetAllocationProfilingSamplingRate(double rate);

// Controls if small allocations of a given rank are profiled (OFF by default).
void SetSmallArenaAllocationProfilingEnabled(size_t rank, bool value);

// Controls if large allocations of a given rank are profiled (OFF by default).
void SetLargeArenaAllocationProfilingEnabled(size_t rank, bool value);

// Controls the depth of the backtraces to collect. Deeper backtraces
// take more time and affect the program performance.
void SetProfilingBacktraceDepth(int depth);

// Controls the minimum number of bytes a certain backtrace must
// allocate to appear in profiling reports.
void SetMinProfilingBytesUsedToReport(size_t size);
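
// Example (illustrative sketch, not part of the API above): turning on
// allocation profiling for a small fraction of allocations. The values below
// are placeholders, not recommendations.
//
//   SetAllocationProfilingSamplingRate(0.01); // sample roughly 1% of allocations
//   SetProfilingBacktraceDepth(12);           // must not exceed MaxAllocationProfilingBacktraceDepth
//   SetMinProfilingBytesUsedToReport(16_MB);  // hide small contributors
//   SetAllocationProfilingEnabled(true);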

// If set to true (the default), YTAlloc uses madvise with MADV_DONTNEED to release unused large blob pages
// (slower but leads to more predictable RSS values);
// if false, MADV_FREE is used instead, if available
// (faster, but RSS may get stuck arbitrarily higher than the actual usage as long
// as no memory pressure is applied).
void SetEnableEagerMemoryRelease(bool value);

// If set to true, YTAlloc uses madvise with MADV_POPULATE to prefault freshly acquired pages.
// Otherwise (this is the default), these pages are prefaulted with linear memory access.
// See https://st.yandex-team.ru/KERNEL-185.
void SetEnableMadvisePopulate(bool value);

////////////////////////////////////////////////////////////////////////////////
// Statistics API

DEFINE_ENUM(EBasicCounter,
    (BytesAllocated)
    (BytesFreed)
    (BytesUsed)
);

using ESystemCounter = EBasicCounter;
using ESmallCounter = EBasicCounter;
using ELargeCounter = EBasicCounter;
using EUndumpableCounter = EBasicCounter;

DEFINE_ENUM(ESmallArenaCounter,
    (PagesMapped)
    (BytesMapped)
    (PagesCommitted)
    (BytesCommitted)
);

DEFINE_ENUM(ELargeArenaCounter,
    (BytesSpare)
    (BytesOverhead)
    (BlobsAllocated)
    (BlobsFreed)
    (BlobsUsed)
    (BytesAllocated)
    (BytesFreed)
    (BytesUsed)
    (ExtentsAllocated)
    (PagesMapped)
    (BytesMapped)
    (PagesPopulated)
    (BytesPopulated)
    (PagesReleased)
    (BytesReleased)
    (PagesCommitted)
    (BytesCommitted)
    (OverheadBytesReclaimed)
    (SpareBytesReclaimed)
);

DEFINE_ENUM(EHugeCounter,
    (BytesAllocated)
    (BytesFreed)
    (BytesUsed)
    (BlobsAllocated)
    (BlobsFreed)
    (BlobsUsed)
);

DEFINE_ENUM(ETotalCounter,
    (BytesAllocated)
    (BytesFreed)
    (BytesUsed)
    (BytesCommitted)
    (BytesUnaccounted)
);

// Returns statistics for all user allocations.
TEnumIndexedVector<ETotalCounter, ssize_t> GetTotalAllocationCounters();

// Returns statistics for small allocations; these are included into total statistics.
TEnumIndexedVector<ESmallCounter, ssize_t> GetSmallAllocationCounters();

// Returns statistics for large allocations; these are included into total statistics.
TEnumIndexedVector<ELargeCounter, ssize_t> GetLargeAllocationCounters();

// Returns per-arena statistics for small allocations; these are included into total statistics.
std::array<TEnumIndexedVector<ESmallArenaCounter, ssize_t>, SmallRankCount> GetSmallArenaAllocationCounters();

// Returns per-arena statistics for large allocations; these are included into total statistics.
std::array<TEnumIndexedVector<ELargeArenaCounter, ssize_t>, LargeRankCount> GetLargeArenaAllocationCounters();

// Returns statistics for huge allocations; these are included into total statistics.
TEnumIndexedVector<EHugeCounter, ssize_t> GetHugeAllocationCounters();

// Returns statistics for all system allocations; these are not included into total statistics.
TEnumIndexedVector<ESystemCounter, ssize_t> GetSystemAllocationCounters();

// Returns statistics for undumpable allocations.
TEnumIndexedVector<EUndumpableCounter, ssize_t> GetUndumpableAllocationCounters();
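
// Example (illustrative sketch, not part of the API above): reading the total
// counters. Indexing TEnumIndexedVector by enum value follows the usual YT
// convention; the reporting destination is arbitrary.
//
//   auto totals = GetTotalAllocationCounters();
//   ssize_t bytesUsed = totals[ETotalCounter::BytesUsed];
//   ssize_t bytesCommitted = totals[ETotalCounter::BytesCommitted];
//   Cerr << "YTAlloc: used = " << bytesUsed
//        << ", committed = " << bytesCommitted << Endl;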

DEFINE_ENUM(ETimingEventType,
    (Mmap)
    (Munmap)
    (MadvisePopulate)
    (MadviseFree)
    (MadviseDontNeed)
    (Locking)
    (Prefault)
    (FilePrefault)
);

struct TTimingEventCounters
{
    // Number of events that have happened since start.
    size_t Count = 0;
    // Total size of memory blocks involved in these events (if applicable).
    size_t Size = 0;
};

// Returns statistics for timing events that have happened since start.
// See #SetTimingEventThreshold.
TEnumIndexedVector<ETimingEventType, TTimingEventCounters> GetTimingEventCounters();
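
// Example (illustrative sketch, not part of the API above): checking how many
// slow mmap calls have been observed so far.
//
//   auto timings = GetTimingEventCounters();
//   const auto& mmapEvents = timings[ETimingEventType::Mmap];
//   Cerr << "slow mmaps: " << mmapEvents.Count
//        << ", total bytes involved: " << mmapEvents.Size << Endl;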

////////////////////////////////////////////////////////////////////////////////

// We never collect backtraces deeper than this limit.
constexpr int MaxAllocationProfilingBacktraceDepth = 16;

struct TBacktrace
{
    int FrameCount;
    std::array<void*, MaxAllocationProfilingBacktraceDepth> Frames;
};

struct TProfiledAllocation
{
    TBacktrace Backtrace;
    TEnumIndexedVector<EBasicCounter, ssize_t> Counters;
};

// Returns statistics for profiled allocations (available when allocation
// profiling is ON). Allocations are grouped by backtrace; for each backtrace
// we provide the counters indicating the number of allocated, freed, and used bytes.
// To appear here, the used-bytes counter must be at least the value configured
// via SetMinProfilingBytesUsedToReport.
std::vector<TProfiledAllocation> GetProfiledAllocationStatistics();
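
// Example (illustrative sketch, not part of the API above): dumping the
// heaviest profiled call sites. Rendering of the raw frame addresses is left
// to the application (e.g. via whatever routine it passes to
// SetBacktraceFormatter).
//
//   for (const auto& allocation : GetProfiledAllocationStatistics()) {
//       ssize_t bytesUsed = allocation.Counters[EBasicCounter::BytesUsed];
//       Cerr << "bytes used: " << bytesUsed
//            << ", frames captured: " << allocation.Backtrace.FrameCount << Endl;
//   }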

////////////////////////////////////////////////////////////////////////////////

} // namespace NYT::NYTAlloc

#define YT_ALLOC_INL_H_
#include "ytalloc-inl.h"
#undef YT_ALLOC_INL_H_