// ytalloc.h
// Public interface of the YTAlloc memory allocator.
  1. #pragma once
  2. #include <stddef.h>
  3. #include <library/cpp/yt/misc/enum.h>
  4. #include <library/cpp/yt/containers/enum_indexed_array.h>
  5. #include <util/system/types.h>
  6. #include <util/generic/size_literals.h>
  7. #include <util/datetime/base.h>
  8. namespace NYT::NYTAlloc {
  9. ////////////////////////////////////////////////////////////////////////////////
  10. // Macros
  11. #if defined(_linux_) && \
  12. !defined(_asan_enabled_) && \
  13. !defined(_msan_enabled_) && \
  14. !defined(_tsan_enabled_)
  15. #define YT_ALLOC_ENABLED
  16. #endif
////////////////////////////////////////////////////////////////////////////////
// Constants

// Number of size ranks (size classes) used for small allocations.
constexpr int SmallRankCount = 23;
// First rank served by the large-blob machinery.
constexpr int MinLargeRank = 15;
// Number of size ranks used for large allocations.
constexpr int LargeRankCount = 30;
// Threshold separating small and large allocations.
constexpr size_t LargeAllocationSizeThreshold = 32_KB;
// Threshold separating large and huge allocations (1 << 29 bytes = 512 MB).
constexpr size_t HugeAllocationSizeThreshold = 1ULL << (LargeRankCount - 1);
// Hard upper bound on the size of a single allocation.
constexpr size_t MaxAllocationSize = 1_TB;
// Assumed VM page size; page-aligned allocations are rounded up to this granularity.
constexpr size_t PageSize = 4_KB;
// Number of bytes guaranteed to be readable immediately past every allocated
// chunk; see #Allocate below.
constexpr size_t RightReadableAreaSize = 16;
////////////////////////////////////////////////////////////////////////////////
// Allocation API

// Allocates a chunk of memory of (at least) #size bytes.
// The returned pointer is guaranteed to be 16-byte aligned.
// Moreover, it is guaranteed that #RightReadableAreaSize bytes immediately following
// the allocated chunk are readable (but may belong to another allocated chunk).
// This enables eliminating some nasty corner cases in SIMD memory manipulations.
void* Allocate(size_t size);

// Allocates a chunk of memory of (at least) #size bytes.
// The returned pointer is guaranteed to be 4K-byte aligned.
// #size, however, need not be divisible by page size (but internally it will be rounded up).
void* AllocatePageAligned(size_t size);

// An optimized version of #Allocate with #Size being known at compile-time.
template <size_t Size>
void* AllocateConstSize();

// Frees a chunk of memory previously allocated via Allocate functions.
// Does nothing if #ptr is null.
void Free(void* ptr);

// Similar to #Free but assumes that #ptr is not null.
void FreeNonNull(void* ptr);

// Returns the size of the chunk pointed to by #ptr.
// This size is not guaranteed to be exactly equal to #size passed to allocation functions
// due to rounding; the returned size, however, is never less than the latter size.
// If #ptr is null or we are unable to determine the allocation size, then 0 is returned.
size_t GetAllocationSize(const void* ptr);

// Returns the size of the chunk that will actually be allocated
// when requesting an allocation of given #size. This is never less than #size.
size_t GetAllocationSize(size_t size);
////////////////////////////////////////////////////////////////////////////////
// Memory zone API
//
// Each allocation is either in the "normal zone" or "undumpable zone".
// The latter indicates that this memory region will be excluded from a coredump
// should it happen.
//
// The current zone used for allocations is stored in TLS.
// Memory zone is used to pass hint to the allocator.

DEFINE_ENUM(EMemoryZone,
    ((Unknown)    (-1)) // not a valid zone
    ((Normal)     ( 0)) // default memory type
    ((Undumpable) ( 1)) // memory is omitted from the core dump
);

// Updates the current zone in TLS.
void SetCurrentMemoryZone(EMemoryZone zone);

// Returns the current zone from TLS.
EMemoryZone GetCurrentMemoryZone();

// Returns the zone where #ptr resides;
// EMemoryZone::Unknown indicates that #ptr is outside of any recognized memory zone.
EMemoryZone GetAllocationMemoryZone(const void* ptr);
////////////////////////////////////////////////////////////////////////////////
// Fiber id API
//
// When a "timing event" (hiccup) occurs during an allocation,
// YTAlloc records this event and captures the current fiber id.
// The latter is provided externally by calling SetCurrentFiberId.
//
// This may be helpful to correlate various application-level timings
// with internal events in YTAlloc.
//
// The current fiber id is stored in TLS.

using TFiberId = ui64;

// Updates the current fiber id in TLS.
void SetCurrentFiberId(TFiberId id);

// Returns the currently assigned fiber id from TLS.
TFiberId GetCurrentFiberId();
////////////////////////////////////////////////////////////////////////////////
// Logging

DEFINE_ENUM(ELogEventSeverity,
    (Debug)
    (Info)
    (Warning)
    (Error)
);

// A single log record produced by YTAlloc and handed to the installed handler.
struct TLogEvent
{
    // Severity of the event; see ELogEventSeverity.
    ELogEventSeverity Severity;
    // Human-readable message text; only valid for the duration of the handler call.
    TStringBuf Message;
};

using TLogHandler = void(*)(const TLogEvent& event);

// Sets the handler to be invoked for each log event produced by YTAlloc.
// Can be called multiple times (but calls to the previous incarnations of the handler
// are racy).
void EnableLogging(TLogHandler logHandler);
////////////////////////////////////////////////////////////////////////////////
// Backtraces

// Collects up to #maxFrames stack frames into #frames, skipping the top
// #skipFrames ones; returns the number of frames actually collected.
using TBacktraceProvider = int(*)(void** frames, int maxFrames, int skipFrames);

// Sets the provider used for collecting backtraces when allocation profiling
// is turned ON. Can be called multiple times (but calls to the previous
// incarnations of the provider are racy).
void SetBacktraceProvider(TBacktraceProvider provider);

// Renders a collected backtrace into a human-readable string.
using TBacktraceFormatter = TString(*)(const void* const* frames, int frameCount);

// Sets the callback used for formatting backtraces during large arena mmap calls
// to help detect memory leaks. Can be called multiple times (but calls to the
// previous incarnations of the provider are racy).
void SetBacktraceFormatter(TBacktraceFormatter provider);
////////////////////////////////////////////////////////////////////////////////
// Misc

//! Tries to mlock all opened file mappings of the current process.
//! Typically invoked on application startup to lock all binaries in memory
//! and prevent executable code and static data from being paged out,
//! which would cause latency spikes.
void MlockFileMappings(bool populate = true);
////////////////////////////////////////////////////////////////////////////////
// Configuration API

// Calling this function enables periodic calls to madvise(ADV_STOCKPILE);
// cf. https://st.yandex-team.ru/KERNEL-186
void EnableStockpile();

// Sets the interval between madvise(ADV_STOCKPILE) calls.
// Only makes sense if stockpile was enabled.
void SetStockpileInterval(TDuration value);

// Sets the number of threads to be invoking madvise(ADV_STOCKPILE).
// This call should be made before calling #EnableStockpile.
void SetStockpileThreadCount(int value);

// Sets the size passed to madvise(ADV_STOCKPILE) calls.
// Only makes sense if stockpile was enabled.
void SetStockpileSize(size_t value);

// For large blobs, YTAlloc keeps at least
// LargeUnreclaimableCoeff * TotalLargeBytesUsed clamped to range
// [MinLargeUnreclaimableBytes, MaxLargeUnreclaimableBytes]
// bytes of pooled (unreclaimable) memory.
void SetLargeUnreclaimableCoeff(double value);
void SetMinLargeUnreclaimableBytes(size_t value);
void SetMaxLargeUnreclaimableBytes(size_t value);

// When a syscall (mmap, munmap, or madvise) or an internal lock acquisition
// takes longer than the configured time, a "timing event" is recorded.
void SetTimingEventThreshold(TDuration value);

// Toggles the global allocation profiling knob (OFF by default).
// For profiled allocations, YTAlloc collects (see #SetBacktraceProvider) and aggregates their
// backtraces.
void SetAllocationProfilingEnabled(bool value);
// Determines the fraction of allocations to be sampled for profiling.
void SetAllocationProfilingSamplingRate(double rate);
// Controls if small allocations of a given rank are profiled (OFF by default).
void SetSmallArenaAllocationProfilingEnabled(size_t rank, bool value);
// Controls if large allocations of a given rank are profiled (OFF by default).
void SetLargeArenaAllocationProfilingEnabled(size_t rank, bool value);
// Controls the depth of the backtraces to collect. Deeper backtraces
// take more time and affect the program performance.
void SetProfilingBacktraceDepth(int depth);
// Controls the minimum number of bytes a certain backtrace must
// allocate to appear in profiling reports.
void SetMinProfilingBytesUsedToReport(size_t size);

// If set to true (default), YTAlloc uses madvise with MADV_DONTNEED to release unused large blob pages
// (slower but leads to more predictable RSS values);
// if false then MADV_FREE is used instead, if available
// (faster but RSS may get stuck arbitrary higher than the actual usage as long
// as no memory pressure is applied).
void SetEnableEagerMemoryRelease(bool value);

// If set to true, YTAlloc uses madvise with MADV_POPULATE to prefault freshly claimed pages.
// Otherwise (this is the default), these pages are prefaulted with linear memory access.
// See https://st.yandex-team.ru/KERNEL-185.
void SetEnableMadvisePopulate(bool value);
////////////////////////////////////////////////////////////////////////////////
// Statistics API

// Basic allocated/freed/used byte counters shared by several statistics families.
DEFINE_ENUM(EBasicCounter,
    (BytesAllocated)
    (BytesFreed)
    (BytesUsed)
);

using ESystemCounter = EBasicCounter;
using ESmallCounter = EBasicCounter;
using ELargeCounter = EBasicCounter;
using EUndumpableCounter = EBasicCounter;

// Per-arena counters for small allocations.
DEFINE_ENUM(ESmallArenaCounter,
    (PagesMapped)
    (BytesMapped)
    (PagesCommitted)
    (BytesCommitted)
);

// Per-arena counters for large allocations.
DEFINE_ENUM(ELargeArenaCounter,
    (BytesSpare)
    (BytesOverhead)
    (BlobsAllocated)
    (BlobsFreed)
    (BlobsUsed)
    (BytesAllocated)
    (BytesFreed)
    (BytesUsed)
    (ExtentsAllocated)
    (PagesMapped)
    (BytesMapped)
    (PagesPopulated)
    (BytesPopulated)
    (PagesReleased)
    (BytesReleased)
    (PagesCommitted)
    (BytesCommitted)
    (OverheadBytesReclaimed)
    (SpareBytesReclaimed)
);

// Counters for huge allocations.
DEFINE_ENUM(EHugeCounter,
    (BytesAllocated)
    (BytesFreed)
    (BytesUsed)
    (BlobsAllocated)
    (BlobsFreed)
    (BlobsUsed)
);

// Aggregate counters covering all user allocations.
DEFINE_ENUM(ETotalCounter,
    (BytesAllocated)
    (BytesFreed)
    (BytesUsed)
    (BytesCommitted)
    (BytesUnaccounted)
);

// Returns statistics for all user allocations.
TEnumIndexedArray<ETotalCounter, ssize_t> GetTotalAllocationCounters();

// Returns statistics for small allocations; these are included into total statistics.
TEnumIndexedArray<ESmallCounter, ssize_t> GetSmallAllocationCounters();

// Returns statistics for large allocations; these are included into total statistics.
TEnumIndexedArray<ELargeCounter, ssize_t> GetLargeAllocationCounters();

// Returns per-arena statistics for small allocations; these are included into total statistics.
std::array<TEnumIndexedArray<ESmallArenaCounter, ssize_t>, SmallRankCount> GetSmallArenaAllocationCounters();

// Returns per-arena statistics for large allocations; these are included into total statistics.
std::array<TEnumIndexedArray<ELargeArenaCounter, ssize_t>, LargeRankCount> GetLargeArenaAllocationCounters();

// Returns statistics for huge allocations; these are included into total statistics.
TEnumIndexedArray<EHugeCounter, ssize_t> GetHugeAllocationCounters();

// Returns statistics for all system allocations; these are not included into total statistics.
TEnumIndexedArray<ESystemCounter, ssize_t> GetSystemAllocationCounters();

// Returns statistics for undumpable allocations.
TEnumIndexedArray<EUndumpableCounter, ssize_t> GetUndumpableAllocationCounters();

// Kinds of "timing events" recorded when an operation exceeds the threshold
// configured via #SetTimingEventThreshold.
DEFINE_ENUM(ETimingEventType,
    (Mmap)
    (Munmap)
    (MadvisePopulate)
    (MadviseFree)
    (MadviseDontNeed)
    (Locking)
    (Prefault)
    (FilePrefault)
);

// Per-event-type aggregates for timing events.
struct TTimingEventCounters
{
    // Number of events happened since start.
    size_t Count = 0;
    // Total size of memory blocks involved in these events (if applicable).
    size_t Size = 0;
};

// Returns statistics for timing events happened since start.
// See SetTimingEventThreshold.
TEnumIndexedArray<ETimingEventType, TTimingEventCounters> GetTimingEventCounters();
////////////////////////////////////////////////////////////////////////////////

// We never collect backtraces deeper than this limit.
constexpr int MaxAllocationProfilingBacktraceDepth = 16;

// A fixed-capacity stack trace captured for a profiled allocation.
struct TBacktrace
{
    // Number of valid entries in #Frames.
    int FrameCount;
    // Raw return addresses, top frame first (capacity is the hard depth limit above).
    std::array<void*, MaxAllocationProfilingBacktraceDepth> Frames;
};

// Aggregated counters for all profiled allocations sharing one backtrace.
struct TProfiledAllocation
{
    TBacktrace Backtrace;
    TEnumIndexedArray<EBasicCounter, ssize_t> Counters;
};

// Returns statistics for profiled allocations (available when allocation
// profiling is ON). Allocations are grouped by backtrace; for each backtrace
// we provide the counters indicating the number of allocated, freed, and used bytes.
// To appear here, used bytes counter must be at least the value configured
// via SetMinProfilingBytesUsedToReport.
std::vector<TProfiledAllocation> GetProfiledAllocationStatistics();

////////////////////////////////////////////////////////////////////////////////
  286. } // namespace NYT::NYTAlloc
  287. #define YT_ALLOC_INL_H_
  288. #include "ytalloc-inl.h"
  289. #undef YT_ALLOC_INL_H_