stackcollect.cpp 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332
  1. #include "stackcollect.h"
  2. #include "profiler.h"
  3. #include <util/generic/algorithm.h>
  4. #include <util/generic/vector.h>
  5. #include <util/stream/format.h>
  6. #include <util/stream/str.h>
  7. #include <util/string/cast.h>
  8. #include <util/string/printf.h>
  9. #include <util/system/backtrace.h>
  10. #include <util/system/spinlock.h>
  11. #include <util/system/yassert.h>
  12. namespace NAllocProfiler {
  13. ////////////////////////////////////////////////////////////////////////////////
  14. template <typename T>
  15. class TStackCollector: private TNonCopyable {
  16. public:
  17. struct TFrameInfo {
  18. int PrevInd;
  19. void* Addr;
  20. int Tag;
  21. T Stats;
  22. void Clear()
  23. {
  24. PrevInd = 0;
  25. Addr = nullptr;
  26. Tag = 0;
  27. Stats.Clear();
  28. }
  29. };
  30. private:
  31. static const size_t STACKS_HASH_MAP_SIZE = 256 * 1024;
  32. TFrameInfo Frames[STACKS_HASH_MAP_SIZE];
  33. ui64 Samples; // Saved samples count
  34. ui64 UniqueSamples; // Number of unique addresses
  35. ui64 UsedSlots; // Number of occupied slots in the hashtable
  36. ui64 DroppedSamples; // Number of unsaved addresses
  37. ui64 SearchSkipCount; // Total number of linear hash table probes due to collisions
  38. TAdaptiveLock Lock;
  39. public:
  40. TStackCollector()
  41. {
  42. Clear();
  43. }
  44. int AddStack(void** stack, size_t frameCount, int tag)
  45. {
  46. Y_ASSERT(frameCount > 0);
  47. int prevInd = -1;
  48. with_lock (Lock) {
  49. for (int i = frameCount - 1; i >= 0; --i) {
  50. prevInd = AddFrame(stack[i], prevInd, ((i == 0) ? tag : 0), (i == 0));
  51. if (prevInd == -1) {
  52. break;
  53. }
  54. }
  55. }
  56. return prevInd;
  57. }
  58. T& GetStats(int stackId)
  59. {
  60. Y_ASSERT(stackId >= 0 && (size_t)stackId < Y_ARRAY_SIZE(Frames));
  61. Y_ASSERT(!IsSlotEmpty(stackId));
  62. return Frames[stackId].Stats;
  63. }
  64. const TFrameInfo* GetFrames() const
  65. {
  66. return Frames;
  67. }
  68. size_t GetFramesCount() const
  69. {
  70. return Y_ARRAY_SIZE(Frames);
  71. }
  72. void BackTrace(const TFrameInfo* stack, TStackVec<void*, 64>& frames) const
  73. {
  74. frames.clear();
  75. for (size_t i = 0; i < 100; ++i) {
  76. frames.push_back(stack->Addr);
  77. int prevInd = stack->PrevInd;
  78. if (prevInd == -1) {
  79. break;
  80. }
  81. stack = &Frames[prevInd];
  82. }
  83. }
  84. void Clear()
  85. {
  86. for (auto& frame: Frames) {
  87. frame.Clear();
  88. }
  89. Samples = 0;
  90. DroppedSamples = 0;
  91. UniqueSamples = 0;
  92. UsedSlots = 0;
  93. SearchSkipCount = 0;
  94. }
  95. private:
  96. // Hash function applied to the addresses
  97. static ui32 Hash(void* addr, int prevInd, int tag)
  98. {
  99. return (((size_t)addr + ((size_t)addr / STACKS_HASH_MAP_SIZE)) + prevInd + tag) % STACKS_HASH_MAP_SIZE;
  100. }
  101. static bool EqualFrame(const TFrameInfo& frame, void* addr, int prevInd, int tag)
  102. {
  103. return (frame.Addr == addr && frame.PrevInd == prevInd && frame.Tag == tag);
  104. }
  105. bool IsSlotEmpty(ui32 slot) const
  106. {
  107. return Frames[slot].Addr == 0;
  108. }
  109. bool InsertsAllowed() const
  110. {
  111. return UsedSlots < STACKS_HASH_MAP_SIZE / 2;
  112. }
  113. // returns the index in the hashmap
  114. int AddFrame(void* addr, int prevFrameIndex, int tag, bool last)
  115. {
  116. ui32 slot = Hash(addr, prevFrameIndex, tag);
  117. ui32 prevSlot = (slot - 1) % STACKS_HASH_MAP_SIZE;
  118. while (!EqualFrame(Frames[slot], addr, prevFrameIndex, tag) && !IsSlotEmpty(slot) && slot != prevSlot) {
  119. slot = (slot + 1) % STACKS_HASH_MAP_SIZE;
  120. SearchSkipCount++;
  121. }
  122. if (EqualFrame(Frames[slot], addr, prevFrameIndex, tag)) {
  123. if (last) {
  124. ++Samples;
  125. }
  126. } else if (InsertsAllowed() && IsSlotEmpty(slot)) {
  127. // add new sample
  128. Frames[slot].Clear();
  129. Frames[slot].Addr = addr;
  130. Frames[slot].PrevInd = prevFrameIndex;
  131. Frames[slot].Tag = tag;
  132. ++UsedSlots;
  133. if (last) {
  134. ++UniqueSamples;
  135. ++Samples;
  136. }
  137. } else {
  138. // don't insert new sample if the search is becoming too slow
  139. ++DroppedSamples;
  140. return -1;
  141. }
  142. return slot;
  143. }
  144. };
  145. ////////////////////////////////////////////////////////////////////////////////
  146. class TAllocationStackCollector::TImpl: public TStackCollector<TStats> {
  147. using TBase = TStackCollector<TStats>;
  148. private:
  149. TStats Total;
  150. public:
  151. int Alloc(void** stack, size_t frameCount, int tag, size_t size)
  152. {
  153. int stackId = TBase::AddStack(stack, frameCount, tag);
  154. if (stackId >= 0) {
  155. TBase::GetStats(stackId).Alloc(size);
  156. Total.Alloc(size);
  157. }
  158. return stackId;
  159. }
  160. void Free(int stackId, size_t size)
  161. {
  162. TBase::GetStats(stackId).Free(size);
  163. Total.Free(size);
  164. }
  165. void Clear()
  166. {
  167. TBase::Clear();
  168. Total.Clear();
  169. }
  170. void Dump(int count, IAllocationStatsDumper& out) const
  171. {
  172. const TFrameInfo* frames = TBase::GetFrames();
  173. size_t framesCount = TBase::GetFramesCount();
  174. TVector<const TFrameInfo*> stacks;
  175. for (size_t i = 0; i < framesCount; ++i) {
  176. if (frames[i].Stats.Allocs) {
  177. stacks.push_back(&frames[i]);
  178. }
  179. }
  180. Sort(stacks, [] (const TFrameInfo* l, const TFrameInfo* r) {
  181. const auto& ls = l->Stats;
  182. const auto& rs = r->Stats;
  183. return ls.CurrentSize != rs.CurrentSize
  184. ? ls.CurrentSize > rs.CurrentSize
  185. : ls.Allocs != rs.Allocs
  186. ? ls.Allocs > rs.Allocs
  187. : ls.Frees > rs.Frees;
  188. });
  189. out.DumpTotal(Total);
  190. TAllocationInfo allocInfo;
  191. int printedCount = 0;
  192. for (const TFrameInfo* stack: stacks) {
  193. allocInfo.Clear();
  194. allocInfo.Tag = stack->Tag;
  195. allocInfo.Stats = stack->Stats;
  196. TBase::BackTrace(stack, allocInfo.Stack);
  197. out.DumpEntry(allocInfo);
  198. if (++printedCount >= count) {
  199. break;
  200. }
  201. }
  202. }
  203. };
  204. ////////////////////////////////////////////////////////////////////////////////
  205. TAllocationStackCollector::TAllocationStackCollector()
  206. : Impl(new TImpl())
  207. {}
  208. TAllocationStackCollector::~TAllocationStackCollector()
  209. {}
  210. int TAllocationStackCollector::Alloc(void** stack, size_t frameCount, int tag, size_t size)
  211. {
  212. return Impl->Alloc(stack, frameCount, tag, size);
  213. }
  214. void TAllocationStackCollector::Free(int stackId, size_t size)
  215. {
  216. Impl->Free(stackId, size);
  217. }
  218. void TAllocationStackCollector::Clear()
  219. {
  220. Impl->Clear();
  221. }
  222. void TAllocationStackCollector::Dump(int count, IAllocationStatsDumper &out) const
  223. {
  224. Impl->Dump(count, out);
  225. }
  226. TString IAllocationStatsDumper::FormatTag(int tag) {
  227. return ToString(tag);
  228. }
  229. TString IAllocationStatsDumper::FormatSize(intptr_t sz) {
  230. return ToString(sz);
  231. }
  232. TAllocationStatsDumper::TAllocationStatsDumper(IOutputStream& out)
  233. : PrintedCount(0)
  234. , Out(out)
  235. , SymbolCache(2048)
  236. {}
  237. void TAllocationStatsDumper::DumpTotal(const TStats& total) {
  238. Out << "TOTAL"
  239. << "\tAllocs: " << total.Allocs
  240. << "\tFrees: " << total.Frees
  241. << "\tCurrentSize: " << FormatSize(total.CurrentSize)
  242. << Endl;
  243. }
  244. void TAllocationStatsDumper::DumpEntry(const TAllocationInfo& allocInfo) {
  245. Out << Endl
  246. << "STACK #" << PrintedCount+1 << ": " << FormatTag(allocInfo.Tag)
  247. << "\tAllocs: " << allocInfo.Stats.Allocs
  248. << "\tFrees: " << allocInfo.Stats.Frees
  249. << "\tCurrentSize: " << FormatSize(allocInfo.Stats.CurrentSize)
  250. << Endl;
  251. FormatBackTrace(allocInfo.Stack.data(), allocInfo.Stack.size());
  252. PrintedCount++;
  253. }
  254. void TAllocationStatsDumper::FormatBackTrace(void* const* stack, size_t sz) {
  255. char name[1024];
  256. for (size_t i = 0; i < sz; ++i) {
  257. TSymbol symbol;
  258. auto it = SymbolCache.Find(stack[i]);
  259. if (it != SymbolCache.End()) {
  260. symbol = it.Value();
  261. } else {
  262. TResolvedSymbol rs = ResolveSymbol(stack[i], name, sizeof(name));
  263. symbol = {rs.NearestSymbol, rs.Name};
  264. SymbolCache.Insert(stack[i], symbol);
  265. }
  266. Out << Hex((intptr_t)stack[i], HF_FULL) << "\t" << symbol.Name;
  267. intptr_t offset = (intptr_t)stack[i] - (intptr_t)symbol.Address;
  268. if (offset)
  269. Out << " +" << offset;
  270. Out << Endl;
  271. }
  272. }
  273. } // namespace NAllocProfiler