#include "stackcollect.h" #include "profiler.h" #include #include #include #include #include #include #include #include #include namespace NAllocProfiler { //////////////////////////////////////////////////////////////////////////////// template class TStackCollector: private TNonCopyable { public: struct TFrameInfo { int PrevInd; void* Addr; int Tag; T Stats; void Clear() { PrevInd = 0; Addr = nullptr; Tag = 0; Stats.Clear(); } }; private: static const size_t STACKS_HASH_MAP_SIZE = 256 * 1024; TFrameInfo Frames[STACKS_HASH_MAP_SIZE]; ui64 Samples; // Saved samples count ui64 UniqueSamples; // Number of unique addresses ui64 UsedSlots; // Number of occupied slots in the hashtable ui64 DroppedSamples; // Number of unsaved addresses ui64 SearchSkipCount; // Total number of linear hash table probes due to collisions TAdaptiveLock Lock; public: TStackCollector() { Clear(); } int AddStack(void** stack, size_t frameCount, int tag) { Y_ASSERT(frameCount > 0); int prevInd = -1; with_lock (Lock) { for (int i = frameCount - 1; i >= 0; --i) { prevInd = AddFrame(stack[i], prevInd, ((i == 0) ? tag : 0), (i == 0)); if (prevInd == -1) { break; } } } return prevInd; } T& GetStats(int stackId) { Y_ASSERT(stackId >= 0 && (size_t)stackId < Y_ARRAY_SIZE(Frames)); Y_ASSERT(!IsSlotEmpty(stackId)); return Frames[stackId].Stats; } const TFrameInfo* GetFrames() const { return Frames; } size_t GetFramesCount() const { return Y_ARRAY_SIZE(Frames); } void BackTrace(const TFrameInfo* stack, TStackVec& frames) const { frames.clear(); for (size_t i = 0; i < 100; ++i) { frames.push_back(stack->Addr); int prevInd = stack->PrevInd; if (prevInd == -1) { break; } stack = &Frames[prevInd]; } } void Clear() { for (auto& frame: Frames) { frame.Clear(); } Samples = 0; DroppedSamples = 0; UniqueSamples = 0; UsedSlots = 0; SearchSkipCount = 0; } private: // Hash function applied to the addresses static ui32 Hash(void* addr, int prevInd, int tag) { return (((size_t)addr + ((size_t)addr / STACKS_HASH_MAP_SIZE)) + prevInd + tag) % STACKS_HASH_MAP_SIZE; } static bool EqualFrame(const TFrameInfo& frame, void* addr, int prevInd, int tag) { return (frame.Addr == addr && frame.PrevInd == prevInd && frame.Tag == tag); } bool IsSlotEmpty(ui32 slot) const { return Frames[slot].Addr == 0; } bool InsertsAllowed() const { return UsedSlots < STACKS_HASH_MAP_SIZE / 2; } // returns the index in the hashmap int AddFrame(void* addr, int prevFrameIndex, int tag, bool last) { ui32 slot = Hash(addr, prevFrameIndex, tag); ui32 prevSlot = (slot - 1) % STACKS_HASH_MAP_SIZE; while (!EqualFrame(Frames[slot], addr, prevFrameIndex, tag) && !IsSlotEmpty(slot) && slot != prevSlot) { slot = (slot + 1) % STACKS_HASH_MAP_SIZE; SearchSkipCount++; } if (EqualFrame(Frames[slot], addr, prevFrameIndex, tag)) { if (last) { ++Samples; } } else if (InsertsAllowed() && IsSlotEmpty(slot)) { // add new sample Frames[slot].Clear(); Frames[slot].Addr = addr; Frames[slot].PrevInd = prevFrameIndex; Frames[slot].Tag = tag; ++UsedSlots; if (last) { ++UniqueSamples; ++Samples; } } else { // don't insert new sample if the search is becoming too slow ++DroppedSamples; return -1; } return slot; } }; //////////////////////////////////////////////////////////////////////////////// class TAllocationStackCollector::TImpl: public TStackCollector { using TBase = TStackCollector; private: TStats Total; public: int Alloc(void** stack, size_t frameCount, int tag, size_t size) { int stackId = TBase::AddStack(stack, frameCount, tag); if (stackId >= 0) { TBase::GetStats(stackId).Alloc(size); Total.Alloc(size); } return stackId; } void Free(int stackId, size_t size) { TBase::GetStats(stackId).Free(size); Total.Free(size); } void Clear() { TBase::Clear(); Total.Clear(); } void Dump(int count, IAllocationStatsDumper& out) const { const TFrameInfo* frames = TBase::GetFrames(); size_t framesCount = TBase::GetFramesCount(); TVector stacks; for (size_t i = 0; i < framesCount; ++i) { if (frames[i].Stats.Allocs) { stacks.push_back(&frames[i]); } } Sort(stacks, [] (const TFrameInfo* l, const TFrameInfo* r) { const auto& ls = l->Stats; const auto& rs = r->Stats; return ls.CurrentSize != rs.CurrentSize ? ls.CurrentSize > rs.CurrentSize : ls.Allocs != rs.Allocs ? ls.Allocs > rs.Allocs : ls.Frees > rs.Frees; }); out.DumpTotal(Total); TAllocationInfo allocInfo; int printedCount = 0; for (const TFrameInfo* stack: stacks) { allocInfo.Clear(); allocInfo.Tag = stack->Tag; allocInfo.Stats = stack->Stats; TBase::BackTrace(stack, allocInfo.Stack); out.DumpEntry(allocInfo); if (++printedCount >= count) { break; } } } }; //////////////////////////////////////////////////////////////////////////////// TAllocationStackCollector::TAllocationStackCollector() : Impl(new TImpl()) {} TAllocationStackCollector::~TAllocationStackCollector() {} int TAllocationStackCollector::Alloc(void** stack, size_t frameCount, int tag, size_t size) { return Impl->Alloc(stack, frameCount, tag, size); } void TAllocationStackCollector::Free(int stackId, size_t size) { Impl->Free(stackId, size); } void TAllocationStackCollector::Clear() { Impl->Clear(); } void TAllocationStackCollector::Dump(int count, IAllocationStatsDumper &out) const { Impl->Dump(count, out); } TString IAllocationStatsDumper::FormatTag(int tag) { return ToString(tag); } TString IAllocationStatsDumper::FormatSize(intptr_t sz) { return ToString(sz); } TAllocationStatsDumper::TAllocationStatsDumper(IOutputStream& out) : PrintedCount(0) , Out(out) , SymbolCache(2048) {} void TAllocationStatsDumper::DumpTotal(const TStats& total) { Out << "TOTAL" << "\tAllocs: " << total.Allocs << "\tFrees: " << total.Frees << "\tCurrentSize: " << FormatSize(total.CurrentSize) << Endl; } void TAllocationStatsDumper::DumpEntry(const TAllocationInfo& allocInfo) { Out << Endl << "STACK #" << PrintedCount+1 << ": " << FormatTag(allocInfo.Tag) << "\tAllocs: " << allocInfo.Stats.Allocs << "\tFrees: " << allocInfo.Stats.Frees << "\tCurrentSize: " << FormatSize(allocInfo.Stats.CurrentSize) << Endl; FormatBackTrace(allocInfo.Stack.data(), allocInfo.Stack.size()); PrintedCount++; } void TAllocationStatsDumper::FormatBackTrace(void* const* stack, size_t sz) { char name[1024]; for (size_t i = 0; i < sz; ++i) { TSymbol symbol; auto it = SymbolCache.Find(stack[i]); if (it != SymbolCache.End()) { symbol = it.Value(); } else { TResolvedSymbol rs = ResolveSymbol(stack[i], name, sizeof(name)); symbol = {rs.NearestSymbol, rs.Name}; SymbolCache.Insert(stack[i], symbol); } Out << Hex((intptr_t)stack[i], HF_FULL) << "\t" << symbol.Name; intptr_t offset = (intptr_t)stack[i] - (intptr_t)symbol.Address; if (offset) Out << " +" << offset; Out << Endl; } } } // namespace NAllocProfiler