//===- RawMemProfReader.cpp - Instrumented memory profiling reader -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains support for reading MemProf profiling data.
//
//===----------------------------------------------------------------------===//

#include <algorithm>
#include <cstdint>
#include <memory>
#include <type_traits>

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/ProfileData/MemProfData.inc"
#include "llvm/ProfileData/RawMemProfReader.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Path.h"

#define DEBUG_TYPE "memprof"

namespace llvm {
namespace memprof {
namespace {
template <class T> inline T alignedRead(const char *Ptr) {
  static_assert(std::is_pod<T>::value, "Not a pod type.");
  assert(reinterpret_cast<size_t>(Ptr) % sizeof(T) == 0 && "Unaligned Read");
  return *reinterpret_cast<const T *>(Ptr);
}

Error checkBuffer(const MemoryBuffer &Buffer) {
  if (!RawMemProfReader::hasFormat(Buffer))
    return make_error<InstrProfError>(instrprof_error::bad_magic);

  if (Buffer.getBufferSize() == 0)
    return make_error<InstrProfError>(instrprof_error::empty_raw_profile);

  if (Buffer.getBufferSize() < sizeof(Header)) {
    return make_error<InstrProfError>(instrprof_error::truncated);
  }

  // The size of the buffer can be > header total size since we allow repeated
  // serialization of memprof profiles to the same file.
  uint64_t TotalSize = 0;
  const char *Next = Buffer.getBufferStart();
  while (Next < Buffer.getBufferEnd()) {
    auto *H = reinterpret_cast<const Header *>(Next);
    if (H->Version != MEMPROF_RAW_VERSION) {
      return make_error<InstrProfError>(instrprof_error::unsupported_version);
    }

    TotalSize += H->TotalSize;
    Next += H->TotalSize;
  }

  if (Buffer.getBufferSize() != TotalSize) {
    return make_error<InstrProfError>(instrprof_error::malformed);
  }
  return Error::success();
}
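// For reference, a sketch of the on-disk layout that the helpers below parse.
// The Header fields mirror the struct defined in MemProfData.inc at the
// current MEMPROF_RAW_VERSION; treat this sketch as illustrative, the .inc
// file is authoritative:
//
//   struct Header {
//     uint64_t Magic;         // MEMPROF_RAW_MAGIC_64
//     uint64_t Version;       // MEMPROF_RAW_VERSION
//     uint64_t TotalSize;     // Size of this profile, header included.
//     uint64_t SegmentOffset; // Byte offsets from the start of the header
//     uint64_t MIBOffset;     // to the segment, MIB and stack sections
//     uint64_t StackOffset;   // parsed by the read* helpers below.
//   };
//
// Each section begins with a little-endian uint64_t item count, followed by
// that many fixed-size records (segments, MIBs) or variable-length stack
// entries of the form (StackId, NumPCs, PC0..PCn).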
llvm::SmallVector<SegmentEntry> readSegmentEntries(const char *Ptr) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, little, unaligned>(Ptr);
  llvm::SmallVector<SegmentEntry> Items;
  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    Items.push_back(*reinterpret_cast<const SegmentEntry *>(
        Ptr + I * sizeof(SegmentEntry)));
  }
  return Items;
}

llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
readMemInfoBlocks(const char *Ptr) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, little, unaligned>(Ptr);
  llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> Items;
  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    const uint64_t Id = endian::readNext<uint64_t, little, unaligned>(Ptr);
    const MemInfoBlock MIB = *reinterpret_cast<const MemInfoBlock *>(Ptr);
    Items.push_back({Id, MIB});
    // Only increment by size of MIB since readNext implicitly increments.
    Ptr += sizeof(MemInfoBlock);
  }
  return Items;
}

CallStackMap readStackInfo(const char *Ptr) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, little, unaligned>(Ptr);
  CallStackMap Items;

  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    const uint64_t StackId = endian::readNext<uint64_t, little, unaligned>(Ptr);
    const uint64_t NumPCs = endian::readNext<uint64_t, little, unaligned>(Ptr);

    SmallVector<uint64_t> CallStack;
    for (uint64_t J = 0; J < NumPCs; J++) {
      CallStack.push_back(endian::readNext<uint64_t, little, unaligned>(Ptr));
    }

    Items[StackId] = CallStack;
  }
  return Items;
}

// Merges the contents of stack information in \p From to \p To. Returns true
// if any stack ids observed previously map to a different set of program
// counter addresses.
bool mergeStackMap(const CallStackMap &From, CallStackMap &To) {
  for (const auto &IdStack : From) {
    auto I = To.find(IdStack.first);
    if (I == To.end()) {
      To[IdStack.first] = IdStack.second;
    } else {
      // Check that the PCs are the same (in order).
      if (IdStack.second != I->second)
        return true;
    }
  }
  return false;
}

Error report(Error E, const StringRef Context) {
  return joinErrors(createStringError(inconvertibleErrorCode(), Context),
                    std::move(E));
}

bool isRuntimePath(const StringRef Path) {
  return StringRef(llvm::sys::path::convert_to_slash(Path))
      .contains("memprof/memprof_");
}

std::string getBuildIdString(const SegmentEntry &Entry) {
  constexpr size_t Size = sizeof(Entry.BuildId) / sizeof(uint8_t);
  constexpr uint8_t Zeros[Size] = {0};
  // If the build id is unset print a helpful string instead of all zeros.
  if (memcmp(Entry.BuildId, Zeros, Size) == 0)
    return "<None>";

  std::string Str;
  raw_string_ostream OS(Str);
  for (size_t I = 0; I < Size; I++) {
    OS << format_hex_no_prefix(Entry.BuildId[I], 2);
  }
  return OS.str();
}
} // namespace

Expected<std::unique_ptr<RawMemProfReader>>
RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary,
                         bool KeepName) {
  auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
  if (std::error_code EC = BufferOr.getError())
    return report(errorCodeToError(EC), Path.getSingleStringRef());

  std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
  if (Error E = checkBuffer(*Buffer))
    return report(std::move(E), Path.getSingleStringRef());

  if (ProfiledBinary.empty())
    return report(
        errorCodeToError(make_error_code(std::errc::invalid_argument)),
        "Path to profiled binary is empty!");

  auto BinaryOr = llvm::object::createBinary(ProfiledBinary);
  if (!BinaryOr) {
    return report(BinaryOr.takeError(), ProfiledBinary);
  }

  // Use new here since constructor is private.
  std::unique_ptr<RawMemProfReader> Reader(
      new RawMemProfReader(std::move(BinaryOr.get()), KeepName));
  if (Error E = Reader->initialize(std::move(Buffer))) {
    return std::move(E);
  }
  return std::move(Reader);
}

bool RawMemProfReader::hasFormat(const StringRef Path) {
  auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
  if (!BufferOr)
    return false;

  std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
  return hasFormat(*Buffer);
}

bool RawMemProfReader::hasFormat(const MemoryBuffer &Buffer) {
  if (Buffer.getBufferSize() < sizeof(uint64_t))
    return false;
  // Aligned read to sanity check that the buffer was allocated with at least
  // 8b alignment.
  const uint64_t Magic = alignedRead<uint64_t>(Buffer.getBufferStart());
  return Magic == MEMPROF_RAW_MAGIC_64;
}
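// A minimal usage sketch for the reader created above; the profile and binary
// paths are illustrative placeholders:
//
//   auto ReaderOr = RawMemProfReader::create("memprof.profraw",
//                                            "/path/to/profiled/binary");
//   if (!ReaderOr)
//     return ReaderOr.takeError();
//   std::unique_ptr<RawMemProfReader> Reader = std::move(ReaderOr.get());
//   // Iterate the symbolized records, as printYAML below does internally.
//   for (const auto &KV : *Reader)
//     KV.second.print(llvm::errs());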
void RawMemProfReader::printYAML(raw_ostream &OS) {
  uint64_t NumAllocFunctions = 0, NumMibInfo = 0;
  for (const auto &KV : FunctionProfileData) {
    const size_t NumAllocSites = KV.second.AllocSites.size();
    if (NumAllocSites > 0) {
      NumAllocFunctions++;
      NumMibInfo += NumAllocSites;
    }
  }

  OS << "MemprofProfile:\n";
  OS << "  Summary:\n";
  OS << "    Version: " << MEMPROF_RAW_VERSION << "\n";
  OS << "    NumSegments: " << SegmentInfo.size() << "\n";
  OS << "    NumMibInfo: " << NumMibInfo << "\n";
  OS << "    NumAllocFunctions: " << NumAllocFunctions << "\n";
  OS << "    NumStackOffsets: " << StackMap.size() << "\n";
  // Print out the segment information.
  OS << "  Segments:\n";
  for (const auto &Entry : SegmentInfo) {
    OS << "  -\n";
    OS << "    BuildId: " << getBuildIdString(Entry) << "\n";
    OS << "    Start: 0x" << llvm::utohexstr(Entry.Start) << "\n";
    OS << "    End: 0x" << llvm::utohexstr(Entry.End) << "\n";
    OS << "    Offset: 0x" << llvm::utohexstr(Entry.Offset) << "\n";
  }
  // Print out the merged contents of the profiles.
  OS << "  Records:\n";
  for (const auto &Entry : *this) {
    OS << "  -\n";
    OS << "    FunctionGUID: " << Entry.first << "\n";
    Entry.second.print(OS);
  }
}
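// An abridged example of the output printYAML produces. All values are made
// up for illustration:
//
//   MemprofProfile:
//     Summary:
//       Version: 3
//       NumSegments: 4
//       NumMibInfo: 2
//       NumAllocFunctions: 1
//       NumStackOffsets: 2
//     Segments:
//     -
//       BuildId: <None>
//       Start: 0x200000
//       End: 0x2b0000
//       Offset: 0x0
//     Records:
//     -
//       FunctionGUID: 15350731121392311059
//       ...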
Error RawMemProfReader::initialize(std::unique_ptr<MemoryBuffer> DataBuffer) {
  const StringRef FileName = Binary.getBinary()->getFileName();

  auto *ElfObject = dyn_cast<object::ELFObjectFileBase>(Binary.getBinary());
  if (!ElfObject) {
    return report(make_error<StringError>(Twine("Not an ELF file: "),
                                          inconvertibleErrorCode()),
                  FileName);
  }

  // Check whether the profiled binary was built with position independent code
  // (PIC). For now we provide an error message until symbolization support is
  // added for PIC.
  auto *Elf64LEObject = llvm::cast<llvm::object::ELF64LEObjectFile>(ElfObject);
  const llvm::object::ELF64LEFile &ElfFile = Elf64LEObject->getELFFile();
  auto PHdrsOr = ElfFile.program_headers();
  if (!PHdrsOr)
    return report(
        make_error<StringError>(Twine("Could not read program headers: "),
                                inconvertibleErrorCode()),
        FileName);
  auto FirstLoadHeader = PHdrsOr->begin();
  while (FirstLoadHeader->p_type != llvm::ELF::PT_LOAD)
    ++FirstLoadHeader;
  if (FirstLoadHeader->p_vaddr == 0)
    return report(
        make_error<StringError>(Twine("Unsupported position independent code"),
                                inconvertibleErrorCode()),
        FileName);

  auto Triple = ElfObject->makeTriple();
  if (!Triple.isX86())
    return report(make_error<StringError>(Twine("Unsupported target: ") +
                                              Triple.getArchName(),
                                          inconvertibleErrorCode()),
                  FileName);

  auto *Object = cast<object::ObjectFile>(Binary.getBinary());
  std::unique_ptr<DWARFContext> Context = DWARFContext::create(
      *Object, DWARFContext::ProcessDebugRelocations::Process);

  auto SOFOr = symbolize::SymbolizableObjectFile::create(
      Object, std::move(Context), /*UntagAddresses=*/false);
  if (!SOFOr)
    return report(SOFOr.takeError(), FileName);
  Symbolizer = std::move(SOFOr.get());

  if (Error E = readRawProfile(std::move(DataBuffer)))
    return E;

  if (Error E = symbolizeAndFilterStackFrames())
    return E;

  return mapRawProfileToRecords();
}

Error RawMemProfReader::mapRawProfileToRecords() {
  // Hold a mapping from function to each callsite location we encounter within
  // it that is part of some dynamic allocation context. The location is stored
  // as a pointer to a symbolized list of inline frames.
  using LocationPtr = const llvm::SmallVector<FrameId> *;
  llvm::MapVector<GlobalValue::GUID, llvm::SetVector<LocationPtr>>
      PerFunctionCallSites;

  // Convert the raw profile callstack data into memprof records. While doing
  // so keep track of related contexts so that we can fill these in later.
  for (const auto &Entry : CallstackProfileData) {
    const uint64_t StackId = Entry.first;

    auto It = StackMap.find(StackId);
    if (It == StackMap.end())
      return make_error<InstrProfError>(
          instrprof_error::malformed,
          "memprof callstack record does not contain id: " + Twine(StackId));

    // Construct the symbolized callstack.
    llvm::SmallVector<FrameId> Callstack;
    Callstack.reserve(It->getSecond().size());

    llvm::ArrayRef<uint64_t> Addresses = It->getSecond();
    for (size_t I = 0; I < Addresses.size(); I++) {
      const uint64_t Address = Addresses[I];
      assert(SymbolizedFrame.count(Address) > 0 &&
             "Address not found in SymbolizedFrame map");
      const SmallVector<FrameId> &Frames = SymbolizedFrame[Address];

      assert(!idToFrame(Frames.back()).IsInlineFrame &&
             "The last frame should not be inlined");

      // Record the callsites for each function. Skip the first frame of the
      // first address since it is the allocation site itself that is recorded
      // as an alloc site.
      for (size_t J = 0; J < Frames.size(); J++) {
        if (I == 0 && J == 0)
          continue;
        // We attach the entire bottom-up frame here for the callsite even
        // though we only need the frames up to and including the frame for
        // Frames[J].Function. This will enable better deduplication for
        // compression in the future.
        const GlobalValue::GUID Guid = idToFrame(Frames[J]).Function;
        PerFunctionCallSites[Guid].insert(&Frames);
      }

      // Add all the frames to the current allocation callstack.
      Callstack.append(Frames.begin(), Frames.end());
    }

    // We attach the memprof record to each function bottom-up including the
    // first non-inline frame.
    for (size_t I = 0; /*Break out using the condition below*/; I++) {
      const Frame &F = idToFrame(Callstack[I]);
      auto Result =
          FunctionProfileData.insert({F.Function, IndexedMemProfRecord()});
      IndexedMemProfRecord &Record = Result.first->second;
      Record.AllocSites.emplace_back(Callstack, Entry.second);

      if (!F.IsInlineFrame)
        break;
    }
  }

  // Fill in the related callsites per function.
  for (const auto &[Id, Locs] : PerFunctionCallSites) {
    // Some functions may have only callsite data and no allocation data. Here
    // we insert a new entry for callsite data if we need to.
    auto Result = FunctionProfileData.insert({Id, IndexedMemProfRecord()});
    IndexedMemProfRecord &Record = Result.first->second;
    for (LocationPtr Loc : Locs) {
      Record.CallSites.push_back(*Loc);
    }
  }

  return Error::success();
}
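// To make the attachment rule above concrete with a hypothetical context: say
// an allocation occurs in foo(), foo() is inlined into bar(), and bar() is
// called from main(). The symbolized callstack, bottom-up, is:
//
//   [ foo (inlined into bar), bar, main ]
//
// The allocation record is attached to foo and bar: every inline frame plus
// the first non-inline frame. Every frame other than the allocation frame
// itself also records the context as a callsite, so bar and main each get a
// CallSites entry referencing this stack.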
Error RawMemProfReader::symbolizeAndFilterStackFrames() {
  // The specifier to use when symbolization is requested.
  const DILineInfoSpecifier Specifier(
      DILineInfoSpecifier::FileLineInfoKind::RawValue,
      DILineInfoSpecifier::FunctionNameKind::LinkageName);

  // For entries where all PCs in the callstack are discarded, we erase the
  // entry from the stack map.
  llvm::SmallVector<uint64_t> EntriesToErase;
  // We keep track of all prior discarded entries so that we can avoid invoking
  // the symbolizer for such entries.
  llvm::DenseSet<uint64_t> AllVAddrsToDiscard;
  for (auto &Entry : StackMap) {
    for (const uint64_t VAddr : Entry.getSecond()) {
      // Check if we have already symbolized and cached the result or if we
      // don't want to attempt symbolization since we know this address is bad.
      // In this case the address is also removed from the current callstack.
      if (SymbolizedFrame.count(VAddr) > 0 ||
          AllVAddrsToDiscard.contains(VAddr))
        continue;

      Expected<DIInliningInfo> DIOr = Symbolizer->symbolizeInlinedCode(
          getModuleOffset(VAddr), Specifier, /*UseSymbolTable=*/false);
      if (!DIOr)
        return DIOr.takeError();
      DIInliningInfo DI = DIOr.get();

      // Drop frames which we can't symbolize or if they belong to the runtime.
      if (DI.getFrame(0).FunctionName == DILineInfo::BadString ||
          isRuntimePath(DI.getFrame(0).FileName)) {
        AllVAddrsToDiscard.insert(VAddr);
        continue;
      }

      for (size_t I = 0, NumFrames = DI.getNumberOfFrames(); I < NumFrames;
           I++) {
        const auto &DIFrame = DI.getFrame(I);
        const uint64_t Guid =
            IndexedMemProfRecord::getGUID(DIFrame.FunctionName);
        const Frame F(Guid, DIFrame.Line - DIFrame.StartLine, DIFrame.Column,
                      // Only the last entry is not an inlined location.
                      I != NumFrames - 1);

        // Here we retain a mapping from the GUID to symbol name instead of
        // adding it to the frame object directly to reduce memory overhead.
        // This is because there can be many unique frames, particularly for
        // callsite frames.
        if (KeepSymbolName)
          GuidToSymbolName.insert({Guid, DIFrame.FunctionName});

        const FrameId Hash = F.hash();
        IdToFrame.insert({Hash, F});
        SymbolizedFrame[VAddr].push_back(Hash);
      }
    }

    auto &CallStack = Entry.getSecond();
    llvm::erase_if(CallStack, [&AllVAddrsToDiscard](const uint64_t A) {
      return AllVAddrsToDiscard.contains(A);
    });
    if (CallStack.empty())
      EntriesToErase.push_back(Entry.getFirst());
  }

  // Drop the entries where the callstack is empty.
  for (const uint64_t Id : EntriesToErase) {
    StackMap.erase(Id);
    CallstackProfileData.erase(Id);
  }

  if (StackMap.empty())
    return make_error<InstrProfError>(
        instrprof_error::malformed,
        "no entries in callstack map after symbolization");

  return Error::success();
}

Error RawMemProfReader::readRawProfile(
    std::unique_ptr<MemoryBuffer> DataBuffer) {
  const char *Next = DataBuffer->getBufferStart();

  while (Next < DataBuffer->getBufferEnd()) {
    auto *Header = reinterpret_cast<const memprof::Header *>(Next);

    // Read in the segment information, check whether it's the same across all
    // profiles in this binary file.
    const llvm::SmallVector<SegmentEntry> Entries =
        readSegmentEntries(Next + Header->SegmentOffset);
    if (!SegmentInfo.empty() && SegmentInfo != Entries) {
      // We do not expect segment information to change when deserializing from
      // the same binary profile file. This can happen if dynamic libraries are
      // loaded/unloaded between profile dumps.
      return make_error<InstrProfError>(
          instrprof_error::malformed,
          "memprof raw profile has different segment information");
    }
    SegmentInfo.assign(Entries.begin(), Entries.end());

    // Read in the MemInfoBlocks. Merge them based on stack id - we assume that
    // raw profiles in the same binary file are from the same process so the
    // stackdepot ids are the same.
    for (const auto &Value : readMemInfoBlocks(Next + Header->MIBOffset)) {
      if (CallstackProfileData.count(Value.first)) {
        CallstackProfileData[Value.first].Merge(Value.second);
      } else {
        CallstackProfileData[Value.first] = Value.second;
      }
    }

    // Read in the callstack for each id. For multiple raw profiles in the
    // same file, we expect that the callstack is the same for a unique id.
    const CallStackMap CSM = readStackInfo(Next + Header->StackOffset);
    if (StackMap.empty()) {
      StackMap = CSM;
    } else {
      if (mergeStackMap(CSM, StackMap))
        return make_error<InstrProfError>(
            instrprof_error::malformed,
            "memprof raw profile got different call stack for same id");
    }

    Next += Header->TotalSize;
  }

  return Error::success();
}
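// For reference, a raw file that the runtime dumped into twice (which
// checkBuffer above explicitly allows) is just two complete profiles laid out
// back to back:
//
//   | Header A | Segments | MIBs | Stacks | Header B | Segments | ... |
//
// readRawProfile hops between headers via TotalSize and merges the section
// contents, rejecting dumps whose segment information or per-id call stacks
// disagree.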
object::SectionedAddress
RawMemProfReader::getModuleOffset(const uint64_t VirtualAddress) {
  LLVM_DEBUG({
    SegmentEntry *ContainingSegment = nullptr;
    for (auto &SE : SegmentInfo) {
      if (VirtualAddress > SE.Start && VirtualAddress <= SE.End) {
        ContainingSegment = &SE;
      }
    }

    // Ensure that the virtual address is valid.
    assert(ContainingSegment && "Could not find a segment entry");
  });

  // TODO: Compute the file offset based on the maps and program headers. For
  // now this only works for non PIE binaries.
  return object::SectionedAddress{VirtualAddress};
}

Error RawMemProfReader::readNextRecord(GuidMemProfRecordPair &GuidRecord) {
  if (FunctionProfileData.empty())
    return make_error<InstrProfError>(instrprof_error::empty_raw_profile);

  if (Iter == FunctionProfileData.end())
    return make_error<InstrProfError>(instrprof_error::eof);

  auto IdToFrameCallback = [this](const FrameId Id) {
    Frame F = this->idToFrame(Id);
    if (!this->KeepSymbolName)
      return F;
    auto Iter = this->GuidToSymbolName.find(F.Function);
    assert(Iter != this->GuidToSymbolName.end());
    F.SymbolName = Iter->getSecond();
    return F;
  };

  const IndexedMemProfRecord &IndexedRecord = Iter->second;
  GuidRecord = {Iter->first, MemProfRecord(IndexedRecord, IdToFrameCallback)};
  Iter++;
  return Error::success();
}
} // namespace memprof
} // namespace llvm