123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585 |
- //===-- ProfiledBinary.h - Binary decoder -----------------------*- C++ -*-===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- #ifndef LLVM_TOOLS_LLVM_PROFGEN_PROFILEDBINARY_H
- #define LLVM_TOOLS_LLVM_PROFGEN_PROFILEDBINARY_H
- #include "CallContext.h"
- #include "ErrorHandling.h"
- #include "llvm/ADT/DenseMap.h"
- #include "llvm/ADT/StringRef.h"
- #include "llvm/ADT/StringSet.h"
- #include "llvm/DebugInfo/DWARF/DWARFContext.h"
- #include "llvm/DebugInfo/Symbolize/Symbolize.h"
- #include "llvm/MC/MCAsmInfo.h"
- #include "llvm/MC/MCContext.h"
- #include "llvm/MC/MCDisassembler/MCDisassembler.h"
- #include "llvm/MC/MCInst.h"
- #include "llvm/MC/MCInstPrinter.h"
- #include "llvm/MC/MCInstrAnalysis.h"
- #include "llvm/MC/MCInstrInfo.h"
- #include "llvm/MC/MCObjectFileInfo.h"
- #include "llvm/MC/MCPseudoProbe.h"
- #include "llvm/MC/MCRegisterInfo.h"
- #include "llvm/MC/MCSubtargetInfo.h"
- #include "llvm/MC/MCTargetOptions.h"
- #include "llvm/Object/ELFObjectFile.h"
- #include "llvm/ProfileData/SampleProf.h"
- #include "llvm/Support/CommandLine.h"
- #include "llvm/Support/Path.h"
- #include "llvm/Transforms/IPO/SampleContextTracker.h"
- #include <list>
- #include <map>
- #include <set>
- #include <sstream>
- #include <string>
- #include <unordered_map>
- #include <unordered_set>
- #include <vector>
- extern cl::opt<bool> EnableCSPreInliner;
- extern cl::opt<bool> UseContextCostForPreInliner;
- using namespace llvm;
- using namespace sampleprof;
- using namespace llvm::object;
- namespace llvm {
- namespace sampleprof {
- class ProfiledBinary;
- class MissingFrameInferrer;
- struct InstructionPointer {
- const ProfiledBinary *Binary;
- // Address of the executable segment of the binary.
- uint64_t Address;
- // Index to the sorted code address array of the binary.
- uint64_t Index = 0;
- InstructionPointer(const ProfiledBinary *Binary, uint64_t Address,
- bool RoundToNext = false);
- bool advance();
- bool backward();
- void update(uint64_t Addr);
- };
- // The special frame addresses.
- enum SpecialFrameAddr {
- // Dummy root of frame trie.
- DummyRoot = 0,
- // Represent all the addresses outside of current binary.
- // This's also used to indicate the call stack should be truncated since this
- // isn't a real call context the compiler will see.
- ExternalAddr = 1,
- };
- using RangesTy = std::vector<std::pair<uint64_t, uint64_t>>;
- struct BinaryFunction {
- StringRef FuncName;
- // End of range is an exclusive bound.
- RangesTy Ranges;
- uint64_t getFuncSize() {
- uint64_t Sum = 0;
- for (auto &R : Ranges) {
- Sum += R.second - R.first;
- }
- return Sum;
- }
- };
- // Info about function range. A function can be split into multiple
- // non-continuous ranges, each range corresponds to one FuncRange.
- struct FuncRange {
- uint64_t StartAddress;
- // EndAddress is an exclusive bound.
- uint64_t EndAddress;
- // Function the range belongs to
- BinaryFunction *Func;
- // Whether the start address is the real entry of the function.
- bool IsFuncEntry = false;
- StringRef getFuncName() { return Func->FuncName; }
- };
- // PrologEpilog address tracker, used to filter out broken stack samples
- // Currently we use a heuristic size (two) to infer prolog and epilog
- // based on the start address and return address. In the future,
- // we will switch to Dwarf CFI based tracker
- struct PrologEpilogTracker {
- // A set of prolog and epilog addresses. Used by virtual unwinding.
- std::unordered_set<uint64_t> PrologEpilogSet;
- ProfiledBinary *Binary;
- PrologEpilogTracker(ProfiledBinary *Bin) : Binary(Bin){};
- // Take the two addresses from the start of function as prolog
- void
- inferPrologAddresses(std::map<uint64_t, FuncRange> &FuncStartAddressMap) {
- for (auto I : FuncStartAddressMap) {
- PrologEpilogSet.insert(I.first);
- InstructionPointer IP(Binary, I.first);
- if (!IP.advance())
- break;
- PrologEpilogSet.insert(IP.Address);
- }
- }
- // Take the last two addresses before the return address as epilog
- void inferEpilogAddresses(std::unordered_set<uint64_t> &RetAddrs) {
- for (auto Addr : RetAddrs) {
- PrologEpilogSet.insert(Addr);
- InstructionPointer IP(Binary, Addr);
- if (!IP.backward())
- break;
- PrologEpilogSet.insert(IP.Address);
- }
- }
- };
- // Track function byte size under different context (outlined version as well as
- // various inlined versions). It also provides query support to get function
- // size with the best matching context, which is used to help pre-inliner use
- // accurate post-optimization size to make decisions.
- // TODO: If an inlinee is completely optimized away, ideally we should have zero
- // for its context size, currently we would misss such context since it doesn't
- // have instructions. To fix this, we need to mark all inlinee with entry probe
- // but without instructions as having zero size.
- class BinarySizeContextTracker {
- public:
- // Add instruction with given size to a context
- void addInstructionForContext(const SampleContextFrameVector &Context,
- uint32_t InstrSize);
- // Get function size with a specific context. When there's no exact match
- // for the given context, try to retrieve the size of that function from
- // closest matching context.
- uint32_t getFuncSizeForContext(const ContextTrieNode *Context);
- // For inlinees that are full optimized away, we can establish zero size using
- // their remaining probes.
- void trackInlineesOptimizedAway(MCPseudoProbeDecoder &ProbeDecoder);
- using ProbeFrameStack = SmallVector<std::pair<StringRef, uint32_t>>;
- void trackInlineesOptimizedAway(MCPseudoProbeDecoder &ProbeDecoder,
- MCDecodedPseudoProbeInlineTree &ProbeNode,
- ProbeFrameStack &Context);
- void dump() { RootContext.dumpTree(); }
- private:
- // Root node for context trie tree, node that this is a reverse context trie
- // with callee as parent and caller as child. This way we can traverse from
- // root to find the best/longest matching context if an exact match does not
- // exist. It gives us the best possible estimate for function's post-inline,
- // post-optimization byte size.
- ContextTrieNode RootContext;
- };
- using AddressRange = std::pair<uint64_t, uint64_t>;
- class ProfiledBinary {
- // Absolute path of the executable binary.
- std::string Path;
- // Path of the debug info binary.
- std::string DebugBinaryPath;
- // Path of symbolizer path which should be pointed to binary with debug info.
- StringRef SymbolizerPath;
- // The target triple.
- Triple TheTriple;
- // The runtime base address that the first executable segment is loaded at.
- uint64_t BaseAddress = 0;
- // The runtime base address that the first loadabe segment is loaded at.
- uint64_t FirstLoadableAddress = 0;
- // The preferred load address of each executable segment.
- std::vector<uint64_t> PreferredTextSegmentAddresses;
- // The file offset of each executable segment.
- std::vector<uint64_t> TextSegmentOffsets;
- // Mutiple MC component info
- std::unique_ptr<const MCRegisterInfo> MRI;
- std::unique_ptr<const MCAsmInfo> AsmInfo;
- std::unique_ptr<const MCSubtargetInfo> STI;
- std::unique_ptr<const MCInstrInfo> MII;
- std::unique_ptr<MCDisassembler> DisAsm;
- std::unique_ptr<const MCInstrAnalysis> MIA;
- std::unique_ptr<MCInstPrinter> IPrinter;
- // A list of text sections sorted by start RVA and size. Used to check
- // if a given RVA is a valid code address.
- std::set<std::pair<uint64_t, uint64_t>> TextSections;
- // A map of mapping function name to BinaryFunction info.
- std::unordered_map<std::string, BinaryFunction> BinaryFunctions;
- // A list of binary functions that have samples.
- std::unordered_set<const BinaryFunction *> ProfiledFunctions;
- // GUID to Elf symbol start address map
- DenseMap<uint64_t, uint64_t> SymbolStartAddrs;
- // Start address to Elf symbol GUID map
- std::unordered_multimap<uint64_t, uint64_t> StartAddrToSymMap;
- // An ordered map of mapping function's start address to function range
- // relevant info. Currently to determine if the offset of ELF is the start of
- // a real function, we leverage the function range info from DWARF.
- std::map<uint64_t, FuncRange> StartAddrToFuncRangeMap;
- // Address to context location map. Used to expand the context.
- std::unordered_map<uint64_t, SampleContextFrameVector> AddressToLocStackMap;
- // Address to instruction size map. Also used for quick Address lookup.
- std::unordered_map<uint64_t, uint64_t> AddressToInstSizeMap;
- // An array of Addresses of all instructions sorted in increasing order. The
- // sorting is needed to fast advance to the next forward/backward instruction.
- std::vector<uint64_t> CodeAddressVec;
- // A set of call instruction addresses. Used by virtual unwinding.
- std::unordered_set<uint64_t> CallAddressSet;
- // A set of return instruction addresses. Used by virtual unwinding.
- std::unordered_set<uint64_t> RetAddressSet;
- // An ordered set of unconditional branch instruction addresses.
- std::set<uint64_t> UncondBranchAddrSet;
- // A set of branch instruction addresses.
- std::unordered_set<uint64_t> BranchAddressSet;
- // Estimate and track function prolog and epilog ranges.
- PrologEpilogTracker ProEpilogTracker;
- // Infer missing frames due to compiler optimizations such as tail call
- // elimination.
- std::unique_ptr<MissingFrameInferrer> MissingContextInferrer;
- // Track function sizes under different context
- BinarySizeContextTracker FuncSizeTracker;
- // The symbolizer used to get inline context for an instruction.
- std::unique_ptr<symbolize::LLVMSymbolizer> Symbolizer;
- // String table owning function name strings created from the symbolizer.
- std::unordered_set<std::string> NameStrings;
- // A collection of functions to print disassembly for.
- StringSet<> DisassembleFunctionSet;
- // Pseudo probe decoder
- MCPseudoProbeDecoder ProbeDecoder;
- // Function name to probe frame map for top-level outlined functions.
- StringMap<MCDecodedPseudoProbeInlineTree *> TopLevelProbeFrameMap;
- bool UsePseudoProbes = false;
- bool UseFSDiscriminator = false;
- // Whether we need to symbolize all instructions to get function context size.
- bool TrackFuncContextSize = false;
- // Indicate if the base loading address is parsed from the mmap event or uses
- // the preferred address
- bool IsLoadedByMMap = false;
- // Use to avoid redundant warning.
- bool MissingMMapWarned = false;
- void setPreferredTextSegmentAddresses(const ELFObjectFileBase *O);
- template <class ELFT>
- void setPreferredTextSegmentAddresses(const ELFFile<ELFT> &Obj,
- StringRef FileName);
- void checkPseudoProbe(const ELFObjectFileBase *Obj);
- void decodePseudoProbe(const ELFObjectFileBase *Obj);
- void
- checkUseFSDiscriminator(const ELFObjectFileBase *Obj,
- std::map<SectionRef, SectionSymbolsTy> &AllSymbols);
- // Set up disassembler and related components.
- void setUpDisassembler(const ELFObjectFileBase *Obj);
- void setupSymbolizer();
- // Load debug info of subprograms from DWARF section.
- void loadSymbolsFromDWARF(ObjectFile &Obj);
- // Load debug info from DWARF unit.
- void loadSymbolsFromDWARFUnit(DWARFUnit &CompilationUnit);
- // Create elf symbol to its start address mapping.
- void populateElfSymbolAddressList(const ELFObjectFileBase *O);
- // A function may be spilt into multiple non-continuous address ranges. We use
- // this to set whether start a function range is the real entry of the
- // function and also set false to the non-function label.
- void setIsFuncEntry(FuncRange *FRange, StringRef RangeSymName);
- // Warn if no entry range exists in the function.
- void warnNoFuncEntry();
- /// Dissassemble the text section and build various address maps.
- void disassemble(const ELFObjectFileBase *O);
- /// Helper function to dissassemble the symbol and extract info for unwinding
- bool dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
- SectionSymbolsTy &Symbols, const SectionRef &Section);
- /// Symbolize a given instruction pointer and return a full call context.
- SampleContextFrameVector symbolize(const InstructionPointer &IP,
- bool UseCanonicalFnName = false,
- bool UseProbeDiscriminator = false);
- /// Decode the interesting parts of the binary and build internal data
- /// structures. On high level, the parts of interest are:
- /// 1. Text sections, including the main code section and the PLT
- /// entries that will be used to handle cross-module call transitions.
- /// 2. The .debug_line section, used by Dwarf-based profile generation.
- /// 3. Pseudo probe related sections, used by probe-based profile
- /// generation.
- void load();
- public:
- ProfiledBinary(const StringRef ExeBinPath, const StringRef DebugBinPath);
- ~ProfiledBinary();
- void decodePseudoProbe();
- StringRef getPath() const { return Path; }
- StringRef getName() const { return llvm::sys::path::filename(Path); }
- uint64_t getBaseAddress() const { return BaseAddress; }
- void setBaseAddress(uint64_t Address) { BaseAddress = Address; }
- // Canonicalize to use preferred load address as base address.
- uint64_t canonicalizeVirtualAddress(uint64_t Address) {
- return Address - BaseAddress + getPreferredBaseAddress();
- }
- // Return the preferred load address for the first executable segment.
- uint64_t getPreferredBaseAddress() const {
- return PreferredTextSegmentAddresses[0];
- }
- // Return the preferred load address for the first loadable segment.
- uint64_t getFirstLoadableAddress() const { return FirstLoadableAddress; }
- // Return the file offset for the first executable segment.
- uint64_t getTextSegmentOffset() const { return TextSegmentOffsets[0]; }
- const std::vector<uint64_t> &getPreferredTextSegmentAddresses() const {
- return PreferredTextSegmentAddresses;
- }
- const std::vector<uint64_t> &getTextSegmentOffsets() const {
- return TextSegmentOffsets;
- }
- uint64_t getInstSize(uint64_t Address) const {
- auto I = AddressToInstSizeMap.find(Address);
- if (I == AddressToInstSizeMap.end())
- return 0;
- return I->second;
- }
- bool addressIsCode(uint64_t Address) const {
- return AddressToInstSizeMap.find(Address) != AddressToInstSizeMap.end();
- }
- bool addressIsCall(uint64_t Address) const {
- return CallAddressSet.count(Address);
- }
- bool addressIsReturn(uint64_t Address) const {
- return RetAddressSet.count(Address);
- }
- bool addressInPrologEpilog(uint64_t Address) const {
- return ProEpilogTracker.PrologEpilogSet.count(Address);
- }
- bool addressIsTransfer(uint64_t Address) {
- return BranchAddressSet.count(Address) || RetAddressSet.count(Address) ||
- CallAddressSet.count(Address);
- }
- bool rangeCrossUncondBranch(uint64_t Start, uint64_t End) {
- if (Start >= End)
- return false;
- auto R = UncondBranchAddrSet.lower_bound(Start);
- return R != UncondBranchAddrSet.end() && *R < End;
- }
- uint64_t getAddressforIndex(uint64_t Index) const {
- return CodeAddressVec[Index];
- }
- size_t getCodeAddrVecSize() const { return CodeAddressVec.size(); }
- bool usePseudoProbes() const { return UsePseudoProbes; }
- bool useFSDiscriminator() const { return UseFSDiscriminator; }
- // Get the index in CodeAddressVec for the address
- // As we might get an address which is not the code
- // here it would round to the next valid code address by
- // using lower bound operation
- uint32_t getIndexForAddr(uint64_t Address) const {
- auto Low = llvm::lower_bound(CodeAddressVec, Address);
- return Low - CodeAddressVec.begin();
- }
- uint64_t getCallAddrFromFrameAddr(uint64_t FrameAddr) const {
- if (FrameAddr == ExternalAddr)
- return ExternalAddr;
- auto I = getIndexForAddr(FrameAddr);
- FrameAddr = I ? getAddressforIndex(I - 1) : 0;
- if (FrameAddr && addressIsCall(FrameAddr))
- return FrameAddr;
- return 0;
- }
- FuncRange *findFuncRangeForStartAddr(uint64_t Address) {
- auto I = StartAddrToFuncRangeMap.find(Address);
- if (I == StartAddrToFuncRangeMap.end())
- return nullptr;
- return &I->second;
- }
- // Binary search the function range which includes the input address.
- FuncRange *findFuncRange(uint64_t Address) {
- auto I = StartAddrToFuncRangeMap.upper_bound(Address);
- if (I == StartAddrToFuncRangeMap.begin())
- return nullptr;
- I--;
- if (Address >= I->second.EndAddress)
- return nullptr;
- return &I->second;
- }
- // Get all ranges of one function.
- RangesTy getRanges(uint64_t Address) {
- auto *FRange = findFuncRange(Address);
- // Ignore the range which falls into plt section or system lib.
- if (!FRange)
- return RangesTy();
- return FRange->Func->Ranges;
- }
- const std::unordered_map<std::string, BinaryFunction> &
- getAllBinaryFunctions() {
- return BinaryFunctions;
- }
- std::unordered_set<const BinaryFunction *> &getProfiledFunctions() {
- return ProfiledFunctions;
- }
- void setProfiledFunctions(std::unordered_set<const BinaryFunction *> &Funcs) {
- ProfiledFunctions = Funcs;
- }
- BinaryFunction *getBinaryFunction(StringRef FName) {
- auto I = BinaryFunctions.find(FName.str());
- if (I == BinaryFunctions.end())
- return nullptr;
- return &I->second;
- }
- uint32_t getFuncSizeForContext(const ContextTrieNode *ContextNode) {
- return FuncSizeTracker.getFuncSizeForContext(ContextNode);
- }
- void inferMissingFrames(const SmallVectorImpl<uint64_t> &Context,
- SmallVectorImpl<uint64_t> &NewContext);
- // Load the symbols from debug table and populate into symbol list.
- void populateSymbolListFromDWARF(ProfileSymbolList &SymbolList);
- SampleContextFrameVector
- getFrameLocationStack(uint64_t Address, bool UseProbeDiscriminator = false) {
- InstructionPointer IP(this, Address);
- return symbolize(IP, true, UseProbeDiscriminator);
- }
- const SampleContextFrameVector &
- getCachedFrameLocationStack(uint64_t Address,
- bool UseProbeDiscriminator = false) {
- auto I = AddressToLocStackMap.emplace(Address, SampleContextFrameVector());
- if (I.second) {
- I.first->second = getFrameLocationStack(Address, UseProbeDiscriminator);
- }
- return I.first->second;
- }
- std::optional<SampleContextFrame> getInlineLeafFrameLoc(uint64_t Address) {
- const auto &Stack = getCachedFrameLocationStack(Address);
- if (Stack.empty())
- return {};
- return Stack.back();
- }
- void flushSymbolizer() { Symbolizer.reset(); }
- MissingFrameInferrer* getMissingContextInferrer() {
- return MissingContextInferrer.get();
- }
- // Compare two addresses' inline context
- bool inlineContextEqual(uint64_t Add1, uint64_t Add2);
- // Get the full context of the current stack with inline context filled in.
- // It will search the disassembling info stored in AddressToLocStackMap. This
- // is used as the key of function sample map
- SampleContextFrameVector
- getExpandedContext(const SmallVectorImpl<uint64_t> &Stack,
- bool &WasLeafInlined);
- // Go through instructions among the given range and record its size for the
- // inline context.
- void computeInlinedContextSizeForRange(uint64_t StartAddress,
- uint64_t EndAddress);
- void computeInlinedContextSizeForFunc(const BinaryFunction *Func);
- const MCDecodedPseudoProbe *getCallProbeForAddr(uint64_t Address) const {
- return ProbeDecoder.getCallProbeForAddr(Address);
- }
- void getInlineContextForProbe(const MCDecodedPseudoProbe *Probe,
- SampleContextFrameVector &InlineContextStack,
- bool IncludeLeaf = false) const {
- SmallVector<MCPseduoProbeFrameLocation, 16> ProbeInlineContext;
- ProbeDecoder.getInlineContextForProbe(Probe, ProbeInlineContext,
- IncludeLeaf);
- for (uint32_t I = 0; I < ProbeInlineContext.size(); I++) {
- auto &Callsite = ProbeInlineContext[I];
- // Clear the current context for an unknown probe.
- if (Callsite.second == 0 && I != ProbeInlineContext.size() - 1) {
- InlineContextStack.clear();
- continue;
- }
- InlineContextStack.emplace_back(Callsite.first,
- LineLocation(Callsite.second, 0));
- }
- }
- const AddressProbesMap &getAddress2ProbesMap() const {
- return ProbeDecoder.getAddress2ProbesMap();
- }
- const MCPseudoProbeFuncDesc *getFuncDescForGUID(uint64_t GUID) {
- return ProbeDecoder.getFuncDescForGUID(GUID);
- }
- const MCPseudoProbeFuncDesc *
- getInlinerDescForProbe(const MCDecodedPseudoProbe *Probe) {
- return ProbeDecoder.getInlinerDescForProbe(Probe);
- }
- bool getTrackFuncContextSize() { return TrackFuncContextSize; }
- bool getIsLoadedByMMap() { return IsLoadedByMMap; }
- void setIsLoadedByMMap(bool Value) { IsLoadedByMMap = Value; }
- bool getMissingMMapWarned() { return MissingMMapWarned; }
- void setMissingMMapWarned(bool Value) { MissingMMapWarned = Value; }
- };
- } // end namespace sampleprof
- } // end namespace llvm
- #endif
|