//===-- ProfiledBinary.h - Binary decoder -----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TOOLS_LLVM_PROFGEN_PROFILEDBINARY_H
#define LLVM_TOOLS_LLVM_PROFGEN_PROFILEDBINARY_H

#include "CallContext.h"
#include "ErrorHandling.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/Symbolize/Symbolize.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCPseudoProbe.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Path.h"
#include "llvm/Transforms/IPO/SampleContextTracker.h"
#include <list>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

extern llvm::cl::opt<bool> EnableCSPreInliner;
extern llvm::cl::opt<bool> UseContextCostForPreInliner;

using namespace llvm;
using namespace sampleprof;
using namespace llvm::object;

namespace llvm {
namespace sampleprof {

class ProfiledBinary;

struct InstructionPointer {
  const ProfiledBinary *Binary;
  union {
    // Offset into the executable segment of the binary.
    uint64_t Offset = 0;
    // Also used as an address in the unwinder.
    uint64_t Address;
  };
  // Index into the sorted code address array of the binary.
  uint64_t Index = 0;
  InstructionPointer(const ProfiledBinary *Binary, uint64_t Address,
                     bool RoundToNext = false);
  bool advance();
  bool backward();
  void update(uint64_t Addr);
};
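
// Illustrative usage (a sketch, not part of the original header): walking
// instructions forward from a given code offset, where `PB` is assumed to be
// a loaded ProfiledBinary, `StartOffset` a valid code offset in it, and
// `visit` a hypothetical callback.
//
//   InstructionPointer IP(&PB, StartOffset);
//   do {
//     visit(IP.Offset);
//   } while (IP.advance());
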
// The special frame addresses.
enum SpecialFrameAddr {
  // Dummy root of the frame trie.
  DummyRoot = 0,
  // Represents all the addresses outside of the current binary.
  // This is also used to indicate that the call stack should be truncated,
  // since this isn't a real call context the compiler will see.
  ExternalAddr = 1,
};

using RangesTy = std::vector<std::pair<uint64_t, uint64_t>>;

struct BinaryFunction {
  StringRef FuncName;
  // End of range is an exclusive bound.
  RangesTy Ranges;

  uint64_t getFuncSize() {
    uint64_t Sum = 0;
    for (auto &R : Ranges) {
      Sum += R.second - R.first;
    }
    return Sum;
  }
};
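
// For example (purely illustrative numbers): a function split into the ranges
// [0x1000, 0x1040) and [0x2000, 0x2010) reports getFuncSize() == 0x40 + 0x10.
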
// Info about a function range. A function can be split into multiple
// non-contiguous ranges; each range corresponds to one FuncRange.
struct FuncRange {
  uint64_t StartOffset;
  // EndOffset is an exclusive bound.
  uint64_t EndOffset;
  // Function the range belongs to.
  BinaryFunction *Func;
  // Whether the start offset is the real entry of the function.
  bool IsFuncEntry = false;

  StringRef getFuncName() { return Func->FuncName; }
};

// Prolog/epilog offset tracker, used to filter out broken stack samples.
// Currently we use a heuristic size (two) to infer the prolog and epilog
// based on the start address and return address. In the future, we will
// switch to a DWARF CFI based tracker.
struct PrologEpilogTracker {
  // A set of prolog and epilog offsets. Used by virtual unwinding.
  std::unordered_set<uint64_t> PrologEpilogSet;
  ProfiledBinary *Binary;
  PrologEpilogTracker(ProfiledBinary *Bin) : Binary(Bin) {}

  // Take the first two addresses from the start of each function as its
  // prolog.
  void inferPrologOffsets(std::map<uint64_t, FuncRange> &FuncStartOffsetMap) {
    for (auto I : FuncStartOffsetMap) {
      PrologEpilogSet.insert(I.first);
      InstructionPointer IP(Binary, I.first);
      if (!IP.advance())
        break;
      PrologEpilogSet.insert(IP.Offset);
    }
  }

  // Take each return address and the address preceding it as an epilog.
  void inferEpilogOffsets(std::unordered_set<uint64_t> &RetAddrs) {
    for (auto Addr : RetAddrs) {
      PrologEpilogSet.insert(Addr);
      InstructionPointer IP(Binary, Addr);
      if (!IP.backward())
        break;
      PrologEpilogSet.insert(IP.Offset);
    }
  }
};
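
// Illustrative example (hypothetical offsets): for a function starting at
// offset 0x10 whose second instruction sits at 0x14, and whose return
// instruction at 0x80 is preceded by an instruction at 0x7c, the tracker
// records {0x10, 0x14, 0x7c, 0x80}. Samples landing on these offsets can then
// be treated as prolog/epilog and filtered as potentially broken stack
// samples.
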
// Track function byte size under different contexts (the outlined version as
// well as various inlined versions). It also provides query support to get
// function size with the best matching context, which is used to help the
// pre-inliner use accurate post-optimization sizes to make decisions.
// TODO: If an inlinee is completely optimized away, ideally we should have
// zero for its context size; currently we would miss such a context since it
// doesn't have instructions. To fix this, we need to mark all inlinees with
// an entry probe but without instructions as having zero size.
class BinarySizeContextTracker {
public:
  // Add an instruction with the given size to a context.
  void addInstructionForContext(const SampleContextFrameVector &Context,
                                uint32_t InstrSize);

  // Get the function size for a specific context. When there's no exact match
  // for the given context, try to retrieve the size of that function from the
  // closest matching context.
  uint32_t getFuncSizeForContext(const SampleContext &Context);

  // For inlinees that are fully optimized away, we can establish zero size
  // using their remaining probes.
  void trackInlineesOptimizedAway(MCPseudoProbeDecoder &ProbeDecoder);

  void dump() { RootContext.dumpTree(); }

private:
  using ProbeFrameStack = SmallVector<std::pair<StringRef, uint32_t>>;
  void trackInlineesOptimizedAway(MCPseudoProbeDecoder &ProbeDecoder,
                                  MCDecodedPseudoProbeInlineTree &ProbeNode,
                                  ProbeFrameStack &Context);

  // Root node for the context trie tree. Note that this is a reverse context
  // trie, with the callee as parent and the caller as child. This way we can
  // traverse from the root to find the best/longest matching context if an
  // exact match does not exist. It gives us the best possible estimate for a
  // function's post-inline, post-optimization byte size.
  ContextTrieNode RootContext;
};
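
// Illustrative usage (a sketch, not part of the original header): record
// instruction sizes while disassembling and later query the best-matching
// size for a profile context. `Tracker`, `FrameVec` (a symbolized inline
// stack), and `Ctx` (a SampleContext) are hypothetical locals.
//
//   BinarySizeContextTracker Tracker;
//   Tracker.addInstructionForContext(FrameVec, /*InstrSize=*/4);
//   uint32_t Size = Tracker.getFuncSizeForContext(Ctx);
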
using OffsetRange = std::pair<uint64_t, uint64_t>;

class ProfiledBinary {
  // Absolute path of the executable binary.
  std::string Path;
  // Path of the debug info binary.
  std::string DebugBinaryPath;
  // Path used by the symbolizer; it should point to the binary with debug
  // info.
  StringRef SymbolizerPath;
  // The target triple.
  Triple TheTriple;
  // The runtime base address that the first executable segment is loaded at.
  uint64_t BaseAddress = 0;
  // The runtime base address that the first loadable segment is loaded at.
  uint64_t FirstLoadableAddress = 0;
  // The preferred load address of each executable segment.
  std::vector<uint64_t> PreferredTextSegmentAddresses;
  // The file offset of each executable segment.
  std::vector<uint64_t> TextSegmentOffsets;

  // The various MC components used for disassembly.
  std::unique_ptr<const MCRegisterInfo> MRI;
  std::unique_ptr<const MCAsmInfo> AsmInfo;
  std::unique_ptr<const MCSubtargetInfo> STI;
  std::unique_ptr<const MCInstrInfo> MII;
  std::unique_ptr<MCDisassembler> DisAsm;
  std::unique_ptr<const MCInstrAnalysis> MIA;
  std::unique_ptr<MCInstPrinter> IPrinter;

  // A list of text sections sorted by start RVA and size. Used to check
  // whether a given RVA is a valid code address.
  std::set<std::pair<uint64_t, uint64_t>> TextSections;
  // A map from function name to BinaryFunction info.
  std::unordered_map<std::string, BinaryFunction> BinaryFunctions;
  // An ordered map from a function's start offset to its function range
  // info. Currently, to determine whether an ELF offset is the start of a
  // real function, we leverage the function range info from DWARF.
  std::map<uint64_t, FuncRange> StartOffset2FuncRangeMap;
  // Offset to context location map. Used to expand the context.
  std::unordered_map<uint64_t, SampleContextFrameVector> Offset2LocStackMap;
  // Offset to instruction size map. Also used for quick offset lookup.
  std::unordered_map<uint64_t, uint64_t> Offset2InstSizeMap;
  // An array of offsets of all instructions sorted in increasing order. The
  // sorting is needed to quickly advance to the next forward/backward
  // instruction.
  std::vector<uint64_t> CodeAddrOffsets;
  // A set of call instruction offsets. Used by virtual unwinding.
  std::unordered_set<uint64_t> CallOffsets;
  // A set of return instruction offsets. Used by virtual unwinding.
  std::unordered_set<uint64_t> RetOffsets;
  // A set of branch instruction offsets.
  std::unordered_set<uint64_t> BranchOffsets;

  // Estimate and track function prolog and epilog ranges.
  PrologEpilogTracker ProEpilogTracker;
  // Track function sizes under different contexts.
  BinarySizeContextTracker FuncSizeTracker;

  // The symbolizer used to get the inline context for an instruction.
  std::unique_ptr<symbolize::LLVMSymbolizer> Symbolizer;
  // String table owning the function name strings created from the
  // symbolizer.
  std::unordered_set<std::string> NameStrings;
  // A collection of functions to print disassembly for.
  StringSet<> DisassembleFunctionSet;

  // Pseudo probe decoder.
  MCPseudoProbeDecoder ProbeDecoder;
  bool UsePseudoProbes = false;
  bool UseFSDiscriminator = false;
  // Whether we need to symbolize all instructions to get function context
  // size.
  bool TrackFuncContextSize = false;
  // Indicates whether the base loading address is parsed from the mmap event
  // or falls back to the preferred address.
  bool IsLoadedByMMap = false;
  // Used to avoid a redundant warning.
  bool MissingMMapWarned = false;

  void setPreferredTextSegmentAddresses(const ELFObjectFileBase *O);
  template <class ELFT>
  void setPreferredTextSegmentAddresses(const ELFFile<ELFT> &Obj,
                                        StringRef FileName);

  void decodePseudoProbe(const ELFObjectFileBase *Obj);

  void
  checkUseFSDiscriminator(const ELFObjectFileBase *Obj,
                          std::map<SectionRef, SectionSymbolsTy> &AllSymbols);

  // Set up the disassembler and related components.
  void setUpDisassembler(const ELFObjectFileBase *Obj);
  void setupSymbolizer();

  // Load debug info of subprograms from the DWARF section.
  void loadSymbolsFromDWARF(ObjectFile &Obj);

  // A function may be split into multiple non-contiguous address ranges. We
  // use this to set whether the start offset of a function is the real entry
  // of the function, and also to mark non-function labels as false.
  void setIsFuncEntry(uint64_t Offset, StringRef RangeSymName);

  // Warn if no entry range exists in the function.
  void warnNoFuncEntry();

  /// Disassemble the text section and build various address maps.
  void disassemble(const ELFObjectFileBase *O);

  /// Helper function to disassemble the symbol and extract info for unwinding.
  bool dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
                          SectionSymbolsTy &Symbols, const SectionRef &Section);

  /// Symbolize a given instruction pointer and return a full call context.
  SampleContextFrameVector symbolize(const InstructionPointer &IP,
                                     bool UseCanonicalFnName = false,
                                     bool UseProbeDiscriminator = false);

  /// Decode the interesting parts of the binary and build internal data
  /// structures. At a high level, the parts of interest are:
  ///   1. Text sections, including the main code section and the PLT
  ///      entries that will be used to handle cross-module call transitions.
  ///   2. The .debug_line section, used by Dwarf-based profile generation.
  ///   3. Pseudo probe related sections, used by probe-based profile
  ///      generation.
  void load();

public:
  ProfiledBinary(const StringRef ExeBinPath, const StringRef DebugBinPath)
      : Path(ExeBinPath), DebugBinaryPath(DebugBinPath), ProEpilogTracker(this),
        TrackFuncContextSize(EnableCSPreInliner &&
                             UseContextCostForPreInliner) {
    // Point to the executable binary if a debug info binary is not specified.
    SymbolizerPath = DebugBinPath.empty() ? ExeBinPath : DebugBinPath;
    setupSymbolizer();
    load();
  }

  uint64_t virtualAddrToOffset(uint64_t VirtualAddress) const {
    return VirtualAddress - BaseAddress;
  }
  uint64_t offsetToVirtualAddr(uint64_t Offset) const {
    return Offset + BaseAddress;
  }
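
  // For example (hypothetical numbers, not taken from a real binary): if the
  // first executable segment is loaded at BaseAddress 0x200000, the runtime
  // virtual address 0x201234 maps to binary offset 0x1234, and
  // offsetToVirtualAddr(0x1234) maps it back to 0x201234.
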
  StringRef getPath() const { return Path; }
  StringRef getName() const { return llvm::sys::path::filename(Path); }
  uint64_t getBaseAddress() const { return BaseAddress; }
  void setBaseAddress(uint64_t Address) { BaseAddress = Address; }

  // Return the preferred load address for the first executable segment.
  uint64_t getPreferredBaseAddress() const {
    return PreferredTextSegmentAddresses[0];
  }
  // Return the preferred load address for the first loadable segment.
  uint64_t getFirstLoadableAddress() const { return FirstLoadableAddress; }
  // Return the file offset for the first executable segment.
  uint64_t getTextSegmentOffset() const { return TextSegmentOffsets[0]; }
  const std::vector<uint64_t> &getPreferredTextSegmentAddresses() const {
    return PreferredTextSegmentAddresses;
  }
  const std::vector<uint64_t> &getTextSegmentOffsets() const {
    return TextSegmentOffsets;
  }

  uint64_t getInstSize(uint64_t Offset) const {
    auto I = Offset2InstSizeMap.find(Offset);
    if (I == Offset2InstSizeMap.end())
      return 0;
    return I->second;
  }

  bool offsetIsCode(uint64_t Offset) const {
    return Offset2InstSizeMap.find(Offset) != Offset2InstSizeMap.end();
  }
  bool addressIsCode(uint64_t Address) const {
    uint64_t Offset = virtualAddrToOffset(Address);
    return offsetIsCode(Offset);
  }
  bool addressIsCall(uint64_t Address) const {
    uint64_t Offset = virtualAddrToOffset(Address);
    return CallOffsets.count(Offset);
  }
  bool addressIsReturn(uint64_t Address) const {
    uint64_t Offset = virtualAddrToOffset(Address);
    return RetOffsets.count(Offset);
  }
  bool addressInPrologEpilog(uint64_t Address) const {
    uint64_t Offset = virtualAddrToOffset(Address);
    return ProEpilogTracker.PrologEpilogSet.count(Offset);
  }
  bool offsetIsTransfer(uint64_t Offset) {
    return BranchOffsets.count(Offset) || RetOffsets.count(Offset) ||
           CallOffsets.count(Offset);
  }

  uint64_t getAddressforIndex(uint64_t Index) const {
    return offsetToVirtualAddr(CodeAddrOffsets[Index]);
  }
  size_t getCodeOffsetsSize() const { return CodeAddrOffsets.size(); }

  bool usePseudoProbes() const { return UsePseudoProbes; }
  bool useFSDiscriminator() const { return UseFSDiscriminator; }

  // Get the index in CodeAddrOffsets for the given offset.
  // Since we might be given an address that is not a code address,
  // round up to the next valid code address via a lower-bound lookup.
  uint32_t getIndexForOffset(uint64_t Offset) const {
    auto Low = llvm::lower_bound(CodeAddrOffsets, Offset);
    return Low - CodeAddrOffsets.begin();
  }
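
  // For example (hypothetical offsets): with CodeAddrOffsets = {0x10, 0x14,
  // 0x18}, getIndexForOffset(0x12) returns 1, rounding up to the instruction
  // at 0x14, and getIndexForOffset(0x14) also returns 1 since it is an exact
  // match.
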
  uint32_t getIndexForAddr(uint64_t Address) const {
    uint64_t Offset = virtualAddrToOffset(Address);
    return getIndexForOffset(Offset);
  }

  uint64_t getCallAddrFromFrameAddr(uint64_t FrameAddr) const {
    if (FrameAddr == ExternalAddr)
      return ExternalAddr;
    auto I = getIndexForAddr(FrameAddr);
    FrameAddr = I ? getAddressforIndex(I - 1) : 0;
    if (FrameAddr && addressIsCall(FrameAddr))
      return FrameAddr;
    return 0;
  }
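
  // A non-leaf frame address from a sampled call stack is typically a return
  // address, so the call site is the immediately preceding code address. For
  // example (hypothetical addresses): if a call at 0x400510 is followed by
  // the return address 0x400515, getCallAddrFromFrameAddr(0x400515) yields
  // 0x400510; it yields 0 if the preceding instruction is not a call.
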
  FuncRange *findFuncRangeForStartOffset(uint64_t Offset) {
    auto I = StartOffset2FuncRangeMap.find(Offset);
    if (I == StartOffset2FuncRangeMap.end())
      return nullptr;
    return &I->second;
  }

  // Binary search for the function range that contains the input offset.
  FuncRange *findFuncRangeForOffset(uint64_t Offset) {
    auto I = StartOffset2FuncRangeMap.upper_bound(Offset);
    if (I == StartOffset2FuncRangeMap.begin())
      return nullptr;
    I--;
    if (Offset >= I->second.EndOffset)
      return nullptr;
    return &I->second;
  }
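
  // The lookup above is the usual "greatest start offset <= Offset" search on
  // an ordered map: upper_bound finds the first range starting after Offset,
  // stepping back one entry gives the candidate range, and the EndOffset
  // check rejects offsets that fall into a gap between ranges.
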
  // Get all ranges of one function.
  RangesTy getRangesForOffset(uint64_t Offset) {
    auto *FRange = findFuncRangeForOffset(Offset);
    // Ignore the range if it falls into the PLT section or a system library.
    if (!FRange)
      return RangesTy();
    return FRange->Func->Ranges;
  }

  const std::unordered_map<std::string, BinaryFunction> &
  getAllBinaryFunctions() {
    return BinaryFunctions;
  }

  BinaryFunction *getBinaryFunction(StringRef FName) {
    auto I = BinaryFunctions.find(FName.str());
    if (I == BinaryFunctions.end())
      return nullptr;
    return &I->second;
  }

  uint32_t getFuncSizeForContext(SampleContext &Context) {
    return FuncSizeTracker.getFuncSizeForContext(Context);
  }

  // Load the symbols from the debug table and populate them into the symbol
  // list.
  void populateSymbolListFromDWARF(ProfileSymbolList &SymbolList);

  const SampleContextFrameVector &
  getFrameLocationStack(uint64_t Offset, bool UseProbeDiscriminator = false) {
    auto I = Offset2LocStackMap.emplace(Offset, SampleContextFrameVector());
    if (I.second) {
      InstructionPointer IP(this, Offset);
      I.first->second = symbolize(IP, true, UseProbeDiscriminator);
    }
    return I.first->second;
  }
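
  // Note that the emplace-then-fill pattern above memoizes symbolization
  // results: each offset is symbolized at most once, since I.second is only
  // true when the offset was not yet present in Offset2LocStackMap.
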
  Optional<SampleContextFrame> getInlineLeafFrameLoc(uint64_t Offset) {
    const auto &Stack = getFrameLocationStack(Offset);
    if (Stack.empty())
      return {};
    return Stack.back();
  }

  // Compare the inline contexts of two addresses.
  bool inlineContextEqual(uint64_t Add1, uint64_t Add2);

  // Get the full context of the current stack with the inline context filled
  // in. It will search the disassembly info stored in Offset2LocStackMap.
  // This is used as the key of the function sample map.
  SampleContextFrameVector
  getExpandedContext(const SmallVectorImpl<uint64_t> &Stack,
                     bool &WasLeafInlined);

  // Go through the instructions in the given range and record their sizes for
  // the inline context.
  void computeInlinedContextSizeForRange(uint64_t StartOffset,
                                         uint64_t EndOffset);

  const MCDecodedPseudoProbe *getCallProbeForAddr(uint64_t Address) const {
    return ProbeDecoder.getCallProbeForAddr(Address);
  }

  void getInlineContextForProbe(const MCDecodedPseudoProbe *Probe,
                                SampleContextFrameVector &InlineContextStack,
                                bool IncludeLeaf = false) const {
    SmallVector<MCPseduoProbeFrameLocation, 16> ProbeInlineContext;
    ProbeDecoder.getInlineContextForProbe(Probe, ProbeInlineContext,
                                          IncludeLeaf);
    for (uint32_t I = 0; I < ProbeInlineContext.size(); I++) {
      auto &Callsite = ProbeInlineContext[I];
      // Clear the current context for an unknown probe.
      if (Callsite.second == 0 && I != ProbeInlineContext.size() - 1) {
        InlineContextStack.clear();
        continue;
      }
      InlineContextStack.emplace_back(Callsite.first,
                                      LineLocation(Callsite.second, 0));
    }
  }

  const AddressProbesMap &getAddress2ProbesMap() const {
    return ProbeDecoder.getAddress2ProbesMap();
  }
  const MCPseudoProbeFuncDesc *getFuncDescForGUID(uint64_t GUID) {
    return ProbeDecoder.getFuncDescForGUID(GUID);
  }
  const MCPseudoProbeFuncDesc *
  getInlinerDescForProbe(const MCDecodedPseudoProbe *Probe) {
    return ProbeDecoder.getInlinerDescForProbe(Probe);
  }

  bool getTrackFuncContextSize() { return TrackFuncContextSize; }
  bool getIsLoadedByMMap() { return IsLoadedByMMap; }
  void setIsLoadedByMMap(bool Value) { IsLoadedByMMap = Value; }
  bool getMissingMMapWarned() { return MissingMMapWarned; }
  void setMissingMMapWarned(bool Value) { MissingMMapWarned = Value; }
};

} // end namespace sampleprof
} // end namespace llvm

#endif