ProfiledBinary.cpp 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978
  1. //===-- ProfiledBinary.cpp - Binary decoder ---------------------*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. #include "ProfiledBinary.h"
  9. #include "ErrorHandling.h"
  10. #include "MissingFrameInferrer.h"
  11. #include "ProfileGenerator.h"
  12. #include "llvm/ADT/Triple.h"
  13. #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
  14. #include "llvm/Demangle/Demangle.h"
  15. #include "llvm/IR/DebugInfoMetadata.h"
  16. #include "llvm/MC/TargetRegistry.h"
  17. #include "llvm/Support/CommandLine.h"
  18. #include "llvm/Support/Debug.h"
  19. #include "llvm/Support/Format.h"
  20. #include "llvm/Support/TargetSelect.h"
  21. #include <optional>
  22. #define DEBUG_TYPE "load-binary"
  23. using namespace llvm;
  24. using namespace sampleprof;
  // Command-line options that control how the profiled binary is loaded and
  // which diagnostics are printed while loading it.

  // When set, load() decodes pseudo probes up front and the disassembler
  // prints the instructions it decodes instead of working silently.
  cl::opt<bool> ShowDisassemblyOnly("show-disassembly-only",
                                    cl::desc("Print disassembled code."));

  // When set, every printed instruction is followed by its symbolized source
  // location (only consulted while disassembly is being printed).
  cl::opt<bool> ShowSourceLocations("show-source-locations",
                                    cl::desc("Print source locations."));

  // When set, symbol names are passed through
  // FunctionSamples::getCanonicalFnName() before being printed or matched
  // against the list of functions to disassemble.
  static cl::opt<bool>
      ShowCanonicalFnName("show-canonical-fname",
                          cl::desc("Print canonical function name."));

  // When set, the decoded GUID-to-function-descriptor map and the probes
  // attached to each printed instruction are dumped.
  static cl::opt<bool> ShowPseudoProbe(
      "show-pseudo-probe",
      cl::desc("Print pseudo probe section and disassembled info."));

  // When set, checkPseudoProbe() returns before scanning the sections, so
  // UsePseudoProbes is left at whatever value it already had.
  static cl::opt<bool> UseDwarfCorrelation(
      "use-dwarf-correlation",
      cl::desc("Use dwarf for profile correlation even when binary contains "
               "pseudo probe."));

  // Forwarded to DWARFContext::create() when the debug context is built.
  static cl::opt<std::string>
      DWPPath("dwp", cl::init(""),
              cl::desc("Path of .dwp file. When not specified, it will be "
                       "<binary>.dwp in the same directory as the main binary."));

  // Copied into DisassembleFunctionSet by load(); an empty list means "all
  // functions".
  static cl::list<std::string> DisassembleFunctions(
      "disassemble-functions", cl::CommaSeparated,
      cl::desc("List of functions to print disassembly for. Accept demangled "
               "names only. Only work with show-disassembly-only"));

  // Options defined in other translation units.
  extern cl::opt<bool> ShowDetailedWarning;
  extern cl::opt<bool> InferMissingFrames;
  49. namespace llvm {
  50. namespace sampleprof {
  51. static const Target *getTarget(const ObjectFile *Obj) {
  52. Triple TheTriple = Obj->makeTriple();
  53. std::string Error;
  54. std::string ArchName;
  55. const Target *TheTarget =
  56. TargetRegistry::lookupTarget(ArchName, TheTriple, Error);
  57. if (!TheTarget)
  58. exitWithError(Error, Obj->getFileName());
  59. return TheTarget;
  60. }
  61. void BinarySizeContextTracker::addInstructionForContext(
  62. const SampleContextFrameVector &Context, uint32_t InstrSize) {
  63. ContextTrieNode *CurNode = &RootContext;
  64. bool IsLeaf = true;
  65. for (const auto &Callsite : reverse(Context)) {
  66. StringRef CallerName = Callsite.FuncName;
  67. LineLocation CallsiteLoc = IsLeaf ? LineLocation(0, 0) : Callsite.Location;
  68. CurNode = CurNode->getOrCreateChildContext(CallsiteLoc, CallerName);
  69. IsLeaf = false;
  70. }
  71. CurNode->addFunctionSize(InstrSize);
  72. }
  73. uint32_t
  74. BinarySizeContextTracker::getFuncSizeForContext(const ContextTrieNode *Node) {
  75. ContextTrieNode *CurrNode = &RootContext;
  76. ContextTrieNode *PrevNode = nullptr;
  77. std::optional<uint32_t> Size;
  78. // Start from top-level context-less function, traverse down the reverse
  79. // context trie to find the best/longest match for given context, then
  80. // retrieve the size.
  81. LineLocation CallSiteLoc(0, 0);
  82. while (CurrNode && Node->getParentContext() != nullptr) {
  83. PrevNode = CurrNode;
  84. CurrNode = CurrNode->getChildContext(CallSiteLoc, Node->getFuncName());
  85. if (CurrNode && CurrNode->getFunctionSize())
  86. Size = *CurrNode->getFunctionSize();
  87. CallSiteLoc = Node->getCallSiteLoc();
  88. Node = Node->getParentContext();
  89. }
  90. // If we traversed all nodes along the path of the context and haven't
  91. // found a size yet, pivot to look for size from sibling nodes, i.e size
  92. // of inlinee under different context.
  93. if (!Size) {
  94. if (!CurrNode)
  95. CurrNode = PrevNode;
  96. while (!Size && CurrNode && !CurrNode->getAllChildContext().empty()) {
  97. CurrNode = &CurrNode->getAllChildContext().begin()->second;
  98. if (CurrNode->getFunctionSize())
  99. Size = *CurrNode->getFunctionSize();
  100. }
  101. }
  102. assert(Size && "We should at least find one context size.");
  103. return *Size;
  104. }
  105. void BinarySizeContextTracker::trackInlineesOptimizedAway(
  106. MCPseudoProbeDecoder &ProbeDecoder) {
  107. ProbeFrameStack ProbeContext;
  108. for (const auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren())
  109. trackInlineesOptimizedAway(ProbeDecoder, *Child.second.get(), ProbeContext);
  110. }
  111. void BinarySizeContextTracker::trackInlineesOptimizedAway(
  112. MCPseudoProbeDecoder &ProbeDecoder,
  113. MCDecodedPseudoProbeInlineTree &ProbeNode, ProbeFrameStack &ProbeContext) {
  114. StringRef FuncName =
  115. ProbeDecoder.getFuncDescForGUID(ProbeNode.Guid)->FuncName;
  116. ProbeContext.emplace_back(FuncName, 0);
  117. // This ProbeContext has a probe, so it has code before inlining and
  118. // optimization. Make sure we mark its size as known.
  119. if (!ProbeNode.getProbes().empty()) {
  120. ContextTrieNode *SizeContext = &RootContext;
  121. for (auto &ProbeFrame : reverse(ProbeContext)) {
  122. StringRef CallerName = ProbeFrame.first;
  123. LineLocation CallsiteLoc(ProbeFrame.second, 0);
  124. SizeContext =
  125. SizeContext->getOrCreateChildContext(CallsiteLoc, CallerName);
  126. }
  127. // Add 0 size to make known.
  128. SizeContext->addFunctionSize(0);
  129. }
  130. // DFS down the probe inline tree
  131. for (const auto &ChildNode : ProbeNode.getChildren()) {
  132. InlineSite Location = ChildNode.first;
  133. ProbeContext.back().second = std::get<1>(Location);
  134. trackInlineesOptimizedAway(ProbeDecoder, *ChildNode.second.get(),
  135. ProbeContext);
  136. }
  137. ProbeContext.pop_back();
  138. }
  139. ProfiledBinary::ProfiledBinary(const StringRef ExeBinPath,
  140. const StringRef DebugBinPath)
  141. : Path(ExeBinPath), DebugBinaryPath(DebugBinPath), ProEpilogTracker(this),
  142. TrackFuncContextSize(EnableCSPreInliner && UseContextCostForPreInliner) {
  143. // Point to executable binary if debug info binary is not specified.
  144. SymbolizerPath = DebugBinPath.empty() ? ExeBinPath : DebugBinPath;
  145. setupSymbolizer();
  146. if (InferMissingFrames)
  147. MissingContextInferrer = std::make_unique<MissingFrameInferrer>(this);
  148. load();
  149. }
  150. ProfiledBinary::~ProfiledBinary() {}
  151. void ProfiledBinary::warnNoFuncEntry() {
  152. uint64_t NoFuncEntryNum = 0;
  153. for (auto &F : BinaryFunctions) {
  154. if (F.second.Ranges.empty())
  155. continue;
  156. bool hasFuncEntry = false;
  157. for (auto &R : F.second.Ranges) {
  158. if (FuncRange *FR = findFuncRangeForStartAddr(R.first)) {
  159. if (FR->IsFuncEntry) {
  160. hasFuncEntry = true;
  161. break;
  162. }
  163. }
  164. }
  165. if (!hasFuncEntry) {
  166. NoFuncEntryNum++;
  167. if (ShowDetailedWarning)
  168. WithColor::warning()
  169. << "Failed to determine function entry for " << F.first
  170. << " due to inconsistent name from symbol table and dwarf info.\n";
  171. }
  172. }
  173. emitWarningSummary(NoFuncEntryNum, BinaryFunctions.size(),
  174. "of functions failed to determine function entry due to "
  175. "inconsistent name from symbol table and dwarf info.");
  176. }
  177. void ProfiledBinary::load() {
  178. // Attempt to open the binary.
  179. OwningBinary<Binary> OBinary = unwrapOrError(createBinary(Path), Path);
  180. Binary &ExeBinary = *OBinary.getBinary();
  181. auto *Obj = dyn_cast<ELFObjectFileBase>(&ExeBinary);
  182. if (!Obj)
  183. exitWithError("not a valid Elf image", Path);
  184. TheTriple = Obj->makeTriple();
  185. // Current only support X86
  186. if (!TheTriple.isX86())
  187. exitWithError("unsupported target", TheTriple.getTriple());
  188. LLVM_DEBUG(dbgs() << "Loading " << Path << "\n");
  189. // Find the preferred load address for text sections.
  190. setPreferredTextSegmentAddresses(Obj);
  191. // Load debug info of subprograms from DWARF section.
  192. // If path of debug info binary is specified, use the debug info from it,
  193. // otherwise use the debug info from the executable binary.
  194. if (!DebugBinaryPath.empty()) {
  195. OwningBinary<Binary> DebugPath =
  196. unwrapOrError(createBinary(DebugBinaryPath), DebugBinaryPath);
  197. loadSymbolsFromDWARF(*cast<ObjectFile>(DebugPath.getBinary()));
  198. } else {
  199. loadSymbolsFromDWARF(*cast<ObjectFile>(&ExeBinary));
  200. }
  201. DisassembleFunctionSet.insert(DisassembleFunctions.begin(),
  202. DisassembleFunctions.end());
  203. checkPseudoProbe(Obj);
  204. if (UsePseudoProbes)
  205. populateElfSymbolAddressList(Obj);
  206. if (ShowDisassemblyOnly)
  207. decodePseudoProbe(Obj);
  208. // Disassemble the text sections.
  209. disassemble(Obj);
  210. // Use function start and return address to infer prolog and epilog
  211. ProEpilogTracker.inferPrologAddresses(StartAddrToFuncRangeMap);
  212. ProEpilogTracker.inferEpilogAddresses(RetAddressSet);
  213. warnNoFuncEntry();
  214. // TODO: decode other sections.
  215. }
  216. bool ProfiledBinary::inlineContextEqual(uint64_t Address1, uint64_t Address2) {
  217. const SampleContextFrameVector &Context1 =
  218. getCachedFrameLocationStack(Address1);
  219. const SampleContextFrameVector &Context2 =
  220. getCachedFrameLocationStack(Address2);
  221. if (Context1.size() != Context2.size())
  222. return false;
  223. if (Context1.empty())
  224. return false;
  225. // The leaf frame contains location within the leaf, and it
  226. // needs to be remove that as it's not part of the calling context
  227. return std::equal(Context1.begin(), Context1.begin() + Context1.size() - 1,
  228. Context2.begin(), Context2.begin() + Context2.size() - 1);
  229. }
  230. SampleContextFrameVector
  231. ProfiledBinary::getExpandedContext(const SmallVectorImpl<uint64_t> &Stack,
  232. bool &WasLeafInlined) {
  233. SampleContextFrameVector ContextVec;
  234. if (Stack.empty())
  235. return ContextVec;
  236. // Process from frame root to leaf
  237. for (auto Address : Stack) {
  238. const SampleContextFrameVector &ExpandedContext =
  239. getCachedFrameLocationStack(Address);
  240. // An instruction without a valid debug line will be ignored by sample
  241. // processing
  242. if (ExpandedContext.empty())
  243. return SampleContextFrameVector();
  244. // Set WasLeafInlined to the size of inlined frame count for the last
  245. // address which is leaf
  246. WasLeafInlined = (ExpandedContext.size() > 1);
  247. ContextVec.append(ExpandedContext);
  248. }
  249. // Replace with decoded base discriminator
  250. for (auto &Frame : ContextVec) {
  251. Frame.Location.Discriminator = ProfileGeneratorBase::getBaseDiscriminator(
  252. Frame.Location.Discriminator, UseFSDiscriminator);
  253. }
  254. assert(ContextVec.size() && "Context length should be at least 1");
  255. // Compress the context string except for the leaf frame
  256. auto LeafFrame = ContextVec.back();
  257. LeafFrame.Location = LineLocation(0, 0);
  258. ContextVec.pop_back();
  259. CSProfileGenerator::compressRecursionContext(ContextVec);
  260. CSProfileGenerator::trimContext(ContextVec);
  261. ContextVec.push_back(LeafFrame);
  262. return ContextVec;
  263. }
  264. template <class ELFT>
  265. void ProfiledBinary::setPreferredTextSegmentAddresses(const ELFFile<ELFT> &Obj,
  266. StringRef FileName) {
  267. const auto &PhdrRange = unwrapOrError(Obj.program_headers(), FileName);
  268. // FIXME: This should be the page size of the system running profiling.
  269. // However such info isn't available at post-processing time, assuming
  270. // 4K page now. Note that we don't use EXEC_PAGESIZE from <linux/param.h>
  271. // because we may build the tools on non-linux.
  272. uint32_t PageSize = 0x1000;
  273. for (const typename ELFT::Phdr &Phdr : PhdrRange) {
  274. if (Phdr.p_type == ELF::PT_LOAD) {
  275. if (!FirstLoadableAddress)
  276. FirstLoadableAddress = Phdr.p_vaddr & ~(PageSize - 1U);
  277. if (Phdr.p_flags & ELF::PF_X) {
  278. // Segments will always be loaded at a page boundary.
  279. PreferredTextSegmentAddresses.push_back(Phdr.p_vaddr &
  280. ~(PageSize - 1U));
  281. TextSegmentOffsets.push_back(Phdr.p_offset & ~(PageSize - 1U));
  282. }
  283. }
  284. }
  285. if (PreferredTextSegmentAddresses.empty())
  286. exitWithError("no executable segment found", FileName);
  287. }
  288. void ProfiledBinary::setPreferredTextSegmentAddresses(
  289. const ELFObjectFileBase *Obj) {
  290. if (const auto *ELFObj = dyn_cast<ELF32LEObjectFile>(Obj))
  291. setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
  292. else if (const auto *ELFObj = dyn_cast<ELF32BEObjectFile>(Obj))
  293. setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
  294. else if (const auto *ELFObj = dyn_cast<ELF64LEObjectFile>(Obj))
  295. setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
  296. else if (const auto *ELFObj = cast<ELF64BEObjectFile>(Obj))
  297. setPreferredTextSegmentAddresses(ELFObj->getELFFile(), Obj->getFileName());
  298. else
  299. llvm_unreachable("invalid ELF object format");
  300. }
  301. void ProfiledBinary::checkPseudoProbe(const ELFObjectFileBase *Obj) {
  302. if (UseDwarfCorrelation)
  303. return;
  304. bool HasProbeDescSection = false;
  305. bool HasPseudoProbeSection = false;
  306. StringRef FileName = Obj->getFileName();
  307. for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
  308. SI != SE; ++SI) {
  309. const SectionRef &Section = *SI;
  310. StringRef SectionName = unwrapOrError(Section.getName(), FileName);
  311. if (SectionName == ".pseudo_probe_desc") {
  312. HasProbeDescSection = true;
  313. } else if (SectionName == ".pseudo_probe") {
  314. HasPseudoProbeSection = true;
  315. }
  316. }
  317. // set UsePseudoProbes flag, used for PerfReader
  318. UsePseudoProbes = HasProbeDescSection && HasPseudoProbeSection;
  319. }
  320. void ProfiledBinary::decodePseudoProbe(const ELFObjectFileBase *Obj) {
  321. if (!UsePseudoProbes)
  322. return;
  323. MCPseudoProbeDecoder::Uint64Set GuidFilter;
  324. MCPseudoProbeDecoder::Uint64Map FuncStartAddresses;
  325. if (ShowDisassemblyOnly) {
  326. if (DisassembleFunctionSet.empty()) {
  327. FuncStartAddresses = SymbolStartAddrs;
  328. } else {
  329. for (auto &F : DisassembleFunctionSet) {
  330. auto GUID = Function::getGUID(F.first());
  331. if (auto StartAddr = SymbolStartAddrs.lookup(GUID)) {
  332. FuncStartAddresses[GUID] = StartAddr;
  333. FuncRange &Range = StartAddrToFuncRangeMap[StartAddr];
  334. GuidFilter.insert(Function::getGUID(Range.getFuncName()));
  335. }
  336. }
  337. }
  338. } else {
  339. for (auto *F : ProfiledFunctions) {
  340. GuidFilter.insert(Function::getGUID(F->FuncName));
  341. for (auto &Range : F->Ranges) {
  342. auto GUIDs = StartAddrToSymMap.equal_range(Range.first);
  343. for (auto I = GUIDs.first; I != GUIDs.second; ++I)
  344. FuncStartAddresses[I->second] = I->first;
  345. }
  346. }
  347. }
  348. StringRef FileName = Obj->getFileName();
  349. for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
  350. SI != SE; ++SI) {
  351. const SectionRef &Section = *SI;
  352. StringRef SectionName = unwrapOrError(Section.getName(), FileName);
  353. if (SectionName == ".pseudo_probe_desc") {
  354. StringRef Contents = unwrapOrError(Section.getContents(), FileName);
  355. if (!ProbeDecoder.buildGUID2FuncDescMap(
  356. reinterpret_cast<const uint8_t *>(Contents.data()),
  357. Contents.size()))
  358. exitWithError(
  359. "Pseudo Probe decoder fail in .pseudo_probe_desc section");
  360. } else if (SectionName == ".pseudo_probe") {
  361. StringRef Contents = unwrapOrError(Section.getContents(), FileName);
  362. if (!ProbeDecoder.buildAddress2ProbeMap(
  363. reinterpret_cast<const uint8_t *>(Contents.data()),
  364. Contents.size(), GuidFilter, FuncStartAddresses))
  365. exitWithError("Pseudo Probe decoder fail in .pseudo_probe section");
  366. }
  367. }
  368. // Build TopLevelProbeFrameMap to track size for optimized inlinees when probe
  369. // is available
  370. if (TrackFuncContextSize) {
  371. for (const auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren()) {
  372. auto *Frame = Child.second.get();
  373. StringRef FuncName =
  374. ProbeDecoder.getFuncDescForGUID(Frame->Guid)->FuncName;
  375. TopLevelProbeFrameMap[FuncName] = Frame;
  376. }
  377. }
  378. if (ShowPseudoProbe)
  379. ProbeDecoder.printGUID2FuncDescMap(outs());
  380. }
  381. void ProfiledBinary::decodePseudoProbe() {
  382. OwningBinary<Binary> OBinary = unwrapOrError(createBinary(Path), Path);
  383. Binary &ExeBinary = *OBinary.getBinary();
  384. auto *Obj = dyn_cast<ELFObjectFileBase>(&ExeBinary);
  385. decodePseudoProbe(Obj);
  386. }
  387. void ProfiledBinary::setIsFuncEntry(FuncRange *FuncRange,
  388. StringRef RangeSymName) {
  389. // Skip external function symbol.
  390. if (!FuncRange)
  391. return;
  392. // Set IsFuncEntry to ture if there is only one range in the function or the
  393. // RangeSymName from ELF is equal to its DWARF-based function name.
  394. if (FuncRange->Func->Ranges.size() == 1 ||
  395. (!FuncRange->IsFuncEntry && FuncRange->getFuncName() == RangeSymName))
  396. FuncRange->IsFuncEntry = true;
  397. }
  398. bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
  399. SectionSymbolsTy &Symbols,
  400. const SectionRef &Section) {
  401. std::size_t SE = Symbols.size();
  402. uint64_t SectionAddress = Section.getAddress();
  403. uint64_t SectSize = Section.getSize();
  404. uint64_t StartAddress = Symbols[SI].Addr;
  405. uint64_t NextStartAddress =
  406. (SI + 1 < SE) ? Symbols[SI + 1].Addr : SectionAddress + SectSize;
  407. FuncRange *FRange = findFuncRange(StartAddress);
  408. setIsFuncEntry(FRange, FunctionSamples::getCanonicalFnName(Symbols[SI].Name));
  409. StringRef SymbolName =
  410. ShowCanonicalFnName
  411. ? FunctionSamples::getCanonicalFnName(Symbols[SI].Name)
  412. : Symbols[SI].Name;
  413. bool ShowDisassembly =
  414. ShowDisassemblyOnly && (DisassembleFunctionSet.empty() ||
  415. DisassembleFunctionSet.count(SymbolName));
  416. if (ShowDisassembly)
  417. outs() << '<' << SymbolName << ">:\n";
  418. auto WarnInvalidInsts = [](uint64_t Start, uint64_t End) {
  419. WithColor::warning() << "Invalid instructions at "
  420. << format("%8" PRIx64, Start) << " - "
  421. << format("%8" PRIx64, End) << "\n";
  422. };
  423. uint64_t Address = StartAddress;
  424. // Size of a consecutive invalid instruction range starting from Address -1
  425. // backwards.
  426. uint64_t InvalidInstLength = 0;
  427. while (Address < NextStartAddress) {
  428. MCInst Inst;
  429. uint64_t Size;
  430. // Disassemble an instruction.
  431. bool Disassembled = DisAsm->getInstruction(
  432. Inst, Size, Bytes.slice(Address - SectionAddress), Address, nulls());
  433. if (Size == 0)
  434. Size = 1;
  435. if (ShowDisassembly) {
  436. if (ShowPseudoProbe) {
  437. ProbeDecoder.printProbeForAddress(outs(), Address);
  438. }
  439. outs() << format("%8" PRIx64 ":", Address);
  440. size_t Start = outs().tell();
  441. if (Disassembled)
  442. IPrinter->printInst(&Inst, Address + Size, "", *STI.get(), outs());
  443. else
  444. outs() << "\t<unknown>";
  445. if (ShowSourceLocations) {
  446. unsigned Cur = outs().tell() - Start;
  447. if (Cur < 40)
  448. outs().indent(40 - Cur);
  449. InstructionPointer IP(this, Address);
  450. outs() << getReversedLocWithContext(
  451. symbolize(IP, ShowCanonicalFnName, ShowPseudoProbe));
  452. }
  453. outs() << "\n";
  454. }
  455. if (Disassembled) {
  456. const MCInstrDesc &MCDesc = MII->get(Inst.getOpcode());
  457. // Record instruction size.
  458. AddressToInstSizeMap[Address] = Size;
  459. // Populate address maps.
  460. CodeAddressVec.push_back(Address);
  461. if (MCDesc.isCall()) {
  462. CallAddressSet.insert(Address);
  463. UncondBranchAddrSet.insert(Address);
  464. } else if (MCDesc.isReturn()) {
  465. RetAddressSet.insert(Address);
  466. UncondBranchAddrSet.insert(Address);
  467. } else if (MCDesc.isBranch()) {
  468. if (MCDesc.isUnconditionalBranch())
  469. UncondBranchAddrSet.insert(Address);
  470. BranchAddressSet.insert(Address);
  471. }
  472. // Record potential call targets for tail frame inference later-on.
  473. if (InferMissingFrames && FRange) {
  474. uint64_t Target = 0;
  475. MIA->evaluateBranch(Inst, Address, Size, Target);
  476. if (MCDesc.isCall()) {
  477. // Indirect call targets are unknown at this point. Recording the
  478. // unknown target (zero) for further LBR-based refinement.
  479. MissingContextInferrer->CallEdges[Address].insert(Target);
  480. } else if (MCDesc.isUnconditionalBranch()) {
  481. assert(Target &&
  482. "target should be known for unconditional direct branch");
  483. // Any inter-function unconditional jump is considered tail call at
  484. // this point. This is not 100% accurate and could further be
  485. // optimized based on some source annotation.
  486. FuncRange *ToFRange = findFuncRange(Target);
  487. if (ToFRange && ToFRange->Func != FRange->Func)
  488. MissingContextInferrer->TailCallEdges[Address].insert(Target);
  489. LLVM_DEBUG({
  490. dbgs() << "Direct Tail call: " << format("%8" PRIx64 ":", Address);
  491. IPrinter->printInst(&Inst, Address + Size, "", *STI.get(), dbgs());
  492. dbgs() << "\n";
  493. });
  494. } else if (MCDesc.isIndirectBranch() && MCDesc.isBarrier()) {
  495. // This is an indirect branch but not necessarily an indirect tail
  496. // call. The isBarrier check is to filter out conditional branch.
  497. // Similar with indirect call targets, recording the unknown target
  498. // (zero) for further LBR-based refinement.
  499. MissingContextInferrer->TailCallEdges[Address].insert(Target);
  500. LLVM_DEBUG({
  501. dbgs() << "Indirect Tail call: "
  502. << format("%8" PRIx64 ":", Address);
  503. IPrinter->printInst(&Inst, Address + Size, "", *STI.get(), dbgs());
  504. dbgs() << "\n";
  505. });
  506. }
  507. }
  508. if (InvalidInstLength) {
  509. WarnInvalidInsts(Address - InvalidInstLength, Address - 1);
  510. InvalidInstLength = 0;
  511. }
  512. } else {
  513. InvalidInstLength += Size;
  514. }
  515. Address += Size;
  516. }
  517. if (InvalidInstLength)
  518. WarnInvalidInsts(Address - InvalidInstLength, Address - 1);
  519. if (ShowDisassembly)
  520. outs() << "\n";
  521. return true;
  522. }
  523. void ProfiledBinary::setUpDisassembler(const ELFObjectFileBase *Obj) {
  524. const Target *TheTarget = getTarget(Obj);
  525. std::string TripleName = TheTriple.getTriple();
  526. StringRef FileName = Obj->getFileName();
  527. MRI.reset(TheTarget->createMCRegInfo(TripleName));
  528. if (!MRI)
  529. exitWithError("no register info for target " + TripleName, FileName);
  530. MCTargetOptions MCOptions;
  531. AsmInfo.reset(TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
  532. if (!AsmInfo)
  533. exitWithError("no assembly info for target " + TripleName, FileName);
  534. Expected<SubtargetFeatures> Features = Obj->getFeatures();
  535. if (!Features)
  536. exitWithError(Features.takeError(), FileName);
  537. STI.reset(
  538. TheTarget->createMCSubtargetInfo(TripleName, "", Features->getString()));
  539. if (!STI)
  540. exitWithError("no subtarget info for target " + TripleName, FileName);
  541. MII.reset(TheTarget->createMCInstrInfo());
  542. if (!MII)
  543. exitWithError("no instruction info for target " + TripleName, FileName);
  544. MCContext Ctx(Triple(TripleName), AsmInfo.get(), MRI.get(), STI.get());
  545. std::unique_ptr<MCObjectFileInfo> MOFI(
  546. TheTarget->createMCObjectFileInfo(Ctx, /*PIC=*/false));
  547. Ctx.setObjectFileInfo(MOFI.get());
  548. DisAsm.reset(TheTarget->createMCDisassembler(*STI, Ctx));
  549. if (!DisAsm)
  550. exitWithError("no disassembler for target " + TripleName, FileName);
  551. MIA.reset(TheTarget->createMCInstrAnalysis(MII.get()));
  552. int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
  553. IPrinter.reset(TheTarget->createMCInstPrinter(
  554. Triple(TripleName), AsmPrinterVariant, *AsmInfo, *MII, *MRI));
  555. IPrinter->setPrintBranchImmAsAddress(true);
  556. }
  557. void ProfiledBinary::disassemble(const ELFObjectFileBase *Obj) {
  558. // Set up disassembler and related components.
  559. setUpDisassembler(Obj);
  560. // Create a mapping from virtual address to symbol name. The symbols in text
  561. // sections are the candidates to dissassemble.
  562. std::map<SectionRef, SectionSymbolsTy> AllSymbols;
  563. StringRef FileName = Obj->getFileName();
  564. for (const SymbolRef &Symbol : Obj->symbols()) {
  565. const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName);
  566. const StringRef Name = unwrapOrError(Symbol.getName(), FileName);
  567. section_iterator SecI = unwrapOrError(Symbol.getSection(), FileName);
  568. if (SecI != Obj->section_end())
  569. AllSymbols[*SecI].push_back(SymbolInfoTy(Addr, Name, ELF::STT_NOTYPE));
  570. }
  571. // Sort all the symbols. Use a stable sort to stabilize the output.
  572. for (std::pair<const SectionRef, SectionSymbolsTy> &SecSyms : AllSymbols)
  573. stable_sort(SecSyms.second);
  574. assert((DisassembleFunctionSet.empty() || ShowDisassemblyOnly) &&
  575. "Functions to disassemble should be only specified together with "
  576. "--show-disassembly-only");
  577. if (ShowDisassemblyOnly)
  578. outs() << "\nDisassembly of " << FileName << ":\n";
  579. // Dissassemble a text section.
  580. for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
  581. SI != SE; ++SI) {
  582. const SectionRef &Section = *SI;
  583. if (!Section.isText())
  584. continue;
  585. uint64_t ImageLoadAddr = getPreferredBaseAddress();
  586. uint64_t SectionAddress = Section.getAddress() - ImageLoadAddr;
  587. uint64_t SectSize = Section.getSize();
  588. if (!SectSize)
  589. continue;
  590. // Register the text section.
  591. TextSections.insert({SectionAddress, SectSize});
  592. StringRef SectionName = unwrapOrError(Section.getName(), FileName);
  593. if (ShowDisassemblyOnly) {
  594. outs() << "\nDisassembly of section " << SectionName;
  595. outs() << " [" << format("0x%" PRIx64, Section.getAddress()) << ", "
  596. << format("0x%" PRIx64, Section.getAddress() + SectSize)
  597. << "]:\n\n";
  598. }
  599. if (SectionName == ".plt")
  600. continue;
  601. // Get the section data.
  602. ArrayRef<uint8_t> Bytes =
  603. arrayRefFromStringRef(unwrapOrError(Section.getContents(), FileName));
  604. // Get the list of all the symbols in this section.
  605. SectionSymbolsTy &Symbols = AllSymbols[Section];
  606. // Disassemble symbol by symbol.
  607. for (std::size_t SI = 0, SE = Symbols.size(); SI != SE; ++SI) {
  608. if (!dissassembleSymbol(SI, Bytes, Symbols, Section))
  609. exitWithError("disassembling error", FileName);
  610. }
  611. }
  612. // Dissassemble rodata section to check if FS discriminator symbol exists.
  613. checkUseFSDiscriminator(Obj, AllSymbols);
  614. }
  615. void ProfiledBinary::checkUseFSDiscriminator(
  616. const ELFObjectFileBase *Obj,
  617. std::map<SectionRef, SectionSymbolsTy> &AllSymbols) {
  618. const char *FSDiscriminatorVar = "__llvm_fs_discriminator__";
  619. for (section_iterator SI = Obj->section_begin(), SE = Obj->section_end();
  620. SI != SE; ++SI) {
  621. const SectionRef &Section = *SI;
  622. if (!Section.isData() || Section.getSize() == 0)
  623. continue;
  624. SectionSymbolsTy &Symbols = AllSymbols[Section];
  625. for (std::size_t SI = 0, SE = Symbols.size(); SI != SE; ++SI) {
  626. if (Symbols[SI].Name == FSDiscriminatorVar) {
  627. UseFSDiscriminator = true;
  628. return;
  629. }
  630. }
  631. }
  632. }
  633. void ProfiledBinary::populateElfSymbolAddressList(
  634. const ELFObjectFileBase *Obj) {
  635. // Create a mapping from virtual address to symbol GUID and the other way
  636. // around.
  637. StringRef FileName = Obj->getFileName();
  638. for (const SymbolRef &Symbol : Obj->symbols()) {
  639. const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName);
  640. const StringRef Name = unwrapOrError(Symbol.getName(), FileName);
  641. uint64_t GUID = Function::getGUID(Name);
  642. SymbolStartAddrs[GUID] = Addr;
  643. StartAddrToSymMap.emplace(Addr, GUID);
  644. }
  645. }
  646. void ProfiledBinary::loadSymbolsFromDWARFUnit(DWARFUnit &CompilationUnit) {
  647. for (const auto &DieInfo : CompilationUnit.dies()) {
  648. llvm::DWARFDie Die(&CompilationUnit, &DieInfo);
  649. if (!Die.isSubprogramDIE())
  650. continue;
  651. auto Name = Die.getName(llvm::DINameKind::LinkageName);
  652. if (!Name)
  653. Name = Die.getName(llvm::DINameKind::ShortName);
  654. if (!Name)
  655. continue;
  656. auto RangesOrError = Die.getAddressRanges();
  657. if (!RangesOrError)
  658. continue;
  659. const DWARFAddressRangesVector &Ranges = RangesOrError.get();
  660. if (Ranges.empty())
  661. continue;
  662. // Different DWARF symbols can have same function name, search or create
  663. // BinaryFunction indexed by the name.
  664. auto Ret = BinaryFunctions.emplace(Name, BinaryFunction());
  665. auto &Func = Ret.first->second;
  666. if (Ret.second)
  667. Func.FuncName = Ret.first->first;
  668. for (const auto &Range : Ranges) {
  669. uint64_t StartAddress = Range.LowPC;
  670. uint64_t EndAddress = Range.HighPC;
  671. if (EndAddress <= StartAddress ||
  672. StartAddress < getPreferredBaseAddress())
  673. continue;
  674. // We may want to know all ranges for one function. Here group the
  675. // ranges and store them into BinaryFunction.
  676. Func.Ranges.emplace_back(StartAddress, EndAddress);
  677. auto R = StartAddrToFuncRangeMap.emplace(StartAddress, FuncRange());
  678. if (R.second) {
  679. FuncRange &FRange = R.first->second;
  680. FRange.Func = &Func;
  681. FRange.StartAddress = StartAddress;
  682. FRange.EndAddress = EndAddress;
  683. } else {
  684. WithColor::warning()
  685. << "Duplicated symbol start address at "
  686. << format("%8" PRIx64, StartAddress) << " "
  687. << R.first->second.getFuncName() << " and " << Name << "\n";
  688. }
  689. }
  690. }
  691. }
  692. void ProfiledBinary::loadSymbolsFromDWARF(ObjectFile &Obj) {
  693. auto DebugContext = llvm::DWARFContext::create(
  694. Obj, DWARFContext::ProcessDebugRelocations::Process, nullptr, DWPPath);
  695. if (!DebugContext)
  696. exitWithError("Error creating the debug info context", Path);
  697. for (const auto &CompilationUnit : DebugContext->compile_units())
  698. loadSymbolsFromDWARFUnit(*CompilationUnit.get());
  699. // Handles DWO sections that can either be in .o, .dwo or .dwp files.
  700. for (const auto &CompilationUnit : DebugContext->compile_units()) {
  701. DWARFUnit *const DwarfUnit = CompilationUnit.get();
  702. if (std::optional<uint64_t> DWOId = DwarfUnit->getDWOId()) {
  703. DWARFUnit *DWOCU = DwarfUnit->getNonSkeletonUnitDIE(false).getDwarfUnit();
  704. if (!DWOCU->isDWOUnit()) {
  705. std::string DWOName = dwarf::toString(
  706. DwarfUnit->getUnitDIE().find(
  707. {dwarf::DW_AT_dwo_name, dwarf::DW_AT_GNU_dwo_name}),
  708. "");
  709. WithColor::warning()
  710. << "DWO debug information for " << DWOName
  711. << " was not loaded. Please check the .o, .dwo or .dwp path.\n";
  712. continue;
  713. }
  714. loadSymbolsFromDWARFUnit(*DWOCU);
  715. }
  716. }
  717. if (BinaryFunctions.empty())
  718. WithColor::warning() << "Loading of DWARF info completed, but no binary "
  719. "functions have been retrieved.\n";
  720. }
  721. void ProfiledBinary::populateSymbolListFromDWARF(
  722. ProfileSymbolList &SymbolList) {
  723. for (auto &I : StartAddrToFuncRangeMap)
  724. SymbolList.add(I.second.getFuncName());
  725. }
  726. void ProfiledBinary::setupSymbolizer() {
  727. symbolize::LLVMSymbolizer::Options SymbolizerOpts;
  728. SymbolizerOpts.PrintFunctions =
  729. DILineInfoSpecifier::FunctionNameKind::LinkageName;
  730. SymbolizerOpts.Demangle = false;
  731. SymbolizerOpts.DefaultArch = TheTriple.getArchName().str();
  732. SymbolizerOpts.UseSymbolTable = false;
  733. SymbolizerOpts.RelativeAddresses = false;
  734. SymbolizerOpts.DWPName = DWPPath;
  735. Symbolizer = std::make_unique<symbolize::LLVMSymbolizer>(SymbolizerOpts);
  736. }
  737. SampleContextFrameVector ProfiledBinary::symbolize(const InstructionPointer &IP,
  738. bool UseCanonicalFnName,
  739. bool UseProbeDiscriminator) {
  740. assert(this == IP.Binary &&
  741. "Binary should only symbolize its own instruction");
  742. auto Addr = object::SectionedAddress{IP.Address,
  743. object::SectionedAddress::UndefSection};
  744. DIInliningInfo InlineStack = unwrapOrError(
  745. Symbolizer->symbolizeInlinedCode(SymbolizerPath.str(), Addr),
  746. SymbolizerPath);
  747. SampleContextFrameVector CallStack;
  748. for (int32_t I = InlineStack.getNumberOfFrames() - 1; I >= 0; I--) {
  749. const auto &CallerFrame = InlineStack.getFrame(I);
  750. if (CallerFrame.FunctionName == "<invalid>")
  751. break;
  752. StringRef FunctionName(CallerFrame.FunctionName);
  753. if (UseCanonicalFnName)
  754. FunctionName = FunctionSamples::getCanonicalFnName(FunctionName);
  755. uint32_t Discriminator = CallerFrame.Discriminator;
  756. uint32_t LineOffset = (CallerFrame.Line - CallerFrame.StartLine) & 0xffff;
  757. if (UseProbeDiscriminator) {
  758. LineOffset =
  759. PseudoProbeDwarfDiscriminator::extractProbeIndex(Discriminator);
  760. Discriminator = 0;
  761. }
  762. LineLocation Line(LineOffset, Discriminator);
  763. auto It = NameStrings.insert(FunctionName.str());
  764. CallStack.emplace_back(*It.first, Line);
  765. }
  766. return CallStack;
  767. }
  768. void ProfiledBinary::computeInlinedContextSizeForRange(uint64_t RangeBegin,
  769. uint64_t RangeEnd) {
  770. InstructionPointer IP(this, RangeBegin, true);
  771. if (IP.Address != RangeBegin)
  772. WithColor::warning() << "Invalid start instruction at "
  773. << format("%8" PRIx64, RangeBegin) << "\n";
  774. if (IP.Address >= RangeEnd)
  775. return;
  776. do {
  777. const SampleContextFrameVector SymbolizedCallStack =
  778. getFrameLocationStack(IP.Address, UsePseudoProbes);
  779. uint64_t Size = AddressToInstSizeMap[IP.Address];
  780. // Record instruction size for the corresponding context
  781. FuncSizeTracker.addInstructionForContext(SymbolizedCallStack, Size);
  782. } while (IP.advance() && IP.Address < RangeEnd);
  783. }
  784. void ProfiledBinary::computeInlinedContextSizeForFunc(
  785. const BinaryFunction *Func) {
  786. // Note that a function can be spilt into multiple ranges, so compute for all
  787. // ranges of the function.
  788. for (const auto &Range : Func->Ranges)
  789. computeInlinedContextSizeForRange(Range.first, Range.second);
  790. // Track optimized-away inlinee for probed binary. A function inlined and then
  791. // optimized away should still have their probes left over in places.
  792. if (usePseudoProbes()) {
  793. auto I = TopLevelProbeFrameMap.find(Func->FuncName);
  794. if (I != TopLevelProbeFrameMap.end()) {
  795. BinarySizeContextTracker::ProbeFrameStack ProbeContext;
  796. FuncSizeTracker.trackInlineesOptimizedAway(ProbeDecoder, *I->second,
  797. ProbeContext);
  798. }
  799. }
  800. }
  801. void ProfiledBinary::inferMissingFrames(
  802. const SmallVectorImpl<uint64_t> &Context,
  803. SmallVectorImpl<uint64_t> &NewContext) {
  804. MissingContextInferrer->inferMissingFrames(Context, NewContext);
  805. }
  806. InstructionPointer::InstructionPointer(const ProfiledBinary *Binary,
  807. uint64_t Address, bool RoundToNext)
  808. : Binary(Binary), Address(Address) {
  809. Index = Binary->getIndexForAddr(Address);
  810. if (RoundToNext) {
  811. // we might get address which is not the code
  812. // it should round to the next valid address
  813. if (Index >= Binary->getCodeAddrVecSize())
  814. this->Address = UINT64_MAX;
  815. else
  816. this->Address = Binary->getAddressforIndex(Index);
  817. }
  818. }
  819. bool InstructionPointer::advance() {
  820. Index++;
  821. if (Index >= Binary->getCodeAddrVecSize()) {
  822. Address = UINT64_MAX;
  823. return false;
  824. }
  825. Address = Binary->getAddressforIndex(Index);
  826. return true;
  827. }
  828. bool InstructionPointer::backward() {
  829. if (Index == 0) {
  830. Address = 0;
  831. return false;
  832. }
  833. Index--;
  834. Address = Binary->getAddressforIndex(Index);
  835. return true;
  836. }
  837. void InstructionPointer::update(uint64_t Addr) {
  838. Address = Addr;
  839. Index = Binary->getIndexForAddr(Address);
  840. }
  841. } // end namespace sampleprof
  842. } // end namespace llvm