PerfReader.cpp 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668
  1. //===-- PerfReader.cpp - perfscript reader ---------------------*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. #include "PerfReader.h"
  9. #include "ProfileGenerator.h"
  10. static cl::opt<bool> ShowMmapEvents("show-mmap-events", cl::ReallyHidden,
  11. cl::init(false), cl::ZeroOrMore,
  12. cl::desc("Print binary load events."));
  13. static cl::opt<bool> ShowUnwinderOutput("show-unwinder-output",
  14. cl::ReallyHidden, cl::init(false),
  15. cl::ZeroOrMore,
  16. cl::desc("Print unwinder output"));
  17. extern cl::opt<bool> ShowDisassemblyOnly;
  18. extern cl::opt<bool> ShowSourceLocations;
  19. namespace llvm {
  20. namespace sampleprof {
  21. void VirtualUnwinder::unwindCall(UnwindState &State) {
  22. // The 2nd frame after leaf could be missing if stack sample is
  23. // taken when IP is within prolog/epilog, as frame chain isn't
  24. // setup yet. Fill in the missing frame in that case.
  25. // TODO: Currently we just assume all the addr that can't match the
  26. // 2nd frame is in prolog/epilog. In the future, we will switch to
  27. // pro/epi tracker(Dwarf CFI) for the precise check.
  28. uint64_t Source = State.getCurrentLBRSource();
  29. auto *ParentFrame = State.getParentFrame();
  30. if (ParentFrame == State.getDummyRootPtr() ||
  31. ParentFrame->Address != Source) {
  32. State.switchToFrame(Source);
  33. } else {
  34. State.popFrame();
  35. }
  36. State.InstPtr.update(Source);
  37. }
  38. void VirtualUnwinder::unwindLinear(UnwindState &State, uint64_t Repeat) {
  39. InstructionPointer &IP = State.InstPtr;
  40. uint64_t Target = State.getCurrentLBRTarget();
  41. uint64_t End = IP.Address;
  42. if (Binary->usePseudoProbes()) {
  43. // We don't need to top frame probe since it should be extracted
  44. // from the range.
  45. // The outcome of the virtual unwinding with pseudo probes is a
  46. // map from a context key to the address range being unwound.
  47. // This means basically linear unwinding is not needed for pseudo
  48. // probes. The range will be simply recorded here and will be
  49. // converted to a list of pseudo probes to report in ProfileGenerator.
  50. State.getParentFrame()->recordRangeCount(Target, End, Repeat);
  51. } else {
  52. // Unwind linear execution part
  53. uint64_t LeafAddr = State.CurrentLeafFrame->Address;
  54. while (IP.Address >= Target) {
  55. uint64_t PrevIP = IP.Address;
  56. IP.backward();
  57. // Break into segments for implicit call/return due to inlining
  58. bool SameInlinee = Binary->inlineContextEqual(PrevIP, IP.Address);
  59. if (!SameInlinee || PrevIP == Target) {
  60. State.switchToFrame(LeafAddr);
  61. State.CurrentLeafFrame->recordRangeCount(PrevIP, End, Repeat);
  62. End = IP.Address;
  63. }
  64. LeafAddr = IP.Address;
  65. }
  66. }
  67. }
  68. void VirtualUnwinder::unwindReturn(UnwindState &State) {
  69. // Add extra frame as we unwind through the return
  70. const LBREntry &LBR = State.getCurrentLBR();
  71. uint64_t CallAddr = Binary->getCallAddrFromFrameAddr(LBR.Target);
  72. State.switchToFrame(CallAddr);
  73. State.pushFrame(LBR.Source);
  74. State.InstPtr.update(LBR.Source);
  75. }
  76. void VirtualUnwinder::unwindBranchWithinFrame(UnwindState &State) {
  77. // TODO: Tolerate tail call for now, as we may see tail call from libraries.
  78. // This is only for intra function branches, excluding tail calls.
  79. uint64_t Source = State.getCurrentLBRSource();
  80. State.switchToFrame(Source);
  81. State.InstPtr.update(Source);
  82. }
  83. std::shared_ptr<StringBasedCtxKey> FrameStack::getContextKey() {
  84. std::shared_ptr<StringBasedCtxKey> KeyStr =
  85. std::make_shared<StringBasedCtxKey>();
  86. KeyStr->Context = Binary->getExpandedContextStr(Stack);
  87. if (KeyStr->Context.empty())
  88. return nullptr;
  89. KeyStr->genHashCode();
  90. return KeyStr;
  91. }
  92. std::shared_ptr<ProbeBasedCtxKey> ProbeStack::getContextKey() {
  93. std::shared_ptr<ProbeBasedCtxKey> ProbeBasedKey =
  94. std::make_shared<ProbeBasedCtxKey>();
  95. for (auto CallProbe : Stack) {
  96. ProbeBasedKey->Probes.emplace_back(CallProbe);
  97. }
  98. CSProfileGenerator::compressRecursionContext<const PseudoProbe *>(
  99. ProbeBasedKey->Probes);
  100. ProbeBasedKey->genHashCode();
  101. return ProbeBasedKey;
  102. }
  103. template <typename T>
  104. void VirtualUnwinder::collectSamplesFromFrame(UnwindState::ProfiledFrame *Cur,
  105. T &Stack) {
  106. if (Cur->RangeSamples.empty() && Cur->BranchSamples.empty())
  107. return;
  108. std::shared_ptr<ContextKey> Key = Stack.getContextKey();
  109. if (Key == nullptr)
  110. return;
  111. auto Ret = CtxCounterMap->emplace(Hashable<ContextKey>(Key), SampleCounter());
  112. SampleCounter &SCounter = Ret.first->second;
  113. for (auto &Item : Cur->RangeSamples) {
  114. uint64_t StartOffset = Binary->virtualAddrToOffset(std::get<0>(Item));
  115. uint64_t EndOffset = Binary->virtualAddrToOffset(std::get<1>(Item));
  116. SCounter.recordRangeCount(StartOffset, EndOffset, std::get<2>(Item));
  117. }
  118. for (auto &Item : Cur->BranchSamples) {
  119. uint64_t SourceOffset = Binary->virtualAddrToOffset(std::get<0>(Item));
  120. uint64_t TargetOffset = Binary->virtualAddrToOffset(std::get<1>(Item));
  121. SCounter.recordBranchCount(SourceOffset, TargetOffset, std::get<2>(Item));
  122. }
  123. }
  124. template <typename T>
  125. void VirtualUnwinder::collectSamplesFromFrameTrie(
  126. UnwindState::ProfiledFrame *Cur, T &Stack) {
  127. if (!Cur->isDummyRoot()) {
  128. if (!Stack.pushFrame(Cur)) {
  129. // Process truncated context
  130. for (const auto &Item : Cur->Children) {
  131. // Start a new traversal ignoring its bottom context
  132. collectSamplesFromFrameTrie(Item.second.get());
  133. }
  134. return;
  135. }
  136. }
  137. collectSamplesFromFrame(Cur, Stack);
  138. // Process children frame
  139. for (const auto &Item : Cur->Children) {
  140. collectSamplesFromFrameTrie(Item.second.get(), Stack);
  141. }
  142. // Recover the call stack
  143. Stack.popFrame();
  144. }
  145. void VirtualUnwinder::collectSamplesFromFrameTrie(
  146. UnwindState::ProfiledFrame *Cur) {
  147. if (Binary->usePseudoProbes()) {
  148. ProbeStack Stack(Binary);
  149. collectSamplesFromFrameTrie<ProbeStack>(Cur, Stack);
  150. } else {
  151. FrameStack Stack(Binary);
  152. collectSamplesFromFrameTrie<FrameStack>(Cur, Stack);
  153. }
  154. }
  155. void VirtualUnwinder::recordBranchCount(const LBREntry &Branch,
  156. UnwindState &State, uint64_t Repeat) {
  157. if (Branch.IsArtificial)
  158. return;
  159. if (Binary->usePseudoProbes()) {
  160. // Same as recordRangeCount, We don't need to top frame probe since we will
  161. // extract it from branch's source address
  162. State.getParentFrame()->recordBranchCount(Branch.Source, Branch.Target,
  163. Repeat);
  164. } else {
  165. State.CurrentLeafFrame->recordBranchCount(Branch.Source, Branch.Target,
  166. Repeat);
  167. }
  168. }
  169. bool VirtualUnwinder::unwind(const HybridSample *Sample, uint64_t Repeat) {
  170. // Capture initial state as starting point for unwinding.
  171. UnwindState State(Sample);
  172. // Sanity check - making sure leaf of LBR aligns with leaf of stack sample
  173. // Stack sample sometimes can be unreliable, so filter out bogus ones.
  174. if (!State.validateInitialState())
  175. return false;
  176. // Also do not attempt linear unwind for the leaf range as it's incomplete.
  177. bool IsLeaf = true;
  178. // Now process the LBR samples in parrallel with stack sample
  179. // Note that we do not reverse the LBR entry order so we can
  180. // unwind the sample stack as we walk through LBR entries.
  181. while (State.hasNextLBR()) {
  182. State.checkStateConsistency();
  183. // Unwind implicit calls/returns from inlining, along the linear path,
  184. // break into smaller sub section each with its own calling context.
  185. if (!IsLeaf) {
  186. unwindLinear(State, Repeat);
  187. }
  188. IsLeaf = false;
  189. // Save the LBR branch before it gets unwound.
  190. const LBREntry &Branch = State.getCurrentLBR();
  191. if (isCallState(State)) {
  192. // Unwind calls - we know we encountered call if LBR overlaps with
  193. // transition between leaf the 2nd frame. Note that for calls that
  194. // were not in the original stack sample, we should have added the
  195. // extra frame when processing the return paired with this call.
  196. unwindCall(State);
  197. } else if (isReturnState(State)) {
  198. // Unwind returns - check whether the IP is indeed at a return instruction
  199. unwindReturn(State);
  200. } else {
  201. // Unwind branches - for regular intra function branches, we only
  202. // need to record branch with context.
  203. unwindBranchWithinFrame(State);
  204. }
  205. State.advanceLBR();
  206. // Record `branch` with calling context after unwinding.
  207. recordBranchCount(Branch, State, Repeat);
  208. }
  209. // As samples are aggregated on trie, record them into counter map
  210. collectSamplesFromFrameTrie(State.getDummyRootPtr());
  211. return true;
  212. }
  213. void PerfReader::validateCommandLine(
  214. cl::list<std::string> &BinaryFilenames,
  215. cl::list<std::string> &PerfTraceFilenames) {
  216. // Allow the invalid perfscript if we only use to show binary disassembly
  217. if (!ShowDisassemblyOnly) {
  218. for (auto &File : PerfTraceFilenames) {
  219. if (!llvm::sys::fs::exists(File)) {
  220. std::string Msg = "Input perf script(" + File + ") doesn't exist!";
  221. exitWithError(Msg);
  222. }
  223. }
  224. }
  225. if (BinaryFilenames.size() > 1) {
  226. // TODO: remove this if everything is ready to support multiple binaries.
  227. exitWithError(
  228. "Currently only support one input binary, multiple binaries' "
  229. "profile will be merged in one profile and make profile "
  230. "summary info inaccurate. Please use `llvm-perfdata` to merge "
  231. "profiles from multiple binaries.");
  232. }
  233. for (auto &Binary : BinaryFilenames) {
  234. if (!llvm::sys::fs::exists(Binary)) {
  235. std::string Msg = "Input binary(" + Binary + ") doesn't exist!";
  236. exitWithError(Msg);
  237. }
  238. }
  239. if (CSProfileGenerator::MaxCompressionSize < -1) {
  240. exitWithError("Value of --compress-recursion should >= -1");
  241. }
  242. if (ShowSourceLocations && !ShowDisassemblyOnly) {
  243. exitWithError("--show-source-locations should work together with "
  244. "--show-disassembly-only!");
  245. }
  246. }
  247. PerfReader::PerfReader(cl::list<std::string> &BinaryFilenames,
  248. cl::list<std::string> &PerfTraceFilenames) {
  249. validateCommandLine(BinaryFilenames, PerfTraceFilenames);
  250. // Load the binaries.
  251. for (auto Filename : BinaryFilenames)
  252. loadBinary(Filename, /*AllowNameConflict*/ false);
  253. }
  254. ProfiledBinary &PerfReader::loadBinary(const StringRef BinaryPath,
  255. bool AllowNameConflict) {
  256. // The binary table is currently indexed by the binary name not the full
  257. // binary path. This is because the user-given path may not match the one
  258. // that was actually executed.
  259. StringRef BinaryName = llvm::sys::path::filename(BinaryPath);
  260. // Call to load the binary in the ctor of ProfiledBinary.
  261. auto Ret = BinaryTable.insert({BinaryName, ProfiledBinary(BinaryPath)});
  262. if (!Ret.second && !AllowNameConflict) {
  263. std::string ErrorMsg = "Binary name conflict: " + BinaryPath.str() +
  264. " and " + Ret.first->second.getPath().str() + " \n";
  265. exitWithError(ErrorMsg);
  266. }
  267. return Ret.first->second;
  268. }
  269. void PerfReader::updateBinaryAddress(const MMapEvent &Event) {
  270. // Load the binary.
  271. StringRef BinaryPath = Event.BinaryPath;
  272. StringRef BinaryName = llvm::sys::path::filename(BinaryPath);
  273. auto I = BinaryTable.find(BinaryName);
  274. // Drop the event which doesn't belong to user-provided binaries
  275. // or if its image is loaded at the same address
  276. if (I == BinaryTable.end() || Event.BaseAddress == I->second.getBaseAddress())
  277. return;
  278. ProfiledBinary &Binary = I->second;
  279. // A binary image could be uploaded and then reloaded at different
  280. // place, so update the address map here
  281. AddrToBinaryMap.erase(Binary.getBaseAddress());
  282. AddrToBinaryMap[Event.BaseAddress] = &Binary;
  283. // Update binary load address.
  284. Binary.setBaseAddress(Event.BaseAddress);
  285. }
  286. ProfiledBinary *PerfReader::getBinary(uint64_t Address) {
  287. auto Iter = AddrToBinaryMap.lower_bound(Address);
  288. if (Iter == AddrToBinaryMap.end() || Iter->first != Address) {
  289. if (Iter == AddrToBinaryMap.begin())
  290. return nullptr;
  291. Iter--;
  292. }
  293. return Iter->second;
  294. }
  295. // Use ordered map to make the output deterministic
  296. using OrderedCounterForPrint = std::map<std::string, RangeSample>;
  297. static void printSampleCounter(OrderedCounterForPrint &OrderedCounter) {
  298. for (auto Range : OrderedCounter) {
  299. outs() << Range.first << "\n";
  300. for (auto I : Range.second) {
  301. outs() << " (" << format("%" PRIx64, I.first.first) << ", "
  302. << format("%" PRIx64, I.first.second) << "): " << I.second << "\n";
  303. }
  304. }
  305. }
  306. static std::string getContextKeyStr(ContextKey *K,
  307. const ProfiledBinary *Binary) {
  308. std::string ContextStr;
  309. if (const auto *CtxKey = dyn_cast<StringBasedCtxKey>(K)) {
  310. return CtxKey->Context;
  311. } else if (const auto *CtxKey = dyn_cast<ProbeBasedCtxKey>(K)) {
  312. SmallVector<std::string, 16> ContextStack;
  313. for (const auto *Probe : CtxKey->Probes) {
  314. Binary->getInlineContextForProbe(Probe, ContextStack, true);
  315. }
  316. for (const auto &Context : ContextStack) {
  317. if (ContextStr.size())
  318. ContextStr += " @ ";
  319. ContextStr += Context;
  320. }
  321. }
  322. return ContextStr;
  323. }
  324. static void printRangeCounter(ContextSampleCounterMap &Counter,
  325. const ProfiledBinary *Binary) {
  326. OrderedCounterForPrint OrderedCounter;
  327. for (auto &CI : Counter) {
  328. OrderedCounter[getContextKeyStr(CI.first.getPtr(), Binary)] =
  329. CI.second.RangeCounter;
  330. }
  331. printSampleCounter(OrderedCounter);
  332. }
  333. static void printBranchCounter(ContextSampleCounterMap &Counter,
  334. const ProfiledBinary *Binary) {
  335. OrderedCounterForPrint OrderedCounter;
  336. for (auto &CI : Counter) {
  337. OrderedCounter[getContextKeyStr(CI.first.getPtr(), Binary)] =
  338. CI.second.BranchCounter;
  339. }
  340. printSampleCounter(OrderedCounter);
  341. }
  342. void PerfReader::printUnwinderOutput() {
  343. for (auto I : BinarySampleCounters) {
  344. const ProfiledBinary *Binary = I.first;
  345. outs() << "Binary(" << Binary->getName().str() << ")'s Range Counter:\n";
  346. printRangeCounter(I.second, Binary);
  347. outs() << "\nBinary(" << Binary->getName().str() << ")'s Branch Counter:\n";
  348. printBranchCounter(I.second, Binary);
  349. }
  350. }
  351. void PerfReader::unwindSamples() {
  352. for (const auto &Item : AggregatedSamples) {
  353. const HybridSample *Sample = dyn_cast<HybridSample>(Item.first.getPtr());
  354. VirtualUnwinder Unwinder(&BinarySampleCounters[Sample->Binary],
  355. Sample->Binary);
  356. Unwinder.unwind(Sample, Item.second);
  357. }
  358. if (ShowUnwinderOutput)
  359. printUnwinderOutput();
  360. }
  361. bool PerfReader::extractLBRStack(TraceStream &TraceIt,
  362. SmallVectorImpl<LBREntry> &LBRStack,
  363. ProfiledBinary *Binary) {
  364. // The raw format of LBR stack is like:
  365. // 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
  366. // ... 0x4005c8/0x4005dc/P/-/-/0
  367. // It's in FIFO order and seperated by whitespace.
  368. SmallVector<StringRef, 32> Records;
  369. TraceIt.getCurrentLine().split(Records, " ");
  370. // Extract leading instruction pointer if present, use single
  371. // list to pass out as reference.
  372. size_t Index = 0;
  373. if (!Records.empty() && Records[0].find('/') == StringRef::npos) {
  374. Index = 1;
  375. }
  376. // Now extract LBR samples - note that we do not reverse the
  377. // LBR entry order so we can unwind the sample stack as we walk
  378. // through LBR entries.
  379. uint64_t PrevTrDst = 0;
  380. while (Index < Records.size()) {
  381. auto &Token = Records[Index++];
  382. if (Token.size() == 0)
  383. continue;
  384. SmallVector<StringRef, 8> Addresses;
  385. Token.split(Addresses, "/");
  386. uint64_t Src;
  387. uint64_t Dst;
  388. Addresses[0].substr(2).getAsInteger(16, Src);
  389. Addresses[1].substr(2).getAsInteger(16, Dst);
  390. bool SrcIsInternal = Binary->addressIsCode(Src);
  391. bool DstIsInternal = Binary->addressIsCode(Dst);
  392. bool IsArtificial = false;
  393. // Ignore branches outside the current binary.
  394. if (!SrcIsInternal && !DstIsInternal)
  395. continue;
  396. if (!SrcIsInternal && DstIsInternal) {
  397. // For transition from external code (such as dynamic libraries) to
  398. // the current binary, keep track of the branch target which will be
  399. // grouped with the Source of the last transition from the current
  400. // binary.
  401. PrevTrDst = Dst;
  402. continue;
  403. }
  404. if (SrcIsInternal && !DstIsInternal) {
  405. // For transition to external code, group the Source with the next
  406. // availabe transition target.
  407. if (!PrevTrDst)
  408. continue;
  409. Dst = PrevTrDst;
  410. PrevTrDst = 0;
  411. IsArtificial = true;
  412. }
  413. // TODO: filter out buggy duplicate branches on Skylake
  414. LBRStack.emplace_back(LBREntry(Src, Dst, IsArtificial));
  415. }
  416. TraceIt.advance();
  417. return !LBRStack.empty();
  418. }
  419. bool PerfReader::extractCallstack(TraceStream &TraceIt,
  420. SmallVectorImpl<uint64_t> &CallStack) {
  421. // The raw format of call stack is like:
  422. // 4005dc # leaf frame
  423. // 400634
  424. // 400684 # root frame
  425. // It's in bottom-up order with each frame in one line.
  426. // Extract stack frames from sample
  427. ProfiledBinary *Binary = nullptr;
  428. while (!TraceIt.isAtEoF() && !TraceIt.getCurrentLine().startswith(" 0x")) {
  429. StringRef FrameStr = TraceIt.getCurrentLine().ltrim();
  430. uint64_t FrameAddr = 0;
  431. if (FrameStr.getAsInteger(16, FrameAddr)) {
  432. // We might parse a non-perf sample line like empty line and comments,
  433. // skip it
  434. TraceIt.advance();
  435. return false;
  436. }
  437. TraceIt.advance();
  438. if (!Binary) {
  439. Binary = getBinary(FrameAddr);
  440. // we might have addr not match the MMAP, skip it
  441. if (!Binary) {
  442. if (AddrToBinaryMap.size() == 0)
  443. WithColor::warning() << "No MMAP event in the perfscript, create it "
  444. "with '--show-mmap-events'\n";
  445. break;
  446. }
  447. }
  448. // Currently intermixed frame from different binaries is not supported.
  449. // Ignore bottom frames not from binary of interest.
  450. if (!Binary->addressIsCode(FrameAddr))
  451. break;
  452. // We need to translate return address to call address
  453. // for non-leaf frames
  454. if (!CallStack.empty()) {
  455. FrameAddr = Binary->getCallAddrFromFrameAddr(FrameAddr);
  456. }
  457. CallStack.emplace_back(FrameAddr);
  458. }
  459. // Skip other unrelated line, find the next valid LBR line
  460. // Note that even for empty call stack, we should skip the address at the
  461. // bottom, otherwise the following pass may generate a truncated callstack
  462. while (!TraceIt.isAtEoF() && !TraceIt.getCurrentLine().startswith(" 0x")) {
  463. TraceIt.advance();
  464. }
  465. // Filter out broken stack sample. We may not have complete frame info
  466. // if sample end up in prolog/epilog, the result is dangling context not
  467. // connected to entry point. This should be relatively rare thus not much
  468. // impact on overall profile quality. However we do want to filter them
  469. // out to reduce the number of different calling contexts. One instance
  470. // of such case - when sample landed in prolog/epilog, somehow stack
  471. // walking will be broken in an unexpected way that higher frames will be
  472. // missing.
  473. return !CallStack.empty() &&
  474. !Binary->addressInPrologEpilog(CallStack.front());
  475. }
  476. void PerfReader::parseHybridSample(TraceStream &TraceIt) {
  477. // The raw hybird sample started with call stack in FILO order and followed
  478. // intermediately by LBR sample
  479. // e.g.
  480. // 4005dc # call stack leaf
  481. // 400634
  482. // 400684 # call stack root
  483. // 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
  484. // ... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries
  485. //
  486. std::shared_ptr<HybridSample> Sample = std::make_shared<HybridSample>();
  487. // Parsing call stack and populate into HybridSample.CallStack
  488. if (!extractCallstack(TraceIt, Sample->CallStack)) {
  489. // Skip the next LBR line matched current call stack
  490. if (!TraceIt.isAtEoF() && TraceIt.getCurrentLine().startswith(" 0x"))
  491. TraceIt.advance();
  492. return;
  493. }
  494. // Set the binary current sample belongs to
  495. Sample->Binary = getBinary(Sample->CallStack.front());
  496. if (!TraceIt.isAtEoF() && TraceIt.getCurrentLine().startswith(" 0x")) {
  497. // Parsing LBR stack and populate into HybridSample.LBRStack
  498. if (extractLBRStack(TraceIt, Sample->LBRStack, Sample->Binary)) {
  499. // Canonicalize stack leaf to avoid 'random' IP from leaf frame skew LBR
  500. // ranges
  501. Sample->CallStack.front() = Sample->LBRStack[0].Target;
  502. // Record samples by aggregation
  503. Sample->genHashCode();
  504. AggregatedSamples[Hashable<PerfSample>(Sample)]++;
  505. }
  506. } else {
  507. // LBR sample is encoded in single line after stack sample
  508. exitWithError("'Hybrid perf sample is corrupted, No LBR sample line");
  509. }
  510. }
  511. void PerfReader::parseMMap2Event(TraceStream &TraceIt) {
  512. // Parse a line like:
  513. // PERF_RECORD_MMAP2 2113428/2113428: [0x7fd4efb57000(0x204000) @ 0
  514. // 08:04 19532229 3585508847]: r-xp /usr/lib64/libdl-2.17.so
  515. constexpr static const char *const Pattern =
  516. "PERF_RECORD_MMAP2 ([0-9]+)/[0-9]+: "
  517. "\\[(0x[a-f0-9]+)\\((0x[a-f0-9]+)\\) @ "
  518. "(0x[a-f0-9]+|0) .*\\]: [-a-z]+ (.*)";
  519. // Field 0 - whole line
  520. // Field 1 - PID
  521. // Field 2 - base address
  522. // Field 3 - mmapped size
  523. // Field 4 - page offset
  524. // Field 5 - binary path
  525. enum EventIndex {
  526. WHOLE_LINE = 0,
  527. PID = 1,
  528. BASE_ADDRESS = 2,
  529. MMAPPED_SIZE = 3,
  530. PAGE_OFFSET = 4,
  531. BINARY_PATH = 5
  532. };
  533. Regex RegMmap2(Pattern);
  534. SmallVector<StringRef, 6> Fields;
  535. bool R = RegMmap2.match(TraceIt.getCurrentLine(), &Fields);
  536. if (!R) {
  537. std::string ErrorMsg = "Cannot parse mmap event: Line" +
  538. Twine(TraceIt.getLineNumber()).str() + ": " +
  539. TraceIt.getCurrentLine().str() + " \n";
  540. exitWithError(ErrorMsg);
  541. }
  542. MMapEvent Event;
  543. Fields[PID].getAsInteger(10, Event.PID);
  544. Fields[BASE_ADDRESS].getAsInteger(0, Event.BaseAddress);
  545. Fields[MMAPPED_SIZE].getAsInteger(0, Event.Size);
  546. Fields[PAGE_OFFSET].getAsInteger(0, Event.Offset);
  547. Event.BinaryPath = Fields[BINARY_PATH];
  548. updateBinaryAddress(Event);
  549. if (ShowMmapEvents) {
  550. outs() << "Mmap: Binary " << Event.BinaryPath << " loaded at "
  551. << format("0x%" PRIx64 ":", Event.BaseAddress) << " \n";
  552. }
  553. TraceIt.advance();
  554. }
  555. void PerfReader::parseEventOrSample(TraceStream &TraceIt) {
  556. if (TraceIt.getCurrentLine().startswith("PERF_RECORD_MMAP2"))
  557. parseMMap2Event(TraceIt);
  558. else if (getPerfScriptType() == PERF_LBR_STACK)
  559. parseHybridSample(TraceIt);
  560. else {
  561. // TODO: parse other type sample
  562. TraceIt.advance();
  563. }
  564. }
  565. void PerfReader::parseAndAggregateTrace(StringRef Filename) {
  566. // Trace line iterator
  567. TraceStream TraceIt(Filename);
  568. while (!TraceIt.isAtEoF())
  569. parseEventOrSample(TraceIt);
  570. }
  571. void PerfReader::checkAndSetPerfType(
  572. cl::list<std::string> &PerfTraceFilenames) {
  573. for (auto FileName : PerfTraceFilenames) {
  574. PerfScriptType Type = checkPerfScriptType(FileName);
  575. if (Type == PERF_INVALID)
  576. exitWithError("Invalid perf script input!");
  577. if (PerfType != PERF_UNKNOWN && PerfType != Type)
  578. exitWithError("Inconsistent sample among different perf scripts");
  579. PerfType = Type;
  580. }
  581. }
  582. void PerfReader::generateRawProfile() {
  583. if (getPerfScriptType() == PERF_LBR_STACK) {
  584. // Unwind samples if it's hybird sample
  585. unwindSamples();
  586. } else if (getPerfScriptType() == PERF_LBR) {
  587. // TODO: range overlap computation for regular AutoFDO
  588. }
  589. }
  590. void PerfReader::parsePerfTraces(cl::list<std::string> &PerfTraceFilenames) {
  591. // Check and set current perfscript type
  592. checkAndSetPerfType(PerfTraceFilenames);
  593. // Parse perf traces and do aggregation.
  594. for (auto Filename : PerfTraceFilenames)
  595. parseAndAggregateTrace(Filename);
  596. generateRawProfile();
  597. }
  598. } // end namespace sampleprof
  599. } // end namespace llvm