xray-account.cpp 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521
  1. //===- xray-account.h - XRay Function Call Accounting ---------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file implements basic function call accounting from an XRay trace.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. #include <algorithm>
  13. #include <cassert>
  14. #include <numeric>
  15. #include <system_error>
  16. #include <utility>
  17. #include "xray-account.h"
  18. #include "xray-registry.h"
  19. #include "llvm/Support/ErrorHandling.h"
  20. #include "llvm/Support/FormatVariadic.h"
  21. #include "llvm/XRay/InstrumentationMap.h"
  22. #include "llvm/XRay/Trace.h"
  23. #include <cmath>
  24. using namespace llvm;
  25. using namespace llvm::xray;
  26. static cl::SubCommand Account("account", "Function call accounting");
  27. static cl::opt<std::string> AccountInput(cl::Positional,
  28. cl::desc("<xray log file>"),
  29. cl::Required, cl::sub(Account));
  30. static cl::opt<bool>
  31. AccountKeepGoing("keep-going", cl::desc("Keep going on errors encountered"),
  32. cl::sub(Account), cl::init(false));
  33. static cl::alias AccountKeepGoing2("k", cl::aliasopt(AccountKeepGoing),
  34. cl::desc("Alias for -keep_going"));
  35. static cl::opt<bool> AccountRecursiveCallsOnly(
  36. "recursive-calls-only", cl::desc("Only count the calls that are recursive"),
  37. cl::sub(Account), cl::init(false));
  38. static cl::opt<bool> AccountDeduceSiblingCalls(
  39. "deduce-sibling-calls",
  40. cl::desc("Deduce sibling calls when unrolling function call stacks"),
  41. cl::sub(Account), cl::init(false));
  42. static cl::alias
  43. AccountDeduceSiblingCalls2("d", cl::aliasopt(AccountDeduceSiblingCalls),
  44. cl::desc("Alias for -deduce_sibling_calls"));
  45. static cl::opt<std::string>
  46. AccountOutput("output", cl::value_desc("output file"), cl::init("-"),
  47. cl::desc("output file; use '-' for stdout"),
  48. cl::sub(Account));
  49. static cl::alias AccountOutput2("o", cl::aliasopt(AccountOutput),
  50. cl::desc("Alias for -output"));
  51. enum class AccountOutputFormats { TEXT, CSV };
  52. static cl::opt<AccountOutputFormats>
  53. AccountOutputFormat("format", cl::desc("output format"),
  54. cl::values(clEnumValN(AccountOutputFormats::TEXT,
  55. "text", "report stats in text"),
  56. clEnumValN(AccountOutputFormats::CSV, "csv",
  57. "report stats in csv")),
  58. cl::sub(Account));
  59. static cl::alias AccountOutputFormat2("f", cl::desc("Alias of -format"),
  60. cl::aliasopt(AccountOutputFormat));
  61. enum class SortField {
  62. FUNCID,
  63. COUNT,
  64. MIN,
  65. MED,
  66. PCT90,
  67. PCT99,
  68. MAX,
  69. SUM,
  70. FUNC,
  71. };
  72. static cl::opt<SortField> AccountSortOutput(
  73. "sort", cl::desc("sort output by this field"), cl::value_desc("field"),
  74. cl::sub(Account), cl::init(SortField::FUNCID),
  75. cl::values(clEnumValN(SortField::FUNCID, "funcid", "function id"),
  76. clEnumValN(SortField::COUNT, "count", "funciton call counts"),
  77. clEnumValN(SortField::MIN, "min", "minimum function durations"),
  78. clEnumValN(SortField::MED, "med", "median function durations"),
  79. clEnumValN(SortField::PCT90, "90p", "90th percentile durations"),
  80. clEnumValN(SortField::PCT99, "99p", "99th percentile durations"),
  81. clEnumValN(SortField::MAX, "max", "maximum function durations"),
  82. clEnumValN(SortField::SUM, "sum", "sum of call durations"),
  83. clEnumValN(SortField::FUNC, "func", "function names")));
  84. static cl::alias AccountSortOutput2("s", cl::aliasopt(AccountSortOutput),
  85. cl::desc("Alias for -sort"));
  86. enum class SortDirection {
  87. ASCENDING,
  88. DESCENDING,
  89. };
  90. static cl::opt<SortDirection> AccountSortOrder(
  91. "sortorder", cl::desc("sort ordering"), cl::init(SortDirection::ASCENDING),
  92. cl::values(clEnumValN(SortDirection::ASCENDING, "asc", "ascending"),
  93. clEnumValN(SortDirection::DESCENDING, "dsc", "descending")),
  94. cl::sub(Account));
  95. static cl::alias AccountSortOrder2("r", cl::aliasopt(AccountSortOrder),
  96. cl::desc("Alias for -sortorder"));
  97. static cl::opt<int> AccountTop("top", cl::desc("only show the top N results"),
  98. cl::value_desc("N"), cl::sub(Account),
  99. cl::init(-1));
  100. static cl::alias AccountTop2("p", cl::desc("Alias for -top"),
  101. cl::aliasopt(AccountTop));
  102. static cl::opt<std::string>
  103. AccountInstrMap("instr_map",
  104. cl::desc("binary with the instrumentation map, or "
  105. "a separate instrumentation map"),
  106. cl::value_desc("binary with xray_instr_map"),
  107. cl::sub(Account), cl::init(""));
  108. static cl::alias AccountInstrMap2("m", cl::aliasopt(AccountInstrMap),
  109. cl::desc("Alias for -instr_map"));
  110. namespace {
  111. template <class T, class U> void setMinMax(std::pair<T, T> &MM, U &&V) {
  112. if (MM.first == 0 || MM.second == 0)
  113. MM = std::make_pair(std::forward<U>(V), std::forward<U>(V));
  114. else
  115. MM = std::make_pair(std::min(MM.first, V), std::max(MM.second, V));
  116. }
  117. template <class T> T diff(T L, T R) { return std::max(L, R) - std::min(L, R); }
  118. } // namespace
  119. using RecursionStatus = LatencyAccountant::FunctionStack::RecursionStatus;
  120. RecursionStatus &RecursionStatus::operator++() {
  121. auto Depth = Bitfield::get<RecursionStatus::Depth>(Storage);
  122. assert(Depth >= 0 && Depth < std::numeric_limits<decltype(Depth)>::max());
  123. ++Depth;
  124. Bitfield::set<RecursionStatus::Depth>(Storage, Depth); // ++Storage
  125. // Did this function just (maybe indirectly) call itself the first time?
  126. if (!isRecursive() && Depth == 2) // Storage == 2 / Storage s> 1
  127. Bitfield::set<RecursionStatus::IsRecursive>(Storage,
  128. true); // Storage |= INT_MIN
  129. return *this;
  130. }
  131. RecursionStatus &RecursionStatus::operator--() {
  132. auto Depth = Bitfield::get<RecursionStatus::Depth>(Storage);
  133. assert(Depth > 0);
  134. --Depth;
  135. Bitfield::set<RecursionStatus::Depth>(Storage, Depth); // --Storage
  136. // Did we leave a function that previouly (maybe indirectly) called itself?
  137. if (isRecursive() && Depth == 0) // Storage == INT_MIN
  138. Bitfield::set<RecursionStatus::IsRecursive>(Storage, false); // Storage = 0
  139. return *this;
  140. }
  141. bool RecursionStatus::isRecursive() const {
  142. return Bitfield::get<RecursionStatus::IsRecursive>(Storage); // Storage s< 0
  143. }
  144. bool LatencyAccountant::accountRecord(const XRayRecord &Record) {
  145. setMinMax(PerThreadMinMaxTSC[Record.TId], Record.TSC);
  146. setMinMax(PerCPUMinMaxTSC[Record.CPU], Record.TSC);
  147. if (CurrentMaxTSC == 0)
  148. CurrentMaxTSC = Record.TSC;
  149. if (Record.TSC < CurrentMaxTSC)
  150. return false;
  151. auto &ThreadStack = PerThreadFunctionStack[Record.TId];
  152. if (RecursiveCallsOnly && !ThreadStack.RecursionDepth)
  153. ThreadStack.RecursionDepth.emplace();
  154. switch (Record.Type) {
  155. case RecordTypes::CUSTOM_EVENT:
  156. case RecordTypes::TYPED_EVENT:
  157. // TODO: Support custom and typed event accounting in the future.
  158. return true;
  159. case RecordTypes::ENTER:
  160. case RecordTypes::ENTER_ARG: {
  161. ThreadStack.Stack.emplace_back(Record.FuncId, Record.TSC);
  162. if (ThreadStack.RecursionDepth)
  163. ++(*ThreadStack.RecursionDepth)[Record.FuncId];
  164. break;
  165. }
  166. case RecordTypes::EXIT:
  167. case RecordTypes::TAIL_EXIT: {
  168. if (ThreadStack.Stack.empty())
  169. return false;
  170. if (ThreadStack.Stack.back().first == Record.FuncId) {
  171. const auto &Top = ThreadStack.Stack.back();
  172. if (!ThreadStack.RecursionDepth ||
  173. (*ThreadStack.RecursionDepth)[Top.first].isRecursive())
  174. recordLatency(Top.first, diff(Top.second, Record.TSC));
  175. if (ThreadStack.RecursionDepth)
  176. --(*ThreadStack.RecursionDepth)[Top.first];
  177. ThreadStack.Stack.pop_back();
  178. break;
  179. }
  180. if (!DeduceSiblingCalls)
  181. return false;
  182. // Look for the parent up the stack.
  183. auto Parent =
  184. llvm::find_if(llvm::reverse(ThreadStack.Stack),
  185. [&](const std::pair<const int32_t, uint64_t> &E) {
  186. return E.first == Record.FuncId;
  187. });
  188. if (Parent == ThreadStack.Stack.rend())
  189. return false;
  190. // Account time for this apparently sibling call exit up the stack.
  191. // Considering the following case:
  192. //
  193. // f()
  194. // g()
  195. // h()
  196. //
  197. // We might only ever see the following entries:
  198. //
  199. // -> f()
  200. // -> g()
  201. // -> h()
  202. // <- h()
  203. // <- f()
  204. //
  205. // Now we don't see the exit to g() because some older version of the XRay
  206. // runtime wasn't instrumenting tail exits. If we don't deduce tail calls,
  207. // we may potentially never account time for g() -- and this code would have
  208. // already bailed out, because `<- f()` doesn't match the current "top" of
  209. // stack where we're waiting for the exit to `g()` instead. This is not
  210. // ideal and brittle -- so instead we provide a potentially inaccurate
  211. // accounting of g() instead, computing it from the exit of f().
  212. //
  213. // While it might be better that we account the time between `-> g()` and
  214. // `-> h()` as the proper accounting of time for g() here, this introduces
  215. // complexity to do correctly (need to backtrack, etc.).
  216. //
  217. // FIXME: Potentially implement the more complex deduction algorithm?
  218. auto R = make_range(std::next(Parent).base(), ThreadStack.Stack.end());
  219. for (auto &E : R) {
  220. if (!ThreadStack.RecursionDepth ||
  221. (*ThreadStack.RecursionDepth)[E.first].isRecursive())
  222. recordLatency(E.first, diff(E.second, Record.TSC));
  223. }
  224. for (auto &Top : reverse(R)) {
  225. if (ThreadStack.RecursionDepth)
  226. --(*ThreadStack.RecursionDepth)[Top.first];
  227. ThreadStack.Stack.pop_back();
  228. }
  229. break;
  230. }
  231. }
  232. return true;
  233. }
  234. namespace {
  235. // We consolidate the data into a struct which we can output in various forms.
  236. struct ResultRow {
  237. uint64_t Count;
  238. double Min;
  239. double Median;
  240. double Pct90;
  241. double Pct99;
  242. double Max;
  243. double Sum;
  244. std::string DebugInfo;
  245. std::string Function;
  246. };
  247. ResultRow getStats(MutableArrayRef<uint64_t> Timings) {
  248. assert(!Timings.empty());
  249. ResultRow R;
  250. R.Sum = std::accumulate(Timings.begin(), Timings.end(), 0.0);
  251. auto MinMax = std::minmax_element(Timings.begin(), Timings.end());
  252. R.Min = *MinMax.first;
  253. R.Max = *MinMax.second;
  254. R.Count = Timings.size();
  255. auto MedianOff = Timings.size() / 2;
  256. std::nth_element(Timings.begin(), Timings.begin() + MedianOff, Timings.end());
  257. R.Median = Timings[MedianOff];
  258. size_t Pct90Off = std::floor(Timings.size() * 0.9);
  259. std::nth_element(Timings.begin(), Timings.begin() + (uint64_t)Pct90Off,
  260. Timings.end());
  261. R.Pct90 = Timings[Pct90Off];
  262. size_t Pct99Off = std::floor(Timings.size() * 0.99);
  263. std::nth_element(Timings.begin(), Timings.begin() + (uint64_t)Pct99Off,
  264. Timings.end());
  265. R.Pct99 = Timings[Pct99Off];
  266. return R;
  267. }
  268. } // namespace
  269. using TupleType = std::tuple<int32_t, uint64_t, ResultRow>;
  270. template <typename F>
  271. static void sortByKey(std::vector<TupleType> &Results, F Fn) {
  272. bool ASC = AccountSortOrder == SortDirection::ASCENDING;
  273. llvm::sort(Results, [=](const TupleType &L, const TupleType &R) {
  274. return ASC ? Fn(L) < Fn(R) : Fn(L) > Fn(R);
  275. });
  276. }
  277. template <class F>
  278. void LatencyAccountant::exportStats(const XRayFileHeader &Header, F Fn) const {
  279. std::vector<TupleType> Results;
  280. Results.reserve(FunctionLatencies.size());
  281. for (auto FT : FunctionLatencies) {
  282. const auto &FuncId = FT.first;
  283. auto &Timings = FT.second;
  284. Results.emplace_back(FuncId, Timings.size(), getStats(Timings));
  285. auto &Row = std::get<2>(Results.back());
  286. if (Header.CycleFrequency) {
  287. double CycleFrequency = Header.CycleFrequency;
  288. Row.Min /= CycleFrequency;
  289. Row.Median /= CycleFrequency;
  290. Row.Pct90 /= CycleFrequency;
  291. Row.Pct99 /= CycleFrequency;
  292. Row.Max /= CycleFrequency;
  293. Row.Sum /= CycleFrequency;
  294. }
  295. Row.Function = FuncIdHelper.SymbolOrNumber(FuncId);
  296. Row.DebugInfo = FuncIdHelper.FileLineAndColumn(FuncId);
  297. }
  298. // Sort the data according to user-provided flags.
  299. switch (AccountSortOutput) {
  300. case SortField::FUNCID:
  301. sortByKey(Results, [](const TupleType &X) { return std::get<0>(X); });
  302. break;
  303. case SortField::COUNT:
  304. sortByKey(Results, [](const TupleType &X) { return std::get<1>(X); });
  305. break;
  306. case SortField::MIN:
  307. sortByKey(Results, [](const TupleType &X) { return std::get<2>(X).Min; });
  308. break;
  309. case SortField::MED:
  310. sortByKey(Results, [](const TupleType &X) { return std::get<2>(X).Median; });
  311. break;
  312. case SortField::PCT90:
  313. sortByKey(Results, [](const TupleType &X) { return std::get<2>(X).Pct90; });
  314. break;
  315. case SortField::PCT99:
  316. sortByKey(Results, [](const TupleType &X) { return std::get<2>(X).Pct99; });
  317. break;
  318. case SortField::MAX:
  319. sortByKey(Results, [](const TupleType &X) { return std::get<2>(X).Max; });
  320. break;
  321. case SortField::SUM:
  322. sortByKey(Results, [](const TupleType &X) { return std::get<2>(X).Sum; });
  323. break;
  324. case SortField::FUNC:
  325. llvm_unreachable("Not implemented");
  326. }
  327. if (AccountTop > 0) {
  328. auto MaxTop =
  329. std::min(AccountTop.getValue(), static_cast<int>(Results.size()));
  330. Results.erase(Results.begin() + MaxTop, Results.end());
  331. }
  332. for (const auto &R : Results)
  333. Fn(std::get<0>(R), std::get<1>(R), std::get<2>(R));
  334. }
  335. void LatencyAccountant::exportStatsAsText(raw_ostream &OS,
  336. const XRayFileHeader &Header) const {
  337. OS << "Functions with latencies: " << FunctionLatencies.size() << "\n";
  338. // We spend some effort to make the text output more readable, so we do the
  339. // following formatting decisions for each of the fields:
  340. //
  341. // - funcid: 32-bit, but we can determine the largest number and be
  342. // between
  343. // a minimum of 5 characters, up to 9 characters, right aligned.
  344. // - count: 64-bit, but we can determine the largest number and be
  345. // between
  346. // a minimum of 5 characters, up to 9 characters, right aligned.
  347. // - min, median, 90pct, 99pct, max: double precision, but we want to keep
  348. // the values in seconds, with microsecond precision (0.000'001), so we
  349. // have at most 6 significant digits, with the whole number part to be
  350. // at
  351. // least 1 character. For readability we'll right-align, with full 9
  352. // characters each.
  353. // - debug info, function name: we format this as a concatenation of the
  354. // debug info and the function name.
  355. //
  356. static constexpr char StatsHeaderFormat[] =
  357. "{0,+9} {1,+10} [{2,+9}, {3,+9}, {4,+9}, {5,+9}, {6,+9}] {7,+9}";
  358. static constexpr char StatsFormat[] =
  359. R"({0,+9} {1,+10} [{2,+9:f6}, {3,+9:f6}, {4,+9:f6}, {5,+9:f6}, {6,+9:f6}] {7,+9:f6})";
  360. OS << llvm::formatv(StatsHeaderFormat, "funcid", "count", "min", "med", "90p",
  361. "99p", "max", "sum")
  362. << llvm::formatv(" {0,-12}\n", "function");
  363. exportStats(Header, [&](int32_t FuncId, size_t Count, const ResultRow &Row) {
  364. OS << llvm::formatv(StatsFormat, FuncId, Count, Row.Min, Row.Median,
  365. Row.Pct90, Row.Pct99, Row.Max, Row.Sum)
  366. << " " << Row.DebugInfo << ": " << Row.Function << "\n";
  367. });
  368. }
  369. void LatencyAccountant::exportStatsAsCSV(raw_ostream &OS,
  370. const XRayFileHeader &Header) const {
  371. OS << "funcid,count,min,median,90%ile,99%ile,max,sum,debug,function\n";
  372. exportStats(Header, [&](int32_t FuncId, size_t Count, const ResultRow &Row) {
  373. OS << FuncId << ',' << Count << ',' << Row.Min << ',' << Row.Median << ','
  374. << Row.Pct90 << ',' << Row.Pct99 << ',' << Row.Max << "," << Row.Sum
  375. << ",\"" << Row.DebugInfo << "\",\"" << Row.Function << "\"\n";
  376. });
  377. }
  378. using namespace llvm::xray;
  379. namespace llvm {
  380. template <> struct format_provider<llvm::xray::RecordTypes> {
  381. static void format(const llvm::xray::RecordTypes &T, raw_ostream &Stream,
  382. StringRef Style) {
  383. switch (T) {
  384. case RecordTypes::ENTER:
  385. Stream << "enter";
  386. break;
  387. case RecordTypes::ENTER_ARG:
  388. Stream << "enter-arg";
  389. break;
  390. case RecordTypes::EXIT:
  391. Stream << "exit";
  392. break;
  393. case RecordTypes::TAIL_EXIT:
  394. Stream << "tail-exit";
  395. break;
  396. case RecordTypes::CUSTOM_EVENT:
  397. Stream << "custom-event";
  398. break;
  399. case RecordTypes::TYPED_EVENT:
  400. Stream << "typed-event";
  401. break;
  402. }
  403. }
  404. };
  405. } // namespace llvm
  406. static CommandRegistration Unused(&Account, []() -> Error {
  407. InstrumentationMap Map;
  408. if (!AccountInstrMap.empty()) {
  409. auto InstrumentationMapOrError = loadInstrumentationMap(AccountInstrMap);
  410. if (!InstrumentationMapOrError)
  411. return joinErrors(make_error<StringError>(
  412. Twine("Cannot open instrumentation map '") +
  413. AccountInstrMap + "'",
  414. std::make_error_code(std::errc::invalid_argument)),
  415. InstrumentationMapOrError.takeError());
  416. Map = std::move(*InstrumentationMapOrError);
  417. }
  418. std::error_code EC;
  419. raw_fd_ostream OS(AccountOutput, EC, sys::fs::OpenFlags::OF_TextWithCRLF);
  420. if (EC)
  421. return make_error<StringError>(
  422. Twine("Cannot open file '") + AccountOutput + "' for writing.", EC);
  423. const auto &FunctionAddresses = Map.getFunctionAddresses();
  424. symbolize::LLVMSymbolizer Symbolizer;
  425. llvm::xray::FuncIdConversionHelper FuncIdHelper(AccountInstrMap, Symbolizer,
  426. FunctionAddresses);
  427. xray::LatencyAccountant FCA(FuncIdHelper, AccountRecursiveCallsOnly,
  428. AccountDeduceSiblingCalls);
  429. auto TraceOrErr = loadTraceFile(AccountInput);
  430. if (!TraceOrErr)
  431. return joinErrors(
  432. make_error<StringError>(
  433. Twine("Failed loading input file '") + AccountInput + "'",
  434. std::make_error_code(std::errc::executable_format_error)),
  435. TraceOrErr.takeError());
  436. auto &T = *TraceOrErr;
  437. for (const auto &Record : T) {
  438. if (FCA.accountRecord(Record))
  439. continue;
  440. errs()
  441. << "Error processing record: "
  442. << llvm::formatv(
  443. R"({{type: {0}; cpu: {1}; record-type: {2}; function-id: {3}; tsc: {4}; thread-id: {5}; process-id: {6}}})",
  444. Record.RecordType, Record.CPU, Record.Type, Record.FuncId,
  445. Record.TSC, Record.TId, Record.PId)
  446. << '\n';
  447. for (const auto &ThreadStack : FCA.getPerThreadFunctionStack()) {
  448. errs() << "Thread ID: " << ThreadStack.first << "\n";
  449. if (ThreadStack.second.Stack.empty()) {
  450. errs() << " (empty stack)\n";
  451. continue;
  452. }
  453. auto Level = ThreadStack.second.Stack.size();
  454. for (const auto &Entry : llvm::reverse(ThreadStack.second.Stack))
  455. errs() << " #" << Level-- << "\t"
  456. << FuncIdHelper.SymbolOrNumber(Entry.first) << '\n';
  457. }
  458. if (!AccountKeepGoing)
  459. return make_error<StringError>(
  460. Twine("Failed accounting function calls in file '") + AccountInput +
  461. "'.",
  462. std::make_error_code(std::errc::executable_format_error));
  463. }
  464. switch (AccountOutputFormat) {
  465. case AccountOutputFormats::TEXT:
  466. FCA.exportStatsAsText(OS, T.getFileHeader());
  467. break;
  468. case AccountOutputFormats::CSV:
  469. FCA.exportStatsAsCSV(OS, T.getFileHeader());
  470. break;
  471. }
  472. return Error::success();
  473. });