Profile.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403
  1. //===- Profile.cpp - XRay Profile Abstraction -----------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // Defines the XRay Profile class representing the latency profile generated by
  10. // XRay's profiling mode.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "llvm/XRay/Profile.h"
  14. #include "llvm/Support/DataExtractor.h"
  15. #include "llvm/Support/Error.h"
  16. #include "llvm/Support/FileSystem.h"
  17. #include "llvm/XRay/Trace.h"
  18. #include <deque>
  19. #include <memory>
  20. namespace llvm {
  21. namespace xray {
  22. Profile::Profile(const Profile &O) {
  23. // We need to re-create all the tries from the original (O), into the current
  24. // Profile being initialized, through the Block instances we see.
  25. for (const auto &Block : O) {
  26. Blocks.push_back({Block.Thread, {}});
  27. auto &B = Blocks.back();
  28. for (const auto &PathData : Block.PathData)
  29. B.PathData.push_back({internPath(cantFail(O.expandPath(PathData.first))),
  30. PathData.second});
  31. }
  32. }
  33. Profile &Profile::operator=(const Profile &O) {
  34. Profile P = O;
  35. *this = std::move(P);
  36. return *this;
  37. }
  38. namespace {
  39. struct BlockHeader {
  40. uint32_t Size;
  41. uint32_t Number;
  42. uint64_t Thread;
  43. };
  44. static Expected<BlockHeader> readBlockHeader(DataExtractor &Extractor,
  45. uint64_t &Offset) {
  46. BlockHeader H;
  47. uint64_t CurrentOffset = Offset;
  48. H.Size = Extractor.getU32(&Offset);
  49. if (Offset == CurrentOffset)
  50. return make_error<StringError>(
  51. Twine("Error parsing block header size at offset '") +
  52. Twine(CurrentOffset) + "'",
  53. std::make_error_code(std::errc::invalid_argument));
  54. CurrentOffset = Offset;
  55. H.Number = Extractor.getU32(&Offset);
  56. if (Offset == CurrentOffset)
  57. return make_error<StringError>(
  58. Twine("Error parsing block header number at offset '") +
  59. Twine(CurrentOffset) + "'",
  60. std::make_error_code(std::errc::invalid_argument));
  61. CurrentOffset = Offset;
  62. H.Thread = Extractor.getU64(&Offset);
  63. if (Offset == CurrentOffset)
  64. return make_error<StringError>(
  65. Twine("Error parsing block header thread id at offset '") +
  66. Twine(CurrentOffset) + "'",
  67. std::make_error_code(std::errc::invalid_argument));
  68. return H;
  69. }
  70. static Expected<std::vector<Profile::FuncID>> readPath(DataExtractor &Extractor,
  71. uint64_t &Offset) {
  72. // We're reading a sequence of int32_t's until we find a 0.
  73. std::vector<Profile::FuncID> Path;
  74. auto CurrentOffset = Offset;
  75. int32_t FuncId;
  76. do {
  77. FuncId = Extractor.getSigned(&Offset, 4);
  78. if (CurrentOffset == Offset)
  79. return make_error<StringError>(
  80. Twine("Error parsing path at offset '") + Twine(CurrentOffset) + "'",
  81. std::make_error_code(std::errc::invalid_argument));
  82. CurrentOffset = Offset;
  83. Path.push_back(FuncId);
  84. } while (FuncId != 0);
  85. return std::move(Path);
  86. }
  87. static Expected<Profile::Data> readData(DataExtractor &Extractor,
  88. uint64_t &Offset) {
  89. // We expect a certain number of elements for Data:
  90. // - A 64-bit CallCount
  91. // - A 64-bit CumulativeLocalTime counter
  92. Profile::Data D;
  93. auto CurrentOffset = Offset;
  94. D.CallCount = Extractor.getU64(&Offset);
  95. if (CurrentOffset == Offset)
  96. return make_error<StringError>(
  97. Twine("Error parsing call counts at offset '") + Twine(CurrentOffset) +
  98. "'",
  99. std::make_error_code(std::errc::invalid_argument));
  100. CurrentOffset = Offset;
  101. D.CumulativeLocalTime = Extractor.getU64(&Offset);
  102. if (CurrentOffset == Offset)
  103. return make_error<StringError>(
  104. Twine("Error parsing cumulative local time at offset '") +
  105. Twine(CurrentOffset) + "'",
  106. std::make_error_code(std::errc::invalid_argument));
  107. return D;
  108. }
  109. } // namespace
  110. Error Profile::addBlock(Block &&B) {
  111. if (B.PathData.empty())
  112. return make_error<StringError>(
  113. "Block may not have empty path data.",
  114. std::make_error_code(std::errc::invalid_argument));
  115. Blocks.emplace_back(std::move(B));
  116. return Error::success();
  117. }
  118. Expected<std::vector<Profile::FuncID>> Profile::expandPath(PathID P) const {
  119. auto It = PathIDMap.find(P);
  120. if (It == PathIDMap.end())
  121. return make_error<StringError>(
  122. Twine("PathID not found: ") + Twine(P),
  123. std::make_error_code(std::errc::invalid_argument));
  124. std::vector<Profile::FuncID> Path;
  125. for (auto Node = It->second; Node; Node = Node->Caller)
  126. Path.push_back(Node->Func);
  127. return std::move(Path);
  128. }
  129. Profile::PathID Profile::internPath(ArrayRef<FuncID> P) {
  130. if (P.empty())
  131. return 0;
  132. auto RootToLeafPath = reverse(P);
  133. // Find the root.
  134. auto It = RootToLeafPath.begin();
  135. auto PathRoot = *It++;
  136. auto RootIt =
  137. find_if(Roots, [PathRoot](TrieNode *N) { return N->Func == PathRoot; });
  138. // If we've not seen this root before, remember it.
  139. TrieNode *Node = nullptr;
  140. if (RootIt == Roots.end()) {
  141. NodeStorage.emplace_back();
  142. Node = &NodeStorage.back();
  143. Node->Func = PathRoot;
  144. Roots.push_back(Node);
  145. } else {
  146. Node = *RootIt;
  147. }
  148. // Now traverse the path, re-creating if necessary.
  149. while (It != RootToLeafPath.end()) {
  150. auto NodeFuncID = *It++;
  151. auto CalleeIt = find_if(Node->Callees, [NodeFuncID](TrieNode *N) {
  152. return N->Func == NodeFuncID;
  153. });
  154. if (CalleeIt == Node->Callees.end()) {
  155. NodeStorage.emplace_back();
  156. auto NewNode = &NodeStorage.back();
  157. NewNode->Func = NodeFuncID;
  158. NewNode->Caller = Node;
  159. Node->Callees.push_back(NewNode);
  160. Node = NewNode;
  161. } else {
  162. Node = *CalleeIt;
  163. }
  164. }
  165. // At this point, Node *must* be pointing at the leaf.
  166. assert(Node->Func == P.front());
  167. if (Node->ID == 0) {
  168. Node->ID = NextID++;
  169. PathIDMap.insert({Node->ID, Node});
  170. }
  171. return Node->ID;
  172. }
  173. Profile mergeProfilesByThread(const Profile &L, const Profile &R) {
  174. Profile Merged;
  175. using PathDataMap = DenseMap<Profile::PathID, Profile::Data>;
  176. using PathDataMapPtr = std::unique_ptr<PathDataMap>;
  177. using PathDataVector = decltype(Profile::Block::PathData);
  178. using ThreadProfileIndexMap = DenseMap<Profile::ThreadID, PathDataMapPtr>;
  179. ThreadProfileIndexMap ThreadProfileIndex;
  180. for (const auto &P : {std::ref(L), std::ref(R)})
  181. for (const auto &Block : P.get()) {
  182. ThreadProfileIndexMap::iterator It;
  183. std::tie(It, std::ignore) = ThreadProfileIndex.insert(
  184. {Block.Thread, PathDataMapPtr{new PathDataMap()}});
  185. for (const auto &PathAndData : Block.PathData) {
  186. auto &PathID = PathAndData.first;
  187. auto &Data = PathAndData.second;
  188. auto NewPathID =
  189. Merged.internPath(cantFail(P.get().expandPath(PathID)));
  190. PathDataMap::iterator PathDataIt;
  191. bool Inserted;
  192. std::tie(PathDataIt, Inserted) = It->second->insert({NewPathID, Data});
  193. if (!Inserted) {
  194. auto &ExistingData = PathDataIt->second;
  195. ExistingData.CallCount += Data.CallCount;
  196. ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime;
  197. }
  198. }
  199. }
  200. for (const auto &IndexedThreadBlock : ThreadProfileIndex) {
  201. PathDataVector PathAndData;
  202. PathAndData.reserve(IndexedThreadBlock.second->size());
  203. copy(*IndexedThreadBlock.second, std::back_inserter(PathAndData));
  204. cantFail(
  205. Merged.addBlock({IndexedThreadBlock.first, std::move(PathAndData)}));
  206. }
  207. return Merged;
  208. }
  209. Profile mergeProfilesByStack(const Profile &L, const Profile &R) {
  210. Profile Merged;
  211. using PathDataMap = DenseMap<Profile::PathID, Profile::Data>;
  212. PathDataMap PathData;
  213. using PathDataVector = decltype(Profile::Block::PathData);
  214. for (const auto &P : {std::ref(L), std::ref(R)})
  215. for (const auto &Block : P.get())
  216. for (const auto &PathAndData : Block.PathData) {
  217. auto &PathId = PathAndData.first;
  218. auto &Data = PathAndData.second;
  219. auto NewPathID =
  220. Merged.internPath(cantFail(P.get().expandPath(PathId)));
  221. PathDataMap::iterator PathDataIt;
  222. bool Inserted;
  223. std::tie(PathDataIt, Inserted) = PathData.insert({NewPathID, Data});
  224. if (!Inserted) {
  225. auto &ExistingData = PathDataIt->second;
  226. ExistingData.CallCount += Data.CallCount;
  227. ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime;
  228. }
  229. }
  230. // In the end there's a single Block, for thread 0.
  231. PathDataVector Block;
  232. Block.reserve(PathData.size());
  233. copy(PathData, std::back_inserter(Block));
  234. cantFail(Merged.addBlock({0, std::move(Block)}));
  235. return Merged;
  236. }
  237. Expected<Profile> loadProfile(StringRef Filename) {
  238. Expected<sys::fs::file_t> FdOrErr = sys::fs::openNativeFileForRead(Filename);
  239. if (!FdOrErr)
  240. return FdOrErr.takeError();
  241. uint64_t FileSize;
  242. if (auto EC = sys::fs::file_size(Filename, FileSize))
  243. return make_error<StringError>(
  244. Twine("Cannot get filesize of '") + Filename + "'", EC);
  245. std::error_code EC;
  246. sys::fs::mapped_file_region MappedFile(
  247. *FdOrErr, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0,
  248. EC);
  249. sys::fs::closeFile(*FdOrErr);
  250. if (EC)
  251. return make_error<StringError>(
  252. Twine("Cannot mmap profile '") + Filename + "'", EC);
  253. StringRef Data(MappedFile.data(), MappedFile.size());
  254. Profile P;
  255. uint64_t Offset = 0;
  256. DataExtractor Extractor(Data, true, 8);
  257. // For each block we get from the file:
  258. while (Offset != MappedFile.size()) {
  259. auto HeaderOrError = readBlockHeader(Extractor, Offset);
  260. if (!HeaderOrError)
  261. return HeaderOrError.takeError();
  262. // TODO: Maybe store this header information for each block, even just for
  263. // debugging?
  264. const auto &Header = HeaderOrError.get();
  265. // Read in the path data.
  266. auto PathOrError = readPath(Extractor, Offset);
  267. if (!PathOrError)
  268. return PathOrError.takeError();
  269. const auto &Path = PathOrError.get();
  270. // For each path we encounter, we should intern it to get a PathID.
  271. auto DataOrError = readData(Extractor, Offset);
  272. if (!DataOrError)
  273. return DataOrError.takeError();
  274. auto &Data = DataOrError.get();
  275. if (auto E =
  276. P.addBlock(Profile::Block{Profile::ThreadID{Header.Thread},
  277. {{P.internPath(Path), std::move(Data)}}}))
  278. return std::move(E);
  279. }
  280. return P;
  281. }
  282. namespace {
  283. struct StackEntry {
  284. uint64_t Timestamp;
  285. Profile::FuncID FuncId;
  286. };
  287. } // namespace
  288. Expected<Profile> profileFromTrace(const Trace &T) {
  289. Profile P;
  290. // The implementation of the algorithm re-creates the execution of
  291. // the functions based on the trace data. To do this, we set up a number of
  292. // data structures to track the execution context of every thread in the
  293. // Trace.
  294. DenseMap<Profile::ThreadID, std::vector<StackEntry>> ThreadStacks;
  295. DenseMap<Profile::ThreadID, DenseMap<Profile::PathID, Profile::Data>>
  296. ThreadPathData;
  297. // We then do a pass through the Trace to account data on a per-thread-basis.
  298. for (const auto &E : T) {
  299. auto &TSD = ThreadStacks[E.TId];
  300. switch (E.Type) {
  301. case RecordTypes::ENTER:
  302. case RecordTypes::ENTER_ARG:
  303. // Push entries into the function call stack.
  304. TSD.push_back({E.TSC, E.FuncId});
  305. break;
  306. case RecordTypes::EXIT:
  307. case RecordTypes::TAIL_EXIT:
  308. // Exits cause some accounting to happen, based on the state of the stack.
  309. // For each function we pop off the stack, we take note of the path and
  310. // record the cumulative state for this path. As we're doing this, we
  311. // intern the path into the Profile.
  312. while (!TSD.empty()) {
  313. auto Top = TSD.back();
  314. auto FunctionLocalTime = AbsoluteDifference(Top.Timestamp, E.TSC);
  315. SmallVector<Profile::FuncID, 16> Path;
  316. transform(reverse(TSD), std::back_inserter(Path),
  317. std::mem_fn(&StackEntry::FuncId));
  318. auto InternedPath = P.internPath(Path);
  319. auto &TPD = ThreadPathData[E.TId][InternedPath];
  320. ++TPD.CallCount;
  321. TPD.CumulativeLocalTime += FunctionLocalTime;
  322. TSD.pop_back();
  323. // If we've matched the corresponding entry event for this function,
  324. // then we exit the loop.
  325. if (Top.FuncId == E.FuncId)
  326. break;
  327. // FIXME: Consider the intermediate times and the cumulative tree time
  328. // as well.
  329. }
  330. break;
  331. case RecordTypes::CUSTOM_EVENT:
  332. case RecordTypes::TYPED_EVENT:
  333. // TODO: Support an extension point to allow handling of custom and typed
  334. // events in profiles.
  335. break;
  336. }
  337. }
  338. // Once we've gone through the Trace, we now create one Block per thread in
  339. // the Profile.
  340. for (const auto &ThreadPaths : ThreadPathData) {
  341. const auto &TID = ThreadPaths.first;
  342. const auto &PathsData = ThreadPaths.second;
  343. if (auto E = P.addBlock({
  344. TID,
  345. std::vector<std::pair<Profile::PathID, Profile::Data>>(
  346. PathsData.begin(), PathsData.end()),
  347. }))
  348. return std::move(E);
  349. }
  350. return P;
  351. }
  352. } // namespace xray
  353. } // namespace llvm