123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403 |
- //===- Profile.cpp - XRay Profile Abstraction -----------------------------===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- //
- // Defines the XRay Profile class representing the latency profile generated by
- // XRay's profiling mode.
- //
- //===----------------------------------------------------------------------===//
- #include "llvm/XRay/Profile.h"
- #include "llvm/Support/DataExtractor.h"
- #include "llvm/Support/Error.h"
- #include "llvm/Support/FileSystem.h"
- #include "llvm/XRay/Trace.h"
- #include <deque>
- #include <memory>
- namespace llvm {
- namespace xray {
- Profile::Profile(const Profile &O) {
- // We need to re-create all the tries from the original (O), into the current
- // Profile being initialized, through the Block instances we see.
- for (const auto &Block : O) {
- Blocks.push_back({Block.Thread, {}});
- auto &B = Blocks.back();
- for (const auto &PathData : Block.PathData)
- B.PathData.push_back({internPath(cantFail(O.expandPath(PathData.first))),
- PathData.second});
- }
- }
- Profile &Profile::operator=(const Profile &O) {
- Profile P = O;
- *this = std::move(P);
- return *this;
- }
- namespace {
- struct BlockHeader {
- uint32_t Size;
- uint32_t Number;
- uint64_t Thread;
- };
- static Expected<BlockHeader> readBlockHeader(DataExtractor &Extractor,
- uint64_t &Offset) {
- BlockHeader H;
- uint64_t CurrentOffset = Offset;
- H.Size = Extractor.getU32(&Offset);
- if (Offset == CurrentOffset)
- return make_error<StringError>(
- Twine("Error parsing block header size at offset '") +
- Twine(CurrentOffset) + "'",
- std::make_error_code(std::errc::invalid_argument));
- CurrentOffset = Offset;
- H.Number = Extractor.getU32(&Offset);
- if (Offset == CurrentOffset)
- return make_error<StringError>(
- Twine("Error parsing block header number at offset '") +
- Twine(CurrentOffset) + "'",
- std::make_error_code(std::errc::invalid_argument));
- CurrentOffset = Offset;
- H.Thread = Extractor.getU64(&Offset);
- if (Offset == CurrentOffset)
- return make_error<StringError>(
- Twine("Error parsing block header thread id at offset '") +
- Twine(CurrentOffset) + "'",
- std::make_error_code(std::errc::invalid_argument));
- return H;
- }
- static Expected<std::vector<Profile::FuncID>> readPath(DataExtractor &Extractor,
- uint64_t &Offset) {
- // We're reading a sequence of int32_t's until we find a 0.
- std::vector<Profile::FuncID> Path;
- auto CurrentOffset = Offset;
- int32_t FuncId;
- do {
- FuncId = Extractor.getSigned(&Offset, 4);
- if (CurrentOffset == Offset)
- return make_error<StringError>(
- Twine("Error parsing path at offset '") + Twine(CurrentOffset) + "'",
- std::make_error_code(std::errc::invalid_argument));
- CurrentOffset = Offset;
- Path.push_back(FuncId);
- } while (FuncId != 0);
- return std::move(Path);
- }
- static Expected<Profile::Data> readData(DataExtractor &Extractor,
- uint64_t &Offset) {
- // We expect a certain number of elements for Data:
- // - A 64-bit CallCount
- // - A 64-bit CumulativeLocalTime counter
- Profile::Data D;
- auto CurrentOffset = Offset;
- D.CallCount = Extractor.getU64(&Offset);
- if (CurrentOffset == Offset)
- return make_error<StringError>(
- Twine("Error parsing call counts at offset '") + Twine(CurrentOffset) +
- "'",
- std::make_error_code(std::errc::invalid_argument));
- CurrentOffset = Offset;
- D.CumulativeLocalTime = Extractor.getU64(&Offset);
- if (CurrentOffset == Offset)
- return make_error<StringError>(
- Twine("Error parsing cumulative local time at offset '") +
- Twine(CurrentOffset) + "'",
- std::make_error_code(std::errc::invalid_argument));
- return D;
- }
- } // namespace
- Error Profile::addBlock(Block &&B) {
- if (B.PathData.empty())
- return make_error<StringError>(
- "Block may not have empty path data.",
- std::make_error_code(std::errc::invalid_argument));
- Blocks.emplace_back(std::move(B));
- return Error::success();
- }
- Expected<std::vector<Profile::FuncID>> Profile::expandPath(PathID P) const {
- auto It = PathIDMap.find(P);
- if (It == PathIDMap.end())
- return make_error<StringError>(
- Twine("PathID not found: ") + Twine(P),
- std::make_error_code(std::errc::invalid_argument));
- std::vector<Profile::FuncID> Path;
- for (auto Node = It->second; Node; Node = Node->Caller)
- Path.push_back(Node->Func);
- return std::move(Path);
- }
- Profile::PathID Profile::internPath(ArrayRef<FuncID> P) {
- if (P.empty())
- return 0;
- auto RootToLeafPath = reverse(P);
- // Find the root.
- auto It = RootToLeafPath.begin();
- auto PathRoot = *It++;
- auto RootIt =
- find_if(Roots, [PathRoot](TrieNode *N) { return N->Func == PathRoot; });
- // If we've not seen this root before, remember it.
- TrieNode *Node = nullptr;
- if (RootIt == Roots.end()) {
- NodeStorage.emplace_back();
- Node = &NodeStorage.back();
- Node->Func = PathRoot;
- Roots.push_back(Node);
- } else {
- Node = *RootIt;
- }
- // Now traverse the path, re-creating if necessary.
- while (It != RootToLeafPath.end()) {
- auto NodeFuncID = *It++;
- auto CalleeIt = find_if(Node->Callees, [NodeFuncID](TrieNode *N) {
- return N->Func == NodeFuncID;
- });
- if (CalleeIt == Node->Callees.end()) {
- NodeStorage.emplace_back();
- auto NewNode = &NodeStorage.back();
- NewNode->Func = NodeFuncID;
- NewNode->Caller = Node;
- Node->Callees.push_back(NewNode);
- Node = NewNode;
- } else {
- Node = *CalleeIt;
- }
- }
- // At this point, Node *must* be pointing at the leaf.
- assert(Node->Func == P.front());
- if (Node->ID == 0) {
- Node->ID = NextID++;
- PathIDMap.insert({Node->ID, Node});
- }
- return Node->ID;
- }
- Profile mergeProfilesByThread(const Profile &L, const Profile &R) {
- Profile Merged;
- using PathDataMap = DenseMap<Profile::PathID, Profile::Data>;
- using PathDataMapPtr = std::unique_ptr<PathDataMap>;
- using PathDataVector = decltype(Profile::Block::PathData);
- using ThreadProfileIndexMap = DenseMap<Profile::ThreadID, PathDataMapPtr>;
- ThreadProfileIndexMap ThreadProfileIndex;
- for (const auto &P : {std::ref(L), std::ref(R)})
- for (const auto &Block : P.get()) {
- ThreadProfileIndexMap::iterator It;
- std::tie(It, std::ignore) = ThreadProfileIndex.insert(
- {Block.Thread, PathDataMapPtr{new PathDataMap()}});
- for (const auto &PathAndData : Block.PathData) {
- auto &PathID = PathAndData.first;
- auto &Data = PathAndData.second;
- auto NewPathID =
- Merged.internPath(cantFail(P.get().expandPath(PathID)));
- PathDataMap::iterator PathDataIt;
- bool Inserted;
- std::tie(PathDataIt, Inserted) = It->second->insert({NewPathID, Data});
- if (!Inserted) {
- auto &ExistingData = PathDataIt->second;
- ExistingData.CallCount += Data.CallCount;
- ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime;
- }
- }
- }
- for (const auto &IndexedThreadBlock : ThreadProfileIndex) {
- PathDataVector PathAndData;
- PathAndData.reserve(IndexedThreadBlock.second->size());
- copy(*IndexedThreadBlock.second, std::back_inserter(PathAndData));
- cantFail(
- Merged.addBlock({IndexedThreadBlock.first, std::move(PathAndData)}));
- }
- return Merged;
- }
- Profile mergeProfilesByStack(const Profile &L, const Profile &R) {
- Profile Merged;
- using PathDataMap = DenseMap<Profile::PathID, Profile::Data>;
- PathDataMap PathData;
- using PathDataVector = decltype(Profile::Block::PathData);
- for (const auto &P : {std::ref(L), std::ref(R)})
- for (const auto &Block : P.get())
- for (const auto &PathAndData : Block.PathData) {
- auto &PathId = PathAndData.first;
- auto &Data = PathAndData.second;
- auto NewPathID =
- Merged.internPath(cantFail(P.get().expandPath(PathId)));
- PathDataMap::iterator PathDataIt;
- bool Inserted;
- std::tie(PathDataIt, Inserted) = PathData.insert({NewPathID, Data});
- if (!Inserted) {
- auto &ExistingData = PathDataIt->second;
- ExistingData.CallCount += Data.CallCount;
- ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime;
- }
- }
- // In the end there's a single Block, for thread 0.
- PathDataVector Block;
- Block.reserve(PathData.size());
- copy(PathData, std::back_inserter(Block));
- cantFail(Merged.addBlock({0, std::move(Block)}));
- return Merged;
- }
- Expected<Profile> loadProfile(StringRef Filename) {
- Expected<sys::fs::file_t> FdOrErr = sys::fs::openNativeFileForRead(Filename);
- if (!FdOrErr)
- return FdOrErr.takeError();
- uint64_t FileSize;
- if (auto EC = sys::fs::file_size(Filename, FileSize))
- return make_error<StringError>(
- Twine("Cannot get filesize of '") + Filename + "'", EC);
- std::error_code EC;
- sys::fs::mapped_file_region MappedFile(
- *FdOrErr, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0,
- EC);
- sys::fs::closeFile(*FdOrErr);
- if (EC)
- return make_error<StringError>(
- Twine("Cannot mmap profile '") + Filename + "'", EC);
- StringRef Data(MappedFile.data(), MappedFile.size());
- Profile P;
- uint64_t Offset = 0;
- DataExtractor Extractor(Data, true, 8);
- // For each block we get from the file:
- while (Offset != MappedFile.size()) {
- auto HeaderOrError = readBlockHeader(Extractor, Offset);
- if (!HeaderOrError)
- return HeaderOrError.takeError();
- // TODO: Maybe store this header information for each block, even just for
- // debugging?
- const auto &Header = HeaderOrError.get();
- // Read in the path data.
- auto PathOrError = readPath(Extractor, Offset);
- if (!PathOrError)
- return PathOrError.takeError();
- const auto &Path = PathOrError.get();
- // For each path we encounter, we should intern it to get a PathID.
- auto DataOrError = readData(Extractor, Offset);
- if (!DataOrError)
- return DataOrError.takeError();
- auto &Data = DataOrError.get();
- if (auto E =
- P.addBlock(Profile::Block{Profile::ThreadID{Header.Thread},
- {{P.internPath(Path), std::move(Data)}}}))
- return std::move(E);
- }
- return P;
- }
- namespace {
- struct StackEntry {
- uint64_t Timestamp;
- Profile::FuncID FuncId;
- };
- } // namespace
- Expected<Profile> profileFromTrace(const Trace &T) {
- Profile P;
- // The implementation of the algorithm re-creates the execution of
- // the functions based on the trace data. To do this, we set up a number of
- // data structures to track the execution context of every thread in the
- // Trace.
- DenseMap<Profile::ThreadID, std::vector<StackEntry>> ThreadStacks;
- DenseMap<Profile::ThreadID, DenseMap<Profile::PathID, Profile::Data>>
- ThreadPathData;
- // We then do a pass through the Trace to account data on a per-thread-basis.
- for (const auto &E : T) {
- auto &TSD = ThreadStacks[E.TId];
- switch (E.Type) {
- case RecordTypes::ENTER:
- case RecordTypes::ENTER_ARG:
- // Push entries into the function call stack.
- TSD.push_back({E.TSC, E.FuncId});
- break;
- case RecordTypes::EXIT:
- case RecordTypes::TAIL_EXIT:
- // Exits cause some accounting to happen, based on the state of the stack.
- // For each function we pop off the stack, we take note of the path and
- // record the cumulative state for this path. As we're doing this, we
- // intern the path into the Profile.
- while (!TSD.empty()) {
- auto Top = TSD.back();
- auto FunctionLocalTime = AbsoluteDifference(Top.Timestamp, E.TSC);
- SmallVector<Profile::FuncID, 16> Path;
- transform(reverse(TSD), std::back_inserter(Path),
- std::mem_fn(&StackEntry::FuncId));
- auto InternedPath = P.internPath(Path);
- auto &TPD = ThreadPathData[E.TId][InternedPath];
- ++TPD.CallCount;
- TPD.CumulativeLocalTime += FunctionLocalTime;
- TSD.pop_back();
- // If we've matched the corresponding entry event for this function,
- // then we exit the loop.
- if (Top.FuncId == E.FuncId)
- break;
- // FIXME: Consider the intermediate times and the cumulative tree time
- // as well.
- }
- break;
- case RecordTypes::CUSTOM_EVENT:
- case RecordTypes::TYPED_EVENT:
- // TODO: Support an extension point to allow handling of custom and typed
- // events in profiles.
- break;
- }
- }
- // Once we've gone through the Trace, we now create one Block per thread in
- // the Profile.
- for (const auto &ThreadPaths : ThreadPathData) {
- const auto &TID = ThreadPaths.first;
- const auto &PathsData = ThreadPaths.second;
- if (auto E = P.addBlock({
- TID,
- std::vector<std::pair<Profile::PathID, Profile::Data>>(
- PathsData.begin(), PathsData.end()),
- }))
- return std::move(E);
- }
- return P;
- }
- } // namespace xray
- } // namespace llvm
|