12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728 |
- //===-- PerfReader.h - perfscript reader -----------------------*- C++ -*-===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- #ifndef LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H
- #define LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H
- #include "ErrorHandling.h"
- #include "ProfiledBinary.h"
- #include "llvm/Support/Casting.h"
- #include "llvm/Support/CommandLine.h"
- #include "llvm/Support/Regex.h"
- #include <cstdint>
- #include <fstream>
- #include <list>
- #include <map>
- #include <vector>
- using namespace llvm;
- using namespace sampleprof;
- namespace llvm {
- namespace sampleprof {
- // Stream based trace line iterator
- class TraceStream {
- std::string CurrentLine;
- std::ifstream Fin;
- bool IsAtEoF = false;
- uint64_t LineNumber = 0;
- public:
- TraceStream(StringRef Filename) : Fin(Filename.str()) {
- if (!Fin.good())
- exitWithError("Error read input perf script file", Filename);
- advance();
- }
- StringRef getCurrentLine() {
- assert(!IsAtEoF && "Line iterator reaches the End-of-File!");
- return CurrentLine;
- }
- uint64_t getLineNumber() { return LineNumber; }
- bool isAtEoF() { return IsAtEoF; }
- // Read the next line
- void advance() {
- if (!std::getline(Fin, CurrentLine)) {
- IsAtEoF = true;
- return;
- }
- LineNumber++;
- }
- };
// Kind of input file handed to the reader.
enum PerfFormat {
  UnknownFormat = 0,
  // Raw linux perf.data.
  PerfData = 1,
  // Perf script created by the `perf script` command.
  PerfScript = 2,
  // Unsymbolized profile generated by llvm-profgen.
  UnsymbolizedProfile = 3,
};

// Kind of sample content found in a perf script.
enum PerfContent {
  UnknownContent = 0,
  // Only LBR sample.
  LBR = 1,
  // Hybrid sample including call stack and LBR stack.
  LBRStack = 2,
};

// An input file path together with its format and content kind; both kinds
// start out as unknown.
struct PerfInputFile {
  std::string InputFile;
  PerfFormat Format = UnknownFormat;
  PerfContent Content = UnknownContent;
};
- // The parsed LBR sample entry.
- struct LBREntry {
- uint64_t Source = 0;
- uint64_t Target = 0;
- // An artificial branch stands for a series of consecutive branches starting
- // from the current binary with a transition through external code and
- // eventually landing back in the current binary.
- bool IsArtificial = false;
- LBREntry(uint64_t S, uint64_t T, bool I)
- : Source(S), Target(T), IsArtificial(I) {}
- #ifndef NDEBUG
- void print() const {
- dbgs() << "from " << format("%#010x", Source) << " to "
- << format("%#010x", Target);
- if (IsArtificial)
- dbgs() << " Artificial";
- }
- #endif
- };
- #ifndef NDEBUG
- static inline void printLBRStack(const SmallVectorImpl<LBREntry> &LBRStack) {
- for (size_t I = 0; I < LBRStack.size(); I++) {
- dbgs() << "[" << I << "] ";
- LBRStack[I].print();
- dbgs() << "\n";
- }
- }
- static inline void printCallStack(const SmallVectorImpl<uint64_t> &CallStack) {
- for (size_t I = 0; I < CallStack.size(); I++) {
- dbgs() << "[" << I << "] " << format("%#010x", CallStack[I]) << "\n";
- }
- }
- #endif
// Hashing adapter for shared data of type T, usable as a hash-map key.
// T must provide a \fn getHashCode and a \fn isEqual.
// getHashCode is intentionally called non-virtually to avoid vtable overhead:
// the derived class computes its hash explicitly and stores it in the base
// class's HashCode. That also leaves room for computing the hash
// incrementally (like a rolling hash) during frame stack unwinding, since
// unwinding only changes the leaf of the frame stack. \fn isEqual is virtual
// and therefore has some perf overhead. If a better hash function is designed
// in the future, the comparison could be skipped or made non-virtual (e.g.
// ignoring the comparison when the hash conflict probability is low).
template <class T> class Hashable {
public:
  std::shared_ptr<T> Data;

  Hashable(const std::shared_ptr<T> &D) : Data(D) {}

  // Hasher functor: forwards to the wrapped object's getHashCode.
  struct Hash {
    uint64_t operator()(const Hashable<T> &Key) const {
      // getHashCode is deliberately not virtual.
      const uint64_t Code = Key.Data->getHashCode();
      assert(Code && "Should generate HashCode for it!");
      return Code;
    }
  };

  // Equality functor: forwards to the wrapped object's isEqual.
  struct Equal {
    bool operator()(const Hashable<T> &LHS, const Hashable<T> &RHS) const {
      // Exact comparison of the data; the virtual call has some overhead.
      const T *Other = RHS.Data.get();
      return LHS.Data->isEqual(Other);
    }
  };

  // Raw pointer to the wrapped object.
  T *getPtr() const { return Data.get(); }
};
- struct PerfSample {
- // LBR stack recorded in FIFO order.
- SmallVector<LBREntry, 16> LBRStack;
- // Call stack recorded in FILO(leaf to root) order, it's used for CS-profile
- // generation
- SmallVector<uint64_t, 16> CallStack;
- virtual ~PerfSample() = default;
- uint64_t getHashCode() const {
- // Use simple DJB2 hash
- auto HashCombine = [](uint64_t H, uint64_t V) {
- return ((H << 5) + H) + V;
- };
- uint64_t Hash = 5381;
- for (const auto &Value : CallStack) {
- Hash = HashCombine(Hash, Value);
- }
- for (const auto &Entry : LBRStack) {
- Hash = HashCombine(Hash, Entry.Source);
- Hash = HashCombine(Hash, Entry.Target);
- }
- return Hash;
- }
- bool isEqual(const PerfSample *Other) const {
- const SmallVector<uint64_t, 16> &OtherCallStack = Other->CallStack;
- const SmallVector<LBREntry, 16> &OtherLBRStack = Other->LBRStack;
- if (CallStack.size() != OtherCallStack.size() ||
- LBRStack.size() != OtherLBRStack.size())
- return false;
- if (!std::equal(CallStack.begin(), CallStack.end(), OtherCallStack.begin()))
- return false;
- for (size_t I = 0; I < OtherLBRStack.size(); I++) {
- if (LBRStack[I].Source != OtherLBRStack[I].Source ||
- LBRStack[I].Target != OtherLBRStack[I].Target)
- return false;
- }
- return true;
- }
- #ifndef NDEBUG
- void print() const {
- dbgs() << "LBR stack\n";
- printLBRStack(LBRStack);
- dbgs() << "Call stack\n";
- printCallStack(CallStack);
- }
- #endif
- };
- // After parsing the sample, we record the samples by aggregating them
- // into this counter. The key stores the sample data and the value is
- // the sample repeat times.
- using AggregatedCounter =
- std::unordered_map<Hashable<PerfSample>, uint64_t,
- Hashable<PerfSample>::Hash, Hashable<PerfSample>::Equal>;
- using SampleVector = SmallVector<std::tuple<uint64_t, uint64_t, uint64_t>, 16>;
- // The state for the unwinder, it doesn't hold the data but only keep the
- // pointer/index of the data, While unwinding, the CallStack is changed
- // dynamicially and will be recorded as the context of the sample
- struct UnwindState {
- // Profiled binary that current frame address belongs to
- const ProfiledBinary *Binary;
- // Call stack trie node
- struct ProfiledFrame {
- const uint64_t Address = DummyRoot;
- ProfiledFrame *Parent;
- SampleVector RangeSamples;
- SampleVector BranchSamples;
- std::unordered_map<uint64_t, std::unique_ptr<ProfiledFrame>> Children;
- ProfiledFrame(uint64_t Addr = 0, ProfiledFrame *P = nullptr)
- : Address(Addr), Parent(P) {}
- ProfiledFrame *getOrCreateChildFrame(uint64_t Address) {
- assert(Address && "Address can't be zero!");
- auto Ret = Children.emplace(
- Address, std::make_unique<ProfiledFrame>(Address, this));
- return Ret.first->second.get();
- }
- void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Count) {
- RangeSamples.emplace_back(std::make_tuple(Start, End, Count));
- }
- void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Count) {
- BranchSamples.emplace_back(std::make_tuple(Source, Target, Count));
- }
- bool isDummyRoot() { return Address == DummyRoot; }
- bool isExternalFrame() { return Address == ExternalAddr; }
- bool isLeafFrame() { return Children.empty(); }
- };
- ProfiledFrame DummyTrieRoot;
- ProfiledFrame *CurrentLeafFrame;
- // Used to fall through the LBR stack
- uint32_t LBRIndex = 0;
- // Reference to PerfSample.LBRStack
- const SmallVector<LBREntry, 16> &LBRStack;
- // Used to iterate the address range
- InstructionPointer InstPtr;
- UnwindState(const PerfSample *Sample, const ProfiledBinary *Binary)
- : Binary(Binary), LBRStack(Sample->LBRStack),
- InstPtr(Binary, Sample->CallStack.front()) {
- initFrameTrie(Sample->CallStack);
- }
- bool validateInitialState() {
- uint64_t LBRLeaf = LBRStack[LBRIndex].Target;
- uint64_t LeafAddr = CurrentLeafFrame->Address;
- assert((LBRLeaf != ExternalAddr || LBRLeaf == LeafAddr) &&
- "External leading LBR should match the leaf frame.");
- // When we take a stack sample, ideally the sampling distance between the
- // leaf IP of stack and the last LBR target shouldn't be very large.
- // Use a heuristic size (0x100) to filter out broken records.
- if (LeafAddr < LBRLeaf || LeafAddr >= LBRLeaf + 0x100) {
- WithColor::warning() << "Bogus trace: stack tip = "
- << format("%#010x", LeafAddr)
- << ", LBR tip = " << format("%#010x\n", LBRLeaf);
- return false;
- }
- return true;
- }
- void checkStateConsistency() {
- assert(InstPtr.Address == CurrentLeafFrame->Address &&
- "IP should align with context leaf");
- }
- bool hasNextLBR() const { return LBRIndex < LBRStack.size(); }
- uint64_t getCurrentLBRSource() const { return LBRStack[LBRIndex].Source; }
- uint64_t getCurrentLBRTarget() const { return LBRStack[LBRIndex].Target; }
- const LBREntry &getCurrentLBR() const { return LBRStack[LBRIndex]; }
- bool IsLastLBR() const { return LBRIndex == 0; }
- bool getLBRStackSize() const { return LBRStack.size(); }
- void advanceLBR() { LBRIndex++; }
- ProfiledFrame *getParentFrame() { return CurrentLeafFrame->Parent; }
- void pushFrame(uint64_t Address) {
- CurrentLeafFrame = CurrentLeafFrame->getOrCreateChildFrame(Address);
- }
- void switchToFrame(uint64_t Address) {
- if (CurrentLeafFrame->Address == Address)
- return;
- CurrentLeafFrame = CurrentLeafFrame->Parent->getOrCreateChildFrame(Address);
- }
- void popFrame() { CurrentLeafFrame = CurrentLeafFrame->Parent; }
- void clearCallStack() { CurrentLeafFrame = &DummyTrieRoot; }
- void initFrameTrie(const SmallVectorImpl<uint64_t> &CallStack) {
- ProfiledFrame *Cur = &DummyTrieRoot;
- for (auto Address : reverse(CallStack)) {
- Cur = Cur->getOrCreateChildFrame(Address);
- }
- CurrentLeafFrame = Cur;
- }
- ProfiledFrame *getDummyRootPtr() { return &DummyTrieRoot; }
- };
// Common base of the context keys used to index sample counters.
struct ContextKey {
  // Cached hash; zero means it has not been generated yet.
  uint64_t HashCode = 0;

  virtual ~ContextKey() = default;

  // Return the cached hash, generating it first if it is still zero.
  uint64_t getHashCode() {
    if (!HashCode)
      genHashCode();
    return HashCode;
  }

  // Compute the hash and store it into HashCode.
  virtual void genHashCode() = 0;

  // Default equality only compares the cached hash codes.
  virtual bool isEqual(const ContextKey *K) const {
    return K->HashCode == HashCode;
  }

  // Utilities for LLVM-style RTTI
  enum ContextKind { CK_StringBased, CK_ProbeBased };
  const ContextKind Kind;
  ContextKind getKind() const { return Kind; }

  ContextKey(ContextKind K) : Kind(K) {}
};
- // String based context id
- struct StringBasedCtxKey : public ContextKey {
- SampleContextFrameVector Context;
- bool WasLeafInlined;
- StringBasedCtxKey() : ContextKey(CK_StringBased), WasLeafInlined(false){};
- static bool classof(const ContextKey *K) {
- return K->getKind() == CK_StringBased;
- }
- bool isEqual(const ContextKey *K) const override {
- const StringBasedCtxKey *Other = dyn_cast<StringBasedCtxKey>(K);
- return Context == Other->Context;
- }
- void genHashCode() override {
- HashCode = hash_value(SampleContextFrames(Context));
- }
- };
- // Probe based context key as the intermediate key of context
- // String based context key will introduce redundant string handling
- // since the callee context is inferred from the context string which
- // need to be splitted by '@' to get the last location frame, so we
- // can just use probe instead and generate the string in the end.
- struct ProbeBasedCtxKey : public ContextKey {
- SmallVector<const MCDecodedPseudoProbe *, 16> Probes;
- ProbeBasedCtxKey() : ContextKey(CK_ProbeBased) {}
- static bool classof(const ContextKey *K) {
- return K->getKind() == CK_ProbeBased;
- }
- bool isEqual(const ContextKey *K) const override {
- const ProbeBasedCtxKey *O = dyn_cast<ProbeBasedCtxKey>(K);
- assert(O != nullptr && "Probe based key shouldn't be null in isEqual");
- return std::equal(Probes.begin(), Probes.end(), O->Probes.begin(),
- O->Probes.end());
- }
- void genHashCode() override {
- for (const auto *P : Probes) {
- HashCode = hash_combine(HashCode, P);
- }
- if (HashCode == 0) {
- // Avoid zero value of HashCode when it's an empty list
- HashCode = 1;
- }
- }
- };
// Branch sample counts for one function, keyed by the branch expressed as a
// (source offset, target offset) pair.
using BranchSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>;

// Range sample counts for one function, keyed by the range expressed as a
// (start offset, end offset) pair.
using RangeSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>;

// Bundles the range counter and the branch counter.
struct SampleCounter {
  RangeSample RangeCounter;
  BranchSample BranchCounter;

  // Add \p Repeat to the count of the range [Start, End]; Start must not be
  // greater than End.
  void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Repeat) {
    assert(Start <= End && "Invalid instruction range");
    const std::pair<uint64_t, uint64_t> Range(Start, End);
    RangeCounter[Range] += Repeat;
  }

  // Add \p Repeat to the count of the branch Source -> Target.
  void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Repeat) {
    const std::pair<uint64_t, uint64_t> Branch(Source, Target);
    BranchCounter[Branch] += Repeat;
  }
};
- // Sample counter with context to support context-sensitive profile
- using ContextSampleCounterMap =
- std::unordered_map<Hashable<ContextKey>, SampleCounter,
- Hashable<ContextKey>::Hash, Hashable<ContextKey>::Equal>;
- struct FrameStack {
- SmallVector<uint64_t, 16> Stack;
- ProfiledBinary *Binary;
- FrameStack(ProfiledBinary *B) : Binary(B) {}
- bool pushFrame(UnwindState::ProfiledFrame *Cur) {
- assert(!Cur->isExternalFrame() &&
- "External frame's not expected for context stack.");
- Stack.push_back(Cur->Address);
- return true;
- }
- void popFrame() {
- if (!Stack.empty())
- Stack.pop_back();
- }
- std::shared_ptr<StringBasedCtxKey> getContextKey();
- };
- struct ProbeStack {
- SmallVector<const MCDecodedPseudoProbe *, 16> Stack;
- ProfiledBinary *Binary;
- ProbeStack(ProfiledBinary *B) : Binary(B) {}
- bool pushFrame(UnwindState::ProfiledFrame *Cur) {
- assert(!Cur->isExternalFrame() &&
- "External frame's not expected for context stack.");
- const MCDecodedPseudoProbe *CallProbe =
- Binary->getCallProbeForAddr(Cur->Address);
- // We may not find a probe for a merged or external callsite.
- // Callsite merging may cause the loss of original probe IDs.
- // Cutting off the context from here since the inliner will
- // not know how to consume a context with unknown callsites.
- if (!CallProbe)
- return false;
- Stack.push_back(CallProbe);
- return true;
- }
- void popFrame() {
- if (!Stack.empty())
- Stack.pop_back();
- }
- // Use pseudo probe based context key to get the sample counter
- // A context stands for a call path from 'main' to an uninlined
- // callee with all inline frames recovered on that path. The probes
- // belonging to that call path is the probes either originated from
- // the callee or from any functions inlined into the callee. Since
- // pseudo probes are organized in a tri-tree style after decoded,
- // the tree path from the tri-tree root (which is the uninlined
- // callee) to the probe node forms an inline context.
- // Here we use a list of probe(pointer) as the context key to speed up
- // aggregation and the final context string will be generate in
- // ProfileGenerator
- std::shared_ptr<ProbeBasedCtxKey> getContextKey();
- };
- /*
- As in hybrid sample we have a group of LBRs and the most recent sampling call
- stack, we can walk through those LBRs to infer more call stacks which would be
- used as context for profile. VirtualUnwinder is the class to do the call stack
- unwinding based on LBR state. Two types of unwinding are processed here:
- 1) LBR unwinding and 2) linear range unwinding.
- Specifically, for each LBR entry(can be classified into call, return, regular
- branch), LBR unwinding will replay the operation by pushing, popping or
- switching leaf frame towards the call stack and since the initial call stack
- is most recently sampled, the replay should be in anti-execution order, i.e. for
- the regular case, pop the call stack when LBR is call, push frame on call stack
- when LBR is return. After each LBR processed, it also needs to align with the
- next LBR by going through instructions from previous LBR's target to current
- LBR's source, which is the linear unwinding. As instruction from linear range
- can come from different function by inlining, linear unwinding will do the range
- splitting and record counters by the range with same inline context. Over those
- unwinding process we will record each call stack as context id and LBR/linear
- range as sample counter for further CS profile generation.
- */
- class VirtualUnwinder {
- public:
- VirtualUnwinder(ContextSampleCounterMap *Counter, ProfiledBinary *B)
- : CtxCounterMap(Counter), Binary(B) {}
- bool unwind(const PerfSample *Sample, uint64_t Repeat);
- std::set<uint64_t> &getUntrackedCallsites() { return UntrackedCallsites; }
- uint64_t NumTotalBranches = 0;
- uint64_t NumExtCallBranch = 0;
- uint64_t NumMissingExternalFrame = 0;
- uint64_t NumMismatchedProEpiBranch = 0;
- uint64_t NumMismatchedExtCallBranch = 0;
- private:
- bool isCallState(UnwindState &State) const {
- // The tail call frame is always missing here in stack sample, we will
- // use a specific tail call tracker to infer it.
- return Binary->addressIsCall(State.getCurrentLBRSource());
- }
- bool isReturnState(UnwindState &State) const {
- // Simply check addressIsReturn, as ret is always reliable, both for
- // regular call and tail call.
- if (!Binary->addressIsReturn(State.getCurrentLBRSource()))
- return false;
- // In a callback case, a return from internal code, say A, to external
- // runtime can happen. The external runtime can then call back to
- // another internal routine, say B. Making an artificial branch that
- // looks like a return from A to B can confuse the unwinder to treat
- // the instruction before B as the call instruction. Here we detect this
- // case if the return target is not the next inst of call inst, then we just
- // do not treat it as a return.
- uint64_t CallAddr =
- Binary->getCallAddrFromFrameAddr(State.getCurrentLBRTarget());
- return (CallAddr != 0);
- }
- void unwindCall(UnwindState &State);
- void unwindLinear(UnwindState &State, uint64_t Repeat);
- void unwindReturn(UnwindState &State);
- void unwindBranch(UnwindState &State);
- template <typename T>
- void collectSamplesFromFrame(UnwindState::ProfiledFrame *Cur, T &Stack);
- // Collect each samples on trie node by DFS traversal
- template <typename T>
- void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur, T &Stack);
- void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur);
- void recordRangeCount(uint64_t Start, uint64_t End, UnwindState &State,
- uint64_t Repeat);
- void recordBranchCount(const LBREntry &Branch, UnwindState &State,
- uint64_t Repeat);
- ContextSampleCounterMap *CtxCounterMap;
- // Profiled binary that current frame address belongs to
- ProfiledBinary *Binary;
- // Keep track of all untracked callsites
- std::set<uint64_t> UntrackedCallsites;
- };
- // Read perf trace to parse the events and samples.
- class PerfReaderBase {
- public:
- PerfReaderBase(ProfiledBinary *B, StringRef PerfTrace)
- : Binary(B), PerfTraceFile(PerfTrace) {
- // Initialize the base address to preferred address.
- Binary->setBaseAddress(Binary->getPreferredBaseAddress());
- };
- virtual ~PerfReaderBase() = default;
- static std::unique_ptr<PerfReaderBase> create(ProfiledBinary *Binary,
- PerfInputFile &PerfInput);
- // Entry of the reader to parse multiple perf traces
- virtual void parsePerfTraces() = 0;
- const ContextSampleCounterMap &getSampleCounters() const {
- return SampleCounters;
- }
- bool profileIsCSFlat() { return ProfileIsCSFlat; }
- protected:
- ProfiledBinary *Binary = nullptr;
- StringRef PerfTraceFile;
- ContextSampleCounterMap SampleCounters;
- bool ProfileIsCSFlat = false;
- uint64_t NumTotalSample = 0;
- uint64_t NumLeafExternalFrame = 0;
- uint64_t NumLeadingOutgoingLBR = 0;
- };
- // Read perf script to parse the events and samples.
- class PerfScriptReader : public PerfReaderBase {
- public:
- PerfScriptReader(ProfiledBinary *B, StringRef PerfTrace)
- : PerfReaderBase(B, PerfTrace){};
- // Entry of the reader to parse multiple perf traces
- virtual void parsePerfTraces() override;
- // Generate perf script from perf data
- static PerfInputFile convertPerfDataToTrace(ProfiledBinary *Binary,
- PerfInputFile &File);
- // Extract perf script type by peaking at the input
- static PerfContent checkPerfScriptType(StringRef FileName);
- protected:
- // The parsed MMap event
- struct MMapEvent {
- uint64_t PID = 0;
- uint64_t Address = 0;
- uint64_t Size = 0;
- uint64_t Offset = 0;
- StringRef BinaryPath;
- };
- // Check whether a given line is LBR sample
- static bool isLBRSample(StringRef Line);
- // Check whether a given line is MMAP event
- static bool isMMap2Event(StringRef Line);
- // Parse a single line of a PERF_RECORD_MMAP2 event looking for a
- // mapping between the binary name and its memory layout.
- static bool extractMMap2EventForBinary(ProfiledBinary *Binary, StringRef Line,
- MMapEvent &MMap);
- // Update base address based on mmap events
- void updateBinaryAddress(const MMapEvent &Event);
- // Parse mmap event and update binary address
- void parseMMap2Event(TraceStream &TraceIt);
- // Parse perf events/samples and do aggregation
- void parseAndAggregateTrace();
- // Parse either an MMAP event or a perf sample
- void parseEventOrSample(TraceStream &TraceIt);
- // Warn if the relevant mmap event is missing.
- void warnIfMissingMMap();
- // Emit accumulate warnings.
- void warnTruncatedStack();
- // Warn if range is invalid.
- void warnInvalidRange();
- // Extract call stack from the perf trace lines
- bool extractCallstack(TraceStream &TraceIt,
- SmallVectorImpl<uint64_t> &CallStack);
- // Extract LBR stack from one perf trace line
- bool extractLBRStack(TraceStream &TraceIt,
- SmallVectorImpl<LBREntry> &LBRStack);
- uint64_t parseAggregatedCount(TraceStream &TraceIt);
- // Parse one sample from multiple perf lines, override this for different
- // sample type
- void parseSample(TraceStream &TraceIt);
- // An aggregated count is given to indicate how many times the sample is
- // repeated.
- virtual void parseSample(TraceStream &TraceIt, uint64_t Count){};
- void computeCounterFromLBR(const PerfSample *Sample, uint64_t Repeat);
- // Post process the profile after trace aggregation, we will do simple range
- // overlap computation for AutoFDO, or unwind for CSSPGO(hybrid sample).
- virtual void generateUnsymbolizedProfile();
- void writeUnsymbolizedProfile(StringRef Filename);
- void writeUnsymbolizedProfile(raw_fd_ostream &OS);
- // Samples with the repeating time generated by the perf reader
- AggregatedCounter AggregatedSamples;
- // Keep track of all invalid return addresses
- std::set<uint64_t> InvalidReturnAddresses;
- };
- /*
- The reader of LBR only perf script.
- A typical LBR sample is like:
- 40062f 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
- ... 0x4005c8/0x4005dc/P/-/-/0
- */
- class LBRPerfReader : public PerfScriptReader {
- public:
- LBRPerfReader(ProfiledBinary *Binary, StringRef PerfTrace)
- : PerfScriptReader(Binary, PerfTrace){};
- // Parse the LBR only sample.
- virtual void parseSample(TraceStream &TraceIt, uint64_t Count) override;
- };
- /*
- Hybrid perf script includes a group of hybrid samples(LBRs + call stack),
- which is used to generate CS profile. An example of hybrid sample:
- 4005dc # call stack leaf
- 400634
- 400684 # call stack root
- 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
- ... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries
- */
- class HybridPerfReader : public PerfScriptReader {
- public:
- HybridPerfReader(ProfiledBinary *Binary, StringRef PerfTrace)
- : PerfScriptReader(Binary, PerfTrace){};
- // Parse the hybrid sample including the call and LBR line
- void parseSample(TraceStream &TraceIt, uint64_t Count) override;
- void generateUnsymbolizedProfile() override;
- private:
- // Unwind the hybrid samples after aggregration
- void unwindSamples();
- };
- /*
- Format of unsymbolized profile:
- [frame1 @ frame2 @ ...] # If it's a CS profile
- number of entries in RangeCounter
- from_1-to_1:count_1
- from_2-to_2:count_2
- ......
- from_n-to_n:count_n
- number of entries in BranchCounter
- src_1->dst_1:count_1
- src_2->dst_2:count_2
- ......
- src_n->dst_n:count_n
- [frame1 @ frame2 @ ...] # Next context
- ......
- Note that non-CS profile doesn't have the empty `[]` context.
- */
- class UnsymbolizedProfileReader : public PerfReaderBase {
- public:
- UnsymbolizedProfileReader(ProfiledBinary *Binary, StringRef PerfTrace)
- : PerfReaderBase(Binary, PerfTrace){};
- void parsePerfTraces() override;
- private:
- void readSampleCounters(TraceStream &TraceIt, SampleCounter &SCounters);
- void readUnsymbolizedProfile(StringRef Filename);
- std::unordered_set<std::string> ContextStrSet;
- };
- } // end namespace sampleprof
- } // end namespace llvm
- #endif
|