123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312 |
- //===-- ProfileGenerator.h - Profile Generator -----------------*- C++ -*-===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- #ifndef LLVM_TOOLS_LLVM_PROGEN_PROFILEGENERATOR_H
- #define LLVM_TOOLS_LLVM_PROGEN_PROFILEGENERATOR_H
- #include "CSPreInliner.h"
- #include "ErrorHandling.h"
- #include "PerfReader.h"
- #include "ProfiledBinary.h"
- #include "llvm/ProfileData/SampleProfWriter.h"
- #include <memory>
- #include <unordered_set>
- using namespace llvm;
- using namespace sampleprof;
- namespace llvm {
- namespace sampleprof {
- // This base class for profile generation of sample-based PGO. We reuse all
- // structures relating to function profiles and profile writers as seen in
- // /ProfileData/SampleProf.h.
- class ProfileGeneratorBase {
- public:
- ProfileGeneratorBase(ProfiledBinary *Binary,
- const ContextSampleCounterMap &Counters)
- : Binary(Binary), SampleCounters(Counters){};
- virtual ~ProfileGeneratorBase() = default;
- static std::unique_ptr<ProfileGeneratorBase>
- create(ProfiledBinary *Binary, const ContextSampleCounterMap &SampleCounters,
- bool ProfileIsCSFlat);
- virtual void generateProfile() = 0;
- void write();
- static uint32_t
- getDuplicationFactor(unsigned Discriminator,
- bool UseFSD = ProfileGeneratorBase::UseFSDiscriminator) {
- return UseFSD ? 1
- : llvm::DILocation::getDuplicationFactorFromDiscriminator(
- Discriminator);
- }
- static uint32_t
- getBaseDiscriminator(unsigned Discriminator,
- bool UseFSD = ProfileGeneratorBase::UseFSDiscriminator) {
- return UseFSD ? Discriminator
- : DILocation::getBaseDiscriminatorFromDiscriminator(
- Discriminator, /* IsFSDiscriminator */ false);
- }
- static bool UseFSDiscriminator;
- protected:
- // Use SampleProfileWriter to serialize profile map
- void write(std::unique_ptr<SampleProfileWriter> Writer,
- SampleProfileMap &ProfileMap);
- /*
- For each region boundary point, mark if it is begin or end (or both) of
- the region. Boundary points are inclusive. Log the sample count as well
- so we can use it when we compute the sample count of each disjoint region
- later. Note that there might be multiple ranges with different sample
- count that share same begin/end point. We need to accumulate the sample
- count for the boundary point for such case, because for the example
- below,
- |<--100-->|
- |<------200------>|
- A B C
- sample count for disjoint region [A,B] would be 300.
- */
- void findDisjointRanges(RangeSample &DisjointRanges,
- const RangeSample &Ranges);
- // Helper function for updating body sample for a leaf location in
- // FunctionProfile
- void updateBodySamplesforFunctionProfile(FunctionSamples &FunctionProfile,
- const SampleContextFrame &LeafLoc,
- uint64_t Count);
- void updateTotalSamples();
- StringRef getCalleeNameForOffset(uint64_t TargetOffset);
- void computeSummaryAndThreshold();
- void calculateAndShowDensity(const SampleProfileMap &Profiles);
- double calculateDensity(const SampleProfileMap &Profiles,
- uint64_t HotCntThreshold);
- void showDensitySuggestion(double Density);
- // Thresholds from profile summary to answer isHotCount/isColdCount queries.
- uint64_t HotCountThreshold;
- uint64_t ColdCountThreshold;
- // Used by SampleProfileWriter
- SampleProfileMap ProfileMap;
- ProfiledBinary *Binary = nullptr;
- const ContextSampleCounterMap &SampleCounters;
- };
- class ProfileGenerator : public ProfileGeneratorBase {
- public:
- ProfileGenerator(ProfiledBinary *Binary,
- const ContextSampleCounterMap &Counters)
- : ProfileGeneratorBase(Binary, Counters){};
- void generateProfile() override;
- private:
- void generateLineNumBasedProfile();
- RangeSample preprocessRangeCounter(const RangeSample &RangeCounter);
- FunctionSamples &getTopLevelFunctionProfile(StringRef FuncName);
- // Helper function to get the leaf frame's FunctionProfile by traversing the
- // inline stack and meanwhile it adds the total samples for each frame's
- // function profile.
- FunctionSamples &
- getLeafProfileAndAddTotalSamples(const SampleContextFrameVector &FrameVec,
- uint64_t Count);
- void populateBodySamplesForAllFunctions(const RangeSample &RangeCounter);
- void
- populateBoundarySamplesForAllFunctions(const BranchSample &BranchCounters);
- void postProcessProfiles();
- void trimColdProfiles(const SampleProfileMap &Profiles,
- uint64_t ColdCntThreshold);
- };
- using ProbeCounterMap =
- std::unordered_map<const MCDecodedPseudoProbe *, uint64_t>;
- class CSProfileGenerator : public ProfileGeneratorBase {
- public:
- CSProfileGenerator(ProfiledBinary *Binary,
- const ContextSampleCounterMap &Counters)
- : ProfileGeneratorBase(Binary, Counters){};
- void generateProfile() override;
- // Trim the context stack at a given depth.
- template <typename T>
- static void trimContext(SmallVectorImpl<T> &S, int Depth = MaxContextDepth) {
- if (Depth < 0 || static_cast<size_t>(Depth) >= S.size())
- return;
- std::copy(S.begin() + S.size() - static_cast<size_t>(Depth), S.end(),
- S.begin());
- S.resize(Depth);
- }
- // Remove adjacent repeated context sequences up to a given sequence length,
- // -1 means no size limit. Note that repeated sequences are identified based
- // on the exact call site, this is finer granularity than function recursion.
- template <typename T>
- static void compressRecursionContext(SmallVectorImpl<T> &Context,
- int32_t CSize = MaxCompressionSize) {
- uint32_t I = 1;
- uint32_t HS = static_cast<uint32_t>(Context.size() / 2);
- uint32_t MaxDedupSize =
- CSize == -1 ? HS : std::min(static_cast<uint32_t>(CSize), HS);
- auto BeginIter = Context.begin();
- // Use an in-place algorithm to save memory copy
- // End indicates the end location of current iteration's data
- uint32_t End = 0;
- // Deduplicate from length 1 to the max possible size of a repeated
- // sequence.
- while (I <= MaxDedupSize) {
- // This is a linear algorithm that deduplicates adjacent repeated
- // sequences of size I. The deduplication detection runs on a sliding
- // window whose size is 2*I and it keeps sliding the window to deduplicate
- // the data inside. Once duplication is detected, deduplicate it by
- // skipping the right half part of the window, otherwise just copy back
- // the new one by appending them at the back of End pointer(for the next
- // iteration).
- //
- // For example:
- // Input: [a1, a2, b1, b2]
- // (Added index to distinguish the same char, the origin is [a, a, b,
- // b], the size of the dedup window is 2(I = 1) at the beginning)
- //
- // 1) The initial status is a dummy window[null, a1], then just copy the
- // right half of the window(End = 0), then slide the window.
- // Result: [a1], a2, b1, b2 (End points to the element right before ],
- // after ] is the data of the previous iteration)
- //
- // 2) Next window is [a1, a2]. Since a1 == a2, then skip the right half of
- // the window i.e the duplication happen. Only slide the window.
- // Result: [a1], a2, b1, b2
- //
- // 3) Next window is [a2, b1], copy the right half of the window(b1 is
- // new) to the End and slide the window.
- // Result: [a1, b1], b1, b2
- //
- // 4) Next window is [b1, b2], same to 2), skip b2.
- // Result: [a1, b1], b1, b2
- // After resize, it will be [a, b]
- // Use pointers like below to do comparison inside the window
- // [a b c a b c]
- // | | | | |
- // LeftBoundary Left Right Left+I Right+I
- // A duplication found if Left < LeftBoundry.
- int32_t Right = I - 1;
- End = I;
- int32_t LeftBoundary = 0;
- while (Right + I < Context.size()) {
- // To avoids scanning a part of a sequence repeatedly, it finds out
- // the common suffix of two hald in the window. The common suffix will
- // serve as the common prefix of next possible pair of duplicate
- // sequences. The non-common part will be ignored and never scanned
- // again.
- // For example.
- // Input: [a, b1], c1, b2, c2
- // I = 2
- //
- // 1) For the window [a, b1, c1, b2], non-common-suffix for the right
- // part is 'c1', copy it and only slide the window 1 step.
- // Result: [a, b1, c1], b2, c2
- //
- // 2) Next window is [b1, c1, b2, c2], so duplication happen.
- // Result after resize: [a, b, c]
- int32_t Left = Right;
- while (Left >= LeftBoundary && Context[Left] == Context[Left + I]) {
- // Find the longest suffix inside the window. When stops, Left points
- // at the diverging point in the current sequence.
- Left--;
- }
- bool DuplicationFound = (Left < LeftBoundary);
- // Don't need to recheck the data before Right
- LeftBoundary = Right + 1;
- if (DuplicationFound) {
- // Duplication found, skip right half of the window.
- Right += I;
- } else {
- // Copy the non-common-suffix part of the adjacent sequence.
- std::copy(BeginIter + Right + 1, BeginIter + Left + I + 1,
- BeginIter + End);
- End += Left + I - Right;
- // Only slide the window by the size of non-common-suffix
- Right = Left + I;
- }
- }
- // Don't forget the remaining part that's not scanned.
- std::copy(BeginIter + Right + 1, Context.end(), BeginIter + End);
- End += Context.size() - Right - 1;
- I++;
- Context.resize(End);
- MaxDedupSize = std::min(static_cast<uint32_t>(End / 2), MaxDedupSize);
- }
- }
- private:
- void generateLineNumBasedProfile();
- // Lookup or create FunctionSamples for the context
- FunctionSamples &
- getFunctionProfileForContext(const SampleContextFrameVector &Context,
- bool WasLeafInlined = false);
- // For profiled only functions, on-demand compute their inline context
- // function byte size which is used by the pre-inliner.
- void computeSizeForProfiledFunctions();
- // Post processing for profiles before writing out, such as mermining
- // and trimming cold profiles, running preinliner on profiles.
- void postProcessProfiles();
- void populateBodySamplesForFunction(FunctionSamples &FunctionProfile,
- const RangeSample &RangeCounters);
- void populateBoundarySamplesForFunction(SampleContextFrames ContextId,
- FunctionSamples &FunctionProfile,
- const BranchSample &BranchCounters);
- void populateInferredFunctionSamples();
- void generateProbeBasedProfile();
- // Go through each address from range to extract the top frame probe by
- // looking up in the Address2ProbeMap
- void extractProbesFromRange(const RangeSample &RangeCounter,
- ProbeCounterMap &ProbeCounter);
- // Fill in function body samples from probes
- void populateBodySamplesWithProbes(const RangeSample &RangeCounter,
- SampleContextFrames ContextStack);
- // Fill in boundary samples for a call probe
- void populateBoundarySamplesWithProbes(const BranchSample &BranchCounter,
- SampleContextFrames ContextStack);
- // Helper function to get FunctionSamples for the leaf probe
- FunctionSamples &
- getFunctionProfileForLeafProbe(SampleContextFrames ContextStack,
- const MCDecodedPseudoProbe *LeafProbe);
- // Underlying context table serves for sample profile writer.
- std::unordered_set<SampleContextFrameVector, SampleContextFrameHash> Contexts;
- public:
- // Deduplicate adjacent repeated context sequences up to a given sequence
- // length. -1 means no size limit.
- static int32_t MaxCompressionSize;
- static int MaxContextDepth;
- };
- } // end namespace sampleprof
- } // end namespace llvm
- #endif
|