SampleProf.h 36 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003
  1. #pragma once
  2. #ifdef __GNUC__
  3. #pragma GCC diagnostic push
  4. #pragma GCC diagnostic ignored "-Wunused-parameter"
  5. #endif
  6. //===- SampleProf.h - Sampling profiling format support ---------*- C++ -*-===//
  7. //
  8. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  9. // See https://llvm.org/LICENSE.txt for license information.
  10. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  11. //
  12. //===----------------------------------------------------------------------===//
  13. //
  14. // This file contains common definitions used in the reading and writing of
  15. // sample profile data.
  16. //
  17. //===----------------------------------------------------------------------===//
  18. #ifndef LLVM_PROFILEDATA_SAMPLEPROF_H
  19. #define LLVM_PROFILEDATA_SAMPLEPROF_H
  20. #include "llvm/ADT/DenseSet.h"
  21. #include "llvm/ADT/SmallVector.h"
  22. #include "llvm/ADT/StringMap.h"
  23. #include "llvm/ADT/StringRef.h"
  24. #include "llvm/ADT/StringSet.h"
  25. #include "llvm/IR/Function.h"
  26. #include "llvm/IR/GlobalValue.h"
  27. #include "llvm/IR/Module.h"
  28. #include "llvm/Support/Allocator.h"
  29. #include "llvm/Support/Debug.h"
  30. #include "llvm/Support/ErrorOr.h"
  31. #include "llvm/Support/MathExtras.h"
  32. #include "llvm/Support/raw_ostream.h"
  33. #include <algorithm>
  34. #include <cstdint>
  35. #include <map>
  36. #include <set>
  37. #include <string>
  38. #include <system_error>
  39. #include <utility>
  40. namespace llvm {
  41. const std::error_category &sampleprof_category();
  42. enum class sampleprof_error {
  43. success = 0,
  44. bad_magic,
  45. unsupported_version,
  46. too_large,
  47. truncated,
  48. malformed,
  49. unrecognized_format,
  50. unsupported_writing_format,
  51. truncated_name_table,
  52. not_implemented,
  53. counter_overflow,
  54. ostream_seek_unsupported,
  55. compress_failed,
  56. uncompress_failed,
  57. zlib_unavailable,
  58. hash_mismatch
  59. };
  60. inline std::error_code make_error_code(sampleprof_error E) {
  61. return std::error_code(static_cast<int>(E), sampleprof_category());
  62. }
  63. inline sampleprof_error MergeResult(sampleprof_error &Accumulator,
  64. sampleprof_error Result) {
  65. // Prefer first error encountered as later errors may be secondary effects of
  66. // the initial problem.
  67. if (Accumulator == sampleprof_error::success &&
  68. Result != sampleprof_error::success)
  69. Accumulator = Result;
  70. return Accumulator;
  71. }
  72. } // end namespace llvm
  73. namespace std {
  74. template <>
  75. struct is_error_code_enum<llvm::sampleprof_error> : std::true_type {};
  76. } // end namespace std
  77. namespace llvm {
  78. namespace sampleprof {
  79. enum SampleProfileFormat {
  80. SPF_None = 0,
  81. SPF_Text = 0x1,
  82. SPF_Compact_Binary = 0x2,
  83. SPF_GCC = 0x3,
  84. SPF_Ext_Binary = 0x4,
  85. SPF_Binary = 0xff
  86. };
  87. static inline uint64_t SPMagic(SampleProfileFormat Format = SPF_Binary) {
  88. return uint64_t('S') << (64 - 8) | uint64_t('P') << (64 - 16) |
  89. uint64_t('R') << (64 - 24) | uint64_t('O') << (64 - 32) |
  90. uint64_t('F') << (64 - 40) | uint64_t('4') << (64 - 48) |
  91. uint64_t('2') << (64 - 56) | uint64_t(Format);
  92. }
  93. /// Get the proper representation of a string according to whether the
  94. /// current Format uses MD5 to represent the string.
  95. static inline StringRef getRepInFormat(StringRef Name, bool UseMD5,
  96. std::string &GUIDBuf) {
  97. if (Name.empty())
  98. return Name;
  99. GUIDBuf = std::to_string(Function::getGUID(Name));
  100. return UseMD5 ? StringRef(GUIDBuf) : Name;
  101. }
  102. static inline uint64_t SPVersion() { return 103; }
  103. // Section Type used by SampleProfileExtBinaryBaseReader and
  104. // SampleProfileExtBinaryBaseWriter. Never change the existing
  105. // value of enum. Only append new ones.
  106. enum SecType {
  107. SecInValid = 0,
  108. SecProfSummary = 1,
  109. SecNameTable = 2,
  110. SecProfileSymbolList = 3,
  111. SecFuncOffsetTable = 4,
  112. SecFuncMetadata = 5,
  113. // marker for the first type of profile.
  114. SecFuncProfileFirst = 32,
  115. SecLBRProfile = SecFuncProfileFirst
  116. };
  117. static inline std::string getSecName(SecType Type) {
  118. switch (Type) {
  119. case SecInValid:
  120. return "InvalidSection";
  121. case SecProfSummary:
  122. return "ProfileSummarySection";
  123. case SecNameTable:
  124. return "NameTableSection";
  125. case SecProfileSymbolList:
  126. return "ProfileSymbolListSection";
  127. case SecFuncOffsetTable:
  128. return "FuncOffsetTableSection";
  129. case SecFuncMetadata:
  130. return "FunctionMetadata";
  131. case SecLBRProfile:
  132. return "LBRProfileSection";
  133. }
  134. llvm_unreachable("A SecType has no name for output");
  135. }
  136. // Entry type of section header table used by SampleProfileExtBinaryBaseReader
  137. // and SampleProfileExtBinaryBaseWriter.
  138. struct SecHdrTableEntry {
  139. SecType Type;
  140. uint64_t Flags;
  141. uint64_t Offset;
  142. uint64_t Size;
  143. // The index indicating the location of the current entry in
  144. // SectionHdrLayout table.
  145. uint32_t LayoutIndex;
  146. };
  147. // Flags common for all sections are defined here. In SecHdrTableEntry::Flags,
  148. // common flags will be saved in the lower 32bits and section specific flags
  149. // will be saved in the higher 32 bits.
  150. enum class SecCommonFlags : uint32_t {
  151. SecFlagInValid = 0,
  152. SecFlagCompress = (1 << 0),
  153. // Indicate the section contains only profile without context.
  154. SecFlagFlat = (1 << 1)
  155. };
  156. // Section specific flags are defined here.
  157. // !!!Note: Everytime a new enum class is created here, please add
  158. // a new check in verifySecFlag.
  159. enum class SecNameTableFlags : uint32_t {
  160. SecFlagInValid = 0,
  161. SecFlagMD5Name = (1 << 0),
  162. // Store MD5 in fixed length instead of ULEB128 so NameTable can be
  163. // accessed like an array.
  164. SecFlagFixedLengthMD5 = (1 << 1)
  165. };
  166. enum class SecProfSummaryFlags : uint32_t {
  167. SecFlagInValid = 0,
  168. /// SecFlagPartial means the profile is for common/shared code.
  169. /// The common profile is usually merged from profiles collected
  170. /// from running other targets.
  171. SecFlagPartial = (1 << 0)
  172. };
  173. enum class SecFuncMetadataFlags : uint32_t {
  174. SecFlagInvalid = 0,
  175. SecFlagIsProbeBased = (1 << 0),
  176. };
  177. // Verify section specific flag is used for the correct section.
  178. template <class SecFlagType>
  179. static inline void verifySecFlag(SecType Type, SecFlagType Flag) {
  180. // No verification is needed for common flags.
  181. if (std::is_same<SecCommonFlags, SecFlagType>())
  182. return;
  183. // Verification starts here for section specific flag.
  184. bool IsFlagLegal = false;
  185. switch (Type) {
  186. case SecNameTable:
  187. IsFlagLegal = std::is_same<SecNameTableFlags, SecFlagType>();
  188. break;
  189. case SecProfSummary:
  190. IsFlagLegal = std::is_same<SecProfSummaryFlags, SecFlagType>();
  191. break;
  192. case SecFuncMetadata:
  193. IsFlagLegal = std::is_same<SecFuncMetadataFlags, SecFlagType>();
  194. break;
  195. default:
  196. break;
  197. }
  198. if (!IsFlagLegal)
  199. llvm_unreachable("Misuse of a flag in an incompatible section");
  200. }
  201. template <class SecFlagType>
  202. static inline void addSecFlag(SecHdrTableEntry &Entry, SecFlagType Flag) {
  203. verifySecFlag(Entry.Type, Flag);
  204. auto FVal = static_cast<uint64_t>(Flag);
  205. bool IsCommon = std::is_same<SecCommonFlags, SecFlagType>();
  206. Entry.Flags |= IsCommon ? FVal : (FVal << 32);
  207. }
  208. template <class SecFlagType>
  209. static inline void removeSecFlag(SecHdrTableEntry &Entry, SecFlagType Flag) {
  210. verifySecFlag(Entry.Type, Flag);
  211. auto FVal = static_cast<uint64_t>(Flag);
  212. bool IsCommon = std::is_same<SecCommonFlags, SecFlagType>();
  213. Entry.Flags &= ~(IsCommon ? FVal : (FVal << 32));
  214. }
  215. template <class SecFlagType>
  216. static inline bool hasSecFlag(const SecHdrTableEntry &Entry, SecFlagType Flag) {
  217. verifySecFlag(Entry.Type, Flag);
  218. auto FVal = static_cast<uint64_t>(Flag);
  219. bool IsCommon = std::is_same<SecCommonFlags, SecFlagType>();
  220. return Entry.Flags & (IsCommon ? FVal : (FVal << 32));
  221. }
  222. /// Represents the relative location of an instruction.
  223. ///
  224. /// Instruction locations are specified by the line offset from the
  225. /// beginning of the function (marked by the line where the function
  226. /// header is) and the discriminator value within that line.
  227. ///
  228. /// The discriminator value is useful to distinguish instructions
  229. /// that are on the same line but belong to different basic blocks
  230. /// (e.g., the two post-increment instructions in "if (p) x++; else y++;").
  231. struct LineLocation {
  232. LineLocation(uint32_t L, uint32_t D) : LineOffset(L), Discriminator(D) {}
  233. void print(raw_ostream &OS) const;
  234. void dump() const;
  235. bool operator<(const LineLocation &O) const {
  236. return LineOffset < O.LineOffset ||
  237. (LineOffset == O.LineOffset && Discriminator < O.Discriminator);
  238. }
  239. bool operator==(const LineLocation &O) const {
  240. return LineOffset == O.LineOffset && Discriminator == O.Discriminator;
  241. }
  242. bool operator!=(const LineLocation &O) const {
  243. return LineOffset != O.LineOffset || Discriminator != O.Discriminator;
  244. }
  245. uint32_t LineOffset;
  246. uint32_t Discriminator;
  247. };
  248. raw_ostream &operator<<(raw_ostream &OS, const LineLocation &Loc);
  249. /// Representation of a single sample record.
  250. ///
  251. /// A sample record is represented by a positive integer value, which
  252. /// indicates how frequently was the associated line location executed.
  253. ///
  254. /// Additionally, if the associated location contains a function call,
  255. /// the record will hold a list of all the possible called targets. For
  256. /// direct calls, this will be the exact function being invoked. For
  257. /// indirect calls (function pointers, virtual table dispatch), this
  258. /// will be a list of one or more functions.
  259. class SampleRecord {
  260. public:
  261. using CallTarget = std::pair<StringRef, uint64_t>;
  262. struct CallTargetComparator {
  263. bool operator()(const CallTarget &LHS, const CallTarget &RHS) const {
  264. if (LHS.second != RHS.second)
  265. return LHS.second > RHS.second;
  266. return LHS.first < RHS.first;
  267. }
  268. };
  269. using SortedCallTargetSet = std::set<CallTarget, CallTargetComparator>;
  270. using CallTargetMap = StringMap<uint64_t>;
  271. SampleRecord() = default;
  272. /// Increment the number of samples for this record by \p S.
  273. /// Optionally scale sample count \p S by \p Weight.
  274. ///
  275. /// Sample counts accumulate using saturating arithmetic, to avoid wrapping
  276. /// around unsigned integers.
  277. sampleprof_error addSamples(uint64_t S, uint64_t Weight = 1) {
  278. bool Overflowed;
  279. NumSamples = SaturatingMultiplyAdd(S, Weight, NumSamples, &Overflowed);
  280. return Overflowed ? sampleprof_error::counter_overflow
  281. : sampleprof_error::success;
  282. }
  283. /// Add called function \p F with samples \p S.
  284. /// Optionally scale sample count \p S by \p Weight.
  285. ///
  286. /// Sample counts accumulate using saturating arithmetic, to avoid wrapping
  287. /// around unsigned integers.
  288. sampleprof_error addCalledTarget(StringRef F, uint64_t S,
  289. uint64_t Weight = 1) {
  290. uint64_t &TargetSamples = CallTargets[F];
  291. bool Overflowed;
  292. TargetSamples =
  293. SaturatingMultiplyAdd(S, Weight, TargetSamples, &Overflowed);
  294. return Overflowed ? sampleprof_error::counter_overflow
  295. : sampleprof_error::success;
  296. }
  297. /// Return true if this sample record contains function calls.
  298. bool hasCalls() const { return !CallTargets.empty(); }
  299. uint64_t getSamples() const { return NumSamples; }
  300. const CallTargetMap &getCallTargets() const { return CallTargets; }
  301. const SortedCallTargetSet getSortedCallTargets() const {
  302. return SortCallTargets(CallTargets);
  303. }
  304. /// Sort call targets in descending order of call frequency.
  305. static const SortedCallTargetSet SortCallTargets(const CallTargetMap &Targets) {
  306. SortedCallTargetSet SortedTargets;
  307. for (const auto &I : Targets) {
  308. SortedTargets.emplace(I.first(), I.second);
  309. }
  310. return SortedTargets;
  311. }
  312. /// Prorate call targets by a distribution factor.
  313. static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets,
  314. float DistributionFactor) {
  315. CallTargetMap AdjustedTargets;
  316. for (const auto &I : Targets) {
  317. AdjustedTargets[I.first()] = I.second * DistributionFactor;
  318. }
  319. return AdjustedTargets;
  320. }
  321. /// Merge the samples in \p Other into this record.
  322. /// Optionally scale sample counts by \p Weight.
  323. sampleprof_error merge(const SampleRecord &Other, uint64_t Weight = 1) {
  324. sampleprof_error Result = addSamples(Other.getSamples(), Weight);
  325. for (const auto &I : Other.getCallTargets()) {
  326. MergeResult(Result, addCalledTarget(I.first(), I.second, Weight));
  327. }
  328. return Result;
  329. }
  330. void print(raw_ostream &OS, unsigned Indent) const;
  331. void dump() const;
  332. private:
  333. uint64_t NumSamples = 0;
  334. CallTargetMap CallTargets;
  335. };
  336. raw_ostream &operator<<(raw_ostream &OS, const SampleRecord &Sample);
  337. // State of context associated with FunctionSamples
  338. enum ContextStateMask {
  339. UnknownContext = 0x0, // Profile without context
  340. RawContext = 0x1, // Full context profile from input profile
  341. SyntheticContext = 0x2, // Synthetic context created for context promotion
  342. InlinedContext = 0x4, // Profile for context that is inlined into caller
  343. MergedContext = 0x8 // Profile for context merged into base profile
  344. };
  345. // Sample context for FunctionSamples. It consists of the calling context,
  346. // the function name and context state. Internally sample context is represented
  347. // using StringRef, which is also the input for constructing a `SampleContext`.
  348. // It can accept and represent both full context string as well as context-less
  349. // function name.
  350. // Example of full context string (note the wrapping `[]`):
  351. // `[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]`
  352. // Example of context-less function name (same as AutoFDO):
  353. // `_Z8funcLeafi`
  354. class SampleContext {
  355. public:
  356. SampleContext() : State(UnknownContext) {}
  357. SampleContext(StringRef ContextStr,
  358. ContextStateMask CState = UnknownContext) {
  359. setContext(ContextStr, CState);
  360. }
  361. // Promote context by removing top frames (represented by `ContextStrToRemove`).
  362. // Note that with string representation of context, the promotion is effectively
  363. // a substr operation with `ContextStrToRemove` removed from left.
  364. void promoteOnPath(StringRef ContextStrToRemove) {
  365. assert(FullContext.startswith(ContextStrToRemove));
  366. // Remove leading context and frame separator " @ ".
  367. FullContext = FullContext.substr(ContextStrToRemove.size() + 3);
  368. CallingContext = CallingContext.substr(ContextStrToRemove.size() + 3);
  369. }
  370. // Split the top context frame (left-most substr) from context.
  371. static std::pair<StringRef, StringRef>
  372. splitContextString(StringRef ContextStr) {
  373. return ContextStr.split(" @ ");
  374. }
  375. // Decode context string for a frame to get function name and location.
  376. // `ContextStr` is in the form of `FuncName:StartLine.Discriminator`.
  377. static void decodeContextString(StringRef ContextStr, StringRef &FName,
  378. LineLocation &LineLoc) {
  379. // Get function name
  380. auto EntrySplit = ContextStr.split(':');
  381. FName = EntrySplit.first;
  382. LineLoc = {0, 0};
  383. if (!EntrySplit.second.empty()) {
  384. // Get line offset, use signed int for getAsInteger so string will
  385. // be parsed as signed.
  386. int LineOffset = 0;
  387. auto LocSplit = EntrySplit.second.split('.');
  388. LocSplit.first.getAsInteger(10, LineOffset);
  389. LineLoc.LineOffset = LineOffset;
  390. // Get discriminator
  391. if (!LocSplit.second.empty())
  392. LocSplit.second.getAsInteger(10, LineLoc.Discriminator);
  393. }
  394. }
  395. operator StringRef() const { return FullContext; }
  396. bool hasState(ContextStateMask S) { return State & (uint32_t)S; }
  397. void setState(ContextStateMask S) { State |= (uint32_t)S; }
  398. void clearState(ContextStateMask S) { State &= (uint32_t)~S; }
  399. bool hasContext() const { return State != UnknownContext; }
  400. bool isBaseContext() const { return CallingContext.empty(); }
  401. StringRef getNameWithoutContext() const { return Name; }
  402. StringRef getCallingContext() const { return CallingContext; }
  403. StringRef getNameWithContext(bool WithBracket = false) const {
  404. return WithBracket ? InputContext : FullContext;
  405. }
  406. private:
  407. // Give a context string, decode and populate internal states like
  408. // Function name, Calling context and context state. Example of input
  409. // `ContextStr`: `[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]`
  410. void setContext(StringRef ContextStr, ContextStateMask CState) {
  411. assert(!ContextStr.empty());
  412. InputContext = ContextStr;
  413. // Note that `[]` wrapped input indicates a full context string, otherwise
  414. // it's treated as context-less function name only.
  415. bool HasContext = ContextStr.startswith("[");
  416. if (!HasContext && CState == UnknownContext) {
  417. State = UnknownContext;
  418. Name = FullContext = ContextStr;
  419. } else {
  420. // Assume raw context profile if unspecified
  421. if (CState == UnknownContext)
  422. State = RawContext;
  423. else
  424. State = CState;
  425. // Remove encapsulating '[' and ']' if any
  426. if (HasContext)
  427. FullContext = ContextStr.substr(1, ContextStr.size() - 2);
  428. else
  429. FullContext = ContextStr;
  430. // Caller is to the left of callee in context string
  431. auto NameContext = FullContext.rsplit(" @ ");
  432. if (NameContext.second.empty()) {
  433. Name = NameContext.first;
  434. CallingContext = NameContext.second;
  435. } else {
  436. Name = NameContext.second;
  437. CallingContext = NameContext.first;
  438. }
  439. }
  440. }
  441. // Input context string including bracketed calling context and leaf function
  442. // name
  443. StringRef InputContext;
  444. // Full context string including calling context and leaf function name
  445. StringRef FullContext;
  446. // Function name for the associated sample profile
  447. StringRef Name;
  448. // Calling context (leaf function excluded) for the associated sample profile
  449. StringRef CallingContext;
  450. // State of the associated sample profile
  451. uint32_t State;
  452. };
  453. class FunctionSamples;
  454. class SampleProfileReaderItaniumRemapper;
  455. using BodySampleMap = std::map<LineLocation, SampleRecord>;
  456. // NOTE: Using a StringMap here makes parsed profiles consume around 17% more
  457. // memory, which is *very* significant for large profiles.
  458. using FunctionSamplesMap = std::map<std::string, FunctionSamples, std::less<>>;
  459. using CallsiteSampleMap = std::map<LineLocation, FunctionSamplesMap>;
  460. /// Representation of the samples collected for a function.
  461. ///
  462. /// This data structure contains all the collected samples for the body
  463. /// of a function. Each sample corresponds to a LineLocation instance
  464. /// within the body of the function.
  465. class FunctionSamples {
  466. public:
  467. FunctionSamples() = default;
  468. void print(raw_ostream &OS = dbgs(), unsigned Indent = 0) const;
  469. void dump() const;
  470. sampleprof_error addTotalSamples(uint64_t Num, uint64_t Weight = 1) {
  471. bool Overflowed;
  472. TotalSamples =
  473. SaturatingMultiplyAdd(Num, Weight, TotalSamples, &Overflowed);
  474. return Overflowed ? sampleprof_error::counter_overflow
  475. : sampleprof_error::success;
  476. }
  477. void setTotalSamples(uint64_t Num) { TotalSamples = Num; }
  478. sampleprof_error addHeadSamples(uint64_t Num, uint64_t Weight = 1) {
  479. bool Overflowed;
  480. TotalHeadSamples =
  481. SaturatingMultiplyAdd(Num, Weight, TotalHeadSamples, &Overflowed);
  482. return Overflowed ? sampleprof_error::counter_overflow
  483. : sampleprof_error::success;
  484. }
  485. sampleprof_error addBodySamples(uint32_t LineOffset, uint32_t Discriminator,
  486. uint64_t Num, uint64_t Weight = 1) {
  487. return BodySamples[LineLocation(LineOffset, Discriminator)].addSamples(
  488. Num, Weight);
  489. }
  490. sampleprof_error addCalledTargetSamples(uint32_t LineOffset,
  491. uint32_t Discriminator,
  492. StringRef FName, uint64_t Num,
  493. uint64_t Weight = 1) {
  494. return BodySamples[LineLocation(LineOffset, Discriminator)].addCalledTarget(
  495. FName, Num, Weight);
  496. }
  497. /// Return the number of samples collected at the given location.
  498. /// Each location is specified by \p LineOffset and \p Discriminator.
  499. /// If the location is not found in profile, return error.
  500. ErrorOr<uint64_t> findSamplesAt(uint32_t LineOffset,
  501. uint32_t Discriminator) const {
  502. const auto &ret = BodySamples.find(LineLocation(LineOffset, Discriminator));
  503. if (ret == BodySamples.end()) {
  504. // For CSSPGO, in order to conserve profile size, we no longer write out
  505. // locations profile for those not hit during training, so we need to
  506. // treat them as zero instead of error here.
  507. if (ProfileIsCS)
  508. return 0;
  509. return std::error_code();
  510. // A missing counter for a probe likely means the probe was not executed.
  511. // Treat it as a zero count instead of an unknown count to help edge
  512. // weight inference.
  513. if (FunctionSamples::ProfileIsProbeBased)
  514. return 0;
  515. return std::error_code();
  516. } else {
  517. return ret->second.getSamples();
  518. }
  519. }
  520. /// Returns the call target map collected at a given location.
  521. /// Each location is specified by \p LineOffset and \p Discriminator.
  522. /// If the location is not found in profile, return error.
  523. ErrorOr<SampleRecord::CallTargetMap>
  524. findCallTargetMapAt(uint32_t LineOffset, uint32_t Discriminator) const {
  525. const auto &ret = BodySamples.find(LineLocation(LineOffset, Discriminator));
  526. if (ret == BodySamples.end())
  527. return std::error_code();
  528. return ret->second.getCallTargets();
  529. }
  530. /// Returns the call target map collected at a given location specified by \p
  531. /// CallSite. If the location is not found in profile, return error.
  532. ErrorOr<SampleRecord::CallTargetMap>
  533. findCallTargetMapAt(const LineLocation &CallSite) const {
  534. const auto &Ret = BodySamples.find(CallSite);
  535. if (Ret == BodySamples.end())
  536. return std::error_code();
  537. return Ret->second.getCallTargets();
  538. }
  539. /// Return the function samples at the given callsite location.
  540. FunctionSamplesMap &functionSamplesAt(const LineLocation &Loc) {
  541. return CallsiteSamples[Loc];
  542. }
  543. /// Returns the FunctionSamplesMap at the given \p Loc.
  544. const FunctionSamplesMap *
  545. findFunctionSamplesMapAt(const LineLocation &Loc) const {
  546. auto iter = CallsiteSamples.find(Loc);
  547. if (iter == CallsiteSamples.end())
  548. return nullptr;
  549. return &iter->second;
  550. }
  551. /// Returns a pointer to FunctionSamples at the given callsite location
  552. /// \p Loc with callee \p CalleeName. If no callsite can be found, relax
  553. /// the restriction to return the FunctionSamples at callsite location
  554. /// \p Loc with the maximum total sample count. If \p Remapper is not
  555. /// nullptr, use \p Remapper to find FunctionSamples with equivalent name
  556. /// as \p CalleeName.
  557. const FunctionSamples *
  558. findFunctionSamplesAt(const LineLocation &Loc, StringRef CalleeName,
  559. SampleProfileReaderItaniumRemapper *Remapper) const;
  560. bool empty() const { return TotalSamples == 0; }
  561. /// Return the total number of samples collected inside the function.
  562. uint64_t getTotalSamples() const { return TotalSamples; }
  563. /// Return the total number of branch samples that have the function as the
  564. /// branch target. This should be equivalent to the sample of the first
  565. /// instruction of the symbol. But as we directly get this info for raw
  566. /// profile without referring to potentially inaccurate debug info, this
  567. /// gives more accurate profile data and is preferred for standalone symbols.
  568. uint64_t getHeadSamples() const { return TotalHeadSamples; }
  569. /// Return the sample count of the first instruction of the function.
  570. /// The function can be either a standalone symbol or an inlined function.
  571. uint64_t getEntrySamples() const {
  572. if (FunctionSamples::ProfileIsCS && getHeadSamples()) {
  573. // For CS profile, if we already have more accurate head samples
  574. // counted by branch sample from caller, use them as entry samples.
  575. return getHeadSamples();
  576. }
  577. uint64_t Count = 0;
  578. // Use either BodySamples or CallsiteSamples which ever has the smaller
  579. // lineno.
  580. if (!BodySamples.empty() &&
  581. (CallsiteSamples.empty() ||
  582. BodySamples.begin()->first < CallsiteSamples.begin()->first))
  583. Count = BodySamples.begin()->second.getSamples();
  584. else if (!CallsiteSamples.empty()) {
  585. // An indirect callsite may be promoted to several inlined direct calls.
  586. // We need to get the sum of them.
  587. for (const auto &N_FS : CallsiteSamples.begin()->second)
  588. Count += N_FS.second.getEntrySamples();
  589. }
  590. // Return at least 1 if total sample is not 0.
  591. return Count ? Count : TotalSamples > 0;
  592. }
  593. /// Return all the samples collected in the body of the function.
  594. const BodySampleMap &getBodySamples() const { return BodySamples; }
  595. /// Return all the callsite samples collected in the body of the function.
  596. const CallsiteSampleMap &getCallsiteSamples() const {
  597. return CallsiteSamples;
  598. }
  599. /// Return the maximum of sample counts in a function body including functions
  600. /// inlined in it.
  601. uint64_t getMaxCountInside() const {
  602. uint64_t MaxCount = 0;
  603. for (const auto &L : getBodySamples())
  604. MaxCount = std::max(MaxCount, L.second.getSamples());
  605. for (const auto &C : getCallsiteSamples())
  606. for (const FunctionSamplesMap::value_type &F : C.second)
  607. MaxCount = std::max(MaxCount, F.second.getMaxCountInside());
  608. return MaxCount;
  609. }
  610. /// Merge the samples in \p Other into this one.
  611. /// Optionally scale samples by \p Weight.
  612. sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight = 1) {
  613. sampleprof_error Result = sampleprof_error::success;
  614. Name = Other.getName();
  615. if (!GUIDToFuncNameMap)
  616. GUIDToFuncNameMap = Other.GUIDToFuncNameMap;
  617. if (Context.getNameWithContext(true).empty())
  618. Context = Other.getContext();
  619. if (FunctionHash == 0) {
  620. // Set the function hash code for the target profile.
  621. FunctionHash = Other.getFunctionHash();
  622. } else if (FunctionHash != Other.getFunctionHash()) {
  623. // The two profiles coming with different valid hash codes indicates
  624. // either:
  625. // 1. They are same-named static functions from different compilation
  626. // units (without using -unique-internal-linkage-names), or
  627. // 2. They are really the same function but from different compilations.
  628. // Let's bail out in either case for now, which means one profile is
  629. // dropped.
  630. return sampleprof_error::hash_mismatch;
  631. }
  632. MergeResult(Result, addTotalSamples(Other.getTotalSamples(), Weight));
  633. MergeResult(Result, addHeadSamples(Other.getHeadSamples(), Weight));
  634. for (const auto &I : Other.getBodySamples()) {
  635. const LineLocation &Loc = I.first;
  636. const SampleRecord &Rec = I.second;
  637. MergeResult(Result, BodySamples[Loc].merge(Rec, Weight));
  638. }
  639. for (const auto &I : Other.getCallsiteSamples()) {
  640. const LineLocation &Loc = I.first;
  641. FunctionSamplesMap &FSMap = functionSamplesAt(Loc);
  642. for (const auto &Rec : I.second)
  643. MergeResult(Result, FSMap[Rec.first].merge(Rec.second, Weight));
  644. }
  645. return Result;
  646. }
  647. /// Recursively traverses all children, if the total sample count of the
  648. /// corresponding function is no less than \p Threshold, add its corresponding
  649. /// GUID to \p S. Also traverse the BodySamples to add hot CallTarget's GUID
  650. /// to \p S.
  651. void findInlinedFunctions(DenseSet<GlobalValue::GUID> &S, const Module *M,
  652. uint64_t Threshold) const {
  653. if (TotalSamples <= Threshold)
  654. return;
  655. auto isDeclaration = [](const Function *F) {
  656. return !F || F->isDeclaration();
  657. };
  658. if (isDeclaration(M->getFunction(getFuncName()))) {
  659. // Add to the import list only when it's defined out of module.
  660. S.insert(getGUID(Name));
  661. }
  662. // Import hot CallTargets, which may not be available in IR because full
  663. // profile annotation cannot be done until backend compilation in ThinLTO.
  664. for (const auto &BS : BodySamples)
  665. for (const auto &TS : BS.second.getCallTargets())
  666. if (TS.getValue() > Threshold) {
  667. const Function *Callee = M->getFunction(getFuncName(TS.getKey()));
  668. if (isDeclaration(Callee))
  669. S.insert(getGUID(TS.getKey()));
  670. }
  671. for (const auto &CS : CallsiteSamples)
  672. for (const auto &NameFS : CS.second)
  673. NameFS.second.findInlinedFunctions(S, M, Threshold);
  674. }
  675. /// Set the name of the function.
  676. void setName(StringRef FunctionName) { Name = FunctionName; }
  677. /// Return the function name.
  678. StringRef getName() const { return Name; }
  679. /// Return function name with context.
  680. StringRef getNameWithContext(bool WithBracket = false) const {
  681. return FunctionSamples::ProfileIsCS
  682. ? Context.getNameWithContext(WithBracket)
  683. : Name;
  684. }
  685. /// Return the original function name.
  686. StringRef getFuncName() const { return getFuncName(Name); }
  687. void setFunctionHash(uint64_t Hash) { FunctionHash = Hash; }
  688. uint64_t getFunctionHash() const { return FunctionHash; }
  689. /// Return the canonical name for a function, taking into account
  690. /// suffix elision policy attributes.
  691. static StringRef getCanonicalFnName(const Function &F) {
  692. auto AttrName = "sample-profile-suffix-elision-policy";
  693. auto Attr = F.getFnAttribute(AttrName).getValueAsString();
  694. return getCanonicalFnName(F.getName(), Attr);
  695. }
  696. static StringRef getCanonicalFnName(StringRef FnName, StringRef Attr = "") {
  697. static const char *knownSuffixes[] = { ".llvm.", ".part." };
  698. if (Attr == "" || Attr == "all") {
  699. return FnName.split('.').first;
  700. } else if (Attr == "selected") {
  701. StringRef Cand(FnName);
  702. for (const auto &Suf : knownSuffixes) {
  703. StringRef Suffix(Suf);
  704. auto It = Cand.rfind(Suffix);
  705. if (It == StringRef::npos)
  706. return Cand;
  707. auto Dit = Cand.rfind('.');
  708. if (Dit == It + Suffix.size() - 1)
  709. Cand = Cand.substr(0, It);
  710. }
  711. return Cand;
  712. } else if (Attr == "none") {
  713. return FnName;
  714. } else {
  715. assert(false && "internal error: unknown suffix elision policy");
  716. }
  717. return FnName;
  718. }
  719. /// Translate \p Name into its original name.
  720. /// When profile doesn't use MD5, \p Name needs no translation.
  721. /// When profile uses MD5, \p Name in current FunctionSamples
  722. /// is actually GUID of the original function name. getFuncName will
  723. /// translate \p Name in current FunctionSamples into its original name
  724. /// by looking up in the function map GUIDToFuncNameMap.
  725. /// If the original name doesn't exist in the map, return empty StringRef.
  726. StringRef getFuncName(StringRef Name) const {
  727. if (!UseMD5)
  728. return Name;
  729. assert(GUIDToFuncNameMap && "GUIDToFuncNameMap needs to be popluated first");
  730. return GUIDToFuncNameMap->lookup(std::stoull(Name.data()));
  731. }
  732. /// Returns the line offset to the start line of the subprogram.
  733. /// We assume that a single function will not exceed 65535 LOC.
  734. static unsigned getOffset(const DILocation *DIL);
  735. /// Returns a unique call site identifier for a given debug location of a call
  736. /// instruction. This is wrapper of two scenarios, the probe-based profile and
  737. /// regular profile, to hide implementation details from the sample loader and
  738. /// the context tracker.
  739. static LineLocation getCallSiteIdentifier(const DILocation *DIL);
  740. /// Get the FunctionSamples of the inline instance where DIL originates
  741. /// from.
  742. ///
  743. /// The FunctionSamples of the instruction (Machine or IR) associated to
  744. /// \p DIL is the inlined instance in which that instruction is coming from.
  745. /// We traverse the inline stack of that instruction, and match it with the
  746. /// tree nodes in the profile.
  747. ///
  748. /// \returns the FunctionSamples pointer to the inlined instance.
  749. /// If \p Remapper is not nullptr, it will be used to find matching
  750. /// FunctionSamples with not exactly the same but equivalent name.
  751. const FunctionSamples *findFunctionSamples(
  752. const DILocation *DIL,
  753. SampleProfileReaderItaniumRemapper *Remapper = nullptr) const;
  754. static bool ProfileIsProbeBased;
  755. static bool ProfileIsCS;
  756. SampleContext &getContext() const { return Context; }
  757. void setContext(const SampleContext &FContext) { Context = FContext; }
  758. static SampleProfileFormat Format;
  759. /// Whether the profile uses MD5 to represent string.
  760. static bool UseMD5;
  761. /// GUIDToFuncNameMap saves the mapping from GUID to the symbol name, for
  762. /// all the function symbols defined or declared in current module.
  763. DenseMap<uint64_t, StringRef> *GUIDToFuncNameMap = nullptr;
  764. // Assume the input \p Name is a name coming from FunctionSamples itself.
  765. // If UseMD5 is true, the name is already a GUID and we
  766. // don't want to return the GUID of GUID.
  767. static uint64_t getGUID(StringRef Name) {
  768. return UseMD5 ? std::stoull(Name.data()) : Function::getGUID(Name);
  769. }
  770. // Find all the names in the current FunctionSamples including names in
  771. // all the inline instances and names of call targets.
  772. void findAllNames(DenseSet<StringRef> &NameSet) const;
  773. private:
  774. /// Mangled name of the function.
  775. StringRef Name;
  776. /// CFG hash value for the function.
  777. uint64_t FunctionHash = 0;
  778. /// Calling context for function profile
  779. mutable SampleContext Context;
  780. /// Total number of samples collected inside this function.
  781. ///
  782. /// Samples are cumulative, they include all the samples collected
  783. /// inside this function and all its inlined callees.
  784. uint64_t TotalSamples = 0;
  785. /// Total number of samples collected at the head of the function.
  786. /// This is an approximation of the number of calls made to this function
  787. /// at runtime.
  788. uint64_t TotalHeadSamples = 0;
  789. /// Map instruction locations to collected samples.
  790. ///
  791. /// Each entry in this map contains the number of samples
  792. /// collected at the corresponding line offset. All line locations
  793. /// are an offset from the start of the function.
  794. BodySampleMap BodySamples;
  795. /// Map call sites to collected samples for the called function.
  796. ///
  797. /// Each entry in this map corresponds to all the samples
  798. /// collected for the inlined function call at the given
  799. /// location. For example, given:
  800. ///
  801. /// void foo() {
  802. /// 1 bar();
  803. /// ...
  804. /// 8 baz();
  805. /// }
  806. ///
  807. /// If the bar() and baz() calls were inlined inside foo(), this
  808. /// map will contain two entries. One for all the samples collected
  809. /// in the call to bar() at line offset 1, the other for all the samples
  810. /// collected in the call to baz() at line offset 8.
  811. CallsiteSampleMap CallsiteSamples;
  812. };
  813. raw_ostream &operator<<(raw_ostream &OS, const FunctionSamples &FS);
  814. /// Sort a LocationT->SampleT map by LocationT.
  815. ///
  816. /// It produces a sorted list of <LocationT, SampleT> records by ascending
  817. /// order of LocationT.
  818. template <class LocationT, class SampleT> class SampleSorter {
  819. public:
  820. using SamplesWithLoc = std::pair<const LocationT, SampleT>;
  821. using SamplesWithLocList = SmallVector<const SamplesWithLoc *, 20>;
  822. SampleSorter(const std::map<LocationT, SampleT> &Samples) {
  823. for (const auto &I : Samples)
  824. V.push_back(&I);
  825. llvm::stable_sort(V, [](const SamplesWithLoc *A, const SamplesWithLoc *B) {
  826. return A->first < B->first;
  827. });
  828. }
  829. const SamplesWithLocList &get() const { return V; }
  830. private:
  831. SamplesWithLocList V;
  832. };
  833. /// ProfileSymbolList records the list of function symbols shown up
  834. /// in the binary used to generate the profile. It is useful to
  835. /// to discriminate a function being so cold as not to shown up
  836. /// in the profile and a function newly added.
  837. class ProfileSymbolList {
  838. public:
  839. /// copy indicates whether we need to copy the underlying memory
  840. /// for the input Name.
  841. void add(StringRef Name, bool copy = false) {
  842. if (!copy) {
  843. Syms.insert(Name);
  844. return;
  845. }
  846. Syms.insert(Name.copy(Allocator));
  847. }
  848. bool contains(StringRef Name) { return Syms.count(Name); }
  849. void merge(const ProfileSymbolList &List) {
  850. for (auto Sym : List.Syms)
  851. add(Sym, true);
  852. }
  853. unsigned size() { return Syms.size(); }
  854. void setToCompress(bool TC) { ToCompress = TC; }
  855. bool toCompress() { return ToCompress; }
  856. std::error_code read(const uint8_t *Data, uint64_t ListSize);
  857. std::error_code write(raw_ostream &OS);
  858. void dump(raw_ostream &OS = dbgs()) const;
  859. private:
  860. // Determine whether or not to compress the symbol list when
  861. // writing it into profile. The variable is unused when the symbol
  862. // list is read from an existing profile.
  863. bool ToCompress = false;
  864. DenseSet<StringRef> Syms;
  865. BumpPtrAllocator Allocator;
  866. };
  867. } // end namespace sampleprof
  868. } // end namespace llvm
  869. #endif // LLVM_PROFILEDATA_SAMPLEPROF_H
  870. #ifdef __GNUC__
  871. #pragma GCC diagnostic pop
  872. #endif