ProfileSummaryBuilder.cpp 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243
  1. //=-- ProfileSummaryBuilder.cpp - Profile summary computation ---------------=//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file contains support for computing profile summary data.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. #include "llvm/IR/ProfileSummary.h"
  13. #include "llvm/ProfileData/InstrProf.h"
  14. #include "llvm/ProfileData/ProfileCommon.h"
  15. #include "llvm/ProfileData/SampleProf.h"
  16. #include "llvm/Support/CommandLine.h"
  17. using namespace llvm;
  18. namespace llvm {
  19. cl::opt<bool> UseContextLessSummary(
  20. "profile-summary-contextless", cl::Hidden,
  21. cl::desc("Merge context profiles before calculating thresholds."));
  22. // The following two parameters determine the threshold for a count to be
  23. // considered hot/cold. These two parameters are percentile values (multiplied
  24. // by 10000). If the counts are sorted in descending order, the minimum count to
  25. // reach ProfileSummaryCutoffHot gives the threshold to determine a hot count.
  26. // Similarly, the minimum count to reach ProfileSummaryCutoffCold gives the
  27. // threshold for determining cold count (everything <= this threshold is
  28. // considered cold).
  29. cl::opt<int> ProfileSummaryCutoffHot(
  30. "profile-summary-cutoff-hot", cl::Hidden, cl::init(990000),
  31. cl::desc("A count is hot if it exceeds the minimum count to"
  32. " reach this percentile of total counts."));
  33. cl::opt<int> ProfileSummaryCutoffCold(
  34. "profile-summary-cutoff-cold", cl::Hidden, cl::init(999999),
  35. cl::desc("A count is cold if it is below the minimum count"
  36. " to reach this percentile of total counts."));
  37. cl::opt<unsigned> ProfileSummaryHugeWorkingSetSizeThreshold(
  38. "profile-summary-huge-working-set-size-threshold", cl::Hidden,
  39. cl::init(15000),
  40. cl::desc("The code working set size is considered huge if the number of"
  41. " blocks required to reach the -profile-summary-cutoff-hot"
  42. " percentile exceeds this count."));
  43. cl::opt<unsigned> ProfileSummaryLargeWorkingSetSizeThreshold(
  44. "profile-summary-large-working-set-size-threshold", cl::Hidden,
  45. cl::init(12500),
  46. cl::desc("The code working set size is considered large if the number of"
  47. " blocks required to reach the -profile-summary-cutoff-hot"
  48. " percentile exceeds this count."));
  49. // The next two options override the counts derived from summary computation and
  50. // are useful for debugging purposes.
  51. cl::opt<uint64_t> ProfileSummaryHotCount(
  52. "profile-summary-hot-count", cl::ReallyHidden,
  53. cl::desc("A fixed hot count that overrides the count derived from"
  54. " profile-summary-cutoff-hot"));
  55. cl::opt<uint64_t> ProfileSummaryColdCount(
  56. "profile-summary-cold-count", cl::ReallyHidden,
  57. cl::desc("A fixed cold count that overrides the count derived from"
  58. " profile-summary-cutoff-cold"));
  59. } // namespace llvm
  // Default cutoff table. Dividing an entry by ProfileSummary::Scale (1000000)
  // yields the fraction of the total count it stands for, e.g. 10000 is 1%.
  static const uint32_t DefaultCutoffsData[] = {
      10000,  /*  1%      */
      100000, /* 10%      */
      200000, /* 20%      */
      300000, /* 30%      */
      400000, /* 40%      */
      500000, /* 50%      */
      600000, /* 60%      */
      700000, /* 70%      */
      800000, /* 80%      */
      900000, /* 90%      */
      950000, /* 95%      */
      990000, /* 99%      */
      999000, /* 99.9%    */
      999900, /* 99.99%   */
      999990, /* 99.999%  */
      999999  /* 99.9999% */
  };
  67. const ArrayRef<uint32_t> ProfileSummaryBuilder::DefaultCutoffs =
  68. DefaultCutoffsData;
  69. const ProfileSummaryEntry &
  70. ProfileSummaryBuilder::getEntryForPercentile(const SummaryEntryVector &DS,
  71. uint64_t Percentile) {
  72. auto It = partition_point(DS, [=](const ProfileSummaryEntry &Entry) {
  73. return Entry.Cutoff < Percentile;
  74. });
  75. // The required percentile has to be <= one of the percentiles in the
  76. // detailed summary.
  77. if (It == DS.end())
  78. report_fatal_error("Desired percentile exceeds the maximum cutoff");
  79. return *It;
  80. }
  81. void InstrProfSummaryBuilder::addRecord(const InstrProfRecord &R) {
  82. // The first counter is not necessarily an entry count for IR
  83. // instrumentation profiles.
  84. // Eventually MaxFunctionCount will become obsolete and this can be
  85. // removed.
  86. if (R.getCountPseudoKind() != InstrProfRecord::NotPseudo)
  87. return;
  88. addEntryCount(R.Counts[0]);
  89. for (size_t I = 1, E = R.Counts.size(); I < E; ++I)
  90. addInternalCount(R.Counts[I]);
  91. }
  92. // To compute the detailed summary, we consider each line containing samples as
  93. // equivalent to a block with a count in the instrumented profile.
  94. void SampleProfileSummaryBuilder::addRecord(
  95. const sampleprof::FunctionSamples &FS, bool isCallsiteSample) {
  96. if (!isCallsiteSample) {
  97. NumFunctions++;
  98. if (FS.getHeadSamples() > MaxFunctionCount)
  99. MaxFunctionCount = FS.getHeadSamples();
  100. } else if (FS.getContext().hasAttribute(
  101. sampleprof::ContextDuplicatedIntoBase)) {
  102. // Do not recount callee samples if they are already merged into their base
  103. // profiles. This can happen to CS nested profile.
  104. return;
  105. }
  106. for (const auto &I : FS.getBodySamples()) {
  107. uint64_t Count = I.second.getSamples();
  108. addCount(Count);
  109. }
  110. for (const auto &I : FS.getCallsiteSamples())
  111. for (const auto &CS : I.second)
  112. addRecord(CS.second, true);
  113. }
  114. // The argument to this method is a vector of cutoff percentages and the return
  115. // value is a vector of (Cutoff, MinCount, NumCounts) triplets.
  116. void ProfileSummaryBuilder::computeDetailedSummary() {
  117. if (DetailedSummaryCutoffs.empty())
  118. return;
  119. llvm::sort(DetailedSummaryCutoffs);
  120. auto Iter = CountFrequencies.begin();
  121. const auto End = CountFrequencies.end();
  122. uint32_t CountsSeen = 0;
  123. uint64_t CurrSum = 0, Count = 0;
  124. for (const uint32_t Cutoff : DetailedSummaryCutoffs) {
  125. assert(Cutoff <= 999999);
  126. APInt Temp(128, TotalCount);
  127. APInt N(128, Cutoff);
  128. APInt D(128, ProfileSummary::Scale);
  129. Temp *= N;
  130. Temp = Temp.sdiv(D);
  131. uint64_t DesiredCount = Temp.getZExtValue();
  132. assert(DesiredCount <= TotalCount);
  133. while (CurrSum < DesiredCount && Iter != End) {
  134. Count = Iter->first;
  135. uint32_t Freq = Iter->second;
  136. CurrSum += (Count * Freq);
  137. CountsSeen += Freq;
  138. Iter++;
  139. }
  140. assert(CurrSum >= DesiredCount);
  141. ProfileSummaryEntry PSE = {Cutoff, Count, CountsSeen};
  142. DetailedSummary.push_back(PSE);
  143. }
  144. }
  145. uint64_t
  146. ProfileSummaryBuilder::getHotCountThreshold(const SummaryEntryVector &DS) {
  147. auto &HotEntry =
  148. ProfileSummaryBuilder::getEntryForPercentile(DS, ProfileSummaryCutoffHot);
  149. uint64_t HotCountThreshold = HotEntry.MinCount;
  150. if (ProfileSummaryHotCount.getNumOccurrences() > 0)
  151. HotCountThreshold = ProfileSummaryHotCount;
  152. return HotCountThreshold;
  153. }
  154. uint64_t
  155. ProfileSummaryBuilder::getColdCountThreshold(const SummaryEntryVector &DS) {
  156. auto &ColdEntry = ProfileSummaryBuilder::getEntryForPercentile(
  157. DS, ProfileSummaryCutoffCold);
  158. uint64_t ColdCountThreshold = ColdEntry.MinCount;
  159. if (ProfileSummaryColdCount.getNumOccurrences() > 0)
  160. ColdCountThreshold = ProfileSummaryColdCount;
  161. return ColdCountThreshold;
  162. }
  163. std::unique_ptr<ProfileSummary> SampleProfileSummaryBuilder::getSummary() {
  164. computeDetailedSummary();
  165. return std::make_unique<ProfileSummary>(
  166. ProfileSummary::PSK_Sample, DetailedSummary, TotalCount, MaxCount, 0,
  167. MaxFunctionCount, NumCounts, NumFunctions);
  168. }
  169. std::unique_ptr<ProfileSummary>
  170. SampleProfileSummaryBuilder::computeSummaryForProfiles(
  171. const SampleProfileMap &Profiles) {
  172. assert(NumFunctions == 0 &&
  173. "This can only be called on an empty summary builder");
  174. sampleprof::SampleProfileMap ContextLessProfiles;
  175. const sampleprof::SampleProfileMap *ProfilesToUse = &Profiles;
  176. // For CSSPGO, context-sensitive profile effectively split a function profile
  177. // into many copies each representing the CFG profile of a particular calling
  178. // context. That makes the count distribution looks more flat as we now have
  179. // more function profiles each with lower counts, which in turn leads to lower
  180. // hot thresholds. To compensate for that, by default we merge context
  181. // profiles before computing profile summary.
  182. if (UseContextLessSummary || (sampleprof::FunctionSamples::ProfileIsCS &&
  183. !UseContextLessSummary.getNumOccurrences())) {
  184. for (const auto &I : Profiles) {
  185. ContextLessProfiles[I.second.getName()].merge(I.second);
  186. }
  187. ProfilesToUse = &ContextLessProfiles;
  188. }
  189. for (const auto &I : *ProfilesToUse) {
  190. const sampleprof::FunctionSamples &Profile = I.second;
  191. addRecord(Profile);
  192. }
  193. return getSummary();
  194. }
  195. std::unique_ptr<ProfileSummary> InstrProfSummaryBuilder::getSummary() {
  196. computeDetailedSummary();
  197. return std::make_unique<ProfileSummary>(
  198. ProfileSummary::PSK_Instr, DetailedSummary, TotalCount, MaxCount,
  199. MaxInternalBlockCount, MaxFunctionCount, NumCounts, NumFunctions);
  200. }
  201. void InstrProfSummaryBuilder::addEntryCount(uint64_t Count) {
  202. assert(Count <= getInstrMaxCountValue() &&
  203. "Count value should be less than the max count value.");
  204. NumFunctions++;
  205. addCount(Count);
  206. if (Count > MaxFunctionCount)
  207. MaxFunctionCount = Count;
  208. }
  209. void InstrProfSummaryBuilder::addInternalCount(uint64_t Count) {
  210. assert(Count <= getInstrMaxCountValue() &&
  211. "Count value should be less than the max count value.");
  212. addCount(Count);
  213. if (Count > MaxInternalBlockCount)
  214. MaxInternalBlockCount = Count;
  215. }