MachineFunctionSplitter.cpp 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169
  //===-- MachineFunctionSplitter.cpp - Split machine functions //-----------===//
  //
  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  // See https://llvm.org/LICENSE.txt for license information.
  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  //
  //===----------------------------------------------------------------------===//
  //
  // \file
  // Uses profile information to split out cold blocks.
  //
  // This pass splits out cold machine basic blocks from the parent function. This
  // implementation leverages the basic block section framework. Blocks marked
  // cold by this pass are grouped together in a separate section prefixed with
  // ".text.unlikely.*". The linker can then group these together as a cold
  // section. The split part of the function is a contiguous region identified by
  // the symbol "foo.cold". Grouping all cold blocks across functions together
  // decreases fragmentation and improves icache and itlb utilization. Note that
  // the overall changes to the binary size are negligible; only a small number of
  // additional jump instructions may be introduced.
  //
  // For the original RFC of this pass please see
  // https://groups.google.com/d/msg/llvm-dev/RUegaMg-iqc/wFAVxa6fCgAJ
  //===----------------------------------------------------------------------===//
  25. #include "llvm/ADT/SmallVector.h"
  26. #include "llvm/ADT/Statistic.h"
  27. #include "llvm/Analysis/ProfileSummaryInfo.h"
  28. #include "llvm/CodeGen/BasicBlockSectionUtils.h"
  29. #include "llvm/CodeGen/MachineBasicBlock.h"
  30. #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
  31. #include "llvm/CodeGen/MachineFunction.h"
  32. #include "llvm/CodeGen/MachineFunctionPass.h"
  33. #include "llvm/CodeGen/MachineModuleInfo.h"
  34. #include "llvm/CodeGen/Passes.h"
  35. #include "llvm/IR/Function.h"
  36. #include "llvm/IR/Module.h"
  37. #include "llvm/InitializePasses.h"
  38. #include "llvm/Support/CommandLine.h"
  39. using namespace llvm;
  40. // FIXME: This cutoff value is CPU dependent and should be moved to
  41. // TargetTransformInfo once we consider enabling this on other platforms.
  42. // The value is expressed as a ProfileSummaryInfo integer percentile cutoff.
  43. // Defaults to 999950, i.e. all blocks colder than 99.995 percentile are split.
  44. // The default was empirically determined to be optimal when considering cutoff
  45. // values between 99%-ile to 100%-ile with respect to iTLB and icache metrics on
  46. // Intel CPUs.
  47. static cl::opt<unsigned>
  48. PercentileCutoff("mfs-psi-cutoff",
  49. cl::desc("Percentile profile summary cutoff used to "
  50. "determine cold blocks. Unused if set to zero."),
  51. cl::init(999950), cl::Hidden);
  52. static cl::opt<unsigned> ColdCountThreshold(
  53. "mfs-count-threshold",
  54. cl::desc(
  55. "Minimum number of times a block must be executed to be retained."),
  56. cl::init(1), cl::Hidden);
  57. namespace {
  58. class MachineFunctionSplitter : public MachineFunctionPass {
  59. public:
  60. static char ID;
  61. MachineFunctionSplitter() : MachineFunctionPass(ID) {
  62. initializeMachineFunctionSplitterPass(*PassRegistry::getPassRegistry());
  63. }
  64. StringRef getPassName() const override {
  65. return "Machine Function Splitter Transformation";
  66. }
  67. void getAnalysisUsage(AnalysisUsage &AU) const override;
  68. bool runOnMachineFunction(MachineFunction &F) override;
  69. };
  70. } // end anonymous namespace
  71. static bool isColdBlock(const MachineBasicBlock &MBB,
  72. const MachineBlockFrequencyInfo *MBFI,
  73. ProfileSummaryInfo *PSI) {
  74. Optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB);
  75. if (!Count.hasValue())
  76. return true;
  77. if (PercentileCutoff > 0) {
  78. return PSI->isColdCountNthPercentile(PercentileCutoff, *Count);
  79. }
  80. return (*Count < ColdCountThreshold);
  81. }
  82. bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
  83. // TODO: We only target functions with profile data. Static information may
  84. // also be considered but we don't see performance improvements yet.
  85. if (!MF.getFunction().hasProfileData())
  86. return false;
  87. // TODO: We don't split functions where a section attribute has been set
  88. // since the split part may not be placed in a contiguous region. It may also
  89. // be more beneficial to augment the linker to ensure contiguous layout of
  90. // split functions within the same section as specified by the attribute.
  91. if (MF.getFunction().hasSection() ||
  92. MF.getFunction().hasFnAttribute("implicit-section-name"))
  93. return false;
  94. // We don't want to proceed further for cold functions
  95. // or functions of unknown hotness. Lukewarm functions have no prefix.
  96. Optional<StringRef> SectionPrefix = MF.getFunction().getSectionPrefix();
  97. if (SectionPrefix.hasValue() &&
  98. (SectionPrefix.getValue().equals("unlikely") ||
  99. SectionPrefix.getValue().equals("unknown"))) {
  100. return false;
  101. }
  102. // Renumbering blocks here preserves the order of the blocks as
  103. // sortBasicBlocksAndUpdateBranches uses the numeric identifier to sort
  104. // blocks. Preserving the order of blocks is essential to retaining decisions
  105. // made by prior passes such as MachineBlockPlacement.
  106. MF.RenumberBlocks();
  107. MF.setBBSectionsType(BasicBlockSection::Preset);
  108. auto *MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
  109. auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
  110. SmallVector<MachineBasicBlock *, 2> LandingPads;
  111. for (auto &MBB : MF) {
  112. if (MBB.isEntryBlock())
  113. continue;
  114. if (MBB.isEHPad())
  115. LandingPads.push_back(&MBB);
  116. else if (isColdBlock(MBB, MBFI, PSI))
  117. MBB.setSectionID(MBBSectionID::ColdSectionID);
  118. }
  119. // We only split out eh pads if all of them are cold.
  120. bool HasHotLandingPads = false;
  121. for (const MachineBasicBlock *LP : LandingPads) {
  122. if (!isColdBlock(*LP, MBFI, PSI))
  123. HasHotLandingPads = true;
  124. }
  125. if (!HasHotLandingPads) {
  126. for (MachineBasicBlock *LP : LandingPads)
  127. LP->setSectionID(MBBSectionID::ColdSectionID);
  128. }
  129. auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) {
  130. return X.getSectionID().Type < Y.getSectionID().Type;
  131. };
  132. llvm::sortBasicBlocksAndUpdateBranches(MF, Comparator);
  133. return true;
  134. }
  135. void MachineFunctionSplitter::getAnalysisUsage(AnalysisUsage &AU) const {
  136. AU.addRequired<MachineModuleInfoWrapperPass>();
  137. AU.addRequired<MachineBlockFrequencyInfo>();
  138. AU.addRequired<ProfileSummaryInfoWrapperPass>();
  139. }
  140. char MachineFunctionSplitter::ID = 0;
  141. INITIALIZE_PASS(MachineFunctionSplitter, "machine-function-splitter",
  142. "Split machine functions using profile information", false,
  143. false)
  144. MachineFunctionPass *llvm::createMachineFunctionSplitterPass() {
  145. return new MachineFunctionSplitter();
  146. }