X86PadShortFunction.cpp 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226
  1. //===-------- X86PadShortFunction.cpp - pad short functions -----------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file defines the pass which will pad short functions to prevent
  10. // a stall if a function returns before the return address is ready. This
  11. // is needed for some Intel Atom processors.
  12. //
  13. //===----------------------------------------------------------------------===//
  14. #include "X86.h"
  15. #include "X86InstrInfo.h"
  16. #include "X86Subtarget.h"
  17. #include "llvm/ADT/Statistic.h"
  18. #include "llvm/Analysis/ProfileSummaryInfo.h"
  19. #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
  20. #include "llvm/CodeGen/MachineFunctionPass.h"
  21. #include "llvm/CodeGen/MachineInstrBuilder.h"
  22. #include "llvm/CodeGen/MachineSizeOpts.h"
  23. #include "llvm/CodeGen/Passes.h"
  24. #include "llvm/CodeGen/TargetSchedule.h"
  25. #include "llvm/IR/Function.h"
  26. #include "llvm/Support/Debug.h"
  27. #include "llvm/Support/raw_ostream.h"
  28. using namespace llvm;
  29. #define DEBUG_TYPE "x86-pad-short-functions"
  30. STATISTIC(NumBBsPadded, "Number of basic blocks padded");
  31. namespace {
  32. struct VisitedBBInfo {
  33. // HasReturn - Whether the BB contains a return instruction
  34. bool HasReturn;
  35. // Cycles - Number of cycles until return if HasReturn is true, otherwise
  36. // number of cycles until end of the BB
  37. unsigned int Cycles;
  38. VisitedBBInfo() : HasReturn(false), Cycles(0) {}
  39. VisitedBBInfo(bool HasReturn, unsigned int Cycles)
  40. : HasReturn(HasReturn), Cycles(Cycles) {}
  41. };
  42. struct PadShortFunc : public MachineFunctionPass {
  43. static char ID;
  44. PadShortFunc() : MachineFunctionPass(ID)
  45. , Threshold(4) {}
  46. bool runOnMachineFunction(MachineFunction &MF) override;
  47. void getAnalysisUsage(AnalysisUsage &AU) const override {
  48. AU.addRequired<ProfileSummaryInfoWrapperPass>();
  49. AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
  50. AU.addPreserved<LazyMachineBlockFrequencyInfoPass>();
  51. MachineFunctionPass::getAnalysisUsage(AU);
  52. }
  53. MachineFunctionProperties getRequiredProperties() const override {
  54. return MachineFunctionProperties().set(
  55. MachineFunctionProperties::Property::NoVRegs);
  56. }
  57. StringRef getPassName() const override {
  58. return "X86 Atom pad short functions";
  59. }
  60. private:
  61. void findReturns(MachineBasicBlock *MBB,
  62. unsigned int Cycles = 0);
  63. bool cyclesUntilReturn(MachineBasicBlock *MBB,
  64. unsigned int &Cycles);
  65. void addPadding(MachineBasicBlock *MBB,
  66. MachineBasicBlock::iterator &MBBI,
  67. unsigned int NOOPsToAdd);
  68. const unsigned int Threshold;
  69. // ReturnBBs - Maps basic blocks that return to the minimum number of
  70. // cycles until the return, starting from the entry block.
  71. DenseMap<MachineBasicBlock*, unsigned int> ReturnBBs;
  72. // VisitedBBs - Cache of previously visited BBs.
  73. DenseMap<MachineBasicBlock*, VisitedBBInfo> VisitedBBs;
  74. TargetSchedModel TSM;
  75. };
  76. char PadShortFunc::ID = 0;
  77. }
  78. FunctionPass *llvm::createX86PadShortFunctions() {
  79. return new PadShortFunc();
  80. }
  81. /// runOnMachineFunction - Loop over all of the basic blocks, inserting
  82. /// NOOP instructions before early exits.
  83. bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) {
  84. if (skipFunction(MF.getFunction()))
  85. return false;
  86. if (MF.getFunction().hasOptSize())
  87. return false;
  88. if (!MF.getSubtarget<X86Subtarget>().padShortFunctions())
  89. return false;
  90. TSM.init(&MF.getSubtarget());
  91. auto *PSI =
  92. &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
  93. auto *MBFI = (PSI && PSI->hasProfileSummary()) ?
  94. &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
  95. nullptr;
  96. // Search through basic blocks and mark the ones that have early returns
  97. ReturnBBs.clear();
  98. VisitedBBs.clear();
  99. findReturns(&MF.front());
  100. bool MadeChange = false;
  101. // Pad the identified basic blocks with NOOPs
  102. for (const auto &ReturnBB : ReturnBBs) {
  103. MachineBasicBlock *MBB = ReturnBB.first;
  104. unsigned Cycles = ReturnBB.second;
  105. // Function::hasOptSize is already checked above.
  106. bool OptForSize = llvm::shouldOptimizeForSize(MBB, PSI, MBFI);
  107. if (OptForSize)
  108. continue;
  109. if (Cycles < Threshold) {
  110. // BB ends in a return. Skip over any DBG_VALUE instructions
  111. // trailing the terminator.
  112. assert(MBB->size() > 0 &&
  113. "Basic block should contain at least a RET but is empty");
  114. MachineBasicBlock::iterator ReturnLoc = --MBB->end();
  115. while (ReturnLoc->isDebugInstr())
  116. --ReturnLoc;
  117. assert(ReturnLoc->isReturn() && !ReturnLoc->isCall() &&
  118. "Basic block does not end with RET");
  119. addPadding(MBB, ReturnLoc, Threshold - Cycles);
  120. NumBBsPadded++;
  121. MadeChange = true;
  122. }
  123. }
  124. return MadeChange;
  125. }
  126. /// findReturn - Starting at MBB, follow control flow and add all
  127. /// basic blocks that contain a return to ReturnBBs.
  128. void PadShortFunc::findReturns(MachineBasicBlock *MBB, unsigned int Cycles) {
  129. // If this BB has a return, note how many cycles it takes to get there.
  130. bool hasReturn = cyclesUntilReturn(MBB, Cycles);
  131. if (Cycles >= Threshold)
  132. return;
  133. if (hasReturn) {
  134. ReturnBBs[MBB] = std::max(ReturnBBs[MBB], Cycles);
  135. return;
  136. }
  137. // Follow branches in BB and look for returns
  138. for (MachineBasicBlock *Succ : MBB->successors())
  139. if (Succ != MBB)
  140. findReturns(Succ, Cycles);
  141. }
  142. /// cyclesUntilReturn - return true if the MBB has a return instruction,
  143. /// and return false otherwise.
  144. /// Cycles will be incremented by the number of cycles taken to reach the
  145. /// return or the end of the BB, whichever occurs first.
  146. bool PadShortFunc::cyclesUntilReturn(MachineBasicBlock *MBB,
  147. unsigned int &Cycles) {
  148. // Return cached result if BB was previously visited
  149. DenseMap<MachineBasicBlock*, VisitedBBInfo>::iterator it
  150. = VisitedBBs.find(MBB);
  151. if (it != VisitedBBs.end()) {
  152. VisitedBBInfo BBInfo = it->second;
  153. Cycles += BBInfo.Cycles;
  154. return BBInfo.HasReturn;
  155. }
  156. unsigned int CyclesToEnd = 0;
  157. for (MachineInstr &MI : *MBB) {
  158. // Mark basic blocks with a return instruction. Calls to other
  159. // functions do not count because the called function will be padded,
  160. // if necessary.
  161. if (MI.isReturn() && !MI.isCall()) {
  162. VisitedBBs[MBB] = VisitedBBInfo(true, CyclesToEnd);
  163. Cycles += CyclesToEnd;
  164. return true;
  165. }
  166. CyclesToEnd += TSM.computeInstrLatency(&MI);
  167. }
  168. VisitedBBs[MBB] = VisitedBBInfo(false, CyclesToEnd);
  169. Cycles += CyclesToEnd;
  170. return false;
  171. }
  172. /// addPadding - Add the given number of NOOP instructions to the function
  173. /// just prior to the return at MBBI
  174. void PadShortFunc::addPadding(MachineBasicBlock *MBB,
  175. MachineBasicBlock::iterator &MBBI,
  176. unsigned int NOOPsToAdd) {
  177. const DebugLoc &DL = MBBI->getDebugLoc();
  178. unsigned IssueWidth = TSM.getIssueWidth();
  179. for (unsigned i = 0, e = IssueWidth * NOOPsToAdd; i != e; ++i)
  180. BuildMI(*MBB, MBBI, DL, TSM.getInstrInfo()->get(X86::NOOP));
  181. }