X86PadShortFunction.cpp 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225
  1. //===-------- X86PadShortFunction.cpp - pad short functions -----------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file defines the pass which will pad short functions to prevent
  10. // a stall if a function returns before the return address is ready. This
  11. // is needed for some Intel Atom processors.
  12. //
  13. //===----------------------------------------------------------------------===//
  14. #include "X86.h"
  15. #include "X86InstrInfo.h"
  16. #include "X86Subtarget.h"
  17. #include "llvm/ADT/Statistic.h"
  18. #include "llvm/Analysis/ProfileSummaryInfo.h"
  19. #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
  20. #include "llvm/CodeGen/MachineFunctionPass.h"
  21. #include "llvm/CodeGen/MachineInstrBuilder.h"
  22. #include "llvm/CodeGen/MachineSizeOpts.h"
  23. #include "llvm/CodeGen/Passes.h"
  24. #include "llvm/CodeGen/TargetSchedule.h"
  25. #include "llvm/IR/Function.h"
  26. #include "llvm/Support/Debug.h"
  27. #include "llvm/Support/raw_ostream.h"
  28. using namespace llvm;
  29. #define DEBUG_TYPE "x86-pad-short-functions"
  30. STATISTIC(NumBBsPadded, "Number of basic blocks padded");
  31. namespace {
  32. struct VisitedBBInfo {
  33. // HasReturn - Whether the BB contains a return instruction
  34. bool HasReturn = false;
  35. // Cycles - Number of cycles until return if HasReturn is true, otherwise
  36. // number of cycles until end of the BB
  37. unsigned int Cycles = 0;
  38. VisitedBBInfo() = default;
  39. VisitedBBInfo(bool HasReturn, unsigned int Cycles)
  40. : HasReturn(HasReturn), Cycles(Cycles) {}
  41. };
  42. struct PadShortFunc : public MachineFunctionPass {
  43. static char ID;
  44. PadShortFunc() : MachineFunctionPass(ID) {}
  45. bool runOnMachineFunction(MachineFunction &MF) override;
  46. void getAnalysisUsage(AnalysisUsage &AU) const override {
  47. AU.addRequired<ProfileSummaryInfoWrapperPass>();
  48. AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
  49. AU.addPreserved<LazyMachineBlockFrequencyInfoPass>();
  50. MachineFunctionPass::getAnalysisUsage(AU);
  51. }
  52. MachineFunctionProperties getRequiredProperties() const override {
  53. return MachineFunctionProperties().set(
  54. MachineFunctionProperties::Property::NoVRegs);
  55. }
  56. StringRef getPassName() const override {
  57. return "X86 Atom pad short functions";
  58. }
  59. private:
  60. void findReturns(MachineBasicBlock *MBB,
  61. unsigned int Cycles = 0);
  62. bool cyclesUntilReturn(MachineBasicBlock *MBB,
  63. unsigned int &Cycles);
  64. void addPadding(MachineBasicBlock *MBB,
  65. MachineBasicBlock::iterator &MBBI,
  66. unsigned int NOOPsToAdd);
  67. const unsigned int Threshold = 4;
  68. // ReturnBBs - Maps basic blocks that return to the minimum number of
  69. // cycles until the return, starting from the entry block.
  70. DenseMap<MachineBasicBlock*, unsigned int> ReturnBBs;
  71. // VisitedBBs - Cache of previously visited BBs.
  72. DenseMap<MachineBasicBlock*, VisitedBBInfo> VisitedBBs;
  73. TargetSchedModel TSM;
  74. };
  75. char PadShortFunc::ID = 0;
  76. }
  77. FunctionPass *llvm::createX86PadShortFunctions() {
  78. return new PadShortFunc();
  79. }
  80. /// runOnMachineFunction - Loop over all of the basic blocks, inserting
  81. /// NOOP instructions before early exits.
  82. bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) {
  83. if (skipFunction(MF.getFunction()))
  84. return false;
  85. if (MF.getFunction().hasOptSize())
  86. return false;
  87. if (!MF.getSubtarget<X86Subtarget>().padShortFunctions())
  88. return false;
  89. TSM.init(&MF.getSubtarget());
  90. auto *PSI =
  91. &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
  92. auto *MBFI = (PSI && PSI->hasProfileSummary()) ?
  93. &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
  94. nullptr;
  95. // Search through basic blocks and mark the ones that have early returns
  96. ReturnBBs.clear();
  97. VisitedBBs.clear();
  98. findReturns(&MF.front());
  99. bool MadeChange = false;
  100. // Pad the identified basic blocks with NOOPs
  101. for (const auto &ReturnBB : ReturnBBs) {
  102. MachineBasicBlock *MBB = ReturnBB.first;
  103. unsigned Cycles = ReturnBB.second;
  104. // Function::hasOptSize is already checked above.
  105. bool OptForSize = llvm::shouldOptimizeForSize(MBB, PSI, MBFI);
  106. if (OptForSize)
  107. continue;
  108. if (Cycles < Threshold) {
  109. // BB ends in a return. Skip over any DBG_VALUE instructions
  110. // trailing the terminator.
  111. assert(MBB->size() > 0 &&
  112. "Basic block should contain at least a RET but is empty");
  113. MachineBasicBlock::iterator ReturnLoc = --MBB->end();
  114. while (ReturnLoc->isDebugInstr())
  115. --ReturnLoc;
  116. assert(ReturnLoc->isReturn() && !ReturnLoc->isCall() &&
  117. "Basic block does not end with RET");
  118. addPadding(MBB, ReturnLoc, Threshold - Cycles);
  119. NumBBsPadded++;
  120. MadeChange = true;
  121. }
  122. }
  123. return MadeChange;
  124. }
  125. /// findReturn - Starting at MBB, follow control flow and add all
  126. /// basic blocks that contain a return to ReturnBBs.
  127. void PadShortFunc::findReturns(MachineBasicBlock *MBB, unsigned int Cycles) {
  128. // If this BB has a return, note how many cycles it takes to get there.
  129. bool hasReturn = cyclesUntilReturn(MBB, Cycles);
  130. if (Cycles >= Threshold)
  131. return;
  132. if (hasReturn) {
  133. ReturnBBs[MBB] = std::max(ReturnBBs[MBB], Cycles);
  134. return;
  135. }
  136. // Follow branches in BB and look for returns
  137. for (MachineBasicBlock *Succ : MBB->successors())
  138. if (Succ != MBB)
  139. findReturns(Succ, Cycles);
  140. }
  141. /// cyclesUntilReturn - return true if the MBB has a return instruction,
  142. /// and return false otherwise.
  143. /// Cycles will be incremented by the number of cycles taken to reach the
  144. /// return or the end of the BB, whichever occurs first.
  145. bool PadShortFunc::cyclesUntilReturn(MachineBasicBlock *MBB,
  146. unsigned int &Cycles) {
  147. // Return cached result if BB was previously visited
  148. DenseMap<MachineBasicBlock*, VisitedBBInfo>::iterator it
  149. = VisitedBBs.find(MBB);
  150. if (it != VisitedBBs.end()) {
  151. VisitedBBInfo BBInfo = it->second;
  152. Cycles += BBInfo.Cycles;
  153. return BBInfo.HasReturn;
  154. }
  155. unsigned int CyclesToEnd = 0;
  156. for (MachineInstr &MI : *MBB) {
  157. // Mark basic blocks with a return instruction. Calls to other
  158. // functions do not count because the called function will be padded,
  159. // if necessary.
  160. if (MI.isReturn() && !MI.isCall()) {
  161. VisitedBBs[MBB] = VisitedBBInfo(true, CyclesToEnd);
  162. Cycles += CyclesToEnd;
  163. return true;
  164. }
  165. CyclesToEnd += TSM.computeInstrLatency(&MI);
  166. }
  167. VisitedBBs[MBB] = VisitedBBInfo(false, CyclesToEnd);
  168. Cycles += CyclesToEnd;
  169. return false;
  170. }
  171. /// addPadding - Add the given number of NOOP instructions to the function
  172. /// just prior to the return at MBBI
  173. void PadShortFunc::addPadding(MachineBasicBlock *MBB,
  174. MachineBasicBlock::iterator &MBBI,
  175. unsigned int NOOPsToAdd) {
  176. const DebugLoc &DL = MBBI->getDebugLoc();
  177. unsigned IssueWidth = TSM.getIssueWidth();
  178. for (unsigned i = 0, e = IssueWidth * NOOPsToAdd; i != e; ++i)
  179. BuildMI(*MBB, MBBI, DL, TSM.getInstrInfo()->get(X86::NOOP));
  180. }