EarlyIfConversion.cpp 43 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207
  1. //===-- EarlyIfConversion.cpp - If-conversion on SSA form machine code ----===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // Early if-conversion is for out-of-order CPUs that don't have a lot of
  10. // predicable instructions. The goal is to eliminate conditional branches that
  11. // may mispredict.
  12. //
  13. // Instructions from both sides of the branch are executed specutatively, and a
  14. // cmov instruction selects the result.
  15. //
  16. //===----------------------------------------------------------------------===//
  17. #include "llvm/ADT/BitVector.h"
  18. #include "llvm/ADT/PostOrderIterator.h"
  19. #include "llvm/ADT/SetVector.h"
  20. #include "llvm/ADT/SmallPtrSet.h"
  21. #include "llvm/ADT/SparseSet.h"
  22. #include "llvm/ADT/Statistic.h"
  23. #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
  24. #include "llvm/CodeGen/MachineDominators.h"
  25. #include "llvm/CodeGen/MachineFunction.h"
  26. #include "llvm/CodeGen/MachineFunctionPass.h"
  27. #include "llvm/CodeGen/MachineInstr.h"
  28. #include "llvm/CodeGen/MachineLoopInfo.h"
  29. #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
  30. #include "llvm/CodeGen/MachineRegisterInfo.h"
  31. #include "llvm/CodeGen/MachineTraceMetrics.h"
  32. #include "llvm/CodeGen/Passes.h"
  33. #include "llvm/CodeGen/TargetInstrInfo.h"
  34. #include "llvm/CodeGen/TargetRegisterInfo.h"
  35. #include "llvm/CodeGen/TargetSubtargetInfo.h"
  36. #include "llvm/InitializePasses.h"
  37. #include "llvm/Support/CommandLine.h"
  38. #include "llvm/Support/Debug.h"
  39. #include "llvm/Support/raw_ostream.h"
  40. using namespace llvm;
  41. #define DEBUG_TYPE "early-ifcvt"
  42. // Absolute maximum number of instructions allowed per speculated block.
  43. // This bypasses all other heuristics, so it should be set fairly high.
  44. static cl::opt<unsigned>
  45. BlockInstrLimit("early-ifcvt-limit", cl::init(30), cl::Hidden,
  46. cl::desc("Maximum number of instructions per speculated block."));
  47. // Stress testing mode - disable heuristics.
  48. static cl::opt<bool> Stress("stress-early-ifcvt", cl::Hidden,
  49. cl::desc("Turn all knobs to 11"));
  50. STATISTIC(NumDiamondsSeen, "Number of diamonds");
  51. STATISTIC(NumDiamondsConv, "Number of diamonds converted");
  52. STATISTIC(NumTrianglesSeen, "Number of triangles");
  53. STATISTIC(NumTrianglesConv, "Number of triangles converted");
  54. //===----------------------------------------------------------------------===//
  55. // SSAIfConv
  56. //===----------------------------------------------------------------------===//
  57. //
  58. // The SSAIfConv class performs if-conversion on SSA form machine code after
  59. // determining if it is possible. The class contains no heuristics; external
  60. // code should be used to determine when if-conversion is a good idea.
  61. //
  62. // SSAIfConv can convert both triangles and diamonds:
  63. //
  64. // Triangle: Head Diamond: Head
  65. // | \ / \_
  66. // | \ / |
  67. // | [TF]BB FBB TBB
  68. // | / \ /
  69. // | / \ /
  70. // Tail Tail
  71. //
  72. // Instructions in the conditional blocks TBB and/or FBB are spliced into the
  73. // Head block, and phis in the Tail block are converted to select instructions.
  74. //
  75. namespace {
  76. class SSAIfConv {
  77. const TargetInstrInfo *TII;
  78. const TargetRegisterInfo *TRI;
  79. MachineRegisterInfo *MRI;
  80. public:
  81. /// The block containing the conditional branch.
  82. MachineBasicBlock *Head;
  83. /// The block containing phis after the if-then-else.
  84. MachineBasicBlock *Tail;
  85. /// The 'true' conditional block as determined by analyzeBranch.
  86. MachineBasicBlock *TBB;
  87. /// The 'false' conditional block as determined by analyzeBranch.
  88. MachineBasicBlock *FBB;
  89. /// isTriangle - When there is no 'else' block, either TBB or FBB will be
  90. /// equal to Tail.
  91. bool isTriangle() const { return TBB == Tail || FBB == Tail; }
  92. /// Returns the Tail predecessor for the True side.
  93. MachineBasicBlock *getTPred() const { return TBB == Tail ? Head : TBB; }
  94. /// Returns the Tail predecessor for the False side.
  95. MachineBasicBlock *getFPred() const { return FBB == Tail ? Head : FBB; }
  96. /// Information about each phi in the Tail block.
  97. struct PHIInfo {
  98. MachineInstr *PHI;
  99. unsigned TReg = 0, FReg = 0;
  100. // Latencies from Cond+Branch, TReg, and FReg to DstReg.
  101. int CondCycles = 0, TCycles = 0, FCycles = 0;
  102. PHIInfo(MachineInstr *phi) : PHI(phi) {}
  103. };
  104. SmallVector<PHIInfo, 8> PHIs;
  105. private:
  106. /// The branch condition determined by analyzeBranch.
  107. SmallVector<MachineOperand, 4> Cond;
  108. /// Instructions in Head that define values used by the conditional blocks.
  109. /// The hoisted instructions must be inserted after these instructions.
  110. SmallPtrSet<MachineInstr*, 8> InsertAfter;
  111. /// Register units clobbered by the conditional blocks.
  112. BitVector ClobberedRegUnits;
  113. // Scratch pad for findInsertionPoint.
  114. SparseSet<unsigned> LiveRegUnits;
  115. /// Insertion point in Head for speculatively executed instructions form TBB
  116. /// and FBB.
  117. MachineBasicBlock::iterator InsertionPoint;
  118. /// Return true if all non-terminator instructions in MBB can be safely
  119. /// speculated.
  120. bool canSpeculateInstrs(MachineBasicBlock *MBB);
  121. /// Return true if all non-terminator instructions in MBB can be safely
  122. /// predicated.
  123. bool canPredicateInstrs(MachineBasicBlock *MBB);
  124. /// Scan through instruction dependencies and update InsertAfter array.
  125. /// Return false if any dependency is incompatible with if conversion.
  126. bool InstrDependenciesAllowIfConv(MachineInstr *I);
  127. /// Predicate all instructions of the basic block with current condition
  128. /// except for terminators. Reverse the condition if ReversePredicate is set.
  129. void PredicateBlock(MachineBasicBlock *MBB, bool ReversePredicate);
  130. /// Find a valid insertion point in Head.
  131. bool findInsertionPoint();
  132. /// Replace PHI instructions in Tail with selects.
  133. void replacePHIInstrs();
  134. /// Insert selects and rewrite PHI operands to use them.
  135. void rewritePHIOperands();
  136. public:
  137. /// runOnMachineFunction - Initialize per-function data structures.
  138. void runOnMachineFunction(MachineFunction &MF) {
  139. TII = MF.getSubtarget().getInstrInfo();
  140. TRI = MF.getSubtarget().getRegisterInfo();
  141. MRI = &MF.getRegInfo();
  142. LiveRegUnits.clear();
  143. LiveRegUnits.setUniverse(TRI->getNumRegUnits());
  144. ClobberedRegUnits.clear();
  145. ClobberedRegUnits.resize(TRI->getNumRegUnits());
  146. }
  147. /// canConvertIf - If the sub-CFG headed by MBB can be if-converted,
  148. /// initialize the internal state, and return true.
  149. /// If predicate is set try to predicate the block otherwise try to
  150. /// speculatively execute it.
  151. bool canConvertIf(MachineBasicBlock *MBB, bool Predicate = false);
  152. /// convertIf - If-convert the last block passed to canConvertIf(), assuming
  153. /// it is possible. Add any erased blocks to RemovedBlocks.
  154. void convertIf(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks,
  155. bool Predicate = false);
  156. };
  157. } // end anonymous namespace
  158. /// canSpeculateInstrs - Returns true if all the instructions in MBB can safely
  159. /// be speculated. The terminators are not considered.
  160. ///
  161. /// If instructions use any values that are defined in the head basic block,
  162. /// the defining instructions are added to InsertAfter.
  163. ///
  164. /// Any clobbered regunits are added to ClobberedRegUnits.
  165. ///
  166. bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) {
  167. // Reject any live-in physregs. It's probably CPSR/EFLAGS, and very hard to
  168. // get right.
  169. if (!MBB->livein_empty()) {
  170. LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has live-ins.\n");
  171. return false;
  172. }
  173. unsigned InstrCount = 0;
  174. // Check all instructions, except the terminators. It is assumed that
  175. // terminators never have side effects or define any used register values.
  176. for (MachineInstr &MI :
  177. llvm::make_range(MBB->begin(), MBB->getFirstTerminator())) {
  178. if (MI.isDebugInstr())
  179. continue;
  180. if (++InstrCount > BlockInstrLimit && !Stress) {
  181. LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has more than "
  182. << BlockInstrLimit << " instructions.\n");
  183. return false;
  184. }
  185. // There shouldn't normally be any phis in a single-predecessor block.
  186. if (MI.isPHI()) {
  187. LLVM_DEBUG(dbgs() << "Can't hoist: " << MI);
  188. return false;
  189. }
  190. // Don't speculate loads. Note that it may be possible and desirable to
  191. // speculate GOT or constant pool loads that are guaranteed not to trap,
  192. // but we don't support that for now.
  193. if (MI.mayLoad()) {
  194. LLVM_DEBUG(dbgs() << "Won't speculate load: " << MI);
  195. return false;
  196. }
  197. // We never speculate stores, so an AA pointer isn't necessary.
  198. bool DontMoveAcrossStore = true;
  199. if (!MI.isSafeToMove(nullptr, DontMoveAcrossStore)) {
  200. LLVM_DEBUG(dbgs() << "Can't speculate: " << MI);
  201. return false;
  202. }
  203. // Check for any dependencies on Head instructions.
  204. if (!InstrDependenciesAllowIfConv(&MI))
  205. return false;
  206. }
  207. return true;
  208. }
  209. /// Check that there is no dependencies preventing if conversion.
  210. ///
  211. /// If instruction uses any values that are defined in the head basic block,
  212. /// the defining instructions are added to InsertAfter.
  213. bool SSAIfConv::InstrDependenciesAllowIfConv(MachineInstr *I) {
  214. for (const MachineOperand &MO : I->operands()) {
  215. if (MO.isRegMask()) {
  216. LLVM_DEBUG(dbgs() << "Won't speculate regmask: " << *I);
  217. return false;
  218. }
  219. if (!MO.isReg())
  220. continue;
  221. Register Reg = MO.getReg();
  222. // Remember clobbered regunits.
  223. if (MO.isDef() && Register::isPhysicalRegister(Reg))
  224. for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid();
  225. ++Units)
  226. ClobberedRegUnits.set(*Units);
  227. if (!MO.readsReg() || !Register::isVirtualRegister(Reg))
  228. continue;
  229. MachineInstr *DefMI = MRI->getVRegDef(Reg);
  230. if (!DefMI || DefMI->getParent() != Head)
  231. continue;
  232. if (InsertAfter.insert(DefMI).second)
  233. LLVM_DEBUG(dbgs() << printMBBReference(*I->getParent()) << " depends on "
  234. << *DefMI);
  235. if (DefMI->isTerminator()) {
  236. LLVM_DEBUG(dbgs() << "Can't insert instructions below terminator.\n");
  237. return false;
  238. }
  239. }
  240. return true;
  241. }
  242. /// canPredicateInstrs - Returns true if all the instructions in MBB can safely
  243. /// be predicates. The terminators are not considered.
  244. ///
  245. /// If instructions use any values that are defined in the head basic block,
  246. /// the defining instructions are added to InsertAfter.
  247. ///
  248. /// Any clobbered regunits are added to ClobberedRegUnits.
  249. ///
  250. bool SSAIfConv::canPredicateInstrs(MachineBasicBlock *MBB) {
  251. // Reject any live-in physregs. It's probably CPSR/EFLAGS, and very hard to
  252. // get right.
  253. if (!MBB->livein_empty()) {
  254. LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has live-ins.\n");
  255. return false;
  256. }
  257. unsigned InstrCount = 0;
  258. // Check all instructions, except the terminators. It is assumed that
  259. // terminators never have side effects or define any used register values.
  260. for (MachineBasicBlock::iterator I = MBB->begin(),
  261. E = MBB->getFirstTerminator();
  262. I != E; ++I) {
  263. if (I->isDebugInstr())
  264. continue;
  265. if (++InstrCount > BlockInstrLimit && !Stress) {
  266. LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has more than "
  267. << BlockInstrLimit << " instructions.\n");
  268. return false;
  269. }
  270. // There shouldn't normally be any phis in a single-predecessor block.
  271. if (I->isPHI()) {
  272. LLVM_DEBUG(dbgs() << "Can't predicate: " << *I);
  273. return false;
  274. }
  275. // Check that instruction is predicable and that it is not already
  276. // predicated.
  277. if (!TII->isPredicable(*I) || TII->isPredicated(*I)) {
  278. return false;
  279. }
  280. // Check for any dependencies on Head instructions.
  281. if (!InstrDependenciesAllowIfConv(&(*I)))
  282. return false;
  283. }
  284. return true;
  285. }
  286. // Apply predicate to all instructions in the machine block.
  287. void SSAIfConv::PredicateBlock(MachineBasicBlock *MBB, bool ReversePredicate) {
  288. auto Condition = Cond;
  289. if (ReversePredicate)
  290. TII->reverseBranchCondition(Condition);
  291. // Terminators don't need to be predicated as they will be removed.
  292. for (MachineBasicBlock::iterator I = MBB->begin(),
  293. E = MBB->getFirstTerminator();
  294. I != E; ++I) {
  295. if (I->isDebugInstr())
  296. continue;
  297. TII->PredicateInstruction(*I, Condition);
  298. }
  299. }
  300. /// Find an insertion point in Head for the speculated instructions. The
  301. /// insertion point must be:
  302. ///
  303. /// 1. Before any terminators.
  304. /// 2. After any instructions in InsertAfter.
  305. /// 3. Not have any clobbered regunits live.
  306. ///
  307. /// This function sets InsertionPoint and returns true when successful, it
  308. /// returns false if no valid insertion point could be found.
  309. ///
  310. bool SSAIfConv::findInsertionPoint() {
  311. // Keep track of live regunits before the current position.
  312. // Only track RegUnits that are also in ClobberedRegUnits.
  313. LiveRegUnits.clear();
  314. SmallVector<MCRegister, 8> Reads;
  315. MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator();
  316. MachineBasicBlock::iterator I = Head->end();
  317. MachineBasicBlock::iterator B = Head->begin();
  318. while (I != B) {
  319. --I;
  320. // Some of the conditional code depends in I.
  321. if (InsertAfter.count(&*I)) {
  322. LLVM_DEBUG(dbgs() << "Can't insert code after " << *I);
  323. return false;
  324. }
  325. // Update live regunits.
  326. for (const MachineOperand &MO : I->operands()) {
  327. // We're ignoring regmask operands. That is conservatively correct.
  328. if (!MO.isReg())
  329. continue;
  330. Register Reg = MO.getReg();
  331. if (!Register::isPhysicalRegister(Reg))
  332. continue;
  333. // I clobbers Reg, so it isn't live before I.
  334. if (MO.isDef())
  335. for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid();
  336. ++Units)
  337. LiveRegUnits.erase(*Units);
  338. // Unless I reads Reg.
  339. if (MO.readsReg())
  340. Reads.push_back(Reg.asMCReg());
  341. }
  342. // Anything read by I is live before I.
  343. while (!Reads.empty())
  344. for (MCRegUnitIterator Units(Reads.pop_back_val(), TRI); Units.isValid();
  345. ++Units)
  346. if (ClobberedRegUnits.test(*Units))
  347. LiveRegUnits.insert(*Units);
  348. // We can't insert before a terminator.
  349. if (I != FirstTerm && I->isTerminator())
  350. continue;
  351. // Some of the clobbered registers are live before I, not a valid insertion
  352. // point.
  353. if (!LiveRegUnits.empty()) {
  354. LLVM_DEBUG({
  355. dbgs() << "Would clobber";
  356. for (unsigned LRU : LiveRegUnits)
  357. dbgs() << ' ' << printRegUnit(LRU, TRI);
  358. dbgs() << " live before " << *I;
  359. });
  360. continue;
  361. }
  362. // This is a valid insertion point.
  363. InsertionPoint = I;
  364. LLVM_DEBUG(dbgs() << "Can insert before " << *I);
  365. return true;
  366. }
  367. LLVM_DEBUG(dbgs() << "No legal insertion point found.\n");
  368. return false;
  369. }
  370. /// canConvertIf - analyze the sub-cfg rooted in MBB, and return true if it is
  371. /// a potential candidate for if-conversion. Fill out the internal state.
  372. ///
  373. bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB, bool Predicate) {
  374. Head = MBB;
  375. TBB = FBB = Tail = nullptr;
  376. if (Head->succ_size() != 2)
  377. return false;
  378. MachineBasicBlock *Succ0 = Head->succ_begin()[0];
  379. MachineBasicBlock *Succ1 = Head->succ_begin()[1];
  380. // Canonicalize so Succ0 has MBB as its single predecessor.
  381. if (Succ0->pred_size() != 1)
  382. std::swap(Succ0, Succ1);
  383. if (Succ0->pred_size() != 1 || Succ0->succ_size() != 1)
  384. return false;
  385. Tail = Succ0->succ_begin()[0];
  386. // This is not a triangle.
  387. if (Tail != Succ1) {
  388. // Check for a diamond. We won't deal with any critical edges.
  389. if (Succ1->pred_size() != 1 || Succ1->succ_size() != 1 ||
  390. Succ1->succ_begin()[0] != Tail)
  391. return false;
  392. LLVM_DEBUG(dbgs() << "\nDiamond: " << printMBBReference(*Head) << " -> "
  393. << printMBBReference(*Succ0) << "/"
  394. << printMBBReference(*Succ1) << " -> "
  395. << printMBBReference(*Tail) << '\n');
  396. // Live-in physregs are tricky to get right when speculating code.
  397. if (!Tail->livein_empty()) {
  398. LLVM_DEBUG(dbgs() << "Tail has live-ins.\n");
  399. return false;
  400. }
  401. } else {
  402. LLVM_DEBUG(dbgs() << "\nTriangle: " << printMBBReference(*Head) << " -> "
  403. << printMBBReference(*Succ0) << " -> "
  404. << printMBBReference(*Tail) << '\n');
  405. }
  406. // This is a triangle or a diamond.
  407. // Skip if we cannot predicate and there are no phis skip as there must be
  408. // side effects that can only be handled with predication.
  409. if (!Predicate && (Tail->empty() || !Tail->front().isPHI())) {
  410. LLVM_DEBUG(dbgs() << "No phis in tail.\n");
  411. return false;
  412. }
  413. // The branch we're looking to eliminate must be analyzable.
  414. Cond.clear();
  415. if (TII->analyzeBranch(*Head, TBB, FBB, Cond)) {
  416. LLVM_DEBUG(dbgs() << "Branch not analyzable.\n");
  417. return false;
  418. }
  419. // This is weird, probably some sort of degenerate CFG.
  420. if (!TBB) {
  421. LLVM_DEBUG(dbgs() << "analyzeBranch didn't find conditional branch.\n");
  422. return false;
  423. }
  424. // Make sure the analyzed branch is conditional; one of the successors
  425. // could be a landing pad. (Empty landing pads can be generated on Windows.)
  426. if (Cond.empty()) {
  427. LLVM_DEBUG(dbgs() << "analyzeBranch found an unconditional branch.\n");
  428. return false;
  429. }
  430. // analyzeBranch doesn't set FBB on a fall-through branch.
  431. // Make sure it is always set.
  432. FBB = TBB == Succ0 ? Succ1 : Succ0;
  433. // Any phis in the tail block must be convertible to selects.
  434. PHIs.clear();
  435. MachineBasicBlock *TPred = getTPred();
  436. MachineBasicBlock *FPred = getFPred();
  437. for (MachineBasicBlock::iterator I = Tail->begin(), E = Tail->end();
  438. I != E && I->isPHI(); ++I) {
  439. PHIs.push_back(&*I);
  440. PHIInfo &PI = PHIs.back();
  441. // Find PHI operands corresponding to TPred and FPred.
  442. for (unsigned i = 1; i != PI.PHI->getNumOperands(); i += 2) {
  443. if (PI.PHI->getOperand(i+1).getMBB() == TPred)
  444. PI.TReg = PI.PHI->getOperand(i).getReg();
  445. if (PI.PHI->getOperand(i+1).getMBB() == FPred)
  446. PI.FReg = PI.PHI->getOperand(i).getReg();
  447. }
  448. assert(Register::isVirtualRegister(PI.TReg) && "Bad PHI");
  449. assert(Register::isVirtualRegister(PI.FReg) && "Bad PHI");
  450. // Get target information.
  451. if (!TII->canInsertSelect(*Head, Cond, PI.PHI->getOperand(0).getReg(),
  452. PI.TReg, PI.FReg, PI.CondCycles, PI.TCycles,
  453. PI.FCycles)) {
  454. LLVM_DEBUG(dbgs() << "Can't convert: " << *PI.PHI);
  455. return false;
  456. }
  457. }
  458. // Check that the conditional instructions can be speculated.
  459. InsertAfter.clear();
  460. ClobberedRegUnits.reset();
  461. if (Predicate) {
  462. if (TBB != Tail && !canPredicateInstrs(TBB))
  463. return false;
  464. if (FBB != Tail && !canPredicateInstrs(FBB))
  465. return false;
  466. } else {
  467. if (TBB != Tail && !canSpeculateInstrs(TBB))
  468. return false;
  469. if (FBB != Tail && !canSpeculateInstrs(FBB))
  470. return false;
  471. }
  472. // Try to find a valid insertion point for the speculated instructions in the
  473. // head basic block.
  474. if (!findInsertionPoint())
  475. return false;
  476. if (isTriangle())
  477. ++NumTrianglesSeen;
  478. else
  479. ++NumDiamondsSeen;
  480. return true;
  481. }
  482. /// \return true iff the two registers are known to have the same value.
  483. static bool hasSameValue(const MachineRegisterInfo &MRI,
  484. const TargetInstrInfo *TII, Register TReg,
  485. Register FReg) {
  486. if (TReg == FReg)
  487. return true;
  488. if (!TReg.isVirtual() || !FReg.isVirtual())
  489. return false;
  490. const MachineInstr *TDef = MRI.getUniqueVRegDef(TReg);
  491. const MachineInstr *FDef = MRI.getUniqueVRegDef(FReg);
  492. if (!TDef || !FDef)
  493. return false;
  494. // If there are side-effects, all bets are off.
  495. if (TDef->hasUnmodeledSideEffects())
  496. return false;
  497. // If the instruction could modify memory, or there may be some intervening
  498. // store between the two, we can't consider them to be equal.
  499. if (TDef->mayLoadOrStore() && !TDef->isDereferenceableInvariantLoad(nullptr))
  500. return false;
  501. // We also can't guarantee that they are the same if, for example, the
  502. // instructions are both a copy from a physical reg, because some other
  503. // instruction may have modified the value in that reg between the two
  504. // defining insts.
  505. if (any_of(TDef->uses(), [](const MachineOperand &MO) {
  506. return MO.isReg() && MO.getReg().isPhysical();
  507. }))
  508. return false;
  509. // Check whether the two defining instructions produce the same value(s).
  510. if (!TII->produceSameValue(*TDef, *FDef, &MRI))
  511. return false;
  512. // Further, check that the two defs come from corresponding operands.
  513. int TIdx = TDef->findRegisterDefOperandIdx(TReg);
  514. int FIdx = FDef->findRegisterDefOperandIdx(FReg);
  515. if (TIdx == -1 || FIdx == -1)
  516. return false;
  517. return TIdx == FIdx;
  518. }
  519. /// replacePHIInstrs - Completely replace PHI instructions with selects.
  520. /// This is possible when the only Tail predecessors are the if-converted
  521. /// blocks.
  522. void SSAIfConv::replacePHIInstrs() {
  523. assert(Tail->pred_size() == 2 && "Cannot replace PHIs");
  524. MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator();
  525. assert(FirstTerm != Head->end() && "No terminators");
  526. DebugLoc HeadDL = FirstTerm->getDebugLoc();
  527. // Convert all PHIs to select instructions inserted before FirstTerm.
  528. for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
  529. PHIInfo &PI = PHIs[i];
  530. LLVM_DEBUG(dbgs() << "If-converting " << *PI.PHI);
  531. Register DstReg = PI.PHI->getOperand(0).getReg();
  532. if (hasSameValue(*MRI, TII, PI.TReg, PI.FReg)) {
  533. // We do not need the select instruction if both incoming values are
  534. // equal, but we do need a COPY.
  535. BuildMI(*Head, FirstTerm, HeadDL, TII->get(TargetOpcode::COPY), DstReg)
  536. .addReg(PI.TReg);
  537. } else {
  538. TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg,
  539. PI.FReg);
  540. }
  541. LLVM_DEBUG(dbgs() << " --> " << *std::prev(FirstTerm));
  542. PI.PHI->eraseFromParent();
  543. PI.PHI = nullptr;
  544. }
  545. }
  546. /// rewritePHIOperands - When there are additional Tail predecessors, insert
  547. /// select instructions in Head and rewrite PHI operands to use the selects.
  548. /// Keep the PHI instructions in Tail to handle the other predecessors.
  549. void SSAIfConv::rewritePHIOperands() {
  550. MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator();
  551. assert(FirstTerm != Head->end() && "No terminators");
  552. DebugLoc HeadDL = FirstTerm->getDebugLoc();
  553. // Convert all PHIs to select instructions inserted before FirstTerm.
  554. for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
  555. PHIInfo &PI = PHIs[i];
  556. unsigned DstReg = 0;
  557. LLVM_DEBUG(dbgs() << "If-converting " << *PI.PHI);
  558. if (hasSameValue(*MRI, TII, PI.TReg, PI.FReg)) {
  559. // We do not need the select instruction if both incoming values are
  560. // equal.
  561. DstReg = PI.TReg;
  562. } else {
  563. Register PHIDst = PI.PHI->getOperand(0).getReg();
  564. DstReg = MRI->createVirtualRegister(MRI->getRegClass(PHIDst));
  565. TII->insertSelect(*Head, FirstTerm, HeadDL,
  566. DstReg, Cond, PI.TReg, PI.FReg);
  567. LLVM_DEBUG(dbgs() << " --> " << *std::prev(FirstTerm));
  568. }
  569. // Rewrite PHI operands TPred -> (DstReg, Head), remove FPred.
  570. for (unsigned i = PI.PHI->getNumOperands(); i != 1; i -= 2) {
  571. MachineBasicBlock *MBB = PI.PHI->getOperand(i-1).getMBB();
  572. if (MBB == getTPred()) {
  573. PI.PHI->getOperand(i-1).setMBB(Head);
  574. PI.PHI->getOperand(i-2).setReg(DstReg);
  575. } else if (MBB == getFPred()) {
  576. PI.PHI->RemoveOperand(i-1);
  577. PI.PHI->RemoveOperand(i-2);
  578. }
  579. }
  580. LLVM_DEBUG(dbgs() << " --> " << *PI.PHI);
  581. }
  582. }
  583. /// convertIf - Execute the if conversion after canConvertIf has determined the
  584. /// feasibility.
  585. ///
  586. /// Any basic blocks erased will be added to RemovedBlocks.
  587. ///
  588. void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks,
  589. bool Predicate) {
  590. assert(Head && Tail && TBB && FBB && "Call canConvertIf first.");
  591. // Update statistics.
  592. if (isTriangle())
  593. ++NumTrianglesConv;
  594. else
  595. ++NumDiamondsConv;
  596. // Move all instructions into Head, except for the terminators.
  597. if (TBB != Tail) {
  598. if (Predicate)
  599. PredicateBlock(TBB, /*ReversePredicate=*/false);
  600. Head->splice(InsertionPoint, TBB, TBB->begin(), TBB->getFirstTerminator());
  601. }
  602. if (FBB != Tail) {
  603. if (Predicate)
  604. PredicateBlock(FBB, /*ReversePredicate=*/true);
  605. Head->splice(InsertionPoint, FBB, FBB->begin(), FBB->getFirstTerminator());
  606. }
  607. // Are there extra Tail predecessors?
  608. bool ExtraPreds = Tail->pred_size() != 2;
  609. if (ExtraPreds)
  610. rewritePHIOperands();
  611. else
  612. replacePHIInstrs();
  613. // Fix up the CFG, temporarily leave Head without any successors.
  614. Head->removeSuccessor(TBB);
  615. Head->removeSuccessor(FBB, true);
  616. if (TBB != Tail)
  617. TBB->removeSuccessor(Tail, true);
  618. if (FBB != Tail)
  619. FBB->removeSuccessor(Tail, true);
  620. // Fix up Head's terminators.
  621. // It should become a single branch or a fallthrough.
  622. DebugLoc HeadDL = Head->getFirstTerminator()->getDebugLoc();
  623. TII->removeBranch(*Head);
  624. // Erase the now empty conditional blocks. It is likely that Head can fall
  625. // through to Tail, and we can join the two blocks.
  626. if (TBB != Tail) {
  627. RemovedBlocks.push_back(TBB);
  628. TBB->eraseFromParent();
  629. }
  630. if (FBB != Tail) {
  631. RemovedBlocks.push_back(FBB);
  632. FBB->eraseFromParent();
  633. }
  634. assert(Head->succ_empty() && "Additional head successors?");
  635. if (!ExtraPreds && Head->isLayoutSuccessor(Tail)) {
  636. // Splice Tail onto the end of Head.
  637. LLVM_DEBUG(dbgs() << "Joining tail " << printMBBReference(*Tail)
  638. << " into head " << printMBBReference(*Head) << '\n');
  639. Head->splice(Head->end(), Tail,
  640. Tail->begin(), Tail->end());
  641. Head->transferSuccessorsAndUpdatePHIs(Tail);
  642. RemovedBlocks.push_back(Tail);
  643. Tail->eraseFromParent();
  644. } else {
  645. // We need a branch to Tail, let code placement work it out later.
  646. LLVM_DEBUG(dbgs() << "Converting to unconditional branch.\n");
  647. SmallVector<MachineOperand, 0> EmptyCond;
  648. TII->insertBranch(*Head, Tail, nullptr, EmptyCond, HeadDL);
  649. Head->addSuccessor(Tail);
  650. }
  651. LLVM_DEBUG(dbgs() << *Head);
  652. }
  653. //===----------------------------------------------------------------------===//
  654. // EarlyIfConverter Pass
  655. //===----------------------------------------------------------------------===//
  656. namespace {
  657. class EarlyIfConverter : public MachineFunctionPass {
  658. const TargetInstrInfo *TII;
  659. const TargetRegisterInfo *TRI;
  660. MCSchedModel SchedModel;
  661. MachineRegisterInfo *MRI;
  662. MachineDominatorTree *DomTree;
  663. MachineLoopInfo *Loops;
  664. MachineTraceMetrics *Traces;
  665. MachineTraceMetrics::Ensemble *MinInstr;
  666. SSAIfConv IfConv;
  667. public:
  668. static char ID;
  669. EarlyIfConverter() : MachineFunctionPass(ID) {}
  670. void getAnalysisUsage(AnalysisUsage &AU) const override;
  671. bool runOnMachineFunction(MachineFunction &MF) override;
  672. StringRef getPassName() const override { return "Early If-Conversion"; }
  673. private:
  674. bool tryConvertIf(MachineBasicBlock*);
  675. void invalidateTraces();
  676. bool shouldConvertIf();
  677. };
  678. } // end anonymous namespace
  679. char EarlyIfConverter::ID = 0;
  680. char &llvm::EarlyIfConverterID = EarlyIfConverter::ID;
  681. INITIALIZE_PASS_BEGIN(EarlyIfConverter, DEBUG_TYPE,
  682. "Early If Converter", false, false)
  683. INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
  684. INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
  685. INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
  686. INITIALIZE_PASS_END(EarlyIfConverter, DEBUG_TYPE,
  687. "Early If Converter", false, false)
  688. void EarlyIfConverter::getAnalysisUsage(AnalysisUsage &AU) const {
  689. AU.addRequired<MachineBranchProbabilityInfo>();
  690. AU.addRequired<MachineDominatorTree>();
  691. AU.addPreserved<MachineDominatorTree>();
  692. AU.addRequired<MachineLoopInfo>();
  693. AU.addPreserved<MachineLoopInfo>();
  694. AU.addRequired<MachineTraceMetrics>();
  695. AU.addPreserved<MachineTraceMetrics>();
  696. MachineFunctionPass::getAnalysisUsage(AU);
  697. }
  698. namespace {
  699. /// Update the dominator tree after if-conversion erased some blocks.
  700. void updateDomTree(MachineDominatorTree *DomTree, const SSAIfConv &IfConv,
  701. ArrayRef<MachineBasicBlock *> Removed) {
  702. // convertIf can remove TBB, FBB, and Tail can be merged into Head.
  703. // TBB and FBB should not dominate any blocks.
  704. // Tail children should be transferred to Head.
  705. MachineDomTreeNode *HeadNode = DomTree->getNode(IfConv.Head);
  706. for (auto B : Removed) {
  707. MachineDomTreeNode *Node = DomTree->getNode(B);
  708. assert(Node != HeadNode && "Cannot erase the head node");
  709. while (Node->getNumChildren()) {
  710. assert(Node->getBlock() == IfConv.Tail && "Unexpected children");
  711. DomTree->changeImmediateDominator(Node->back(), HeadNode);
  712. }
  713. DomTree->eraseNode(B);
  714. }
  715. }
  716. /// Update LoopInfo after if-conversion.
  717. void updateLoops(MachineLoopInfo *Loops,
  718. ArrayRef<MachineBasicBlock *> Removed) {
  719. if (!Loops)
  720. return;
  721. // If-conversion doesn't change loop structure, and it doesn't mess with back
  722. // edges, so updating LoopInfo is simply removing the dead blocks.
  723. for (auto B : Removed)
  724. Loops->removeBlock(B);
  725. }
  726. } // namespace
  727. /// Invalidate MachineTraceMetrics before if-conversion.
  728. void EarlyIfConverter::invalidateTraces() {
  729. Traces->verifyAnalysis();
  730. Traces->invalidate(IfConv.Head);
  731. Traces->invalidate(IfConv.Tail);
  732. Traces->invalidate(IfConv.TBB);
  733. Traces->invalidate(IfConv.FBB);
  734. Traces->verifyAnalysis();
  735. }
  736. // Adjust cycles with downward saturation.
  737. static unsigned adjCycles(unsigned Cyc, int Delta) {
  738. if (Delta < 0 && Cyc + Delta > Cyc)
  739. return 0;
  740. return Cyc + Delta;
  741. }
  742. namespace {
  743. /// Helper class to simplify emission of cycle counts into optimization remarks.
  744. struct Cycles {
  745. const char *Key;
  746. unsigned Value;
  747. };
  748. template <typename Remark> Remark &operator<<(Remark &R, Cycles C) {
  749. return R << ore::NV(C.Key, C.Value) << (C.Value == 1 ? " cycle" : " cycles");
  750. }
  751. } // anonymous namespace
  752. /// Apply cost model and heuristics to the if-conversion in IfConv.
  753. /// Return true if the conversion is a good idea.
  754. ///
  755. bool EarlyIfConverter::shouldConvertIf() {
  756. // Stress testing mode disables all cost considerations.
  757. if (Stress)
  758. return true;
  759. if (!MinInstr)
  760. MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
  761. MachineTraceMetrics::Trace TBBTrace = MinInstr->getTrace(IfConv.getTPred());
  762. MachineTraceMetrics::Trace FBBTrace = MinInstr->getTrace(IfConv.getFPred());
  763. LLVM_DEBUG(dbgs() << "TBB: " << TBBTrace << "FBB: " << FBBTrace);
  764. unsigned MinCrit = std::min(TBBTrace.getCriticalPath(),
  765. FBBTrace.getCriticalPath());
  766. // Set a somewhat arbitrary limit on the critical path extension we accept.
  767. unsigned CritLimit = SchedModel.MispredictPenalty/2;
  768. MachineBasicBlock &MBB = *IfConv.Head;
  769. MachineOptimizationRemarkEmitter MORE(*MBB.getParent(), nullptr);
  770. // If-conversion only makes sense when there is unexploited ILP. Compute the
  771. // maximum-ILP resource length of the trace after if-conversion. Compare it
  772. // to the shortest critical path.
  773. SmallVector<const MachineBasicBlock*, 1> ExtraBlocks;
  774. if (IfConv.TBB != IfConv.Tail)
  775. ExtraBlocks.push_back(IfConv.TBB);
  776. unsigned ResLength = FBBTrace.getResourceLength(ExtraBlocks);
  777. LLVM_DEBUG(dbgs() << "Resource length " << ResLength
  778. << ", minimal critical path " << MinCrit << '\n');
  779. if (ResLength > MinCrit + CritLimit) {
  780. LLVM_DEBUG(dbgs() << "Not enough available ILP.\n");
  781. MORE.emit([&]() {
  782. MachineOptimizationRemarkMissed R(DEBUG_TYPE, "IfConversion",
  783. MBB.findDebugLoc(MBB.back()), &MBB);
  784. R << "did not if-convert branch: the resulting critical path ("
  785. << Cycles{"ResLength", ResLength}
  786. << ") would extend the shorter leg's critical path ("
  787. << Cycles{"MinCrit", MinCrit} << ") by more than the threshold of "
  788. << Cycles{"CritLimit", CritLimit}
  789. << ", which cannot be hidden by available ILP.";
  790. return R;
  791. });
  792. return false;
  793. }
  794. // Assume that the depth of the first head terminator will also be the depth
  795. // of the select instruction inserted, as determined by the flag dependency.
  796. // TBB / FBB data dependencies may delay the select even more.
  797. MachineTraceMetrics::Trace HeadTrace = MinInstr->getTrace(IfConv.Head);
  798. unsigned BranchDepth =
  799. HeadTrace.getInstrCycles(*IfConv.Head->getFirstTerminator()).Depth;
  800. LLVM_DEBUG(dbgs() << "Branch depth: " << BranchDepth << '\n');
  801. // Look at all the tail phis, and compute the critical path extension caused
  802. // by inserting select instructions.
  803. MachineTraceMetrics::Trace TailTrace = MinInstr->getTrace(IfConv.Tail);
  804. struct CriticalPathInfo {
  805. unsigned Extra; // Count of extra cycles that the component adds.
  806. unsigned Depth; // Absolute depth of the component in cycles.
  807. };
  808. CriticalPathInfo Cond{};
  809. CriticalPathInfo TBlock{};
  810. CriticalPathInfo FBlock{};
  811. bool ShouldConvert = true;
  812. for (unsigned i = 0, e = IfConv.PHIs.size(); i != e; ++i) {
  813. SSAIfConv::PHIInfo &PI = IfConv.PHIs[i];
  814. unsigned Slack = TailTrace.getInstrSlack(*PI.PHI);
  815. unsigned MaxDepth = Slack + TailTrace.getInstrCycles(*PI.PHI).Depth;
  816. LLVM_DEBUG(dbgs() << "Slack " << Slack << ":\t" << *PI.PHI);
  817. // The condition is pulled into the critical path.
  818. unsigned CondDepth = adjCycles(BranchDepth, PI.CondCycles);
  819. if (CondDepth > MaxDepth) {
  820. unsigned Extra = CondDepth - MaxDepth;
  821. LLVM_DEBUG(dbgs() << "Condition adds " << Extra << " cycles.\n");
  822. if (Extra > Cond.Extra)
  823. Cond = {Extra, CondDepth};
  824. if (Extra > CritLimit) {
  825. LLVM_DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n');
  826. ShouldConvert = false;
  827. }
  828. }
  829. // The TBB value is pulled into the critical path.
  830. unsigned TDepth = adjCycles(TBBTrace.getPHIDepth(*PI.PHI), PI.TCycles);
  831. if (TDepth > MaxDepth) {
  832. unsigned Extra = TDepth - MaxDepth;
  833. LLVM_DEBUG(dbgs() << "TBB data adds " << Extra << " cycles.\n");
  834. if (Extra > TBlock.Extra)
  835. TBlock = {Extra, TDepth};
  836. if (Extra > CritLimit) {
  837. LLVM_DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n');
  838. ShouldConvert = false;
  839. }
  840. }
  841. // The FBB value is pulled into the critical path.
  842. unsigned FDepth = adjCycles(FBBTrace.getPHIDepth(*PI.PHI), PI.FCycles);
  843. if (FDepth > MaxDepth) {
  844. unsigned Extra = FDepth - MaxDepth;
  845. LLVM_DEBUG(dbgs() << "FBB data adds " << Extra << " cycles.\n");
  846. if (Extra > FBlock.Extra)
  847. FBlock = {Extra, FDepth};
  848. if (Extra > CritLimit) {
  849. LLVM_DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n');
  850. ShouldConvert = false;
  851. }
  852. }
  853. }
  854. // Organize by "short" and "long" legs, since the diagnostics get confusing
  855. // when referring to the "true" and "false" sides of the branch, given that
  856. // those don't always correlate with what the user wrote in source-terms.
  857. const CriticalPathInfo Short = TBlock.Extra > FBlock.Extra ? FBlock : TBlock;
  858. const CriticalPathInfo Long = TBlock.Extra > FBlock.Extra ? TBlock : FBlock;
  859. if (ShouldConvert) {
  860. MORE.emit([&]() {
  861. MachineOptimizationRemark R(DEBUG_TYPE, "IfConversion",
  862. MBB.back().getDebugLoc(), &MBB);
  863. R << "performing if-conversion on branch: the condition adds "
  864. << Cycles{"CondCycles", Cond.Extra} << " to the critical path";
  865. if (Short.Extra > 0)
  866. R << ", and the short leg adds another "
  867. << Cycles{"ShortCycles", Short.Extra};
  868. if (Long.Extra > 0)
  869. R << ", and the long leg adds another "
  870. << Cycles{"LongCycles", Long.Extra};
  871. R << ", each staying under the threshold of "
  872. << Cycles{"CritLimit", CritLimit} << ".";
  873. return R;
  874. });
  875. } else {
  876. MORE.emit([&]() {
  877. MachineOptimizationRemarkMissed R(DEBUG_TYPE, "IfConversion",
  878. MBB.back().getDebugLoc(), &MBB);
  879. R << "did not if-convert branch: the condition would add "
  880. << Cycles{"CondCycles", Cond.Extra} << " to the critical path";
  881. if (Cond.Extra > CritLimit)
  882. R << " exceeding the limit of " << Cycles{"CritLimit", CritLimit};
  883. if (Short.Extra > 0) {
  884. R << ", and the short leg would add another "
  885. << Cycles{"ShortCycles", Short.Extra};
  886. if (Short.Extra > CritLimit)
  887. R << " exceeding the limit of " << Cycles{"CritLimit", CritLimit};
  888. }
  889. if (Long.Extra > 0) {
  890. R << ", and the long leg would add another "
  891. << Cycles{"LongCycles", Long.Extra};
  892. if (Long.Extra > CritLimit)
  893. R << " exceeding the limit of " << Cycles{"CritLimit", CritLimit};
  894. }
  895. R << ".";
  896. return R;
  897. });
  898. }
  899. return ShouldConvert;
  900. }
  901. /// Attempt repeated if-conversion on MBB, return true if successful.
  902. ///
  903. bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) {
  904. bool Changed = false;
  905. while (IfConv.canConvertIf(MBB) && shouldConvertIf()) {
  906. // If-convert MBB and update analyses.
  907. invalidateTraces();
  908. SmallVector<MachineBasicBlock*, 4> RemovedBlocks;
  909. IfConv.convertIf(RemovedBlocks);
  910. Changed = true;
  911. updateDomTree(DomTree, IfConv, RemovedBlocks);
  912. updateLoops(Loops, RemovedBlocks);
  913. }
  914. return Changed;
  915. }
  916. bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) {
  917. LLVM_DEBUG(dbgs() << "********** EARLY IF-CONVERSION **********\n"
  918. << "********** Function: " << MF.getName() << '\n');
  919. if (skipFunction(MF.getFunction()))
  920. return false;
  921. // Only run if conversion if the target wants it.
  922. const TargetSubtargetInfo &STI = MF.getSubtarget();
  923. if (!STI.enableEarlyIfConversion())
  924. return false;
  925. TII = STI.getInstrInfo();
  926. TRI = STI.getRegisterInfo();
  927. SchedModel = STI.getSchedModel();
  928. MRI = &MF.getRegInfo();
  929. DomTree = &getAnalysis<MachineDominatorTree>();
  930. Loops = getAnalysisIfAvailable<MachineLoopInfo>();
  931. Traces = &getAnalysis<MachineTraceMetrics>();
  932. MinInstr = nullptr;
  933. bool Changed = false;
  934. IfConv.runOnMachineFunction(MF);
  935. // Visit blocks in dominator tree post-order. The post-order enables nested
  936. // if-conversion in a single pass. The tryConvertIf() function may erase
  937. // blocks, but only blocks dominated by the head block. This makes it safe to
  938. // update the dominator tree while the post-order iterator is still active.
  939. for (auto DomNode : post_order(DomTree))
  940. if (tryConvertIf(DomNode->getBlock()))
  941. Changed = true;
  942. return Changed;
  943. }
  944. //===----------------------------------------------------------------------===//
  945. // EarlyIfPredicator Pass
  946. //===----------------------------------------------------------------------===//
  947. namespace {
  948. class EarlyIfPredicator : public MachineFunctionPass {
  949. const TargetInstrInfo *TII;
  950. const TargetRegisterInfo *TRI;
  951. TargetSchedModel SchedModel;
  952. MachineRegisterInfo *MRI;
  953. MachineDominatorTree *DomTree;
  954. MachineBranchProbabilityInfo *MBPI;
  955. MachineLoopInfo *Loops;
  956. SSAIfConv IfConv;
  957. public:
  958. static char ID;
  959. EarlyIfPredicator() : MachineFunctionPass(ID) {}
  960. void getAnalysisUsage(AnalysisUsage &AU) const override;
  961. bool runOnMachineFunction(MachineFunction &MF) override;
  962. StringRef getPassName() const override { return "Early If-predicator"; }
  963. protected:
  964. bool tryConvertIf(MachineBasicBlock *);
  965. bool shouldConvertIf();
  966. };
  967. } // end anonymous namespace
  968. #undef DEBUG_TYPE
  969. #define DEBUG_TYPE "early-if-predicator"
  970. char EarlyIfPredicator::ID = 0;
  971. char &llvm::EarlyIfPredicatorID = EarlyIfPredicator::ID;
  972. INITIALIZE_PASS_BEGIN(EarlyIfPredicator, DEBUG_TYPE, "Early If Predicator",
  973. false, false)
  974. INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
  975. INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
  976. INITIALIZE_PASS_END(EarlyIfPredicator, DEBUG_TYPE, "Early If Predicator", false,
  977. false)
  978. void EarlyIfPredicator::getAnalysisUsage(AnalysisUsage &AU) const {
  979. AU.addRequired<MachineBranchProbabilityInfo>();
  980. AU.addRequired<MachineDominatorTree>();
  981. AU.addPreserved<MachineDominatorTree>();
  982. AU.addRequired<MachineLoopInfo>();
  983. AU.addPreserved<MachineLoopInfo>();
  984. MachineFunctionPass::getAnalysisUsage(AU);
  985. }
  986. /// Apply the target heuristic to decide if the transformation is profitable.
  987. bool EarlyIfPredicator::shouldConvertIf() {
  988. auto TrueProbability = MBPI->getEdgeProbability(IfConv.Head, IfConv.TBB);
  989. if (IfConv.isTriangle()) {
  990. MachineBasicBlock &IfBlock =
  991. (IfConv.TBB == IfConv.Tail) ? *IfConv.FBB : *IfConv.TBB;
  992. unsigned ExtraPredCost = 0;
  993. unsigned Cycles = 0;
  994. for (MachineInstr &I : IfBlock) {
  995. unsigned NumCycles = SchedModel.computeInstrLatency(&I, false);
  996. if (NumCycles > 1)
  997. Cycles += NumCycles - 1;
  998. ExtraPredCost += TII->getPredicationCost(I);
  999. }
  1000. return TII->isProfitableToIfCvt(IfBlock, Cycles, ExtraPredCost,
  1001. TrueProbability);
  1002. }
  1003. unsigned TExtra = 0;
  1004. unsigned FExtra = 0;
  1005. unsigned TCycle = 0;
  1006. unsigned FCycle = 0;
  1007. for (MachineInstr &I : *IfConv.TBB) {
  1008. unsigned NumCycles = SchedModel.computeInstrLatency(&I, false);
  1009. if (NumCycles > 1)
  1010. TCycle += NumCycles - 1;
  1011. TExtra += TII->getPredicationCost(I);
  1012. }
  1013. for (MachineInstr &I : *IfConv.FBB) {
  1014. unsigned NumCycles = SchedModel.computeInstrLatency(&I, false);
  1015. if (NumCycles > 1)
  1016. FCycle += NumCycles - 1;
  1017. FExtra += TII->getPredicationCost(I);
  1018. }
  1019. return TII->isProfitableToIfCvt(*IfConv.TBB, TCycle, TExtra, *IfConv.FBB,
  1020. FCycle, FExtra, TrueProbability);
  1021. }
  1022. /// Attempt repeated if-conversion on MBB, return true if successful.
  1023. ///
  1024. bool EarlyIfPredicator::tryConvertIf(MachineBasicBlock *MBB) {
  1025. bool Changed = false;
  1026. while (IfConv.canConvertIf(MBB, /*Predicate*/ true) && shouldConvertIf()) {
  1027. // If-convert MBB and update analyses.
  1028. SmallVector<MachineBasicBlock *, 4> RemovedBlocks;
  1029. IfConv.convertIf(RemovedBlocks, /*Predicate*/ true);
  1030. Changed = true;
  1031. updateDomTree(DomTree, IfConv, RemovedBlocks);
  1032. updateLoops(Loops, RemovedBlocks);
  1033. }
  1034. return Changed;
  1035. }
  1036. bool EarlyIfPredicator::runOnMachineFunction(MachineFunction &MF) {
  1037. LLVM_DEBUG(dbgs() << "********** EARLY IF-PREDICATOR **********\n"
  1038. << "********** Function: " << MF.getName() << '\n');
  1039. if (skipFunction(MF.getFunction()))
  1040. return false;
  1041. const TargetSubtargetInfo &STI = MF.getSubtarget();
  1042. TII = STI.getInstrInfo();
  1043. TRI = STI.getRegisterInfo();
  1044. MRI = &MF.getRegInfo();
  1045. SchedModel.init(&STI);
  1046. DomTree = &getAnalysis<MachineDominatorTree>();
  1047. Loops = getAnalysisIfAvailable<MachineLoopInfo>();
  1048. MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
  1049. bool Changed = false;
  1050. IfConv.runOnMachineFunction(MF);
  1051. // Visit blocks in dominator tree post-order. The post-order enables nested
  1052. // if-conversion in a single pass. The tryConvertIf() function may erase
  1053. // blocks, but only blocks dominated by the head block. This makes it safe to
  1054. // update the dominator tree while the post-order iterator is still active.
  1055. for (auto DomNode : post_order(DomTree))
  1056. if (tryConvertIf(DomNode->getBlock()))
  1057. Changed = true;
  1058. return Changed;
  1059. }