AArch64ConditionalCompares.cpp 33 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957
  1. //===-- AArch64ConditionalCompares.cpp --- CCMP formation for AArch64 -----===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file implements the AArch64ConditionalCompares pass which reduces
  10. // branching and code size by using the conditional compare instructions CCMP,
  11. // CCMN, and FCMP.
  12. //
  13. // The CFG transformations for forming conditional compares are very similar to
  14. // if-conversion, and this pass should run immediately before the early
  15. // if-conversion pass.
  16. //
  17. //===----------------------------------------------------------------------===//
  18. #include "AArch64.h"
  19. #include "llvm/ADT/DepthFirstIterator.h"
  20. #include "llvm/ADT/SmallPtrSet.h"
  21. #include "llvm/ADT/Statistic.h"
  22. #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
  23. #include "llvm/CodeGen/MachineDominators.h"
  24. #include "llvm/CodeGen/MachineFunction.h"
  25. #include "llvm/CodeGen/MachineFunctionPass.h"
  26. #include "llvm/CodeGen/MachineInstrBuilder.h"
  27. #include "llvm/CodeGen/MachineLoopInfo.h"
  28. #include "llvm/CodeGen/MachineRegisterInfo.h"
  29. #include "llvm/CodeGen/MachineTraceMetrics.h"
  30. #include "llvm/CodeGen/Passes.h"
  31. #include "llvm/CodeGen/TargetInstrInfo.h"
  32. #include "llvm/CodeGen/TargetRegisterInfo.h"
  33. #include "llvm/CodeGen/TargetSubtargetInfo.h"
  34. #include "llvm/InitializePasses.h"
  35. #include "llvm/Support/CommandLine.h"
  36. #include "llvm/Support/Debug.h"
  37. #include "llvm/Support/raw_ostream.h"
  38. using namespace llvm;
  39. #define DEBUG_TYPE "aarch64-ccmp"
  40. // Absolute maximum number of instructions allowed per speculated block.
  41. // This bypasses all other heuristics, so it should be set fairly high.
  42. static cl::opt<unsigned> BlockInstrLimit(
  43. "aarch64-ccmp-limit", cl::init(30), cl::Hidden,
  44. cl::desc("Maximum number of instructions per speculated block."));
  45. // Stress testing mode - disable heuristics.
  46. static cl::opt<bool> Stress("aarch64-stress-ccmp", cl::Hidden,
  47. cl::desc("Turn all knobs to 11"));
  48. STATISTIC(NumConsidered, "Number of ccmps considered");
  49. STATISTIC(NumPhiRejs, "Number of ccmps rejected (PHI)");
  50. STATISTIC(NumPhysRejs, "Number of ccmps rejected (Physregs)");
  51. STATISTIC(NumPhi2Rejs, "Number of ccmps rejected (PHI2)");
  52. STATISTIC(NumHeadBranchRejs, "Number of ccmps rejected (Head branch)");
  53. STATISTIC(NumCmpBranchRejs, "Number of ccmps rejected (CmpBB branch)");
  54. STATISTIC(NumCmpTermRejs, "Number of ccmps rejected (CmpBB is cbz...)");
  55. STATISTIC(NumImmRangeRejs, "Number of ccmps rejected (Imm out of range)");
  56. STATISTIC(NumLiveDstRejs, "Number of ccmps rejected (Cmp dest live)");
  57. STATISTIC(NumMultNZCVUses, "Number of ccmps rejected (NZCV used)");
  58. STATISTIC(NumUnknNZCVDefs, "Number of ccmps rejected (NZCV def unknown)");
  59. STATISTIC(NumSpeculateRejs, "Number of ccmps rejected (Can't speculate)");
  60. STATISTIC(NumConverted, "Number of ccmp instructions created");
  61. STATISTIC(NumCompBranches, "Number of cbz/cbnz branches converted");
  62. //===----------------------------------------------------------------------===//
  63. // SSACCmpConv
  64. //===----------------------------------------------------------------------===//
  65. //
  66. // The SSACCmpConv class performs ccmp-conversion on SSA form machine code
  67. // after determining if it is possible. The class contains no heuristics;
  68. // external code should be used to determine when ccmp-conversion is a good
  69. // idea.
  70. //
  71. // CCmp-formation works on a CFG representing chained conditions, typically
  72. // from C's short-circuit || and && operators:
  73. //
  74. // From: Head To: Head
  75. // / | CmpBB
  76. // / | / |
  77. // | CmpBB / |
  78. // | / | Tail |
  79. // | / | | |
  80. // Tail | | |
  81. // | | | |
  82. // ... ... ... ...
  83. //
  84. // The Head block is terminated by a br.cond instruction, and the CmpBB block
  85. // contains compare + br.cond. Tail must be a successor of both.
  86. //
  87. // The cmp-conversion turns the compare instruction in CmpBB into a conditional
  88. // compare, and merges CmpBB into Head, speculatively executing its
  89. // instructions. The AArch64 conditional compare instructions have an immediate
  90. // operand that specifies the NZCV flag values when the condition is false and
  91. // the compare isn't executed. This makes it possible to chain compares with
  92. // different condition codes.
  93. //
  94. // Example:
  95. //
  96. // if (a == 5 || b == 17)
  97. // foo();
  98. //
  99. // Head:
  100. // cmp w0, #5
  101. // b.eq Tail
  102. // CmpBB:
  103. // cmp w1, #17
  104. // b.eq Tail
  105. // ...
  106. // Tail:
  107. // bl _foo
  108. //
  109. // Becomes:
  110. //
  111. // Head:
  112. // cmp w0, #5
  113. // ccmp w1, #17, 4, ne ; 4 = nZcv
  114. // b.eq Tail
  115. // ...
  116. // Tail:
  117. // bl _foo
  118. //
  119. // The ccmp condition code is the one that would cause the Head terminator to
  120. // branch to CmpBB.
  121. //
  122. // FIXME: It should also be possible to speculate a block on the critical edge
  123. // between Head and Tail, just like if-converting a diamond.
  124. //
  125. // FIXME: Handle PHIs in Tail by turning them into selects (if-conversion).
  126. namespace {
  127. class SSACCmpConv {
  128. MachineFunction *MF;
  129. const TargetInstrInfo *TII;
  130. const TargetRegisterInfo *TRI;
  131. MachineRegisterInfo *MRI;
  132. const MachineBranchProbabilityInfo *MBPI;
  133. public:
  134. /// The first block containing a conditional branch, dominating everything
  135. /// else.
  136. MachineBasicBlock *Head;
  137. /// The block containing cmp+br.cond with a successor shared with Head.
  138. MachineBasicBlock *CmpBB;
  139. /// The common successor for Head and CmpBB.
  140. MachineBasicBlock *Tail;
  141. /// The compare instruction in CmpBB that can be converted to a ccmp.
  142. MachineInstr *CmpMI;
  143. private:
  144. /// The branch condition in Head as determined by analyzeBranch.
  145. SmallVector<MachineOperand, 4> HeadCond;
  146. /// The condition code that makes Head branch to CmpBB.
  147. AArch64CC::CondCode HeadCmpBBCC;
  148. /// The branch condition in CmpBB.
  149. SmallVector<MachineOperand, 4> CmpBBCond;
  150. /// The condition code that makes CmpBB branch to Tail.
  151. AArch64CC::CondCode CmpBBTailCC;
  152. /// Check if the Tail PHIs are trivially convertible.
  153. bool trivialTailPHIs();
  154. /// Remove CmpBB from the Tail PHIs.
  155. void updateTailPHIs();
  156. /// Check if an operand defining DstReg is dead.
  157. bool isDeadDef(unsigned DstReg);
  158. /// Find the compare instruction in MBB that controls the conditional branch.
  159. /// Return NULL if a convertible instruction can't be found.
  160. MachineInstr *findConvertibleCompare(MachineBasicBlock *MBB);
  161. /// Return true if all non-terminator instructions in MBB can be safely
  162. /// speculated.
  163. bool canSpeculateInstrs(MachineBasicBlock *MBB, const MachineInstr *CmpMI);
  164. public:
  165. /// runOnMachineFunction - Initialize per-function data structures.
  166. void runOnMachineFunction(MachineFunction &MF,
  167. const MachineBranchProbabilityInfo *MBPI) {
  168. this->MF = &MF;
  169. this->MBPI = MBPI;
  170. TII = MF.getSubtarget().getInstrInfo();
  171. TRI = MF.getSubtarget().getRegisterInfo();
  172. MRI = &MF.getRegInfo();
  173. }
  174. /// If the sub-CFG headed by MBB can be cmp-converted, initialize the
  175. /// internal state, and return true.
  176. bool canConvert(MachineBasicBlock *MBB);
  177. /// Cmo-convert the last block passed to canConvertCmp(), assuming
  178. /// it is possible. Add any erased blocks to RemovedBlocks.
  179. void convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks);
  180. /// Return the expected code size delta if the conversion into a
  181. /// conditional compare is performed.
  182. int expectedCodeSizeDelta() const;
  183. };
  184. } // end anonymous namespace
  185. // Check that all PHIs in Tail are selecting the same value from Head and CmpBB.
  186. // This means that no if-conversion is required when merging CmpBB into Head.
  187. bool SSACCmpConv::trivialTailPHIs() {
  188. for (auto &I : *Tail) {
  189. if (!I.isPHI())
  190. break;
  191. unsigned HeadReg = 0, CmpBBReg = 0;
  192. // PHI operands come in (VReg, MBB) pairs.
  193. for (unsigned oi = 1, oe = I.getNumOperands(); oi != oe; oi += 2) {
  194. MachineBasicBlock *MBB = I.getOperand(oi + 1).getMBB();
  195. Register Reg = I.getOperand(oi).getReg();
  196. if (MBB == Head) {
  197. assert((!HeadReg || HeadReg == Reg) && "Inconsistent PHI operands");
  198. HeadReg = Reg;
  199. }
  200. if (MBB == CmpBB) {
  201. assert((!CmpBBReg || CmpBBReg == Reg) && "Inconsistent PHI operands");
  202. CmpBBReg = Reg;
  203. }
  204. }
  205. if (HeadReg != CmpBBReg)
  206. return false;
  207. }
  208. return true;
  209. }
  210. // Assuming that trivialTailPHIs() is true, update the Tail PHIs by simply
  211. // removing the CmpBB operands. The Head operands will be identical.
  212. void SSACCmpConv::updateTailPHIs() {
  213. for (auto &I : *Tail) {
  214. if (!I.isPHI())
  215. break;
  216. // I is a PHI. It can have multiple entries for CmpBB.
  217. for (unsigned oi = I.getNumOperands(); oi > 2; oi -= 2) {
  218. // PHI operands are (Reg, MBB) at (oi-2, oi-1).
  219. if (I.getOperand(oi - 1).getMBB() == CmpBB) {
  220. I.removeOperand(oi - 1);
  221. I.removeOperand(oi - 2);
  222. }
  223. }
  224. }
  225. }
  226. // This pass runs before the AArch64DeadRegisterDefinitions pass, so compares
  227. // are still writing virtual registers without any uses.
  228. bool SSACCmpConv::isDeadDef(unsigned DstReg) {
  229. // Writes to the zero register are dead.
  230. if (DstReg == AArch64::WZR || DstReg == AArch64::XZR)
  231. return true;
  232. if (!Register::isVirtualRegister(DstReg))
  233. return false;
  234. // A virtual register def without any uses will be marked dead later, and
  235. // eventually replaced by the zero register.
  236. return MRI->use_nodbg_empty(DstReg);
  237. }
  238. // Parse a condition code returned by analyzeBranch, and compute the CondCode
  239. // corresponding to TBB.
  240. // Return
  241. static bool parseCond(ArrayRef<MachineOperand> Cond, AArch64CC::CondCode &CC) {
  242. // A normal br.cond simply has the condition code.
  243. if (Cond[0].getImm() != -1) {
  244. assert(Cond.size() == 1 && "Unknown Cond array format");
  245. CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
  246. return true;
  247. }
  248. // For tbz and cbz instruction, the opcode is next.
  249. switch (Cond[1].getImm()) {
  250. default:
  251. // This includes tbz / tbnz branches which can't be converted to
  252. // ccmp + br.cond.
  253. return false;
  254. case AArch64::CBZW:
  255. case AArch64::CBZX:
  256. assert(Cond.size() == 3 && "Unknown Cond array format");
  257. CC = AArch64CC::EQ;
  258. return true;
  259. case AArch64::CBNZW:
  260. case AArch64::CBNZX:
  261. assert(Cond.size() == 3 && "Unknown Cond array format");
  262. CC = AArch64CC::NE;
  263. return true;
  264. }
  265. }
  266. MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) {
  267. MachineBasicBlock::iterator I = MBB->getFirstTerminator();
  268. if (I == MBB->end())
  269. return nullptr;
  270. // The terminator must be controlled by the flags.
  271. if (!I->readsRegister(AArch64::NZCV)) {
  272. switch (I->getOpcode()) {
  273. case AArch64::CBZW:
  274. case AArch64::CBZX:
  275. case AArch64::CBNZW:
  276. case AArch64::CBNZX:
  277. // These can be converted into a ccmp against #0.
  278. return &*I;
  279. }
  280. ++NumCmpTermRejs;
  281. LLVM_DEBUG(dbgs() << "Flags not used by terminator: " << *I);
  282. return nullptr;
  283. }
  284. // Now find the instruction controlling the terminator.
  285. for (MachineBasicBlock::iterator B = MBB->begin(); I != B;) {
  286. I = prev_nodbg(I, MBB->begin());
  287. assert(!I->isTerminator() && "Spurious terminator");
  288. switch (I->getOpcode()) {
  289. // cmp is an alias for subs with a dead destination register.
  290. case AArch64::SUBSWri:
  291. case AArch64::SUBSXri:
  292. // cmn is an alias for adds with a dead destination register.
  293. case AArch64::ADDSWri:
  294. case AArch64::ADDSXri:
  295. // Check that the immediate operand is within range, ccmp wants a uimm5.
  296. // Rd = SUBSri Rn, imm, shift
  297. if (I->getOperand(3).getImm() || !isUInt<5>(I->getOperand(2).getImm())) {
  298. LLVM_DEBUG(dbgs() << "Immediate out of range for ccmp: " << *I);
  299. ++NumImmRangeRejs;
  300. return nullptr;
  301. }
  302. [[fallthrough]];
  303. case AArch64::SUBSWrr:
  304. case AArch64::SUBSXrr:
  305. case AArch64::ADDSWrr:
  306. case AArch64::ADDSXrr:
  307. if (isDeadDef(I->getOperand(0).getReg()))
  308. return &*I;
  309. LLVM_DEBUG(dbgs() << "Can't convert compare with live destination: "
  310. << *I);
  311. ++NumLiveDstRejs;
  312. return nullptr;
  313. case AArch64::FCMPSrr:
  314. case AArch64::FCMPDrr:
  315. case AArch64::FCMPESrr:
  316. case AArch64::FCMPEDrr:
  317. return &*I;
  318. }
  319. // Check for flag reads and clobbers.
  320. PhysRegInfo PRI = AnalyzePhysRegInBundle(*I, AArch64::NZCV, TRI);
  321. if (PRI.Read) {
  322. // The ccmp doesn't produce exactly the same flags as the original
  323. // compare, so reject the transform if there are uses of the flags
  324. // besides the terminators.
  325. LLVM_DEBUG(dbgs() << "Can't create ccmp with multiple uses: " << *I);
  326. ++NumMultNZCVUses;
  327. return nullptr;
  328. }
  329. if (PRI.Defined || PRI.Clobbered) {
  330. LLVM_DEBUG(dbgs() << "Not convertible compare: " << *I);
  331. ++NumUnknNZCVDefs;
  332. return nullptr;
  333. }
  334. }
  335. LLVM_DEBUG(dbgs() << "Flags not defined in " << printMBBReference(*MBB)
  336. << '\n');
  337. return nullptr;
  338. }
  339. /// Determine if all the instructions in MBB can safely
  340. /// be speculated. The terminators are not considered.
  341. ///
  342. /// Only CmpMI is allowed to clobber the flags.
  343. ///
  344. bool SSACCmpConv::canSpeculateInstrs(MachineBasicBlock *MBB,
  345. const MachineInstr *CmpMI) {
  346. // Reject any live-in physregs. It's probably NZCV/EFLAGS, and very hard to
  347. // get right.
  348. if (!MBB->livein_empty()) {
  349. LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has live-ins.\n");
  350. return false;
  351. }
  352. unsigned InstrCount = 0;
  353. // Check all instructions, except the terminators. It is assumed that
  354. // terminators never have side effects or define any used register values.
  355. for (auto &I : make_range(MBB->begin(), MBB->getFirstTerminator())) {
  356. if (I.isDebugInstr())
  357. continue;
  358. if (++InstrCount > BlockInstrLimit && !Stress) {
  359. LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has more than "
  360. << BlockInstrLimit << " instructions.\n");
  361. return false;
  362. }
  363. // There shouldn't normally be any phis in a single-predecessor block.
  364. if (I.isPHI()) {
  365. LLVM_DEBUG(dbgs() << "Can't hoist: " << I);
  366. return false;
  367. }
  368. // Don't speculate loads. Note that it may be possible and desirable to
  369. // speculate GOT or constant pool loads that are guaranteed not to trap,
  370. // but we don't support that for now.
  371. if (I.mayLoad()) {
  372. LLVM_DEBUG(dbgs() << "Won't speculate load: " << I);
  373. return false;
  374. }
  375. // We never speculate stores, so an AA pointer isn't necessary.
  376. bool DontMoveAcrossStore = true;
  377. if (!I.isSafeToMove(nullptr, DontMoveAcrossStore)) {
  378. LLVM_DEBUG(dbgs() << "Can't speculate: " << I);
  379. return false;
  380. }
  381. // Only CmpMI is allowed to clobber the flags.
  382. if (&I != CmpMI && I.modifiesRegister(AArch64::NZCV, TRI)) {
  383. LLVM_DEBUG(dbgs() << "Clobbers flags: " << I);
  384. return false;
  385. }
  386. }
  387. return true;
  388. }
  389. /// Analyze the sub-cfg rooted in MBB, and return true if it is a potential
  390. /// candidate for cmp-conversion. Fill out the internal state.
  391. ///
  392. bool SSACCmpConv::canConvert(MachineBasicBlock *MBB) {
  393. Head = MBB;
  394. Tail = CmpBB = nullptr;
  395. if (Head->succ_size() != 2)
  396. return false;
  397. MachineBasicBlock *Succ0 = Head->succ_begin()[0];
  398. MachineBasicBlock *Succ1 = Head->succ_begin()[1];
  399. // CmpBB can only have a single predecessor. Tail is allowed many.
  400. if (Succ0->pred_size() != 1)
  401. std::swap(Succ0, Succ1);
  402. // Succ0 is our candidate for CmpBB.
  403. if (Succ0->pred_size() != 1 || Succ0->succ_size() != 2)
  404. return false;
  405. CmpBB = Succ0;
  406. Tail = Succ1;
  407. if (!CmpBB->isSuccessor(Tail))
  408. return false;
  409. // The CFG topology checks out.
  410. LLVM_DEBUG(dbgs() << "\nTriangle: " << printMBBReference(*Head) << " -> "
  411. << printMBBReference(*CmpBB) << " -> "
  412. << printMBBReference(*Tail) << '\n');
  413. ++NumConsidered;
  414. // Tail is allowed to have many predecessors, but we can't handle PHIs yet.
  415. //
  416. // FIXME: Real PHIs could be if-converted as long as the CmpBB values are
  417. // defined before The CmpBB cmp clobbers the flags. Alternatively, it should
  418. // always be safe to sink the ccmp down to immediately before the CmpBB
  419. // terminators.
  420. if (!trivialTailPHIs()) {
  421. LLVM_DEBUG(dbgs() << "Can't handle phis in Tail.\n");
  422. ++NumPhiRejs;
  423. return false;
  424. }
  425. if (!Tail->livein_empty()) {
  426. LLVM_DEBUG(dbgs() << "Can't handle live-in physregs in Tail.\n");
  427. ++NumPhysRejs;
  428. return false;
  429. }
  430. // CmpBB should never have PHIs since Head is its only predecessor.
  431. // FIXME: Clean them up if it happens.
  432. if (!CmpBB->empty() && CmpBB->front().isPHI()) {
  433. LLVM_DEBUG(dbgs() << "Can't handle phis in CmpBB.\n");
  434. ++NumPhi2Rejs;
  435. return false;
  436. }
  437. if (!CmpBB->livein_empty()) {
  438. LLVM_DEBUG(dbgs() << "Can't handle live-in physregs in CmpBB.\n");
  439. ++NumPhysRejs;
  440. return false;
  441. }
  442. // The branch we're looking to eliminate must be analyzable.
  443. HeadCond.clear();
  444. MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
  445. if (TII->analyzeBranch(*Head, TBB, FBB, HeadCond)) {
  446. LLVM_DEBUG(dbgs() << "Head branch not analyzable.\n");
  447. ++NumHeadBranchRejs;
  448. return false;
  449. }
  450. // This is weird, probably some sort of degenerate CFG, or an edge to a
  451. // landing pad.
  452. if (!TBB || HeadCond.empty()) {
  453. LLVM_DEBUG(
  454. dbgs() << "analyzeBranch didn't find conditional branch in Head.\n");
  455. ++NumHeadBranchRejs;
  456. return false;
  457. }
  458. if (!parseCond(HeadCond, HeadCmpBBCC)) {
  459. LLVM_DEBUG(dbgs() << "Unsupported branch type on Head\n");
  460. ++NumHeadBranchRejs;
  461. return false;
  462. }
  463. // Make sure the branch direction is right.
  464. if (TBB != CmpBB) {
  465. assert(TBB == Tail && "Unexpected TBB");
  466. HeadCmpBBCC = AArch64CC::getInvertedCondCode(HeadCmpBBCC);
  467. }
  468. CmpBBCond.clear();
  469. TBB = FBB = nullptr;
  470. if (TII->analyzeBranch(*CmpBB, TBB, FBB, CmpBBCond)) {
  471. LLVM_DEBUG(dbgs() << "CmpBB branch not analyzable.\n");
  472. ++NumCmpBranchRejs;
  473. return false;
  474. }
  475. if (!TBB || CmpBBCond.empty()) {
  476. LLVM_DEBUG(
  477. dbgs() << "analyzeBranch didn't find conditional branch in CmpBB.\n");
  478. ++NumCmpBranchRejs;
  479. return false;
  480. }
  481. if (!parseCond(CmpBBCond, CmpBBTailCC)) {
  482. LLVM_DEBUG(dbgs() << "Unsupported branch type on CmpBB\n");
  483. ++NumCmpBranchRejs;
  484. return false;
  485. }
  486. if (TBB != Tail)
  487. CmpBBTailCC = AArch64CC::getInvertedCondCode(CmpBBTailCC);
  488. LLVM_DEBUG(dbgs() << "Head->CmpBB on "
  489. << AArch64CC::getCondCodeName(HeadCmpBBCC)
  490. << ", CmpBB->Tail on "
  491. << AArch64CC::getCondCodeName(CmpBBTailCC) << '\n');
  492. CmpMI = findConvertibleCompare(CmpBB);
  493. if (!CmpMI)
  494. return false;
  495. if (!canSpeculateInstrs(CmpBB, CmpMI)) {
  496. ++NumSpeculateRejs;
  497. return false;
  498. }
  499. return true;
  500. }
  501. void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) {
  502. LLVM_DEBUG(dbgs() << "Merging " << printMBBReference(*CmpBB) << " into "
  503. << printMBBReference(*Head) << ":\n"
  504. << *CmpBB);
  505. // All CmpBB instructions are moved into Head, and CmpBB is deleted.
  506. // Update the CFG first.
  507. updateTailPHIs();
  508. // Save successor probabilties before removing CmpBB and Tail from their
  509. // parents.
  510. BranchProbability Head2CmpBB = MBPI->getEdgeProbability(Head, CmpBB);
  511. BranchProbability CmpBB2Tail = MBPI->getEdgeProbability(CmpBB, Tail);
  512. Head->removeSuccessor(CmpBB);
  513. CmpBB->removeSuccessor(Tail);
  514. // If Head and CmpBB had successor probabilties, udpate the probabilities to
  515. // reflect the ccmp-conversion.
  516. if (Head->hasSuccessorProbabilities() && CmpBB->hasSuccessorProbabilities()) {
  517. // Head is allowed two successors. We've removed CmpBB, so the remaining
  518. // successor is Tail. We need to increase the successor probability for
  519. // Tail to account for the CmpBB path we removed.
  520. //
  521. // Pr(Tail|Head) += Pr(CmpBB|Head) * Pr(Tail|CmpBB).
  522. assert(*Head->succ_begin() == Tail && "Head successor is not Tail");
  523. BranchProbability Head2Tail = MBPI->getEdgeProbability(Head, Tail);
  524. Head->setSuccProbability(Head->succ_begin(),
  525. Head2Tail + Head2CmpBB * CmpBB2Tail);
  526. // We will transfer successors of CmpBB to Head in a moment without
  527. // normalizing the successor probabilities. Set the successor probabilites
  528. // before doing so.
  529. //
  530. // Pr(I|Head) = Pr(CmpBB|Head) * Pr(I|CmpBB).
  531. for (auto I = CmpBB->succ_begin(), E = CmpBB->succ_end(); I != E; ++I) {
  532. BranchProbability CmpBB2I = MBPI->getEdgeProbability(CmpBB, *I);
  533. CmpBB->setSuccProbability(I, Head2CmpBB * CmpBB2I);
  534. }
  535. }
  536. Head->transferSuccessorsAndUpdatePHIs(CmpBB);
  537. DebugLoc TermDL = Head->getFirstTerminator()->getDebugLoc();
  538. TII->removeBranch(*Head);
  539. // If the Head terminator was one of the cbz / tbz branches with built-in
  540. // compare, we need to insert an explicit compare instruction in its place.
  541. if (HeadCond[0].getImm() == -1) {
  542. ++NumCompBranches;
  543. unsigned Opc = 0;
  544. switch (HeadCond[1].getImm()) {
  545. case AArch64::CBZW:
  546. case AArch64::CBNZW:
  547. Opc = AArch64::SUBSWri;
  548. break;
  549. case AArch64::CBZX:
  550. case AArch64::CBNZX:
  551. Opc = AArch64::SUBSXri;
  552. break;
  553. default:
  554. llvm_unreachable("Cannot convert Head branch");
  555. }
  556. const MCInstrDesc &MCID = TII->get(Opc);
  557. // Create a dummy virtual register for the SUBS def.
  558. Register DestReg =
  559. MRI->createVirtualRegister(TII->getRegClass(MCID, 0, TRI, *MF));
  560. // Insert a SUBS Rn, #0 instruction instead of the cbz / cbnz.
  561. BuildMI(*Head, Head->end(), TermDL, MCID)
  562. .addReg(DestReg, RegState::Define | RegState::Dead)
  563. .add(HeadCond[2])
  564. .addImm(0)
  565. .addImm(0);
  566. // SUBS uses the GPR*sp register classes.
  567. MRI->constrainRegClass(HeadCond[2].getReg(),
  568. TII->getRegClass(MCID, 1, TRI, *MF));
  569. }
  570. Head->splice(Head->end(), CmpBB, CmpBB->begin(), CmpBB->end());
  571. // Now replace CmpMI with a ccmp instruction that also considers the incoming
  572. // flags.
  573. unsigned Opc = 0;
  574. unsigned FirstOp = 1; // First CmpMI operand to copy.
  575. bool isZBranch = false; // CmpMI is a cbz/cbnz instruction.
  576. switch (CmpMI->getOpcode()) {
  577. default:
  578. llvm_unreachable("Unknown compare opcode");
  579. case AArch64::SUBSWri: Opc = AArch64::CCMPWi; break;
  580. case AArch64::SUBSWrr: Opc = AArch64::CCMPWr; break;
  581. case AArch64::SUBSXri: Opc = AArch64::CCMPXi; break;
  582. case AArch64::SUBSXrr: Opc = AArch64::CCMPXr; break;
  583. case AArch64::ADDSWri: Opc = AArch64::CCMNWi; break;
  584. case AArch64::ADDSWrr: Opc = AArch64::CCMNWr; break;
  585. case AArch64::ADDSXri: Opc = AArch64::CCMNXi; break;
  586. case AArch64::ADDSXrr: Opc = AArch64::CCMNXr; break;
  587. case AArch64::FCMPSrr: Opc = AArch64::FCCMPSrr; FirstOp = 0; break;
  588. case AArch64::FCMPDrr: Opc = AArch64::FCCMPDrr; FirstOp = 0; break;
  589. case AArch64::FCMPESrr: Opc = AArch64::FCCMPESrr; FirstOp = 0; break;
  590. case AArch64::FCMPEDrr: Opc = AArch64::FCCMPEDrr; FirstOp = 0; break;
  591. case AArch64::CBZW:
  592. case AArch64::CBNZW:
  593. Opc = AArch64::CCMPWi;
  594. FirstOp = 0;
  595. isZBranch = true;
  596. break;
  597. case AArch64::CBZX:
  598. case AArch64::CBNZX:
  599. Opc = AArch64::CCMPXi;
  600. FirstOp = 0;
  601. isZBranch = true;
  602. break;
  603. }
  604. // The ccmp instruction should set the flags according to the comparison when
  605. // Head would have branched to CmpBB.
  606. // The NZCV immediate operand should provide flags for the case where Head
  607. // would have branched to Tail. These flags should cause the new Head
  608. // terminator to branch to tail.
  609. unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(CmpBBTailCC);
  610. const MCInstrDesc &MCID = TII->get(Opc);
  611. MRI->constrainRegClass(CmpMI->getOperand(FirstOp).getReg(),
  612. TII->getRegClass(MCID, 0, TRI, *MF));
  613. if (CmpMI->getOperand(FirstOp + 1).isReg())
  614. MRI->constrainRegClass(CmpMI->getOperand(FirstOp + 1).getReg(),
  615. TII->getRegClass(MCID, 1, TRI, *MF));
  616. MachineInstrBuilder MIB = BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), MCID)
  617. .add(CmpMI->getOperand(FirstOp)); // Register Rn
  618. if (isZBranch)
  619. MIB.addImm(0); // cbz/cbnz Rn -> ccmp Rn, #0
  620. else
  621. MIB.add(CmpMI->getOperand(FirstOp + 1)); // Register Rm / Immediate
  622. MIB.addImm(NZCV).addImm(HeadCmpBBCC);
  623. // If CmpMI was a terminator, we need a new conditional branch to replace it.
  624. // This now becomes a Head terminator.
  625. if (isZBranch) {
  626. bool isNZ = CmpMI->getOpcode() == AArch64::CBNZW ||
  627. CmpMI->getOpcode() == AArch64::CBNZX;
  628. BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), TII->get(AArch64::Bcc))
  629. .addImm(isNZ ? AArch64CC::NE : AArch64CC::EQ)
  630. .add(CmpMI->getOperand(1)); // Branch target.
  631. }
  632. CmpMI->eraseFromParent();
  633. Head->updateTerminator(CmpBB->getNextNode());
  634. RemovedBlocks.push_back(CmpBB);
  635. CmpBB->eraseFromParent();
  636. LLVM_DEBUG(dbgs() << "Result:\n" << *Head);
  637. ++NumConverted;
  638. }
  639. int SSACCmpConv::expectedCodeSizeDelta() const {
  640. int delta = 0;
  641. // If the Head terminator was one of the cbz / tbz branches with built-in
  642. // compare, we need to insert an explicit compare instruction in its place
  643. // plus a branch instruction.
  644. if (HeadCond[0].getImm() == -1) {
  645. switch (HeadCond[1].getImm()) {
  646. case AArch64::CBZW:
  647. case AArch64::CBNZW:
  648. case AArch64::CBZX:
  649. case AArch64::CBNZX:
  650. // Therefore delta += 1
  651. delta = 1;
  652. break;
  653. default:
  654. llvm_unreachable("Cannot convert Head branch");
  655. }
  656. }
  657. // If the Cmp terminator was one of the cbz / tbz branches with
  658. // built-in compare, it will be turned into a compare instruction
  659. // into Head, but we do not save any instruction.
  660. // Otherwise, we save the branch instruction.
  661. switch (CmpMI->getOpcode()) {
  662. default:
  663. --delta;
  664. break;
  665. case AArch64::CBZW:
  666. case AArch64::CBNZW:
  667. case AArch64::CBZX:
  668. case AArch64::CBNZX:
  669. break;
  670. }
  671. return delta;
  672. }
  673. //===----------------------------------------------------------------------===//
  674. // AArch64ConditionalCompares Pass
  675. //===----------------------------------------------------------------------===//
  676. namespace {
  677. class AArch64ConditionalCompares : public MachineFunctionPass {
  678. const MachineBranchProbabilityInfo *MBPI;
  679. const TargetInstrInfo *TII;
  680. const TargetRegisterInfo *TRI;
  681. MCSchedModel SchedModel;
  682. // Does the proceeded function has Oz attribute.
  683. bool MinSize;
  684. MachineRegisterInfo *MRI;
  685. MachineDominatorTree *DomTree;
  686. MachineLoopInfo *Loops;
  687. MachineTraceMetrics *Traces;
  688. MachineTraceMetrics::Ensemble *MinInstr;
  689. SSACCmpConv CmpConv;
  690. public:
  691. static char ID;
  692. AArch64ConditionalCompares() : MachineFunctionPass(ID) {
  693. initializeAArch64ConditionalComparesPass(*PassRegistry::getPassRegistry());
  694. }
  695. void getAnalysisUsage(AnalysisUsage &AU) const override;
  696. bool runOnMachineFunction(MachineFunction &MF) override;
  697. StringRef getPassName() const override {
  698. return "AArch64 Conditional Compares";
  699. }
  700. private:
  701. bool tryConvert(MachineBasicBlock *);
  702. void updateDomTree(ArrayRef<MachineBasicBlock *> Removed);
  703. void updateLoops(ArrayRef<MachineBasicBlock *> Removed);
  704. void invalidateTraces();
  705. bool shouldConvert();
  706. };
  707. } // end anonymous namespace
  708. char AArch64ConditionalCompares::ID = 0;
  709. INITIALIZE_PASS_BEGIN(AArch64ConditionalCompares, "aarch64-ccmp",
  710. "AArch64 CCMP Pass", false, false)
  711. INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
  712. INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
  713. INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
  714. INITIALIZE_PASS_END(AArch64ConditionalCompares, "aarch64-ccmp",
  715. "AArch64 CCMP Pass", false, false)
  716. FunctionPass *llvm::createAArch64ConditionalCompares() {
  717. return new AArch64ConditionalCompares();
  718. }
  719. void AArch64ConditionalCompares::getAnalysisUsage(AnalysisUsage &AU) const {
  720. AU.addRequired<MachineBranchProbabilityInfo>();
  721. AU.addRequired<MachineDominatorTree>();
  722. AU.addPreserved<MachineDominatorTree>();
  723. AU.addRequired<MachineLoopInfo>();
  724. AU.addPreserved<MachineLoopInfo>();
  725. AU.addRequired<MachineTraceMetrics>();
  726. AU.addPreserved<MachineTraceMetrics>();
  727. MachineFunctionPass::getAnalysisUsage(AU);
  728. }
  729. /// Update the dominator tree after if-conversion erased some blocks.
  730. void AArch64ConditionalCompares::updateDomTree(
  731. ArrayRef<MachineBasicBlock *> Removed) {
  732. // convert() removes CmpBB which was previously dominated by Head.
  733. // CmpBB children should be transferred to Head.
  734. MachineDomTreeNode *HeadNode = DomTree->getNode(CmpConv.Head);
  735. for (MachineBasicBlock *RemovedMBB : Removed) {
  736. MachineDomTreeNode *Node = DomTree->getNode(RemovedMBB);
  737. assert(Node != HeadNode && "Cannot erase the head node");
  738. assert(Node->getIDom() == HeadNode && "CmpBB should be dominated by Head");
  739. while (Node->getNumChildren())
  740. DomTree->changeImmediateDominator(Node->back(), HeadNode);
  741. DomTree->eraseNode(RemovedMBB);
  742. }
  743. }
  744. /// Update LoopInfo after if-conversion.
  745. void
  746. AArch64ConditionalCompares::updateLoops(ArrayRef<MachineBasicBlock *> Removed) {
  747. if (!Loops)
  748. return;
  749. for (MachineBasicBlock *RemovedMBB : Removed)
  750. Loops->removeBlock(RemovedMBB);
  751. }
  752. /// Invalidate MachineTraceMetrics before if-conversion.
  753. void AArch64ConditionalCompares::invalidateTraces() {
  754. Traces->invalidate(CmpConv.Head);
  755. Traces->invalidate(CmpConv.CmpBB);
  756. }
  757. /// Apply cost model and heuristics to the if-conversion in IfConv.
  758. /// Return true if the conversion is a good idea.
  759. ///
  760. bool AArch64ConditionalCompares::shouldConvert() {
  761. // Stress testing mode disables all cost considerations.
  762. if (Stress)
  763. return true;
  764. if (!MinInstr)
  765. MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
  766. // Head dominates CmpBB, so it is always included in its trace.
  767. MachineTraceMetrics::Trace Trace = MinInstr->getTrace(CmpConv.CmpBB);
  768. // If code size is the main concern
  769. if (MinSize) {
  770. int CodeSizeDelta = CmpConv.expectedCodeSizeDelta();
  771. LLVM_DEBUG(dbgs() << "Code size delta: " << CodeSizeDelta << '\n');
  772. // If we are minimizing the code size, do the conversion whatever
  773. // the cost is.
  774. if (CodeSizeDelta < 0)
  775. return true;
  776. if (CodeSizeDelta > 0) {
  777. LLVM_DEBUG(dbgs() << "Code size is increasing, give up on this one.\n");
  778. return false;
  779. }
  780. // CodeSizeDelta == 0, continue with the regular heuristics
  781. }
  782. // Heuristic: The compare conversion delays the execution of the branch
  783. // instruction because we must wait for the inputs to the second compare as
  784. // well. The branch has no dependent instructions, but delaying it increases
  785. // the cost of a misprediction.
  786. //
  787. // Set a limit on the delay we will accept.
  788. unsigned DelayLimit = SchedModel.MispredictPenalty * 3 / 4;
  789. // Instruction depths can be computed for all trace instructions above CmpBB.
  790. unsigned HeadDepth =
  791. Trace.getInstrCycles(*CmpConv.Head->getFirstTerminator()).Depth;
  792. unsigned CmpBBDepth =
  793. Trace.getInstrCycles(*CmpConv.CmpBB->getFirstTerminator()).Depth;
  794. LLVM_DEBUG(dbgs() << "Head depth: " << HeadDepth
  795. << "\nCmpBB depth: " << CmpBBDepth << '\n');
  796. if (CmpBBDepth > HeadDepth + DelayLimit) {
  797. LLVM_DEBUG(dbgs() << "Branch delay would be larger than " << DelayLimit
  798. << " cycles.\n");
  799. return false;
  800. }
  801. // Check the resource depth at the bottom of CmpBB - these instructions will
  802. // be speculated.
  803. unsigned ResDepth = Trace.getResourceDepth(true);
  804. LLVM_DEBUG(dbgs() << "Resources: " << ResDepth << '\n');
  805. // Heuristic: The speculatively executed instructions must all be able to
  806. // merge into the Head block. The Head critical path should dominate the
  807. // resource cost of the speculated instructions.
  808. if (ResDepth > HeadDepth) {
  809. LLVM_DEBUG(dbgs() << "Too many instructions to speculate.\n");
  810. return false;
  811. }
  812. return true;
  813. }
  814. bool AArch64ConditionalCompares::tryConvert(MachineBasicBlock *MBB) {
  815. bool Changed = false;
  816. while (CmpConv.canConvert(MBB) && shouldConvert()) {
  817. invalidateTraces();
  818. SmallVector<MachineBasicBlock *, 4> RemovedBlocks;
  819. CmpConv.convert(RemovedBlocks);
  820. Changed = true;
  821. updateDomTree(RemovedBlocks);
  822. updateLoops(RemovedBlocks);
  823. }
  824. return Changed;
  825. }
  826. bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) {
  827. LLVM_DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n"
  828. << "********** Function: " << MF.getName() << '\n');
  829. if (skipFunction(MF.getFunction()))
  830. return false;
  831. TII = MF.getSubtarget().getInstrInfo();
  832. TRI = MF.getSubtarget().getRegisterInfo();
  833. SchedModel = MF.getSubtarget().getSchedModel();
  834. MRI = &MF.getRegInfo();
  835. DomTree = &getAnalysis<MachineDominatorTree>();
  836. Loops = getAnalysisIfAvailable<MachineLoopInfo>();
  837. MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
  838. Traces = &getAnalysis<MachineTraceMetrics>();
  839. MinInstr = nullptr;
  840. MinSize = MF.getFunction().hasMinSize();
  841. bool Changed = false;
  842. CmpConv.runOnMachineFunction(MF, MBPI);
  843. // Visit blocks in dominator tree pre-order. The pre-order enables multiple
  844. // cmp-conversions from the same head block.
  845. // Note that updateDomTree() modifies the children of the DomTree node
  846. // currently being visited. The df_iterator supports that; it doesn't look at
  847. // child_begin() / child_end() until after a node has been visited.
  848. for (auto *I : depth_first(DomTree))
  849. if (tryConvert(I->getBlock()))
  850. Changed = true;
  851. return Changed;
  852. }