AArch64ConditionalCompares.cpp 33 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958
  1. //===-- AArch64ConditionalCompares.cpp --- CCMP formation for AArch64 -----===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file implements the AArch64ConditionalCompares pass which reduces
  10. // branching and code size by using the conditional compare instructions CCMP,
  11. // CCMN, and FCMP.
  12. //
  13. // The CFG transformations for forming conditional compares are very similar to
  14. // if-conversion, and this pass should run immediately before the early
  15. // if-conversion pass.
  16. //
  17. //===----------------------------------------------------------------------===//
  18. #include "AArch64.h"
  19. #include "llvm/ADT/DepthFirstIterator.h"
  20. #include "llvm/ADT/SetVector.h"
  21. #include "llvm/ADT/SmallPtrSet.h"
  22. #include "llvm/ADT/Statistic.h"
  23. #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
  24. #include "llvm/CodeGen/MachineDominators.h"
  25. #include "llvm/CodeGen/MachineFunction.h"
  26. #include "llvm/CodeGen/MachineFunctionPass.h"
  27. #include "llvm/CodeGen/MachineInstrBuilder.h"
  28. #include "llvm/CodeGen/MachineLoopInfo.h"
  29. #include "llvm/CodeGen/MachineRegisterInfo.h"
  30. #include "llvm/CodeGen/MachineTraceMetrics.h"
  31. #include "llvm/CodeGen/Passes.h"
  32. #include "llvm/CodeGen/TargetInstrInfo.h"
  33. #include "llvm/CodeGen/TargetRegisterInfo.h"
  34. #include "llvm/CodeGen/TargetSubtargetInfo.h"
  35. #include "llvm/InitializePasses.h"
  36. #include "llvm/Support/CommandLine.h"
  37. #include "llvm/Support/Debug.h"
  38. #include "llvm/Support/raw_ostream.h"
  39. using namespace llvm;
  40. #define DEBUG_TYPE "aarch64-ccmp"
  41. // Absolute maximum number of instructions allowed per speculated block.
  42. // This bypasses all other heuristics, so it should be set fairly high.
  43. static cl::opt<unsigned> BlockInstrLimit(
  44. "aarch64-ccmp-limit", cl::init(30), cl::Hidden,
  45. cl::desc("Maximum number of instructions per speculated block."));
  46. // Stress testing mode - disable heuristics.
  47. static cl::opt<bool> Stress("aarch64-stress-ccmp", cl::Hidden,
  48. cl::desc("Turn all knobs to 11"));
  49. STATISTIC(NumConsidered, "Number of ccmps considered");
  50. STATISTIC(NumPhiRejs, "Number of ccmps rejected (PHI)");
  51. STATISTIC(NumPhysRejs, "Number of ccmps rejected (Physregs)");
  52. STATISTIC(NumPhi2Rejs, "Number of ccmps rejected (PHI2)");
  53. STATISTIC(NumHeadBranchRejs, "Number of ccmps rejected (Head branch)");
  54. STATISTIC(NumCmpBranchRejs, "Number of ccmps rejected (CmpBB branch)");
  55. STATISTIC(NumCmpTermRejs, "Number of ccmps rejected (CmpBB is cbz...)");
  56. STATISTIC(NumImmRangeRejs, "Number of ccmps rejected (Imm out of range)");
  57. STATISTIC(NumLiveDstRejs, "Number of ccmps rejected (Cmp dest live)");
  58. STATISTIC(NumMultNZCVUses, "Number of ccmps rejected (NZCV used)");
  59. STATISTIC(NumUnknNZCVDefs, "Number of ccmps rejected (NZCV def unknown)");
  60. STATISTIC(NumSpeculateRejs, "Number of ccmps rejected (Can't speculate)");
  61. STATISTIC(NumConverted, "Number of ccmp instructions created");
  62. STATISTIC(NumCompBranches, "Number of cbz/cbnz branches converted");
  63. //===----------------------------------------------------------------------===//
  64. // SSACCmpConv
  65. //===----------------------------------------------------------------------===//
  66. //
  67. // The SSACCmpConv class performs ccmp-conversion on SSA form machine code
  68. // after determining if it is possible. The class contains no heuristics;
  69. // external code should be used to determine when ccmp-conversion is a good
  70. // idea.
  71. //
  72. // CCmp-formation works on a CFG representing chained conditions, typically
  73. // from C's short-circuit || and && operators:
  74. //
  75. // From: Head To: Head
  76. // / | CmpBB
  77. // / | / |
  78. // | CmpBB / |
  79. // | / | Tail |
  80. // | / | | |
  81. // Tail | | |
  82. // | | | |
  83. // ... ... ... ...
  84. //
  85. // The Head block is terminated by a br.cond instruction, and the CmpBB block
  86. // contains compare + br.cond. Tail must be a successor of both.
  87. //
  88. // The cmp-conversion turns the compare instruction in CmpBB into a conditional
  89. // compare, and merges CmpBB into Head, speculatively executing its
  90. // instructions. The AArch64 conditional compare instructions have an immediate
  91. // operand that specifies the NZCV flag values when the condition is false and
  92. // the compare isn't executed. This makes it possible to chain compares with
  93. // different condition codes.
  94. //
  95. // Example:
  96. //
  97. // if (a == 5 || b == 17)
  98. // foo();
  99. //
  100. // Head:
  101. // cmp w0, #5
  102. // b.eq Tail
  103. // CmpBB:
  104. // cmp w1, #17
  105. // b.eq Tail
  106. // ...
  107. // Tail:
  108. // bl _foo
  109. //
  110. // Becomes:
  111. //
  112. // Head:
  113. // cmp w0, #5
  114. // ccmp w1, #17, 4, ne ; 4 = nZcv
  115. // b.eq Tail
  116. // ...
  117. // Tail:
  118. // bl _foo
  119. //
  120. // The ccmp condition code is the one that would cause the Head terminator to
  121. // branch to CmpBB.
  122. //
  123. // FIXME: It should also be possible to speculate a block on the critical edge
  124. // between Head and Tail, just like if-converting a diamond.
  125. //
  126. // FIXME: Handle PHIs in Tail by turning them into selects (if-conversion).
  127. namespace {
  128. class SSACCmpConv {
  129. MachineFunction *MF;
  130. const TargetInstrInfo *TII;
  131. const TargetRegisterInfo *TRI;
  132. MachineRegisterInfo *MRI;
  133. const MachineBranchProbabilityInfo *MBPI;
  134. public:
  135. /// The first block containing a conditional branch, dominating everything
  136. /// else.
  137. MachineBasicBlock *Head;
  138. /// The block containing cmp+br.cond with a successor shared with Head.
  139. MachineBasicBlock *CmpBB;
  140. /// The common successor for Head and CmpBB.
  141. MachineBasicBlock *Tail;
  142. /// The compare instruction in CmpBB that can be converted to a ccmp.
  143. MachineInstr *CmpMI;
  144. private:
  145. /// The branch condition in Head as determined by analyzeBranch.
  146. SmallVector<MachineOperand, 4> HeadCond;
  147. /// The condition code that makes Head branch to CmpBB.
  148. AArch64CC::CondCode HeadCmpBBCC;
  149. /// The branch condition in CmpBB.
  150. SmallVector<MachineOperand, 4> CmpBBCond;
  151. /// The condition code that makes CmpBB branch to Tail.
  152. AArch64CC::CondCode CmpBBTailCC;
  153. /// Check if the Tail PHIs are trivially convertible.
  154. bool trivialTailPHIs();
  155. /// Remove CmpBB from the Tail PHIs.
  156. void updateTailPHIs();
  157. /// Check if an operand defining DstReg is dead.
  158. bool isDeadDef(unsigned DstReg);
  159. /// Find the compare instruction in MBB that controls the conditional branch.
  160. /// Return NULL if a convertible instruction can't be found.
  161. MachineInstr *findConvertibleCompare(MachineBasicBlock *MBB);
  162. /// Return true if all non-terminator instructions in MBB can be safely
  163. /// speculated.
  164. bool canSpeculateInstrs(MachineBasicBlock *MBB, const MachineInstr *CmpMI);
  165. public:
  166. /// runOnMachineFunction - Initialize per-function data structures.
  167. void runOnMachineFunction(MachineFunction &MF,
  168. const MachineBranchProbabilityInfo *MBPI) {
  169. this->MF = &MF;
  170. this->MBPI = MBPI;
  171. TII = MF.getSubtarget().getInstrInfo();
  172. TRI = MF.getSubtarget().getRegisterInfo();
  173. MRI = &MF.getRegInfo();
  174. }
  175. /// If the sub-CFG headed by MBB can be cmp-converted, initialize the
  176. /// internal state, and return true.
  177. bool canConvert(MachineBasicBlock *MBB);
  178. /// Cmo-convert the last block passed to canConvertCmp(), assuming
  179. /// it is possible. Add any erased blocks to RemovedBlocks.
  180. void convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks);
  181. /// Return the expected code size delta if the conversion into a
  182. /// conditional compare is performed.
  183. int expectedCodeSizeDelta() const;
  184. };
  185. } // end anonymous namespace
  186. // Check that all PHIs in Tail are selecting the same value from Head and CmpBB.
  187. // This means that no if-conversion is required when merging CmpBB into Head.
  188. bool SSACCmpConv::trivialTailPHIs() {
  189. for (auto &I : *Tail) {
  190. if (!I.isPHI())
  191. break;
  192. unsigned HeadReg = 0, CmpBBReg = 0;
  193. // PHI operands come in (VReg, MBB) pairs.
  194. for (unsigned oi = 1, oe = I.getNumOperands(); oi != oe; oi += 2) {
  195. MachineBasicBlock *MBB = I.getOperand(oi + 1).getMBB();
  196. Register Reg = I.getOperand(oi).getReg();
  197. if (MBB == Head) {
  198. assert((!HeadReg || HeadReg == Reg) && "Inconsistent PHI operands");
  199. HeadReg = Reg;
  200. }
  201. if (MBB == CmpBB) {
  202. assert((!CmpBBReg || CmpBBReg == Reg) && "Inconsistent PHI operands");
  203. CmpBBReg = Reg;
  204. }
  205. }
  206. if (HeadReg != CmpBBReg)
  207. return false;
  208. }
  209. return true;
  210. }
  211. // Assuming that trivialTailPHIs() is true, update the Tail PHIs by simply
  212. // removing the CmpBB operands. The Head operands will be identical.
  213. void SSACCmpConv::updateTailPHIs() {
  214. for (auto &I : *Tail) {
  215. if (!I.isPHI())
  216. break;
  217. // I is a PHI. It can have multiple entries for CmpBB.
  218. for (unsigned oi = I.getNumOperands(); oi > 2; oi -= 2) {
  219. // PHI operands are (Reg, MBB) at (oi-2, oi-1).
  220. if (I.getOperand(oi - 1).getMBB() == CmpBB) {
  221. I.RemoveOperand(oi - 1);
  222. I.RemoveOperand(oi - 2);
  223. }
  224. }
  225. }
  226. }
  227. // This pass runs before the AArch64DeadRegisterDefinitions pass, so compares
  228. // are still writing virtual registers without any uses.
  229. bool SSACCmpConv::isDeadDef(unsigned DstReg) {
  230. // Writes to the zero register are dead.
  231. if (DstReg == AArch64::WZR || DstReg == AArch64::XZR)
  232. return true;
  233. if (!Register::isVirtualRegister(DstReg))
  234. return false;
  235. // A virtual register def without any uses will be marked dead later, and
  236. // eventually replaced by the zero register.
  237. return MRI->use_nodbg_empty(DstReg);
  238. }
  239. // Parse a condition code returned by analyzeBranch, and compute the CondCode
  240. // corresponding to TBB.
  241. // Return
  242. static bool parseCond(ArrayRef<MachineOperand> Cond, AArch64CC::CondCode &CC) {
  243. // A normal br.cond simply has the condition code.
  244. if (Cond[0].getImm() != -1) {
  245. assert(Cond.size() == 1 && "Unknown Cond array format");
  246. CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
  247. return true;
  248. }
  249. // For tbz and cbz instruction, the opcode is next.
  250. switch (Cond[1].getImm()) {
  251. default:
  252. // This includes tbz / tbnz branches which can't be converted to
  253. // ccmp + br.cond.
  254. return false;
  255. case AArch64::CBZW:
  256. case AArch64::CBZX:
  257. assert(Cond.size() == 3 && "Unknown Cond array format");
  258. CC = AArch64CC::EQ;
  259. return true;
  260. case AArch64::CBNZW:
  261. case AArch64::CBNZX:
  262. assert(Cond.size() == 3 && "Unknown Cond array format");
  263. CC = AArch64CC::NE;
  264. return true;
  265. }
  266. }
  267. MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) {
  268. MachineBasicBlock::iterator I = MBB->getFirstTerminator();
  269. if (I == MBB->end())
  270. return nullptr;
  271. // The terminator must be controlled by the flags.
  272. if (!I->readsRegister(AArch64::NZCV)) {
  273. switch (I->getOpcode()) {
  274. case AArch64::CBZW:
  275. case AArch64::CBZX:
  276. case AArch64::CBNZW:
  277. case AArch64::CBNZX:
  278. // These can be converted into a ccmp against #0.
  279. return &*I;
  280. }
  281. ++NumCmpTermRejs;
  282. LLVM_DEBUG(dbgs() << "Flags not used by terminator: " << *I);
  283. return nullptr;
  284. }
  285. // Now find the instruction controlling the terminator.
  286. for (MachineBasicBlock::iterator B = MBB->begin(); I != B;) {
  287. I = prev_nodbg(I, MBB->begin());
  288. assert(!I->isTerminator() && "Spurious terminator");
  289. switch (I->getOpcode()) {
  290. // cmp is an alias for subs with a dead destination register.
  291. case AArch64::SUBSWri:
  292. case AArch64::SUBSXri:
  293. // cmn is an alias for adds with a dead destination register.
  294. case AArch64::ADDSWri:
  295. case AArch64::ADDSXri:
  296. // Check that the immediate operand is within range, ccmp wants a uimm5.
  297. // Rd = SUBSri Rn, imm, shift
  298. if (I->getOperand(3).getImm() || !isUInt<5>(I->getOperand(2).getImm())) {
  299. LLVM_DEBUG(dbgs() << "Immediate out of range for ccmp: " << *I);
  300. ++NumImmRangeRejs;
  301. return nullptr;
  302. }
  303. LLVM_FALLTHROUGH;
  304. case AArch64::SUBSWrr:
  305. case AArch64::SUBSXrr:
  306. case AArch64::ADDSWrr:
  307. case AArch64::ADDSXrr:
  308. if (isDeadDef(I->getOperand(0).getReg()))
  309. return &*I;
  310. LLVM_DEBUG(dbgs() << "Can't convert compare with live destination: "
  311. << *I);
  312. ++NumLiveDstRejs;
  313. return nullptr;
  314. case AArch64::FCMPSrr:
  315. case AArch64::FCMPDrr:
  316. case AArch64::FCMPESrr:
  317. case AArch64::FCMPEDrr:
  318. return &*I;
  319. }
  320. // Check for flag reads and clobbers.
  321. PhysRegInfo PRI = AnalyzePhysRegInBundle(*I, AArch64::NZCV, TRI);
  322. if (PRI.Read) {
  323. // The ccmp doesn't produce exactly the same flags as the original
  324. // compare, so reject the transform if there are uses of the flags
  325. // besides the terminators.
  326. LLVM_DEBUG(dbgs() << "Can't create ccmp with multiple uses: " << *I);
  327. ++NumMultNZCVUses;
  328. return nullptr;
  329. }
  330. if (PRI.Defined || PRI.Clobbered) {
  331. LLVM_DEBUG(dbgs() << "Not convertible compare: " << *I);
  332. ++NumUnknNZCVDefs;
  333. return nullptr;
  334. }
  335. }
  336. LLVM_DEBUG(dbgs() << "Flags not defined in " << printMBBReference(*MBB)
  337. << '\n');
  338. return nullptr;
  339. }
  340. /// Determine if all the instructions in MBB can safely
  341. /// be speculated. The terminators are not considered.
  342. ///
  343. /// Only CmpMI is allowed to clobber the flags.
  344. ///
  345. bool SSACCmpConv::canSpeculateInstrs(MachineBasicBlock *MBB,
  346. const MachineInstr *CmpMI) {
  347. // Reject any live-in physregs. It's probably NZCV/EFLAGS, and very hard to
  348. // get right.
  349. if (!MBB->livein_empty()) {
  350. LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has live-ins.\n");
  351. return false;
  352. }
  353. unsigned InstrCount = 0;
  354. // Check all instructions, except the terminators. It is assumed that
  355. // terminators never have side effects or define any used register values.
  356. for (auto &I : make_range(MBB->begin(), MBB->getFirstTerminator())) {
  357. if (I.isDebugInstr())
  358. continue;
  359. if (++InstrCount > BlockInstrLimit && !Stress) {
  360. LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has more than "
  361. << BlockInstrLimit << " instructions.\n");
  362. return false;
  363. }
  364. // There shouldn't normally be any phis in a single-predecessor block.
  365. if (I.isPHI()) {
  366. LLVM_DEBUG(dbgs() << "Can't hoist: " << I);
  367. return false;
  368. }
  369. // Don't speculate loads. Note that it may be possible and desirable to
  370. // speculate GOT or constant pool loads that are guaranteed not to trap,
  371. // but we don't support that for now.
  372. if (I.mayLoad()) {
  373. LLVM_DEBUG(dbgs() << "Won't speculate load: " << I);
  374. return false;
  375. }
  376. // We never speculate stores, so an AA pointer isn't necessary.
  377. bool DontMoveAcrossStore = true;
  378. if (!I.isSafeToMove(nullptr, DontMoveAcrossStore)) {
  379. LLVM_DEBUG(dbgs() << "Can't speculate: " << I);
  380. return false;
  381. }
  382. // Only CmpMI is allowed to clobber the flags.
  383. if (&I != CmpMI && I.modifiesRegister(AArch64::NZCV, TRI)) {
  384. LLVM_DEBUG(dbgs() << "Clobbers flags: " << I);
  385. return false;
  386. }
  387. }
  388. return true;
  389. }
  390. /// Analyze the sub-cfg rooted in MBB, and return true if it is a potential
  391. /// candidate for cmp-conversion. Fill out the internal state.
  392. ///
  393. bool SSACCmpConv::canConvert(MachineBasicBlock *MBB) {
  394. Head = MBB;
  395. Tail = CmpBB = nullptr;
  396. if (Head->succ_size() != 2)
  397. return false;
  398. MachineBasicBlock *Succ0 = Head->succ_begin()[0];
  399. MachineBasicBlock *Succ1 = Head->succ_begin()[1];
  400. // CmpBB can only have a single predecessor. Tail is allowed many.
  401. if (Succ0->pred_size() != 1)
  402. std::swap(Succ0, Succ1);
  403. // Succ0 is our candidate for CmpBB.
  404. if (Succ0->pred_size() != 1 || Succ0->succ_size() != 2)
  405. return false;
  406. CmpBB = Succ0;
  407. Tail = Succ1;
  408. if (!CmpBB->isSuccessor(Tail))
  409. return false;
  410. // The CFG topology checks out.
  411. LLVM_DEBUG(dbgs() << "\nTriangle: " << printMBBReference(*Head) << " -> "
  412. << printMBBReference(*CmpBB) << " -> "
  413. << printMBBReference(*Tail) << '\n');
  414. ++NumConsidered;
  415. // Tail is allowed to have many predecessors, but we can't handle PHIs yet.
  416. //
  417. // FIXME: Real PHIs could be if-converted as long as the CmpBB values are
  418. // defined before The CmpBB cmp clobbers the flags. Alternatively, it should
  419. // always be safe to sink the ccmp down to immediately before the CmpBB
  420. // terminators.
  421. if (!trivialTailPHIs()) {
  422. LLVM_DEBUG(dbgs() << "Can't handle phis in Tail.\n");
  423. ++NumPhiRejs;
  424. return false;
  425. }
  426. if (!Tail->livein_empty()) {
  427. LLVM_DEBUG(dbgs() << "Can't handle live-in physregs in Tail.\n");
  428. ++NumPhysRejs;
  429. return false;
  430. }
  431. // CmpBB should never have PHIs since Head is its only predecessor.
  432. // FIXME: Clean them up if it happens.
  433. if (!CmpBB->empty() && CmpBB->front().isPHI()) {
  434. LLVM_DEBUG(dbgs() << "Can't handle phis in CmpBB.\n");
  435. ++NumPhi2Rejs;
  436. return false;
  437. }
  438. if (!CmpBB->livein_empty()) {
  439. LLVM_DEBUG(dbgs() << "Can't handle live-in physregs in CmpBB.\n");
  440. ++NumPhysRejs;
  441. return false;
  442. }
  443. // The branch we're looking to eliminate must be analyzable.
  444. HeadCond.clear();
  445. MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
  446. if (TII->analyzeBranch(*Head, TBB, FBB, HeadCond)) {
  447. LLVM_DEBUG(dbgs() << "Head branch not analyzable.\n");
  448. ++NumHeadBranchRejs;
  449. return false;
  450. }
  451. // This is weird, probably some sort of degenerate CFG, or an edge to a
  452. // landing pad.
  453. if (!TBB || HeadCond.empty()) {
  454. LLVM_DEBUG(
  455. dbgs() << "analyzeBranch didn't find conditional branch in Head.\n");
  456. ++NumHeadBranchRejs;
  457. return false;
  458. }
  459. if (!parseCond(HeadCond, HeadCmpBBCC)) {
  460. LLVM_DEBUG(dbgs() << "Unsupported branch type on Head\n");
  461. ++NumHeadBranchRejs;
  462. return false;
  463. }
  464. // Make sure the branch direction is right.
  465. if (TBB != CmpBB) {
  466. assert(TBB == Tail && "Unexpected TBB");
  467. HeadCmpBBCC = AArch64CC::getInvertedCondCode(HeadCmpBBCC);
  468. }
  469. CmpBBCond.clear();
  470. TBB = FBB = nullptr;
  471. if (TII->analyzeBranch(*CmpBB, TBB, FBB, CmpBBCond)) {
  472. LLVM_DEBUG(dbgs() << "CmpBB branch not analyzable.\n");
  473. ++NumCmpBranchRejs;
  474. return false;
  475. }
  476. if (!TBB || CmpBBCond.empty()) {
  477. LLVM_DEBUG(
  478. dbgs() << "analyzeBranch didn't find conditional branch in CmpBB.\n");
  479. ++NumCmpBranchRejs;
  480. return false;
  481. }
  482. if (!parseCond(CmpBBCond, CmpBBTailCC)) {
  483. LLVM_DEBUG(dbgs() << "Unsupported branch type on CmpBB\n");
  484. ++NumCmpBranchRejs;
  485. return false;
  486. }
  487. if (TBB != Tail)
  488. CmpBBTailCC = AArch64CC::getInvertedCondCode(CmpBBTailCC);
  489. LLVM_DEBUG(dbgs() << "Head->CmpBB on "
  490. << AArch64CC::getCondCodeName(HeadCmpBBCC)
  491. << ", CmpBB->Tail on "
  492. << AArch64CC::getCondCodeName(CmpBBTailCC) << '\n');
  493. CmpMI = findConvertibleCompare(CmpBB);
  494. if (!CmpMI)
  495. return false;
  496. if (!canSpeculateInstrs(CmpBB, CmpMI)) {
  497. ++NumSpeculateRejs;
  498. return false;
  499. }
  500. return true;
  501. }
  502. void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) {
  503. LLVM_DEBUG(dbgs() << "Merging " << printMBBReference(*CmpBB) << " into "
  504. << printMBBReference(*Head) << ":\n"
  505. << *CmpBB);
  506. // All CmpBB instructions are moved into Head, and CmpBB is deleted.
  507. // Update the CFG first.
  508. updateTailPHIs();
  509. // Save successor probabilties before removing CmpBB and Tail from their
  510. // parents.
  511. BranchProbability Head2CmpBB = MBPI->getEdgeProbability(Head, CmpBB);
  512. BranchProbability CmpBB2Tail = MBPI->getEdgeProbability(CmpBB, Tail);
  513. Head->removeSuccessor(CmpBB);
  514. CmpBB->removeSuccessor(Tail);
  515. // If Head and CmpBB had successor probabilties, udpate the probabilities to
  516. // reflect the ccmp-conversion.
  517. if (Head->hasSuccessorProbabilities() && CmpBB->hasSuccessorProbabilities()) {
  518. // Head is allowed two successors. We've removed CmpBB, so the remaining
  519. // successor is Tail. We need to increase the successor probability for
  520. // Tail to account for the CmpBB path we removed.
  521. //
  522. // Pr(Tail|Head) += Pr(CmpBB|Head) * Pr(Tail|CmpBB).
  523. assert(*Head->succ_begin() == Tail && "Head successor is not Tail");
  524. BranchProbability Head2Tail = MBPI->getEdgeProbability(Head, Tail);
  525. Head->setSuccProbability(Head->succ_begin(),
  526. Head2Tail + Head2CmpBB * CmpBB2Tail);
  527. // We will transfer successors of CmpBB to Head in a moment without
  528. // normalizing the successor probabilities. Set the successor probabilites
  529. // before doing so.
  530. //
  531. // Pr(I|Head) = Pr(CmpBB|Head) * Pr(I|CmpBB).
  532. for (auto I = CmpBB->succ_begin(), E = CmpBB->succ_end(); I != E; ++I) {
  533. BranchProbability CmpBB2I = MBPI->getEdgeProbability(CmpBB, *I);
  534. CmpBB->setSuccProbability(I, Head2CmpBB * CmpBB2I);
  535. }
  536. }
  537. Head->transferSuccessorsAndUpdatePHIs(CmpBB);
  538. DebugLoc TermDL = Head->getFirstTerminator()->getDebugLoc();
  539. TII->removeBranch(*Head);
  540. // If the Head terminator was one of the cbz / tbz branches with built-in
  541. // compare, we need to insert an explicit compare instruction in its place.
  542. if (HeadCond[0].getImm() == -1) {
  543. ++NumCompBranches;
  544. unsigned Opc = 0;
  545. switch (HeadCond[1].getImm()) {
  546. case AArch64::CBZW:
  547. case AArch64::CBNZW:
  548. Opc = AArch64::SUBSWri;
  549. break;
  550. case AArch64::CBZX:
  551. case AArch64::CBNZX:
  552. Opc = AArch64::SUBSXri;
  553. break;
  554. default:
  555. llvm_unreachable("Cannot convert Head branch");
  556. }
  557. const MCInstrDesc &MCID = TII->get(Opc);
  558. // Create a dummy virtual register for the SUBS def.
  559. Register DestReg =
  560. MRI->createVirtualRegister(TII->getRegClass(MCID, 0, TRI, *MF));
  561. // Insert a SUBS Rn, #0 instruction instead of the cbz / cbnz.
  562. BuildMI(*Head, Head->end(), TermDL, MCID)
  563. .addReg(DestReg, RegState::Define | RegState::Dead)
  564. .add(HeadCond[2])
  565. .addImm(0)
  566. .addImm(0);
  567. // SUBS uses the GPR*sp register classes.
  568. MRI->constrainRegClass(HeadCond[2].getReg(),
  569. TII->getRegClass(MCID, 1, TRI, *MF));
  570. }
  571. Head->splice(Head->end(), CmpBB, CmpBB->begin(), CmpBB->end());
  572. // Now replace CmpMI with a ccmp instruction that also considers the incoming
  573. // flags.
  574. unsigned Opc = 0;
  575. unsigned FirstOp = 1; // First CmpMI operand to copy.
  576. bool isZBranch = false; // CmpMI is a cbz/cbnz instruction.
  577. switch (CmpMI->getOpcode()) {
  578. default:
  579. llvm_unreachable("Unknown compare opcode");
  580. case AArch64::SUBSWri: Opc = AArch64::CCMPWi; break;
  581. case AArch64::SUBSWrr: Opc = AArch64::CCMPWr; break;
  582. case AArch64::SUBSXri: Opc = AArch64::CCMPXi; break;
  583. case AArch64::SUBSXrr: Opc = AArch64::CCMPXr; break;
  584. case AArch64::ADDSWri: Opc = AArch64::CCMNWi; break;
  585. case AArch64::ADDSWrr: Opc = AArch64::CCMNWr; break;
  586. case AArch64::ADDSXri: Opc = AArch64::CCMNXi; break;
  587. case AArch64::ADDSXrr: Opc = AArch64::CCMNXr; break;
  588. case AArch64::FCMPSrr: Opc = AArch64::FCCMPSrr; FirstOp = 0; break;
  589. case AArch64::FCMPDrr: Opc = AArch64::FCCMPDrr; FirstOp = 0; break;
  590. case AArch64::FCMPESrr: Opc = AArch64::FCCMPESrr; FirstOp = 0; break;
  591. case AArch64::FCMPEDrr: Opc = AArch64::FCCMPEDrr; FirstOp = 0; break;
  592. case AArch64::CBZW:
  593. case AArch64::CBNZW:
  594. Opc = AArch64::CCMPWi;
  595. FirstOp = 0;
  596. isZBranch = true;
  597. break;
  598. case AArch64::CBZX:
  599. case AArch64::CBNZX:
  600. Opc = AArch64::CCMPXi;
  601. FirstOp = 0;
  602. isZBranch = true;
  603. break;
  604. }
  605. // The ccmp instruction should set the flags according to the comparison when
  606. // Head would have branched to CmpBB.
  607. // The NZCV immediate operand should provide flags for the case where Head
  608. // would have branched to Tail. These flags should cause the new Head
  609. // terminator to branch to tail.
  610. unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(CmpBBTailCC);
  611. const MCInstrDesc &MCID = TII->get(Opc);
  612. MRI->constrainRegClass(CmpMI->getOperand(FirstOp).getReg(),
  613. TII->getRegClass(MCID, 0, TRI, *MF));
  614. if (CmpMI->getOperand(FirstOp + 1).isReg())
  615. MRI->constrainRegClass(CmpMI->getOperand(FirstOp + 1).getReg(),
  616. TII->getRegClass(MCID, 1, TRI, *MF));
  617. MachineInstrBuilder MIB = BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), MCID)
  618. .add(CmpMI->getOperand(FirstOp)); // Register Rn
  619. if (isZBranch)
  620. MIB.addImm(0); // cbz/cbnz Rn -> ccmp Rn, #0
  621. else
  622. MIB.add(CmpMI->getOperand(FirstOp + 1)); // Register Rm / Immediate
  623. MIB.addImm(NZCV).addImm(HeadCmpBBCC);
  624. // If CmpMI was a terminator, we need a new conditional branch to replace it.
  625. // This now becomes a Head terminator.
  626. if (isZBranch) {
  627. bool isNZ = CmpMI->getOpcode() == AArch64::CBNZW ||
  628. CmpMI->getOpcode() == AArch64::CBNZX;
  629. BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), TII->get(AArch64::Bcc))
  630. .addImm(isNZ ? AArch64CC::NE : AArch64CC::EQ)
  631. .add(CmpMI->getOperand(1)); // Branch target.
  632. }
  633. CmpMI->eraseFromParent();
  634. Head->updateTerminator(CmpBB->getNextNode());
  635. RemovedBlocks.push_back(CmpBB);
  636. CmpBB->eraseFromParent();
  637. LLVM_DEBUG(dbgs() << "Result:\n" << *Head);
  638. ++NumConverted;
  639. }
  640. int SSACCmpConv::expectedCodeSizeDelta() const {
  641. int delta = 0;
  642. // If the Head terminator was one of the cbz / tbz branches with built-in
  643. // compare, we need to insert an explicit compare instruction in its place
  644. // plus a branch instruction.
  645. if (HeadCond[0].getImm() == -1) {
  646. switch (HeadCond[1].getImm()) {
  647. case AArch64::CBZW:
  648. case AArch64::CBNZW:
  649. case AArch64::CBZX:
  650. case AArch64::CBNZX:
  651. // Therefore delta += 1
  652. delta = 1;
  653. break;
  654. default:
  655. llvm_unreachable("Cannot convert Head branch");
  656. }
  657. }
  658. // If the Cmp terminator was one of the cbz / tbz branches with
  659. // built-in compare, it will be turned into a compare instruction
  660. // into Head, but we do not save any instruction.
  661. // Otherwise, we save the branch instruction.
  662. switch (CmpMI->getOpcode()) {
  663. default:
  664. --delta;
  665. break;
  666. case AArch64::CBZW:
  667. case AArch64::CBNZW:
  668. case AArch64::CBZX:
  669. case AArch64::CBNZX:
  670. break;
  671. }
  672. return delta;
  673. }
  674. //===----------------------------------------------------------------------===//
  675. // AArch64ConditionalCompares Pass
  676. //===----------------------------------------------------------------------===//
  677. namespace {
  678. class AArch64ConditionalCompares : public MachineFunctionPass {
  679. const MachineBranchProbabilityInfo *MBPI;
  680. const TargetInstrInfo *TII;
  681. const TargetRegisterInfo *TRI;
  682. MCSchedModel SchedModel;
  683. // Does the proceeded function has Oz attribute.
  684. bool MinSize;
  685. MachineRegisterInfo *MRI;
  686. MachineDominatorTree *DomTree;
  687. MachineLoopInfo *Loops;
  688. MachineTraceMetrics *Traces;
  689. MachineTraceMetrics::Ensemble *MinInstr;
  690. SSACCmpConv CmpConv;
  691. public:
  692. static char ID;
  693. AArch64ConditionalCompares() : MachineFunctionPass(ID) {
  694. initializeAArch64ConditionalComparesPass(*PassRegistry::getPassRegistry());
  695. }
  696. void getAnalysisUsage(AnalysisUsage &AU) const override;
  697. bool runOnMachineFunction(MachineFunction &MF) override;
  698. StringRef getPassName() const override {
  699. return "AArch64 Conditional Compares";
  700. }
  701. private:
  702. bool tryConvert(MachineBasicBlock *);
  703. void updateDomTree(ArrayRef<MachineBasicBlock *> Removed);
  704. void updateLoops(ArrayRef<MachineBasicBlock *> Removed);
  705. void invalidateTraces();
  706. bool shouldConvert();
  707. };
  708. } // end anonymous namespace
  709. char AArch64ConditionalCompares::ID = 0;
  710. INITIALIZE_PASS_BEGIN(AArch64ConditionalCompares, "aarch64-ccmp",
  711. "AArch64 CCMP Pass", false, false)
  712. INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
  713. INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
  714. INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
  715. INITIALIZE_PASS_END(AArch64ConditionalCompares, "aarch64-ccmp",
  716. "AArch64 CCMP Pass", false, false)
  717. FunctionPass *llvm::createAArch64ConditionalCompares() {
  718. return new AArch64ConditionalCompares();
  719. }
  720. void AArch64ConditionalCompares::getAnalysisUsage(AnalysisUsage &AU) const {
  721. AU.addRequired<MachineBranchProbabilityInfo>();
  722. AU.addRequired<MachineDominatorTree>();
  723. AU.addPreserved<MachineDominatorTree>();
  724. AU.addRequired<MachineLoopInfo>();
  725. AU.addPreserved<MachineLoopInfo>();
  726. AU.addRequired<MachineTraceMetrics>();
  727. AU.addPreserved<MachineTraceMetrics>();
  728. MachineFunctionPass::getAnalysisUsage(AU);
  729. }
  730. /// Update the dominator tree after if-conversion erased some blocks.
  731. void AArch64ConditionalCompares::updateDomTree(
  732. ArrayRef<MachineBasicBlock *> Removed) {
  733. // convert() removes CmpBB which was previously dominated by Head.
  734. // CmpBB children should be transferred to Head.
  735. MachineDomTreeNode *HeadNode = DomTree->getNode(CmpConv.Head);
  736. for (MachineBasicBlock *RemovedMBB : Removed) {
  737. MachineDomTreeNode *Node = DomTree->getNode(RemovedMBB);
  738. assert(Node != HeadNode && "Cannot erase the head node");
  739. assert(Node->getIDom() == HeadNode && "CmpBB should be dominated by Head");
  740. while (Node->getNumChildren())
  741. DomTree->changeImmediateDominator(Node->back(), HeadNode);
  742. DomTree->eraseNode(RemovedMBB);
  743. }
  744. }
  745. /// Update LoopInfo after if-conversion.
  746. void
  747. AArch64ConditionalCompares::updateLoops(ArrayRef<MachineBasicBlock *> Removed) {
  748. if (!Loops)
  749. return;
  750. for (MachineBasicBlock *RemovedMBB : Removed)
  751. Loops->removeBlock(RemovedMBB);
  752. }
  753. /// Invalidate MachineTraceMetrics before if-conversion.
  754. void AArch64ConditionalCompares::invalidateTraces() {
  755. Traces->invalidate(CmpConv.Head);
  756. Traces->invalidate(CmpConv.CmpBB);
  757. }
  758. /// Apply cost model and heuristics to the if-conversion in IfConv.
  759. /// Return true if the conversion is a good idea.
  760. ///
  761. bool AArch64ConditionalCompares::shouldConvert() {
  762. // Stress testing mode disables all cost considerations.
  763. if (Stress)
  764. return true;
  765. if (!MinInstr)
  766. MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
  767. // Head dominates CmpBB, so it is always included in its trace.
  768. MachineTraceMetrics::Trace Trace = MinInstr->getTrace(CmpConv.CmpBB);
  769. // If code size is the main concern
  770. if (MinSize) {
  771. int CodeSizeDelta = CmpConv.expectedCodeSizeDelta();
  772. LLVM_DEBUG(dbgs() << "Code size delta: " << CodeSizeDelta << '\n');
  773. // If we are minimizing the code size, do the conversion whatever
  774. // the cost is.
  775. if (CodeSizeDelta < 0)
  776. return true;
  777. if (CodeSizeDelta > 0) {
  778. LLVM_DEBUG(dbgs() << "Code size is increasing, give up on this one.\n");
  779. return false;
  780. }
  781. // CodeSizeDelta == 0, continue with the regular heuristics
  782. }
  783. // Heuristic: The compare conversion delays the execution of the branch
  784. // instruction because we must wait for the inputs to the second compare as
  785. // well. The branch has no dependent instructions, but delaying it increases
  786. // the cost of a misprediction.
  787. //
  788. // Set a limit on the delay we will accept.
  789. unsigned DelayLimit = SchedModel.MispredictPenalty * 3 / 4;
  790. // Instruction depths can be computed for all trace instructions above CmpBB.
  791. unsigned HeadDepth =
  792. Trace.getInstrCycles(*CmpConv.Head->getFirstTerminator()).Depth;
  793. unsigned CmpBBDepth =
  794. Trace.getInstrCycles(*CmpConv.CmpBB->getFirstTerminator()).Depth;
  795. LLVM_DEBUG(dbgs() << "Head depth: " << HeadDepth
  796. << "\nCmpBB depth: " << CmpBBDepth << '\n');
  797. if (CmpBBDepth > HeadDepth + DelayLimit) {
  798. LLVM_DEBUG(dbgs() << "Branch delay would be larger than " << DelayLimit
  799. << " cycles.\n");
  800. return false;
  801. }
  802. // Check the resource depth at the bottom of CmpBB - these instructions will
  803. // be speculated.
  804. unsigned ResDepth = Trace.getResourceDepth(true);
  805. LLVM_DEBUG(dbgs() << "Resources: " << ResDepth << '\n');
  806. // Heuristic: The speculatively executed instructions must all be able to
  807. // merge into the Head block. The Head critical path should dominate the
  808. // resource cost of the speculated instructions.
  809. if (ResDepth > HeadDepth) {
  810. LLVM_DEBUG(dbgs() << "Too many instructions to speculate.\n");
  811. return false;
  812. }
  813. return true;
  814. }
  815. bool AArch64ConditionalCompares::tryConvert(MachineBasicBlock *MBB) {
  816. bool Changed = false;
  817. while (CmpConv.canConvert(MBB) && shouldConvert()) {
  818. invalidateTraces();
  819. SmallVector<MachineBasicBlock *, 4> RemovedBlocks;
  820. CmpConv.convert(RemovedBlocks);
  821. Changed = true;
  822. updateDomTree(RemovedBlocks);
  823. updateLoops(RemovedBlocks);
  824. }
  825. return Changed;
  826. }
  827. bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) {
  828. LLVM_DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n"
  829. << "********** Function: " << MF.getName() << '\n');
  830. if (skipFunction(MF.getFunction()))
  831. return false;
  832. TII = MF.getSubtarget().getInstrInfo();
  833. TRI = MF.getSubtarget().getRegisterInfo();
  834. SchedModel = MF.getSubtarget().getSchedModel();
  835. MRI = &MF.getRegInfo();
  836. DomTree = &getAnalysis<MachineDominatorTree>();
  837. Loops = getAnalysisIfAvailable<MachineLoopInfo>();
  838. MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
  839. Traces = &getAnalysis<MachineTraceMetrics>();
  840. MinInstr = nullptr;
  841. MinSize = MF.getFunction().hasMinSize();
  842. bool Changed = false;
  843. CmpConv.runOnMachineFunction(MF, MBPI);
  844. // Visit blocks in dominator tree pre-order. The pre-order enables multiple
  845. // cmp-conversions from the same head block.
  846. // Note that updateDomTree() modifies the children of the DomTree node
  847. // currently being visited. The df_iterator supports that; it doesn't look at
  848. // child_begin() / child_end() until after a node has been visited.
  849. for (auto *I : depth_first(DomTree))
  850. if (tryConvert(I->getBlock()))
  851. Changed = true;
  852. return Changed;
  853. }