PPCReduceCRLogicals.cpp 29 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739
  1. //===---- PPCReduceCRLogicals.cpp - Reduce CR Bit Logical operations ------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===---------------------------------------------------------------------===//
  8. //
  9. // This pass aims to reduce the number of logical operations on bits in the CR
  10. // register. These instructions have a fairly high latency and only a single
  11. // pipeline at their disposal in modern PPC cores. Furthermore, they have a
  12. // tendency to occur in fairly small blocks where there's little opportunity
  13. // to hide the latency between the CR logical operation and its user.
  14. //
  15. //===---------------------------------------------------------------------===//
  16. #include "PPC.h"
  17. #include "PPCInstrInfo.h"
  18. #include "PPCTargetMachine.h"
  19. #include "llvm/ADT/Statistic.h"
  20. #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
  21. #include "llvm/CodeGen/MachineDominators.h"
  22. #include "llvm/CodeGen/MachineFunctionPass.h"
  23. #include "llvm/CodeGen/MachineInstrBuilder.h"
  24. #include "llvm/CodeGen/MachineRegisterInfo.h"
  25. #include "llvm/Config/llvm-config.h"
  26. #include "llvm/InitializePasses.h"
  27. #include "llvm/Support/Debug.h"
  28. using namespace llvm;
  29. #define DEBUG_TYPE "ppc-reduce-cr-ops"
  30. STATISTIC(NumContainedSingleUseBinOps,
  31. "Number of single-use binary CR logical ops contained in a block");
  32. STATISTIC(NumToSplitBlocks,
  33. "Number of binary CR logical ops that can be used to split blocks");
  34. STATISTIC(TotalCRLogicals, "Number of CR logical ops.");
  35. STATISTIC(TotalNullaryCRLogicals,
  36. "Number of nullary CR logical ops (CRSET/CRUNSET).");
  37. STATISTIC(TotalUnaryCRLogicals, "Number of unary CR logical ops.");
  38. STATISTIC(TotalBinaryCRLogicals, "Number of CR logical ops.");
  39. STATISTIC(NumBlocksSplitOnBinaryCROp,
  40. "Number of blocks split on CR binary logical ops.");
  41. STATISTIC(NumNotSplitIdenticalOperands,
  42. "Number of blocks not split due to operands being identical.");
  43. STATISTIC(NumNotSplitChainCopies,
  44. "Number of blocks not split due to operands being chained copies.");
  45. STATISTIC(NumNotSplitWrongOpcode,
  46. "Number of blocks not split due to the wrong opcode.");
  47. /// Given a basic block \p Successor that potentially contains PHIs, this
  48. /// function will look for any incoming values in the PHIs that are supposed to
  49. /// be coming from \p OrigMBB but whose definition is actually in \p NewMBB.
  50. /// Any such PHIs will be updated to reflect reality.
  51. static void updatePHIs(MachineBasicBlock *Successor, MachineBasicBlock *OrigMBB,
  52. MachineBasicBlock *NewMBB, MachineRegisterInfo *MRI) {
  53. for (auto &MI : Successor->instrs()) {
  54. if (!MI.isPHI())
  55. continue;
  56. // This is a really ugly-looking loop, but it was pillaged directly from
  57. // MachineBasicBlock::transferSuccessorsAndUpdatePHIs().
  58. for (unsigned i = 2, e = MI.getNumOperands() + 1; i != e; i += 2) {
  59. MachineOperand &MO = MI.getOperand(i);
  60. if (MO.getMBB() == OrigMBB) {
  61. // Check if the instruction is actually defined in NewMBB.
  62. if (MI.getOperand(i - 1).isReg()) {
  63. MachineInstr *DefMI = MRI->getVRegDef(MI.getOperand(i - 1).getReg());
  64. if (DefMI->getParent() == NewMBB ||
  65. !OrigMBB->isSuccessor(Successor)) {
  66. MO.setMBB(NewMBB);
  67. break;
  68. }
  69. }
  70. }
  71. }
  72. }
  73. }
  74. /// Given a basic block \p Successor that potentially contains PHIs, this
  75. /// function will look for PHIs that have an incoming value from \p OrigMBB
  76. /// and will add the same incoming value from \p NewMBB.
  77. /// NOTE: This should only be used if \p NewMBB is an immediate dominator of
  78. /// \p OrigMBB.
  79. static void addIncomingValuesToPHIs(MachineBasicBlock *Successor,
  80. MachineBasicBlock *OrigMBB,
  81. MachineBasicBlock *NewMBB,
  82. MachineRegisterInfo *MRI) {
  83. assert(OrigMBB->isSuccessor(NewMBB) &&
  84. "NewMBB must be a successor of OrigMBB");
  85. for (auto &MI : Successor->instrs()) {
  86. if (!MI.isPHI())
  87. continue;
  88. // This is a really ugly-looking loop, but it was pillaged directly from
  89. // MachineBasicBlock::transferSuccessorsAndUpdatePHIs().
  90. for (unsigned i = 2, e = MI.getNumOperands() + 1; i != e; i += 2) {
  91. MachineOperand &MO = MI.getOperand(i);
  92. if (MO.getMBB() == OrigMBB) {
  93. MachineInstrBuilder MIB(*MI.getParent()->getParent(), &MI);
  94. MIB.addReg(MI.getOperand(i - 1).getReg()).addMBB(NewMBB);
  95. break;
  96. }
  97. }
  98. }
  99. }
  100. struct BlockSplitInfo {
  101. MachineInstr *OrigBranch;
  102. MachineInstr *SplitBefore;
  103. MachineInstr *SplitCond;
  104. bool InvertNewBranch;
  105. bool InvertOrigBranch;
  106. bool BranchToFallThrough;
  107. const MachineBranchProbabilityInfo *MBPI;
  108. MachineInstr *MIToDelete;
  109. MachineInstr *NewCond;
  110. bool allInstrsInSameMBB() {
  111. if (!OrigBranch || !SplitBefore || !SplitCond)
  112. return false;
  113. MachineBasicBlock *MBB = OrigBranch->getParent();
  114. if (SplitBefore->getParent() != MBB || SplitCond->getParent() != MBB)
  115. return false;
  116. if (MIToDelete && MIToDelete->getParent() != MBB)
  117. return false;
  118. if (NewCond && NewCond->getParent() != MBB)
  119. return false;
  120. return true;
  121. }
  122. };
  123. /// Splits a MachineBasicBlock to branch before \p SplitBefore. The original
  124. /// branch is \p OrigBranch. The target of the new branch can either be the same
  125. /// as the target of the original branch or the fallthrough successor of the
  126. /// original block as determined by \p BranchToFallThrough. The branch
  127. /// conditions will be inverted according to \p InvertNewBranch and
  128. /// \p InvertOrigBranch. If an instruction that previously fed the branch is to
  129. /// be deleted, it is provided in \p MIToDelete and \p NewCond will be used as
  130. /// the branch condition. The branch probabilities will be set if the
  131. /// MachineBranchProbabilityInfo isn't null.
  132. static bool splitMBB(BlockSplitInfo &BSI) {
  133. assert(BSI.allInstrsInSameMBB() &&
  134. "All instructions must be in the same block.");
  135. MachineBasicBlock *ThisMBB = BSI.OrigBranch->getParent();
  136. MachineFunction *MF = ThisMBB->getParent();
  137. MachineRegisterInfo *MRI = &MF->getRegInfo();
  138. assert(MRI->isSSA() && "Can only do this while the function is in SSA form.");
  139. if (ThisMBB->succ_size() != 2) {
  140. LLVM_DEBUG(
  141. dbgs() << "Don't know how to handle blocks that don't have exactly"
  142. << " two successors.\n");
  143. return false;
  144. }
  145. const PPCInstrInfo *TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
  146. unsigned OrigBROpcode = BSI.OrigBranch->getOpcode();
  147. unsigned InvertedOpcode =
  148. OrigBROpcode == PPC::BC
  149. ? PPC::BCn
  150. : OrigBROpcode == PPC::BCn
  151. ? PPC::BC
  152. : OrigBROpcode == PPC::BCLR ? PPC::BCLRn : PPC::BCLR;
  153. unsigned NewBROpcode = BSI.InvertNewBranch ? InvertedOpcode : OrigBROpcode;
  154. MachineBasicBlock *OrigTarget = BSI.OrigBranch->getOperand(1).getMBB();
  155. MachineBasicBlock *OrigFallThrough = OrigTarget == *ThisMBB->succ_begin()
  156. ? *ThisMBB->succ_rbegin()
  157. : *ThisMBB->succ_begin();
  158. MachineBasicBlock *NewBRTarget =
  159. BSI.BranchToFallThrough ? OrigFallThrough : OrigTarget;
  160. // It's impossible to know the precise branch probability after the split.
  161. // But it still needs to be reasonable, the whole probability to original
  162. // targets should not be changed.
  163. // After split NewBRTarget will get two incoming edges. Assume P0 is the
  164. // original branch probability to NewBRTarget, P1 and P2 are new branch
  165. // probabilies to NewBRTarget after split. If the two edge frequencies are
  166. // same, then
  167. // F * P1 = F * P0 / 2 ==> P1 = P0 / 2
  168. // F * (1 - P1) * P2 = F * P1 ==> P2 = P1 / (1 - P1)
  169. BranchProbability ProbToNewTarget, ProbFallThrough; // Prob for new Br.
  170. BranchProbability ProbOrigTarget, ProbOrigFallThrough; // Prob for orig Br.
  171. ProbToNewTarget = ProbFallThrough = BranchProbability::getUnknown();
  172. ProbOrigTarget = ProbOrigFallThrough = BranchProbability::getUnknown();
  173. if (BSI.MBPI) {
  174. if (BSI.BranchToFallThrough) {
  175. ProbToNewTarget = BSI.MBPI->getEdgeProbability(ThisMBB, OrigFallThrough) / 2;
  176. ProbFallThrough = ProbToNewTarget.getCompl();
  177. ProbOrigFallThrough = ProbToNewTarget / ProbToNewTarget.getCompl();
  178. ProbOrigTarget = ProbOrigFallThrough.getCompl();
  179. } else {
  180. ProbToNewTarget = BSI.MBPI->getEdgeProbability(ThisMBB, OrigTarget) / 2;
  181. ProbFallThrough = ProbToNewTarget.getCompl();
  182. ProbOrigTarget = ProbToNewTarget / ProbToNewTarget.getCompl();
  183. ProbOrigFallThrough = ProbOrigTarget.getCompl();
  184. }
  185. }
  186. // Create a new basic block.
  187. MachineBasicBlock::iterator InsertPoint = BSI.SplitBefore;
  188. const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
  189. MachineFunction::iterator It = ThisMBB->getIterator();
  190. MachineBasicBlock *NewMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  191. MF->insert(++It, NewMBB);
  192. // Move everything after SplitBefore into the new block.
  193. NewMBB->splice(NewMBB->end(), ThisMBB, InsertPoint, ThisMBB->end());
  194. NewMBB->transferSuccessors(ThisMBB);
  195. if (!ProbOrigTarget.isUnknown()) {
  196. auto MBBI = find(NewMBB->successors(), OrigTarget);
  197. NewMBB->setSuccProbability(MBBI, ProbOrigTarget);
  198. MBBI = find(NewMBB->successors(), OrigFallThrough);
  199. NewMBB->setSuccProbability(MBBI, ProbOrigFallThrough);
  200. }
  201. // Add the two successors to ThisMBB.
  202. ThisMBB->addSuccessor(NewBRTarget, ProbToNewTarget);
  203. ThisMBB->addSuccessor(NewMBB, ProbFallThrough);
  204. // Add the branches to ThisMBB.
  205. BuildMI(*ThisMBB, ThisMBB->end(), BSI.SplitBefore->getDebugLoc(),
  206. TII->get(NewBROpcode))
  207. .addReg(BSI.SplitCond->getOperand(0).getReg())
  208. .addMBB(NewBRTarget);
  209. BuildMI(*ThisMBB, ThisMBB->end(), BSI.SplitBefore->getDebugLoc(),
  210. TII->get(PPC::B))
  211. .addMBB(NewMBB);
  212. if (BSI.MIToDelete)
  213. BSI.MIToDelete->eraseFromParent();
  214. // Change the condition on the original branch and invert it if requested.
  215. auto FirstTerminator = NewMBB->getFirstTerminator();
  216. if (BSI.NewCond) {
  217. assert(FirstTerminator->getOperand(0).isReg() &&
  218. "Can't update condition of unconditional branch.");
  219. FirstTerminator->getOperand(0).setReg(BSI.NewCond->getOperand(0).getReg());
  220. }
  221. if (BSI.InvertOrigBranch)
  222. FirstTerminator->setDesc(TII->get(InvertedOpcode));
  223. // If any of the PHIs in the successors of NewMBB reference values that
  224. // now come from NewMBB, they need to be updated.
  225. for (auto *Succ : NewMBB->successors()) {
  226. updatePHIs(Succ, ThisMBB, NewMBB, MRI);
  227. }
  228. addIncomingValuesToPHIs(NewBRTarget, ThisMBB, NewMBB, MRI);
  229. LLVM_DEBUG(dbgs() << "After splitting, ThisMBB:\n"; ThisMBB->dump());
  230. LLVM_DEBUG(dbgs() << "NewMBB:\n"; NewMBB->dump());
  231. LLVM_DEBUG(dbgs() << "New branch-to block:\n"; NewBRTarget->dump());
  232. return true;
  233. }
  234. static bool isBinary(MachineInstr &MI) {
  235. return MI.getNumOperands() == 3;
  236. }
  237. static bool isNullary(MachineInstr &MI) {
  238. return MI.getNumOperands() == 1;
  239. }
  240. /// Given a CR logical operation \p CROp, branch opcode \p BROp as well as
  241. /// a flag to indicate if the first operand of \p CROp is used as the
  242. /// SplitBefore operand, determines whether either of the branches are to be
  243. /// inverted as well as whether the new target should be the original
  244. /// fall-through block.
  245. static void
  246. computeBranchTargetAndInversion(unsigned CROp, unsigned BROp, bool UsingDef1,
  247. bool &InvertNewBranch, bool &InvertOrigBranch,
  248. bool &TargetIsFallThrough) {
  249. // The conditions under which each of the output operands should be [un]set
  250. // can certainly be written much more concisely with just 3 if statements or
  251. // ternary expressions. However, this provides a much clearer overview to the
  252. // reader as to what is set for each <CROp, BROp, OpUsed> combination.
  253. if (BROp == PPC::BC || BROp == PPC::BCLR) {
  254. // Regular branches.
  255. switch (CROp) {
  256. default:
  257. llvm_unreachable("Don't know how to handle this CR logical.");
  258. case PPC::CROR:
  259. InvertNewBranch = false;
  260. InvertOrigBranch = false;
  261. TargetIsFallThrough = false;
  262. return;
  263. case PPC::CRAND:
  264. InvertNewBranch = true;
  265. InvertOrigBranch = false;
  266. TargetIsFallThrough = true;
  267. return;
  268. case PPC::CRNAND:
  269. InvertNewBranch = true;
  270. InvertOrigBranch = true;
  271. TargetIsFallThrough = false;
  272. return;
  273. case PPC::CRNOR:
  274. InvertNewBranch = false;
  275. InvertOrigBranch = true;
  276. TargetIsFallThrough = true;
  277. return;
  278. case PPC::CRORC:
  279. InvertNewBranch = UsingDef1;
  280. InvertOrigBranch = !UsingDef1;
  281. TargetIsFallThrough = false;
  282. return;
  283. case PPC::CRANDC:
  284. InvertNewBranch = !UsingDef1;
  285. InvertOrigBranch = !UsingDef1;
  286. TargetIsFallThrough = true;
  287. return;
  288. }
  289. } else if (BROp == PPC::BCn || BROp == PPC::BCLRn) {
  290. // Negated branches.
  291. switch (CROp) {
  292. default:
  293. llvm_unreachable("Don't know how to handle this CR logical.");
  294. case PPC::CROR:
  295. InvertNewBranch = true;
  296. InvertOrigBranch = false;
  297. TargetIsFallThrough = true;
  298. return;
  299. case PPC::CRAND:
  300. InvertNewBranch = false;
  301. InvertOrigBranch = false;
  302. TargetIsFallThrough = false;
  303. return;
  304. case PPC::CRNAND:
  305. InvertNewBranch = false;
  306. InvertOrigBranch = true;
  307. TargetIsFallThrough = true;
  308. return;
  309. case PPC::CRNOR:
  310. InvertNewBranch = true;
  311. InvertOrigBranch = true;
  312. TargetIsFallThrough = false;
  313. return;
  314. case PPC::CRORC:
  315. InvertNewBranch = !UsingDef1;
  316. InvertOrigBranch = !UsingDef1;
  317. TargetIsFallThrough = true;
  318. return;
  319. case PPC::CRANDC:
  320. InvertNewBranch = UsingDef1;
  321. InvertOrigBranch = !UsingDef1;
  322. TargetIsFallThrough = false;
  323. return;
  324. }
  325. } else
  326. llvm_unreachable("Don't know how to handle this branch.");
  327. }
  328. namespace {
  329. class PPCReduceCRLogicals : public MachineFunctionPass {
  330. public:
  331. static char ID;
  332. struct CRLogicalOpInfo {
  333. MachineInstr *MI;
  334. // FIXME: If chains of copies are to be handled, this should be a vector.
  335. std::pair<MachineInstr*, MachineInstr*> CopyDefs;
  336. std::pair<MachineInstr*, MachineInstr*> TrueDefs;
  337. unsigned IsBinary : 1;
  338. unsigned IsNullary : 1;
  339. unsigned ContainedInBlock : 1;
  340. unsigned FeedsISEL : 1;
  341. unsigned FeedsBR : 1;
  342. unsigned FeedsLogical : 1;
  343. unsigned SingleUse : 1;
  344. unsigned DefsSingleUse : 1;
  345. unsigned SubregDef1;
  346. unsigned SubregDef2;
  347. CRLogicalOpInfo() : MI(nullptr), IsBinary(0), IsNullary(0),
  348. ContainedInBlock(0), FeedsISEL(0), FeedsBR(0),
  349. FeedsLogical(0), SingleUse(0), DefsSingleUse(1),
  350. SubregDef1(0), SubregDef2(0) { }
  351. void dump();
  352. };
  353. private:
  354. const PPCInstrInfo *TII = nullptr;
  355. MachineFunction *MF = nullptr;
  356. MachineRegisterInfo *MRI = nullptr;
  357. const MachineBranchProbabilityInfo *MBPI = nullptr;
  358. // A vector to contain all the CR logical operations
  359. SmallVector<CRLogicalOpInfo, 16> AllCRLogicalOps;
  360. void initialize(MachineFunction &MFParm);
  361. void collectCRLogicals();
  362. bool handleCROp(unsigned Idx);
  363. bool splitBlockOnBinaryCROp(CRLogicalOpInfo &CRI);
  364. static bool isCRLogical(MachineInstr &MI) {
  365. unsigned Opc = MI.getOpcode();
  366. return Opc == PPC::CRAND || Opc == PPC::CRNAND || Opc == PPC::CROR ||
  367. Opc == PPC::CRXOR || Opc == PPC::CRNOR || Opc == PPC::CRNOT ||
  368. Opc == PPC::CREQV || Opc == PPC::CRANDC || Opc == PPC::CRORC ||
  369. Opc == PPC::CRSET || Opc == PPC::CRUNSET || Opc == PPC::CR6SET ||
  370. Opc == PPC::CR6UNSET;
  371. }
  372. bool simplifyCode() {
  373. bool Changed = false;
  374. // Not using a range-based for loop here as the vector may grow while being
  375. // operated on.
  376. for (unsigned i = 0; i < AllCRLogicalOps.size(); i++)
  377. Changed |= handleCROp(i);
  378. return Changed;
  379. }
  380. public:
  381. PPCReduceCRLogicals() : MachineFunctionPass(ID) {
  382. initializePPCReduceCRLogicalsPass(*PassRegistry::getPassRegistry());
  383. }
  384. MachineInstr *lookThroughCRCopy(unsigned Reg, unsigned &Subreg,
  385. MachineInstr *&CpDef);
  386. bool runOnMachineFunction(MachineFunction &MF) override {
  387. if (skipFunction(MF.getFunction()))
  388. return false;
  389. // If the subtarget doesn't use CR bits, there's nothing to do.
  390. const PPCSubtarget &STI = MF.getSubtarget<PPCSubtarget>();
  391. if (!STI.useCRBits())
  392. return false;
  393. initialize(MF);
  394. collectCRLogicals();
  395. return simplifyCode();
  396. }
  397. CRLogicalOpInfo createCRLogicalOpInfo(MachineInstr &MI);
  398. void getAnalysisUsage(AnalysisUsage &AU) const override {
  399. AU.addRequired<MachineBranchProbabilityInfo>();
  400. AU.addRequired<MachineDominatorTree>();
  401. MachineFunctionPass::getAnalysisUsage(AU);
  402. }
  403. };
  404. #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  405. LLVM_DUMP_METHOD void PPCReduceCRLogicals::CRLogicalOpInfo::dump() {
  406. dbgs() << "CRLogicalOpMI: ";
  407. MI->dump();
  408. dbgs() << "IsBinary: " << IsBinary << ", FeedsISEL: " << FeedsISEL;
  409. dbgs() << ", FeedsBR: " << FeedsBR << ", FeedsLogical: ";
  410. dbgs() << FeedsLogical << ", SingleUse: " << SingleUse;
  411. dbgs() << ", DefsSingleUse: " << DefsSingleUse;
  412. dbgs() << ", SubregDef1: " << SubregDef1 << ", SubregDef2: ";
  413. dbgs() << SubregDef2 << ", ContainedInBlock: " << ContainedInBlock;
  414. if (!IsNullary) {
  415. dbgs() << "\nDefs:\n";
  416. TrueDefs.first->dump();
  417. }
  418. if (IsBinary)
  419. TrueDefs.second->dump();
  420. dbgs() << "\n";
  421. if (CopyDefs.first) {
  422. dbgs() << "CopyDef1: ";
  423. CopyDefs.first->dump();
  424. }
  425. if (CopyDefs.second) {
  426. dbgs() << "CopyDef2: ";
  427. CopyDefs.second->dump();
  428. }
  429. }
  430. #endif
  431. PPCReduceCRLogicals::CRLogicalOpInfo
  432. PPCReduceCRLogicals::createCRLogicalOpInfo(MachineInstr &MIParam) {
  433. CRLogicalOpInfo Ret;
  434. Ret.MI = &MIParam;
  435. // Get the defs
  436. if (isNullary(MIParam)) {
  437. Ret.IsNullary = 1;
  438. Ret.TrueDefs = std::make_pair(nullptr, nullptr);
  439. Ret.CopyDefs = std::make_pair(nullptr, nullptr);
  440. } else {
  441. MachineInstr *Def1 = lookThroughCRCopy(MIParam.getOperand(1).getReg(),
  442. Ret.SubregDef1, Ret.CopyDefs.first);
  443. assert(Def1 && "Must be able to find a definition of operand 1.");
  444. Ret.DefsSingleUse &=
  445. MRI->hasOneNonDBGUse(Def1->getOperand(0).getReg());
  446. Ret.DefsSingleUse &=
  447. MRI->hasOneNonDBGUse(Ret.CopyDefs.first->getOperand(0).getReg());
  448. if (isBinary(MIParam)) {
  449. Ret.IsBinary = 1;
  450. MachineInstr *Def2 = lookThroughCRCopy(MIParam.getOperand(2).getReg(),
  451. Ret.SubregDef2,
  452. Ret.CopyDefs.second);
  453. assert(Def2 && "Must be able to find a definition of operand 2.");
  454. Ret.DefsSingleUse &=
  455. MRI->hasOneNonDBGUse(Def2->getOperand(0).getReg());
  456. Ret.DefsSingleUse &=
  457. MRI->hasOneNonDBGUse(Ret.CopyDefs.second->getOperand(0).getReg());
  458. Ret.TrueDefs = std::make_pair(Def1, Def2);
  459. } else {
  460. Ret.TrueDefs = std::make_pair(Def1, nullptr);
  461. Ret.CopyDefs.second = nullptr;
  462. }
  463. }
  464. Ret.ContainedInBlock = 1;
  465. // Get the uses
  466. for (MachineInstr &UseMI :
  467. MRI->use_nodbg_instructions(MIParam.getOperand(0).getReg())) {
  468. unsigned Opc = UseMI.getOpcode();
  469. if (Opc == PPC::ISEL || Opc == PPC::ISEL8)
  470. Ret.FeedsISEL = 1;
  471. if (Opc == PPC::BC || Opc == PPC::BCn || Opc == PPC::BCLR ||
  472. Opc == PPC::BCLRn)
  473. Ret.FeedsBR = 1;
  474. Ret.FeedsLogical = isCRLogical(UseMI);
  475. if (UseMI.getParent() != MIParam.getParent())
  476. Ret.ContainedInBlock = 0;
  477. }
  478. Ret.SingleUse = MRI->hasOneNonDBGUse(MIParam.getOperand(0).getReg()) ? 1 : 0;
  479. // We now know whether all the uses of the CR logical are in the same block.
  480. if (!Ret.IsNullary) {
  481. Ret.ContainedInBlock &=
  482. (MIParam.getParent() == Ret.TrueDefs.first->getParent());
  483. if (Ret.IsBinary)
  484. Ret.ContainedInBlock &=
  485. (MIParam.getParent() == Ret.TrueDefs.second->getParent());
  486. }
  487. LLVM_DEBUG(Ret.dump());
  488. if (Ret.IsBinary && Ret.ContainedInBlock && Ret.SingleUse) {
  489. NumContainedSingleUseBinOps++;
  490. if (Ret.FeedsBR && Ret.DefsSingleUse)
  491. NumToSplitBlocks++;
  492. }
  493. return Ret;
  494. }
  495. /// Looks through a COPY instruction to the actual definition of the CR-bit
  496. /// register and returns the instruction that defines it.
  497. /// FIXME: This currently handles what is by-far the most common case:
  498. /// an instruction that defines a CR field followed by a single copy of a bit
  499. /// from that field into a virtual register. If chains of copies need to be
  500. /// handled, this should have a loop until a non-copy instruction is found.
  501. MachineInstr *PPCReduceCRLogicals::lookThroughCRCopy(unsigned Reg,
  502. unsigned &Subreg,
  503. MachineInstr *&CpDef) {
  504. Subreg = -1;
  505. if (!Register::isVirtualRegister(Reg))
  506. return nullptr;
  507. MachineInstr *Copy = MRI->getVRegDef(Reg);
  508. CpDef = Copy;
  509. if (!Copy->isCopy())
  510. return Copy;
  511. Register CopySrc = Copy->getOperand(1).getReg();
  512. Subreg = Copy->getOperand(1).getSubReg();
  513. if (!CopySrc.isVirtual()) {
  514. const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
  515. // Set the Subreg
  516. if (CopySrc == PPC::CR0EQ || CopySrc == PPC::CR6EQ)
  517. Subreg = PPC::sub_eq;
  518. if (CopySrc == PPC::CR0LT || CopySrc == PPC::CR6LT)
  519. Subreg = PPC::sub_lt;
  520. if (CopySrc == PPC::CR0GT || CopySrc == PPC::CR6GT)
  521. Subreg = PPC::sub_gt;
  522. if (CopySrc == PPC::CR0UN || CopySrc == PPC::CR6UN)
  523. Subreg = PPC::sub_un;
  524. // Loop backwards and return the first MI that modifies the physical CR Reg.
  525. MachineBasicBlock::iterator Me = Copy, B = Copy->getParent()->begin();
  526. while (Me != B)
  527. if ((--Me)->modifiesRegister(CopySrc, TRI))
  528. return &*Me;
  529. return nullptr;
  530. }
  531. return MRI->getVRegDef(CopySrc);
  532. }
  533. void PPCReduceCRLogicals::initialize(MachineFunction &MFParam) {
  534. MF = &MFParam;
  535. MRI = &MF->getRegInfo();
  536. TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
  537. MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
  538. AllCRLogicalOps.clear();
  539. }
  540. /// Contains all the implemented transformations on CR logical operations.
  541. /// For example, a binary CR logical can be used to split a block on its inputs,
  542. /// a unary CR logical might be used to change the condition code on a
  543. /// comparison feeding it. A nullary CR logical might simply be removable
  544. /// if the user of the bit it [un]sets can be transformed.
  545. bool PPCReduceCRLogicals::handleCROp(unsigned Idx) {
  546. // We can definitely split a block on the inputs to a binary CR operation
  547. // whose defs and (single) use are within the same block.
  548. bool Changed = false;
  549. CRLogicalOpInfo CRI = AllCRLogicalOps[Idx];
  550. if (CRI.IsBinary && CRI.ContainedInBlock && CRI.SingleUse && CRI.FeedsBR &&
  551. CRI.DefsSingleUse) {
  552. Changed = splitBlockOnBinaryCROp(CRI);
  553. if (Changed)
  554. NumBlocksSplitOnBinaryCROp++;
  555. }
  556. return Changed;
  557. }
  558. /// Splits a block that contains a CR-logical operation that feeds a branch
  559. /// and whose operands are produced within the block.
  560. /// Example:
  561. /// %vr5<def> = CMPDI %vr2, 0; CRRC:%vr5 G8RC:%vr2
  562. /// %vr6<def> = COPY %vr5:sub_eq; CRBITRC:%vr6 CRRC:%vr5
  563. /// %vr7<def> = CMPDI %vr3, 0; CRRC:%vr7 G8RC:%vr3
  564. /// %vr8<def> = COPY %vr7:sub_eq; CRBITRC:%vr8 CRRC:%vr7
  565. /// %vr9<def> = CROR %vr6<kill>, %vr8<kill>; CRBITRC:%vr9,%vr6,%vr8
  566. /// BC %vr9<kill>, <BB#2>; CRBITRC:%vr9
  567. /// Becomes:
  568. /// %vr5<def> = CMPDI %vr2, 0; CRRC:%vr5 G8RC:%vr2
  569. /// %vr6<def> = COPY %vr5:sub_eq; CRBITRC:%vr6 CRRC:%vr5
  570. /// BC %vr6<kill>, <BB#2>; CRBITRC:%vr6
  571. ///
  572. /// %vr7<def> = CMPDI %vr3, 0; CRRC:%vr7 G8RC:%vr3
  573. /// %vr8<def> = COPY %vr7:sub_eq; CRBITRC:%vr8 CRRC:%vr7
  574. /// BC %vr9<kill>, <BB#2>; CRBITRC:%vr9
  575. bool PPCReduceCRLogicals::splitBlockOnBinaryCROp(CRLogicalOpInfo &CRI) {
  576. if (CRI.CopyDefs.first == CRI.CopyDefs.second) {
  577. LLVM_DEBUG(dbgs() << "Unable to split as the two operands are the same\n");
  578. NumNotSplitIdenticalOperands++;
  579. return false;
  580. }
  581. if (CRI.TrueDefs.first->isCopy() || CRI.TrueDefs.second->isCopy() ||
  582. CRI.TrueDefs.first->isPHI() || CRI.TrueDefs.second->isPHI()) {
  583. LLVM_DEBUG(
  584. dbgs() << "Unable to split because one of the operands is a PHI or "
  585. "chain of copies.\n");
  586. NumNotSplitChainCopies++;
  587. return false;
  588. }
  589. // Note: keep in sync with computeBranchTargetAndInversion().
  590. if (CRI.MI->getOpcode() != PPC::CROR &&
  591. CRI.MI->getOpcode() != PPC::CRAND &&
  592. CRI.MI->getOpcode() != PPC::CRNOR &&
  593. CRI.MI->getOpcode() != PPC::CRNAND &&
  594. CRI.MI->getOpcode() != PPC::CRORC &&
  595. CRI.MI->getOpcode() != PPC::CRANDC) {
  596. LLVM_DEBUG(dbgs() << "Unable to split blocks on this opcode.\n");
  597. NumNotSplitWrongOpcode++;
  598. return false;
  599. }
  600. LLVM_DEBUG(dbgs() << "Splitting the following CR op:\n"; CRI.dump());
  601. MachineBasicBlock::iterator Def1It = CRI.TrueDefs.first;
  602. MachineBasicBlock::iterator Def2It = CRI.TrueDefs.second;
  603. bool UsingDef1 = false;
  604. MachineInstr *SplitBefore = &*Def2It;
  605. for (auto E = CRI.MI->getParent()->end(); Def2It != E; ++Def2It) {
  606. if (Def1It == Def2It) { // Def2 comes before Def1.
  607. SplitBefore = &*Def1It;
  608. UsingDef1 = true;
  609. break;
  610. }
  611. }
  612. LLVM_DEBUG(dbgs() << "We will split the following block:\n";);
  613. LLVM_DEBUG(CRI.MI->getParent()->dump());
  614. LLVM_DEBUG(dbgs() << "Before instruction:\n"; SplitBefore->dump());
  615. // Get the branch instruction.
  616. MachineInstr *Branch =
  617. MRI->use_nodbg_begin(CRI.MI->getOperand(0).getReg())->getParent();
  618. // We want the new block to have no code in it other than the definition
  619. // of the input to the CR logical and the CR logical itself. So we move
  620. // those to the bottom of the block (just before the branch). Then we
  621. // will split before the CR logical.
  622. MachineBasicBlock *MBB = SplitBefore->getParent();
  623. auto FirstTerminator = MBB->getFirstTerminator();
  624. MachineBasicBlock::iterator FirstInstrToMove =
  625. UsingDef1 ? CRI.TrueDefs.first : CRI.TrueDefs.second;
  626. MachineBasicBlock::iterator SecondInstrToMove =
  627. UsingDef1 ? CRI.CopyDefs.first : CRI.CopyDefs.second;
  628. // The instructions that need to be moved are not guaranteed to be
  629. // contiguous. Move them individually.
  630. // FIXME: If one of the operands is a chain of (single use) copies, they
  631. // can all be moved and we can still split.
  632. MBB->splice(FirstTerminator, MBB, FirstInstrToMove);
  633. if (FirstInstrToMove != SecondInstrToMove)
  634. MBB->splice(FirstTerminator, MBB, SecondInstrToMove);
  635. MBB->splice(FirstTerminator, MBB, CRI.MI);
  636. unsigned Opc = CRI.MI->getOpcode();
  637. bool InvertOrigBranch, InvertNewBranch, TargetIsFallThrough;
  638. computeBranchTargetAndInversion(Opc, Branch->getOpcode(), UsingDef1,
  639. InvertNewBranch, InvertOrigBranch,
  640. TargetIsFallThrough);
  641. MachineInstr *SplitCond =
  642. UsingDef1 ? CRI.CopyDefs.second : CRI.CopyDefs.first;
  643. LLVM_DEBUG(dbgs() << "We will " << (InvertNewBranch ? "invert" : "copy"));
  644. LLVM_DEBUG(dbgs() << " the original branch and the target is the "
  645. << (TargetIsFallThrough ? "fallthrough block\n"
  646. : "orig. target block\n"));
  647. LLVM_DEBUG(dbgs() << "Original branch instruction: "; Branch->dump());
  648. BlockSplitInfo BSI { Branch, SplitBefore, SplitCond, InvertNewBranch,
  649. InvertOrigBranch, TargetIsFallThrough, MBPI, CRI.MI,
  650. UsingDef1 ? CRI.CopyDefs.first : CRI.CopyDefs.second };
  651. bool Changed = splitMBB(BSI);
  652. // If we've split on a CR logical that is fed by a CR logical,
  653. // recompute the source CR logical as it may be usable for splitting.
  654. if (Changed) {
  655. bool Input1CRlogical =
  656. CRI.TrueDefs.first && isCRLogical(*CRI.TrueDefs.first);
  657. bool Input2CRlogical =
  658. CRI.TrueDefs.second && isCRLogical(*CRI.TrueDefs.second);
  659. if (Input1CRlogical)
  660. AllCRLogicalOps.push_back(createCRLogicalOpInfo(*CRI.TrueDefs.first));
  661. if (Input2CRlogical)
  662. AllCRLogicalOps.push_back(createCRLogicalOpInfo(*CRI.TrueDefs.second));
  663. }
  664. return Changed;
  665. }
  666. void PPCReduceCRLogicals::collectCRLogicals() {
  667. for (MachineBasicBlock &MBB : *MF) {
  668. for (MachineInstr &MI : MBB) {
  669. if (isCRLogical(MI)) {
  670. AllCRLogicalOps.push_back(createCRLogicalOpInfo(MI));
  671. TotalCRLogicals++;
  672. if (AllCRLogicalOps.back().IsNullary)
  673. TotalNullaryCRLogicals++;
  674. else if (AllCRLogicalOps.back().IsBinary)
  675. TotalBinaryCRLogicals++;
  676. else
  677. TotalUnaryCRLogicals++;
  678. }
  679. }
  680. }
  681. }
  682. } // end anonymous namespace
  683. INITIALIZE_PASS_BEGIN(PPCReduceCRLogicals, DEBUG_TYPE,
  684. "PowerPC Reduce CR logical Operation", false, false)
  685. INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
  686. INITIALIZE_PASS_END(PPCReduceCRLogicals, DEBUG_TYPE,
  687. "PowerPC Reduce CR logical Operation", false, false)
  688. char PPCReduceCRLogicals::ID = 0;
  689. FunctionPass*
  690. llvm::createPPCReduceCRLogicalsPass() { return new PPCReduceCRLogicals(); }