MVETPAndVPTOptimisationsPass.cpp 40 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090
  1. //===-- MVETPAndVPTOptimisationsPass.cpp ----------------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. /// \file This pass does a few optimisations related to Tail predicated loops
  10. /// and MVE VPT blocks before register allocation is performed. For VPT blocks
  11. /// the goal is to maximize the sizes of the blocks that will be created by the
  12. /// MVE VPT Block Insertion pass (which runs after register allocation). For
  13. /// tail predicated loops we transform the loop into something that will
  14. /// hopefully make the backend ARMLowOverheadLoops pass's job easier.
  15. ///
  16. //===----------------------------------------------------------------------===//
  17. #include "ARM.h"
  18. #include "ARMSubtarget.h"
  19. #include "MCTargetDesc/ARMBaseInfo.h"
  20. #include "MVETailPredUtils.h"
  21. #include "Thumb2InstrInfo.h"
  22. #include "llvm/ADT/SmallVector.h"
  23. #include "llvm/CodeGen/MachineBasicBlock.h"
  24. #include "llvm/CodeGen/MachineDominators.h"
  25. #include "llvm/CodeGen/MachineFunction.h"
  26. #include "llvm/CodeGen/MachineFunctionPass.h"
  27. #include "llvm/CodeGen/MachineInstr.h"
  28. #include "llvm/CodeGen/MachineLoopInfo.h"
  29. #include "llvm/InitializePasses.h"
  30. #include "llvm/Support/Debug.h"
  31. #include <cassert>
  32. using namespace llvm;
  33. #define DEBUG_TYPE "arm-mve-vpt-opts"
  34. static cl::opt<bool>
  35. MergeEndDec("arm-enable-merge-loopenddec", cl::Hidden,
  36. cl::desc("Enable merging Loop End and Dec instructions."),
  37. cl::init(true));
  38. static cl::opt<bool>
  39. SetLRPredicate("arm-set-lr-predicate", cl::Hidden,
  40. cl::desc("Enable setting lr as a predicate in tail predication regions."),
  41. cl::init(true));
  42. namespace {
  43. class MVETPAndVPTOptimisations : public MachineFunctionPass {
  44. public:
  45. static char ID;
  46. const Thumb2InstrInfo *TII;
  47. MachineRegisterInfo *MRI;
  48. MVETPAndVPTOptimisations() : MachineFunctionPass(ID) {}
  49. bool runOnMachineFunction(MachineFunction &Fn) override;
  50. void getAnalysisUsage(AnalysisUsage &AU) const override {
  51. AU.addRequired<MachineLoopInfo>();
  52. AU.addPreserved<MachineLoopInfo>();
  53. AU.addRequired<MachineDominatorTree>();
  54. AU.addPreserved<MachineDominatorTree>();
  55. MachineFunctionPass::getAnalysisUsage(AU);
  56. }
  57. StringRef getPassName() const override {
  58. return "ARM MVE TailPred and VPT Optimisation Pass";
  59. }
  60. private:
  61. bool LowerWhileLoopStart(MachineLoop *ML);
  62. bool MergeLoopEnd(MachineLoop *ML);
  63. bool ConvertTailPredLoop(MachineLoop *ML, MachineDominatorTree *DT);
  64. MachineInstr &ReplaceRegisterUseWithVPNOT(MachineBasicBlock &MBB,
  65. MachineInstr &Instr,
  66. MachineOperand &User,
  67. Register Target);
  68. bool ReduceOldVCCRValueUses(MachineBasicBlock &MBB);
  69. bool ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB);
  70. bool ReplaceConstByVPNOTs(MachineBasicBlock &MBB, MachineDominatorTree *DT);
  71. bool ConvertVPSEL(MachineBasicBlock &MBB);
  72. bool HintDoLoopStartReg(MachineBasicBlock &MBB);
  73. MachineInstr *CheckForLRUseInPredecessors(MachineBasicBlock *PreHeader,
  74. MachineInstr *LoopStart);
  75. };
  76. char MVETPAndVPTOptimisations::ID = 0;
  77. } // end anonymous namespace
  78. INITIALIZE_PASS_BEGIN(MVETPAndVPTOptimisations, DEBUG_TYPE,
  79. "ARM MVE TailPred and VPT Optimisations pass", false,
  80. false)
  81. INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
  82. INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
  83. INITIALIZE_PASS_END(MVETPAndVPTOptimisations, DEBUG_TYPE,
  84. "ARM MVE TailPred and VPT Optimisations pass", false, false)
  85. static MachineInstr *LookThroughCOPY(MachineInstr *MI,
  86. MachineRegisterInfo *MRI) {
  87. while (MI && MI->getOpcode() == TargetOpcode::COPY &&
  88. MI->getOperand(1).getReg().isVirtual())
  89. MI = MRI->getVRegDef(MI->getOperand(1).getReg());
  90. return MI;
  91. }
  92. // Given a loop ML, this attempts to find the t2LoopEnd, t2LoopDec and
  93. // corresponding PHI that make up a low overhead loop. Only handles 'do' loops
  94. // at the moment, returning a t2DoLoopStart in LoopStart.
  95. static bool findLoopComponents(MachineLoop *ML, MachineRegisterInfo *MRI,
  96. MachineInstr *&LoopStart, MachineInstr *&LoopPhi,
  97. MachineInstr *&LoopDec, MachineInstr *&LoopEnd) {
  98. MachineBasicBlock *Header = ML->getHeader();
  99. MachineBasicBlock *Latch = ML->getLoopLatch();
  100. if (!Header || !Latch) {
  101. LLVM_DEBUG(dbgs() << " no Loop Latch or Header\n");
  102. return false;
  103. }
  104. // Find the loop end from the terminators.
  105. LoopEnd = nullptr;
  106. for (auto &T : Latch->terminators()) {
  107. if (T.getOpcode() == ARM::t2LoopEnd && T.getOperand(1).getMBB() == Header) {
  108. LoopEnd = &T;
  109. break;
  110. }
  111. if (T.getOpcode() == ARM::t2LoopEndDec &&
  112. T.getOperand(2).getMBB() == Header) {
  113. LoopEnd = &T;
  114. break;
  115. }
  116. }
  117. if (!LoopEnd) {
  118. LLVM_DEBUG(dbgs() << " no LoopEnd\n");
  119. return false;
  120. }
  121. LLVM_DEBUG(dbgs() << " found loop end: " << *LoopEnd);
  122. // Find the dec from the use of the end. There may be copies between
  123. // instructions. We expect the loop to loop like:
  124. // $vs = t2DoLoopStart ...
  125. // loop:
  126. // $vp = phi [ $vs ], [ $vd ]
  127. // ...
  128. // $vd = t2LoopDec $vp
  129. // ...
  130. // t2LoopEnd $vd, loop
  131. if (LoopEnd->getOpcode() == ARM::t2LoopEndDec)
  132. LoopDec = LoopEnd;
  133. else {
  134. LoopDec =
  135. LookThroughCOPY(MRI->getVRegDef(LoopEnd->getOperand(0).getReg()), MRI);
  136. if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec) {
  137. LLVM_DEBUG(dbgs() << " didn't find LoopDec where we expected!\n");
  138. return false;
  139. }
  140. }
  141. LLVM_DEBUG(dbgs() << " found loop dec: " << *LoopDec);
  142. LoopPhi =
  143. LookThroughCOPY(MRI->getVRegDef(LoopDec->getOperand(1).getReg()), MRI);
  144. if (!LoopPhi || LoopPhi->getOpcode() != TargetOpcode::PHI ||
  145. LoopPhi->getNumOperands() != 5 ||
  146. (LoopPhi->getOperand(2).getMBB() != Latch &&
  147. LoopPhi->getOperand(4).getMBB() != Latch)) {
  148. LLVM_DEBUG(dbgs() << " didn't find PHI where we expected!\n");
  149. return false;
  150. }
  151. LLVM_DEBUG(dbgs() << " found loop phi: " << *LoopPhi);
  152. Register StartReg = LoopPhi->getOperand(2).getMBB() == Latch
  153. ? LoopPhi->getOperand(3).getReg()
  154. : LoopPhi->getOperand(1).getReg();
  155. LoopStart = LookThroughCOPY(MRI->getVRegDef(StartReg), MRI);
  156. if (!LoopStart || (LoopStart->getOpcode() != ARM::t2DoLoopStart &&
  157. LoopStart->getOpcode() != ARM::t2WhileLoopSetup &&
  158. LoopStart->getOpcode() != ARM::t2WhileLoopStartLR)) {
  159. LLVM_DEBUG(dbgs() << " didn't find Start where we expected!\n");
  160. return false;
  161. }
  162. LLVM_DEBUG(dbgs() << " found loop start: " << *LoopStart);
  163. return true;
  164. }
  165. static void RevertWhileLoopSetup(MachineInstr *MI, const TargetInstrInfo *TII) {
  166. MachineBasicBlock *MBB = MI->getParent();
  167. assert(MI->getOpcode() == ARM::t2WhileLoopSetup &&
  168. "Only expected a t2WhileLoopSetup in RevertWhileLoopStart!");
  169. // Subs
  170. MachineInstrBuilder MIB =
  171. BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri));
  172. MIB.add(MI->getOperand(0));
  173. MIB.add(MI->getOperand(1));
  174. MIB.addImm(0);
  175. MIB.addImm(ARMCC::AL);
  176. MIB.addReg(ARM::NoRegister);
  177. MIB.addReg(ARM::CPSR, RegState::Define);
  178. // Attempt to find a t2WhileLoopStart and revert to a t2Bcc.
  179. for (MachineInstr &I : MBB->terminators()) {
  180. if (I.getOpcode() == ARM::t2WhileLoopStart) {
  181. MachineInstrBuilder MIB =
  182. BuildMI(*MBB, &I, I.getDebugLoc(), TII->get(ARM::t2Bcc));
  183. MIB.add(MI->getOperand(1)); // branch target
  184. MIB.addImm(ARMCC::EQ);
  185. MIB.addReg(ARM::CPSR);
  186. I.eraseFromParent();
  187. break;
  188. }
  189. }
  190. MI->eraseFromParent();
  191. }
  192. // The Hardware Loop insertion and ISel Lowering produce the pseudos for the
  193. // start of a while loop:
  194. // %a:gprlr = t2WhileLoopSetup %Cnt
  195. // t2WhileLoopStart %a, %BB
  196. // We want to convert those to a single instruction which, like t2LoopEndDec and
  197. // t2DoLoopStartTP is both a terminator and produces a value:
  198. // %a:grplr: t2WhileLoopStartLR %Cnt, %BB
  199. //
  200. // Otherwise if we can't, we revert the loop. t2WhileLoopSetup and
  201. // t2WhileLoopStart are not valid past regalloc.
  202. bool MVETPAndVPTOptimisations::LowerWhileLoopStart(MachineLoop *ML) {
  203. LLVM_DEBUG(dbgs() << "LowerWhileLoopStart on loop "
  204. << ML->getHeader()->getName() << "\n");
  205. MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
  206. if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
  207. return false;
  208. if (LoopStart->getOpcode() != ARM::t2WhileLoopSetup)
  209. return false;
  210. Register LR = LoopStart->getOperand(0).getReg();
  211. auto WLSIt = find_if(MRI->use_nodbg_instructions(LR), [](auto &MI) {
  212. return MI.getOpcode() == ARM::t2WhileLoopStart;
  213. });
  214. if (!MergeEndDec || WLSIt == MRI->use_instr_nodbg_end()) {
  215. RevertWhileLoopSetup(LoopStart, TII);
  216. RevertLoopDec(LoopStart, TII);
  217. RevertLoopEnd(LoopStart, TII);
  218. return true;
  219. }
  220. MachineInstrBuilder MI =
  221. BuildMI(*WLSIt->getParent(), *WLSIt, WLSIt->getDebugLoc(),
  222. TII->get(ARM::t2WhileLoopStartLR), LR)
  223. .add(LoopStart->getOperand(1))
  224. .add(WLSIt->getOperand(1));
  225. (void)MI;
  226. LLVM_DEBUG(dbgs() << "Lowered WhileLoopStart into: " << *MI.getInstr());
  227. WLSIt->eraseFromParent();
  228. LoopStart->eraseFromParent();
  229. return true;
  230. }
  231. // Return true if this instruction is invalid in a low overhead loop, usually
  232. // because it clobbers LR.
  233. static bool IsInvalidTPInstruction(MachineInstr &MI) {
  234. return MI.isCall() || isLoopStart(MI);
  235. }
  236. // Starting from PreHeader, search for invalid instructions back until the
  237. // LoopStart block is reached. If invalid instructions are found, the loop start
  238. // is reverted from a WhileLoopStart to a DoLoopStart on the same loop. Will
  239. // return the new DLS LoopStart if updated.
  240. MachineInstr *MVETPAndVPTOptimisations::CheckForLRUseInPredecessors(
  241. MachineBasicBlock *PreHeader, MachineInstr *LoopStart) {
  242. SmallVector<MachineBasicBlock *> Worklist;
  243. SmallPtrSet<MachineBasicBlock *, 4> Visited;
  244. Worklist.push_back(PreHeader);
  245. Visited.insert(LoopStart->getParent());
  246. while (!Worklist.empty()) {
  247. MachineBasicBlock *MBB = Worklist.pop_back_val();
  248. if (Visited.count(MBB))
  249. continue;
  250. for (MachineInstr &MI : *MBB) {
  251. if (!IsInvalidTPInstruction(MI))
  252. continue;
  253. LLVM_DEBUG(dbgs() << "Found LR use in predecessors, reverting: " << MI);
  254. // Create a t2DoLoopStart at the end of the preheader.
  255. MachineInstrBuilder MIB =
  256. BuildMI(*PreHeader, PreHeader->getFirstTerminator(),
  257. LoopStart->getDebugLoc(), TII->get(ARM::t2DoLoopStart));
  258. MIB.add(LoopStart->getOperand(0));
  259. MIB.add(LoopStart->getOperand(1));
  260. // Make sure to remove the kill flags, to prevent them from being invalid.
  261. LoopStart->getOperand(1).setIsKill(false);
  262. // Revert the t2WhileLoopStartLR to a CMP and Br.
  263. RevertWhileLoopStartLR(LoopStart, TII, ARM::t2Bcc, true);
  264. return MIB;
  265. }
  266. Visited.insert(MBB);
  267. for (auto *Pred : MBB->predecessors())
  268. Worklist.push_back(Pred);
  269. }
  270. return LoopStart;
  271. }
  272. // This function converts loops with t2LoopEnd and t2LoopEnd instructions into
  273. // a single t2LoopEndDec instruction. To do that it needs to make sure that LR
  274. // will be valid to be used for the low overhead loop, which means nothing else
  275. // is using LR (especially calls) and there are no superfluous copies in the
  276. // loop. The t2LoopEndDec is a branching terminator that produces a value (the
  277. // decrement) around the loop edge, which means we need to be careful that they
  278. // will be valid to allocate without any spilling.
  279. bool MVETPAndVPTOptimisations::MergeLoopEnd(MachineLoop *ML) {
  280. if (!MergeEndDec)
  281. return false;
  282. LLVM_DEBUG(dbgs() << "MergeLoopEnd on loop " << ML->getHeader()->getName()
  283. << "\n");
  284. MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
  285. if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
  286. return false;
  287. // Check if there is an illegal instruction (a call) in the low overhead loop
  288. // and if so revert it now before we get any further. While loops also need to
  289. // check the preheaders, but can be reverted to a DLS loop if needed.
  290. auto *PreHeader = ML->getLoopPreheader();
  291. if (LoopStart->getOpcode() == ARM::t2WhileLoopStartLR && PreHeader)
  292. LoopStart = CheckForLRUseInPredecessors(PreHeader, LoopStart);
  293. for (MachineBasicBlock *MBB : ML->blocks()) {
  294. for (MachineInstr &MI : *MBB) {
  295. if (IsInvalidTPInstruction(MI)) {
  296. LLVM_DEBUG(dbgs() << "Found LR use in loop, reverting: " << MI);
  297. if (LoopStart->getOpcode() == ARM::t2DoLoopStart)
  298. RevertDoLoopStart(LoopStart, TII);
  299. else
  300. RevertWhileLoopStartLR(LoopStart, TII);
  301. RevertLoopDec(LoopDec, TII);
  302. RevertLoopEnd(LoopEnd, TII);
  303. return true;
  304. }
  305. }
  306. }
  307. // Remove any copies from the loop, to ensure the phi that remains is both
  308. // simpler and contains no extra uses. Because t2LoopEndDec is a terminator
  309. // that cannot spill, we need to be careful what remains in the loop.
  310. Register PhiReg = LoopPhi->getOperand(0).getReg();
  311. Register DecReg = LoopDec->getOperand(0).getReg();
  312. Register StartReg = LoopStart->getOperand(0).getReg();
  313. // Ensure the uses are expected, and collect any copies we want to remove.
  314. SmallVector<MachineInstr *, 4> Copies;
  315. auto CheckUsers = [&Copies](Register BaseReg,
  316. ArrayRef<MachineInstr *> ExpectedUsers,
  317. MachineRegisterInfo *MRI) {
  318. SmallVector<Register, 4> Worklist;
  319. Worklist.push_back(BaseReg);
  320. while (!Worklist.empty()) {
  321. Register Reg = Worklist.pop_back_val();
  322. for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
  323. if (llvm::is_contained(ExpectedUsers, &MI))
  324. continue;
  325. if (MI.getOpcode() != TargetOpcode::COPY ||
  326. !MI.getOperand(0).getReg().isVirtual()) {
  327. LLVM_DEBUG(dbgs() << "Extra users of register found: " << MI);
  328. return false;
  329. }
  330. Worklist.push_back(MI.getOperand(0).getReg());
  331. Copies.push_back(&MI);
  332. }
  333. }
  334. return true;
  335. };
  336. if (!CheckUsers(PhiReg, {LoopDec}, MRI) ||
  337. !CheckUsers(DecReg, {LoopPhi, LoopEnd}, MRI) ||
  338. !CheckUsers(StartReg, {LoopPhi}, MRI)) {
  339. // Don't leave a t2WhileLoopStartLR without the LoopDecEnd.
  340. if (LoopStart->getOpcode() == ARM::t2WhileLoopStartLR) {
  341. RevertWhileLoopStartLR(LoopStart, TII);
  342. RevertLoopDec(LoopDec, TII);
  343. RevertLoopEnd(LoopEnd, TII);
  344. return true;
  345. }
  346. return false;
  347. }
  348. MRI->constrainRegClass(StartReg, &ARM::GPRlrRegClass);
  349. MRI->constrainRegClass(PhiReg, &ARM::GPRlrRegClass);
  350. MRI->constrainRegClass(DecReg, &ARM::GPRlrRegClass);
  351. if (LoopPhi->getOperand(2).getMBB() == ML->getLoopLatch()) {
  352. LoopPhi->getOperand(3).setReg(StartReg);
  353. LoopPhi->getOperand(1).setReg(DecReg);
  354. } else {
  355. LoopPhi->getOperand(1).setReg(StartReg);
  356. LoopPhi->getOperand(3).setReg(DecReg);
  357. }
  358. SmallVector<MachineOperand, 4> Cond; // For analyzeBranch.
  359. MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For analyzeBranch.
  360. if (!TII->analyzeBranch(*LoopEnd->getParent(), TBB, FBB, Cond) && !FBB) {
  361. // If the LoopEnd falls through, need to insert a t2B to the fall-through
  362. // block so that the non-analyzable t2LoopEndDec doesn't fall through.
  363. MachineFunction::iterator MBBI = ++LoopEnd->getParent()->getIterator();
  364. BuildMI(LoopEnd->getParent(), DebugLoc(), TII->get(ARM::t2B))
  365. .addMBB(&*MBBI)
  366. .add(predOps(ARMCC::AL));
  367. }
  368. // Replace the loop dec and loop end as a single instruction.
  369. MachineInstrBuilder MI =
  370. BuildMI(*LoopEnd->getParent(), *LoopEnd, LoopEnd->getDebugLoc(),
  371. TII->get(ARM::t2LoopEndDec), DecReg)
  372. .addReg(PhiReg)
  373. .add(LoopEnd->getOperand(1));
  374. (void)MI;
  375. LLVM_DEBUG(dbgs() << "Merged LoopDec and End into: " << *MI.getInstr());
  376. LoopDec->eraseFromParent();
  377. LoopEnd->eraseFromParent();
  378. for (auto *MI : Copies)
  379. MI->eraseFromParent();
  380. return true;
  381. }
  382. // Convert t2DoLoopStart to t2DoLoopStartTP if the loop contains VCTP
  383. // instructions. This keeps the VCTP count reg operand on the t2DoLoopStartTP
  384. // instruction, making the backend ARMLowOverheadLoops passes job of finding the
  385. // VCTP operand much simpler.
  386. bool MVETPAndVPTOptimisations::ConvertTailPredLoop(MachineLoop *ML,
  387. MachineDominatorTree *DT) {
  388. LLVM_DEBUG(dbgs() << "ConvertTailPredLoop on loop "
  389. << ML->getHeader()->getName() << "\n");
  390. // Find some loop components including the LoopEnd/Dec/Start, and any VCTP's
  391. // in the loop.
  392. MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
  393. if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
  394. return false;
  395. if (LoopDec != LoopEnd || (LoopStart->getOpcode() != ARM::t2DoLoopStart &&
  396. LoopStart->getOpcode() != ARM::t2WhileLoopStartLR))
  397. return false;
  398. SmallVector<MachineInstr *, 4> VCTPs;
  399. SmallVector<MachineInstr *, 4> MVEInstrs;
  400. for (MachineBasicBlock *BB : ML->blocks()) {
  401. for (MachineInstr &MI : *BB)
  402. if (isVCTP(&MI))
  403. VCTPs.push_back(&MI);
  404. else if (findFirstVPTPredOperandIdx(MI) != -1)
  405. MVEInstrs.push_back(&MI);
  406. }
  407. if (VCTPs.empty()) {
  408. LLVM_DEBUG(dbgs() << " no VCTPs\n");
  409. return false;
  410. }
  411. // Check all VCTPs are the same.
  412. MachineInstr *FirstVCTP = *VCTPs.begin();
  413. for (MachineInstr *VCTP : VCTPs) {
  414. LLVM_DEBUG(dbgs() << " with VCTP " << *VCTP);
  415. if (VCTP->getOpcode() != FirstVCTP->getOpcode() ||
  416. VCTP->getOperand(0).getReg() != FirstVCTP->getOperand(0).getReg()) {
  417. LLVM_DEBUG(dbgs() << " VCTP's are not identical\n");
  418. return false;
  419. }
  420. }
  421. // Check for the register being used can be setup before the loop. We expect
  422. // this to be:
  423. // $vx = ...
  424. // loop:
  425. // $vp = PHI [ $vx ], [ $vd ]
  426. // ..
  427. // $vpr = VCTP $vp
  428. // ..
  429. // $vd = t2SUBri $vp, #n
  430. // ..
  431. Register CountReg = FirstVCTP->getOperand(1).getReg();
  432. if (!CountReg.isVirtual()) {
  433. LLVM_DEBUG(dbgs() << " cannot determine VCTP PHI\n");
  434. return false;
  435. }
  436. MachineInstr *Phi = LookThroughCOPY(MRI->getVRegDef(CountReg), MRI);
  437. if (!Phi || Phi->getOpcode() != TargetOpcode::PHI ||
  438. Phi->getNumOperands() != 5 ||
  439. (Phi->getOperand(2).getMBB() != ML->getLoopLatch() &&
  440. Phi->getOperand(4).getMBB() != ML->getLoopLatch())) {
  441. LLVM_DEBUG(dbgs() << " cannot determine VCTP Count\n");
  442. return false;
  443. }
  444. CountReg = Phi->getOperand(2).getMBB() == ML->getLoopLatch()
  445. ? Phi->getOperand(3).getReg()
  446. : Phi->getOperand(1).getReg();
  447. // Replace the t2DoLoopStart with the t2DoLoopStartTP, move it to the end of
  448. // the preheader and add the new CountReg to it. We attempt to place it late
  449. // in the preheader, but may need to move that earlier based on uses.
  450. MachineBasicBlock *MBB = LoopStart->getParent();
  451. MachineBasicBlock::iterator InsertPt = MBB->getFirstTerminator();
  452. for (MachineInstr &Use :
  453. MRI->use_instructions(LoopStart->getOperand(0).getReg()))
  454. if ((InsertPt != MBB->end() && !DT->dominates(&*InsertPt, &Use)) ||
  455. !DT->dominates(ML->getHeader(), Use.getParent())) {
  456. LLVM_DEBUG(dbgs() << " InsertPt could not be a terminator!\n");
  457. return false;
  458. }
  459. unsigned NewOpc = LoopStart->getOpcode() == ARM::t2DoLoopStart
  460. ? ARM::t2DoLoopStartTP
  461. : ARM::t2WhileLoopStartTP;
  462. MachineInstrBuilder MI =
  463. BuildMI(*MBB, InsertPt, LoopStart->getDebugLoc(), TII->get(NewOpc))
  464. .add(LoopStart->getOperand(0))
  465. .add(LoopStart->getOperand(1))
  466. .addReg(CountReg);
  467. if (NewOpc == ARM::t2WhileLoopStartTP)
  468. MI.add(LoopStart->getOperand(2));
  469. LLVM_DEBUG(dbgs() << "Replacing " << *LoopStart << " with "
  470. << *MI.getInstr());
  471. MRI->constrainRegClass(CountReg, &ARM::rGPRRegClass);
  472. LoopStart->eraseFromParent();
  473. if (SetLRPredicate) {
  474. // Each instruction in the loop needs to be using LR as the predicate from
  475. // the Phi as the predicate.
  476. Register LR = LoopPhi->getOperand(0).getReg();
  477. for (MachineInstr *MI : MVEInstrs) {
  478. int Idx = findFirstVPTPredOperandIdx(*MI);
  479. MI->getOperand(Idx + 2).setReg(LR);
  480. }
  481. }
  482. return true;
  483. }
  484. // Returns true if Opcode is any VCMP Opcode.
  485. static bool IsVCMP(unsigned Opcode) { return VCMPOpcodeToVPT(Opcode) != 0; }
  486. // Returns true if a VCMP with this Opcode can have its operands swapped.
  487. // There is 2 kind of VCMP that can't have their operands swapped: Float VCMPs,
  488. // and VCMPr instructions (since the r is always on the right).
  489. static bool CanHaveSwappedOperands(unsigned Opcode) {
  490. switch (Opcode) {
  491. default:
  492. return true;
  493. case ARM::MVE_VCMPf32:
  494. case ARM::MVE_VCMPf16:
  495. case ARM::MVE_VCMPf32r:
  496. case ARM::MVE_VCMPf16r:
  497. case ARM::MVE_VCMPi8r:
  498. case ARM::MVE_VCMPi16r:
  499. case ARM::MVE_VCMPi32r:
  500. case ARM::MVE_VCMPu8r:
  501. case ARM::MVE_VCMPu16r:
  502. case ARM::MVE_VCMPu32r:
  503. case ARM::MVE_VCMPs8r:
  504. case ARM::MVE_VCMPs16r:
  505. case ARM::MVE_VCMPs32r:
  506. return false;
  507. }
  508. }
  509. // Returns the CondCode of a VCMP Instruction.
  510. static ARMCC::CondCodes GetCondCode(MachineInstr &Instr) {
  511. assert(IsVCMP(Instr.getOpcode()) && "Inst must be a VCMP");
  512. return ARMCC::CondCodes(Instr.getOperand(3).getImm());
  513. }
  514. // Returns true if Cond is equivalent to a VPNOT instruction on the result of
  515. // Prev. Cond and Prev must be VCMPs.
  516. static bool IsVPNOTEquivalent(MachineInstr &Cond, MachineInstr &Prev) {
  517. assert(IsVCMP(Cond.getOpcode()) && IsVCMP(Prev.getOpcode()));
  518. // Opcodes must match.
  519. if (Cond.getOpcode() != Prev.getOpcode())
  520. return false;
  521. MachineOperand &CondOP1 = Cond.getOperand(1), &CondOP2 = Cond.getOperand(2);
  522. MachineOperand &PrevOP1 = Prev.getOperand(1), &PrevOP2 = Prev.getOperand(2);
  523. // If the VCMP has the opposite condition with the same operands, we can
  524. // replace it with a VPNOT
  525. ARMCC::CondCodes ExpectedCode = GetCondCode(Cond);
  526. ExpectedCode = ARMCC::getOppositeCondition(ExpectedCode);
  527. if (ExpectedCode == GetCondCode(Prev))
  528. if (CondOP1.isIdenticalTo(PrevOP1) && CondOP2.isIdenticalTo(PrevOP2))
  529. return true;
  530. // Check again with operands swapped if possible
  531. if (!CanHaveSwappedOperands(Cond.getOpcode()))
  532. return false;
  533. ExpectedCode = ARMCC::getSwappedCondition(ExpectedCode);
  534. return ExpectedCode == GetCondCode(Prev) && CondOP1.isIdenticalTo(PrevOP2) &&
  535. CondOP2.isIdenticalTo(PrevOP1);
  536. }
  537. // Returns true if Instr writes to VCCR.
  538. static bool IsWritingToVCCR(MachineInstr &Instr) {
  539. if (Instr.getNumOperands() == 0)
  540. return false;
  541. MachineOperand &Dst = Instr.getOperand(0);
  542. if (!Dst.isReg())
  543. return false;
  544. Register DstReg = Dst.getReg();
  545. if (!DstReg.isVirtual())
  546. return false;
  547. MachineRegisterInfo &RegInfo = Instr.getMF()->getRegInfo();
  548. const TargetRegisterClass *RegClass = RegInfo.getRegClassOrNull(DstReg);
  549. return RegClass && (RegClass->getID() == ARM::VCCRRegClassID);
  550. }
  551. // Transforms
  552. // <Instr that uses %A ('User' Operand)>
  553. // Into
  554. // %K = VPNOT %Target
  555. // <Instr that uses %K ('User' Operand)>
  556. // And returns the newly inserted VPNOT.
  557. // This optimization is done in the hopes of preventing spills/reloads of VPR by
  558. // reducing the number of VCCR values with overlapping lifetimes.
  559. MachineInstr &MVETPAndVPTOptimisations::ReplaceRegisterUseWithVPNOT(
  560. MachineBasicBlock &MBB, MachineInstr &Instr, MachineOperand &User,
  561. Register Target) {
  562. Register NewResult = MRI->createVirtualRegister(MRI->getRegClass(Target));
  563. MachineInstrBuilder MIBuilder =
  564. BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT))
  565. .addDef(NewResult)
  566. .addReg(Target);
  567. addUnpredicatedMveVpredNOp(MIBuilder);
  568. // Make the user use NewResult instead, and clear its kill flag.
  569. User.setReg(NewResult);
  570. User.setIsKill(false);
  571. LLVM_DEBUG(dbgs() << " Inserting VPNOT (for spill prevention): ";
  572. MIBuilder.getInstr()->dump());
  573. return *MIBuilder.getInstr();
  574. }
  575. // Moves a VPNOT before its first user if an instruction that uses Reg is found
  576. // in-between the VPNOT and its user.
  577. // Returns true if there is at least one user of the VPNOT in the block.
  578. static bool MoveVPNOTBeforeFirstUser(MachineBasicBlock &MBB,
  579. MachineBasicBlock::iterator Iter,
  580. Register Reg) {
  581. assert(Iter->getOpcode() == ARM::MVE_VPNOT && "Not a VPNOT!");
  582. assert(getVPTInstrPredicate(*Iter) == ARMVCC::None &&
  583. "The VPNOT cannot be predicated");
  584. MachineInstr &VPNOT = *Iter;
  585. Register VPNOTResult = VPNOT.getOperand(0).getReg();
  586. Register VPNOTOperand = VPNOT.getOperand(1).getReg();
  587. // Whether the VPNOT will need to be moved, and whether we found a user of the
  588. // VPNOT.
  589. bool MustMove = false, HasUser = false;
  590. MachineOperand *VPNOTOperandKiller = nullptr;
  591. for (; Iter != MBB.end(); ++Iter) {
  592. if (MachineOperand *MO =
  593. Iter->findRegisterUseOperand(VPNOTOperand, /*isKill*/ true)) {
  594. // If we find the operand that kills the VPNOTOperand's result, save it.
  595. VPNOTOperandKiller = MO;
  596. }
  597. if (Iter->findRegisterUseOperandIdx(Reg) != -1) {
  598. MustMove = true;
  599. continue;
  600. }
  601. if (Iter->findRegisterUseOperandIdx(VPNOTResult) == -1)
  602. continue;
  603. HasUser = true;
  604. if (!MustMove)
  605. break;
  606. // Move the VPNOT right before Iter
  607. LLVM_DEBUG(dbgs() << "Moving: "; VPNOT.dump(); dbgs() << " Before: ";
  608. Iter->dump());
  609. MBB.splice(Iter, &MBB, VPNOT.getIterator());
  610. // If we move the instr, and its operand was killed earlier, remove the kill
  611. // flag.
  612. if (VPNOTOperandKiller)
  613. VPNOTOperandKiller->setIsKill(false);
  614. break;
  615. }
  616. return HasUser;
  617. }
// This optimisation attempts to reduce the number of overlapping lifetimes of
// VCCR values by replacing uses of old VCCR values with VPNOTs. For example,
// this replaces
//    %A:vccr = (something)
//    %B:vccr = VPNOT %A
//    %Foo = (some op that uses %B)
//    %Bar = (some op that uses %A)
// With
//    %A:vccr = (something)
//    %B:vccr = VPNOT %A
//    %Foo = (some op that uses %B)
//    %TMP2:vccr = VPNOT %B
//    %Bar = (some op that uses %A)
// Re-deriving %A from %B via a VPNOT means only one VCCR value is live at a
// time, which helps register allocation for the single VPR register.
//
// Returns true if the block was modified.
bool MVETPAndVPTOptimisations::ReduceOldVCCRValueUses(MachineBasicBlock &MBB) {
  MachineBasicBlock::iterator Iter = MBB.begin(), End = MBB.end();
  // Instructions made redundant; erased only after the scan so iterators
  // stay valid.
  SmallVector<MachineInstr *, 4> DeadInstructions;
  bool Modified = false;

  while (Iter != End) {
    Register VCCRValue, OppositeVCCRValue;
    // The first loop looks for 2 unpredicated instructions:
    //    %A:vccr = (instr)      ; A is stored in VCCRValue
    //    %B:vccr = VPNOT %A     ; B is stored in OppositeVCCRValue
    for (; Iter != End; ++Iter) {
      // We're only interested in unpredicated instructions that write to VCCR.
      if (!IsWritingToVCCR(*Iter) ||
          getVPTInstrPredicate(*Iter) != ARMVCC::None)
        continue;
      Register Dst = Iter->getOperand(0).getReg();

      // If we already have a VCCRValue, and this is a VPNOT on VCCRValue, we've
      // found what we were looking for.
      if (VCCRValue && Iter->getOpcode() == ARM::MVE_VPNOT &&
          Iter->findRegisterUseOperandIdx(VCCRValue) != -1) {
        // Move the VPNOT closer to its first user if needed, and ignore it if
        // it has no users (nothing to optimise in that case).
        if (!MoveVPNOTBeforeFirstUser(MBB, Iter, VCCRValue))
          continue;
        OppositeVCCRValue = Dst;
        // Start the second phase on the instruction after the VPNOT.
        ++Iter;
        break;
      }
      // Else, just set VCCRValue.
      VCCRValue = Dst;
    }

    // If the first inner loop didn't find anything, stop here.
    if (Iter == End)
      break;

    assert(VCCRValue && OppositeVCCRValue &&
           "VCCRValue and OppositeVCCRValue shouldn't be empty if the loop "
           "stopped before the end of the block!");
    assert(VCCRValue != OppositeVCCRValue &&
           "VCCRValue should not be equal to OppositeVCCRValue!");

    // LastVPNOTResult always contains the same value as OppositeVCCRValue.
    Register LastVPNOTResult = OppositeVCCRValue;

    // This second loop tries to optimize the remaining instructions.
    for (; Iter != End; ++Iter) {
      // Set to true when this instruction interacts with the tracked values;
      // an uninteresting VCCR write below ends this optimisation window.
      bool IsInteresting = false;

      if (MachineOperand *MO = Iter->findRegisterUseOperand(VCCRValue)) {
        IsInteresting = true;

        // - If the instruction is a VPNOT, it can be removed, and we can just
        //   replace its uses with LastVPNOTResult.
        // - Else, insert a new VPNOT on LastVPNOTResult to recompute VCCRValue.
        if (Iter->getOpcode() == ARM::MVE_VPNOT) {
          Register Result = Iter->getOperand(0).getReg();

          MRI->replaceRegWith(Result, LastVPNOTResult);
          DeadInstructions.push_back(&*Iter);
          Modified = true;

          LLVM_DEBUG(dbgs()
                     << "Replacing all uses of '" << printReg(Result)
                     << "' with '" << printReg(LastVPNOTResult) << "'\n");
        } else {
          MachineInstr &VPNOT =
              ReplaceRegisterUseWithVPNOT(MBB, *Iter, *MO, LastVPNOTResult);
          Modified = true;

          // The freshly inserted VPNOT inverts the tracked value, so the
          // roles of VCCRValue/OppositeVCCRValue swap.
          LastVPNOTResult = VPNOT.getOperand(0).getReg();
          std::swap(VCCRValue, OppositeVCCRValue);

          LLVM_DEBUG(dbgs() << "Replacing use of '" << printReg(VCCRValue)
                            << "' with '" << printReg(LastVPNOTResult)
                            << "' in instr: " << *Iter);
        }
      } else {
        // If the instr uses OppositeVCCRValue, make it use LastVPNOTResult
        // instead as they contain the same value.
        if (MachineOperand *MO =
                Iter->findRegisterUseOperand(OppositeVCCRValue)) {
          IsInteresting = true;

          // This is pointless if LastVPNOTResult == OppositeVCCRValue.
          if (LastVPNOTResult != OppositeVCCRValue) {
            LLVM_DEBUG(dbgs() << "Replacing usage of '"
                              << printReg(OppositeVCCRValue) << "' with '"
                              << printReg(LastVPNOTResult) << " for instr: ";
                       Iter->dump());
            MO->setReg(LastVPNOTResult);
            Modified = true;
          }

          // The kill flag (if any) referred to the old register; the
          // replacement value may still be live afterwards.
          MO->setIsKill(false);
        }

        // If this is an unpredicated VPNOT on
        // LastVPNOTResult/OppositeVCCRValue, we can act like we inserted it.
        if (Iter->getOpcode() == ARM::MVE_VPNOT &&
            getVPTInstrPredicate(*Iter) == ARMVCC::None) {
          Register VPNOTOperand = Iter->getOperand(1).getReg();
          if (VPNOTOperand == LastVPNOTResult ||
              VPNOTOperand == OppositeVCCRValue) {
            IsInteresting = true;

            std::swap(VCCRValue, OppositeVCCRValue);
            LastVPNOTResult = Iter->getOperand(0).getReg();
          }
        }
      }

      // If this instruction was not interesting, and it writes to VCCR, stop.
      // The outer while-loop then restarts pattern-matching from here.
      if (!IsInteresting && IsWritingToVCCR(*Iter))
        break;
    }
  }

  for (MachineInstr *DeadInstruction : DeadInstructions)
    DeadInstruction->eraseFromParent();

  return Modified;
}
// This optimisation replaces VCMPs with VPNOTs when they are equivalent:
// when a VCMP computes exactly the opposite condition of the previous VCMP
// (as decided by IsVPNOTEquivalent), a VPNOT of the previous result produces
// the same value without redoing the compare.
//
// Returns true if any VCMP was replaced.
bool MVETPAndVPTOptimisations::ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB) {
  // Replaced VCMPs; erased after the scan so iteration stays valid.
  SmallVector<MachineInstr *, 4> DeadInstructions;

  // The last VCMP that we have seen and that couldn't be replaced.
  // This is reset when an instruction that writes to VCCR/VPR is found, or when
  // a VCMP is replaced with a VPNOT.
  // We'll only replace VCMPs with VPNOTs when this is not null, and when the
  // current VCMP is the opposite of PrevVCMP.
  MachineInstr *PrevVCMP = nullptr;
  // If we find an instruction that kills the result of PrevVCMP, we save the
  // operand here to remove the kill flag in case we need to use PrevVCMP's
  // result.
  MachineOperand *PrevVCMPResultKiller = nullptr;

  for (MachineInstr &Instr : MBB.instrs()) {
    if (PrevVCMP) {
      if (MachineOperand *MO = Instr.findRegisterUseOperand(
              PrevVCMP->getOperand(0).getReg(), /*isKill*/ true)) {
        // If we come across the instr that kills PrevVCMP's result, record it
        // so we can remove the kill flag later if we need to.
        PrevVCMPResultKiller = MO;
      }
    }

    // Ignore predicated instructions.
    if (getVPTInstrPredicate(Instr) != ARMVCC::None)
      continue;

    // Only look at VCMPs
    if (!IsVCMP(Instr.getOpcode())) {
      // If the instruction writes to VCCR, forget the previous VCMP: its
      // value is no longer the live predicate.
      if (IsWritingToVCCR(Instr))
        PrevVCMP = nullptr;
      continue;
    }

    if (!PrevVCMP || !IsVPNOTEquivalent(Instr, *PrevVCMP)) {
      PrevVCMP = &Instr;
      continue;
    }

    // The register containing the result of the VCMP that we're going to
    // replace.
    Register PrevVCMPResultReg = PrevVCMP->getOperand(0).getReg();

    // Build a VPNOT to replace the VCMP, reusing its operands.
    MachineInstrBuilder MIBuilder =
        BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT))
            .add(Instr.getOperand(0))
            .addReg(PrevVCMPResultReg);
    addUnpredicatedMveVpredNOp(MIBuilder);
    LLVM_DEBUG(dbgs() << "Inserting VPNOT (to replace VCMP): ";
               MIBuilder.getInstr()->dump(); dbgs() << " Removed VCMP: ";
               Instr.dump());

    // If we found an instruction that uses, and kills PrevVCMP's result,
    // remove the kill flag: the VPNOT we just built reads it afterwards.
    if (PrevVCMPResultKiller)
      PrevVCMPResultKiller->setIsKill(false);

    // Finally, mark the old VCMP for removal and reset
    // PrevVCMP/PrevVCMPResultKiller.
    DeadInstructions.push_back(&Instr);
    PrevVCMP = nullptr;
    PrevVCMPResultKiller = nullptr;
  }

  for (MachineInstr *DeadInstruction : DeadInstructions)
    DeadInstruction->eraseFromParent();

  return !DeadInstructions.empty();
}
bool MVETPAndVPTOptimisations::ReplaceConstByVPNOTs(MachineBasicBlock &MBB,
                                                    MachineDominatorTree *DT) {
  // Scan through the block, looking for predicated instructions whose VPR
  // operands come from constants moved into VPR that are the bitwise negation
  // of one another. These are expected to be COPYs to VCCRRegClass from a
  // t2MOVi or t2MOVi16. The last seen constant mask is tracked; it, or a VPNOT
  // of it, is reused instead of materialising the same/negated mask again.
  //
  // NOTE(review): the DT parameter is not used in this body — presumably kept
  // for interface symmetry with the other sub-passes; confirm before removal.
  //
  // Returns true if any instruction was removed.
  unsigned LastVPTImm = 0;
  Register LastVPTReg = 0;
  // Defining COPYs/MOVs made dead by the rewrites; erased after the scan.
  SmallSet<MachineInstr *, 4> DeadInstructions;

  for (MachineInstr &Instr : MBB.instrs()) {
    // Look for predicated MVE instructions.
    int PIdx = llvm::findFirstVPTPredOperandIdx(Instr);
    if (PIdx == -1)
      continue;
    // The VPR (mask) register is the operand following the predicate imm.
    Register VPR = Instr.getOperand(PIdx + 1).getReg();
    if (!VPR.isVirtual())
      continue;

    // From that we are looking for an instruction like %11:vccr = COPY %9:rgpr.
    MachineInstr *Copy = MRI->getVRegDef(VPR);
    if (!Copy || Copy->getOpcode() != TargetOpcode::COPY ||
        !Copy->getOperand(1).getReg().isVirtual() ||
        MRI->getRegClass(Copy->getOperand(1).getReg()) == &ARM::VCCRRegClass) {
      // Not the pattern we want: forget the tracked mask so we never reuse a
      // value across an unrecognised VPR definition.
      LastVPTReg = 0;
      continue;
    }
    Register GPR = Copy->getOperand(1).getReg();

    // Find the Immediate used by the copy: only t2MOVi/t2MOVi16 constants are
    // recognised; -1U acts as the "no constant" sentinel.
    auto getImm = [&](Register GPR) -> unsigned {
      MachineInstr *Def = MRI->getVRegDef(GPR);
      if (Def && (Def->getOpcode() == ARM::t2MOVi ||
                  Def->getOpcode() == ARM::t2MOVi16))
        return Def->getOperand(1).getImm();
      return -1U;
    };
    unsigned Imm = getImm(GPR);
    if (Imm == -1U) {
      LastVPTReg = 0;
      continue;
    }

    // Masks are 16 bits wide, hence the 0xffff truncation of the negation.
    unsigned NotImm = ~Imm & 0xffff;
    if (LastVPTReg != 0 && LastVPTReg != VPR && LastVPTImm == Imm) {
      // Same constant as last time: reuse the earlier VPR register directly.
      Instr.getOperand(PIdx + 1).setReg(LastVPTReg);
      if (MRI->use_empty(VPR)) {
        DeadInstructions.insert(Copy);
        if (MRI->hasOneUse(GPR))
          DeadInstructions.insert(MRI->getVRegDef(GPR));
      }
      LLVM_DEBUG(dbgs() << "Reusing predicate: in " << Instr);
    } else if (LastVPTReg != 0 && LastVPTImm == NotImm) {
      // We have found the not of a previous constant. Create a VPNot of the
      // earlier predicate reg and use it instead of the copy.
      Register NewVPR = MRI->createVirtualRegister(&ARM::VCCRRegClass);
      auto VPNot = BuildMI(MBB, &Instr, Instr.getDebugLoc(),
                           TII->get(ARM::MVE_VPNOT), NewVPR)
                       .addReg(LastVPTReg);
      addUnpredicatedMveVpredNOp(VPNot);

      // Use the new register and check if the def is now dead.
      Instr.getOperand(PIdx + 1).setReg(NewVPR);
      if (MRI->use_empty(VPR)) {
        DeadInstructions.insert(Copy);
        if (MRI->hasOneUse(GPR))
          DeadInstructions.insert(MRI->getVRegDef(GPR));
      }
      LLVM_DEBUG(dbgs() << "Adding VPNot: " << *VPNot << " to replace use at "
                        << Instr);
      VPR = NewVPR;
    }

    // Remember this instruction's mask for the next iteration.
    LastVPTImm = Imm;
    LastVPTReg = VPR;
  }

  for (MachineInstr *DI : DeadInstructions)
    DI->eraseFromParent();
  return !DeadInstructions.empty();
}
  873. // Replace VPSEL with a predicated VMOV in blocks with a VCTP. This is a
  874. // somewhat blunt approximation to allow tail predicated with vpsel
  875. // instructions. We turn a vselect into a VPSEL in ISEL, but they have slightly
  876. // different semantics under tail predication. Until that is modelled we just
  877. // convert to a VMOVT (via a predicated VORR) instead.
  878. bool MVETPAndVPTOptimisations::ConvertVPSEL(MachineBasicBlock &MBB) {
  879. bool HasVCTP = false;
  880. SmallVector<MachineInstr *, 4> DeadInstructions;
  881. for (MachineInstr &MI : MBB.instrs()) {
  882. if (isVCTP(&MI)) {
  883. HasVCTP = true;
  884. continue;
  885. }
  886. if (!HasVCTP || MI.getOpcode() != ARM::MVE_VPSEL)
  887. continue;
  888. MachineInstrBuilder MIBuilder =
  889. BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(ARM::MVE_VORR))
  890. .add(MI.getOperand(0))
  891. .add(MI.getOperand(1))
  892. .add(MI.getOperand(1))
  893. .addImm(ARMVCC::Then)
  894. .add(MI.getOperand(4))
  895. .add(MI.getOperand(5))
  896. .add(MI.getOperand(2));
  897. // Silence unused variable warning in release builds.
  898. (void)MIBuilder;
  899. LLVM_DEBUG(dbgs() << "Replacing VPSEL: "; MI.dump();
  900. dbgs() << " with VMOVT: "; MIBuilder.getInstr()->dump());
  901. DeadInstructions.push_back(&MI);
  902. }
  903. for (MachineInstr *DeadInstruction : DeadInstructions)
  904. DeadInstruction->eraseFromParent();
  905. return !DeadInstructions.empty();
  906. }
  907. // Add a registry allocation hint for t2DoLoopStart to hint it towards LR, as
  908. // the instruction may be removable as a noop.
  909. bool MVETPAndVPTOptimisations::HintDoLoopStartReg(MachineBasicBlock &MBB) {
  910. bool Changed = false;
  911. for (MachineInstr &MI : MBB.instrs()) {
  912. if (MI.getOpcode() != ARM::t2DoLoopStart)
  913. continue;
  914. Register R = MI.getOperand(1).getReg();
  915. MachineFunction *MF = MI.getParent()->getParent();
  916. MF->getRegInfo().setRegAllocationHint(R, ARMRI::RegLR, 0);
  917. Changed = true;
  918. }
  919. return Changed;
  920. }
  921. bool MVETPAndVPTOptimisations::runOnMachineFunction(MachineFunction &Fn) {
  922. const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>();
  923. if (!STI.isThumb2() || !STI.hasLOB())
  924. return false;
  925. TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
  926. MRI = &Fn.getRegInfo();
  927. MachineLoopInfo *MLI = &getAnalysis<MachineLoopInfo>();
  928. MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
  929. LLVM_DEBUG(dbgs() << "********** ARM MVE VPT Optimisations **********\n"
  930. << "********** Function: " << Fn.getName() << '\n');
  931. bool Modified = false;
  932. for (MachineLoop *ML : MLI->getBase().getLoopsInPreorder()) {
  933. Modified |= LowerWhileLoopStart(ML);
  934. Modified |= MergeLoopEnd(ML);
  935. Modified |= ConvertTailPredLoop(ML, DT);
  936. }
  937. for (MachineBasicBlock &MBB : Fn) {
  938. Modified |= HintDoLoopStartReg(MBB);
  939. Modified |= ReplaceConstByVPNOTs(MBB, DT);
  940. Modified |= ReplaceVCMPsByVPNOTs(MBB);
  941. Modified |= ReduceOldVCCRValueUses(MBB);
  942. Modified |= ConvertVPSEL(MBB);
  943. }
  944. LLVM_DEBUG(dbgs() << "**************************************\n");
  945. return Modified;
  946. }
/// createMVETPAndVPTOptimisationsPass - Factory returning a new instance of
/// this pass; ownership passes to the caller (the pass manager).
FunctionPass *llvm::createMVETPAndVPTOptimisationsPass() {
  return new MVETPAndVPTOptimisations();
}