  1. //===- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions ----------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file contains a pass that expands pseudo instructions into target
  10. // instructions to allow proper scheduling and other late optimizations. This
  11. // pass should be run after register allocation but before the post-regalloc
  12. // scheduling pass.
  13. //
  14. //===----------------------------------------------------------------------===//
  15. #include "AArch64ExpandImm.h"
  16. #include "AArch64InstrInfo.h"
  17. #include "AArch64MachineFunctionInfo.h"
  18. #include "AArch64Subtarget.h"
  19. #include "MCTargetDesc/AArch64AddressingModes.h"
  20. #include "Utils/AArch64BaseInfo.h"
  21. #include "llvm/ADT/DenseMap.h"
  22. #include "llvm/ADT/Triple.h"
  23. #include "llvm/CodeGen/LivePhysRegs.h"
  24. #include "llvm/CodeGen/MachineBasicBlock.h"
  25. #include "llvm/CodeGen/MachineConstantPool.h"
  26. #include "llvm/CodeGen/MachineFunction.h"
  27. #include "llvm/CodeGen/MachineFunctionPass.h"
  28. #include "llvm/CodeGen/MachineInstr.h"
  29. #include "llvm/CodeGen/MachineInstrBuilder.h"
  30. #include "llvm/CodeGen/MachineOperand.h"
  31. #include "llvm/CodeGen/TargetSubtargetInfo.h"
  32. #include "llvm/IR/DebugLoc.h"
  33. #include "llvm/MC/MCInstrDesc.h"
  34. #include "llvm/Pass.h"
  35. #include "llvm/Support/CodeGen.h"
  36. #include "llvm/Support/MathExtras.h"
  37. #include "llvm/Target/TargetMachine.h"
  38. #include <cassert>
  39. #include <cstdint>
  40. #include <iterator>
  41. #include <limits>
  42. #include <utility>
  43. using namespace llvm;
  44. #define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"
namespace {
/// Late machine pass that expands pseudo instructions into real AArch64
/// instructions. Runs after register allocation but before the post-RA
/// scheduler (see the file header comment).
class AArch64ExpandPseudo : public MachineFunctionPass {
public:
  const AArch64InstrInfo *TII; // Cached per-function target instruction info.
  static char ID;
  AArch64ExpandPseudo() : MachineFunctionPass(ID) {
    initializeAArch64ExpandPseudoPass(*PassRegistry::getPassRegistry());
  }
  bool runOnMachineFunction(MachineFunction &Fn) override;
  StringRef getPassName() const override { return AARCH64_EXPAND_PSEUDO_NAME; }

private:
  // Expand all pseudos in one basic block; returns true if anything changed.
  bool expandMBB(MachineBasicBlock &MBB);
  // Expand a single instruction. NextMBBI is updated when the expansion
  // splits the block so the caller can resume iteration correctly.
  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                MachineBasicBlock::iterator &NextMBBI);
  // Materialize a MOVi32imm/MOVi64imm pseudo (BitSize is 32 or 64).
  bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                    unsigned BitSize);
  // Expand SVE pseudos with destructive operands, inserting MOVPRFX
  // where needed (see the comment block above expand_DestructiveOp).
  bool expand_DestructiveOp(MachineInstr &MI, MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI);
  // Expand a compare-and-swap pseudo into a load-exclusive /
  // store-exclusive retry loop.
  bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                      unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
                      unsigned ExtendImm, unsigned ZeroReg,
                      MachineBasicBlock::iterator &NextMBBI);
  // 128-bit compare-and-swap variant built on LDXP/STXP pairs.
  bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          MachineBasicBlock::iterator &NextMBBI);
  // Expand the STGloop/STZGloop memory-tagging pseudos into a store loop.
  bool expandSetTagLoop(MachineBasicBlock &MBB,
                        MachineBasicBlock::iterator MBBI,
                        MachineBasicBlock::iterator &NextMBBI);
  // Expand an N-register SVE spill/fill pseudo into N single-register
  // LDR_ZXI/STR_ZXI instructions.
  bool expandSVESpillFill(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI, unsigned Opc,
                          unsigned N);
  // Expand a call pseudo that carries an attached return-value marker
  // (call + `mov x29, x29` marker + call to the attached function).
  bool expandCALL_RVMARKER(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI);
  // Expand a call pseudo that must be followed by a BTI instruction.
  bool expandCALL_BTI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
  bool expandStoreSwiftAsyncContext(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI);
};
} // end anonymous namespace
  83. char AArch64ExpandPseudo::ID = 0;
  84. INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo",
  85. AARCH64_EXPAND_PSEUDO_NAME, false, false)
  86. /// Transfer implicit operands on the pseudo instruction to the
  87. /// instructions created from the expansion.
  88. static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
  89. MachineInstrBuilder &DefMI) {
  90. const MCInstrDesc &Desc = OldMI.getDesc();
  91. for (const MachineOperand &MO :
  92. llvm::drop_begin(OldMI.operands(), Desc.getNumOperands())) {
  93. assert(MO.isReg() && MO.getReg());
  94. if (MO.isUse())
  95. UseMI.add(MO);
  96. else
  97. DefMI.add(MO);
  98. }
  99. }
  100. /// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
  101. /// real move-immediate instructions to synthesize the immediate.
  102. bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
  103. MachineBasicBlock::iterator MBBI,
  104. unsigned BitSize) {
  105. MachineInstr &MI = *MBBI;
  106. Register DstReg = MI.getOperand(0).getReg();
  107. uint64_t RenamableState =
  108. MI.getOperand(0).isRenamable() ? RegState::Renamable : 0;
  109. uint64_t Imm = MI.getOperand(1).getImm();
  110. if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
  111. // Useless def, and we don't want to risk creating an invalid ORR (which
  112. // would really write to sp).
  113. MI.eraseFromParent();
  114. return true;
  115. }
  116. SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  117. AArch64_IMM::expandMOVImm(Imm, BitSize, Insn);
  118. assert(Insn.size() != 0);
  119. SmallVector<MachineInstrBuilder, 4> MIBS;
  120. for (auto I = Insn.begin(), E = Insn.end(); I != E; ++I) {
  121. bool LastItem = std::next(I) == E;
  122. switch (I->Opcode)
  123. {
  124. default: llvm_unreachable("unhandled!"); break;
  125. case AArch64::ORRWri:
  126. case AArch64::ORRXri:
  127. MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
  128. .add(MI.getOperand(0))
  129. .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
  130. .addImm(I->Op2));
  131. break;
  132. case AArch64::MOVNWi:
  133. case AArch64::MOVNXi:
  134. case AArch64::MOVZWi:
  135. case AArch64::MOVZXi: {
  136. bool DstIsDead = MI.getOperand(0).isDead();
  137. MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
  138. .addReg(DstReg, RegState::Define |
  139. getDeadRegState(DstIsDead && LastItem) |
  140. RenamableState)
  141. .addImm(I->Op1)
  142. .addImm(I->Op2));
  143. } break;
  144. case AArch64::MOVKWi:
  145. case AArch64::MOVKXi: {
  146. Register DstReg = MI.getOperand(0).getReg();
  147. bool DstIsDead = MI.getOperand(0).isDead();
  148. MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
  149. .addReg(DstReg,
  150. RegState::Define |
  151. getDeadRegState(DstIsDead && LastItem) |
  152. RenamableState)
  153. .addReg(DstReg)
  154. .addImm(I->Op1)
  155. .addImm(I->Op2));
  156. } break;
  157. }
  158. }
  159. transferImpOps(MI, MIBS.front(), MIBS.back());
  160. MI.eraseFromParent();
  161. return true;
  162. }
/// Expand a CMP_SWAP pseudo into a load-exclusive/store-exclusive retry
/// loop. The current block is split: the loop blocks are inserted after
/// \p MBB and everything following the pseudo moves to a new "done" block.
/// NextMBBI is reset so the caller resumes after the split.
bool AArch64ExpandPseudo::expandCMP_SWAP(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp,
    unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  const MachineOperand &Dest = MI.getOperand(0);
  Register StatusReg = MI.getOperand(1).getReg();
  bool StatusDead = MI.getOperand(1).isDead();
  // Duplicating undef operands into 2 instructions does not guarantee the same
  // value on both; However undef should be replaced by xzr anyway.
  assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
  Register AddrReg = MI.getOperand(2).getReg();
  Register DesiredReg = MI.getOperand(3).getReg();
  Register NewReg = MI.getOperand(4).getReg();

  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), DoneBB);

  // .Lloadcmp:
  //     mov wStatus, 0
  //     ldaxr xDest, [xAddr]
  //     cmp xDest, xDesired
  //     b.ne .Ldone
  // Pre-set status to 0 (failure) so it is correct when we branch out on a
  // value mismatch; skipped entirely when the status register is dead.
  if (!StatusDead)
    BuildMI(LoadCmpBB, DL, TII->get(AArch64::MOVZWi), StatusReg)
      .addImm(0).addImm(0);
  BuildMI(LoadCmpBB, DL, TII->get(LdarOp), Dest.getReg())
      .addReg(AddrReg);
  // Compare, discarding the result into the zero register; only NZCV is
  // consumed by the following conditional branch.
  BuildMI(LoadCmpBB, DL, TII->get(CmpOp), ZeroReg)
      .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
      .addReg(DesiredReg)
      .addImm(ExtendImm);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::Bcc))
      .addImm(AArch64CC::NE)
      .addMBB(DoneBB)
      .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
  LoadCmpBB->addSuccessor(DoneBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     stlxr wStatus, xNew, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(StoreBB, DL, TII->get(StlrOp), StatusReg)
      .addReg(NewReg)
      .addReg(AddrReg);
  BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  // Move everything after the pseudo into the fall-through block and hand
  // over MBB's old successors.
  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  // Recompute livein lists.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
  // Do an extra pass around the loop to get loop carried registers right.
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  LoadCmpBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  return true;
}
/// Expand a 128-bit CMP_SWAP pseudo into an LDXP/STXP retry loop. Unlike
/// the scalar version, a failed comparison branches to a dedicated fail
/// block that stores the loaded value back with an exclusive store pair
/// before falling through to the done block.
bool AArch64ExpandPseudo::expandCMP_SWAP_128(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  MachineOperand &DestLo = MI.getOperand(0);
  MachineOperand &DestHi = MI.getOperand(1);
  Register StatusReg = MI.getOperand(2).getReg();
  bool StatusDead = MI.getOperand(2).isDead();
  // Duplicating undef operands into 2 instructions does not guarantee the same
  // value on both; However undef should be replaced by xzr anyway.
  assert(!MI.getOperand(3).isUndef() && "cannot handle undef");
  Register AddrReg = MI.getOperand(3).getReg();
  Register DesiredLoReg = MI.getOperand(4).getReg();
  Register DesiredHiReg = MI.getOperand(5).getReg();
  Register NewLoReg = MI.getOperand(6).getReg();
  Register NewHiReg = MI.getOperand(7).getReg();

  // Pick acquire/release flavors of the exclusive pair ops to match the
  // memory ordering encoded in the pseudo's opcode.
  unsigned LdxpOp, StxpOp;
  switch (MI.getOpcode()) {
  case AArch64::CMP_SWAP_128_MONOTONIC:
    LdxpOp = AArch64::LDXPX;
    StxpOp = AArch64::STXPX;
    break;
  case AArch64::CMP_SWAP_128_RELEASE:
    LdxpOp = AArch64::LDXPX;
    StxpOp = AArch64::STLXPX;
    break;
  case AArch64::CMP_SWAP_128_ACQUIRE:
    LdxpOp = AArch64::LDAXPX;
    StxpOp = AArch64::STXPX;
    break;
  case AArch64::CMP_SWAP_128:
    LdxpOp = AArch64::LDAXPX;
    StxpOp = AArch64::STLXPX;
    break;
  default:
    llvm_unreachable("Unexpected opcode");
  }

  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto FailBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), FailBB);
  MF->insert(++FailBB->getIterator(), DoneBB);

  // .Lloadcmp:
  //     ldaxp xDestLo, xDestHi, [xAddr]
  //     cmp xDestLo, xDesiredLo
  //     sbcs xDestHi, xDesiredHi
  //     b.ne .Ldone
  BuildMI(LoadCmpBB, DL, TII->get(LdxpOp))
      .addReg(DestLo.getReg(), RegState::Define)
      .addReg(DestHi.getReg(), RegState::Define)
      .addReg(AddrReg);
  // Compare the low halves; result discarded into XZR, only NZCV matters.
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
      .addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))
      .addReg(DesiredLoReg)
      .addImm(0);
  // Status = (lo halves equal) ? 1 : 0 via CSINC from WZR.
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
      .addUse(AArch64::WZR)
      .addUse(AArch64::WZR)
      .addImm(AArch64CC::EQ);
  // Compare the high halves the same way.
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
      .addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
      .addReg(DesiredHiReg)
      .addImm(0);
  // Status = (hi halves equal) ? Status + 1 : Status; combined with the
  // CBNZ below this selects the fail path unless both halves matched.
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
      .addUse(StatusReg, RegState::Kill)
      .addUse(StatusReg, RegState::Kill)
      .addImm(AArch64CC::EQ);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::CBNZW))
      .addUse(StatusReg, getKillRegState(StatusDead))
      .addMBB(FailBB);
  LoadCmpBB->addSuccessor(FailBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     stlxp wStatus, xNewLo, xNewHi, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(StoreBB, DL, TII->get(StxpOp), StatusReg)
      .addReg(NewLoReg)
      .addReg(NewHiReg)
      .addReg(AddrReg);
  BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  BuildMI(StoreBB, DL, TII->get(AArch64::B)).addMBB(DoneBB);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  // .Lfail:
  //     stlxp wStatus, xDestLo, xDestHi, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  // Store the just-loaded value back so the exclusive sequence completes
  // with a store even on the failure path.
  BuildMI(FailBB, DL, TII->get(StxpOp), StatusReg)
      .addReg(DestLo.getReg())
      .addReg(DestHi.getReg())
      .addReg(AddrReg);
  BuildMI(FailBB, DL, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  FailBB->addSuccessor(LoadCmpBB);
  FailBB->addSuccessor(DoneBB);

  // Split the original block: the tail after the pseudo becomes DoneBB.
  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  // Recompute liveness bottom up.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *FailBB);
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
  // Do an extra pass in the loop to get the loop carried dependencies right.
  FailBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *FailBB);
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  LoadCmpBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  return true;
}
/// \brief Expand Pseudos to Instructions with destructive operands.
///
/// This mechanism uses MOVPRFX instructions for zeroing the false lanes
/// or for fixing relaxed register allocation conditions to comply with
/// the instructions register constraints. The latter case may be cheaper
/// than setting the register constraints in the register allocator,
/// since that will insert regular MOV instructions rather than MOVPRFX.
///
/// Example (after register allocation):
///
///   FSUB_ZPZZ_ZERO_B Z0, Pg, Z1, Z0
///
/// * The Pseudo FSUB_ZPZZ_ZERO_B maps to FSUB_ZPmZ_B.
/// * We cannot map directly to FSUB_ZPmZ_B because the register
///   constraints of the instruction are not met.
/// * Also the _ZERO specifies the false lanes need to be zeroed.
///
/// We first try to see if the destructive operand == result operand,
/// if not, we try to swap the operands, e.g.
///
///   FSUB_ZPmZ_B  Z0, Pg/m, Z0, Z1
///
/// But because FSUB_ZPmZ is not commutative, this is semantically
/// different, so we need a reverse instruction:
///
///   FSUBR_ZPmZ_B  Z0, Pg/m, Z0, Z1
///
/// Then we implement the zeroing of the false lanes of Z0 by adding
/// a zeroing MOVPRFX instruction:
///
///   MOVPRFX_ZPzZ_B Z0, Pg/z, Z0
///   FSUBR_ZPmZ_B   Z0, Pg/m, Z0, Z1
///
/// Note that this can only be done for _ZERO or _UNDEF variants where
/// we can guarantee the false lanes to be zeroed (by implementing this)
/// or that they are undef (don't care / not used), otherwise the
/// swapping of operands is illegal because the operation is not
/// (or cannot be emulated to be) fully commutative.
bool AArch64ExpandPseudo::expand_DestructiveOp(
    MachineInstr &MI,
    MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI) {
  // Map the pseudo to its real SVE opcode and extract the destructive-type
  // and false-lane handling from the target flags.
  unsigned Opcode = AArch64::getSVEPseudoMap(MI.getOpcode());
  uint64_t DType = TII->get(Opcode).TSFlags & AArch64::DestructiveInstTypeMask;
  uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask;
  bool FalseZero = FalseLanes == AArch64::FalseLanesZero;

  Register DstReg = MI.getOperand(0).getReg();
  bool DstIsDead = MI.getOperand(0).isDead();

  if (DType == AArch64::DestructiveBinary)
    assert(DstReg != MI.getOperand(3).getReg());

  // Locate the predicate/destructive/source operand indices for each
  // destructive type, and decide whether the reversed opcode is needed
  // (when the destination aliases a non-destructive source operand).
  bool UseRev = false;
  unsigned PredIdx, DOPIdx, SrcIdx, Src2Idx;
  switch (DType) {
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    if (DstReg == MI.getOperand(3).getReg()) {
      // FSUB Zd, Pg, Zs1, Zd  ==> FSUBR   Zd, Pg/m, Zd, Zs1
      std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 3, 2);
      UseRev = true;
      break;
    }
    LLVM_FALLTHROUGH;
  case AArch64::DestructiveBinary:
  case AArch64::DestructiveBinaryImm:
    std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 2, 3);
    break;
  case AArch64::DestructiveUnaryPassthru:
    std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(2, 3, 3);
    break;
  case AArch64::DestructiveTernaryCommWithRev:
    std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 2, 3, 4);
    if (DstReg == MI.getOperand(3).getReg()) {
      // FMLA Zd, Pg, Za, Zd, Zm ==> FMAD Zdn, Pg, Zm, Za
      std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 3, 4, 2);
      UseRev = true;
    } else if (DstReg == MI.getOperand(4).getReg()) {
      // FMLA Zd, Pg, Za, Zm, Zd ==> FMAD Zdn, Pg, Zm, Za
      std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 4, 3, 2);
      UseRev = true;
    }
    break;
  default:
    llvm_unreachable("Unsupported Destructive Operand type");
  }

#ifndef NDEBUG
  // MOVPRFX can only be used if the destination operand
  // is the destructive operand, not as any other operand,
  // so the Destructive Operand must be unique.
  bool DOPRegIsUnique = false;
  switch (DType) {
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    DOPRegIsUnique =
      DstReg != MI.getOperand(DOPIdx).getReg() ||
      MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg();
    break;
  case AArch64::DestructiveUnaryPassthru:
  case AArch64::DestructiveBinaryImm:
    DOPRegIsUnique = true;
    break;
  case AArch64::DestructiveTernaryCommWithRev:
    DOPRegIsUnique =
        DstReg != MI.getOperand(DOPIdx).getReg() ||
        (MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg() &&
         MI.getOperand(DOPIdx).getReg() != MI.getOperand(Src2Idx).getReg());
    break;
  }
#endif

  // Resolve the reverse opcode
  if (UseRev) {
    int NewOpcode;
    // e.g. DIV -> DIVR
    if ((NewOpcode = AArch64::getSVERevInstr(Opcode)) != -1)
      Opcode = NewOpcode;
    // e.g. DIVR -> DIV
    else if ((NewOpcode = AArch64::getSVENonRevInstr(Opcode)) != -1)
      Opcode = NewOpcode;
  }

  // Get the right MOVPRFX
  uint64_t ElementSize = TII->getElementSizeForOpcode(Opcode);
  unsigned MovPrfx, MovPrfxZero;
  switch (ElementSize) {
  case AArch64::ElementSizeNone:
  case AArch64::ElementSizeB:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B;
    break;
  case AArch64::ElementSizeH:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H;
    break;
  case AArch64::ElementSizeS:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S;
    break;
  case AArch64::ElementSizeD:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D;
    break;
  default:
    llvm_unreachable("Unsupported ElementSize");
  }

  //
  // Create the destructive operation (if required)
  //
  MachineInstrBuilder PRFX, DOP;
  if (FalseZero) {
#ifndef NDEBUG
    assert(DOPRegIsUnique && "The destructive operand should be unique");
#endif
    assert(ElementSize != AArch64::ElementSizeNone &&
           "This instruction is unpredicated");

    // Merge source operand into destination register
    PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfxZero))
               .addReg(DstReg, RegState::Define)
               .addReg(MI.getOperand(PredIdx).getReg())
               .addReg(MI.getOperand(DOPIdx).getReg());

    // After the movprfx, the destructive operand is same as Dst
    DOPIdx = 0;
  } else if (DstReg != MI.getOperand(DOPIdx).getReg()) {
#ifndef NDEBUG
    assert(DOPRegIsUnique && "The destructive operand should be unique");
#endif
    // Plain (unpredicated) MOVPRFX to satisfy the tied-register constraint.
    PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx))
               .addReg(DstReg, RegState::Define)
               .addReg(MI.getOperand(DOPIdx).getReg());
    DOPIdx = 0;
  }

  //
  // Create the destructive operation
  //
  DOP = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode))
    .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));

  switch (DType) {
  case AArch64::DestructiveUnaryPassthru:
    DOP.addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
        .add(MI.getOperand(PredIdx))
        .add(MI.getOperand(SrcIdx));
    break;
  case AArch64::DestructiveBinaryImm:
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    DOP.add(MI.getOperand(PredIdx))
       .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
       .add(MI.getOperand(SrcIdx));
    break;
  case AArch64::DestructiveTernaryCommWithRev:
    DOP.add(MI.getOperand(PredIdx))
        .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
        .add(MI.getOperand(SrcIdx))
        .add(MI.getOperand(Src2Idx));
    break;
  }

  if (PRFX) {
    // Keep the MOVPRFX glued to its consumer in a bundle.
    finalizeBundle(MBB, PRFX->getIterator(), MBBI->getIterator());
    transferImpOps(MI, PRFX, DOP);
  } else
    transferImpOps(MI, DOP, DOP);

  MI.eraseFromParent();
  return true;
}
/// Expand an STGloop_wback/STZGloop_wback pseudo into a tag-store loop:
/// an optional single post-indexed (Z)STG for an odd 16-byte granule,
/// then a loop of post-indexed (Z)ST2G stores, counting SizeReg down by 32
/// per iteration. The STZG* opcodes are selected for the zeroing variant.
bool AArch64ExpandPseudo::expandSetTagLoop(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  Register SizeReg = MI.getOperand(0).getReg();
  Register AddressReg = MI.getOperand(1).getReg();

  MachineFunction *MF = MBB.getParent();

  bool ZeroData = MI.getOpcode() == AArch64::STZGloop_wback;
  const unsigned OpCode1 =
      ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex;
  const unsigned OpCode2 =
      ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;

  unsigned Size = MI.getOperand(2).getImm();
  assert(Size > 0 && Size % 16 == 0);
  // Peel off one 16-byte granule if Size is not a multiple of 32, so the
  // loop body can always store two granules at a time.
  if (Size % (16 * 2) != 0) {
    BuildMI(MBB, MBBI, DL, TII->get(OpCode1), AddressReg)
        .addReg(AddressReg)
        .addReg(AddressReg)
        .addImm(1);
    Size -= 16;
  }
  // Materialize the (possibly adjusted) loop count into SizeReg.
  MachineBasicBlock::iterator I =
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), SizeReg)
          .addImm(Size);
  expandMOVImm(MBB, I, 64);

  auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoopBB);
  MF->insert(++LoopBB->getIterator(), DoneBB);

  // Loop body: store two granules post-indexed, decrement the remaining
  // byte count by 32, and branch back while it is non-zero.
  BuildMI(LoopBB, DL, TII->get(OpCode2))
      .addDef(AddressReg)
      .addReg(AddressReg)
      .addReg(AddressReg)
      .addImm(2)
      .cloneMemRefs(MI)
      .setMIFlags(MI.getFlags());
  BuildMI(LoopBB, DL, TII->get(AArch64::SUBXri))
      .addDef(SizeReg)
      .addReg(SizeReg)
      .addImm(16 * 2)
      .addImm(0);
  BuildMI(LoopBB, DL, TII->get(AArch64::CBNZX)).addUse(SizeReg).addMBB(LoopBB);

  LoopBB->addSuccessor(LoopBB);
  LoopBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoopBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();
  // Recompute liveness bottom up.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *LoopBB);
  // Do an extra pass in the loop to get the loop carried dependencies right.
  // FIXME: is this necessary?
  LoopBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoopBB);
  DoneBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *DoneBB);

  return true;
}
  618. bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
  619. MachineBasicBlock::iterator MBBI,
  620. unsigned Opc, unsigned N) {
  621. const TargetRegisterInfo *TRI =
  622. MBB.getParent()->getSubtarget().getRegisterInfo();
  623. MachineInstr &MI = *MBBI;
  624. for (unsigned Offset = 0; Offset < N; ++Offset) {
  625. int ImmOffset = MI.getOperand(2).getImm() + Offset;
  626. bool Kill = (Offset + 1 == N) ? MI.getOperand(1).isKill() : false;
  627. assert(ImmOffset >= -256 && ImmOffset < 256 &&
  628. "Immediate spill offset out of range");
  629. BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
  630. .addReg(
  631. TRI->getSubReg(MI.getOperand(0).getReg(), AArch64::zsub0 + Offset),
  632. Opc == AArch64::LDR_ZXI ? RegState::Define : 0)
  633. .addReg(MI.getOperand(1).getReg(), getKillRegState(Kill))
  634. .addImm(ImmOffset);
  635. }
  636. MI.eraseFromParent();
  637. return true;
  638. }
/// Expand a CALL_RVMARKER pseudo: the original call, the `mov x29, x29`
/// marker, and a call to the attached runtime function, all finalized into
/// one bundle.
bool AArch64ExpandPseudo::expandCALL_RVMARKER(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
  // Expand CALL_RVMARKER pseudo to:
  // - a branch to the call target, followed by
  // - the special `mov x29, x29` marker, and
  // - another branch, to the runtime function
  // Mark the sequence as bundle, to avoid passes moving other code in between.
  MachineInstr &MI = *MBBI;

  MachineInstr *OriginalCall;
  MachineOperand &RVTarget = MI.getOperand(0);
  MachineOperand &CallTarget = MI.getOperand(1);
  assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
         "invalid operand for regular call");
  assert(RVTarget.isGlobal() && "invalid operand for attached call");
  // Direct (BL) vs indirect (BLR) call depending on the target operand kind.
  unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
  OriginalCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
  OriginalCall->addOperand(CallTarget);

  unsigned RegMaskStartIdx = 2;
  // Skip register arguments. Those are added during ISel, but are not
  // needed for the concrete branch.
  while (!MI.getOperand(RegMaskStartIdx).isRegMask()) {
    auto MOP = MI.getOperand(RegMaskStartIdx);
    assert(MOP.isReg() && "can only add register operands");
    OriginalCall->addOperand(MachineOperand::CreateReg(
        MOP.getReg(), /*Def=*/false, /*Implicit=*/true));
    RegMaskStartIdx++;
  }
  // Copy the regmask and any remaining (implicit) operands onto the call.
  for (const MachineOperand &MO :
       llvm::drop_begin(MI.operands(), RegMaskStartIdx))
    OriginalCall->addOperand(MO);

  // Emit the marker: mov x29, x29 encoded as ORR x29, xzr, x29, lsl #0.
  BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs))
                     .addReg(AArch64::FP, RegState::Define)
                     .addReg(AArch64::XZR)
                     .addReg(AArch64::FP)
                     .addImm(0);

  // Call the attached (runtime) function.
  auto *RVCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::BL))
                     .add(RVTarget)
                     .getInstr();

  // Preserve call-site info: the pseudo's entry now describes the real call.
  if (MI.shouldUpdateCallSiteInfo())
    MBB.getParent()->moveCallSiteInfo(&MI, OriginalCall);

  MI.eraseFromParent();
  finalizeBundle(MBB, OriginalCall->getIterator(),
                 std::next(RVCall->getIterator()));
  return true;
}
  684. bool AArch64ExpandPseudo::expandCALL_BTI(MachineBasicBlock &MBB,
  685. MachineBasicBlock::iterator MBBI) {
  686. // Expand CALL_BTI pseudo to:
  687. // - a branch to the call target
  688. // - a BTI instruction
  689. // Mark the sequence as a bundle, to avoid passes moving other code in
  690. // between.
  691. MachineInstr &MI = *MBBI;
  692. MachineOperand &CallTarget = MI.getOperand(0);
  693. assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
  694. "invalid operand for regular call");
  695. unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
  696. MachineInstr *Call =
  697. BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
  698. Call->addOperand(CallTarget);
  699. MachineInstr *BTI =
  700. BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::HINT))
  701. // BTI J so that setjmp can to BR to this.
  702. .addImm(36)
  703. .getInstr();
  704. if (MI.shouldUpdateCallSiteInfo())
  705. MBB.getParent()->moveCallSiteInfo(&MI, Call);
  706. MI.eraseFromParent();
  707. finalizeBundle(MBB, Call->getIterator(), std::next(BTI->getIterator()));
  708. return true;
  709. }
  710. bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(
  711. MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
  712. Register CtxReg = MBBI->getOperand(0).getReg();
  713. Register BaseReg = MBBI->getOperand(1).getReg();
  714. int Offset = MBBI->getOperand(2).getImm();
  715. DebugLoc DL(MBBI->getDebugLoc());
  716. auto &STI = MBB.getParent()->getSubtarget<AArch64Subtarget>();
  717. if (STI.getTargetTriple().getArchName() != "arm64e") {
  718. BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
  719. .addUse(CtxReg)
  720. .addUse(BaseReg)
  721. .addImm(Offset / 8)
  722. .setMIFlag(MachineInstr::FrameSetup);
  723. MBBI->eraseFromParent();
  724. return true;
  725. }
  726. // We need to sign the context in an address-discriminated way. 0xc31a is a
  727. // fixed random value, chosen as part of the ABI.
  728. // add x16, xBase, #Offset
  729. // movk x16, #0xc31a, lsl #48
  730. // mov x17, x22/xzr
  731. // pacdb x17, x16
  732. // str x17, [xBase, #Offset]
  733. unsigned Opc = Offset >= 0 ? AArch64::ADDXri : AArch64::SUBXri;
  734. BuildMI(MBB, MBBI, DL, TII->get(Opc), AArch64::X16)
  735. .addUse(BaseReg)
  736. .addImm(abs(Offset))
  737. .addImm(0)
  738. .setMIFlag(MachineInstr::FrameSetup);
  739. BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X16)
  740. .addUse(AArch64::X16)
  741. .addImm(0xc31a)
  742. .addImm(48)
  743. .setMIFlag(MachineInstr::FrameSetup);
  744. // We're not allowed to clobber X22 (and couldn't clobber XZR if we tried), so
  745. // move it somewhere before signing.
  746. BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17)
  747. .addUse(AArch64::XZR)
  748. .addUse(CtxReg)
  749. .addImm(0)
  750. .setMIFlag(MachineInstr::FrameSetup);
  751. BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACDB), AArch64::X17)
  752. .addUse(AArch64::X17)
  753. .addUse(AArch64::X16)
  754. .setMIFlag(MachineInstr::FrameSetup);
  755. BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
  756. .addUse(AArch64::X17)
  757. .addUse(BaseReg)
  758. .addImm(Offset / 8)
  759. .setMIFlag(MachineInstr::FrameSetup);
  760. MBBI->eraseFromParent();
  761. return true;
  762. }
/// If MBBI references a pseudo instruction that should be expanded here,
/// do the expansion and return true. Otherwise return false.
///
/// NextMBBI is forwarded to expansions (CMP_SWAP*, SetTagLoop) that may
/// split the block, so the caller can resume iteration correctly.
bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();

  // Check if we can expand the destructive op
  int OrigInstr = AArch64::getSVEPseudoMap(MI.getOpcode());
  if (OrigInstr != -1) {
    auto &Orig = TII->get(OrigInstr);
    if ((Orig.TSFlags & AArch64::DestructiveInstTypeMask) !=
        AArch64::NotDestructive) {
      return expand_DestructiveOp(MI, MBB, MBBI);
    }
  }

  switch (Opcode) {
  default:
    break;

  case AArch64::BSPv8i8:
  case AArch64::BSPv16i8: {
    // Bitwise-select pseudo: pick BIT/BIF/BSL depending on which source
    // register the destination aliases, so that the tied-operand constraint
    // of the concrete NEON instruction is satisfied.
    Register DstReg = MI.getOperand(0).getReg();
    if (DstReg == MI.getOperand(3).getReg()) {
      // Expand to BIT
      BuildMI(MBB, MBBI, MI.getDebugLoc(),
              TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BITv8i8
                                                  : AArch64::BITv16i8))
          .add(MI.getOperand(0))
          .add(MI.getOperand(3))
          .add(MI.getOperand(2))
          .add(MI.getOperand(1));
    } else if (DstReg == MI.getOperand(2).getReg()) {
      // Expand to BIF
      BuildMI(MBB, MBBI, MI.getDebugLoc(),
              TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BIFv8i8
                                                  : AArch64::BIFv16i8))
          .add(MI.getOperand(0))
          .add(MI.getOperand(2))
          .add(MI.getOperand(3))
          .add(MI.getOperand(1));
    } else {
      // Expand to BSL, use additional move if required
      if (DstReg == MI.getOperand(1).getReg()) {
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
                                                    : AArch64::BSLv16i8))
            .add(MI.getOperand(0))
            .add(MI.getOperand(1))
            .add(MI.getOperand(2))
            .add(MI.getOperand(3));
      } else {
        // Dst aliases no source: copy the mask (operand 1) into Dst first
        // (ORR Dst, Src1, Src1), then BSL with Dst as the tied mask input.
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::ORRv8i8
                                                    : AArch64::ORRv16i8))
            .addReg(DstReg,
                    RegState::Define |
                        getRenamableRegState(MI.getOperand(0).isRenamable()))
            .add(MI.getOperand(1))
            .add(MI.getOperand(1));
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
                                                    : AArch64::BSLv16i8))
            .add(MI.getOperand(0))
            .addReg(DstReg,
                    RegState::Kill |
                        getRenamableRegState(MI.getOperand(0).isRenamable()))
            .add(MI.getOperand(2))
            .add(MI.getOperand(3));
      }
    }
    MI.eraseFromParent();
    return true;
  }

  // Plain register-register ALU pseudos: rewritten below as the equivalent
  // shifted-register ("rs") form with LSL #0.
  case AArch64::ADDWrr:
  case AArch64::SUBWrr:
  case AArch64::ADDXrr:
  case AArch64::SUBXrr:
  case AArch64::ADDSWrr:
  case AArch64::SUBSWrr:
  case AArch64::ADDSXrr:
  case AArch64::SUBSXrr:
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::ANDSWrr:
  case AArch64::ANDSXrr:
  case AArch64::BICSWrr:
  case AArch64::BICSXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr: {
    // NOTE: this inner `Opcode` intentionally shadows the outer one; it
    // holds the replacement ("rs") opcode.
    unsigned Opcode;
    switch (MI.getOpcode()) {
    default:
      return false;
    case AArch64::ADDWrr:      Opcode = AArch64::ADDWrs; break;
    case AArch64::SUBWrr:      Opcode = AArch64::SUBWrs; break;
    case AArch64::ADDXrr:      Opcode = AArch64::ADDXrs; break;
    case AArch64::SUBXrr:      Opcode = AArch64::SUBXrs; break;
    case AArch64::ADDSWrr:     Opcode = AArch64::ADDSWrs; break;
    case AArch64::SUBSWrr:     Opcode = AArch64::SUBSWrs; break;
    case AArch64::ADDSXrr:     Opcode = AArch64::ADDSXrs; break;
    case AArch64::SUBSXrr:     Opcode = AArch64::SUBSXrs; break;
    case AArch64::ANDWrr:      Opcode = AArch64::ANDWrs; break;
    case AArch64::ANDXrr:      Opcode = AArch64::ANDXrs; break;
    case AArch64::BICWrr:      Opcode = AArch64::BICWrs; break;
    case AArch64::BICXrr:      Opcode = AArch64::BICXrs; break;
    case AArch64::ANDSWrr:     Opcode = AArch64::ANDSWrs; break;
    case AArch64::ANDSXrr:     Opcode = AArch64::ANDSXrs; break;
    case AArch64::BICSWrr:     Opcode = AArch64::BICSWrs; break;
    case AArch64::BICSXrr:     Opcode = AArch64::BICSXrs; break;
    case AArch64::EONWrr:      Opcode = AArch64::EONWrs; break;
    case AArch64::EONXrr:      Opcode = AArch64::EONXrs; break;
    case AArch64::EORWrr:      Opcode = AArch64::EORWrs; break;
    case AArch64::EORXrr:      Opcode = AArch64::EORXrs; break;
    case AArch64::ORNWrr:      Opcode = AArch64::ORNWrs; break;
    case AArch64::ORNXrr:      Opcode = AArch64::ORNXrs; break;
    case AArch64::ORRWrr:      Opcode = AArch64::ORRWrs; break;
    case AArch64::ORRXrr:      Opcode = AArch64::ORRXrs; break;
    }
    MachineFunction &MF = *MBB.getParent();
    // Try to create new inst without implicit operands added.
    MachineInstr *NewMI = MF.CreateMachineInstr(
        TII->get(Opcode), MI.getDebugLoc(), /*NoImplicit=*/true);
    MBB.insert(MBBI, NewMI);
    MachineInstrBuilder MIB1(MF, NewMI);
    MIB1.addReg(MI.getOperand(0).getReg(), RegState::Define)
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    transferImpOps(MI, MIB1, MIB1);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::LOADgot: {
    MachineFunction *MF = MBB.getParent();
    Register DstReg = MI.getOperand(0).getReg();
    const MachineOperand &MO1 = MI.getOperand(1);
    unsigned Flags = MO1.getTargetFlags();

    if (MF->getTarget().getCodeModel() == CodeModel::Tiny) {
      // Tiny codemodel expand to LDR
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                                        TII->get(AArch64::LDRXl), DstReg);

      // The GOT entry may be a global, an external symbol, or a constant
      // pool index; forward whichever kind operand 1 carries.
      if (MO1.isGlobal()) {
        MIB.addGlobalAddress(MO1.getGlobal(), 0, Flags);
      } else if (MO1.isSymbol()) {
        MIB.addExternalSymbol(MO1.getSymbolName(), Flags);
      } else {
        assert(MO1.isCPI() &&
               "Only expect globals, externalsymbols, or constant pools");
        MIB.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), Flags);
      }
    } else {
      // Small codemodel expand into ADRP + LDR.
      MachineFunction &MF = *MI.getParent()->getParent();
      DebugLoc DL = MI.getDebugLoc();
      MachineInstrBuilder MIB1 =
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);

      MachineInstrBuilder MIB2;
      if (MF.getSubtarget<AArch64Subtarget>().isTargetILP32()) {
        // ILP32: pointers are 32-bit, so load into the W sub-register while
        // keeping the X register as an implicit operand for liveness.
        auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
        unsigned Reg32 = TRI->getSubReg(DstReg, AArch64::sub_32);
        unsigned DstFlags = MI.getOperand(0).getTargetFlags();
        MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRWui))
                   .addDef(Reg32)
                   .addReg(DstReg, RegState::Kill)
                   .addReg(DstReg, DstFlags | RegState::Implicit);
      } else {
        // NOTE(review): this DstReg shadows the outer one with the same
        // value; the inner declaration is redundant but harmless.
        Register DstReg = MI.getOperand(0).getReg();
        MIB2 = BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui))
                   .add(MI.getOperand(0))
                   .addUse(DstReg, RegState::Kill);
      }

      // ADRP gets the page address (MO_PAGE); the LDR gets the page offset
      // (MO_PAGEOFF | MO_NC), for whichever operand kind is present.
      if (MO1.isGlobal()) {
        MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
        MIB2.addGlobalAddress(MO1.getGlobal(), 0,
                              Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
      } else if (MO1.isSymbol()) {
        MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
        MIB2.addExternalSymbol(MO1.getSymbolName(),
                               Flags | AArch64II::MO_PAGEOFF |
                                   AArch64II::MO_NC);
      } else {
        assert(MO1.isCPI() &&
               "Only expect globals, externalsymbols, or constant pools");
        MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                  Flags | AArch64II::MO_PAGE);
        MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                  Flags | AArch64II::MO_PAGEOFF |
                                      AArch64II::MO_NC);
      }

      transferImpOps(MI, MIB1, MIB2);
    }
    MI.eraseFromParent();
    return true;
  }

  case AArch64::MOVaddrBA: {
    MachineFunction &MF = *MI.getParent()->getParent();
    if (MF.getSubtarget<AArch64Subtarget>().isTargetMachO()) {
      // blockaddress expressions have to come from a constant pool because the
      // largest addend (and hence offset within a function) allowed for ADRP is
      // only 8MB.
      const BlockAddress *BA = MI.getOperand(1).getBlockAddress();
      assert(MI.getOperand(1).getOffset() == 0 && "unexpected offset");

      MachineConstantPool *MCP = MF.getConstantPool();
      unsigned CPIdx = MCP->getConstantPoolIndex(BA, Align(8));

      Register DstReg = MI.getOperand(0).getReg();
      auto MIB1 =
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
              .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
      auto MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                          TII->get(AArch64::LDRXui), DstReg)
                      .addUse(DstReg)
                      .addConstantPoolIndex(
                          CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
      transferImpOps(MI, MIB1, MIB2);
      MI.eraseFromParent();
      return true;
    }
  }
    // Non-MachO MOVaddrBA falls through to the generic ADRP+ADD expansion.
    LLVM_FALLTHROUGH;
  case AArch64::MOVaddr:
  case AArch64::MOVaddrJT:
  case AArch64::MOVaddrCP:
  case AArch64::MOVaddrTLS:
  case AArch64::MOVaddrEXT: {
    // Expand into ADRP + ADD.
    Register DstReg = MI.getOperand(0).getReg();
    assert(DstReg != AArch64::XZR);
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
            .add(MI.getOperand(1));

    if (MI.getOperand(1).getTargetFlags() & AArch64II::MO_TAGGED) {
      // MO_TAGGED on the page indicates a tagged address. Set the tag now.
      // We do so by creating a MOVK that sets bits 48-63 of the register to
      // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
      // the small code model so we can assume a binary size of <= 4GB, which
      // makes the untagged PC relative offset positive. The binary must also be
      // loaded into address range [0, 2^48). Both of these properties need to
      // be ensured at runtime when using tagged addresses.
      auto Tag = MI.getOperand(1);
      Tag.setTargetFlags(AArch64II::MO_PREL | AArch64II::MO_G3);
      Tag.setOffset(0x100000000);
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi), DstReg)
          .addReg(DstReg)
          .add(Tag)
          .addImm(48);
    }

    MachineInstrBuilder MIB2 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
            .add(MI.getOperand(0))
            .addReg(DstReg)
            .add(MI.getOperand(2))
            .addImm(0);

    transferImpOps(MI, MIB1, MIB2);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::ADDlowTLS:
    // Produce a plain ADD
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .addImm(0);
    MI.eraseFromParent();
    return true;

  case AArch64::MOVbaseTLS: {
    // Read the thread pointer: select the TPIDR system register for the
    // exception level the subtarget says holds the TP (EL0 by default).
    Register DstReg = MI.getOperand(0).getReg();
    auto SysReg = AArch64SysReg::TPIDR_EL0;
    MachineFunction *MF = MBB.getParent();
    if (MF->getSubtarget<AArch64Subtarget>().useEL3ForTP())
      SysReg = AArch64SysReg::TPIDR_EL3;
    else if (MF->getSubtarget<AArch64Subtarget>().useEL2ForTP())
      SysReg = AArch64SysReg::TPIDR_EL2;
    else if (MF->getSubtarget<AArch64Subtarget>().useEL1ForTP())
      SysReg = AArch64SysReg::TPIDR_EL1;
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
        .addImm(SysReg);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::MOVi32imm:
    return expandMOVImm(MBB, MBBI, 32);
  case AArch64::MOVi64imm:
    return expandMOVImm(MBB, MBBI, 64);
  case AArch64::RET_ReallyLR: {
    // Hiding the LR use with RET_ReallyLR may lead to extra kills in the
    // function and missing live-ins. We are fine in practice because callee
    // saved register handling ensures the register value is restored before
    // RET, but we need the undef flag here to appease the MachineVerifier
    // liveness checks.
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
            .addReg(AArch64::LR, RegState::Undef);
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }

  // Atomic compare-and-swap pseudos, expanded to LL/SC loops; the compare
  // opcode and extend/shift encoding match the operand width.
  case AArch64::CMP_SWAP_8:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRB, AArch64::STLXRB,
                          AArch64::SUBSWrx,
                          AArch64_AM::getArithExtendImm(AArch64_AM::UXTB, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_16:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRH, AArch64::STLXRH,
                          AArch64::SUBSWrx,
                          AArch64_AM::getArithExtendImm(AArch64_AM::UXTH, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_32:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRW, AArch64::STLXRW,
                          AArch64::SUBSWrs,
                          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_64:
    return expandCMP_SWAP(MBB, MBBI,
                          AArch64::LDAXRX, AArch64::STLXRX, AArch64::SUBSXrs,
                          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
                          AArch64::XZR, NextMBBI);
  case AArch64::CMP_SWAP_128:
  case AArch64::CMP_SWAP_128_RELEASE:
  case AArch64::CMP_SWAP_128_ACQUIRE:
  case AArch64::CMP_SWAP_128_MONOTONIC:
    return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);

  case AArch64::AESMCrrTied:
  case AArch64::AESIMCrrTied: {
    // Drop the tied-operand constraint: re-emit as the plain AESMC/AESIMC.
    MachineInstrBuilder MIB =
    BuildMI(MBB, MBBI, MI.getDebugLoc(),
            TII->get(Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr :
                                                      AArch64::AESIMCrr))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1));
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::IRGstack: {
    MachineFunction &MF = *MBB.getParent();
    const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
    const AArch64FrameLowering *TFI =
        MF.getSubtarget<AArch64Subtarget>().getFrameLowering();

    // IRG does not allow immediate offset. getTaggedBasePointerOffset should
    // almost always point to SP-after-prologue; if not, emit a longer
    // instruction sequence.
    int BaseOffset = -AFI->getTaggedBasePointerOffset();
    Register FrameReg;
    StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
        MF, BaseOffset, false /*isFixed*/, false /*isSVE*/, FrameReg,
        /*PreferFP=*/false,
        /*ForSimm=*/true);
    Register SrcReg = FrameReg;
    if (FrameRegOffset) {
      // Use output register as temporary.
      SrcReg = MI.getOperand(0).getReg();
      emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg,
                      FrameRegOffset, TII);
    }
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::IRG))
        .add(MI.getOperand(0))
        .addUse(SrcReg)
        .add(MI.getOperand(2));
    MI.eraseFromParent();
    return true;
  }
  case AArch64::TAGPstack: {
    // ADDG/SUBG take an unsigned offset; pick the opcode by the sign.
    int64_t Offset = MI.getOperand(2).getImm();
    BuildMI(MBB, MBBI, MI.getDebugLoc(),
            TII->get(Offset >= 0 ? AArch64::ADDG : AArch64::SUBG))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .addImm(std::abs(Offset))
        .add(MI.getOperand(4));
    MI.eraseFromParent();
    return true;
  }
  case AArch64::STGloop_wback:
  case AArch64::STZGloop_wback:
    return expandSetTagLoop(MBB, MBBI, NextMBBI);
  case AArch64::STGloop:
  case AArch64::STZGloop:
    report_fatal_error(
        "Non-writeback variants of STGloop / STZGloop should not "
        "survive past PrologEpilogInserter.");
  // Multi-register SVE spills/fills, split into per-register STR/LDR.
  case AArch64::STR_ZZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4);
  case AArch64::STR_ZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3);
  case AArch64::STR_ZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2);
  case AArch64::LDR_ZZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4);
  case AArch64::LDR_ZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
  case AArch64::LDR_ZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
  case AArch64::BLR_RVMARKER:
    return expandCALL_RVMARKER(MBB, MBBI);
  case AArch64::BLR_BTI:
    return expandCALL_BTI(MBB, MBBI);
  case AArch64::StoreSwiftAsyncContext:
    return expandStoreSwiftAsyncContext(MBB, MBBI);
  }
  return false;
}
  1173. /// Iterate over the instructions in basic block MBB and expand any
  1174. /// pseudo instructions. Return true if anything was modified.
  1175. bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
  1176. bool Modified = false;
  1177. MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  1178. while (MBBI != E) {
  1179. MachineBasicBlock::iterator NMBBI = std::next(MBBI);
  1180. Modified |= expandMI(MBB, MBBI, NMBBI);
  1181. MBBI = NMBBI;
  1182. }
  1183. return Modified;
  1184. }
  1185. bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
  1186. TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
  1187. bool Modified = false;
  1188. for (auto &MBB : MF)
  1189. Modified |= expandMBB(MBB);
  1190. return Modified;
  1191. }
  1192. /// Returns an instance of the pseudo instruction expansion pass.
  1193. FunctionPass *llvm::createAArch64ExpandPseudoPass() {
  1194. return new AArch64ExpandPseudo();
  1195. }