  1. //===- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions ----------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file contains a pass that expands pseudo instructions into target
  10. // instructions to allow proper scheduling and other late optimizations. This
  11. // pass should be run after register allocation but before the post-regalloc
  12. // scheduling pass.
  13. //
  14. //===----------------------------------------------------------------------===//
  15. #include "AArch64ExpandImm.h"
  16. #include "AArch64InstrInfo.h"
  17. #include "AArch64MachineFunctionInfo.h"
  18. #include "AArch64Subtarget.h"
  19. #include "MCTargetDesc/AArch64AddressingModes.h"
  20. #include "Utils/AArch64BaseInfo.h"
  21. #include "llvm/ADT/DenseMap.h"
  22. #include "llvm/ADT/Triple.h"
  23. #include "llvm/CodeGen/LivePhysRegs.h"
  24. #include "llvm/CodeGen/MachineBasicBlock.h"
  25. #include "llvm/CodeGen/MachineConstantPool.h"
  26. #include "llvm/CodeGen/MachineFunction.h"
  27. #include "llvm/CodeGen/MachineFunctionPass.h"
  28. #include "llvm/CodeGen/MachineInstr.h"
  29. #include "llvm/CodeGen/MachineInstrBuilder.h"
  30. #include "llvm/CodeGen/MachineOperand.h"
  31. #include "llvm/CodeGen/TargetSubtargetInfo.h"
  32. #include "llvm/IR/DebugLoc.h"
  33. #include "llvm/MC/MCInstrDesc.h"
  34. #include "llvm/Pass.h"
  35. #include "llvm/Support/CodeGen.h"
  36. #include "llvm/Support/MathExtras.h"
  37. #include "llvm/Target/TargetMachine.h"
  38. #include <cassert>
  39. #include <cstdint>
  40. #include <iterator>
  41. #include <limits>
  42. #include <utility>
  43. using namespace llvm;
  44. #define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"
namespace {

/// Late machine pass that expands AArch64 pseudo instructions into real
/// target instructions so they can be properly scheduled and optimized.
/// Runs after register allocation but before the post-RA scheduler (see the
/// file header comment).
class AArch64ExpandPseudo : public MachineFunctionPass {
public:
  // Target instruction info; set up when the pass runs on a function.
  const AArch64InstrInfo *TII;

  static char ID;

  AArch64ExpandPseudo() : MachineFunctionPass(ID) {
    initializeAArch64ExpandPseudoPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &Fn) override;

  StringRef getPassName() const override { return AARCH64_EXPAND_PSEUDO_NAME; }

private:
  // Expand all pseudos in one basic block; returns true if anything changed.
  bool expandMBB(MachineBasicBlock &MBB);
  // Expand a single instruction. NextMBBI is updated when an expansion
  // splits the block so the caller can continue iterating safely.
  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                MachineBasicBlock::iterator &NextMBBI);
  // Materialize a MOVi32imm/MOVi64imm immediate (BitSize is 32 or 64).
  bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                    unsigned BitSize);

  // Expand SVE pseudos with destructive operands (may emit MOVPRFX).
  bool expand_DestructiveOp(MachineInstr &MI, MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI);
  // Expand a compare-and-swap pseudo into an LL/SC loop; the opcode and
  // register parameters select the 8/16/32/64-bit variant.
  bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                      unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
                      unsigned ExtendImm, unsigned ZeroReg,
                      MachineBasicBlock::iterator &NextMBBI);
  // Expand the 128-bit compare-and-swap pseudos (LDXP/STXP based).
  bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          MachineBasicBlock::iterator &NextMBBI);
  // Expand the MTE tag-setting loop pseudos (STGloop/STZGloop variants).
  bool expandSetTagLoop(MachineBasicBlock &MBB,
                        MachineBasicBlock::iterator MBBI,
                        MachineBasicBlock::iterator &NextMBBI);
  // Expand a multi-vector SVE spill/fill pseudo into N consecutive
  // single-register loads/stores.
  bool expandSVESpillFill(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI, unsigned Opc,
                          unsigned N);
  bool expandCALL_RVMARKER(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI);
  bool expandCALL_BTI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
  bool expandStoreSwiftAsyncContext(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI);
  MachineBasicBlock *expandRestoreZA(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI);
  MachineBasicBlock *expandCondSMToggle(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI);
};

} // end anonymous namespace
char AArch64ExpandPseudo::ID = 0;

// Register the pass with the LLVM pass registry.
INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo",
                AARCH64_EXPAND_PSEUDO_NAME, false, false)
  90. /// Transfer implicit operands on the pseudo instruction to the
  91. /// instructions created from the expansion.
  92. static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
  93. MachineInstrBuilder &DefMI) {
  94. const MCInstrDesc &Desc = OldMI.getDesc();
  95. for (const MachineOperand &MO :
  96. llvm::drop_begin(OldMI.operands(), Desc.getNumOperands())) {
  97. assert(MO.isReg() && MO.getReg());
  98. if (MO.isUse())
  99. UseMI.add(MO);
  100. else
  101. DefMI.add(MO);
  102. }
  103. }
  104. /// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
  105. /// real move-immediate instructions to synthesize the immediate.
  106. bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
  107. MachineBasicBlock::iterator MBBI,
  108. unsigned BitSize) {
  109. MachineInstr &MI = *MBBI;
  110. Register DstReg = MI.getOperand(0).getReg();
  111. uint64_t RenamableState =
  112. MI.getOperand(0).isRenamable() ? RegState::Renamable : 0;
  113. uint64_t Imm = MI.getOperand(1).getImm();
  114. if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
  115. // Useless def, and we don't want to risk creating an invalid ORR (which
  116. // would really write to sp).
  117. MI.eraseFromParent();
  118. return true;
  119. }
  120. SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  121. AArch64_IMM::expandMOVImm(Imm, BitSize, Insn);
  122. assert(Insn.size() != 0);
  123. SmallVector<MachineInstrBuilder, 4> MIBS;
  124. for (auto I = Insn.begin(), E = Insn.end(); I != E; ++I) {
  125. bool LastItem = std::next(I) == E;
  126. switch (I->Opcode)
  127. {
  128. default: llvm_unreachable("unhandled!"); break;
  129. case AArch64::ORRWri:
  130. case AArch64::ORRXri:
  131. MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
  132. .add(MI.getOperand(0))
  133. .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
  134. .addImm(I->Op2));
  135. break;
  136. case AArch64::MOVNWi:
  137. case AArch64::MOVNXi:
  138. case AArch64::MOVZWi:
  139. case AArch64::MOVZXi: {
  140. bool DstIsDead = MI.getOperand(0).isDead();
  141. MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
  142. .addReg(DstReg, RegState::Define |
  143. getDeadRegState(DstIsDead && LastItem) |
  144. RenamableState)
  145. .addImm(I->Op1)
  146. .addImm(I->Op2));
  147. } break;
  148. case AArch64::MOVKWi:
  149. case AArch64::MOVKXi: {
  150. Register DstReg = MI.getOperand(0).getReg();
  151. bool DstIsDead = MI.getOperand(0).isDead();
  152. MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
  153. .addReg(DstReg,
  154. RegState::Define |
  155. getDeadRegState(DstIsDead && LastItem) |
  156. RenamableState)
  157. .addReg(DstReg)
  158. .addImm(I->Op1)
  159. .addImm(I->Op2));
  160. } break;
  161. }
  162. }
  163. transferImpOps(MI, MIBS.front(), MIBS.back());
  164. MI.eraseFromParent();
  165. return true;
  166. }
/// Expand a compare-and-swap pseudo into an LL/SC loop of real instructions:
///
///   loadcmp: [mov wStatus, #0]                // only if status is used
///            ld[a]xr xDest, [xAddr]
///            cmp xDest, xDesired; b.ne done
///   store:   st[l]xr wStatus, xNew, [xAddr]
///            cbnz wStatus, loadcmp
///   done:    ...
///
/// The caller selects the size/ordering via the opcode parameters; the
/// original block is split and NextMBBI is reset so iteration restarts
/// cleanly.
bool AArch64ExpandPseudo::expandCMP_SWAP(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp,
    unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  MIMetadata MIMD(MI);
  const MachineOperand &Dest = MI.getOperand(0);
  Register StatusReg = MI.getOperand(1).getReg();
  bool StatusDead = MI.getOperand(1).isDead();
  // Duplicating undef operands into 2 instructions does not guarantee the same
  // value on both; However undef should be replaced by xzr anyway.
  assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
  Register AddrReg = MI.getOperand(2).getReg();
  Register DesiredReg = MI.getOperand(3).getReg();
  Register NewReg = MI.getOperand(4).getReg();

  // Create the three blocks of the loop and insert them after MBB; the tail
  // of MBB (everything after MI) will move into DoneBB below.
  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), DoneBB);

  // .Lloadcmp:
  //     mov wStatus, 0
  //     ldaxr xDest, [xAddr]
  //     cmp xDest, xDesired
  //     b.ne .Ldone
  if (!StatusDead)
    // Pre-clear the status so the failure path reports 0 without a store.
    BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::MOVZWi), StatusReg)
        .addImm(0).addImm(0);
  BuildMI(LoadCmpBB, MIMD, TII->get(LdarOp), Dest.getReg())
      .addReg(AddrReg);
  BuildMI(LoadCmpBB, MIMD, TII->get(CmpOp), ZeroReg)
      .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
      .addReg(DesiredReg)
      .addImm(ExtendImm);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::Bcc))
      .addImm(AArch64CC::NE)
      .addMBB(DoneBB)
      .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
  LoadCmpBB->addSuccessor(DoneBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     stlxr wStatus, xNew, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(StoreBB, MIMD, TII->get(StlrOp), StatusReg)
      .addReg(NewReg)
      .addReg(AddrReg);
  BuildMI(StoreBB, MIMD, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  // Move the rest of the original block after the pseudo into DoneBB and
  // rewire the CFG.
  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  // Recompute livein lists.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
  // Do an extra pass around the loop to get loop carried registers right.
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  LoadCmpBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  return true;
}
/// Expand a 128-bit compare-and-swap pseudo into an exclusive-pair LL/SC
/// loop.  The CMP_SWAP_128* opcode selects the acquire/release flavour of
/// the LDXP/STXP pair.  On mismatch, a store of the loaded value (FailBB)
/// is still performed, keeping the exclusive monitor semantics consistent.
bool AArch64ExpandPseudo::expandCMP_SWAP_128(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  MIMetadata MIMD(MI);
  MachineOperand &DestLo = MI.getOperand(0);
  MachineOperand &DestHi = MI.getOperand(1);
  Register StatusReg = MI.getOperand(2).getReg();
  bool StatusDead = MI.getOperand(2).isDead();
  // Duplicating undef operands into 2 instructions does not guarantee the same
  // value on both; However undef should be replaced by xzr anyway.
  assert(!MI.getOperand(3).isUndef() && "cannot handle undef");
  Register AddrReg = MI.getOperand(3).getReg();
  Register DesiredLoReg = MI.getOperand(4).getReg();
  Register DesiredHiReg = MI.getOperand(5).getReg();
  Register NewLoReg = MI.getOperand(6).getReg();
  Register NewHiReg = MI.getOperand(7).getReg();

  // Select acquire semantics on the load and/or release semantics on the
  // store according to the memory ordering encoded in the pseudo's opcode.
  unsigned LdxpOp, StxpOp;
  switch (MI.getOpcode()) {
  case AArch64::CMP_SWAP_128_MONOTONIC:
    LdxpOp = AArch64::LDXPX;
    StxpOp = AArch64::STXPX;
    break;
  case AArch64::CMP_SWAP_128_RELEASE:
    LdxpOp = AArch64::LDXPX;
    StxpOp = AArch64::STLXPX;
    break;
  case AArch64::CMP_SWAP_128_ACQUIRE:
    LdxpOp = AArch64::LDAXPX;
    StxpOp = AArch64::STXPX;
    break;
  case AArch64::CMP_SWAP_128:
    LdxpOp = AArch64::LDAXPX;
    StxpOp = AArch64::STLXPX;
    break;
  default:
    llvm_unreachable("Unexpected opcode");
  }

  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto FailBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), FailBB);
  MF->insert(++FailBB->getIterator(), DoneBB);

  // .Lloadcmp:
  //     ldaxp xDestLo, xDestHi, [xAddr]
  //     cmp xDestLo, xDesiredLo
  //     sbcs xDestHi, xDesiredHi
  //     b.ne .Ldone
  BuildMI(LoadCmpBB, MIMD, TII->get(LdxpOp))
      .addReg(DestLo.getReg(), RegState::Define)
      .addReg(DestHi.getReg(), RegState::Define)
      .addReg(AddrReg);
  // Compare the low halves; materialize the EQ result into StatusReg via
  // CSINC since the second SUBS below clobbers NZCV.
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::SUBSXrs), AArch64::XZR)
      .addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))
      .addReg(DesiredLoReg)
      .addImm(0);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CSINCWr), StatusReg)
      .addUse(AArch64::WZR)
      .addUse(AArch64::WZR)
      .addImm(AArch64CC::EQ);
  // Compare the high halves and fold the result into StatusReg.
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::SUBSXrs), AArch64::XZR)
      .addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
      .addReg(DesiredHiReg)
      .addImm(0);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CSINCWr), StatusReg)
      .addUse(StatusReg, RegState::Kill)
      .addUse(StatusReg, RegState::Kill)
      .addImm(AArch64CC::EQ);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CBNZW))
      .addUse(StatusReg, getKillRegState(StatusDead))
      .addMBB(FailBB);
  LoadCmpBB->addSuccessor(FailBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     stlxp wStatus, xNewLo, xNewHi, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(StoreBB, MIMD, TII->get(StxpOp), StatusReg)
      .addReg(NewLoReg)
      .addReg(NewHiReg)
      .addReg(AddrReg);
  BuildMI(StoreBB, MIMD, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  BuildMI(StoreBB, MIMD, TII->get(AArch64::B)).addMBB(DoneBB);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  // .Lfail:
  //     stlxp wStatus, xDestLo, xDestHi, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(FailBB, MIMD, TII->get(StxpOp), StatusReg)
      .addReg(DestLo.getReg())
      .addReg(DestHi.getReg())
      .addReg(AddrReg);
  BuildMI(FailBB, MIMD, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  FailBB->addSuccessor(LoadCmpBB);
  FailBB->addSuccessor(DoneBB);

  // Split the tail of the original block into DoneBB and rewire the CFG.
  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  // Recompute liveness bottom up.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *FailBB);
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  // Do an extra pass in the loop to get the loop carried dependencies right.
  FailBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *FailBB);
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  LoadCmpBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  return true;
}
/// \brief Expand Pseudos to Instructions with destructive operands.
///
/// This mechanism uses MOVPRFX instructions for zeroing the false lanes
/// or for fixing relaxed register allocation conditions to comply with
/// the instructions register constraints. The latter case may be cheaper
/// than setting the register constraints in the register allocator,
/// since that will insert regular MOV instructions rather than MOVPRFX.
///
/// Example (after register allocation):
///
///   FSUB_ZPZZ_ZERO_B Z0, Pg, Z1, Z0
///
/// * The Pseudo FSUB_ZPZZ_ZERO_B maps to FSUB_ZPmZ_B.
/// * We cannot map directly to FSUB_ZPmZ_B because the register
///   constraints of the instruction are not met.
/// * Also the _ZERO specifies the false lanes need to be zeroed.
///
/// We first try to see if the destructive operand == result operand,
/// if not, we try to swap the operands, e.g.
///
///   FSUB_ZPmZ_B  Z0, Pg/m, Z0, Z1
///
/// But because FSUB_ZPmZ is not commutative, this is semantically
/// different, so we need a reverse instruction:
///
///   FSUBR_ZPmZ_B  Z0, Pg/m, Z0, Z1
///
/// Then we implement the zeroing of the false lanes of Z0 by adding
/// a zeroing MOVPRFX instruction:
///
///   MOVPRFX_ZPzZ_B Z0, Pg/z, Z0
///   FSUBR_ZPmZ_B   Z0, Pg/m, Z0, Z1
///
/// Note that this can only be done for _ZERO or _UNDEF variants where
/// we can guarantee the false lanes to be zeroed (by implementing this)
/// or that they are undef (don't care / not used), otherwise the
/// swapping of operands is illegal because the operation is not
/// (or cannot be emulated to be) fully commutative.
bool AArch64ExpandPseudo::expand_DestructiveOp(
                            MachineInstr &MI,
                            MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI) {
  // Map the pseudo onto the real SVE instruction and read the destructive
  // type and false-lanes policy from the instruction's TSFlags.
  unsigned Opcode = AArch64::getSVEPseudoMap(MI.getOpcode());
  uint64_t DType = TII->get(Opcode).TSFlags & AArch64::DestructiveInstTypeMask;
  uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask;
  bool FalseZero = FalseLanes == AArch64::FalseLanesZero;
  Register DstReg = MI.getOperand(0).getReg();
  bool DstIsDead = MI.getOperand(0).isDead();
  bool UseRev = false;
  unsigned PredIdx, DOPIdx, SrcIdx, Src2Idx;

  // Work out which operand indices hold the predicate, the destructive
  // operand and the source(s), possibly swapping to the reversed form when
  // the destination aliases a non-destructive source.
  switch (DType) {
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    if (DstReg == MI.getOperand(3).getReg()) {
      // FSUB Zd, Pg, Zs1, Zd  ==> FSUBR   Zd, Pg/m, Zd, Zs1
      std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 3, 2);
      UseRev = true;
      break;
    }
    [[fallthrough]];
  case AArch64::DestructiveBinary:
  case AArch64::DestructiveBinaryImm:
    std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 2, 3);
    break;
  case AArch64::DestructiveUnaryPassthru:
    std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(2, 3, 3);
    break;
  case AArch64::DestructiveTernaryCommWithRev:
    std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 2, 3, 4);
    if (DstReg == MI.getOperand(3).getReg()) {
      // FMLA Zd, Pg, Za, Zd, Zm ==> FMAD Zdn, Pg, Zm, Za
      std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 3, 4, 2);
      UseRev = true;
    } else if (DstReg == MI.getOperand(4).getReg()) {
      // FMLA Zd, Pg, Za, Zm, Zd ==> FMAD Zdn, Pg, Zm, Za
      std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 4, 3, 2);
      UseRev = true;
    }
    break;
  default:
    llvm_unreachable("Unsupported Destructive Operand type");
  }

  // MOVPRFX can only be used if the destination operand
  // is the destructive operand, not as any other operand,
  // so the Destructive Operand must be unique.
  bool DOPRegIsUnique = false;
  switch (DType) {
  case AArch64::DestructiveBinary:
    DOPRegIsUnique = DstReg != MI.getOperand(SrcIdx).getReg();
    break;
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    DOPRegIsUnique =
      DstReg != MI.getOperand(DOPIdx).getReg() ||
      MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg();
    break;
  case AArch64::DestructiveUnaryPassthru:
  case AArch64::DestructiveBinaryImm:
    DOPRegIsUnique = true;
    break;
  case AArch64::DestructiveTernaryCommWithRev:
    DOPRegIsUnique =
        DstReg != MI.getOperand(DOPIdx).getReg() ||
        (MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg() &&
         MI.getOperand(DOPIdx).getReg() != MI.getOperand(Src2Idx).getReg());
    break;
  }

  // Resolve the reverse opcode
  if (UseRev) {
    int NewOpcode;
    // e.g. DIV -> DIVR
    if ((NewOpcode = AArch64::getSVERevInstr(Opcode)) != -1)
      Opcode = NewOpcode;
    // e.g. DIVR -> DIV
    else if ((NewOpcode = AArch64::getSVENonRevInstr(Opcode)) != -1)
      Opcode = NewOpcode;
  }

  // Get the right MOVPRFX
  uint64_t ElementSize = TII->getElementSizeForOpcode(Opcode);
  unsigned MovPrfx, LSLZero, MovPrfxZero;
  switch (ElementSize) {
  case AArch64::ElementSizeNone:
  case AArch64::ElementSizeB:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    LSLZero = AArch64::LSL_ZPmI_B;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B;
    break;
  case AArch64::ElementSizeH:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    LSLZero = AArch64::LSL_ZPmI_H;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H;
    break;
  case AArch64::ElementSizeS:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    LSLZero = AArch64::LSL_ZPmI_S;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S;
    break;
  case AArch64::ElementSizeD:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    LSLZero = AArch64::LSL_ZPmI_D;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D;
    break;
  default:
    llvm_unreachable("Unsupported ElementSize");
  }

  //
  // Create the destructive operation (if required)
  //
  MachineInstrBuilder PRFX, DOP;
  if (FalseZero) {
    // If we cannot prefix the requested instruction we'll instead emit a
    // prefixed_zeroing_mov for DestructiveBinary.
    assert((DOPRegIsUnique || DType == AArch64::DestructiveBinary ||
            DType == AArch64::DestructiveBinaryComm) &&
           "The destructive operand should be unique");
    assert(ElementSize != AArch64::ElementSizeNone &&
           "This instruction is unpredicated");

    // Merge source operand into destination register
    PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfxZero))
               .addReg(DstReg, RegState::Define)
               .addReg(MI.getOperand(PredIdx).getReg())
               .addReg(MI.getOperand(DOPIdx).getReg());

    // After the movprfx, the destructive operand is same as Dst
    DOPIdx = 0;

    // Create the additional LSL to zero the lanes when the DstReg is not
    // unique. Zeros the lanes in z0 that aren't active in p0 with sequence
    // movprfx z0.b, p0/z, z0.b; lsl z0.b, p0/m, z0.b, #0;
    if ((DType == AArch64::DestructiveBinary ||
         DType == AArch64::DestructiveBinaryComm) &&
        !DOPRegIsUnique) {
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LSLZero))
          .addReg(DstReg, RegState::Define)
          .add(MI.getOperand(PredIdx))
          .addReg(DstReg)
          .addImm(0);
    }
  } else if (DstReg != MI.getOperand(DOPIdx).getReg()) {
    // Plain (unpredicated) MOVPRFX to satisfy the tied-operand constraint.
    assert(DOPRegIsUnique && "The destructive operand should be unique");
    PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx))
               .addReg(DstReg, RegState::Define)
               .addReg(MI.getOperand(DOPIdx).getReg());
    DOPIdx = 0;
  }

  //
  // Create the destructive operation
  //
  DOP = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode))
    .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));

  switch (DType) {
  case AArch64::DestructiveUnaryPassthru:
    DOP.addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
        .add(MI.getOperand(PredIdx))
        .add(MI.getOperand(SrcIdx));
    break;
  case AArch64::DestructiveBinary:
  case AArch64::DestructiveBinaryImm:
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    DOP.add(MI.getOperand(PredIdx))
       .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
       .add(MI.getOperand(SrcIdx));
    break;
  case AArch64::DestructiveTernaryCommWithRev:
    DOP.add(MI.getOperand(PredIdx))
        .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
        .add(MI.getOperand(SrcIdx))
        .add(MI.getOperand(Src2Idx));
    break;
  }

  if (PRFX) {
    // MOVPRFX and the destructive op must stay adjacent: bundle them.
    finalizeBundle(MBB, PRFX->getIterator(), MBBI->getIterator());
    transferImpOps(MI, PRFX, DOP);
  } else
    transferImpOps(MI, DOP, DOP);

  MI.eraseFromParent();
  return true;
}
/// Expand an ST(Z)Gloop_wback pseudo: set (and, for STZG, zero) MTE memory
/// tags over a Size-byte region with a loop of 32-byte post-indexed
/// ST(Z)2G stores.  If Size is an odd multiple of 16, a single leading
/// ST(Z)G handles the extra granule first.
bool AArch64ExpandPseudo::expandSetTagLoop(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  Register SizeReg = MI.getOperand(0).getReg();
  Register AddressReg = MI.getOperand(1).getReg();

  MachineFunction *MF = MBB.getParent();

  // STZGloop also zeroes the tagged memory; STGloop only writes the tags.
  bool ZeroData = MI.getOpcode() == AArch64::STZGloop_wback;
  const unsigned OpCode1 =
      ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex;
  const unsigned OpCode2 =
      ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;

  unsigned Size = MI.getOperand(2).getImm();
  assert(Size > 0 && Size % 16 == 0);
  if (Size % (16 * 2) != 0) {
    // Peel off one 16-byte granule so the loop can run in 32-byte steps.
    BuildMI(MBB, MBBI, DL, TII->get(OpCode1), AddressReg)
        .addReg(AddressReg)
        .addReg(AddressReg)
        .addImm(1);
    Size -= 16;
  }
  // Materialize the remaining byte count into SizeReg (loop counter).
  MachineBasicBlock::iterator I =
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), SizeReg)
          .addImm(Size);
  expandMOVImm(MBB, I, 64);

  auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoopBB);
  MF->insert(++LoopBB->getIterator(), DoneBB);

  // Loop body: store two granules post-indexed, decrement the counter,
  // branch back while it is non-zero.
  BuildMI(LoopBB, DL, TII->get(OpCode2))
      .addDef(AddressReg)
      .addReg(AddressReg)
      .addReg(AddressReg)
      .addImm(2)
      .cloneMemRefs(MI)
      .setMIFlags(MI.getFlags());
  BuildMI(LoopBB, DL, TII->get(AArch64::SUBXri))
      .addDef(SizeReg)
      .addReg(SizeReg)
      .addImm(16 * 2)
      .addImm(0);
  BuildMI(LoopBB, DL, TII->get(AArch64::CBNZX)).addUse(SizeReg).addMBB(LoopBB);

  LoopBB->addSuccessor(LoopBB);
  LoopBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoopBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();
  // Recompute liveness bottom up.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *LoopBB);
  // Do an extra pass in the loop to get the loop carried dependencies right.
  // FIXME: is this necessary?
  LoopBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoopBB);
  DoneBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *DoneBB);

  return true;
}
  638. bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
  639. MachineBasicBlock::iterator MBBI,
  640. unsigned Opc, unsigned N) {
  641. const TargetRegisterInfo *TRI =
  642. MBB.getParent()->getSubtarget().getRegisterInfo();
  643. MachineInstr &MI = *MBBI;
  644. for (unsigned Offset = 0; Offset < N; ++Offset) {
  645. int ImmOffset = MI.getOperand(2).getImm() + Offset;
  646. bool Kill = (Offset + 1 == N) ? MI.getOperand(1).isKill() : false;
  647. assert(ImmOffset >= -256 && ImmOffset < 256 &&
  648. "Immediate spill offset out of range");
  649. BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
  650. .addReg(
  651. TRI->getSubReg(MI.getOperand(0).getReg(), AArch64::zsub0 + Offset),
  652. Opc == AArch64::LDR_ZXI ? RegState::Define : 0)
  653. .addReg(MI.getOperand(1).getReg(), getKillRegState(Kill))
  654. .addImm(ImmOffset);
  655. }
  656. MI.eraseFromParent();
  657. return true;
  658. }
/// Expand the BLR_RVMARKER pseudo (a call with an "attached" objc runtime
/// call). Emits the real call, the `mov x29, x29` marker, and the BL to the
/// runtime function, then bundles all three so no pass can insert code
/// between them.
bool AArch64ExpandPseudo::expandCALL_RVMARKER(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
  // Expand CALL_RVMARKER pseudo to:
  // - a branch to the call target, followed by
  // - the special `mov x29, x29` marker, and
  // - another branch, to the runtime function
  // Mark the sequence as bundle, to avoid passes moving other code in between.
  MachineInstr &MI = *MBBI;
  MachineInstr *OriginalCall;
  // Operand 0 is the runtime function; operand 1 is the real call target.
  MachineOperand &RVTarget = MI.getOperand(0);
  MachineOperand &CallTarget = MI.getOperand(1);
  assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
         "invalid operand for regular call");
  assert(RVTarget.isGlobal() && "invalid operand for attached call");
  // Direct call (BL) for a global target, indirect (BLR) for a register.
  unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
  OriginalCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
  OriginalCall->addOperand(CallTarget);
  unsigned RegMaskStartIdx = 2;
  // Skip register arguments. Those are added during ISel, but are not
  // needed for the concrete branch.
  while (!MI.getOperand(RegMaskStartIdx).isRegMask()) {
    auto MOP = MI.getOperand(RegMaskStartIdx);
    assert(MOP.isReg() && "can only add register operands");
    // Re-add argument registers as implicit uses only.
    OriginalCall->addOperand(MachineOperand::CreateReg(
        MOP.getReg(), /*Def=*/false, /*Implicit=*/true));
    RegMaskStartIdx++;
  }
  // Copy the regmask and everything after it verbatim.
  for (const MachineOperand &MO :
       llvm::drop_begin(MI.operands(), RegMaskStartIdx))
    OriginalCall->addOperand(MO);
  // ORR FP, XZR, FP, lsl #0 encodes the `mov x29, x29` marker.
  BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs))
      .addReg(AArch64::FP, RegState::Define)
      .addReg(AArch64::XZR)
      .addReg(AArch64::FP)
      .addImm(0);
  // The call to the attached runtime function.
  auto *RVCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::BL))
                     .add(RVTarget)
                     .getInstr();
  // Call-site info must follow the real call, not the erased pseudo.
  if (MI.shouldUpdateCallSiteInfo())
    MBB.getParent()->moveCallSiteInfo(&MI, OriginalCall);
  MI.eraseFromParent();
  finalizeBundle(MBB, OriginalCall->getIterator(),
                 std::next(RVCall->getIterator()));
  return true;
}
  704. bool AArch64ExpandPseudo::expandCALL_BTI(MachineBasicBlock &MBB,
  705. MachineBasicBlock::iterator MBBI) {
  706. // Expand CALL_BTI pseudo to:
  707. // - a branch to the call target
  708. // - a BTI instruction
  709. // Mark the sequence as a bundle, to avoid passes moving other code in
  710. // between.
  711. MachineInstr &MI = *MBBI;
  712. MachineOperand &CallTarget = MI.getOperand(0);
  713. assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
  714. "invalid operand for regular call");
  715. unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
  716. MachineInstr *Call =
  717. BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
  718. Call->addOperand(CallTarget);
  719. Call->setCFIType(*MBB.getParent(), MI.getCFIType());
  720. MachineInstr *BTI =
  721. BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::HINT))
  722. // BTI J so that setjmp can to BR to this.
  723. .addImm(36)
  724. .getInstr();
  725. if (MI.shouldUpdateCallSiteInfo())
  726. MBB.getParent()->moveCallSiteInfo(&MI, Call);
  727. MI.eraseFromParent();
  728. finalizeBundle(MBB, Call->getIterator(), std::next(BTI->getIterator()));
  729. return true;
  730. }
/// Expand StoreSwiftAsyncContext: store the async context register CtxReg at
/// [BaseReg + Offset]. On arm64e the pointer is signed with PACDB (using a
/// discriminator derived from the store address) before being stored; on all
/// other targets it is a plain STRXui.
/// NOTE(review): both store paths use `Offset / 8` as the scaled STRXui
/// immediate, which assumes Offset is a non-negative multiple of 8 at the
/// final store — confirm against the pseudo's emitters.
bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
  Register CtxReg = MBBI->getOperand(0).getReg();
  Register BaseReg = MBBI->getOperand(1).getReg();
  int Offset = MBBI->getOperand(2).getImm();
  DebugLoc DL(MBBI->getDebugLoc());
  auto &STI = MBB.getParent()->getSubtarget<AArch64Subtarget>();

  // Non-arm64e: no pointer authentication, emit a plain store.
  if (STI.getTargetTriple().getArchName() != "arm64e") {
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
        .addUse(CtxReg)
        .addUse(BaseReg)
        .addImm(Offset / 8)
        .setMIFlag(MachineInstr::FrameSetup);
    MBBI->eraseFromParent();
    return true;
  }

  // We need to sign the context in an address-discriminated way. 0xc31a is a
  // fixed random value, chosen as part of the ABI.
  //     add x16, xBase, #Offset
  //     movk x16, #0xc31a, lsl #48
  //     mov x17, x22/xzr
  //     pacdb x17, x16
  //     str x17, [xBase, #Offset]
  // X16 holds the discriminator: the store address with 0xc31a in bits 48-63.
  unsigned Opc = Offset >= 0 ? AArch64::ADDXri : AArch64::SUBXri;
  BuildMI(MBB, MBBI, DL, TII->get(Opc), AArch64::X16)
      .addUse(BaseReg)
      .addImm(abs(Offset))
      .addImm(0)
      .setMIFlag(MachineInstr::FrameSetup);
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X16)
      .addUse(AArch64::X16)
      .addImm(0xc31a)
      .addImm(48)
      .setMIFlag(MachineInstr::FrameSetup);

  // We're not allowed to clobber X22 (and couldn't clobber XZR if we tried), so
  // move it somewhere before signing.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17)
      .addUse(AArch64::XZR)
      .addUse(CtxReg)
      .addImm(0)
      .setMIFlag(MachineInstr::FrameSetup);
  // Sign X17 (the context value) with key DB and the X16 discriminator.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACDB), AArch64::X17)
      .addUse(AArch64::X17)
      .addUse(AArch64::X16)
      .setMIFlag(MachineInstr::FrameSetup);
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
      .addUse(AArch64::X17)
      .addUse(BaseReg)
      .addImm(Offset / 8)
      .setMIFlag(MachineInstr::FrameSetup);

  MBBI->eraseFromParent();
  return true;
}
/// Expand RestoreZAPseudo into a guarded call: compare the TPIDR2_EL0 value
/// (operand 0) against zero with CBZ, and only when it is non-zero call the
/// restore routine (operand 1) in a newly split block. Returns the block in
/// which expansion should continue (EndBB).
MachineBasicBlock *
AArch64ExpandPseudo::expandRestoreZA(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI) {
  MachineInstr &MI = *MBBI;
  // The split logic below needs a successor instruction or block.
  assert((std::next(MBBI) != MBB.end() ||
          MI.getParent()->successors().begin() !=
              MI.getParent()->successors().end()) &&
         "Unexpected unreachable in block that restores ZA");

  // Compare TPIDR2_EL0 value against 0.
  DebugLoc DL = MI.getDebugLoc();
  MachineInstrBuilder Cbz = BuildMI(MBB, MBBI, DL, TII->get(AArch64::CBZX))
                                .add(MI.getOperand(0));

  // Split MBB and create two new blocks:
  //  - MBB now contains all instructions before RestoreZAPseudo.
  //  - SMBB contains the RestoreZAPseudo instruction only.
  //  - EndBB contains all instructions after RestoreZAPseudo.
  MachineInstr &PrevMI = *std::prev(MBBI);
  MachineBasicBlock *SMBB = MBB.splitAt(PrevMI, /*UpdateLiveIns*/ true);
  // If the pseudo was the last instruction of SMBB, its fallthrough successor
  // already serves as EndBB; otherwise split again after the pseudo.
  MachineBasicBlock *EndBB = std::next(MI.getIterator()) == SMBB->end()
                                 ? *SMBB->successors().begin()
                                 : SMBB->splitAt(MI, /*UpdateLiveIns*/ true);

  // Add the SMBB label to the CBZ instruction & create a branch to EndBB.
  Cbz.addMBB(SMBB);
  BuildMI(&MBB, DL, TII->get(AArch64::B))
      .addMBB(EndBB);
  MBB.addSuccessor(EndBB);

  // Replace the pseudo with a call (BL).
  MachineInstrBuilder MIB =
      BuildMI(*SMBB, SMBB->end(), DL, TII->get(AArch64::BL));
  // Operand 1 is the callee, kept live as an implicit use across the call.
  MIB.addReg(MI.getOperand(1).getReg(), RegState::Implicit);
  // Forward the remaining operands (e.g. the regmask) to the call.
  for (unsigned I = 2; I < MI.getNumOperands(); ++I)
    MIB.add(MI.getOperand(I));
  BuildMI(SMBB, DL, TII->get(AArch64::B)).addMBB(EndBB);

  MI.eraseFromParent();
  return EndBB;
}
/// Expand MSRpstatePseudo into a conditional SMSTART/SMSTOP: test the live
/// pstate.sm value and branch around the mode toggle when it already matches
/// what the callee expects. Returns the block in which expansion should
/// continue (EndBB, or MBB when the pseudo precedes an unreachable).
MachineBasicBlock *
AArch64ExpandPseudo::expandCondSMToggle(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI) {
  MachineInstr &MI = *MBBI;
  // In the case of a smstart/smstop before a unreachable, just remove the
  // pseudo. Exception handling code generated by Clang may introduce
  // unreachables and it seems unnecessary to restore pstate.sm when that
  // happens. Note that it is not just an optimisation, the code below expects
  // a successor instruction/block in order to split the block at MBBI.
  if (std::next(MBBI) == MBB.end() &&
      MI.getParent()->successors().begin() ==
          MI.getParent()->successors().end()) {
    MI.eraseFromParent();
    return &MBB;
  }

  // Expand the pseudo into smstart or smstop instruction. The pseudo has the
  // following operands:
  //
  //   MSRpstatePseudo <za|sm|both>, <0|1>, pstate.sm, expectedval, <regmask>
  //
  // The pseudo is expanded into a conditional smstart/smstop, with a
  // check if pstate.sm (register) equals the expected value, and if not,
  // invokes the smstart/smstop.
  //
  // As an example, the following block contains a normal call from a
  // streaming-compatible function:
  //
  // OrigBB:
  //   MSRpstatePseudo 3, 0, %0, 0, <regmask>             <- Conditional SMSTOP
  //   bl @normal_callee
  //   MSRpstatePseudo 3, 1, %0, 0, <regmask>             <- Conditional SMSTART
  //
  // ...which will be transformed into:
  //
  // OrigBB:
  //   TBNZx %0:gpr64, 0, SMBB
  //   b EndBB
  //
  // SMBB:
  //   MSRpstatesvcrImm1 3, 0, <regmask>                  <- SMSTOP
  //
  // EndBB:
  //   bl @normal_callee
  //   MSRcond_pstatesvcrImm1 3, 1, <regmask>             <- SMSTART
  //
  DebugLoc DL = MI.getDebugLoc();

  // Create the conditional branch based on the third operand of the
  // instruction, which tells us if we are wrapping a normal or streaming
  // function.
  // We test the live value of pstate.sm and toggle pstate.sm if this is not the
  // expected value for the callee (0 for a normal callee and 1 for a streaming
  // callee).
  auto PStateSM = MI.getOperand(2).getReg();
  bool IsStreamingCallee = MI.getOperand(3).getImm();
  // Streaming callee: toggle when bit 0 is clear (TBZ); normal callee: toggle
  // when bit 0 is set (TBNZ).
  unsigned Opc = IsStreamingCallee ? AArch64::TBZX : AArch64::TBNZX;
  MachineInstrBuilder Tbx =
      BuildMI(MBB, MBBI, DL, TII->get(Opc)).addReg(PStateSM).addImm(0);

  // Split MBB and create two new blocks:
  //  - MBB now contains all instructions before MSRcond_pstatesvcrImm1.
  //  - SMBB contains the MSRcond_pstatesvcrImm1 instruction only.
  //  - EndBB contains all instructions after MSRcond_pstatesvcrImm1.
  MachineInstr &PrevMI = *std::prev(MBBI);
  MachineBasicBlock *SMBB = MBB.splitAt(PrevMI, /*UpdateLiveIns*/ true);
  // If the pseudo ends SMBB, the fallthrough successor already serves as
  // EndBB; otherwise split again after the pseudo.
  MachineBasicBlock *EndBB = std::next(MI.getIterator()) == SMBB->end()
                                 ? *SMBB->successors().begin()
                                 : SMBB->splitAt(MI, /*UpdateLiveIns*/ true);

  // Add the SMBB label to the TB[N]Z instruction & create a branch to EndBB.
  Tbx.addMBB(SMBB);
  BuildMI(&MBB, DL, TII->get(AArch64::B))
      .addMBB(EndBB);
  MBB.addSuccessor(EndBB);

  // Create the SMSTART/SMSTOP (MSRpstatesvcrImm1) instruction in SMBB.
  MachineInstrBuilder MIB = BuildMI(*SMBB, SMBB->begin(), MI.getDebugLoc(),
                                    TII->get(AArch64::MSRpstatesvcrImm1));
  // Copy all but the second and third operands of MSRcond_pstatesvcrImm1 (as
  // these contain the CopyFromReg for the first argument and the flag to
  // indicate whether the callee is streaming or normal).
  MIB.add(MI.getOperand(0));
  MIB.add(MI.getOperand(1));
  for (unsigned i = 4; i < MI.getNumOperands(); ++i)
    MIB.add(MI.getOperand(i));

  BuildMI(SMBB, DL, TII->get(AArch64::B)).addMBB(EndBB);

  MI.eraseFromParent();
  return EndBB;
}
/// If MBBI references a pseudo instruction that should be expanded here,
/// do the expansion and return true.  Otherwise return false.
bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();

  // Check if we can expand the destructive op
  int OrigInstr = AArch64::getSVEPseudoMap(MI.getOpcode());
  if (OrigInstr != -1) {
    auto &Orig = TII->get(OrigInstr);
    if ((Orig.TSFlags & AArch64::DestructiveInstTypeMask)
        != AArch64::NotDestructive) {
      return expand_DestructiveOp(MI, MBB, MBBI);
    }
  }

  switch (Opcode) {
  default:
    break;

  case AArch64::BSPv8i8:
  case AArch64::BSPv16i8: {
    // BSP dst, mask, in1, in2 — pick BIT/BIF/BSL depending on which source
    // register (if any) the destination aliases.
    Register DstReg = MI.getOperand(0).getReg();
    if (DstReg == MI.getOperand(3).getReg()) {
      // Expand to BIT
      BuildMI(MBB, MBBI, MI.getDebugLoc(),
              TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BITv8i8
                                                  : AArch64::BITv16i8))
          .add(MI.getOperand(0))
          .add(MI.getOperand(3))
          .add(MI.getOperand(2))
          .add(MI.getOperand(1));
    } else if (DstReg == MI.getOperand(2).getReg()) {
      // Expand to BIF
      BuildMI(MBB, MBBI, MI.getDebugLoc(),
              TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BIFv8i8
                                                  : AArch64::BIFv16i8))
          .add(MI.getOperand(0))
          .add(MI.getOperand(2))
          .add(MI.getOperand(3))
          .add(MI.getOperand(1));
    } else {
      // Expand to BSL, use additional move if required
      if (DstReg == MI.getOperand(1).getReg()) {
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
                                                    : AArch64::BSLv16i8))
            .add(MI.getOperand(0))
            .add(MI.getOperand(1))
            .add(MI.getOperand(2))
            .add(MI.getOperand(3));
      } else {
        // Dst aliases nothing: copy the mask into Dst with ORR, then BSL.
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::ORRv8i8
                                                    : AArch64::ORRv16i8))
            .addReg(DstReg,
                    RegState::Define |
                        getRenamableRegState(MI.getOperand(0).isRenamable()))
            .add(MI.getOperand(1))
            .add(MI.getOperand(1));
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
                                                    : AArch64::BSLv16i8))
            .add(MI.getOperand(0))
            .addReg(DstReg,
                    RegState::Kill |
                        getRenamableRegState(MI.getOperand(0).isRenamable()))
            .add(MI.getOperand(2))
            .add(MI.getOperand(3));
      }
    }
    MI.eraseFromParent();
    return true;
  }

  case AArch64::ADDWrr:
  case AArch64::SUBWrr:
  case AArch64::ADDXrr:
  case AArch64::SUBXrr:
  case AArch64::ADDSWrr:
  case AArch64::SUBSWrr:
  case AArch64::ADDSXrr:
  case AArch64::SUBSXrr:
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::ANDSWrr:
  case AArch64::ANDSXrr:
  case AArch64::BICSWrr:
  case AArch64::BICSXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr: {
    // Map each register-register pseudo to its shifted-register form with a
    // zero LSL shift amount.
    unsigned Opcode;
    switch (MI.getOpcode()) {
    default:
      return false;
    case AArch64::ADDWrr:      Opcode = AArch64::ADDWrs; break;
    case AArch64::SUBWrr:      Opcode = AArch64::SUBWrs; break;
    case AArch64::ADDXrr:      Opcode = AArch64::ADDXrs; break;
    case AArch64::SUBXrr:      Opcode = AArch64::SUBXrs; break;
    case AArch64::ADDSWrr:     Opcode = AArch64::ADDSWrs; break;
    case AArch64::SUBSWrr:     Opcode = AArch64::SUBSWrs; break;
    case AArch64::ADDSXrr:     Opcode = AArch64::ADDSXrs; break;
    case AArch64::SUBSXrr:     Opcode = AArch64::SUBSXrs; break;
    case AArch64::ANDWrr:      Opcode = AArch64::ANDWrs; break;
    case AArch64::ANDXrr:      Opcode = AArch64::ANDXrs; break;
    case AArch64::BICWrr:      Opcode = AArch64::BICWrs; break;
    case AArch64::BICXrr:      Opcode = AArch64::BICXrs; break;
    case AArch64::ANDSWrr:     Opcode = AArch64::ANDSWrs; break;
    case AArch64::ANDSXrr:     Opcode = AArch64::ANDSXrs; break;
    case AArch64::BICSWrr:     Opcode = AArch64::BICSWrs; break;
    case AArch64::BICSXrr:     Opcode = AArch64::BICSXrs; break;
    case AArch64::EONWrr:      Opcode = AArch64::EONWrs; break;
    case AArch64::EONXrr:      Opcode = AArch64::EONXrs; break;
    case AArch64::EORWrr:      Opcode = AArch64::EORWrs; break;
    case AArch64::EORXrr:      Opcode = AArch64::EORXrs; break;
    case AArch64::ORNWrr:      Opcode = AArch64::ORNWrs; break;
    case AArch64::ORNXrr:      Opcode = AArch64::ORNXrs; break;
    case AArch64::ORRWrr:      Opcode = AArch64::ORRWrs; break;
    case AArch64::ORRXrr:      Opcode = AArch64::ORRXrs; break;
    }
    MachineFunction &MF = *MBB.getParent();
    // Try to create new inst without implicit operands added.
    MachineInstr *NewMI = MF.CreateMachineInstr(
        TII->get(Opcode), MI.getDebugLoc(), /*NoImplicit=*/true);
    MBB.insert(MBBI, NewMI);
    MachineInstrBuilder MIB1(MF, NewMI);
    // Preserve the pseudo's PC-sections metadata on the real instruction.
    MIB1->setPCSections(MF, MI.getPCSections());
    MIB1.addReg(MI.getOperand(0).getReg(), RegState::Define)
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    transferImpOps(MI, MIB1, MIB1);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::LOADgot: {
    MachineFunction *MF = MBB.getParent();
    Register DstReg = MI.getOperand(0).getReg();
    const MachineOperand &MO1 = MI.getOperand(1);
    unsigned Flags = MO1.getTargetFlags();

    if (MF->getTarget().getCodeModel() == CodeModel::Tiny) {
      // Tiny codemodel expand to LDR
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                                        TII->get(AArch64::LDRXl), DstReg);

      if (MO1.isGlobal()) {
        MIB.addGlobalAddress(MO1.getGlobal(), 0, Flags);
      } else if (MO1.isSymbol()) {
        MIB.addExternalSymbol(MO1.getSymbolName(), Flags);
      } else {
        assert(MO1.isCPI() &&
               "Only expect globals, externalsymbols, or constant pools");
        MIB.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), Flags);
      }
    } else {
      // Small codemodel expand into ADRP + LDR.
      MachineFunction &MF = *MI.getParent()->getParent();
      DebugLoc DL = MI.getDebugLoc();
      MachineInstrBuilder MIB1 =
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);

      MachineInstrBuilder MIB2;
      if (MF.getSubtarget<AArch64Subtarget>().isTargetILP32()) {
        // ILP32: load the 32-bit sub-register, keep the 64-bit register as an
        // implicit def carrying the original operand flags.
        auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
        unsigned Reg32 = TRI->getSubReg(DstReg, AArch64::sub_32);
        unsigned DstFlags = MI.getOperand(0).getTargetFlags();
        MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRWui))
                   .addDef(Reg32)
                   .addReg(DstReg, RegState::Kill)
                   .addReg(DstReg, DstFlags | RegState::Implicit);
      } else {
        Register DstReg = MI.getOperand(0).getReg();
        MIB2 = BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui))
                   .add(MI.getOperand(0))
                   .addUse(DstReg, RegState::Kill);
      }

      if (MO1.isGlobal()) {
        MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
        MIB2.addGlobalAddress(MO1.getGlobal(), 0,
                              Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
      } else if (MO1.isSymbol()) {
        MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
        MIB2.addExternalSymbol(MO1.getSymbolName(), Flags |
                                                        AArch64II::MO_PAGEOFF |
                                                        AArch64II::MO_NC);
      } else {
        assert(MO1.isCPI() &&
               "Only expect globals, externalsymbols, or constant pools");
        MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                  Flags | AArch64II::MO_PAGE);
        MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                  Flags | AArch64II::MO_PAGEOFF |
                                      AArch64II::MO_NC);
      }

      transferImpOps(MI, MIB1, MIB2);
    }
    MI.eraseFromParent();
    return true;
  }

  case AArch64::MOVaddrBA: {
    MachineFunction &MF = *MI.getParent()->getParent();
    if (MF.getSubtarget<AArch64Subtarget>().isTargetMachO()) {
      // blockaddress expressions have to come from a constant pool because the
      // largest addend (and hence offset within a function) allowed for ADRP is
      // only 8MB.
      const BlockAddress *BA = MI.getOperand(1).getBlockAddress();
      assert(MI.getOperand(1).getOffset() == 0 && "unexpected offset");

      MachineConstantPool *MCP = MF.getConstantPool();
      unsigned CPIdx = MCP->getConstantPoolIndex(BA, Align(8));

      Register DstReg = MI.getOperand(0).getReg();
      auto MIB1 =
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
              .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
      auto MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                          TII->get(AArch64::LDRXui), DstReg)
                      .addUse(DstReg)
                      .addConstantPoolIndex(
                          CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
      transferImpOps(MI, MIB1, MIB2);
      MI.eraseFromParent();
      return true;
    }
  }
    // Non-MachO MOVaddrBA falls through to the generic ADRP + ADD expansion.
    [[fallthrough]];
  case AArch64::MOVaddr:
  case AArch64::MOVaddrJT:
  case AArch64::MOVaddrCP:
  case AArch64::MOVaddrTLS:
  case AArch64::MOVaddrEXT: {
    // Expand into ADRP + ADD.
    Register DstReg = MI.getOperand(0).getReg();
    assert(DstReg != AArch64::XZR);
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
            .add(MI.getOperand(1));

    if (MI.getOperand(1).getTargetFlags() & AArch64II::MO_TAGGED) {
      // MO_TAGGED on the page indicates a tagged address. Set the tag now.
      // We do so by creating a MOVK that sets bits 48-63 of the register to
      // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
      // the small code model so we can assume a binary size of <= 4GB, which
      // makes the untagged PC relative offset positive. The binary must also be
      // loaded into address range [0, 2^48). Both of these properties need to
      // be ensured at runtime when using tagged addresses.
      auto Tag = MI.getOperand(1);
      Tag.setTargetFlags(AArch64II::MO_PREL | AArch64II::MO_G3);
      Tag.setOffset(0x100000000);
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi), DstReg)
          .addReg(DstReg)
          .add(Tag)
          .addImm(48);
    }

    MachineInstrBuilder MIB2 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
            .add(MI.getOperand(0))
            .addReg(DstReg)
            .add(MI.getOperand(2))
            .addImm(0);

    transferImpOps(MI, MIB1, MIB2);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::ADDlowTLS:
    // Produce a plain ADD
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .addImm(0);
    MI.eraseFromParent();
    return true;

  case AArch64::MOVbaseTLS: {
    // Read the thread pointer from the exception-level-appropriate TPIDR
    // system register.
    Register DstReg = MI.getOperand(0).getReg();
    auto SysReg = AArch64SysReg::TPIDR_EL0;
    MachineFunction *MF = MBB.getParent();
    if (MF->getSubtarget<AArch64Subtarget>().useEL3ForTP())
      SysReg = AArch64SysReg::TPIDR_EL3;
    else if (MF->getSubtarget<AArch64Subtarget>().useEL2ForTP())
      SysReg = AArch64SysReg::TPIDR_EL2;
    else if (MF->getSubtarget<AArch64Subtarget>().useEL1ForTP())
      SysReg = AArch64SysReg::TPIDR_EL1;
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
        .addImm(SysReg);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::MOVi32imm:
    return expandMOVImm(MBB, MBBI, 32);
  case AArch64::MOVi64imm:
    return expandMOVImm(MBB, MBBI, 64);
  case AArch64::RET_ReallyLR: {
    // Hiding the LR use with RET_ReallyLR may lead to extra kills in the
    // function and missing live-ins. We are fine in practice because callee
    // saved register handling ensures the register value is restored before
    // RET, but we need the undef flag here to appease the MachineVerifier
    // liveness checks.
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
          .addReg(AArch64::LR, RegState::Undef);
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::CMP_SWAP_8:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRB, AArch64::STLXRB,
                          AArch64::SUBSWrx,
                          AArch64_AM::getArithExtendImm(AArch64_AM::UXTB, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_16:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRH, AArch64::STLXRH,
                          AArch64::SUBSWrx,
                          AArch64_AM::getArithExtendImm(AArch64_AM::UXTH, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_32:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRW, AArch64::STLXRW,
                          AArch64::SUBSWrs,
                          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_64:
    return expandCMP_SWAP(MBB, MBBI,
                          AArch64::LDAXRX, AArch64::STLXRX, AArch64::SUBSXrs,
                          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
                          AArch64::XZR, NextMBBI);
  case AArch64::CMP_SWAP_128:
  case AArch64::CMP_SWAP_128_RELEASE:
  case AArch64::CMP_SWAP_128_ACQUIRE:
  case AArch64::CMP_SWAP_128_MONOTONIC:
    return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);

  case AArch64::AESMCrrTied:
  case AArch64::AESIMCrrTied: {
    // Drop the tied-operand constraint by re-emitting the plain AES
    // instruction.
    MachineInstrBuilder MIB =
    BuildMI(MBB, MBBI, MI.getDebugLoc(),
            TII->get(Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr :
                                                      AArch64::AESIMCrr))
      .add(MI.getOperand(0))
      .add(MI.getOperand(1));
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
   }
   case AArch64::IRGstack: {
     MachineFunction &MF = *MBB.getParent();
     const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
     const AArch64FrameLowering *TFI =
         MF.getSubtarget<AArch64Subtarget>().getFrameLowering();

     // IRG does not allow immediate offset. getTaggedBasePointerOffset should
     // almost always point to SP-after-prologue; if not, emit a longer
     // instruction sequence.
     int BaseOffset = -AFI->getTaggedBasePointerOffset();
     Register FrameReg;
     StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
         MF, BaseOffset, false /*isFixed*/, false /*isSVE*/, FrameReg,
         /*PreferFP=*/false,
         /*ForSimm=*/true);
     Register SrcReg = FrameReg;
     if (FrameRegOffset) {
       // Use output register as temporary.
       SrcReg = MI.getOperand(0).getReg();
       emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg,
                       FrameRegOffset, TII);
     }
     BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::IRG))
         .add(MI.getOperand(0))
         .addUse(SrcReg)
         .add(MI.getOperand(2));
     MI.eraseFromParent();
     return true;
   }
   case AArch64::TAGPstack: {
     // ADDG/SUBG take an unsigned offset; choose by the sign of the operand.
     int64_t Offset = MI.getOperand(2).getImm();
     BuildMI(MBB, MBBI, MI.getDebugLoc(),
             TII->get(Offset >= 0 ? AArch64::ADDG : AArch64::SUBG))
         .add(MI.getOperand(0))
         .add(MI.getOperand(1))
         .addImm(std::abs(Offset))
         .add(MI.getOperand(4));
     MI.eraseFromParent();
     return true;
   }
   case AArch64::STGloop_wback:
   case AArch64::STZGloop_wback:
     return expandSetTagLoop(MBB, MBBI, NextMBBI);
   case AArch64::STGloop:
   case AArch64::STZGloop:
     report_fatal_error(
         "Non-writeback variants of STGloop / STZGloop should not "
         "survive past PrologEpilogInserter.");
   case AArch64::STR_ZZZZXI:
     return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4);
   case AArch64::STR_ZZZXI:
     return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3);
   case AArch64::STR_ZZXI:
     return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2);
   case AArch64::LDR_ZZZZXI:
     return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4);
   case AArch64::LDR_ZZZXI:
     return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
   case AArch64::LDR_ZZXI:
     return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
   case AArch64::BLR_RVMARKER:
     return expandCALL_RVMARKER(MBB, MBBI);
   case AArch64::BLR_BTI:
     return expandCALL_BTI(MBB, MBBI);
   case AArch64::StoreSwiftAsyncContext:
     return expandStoreSwiftAsyncContext(MBB, MBBI);
   case AArch64::RestoreZAPseudo: {
     auto *NewMBB = expandRestoreZA(MBB, MBBI);
     if (NewMBB != &MBB)
       NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated.
     return true;
   }
   case AArch64::MSRpstatePseudo: {
     auto *NewMBB = expandCondSMToggle(MBB, MBBI);
     if (NewMBB != &MBB)
       NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated.
     return true;
   }
   case AArch64::OBSCURE_COPY: {
     // Expand to a plain register move (ORR Xd, XZR, Xs) unless it is a no-op
     // copy of a register to itself.
     if (MI.getOperand(0).getReg() != MI.getOperand(1).getReg()) {
       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs))
           .add(MI.getOperand(0))
           .addReg(AArch64::XZR)
           .add(MI.getOperand(1))
           .addImm(0);
     }
     MI.eraseFromParent();
     return true;
   }
  }
  return false;
}
  1339. /// Iterate over the instructions in basic block MBB and expand any
  1340. /// pseudo instructions. Return true if anything was modified.
  1341. bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
  1342. bool Modified = false;
  1343. MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  1344. while (MBBI != E) {
  1345. MachineBasicBlock::iterator NMBBI = std::next(MBBI);
  1346. Modified |= expandMI(MBB, MBBI, NMBBI);
  1347. MBBI = NMBBI;
  1348. }
  1349. return Modified;
  1350. }
  1351. bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
  1352. TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
  1353. bool Modified = false;
  1354. for (auto &MBB : MF)
  1355. Modified |= expandMBB(MBB);
  1356. return Modified;
  1357. }
/// Returns an instance of the pseudo instruction expansion pass.
/// Factory used by the AArch64 target's pass pipeline setup.
FunctionPass *llvm::createAArch64ExpandPseudoPass() {
  return new AArch64ExpandPseudo();
}