//===------- X86ExpandPseudo.cpp - Expand pseudo instructions -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands pseudo instructions into target
// instructions to allow proper scheduling, if-conversion, other late
// optimizations, or simply the encoding of the instructions.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86FrameLowering.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/Passes.h" // For IDs of passes that are preserved.
#include "llvm/IR/GlobalValue.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

#define DEBUG_TYPE "x86-pseudo"
#define X86_EXPAND_PSEUDO_NAME "X86 pseudo instruction expansion pass"

namespace {
class X86ExpandPseudo : public MachineFunctionPass {
public:
  static char ID;
  X86ExpandPseudo() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addPreservedID(MachineLoopInfoID);
    AU.addPreservedID(MachineDominatorsID);
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  const X86Subtarget *STI = nullptr;
  const X86InstrInfo *TII = nullptr;
  const X86RegisterInfo *TRI = nullptr;
  const X86MachineFunctionInfo *X86FI = nullptr;
  const X86FrameLowering *X86FL = nullptr;

  bool runOnMachineFunction(MachineFunction &Fn) override;

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

  StringRef getPassName() const override {
    return "X86 pseudo instruction expansion pass";
  }

private:
  void ExpandICallBranchFunnel(MachineBasicBlock *MBB,
                               MachineBasicBlock::iterator MBBI);
  void expandCALL_RVMARKER(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI);
  bool ExpandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
  bool ExpandMBB(MachineBasicBlock &MBB);

  /// This function expands pseudos which affect control flow.
  /// It is done in a separate pass to simplify block navigation in the
  /// main pass (calling ExpandMBB).
  bool ExpandPseudosWhichAffectControlFlow(MachineFunction &MF);

  /// Expand X86::VASTART_SAVE_XMM_REGS into a set of XMM copying
  /// instructions, placed into a separate block guarded by a check of the
  /// %al register (for the SystemV ABI).
  void ExpandVastartSaveXmmRegs(
      MachineBasicBlock *MBB,
      MachineBasicBlock::iterator VAStartPseudoInstr) const;
};
char X86ExpandPseudo::ID = 0;

} // End anonymous namespace.

INITIALIZE_PASS(X86ExpandPseudo, DEBUG_TYPE, X86_EXPAND_PSEUDO_NAME, false,
                false)

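// Expand an ICALL_BRANCH_FUNNEL pseudo: emit a branch funnel (a binary
// search over the jump-table targets) that compares the selector against
// each candidate target address, materialized via a RIP-relative LEA into
// %r11, and tail-calls the matching target.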
void X86ExpandPseudo::ExpandICallBranchFunnel(
    MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI) {
  MachineBasicBlock *JTMBB = MBB;
  MachineInstr *JTInst = &*MBBI;
  MachineFunction *MF = MBB->getParent();
  const BasicBlock *BB = MBB->getBasicBlock();
  auto InsPt = MachineFunction::iterator(MBB);
  ++InsPt;

  std::vector<std::pair<MachineBasicBlock *, unsigned>> TargetMBBs;
  const DebugLoc &DL = JTInst->getDebugLoc();
  MachineOperand Selector = JTInst->getOperand(0);
  const GlobalValue *CombinedGlobal = JTInst->getOperand(1).getGlobal();

  auto CmpTarget = [&](unsigned Target) {
    if (Selector.isReg())
      MBB->addLiveIn(Selector.getReg());
    BuildMI(*MBB, MBBI, DL, TII->get(X86::LEA64r), X86::R11)
        .addReg(X86::RIP)
        .addImm(1)
        .addReg(0)
        .addGlobalAddress(CombinedGlobal,
                          JTInst->getOperand(2 + 2 * Target).getImm())
        .addReg(0);
    BuildMI(*MBB, MBBI, DL, TII->get(X86::CMP64rr))
        .add(Selector)
        .addReg(X86::R11);
  };

  auto CreateMBB = [&]() {
    auto *NewMBB = MF->CreateMachineBasicBlock(BB);
    MBB->addSuccessor(NewMBB);
    if (!MBB->isLiveIn(X86::EFLAGS))
      MBB->addLiveIn(X86::EFLAGS);
    return NewMBB;
  };

  auto EmitCondJump = [&](unsigned CC, MachineBasicBlock *ThenMBB) {
    BuildMI(*MBB, MBBI, DL, TII->get(X86::JCC_1)).addMBB(ThenMBB).addImm(CC);

    auto *ElseMBB = CreateMBB();
    MF->insert(InsPt, ElseMBB);
    MBB = ElseMBB;
    MBBI = MBB->end();
  };

  auto EmitCondJumpTarget = [&](unsigned CC, unsigned Target) {
    auto *ThenMBB = CreateMBB();
    TargetMBBs.push_back({ThenMBB, Target});
    EmitCondJump(CC, ThenMBB);
  };

  auto EmitTailCall = [&](unsigned Target) {
    BuildMI(*MBB, MBBI, DL, TII->get(X86::TAILJMPd64))
        .add(JTInst->getOperand(3 + 2 * Target));
  };
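
  // Recursively emit a binary search over [FirstTarget, FirstTarget +
  // NumTargets): small ranges get a linear chain of compare-and-branch
  // pairs; larger ranges split around the midpoint and recurse into each
  // half.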
  std::function<void(unsigned, unsigned)> EmitBranchFunnel =
      [&](unsigned FirstTarget, unsigned NumTargets) {
        if (NumTargets == 1) {
          EmitTailCall(FirstTarget);
          return;
        }

        if (NumTargets == 2) {
          CmpTarget(FirstTarget + 1);
          EmitCondJumpTarget(X86::COND_B, FirstTarget);
          EmitTailCall(FirstTarget + 1);
          return;
        }

        if (NumTargets < 6) {
          CmpTarget(FirstTarget + 1);
          EmitCondJumpTarget(X86::COND_B, FirstTarget);
          EmitCondJumpTarget(X86::COND_E, FirstTarget + 1);
          EmitBranchFunnel(FirstTarget + 2, NumTargets - 2);
          return;
        }

        auto *ThenMBB = CreateMBB();
        CmpTarget(FirstTarget + (NumTargets / 2));
        EmitCondJump(X86::COND_B, ThenMBB);
        EmitCondJumpTarget(X86::COND_E, FirstTarget + (NumTargets / 2));
        EmitBranchFunnel(FirstTarget + (NumTargets / 2) + 1,
                         NumTargets - (NumTargets / 2) - 1);

        MF->insert(InsPt, ThenMBB);
        MBB = ThenMBB;
        MBBI = MBB->end();
        EmitBranchFunnel(FirstTarget, NumTargets / 2);
      };

  EmitBranchFunnel(0, (JTInst->getNumOperands() - 2) / 2);

  for (auto P : TargetMBBs) {
    MF->insert(InsPt, P.first);
    BuildMI(P.first, DL, TII->get(X86::TAILJMPd64))
        .add(JTInst->getOperand(3 + 2 * P.second));
  }
  JTMBB->erase(JTInst);
}

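/// Expand a CALL_RVMARKER pseudo into the original call, the marker move
/// that identifies it to the ObjC runtime, and the call to the runtime
/// function that retains the returned value.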
void X86ExpandPseudo::expandCALL_RVMARKER(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator MBBI) {
  // Expand CALL_RVMARKER pseudo to a call instruction, followed by the
  // special "movq %rax, %rdi" marker.
  MachineInstr &MI = *MBBI;
  MachineInstr *OriginalCall;
  assert((MI.getOperand(1).isGlobal() || MI.getOperand(1).isReg()) &&
         "invalid operand for regular call");
  unsigned Opc = -1;
  if (MI.getOpcode() == X86::CALL64m_RVMARKER)
    Opc = X86::CALL64m;
  else if (MI.getOpcode() == X86::CALL64r_RVMARKER)
    Opc = X86::CALL64r;
  else if (MI.getOpcode() == X86::CALL64pcrel32_RVMARKER)
    Opc = X86::CALL64pcrel32;
  else
    llvm_unreachable("unexpected opcode");

  OriginalCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
  bool RAXImplicitDead = false;
  for (MachineOperand &Op : llvm::drop_begin(MI.operands())) {
    // RAX may be 'implicit dead', if there are no other users of the return
    // value. We introduce a new use, so change it to 'implicit def'.
    if (Op.isReg() && Op.isImplicit() && Op.isDead() &&
        TRI->regsOverlap(Op.getReg(), X86::RAX)) {
      Op.setIsDead(false);
      Op.setIsDef(true);
      RAXImplicitDead = true;
    }
    OriginalCall->addOperand(Op);
  }

  // Emit marker "movq %rax, %rdi". %rdi is not callee-saved, so it cannot be
  // live across the earlier call. The call to the ObjC runtime function
  // returns the first argument, so the value of %rax is unchanged after the
  // ObjC runtime call. On Windows targets, the runtime call follows the
  // regular x64 calling convention and expects the first argument in %rcx.
  auto TargetReg = STI->getTargetTriple().isOSWindows() ? X86::RCX : X86::RDI;
  auto *Marker = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(X86::MOV64rr))
                     .addReg(TargetReg, RegState::Define)
                     .addReg(X86::RAX)
                     .getInstr();
  if (MI.shouldUpdateCallSiteInfo())
    MBB.getParent()->moveCallSiteInfo(&MI, Marker);

  // Emit call to ObjC runtime.
  const uint32_t *RegMask =
      TRI->getCallPreservedMask(*MBB.getParent(), CallingConv::C);
  MachineInstr *RtCall =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(X86::CALL64pcrel32))
          .addGlobalAddress(MI.getOperand(0).getGlobal(), 0, 0)
          .addRegMask(RegMask)
          .addReg(X86::RAX,
                  RegState::Implicit |
                      (RAXImplicitDead ? (RegState::Dead | RegState::Define)
                                       : RegState::Define))
          .getInstr();
  MI.eraseFromParent();

  auto &TM = MBB.getParent()->getTarget();
  // On Darwin platforms, wrap the expanded sequence in a bundle to prevent
  // later optimizations from breaking up the sequence.
  if (TM.getTargetTriple().isOSDarwin())
    finalizeBundle(MBB, OriginalCall->getIterator(),
                   std::next(RtCall->getIterator()));
}

/// If \p MBBI is a pseudo instruction, this method expands
/// it to the corresponding (sequence of) actual instruction(s).
/// \returns true if \p MBBI has been expanded.
bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();
  const DebugLoc &DL = MBBI->getDebugLoc();
  switch (Opcode) {
  default:
    return false;
  case X86::TCRETURNdi:
  case X86::TCRETURNdicc:
  case X86::TCRETURNri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNdi64cc:
  case X86::TCRETURNri64:
  case X86::TCRETURNmi64: {
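    // For the memory forms the jump target is a full memory reference
    // occupying the first X86::AddrNumOperands operands, so the
    // stack-adjustment immediate follows them; for all other forms the
    // target is operand 0 and the immediate is operand 1.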
    bool isMem = Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64;
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    MachineOperand &StackAdjust =
        MBBI->getOperand(isMem ? X86::AddrNumOperands : 1);
    assert(StackAdjust.isImm() && "Expecting immediate value.");

    // Adjust stack pointer.
    int StackAdj = StackAdjust.getImm();
    int MaxTCDelta = X86FI->getTCReturnAddrDelta();
    int Offset = 0;
    assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");

    // Incorporate the retaddr area.
    Offset = StackAdj - MaxTCDelta;
    assert(Offset >= 0 && "Offset should never be negative");

    if (Opcode == X86::TCRETURNdicc || Opcode == X86::TCRETURNdi64cc) {
      assert(Offset == 0 && "Conditional tail call cannot adjust the stack.");
    }

    if (Offset) {
      // Check for possible merge with preceding ADD instruction.
      Offset += X86FL->mergeSPUpdates(MBB, MBBI, true);
      X86FL->emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue=*/true);
    }

    // Jump to label or value in register.
    bool IsWin64 = STI->isTargetWin64();
    if (Opcode == X86::TCRETURNdi || Opcode == X86::TCRETURNdicc ||
        Opcode == X86::TCRETURNdi64 || Opcode == X86::TCRETURNdi64cc) {
      unsigned Op;
      switch (Opcode) {
      case X86::TCRETURNdi:
        Op = X86::TAILJMPd;
        break;
      case X86::TCRETURNdicc:
        Op = X86::TAILJMPd_CC;
        break;
      case X86::TCRETURNdi64cc:
        assert(!MBB.getParent()->hasWinCFI() &&
               "Conditional tail calls confuse the Win64 unwinder.");
        Op = X86::TAILJMPd64_CC;
        break;
      default:
        // Note: Win64 uses REX prefixes for indirect jumps out of functions,
        // but not for direct ones.
        Op = X86::TAILJMPd64;
        break;
      }
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(Op));
      if (JumpTarget.isGlobal()) {
        MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
                             JumpTarget.getTargetFlags());
      } else {
        assert(JumpTarget.isSymbol());
        MIB.addExternalSymbol(JumpTarget.getSymbolName(),
                              JumpTarget.getTargetFlags());
      }
      if (Op == X86::TAILJMPd_CC || Op == X86::TAILJMPd64_CC) {
        MIB.addImm(MBBI->getOperand(2).getImm());
      }
    } else if (Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64) {
      unsigned Op = (Opcode == X86::TCRETURNmi)
                        ? X86::TAILJMPm
                        : (IsWin64 ? X86::TAILJMPm64_REX : X86::TAILJMPm64);
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(Op));
      for (unsigned i = 0; i != X86::AddrNumOperands; ++i)
        MIB.add(MBBI->getOperand(i));
    } else if (Opcode == X86::TCRETURNri64) {
      JumpTarget.setIsKill();
      BuildMI(MBB, MBBI, DL,
              TII->get(IsWin64 ? X86::TAILJMPr64_REX : X86::TAILJMPr64))
          .add(JumpTarget);
    } else {
      JumpTarget.setIsKill();
      BuildMI(MBB, MBBI, DL, TII->get(X86::TAILJMPr))
          .add(JumpTarget);
    }

    MachineInstr &NewMI = *std::prev(MBBI);
    NewMI.copyImplicitOps(*MBBI->getParent()->getParent(), *MBBI);
    NewMI.setCFIType(*MBB.getParent(), MI.getCFIType());

    // Update the call site info.
    if (MBBI->isCandidateForCallSiteEntry())
      MBB.getParent()->moveCallSiteInfo(&*MBBI, &NewMI);

    // Delete the pseudo instruction TCRETURN.
    MBB.erase(MBBI);
    return true;
  }
  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    MachineOperand &DestAddr = MBBI->getOperand(0);
    assert(DestAddr.isReg() && "Offset should be in register!");
    const bool Uses64BitFramePtr =
        STI->isTarget64BitLP64() || STI->isTargetNaCl64();
    Register StackPtr = TRI->getStackRegister();
    BuildMI(MBB, MBBI, DL,
            TII->get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
            StackPtr)
        .addReg(DestAddr.getReg());
    // The EH_RETURN pseudo is really removed during the MC Lowering.
    return true;
  }
  case X86::IRET: {
    // Adjust stack to erase error code.
    int64_t StackAdj = MBBI->getOperand(0).getImm();
    X86FL->emitSPUpdate(MBB, MBBI, DL, StackAdj, true);
    // Replace pseudo with machine iret.
    unsigned RetOp = STI->is64Bit() ? X86::IRET64 : X86::IRET32;
    // Use UIRET if UINTR is present (except when building the kernel).
    if (STI->is64Bit() && STI->hasUINTR() &&
        MBB.getParent()->getTarget().getCodeModel() != CodeModel::Kernel)
      RetOp = X86::UIRET;
    BuildMI(MBB, MBBI, DL, TII->get(RetOp));
    MBB.erase(MBBI);
    return true;
  }
  case X86::RET: {
    // Adjust stack to erase error code.
    int64_t StackAdj = MBBI->getOperand(0).getImm();
    MachineInstrBuilder MIB;
    if (StackAdj == 0) {
      MIB = BuildMI(MBB, MBBI, DL,
                    TII->get(STI->is64Bit() ? X86::RET64 : X86::RET32));
    } else if (isUInt<16>(StackAdj)) {
      MIB = BuildMI(MBB, MBBI, DL,
                    TII->get(STI->is64Bit() ? X86::RETI64 : X86::RETI32))
                .addImm(StackAdj);
    } else {
      assert(!STI->is64Bit() &&
             "shouldn't need to do this for x86_64 targets!");
      // A ret can only handle immediates as big as 2**16-1. If we need to pop
      // off bytes before the return address, we must do it manually.
      BuildMI(MBB, MBBI, DL, TII->get(X86::POP32r))
          .addReg(X86::ECX, RegState::Define);
      X86FL->emitSPUpdate(MBB, MBBI, DL, StackAdj, /*InEpilogue=*/true);
      BuildMI(MBB, MBBI, DL, TII->get(X86::PUSH32r)).addReg(X86::ECX);
      MIB = BuildMI(MBB, MBBI, DL, TII->get(X86::RET32));
    }
    for (unsigned I = 1, E = MBBI->getNumOperands(); I != E; ++I)
      MIB.add(MBBI->getOperand(I));
    MBB.erase(MBBI);
    return true;
  }
  case X86::LCMPXCHG16B_SAVE_RBX: {
    // Perform the following transformation.
    // SaveRbx = pseudocmpxchg Addr, <4 opds for the address>, InArg, SaveRbx
    // =>
    // RBX = InArg
    // actualcmpxchg Addr
    // RBX = SaveRbx
    const MachineOperand &InArg = MBBI->getOperand(6);
    Register SaveRbx = MBBI->getOperand(7).getReg();

    // Copy the input argument of the pseudo into the argument of the
    // actual instruction.
    // NOTE: We don't copy the kill flag since the input might be the same reg
    // as one of the other operands of LCMPXCHG16B.
    TII->copyPhysReg(MBB, MBBI, DL, X86::RBX, InArg.getReg(), false);
    // Create the actual instruction.
    MachineInstr *NewInstr =
        BuildMI(MBB, MBBI, DL, TII->get(X86::LCMPXCHG16B));
    // Copy the operands related to the address.
    for (unsigned Idx = 1; Idx < 6; ++Idx)
      NewInstr->addOperand(MBBI->getOperand(Idx));
    // Finally, restore the value of RBX.
    TII->copyPhysReg(MBB, MBBI, DL, X86::RBX, SaveRbx,
                     /*SrcIsKill*/ true);
    // Delete the pseudo.
    MBBI->eraseFromParent();
    return true;
  }
  // Loading/storing mask pairs requires two kmov operations. The second one
  // of these needs a 2-byte displacement relative to the specified address
  // (with a 32-bit spill size). Pairs of 1-bit masks up to 16-bit masks all
  // use the same spill size: they are all stored using MASKPAIR16STORE and
  // loaded using MASKPAIR16LOAD.
  //
  // The displacement value might wrap around in theory, thus the asserts in
  // both cases.
  case X86::MASKPAIR16LOAD: {
    int64_t Disp = MBBI->getOperand(1 + X86::AddrDisp).getImm();
    assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement");
    Register Reg = MBBI->getOperand(0).getReg();
    bool DstIsDead = MBBI->getOperand(0).isDead();
    Register Reg0 = TRI->getSubReg(Reg, X86::sub_mask_0);
    Register Reg1 = TRI->getSubReg(Reg, X86::sub_mask_1);

    auto MIBLo =
        BuildMI(MBB, MBBI, DL, TII->get(X86::KMOVWkm))
            .addReg(Reg0, RegState::Define | getDeadRegState(DstIsDead));
    auto MIBHi =
        BuildMI(MBB, MBBI, DL, TII->get(X86::KMOVWkm))
            .addReg(Reg1, RegState::Define | getDeadRegState(DstIsDead));

    for (int i = 0; i < X86::AddrNumOperands; ++i) {
      MIBLo.add(MBBI->getOperand(1 + i));
      if (i == X86::AddrDisp)
        MIBHi.addImm(Disp + 2);
      else
        MIBHi.add(MBBI->getOperand(1 + i));
    }

    // Split the memory operand, adjusting the offset and size for the halves.
    MachineMemOperand *OldMMO = MBBI->memoperands().front();
    MachineFunction *MF = MBB.getParent();
    MachineMemOperand *MMOLo = MF->getMachineMemOperand(OldMMO, 0, 2);
    MachineMemOperand *MMOHi = MF->getMachineMemOperand(OldMMO, 2, 2);
    MIBLo.setMemRefs(MMOLo);
    MIBHi.setMemRefs(MMOHi);

    // Delete the pseudo.
    MBB.erase(MBBI);
    return true;
  }
  case X86::MASKPAIR16STORE: {
    int64_t Disp = MBBI->getOperand(X86::AddrDisp).getImm();
    assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement");
    Register Reg = MBBI->getOperand(X86::AddrNumOperands).getReg();
    bool SrcIsKill = MBBI->getOperand(X86::AddrNumOperands).isKill();
    Register Reg0 = TRI->getSubReg(Reg, X86::sub_mask_0);
    Register Reg1 = TRI->getSubReg(Reg, X86::sub_mask_1);

    auto MIBLo = BuildMI(MBB, MBBI, DL, TII->get(X86::KMOVWmk));
    auto MIBHi = BuildMI(MBB, MBBI, DL, TII->get(X86::KMOVWmk));

    for (int i = 0; i < X86::AddrNumOperands; ++i) {
      MIBLo.add(MBBI->getOperand(i));
      if (i == X86::AddrDisp)
        MIBHi.addImm(Disp + 2);
      else
        MIBHi.add(MBBI->getOperand(i));
    }
    MIBLo.addReg(Reg0, getKillRegState(SrcIsKill));
    MIBHi.addReg(Reg1, getKillRegState(SrcIsKill));

    // Split the memory operand, adjusting the offset and size for the halves.
    MachineMemOperand *OldMMO = MBBI->memoperands().front();
    MachineFunction *MF = MBB.getParent();
    MachineMemOperand *MMOLo = MF->getMachineMemOperand(OldMMO, 0, 2);
    MachineMemOperand *MMOHi = MF->getMachineMemOperand(OldMMO, 2, 2);
    MIBLo.setMemRefs(MMOLo);
    MIBHi.setMemRefs(MMOHi);

    // Delete the pseudo.
    MBB.erase(MBBI);
    return true;
  }
  case X86::MWAITX_SAVE_RBX: {
    // Perform the following transformation.
    // SaveRbx = pseudomwaitx InArg, SaveRbx
    // =>
    // [E|R]BX = InArg
    // actualmwaitx
    // [E|R]BX = SaveRbx
    const MachineOperand &InArg = MBBI->getOperand(1);
    // Copy the input argument of the pseudo into the argument of the
    // actual instruction.
    TII->copyPhysReg(MBB, MBBI, DL, X86::EBX, InArg.getReg(), InArg.isKill());
    // Create the actual instruction.
    BuildMI(MBB, MBBI, DL, TII->get(X86::MWAITXrrr));
    // Finally, restore the value of RBX.
    Register SaveRbx = MBBI->getOperand(2).getReg();
    TII->copyPhysReg(MBB, MBBI, DL, X86::RBX, SaveRbx, /*SrcIsKill*/ true);
    // Delete the pseudo.
    MBBI->eraseFromParent();
    return true;
  }
  case TargetOpcode::ICALL_BRANCH_FUNNEL:
    ExpandICallBranchFunnel(&MBB, MBBI);
    return true;
  case X86::PLDTILECFGV: {
    MI.setDesc(TII->get(X86::LDTILECFG));
    return true;
  }
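  // The AMX pseudos below carry extra shape (row/col) operands so that the
  // register allocator can track tile shapes; expansion strips them and
  // switches to the real AMX opcode.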
  case X86::PTILELOADDV:
  case X86::PTILELOADDT1V: {
    for (unsigned i = 2; i > 0; --i)
      MI.removeOperand(i);
    unsigned Opc =
        Opcode == X86::PTILELOADDV ? X86::TILELOADD : X86::TILELOADDT1;
    MI.setDesc(TII->get(Opc));
    return true;
  }
  case X86::PTDPBSSDV:
  case X86::PTDPBSUDV:
  case X86::PTDPBUSDV:
  case X86::PTDPBUUDV:
  case X86::PTDPBF16PSV:
  case X86::PTDPFP16PSV: {
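    // Operand 4 is the accumulator source tied to the destination; untie it
    // before deleting the three shape operands, then re-tie the destination
    // to what is now operand 1.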
    MI.untieRegOperand(4);
    for (unsigned i = 3; i > 0; --i)
      MI.removeOperand(i);
    unsigned Opc;
    switch (Opcode) {
    case X86::PTDPBSSDV:   Opc = X86::TDPBSSD; break;
    case X86::PTDPBSUDV:   Opc = X86::TDPBSUD; break;
    case X86::PTDPBUSDV:   Opc = X86::TDPBUSD; break;
    case X86::PTDPBUUDV:   Opc = X86::TDPBUUD; break;
    case X86::PTDPBF16PSV: Opc = X86::TDPBF16PS; break;
    case X86::PTDPFP16PSV: Opc = X86::TDPFP16PS; break;
    default: llvm_unreachable("Impossible Opcode!");
    }
    MI.setDesc(TII->get(Opc));
    MI.tieOperands(0, 1);
    return true;
  }
  case X86::PTILESTOREDV: {
    for (int i = 1; i >= 0; --i)
      MI.removeOperand(i);
    MI.setDesc(TII->get(X86::TILESTORED));
    return true;
  }
  case X86::PTILEZEROV: {
    for (int i = 2; i > 0; --i) // Remove row, col.
      MI.removeOperand(i);
    MI.setDesc(TII->get(X86::TILEZERO));
    return true;
  }
  case X86::CALL64pcrel32_RVMARKER:
  case X86::CALL64r_RVMARKER:
  case X86::CALL64m_RVMARKER:
    expandCALL_RVMARKER(MBB, MBBI);
    return true;
  }
  llvm_unreachable("Previous switch has a fallthrough?");
}

// This function creates an additional block for storing varargs guarded
// registers. It adds a check for %al into the entry block, to skip
// GuardedRegsBlk if the XMM registers do not need to be stored.
//
//   EntryBlk[VAStartPseudoInstr]     EntryBlk
//        |                               |     .
//        |                               |     .
//        |                               |   GuardedRegsBlk
//        |                      =>       |     .
//        |                               |     .
//        |                            TailBlk
//        |                               |
//        |                               |
//
void X86ExpandPseudo::ExpandVastartSaveXmmRegs(
    MachineBasicBlock *EntryBlk,
    MachineBasicBlock::iterator VAStartPseudoInstr) const {
  assert(VAStartPseudoInstr->getOpcode() == X86::VASTART_SAVE_XMM_REGS);

  MachineFunction *Func = EntryBlk->getParent();
  const TargetInstrInfo *TII = STI->getInstrInfo();
  const DebugLoc &DL = VAStartPseudoInstr->getDebugLoc();
  Register CountReg = VAStartPseudoInstr->getOperand(0).getReg();

  // Calculate liveins for newly created blocks.
  LivePhysRegs LiveRegs(*STI->getRegisterInfo());
  SmallVector<std::pair<MCPhysReg, const MachineOperand *>, 8> Clobbers;

  LiveRegs.addLiveIns(*EntryBlk);
  for (MachineInstr &MI : EntryBlk->instrs()) {
    if (MI.getOpcode() == VAStartPseudoInstr->getOpcode())
      break;
    LiveRegs.stepForward(MI, Clobbers);
  }

  // Create the new basic blocks. One block contains all the XMM stores,
  // and another block is the final destination regardless of whether any
  // stores were performed.
  const BasicBlock *LLVMBlk = EntryBlk->getBasicBlock();
  MachineFunction::iterator EntryBlkIter = ++EntryBlk->getIterator();
  MachineBasicBlock *GuardedRegsBlk = Func->CreateMachineBasicBlock(LLVMBlk);
  MachineBasicBlock *TailBlk = Func->CreateMachineBasicBlock(LLVMBlk);
  Func->insert(EntryBlkIter, GuardedRegsBlk);
  Func->insert(EntryBlkIter, TailBlk);

  // Transfer the remainder of EntryBlk and its successor edges to TailBlk.
  TailBlk->splice(TailBlk->begin(), EntryBlk,
                  std::next(MachineBasicBlock::iterator(VAStartPseudoInstr)),
                  EntryBlk->end());
  TailBlk->transferSuccessorsAndUpdatePHIs(EntryBlk);
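
  // Pseudo operand layout: operand 0 is the %al count register, operands
  // 1-5 form the frame address (so 1 + X86::AddrDisp is its displacement),
  // operand 6 is the offset of the XMM save area, and operands 7 onward
  // are the XMM argument registers to spill.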
  uint64_t FrameOffset = VAStartPseudoInstr->getOperand(4).getImm();
  uint64_t VarArgsRegsOffset = VAStartPseudoInstr->getOperand(6).getImm();

  // TODO: add support for YMM and ZMM here.
  unsigned MOVOpc = STI->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr;

  // In the XMM save block, save all the XMM argument registers.
  for (int64_t OpndIdx = 7, RegIdx = 0;
       OpndIdx < VAStartPseudoInstr->getNumOperands() - 1;
       OpndIdx++, RegIdx++) {
    auto NewMI = BuildMI(GuardedRegsBlk, DL, TII->get(MOVOpc));
    for (int i = 0; i < X86::AddrNumOperands; ++i) {
      if (i == X86::AddrDisp)
        NewMI.addImm(FrameOffset + VarArgsRegsOffset + RegIdx * 16);
      else
        NewMI.add(VAStartPseudoInstr->getOperand(i + 1));
    }
    NewMI.addReg(VAStartPseudoInstr->getOperand(OpndIdx).getReg());
    assert(VAStartPseudoInstr->getOperand(OpndIdx).getReg().isPhysical());
  }

  // The original block will now fall through to the GuardedRegsBlk.
  EntryBlk->addSuccessor(GuardedRegsBlk);
  // The GuardedRegsBlk will fall through to the TailBlk.
  GuardedRegsBlk->addSuccessor(TailBlk);

  if (!STI->isCallingConvWin64(Func->getFunction().getCallingConv())) {
    // If %al is 0, branch around the XMM save block.
    BuildMI(EntryBlk, DL, TII->get(X86::TEST8rr))
        .addReg(CountReg)
        .addReg(CountReg);
    BuildMI(EntryBlk, DL, TII->get(X86::JCC_1))
        .addMBB(TailBlk)
        .addImm(X86::COND_E);
    EntryBlk->addSuccessor(TailBlk);
  }

  // Add liveins to the created blocks.
  addLiveIns(*GuardedRegsBlk, LiveRegs);
  addLiveIns(*TailBlk, LiveRegs);

  // Delete the pseudo.
  VAStartPseudoInstr->eraseFromParent();
}

/// Expand all pseudo instructions contained in \p MBB.
/// \returns true if any expansion occurred for \p MBB.
bool X86ExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  // MBBI may be invalidated by the expansion.
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= ExpandMI(MBB, MBBI);
    MBBI = NMBBI;
  }

  return Modified;
}

bool X86ExpandPseudo::ExpandPseudosWhichAffectControlFlow(MachineFunction &MF) {
  // Currently the only pseudo which affects control flow is
  // X86::VASTART_SAVE_XMM_REGS, and it is always located in the entry
  // block, so we do not need to scan other blocks.
  for (MachineInstr &Instr : MF.front().instrs()) {
    if (Instr.getOpcode() == X86::VASTART_SAVE_XMM_REGS) {
      ExpandVastartSaveXmmRegs(&(MF.front()), Instr);
      return true;
    }
  }
  return false;
}

bool X86ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
  STI = &MF.getSubtarget<X86Subtarget>();
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  X86FI = MF.getInfo<X86MachineFunctionInfo>();
  X86FL = STI->getFrameLowering();

  bool Modified = ExpandPseudosWhichAffectControlFlow(MF);

  for (MachineBasicBlock &MBB : MF)
    Modified |= ExpandMBB(MBB);
  return Modified;
}

/// Returns an instance of the pseudo instruction expansion pass.
FunctionPass *llvm::createX86ExpandPseudoPass() {
  return new X86ExpandPseudo();
}