//===- X86FlagsCopyLowering.cpp - Lowers COPY nodes of EFLAGS ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// Lowers COPY nodes of EFLAGS by directly extracting and preserving individual
/// flag bits.
///
/// We have to do this by carefully analyzing and rewriting the usage of the
/// copied EFLAGS register because there is no general way to rematerialize the
/// entire EFLAGS register safely and efficiently. Using `popf` both forces
/// dynamic stack adjustment and can create correctness issues due to IF, TF,
/// and other non-status flags being overwritten. Sequences involving SAHF
/// don't work on all x86 processors and are often quite slow compared to
/// directly testing a single status flag preserved in its own GPR.
///
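/// As a rough sketch (pseudo-MI, not exact syntax), a copy of EFLAGS that
/// feeds a conditional branch, e.g.:
///
///   %flags:gr32 = COPY $eflags
///   ...
///   $eflags = COPY %flags:gr32
///   jcc <target>               ; branch on, say, COND_E read from EFLAGS
///
/// is rewritten into roughly:
///
///   %cond:gr8 = setcc COND_E   ; SETCCr saving the needed condition
///   ...
///   test %cond, %cond          ; TEST8rr re-materializing ZF from the byte
///   jcc <target>               ; now branching on COND_NE
///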
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineSSAUpdater.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <array>
#include <cassert>
#include <iterator>
#include <numeric>
#include <utility>

using namespace llvm;

#define PASS_KEY "x86-flags-copy-lowering"
#define DEBUG_TYPE PASS_KEY

STATISTIC(NumCopiesEliminated, "Number of copies of EFLAGS eliminated");
STATISTIC(NumSetCCsInserted, "Number of setCC instructions inserted");
STATISTIC(NumTestsInserted, "Number of test instructions inserted");
STATISTIC(NumAddsInserted, "Number of adds instructions inserted");

namespace {

// Convenient array type for storing registers associated with each condition.
using CondRegArray = std::array<unsigned, X86::LAST_VALID_COND + 1>;

class X86FlagsCopyLoweringPass : public MachineFunctionPass {
public:
  X86FlagsCopyLoweringPass() : MachineFunctionPass(ID) {}

  StringRef getPassName() const override { return "X86 EFLAGS copy lowering"; }
  bool runOnMachineFunction(MachineFunction &MF) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;

  /// Pass identification, replacement for typeid.
  static char ID;

private:
  MachineRegisterInfo *MRI = nullptr;
  const X86Subtarget *Subtarget = nullptr;
  const X86InstrInfo *TII = nullptr;
  const TargetRegisterInfo *TRI = nullptr;
  const TargetRegisterClass *PromoteRC = nullptr;
  MachineDominatorTree *MDT = nullptr;

  CondRegArray collectCondsInRegs(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator CopyDefI);

  Register promoteCondToReg(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator TestPos,
                            const DebugLoc &TestLoc, X86::CondCode Cond);
  std::pair<unsigned, bool> getCondOrInverseInReg(
      MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
      const DebugLoc &TestLoc, X86::CondCode Cond, CondRegArray &CondRegs);
  void insertTest(MachineBasicBlock &MBB, MachineBasicBlock::iterator Pos,
                  const DebugLoc &Loc, unsigned Reg);

  void rewriteArithmetic(MachineBasicBlock &TestMBB,
                         MachineBasicBlock::iterator TestPos,
                         const DebugLoc &TestLoc, MachineInstr &MI,
                         MachineOperand &FlagUse, CondRegArray &CondRegs);
  void rewriteCMov(MachineBasicBlock &TestMBB,
                   MachineBasicBlock::iterator TestPos, const DebugLoc &TestLoc,
                   MachineInstr &CMovI, MachineOperand &FlagUse,
                   CondRegArray &CondRegs);
  void rewriteFCMov(MachineBasicBlock &TestMBB,
                    MachineBasicBlock::iterator TestPos,
                    const DebugLoc &TestLoc, MachineInstr &CMovI,
                    MachineOperand &FlagUse, CondRegArray &CondRegs);
  void rewriteCondJmp(MachineBasicBlock &TestMBB,
                      MachineBasicBlock::iterator TestPos,
                      const DebugLoc &TestLoc, MachineInstr &JmpI,
                      CondRegArray &CondRegs);
  void rewriteCopy(MachineInstr &MI, MachineOperand &FlagUse,
                   MachineInstr &CopyDefI);
  void rewriteSetCC(MachineBasicBlock &TestMBB,
                    MachineBasicBlock::iterator TestPos,
                    const DebugLoc &TestLoc, MachineInstr &SetCCI,
                    MachineOperand &FlagUse, CondRegArray &CondRegs);
};

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(X86FlagsCopyLoweringPass, DEBUG_TYPE,
                      "X86 EFLAGS copy lowering", false, false)
INITIALIZE_PASS_END(X86FlagsCopyLoweringPass, DEBUG_TYPE,
                    "X86 EFLAGS copy lowering", false, false)

FunctionPass *llvm::createX86FlagsCopyLoweringPass() {
  return new X86FlagsCopyLoweringPass();
}

char X86FlagsCopyLoweringPass::ID = 0;

void X86FlagsCopyLoweringPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<MachineDominatorTree>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

namespace {

/// An enumeration of the arithmetic instruction mnemonics which have
/// interesting flag semantics.
///
/// We can map instruction opcodes into these mnemonics to make it easy to
/// dispatch with specific functionality.
enum class FlagArithMnemonic {
  ADC,
  ADCX,
  ADOX,
  RCL,
  RCR,
  SBB,
  SETB,
};

} // namespace

static FlagArithMnemonic getMnemonicFromOpcode(unsigned Opcode) {
  switch (Opcode) {
  default:
    report_fatal_error("No support for lowering a copy into EFLAGS when used "
                       "by this instruction!");
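// Expand to a `case` label for each operand-size variant (8-, 16-, 32-, and
// 64-bit) of the given mnemonic with the given operand-form suffix.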
#define LLVM_EXPAND_INSTR_SIZES(MNEMONIC, SUFFIX) \
  case X86::MNEMONIC##8##SUFFIX: \
  case X86::MNEMONIC##16##SUFFIX: \
  case X86::MNEMONIC##32##SUFFIX: \
  case X86::MNEMONIC##64##SUFFIX:

#define LLVM_EXPAND_ADC_SBB_INSTR(MNEMONIC) \
  LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rr) \
  LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rr_REV) \
  LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rm) \
  LLVM_EXPAND_INSTR_SIZES(MNEMONIC, mr) \
  case X86::MNEMONIC##8ri: \
  case X86::MNEMONIC##16ri8: \
  case X86::MNEMONIC##32ri8: \
  case X86::MNEMONIC##64ri8: \
  case X86::MNEMONIC##16ri: \
  case X86::MNEMONIC##32ri: \
  case X86::MNEMONIC##64ri32: \
  case X86::MNEMONIC##8mi: \
  case X86::MNEMONIC##16mi8: \
  case X86::MNEMONIC##32mi8: \
  case X86::MNEMONIC##64mi8: \
  case X86::MNEMONIC##16mi: \
  case X86::MNEMONIC##32mi: \
  case X86::MNEMONIC##64mi32: \
  case X86::MNEMONIC##8i8: \
  case X86::MNEMONIC##16i16: \
  case X86::MNEMONIC##32i32: \
  case X86::MNEMONIC##64i32:

  LLVM_EXPAND_ADC_SBB_INSTR(ADC)
    return FlagArithMnemonic::ADC;

  LLVM_EXPAND_ADC_SBB_INSTR(SBB)
    return FlagArithMnemonic::SBB;

#undef LLVM_EXPAND_ADC_SBB_INSTR

  LLVM_EXPAND_INSTR_SIZES(RCL, rCL)
  LLVM_EXPAND_INSTR_SIZES(RCL, r1)
  LLVM_EXPAND_INSTR_SIZES(RCL, ri)
    return FlagArithMnemonic::RCL;

  LLVM_EXPAND_INSTR_SIZES(RCR, rCL)
  LLVM_EXPAND_INSTR_SIZES(RCR, r1)
  LLVM_EXPAND_INSTR_SIZES(RCR, ri)
    return FlagArithMnemonic::RCR;

#undef LLVM_EXPAND_INSTR_SIZES

  case X86::ADCX32rr:
  case X86::ADCX64rr:
  case X86::ADCX32rm:
  case X86::ADCX64rm:
    return FlagArithMnemonic::ADCX;

  case X86::ADOX32rr:
  case X86::ADOX64rr:
  case X86::ADOX32rm:
  case X86::ADOX64rm:
    return FlagArithMnemonic::ADOX;

  case X86::SETB_C32r:
  case X86::SETB_C64r:
    return FlagArithMnemonic::SETB;
  }
}
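
/// Split the given block into a new block immediately following it, moving
/// \p SplitI and every instruction after it into the new block, and fixing up
/// the successor lists, edge probabilities, and any PHI operands in the
/// successors. Returns the newly created block.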
static MachineBasicBlock &splitBlock(MachineBasicBlock &MBB,
                                     MachineInstr &SplitI,
                                     const X86InstrInfo &TII) {
  MachineFunction &MF = *MBB.getParent();

  assert(SplitI.getParent() == &MBB &&
         "Split instruction must be in the split block!");
  assert(SplitI.isBranch() &&
         "Only designed to split a tail of branch instructions!");
  assert(X86::getCondFromBranch(SplitI) != X86::COND_INVALID &&
         "Must split on an actual jCC instruction!");

  // Dig out the previous instruction to the split point.
  MachineInstr &PrevI = *std::prev(SplitI.getIterator());
  assert(PrevI.isBranch() && "Must split after a branch!");
  assert(X86::getCondFromBranch(PrevI) != X86::COND_INVALID &&
         "Must split after an actual jCC instruction!");
  assert(!std::prev(PrevI.getIterator())->isTerminator() &&
         "Must only have this one terminator prior to the split!");

  // Grab the one successor edge that will stay in `MBB`.
  MachineBasicBlock &UnsplitSucc = *PrevI.getOperand(0).getMBB();

  // Analyze the original block to see if we are actually splitting an edge
  // into two edges. This can happen when we have multiple conditional jumps to
  // the same successor.
  bool IsEdgeSplit =
      std::any_of(SplitI.getIterator(), MBB.instr_end(),
                  [&](MachineInstr &MI) {
                    assert(MI.isTerminator() &&
                           "Should only have spliced terminators!");
                    return llvm::any_of(
                        MI.operands(), [&](MachineOperand &MOp) {
                          return MOp.isMBB() && MOp.getMBB() == &UnsplitSucc;
                        });
                  }) ||
      MBB.getFallThrough() == &UnsplitSucc;

  MachineBasicBlock &NewMBB = *MF.CreateMachineBasicBlock();

  // Insert the new block immediately after the current one. Any existing
  // fallthrough will be sunk into this new block anyways.
  MF.insert(std::next(MachineFunction::iterator(&MBB)), &NewMBB);

  // Splice the tail of instructions into the new block.
  NewMBB.splice(NewMBB.end(), &MBB, SplitI.getIterator(), MBB.end());
  // Copy the necessary successors (and their probability info) into the new
  // block.
  for (auto SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI)
    if (IsEdgeSplit || *SI != &UnsplitSucc)
      NewMBB.copySuccessor(&MBB, SI);

  // Normalize the probabilities if we didn't end up splitting the edge.
  if (!IsEdgeSplit)
    NewMBB.normalizeSuccProbs();

  // Now replace all of the moved successors in the original block with the new
  // block. This will merge their probabilities.
  for (MachineBasicBlock *Succ : NewMBB.successors())
    if (Succ != &UnsplitSucc)
      MBB.replaceSuccessor(Succ, &NewMBB);

  // We should always end up replacing at least one successor.
  assert(MBB.isSuccessor(&NewMBB) &&
         "Failed to make the new block a successor!");

  // Now update all the PHIs.
  for (MachineBasicBlock *Succ : NewMBB.successors()) {
    for (MachineInstr &MI : *Succ) {
      if (!MI.isPHI())
        break;

      for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
           OpIdx += 2) {
        MachineOperand &OpV = MI.getOperand(OpIdx);
        MachineOperand &OpMBB = MI.getOperand(OpIdx + 1);
        assert(OpMBB.isMBB() && "Block operand to a PHI is not a block!");
        if (OpMBB.getMBB() != &MBB)
          continue;

        // Replace the operand for unsplit successors
        if (!IsEdgeSplit || Succ != &UnsplitSucc) {
          OpMBB.setMBB(&NewMBB);

          // We have to continue scanning as there may be multiple entries in
          // the PHI.
          continue;
        }

        // When we have split the edge append a new successor.
        MI.addOperand(MF, OpV);
        MI.addOperand(MF, MachineOperand::CreateMBB(&NewMBB));
        break;
      }
    }
  }

  return NewMBB;
}
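
/// Map an x87 FCMOV opcode to the EFLAGS condition it tests, or return
/// COND_INVALID if the opcode is not one of the FCMOV forms handled here.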
static X86::CondCode getCondFromFCMOV(unsigned Opcode) {
  switch (Opcode) {
  default: return X86::COND_INVALID;
  case X86::CMOVBE_Fp32: case X86::CMOVBE_Fp64: case X86::CMOVBE_Fp80:
    return X86::COND_BE;
  case X86::CMOVB_Fp32: case X86::CMOVB_Fp64: case X86::CMOVB_Fp80:
    return X86::COND_B;
  case X86::CMOVE_Fp32: case X86::CMOVE_Fp64: case X86::CMOVE_Fp80:
    return X86::COND_E;
  case X86::CMOVNBE_Fp32: case X86::CMOVNBE_Fp64: case X86::CMOVNBE_Fp80:
    return X86::COND_A;
  case X86::CMOVNB_Fp32: case X86::CMOVNB_Fp64: case X86::CMOVNB_Fp80:
    return X86::COND_AE;
  case X86::CMOVNE_Fp32: case X86::CMOVNE_Fp64: case X86::CMOVNE_Fp80:
    return X86::COND_NE;
  case X86::CMOVNP_Fp32: case X86::CMOVNP_Fp64: case X86::CMOVNP_Fp80:
    return X86::COND_NP;
  case X86::CMOVP_Fp32: case X86::CMOVP_Fp64: case X86::CMOVP_Fp80:
    return X86::COND_P;
  }
}

bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
  LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
                    << " **********\n");

  Subtarget = &MF.getSubtarget<X86Subtarget>();
  MRI = &MF.getRegInfo();
  TII = Subtarget->getInstrInfo();
  TRI = Subtarget->getRegisterInfo();
  MDT = &getAnalysis<MachineDominatorTree>();
  PromoteRC = &X86::GR8RegClass;

  if (MF.begin() == MF.end())
    // Nothing to do for a degenerate empty function...
    return false;

  // Collect the copies in RPO so that when there are chains where a copy is in
  // turn copied again we visit the first one first. This ensures we can find
  // viable locations for testing the original EFLAGS that dominate all the
  // uses across complex CFGs.
  SmallVector<MachineInstr *, 4> Copies;
  ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
  for (MachineBasicBlock *MBB : RPOT)
    for (MachineInstr &MI : *MBB)
      if (MI.getOpcode() == TargetOpcode::COPY &&
          MI.getOperand(0).getReg() == X86::EFLAGS)
        Copies.push_back(&MI);

  for (MachineInstr *CopyI : Copies) {
    MachineBasicBlock &MBB = *CopyI->getParent();

    MachineOperand &VOp = CopyI->getOperand(1);
    assert(VOp.isReg() &&
           "The input to the copy for EFLAGS should always be a register!");
    MachineInstr &CopyDefI = *MRI->getVRegDef(VOp.getReg());
    if (CopyDefI.getOpcode() != TargetOpcode::COPY) {
      // FIXME: The big likely candidates here are PHI nodes. We could in theory
      // handle PHI nodes, but it gets really, really hard. Insanely hard. Hard
      // enough that it is probably better to change every other part of LLVM
      // to avoid creating them. The issue is that once we have PHIs we won't
      // know which original EFLAGS value we need to capture with our setCCs
      // below. The end result will be computing a complete set of setCCs that
      // we *might* want, computing them in every place where we copy *out* of
      // EFLAGS and then doing SSA formation on all of them to insert necessary
      // PHI nodes and consume those here. Then hoping that somehow we DCE the
      // unnecessary ones. This DCE seems very unlikely to be successful and so
      // we will almost certainly end up with a glut of dead setCC
      // instructions. Until we have a motivating test case and fail to avoid
      // it by changing other parts of LLVM's lowering, we refuse to handle
      // this complex case here.
      LLVM_DEBUG(
          dbgs() << "ERROR: Encountered unexpected def of an eflags copy: ";
          CopyDefI.dump());
      report_fatal_error(
          "Cannot lower EFLAGS copy unless it is defined in turn by a copy!");
    }

    auto Cleanup = make_scope_exit([&] {
      // All uses of the EFLAGS copy are now rewritten, kill the copy into
      // eflags and if dead the copy from.
      CopyI->eraseFromParent();
      if (MRI->use_empty(CopyDefI.getOperand(0).getReg()))
        CopyDefI.eraseFromParent();
      ++NumCopiesEliminated;
    });

    MachineOperand &DOp = CopyI->getOperand(0);
    assert(DOp.isDef() && "Expected register def!");
    assert(DOp.getReg() == X86::EFLAGS && "Unexpected copy def register!");
    if (DOp.isDead())
      continue;

    MachineBasicBlock *TestMBB = CopyDefI.getParent();
    auto TestPos = CopyDefI.getIterator();
    DebugLoc TestLoc = CopyDefI.getDebugLoc();

    LLVM_DEBUG(dbgs() << "Rewriting copy: "; CopyI->dump());

    // Walk up across live-in EFLAGS to find where they were actually def'ed.
    //
    // This copy's def may just be part of a region of blocks covered by
    // a single def of EFLAGS and we want to find the top of that region where
    // possible.
    //
    // This is essentially a search for a *candidate* reaching definition
    // location. We don't need to ever find the actual reaching definition here,
    // but we want to walk up the dominator tree to find the highest point which
    // would be viable for such a definition.
    auto HasEFLAGSClobber = [&](MachineBasicBlock::iterator Begin,
                                MachineBasicBlock::iterator End) {
      // Scan backwards as we expect these to be relatively short and often find
      // a clobber near the end.
      return llvm::any_of(
          llvm::reverse(llvm::make_range(Begin, End)), [&](MachineInstr &MI) {
            // Flag any instruction (other than the copy we are
            // currently rewriting) that defs EFLAGS.
            return &MI != CopyI && MI.findRegisterDefOperand(X86::EFLAGS);
          });
    };
    auto HasEFLAGSClobberPath = [&](MachineBasicBlock *BeginMBB,
                                    MachineBasicBlock *EndMBB) {
      assert(MDT->dominates(BeginMBB, EndMBB) &&
             "Only support paths down the dominator tree!");
      SmallPtrSet<MachineBasicBlock *, 4> Visited;
      SmallVector<MachineBasicBlock *, 4> Worklist;
      // We terminate at the beginning. No need to scan it.
      Visited.insert(BeginMBB);
      Worklist.push_back(EndMBB);
      do {
        auto *MBB = Worklist.pop_back_val();
        for (auto *PredMBB : MBB->predecessors()) {
          if (!Visited.insert(PredMBB).second)
            continue;
          if (HasEFLAGSClobber(PredMBB->begin(), PredMBB->end()))
            return true;
          // Enqueue this block to walk its predecessors.
          Worklist.push_back(PredMBB);
        }
      } while (!Worklist.empty());
      // No clobber found along a path from the begin to end.
      return false;
    };
    while (TestMBB->isLiveIn(X86::EFLAGS) && !TestMBB->pred_empty() &&
           !HasEFLAGSClobber(TestMBB->begin(), TestPos)) {
      // Find the nearest common dominator of the predecessors, as
      // that will be the best candidate to hoist into.
      MachineBasicBlock *HoistMBB =
          std::accumulate(std::next(TestMBB->pred_begin()), TestMBB->pred_end(),
                          *TestMBB->pred_begin(),
                          [&](MachineBasicBlock *LHS, MachineBasicBlock *RHS) {
                            return MDT->findNearestCommonDominator(LHS, RHS);
                          });

      // Now we need to scan all predecessors that may be reached along paths to
      // the hoist block. A clobber anywhere in any of these blocks will block
      // the hoist.
      // Note that this even handles loops because we require *no* clobbers.
      if (HasEFLAGSClobberPath(HoistMBB, TestMBB))
        break;

      // We also need the terminators to not sneakily clobber flags.
      if (HasEFLAGSClobber(HoistMBB->getFirstTerminator()->getIterator(),
                           HoistMBB->instr_end()))
        break;

      // We found a viable location, hoist our test position to it.
      TestMBB = HoistMBB;
      TestPos = TestMBB->getFirstTerminator()->getIterator();
      // Clear the debug location as it would just be confusing after hoisting.
      TestLoc = DebugLoc();
    }
    LLVM_DEBUG({
      auto DefIt = llvm::find_if(
          llvm::reverse(llvm::make_range(TestMBB->instr_begin(), TestPos)),
          [&](MachineInstr &MI) {
            return MI.findRegisterDefOperand(X86::EFLAGS);
          });
      if (DefIt.base() != TestMBB->instr_begin()) {
        dbgs() << " Using EFLAGS defined by: ";
        DefIt->dump();
      } else {
        dbgs() << " Using live-in flags for BB:\n";
        TestMBB->dump();
      }
    });

    // While rewriting uses, we buffer jumps and rewrite them in a second pass
    // because doing so will perturb the CFG that we are walking to find the
    // uses in the first place.
    SmallVector<MachineInstr *, 4> JmpIs;

    // Gather the condition flags that have already been preserved in
    // registers. We do this from scratch each time as we expect there to be
    // very few of them and we expect to not revisit the same copy definition
    // many times. If either of those change sufficiently we could build a map
    // of these up front instead.
    CondRegArray CondRegs = collectCondsInRegs(*TestMBB, TestPos);

    // Collect the basic blocks we need to scan. Typically this will just be
    // a single basic block but we may have to scan multiple blocks if the
    // EFLAGS copy lives into successors.
    SmallVector<MachineBasicBlock *, 2> Blocks;
    SmallPtrSet<MachineBasicBlock *, 2> VisitedBlocks;
    Blocks.push_back(&MBB);

    do {
      MachineBasicBlock &UseMBB = *Blocks.pop_back_val();
      // Track if/when we find a kill of the flags in this block.
      bool FlagsKilled = false;

      // In most cases, we walk from the beginning to the end of the block. But
      // when the block is the same block as the copy is from, we will visit it
      // twice. The first time we start from the copy and go to the end. The
      // second time we start from the beginning and go to the copy. This lets
      // us handle copies inside of cycles.
      // FIXME: This loop is *super* confusing. This is at least in part
      // a symptom of all of this routine needing to be refactored into
      // documentable components. Once done, there may be a better way to write
      // this loop.
      for (auto MII = (&UseMBB == &MBB && !VisitedBlocks.count(&UseMBB))
                          ? std::next(CopyI->getIterator())
                          : UseMBB.instr_begin(),
                MIE = UseMBB.instr_end();
           MII != MIE;) {
        MachineInstr &MI = *MII++;

        // If we are in the original copy block and encounter either the copy
        // def or the copy itself, break so that we don't re-process any part of
        // the block or process the instructions in the range that was copied
        // over.
        if (&MI == CopyI || &MI == &CopyDefI) {
          assert(&UseMBB == &MBB && VisitedBlocks.count(&MBB) &&
                 "Should only encounter these on the second pass over the "
                 "original block.");
          break;
        }

        MachineOperand *FlagUse = MI.findRegisterUseOperand(X86::EFLAGS);
        if (!FlagUse) {
          if (MI.findRegisterDefOperand(X86::EFLAGS)) {
            // If EFLAGS are defined, it's as-if they were killed. We can stop
            // scanning here.
            //
            // NB!!! Many instructions only modify some flags. LLVM currently
            // models this as clobbering all flags, but if that ever changes
            // this will need to be carefully updated to handle that more
            // complex logic.
            FlagsKilled = true;
            break;
          }
          continue;
        }

        LLVM_DEBUG(dbgs() << " Rewriting use: "; MI.dump());

        // Check the kill flag before we rewrite as that may change it.
        if (FlagUse->isKill())
          FlagsKilled = true;

        // Once we encounter a branch, the rest of the instructions must also be
        // branches. We can't rewrite in place here, so we handle them below.
        //
        // Note that we don't have to handle tail calls here, even conditional
        // tail calls, as those are not introduced into the X86 MI until post-RA
        // branch folding or block placement. As a consequence, we get to deal
        // with the simpler formulation of conditional branches followed by tail
        // calls.
        if (X86::getCondFromBranch(MI) != X86::COND_INVALID) {
          auto JmpIt = MI.getIterator();
          do {
            JmpIs.push_back(&*JmpIt);
            ++JmpIt;
          } while (JmpIt != UseMBB.instr_end() &&
                   X86::getCondFromBranch(*JmpIt) != X86::COND_INVALID);
          break;
        }

        // Otherwise we can just rewrite in-place.
        if (X86::getCondFromCMov(MI) != X86::COND_INVALID) {
          rewriteCMov(*TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
        } else if (getCondFromFCMOV(MI.getOpcode()) != X86::COND_INVALID) {
          rewriteFCMov(*TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
        } else if (X86::getCondFromSETCC(MI) != X86::COND_INVALID) {
          rewriteSetCC(*TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
        } else if (MI.getOpcode() == TargetOpcode::COPY) {
          rewriteCopy(MI, *FlagUse, CopyDefI);
        } else {
          // We assume all other instructions that use flags also def them.
          assert(MI.findRegisterDefOperand(X86::EFLAGS) &&
                 "Expected a def of EFLAGS for this instruction!");

          // NB!!! Several arithmetic instructions only *partially* update
          // flags. Theoretically, we could generate MI code sequences that
          // would rely on this fact and observe different flags independently.
          // But currently LLVM models all of these instructions as clobbering
          // all the flags in an undef way. We rely on that to simplify the
          // logic.
          FlagsKilled = true;

          // Generically handle remaining uses as arithmetic instructions.
          rewriteArithmetic(*TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
        }

        // If this was the last use of the flags, we're done.
        if (FlagsKilled)
          break;
      }

      // If the flags were killed, we're done with this block.
      if (FlagsKilled)
        continue;

      // Otherwise we need to scan successors for ones where the flags live-in
      // and queue those up for processing.
      for (MachineBasicBlock *SuccMBB : UseMBB.successors())
        if (SuccMBB->isLiveIn(X86::EFLAGS) &&
            VisitedBlocks.insert(SuccMBB).second) {
          // We currently don't do any PHI insertion and so we require that the
          // test basic block dominates all of the use basic blocks. Further, we
          // can't have a cycle from the test block back to itself as that would
          // create a cycle requiring a PHI to break it.
          //
          // We could in theory do PHI insertion here if it becomes useful by
          // just taking undef values in along every edge that we don't trace
          // this EFLAGS copy along. This isn't as bad as fully general PHI
          // insertion, but still seems like a great deal of complexity.
          //
          // Because it is theoretically possible that some earlier MI pass or
          // other lowering transformation could induce this to happen, we do
          // a hard check even in non-debug builds here.
          if (SuccMBB == TestMBB || !MDT->dominates(TestMBB, SuccMBB)) {
            LLVM_DEBUG({
              dbgs()
                  << "ERROR: Encountered use that is not dominated by our test "
                     "basic block! Rewriting this would require inserting PHI "
                     "nodes to track the flag state across the CFG.\n\nTest "
                     "block:\n";
              TestMBB->dump();
              dbgs() << "Use block:\n";
              SuccMBB->dump();
            });
            report_fatal_error(
                "Cannot lower EFLAGS copy when original copy def "
                "does not dominate all uses.");
          }

          Blocks.push_back(SuccMBB);

          // After this, EFLAGS will be recreated before each use.
          SuccMBB->removeLiveIn(X86::EFLAGS);
        }
    } while (!Blocks.empty());

    // Now rewrite the jumps that use the flags. These we handle specially
    // because if there are multiple jumps in a single basic block we'll have
    // to do surgery on the CFG.
    MachineBasicBlock *LastJmpMBB = nullptr;
    for (MachineInstr *JmpI : JmpIs) {
      // Past the first jump within a basic block we need to split the blocks
      // apart.
      if (JmpI->getParent() == LastJmpMBB)
        splitBlock(*JmpI->getParent(), *JmpI, *TII);
      else
        LastJmpMBB = JmpI->getParent();

      rewriteCondJmp(*TestMBB, TestPos, TestLoc, *JmpI, CondRegs);
    }

    // FIXME: Mark the last use of EFLAGS before the copy's def as a kill if
    // the copy's def operand is itself a kill.
  }

#ifndef NDEBUG
  for (MachineBasicBlock &MBB : MF)
    for (MachineInstr &MI : MBB)
      if (MI.getOpcode() == TargetOpcode::COPY &&
          (MI.getOperand(0).getReg() == X86::EFLAGS ||
           MI.getOperand(1).getReg() == X86::EFLAGS)) {
        LLVM_DEBUG(dbgs() << "ERROR: Found a COPY involving EFLAGS: ";
                   MI.dump());
        llvm_unreachable("Unlowered EFLAGS copy!");
      }
#endif

  return true;
}

/// Collect any conditions that have already been set in registers so that we
/// can re-use them rather than adding duplicates.
CondRegArray X86FlagsCopyLoweringPass::collectCondsInRegs(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator TestPos) {
  CondRegArray CondRegs = {};

  // Scan backwards across the range of instructions with live EFLAGS.
  for (MachineInstr &MI :
       llvm::reverse(llvm::make_range(MBB.begin(), TestPos))) {
    X86::CondCode Cond = X86::getCondFromSETCC(MI);
    if (Cond != X86::COND_INVALID && !MI.mayStore() &&
        MI.getOperand(0).isReg() && MI.getOperand(0).getReg().isVirtual()) {
      assert(MI.getOperand(0).isDef() &&
             "A non-storing SETcc should always define a register!");
      CondRegs[Cond] = MI.getOperand(0).getReg();
    }

    // Stop scanning when we see the first definition of the EFLAGS as prior to
    // this we would potentially capture the wrong flag state.
    if (MI.findRegisterDefOperand(X86::EFLAGS))
      break;
  }
  return CondRegs;
}
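
/// Materialize the given condition into a fresh 8-bit virtual register by
/// inserting a SETcc at the chosen test position.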
Register X86FlagsCopyLoweringPass::promoteCondToReg(
    MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
    const DebugLoc &TestLoc, X86::CondCode Cond) {
  Register Reg = MRI->createVirtualRegister(PromoteRC);
  auto SetI = BuildMI(TestMBB, TestPos, TestLoc, TII->get(X86::SETCCr), Reg)
                  .addImm(Cond);
  (void)SetI;
  LLVM_DEBUG(dbgs() << " save cond: "; SetI->dump());
  ++NumSetCCsInserted;
  return Reg;
}
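
/// Return a register holding either \p Cond or its inverse: re-use whichever
/// is already available, and only create a new SETcc (for \p Cond) when
/// neither exists. The second element of the result is true when the returned
/// register holds the inverse condition.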
std::pair<unsigned, bool> X86FlagsCopyLoweringPass::getCondOrInverseInReg(
    MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
    const DebugLoc &TestLoc, X86::CondCode Cond, CondRegArray &CondRegs) {
  unsigned &CondReg = CondRegs[Cond];
  unsigned &InvCondReg = CondRegs[X86::GetOppositeBranchCondition(Cond)];
  if (!CondReg && !InvCondReg)
    CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, Cond);

  if (CondReg)
    return {CondReg, false};
  else
    return {InvCondReg, true};
}
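
/// Insert a TEST of \p Reg against itself immediately before \p Pos so that
/// ZF afterwards reflects whether the saved condition byte is zero.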
void X86FlagsCopyLoweringPass::insertTest(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator Pos,
                                          const DebugLoc &Loc, unsigned Reg) {
  auto TestI =
      BuildMI(MBB, Pos, Loc, TII->get(X86::TEST8rr)).addReg(Reg).addReg(Reg);
  (void)TestI;
  LLVM_DEBUG(dbgs() << " test cond: "; TestI->dump());
  ++NumTestsInserted;
}
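
/// Rewrite an arithmetic user of the copied flags (ADC, SBB, RCL, RCR, ADCX,
/// ADOX, or the SETB idiom) by re-creating the single flag bit it consumes
/// (CF or OF) from a saved condition register immediately before the
/// instruction.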
void X86FlagsCopyLoweringPass::rewriteArithmetic(
    MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
    const DebugLoc &TestLoc, MachineInstr &MI, MachineOperand &FlagUse,
    CondRegArray &CondRegs) {
  // Arithmetic is either reading CF or OF. Figure out which condition we need
  // to preserve in a register.
  X86::CondCode Cond = X86::COND_INVALID;

  // The addend to use to reset CF or OF when added to the flag value.
  int Addend = 0;

  switch (getMnemonicFromOpcode(MI.getOpcode())) {
  case FlagArithMnemonic::ADC:
  case FlagArithMnemonic::ADCX:
  case FlagArithMnemonic::RCL:
  case FlagArithMnemonic::RCR:
  case FlagArithMnemonic::SBB:
  case FlagArithMnemonic::SETB:
    Cond = X86::COND_B; // CF == 1
    // Set up an addend that when one is added will need a carry due to not
    // having a higher bit available.
    Addend = 255;
    break;

  case FlagArithMnemonic::ADOX:
    Cond = X86::COND_O; // OF == 1
    // Set up an addend that when one is added will turn from positive to
    // negative and thus overflow in the signed domain.
    Addend = 127;
    break;
  }
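
  // To make the trick above concrete: with Addend == 255, the 8-bit add below
  // computes 1 + 255 when the saved condition byte is 1, which carries out and
  // re-sets CF, while 0 + 255 == 255 fits in 8 bits and leaves CF clear. With
  // Addend == 127, 1 + 127 overflows to -128 in the signed 8-bit domain and
  // sets OF, while 0 + 127 does not.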

  // Now get a register that contains the value of the flag input to the
  // arithmetic. We require exactly this flag to simplify the arithmetic
  // required to materialize it back into the flag.
  unsigned &CondReg = CondRegs[Cond];
  if (!CondReg)
    CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, Cond);

  MachineBasicBlock &MBB = *MI.getParent();

  // Insert an instruction that will set the flag back to the desired value.
  Register TmpReg = MRI->createVirtualRegister(PromoteRC);
  auto AddI =
      BuildMI(MBB, MI.getIterator(), MI.getDebugLoc(), TII->get(X86::ADD8ri))
          .addDef(TmpReg, RegState::Dead)
          .addReg(CondReg)
          .addImm(Addend);
  (void)AddI;
  LLVM_DEBUG(dbgs() << " add cond: "; AddI->dump());
  ++NumAddsInserted;

  FlagUse.setIsKill(true);
}
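
/// Rewrite a CMOV that reads the copied flags so that it instead tests the
/// saved condition register and selects on ZF.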
void X86FlagsCopyLoweringPass::rewriteCMov(MachineBasicBlock &TestMBB,
                                           MachineBasicBlock::iterator TestPos,
                                           const DebugLoc &TestLoc,
                                           MachineInstr &CMovI,
                                           MachineOperand &FlagUse,
                                           CondRegArray &CondRegs) {
  // First get the register containing this specific condition.
  X86::CondCode Cond = X86::getCondFromCMov(CMovI);
  unsigned CondReg;
  bool Inverted;
  std::tie(CondReg, Inverted) =
      getCondOrInverseInReg(TestMBB, TestPos, TestLoc, Cond, CondRegs);

  MachineBasicBlock &MBB = *CMovI.getParent();

  // Insert a direct test of the saved register.
  insertTest(MBB, CMovI.getIterator(), CMovI.getDebugLoc(), CondReg);

  // Rewrite the CMov to use the !ZF flag from the test, and then kill its use
  // of the flags afterward.
  CMovI.getOperand(CMovI.getDesc().getNumOperands() - 1)
      .setImm(Inverted ? X86::COND_E : X86::COND_NE);
  FlagUse.setIsKill(true);
  LLVM_DEBUG(dbgs() << " fixed cmov: "; CMovI.dump());
}
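
/// Rewrite an x87 FCMOV that reads the copied flags: test the saved condition
/// register and switch the FCMOV to the ZF-based form of the same width.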
void X86FlagsCopyLoweringPass::rewriteFCMov(MachineBasicBlock &TestMBB,
                                            MachineBasicBlock::iterator TestPos,
                                            const DebugLoc &TestLoc,
                                            MachineInstr &CMovI,
                                            MachineOperand &FlagUse,
                                            CondRegArray &CondRegs) {
  // First get the register containing this specific condition.
  X86::CondCode Cond = getCondFromFCMOV(CMovI.getOpcode());
  unsigned CondReg;
  bool Inverted;
  std::tie(CondReg, Inverted) =
      getCondOrInverseInReg(TestMBB, TestPos, TestLoc, Cond, CondRegs);

  MachineBasicBlock &MBB = *CMovI.getParent();

  // Insert a direct test of the saved register.
  insertTest(MBB, CMovI.getIterator(), CMovI.getDebugLoc(), CondReg);

  auto getFCMOVOpcode = [](unsigned Opcode, bool Inverted) {
    switch (Opcode) {
    default: llvm_unreachable("Unexpected opcode!");
    case X86::CMOVBE_Fp32: case X86::CMOVNBE_Fp32:
    case X86::CMOVB_Fp32: case X86::CMOVNB_Fp32:
    case X86::CMOVE_Fp32: case X86::CMOVNE_Fp32:
    case X86::CMOVP_Fp32: case X86::CMOVNP_Fp32:
      return Inverted ? X86::CMOVE_Fp32 : X86::CMOVNE_Fp32;
    case X86::CMOVBE_Fp64: case X86::CMOVNBE_Fp64:
    case X86::CMOVB_Fp64: case X86::CMOVNB_Fp64:
    case X86::CMOVE_Fp64: case X86::CMOVNE_Fp64:
    case X86::CMOVP_Fp64: case X86::CMOVNP_Fp64:
      return Inverted ? X86::CMOVE_Fp64 : X86::CMOVNE_Fp64;
    case X86::CMOVBE_Fp80: case X86::CMOVNBE_Fp80:
    case X86::CMOVB_Fp80: case X86::CMOVNB_Fp80:
    case X86::CMOVE_Fp80: case X86::CMOVNE_Fp80:
    case X86::CMOVP_Fp80: case X86::CMOVNP_Fp80:
      return Inverted ? X86::CMOVE_Fp80 : X86::CMOVNE_Fp80;
    }
  };

  // Rewrite the CMov to use the !ZF flag from the test.
  CMovI.setDesc(TII->get(getFCMOVOpcode(CMovI.getOpcode(), Inverted)));
  FlagUse.setIsKill(true);
  LLVM_DEBUG(dbgs() << " fixed fcmov: "; CMovI.dump());
}
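
/// Rewrite a conditional jump that reads the copied flags so that it tests the
/// saved condition register and branches on ZF instead.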
void X86FlagsCopyLoweringPass::rewriteCondJmp(
    MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
    const DebugLoc &TestLoc, MachineInstr &JmpI, CondRegArray &CondRegs) {
  // First get the register containing this specific condition.
  X86::CondCode Cond = X86::getCondFromBranch(JmpI);
  unsigned CondReg;
  bool Inverted;
  std::tie(CondReg, Inverted) =
      getCondOrInverseInReg(TestMBB, TestPos, TestLoc, Cond, CondRegs);

  MachineBasicBlock &JmpMBB = *JmpI.getParent();

  // Insert a direct test of the saved register.
  insertTest(JmpMBB, JmpI.getIterator(), JmpI.getDebugLoc(), CondReg);

  // Rewrite the jump to use the !ZF flag from the test, and kill its use of
  // flags afterward.
  JmpI.getOperand(1).setImm(Inverted ? X86::COND_E : X86::COND_NE);
  JmpI.findRegisterUseOperand(X86::EFLAGS)->setIsKill(true);
  LLVM_DEBUG(dbgs() << " fixed jCC: "; JmpI.dump());
}

void X86FlagsCopyLoweringPass::rewriteCopy(MachineInstr &MI,
                                           MachineOperand &FlagUse,
                                           MachineInstr &CopyDefI) {
  // Just replace this copy with the original copy def.
  MRI->replaceRegWith(MI.getOperand(0).getReg(),
                      CopyDefI.getOperand(0).getReg());
  MI.eraseFromParent();
}
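
/// Rewrite a SETcc that reads the copied flags: a register-form SETcc is
/// replaced outright by the saved condition register, while a memory-form
/// SETcc becomes a plain 8-bit store of that register.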
void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &TestMBB,
                                            MachineBasicBlock::iterator TestPos,
                                            const DebugLoc &TestLoc,
                                            MachineInstr &SetCCI,
                                            MachineOperand &FlagUse,
                                            CondRegArray &CondRegs) {
  X86::CondCode Cond = X86::getCondFromSETCC(SetCCI);
  // Note that we can't usefully rewrite this to the inverse without complex
  // analysis of the users of the setCC. Largely we rely on duplicates which
  // could have been avoided already being avoided here.
  unsigned &CondReg = CondRegs[Cond];
  if (!CondReg)
    CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, Cond);

  // Rewriting a register def is trivial: we just replace the register and
  // remove the setcc.
  if (!SetCCI.mayStore()) {
    assert(SetCCI.getOperand(0).isReg() &&
           "Cannot have a non-register defined operand to SETcc!");
    Register OldReg = SetCCI.getOperand(0).getReg();
    // Drop Kill flags on the old register before replacing. CondReg may have
    // a longer live range.
    MRI->clearKillFlags(OldReg);
    MRI->replaceRegWith(OldReg, CondReg);
    SetCCI.eraseFromParent();
    return;
  }

  // Otherwise, we need to emit a store.
  auto MIB = BuildMI(*SetCCI.getParent(), SetCCI.getIterator(),
                     SetCCI.getDebugLoc(), TII->get(X86::MOV8mr));
  // Copy the address operands.
  for (int i = 0; i < X86::AddrNumOperands; ++i)
    MIB.add(SetCCI.getOperand(i));

  MIB.addReg(CondReg);

  MIB.setMemRefs(SetCCI.memoperands());

  SetCCI.eraseFromParent();
}