X86InstructionSelector.cpp 61 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511
661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731
  1. //===- X86InstructionSelector.cpp -----------------------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. /// \file
  9. /// This file implements the targeting of the InstructionSelector class for
  10. /// X86.
  11. /// \todo This should be generated by TableGen.
  12. //===----------------------------------------------------------------------===//
  13. #include "MCTargetDesc/X86BaseInfo.h"
  14. #include "X86.h"
  15. #include "X86InstrBuilder.h"
  16. #include "X86InstrInfo.h"
  17. #include "X86RegisterBankInfo.h"
  18. #include "X86RegisterInfo.h"
  19. #include "X86Subtarget.h"
  20. #include "X86TargetMachine.h"
  21. #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
  22. #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
  23. #include "llvm/CodeGen/GlobalISel/Utils.h"
  24. #include "llvm/CodeGen/MachineBasicBlock.h"
  25. #include "llvm/CodeGen/MachineConstantPool.h"
  26. #include "llvm/CodeGen/MachineFunction.h"
  27. #include "llvm/CodeGen/MachineInstr.h"
  28. #include "llvm/CodeGen/MachineInstrBuilder.h"
  29. #include "llvm/CodeGen/MachineMemOperand.h"
  30. #include "llvm/CodeGen/MachineOperand.h"
  31. #include "llvm/CodeGen/MachineRegisterInfo.h"
  32. #include "llvm/CodeGen/RegisterBank.h"
  33. #include "llvm/CodeGen/TargetOpcodes.h"
  34. #include "llvm/CodeGen/TargetRegisterInfo.h"
  35. #include "llvm/IR/DataLayout.h"
  36. #include "llvm/IR/InstrTypes.h"
  37. #include "llvm/IR/IntrinsicsX86.h"
  38. #include "llvm/Support/AtomicOrdering.h"
  39. #include "llvm/Support/CodeGen.h"
  40. #include "llvm/Support/Debug.h"
  41. #include "llvm/Support/ErrorHandling.h"
  42. #include "llvm/Support/LowLevelTypeImpl.h"
  43. #include "llvm/Support/MathExtras.h"
  44. #include "llvm/Support/raw_ostream.h"
  45. #include <cassert>
  46. #include <cstdint>
  47. #include <tuple>
  48. #define DEBUG_TYPE "X86-isel"
  49. using namespace llvm;
  50. namespace {
  51. #define GET_GLOBALISEL_PREDICATE_BITSET
  52. #include "X86GenGlobalISel.inc"
  53. #undef GET_GLOBALISEL_PREDICATE_BITSET
  54. class X86InstructionSelector : public InstructionSelector {
  55. public:
  56. X86InstructionSelector(const X86TargetMachine &TM, const X86Subtarget &STI,
  57. const X86RegisterBankInfo &RBI);
  58. bool select(MachineInstr &I) override;
  59. static const char *getName() { return DEBUG_TYPE; }
  60. private:
  61. /// tblgen-erated 'select' implementation, used as the initial selector for
  62. /// the patterns that don't require complex C++.
  63. bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
  64. // TODO: remove after supported by Tablegen-erated instruction selection.
  65. unsigned getLoadStoreOp(const LLT &Ty, const RegisterBank &RB, unsigned Opc,
  66. Align Alignment) const;
  67. bool selectLoadStoreOp(MachineInstr &I, MachineRegisterInfo &MRI,
  68. MachineFunction &MF) const;
  69. bool selectFrameIndexOrGep(MachineInstr &I, MachineRegisterInfo &MRI,
  70. MachineFunction &MF) const;
  71. bool selectGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI,
  72. MachineFunction &MF) const;
  73. bool selectConstant(MachineInstr &I, MachineRegisterInfo &MRI,
  74. MachineFunction &MF) const;
  75. bool selectTruncOrPtrToInt(MachineInstr &I, MachineRegisterInfo &MRI,
  76. MachineFunction &MF) const;
  77. bool selectZext(MachineInstr &I, MachineRegisterInfo &MRI,
  78. MachineFunction &MF) const;
  79. bool selectAnyext(MachineInstr &I, MachineRegisterInfo &MRI,
  80. MachineFunction &MF) const;
  81. bool selectCmp(MachineInstr &I, MachineRegisterInfo &MRI,
  82. MachineFunction &MF) const;
  83. bool selectFCmp(MachineInstr &I, MachineRegisterInfo &MRI,
  84. MachineFunction &MF) const;
  85. bool selectUadde(MachineInstr &I, MachineRegisterInfo &MRI,
  86. MachineFunction &MF) const;
  87. bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI) const;
  88. bool selectCopy(MachineInstr &I, MachineRegisterInfo &MRI) const;
  89. bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI,
  90. MachineFunction &MF);
  91. bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI,
  92. MachineFunction &MF);
  93. bool selectInsert(MachineInstr &I, MachineRegisterInfo &MRI,
  94. MachineFunction &MF) const;
  95. bool selectExtract(MachineInstr &I, MachineRegisterInfo &MRI,
  96. MachineFunction &MF) const;
  97. bool selectCondBranch(MachineInstr &I, MachineRegisterInfo &MRI,
  98. MachineFunction &MF) const;
  99. bool selectTurnIntoCOPY(MachineInstr &I, MachineRegisterInfo &MRI,
  100. const unsigned DstReg,
  101. const TargetRegisterClass *DstRC,
  102. const unsigned SrcReg,
  103. const TargetRegisterClass *SrcRC) const;
  104. bool materializeFP(MachineInstr &I, MachineRegisterInfo &MRI,
  105. MachineFunction &MF) const;
  106. bool selectImplicitDefOrPHI(MachineInstr &I, MachineRegisterInfo &MRI) const;
  107. bool selectDivRem(MachineInstr &I, MachineRegisterInfo &MRI,
  108. MachineFunction &MF) const;
  109. bool selectIntrinsicWSideEffects(MachineInstr &I, MachineRegisterInfo &MRI,
  110. MachineFunction &MF) const;
  111. // emit insert subreg instruction and insert it before MachineInstr &I
  112. bool emitInsertSubreg(unsigned DstReg, unsigned SrcReg, MachineInstr &I,
  113. MachineRegisterInfo &MRI, MachineFunction &MF) const;
  114. // emit extract subreg instruction and insert it before MachineInstr &I
  115. bool emitExtractSubreg(unsigned DstReg, unsigned SrcReg, MachineInstr &I,
  116. MachineRegisterInfo &MRI, MachineFunction &MF) const;
  117. const TargetRegisterClass *getRegClass(LLT Ty, const RegisterBank &RB) const;
  118. const TargetRegisterClass *getRegClass(LLT Ty, unsigned Reg,
  119. MachineRegisterInfo &MRI) const;
  120. const X86TargetMachine &TM;
  121. const X86Subtarget &STI;
  122. const X86InstrInfo &TII;
  123. const X86RegisterInfo &TRI;
  124. const X86RegisterBankInfo &RBI;
  125. #define GET_GLOBALISEL_PREDICATES_DECL
  126. #include "X86GenGlobalISel.inc"
  127. #undef GET_GLOBALISEL_PREDICATES_DECL
  128. #define GET_GLOBALISEL_TEMPORARIES_DECL
  129. #include "X86GenGlobalISel.inc"
  130. #undef GET_GLOBALISEL_TEMPORARIES_DECL
  131. };
  132. } // end anonymous namespace
  133. #define GET_GLOBALISEL_IMPL
  134. #include "X86GenGlobalISel.inc"
  135. #undef GET_GLOBALISEL_IMPL
  136. X86InstructionSelector::X86InstructionSelector(const X86TargetMachine &TM,
  137. const X86Subtarget &STI,
  138. const X86RegisterBankInfo &RBI)
  139. : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
  140. RBI(RBI),
  141. #define GET_GLOBALISEL_PREDICATES_INIT
  142. #include "X86GenGlobalISel.inc"
  143. #undef GET_GLOBALISEL_PREDICATES_INIT
  144. #define GET_GLOBALISEL_TEMPORARIES_INIT
  145. #include "X86GenGlobalISel.inc"
  146. #undef GET_GLOBALISEL_TEMPORARIES_INIT
  147. {
  148. }
  149. // FIXME: This should be target-independent, inferred from the types declared
  150. // for each class in the bank.
  151. const TargetRegisterClass *
  152. X86InstructionSelector::getRegClass(LLT Ty, const RegisterBank &RB) const {
  153. if (RB.getID() == X86::GPRRegBankID) {
  154. if (Ty.getSizeInBits() <= 8)
  155. return &X86::GR8RegClass;
  156. if (Ty.getSizeInBits() == 16)
  157. return &X86::GR16RegClass;
  158. if (Ty.getSizeInBits() == 32)
  159. return &X86::GR32RegClass;
  160. if (Ty.getSizeInBits() == 64)
  161. return &X86::GR64RegClass;
  162. }
  163. if (RB.getID() == X86::VECRRegBankID) {
  164. if (Ty.getSizeInBits() == 16)
  165. return STI.hasAVX512() ? &X86::FR16XRegClass : &X86::FR16RegClass;
  166. if (Ty.getSizeInBits() == 32)
  167. return STI.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
  168. if (Ty.getSizeInBits() == 64)
  169. return STI.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
  170. if (Ty.getSizeInBits() == 128)
  171. return STI.hasAVX512() ? &X86::VR128XRegClass : &X86::VR128RegClass;
  172. if (Ty.getSizeInBits() == 256)
  173. return STI.hasAVX512() ? &X86::VR256XRegClass : &X86::VR256RegClass;
  174. if (Ty.getSizeInBits() == 512)
  175. return &X86::VR512RegClass;
  176. }
  177. llvm_unreachable("Unknown RegBank!");
  178. }
  179. const TargetRegisterClass *
  180. X86InstructionSelector::getRegClass(LLT Ty, unsigned Reg,
  181. MachineRegisterInfo &MRI) const {
  182. const RegisterBank &RegBank = *RBI.getRegBank(Reg, MRI, TRI);
  183. return getRegClass(Ty, RegBank);
  184. }
  185. static unsigned getSubRegIndex(const TargetRegisterClass *RC) {
  186. unsigned SubIdx = X86::NoSubRegister;
  187. if (RC == &X86::GR32RegClass) {
  188. SubIdx = X86::sub_32bit;
  189. } else if (RC == &X86::GR16RegClass) {
  190. SubIdx = X86::sub_16bit;
  191. } else if (RC == &X86::GR8RegClass) {
  192. SubIdx = X86::sub_8bit;
  193. }
  194. return SubIdx;
  195. }
  196. static const TargetRegisterClass *getRegClassFromGRPhysReg(Register Reg) {
  197. assert(Reg.isPhysical());
  198. if (X86::GR64RegClass.contains(Reg))
  199. return &X86::GR64RegClass;
  200. if (X86::GR32RegClass.contains(Reg))
  201. return &X86::GR32RegClass;
  202. if (X86::GR16RegClass.contains(Reg))
  203. return &X86::GR16RegClass;
  204. if (X86::GR8RegClass.contains(Reg))
  205. return &X86::GR8RegClass;
  206. llvm_unreachable("Unknown RegClass for PhysReg!");
  207. }
  208. // FIXME: We need some sort of API in RBI/TRI to allow generic code to
  209. // constrain operands of simple instructions given a TargetRegisterClass
  210. // and LLT
  211. bool X86InstructionSelector::selectDebugInstr(MachineInstr &I,
  212. MachineRegisterInfo &MRI) const {
  213. for (MachineOperand &MO : I.operands()) {
  214. if (!MO.isReg())
  215. continue;
  216. Register Reg = MO.getReg();
  217. if (!Reg)
  218. continue;
  219. if (Reg.isPhysical())
  220. continue;
  221. LLT Ty = MRI.getType(Reg);
  222. const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
  223. const TargetRegisterClass *RC =
  224. RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
  225. if (!RC) {
  226. const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
  227. RC = getRegClass(Ty, RB);
  228. if (!RC) {
  229. LLVM_DEBUG(
  230. dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");
  231. break;
  232. }
  233. }
  234. RBI.constrainGenericRegister(Reg, *RC, MRI);
  235. }
  236. return true;
  237. }
  238. // Set X86 Opcode and constrain DestReg.
  239. bool X86InstructionSelector::selectCopy(MachineInstr &I,
  240. MachineRegisterInfo &MRI) const {
  241. Register DstReg = I.getOperand(0).getReg();
  242. const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  243. const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  244. Register SrcReg = I.getOperand(1).getReg();
  245. const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
  246. const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
  247. if (DstReg.isPhysical()) {
  248. assert(I.isCopy() && "Generic operators do not allow physical registers");
  249. if (DstSize > SrcSize && SrcRegBank.getID() == X86::GPRRegBankID &&
  250. DstRegBank.getID() == X86::GPRRegBankID) {
  251. const TargetRegisterClass *SrcRC =
  252. getRegClass(MRI.getType(SrcReg), SrcRegBank);
  253. const TargetRegisterClass *DstRC = getRegClassFromGRPhysReg(DstReg);
  254. if (SrcRC != DstRC) {
  255. // This case can be generated by ABI lowering, performe anyext
  256. Register ExtSrc = MRI.createVirtualRegister(DstRC);
  257. BuildMI(*I.getParent(), I, I.getDebugLoc(),
  258. TII.get(TargetOpcode::SUBREG_TO_REG))
  259. .addDef(ExtSrc)
  260. .addImm(0)
  261. .addReg(SrcReg)
  262. .addImm(getSubRegIndex(SrcRC));
  263. I.getOperand(1).setReg(ExtSrc);
  264. }
  265. }
  266. return true;
  267. }
  268. assert((!SrcReg.isPhysical() || I.isCopy()) &&
  269. "No phys reg on generic operators");
  270. assert((DstSize == SrcSize ||
  271. // Copies are a mean to setup initial types, the number of
  272. // bits may not exactly match.
  273. (SrcReg.isPhysical() &&
  274. DstSize <= RBI.getSizeInBits(SrcReg, MRI, TRI))) &&
  275. "Copy with different width?!");
  276. const TargetRegisterClass *DstRC =
  277. getRegClass(MRI.getType(DstReg), DstRegBank);
  278. if (SrcRegBank.getID() == X86::GPRRegBankID &&
  279. DstRegBank.getID() == X86::GPRRegBankID && SrcSize > DstSize &&
  280. SrcReg.isPhysical()) {
  281. // Change the physical register to performe truncate.
  282. const TargetRegisterClass *SrcRC = getRegClassFromGRPhysReg(SrcReg);
  283. if (DstRC != SrcRC) {
  284. I.getOperand(1).setSubReg(getSubRegIndex(DstRC));
  285. I.getOperand(1).substPhysReg(SrcReg, TRI);
  286. }
  287. }
  288. // No need to constrain SrcReg. It will get constrained when
  289. // we hit another of its use or its defs.
  290. // Copies do not have constraints.
  291. const TargetRegisterClass *OldRC = MRI.getRegClassOrNull(DstReg);
  292. if (!OldRC || !DstRC->hasSubClassEq(OldRC)) {
  293. if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
  294. LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
  295. << " operand\n");
  296. return false;
  297. }
  298. }
  299. I.setDesc(TII.get(X86::COPY));
  300. return true;
  301. }
  302. bool X86InstructionSelector::select(MachineInstr &I) {
  303. assert(I.getParent() && "Instruction should be in a basic block!");
  304. assert(I.getParent()->getParent() && "Instruction should be in a function!");
  305. MachineBasicBlock &MBB = *I.getParent();
  306. MachineFunction &MF = *MBB.getParent();
  307. MachineRegisterInfo &MRI = MF.getRegInfo();
  308. unsigned Opcode = I.getOpcode();
  309. if (!isPreISelGenericOpcode(Opcode)) {
  310. // Certain non-generic instructions also need some special handling.
  311. if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
  312. return false;
  313. if (I.isCopy())
  314. return selectCopy(I, MRI);
  315. if (I.isDebugInstr())
  316. return selectDebugInstr(I, MRI);
  317. return true;
  318. }
  319. assert(I.getNumOperands() == I.getNumExplicitOperands() &&
  320. "Generic instruction has unexpected implicit operands\n");
  321. if (selectImpl(I, *CoverageInfo))
  322. return true;
  323. LLVM_DEBUG(dbgs() << " C++ instruction selection: "; I.print(dbgs()));
  324. // TODO: This should be implemented by tblgen.
  325. switch (I.getOpcode()) {
  326. default:
  327. return false;
  328. case TargetOpcode::G_STORE:
  329. case TargetOpcode::G_LOAD:
  330. return selectLoadStoreOp(I, MRI, MF);
  331. case TargetOpcode::G_PTR_ADD:
  332. case TargetOpcode::G_FRAME_INDEX:
  333. return selectFrameIndexOrGep(I, MRI, MF);
  334. case TargetOpcode::G_GLOBAL_VALUE:
  335. return selectGlobalValue(I, MRI, MF);
  336. case TargetOpcode::G_CONSTANT:
  337. return selectConstant(I, MRI, MF);
  338. case TargetOpcode::G_FCONSTANT:
  339. return materializeFP(I, MRI, MF);
  340. case TargetOpcode::G_PTRTOINT:
  341. case TargetOpcode::G_TRUNC:
  342. return selectTruncOrPtrToInt(I, MRI, MF);
  343. case TargetOpcode::G_INTTOPTR:
  344. return selectCopy(I, MRI);
  345. case TargetOpcode::G_ZEXT:
  346. return selectZext(I, MRI, MF);
  347. case TargetOpcode::G_ANYEXT:
  348. return selectAnyext(I, MRI, MF);
  349. case TargetOpcode::G_ICMP:
  350. return selectCmp(I, MRI, MF);
  351. case TargetOpcode::G_FCMP:
  352. return selectFCmp(I, MRI, MF);
  353. case TargetOpcode::G_UADDE:
  354. return selectUadde(I, MRI, MF);
  355. case TargetOpcode::G_UNMERGE_VALUES:
  356. return selectUnmergeValues(I, MRI, MF);
  357. case TargetOpcode::G_MERGE_VALUES:
  358. case TargetOpcode::G_CONCAT_VECTORS:
  359. return selectMergeValues(I, MRI, MF);
  360. case TargetOpcode::G_EXTRACT:
  361. return selectExtract(I, MRI, MF);
  362. case TargetOpcode::G_INSERT:
  363. return selectInsert(I, MRI, MF);
  364. case TargetOpcode::G_BRCOND:
  365. return selectCondBranch(I, MRI, MF);
  366. case TargetOpcode::G_IMPLICIT_DEF:
  367. case TargetOpcode::G_PHI:
  368. return selectImplicitDefOrPHI(I, MRI);
  369. case TargetOpcode::G_SDIV:
  370. case TargetOpcode::G_UDIV:
  371. case TargetOpcode::G_SREM:
  372. case TargetOpcode::G_UREM:
  373. return selectDivRem(I, MRI, MF);
  374. case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
  375. return selectIntrinsicWSideEffects(I, MRI, MF);
  376. }
  377. return false;
  378. }
  379. unsigned X86InstructionSelector::getLoadStoreOp(const LLT &Ty,
  380. const RegisterBank &RB,
  381. unsigned Opc,
  382. Align Alignment) const {
  383. bool Isload = (Opc == TargetOpcode::G_LOAD);
  384. bool HasAVX = STI.hasAVX();
  385. bool HasAVX512 = STI.hasAVX512();
  386. bool HasVLX = STI.hasVLX();
  387. if (Ty == LLT::scalar(8)) {
  388. if (X86::GPRRegBankID == RB.getID())
  389. return Isload ? X86::MOV8rm : X86::MOV8mr;
  390. } else if (Ty == LLT::scalar(16)) {
  391. if (X86::GPRRegBankID == RB.getID())
  392. return Isload ? X86::MOV16rm : X86::MOV16mr;
  393. } else if (Ty == LLT::scalar(32) || Ty == LLT::pointer(0, 32)) {
  394. if (X86::GPRRegBankID == RB.getID())
  395. return Isload ? X86::MOV32rm : X86::MOV32mr;
  396. if (X86::VECRRegBankID == RB.getID())
  397. return Isload ? (HasAVX512 ? X86::VMOVSSZrm_alt :
  398. HasAVX ? X86::VMOVSSrm_alt :
  399. X86::MOVSSrm_alt)
  400. : (HasAVX512 ? X86::VMOVSSZmr :
  401. HasAVX ? X86::VMOVSSmr :
  402. X86::MOVSSmr);
  403. } else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) {
  404. if (X86::GPRRegBankID == RB.getID())
  405. return Isload ? X86::MOV64rm : X86::MOV64mr;
  406. if (X86::VECRRegBankID == RB.getID())
  407. return Isload ? (HasAVX512 ? X86::VMOVSDZrm_alt :
  408. HasAVX ? X86::VMOVSDrm_alt :
  409. X86::MOVSDrm_alt)
  410. : (HasAVX512 ? X86::VMOVSDZmr :
  411. HasAVX ? X86::VMOVSDmr :
  412. X86::MOVSDmr);
  413. } else if (Ty.isVector() && Ty.getSizeInBits() == 128) {
  414. if (Alignment >= Align(16))
  415. return Isload ? (HasVLX ? X86::VMOVAPSZ128rm
  416. : HasAVX512
  417. ? X86::VMOVAPSZ128rm_NOVLX
  418. : HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm)
  419. : (HasVLX ? X86::VMOVAPSZ128mr
  420. : HasAVX512
  421. ? X86::VMOVAPSZ128mr_NOVLX
  422. : HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr);
  423. else
  424. return Isload ? (HasVLX ? X86::VMOVUPSZ128rm
  425. : HasAVX512
  426. ? X86::VMOVUPSZ128rm_NOVLX
  427. : HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm)
  428. : (HasVLX ? X86::VMOVUPSZ128mr
  429. : HasAVX512
  430. ? X86::VMOVUPSZ128mr_NOVLX
  431. : HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr);
  432. } else if (Ty.isVector() && Ty.getSizeInBits() == 256) {
  433. if (Alignment >= Align(32))
  434. return Isload ? (HasVLX ? X86::VMOVAPSZ256rm
  435. : HasAVX512 ? X86::VMOVAPSZ256rm_NOVLX
  436. : X86::VMOVAPSYrm)
  437. : (HasVLX ? X86::VMOVAPSZ256mr
  438. : HasAVX512 ? X86::VMOVAPSZ256mr_NOVLX
  439. : X86::VMOVAPSYmr);
  440. else
  441. return Isload ? (HasVLX ? X86::VMOVUPSZ256rm
  442. : HasAVX512 ? X86::VMOVUPSZ256rm_NOVLX
  443. : X86::VMOVUPSYrm)
  444. : (HasVLX ? X86::VMOVUPSZ256mr
  445. : HasAVX512 ? X86::VMOVUPSZ256mr_NOVLX
  446. : X86::VMOVUPSYmr);
  447. } else if (Ty.isVector() && Ty.getSizeInBits() == 512) {
  448. if (Alignment >= Align(64))
  449. return Isload ? X86::VMOVAPSZrm : X86::VMOVAPSZmr;
  450. else
  451. return Isload ? X86::VMOVUPSZrm : X86::VMOVUPSZmr;
  452. }
  453. return Opc;
  454. }
  455. // Fill in an address from the given instruction.
  456. static void X86SelectAddress(const MachineInstr &I,
  457. const MachineRegisterInfo &MRI,
  458. X86AddressMode &AM) {
  459. assert(I.getOperand(0).isReg() && "unsupported opperand.");
  460. assert(MRI.getType(I.getOperand(0).getReg()).isPointer() &&
  461. "unsupported type.");
  462. if (I.getOpcode() == TargetOpcode::G_PTR_ADD) {
  463. if (auto COff = getIConstantVRegSExtVal(I.getOperand(2).getReg(), MRI)) {
  464. int64_t Imm = *COff;
  465. if (isInt<32>(Imm)) { // Check for displacement overflow.
  466. AM.Disp = static_cast<int32_t>(Imm);
  467. AM.Base.Reg = I.getOperand(1).getReg();
  468. return;
  469. }
  470. }
  471. } else if (I.getOpcode() == TargetOpcode::G_FRAME_INDEX) {
  472. AM.Base.FrameIndex = I.getOperand(1).getIndex();
  473. AM.BaseType = X86AddressMode::FrameIndexBase;
  474. return;
  475. }
  476. // Default behavior.
  477. AM.Base.Reg = I.getOperand(0).getReg();
  478. }
  479. bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I,
  480. MachineRegisterInfo &MRI,
  481. MachineFunction &MF) const {
  482. unsigned Opc = I.getOpcode();
  483. assert((Opc == TargetOpcode::G_STORE || Opc == TargetOpcode::G_LOAD) &&
  484. "unexpected instruction");
  485. const Register DefReg = I.getOperand(0).getReg();
  486. LLT Ty = MRI.getType(DefReg);
  487. const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
  488. assert(I.hasOneMemOperand());
  489. auto &MemOp = **I.memoperands_begin();
  490. if (MemOp.isAtomic()) {
  491. // Note: for unordered operations, we rely on the fact the appropriate MMO
  492. // is already on the instruction we're mutating, and thus we don't need to
  493. // make any changes. So long as we select an opcode which is capable of
  494. // loading or storing the appropriate size atomically, the rest of the
  495. // backend is required to respect the MMO state.
  496. if (!MemOp.isUnordered()) {
  497. LLVM_DEBUG(dbgs() << "Atomic ordering not supported yet\n");
  498. return false;
  499. }
  500. if (MemOp.getAlign() < Ty.getSizeInBits() / 8) {
  501. LLVM_DEBUG(dbgs() << "Unaligned atomics not supported yet\n");
  502. return false;
  503. }
  504. }
  505. unsigned NewOpc = getLoadStoreOp(Ty, RB, Opc, MemOp.getAlign());
  506. if (NewOpc == Opc)
  507. return false;
  508. X86AddressMode AM;
  509. X86SelectAddress(*MRI.getVRegDef(I.getOperand(1).getReg()), MRI, AM);
  510. I.setDesc(TII.get(NewOpc));
  511. MachineInstrBuilder MIB(MF, I);
  512. if (Opc == TargetOpcode::G_LOAD) {
  513. I.removeOperand(1);
  514. addFullAddress(MIB, AM);
  515. } else {
  516. // G_STORE (VAL, Addr), X86Store instruction (Addr, VAL)
  517. I.removeOperand(1);
  518. I.removeOperand(0);
  519. addFullAddress(MIB, AM).addUse(DefReg);
  520. }
  521. return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  522. }
  523. static unsigned getLeaOP(LLT Ty, const X86Subtarget &STI) {
  524. if (Ty == LLT::pointer(0, 64))
  525. return X86::LEA64r;
  526. else if (Ty == LLT::pointer(0, 32))
  527. return STI.isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r;
  528. else
  529. llvm_unreachable("Can't get LEA opcode. Unsupported type.");
  530. }
  531. bool X86InstructionSelector::selectFrameIndexOrGep(MachineInstr &I,
  532. MachineRegisterInfo &MRI,
  533. MachineFunction &MF) const {
  534. unsigned Opc = I.getOpcode();
  535. assert((Opc == TargetOpcode::G_FRAME_INDEX || Opc == TargetOpcode::G_PTR_ADD) &&
  536. "unexpected instruction");
  537. const Register DefReg = I.getOperand(0).getReg();
  538. LLT Ty = MRI.getType(DefReg);
  539. // Use LEA to calculate frame index and GEP
  540. unsigned NewOpc = getLeaOP(Ty, STI);
  541. I.setDesc(TII.get(NewOpc));
  542. MachineInstrBuilder MIB(MF, I);
  543. if (Opc == TargetOpcode::G_FRAME_INDEX) {
  544. addOffset(MIB, 0);
  545. } else {
  546. MachineOperand &InxOp = I.getOperand(2);
  547. I.addOperand(InxOp); // set IndexReg
  548. InxOp.ChangeToImmediate(1); // set Scale
  549. MIB.addImm(0).addReg(0);
  550. }
  551. return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  552. }
  553. bool X86InstructionSelector::selectGlobalValue(MachineInstr &I,
  554. MachineRegisterInfo &MRI,
  555. MachineFunction &MF) const {
  556. assert((I.getOpcode() == TargetOpcode::G_GLOBAL_VALUE) &&
  557. "unexpected instruction");
  558. auto GV = I.getOperand(1).getGlobal();
  559. if (GV->isThreadLocal()) {
  560. return false; // TODO: we don't support TLS yet.
  561. }
  562. // Can't handle alternate code models yet.
  563. if (TM.getCodeModel() != CodeModel::Small)
  564. return false;
  565. X86AddressMode AM;
  566. AM.GV = GV;
  567. AM.GVOpFlags = STI.classifyGlobalReference(GV);
  568. // TODO: The ABI requires an extra load. not supported yet.
  569. if (isGlobalStubReference(AM.GVOpFlags))
  570. return false;
  571. // TODO: This reference is relative to the pic base. not supported yet.
  572. if (isGlobalRelativeToPICBase(AM.GVOpFlags))
  573. return false;
  574. if (STI.isPICStyleRIPRel()) {
  575. // Use rip-relative addressing.
  576. assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
  577. AM.Base.Reg = X86::RIP;
  578. }
  579. const Register DefReg = I.getOperand(0).getReg();
  580. LLT Ty = MRI.getType(DefReg);
  581. unsigned NewOpc = getLeaOP(Ty, STI);
  582. I.setDesc(TII.get(NewOpc));
  583. MachineInstrBuilder MIB(MF, I);
  584. I.removeOperand(1);
  585. addFullAddress(MIB, AM);
  586. return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  587. }
  588. bool X86InstructionSelector::selectConstant(MachineInstr &I,
  589. MachineRegisterInfo &MRI,
  590. MachineFunction &MF) const {
  591. assert((I.getOpcode() == TargetOpcode::G_CONSTANT) &&
  592. "unexpected instruction");
  593. const Register DefReg = I.getOperand(0).getReg();
  594. LLT Ty = MRI.getType(DefReg);
  595. if (RBI.getRegBank(DefReg, MRI, TRI)->getID() != X86::GPRRegBankID)
  596. return false;
  597. uint64_t Val = 0;
  598. if (I.getOperand(1).isCImm()) {
  599. Val = I.getOperand(1).getCImm()->getZExtValue();
  600. I.getOperand(1).ChangeToImmediate(Val);
  601. } else if (I.getOperand(1).isImm()) {
  602. Val = I.getOperand(1).getImm();
  603. } else
  604. llvm_unreachable("Unsupported operand type.");
  605. unsigned NewOpc;
  606. switch (Ty.getSizeInBits()) {
  607. case 8:
  608. NewOpc = X86::MOV8ri;
  609. break;
  610. case 16:
  611. NewOpc = X86::MOV16ri;
  612. break;
  613. case 32:
  614. NewOpc = X86::MOV32ri;
  615. break;
  616. case 64:
  617. // TODO: in case isUInt<32>(Val), X86::MOV32ri can be used
  618. if (isInt<32>(Val))
  619. NewOpc = X86::MOV64ri32;
  620. else
  621. NewOpc = X86::MOV64ri;
  622. break;
  623. default:
  624. llvm_unreachable("Can't select G_CONSTANT, unsupported type.");
  625. }
  626. I.setDesc(TII.get(NewOpc));
  627. return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  628. }
  629. // Helper function for selectTruncOrPtrToInt and selectAnyext.
  630. // Returns true if DstRC lives on a floating register class and
  631. // SrcRC lives on a 128-bit vector class.
  632. static bool canTurnIntoCOPY(const TargetRegisterClass *DstRC,
  633. const TargetRegisterClass *SrcRC) {
  634. return (DstRC == &X86::FR32RegClass || DstRC == &X86::FR32XRegClass ||
  635. DstRC == &X86::FR64RegClass || DstRC == &X86::FR64XRegClass) &&
  636. (SrcRC == &X86::VR128RegClass || SrcRC == &X86::VR128XRegClass);
  637. }
  638. bool X86InstructionSelector::selectTurnIntoCOPY(
  639. MachineInstr &I, MachineRegisterInfo &MRI, const unsigned DstReg,
  640. const TargetRegisterClass *DstRC, const unsigned SrcReg,
  641. const TargetRegisterClass *SrcRC) const {
  642. if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
  643. !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
  644. LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
  645. << " operand\n");
  646. return false;
  647. }
  648. I.setDesc(TII.get(X86::COPY));
  649. return true;
  650. }
  651. bool X86InstructionSelector::selectTruncOrPtrToInt(MachineInstr &I,
  652. MachineRegisterInfo &MRI,
  653. MachineFunction &MF) const {
  654. assert((I.getOpcode() == TargetOpcode::G_TRUNC ||
  655. I.getOpcode() == TargetOpcode::G_PTRTOINT) &&
  656. "unexpected instruction");
  657. const Register DstReg = I.getOperand(0).getReg();
  658. const Register SrcReg = I.getOperand(1).getReg();
  659. const LLT DstTy = MRI.getType(DstReg);
  660. const LLT SrcTy = MRI.getType(SrcReg);
  661. const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
  662. const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
  663. if (DstRB.getID() != SrcRB.getID()) {
  664. LLVM_DEBUG(dbgs() << TII.getName(I.getOpcode())
  665. << " input/output on different banks\n");
  666. return false;
  667. }
  668. const TargetRegisterClass *DstRC = getRegClass(DstTy, DstRB);
  669. const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcRB);
  670. if (!DstRC || !SrcRC)
  671. return false;
  672. // If that's truncation of the value that lives on the vector class and goes
  673. // into the floating class, just replace it with copy, as we are able to
  674. // select it as a regular move.
  675. if (canTurnIntoCOPY(DstRC, SrcRC))
  676. return selectTurnIntoCOPY(I, MRI, DstReg, DstRC, SrcReg, SrcRC);
  677. if (DstRB.getID() != X86::GPRRegBankID)
  678. return false;
  679. unsigned SubIdx;
  680. if (DstRC == SrcRC) {
  681. // Nothing to be done
  682. SubIdx = X86::NoSubRegister;
  683. } else if (DstRC == &X86::GR32RegClass) {
  684. SubIdx = X86::sub_32bit;
  685. } else if (DstRC == &X86::GR16RegClass) {
  686. SubIdx = X86::sub_16bit;
  687. } else if (DstRC == &X86::GR8RegClass) {
  688. SubIdx = X86::sub_8bit;
  689. } else {
  690. return false;
  691. }
  692. SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubIdx);
  693. if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
  694. !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
  695. LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
  696. << "\n");
  697. return false;
  698. }
  699. I.getOperand(1).setSubReg(SubIdx);
  700. I.setDesc(TII.get(X86::COPY));
  701. return true;
  702. }
  703. bool X86InstructionSelector::selectZext(MachineInstr &I,
  704. MachineRegisterInfo &MRI,
  705. MachineFunction &MF) const {
  706. assert((I.getOpcode() == TargetOpcode::G_ZEXT) && "unexpected instruction");
  707. const Register DstReg = I.getOperand(0).getReg();
  708. const Register SrcReg = I.getOperand(1).getReg();
  709. const LLT DstTy = MRI.getType(DstReg);
  710. const LLT SrcTy = MRI.getType(SrcReg);
  711. assert(!(SrcTy == LLT::scalar(8) && DstTy == LLT::scalar(16)) &&
  712. "8=>16 Zext is handled by tablegen");
  713. assert(!(SrcTy == LLT::scalar(8) && DstTy == LLT::scalar(32)) &&
  714. "8=>32 Zext is handled by tablegen");
  715. assert(!(SrcTy == LLT::scalar(16) && DstTy == LLT::scalar(32)) &&
  716. "16=>32 Zext is handled by tablegen");
  717. assert(!(SrcTy == LLT::scalar(8) && DstTy == LLT::scalar(64)) &&
  718. "8=>64 Zext is handled by tablegen");
  719. assert(!(SrcTy == LLT::scalar(16) && DstTy == LLT::scalar(64)) &&
  720. "16=>64 Zext is handled by tablegen");
  721. assert(!(SrcTy == LLT::scalar(32) && DstTy == LLT::scalar(64)) &&
  722. "32=>64 Zext is handled by tablegen");
  723. if (SrcTy != LLT::scalar(1))
  724. return false;
  725. unsigned AndOpc;
  726. if (DstTy == LLT::scalar(8))
  727. AndOpc = X86::AND8ri;
  728. else if (DstTy == LLT::scalar(16))
  729. AndOpc = X86::AND16ri8;
  730. else if (DstTy == LLT::scalar(32))
  731. AndOpc = X86::AND32ri8;
  732. else if (DstTy == LLT::scalar(64))
  733. AndOpc = X86::AND64ri8;
  734. else
  735. return false;
  736. Register DefReg = SrcReg;
  737. if (DstTy != LLT::scalar(8)) {
  738. Register ImpDefReg =
  739. MRI.createVirtualRegister(getRegClass(DstTy, DstReg, MRI));
  740. BuildMI(*I.getParent(), I, I.getDebugLoc(),
  741. TII.get(TargetOpcode::IMPLICIT_DEF), ImpDefReg);
  742. DefReg = MRI.createVirtualRegister(getRegClass(DstTy, DstReg, MRI));
  743. BuildMI(*I.getParent(), I, I.getDebugLoc(),
  744. TII.get(TargetOpcode::INSERT_SUBREG), DefReg)
  745. .addReg(ImpDefReg)
  746. .addReg(SrcReg)
  747. .addImm(X86::sub_8bit);
  748. }
  749. MachineInstr &AndInst =
  750. *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AndOpc), DstReg)
  751. .addReg(DefReg)
  752. .addImm(1);
  753. constrainSelectedInstRegOperands(AndInst, TII, TRI, RBI);
  754. I.eraseFromParent();
  755. return true;
  756. }
  757. bool X86InstructionSelector::selectAnyext(MachineInstr &I,
  758. MachineRegisterInfo &MRI,
  759. MachineFunction &MF) const {
  760. assert((I.getOpcode() == TargetOpcode::G_ANYEXT) && "unexpected instruction");
  761. const Register DstReg = I.getOperand(0).getReg();
  762. const Register SrcReg = I.getOperand(1).getReg();
  763. const LLT DstTy = MRI.getType(DstReg);
  764. const LLT SrcTy = MRI.getType(SrcReg);
  765. const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
  766. const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
  767. assert(DstRB.getID() == SrcRB.getID() &&
  768. "G_ANYEXT input/output on different banks\n");
  769. assert(DstTy.getSizeInBits() > SrcTy.getSizeInBits() &&
  770. "G_ANYEXT incorrect operand size");
  771. const TargetRegisterClass *DstRC = getRegClass(DstTy, DstRB);
  772. const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcRB);
  773. // If that's ANY_EXT of the value that lives on the floating class and goes
  774. // into the vector class, just replace it with copy, as we are able to select
  775. // it as a regular move.
  776. if (canTurnIntoCOPY(SrcRC, DstRC))
  777. return selectTurnIntoCOPY(I, MRI, SrcReg, SrcRC, DstReg, DstRC);
  778. if (DstRB.getID() != X86::GPRRegBankID)
  779. return false;
  780. if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
  781. !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
  782. LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
  783. << " operand\n");
  784. return false;
  785. }
  786. if (SrcRC == DstRC) {
  787. I.setDesc(TII.get(X86::COPY));
  788. return true;
  789. }
  790. BuildMI(*I.getParent(), I, I.getDebugLoc(),
  791. TII.get(TargetOpcode::SUBREG_TO_REG))
  792. .addDef(DstReg)
  793. .addImm(0)
  794. .addReg(SrcReg)
  795. .addImm(getSubRegIndex(SrcRC));
  796. I.eraseFromParent();
  797. return true;
  798. }
  799. bool X86InstructionSelector::selectCmp(MachineInstr &I,
  800. MachineRegisterInfo &MRI,
  801. MachineFunction &MF) const {
  802. assert((I.getOpcode() == TargetOpcode::G_ICMP) && "unexpected instruction");
  803. X86::CondCode CC;
  804. bool SwapArgs;
  805. std::tie(CC, SwapArgs) = X86::getX86ConditionCode(
  806. (CmpInst::Predicate)I.getOperand(1).getPredicate());
  807. Register LHS = I.getOperand(2).getReg();
  808. Register RHS = I.getOperand(3).getReg();
  809. if (SwapArgs)
  810. std::swap(LHS, RHS);
  811. unsigned OpCmp;
  812. LLT Ty = MRI.getType(LHS);
  813. switch (Ty.getSizeInBits()) {
  814. default:
  815. return false;
  816. case 8:
  817. OpCmp = X86::CMP8rr;
  818. break;
  819. case 16:
  820. OpCmp = X86::CMP16rr;
  821. break;
  822. case 32:
  823. OpCmp = X86::CMP32rr;
  824. break;
  825. case 64:
  826. OpCmp = X86::CMP64rr;
  827. break;
  828. }
  829. MachineInstr &CmpInst =
  830. *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpCmp))
  831. .addReg(LHS)
  832. .addReg(RHS);
  833. MachineInstr &SetInst = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
  834. TII.get(X86::SETCCr), I.getOperand(0).getReg()).addImm(CC);
  835. constrainSelectedInstRegOperands(CmpInst, TII, TRI, RBI);
  836. constrainSelectedInstRegOperands(SetInst, TII, TRI, RBI);
  837. I.eraseFromParent();
  838. return true;
  839. }
  840. bool X86InstructionSelector::selectFCmp(MachineInstr &I,
  841. MachineRegisterInfo &MRI,
  842. MachineFunction &MF) const {
  843. assert((I.getOpcode() == TargetOpcode::G_FCMP) && "unexpected instruction");
  844. Register LhsReg = I.getOperand(2).getReg();
  845. Register RhsReg = I.getOperand(3).getReg();
  846. CmpInst::Predicate Predicate =
  847. (CmpInst::Predicate)I.getOperand(1).getPredicate();
  848. // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
  849. static const uint16_t SETFOpcTable[2][3] = {
  850. {X86::COND_E, X86::COND_NP, X86::AND8rr},
  851. {X86::COND_NE, X86::COND_P, X86::OR8rr}};
  852. const uint16_t *SETFOpc = nullptr;
  853. switch (Predicate) {
  854. default:
  855. break;
  856. case CmpInst::FCMP_OEQ:
  857. SETFOpc = &SETFOpcTable[0][0];
  858. break;
  859. case CmpInst::FCMP_UNE:
  860. SETFOpc = &SETFOpcTable[1][0];
  861. break;
  862. }
  863. // Compute the opcode for the CMP instruction.
  864. unsigned OpCmp;
  865. LLT Ty = MRI.getType(LhsReg);
  866. switch (Ty.getSizeInBits()) {
  867. default:
  868. return false;
  869. case 32:
  870. OpCmp = X86::UCOMISSrr;
  871. break;
  872. case 64:
  873. OpCmp = X86::UCOMISDrr;
  874. break;
  875. }
  876. Register ResultReg = I.getOperand(0).getReg();
  877. RBI.constrainGenericRegister(
  878. ResultReg,
  879. *getRegClass(LLT::scalar(8), *RBI.getRegBank(ResultReg, MRI, TRI)), MRI);
  880. if (SETFOpc) {
  881. MachineInstr &CmpInst =
  882. *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpCmp))
  883. .addReg(LhsReg)
  884. .addReg(RhsReg);
  885. Register FlagReg1 = MRI.createVirtualRegister(&X86::GR8RegClass);
  886. Register FlagReg2 = MRI.createVirtualRegister(&X86::GR8RegClass);
  887. MachineInstr &Set1 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
  888. TII.get(X86::SETCCr), FlagReg1).addImm(SETFOpc[0]);
  889. MachineInstr &Set2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
  890. TII.get(X86::SETCCr), FlagReg2).addImm(SETFOpc[1]);
  891. MachineInstr &Set3 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
  892. TII.get(SETFOpc[2]), ResultReg)
  893. .addReg(FlagReg1)
  894. .addReg(FlagReg2);
  895. constrainSelectedInstRegOperands(CmpInst, TII, TRI, RBI);
  896. constrainSelectedInstRegOperands(Set1, TII, TRI, RBI);
  897. constrainSelectedInstRegOperands(Set2, TII, TRI, RBI);
  898. constrainSelectedInstRegOperands(Set3, TII, TRI, RBI);
  899. I.eraseFromParent();
  900. return true;
  901. }
  902. X86::CondCode CC;
  903. bool SwapArgs;
  904. std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
  905. assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
  906. if (SwapArgs)
  907. std::swap(LhsReg, RhsReg);
  908. // Emit a compare of LHS/RHS.
  909. MachineInstr &CmpInst =
  910. *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpCmp))
  911. .addReg(LhsReg)
  912. .addReg(RhsReg);
  913. MachineInstr &Set =
  914. *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::SETCCr), ResultReg).addImm(CC);
  915. constrainSelectedInstRegOperands(CmpInst, TII, TRI, RBI);
  916. constrainSelectedInstRegOperands(Set, TII, TRI, RBI);
  917. I.eraseFromParent();
  918. return true;
  919. }
  920. bool X86InstructionSelector::selectUadde(MachineInstr &I,
  921. MachineRegisterInfo &MRI,
  922. MachineFunction &MF) const {
  923. assert((I.getOpcode() == TargetOpcode::G_UADDE) && "unexpected instruction");
  924. const Register DstReg = I.getOperand(0).getReg();
  925. const Register CarryOutReg = I.getOperand(1).getReg();
  926. const Register Op0Reg = I.getOperand(2).getReg();
  927. const Register Op1Reg = I.getOperand(3).getReg();
  928. Register CarryInReg = I.getOperand(4).getReg();
  929. const LLT DstTy = MRI.getType(DstReg);
  930. if (DstTy != LLT::scalar(32))
  931. return false;
  932. // find CarryIn def instruction.
  933. MachineInstr *Def = MRI.getVRegDef(CarryInReg);
  934. while (Def->getOpcode() == TargetOpcode::G_TRUNC) {
  935. CarryInReg = Def->getOperand(1).getReg();
  936. Def = MRI.getVRegDef(CarryInReg);
  937. }
  938. unsigned Opcode;
  939. if (Def->getOpcode() == TargetOpcode::G_UADDE) {
  940. // carry set by prev ADD.
  941. BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), X86::EFLAGS)
  942. .addReg(CarryInReg);
  943. if (!RBI.constrainGenericRegister(CarryInReg, X86::GR32RegClass, MRI))
  944. return false;
  945. Opcode = X86::ADC32rr;
  946. } else if (auto val = getIConstantVRegVal(CarryInReg, MRI)) {
  947. // carry is constant, support only 0.
  948. if (*val != 0)
  949. return false;
  950. Opcode = X86::ADD32rr;
  951. } else
  952. return false;
  953. MachineInstr &AddInst =
  954. *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opcode), DstReg)
  955. .addReg(Op0Reg)
  956. .addReg(Op1Reg);
  957. BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), CarryOutReg)
  958. .addReg(X86::EFLAGS);
  959. if (!constrainSelectedInstRegOperands(AddInst, TII, TRI, RBI) ||
  960. !RBI.constrainGenericRegister(CarryOutReg, X86::GR32RegClass, MRI))
  961. return false;
  962. I.eraseFromParent();
  963. return true;
  964. }
  965. bool X86InstructionSelector::selectExtract(MachineInstr &I,
  966. MachineRegisterInfo &MRI,
  967. MachineFunction &MF) const {
  968. assert((I.getOpcode() == TargetOpcode::G_EXTRACT) &&
  969. "unexpected instruction");
  970. const Register DstReg = I.getOperand(0).getReg();
  971. const Register SrcReg = I.getOperand(1).getReg();
  972. int64_t Index = I.getOperand(2).getImm();
  973. const LLT DstTy = MRI.getType(DstReg);
  974. const LLT SrcTy = MRI.getType(SrcReg);
  975. // Meanwile handle vector type only.
  976. if (!DstTy.isVector())
  977. return false;
  978. if (Index % DstTy.getSizeInBits() != 0)
  979. return false; // Not extract subvector.
  980. if (Index == 0) {
  981. // Replace by extract subreg copy.
  982. if (!emitExtractSubreg(DstReg, SrcReg, I, MRI, MF))
  983. return false;
  984. I.eraseFromParent();
  985. return true;
  986. }
  987. bool HasAVX = STI.hasAVX();
  988. bool HasAVX512 = STI.hasAVX512();
  989. bool HasVLX = STI.hasVLX();
  990. if (SrcTy.getSizeInBits() == 256 && DstTy.getSizeInBits() == 128) {
  991. if (HasVLX)
  992. I.setDesc(TII.get(X86::VEXTRACTF32x4Z256rr));
  993. else if (HasAVX)
  994. I.setDesc(TII.get(X86::VEXTRACTF128rr));
  995. else
  996. return false;
  997. } else if (SrcTy.getSizeInBits() == 512 && HasAVX512) {
  998. if (DstTy.getSizeInBits() == 128)
  999. I.setDesc(TII.get(X86::VEXTRACTF32x4Zrr));
  1000. else if (DstTy.getSizeInBits() == 256)
  1001. I.setDesc(TII.get(X86::VEXTRACTF64x4Zrr));
  1002. else
  1003. return false;
  1004. } else
  1005. return false;
  1006. // Convert to X86 VEXTRACT immediate.
  1007. Index = Index / DstTy.getSizeInBits();
  1008. I.getOperand(2).setImm(Index);
  1009. return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  1010. }
  1011. bool X86InstructionSelector::emitExtractSubreg(unsigned DstReg, unsigned SrcReg,
  1012. MachineInstr &I,
  1013. MachineRegisterInfo &MRI,
  1014. MachineFunction &MF) const {
  1015. const LLT DstTy = MRI.getType(DstReg);
  1016. const LLT SrcTy = MRI.getType(SrcReg);
  1017. unsigned SubIdx = X86::NoSubRegister;
  1018. if (!DstTy.isVector() || !SrcTy.isVector())
  1019. return false;
  1020. assert(SrcTy.getSizeInBits() > DstTy.getSizeInBits() &&
  1021. "Incorrect Src/Dst register size");
  1022. if (DstTy.getSizeInBits() == 128)
  1023. SubIdx = X86::sub_xmm;
  1024. else if (DstTy.getSizeInBits() == 256)
  1025. SubIdx = X86::sub_ymm;
  1026. else
  1027. return false;
  1028. const TargetRegisterClass *DstRC = getRegClass(DstTy, DstReg, MRI);
  1029. const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcReg, MRI);
  1030. SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubIdx);
  1031. if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
  1032. !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
  1033. LLVM_DEBUG(dbgs() << "Failed to constrain EXTRACT_SUBREG\n");
  1034. return false;
  1035. }
  1036. BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), DstReg)
  1037. .addReg(SrcReg, 0, SubIdx);
  1038. return true;
  1039. }
  1040. bool X86InstructionSelector::emitInsertSubreg(unsigned DstReg, unsigned SrcReg,
  1041. MachineInstr &I,
  1042. MachineRegisterInfo &MRI,
  1043. MachineFunction &MF) const {
  1044. const LLT DstTy = MRI.getType(DstReg);
  1045. const LLT SrcTy = MRI.getType(SrcReg);
  1046. unsigned SubIdx = X86::NoSubRegister;
  1047. // TODO: support scalar types
  1048. if (!DstTy.isVector() || !SrcTy.isVector())
  1049. return false;
  1050. assert(SrcTy.getSizeInBits() < DstTy.getSizeInBits() &&
  1051. "Incorrect Src/Dst register size");
  1052. if (SrcTy.getSizeInBits() == 128)
  1053. SubIdx = X86::sub_xmm;
  1054. else if (SrcTy.getSizeInBits() == 256)
  1055. SubIdx = X86::sub_ymm;
  1056. else
  1057. return false;
  1058. const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcReg, MRI);
  1059. const TargetRegisterClass *DstRC = getRegClass(DstTy, DstReg, MRI);
  1060. if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
  1061. !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
  1062. LLVM_DEBUG(dbgs() << "Failed to constrain INSERT_SUBREG\n");
  1063. return false;
  1064. }
  1065. BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY))
  1066. .addReg(DstReg, RegState::DefineNoRead, SubIdx)
  1067. .addReg(SrcReg);
  1068. return true;
  1069. }
  1070. bool X86InstructionSelector::selectInsert(MachineInstr &I,
  1071. MachineRegisterInfo &MRI,
  1072. MachineFunction &MF) const {
  1073. assert((I.getOpcode() == TargetOpcode::G_INSERT) && "unexpected instruction");
  1074. const Register DstReg = I.getOperand(0).getReg();
  1075. const Register SrcReg = I.getOperand(1).getReg();
  1076. const Register InsertReg = I.getOperand(2).getReg();
  1077. int64_t Index = I.getOperand(3).getImm();
  1078. const LLT DstTy = MRI.getType(DstReg);
  1079. const LLT InsertRegTy = MRI.getType(InsertReg);
  1080. // Meanwile handle vector type only.
  1081. if (!DstTy.isVector())
  1082. return false;
  1083. if (Index % InsertRegTy.getSizeInBits() != 0)
  1084. return false; // Not insert subvector.
  1085. if (Index == 0 && MRI.getVRegDef(SrcReg)->isImplicitDef()) {
  1086. // Replace by subreg copy.
  1087. if (!emitInsertSubreg(DstReg, InsertReg, I, MRI, MF))
  1088. return false;
  1089. I.eraseFromParent();
  1090. return true;
  1091. }
  1092. bool HasAVX = STI.hasAVX();
  1093. bool HasAVX512 = STI.hasAVX512();
  1094. bool HasVLX = STI.hasVLX();
  1095. if (DstTy.getSizeInBits() == 256 && InsertRegTy.getSizeInBits() == 128) {
  1096. if (HasVLX)
  1097. I.setDesc(TII.get(X86::VINSERTF32x4Z256rr));
  1098. else if (HasAVX)
  1099. I.setDesc(TII.get(X86::VINSERTF128rr));
  1100. else
  1101. return false;
  1102. } else if (DstTy.getSizeInBits() == 512 && HasAVX512) {
  1103. if (InsertRegTy.getSizeInBits() == 128)
  1104. I.setDesc(TII.get(X86::VINSERTF32x4Zrr));
  1105. else if (InsertRegTy.getSizeInBits() == 256)
  1106. I.setDesc(TII.get(X86::VINSERTF64x4Zrr));
  1107. else
  1108. return false;
  1109. } else
  1110. return false;
  1111. // Convert to X86 VINSERT immediate.
  1112. Index = Index / InsertRegTy.getSizeInBits();
  1113. I.getOperand(3).setImm(Index);
  1114. return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  1115. }
  1116. bool X86InstructionSelector::selectUnmergeValues(
  1117. MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) {
  1118. assert((I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES) &&
  1119. "unexpected instruction");
  1120. // Split to extracts.
  1121. unsigned NumDefs = I.getNumOperands() - 1;
  1122. Register SrcReg = I.getOperand(NumDefs).getReg();
  1123. unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();
  1124. for (unsigned Idx = 0; Idx < NumDefs; ++Idx) {
  1125. MachineInstr &ExtrInst =
  1126. *BuildMI(*I.getParent(), I, I.getDebugLoc(),
  1127. TII.get(TargetOpcode::G_EXTRACT), I.getOperand(Idx).getReg())
  1128. .addReg(SrcReg)
  1129. .addImm(Idx * DefSize);
  1130. if (!select(ExtrInst))
  1131. return false;
  1132. }
  1133. I.eraseFromParent();
  1134. return true;
  1135. }
  1136. bool X86InstructionSelector::selectMergeValues(
  1137. MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) {
  1138. assert((I.getOpcode() == TargetOpcode::G_MERGE_VALUES ||
  1139. I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS) &&
  1140. "unexpected instruction");
  1141. // Split to inserts.
  1142. Register DstReg = I.getOperand(0).getReg();
  1143. Register SrcReg0 = I.getOperand(1).getReg();
  1144. const LLT DstTy = MRI.getType(DstReg);
  1145. const LLT SrcTy = MRI.getType(SrcReg0);
  1146. unsigned SrcSize = SrcTy.getSizeInBits();
  1147. const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  1148. // For the first src use insertSubReg.
  1149. Register DefReg = MRI.createGenericVirtualRegister(DstTy);
  1150. MRI.setRegBank(DefReg, RegBank);
  1151. if (!emitInsertSubreg(DefReg, I.getOperand(1).getReg(), I, MRI, MF))
  1152. return false;
  1153. for (unsigned Idx = 2; Idx < I.getNumOperands(); ++Idx) {
  1154. Register Tmp = MRI.createGenericVirtualRegister(DstTy);
  1155. MRI.setRegBank(Tmp, RegBank);
  1156. MachineInstr &InsertInst = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
  1157. TII.get(TargetOpcode::G_INSERT), Tmp)
  1158. .addReg(DefReg)
  1159. .addReg(I.getOperand(Idx).getReg())
  1160. .addImm((Idx - 1) * SrcSize);
  1161. DefReg = Tmp;
  1162. if (!select(InsertInst))
  1163. return false;
  1164. }
  1165. MachineInstr &CopyInst = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
  1166. TII.get(TargetOpcode::COPY), DstReg)
  1167. .addReg(DefReg);
  1168. if (!select(CopyInst))
  1169. return false;
  1170. I.eraseFromParent();
  1171. return true;
  1172. }
  1173. bool X86InstructionSelector::selectCondBranch(MachineInstr &I,
  1174. MachineRegisterInfo &MRI,
  1175. MachineFunction &MF) const {
  1176. assert((I.getOpcode() == TargetOpcode::G_BRCOND) && "unexpected instruction");
  1177. const Register CondReg = I.getOperand(0).getReg();
  1178. MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
  1179. MachineInstr &TestInst =
  1180. *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::TEST8ri))
  1181. .addReg(CondReg)
  1182. .addImm(1);
  1183. BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::JCC_1))
  1184. .addMBB(DestMBB).addImm(X86::COND_NE);
  1185. constrainSelectedInstRegOperands(TestInst, TII, TRI, RBI);
  1186. I.eraseFromParent();
  1187. return true;
  1188. }
bool X86InstructionSelector::materializeFP(MachineInstr &I,
                                           MachineRegisterInfo &MRI,
                                           MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_FCONSTANT) &&
         "unexpected instruction");

  // Can't handle alternate code models yet.
  CodeModel::Model CM = TM.getCodeModel();
  if (CM != CodeModel::Small && CM != CodeModel::Large)
    return false;

  const Register DstReg = I.getOperand(0).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  Align Alignment = Align(DstTy.getSizeInBytes());
  const DebugLoc &DbgLoc = I.getDebugLoc();

  unsigned Opc =
      getLoadStoreOp(DstTy, RegBank, TargetOpcode::G_LOAD, Alignment);

  // Create the load from the constant pool.
  const ConstantFP *CFP = I.getOperand(1).getFPImm();
  unsigned CPI = MF.getConstantPool()->getConstantPoolIndex(CFP, Alignment);
  MachineInstr *LoadInst = nullptr;
  unsigned char OpFlag = STI.classifyLocalReference(nullptr);

  if (CM == CodeModel::Large && STI.is64Bit()) {
    // Under X86-64 non-small code model, GV (and friends) are 64-bits, so
    // they cannot be folded into immediate fields.

    Register AddrReg = MRI.createVirtualRegister(&X86::GR64RegClass);
    BuildMI(*I.getParent(), I, DbgLoc, TII.get(X86::MOV64ri), AddrReg)
        .addConstantPoolIndex(CPI, 0, OpFlag);

    MachineMemOperand *MMO = MF.getMachineMemOperand(
        MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad,
        LLT::pointer(0, MF.getDataLayout().getPointerSizeInBits()), Alignment);

    LoadInst =
        addDirectMem(BuildMI(*I.getParent(), I, DbgLoc, TII.get(Opc), DstReg),
                     AddrReg)
            .addMemOperand(MMO);

  } else if (CM == CodeModel::Small || !STI.is64Bit()) {
    // Handle the case when globals fit in our immediate field.
    // This is true for X86-32 always and X86-64 when in -mcmodel=small mode.

    // x86-32 PIC requires a PIC base register for constant pools.
    unsigned PICBase = 0;
    if (OpFlag == X86II::MO_PIC_BASE_OFFSET || OpFlag == X86II::MO_GOTOFF) {
      // PICBase can be allocated by TII.getGlobalBaseReg(&MF).
      // In DAG ISel, the code that initializes it is generated by the CGBR
      // pass.
      return false; // TODO: support this mode.
    } else if (STI.is64Bit() && TM.getCodeModel() == CodeModel::Small)
      PICBase = X86::RIP;

    LoadInst = addConstantPoolReference(
        BuildMI(*I.getParent(), I, DbgLoc, TII.get(Opc), DstReg), CPI, PICBase,
        OpFlag);
  } else
    return false;

  constrainSelectedInstRegOperands(*LoadInst, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}
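
// Select G_IMPLICIT_DEF / G_PHI by constraining the destination register to
// a register class (if it does not already have one) and rewriting the
// instruction in place to the IMPLICIT_DEF / PHI opcodes.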
bool X86InstructionSelector::selectImplicitDefOrPHI(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert((I.getOpcode() == TargetOpcode::G_IMPLICIT_DEF ||
          I.getOpcode() == TargetOpcode::G_PHI) &&
         "unexpected instruction");

  Register DstReg = I.getOperand(0).getReg();

  if (!MRI.getRegClassOrNull(DstReg)) {
    const LLT DstTy = MRI.getType(DstReg);
    const TargetRegisterClass *RC = getRegClass(DstTy, DstReg, MRI);

    if (!RBI.constrainGenericRegister(DstReg, *RC, MRI)) {
      LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                        << " operand\n");
      return false;
    }
  }

  if (I.getOpcode() == TargetOpcode::G_IMPLICIT_DEF)
    I.setDesc(TII.get(X86::IMPLICIT_DEF));
  else
    I.setDesc(TII.get(X86::PHI));

  return true;
}
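
// Select G_SDIV / G_SREM / G_UDIV / G_UREM on the GPR bank by expanding to
// the x86 DIV/IDIV sequence: copy the dividend into the low register of the
// fixed register pair, sign- or zero-extend it into the high register,
// issue DIV/IDIV with the divisor, then copy the quotient or remainder out
// of the appropriate fixed register.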
bool X86InstructionSelector::selectDivRem(MachineInstr &I,
                                          MachineRegisterInfo &MRI,
                                          MachineFunction &MF) const {
  // The implementation of this function is taken from X86FastISel.
  assert((I.getOpcode() == TargetOpcode::G_SDIV ||
          I.getOpcode() == TargetOpcode::G_SREM ||
          I.getOpcode() == TargetOpcode::G_UDIV ||
          I.getOpcode() == TargetOpcode::G_UREM) &&
         "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register Op1Reg = I.getOperand(1).getReg();
  const Register Op2Reg = I.getOperand(2).getReg();

  const LLT RegTy = MRI.getType(DstReg);
  assert(RegTy == MRI.getType(Op1Reg) && RegTy == MRI.getType(Op2Reg) &&
         "Arguments and return value types must match");

  const RegisterBank *RegRB = RBI.getRegBank(DstReg, MRI, TRI);
  if (!RegRB || RegRB->getID() != X86::GPRRegBankID)
    return false;

  const static unsigned NumTypes = 4; // i8, i16, i32, i64
  const static unsigned NumOps = 4;   // SDiv, SRem, UDiv, URem
  const static bool S = true;         // IsSigned
  const static bool U = false;        // !IsSigned
  const static unsigned Copy = TargetOpcode::COPY;
  // For the X86 IDIV instruction, in most cases the dividend
  // (numerator) must be in a specific register pair highreg:lowreg,
  // producing the quotient in lowreg and the remainder in highreg.
  // For most data types, to set up the instruction, the dividend is
  // copied into lowreg, and lowreg is sign-extended into highreg. The
  // exception is i8, where the dividend is defined as a single register rather
  // than a register pair, and we therefore directly sign-extend the dividend
  // into lowreg, instead of copying, and ignore the highreg.
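  //
  // Illustrative expansion for a 32-bit G_SDIV (the quotient ends up in
  // EAX; G_SREM would instead copy the remainder out of EDX):
  //   $eax = COPY %op1
  //   CDQ                  ; sign-extend EAX into EDX
  //   IDIV32r %op2
  //   %dst = COPY $eax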
  const static struct DivRemEntry {
    // The following portion depends only on the data type.
    unsigned SizeInBits;
    unsigned LowInReg;  // low part of the register pair
    unsigned HighInReg; // high part of the register pair
    // The following portion depends on both the data type and the operation.
    struct DivRemResult {
      unsigned OpDivRem;        // The specific DIV/IDIV opcode to use.
      unsigned OpSignExtend;    // Opcode for sign-extending lowreg into
                                // highreg, or copying a zero into highreg.
      unsigned OpCopy;          // Opcode for copying dividend into lowreg, or
                                // zero/sign-extending into lowreg for i8.
      unsigned DivRemResultReg; // Register containing the desired result.
      bool IsOpSigned;          // Whether to use signed or unsigned form.
    } ResultTable[NumOps];
  } OpTable[NumTypes] = {
      {8,
       X86::AX,
       0,
       {
           {X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AL, S}, // SDiv
           {X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S}, // SRem
           {X86::DIV8r, 0, X86::MOVZX16rr8, X86::AL, U},  // UDiv
           {X86::DIV8r, 0, X86::MOVZX16rr8, X86::AH, U},  // URem
       }},                                                // i8
      {16,
       X86::AX,
       X86::DX,
       {
           {X86::IDIV16r, X86::CWD, Copy, X86::AX, S},    // SDiv
           {X86::IDIV16r, X86::CWD, Copy, X86::DX, S},    // SRem
           {X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U}, // UDiv
           {X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U}, // URem
       }},                                                // i16
      {32,
       X86::EAX,
       X86::EDX,
       {
           {X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S},    // SDiv
           {X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S},    // SRem
           {X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U}, // UDiv
           {X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U}, // URem
       }},                                                 // i32
      {64,
       X86::RAX,
       X86::RDX,
       {
           {X86::IDIV64r, X86::CQO, Copy, X86::RAX, S},    // SDiv
           {X86::IDIV64r, X86::CQO, Copy, X86::RDX, S},    // SRem
           {X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U}, // UDiv
           {X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U}, // URem
       }},                                                 // i64
  };
  auto OpEntryIt = llvm::find_if(OpTable, [RegTy](const DivRemEntry &El) {
    return El.SizeInBits == RegTy.getSizeInBits();
  });
  if (OpEntryIt == std::end(OpTable))
    return false;

  unsigned OpIndex;
  switch (I.getOpcode()) {
  default:
    llvm_unreachable("Unexpected div/rem opcode");
  case TargetOpcode::G_SDIV:
    OpIndex = 0;
    break;
  case TargetOpcode::G_SREM:
    OpIndex = 1;
    break;
  case TargetOpcode::G_UDIV:
    OpIndex = 2;
    break;
  case TargetOpcode::G_UREM:
    OpIndex = 3;
    break;
  }

  const DivRemEntry &TypeEntry = *OpEntryIt;
  const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];

  const TargetRegisterClass *RegRC = getRegClass(RegTy, *RegRB);
  if (!RBI.constrainGenericRegister(Op1Reg, *RegRC, MRI) ||
      !RBI.constrainGenericRegister(Op2Reg, *RegRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *RegRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n");
    return false;
  }

  // Move op1 into low-order input register.
  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpEntry.OpCopy),
          TypeEntry.LowInReg)
      .addReg(Op1Reg);
  // Zero-extend or sign-extend into high-order input register.
  if (OpEntry.OpSignExtend) {
    if (OpEntry.IsOpSigned)
      BuildMI(*I.getParent(), I, I.getDebugLoc(),
              TII.get(OpEntry.OpSignExtend));
    else {
      Register Zero32 = MRI.createVirtualRegister(&X86::GR32RegClass);
      BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::MOV32r0),
              Zero32);

      // Copy the zero into the appropriate sub/super/identical physical
      // register. Unfortunately the operations needed are not uniform enough
      // to fit neatly into the table above.
      if (RegTy.getSizeInBits() == 16) {
        BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy),
                TypeEntry.HighInReg)
            .addReg(Zero32, 0, X86::sub_16bit);
      } else if (RegTy.getSizeInBits() == 32) {
        BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy),
                TypeEntry.HighInReg)
            .addReg(Zero32);
      } else if (RegTy.getSizeInBits() == 64) {
        BuildMI(*I.getParent(), I, I.getDebugLoc(),
                TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
            .addImm(0)
            .addReg(Zero32)
            .addImm(X86::sub_32bit);
      }
    }
  }
  // Generate the DIV/IDIV instruction.
  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpEntry.OpDivRem))
      .addReg(Op2Reg);
  // For i8 remainder, we can't reference ah directly, as we'll end
  // up with bogus copies like %r9b = COPY %ah. Reference ax
  // instead to prevent ah references in a rex instruction.
  //
  // The current assumption of the fast register allocator is that isel
  // won't generate explicit references to the GR8_NOREX registers. If
  // the allocator and/or the backend get enhanced to be more robust in
  // that regard, this can be, and should be, removed.
  if ((I.getOpcode() == TargetOpcode::G_SREM ||
       I.getOpcode() == TargetOpcode::G_UREM) &&
      OpEntry.DivRemResultReg == X86::AH && STI.is64Bit()) {
    Register SourceSuperReg = MRI.createVirtualRegister(&X86::GR16RegClass);
    Register ResultSuperReg = MRI.createVirtualRegister(&X86::GR16RegClass);
    BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy), SourceSuperReg)
        .addReg(X86::AX);

    // Shift AX right by 8 bits instead of using AH.
    BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::SHR16ri),
            ResultSuperReg)
        .addReg(SourceSuperReg)
        .addImm(8);

    // Now reference the 8-bit subreg of the result.
    BuildMI(*I.getParent(), I, I.getDebugLoc(),
            TII.get(TargetOpcode::SUBREG_TO_REG))
        .addDef(DstReg)
        .addImm(0)
        .addReg(ResultSuperReg)
        .addImm(X86::sub_8bit);
  } else {
    BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY),
            DstReg)
        .addReg(OpEntry.DivRemResultReg);
  }
  I.eraseFromParent();
  return true;
}
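
// Select G_INTRINSIC_W_SIDE_EFFECTS. Only llvm.trap is handled here and is
// lowered to the X86::TRAP (ud2) instruction; any other intrinsic is
// rejected by returning false.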
bool X86InstructionSelector::selectIntrinsicWSideEffects(
    MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const {
  assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS &&
         "unexpected instruction");

  if (I.getOperand(0).getIntrinsicID() != Intrinsic::trap)
    return false;

  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::TRAP));

  I.eraseFromParent();
  return true;
}
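
// Factory used by the X86 target to create the GlobalISel instruction
// selector.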
InstructionSelector *
llvm::createX86InstructionSelector(const X86TargetMachine &TM,
                                   X86Subtarget &Subtarget,
                                   X86RegisterBankInfo &RBI) {
  return new X86InstructionSelector(TM, Subtarget, RBI);
}