//===- X86InstructionSelector.cpp -----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// X86.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86BaseInfo.h"
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86RegisterBankInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LowLevelTypeImpl.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <tuple>

#define DEBUG_TYPE "X86-isel"

using namespace llvm;

namespace {

#define GET_GLOBALISEL_PREDICATE_BITSET
#include "X86GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATE_BITSET

class X86InstructionSelector : public InstructionSelector {
public:
  X86InstructionSelector(const X86TargetMachine &TM, const X86Subtarget &STI,
                         const X86RegisterBankInfo &RBI);

  bool select(MachineInstr &I) override;
  static const char *getName() { return DEBUG_TYPE; }

private:
  /// tblgen-erated 'select' implementation, used as the initial selector for
  /// the patterns that don't require complex C++.
  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;

  // TODO: remove once supported by TableGen-erated instruction selection.
  unsigned getLoadStoreOp(const LLT &Ty, const RegisterBank &RB, unsigned Opc,
                          Align Alignment) const;

  bool selectLoadStoreOp(MachineInstr &I, MachineRegisterInfo &MRI,
                         MachineFunction &MF) const;
  bool selectFrameIndexOrGep(MachineInstr &I, MachineRegisterInfo &MRI,
                             MachineFunction &MF) const;
  bool selectGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI,
                         MachineFunction &MF) const;
  bool selectConstant(MachineInstr &I, MachineRegisterInfo &MRI,
                      MachineFunction &MF) const;
  bool selectTruncOrPtrToInt(MachineInstr &I, MachineRegisterInfo &MRI,
                             MachineFunction &MF) const;
  bool selectZext(MachineInstr &I, MachineRegisterInfo &MRI,
                  MachineFunction &MF) const;
  bool selectAnyext(MachineInstr &I, MachineRegisterInfo &MRI,
                    MachineFunction &MF) const;
  bool selectCmp(MachineInstr &I, MachineRegisterInfo &MRI,
                 MachineFunction &MF) const;
  bool selectFCmp(MachineInstr &I, MachineRegisterInfo &MRI,
                  MachineFunction &MF) const;
  bool selectUadde(MachineInstr &I, MachineRegisterInfo &MRI,
                   MachineFunction &MF) const;
  bool selectCopy(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI,
                           MachineFunction &MF);
  bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI,
                         MachineFunction &MF);
  bool selectInsert(MachineInstr &I, MachineRegisterInfo &MRI,
                    MachineFunction &MF) const;
  bool selectExtract(MachineInstr &I, MachineRegisterInfo &MRI,
                     MachineFunction &MF) const;
  bool selectCondBranch(MachineInstr &I, MachineRegisterInfo &MRI,
                        MachineFunction &MF) const;
  bool selectTurnIntoCOPY(MachineInstr &I, MachineRegisterInfo &MRI,
                          const unsigned DstReg,
                          const TargetRegisterClass *DstRC,
                          const unsigned SrcReg,
                          const TargetRegisterClass *SrcRC) const;
  bool materializeFP(MachineInstr &I, MachineRegisterInfo &MRI,
                     MachineFunction &MF) const;
  bool selectImplicitDefOrPHI(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectDivRem(MachineInstr &I, MachineRegisterInfo &MRI,
                    MachineFunction &MF) const;
  bool selectIntrinsicWSideEffects(MachineInstr &I, MachineRegisterInfo &MRI,
                                   MachineFunction &MF) const;

  // Emit an insert-subreg copy and insert it before MachineInstr &I.
  bool emitInsertSubreg(unsigned DstReg, unsigned SrcReg, MachineInstr &I,
                        MachineRegisterInfo &MRI, MachineFunction &MF) const;
  // Emit an extract-subreg copy and insert it before MachineInstr &I.
  bool emitExtractSubreg(unsigned DstReg, unsigned SrcReg, MachineInstr &I,
                         MachineRegisterInfo &MRI, MachineFunction &MF) const;

  const TargetRegisterClass *getRegClass(LLT Ty, const RegisterBank &RB) const;
  const TargetRegisterClass *getRegClass(LLT Ty, unsigned Reg,
                                         MachineRegisterInfo &MRI) const;

  const X86TargetMachine &TM;
  const X86Subtarget &STI;
  const X86InstrInfo &TII;
  const X86RegisterInfo &TRI;
  const X86RegisterBankInfo &RBI;

#define GET_GLOBALISEL_PREDICATES_DECL
#include "X86GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL

#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "X86GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL
};

} // end anonymous namespace

#define GET_GLOBALISEL_IMPL
#include "X86GenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

X86InstructionSelector::X86InstructionSelector(const X86TargetMachine &TM,
                                               const X86Subtarget &STI,
                                               const X86RegisterBankInfo &RBI)
    : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
      RBI(RBI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "X86GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "X86GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

// FIXME: This should be target-independent, inferred from the types declared
// for each class in the bank.
const TargetRegisterClass *
X86InstructionSelector::getRegClass(LLT Ty, const RegisterBank &RB) const {
  if (RB.getID() == X86::GPRRegBankID) {
    if (Ty.getSizeInBits() <= 8)
      return &X86::GR8RegClass;
    if (Ty.getSizeInBits() == 16)
      return &X86::GR16RegClass;
    if (Ty.getSizeInBits() == 32)
      return &X86::GR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return &X86::GR64RegClass;
  }
  if (RB.getID() == X86::VECRRegBankID) {
    if (Ty.getSizeInBits() == 32)
      return STI.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return STI.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
    if (Ty.getSizeInBits() == 128)
      return STI.hasAVX512() ? &X86::VR128XRegClass : &X86::VR128RegClass;
    if (Ty.getSizeInBits() == 256)
      return STI.hasAVX512() ? &X86::VR256XRegClass : &X86::VR256RegClass;
    if (Ty.getSizeInBits() == 512)
      return &X86::VR512RegClass;
  }
  llvm_unreachable("Unknown RegBank!");
}

const TargetRegisterClass *
X86InstructionSelector::getRegClass(LLT Ty, unsigned Reg,
                                    MachineRegisterInfo &MRI) const {
  const RegisterBank &RegBank = *RBI.getRegBank(Reg, MRI, TRI);
  return getRegClass(Ty, RegBank);
}

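// Return the sub-register index matching the width of RC (e.g. sub_32bit for
// GR32), or NoSubRegister if RC is not a narrow GPR class.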
static unsigned getSubRegIndex(const TargetRegisterClass *RC) {
  unsigned SubIdx = X86::NoSubRegister;
  if (RC == &X86::GR32RegClass) {
    SubIdx = X86::sub_32bit;
  } else if (RC == &X86::GR16RegClass) {
    SubIdx = X86::sub_16bit;
  } else if (RC == &X86::GR8RegClass) {
    SubIdx = X86::sub_8bit;
  }

  return SubIdx;
}

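// Return the GR* register class whose width matches the given physical
// register.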
static const TargetRegisterClass *getRegClassFromGRPhysReg(Register Reg) {
  assert(Reg.isPhysical());
  if (X86::GR64RegClass.contains(Reg))
    return &X86::GR64RegClass;
  if (X86::GR32RegClass.contains(Reg))
    return &X86::GR32RegClass;
  if (X86::GR16RegClass.contains(Reg))
    return &X86::GR16RegClass;
  if (X86::GR8RegClass.contains(Reg))
    return &X86::GR8RegClass;

  llvm_unreachable("Unknown RegClass for PhysReg!");
}

// Set X86 Opcode and constrain DestReg.
bool X86InstructionSelector::selectCopy(MachineInstr &I,
                                        MachineRegisterInfo &MRI) const {
  Register DstReg = I.getOperand(0).getReg();
  const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);

  Register SrcReg = I.getOperand(1).getReg();
  const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);

  if (DstReg.isPhysical()) {
    assert(I.isCopy() && "Generic operators do not allow physical registers");

    if (DstSize > SrcSize && SrcRegBank.getID() == X86::GPRRegBankID &&
        DstRegBank.getID() == X86::GPRRegBankID) {

      const TargetRegisterClass *SrcRC =
          getRegClass(MRI.getType(SrcReg), SrcRegBank);
      const TargetRegisterClass *DstRC = getRegClassFromGRPhysReg(DstReg);

      if (SrcRC != DstRC) {
        // This case can be generated by ABI lowering; perform an anyext.
        Register ExtSrc = MRI.createVirtualRegister(DstRC);
        BuildMI(*I.getParent(), I, I.getDebugLoc(),
                TII.get(TargetOpcode::SUBREG_TO_REG))
            .addDef(ExtSrc)
            .addImm(0)
            .addReg(SrcReg)
            .addImm(getSubRegIndex(SrcRC));

        I.getOperand(1).setReg(ExtSrc);
      }
    }

    return true;
  }

  assert((!SrcReg.isPhysical() || I.isCopy()) &&
         "No phys reg on generic operators");
  assert((DstSize == SrcSize ||
          // Copies are a means to set up initial types; the number of
          // bits may not exactly match.
          (SrcReg.isPhysical() &&
           DstSize <= RBI.getSizeInBits(SrcReg, MRI, TRI))) &&
         "Copy with different width?!");

  const TargetRegisterClass *DstRC =
      getRegClass(MRI.getType(DstReg), DstRegBank);

  if (SrcRegBank.getID() == X86::GPRRegBankID &&
      DstRegBank.getID() == X86::GPRRegBankID && SrcSize > DstSize &&
      SrcReg.isPhysical()) {
    // Change the physical register to perform the truncate.
    const TargetRegisterClass *SrcRC = getRegClassFromGRPhysReg(SrcReg);

    if (DstRC != SrcRC) {
      I.getOperand(1).setSubReg(getSubRegIndex(DstRC));
      I.getOperand(1).substPhysReg(SrcReg, TRI);
    }
  }

  // No need to constrain SrcReg. It will get constrained when
  // we hit another of its uses or its defs.
  // Copies do not have constraints.
  const TargetRegisterClass *OldRC = MRI.getRegClassOrNull(DstReg);
  if (!OldRC || !DstRC->hasSubClassEq(OldRC)) {
    if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
      LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                        << " operand\n");
      return false;
    }
  }
  I.setDesc(TII.get(X86::COPY));
  return true;
}

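// Top-level selection entry point: try the TableGen-erated selector first,
// then fall back to the manual C++ selection below.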
bool X86InstructionSelector::select(MachineInstr &I) {
  assert(I.getParent() && "Instruction should be in a basic block!");
  assert(I.getParent()->getParent() && "Instruction should be in a function!");

  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  unsigned Opcode = I.getOpcode();
  if (!isPreISelGenericOpcode(Opcode)) {
    // Certain non-generic instructions also need some special handling.
    if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
      return false;

    if (I.isCopy())
      return selectCopy(I, MRI);

    return true;
  }

  assert(I.getNumOperands() == I.getNumExplicitOperands() &&
         "Generic instruction has unexpected implicit operands\n");

  if (selectImpl(I, *CoverageInfo))
    return true;

  LLVM_DEBUG(dbgs() << " C++ instruction selection: "; I.print(dbgs()));

  // TODO: This should be implemented by tblgen.
  switch (I.getOpcode()) {
  default:
    return false;
  case TargetOpcode::G_STORE:
  case TargetOpcode::G_LOAD:
    return selectLoadStoreOp(I, MRI, MF);
  case TargetOpcode::G_PTR_ADD:
  case TargetOpcode::G_FRAME_INDEX:
    return selectFrameIndexOrGep(I, MRI, MF);
  case TargetOpcode::G_GLOBAL_VALUE:
    return selectGlobalValue(I, MRI, MF);
  case TargetOpcode::G_CONSTANT:
    return selectConstant(I, MRI, MF);
  case TargetOpcode::G_FCONSTANT:
    return materializeFP(I, MRI, MF);
  case TargetOpcode::G_PTRTOINT:
  case TargetOpcode::G_TRUNC:
    return selectTruncOrPtrToInt(I, MRI, MF);
  case TargetOpcode::G_INTTOPTR:
    return selectCopy(I, MRI);
  case TargetOpcode::G_ZEXT:
    return selectZext(I, MRI, MF);
  case TargetOpcode::G_ANYEXT:
    return selectAnyext(I, MRI, MF);
  case TargetOpcode::G_ICMP:
    return selectCmp(I, MRI, MF);
  case TargetOpcode::G_FCMP:
    return selectFCmp(I, MRI, MF);
  case TargetOpcode::G_UADDE:
    return selectUadde(I, MRI, MF);
  case TargetOpcode::G_UNMERGE_VALUES:
    return selectUnmergeValues(I, MRI, MF);
  case TargetOpcode::G_MERGE_VALUES:
  case TargetOpcode::G_CONCAT_VECTORS:
    return selectMergeValues(I, MRI, MF);
  case TargetOpcode::G_EXTRACT:
    return selectExtract(I, MRI, MF);
  case TargetOpcode::G_INSERT:
    return selectInsert(I, MRI, MF);
  case TargetOpcode::G_BRCOND:
    return selectCondBranch(I, MRI, MF);
  case TargetOpcode::G_IMPLICIT_DEF:
  case TargetOpcode::G_PHI:
    return selectImplicitDefOrPHI(I, MRI);
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
    return selectDivRem(I, MRI, MF);
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
    return selectIntrinsicWSideEffects(I, MRI, MF);
  }

  return false;
}

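// Choose the x86 memory-access opcode for a value of type Ty on register bank
// RB, taking AVX/AVX512/VLX availability and the access alignment into
// account. Returns Opc unchanged if no suitable opcode is known.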
unsigned X86InstructionSelector::getLoadStoreOp(const LLT &Ty,
                                                const RegisterBank &RB,
                                                unsigned Opc,
                                                Align Alignment) const {
  bool Isload = (Opc == TargetOpcode::G_LOAD);
  bool HasAVX = STI.hasAVX();
  bool HasAVX512 = STI.hasAVX512();
  bool HasVLX = STI.hasVLX();

  if (Ty == LLT::scalar(8)) {
    if (X86::GPRRegBankID == RB.getID())
      return Isload ? X86::MOV8rm : X86::MOV8mr;
  } else if (Ty == LLT::scalar(16)) {
    if (X86::GPRRegBankID == RB.getID())
      return Isload ? X86::MOV16rm : X86::MOV16mr;
  } else if (Ty == LLT::scalar(32) || Ty == LLT::pointer(0, 32)) {
    if (X86::GPRRegBankID == RB.getID())
      return Isload ? X86::MOV32rm : X86::MOV32mr;
    if (X86::VECRRegBankID == RB.getID())
      return Isload ? (HasAVX512 ? X86::VMOVSSZrm_alt :
                       HasAVX    ? X86::VMOVSSrm_alt :
                                   X86::MOVSSrm_alt)
                    : (HasAVX512 ? X86::VMOVSSZmr :
                       HasAVX    ? X86::VMOVSSmr :
                                   X86::MOVSSmr);
  } else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) {
    if (X86::GPRRegBankID == RB.getID())
      return Isload ? X86::MOV64rm : X86::MOV64mr;
    if (X86::VECRRegBankID == RB.getID())
      return Isload ? (HasAVX512 ? X86::VMOVSDZrm_alt :
                       HasAVX    ? X86::VMOVSDrm_alt :
                                   X86::MOVSDrm_alt)
                    : (HasAVX512 ? X86::VMOVSDZmr :
                       HasAVX    ? X86::VMOVSDmr :
                                   X86::MOVSDmr);
  } else if (Ty.isVector() && Ty.getSizeInBits() == 128) {
    if (Alignment >= Align(16))
      return Isload ? (HasVLX ? X86::VMOVAPSZ128rm
                              : HasAVX512
                                    ? X86::VMOVAPSZ128rm_NOVLX
                                    : HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm)
                    : (HasVLX ? X86::VMOVAPSZ128mr
                              : HasAVX512
                                    ? X86::VMOVAPSZ128mr_NOVLX
                                    : HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr);
    else
      return Isload ? (HasVLX ? X86::VMOVUPSZ128rm
                              : HasAVX512
                                    ? X86::VMOVUPSZ128rm_NOVLX
                                    : HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm)
                    : (HasVLX ? X86::VMOVUPSZ128mr
                              : HasAVX512
                                    ? X86::VMOVUPSZ128mr_NOVLX
                                    : HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr);
  } else if (Ty.isVector() && Ty.getSizeInBits() == 256) {
    if (Alignment >= Align(32))
      return Isload ? (HasVLX ? X86::VMOVAPSZ256rm
                              : HasAVX512 ? X86::VMOVAPSZ256rm_NOVLX
                                          : X86::VMOVAPSYrm)
                    : (HasVLX ? X86::VMOVAPSZ256mr
                              : HasAVX512 ? X86::VMOVAPSZ256mr_NOVLX
                                          : X86::VMOVAPSYmr);
    else
      return Isload ? (HasVLX ? X86::VMOVUPSZ256rm
                              : HasAVX512 ? X86::VMOVUPSZ256rm_NOVLX
                                          : X86::VMOVUPSYrm)
                    : (HasVLX ? X86::VMOVUPSZ256mr
                              : HasAVX512 ? X86::VMOVUPSZ256mr_NOVLX
                                          : X86::VMOVUPSYmr);
  } else if (Ty.isVector() && Ty.getSizeInBits() == 512) {
    if (Alignment >= Align(64))
      return Isload ? X86::VMOVAPSZrm : X86::VMOVAPSZmr;
    else
      return Isload ? X86::VMOVUPSZrm : X86::VMOVUPSZmr;
  }
  return Opc;
}

// Fill in an address from the given instruction.
static void X86SelectAddress(const MachineInstr &I,
                             const MachineRegisterInfo &MRI,
                             X86AddressMode &AM) {
  assert(I.getOperand(0).isReg() && "unsupported operand.");
  assert(MRI.getType(I.getOperand(0).getReg()).isPointer() &&
         "unsupported type.");

  if (I.getOpcode() == TargetOpcode::G_PTR_ADD) {
    if (auto COff = getIConstantVRegSExtVal(I.getOperand(2).getReg(), MRI)) {
      int64_t Imm = *COff;
      if (isInt<32>(Imm)) { // Check for displacement overflow.
        AM.Disp = static_cast<int32_t>(Imm);
        AM.Base.Reg = I.getOperand(1).getReg();
        return;
      }
    }
  } else if (I.getOpcode() == TargetOpcode::G_FRAME_INDEX) {
    AM.Base.FrameIndex = I.getOperand(1).getIndex();
    AM.BaseType = X86AddressMode::FrameIndexBase;
    return;
  }

  // Default behavior.
  AM.Base.Reg = I.getOperand(0).getReg();
}

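// Select G_LOAD/G_STORE into a concrete x86 MOV* instruction with a full
// address operand. Bails out on atomics that are not unordered or are
// underaligned.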
bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I,
                                               MachineRegisterInfo &MRI,
                                               MachineFunction &MF) const {
  unsigned Opc = I.getOpcode();

  assert((Opc == TargetOpcode::G_STORE || Opc == TargetOpcode::G_LOAD) &&
         "unexpected instruction");

  const Register DefReg = I.getOperand(0).getReg();
  LLT Ty = MRI.getType(DefReg);
  const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);

  assert(I.hasOneMemOperand());
  auto &MemOp = **I.memoperands_begin();
  if (MemOp.isAtomic()) {
    // Note: for unordered operations, we rely on the fact that the appropriate
    // MMO is already on the instruction we're mutating, and thus we don't need
    // to make any changes. So long as we select an opcode which is capable of
    // loading or storing the appropriate size atomically, the rest of the
    // backend is required to respect the MMO state.
    if (!MemOp.isUnordered()) {
      LLVM_DEBUG(dbgs() << "Atomic ordering not supported yet\n");
      return false;
    }
    if (MemOp.getAlign() < Ty.getSizeInBits() / 8) {
      LLVM_DEBUG(dbgs() << "Unaligned atomics not supported yet\n");
      return false;
    }
  }

  unsigned NewOpc = getLoadStoreOp(Ty, RB, Opc, MemOp.getAlign());
  if (NewOpc == Opc)
    return false;

  X86AddressMode AM;
  X86SelectAddress(*MRI.getVRegDef(I.getOperand(1).getReg()), MRI, AM);

  I.setDesc(TII.get(NewOpc));
  MachineInstrBuilder MIB(MF, I);
  if (Opc == TargetOpcode::G_LOAD) {
    I.RemoveOperand(1);
    addFullAddress(MIB, AM);
  } else {
    // G_STORE (VAL, Addr), X86Store instruction (Addr, VAL)
    I.RemoveOperand(1);
    I.RemoveOperand(0);
    addFullAddress(MIB, AM).addUse(DefReg);
  }
  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}

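// Return the LEA opcode matching the pointer width of Ty.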
static unsigned getLeaOP(LLT Ty, const X86Subtarget &STI) {
  if (Ty == LLT::pointer(0, 64))
    return X86::LEA64r;
  else if (Ty == LLT::pointer(0, 32))
    return STI.isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r;
  else
    llvm_unreachable("Can't get LEA opcode. Unsupported type.");
}

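// Lower G_FRAME_INDEX and G_PTR_ADD into an LEA address computation.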
bool X86InstructionSelector::selectFrameIndexOrGep(MachineInstr &I,
                                                   MachineRegisterInfo &MRI,
                                                   MachineFunction &MF) const {
  unsigned Opc = I.getOpcode();

  assert((Opc == TargetOpcode::G_FRAME_INDEX || Opc == TargetOpcode::G_PTR_ADD) &&
         "unexpected instruction");

  const Register DefReg = I.getOperand(0).getReg();
  LLT Ty = MRI.getType(DefReg);

  // Use LEA to calculate the address of the frame index or GEP.
  unsigned NewOpc = getLeaOP(Ty, STI);
  I.setDesc(TII.get(NewOpc));
  MachineInstrBuilder MIB(MF, I);

  if (Opc == TargetOpcode::G_FRAME_INDEX) {
    addOffset(MIB, 0);
  } else {
    MachineOperand &InxOp = I.getOperand(2);
    I.addOperand(InxOp);        // set IndexReg
    InxOp.ChangeToImmediate(1); // set Scale
    MIB.addImm(0).addReg(0);
  }

  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}

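// Materialize the address of a global with LEA, using RIP-relative addressing
// where applicable. TLS globals, stub references, PIC-base-relative
// references, and non-small code models are not handled yet.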
bool X86InstructionSelector::selectGlobalValue(MachineInstr &I,
                                               MachineRegisterInfo &MRI,
                                               MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_GLOBAL_VALUE) &&
         "unexpected instruction");

  auto GV = I.getOperand(1).getGlobal();
  if (GV->isThreadLocal()) {
    return false; // TODO: we don't support TLS yet.
  }

  // Can't handle alternate code models yet.
  if (TM.getCodeModel() != CodeModel::Small)
    return false;

  X86AddressMode AM;
  AM.GV = GV;
  AM.GVOpFlags = STI.classifyGlobalReference(GV);

  // TODO: The ABI requires an extra load; not supported yet.
  if (isGlobalStubReference(AM.GVOpFlags))
    return false;

  // TODO: This reference is relative to the PIC base; not supported yet.
  if (isGlobalRelativeToPICBase(AM.GVOpFlags))
    return false;

  if (STI.isPICStyleRIPRel()) {
    // Use rip-relative addressing.
    assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
    AM.Base.Reg = X86::RIP;
  }

  const Register DefReg = I.getOperand(0).getReg();
  LLT Ty = MRI.getType(DefReg);
  unsigned NewOpc = getLeaOP(Ty, STI);

  I.setDesc(TII.get(NewOpc));
  MachineInstrBuilder MIB(MF, I);

  I.RemoveOperand(1);
  addFullAddress(MIB, AM);

  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}

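// Select G_CONSTANT on the GPR bank into a MOV*ri immediate move.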
bool X86InstructionSelector::selectConstant(MachineInstr &I,
                                            MachineRegisterInfo &MRI,
                                            MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_CONSTANT) &&
         "unexpected instruction");

  const Register DefReg = I.getOperand(0).getReg();
  LLT Ty = MRI.getType(DefReg);

  if (RBI.getRegBank(DefReg, MRI, TRI)->getID() != X86::GPRRegBankID)
    return false;

  uint64_t Val = 0;
  if (I.getOperand(1).isCImm()) {
    Val = I.getOperand(1).getCImm()->getZExtValue();
    I.getOperand(1).ChangeToImmediate(Val);
  } else if (I.getOperand(1).isImm()) {
    Val = I.getOperand(1).getImm();
  } else
    llvm_unreachable("Unsupported operand type.");

  unsigned NewOpc;
  switch (Ty.getSizeInBits()) {
  case 8:
    NewOpc = X86::MOV8ri;
    break;
  case 16:
    NewOpc = X86::MOV16ri;
    break;
  case 32:
    NewOpc = X86::MOV32ri;
    break;
  case 64:
    // TODO: if isUInt<32>(Val), X86::MOV32ri can be used.
    if (isInt<32>(Val))
      NewOpc = X86::MOV64ri32;
    else
      NewOpc = X86::MOV64ri;
    break;
  default:
    llvm_unreachable("Can't select G_CONSTANT, unsupported type.");
  }

  I.setDesc(TII.get(NewOpc));
  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}

// Helper function for selectTruncOrPtrToInt and selectAnyext.
// Returns true if DstRC is a floating-point register class and
// SrcRC is a 128-bit vector class.
static bool canTurnIntoCOPY(const TargetRegisterClass *DstRC,
                            const TargetRegisterClass *SrcRC) {
  return (DstRC == &X86::FR32RegClass || DstRC == &X86::FR32XRegClass ||
          DstRC == &X86::FR64RegClass || DstRC == &X86::FR64XRegClass) &&
         (SrcRC == &X86::VR128RegClass || SrcRC == &X86::VR128XRegClass);
}

bool X86InstructionSelector::selectTurnIntoCOPY(
    MachineInstr &I, MachineRegisterInfo &MRI, const unsigned DstReg,
    const TargetRegisterClass *DstRC, const unsigned SrcReg,
    const TargetRegisterClass *SrcRC) const {

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n");
    return false;
  }
  I.setDesc(TII.get(X86::COPY));
  return true;
}

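// Select G_TRUNC/G_PTRTOINT. Vector-to-FP truncations that qualify become
// plain copies; GPR truncations become sub-register copies.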
bool X86InstructionSelector::selectTruncOrPtrToInt(MachineInstr &I,
                                                   MachineRegisterInfo &MRI,
                                                   MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_TRUNC ||
          I.getOpcode() == TargetOpcode::G_PTRTOINT) &&
         "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);

  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);

  if (DstRB.getID() != SrcRB.getID()) {
    LLVM_DEBUG(dbgs() << TII.getName(I.getOpcode())
                      << " input/output on different banks\n");
    return false;
  }

  const TargetRegisterClass *DstRC = getRegClass(DstTy, DstRB);
  const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcRB);

  if (!DstRC || !SrcRC)
    return false;

  // If this is a truncation of a value that lives in a vector class and goes
  // into a floating-point class, just replace it with a copy, as we are able
  // to select it as a regular move.
  if (canTurnIntoCOPY(DstRC, SrcRC))
    return selectTurnIntoCOPY(I, MRI, DstReg, DstRC, SrcReg, SrcRC);

  if (DstRB.getID() != X86::GPRRegBankID)
    return false;

  unsigned SubIdx;
  if (DstRC == SrcRC) {
    // Nothing to be done.
    SubIdx = X86::NoSubRegister;
  } else if (DstRC == &X86::GR32RegClass) {
    SubIdx = X86::sub_32bit;
  } else if (DstRC == &X86::GR16RegClass) {
    SubIdx = X86::sub_16bit;
  } else if (DstRC == &X86::GR8RegClass) {
    SubIdx = X86::sub_8bit;
  } else {
    return false;
  }

  SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubIdx);

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << "\n");
    return false;
  }

  I.getOperand(1).setSubReg(SubIdx);

  I.setDesc(TII.get(X86::COPY));
  return true;
}

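// Select G_ZEXT. Only s1 sources reach this point (wider sources are handled
// by tablegen); the boolean is materialized by ANDing with 1.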
bool X86InstructionSelector::selectZext(MachineInstr &I,
                                        MachineRegisterInfo &MRI,
                                        MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_ZEXT) && "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);

  assert(!(SrcTy == LLT::scalar(8) && DstTy == LLT::scalar(16)) &&
         "8=>16 Zext is handled by tablegen");
  assert(!(SrcTy == LLT::scalar(8) && DstTy == LLT::scalar(32)) &&
         "8=>32 Zext is handled by tablegen");
  assert(!(SrcTy == LLT::scalar(16) && DstTy == LLT::scalar(32)) &&
         "16=>32 Zext is handled by tablegen");
  assert(!(SrcTy == LLT::scalar(8) && DstTy == LLT::scalar(64)) &&
         "8=>64 Zext is handled by tablegen");
  assert(!(SrcTy == LLT::scalar(16) && DstTy == LLT::scalar(64)) &&
         "16=>64 Zext is handled by tablegen");
  assert(!(SrcTy == LLT::scalar(32) && DstTy == LLT::scalar(64)) &&
         "32=>64 Zext is handled by tablegen");

  if (SrcTy != LLT::scalar(1))
    return false;

  unsigned AndOpc;
  if (DstTy == LLT::scalar(8))
    AndOpc = X86::AND8ri;
  else if (DstTy == LLT::scalar(16))
    AndOpc = X86::AND16ri8;
  else if (DstTy == LLT::scalar(32))
    AndOpc = X86::AND32ri8;
  else if (DstTy == LLT::scalar(64))
    AndOpc = X86::AND64ri8;
  else
    return false;

  Register DefReg = SrcReg;
  if (DstTy != LLT::scalar(8)) {
    Register ImpDefReg =
        MRI.createVirtualRegister(getRegClass(DstTy, DstReg, MRI));
    BuildMI(*I.getParent(), I, I.getDebugLoc(),
            TII.get(TargetOpcode::IMPLICIT_DEF), ImpDefReg);

    DefReg = MRI.createVirtualRegister(getRegClass(DstTy, DstReg, MRI));
    BuildMI(*I.getParent(), I, I.getDebugLoc(),
            TII.get(TargetOpcode::INSERT_SUBREG), DefReg)
        .addReg(ImpDefReg)
        .addReg(SrcReg)
        .addImm(X86::sub_8bit);
  }

  MachineInstr &AndInst =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AndOpc), DstReg)
           .addReg(DefReg)
           .addImm(1);

  constrainSelectedInstRegOperands(AndInst, TII, TRI, RBI);

  I.eraseFromParent();
  return true;
}

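// Select G_ANYEXT, either as a plain copy (same class, or the FP/vector case)
// or via SUBREG_TO_REG into the wider GPR class.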
bool X86InstructionSelector::selectAnyext(MachineInstr &I,
                                          MachineRegisterInfo &MRI,
                                          MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_ANYEXT) && "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);

  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);

  assert(DstRB.getID() == SrcRB.getID() &&
         "G_ANYEXT input/output on different banks\n");

  assert(DstTy.getSizeInBits() > SrcTy.getSizeInBits() &&
         "G_ANYEXT incorrect operand size");

  const TargetRegisterClass *DstRC = getRegClass(DstTy, DstRB);
  const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcRB);

  // If this is an ANY_EXT of a value that lives in a floating-point class and
  // goes into a vector class, just replace it with a copy, as we are able to
  // select it as a regular move.
  if (canTurnIntoCOPY(SrcRC, DstRC))
    return selectTurnIntoCOPY(I, MRI, SrcReg, SrcRC, DstReg, DstRC);

  if (DstRB.getID() != X86::GPRRegBankID)
    return false;

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n");
    return false;
  }

  if (SrcRC == DstRC) {
    I.setDesc(TII.get(X86::COPY));
    return true;
  }

  BuildMI(*I.getParent(), I, I.getDebugLoc(),
          TII.get(TargetOpcode::SUBREG_TO_REG))
      .addDef(DstReg)
      .addImm(0)
      .addReg(SrcReg)
      .addImm(getSubRegIndex(SrcRC));

  I.eraseFromParent();
  return true;
}

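// Select G_ICMP as a CMP*rr followed by a SETCC of the mapped condition code.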
bool X86InstructionSelector::selectCmp(MachineInstr &I,
                                       MachineRegisterInfo &MRI,
                                       MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_ICMP) && "unexpected instruction");

  X86::CondCode CC;
  bool SwapArgs;
  std::tie(CC, SwapArgs) = X86::getX86ConditionCode(
      (CmpInst::Predicate)I.getOperand(1).getPredicate());

  Register LHS = I.getOperand(2).getReg();
  Register RHS = I.getOperand(3).getReg();

  if (SwapArgs)
    std::swap(LHS, RHS);

  unsigned OpCmp;
  LLT Ty = MRI.getType(LHS);

  switch (Ty.getSizeInBits()) {
  default:
    return false;
  case 8:
    OpCmp = X86::CMP8rr;
    break;
  case 16:
    OpCmp = X86::CMP16rr;
    break;
  case 32:
    OpCmp = X86::CMP32rr;
    break;
  case 64:
    OpCmp = X86::CMP64rr;
    break;
  }

  MachineInstr &CmpInst =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpCmp))
           .addReg(LHS)
           .addReg(RHS);

  MachineInstr &SetInst =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::SETCCr),
               I.getOperand(0).getReg())
           .addImm(CC);

  constrainSelectedInstRegOperands(CmpInst, TII, TRI, RBI);
  constrainSelectedInstRegOperands(SetInst, TII, TRI, RBI);

  I.eraseFromParent();
  return true;
}

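// Select G_FCMP using UCOMISS/UCOMISD. FCMP_OEQ and FCMP_UNE need two SETCCs
// combined with AND/OR; all other predicates map to a single SETCC.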
bool X86InstructionSelector::selectFCmp(MachineInstr &I,
                                        MachineRegisterInfo &MRI,
                                        MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_FCMP) && "unexpected instruction");

  Register LhsReg = I.getOperand(2).getReg();
  Register RhsReg = I.getOperand(3).getReg();
  CmpInst::Predicate Predicate =
      (CmpInst::Predicate)I.getOperand(1).getPredicate();

  // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
  static const uint16_t SETFOpcTable[2][3] = {
      {X86::COND_E, X86::COND_NP, X86::AND8rr},
      {X86::COND_NE, X86::COND_P, X86::OR8rr}};
  const uint16_t *SETFOpc = nullptr;
  switch (Predicate) {
  default:
    break;
  case CmpInst::FCMP_OEQ:
    SETFOpc = &SETFOpcTable[0][0];
    break;
  case CmpInst::FCMP_UNE:
    SETFOpc = &SETFOpcTable[1][0];
    break;
  }

  // Compute the opcode for the CMP instruction.
  unsigned OpCmp;
  LLT Ty = MRI.getType(LhsReg);
  switch (Ty.getSizeInBits()) {
  default:
    return false;
  case 32:
    OpCmp = X86::UCOMISSrr;
    break;
  case 64:
    OpCmp = X86::UCOMISDrr;
    break;
  }

  Register ResultReg = I.getOperand(0).getReg();
  RBI.constrainGenericRegister(
      ResultReg,
      *getRegClass(LLT::scalar(8), *RBI.getRegBank(ResultReg, MRI, TRI)), MRI);
  if (SETFOpc) {
    MachineInstr &CmpInst =
        *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpCmp))
             .addReg(LhsReg)
             .addReg(RhsReg);

    Register FlagReg1 = MRI.createVirtualRegister(&X86::GR8RegClass);
    Register FlagReg2 = MRI.createVirtualRegister(&X86::GR8RegClass);
    MachineInstr &Set1 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                  TII.get(X86::SETCCr), FlagReg1)
                              .addImm(SETFOpc[0]);
    MachineInstr &Set2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                  TII.get(X86::SETCCr), FlagReg2)
                              .addImm(SETFOpc[1]);
    MachineInstr &Set3 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                  TII.get(SETFOpc[2]), ResultReg)
                              .addReg(FlagReg1)
                              .addReg(FlagReg2);
    constrainSelectedInstRegOperands(CmpInst, TII, TRI, RBI);
    constrainSelectedInstRegOperands(Set1, TII, TRI, RBI);
    constrainSelectedInstRegOperands(Set2, TII, TRI, RBI);
    constrainSelectedInstRegOperands(Set3, TII, TRI, RBI);

    I.eraseFromParent();
    return true;
  }

  X86::CondCode CC;
  bool SwapArgs;
  std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");

  if (SwapArgs)
    std::swap(LhsReg, RhsReg);

  // Emit a compare of LHS/RHS.
  MachineInstr &CmpInst =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpCmp))
           .addReg(LhsReg)
           .addReg(RhsReg);

  MachineInstr &Set =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::SETCCr),
               ResultReg)
           .addImm(CC);
  constrainSelectedInstRegOperands(CmpInst, TII, TRI, RBI);
  constrainSelectedInstRegOperands(Set, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

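// Select G_UADDE (32-bit only): use ADC32rr when the carry-in comes from a
// previous G_UADDE, or plain ADD32rr when the carry-in is the constant 0.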
bool X86InstructionSelector::selectUadde(MachineInstr &I,
                                         MachineRegisterInfo &MRI,
                                         MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_UADDE) && "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register CarryOutReg = I.getOperand(1).getReg();
  const Register Op0Reg = I.getOperand(2).getReg();
  const Register Op1Reg = I.getOperand(3).getReg();
  Register CarryInReg = I.getOperand(4).getReg();

  const LLT DstTy = MRI.getType(DstReg);

  if (DstTy != LLT::scalar(32))
    return false;

  // Find the def instruction for the carry-in, looking through any G_TRUNCs.
  MachineInstr *Def = MRI.getVRegDef(CarryInReg);
  while (Def->getOpcode() == TargetOpcode::G_TRUNC) {
    CarryInReg = Def->getOperand(1).getReg();
    Def = MRI.getVRegDef(CarryInReg);
  }

  unsigned Opcode;
  if (Def->getOpcode() == TargetOpcode::G_UADDE) {
    // The carry was set by a previous ADD.
    BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), X86::EFLAGS)
        .addReg(CarryInReg);

    if (!RBI.constrainGenericRegister(CarryInReg, X86::GR32RegClass, MRI))
      return false;

    Opcode = X86::ADC32rr;
  } else if (auto val = getIConstantVRegVal(CarryInReg, MRI)) {
    // The carry is a constant; only 0 is supported.
    if (*val != 0)
      return false;

    Opcode = X86::ADD32rr;
  } else
    return false;

  MachineInstr &AddInst =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opcode), DstReg)
           .addReg(Op0Reg)
           .addReg(Op1Reg);

  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), CarryOutReg)
      .addReg(X86::EFLAGS);

  if (!constrainSelectedInstRegOperands(AddInst, TII, TRI, RBI) ||
      !RBI.constrainGenericRegister(CarryOutReg, X86::GR32RegClass, MRI))
    return false;

  I.eraseFromParent();
  return true;
}

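// Select G_EXTRACT of a subvector, either as a sub-register copy (index 0) or
// as a VEXTRACT* instruction.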
bool X86InstructionSelector::selectExtract(MachineInstr &I,
                                           MachineRegisterInfo &MRI,
                                           MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_EXTRACT) &&
         "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();
  int64_t Index = I.getOperand(2).getImm();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);

  // For now, handle vector types only.
  if (!DstTy.isVector())
    return false;

  if (Index % DstTy.getSizeInBits() != 0)
    return false; // Not extracting a subvector.

  if (Index == 0) {
    // Replace by an extract sub-register copy.
    if (!emitExtractSubreg(DstReg, SrcReg, I, MRI, MF))
      return false;

    I.eraseFromParent();
    return true;
  }

  bool HasAVX = STI.hasAVX();
  bool HasAVX512 = STI.hasAVX512();
  bool HasVLX = STI.hasVLX();

  if (SrcTy.getSizeInBits() == 256 && DstTy.getSizeInBits() == 128) {
    if (HasVLX)
      I.setDesc(TII.get(X86::VEXTRACTF32x4Z256rr));
    else if (HasAVX)
      I.setDesc(TII.get(X86::VEXTRACTF128rr));
    else
      return false;
  } else if (SrcTy.getSizeInBits() == 512 && HasAVX512) {
    if (DstTy.getSizeInBits() == 128)
      I.setDesc(TII.get(X86::VEXTRACTF32x4Zrr));
    else if (DstTy.getSizeInBits() == 256)
      I.setDesc(TII.get(X86::VEXTRACTF64x4Zrr));
    else
      return false;
  } else
    return false;

  // Convert to X86 VEXTRACT immediate.
  Index = Index / DstTy.getSizeInBits();
  I.getOperand(2).setImm(Index);

  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}

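// Emit a COPY that extracts the low 128/256-bit sub-register of SrcReg into
// DstReg, inserted before I.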
bool X86InstructionSelector::emitExtractSubreg(unsigned DstReg, unsigned SrcReg,
                                               MachineInstr &I,
                                               MachineRegisterInfo &MRI,
                                               MachineFunction &MF) const {
  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  unsigned SubIdx = X86::NoSubRegister;

  if (!DstTy.isVector() || !SrcTy.isVector())
    return false;

  assert(SrcTy.getSizeInBits() > DstTy.getSizeInBits() &&
         "Incorrect Src/Dst register size");

  if (DstTy.getSizeInBits() == 128)
    SubIdx = X86::sub_xmm;
  else if (DstTy.getSizeInBits() == 256)
    SubIdx = X86::sub_ymm;
  else
    return false;

  const TargetRegisterClass *DstRC = getRegClass(DstTy, DstReg, MRI);
  const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcReg, MRI);

  SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubIdx);

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain EXTRACT_SUBREG\n");
    return false;
  }

  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), DstReg)
      .addReg(SrcReg, 0, SubIdx);

  return true;
}

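// Emit a COPY that defines the low 128/256-bit sub-register of DstReg from
// SrcReg, inserted before I.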
bool X86InstructionSelector::emitInsertSubreg(unsigned DstReg, unsigned SrcReg,
                                              MachineInstr &I,
                                              MachineRegisterInfo &MRI,
                                              MachineFunction &MF) const {
  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  unsigned SubIdx = X86::NoSubRegister;

  // TODO: support scalar types
  if (!DstTy.isVector() || !SrcTy.isVector())
    return false;

  assert(SrcTy.getSizeInBits() < DstTy.getSizeInBits() &&
         "Incorrect Src/Dst register size");

  if (SrcTy.getSizeInBits() == 128)
    SubIdx = X86::sub_xmm;
  else if (SrcTy.getSizeInBits() == 256)
    SubIdx = X86::sub_ymm;
  else
    return false;

  const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcReg, MRI);
  const TargetRegisterClass *DstRC = getRegClass(DstTy, DstReg, MRI);

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain INSERT_SUBREG\n");
    return false;
  }

  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY))
      .addReg(DstReg, RegState::DefineNoRead, SubIdx)
      .addReg(SrcReg);

  return true;
}

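// Select G_INSERT of a subvector, either as a sub-register copy (index 0 into
// an IMPLICIT_DEF) or as a VINSERT* instruction.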
bool X86InstructionSelector::selectInsert(MachineInstr &I,
                                          MachineRegisterInfo &MRI,
                                          MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_INSERT) && "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();
  const Register InsertReg = I.getOperand(2).getReg();
  int64_t Index = I.getOperand(3).getImm();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT InsertRegTy = MRI.getType(InsertReg);

  // For now, handle vector types only.
  if (!DstTy.isVector())
    return false;

  if (Index % InsertRegTy.getSizeInBits() != 0)
    return false; // Not inserting a subvector.

  if (Index == 0 && MRI.getVRegDef(SrcReg)->isImplicitDef()) {
    // Replace by a sub-register copy.
    if (!emitInsertSubreg(DstReg, InsertReg, I, MRI, MF))
      return false;

    I.eraseFromParent();
    return true;
  }

  bool HasAVX = STI.hasAVX();
  bool HasAVX512 = STI.hasAVX512();
  bool HasVLX = STI.hasVLX();

  if (DstTy.getSizeInBits() == 256 && InsertRegTy.getSizeInBits() == 128) {
    if (HasVLX)
      I.setDesc(TII.get(X86::VINSERTF32x4Z256rr));
    else if (HasAVX)
      I.setDesc(TII.get(X86::VINSERTF128rr));
    else
      return false;
  } else if (DstTy.getSizeInBits() == 512 && HasAVX512) {
    if (InsertRegTy.getSizeInBits() == 128)
      I.setDesc(TII.get(X86::VINSERTF32x4Zrr));
    else if (InsertRegTy.getSizeInBits() == 256)
      I.setDesc(TII.get(X86::VINSERTF64x4Zrr));
    else
      return false;
  } else
    return false;

  // Convert to X86 VINSERT immediate.
  Index = Index / InsertRegTy.getSizeInBits();
  I.getOperand(3).setImm(Index);

  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}

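// Lower G_UNMERGE_VALUES into a sequence of G_EXTRACTs and select each one.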
bool X86InstructionSelector::selectUnmergeValues(
    MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) {
  assert((I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES) &&
         "unexpected instruction");

  // Split to extracts.
  unsigned NumDefs = I.getNumOperands() - 1;
  Register SrcReg = I.getOperand(NumDefs).getReg();
  unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();

  for (unsigned Idx = 0; Idx < NumDefs; ++Idx) {
    MachineInstr &ExtrInst =
        *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                 TII.get(TargetOpcode::G_EXTRACT), I.getOperand(Idx).getReg())
             .addReg(SrcReg)
             .addImm(Idx * DefSize);

    if (!select(ExtrInst))
      return false;
  }

  I.eraseFromParent();
  return true;
}

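// Lower G_MERGE_VALUES/G_CONCAT_VECTORS into a chain of G_INSERTs and select
// each one.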
bool X86InstructionSelector::selectMergeValues(
    MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) {
  assert((I.getOpcode() == TargetOpcode::G_MERGE_VALUES ||
          I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS) &&
         "unexpected instruction");

  // Split to inserts.
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg0 = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg0);
  unsigned SrcSize = SrcTy.getSizeInBits();

  const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI);

  // For the first source, use emitInsertSubreg.
  Register DefReg = MRI.createGenericVirtualRegister(DstTy);
  MRI.setRegBank(DefReg, RegBank);
  if (!emitInsertSubreg(DefReg, I.getOperand(1).getReg(), I, MRI, MF))
    return false;

  for (unsigned Idx = 2; Idx < I.getNumOperands(); ++Idx) {
    Register Tmp = MRI.createGenericVirtualRegister(DstTy);
    MRI.setRegBank(Tmp, RegBank);

    MachineInstr &InsertInst = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                        TII.get(TargetOpcode::G_INSERT), Tmp)
                                    .addReg(DefReg)
                                    .addReg(I.getOperand(Idx).getReg())
                                    .addImm((Idx - 1) * SrcSize);

    DefReg = Tmp;

    if (!select(InsertInst))
      return false;
  }

  MachineInstr &CopyInst = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                    TII.get(TargetOpcode::COPY), DstReg)
                                .addReg(DefReg);

  if (!select(CopyInst))
    return false;

  I.eraseFromParent();
  return true;
}

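// Select G_BRCOND as a TEST8ri of the condition's low bit followed by a
// JCC_1 on COND_NE.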
bool X86InstructionSelector::selectCondBranch(MachineInstr &I,
                                              MachineRegisterInfo &MRI,
                                              MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_BRCOND) && "unexpected instruction");

  const Register CondReg = I.getOperand(0).getReg();
  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();

  MachineInstr &TestInst =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::TEST8ri))
           .addReg(CondReg)
           .addImm(1);
  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::JCC_1))
      .addMBB(DestMBB)
      .addImm(X86::COND_NE);

  constrainSelectedInstRegOperands(TestInst, TII, TRI, RBI);

  I.eraseFromParent();
  return true;
}

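// Materialize a G_FCONSTANT by loading the value from the constant pool.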
  1154. bool X86InstructionSelector::materializeFP(MachineInstr &I,
  1155. MachineRegisterInfo &MRI,
  1156. MachineFunction &MF) const {
  1157. assert((I.getOpcode() == TargetOpcode::G_FCONSTANT) &&
  1158. "unexpected instruction");
  1159. // Can't handle alternate code models yet.
  1160. CodeModel::Model CM = TM.getCodeModel();
  1161. if (CM != CodeModel::Small && CM != CodeModel::Large)
  1162. return false;
  1163. const Register DstReg = I.getOperand(0).getReg();
  1164. const LLT DstTy = MRI.getType(DstReg);
  1165. const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  1166. Align Alignment = Align(DstTy.getSizeInBytes());
  1167. const DebugLoc &DbgLoc = I.getDebugLoc();
  1168. unsigned Opc =
  1169. getLoadStoreOp(DstTy, RegBank, TargetOpcode::G_LOAD, Alignment);
  1170. // Create the load from the constant pool.
  1171. const ConstantFP *CFP = I.getOperand(1).getFPImm();
  1172. unsigned CPI = MF.getConstantPool()->getConstantPoolIndex(CFP, Alignment);
  1173. MachineInstr *LoadInst = nullptr;
  1174. unsigned char OpFlag = STI.classifyLocalReference(nullptr);
  1175. if (CM == CodeModel::Large && STI.is64Bit()) {
  1176. // Under X86-64 non-small code model, GV (and friends) are 64-bits, so
  1177. // they cannot be folded into immediate fields.
  1178. Register AddrReg = MRI.createVirtualRegister(&X86::GR64RegClass);
  1179. BuildMI(*I.getParent(), I, DbgLoc, TII.get(X86::MOV64ri), AddrReg)
  1180. .addConstantPoolIndex(CPI, 0, OpFlag);
  1181. MachineMemOperand *MMO = MF.getMachineMemOperand(
  1182. MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad,
  1183. MF.getDataLayout().getPointerSize(), Alignment);
  1184. LoadInst =
  1185. addDirectMem(BuildMI(*I.getParent(), I, DbgLoc, TII.get(Opc), DstReg),
  1186. AddrReg)
  1187. .addMemOperand(MMO);
  1188. } else if (CM == CodeModel::Small || !STI.is64Bit()) {
  1189. // Handle the case when globals fit in our immediate field.
  1190. // This is true for X86-32 always and X86-64 when in -mcmodel=small mode.
  1191. // x86-32 PIC requires a PIC base register for constant pools.
  1192. unsigned PICBase = 0;
  1193. if (OpFlag == X86II::MO_PIC_BASE_OFFSET || OpFlag == X86II::MO_GOTOFF) {
  1194. // PICBase can be allocated by TII.getGlobalBaseReg(&MF).
  1195. // In DAGISEL the code that initialize it generated by the CGBR pass.
  1196. return false; // TODO support the mode.
    } else if (STI.is64Bit() && TM.getCodeModel() == CodeModel::Small)
      PICBase = X86::RIP;

    LoadInst = addConstantPoolReference(
        BuildMI(*I.getParent(), I, DbgLoc, TII.get(Opc), DstReg), CPI, PICBase,
        OpFlag);
  } else
    return false;

  constrainSelectedInstRegOperands(*LoadInst, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

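// A sketch of the two addressing strategies above, assuming an f64 constant
// on a plain SSE2 x86-64 target (the actual load opcode comes from
// getLoadStoreOp; register names are illustrative).
// Large code model, where the pool address cannot be encoded as a
// displacement, loads the address into a register first:
//   %addr:gr64 = MOV64ri %const.0
//   %dst:fr64 = MOVSDrm %addr, 1, $noreg, 0, $noreg
// Small code model on x86-64 folds the pool entry into a RIP-relative access:
//   %dst:fr64 = MOVSDrm $rip, 1, $noreg, %const.0, $noreg
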
bool X86InstructionSelector::selectImplicitDefOrPHI(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert((I.getOpcode() == TargetOpcode::G_IMPLICIT_DEF ||
          I.getOpcode() == TargetOpcode::G_PHI) &&
         "unexpected instruction");

  Register DstReg = I.getOperand(0).getReg();

  if (!MRI.getRegClassOrNull(DstReg)) {
    const LLT DstTy = MRI.getType(DstReg);
    const TargetRegisterClass *RC = getRegClass(DstTy, DstReg, MRI);

    if (!RBI.constrainGenericRegister(DstReg, *RC, MRI)) {
      LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                        << " operand\n");
      return false;
    }
  }

  if (I.getOpcode() == TargetOpcode::G_IMPLICIT_DEF)
    I.setDesc(TII.get(X86::IMPLICIT_DEF));
  else
    I.setDesc(TII.get(X86::PHI));

  return true;
}

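// Both opcodes map one-to-one: only the opcode is rewritten and the operands
// are kept, e.g. (a sketch with an illustrative virtual register):
//   %v:_(s32) = G_IMPLICIT_DEF   -->   %v:gr32 = IMPLICIT_DEF
// once %v has been constrained to a concrete register class above.
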
bool X86InstructionSelector::selectDivRem(MachineInstr &I,
                                          MachineRegisterInfo &MRI,
                                          MachineFunction &MF) const {
  // The implementation of this function is taken from X86FastISel.
  assert((I.getOpcode() == TargetOpcode::G_SDIV ||
          I.getOpcode() == TargetOpcode::G_SREM ||
          I.getOpcode() == TargetOpcode::G_UDIV ||
          I.getOpcode() == TargetOpcode::G_UREM) &&
         "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register Op1Reg = I.getOperand(1).getReg();
  const Register Op2Reg = I.getOperand(2).getReg();

  const LLT RegTy = MRI.getType(DstReg);
  assert(RegTy == MRI.getType(Op1Reg) && RegTy == MRI.getType(Op2Reg) &&
         "Arguments and return value types must match");

  const RegisterBank *RegRB = RBI.getRegBank(DstReg, MRI, TRI);
  if (!RegRB || RegRB->getID() != X86::GPRRegBankID)
    return false;

  const static unsigned NumTypes = 4; // i8, i16, i32, i64
  const static unsigned NumOps = 4;   // SDiv, SRem, UDiv, URem
  const static bool S = true;         // IsSigned
  const static bool U = false;        // !IsSigned
  const static unsigned Copy = TargetOpcode::COPY;

  // For the X86 IDIV instruction, in most cases the dividend
  // (numerator) must be in a specific register pair highreg:lowreg,
  // producing the quotient in lowreg and the remainder in highreg.
  // For most data types, to set up the instruction, the dividend is
  // copied into lowreg, and lowreg is sign-extended into highreg. The
  // exception is i8, where the dividend is defined as a single register rather
  // than a register pair, and we therefore directly sign-extend the dividend
  // into lowreg, instead of copying, and ignore the highreg.
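  // For example (a sketch; virtual register names are illustrative), a 32-bit
  // signed division %q:gpr(s32) = G_SDIV %a, %b is emitted below as:
  //   $eax = COPY %a        ; dividend into the low register
  //   CDQ                   ; sign-extend $eax into $edx (the EDX:EAX pair)
  //   IDIV32r %b            ; quotient -> $eax, remainder -> $edx
  //   %q = COPY $eax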
  const static struct DivRemEntry {
    // The following portion depends only on the data type.
    unsigned SizeInBits;
    unsigned LowInReg;  // low part of the register pair
    unsigned HighInReg; // high part of the register pair
    // The following portion depends on both the data type and the operation.
    struct DivRemResult {
      unsigned OpDivRem;        // The specific DIV/IDIV opcode to use.
      unsigned OpSignExtend;    // Opcode for sign-extending lowreg into
                                // highreg, or copying a zero into highreg.
      unsigned OpCopy;          // Opcode for copying dividend into lowreg, or
                                // zero/sign-extending into lowreg for i8.
      unsigned DivRemResultReg; // Register containing the desired result.
      bool IsOpSigned;          // Whether to use signed or unsigned form.
    } ResultTable[NumOps];
  } OpTable[NumTypes] = {
      {8,
       X86::AX,
       0,
       {
           {X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AL, S}, // SDiv
           {X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S}, // SRem
           {X86::DIV8r, 0, X86::MOVZX16rr8, X86::AL, U},  // UDiv
           {X86::DIV8r, 0, X86::MOVZX16rr8, X86::AH, U},  // URem
       }},                                                // i8
      {16,
       X86::AX,
       X86::DX,
       {
           {X86::IDIV16r, X86::CWD, Copy, X86::AX, S},    // SDiv
           {X86::IDIV16r, X86::CWD, Copy, X86::DX, S},    // SRem
           {X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U}, // UDiv
           {X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U}, // URem
       }},                                                // i16
      {32,
       X86::EAX,
       X86::EDX,
       {
           {X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S},    // SDiv
           {X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S},    // SRem
           {X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U}, // UDiv
           {X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U}, // URem
       }},                                                 // i32
      {64,
       X86::RAX,
       X86::RDX,
       {
           {X86::IDIV64r, X86::CQO, Copy, X86::RAX, S},    // SDiv
           {X86::IDIV64r, X86::CQO, Copy, X86::RDX, S},    // SRem
           {X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U}, // UDiv
           {X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U}, // URem
       }},                                                 // i64
  };

  auto OpEntryIt = llvm::find_if(OpTable, [RegTy](const DivRemEntry &El) {
    return El.SizeInBits == RegTy.getSizeInBits();
  });
  if (OpEntryIt == std::end(OpTable))
    return false;

  unsigned OpIndex;
  switch (I.getOpcode()) {
  default:
    llvm_unreachable("Unexpected div/rem opcode");
  case TargetOpcode::G_SDIV:
    OpIndex = 0;
    break;
  case TargetOpcode::G_SREM:
    OpIndex = 1;
    break;
  case TargetOpcode::G_UDIV:
    OpIndex = 2;
    break;
  case TargetOpcode::G_UREM:
    OpIndex = 3;
    break;
  }

  const DivRemEntry &TypeEntry = *OpEntryIt;
  const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];

  const TargetRegisterClass *RegRC = getRegClass(RegTy, *RegRB);
  if (!RBI.constrainGenericRegister(Op1Reg, *RegRC, MRI) ||
      !RBI.constrainGenericRegister(Op2Reg, *RegRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *RegRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n");
    return false;
  }

  // Move op1 into low-order input register.
  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpEntry.OpCopy),
          TypeEntry.LowInReg)
      .addReg(Op1Reg);

  // Zero-extend or sign-extend into high-order input register.
  if (OpEntry.OpSignExtend) {
    if (OpEntry.IsOpSigned)
      BuildMI(*I.getParent(), I, I.getDebugLoc(),
              TII.get(OpEntry.OpSignExtend));
    else {
      Register Zero32 = MRI.createVirtualRegister(&X86::GR32RegClass);
      BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::MOV32r0),
              Zero32);

      // Copy the zero into the appropriate sub/super/identical physical
      // register. Unfortunately the operations needed are not uniform enough
      // to fit neatly into the table above.
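      // For example (sketch) for i16, the zero reaches $dx through a 32-bit
      // def and a subregister copy:
      //   %zero:gr32 = MOV32r0
      //   $dx = COPY %zero.sub_16bit
      // whereas i64 widens the same 32-bit zero with SUBREG_TO_REG.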
      if (RegTy.getSizeInBits() == 16) {
        BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy),
                TypeEntry.HighInReg)
            .addReg(Zero32, 0, X86::sub_16bit);
      } else if (RegTy.getSizeInBits() == 32) {
        BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy),
                TypeEntry.HighInReg)
            .addReg(Zero32);
      } else if (RegTy.getSizeInBits() == 64) {
        BuildMI(*I.getParent(), I, I.getDebugLoc(),
                TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
            .addImm(0)
            .addReg(Zero32)
            .addImm(X86::sub_32bit);
      }
    }
  }

  // Generate the DIV/IDIV instruction.
  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpEntry.OpDivRem))
      .addReg(Op2Reg);

  // For i8 remainder, we can't reference ah directly, as we'll end
  // up with bogus copies like %r9b = COPY %ah. Reference ax
  // instead to prevent ah references in a rex instruction.
  //
  // The current assumption of the fast register allocator is that isel
  // won't generate explicit references to the GR8_NOREX registers. If
  // the allocator and/or the backend get enhanced to be more robust in
  // that regard, this can be, and should be, removed.
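  // A sketch of the workaround below: rather than %rem:gr8 = COPY $ah, emit
  //   %src:gr16 = COPY $ax
  //   %res:gr16 = SHR16ri %src, 8   ; move AH's bits into the low byte
  //   %rem:gr8 = COPY %res.sub_8bit
  // so that no direct AH reference survives to the register allocator.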
  if ((I.getOpcode() == TargetOpcode::G_SREM ||
       I.getOpcode() == TargetOpcode::G_UREM) &&
      OpEntry.DivRemResultReg == X86::AH && STI.is64Bit()) {
    Register SourceSuperReg = MRI.createVirtualRegister(&X86::GR16RegClass);
    Register ResultSuperReg = MRI.createVirtualRegister(&X86::GR16RegClass);
    BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy), SourceSuperReg)
        .addReg(X86::AX);

    // Shift AX right by 8 bits instead of using AH.
    BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::SHR16ri),
            ResultSuperReg)
        .addReg(SourceSuperReg)
        .addImm(8);

    // Now reference the 8-bit subreg of the result.
    BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy), DstReg)
        .addReg(ResultSuperReg, 0, X86::sub_8bit);
  } else {
    BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY),
            DstReg)
        .addReg(OpEntry.DivRemResultReg);
  }

  I.eraseFromParent();
  return true;
}

bool X86InstructionSelector::selectIntrinsicWSideEffects(
    MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const {
  assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS &&
         "unexpected instruction");

  if (I.getOperand(0).getIntrinsicID() != Intrinsic::trap)
    return false;

  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::TRAP));

  I.eraseFromParent();
  return true;
}

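// The only intrinsic handled here is @llvm.trap, and the mapping is
// one-to-one (sketch):
//   G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.trap)  -->  TRAP
// Every other side-effecting intrinsic is rejected by returning false.
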
InstructionSelector *
llvm::createX86InstructionSelector(const X86TargetMachine &TM,
                                   X86Subtarget &Subtarget,
                                   X86RegisterBankInfo &RBI) {
  return new X86InstructionSelector(TM, Subtarget, RBI);
}
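
// A sketch of how this factory is typically consumed (the surrounding setup
// follows the usual X86 subtarget wiring and may differ between LLVM
// versions): the subtarget constructs the selector once, and the GlobalISel
// InstructionSelect pass then queries it for each machine function.
//   InstSelector.reset(createX86InstructionSelector(
//       *static_cast<const X86TargetMachine *>(&TM), *this, *RBI));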