X86Schedule.td 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746
  1. //===-- X86Schedule.td - X86 Scheduling Definitions --------*- tablegen -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //===----------------------------------------------------------------------===//
  9. // InstrSchedModel annotations for out-of-order CPUs.
  10. // Instructions with folded loads need to read the memory operand immediately,
  11. // but other register operands don't have to be read until the load is ready.
  12. // These operands are marked with ReadAfterLd.
  13. def ReadAfterLd : SchedRead; // Default ReadAfter argument of X86SchedWritePair.
  14. def ReadAfterVecLd : SchedRead; // Passed to vector pairs without a width suffix (e.g. WriteFAdd).
  15. def ReadAfterVecXLd : SchedRead; // Passed to XMM pairs (e.g. WriteFAddX).
  16. def ReadAfterVecYLd : SchedRead; // Passed to both YMM and ZMM pairs (e.g. WriteFAddY, WriteFAddZ).
  17. // Instructions that move data between general purpose registers and vector
  18. // registers may be subject to extra latency due to data bypass delays.
  19. // This SchedRead describes a bypass delay caused by data being moved from the
  20. // integer unit to the floating point unit.
  21. def ReadInt2Fpu : SchedRead;
  22. // Instructions with both a load and a store folded are modeled as a folded
  23. // load + WriteRMW.
  24. def WriteRMW : SchedWrite;
  25. // Helper to set SchedWrite ExePorts/Latency/ResourceCycles/NumMicroOps.
  26. multiclass X86WriteRes<SchedWrite SchedRW, // SchedWrite to attach resources to.
  27. list<ProcResourceKind> ExePorts, // Forwarded as the WriteRes resource list.
  28. int Lat, list<int> Res, int UOps> { // Values for Latency, ResourceCycles and NumMicroOps.
  29. def : WriteRes<SchedRW, ExePorts> { // Anonymous WriteRes record for SchedRW.
  30. let Latency = Lat;
  31. let ResourceCycles = Res;
  32. let NumMicroOps = UOps;
  33. }
  34. }
  35. // Most instructions can fold loads, so almost every SchedWrite comes in two
  36. // variants: With and without a folded load.
  37. // An X86FoldableSchedWrite holds a reference to the corresponding SchedWrite
  38. // with a folded load.
  39. class X86FoldableSchedWrite : SchedWrite {
  40. // The SchedWrite to use when a load is folded into the instruction.
  41. SchedWrite Folded;
  42. // The SchedRead to tag register operands that don't need to be ready
  43. // until the folded load has completed.
  44. SchedRead ReadAfterFold;
  45. }
  46. // Multiclass that produces a linked pair of SchedWrites.
  47. multiclass X86SchedWritePair<SchedRead ReadAfter = ReadAfterLd> {
  48. // Register-Memory operation; instantiated as <NAME>Ld (e.g. WriteALULd).
  49. def Ld : SchedWrite;
  50. // Register-Register operation; linked to the Ld record through Folded.
  51. def NAME : X86FoldableSchedWrite {
  52. let Folded = !cast<SchedWrite>(NAME#"Ld");
  53. let ReadAfterFold = ReadAfter; // ReadAfterLd unless the defm passes another SchedRead.
  54. }
  55. }
  56. // Helpers to mark SchedWrites as unsupported.
  57. multiclass X86WriteResUnsupported<SchedWrite SchedRW> {
  58. let Unsupported = 1 in { // Applies to the record defined inside the braces.
  59. def : WriteRes<SchedRW, []>; // Empty resource list for SchedRW.
  60. }
  61. }
  62. multiclass X86WriteResPairUnsupported<X86FoldableSchedWrite SchedRW> {
  63. let Unsupported = 1 in { // Applies to both records defined inside the braces.
  64. def : WriteRes<SchedRW, []>; // Register-register variant.
  65. def : WriteRes<SchedRW.Folded, []>; // Folded-load variant referenced by SchedRW.
  66. }
  67. }
  68. // Class that wraps X86FoldableSchedWrite for each vector width.
  69. class X86SchedWriteWidths<X86FoldableSchedWrite sScl,
  70. X86FoldableSchedWrite s128,
  71. X86FoldableSchedWrite s256,
  72. X86FoldableSchedWrite s512> {
  73. X86FoldableSchedWrite Scl = sScl; // Scalar float/double operations.
  74. X86FoldableSchedWrite MMX = sScl; // MMX operations (reuses the scalar argument).
  75. X86FoldableSchedWrite XMM = s128; // XMM operations.
  76. X86FoldableSchedWrite YMM = s256; // YMM operations.
  77. X86FoldableSchedWrite ZMM = s512; // ZMM operations.
  78. }
  79. // Class that wraps X86SchedWriteWidths for each fp vector type.
  80. class X86SchedWriteSizes<X86SchedWriteWidths sPH,
  81. X86SchedWriteWidths sPS,
  82. X86SchedWriteWidths sPD> {
  83. X86SchedWriteWidths PH = sPH; // NOTE(review): presumably packed half precision - confirm.
  84. X86SchedWriteWidths PS = sPS; // NOTE(review): presumably packed single precision - confirm.
  85. X86SchedWriteWidths PD = sPD; // NOTE(review): presumably packed double precision - confirm.
  86. }
  87. // Class that wraps move/load/store triple for a vector width.
  88. class X86SchedWriteMoveLS<SchedWrite MoveRR,
  89. SchedWrite LoadRM,
  90. SchedWrite StoreMR> {
  91. SchedWrite RR = MoveRR; // Register-to-register move write.
  92. SchedWrite RM = LoadRM; // Load write.
  93. SchedWrite MR = StoreMR; // Store write.
  94. }
  95. // Class that wraps masked load/store writes for a vector width.
  96. class X86SchedWriteMaskMove<SchedWrite LoadRM, SchedWrite StoreMR> {
  97. SchedWrite RM = LoadRM; // Masked load write.
  98. SchedWrite MR = StoreMR; // Masked store write.
  99. }
  100. // Class that wraps X86SchedWriteMoveLS for each vector width.
  101. class X86SchedWriteMoveLSWidths<X86SchedWriteMoveLS sScl,
  102. X86SchedWriteMoveLS s128,
  103. X86SchedWriteMoveLS s256,
  104. X86SchedWriteMoveLS s512> {
  105. X86SchedWriteMoveLS Scl = sScl; // Scalar float/double operations.
  106. X86SchedWriteMoveLS MMX = sScl; // MMX operations (reuses the scalar argument).
  107. X86SchedWriteMoveLS XMM = s128; // XMM operations.
  108. X86SchedWriteMoveLS YMM = s256; // YMM operations.
  109. X86SchedWriteMoveLS ZMM = s512; // ZMM operations.
  110. }
  111. // Loads, stores, and moves, not folded with other operations.
  112. def WriteLoad : SchedWrite;
  113. def WriteStore : SchedWrite;
  114. def WriteStoreNT : SchedWrite;
  115. def WriteMove : SchedWrite;
  116. def WriteVecMaskedGatherWriteback : SchedWrite;
  117. def WriteCopy : WriteSequence<[WriteLoad, WriteStore]>; // mem->mem copy
  118. // Arithmetic.
  119. defm WriteALU : X86SchedWritePair; // Simple integer ALU op.
  120. defm WriteADC : X86SchedWritePair; // Integer ALU + flags op.
  121. def WriteALURMW : WriteSequence<[WriteALULd, WriteRMW]>; // Load+store folded: WriteALULd followed by WriteRMW.
  122. def WriteADCRMW : WriteSequence<[WriteADCLd, WriteRMW]>; // Load+store folded: WriteADCLd followed by WriteRMW.
  123. def WriteLEA : SchedWrite; // LEA instructions can't fold loads.
  124. // Integer multiplication
  125. defm WriteIMul8 : X86SchedWritePair; // Integer 8-bit multiplication.
  126. defm WriteIMul16 : X86SchedWritePair; // Integer 16-bit multiplication.
  127. defm WriteIMul16Imm : X86SchedWritePair; // Integer 16-bit multiplication by immediate.
  128. defm WriteIMul16Reg : X86SchedWritePair; // Integer 16-bit multiplication by register.
  129. defm WriteIMul32 : X86SchedWritePair; // Integer 32-bit multiplication.
  130. defm WriteIMul32Imm : X86SchedWritePair; // Integer 32-bit multiplication by immediate.
  131. defm WriteIMul32Reg : X86SchedWritePair; // Integer 32-bit multiplication by register.
  132. defm WriteIMul64 : X86SchedWritePair; // Integer 64-bit multiplication.
  133. defm WriteIMul64Imm : X86SchedWritePair; // Integer 64-bit multiplication by immediate.
  134. defm WriteIMul64Reg : X86SchedWritePair; // Integer 64-bit multiplication by register.
  135. defm WriteMULX32 : X86SchedWritePair; // Integer 32-bit Multiplication without affecting flags.
  136. defm WriteMULX64 : X86SchedWritePair; // Integer 64-bit Multiplication without affecting flags.
  137. def WriteIMulH : SchedWrite; // Integer multiplication, high part (only used by the RR variant of MULX).
  138. def WriteIMulHLd : SchedWrite; // Integer multiplication, high part (only used by the RM variant of MULX).
  139. def WriteBSWAP32 : SchedWrite; // Byte Order (Endianness) 32-bit Swap.
  140. def WriteBSWAP64 : SchedWrite; // Byte Order (Endianness) 64-bit Swap.
  141. defm WriteCMPXCHG : X86SchedWritePair; // Compare and set, compare and swap.
  142. def WriteCMPXCHGRMW : SchedWrite; // Compare and set, compare and swap. NOTE(review): presumably the read-modify-write (memory destination) form - confirm against users.
  143. def WriteXCHG : SchedWrite; // Exchange - TODO RMW support. NOTE(review): name suggests plain XCHG rather than compare+exchange - confirm against users.
  144. // Integer division.
  145. defm WriteDiv8 : X86SchedWritePair;
  146. defm WriteDiv16 : X86SchedWritePair;
  147. defm WriteDiv32 : X86SchedWritePair;
  148. defm WriteDiv64 : X86SchedWritePair;
  149. defm WriteIDiv8 : X86SchedWritePair;
  150. defm WriteIDiv16 : X86SchedWritePair;
  151. defm WriteIDiv32 : X86SchedWritePair;
  152. defm WriteIDiv64 : X86SchedWritePair;
  153. defm WriteBSF : X86SchedWritePair; // Bit scan forward.
  154. defm WriteBSR : X86SchedWritePair; // Bit scan reverse.
  155. defm WritePOPCNT : X86SchedWritePair; // Bit population count.
  156. defm WriteLZCNT : X86SchedWritePair; // Leading zero count.
  157. defm WriteTZCNT : X86SchedWritePair; // Trailing zero count.
  158. defm WriteCMOV : X86SchedWritePair; // Conditional move.
  159. def WriteFCMOV : SchedWrite; // X87 conditional move.
  160. def WriteSETCC : SchedWrite; // Set register based on condition code.
  161. def WriteSETCCStore : SchedWrite;
  162. def WriteLAHFSAHF : SchedWrite; // Load/Store flags in AH.
  163. def WriteBitTest : SchedWrite; // Bit Test
  164. def WriteBitTestImmLd : SchedWrite;
  165. def WriteBitTestRegLd : SchedWrite;
  166. def WriteBitTestSet : SchedWrite; // Bit Test + Set
  167. def WriteBitTestSetImmLd : SchedWrite;
  168. def WriteBitTestSetRegLd : SchedWrite;
  169. def WriteBitTestSetImmRMW : WriteSequence<[WriteBitTestSetImmLd, WriteRMW]>;
  170. def WriteBitTestSetRegRMW : WriteSequence<[WriteBitTestSetRegLd, WriteRMW]>;
  171. // Integer shifts and rotates.
  172. defm WriteShift : X86SchedWritePair;
  173. defm WriteShiftCL : X86SchedWritePair;
  174. defm WriteRotate : X86SchedWritePair;
  175. defm WriteRotateCL : X86SchedWritePair;
  176. // Double shift instructions.
  177. def WriteSHDrri : SchedWrite;
  178. def WriteSHDrrcl : SchedWrite;
  179. def WriteSHDmri : SchedWrite;
  180. def WriteSHDmrcl : SchedWrite;
  181. // BMI1 BEXTR/BLS, BMI2 BZHI
  182. defm WriteBEXTR : X86SchedWritePair;
  183. defm WriteBLS : X86SchedWritePair;
  184. defm WriteBZHI : X86SchedWritePair;
  185. // Idioms that clear a register, like xorps %xmm0, %xmm0.
  186. // These can often bypass execution ports completely.
  187. def WriteZero : SchedWrite;
  188. // Branches don't produce values, so they have no latency, but they still
  189. // consume resources. Indirect branches can fold loads.
  190. defm WriteJump : X86SchedWritePair;
  191. // Floating point. This covers both scalar and vector operations.
  192. def WriteFLD0 : SchedWrite;
  193. def WriteFLD1 : SchedWrite;
  194. def WriteFLDC : SchedWrite;
  195. def WriteFLoad : SchedWrite;
  196. def WriteFLoadX : SchedWrite;
  197. def WriteFLoadY : SchedWrite;
  198. def WriteFMaskedLoad : SchedWrite;
  199. def WriteFMaskedLoadY : SchedWrite;
  200. def WriteFStore : SchedWrite;
  201. def WriteFStoreX : SchedWrite;
  202. def WriteFStoreY : SchedWrite;
  203. def WriteFStoreNT : SchedWrite;
  204. def WriteFStoreNTX : SchedWrite;
  205. def WriteFStoreNTY : SchedWrite;
  206. def WriteFMaskedStore32 : SchedWrite;
  207. def WriteFMaskedStore64 : SchedWrite;
  208. def WriteFMaskedStore32Y : SchedWrite;
  209. def WriteFMaskedStore64Y : SchedWrite;
  210. def WriteFMove : SchedWrite;
  211. def WriteFMoveX : SchedWrite;
  212. def WriteFMoveY : SchedWrite;
  213. def WriteFMoveZ : SchedWrite;
  214. defm WriteFAdd : X86SchedWritePair<ReadAfterVecLd>; // Floating point add/sub.
  215. defm WriteFAddX : X86SchedWritePair<ReadAfterVecXLd>; // Floating point add/sub (XMM).
  216. defm WriteFAddY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point add/sub (YMM).
  217. defm WriteFAddZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point add/sub (ZMM).
  218. defm WriteFAdd64 : X86SchedWritePair<ReadAfterVecLd>; // Floating point double add/sub.
  219. defm WriteFAdd64X : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double add/sub (XMM).
  220. defm WriteFAdd64Y : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double add/sub (YMM).
  221. defm WriteFAdd64Z : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double add/sub (ZMM).
  222. defm WriteFCmp : X86SchedWritePair<ReadAfterVecLd>; // Floating point compare.
  223. defm WriteFCmpX : X86SchedWritePair<ReadAfterVecXLd>; // Floating point compare (XMM).
  224. defm WriteFCmpY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point compare (YMM).
  225. defm WriteFCmpZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point compare (ZMM).
  226. defm WriteFCmp64 : X86SchedWritePair<ReadAfterVecLd>; // Floating point double compare.
  227. defm WriteFCmp64X : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double compare (XMM).
  228. defm WriteFCmp64Y : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double compare (YMM).
  229. defm WriteFCmp64Z : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double compare (ZMM).
  230. defm WriteFCom : X86SchedWritePair<ReadAfterVecLd>; // Floating point compare to flags (X87).
  231. defm WriteFComX : X86SchedWritePair<ReadAfterVecLd>; // Floating point compare to flags (SSE).
  232. defm WriteFMul : X86SchedWritePair<ReadAfterVecLd>; // Floating point multiplication.
  233. defm WriteFMulX : X86SchedWritePair<ReadAfterVecXLd>; // Floating point multiplication (XMM).
  234. defm WriteFMulY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point multiplication (YMM).
  235. defm WriteFMulZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point multiplication (ZMM).
  236. defm WriteFMul64 : X86SchedWritePair<ReadAfterVecLd>; // Floating point double multiplication.
  237. defm WriteFMul64X : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double multiplication (XMM).
  238. defm WriteFMul64Y : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double multiplication (YMM).
  239. defm WriteFMul64Z : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double multiplication (ZMM).
  240. defm WriteFDiv : X86SchedWritePair<ReadAfterVecLd>; // Floating point division.
  241. defm WriteFDivX : X86SchedWritePair<ReadAfterVecXLd>; // Floating point division (XMM).
  242. defm WriteFDivY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point division (YMM).
  243. defm WriteFDivZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point division (ZMM).
  244. defm WriteFDiv64 : X86SchedWritePair<ReadAfterVecLd>; // Floating point double division.
  245. defm WriteFDiv64X : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double division (XMM).
  246. defm WriteFDiv64Y : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double division (YMM).
  247. defm WriteFDiv64Z : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double division (ZMM).
  248. defm WriteFSqrt : X86SchedWritePair<ReadAfterVecLd>; // Floating point square root.
  249. defm WriteFSqrtX : X86SchedWritePair<ReadAfterVecXLd>; // Floating point square root (XMM).
  250. defm WriteFSqrtY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point square root (YMM).
  251. defm WriteFSqrtZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point square root (ZMM).
  252. defm WriteFSqrt64 : X86SchedWritePair<ReadAfterVecLd>; // Floating point double square root.
  253. defm WriteFSqrt64X : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double square root (XMM).
  254. defm WriteFSqrt64Y : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double square root (YMM).
  255. defm WriteFSqrt64Z : X86SchedWritePair<ReadAfterVecYLd>; // Floating point double square root (ZMM).
  256. defm WriteFSqrt80 : X86SchedWritePair<ReadAfterVecLd>; // Floating point long double square root.
  257. defm WriteFRcp : X86SchedWritePair<ReadAfterVecLd>; // Floating point reciprocal estimate.
  258. defm WriteFRcpX : X86SchedWritePair<ReadAfterVecXLd>; // Floating point reciprocal estimate (XMM).
  259. defm WriteFRcpY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point reciprocal estimate (YMM).
  260. defm WriteFRcpZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point reciprocal estimate (ZMM).
  261. defm WriteFRsqrt : X86SchedWritePair<ReadAfterVecLd>; // Floating point reciprocal square root estimate.
  262. defm WriteFRsqrtX: X86SchedWritePair<ReadAfterVecXLd>; // Floating point reciprocal square root estimate (XMM).
  263. defm WriteFRsqrtY: X86SchedWritePair<ReadAfterVecYLd>; // Floating point reciprocal square root estimate (YMM).
  264. defm WriteFRsqrtZ: X86SchedWritePair<ReadAfterVecYLd>; // Floating point reciprocal square root estimate (ZMM).
  265. defm WriteFMA : X86SchedWritePair<ReadAfterVecLd>; // Fused Multiply Add.
  266. defm WriteFMAX : X86SchedWritePair<ReadAfterVecXLd>; // Fused Multiply Add (XMM).
  267. defm WriteFMAY : X86SchedWritePair<ReadAfterVecYLd>; // Fused Multiply Add (YMM).
  268. defm WriteFMAZ : X86SchedWritePair<ReadAfterVecYLd>; // Fused Multiply Add (ZMM).
  269. defm WriteDPPD : X86SchedWritePair<ReadAfterVecXLd>; // Floating point double dot product.
  270. defm WriteDPPS : X86SchedWritePair<ReadAfterVecXLd>; // Floating point single dot product.
  271. defm WriteDPPSY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point single dot product (YMM).
  272. defm WriteDPPSZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point single dot product (ZMM).
  273. defm WriteFSign : X86SchedWritePair<ReadAfterVecLd>; // Floating point fabs/fchs.
  274. defm WriteFRnd : X86SchedWritePair<ReadAfterVecXLd>; // Floating point rounding.
  275. defm WriteFRndY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point rounding (YMM).
  276. defm WriteFRndZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point rounding (ZMM).
  277. defm WriteFLogic : X86SchedWritePair<ReadAfterVecXLd>; // Floating point and/or/xor logicals.
  278. defm WriteFLogicY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point and/or/xor logicals (YMM).
  279. defm WriteFLogicZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point and/or/xor logicals (ZMM).
  280. defm WriteFTest : X86SchedWritePair<ReadAfterVecXLd>; // Floating point TEST instructions.
  281. defm WriteFTestY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point TEST instructions (YMM).
  282. defm WriteFTestZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point TEST instructions (ZMM).
  283. defm WriteFShuffle : X86SchedWritePair<ReadAfterVecXLd>; // Floating point vector shuffles.
  284. defm WriteFShuffleY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point vector shuffles (YMM).
  285. defm WriteFShuffleZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point vector shuffles (ZMM).
  286. defm WriteFVarShuffle : X86SchedWritePair<ReadAfterVecXLd>; // Floating point vector variable shuffles.
  287. defm WriteFVarShuffleY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point vector variable shuffles (YMM).
  288. defm WriteFVarShuffleZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point vector variable shuffles (ZMM).
  289. defm WriteFBlend : X86SchedWritePair<ReadAfterVecXLd>; // Floating point vector blends.
  290. defm WriteFBlendY : X86SchedWritePair<ReadAfterVecYLd>; // Floating point vector blends (YMM).
  291. defm WriteFBlendZ : X86SchedWritePair<ReadAfterVecYLd>; // Floating point vector blends (ZMM).
  292. defm WriteFVarBlend : X86SchedWritePair<ReadAfterVecXLd>; // Fp vector variable blends.
  293. defm WriteFVarBlendY : X86SchedWritePair<ReadAfterVecYLd>; // Fp vector variable blends (YMM).
  294. defm WriteFVarBlendZ : X86SchedWritePair<ReadAfterVecYLd>; // Fp vector variable blends (ZMM).
  295. // FMA Scheduling helper class.
  296. class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; } // Sched defaults to WriteFAdd.
  297. // Horizontal Add/Sub (float and integer)
  298. defm WriteFHAdd : X86SchedWritePair<ReadAfterVecXLd>;
  299. defm WriteFHAddY : X86SchedWritePair<ReadAfterVecYLd>;
  300. defm WriteFHAddZ : X86SchedWritePair<ReadAfterVecYLd>;
  301. defm WritePHAdd : X86SchedWritePair<ReadAfterVecLd>;
  302. defm WritePHAddX : X86SchedWritePair<ReadAfterVecXLd>;
  303. defm WritePHAddY : X86SchedWritePair<ReadAfterVecYLd>;
  304. defm WritePHAddZ : X86SchedWritePair<ReadAfterVecYLd>;
  305. // Vector integer operations.
  306. def WriteVecLoad : SchedWrite;
  307. def WriteVecLoadX : SchedWrite;
  308. def WriteVecLoadY : SchedWrite;
  309. def WriteVecLoadNT : SchedWrite;
  310. def WriteVecLoadNTY : SchedWrite;
  311. def WriteVecMaskedLoad : SchedWrite;
  312. def WriteVecMaskedLoadY : SchedWrite;
  313. def WriteVecStore : SchedWrite;
  314. def WriteVecStoreX : SchedWrite;
  315. def WriteVecStoreY : SchedWrite;
  316. def WriteVecStoreNT : SchedWrite;
  317. def WriteVecStoreNTY : SchedWrite;
  318. def WriteVecMaskedStore32 : SchedWrite;
  319. def WriteVecMaskedStore64 : SchedWrite;
  320. def WriteVecMaskedStore32Y : SchedWrite;
  321. def WriteVecMaskedStore64Y : SchedWrite;
  322. def WriteVecMove : SchedWrite;
  323. def WriteVecMoveX : SchedWrite;
  324. def WriteVecMoveY : SchedWrite;
  325. def WriteVecMoveZ : SchedWrite;
  326. def WriteVecMoveToGpr : SchedWrite;
  327. def WriteVecMoveFromGpr : SchedWrite;
  328. defm WriteVecALU : X86SchedWritePair<ReadAfterVecLd>; // Vector integer ALU op, no logicals.
  329. defm WriteVecALUX : X86SchedWritePair<ReadAfterVecXLd>; // Vector integer ALU op, no logicals (XMM).
  330. defm WriteVecALUY : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer ALU op, no logicals (YMM).
  331. defm WriteVecALUZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer ALU op, no logicals (ZMM).
  332. defm WriteVecLogic : X86SchedWritePair<ReadAfterVecLd>; // Vector integer and/or/xor logicals.
  333. defm WriteVecLogicX : X86SchedWritePair<ReadAfterVecXLd>; // Vector integer and/or/xor logicals (XMM).
  334. defm WriteVecLogicY : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer and/or/xor logicals (YMM).
  335. defm WriteVecLogicZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer and/or/xor logicals (ZMM).
  336. defm WriteVecTest : X86SchedWritePair<ReadAfterVecXLd>; // Vector integer TEST instructions.
  337. defm WriteVecTestY : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer TEST instructions (YMM).
  338. defm WriteVecTestZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer TEST instructions (ZMM).
  339. defm WriteVecShift : X86SchedWritePair<ReadAfterVecLd>; // Vector integer shifts (default).
  340. defm WriteVecShiftX : X86SchedWritePair<ReadAfterVecXLd>; // Vector integer shifts (XMM).
  341. defm WriteVecShiftY : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer shifts (YMM).
  342. defm WriteVecShiftZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer shifts (ZMM).
  343. defm WriteVecShiftImm : X86SchedWritePair<ReadAfterVecLd>; // Vector integer immediate shifts (default).
  344. defm WriteVecShiftImmX: X86SchedWritePair<ReadAfterVecXLd>; // Vector integer immediate shifts (XMM).
  345. defm WriteVecShiftImmY: X86SchedWritePair<ReadAfterVecYLd>; // Vector integer immediate shifts (YMM).
  346. defm WriteVecShiftImmZ: X86SchedWritePair<ReadAfterVecYLd>; // Vector integer immediate shifts (ZMM).
  347. defm WriteVecIMul : X86SchedWritePair<ReadAfterVecLd>; // Vector integer multiply (default).
  348. defm WriteVecIMulX : X86SchedWritePair<ReadAfterVecXLd>; // Vector integer multiply (XMM).
  349. defm WriteVecIMulY : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer multiply (YMM).
  350. defm WriteVecIMulZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector integer multiply (ZMM).
  351. defm WritePMULLD : X86SchedWritePair<ReadAfterVecXLd>; // Vector PMULLD.
  352. defm WritePMULLDY : X86SchedWritePair<ReadAfterVecYLd>; // Vector PMULLD (YMM).
  353. defm WritePMULLDZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector PMULLD (ZMM).
  354. defm WriteShuffle : X86SchedWritePair<ReadAfterVecLd>; // Vector shuffles.
  355. defm WriteShuffleX : X86SchedWritePair<ReadAfterVecXLd>; // Vector shuffles (XMM).
  356. defm WriteShuffleY : X86SchedWritePair<ReadAfterVecYLd>; // Vector shuffles (YMM).
  357. defm WriteShuffleZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector shuffles (ZMM).
  358. defm WriteVarShuffle : X86SchedWritePair<ReadAfterVecLd>; // Vector variable shuffles.
  359. defm WriteVarShuffleX : X86SchedWritePair<ReadAfterVecXLd>; // Vector variable shuffles (XMM).
  360. defm WriteVarShuffleY : X86SchedWritePair<ReadAfterVecYLd>; // Vector variable shuffles (YMM).
  361. defm WriteVarShuffleZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector variable shuffles (ZMM).
  362. defm WriteBlend : X86SchedWritePair<ReadAfterVecXLd>; // Vector blends.
  363. defm WriteBlendY : X86SchedWritePair<ReadAfterVecYLd>; // Vector blends (YMM).
  364. defm WriteBlendZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector blends (ZMM).
  365. defm WriteVarBlend : X86SchedWritePair<ReadAfterVecXLd>; // Vector variable blends.
  366. defm WriteVarBlendY : X86SchedWritePair<ReadAfterVecYLd>; // Vector variable blends (YMM).
  367. defm WriteVarBlendZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector variable blends (ZMM).
  368. defm WritePSADBW : X86SchedWritePair<ReadAfterVecLd>; // Vector PSADBW.
  369. defm WritePSADBWX : X86SchedWritePair<ReadAfterVecXLd>; // Vector PSADBW (XMM).
  370. defm WritePSADBWY : X86SchedWritePair<ReadAfterVecYLd>; // Vector PSADBW (YMM).
  371. defm WritePSADBWZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector PSADBW (ZMM).
  372. defm WriteMPSAD : X86SchedWritePair<ReadAfterVecXLd>; // Vector MPSAD.
  373. defm WriteMPSADY : X86SchedWritePair<ReadAfterVecYLd>; // Vector MPSAD (YMM).
  374. defm WriteMPSADZ : X86SchedWritePair<ReadAfterVecYLd>; // Vector MPSAD (ZMM).
  375. defm WritePHMINPOS : X86SchedWritePair<ReadAfterVecXLd>; // Vector PHMINPOS.
  376. // Vector insert/extract operations.
  377. defm WriteVecInsert : X86SchedWritePair; // Insert gpr to vector element.
  378. def WriteVecExtract : SchedWrite; // Extract vector element to gpr.
  379. def WriteVecExtractSt : SchedWrite; // Extract vector element and store.
  380. // MOVMSK operations.
  381. def WriteFMOVMSK : SchedWrite;
  382. def WriteVecMOVMSK : SchedWrite;
  383. def WriteVecMOVMSKY : SchedWrite;
  384. def WriteMMXMOVMSK : SchedWrite;
  385. // Conversion between integer and float.
  386. defm WriteCvtSD2I : X86SchedWritePair<ReadAfterVecLd>; // Double -> Integer.
  387. defm WriteCvtPD2I : X86SchedWritePair<ReadAfterVecXLd>; // Double -> Integer (XMM).
  388. defm WriteCvtPD2IY : X86SchedWritePair<ReadAfterVecYLd>; // Double -> Integer (YMM).
  389. defm WriteCvtPD2IZ : X86SchedWritePair<ReadAfterVecYLd>; // Double -> Integer (ZMM).
  390. defm WriteCvtSS2I : X86SchedWritePair<ReadAfterVecLd>; // Float -> Integer.
  391. defm WriteCvtPS2I : X86SchedWritePair<ReadAfterVecXLd>; // Float -> Integer (XMM).
  392. defm WriteCvtPS2IY : X86SchedWritePair<ReadAfterVecYLd>; // Float -> Integer (YMM).
  393. defm WriteCvtPS2IZ : X86SchedWritePair<ReadAfterVecYLd>; // Float -> Integer (ZMM).
  394. defm WriteCvtI2SD : X86SchedWritePair<ReadAfterVecLd>; // Integer -> Double.
  395. defm WriteCvtI2PD : X86SchedWritePair<ReadAfterVecXLd>; // Integer -> Double (XMM).
  396. defm WriteCvtI2PDY : X86SchedWritePair<ReadAfterVecYLd>; // Integer -> Double (YMM).
  397. defm WriteCvtI2PDZ : X86SchedWritePair<ReadAfterVecYLd>; // Integer -> Double (ZMM).
  398. defm WriteCvtI2SS : X86SchedWritePair<ReadAfterVecLd>; // Integer -> Float.
  399. defm WriteCvtI2PS : X86SchedWritePair<ReadAfterVecXLd>; // Integer -> Float (XMM).
  400. defm WriteCvtI2PSY : X86SchedWritePair<ReadAfterVecYLd>; // Integer -> Float (YMM).
  401. defm WriteCvtI2PSZ : X86SchedWritePair<ReadAfterVecYLd>; // Integer -> Float (ZMM).
  402. defm WriteCvtSS2SD : X86SchedWritePair<ReadAfterVecLd>; // Float -> Double size conversion.
  403. defm WriteCvtPS2PD : X86SchedWritePair<ReadAfterVecXLd>; // Float -> Double size conversion (XMM).
  404. defm WriteCvtPS2PDY : X86SchedWritePair<ReadAfterVecYLd>; // Float -> Double size conversion (YMM).
  405. defm WriteCvtPS2PDZ : X86SchedWritePair<ReadAfterVecYLd>; // Float -> Double size conversion (ZMM).
  406. defm WriteCvtSD2SS : X86SchedWritePair<ReadAfterVecLd>; // Double -> Float size conversion.
  407. defm WriteCvtPD2PS : X86SchedWritePair<ReadAfterVecXLd>; // Double -> Float size conversion (XMM).
  408. defm WriteCvtPD2PSY : X86SchedWritePair<ReadAfterVecYLd>; // Double -> Float size conversion (YMM).
  409. defm WriteCvtPD2PSZ : X86SchedWritePair<ReadAfterVecYLd>; // Double -> Float size conversion (ZMM).
  410. defm WriteCvtPH2PS : X86SchedWritePair<ReadAfterVecXLd>; // Half -> Float size conversion.
  411. defm WriteCvtPH2PSY : X86SchedWritePair<ReadAfterVecYLd>; // Half -> Float size conversion (YMM).
  412. defm WriteCvtPH2PSZ : X86SchedWritePair<ReadAfterVecYLd>; // Half -> Float size conversion (ZMM).
  413. def WriteCvtPS2PH : SchedWrite; // Float -> Half size conversion.
  414. def WriteCvtPS2PHY : SchedWrite; // Float -> Half size conversion (YMM).
  415. def WriteCvtPS2PHZ : SchedWrite; // Float -> Half size conversion (ZMM).
  416. def WriteCvtPS2PHSt : SchedWrite; // Float -> Half + store size conversion.
  417. def WriteCvtPS2PHYSt : SchedWrite; // Float -> Half + store size conversion (YMM).
  418. def WriteCvtPS2PHZSt : SchedWrite; // Float -> Half + store size conversion (ZMM).
  419. // CRC32 instruction.
  420. defm WriteCRC32 : X86SchedWritePair<ReadAfterLd>;
  421. // Strings instructions.
  422. // Packed Compare Implicit Length Strings, Return Mask
  423. defm WritePCmpIStrM : X86SchedWritePair<ReadAfterVecXLd>;
  424. // Packed Compare Explicit Length Strings, Return Mask
  425. defm WritePCmpEStrM : X86SchedWritePair<ReadAfterVecXLd>;
  426. // Packed Compare Implicit Length Strings, Return Index
  427. defm WritePCmpIStrI : X86SchedWritePair<ReadAfterVecXLd>;
  428. // Packed Compare Explicit Length Strings, Return Index
  429. defm WritePCmpEStrI : X86SchedWritePair<ReadAfterVecXLd>;
  430. // AES instructions.
  431. defm WriteAESDecEnc : X86SchedWritePair<ReadAfterVecXLd>; // Decryption, encryption.
  432. defm WriteAESIMC : X86SchedWritePair<ReadAfterVecXLd>; // InvMixColumn.
  433. defm WriteAESKeyGen : X86SchedWritePair<ReadAfterVecXLd>; // Key Generation.
  434. // Carry-less multiplication instructions.
  435. defm WriteCLMul : X86SchedWritePair<ReadAfterVecXLd>;
  436. // EMMS/FEMMS
  437. def WriteEMMS : SchedWrite; // Single write shared by EMMS and FEMMS.
  438. // Load/store MXCSR
  439. def WriteLDMXCSR : SchedWrite; // Load MXCSR.
  440. def WriteSTMXCSR : SchedWrite; // Store MXCSR.
  441. // Catch-all for expensive system instructions.
  442. def WriteSystem : SchedWrite;
  443. // AVX2.
  444. defm WriteFShuffle256 : X86SchedWritePair<ReadAfterVecYLd>; // Fp 256-bit width vector shuffles.
  445. defm WriteFVarShuffle256 : X86SchedWritePair<ReadAfterVecYLd>; // Fp 256-bit width variable shuffles.
  446. defm WriteShuffle256 : X86SchedWritePair<ReadAfterVecYLd>; // 256-bit width vector shuffles.
  447. defm WriteVPMOV256 : X86SchedWritePair<ReadAfterVecYLd>; // 256-bit width packed vector width-changing move.
  448. defm WriteVarShuffle256 : X86SchedWritePair<ReadAfterVecYLd>; // 256-bit width vector variable shuffles.
  449. defm WriteVarVecShift : X86SchedWritePair<ReadAfterVecXLd>; // Variable vector shifts.
  450. defm WriteVarVecShiftY : X86SchedWritePair<ReadAfterVecYLd>; // Variable vector shifts (YMM).
  451. defm WriteVarVecShiftZ : X86SchedWritePair<ReadAfterVecYLd>; // Variable vector shifts (ZMM); same ReadAfterVecYLd as the YMM form.
  452. // Old microcoded instructions that nobody uses.
  453. def WriteMicrocoded : SchedWrite;
  454. // Fence instructions.
  455. def WriteFence : SchedWrite;
  456. // Nop, not very useful except that it provides a model for nops!
  457. def WriteNop : SchedWrite;
  458. // Move/Load/Store wrappers. Each def bundles three SchedWrites; judging by the names the argument order is (move, load, store) -- confirm against X86SchedWriteMoveLS.
  459. def WriteFMoveLS
  460. : X86SchedWriteMoveLS<WriteFMove, WriteFLoad, WriteFStore>;
  461. def WriteFMoveLSX
  462. : X86SchedWriteMoveLS<WriteFMoveX, WriteFLoadX, WriteFStoreX>;
  463. def WriteFMoveLSY
  464. : X86SchedWriteMoveLS<WriteFMoveY, WriteFLoadY, WriteFStoreY>;
  465. def WriteFMoveLSZ // Z move write paired with the Y load/store writes.
  466. : X86SchedWriteMoveLS<WriteFMoveZ, WriteFLoadY, WriteFStoreY>;
  467. def SchedWriteFMoveLS // Groups the base, X, Y and Z wrappers defined above.
  468. : X86SchedWriteMoveLSWidths<WriteFMoveLS, WriteFMoveLSX,
  469. WriteFMoveLSY, WriteFMoveLSZ>;
  470. def WriteFMoveLSNT // Same move/load as WriteFMoveLS; only the store is the NT write.
  471. : X86SchedWriteMoveLS<WriteFMove, WriteFLoad, WriteFStoreNT>;
  472. def WriteFMoveLSNTX // NOTE(review): pairs WriteFStoreNTX with the non-X WriteFMove/WriteFLoad -- confirm this is intended.
  473. : X86SchedWriteMoveLS<WriteFMove, WriteFLoad, WriteFStoreNTX>;
  474. def WriteFMoveLSNTY
  475. : X86SchedWriteMoveLS<WriteFMoveY, WriteFLoadY, WriteFStoreNTY>;
  476. def SchedWriteFMoveLSNT
  477. : X86SchedWriteMoveLSWidths<WriteFMoveLSNT, WriteFMoveLSNTX,
  478. WriteFMoveLSNTY, WriteFMoveLSNTY>; // The NTY wrapper fills both of the last two slots; no NT Z wrapper is defined in this group.
  479. def WriteVecMoveLS // Integer-vector counterpart of the WriteFMoveLS wrappers: bundles a move, a load and a store write.
  480. : X86SchedWriteMoveLS<WriteVecMove, WriteVecLoad, WriteVecStore>;
  481. def WriteVecMoveLSX
  482. : X86SchedWriteMoveLS<WriteVecMoveX, WriteVecLoadX, WriteVecStoreX>;
  483. def WriteVecMoveLSY
  484. : X86SchedWriteMoveLS<WriteVecMoveY, WriteVecLoadY, WriteVecStoreY>;
  485. def WriteVecMoveLSZ // Z move write paired with the Y load/store writes.
  486. : X86SchedWriteMoveLS<WriteVecMoveZ, WriteVecLoadY, WriteVecStoreY>;
  487. def SchedWriteVecMoveLS // Groups the base, X, Y and Z wrappers defined above.
  488. : X86SchedWriteMoveLSWidths<WriteVecMoveLS, WriteVecMoveLSX,
  489. WriteVecMoveLSY, WriteVecMoveLSZ>;
  490. def WriteVecMoveLSNT // Both the load and the store use NT writes here.
  491. : X86SchedWriteMoveLS<WriteVecMove, WriteVecLoadNT, WriteVecStoreNT>;
  492. def WriteVecMoveLSNTX // NOTE(review): WriteVecMoveX is paired with the non-X WriteVecLoadNT/WriteVecStoreNT -- confirm this is intended.
  493. : X86SchedWriteMoveLS<WriteVecMoveX, WriteVecLoadNT, WriteVecStoreNT>;
  494. def WriteVecMoveLSNTY
  495. : X86SchedWriteMoveLS<WriteVecMoveY, WriteVecLoadNTY, WriteVecStoreNTY>;
  496. def SchedWriteVecMoveLSNT
  497. : X86SchedWriteMoveLSWidths<WriteVecMoveLSNT, WriteVecMoveLSNTX,
  498. WriteVecMoveLSNTY, WriteVecMoveLSNTY>; // The NTY wrapper fills both of the last two slots; no NT Z wrapper is defined in this group.
  499. // Conditional SIMD Packed Loads and Stores wrappers. Each def pairs a masked-load write with a masked-store write.
  500. def WriteFMaskMove32
  501. : X86SchedWriteMaskMove<WriteFMaskedLoad, WriteFMaskedStore32>;
  502. def WriteFMaskMove64 // Shares WriteFMaskedLoad with WriteFMaskMove32; only the store write differs.
  503. : X86SchedWriteMaskMove<WriteFMaskedLoad, WriteFMaskedStore64>;
  504. def WriteFMaskMove32Y
  505. : X86SchedWriteMaskMove<WriteFMaskedLoadY, WriteFMaskedStore32Y>;
  506. def WriteFMaskMove64Y // Shares WriteFMaskedLoadY with WriteFMaskMove32Y; only the store write differs.
  507. : X86SchedWriteMaskMove<WriteFMaskedLoadY, WriteFMaskedStore64Y>;
  508. def WriteVecMaskMove32
  509. : X86SchedWriteMaskMove<WriteVecMaskedLoad, WriteVecMaskedStore32>;
  510. def WriteVecMaskMove64 // Shares WriteVecMaskedLoad with WriteVecMaskMove32; only the store write differs.
  511. : X86SchedWriteMaskMove<WriteVecMaskedLoad, WriteVecMaskedStore64>;
  512. def WriteVecMaskMove32Y
  513. : X86SchedWriteMaskMove<WriteVecMaskedLoadY, WriteVecMaskedStore32Y>;
  514. def WriteVecMaskMove64Y // Shares WriteVecMaskedLoadY with WriteVecMaskMove32Y; only the store write differs.
  515. : X86SchedWriteMaskMove<WriteVecMaskedLoadY, WriteVecMaskedStore64Y>;
  516. // Vector width wrappers. Each def bundles four SchedWrites; judging by the suffixes the slots are (base, X, Y, Z) -- confirm against X86SchedWriteWidths.
  517. def SchedWriteFAdd
  518. : X86SchedWriteWidths<WriteFAdd, WriteFAddX, WriteFAddY, WriteFAddZ>;
  519. def SchedWriteFAdd64
  520. : X86SchedWriteWidths<WriteFAdd64, WriteFAdd64X, WriteFAdd64Y, WriteFAdd64Z>;
  521. def SchedWriteFHAdd
  522. : X86SchedWriteWidths<WriteFHAdd, WriteFHAdd, WriteFHAddY, WriteFHAddZ>; // Base write fills both of the first two slots.
  523. def SchedWriteFCmp
  524. : X86SchedWriteWidths<WriteFCmp, WriteFCmpX, WriteFCmpY, WriteFCmpZ>;
  525. def SchedWriteFCmp64
  526. : X86SchedWriteWidths<WriteFCmp64, WriteFCmp64X, WriteFCmp64Y, WriteFCmp64Z>;
  527. def SchedWriteFMul
  528. : X86SchedWriteWidths<WriteFMul, WriteFMulX, WriteFMulY, WriteFMulZ>;
  529. def SchedWriteFMul64
  530. : X86SchedWriteWidths<WriteFMul64, WriteFMul64X, WriteFMul64Y, WriteFMul64Z>;
  531. def SchedWriteFMA
  532. : X86SchedWriteWidths<WriteFMA, WriteFMAX, WriteFMAY, WriteFMAZ>;
  533. def SchedWriteDPPD
  534. : X86SchedWriteWidths<WriteDPPD, WriteDPPD, WriteDPPD, WriteDPPD>; // WriteDPPD fills all four slots.
  535. def SchedWriteDPPS
  536. : X86SchedWriteWidths<WriteDPPS, WriteDPPS, WriteDPPSY, WriteDPPSZ>; // Base write fills both of the first two slots.
  537. def SchedWriteFDiv
  538. : X86SchedWriteWidths<WriteFDiv, WriteFDivX, WriteFDivY, WriteFDivZ>;
  539. def SchedWriteFDiv64
  540. : X86SchedWriteWidths<WriteFDiv64, WriteFDiv64X, WriteFDiv64Y, WriteFDiv64Z>;
  541. def SchedWriteFSqrt
  542. : X86SchedWriteWidths<WriteFSqrt, WriteFSqrtX,
  543. WriteFSqrtY, WriteFSqrtZ>;
  544. def SchedWriteFSqrt64
  545. : X86SchedWriteWidths<WriteFSqrt64, WriteFSqrt64X,
  546. WriteFSqrt64Y, WriteFSqrt64Z>;
  547. def SchedWriteFRcp
  548. : X86SchedWriteWidths<WriteFRcp, WriteFRcpX, WriteFRcpY, WriteFRcpZ>;
  549. def SchedWriteFRsqrt
  550. : X86SchedWriteWidths<WriteFRsqrt, WriteFRsqrtX, WriteFRsqrtY, WriteFRsqrtZ>;
  551. def SchedWriteFRnd
  552. : X86SchedWriteWidths<WriteFRnd, WriteFRnd, WriteFRndY, WriteFRndZ>; // Base write fills both of the first two slots.
  553. def SchedWriteFLogic
  554. : X86SchedWriteWidths<WriteFLogic, WriteFLogic, WriteFLogicY, WriteFLogicZ>; // Base write fills both of the first two slots.
  555. def SchedWriteFTest
  556. : X86SchedWriteWidths<WriteFTest, WriteFTest, WriteFTestY, WriteFTestZ>; // Base write fills both of the first two slots.
  557. def SchedWriteFShuffle
  558. : X86SchedWriteWidths<WriteFShuffle, WriteFShuffle,
  559. WriteFShuffleY, WriteFShuffleZ>; // Base write fills both of the first two slots.
  560. def SchedWriteFVarShuffle
  561. : X86SchedWriteWidths<WriteFVarShuffle, WriteFVarShuffle,
  562. WriteFVarShuffleY, WriteFVarShuffleZ>; // Base write fills both of the first two slots.
  563. def SchedWriteFBlend
  564. : X86SchedWriteWidths<WriteFBlend, WriteFBlend, WriteFBlendY, WriteFBlendZ>; // Base write fills both of the first two slots.
  565. def SchedWriteFVarBlend
  566. : X86SchedWriteWidths<WriteFVarBlend, WriteFVarBlend,
  567. WriteFVarBlendY, WriteFVarBlendZ>; // Base write fills both of the first two slots.
  568. def SchedWriteCvtDQ2PD // Conversion width wrappers: in each def of this group the first slot takes an S-form write (here WriteCvtI2SD) and the other three the matching P-form writes.
  569. : X86SchedWriteWidths<WriteCvtI2SD, WriteCvtI2PD,
  570. WriteCvtI2PDY, WriteCvtI2PDZ>;
  571. def SchedWriteCvtDQ2PS
  572. : X86SchedWriteWidths<WriteCvtI2SS, WriteCvtI2PS,
  573. WriteCvtI2PSY, WriteCvtI2PSZ>;
  574. def SchedWriteCvtPD2DQ
  575. : X86SchedWriteWidths<WriteCvtSD2I, WriteCvtPD2I,
  576. WriteCvtPD2IY, WriteCvtPD2IZ>;
  577. def SchedWriteCvtPS2DQ
  578. : X86SchedWriteWidths<WriteCvtSS2I, WriteCvtPS2I,
  579. WriteCvtPS2IY, WriteCvtPS2IZ>;
  580. def SchedWriteCvtPS2PD
  581. : X86SchedWriteWidths<WriteCvtSS2SD, WriteCvtPS2PD,
  582. WriteCvtPS2PDY, WriteCvtPS2PDZ>;
  583. def SchedWriteCvtPD2PS
  584. : X86SchedWriteWidths<WriteCvtSD2SS, WriteCvtPD2PS,
  585. WriteCvtPD2PSY, WriteCvtPD2PSZ>;
  586. def SchedWriteVecALU // Integer-vector width wrappers start here; same four-slot layout as the FP wrappers.
  587. : X86SchedWriteWidths<WriteVecALU, WriteVecALUX, WriteVecALUY, WriteVecALUZ>;
  588. def SchedWritePHAdd
  589. : X86SchedWriteWidths<WritePHAdd, WritePHAddX, WritePHAddY, WritePHAddZ>;
  590. def SchedWriteVecLogic
  591. : X86SchedWriteWidths<WriteVecLogic, WriteVecLogicX,
  592. WriteVecLogicY, WriteVecLogicZ>;
  593. def SchedWriteVecTest
  594. : X86SchedWriteWidths<WriteVecTest, WriteVecTest,
  595. WriteVecTestY, WriteVecTestZ>; // Base write fills both of the first two slots.
  596. def SchedWriteVecShift
  597. : X86SchedWriteWidths<WriteVecShift, WriteVecShiftX,
  598. WriteVecShiftY, WriteVecShiftZ>;
  599. def SchedWriteVecShiftImm
  600. : X86SchedWriteWidths<WriteVecShiftImm, WriteVecShiftImmX,
  601. WriteVecShiftImmY, WriteVecShiftImmZ>;
  602. def SchedWriteVarVecShift
  603. : X86SchedWriteWidths<WriteVarVecShift, WriteVarVecShift,
  604. WriteVarVecShiftY, WriteVarVecShiftZ>; // Base write fills both of the first two slots.
  605. def SchedWriteVecIMul
  606. : X86SchedWriteWidths<WriteVecIMul, WriteVecIMulX,
  607. WriteVecIMulY, WriteVecIMulZ>;
  608. def SchedWritePMULLD
  609. : X86SchedWriteWidths<WritePMULLD, WritePMULLD,
  610. WritePMULLDY, WritePMULLDZ>; // Base write fills both of the first two slots.
  611. def SchedWriteMPSAD
  612. : X86SchedWriteWidths<WriteMPSAD, WriteMPSAD,
  613. WriteMPSADY, WriteMPSADZ>; // Base write fills both of the first two slots.
  614. def SchedWritePSADBW
  615. : X86SchedWriteWidths<WritePSADBW, WritePSADBWX,
  616. WritePSADBWY, WritePSADBWZ>;
  617. def SchedWriteShuffle
  618. : X86SchedWriteWidths<WriteShuffle, WriteShuffleX,
  619. WriteShuffleY, WriteShuffleZ>;
  620. def SchedWriteVarShuffle
  621. : X86SchedWriteWidths<WriteVarShuffle, WriteVarShuffleX,
  622. WriteVarShuffleY, WriteVarShuffleZ>;
  623. def SchedWriteBlend
  624. : X86SchedWriteWidths<WriteBlend, WriteBlend, WriteBlendY, WriteBlendZ>; // Base write fills both of the first two slots.
  625. def SchedWriteVarBlend
  626. : X86SchedWriteWidths<WriteVarBlend, WriteVarBlend,
  627. WriteVarBlendY, WriteVarBlendZ>; // Base write fills both of the first two slots.
  628. // Vector size wrappers. Each def bundles three width wrappers; the slots appear to be (PH, PS, 64-bit) -- confirm against X86SchedWriteSizes.
  629. // FIXME: Currently PH uses the same schedule method as PS, which is why
  630. // the first two arguments of each def below are identical. We may refine them later.
  631. def SchedWriteFAddSizes
  632. : X86SchedWriteSizes<SchedWriteFAdd, SchedWriteFAdd, SchedWriteFAdd64>;
  633. def SchedWriteFCmpSizes
  634. : X86SchedWriteSizes<SchedWriteFCmp, SchedWriteFCmp, SchedWriteFCmp64>;
  635. def SchedWriteFMulSizes
  636. : X86SchedWriteSizes<SchedWriteFMul, SchedWriteFMul, SchedWriteFMul64>;
  637. def SchedWriteFDivSizes
  638. : X86SchedWriteSizes<SchedWriteFDiv, SchedWriteFDiv, SchedWriteFDiv64>;
  639. def SchedWriteFSqrtSizes
  640. : X86SchedWriteSizes<SchedWriteFSqrt, SchedWriteFSqrt, SchedWriteFSqrt64>;
  641. def SchedWriteFLogicSizes
  642. : X86SchedWriteSizes<SchedWriteFLogic, SchedWriteFLogic, SchedWriteFLogic>; // Same wrapper in all three slots.
  643. def SchedWriteFShuffleSizes
  644. : X86SchedWriteSizes<SchedWriteFShuffle, SchedWriteFShuffle, SchedWriteFShuffle>; // Same wrapper in all three slots.
  645. //===----------------------------------------------------------------------===//
  646. // Generic Processor Scheduler Models.
  647. // IssueWidth is analogous to the number of decode units. Core and its
  648. // descendants, including Nehalem and SandyBridge, have 4 decoders.
  649. // Resources beyond the decoder operate on micro-ops and are buffered
  650. // so adjacent micro-ops don't directly compete.
  651. //
  652. // MicroOpBufferSize > 1 indicates that RAW dependencies can be
  653. // decoded in the same cycle. The value 32 is a reasonably arbitrary
  654. // number of in-flight instructions.
  655. //
  656. // HighLatency=10 is optimistic. X86InstrInfo::isHighLatencyDef
  657. // indicates high latency opcodes. Alternatively, InstrItinData
  658. // entries may be included here to define specific operand
  659. // latencies. Since these latencies are not used for pipeline hazards,
  660. // they do not need to be exact.
  661. //
  662. // The GenericX86Model contains no instruction schedules
  663. // and disables PostRAScheduler.
  664. class GenericX86Model : SchedMachineModel {
  665. let IssueWidth = 4; // Matches the 4 decoders described above.
  666. let MicroOpBufferSize = 32; // Reasonably arbitrary in-flight count (see above).
  667. let LoadLatency = 4;
  668. let HighLatency = 10; // Optimistic (see above).
  669. let PostRAScheduler = 0; // Disabled for the generic model.
  670. let CompleteModel = 0; // Presumably because this model contains no instruction schedules -- confirm.
  671. }
  672. def GenericModel : GenericX86Model; // Instantiates the class unchanged (PostRAScheduler stays 0).
  673. // Define a model with the PostRAScheduler enabled; all other fields are inherited from GenericX86Model.
  674. def GenericPostRAModel : GenericX86Model {
  675. let PostRAScheduler = 1; // Overrides the 0 set in GenericX86Model.
  676. }