X86SchedSandyBridge.td 52 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285
  1. //=- X86SchedSandyBridge.td - X86 Sandy Bridge Scheduling ----*- tablegen -*-=//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file defines the machine model for Sandy Bridge to support instruction
  10. // scheduling and other instruction cost heuristics.
  11. //
  12. // Note that we define some instructions here that are not supported by SNB,
  13. // but we still have to define them because SNB is the default subtarget for
  14. // X86. These instructions are tagged with a comment `Unsupported = 1`.
  15. //
  16. //===----------------------------------------------------------------------===//
  // Top-level machine model record for Sandy Bridge.
  def SandyBridgeModel : SchedMachineModel {
    // Every x86 instruction is modeled as one micro-op, and SB decodes up to
    // 4 instructions per cycle.
    // FIXME: Identify instructions that aren't a single fused micro-op.
    let IssueWidth = 4;

    // Sized after the reorder buffer.
    let MicroOpBufferSize = 168;

    let LoadLatency = 5;
    let MispredictPenalty = 16;

    // Sized after the LSD (loop-stream detector) queue.
    let LoopMicroOpBufferSize = 28;

    // Set to 0 so that the scheduler is allowed to assign a default model to
    // opcodes it does not recognize.
    let CompleteModel = 0;
  }
  31. let SchedModel = SandyBridgeModel in {
  // Sandy Bridge can issue micro-ops to 6 different ports in a single cycle.
  // All computation is handled by ports 0, 1, and 5.
  def SBPort0 : ProcResource<1>;
  def SBPort1 : ProcResource<1>;
  def SBPort5 : ProcResource<1>;

  // Ports 2 and 3 are identical: both handle loads and the address half of
  // stores, so they are declared as one resource with two units.
  def SBPort23 : ProcResource<2>;

  // Port 4 receives the data half of stores. Store data may become available
  // later than the store address, but store latency is not modeled, so that
  // difference is ignored here.
  def SBPort4 : ProcResource<1>;

  // Many micro-ops can issue on more than one port; these groups name the
  // port combinations used below.
  def SBPort01  : ProcResGroup<[SBPort0, SBPort1]>;
  def SBPort05  : ProcResGroup<[SBPort0, SBPort5]>;
  def SBPort15  : ProcResGroup<[SBPort1, SBPort5]>;
  def SBPort015 : ProcResGroup<[SBPort0, SBPort1, SBPort5]>;

  // 54-entry unified scheduler.
  def SBPortAny : ProcResGroup<[SBPort0, SBPort1, SBPort23, SBPort4, SBPort5]> {
    let BufferSize=54;
  }

  // Integer division issued on port 0.
  def SBDivider : ProcResource<1>;

  // FP division and sqrt on port 0.
  def SBFPDivider : ProcResource<1>;
  // Integer loads take 5 cycles, so a ReadAfterLd register operand is not
  // needed until 5 cycles after the memory operand.
  def : ReadAdvance<ReadAfterLd, 5>;

  // Vector loads take 5/6/7 cycles, so ReadAfterVec*Ld register operands are
  // not needed until 5/6/7 cycles after the memory operand.
  def : ReadAdvance<ReadAfterVecLd,  5>;
  def : ReadAdvance<ReadAfterVecXLd, 6>;
  def : ReadAdvance<ReadAfterVecYLd, 7>;

  def : ReadAdvance<ReadInt2Fpu, 0>;
  // Many SchedWrites come in pairs: one without and one with a folded load.
  // Instructions with a folded load are usually micro-fused, so they only
  // show up as two micro-ops once queued in the reservation station.
  //
  // This multiclass emits the resource usage for both variants:
  //   SchedRW   - the foldable write; SchedRW.Folded is its load form.
  //   ExePorts  - resources used by the register form.
  //   Lat       - latency of the register form.
  //   Res       - cycles consumed on each entry of ExePorts (default [1]).
  //   UOps      - micro-op count of the register form (default 1).
  //   LoadLat   - latency added for the folded load (default 5).
  //   LoadUOps  - micro-ops added for the folded load (default 1).
  multiclass SBWriteResPair<X86FoldableSchedWrite SchedRW,
                            list<ProcResourceKind> ExePorts,
                            int Lat, list<int> Res = [1], int UOps = 1,
                            int LoadLat = 5, int LoadUOps = 1> {
    // Register variant: uses ExePorts for Res cycles.
    let Latency = Lat,
        ResourceCycles = Res,
        NumMicroOps = UOps in
    def : WriteRes<SchedRW, ExePorts>;

    // Memory variant: additionally spends one cycle on port 2/3 and adds
    // LoadLat cycles of latency and LoadUOps micro-ops.
    let Latency = !add(Lat, LoadLat),
        ResourceCycles = !listconcat([1], Res),
        NumMicroOps = !add(UOps, LoadUOps) in
    def : WriteRes<SchedRW.Folded, !listconcat([SBPort23], ExePorts)>;
  }
  // A folded store takes one cycle on port 4 for the store data plus an extra
  // port 2/3 cycle to recompute the address.
  def : WriteRes<WriteRMW,     [SBPort23,SBPort4]>;
  def : WriteRes<WriteStore,   [SBPort23, SBPort4]>;
  def : WriteRes<WriteStoreNT, [SBPort23, SBPort4]>;
  def : WriteRes<WriteLoad,    [SBPort23]> { let Latency = 5; }
  def : WriteRes<WriteMove,    [SBPort015]>;

  // Miscellaneous copies are treated as a move.
  def : InstRW<[WriteMove], (instrs COPY)>;

  // Register-clearing idioms such as xorps %xmm0, %xmm0.
  // These can often bypass the execution ports entirely.
  def : WriteRes<WriteZero, []>;

  // Models the clobber of the read-write mask operand of a GATHER operation.
  // It consumes no resources and no micro-ops; it only carries a latency,
  // which matches that of WriteLoad.
  defm : X86WriteRes<WriteVecMaskedGatherWriteback, [], 5, [], 0>;
  // Integer arithmetic.
  defm : SBWriteResPair<WriteALU,         [SBPort015], 1>;
  defm : SBWriteResPair<WriteADC,         [SBPort05,SBPort015], 2, [1,1], 2>;

  // Integer multiplies.
  defm : SBWriteResPair<WriteIMul8,       [SBPort1], 3>;
  defm : SBWriteResPair<WriteIMul16,      [SBPort1,SBPort05,SBPort015], 4, [1,1,2], 4>;
  defm : X86WriteRes<WriteIMul16Imm,      [SBPort1,SBPort015], 4, [1,1], 2>;
  defm : X86WriteRes<WriteIMul16ImmLd,    [SBPort1,SBPort015,SBPort23], 8, [1,1,1], 3>;
  defm : SBWriteResPair<WriteIMul16Reg,   [SBPort1], 3>;
  defm : SBWriteResPair<WriteIMul32,      [SBPort1,SBPort05,SBPort015], 4, [1,1,1], 3>;
  defm : SBWriteResPair<WriteMULX32,      [SBPort1,SBPort05,SBPort015], 3, [1,1,1], 3>;
  defm : SBWriteResPair<WriteIMul32Imm,   [SBPort1], 3>;
  defm : SBWriteResPair<WriteIMul32Reg,   [SBPort1], 3>;
  defm : SBWriteResPair<WriteIMul64,      [SBPort1,SBPort0], 4, [1,1], 2>;
  defm : SBWriteResPair<WriteMULX64,      [SBPort1,SBPort0], 3, [1,1], 2>;
  defm : SBWriteResPair<WriteIMul64Imm,   [SBPort1], 3>;
  defm : SBWriteResPair<WriteIMul64Reg,   [SBPort1], 3>;

  // WriteIMulH uses no resources and only carries a latency. The record is
  // named so that the folded-load form can derive its latency from it.
  def SBWriteIMulH : WriteRes<WriteIMulH, []> { let Latency = 4; }
  def : WriteRes<WriteIMulHLd, []> {
    let Latency = !add(SBWriteIMulH.Latency, SandyBridgeModel.LoadLatency);
  }

  // Exchange / byte-swap / compare-exchange.
  defm : X86WriteRes<WriteXCHG,       [SBPort015], 2, [3], 3>;
  defm : X86WriteRes<WriteBSWAP32,    [SBPort1], 1, [1], 1>;
  defm : X86WriteRes<WriteBSWAP64,    [SBPort1, SBPort05], 2, [1,1], 2>;
  defm : X86WriteRes<WriteCMPXCHG,    [SBPort05, SBPort015], 5, [1,3], 4>;
  defm : X86WriteRes<WriteCMPXCHGRMW, [SBPort015, SBPort5, SBPort23, SBPort4], 8, [1, 2, 2, 1], 6>;

  // Integer division: every width is given the same numbers, using port 0
  // for 1 cycle and the integer divider for 10 cycles.
  defm : SBWriteResPair<WriteDiv8,   [SBPort0, SBDivider], 25, [1, 10]>;
  defm : SBWriteResPair<WriteDiv16,  [SBPort0, SBDivider], 25, [1, 10]>;
  defm : SBWriteResPair<WriteDiv32,  [SBPort0, SBDivider], 25, [1, 10]>;
  defm : SBWriteResPair<WriteDiv64,  [SBPort0, SBDivider], 25, [1, 10]>;
  defm : SBWriteResPair<WriteIDiv8,  [SBPort0, SBDivider], 25, [1, 10]>;
  defm : SBWriteResPair<WriteIDiv16, [SBPort0, SBDivider], 25, [1, 10]>;
  defm : SBWriteResPair<WriteIDiv32, [SBPort0, SBDivider], 25, [1, 10]>;
  defm : SBWriteResPair<WriteIDiv64, [SBPort0, SBDivider], 25, [1, 10]>;
  // SHLD/SHRD.
  defm : X86WriteRes<WriteSHDrri,  [SBPort05, SBPort015], 2, [1, 1], 2>;
  defm : X86WriteRes<WriteSHDrrcl, [SBPort05, SBPort015], 4, [3, 1], 4>;
  defm : X86WriteRes<WriteSHDmri,  [SBPort4,SBPort23,SBPort05,SBPort015], 8, [1, 2, 1, 1], 5>;
  defm : X86WriteRes<WriteSHDmrcl, [SBPort4,SBPort23,SBPort05,SBPort015], 10, [1, 2, 3, 1], 7>;

  // Shifts and rotates.
  defm : SBWriteResPair<WriteShift,    [SBPort05], 1>;
  defm : SBWriteResPair<WriteShiftCL,  [SBPort05], 3, [3], 3>;
  defm : SBWriteResPair<WriteRotate,   [SBPort05], 2, [2], 2>;
  defm : SBWriteResPair<WriteRotateCL, [SBPort05], 3, [3], 3>;

  defm : SBWriteResPair<WriteJump,  [SBPort5], 1>;
  defm : SBWriteResPair<WriteCRC32, [SBPort1], 3, [1], 1, 5>;

  // Conditional move.
  defm : SBWriteResPair<WriteCMOV, [SBPort05,SBPort015], 2, [1,1], 2>;
  // x87 conditional move.
  defm : X86WriteRes<WriteFCMOV, [SBPort5,SBPort05], 3, [2,1], 3>;

  // Setcc.
  def : WriteRes<WriteSETCC, [SBPort05]>;
  def : WriteRes<WriteSETCCStore, [SBPort05,SBPort4,SBPort23]> {
    let Latency = 2;
    let NumMicroOps = 3;
  }

  defm : X86WriteRes<WriteLAHFSAHF, [SBPort05], 1, [1], 1>;

  // Bit tests. The WriteBitTestRegLd entry is kept commented out, as in the
  // original.
  defm : X86WriteRes<WriteBitTest,         [SBPort05], 1, [1], 1>;
  defm : X86WriteRes<WriteBitTestImmLd,    [SBPort05,SBPort23], 6, [1,1], 2>;
  //defm : X86WriteRes<WriteBitTestRegLd, [SBPort05,SBPort23], 6, [1,1], 2>;
  defm : X86WriteRes<WriteBitTestSet,      [SBPort05], 1, [1], 1>;
  defm : X86WriteRes<WriteBitTestSetImmLd, [SBPort05,SBPort23], 6, [1,1], 3>;
  defm : X86WriteRes<WriteBitTestSetRegLd, [SBPort05,SBPort23,SBPort5,SBPort015], 8, [1,1,1,1], 5>;
  // Covers simple LEAs with one or two input operands.
  // Complex LEAs can only execute on port 1 and need two cycles on that port
  // to read all of their inputs; that case is not modeled.
  def : WriteRes<WriteLEA, [SBPort01]>;

  // Bit counts.
  defm : SBWriteResPair<WriteBSF,    [SBPort1], 3, [1], 1, 5>;
  defm : SBWriteResPair<WriteBSR,    [SBPort1], 3, [1], 1, 5>;
  defm : SBWriteResPair<WriteLZCNT,  [SBPort1], 3, [1], 1, 5>;
  defm : SBWriteResPair<WriteTZCNT,  [SBPort1], 3, [1], 1, 5>;
  defm : SBWriteResPair<WritePOPCNT, [SBPort1], 3, [1], 1, 6>;

  // BMI1 BEXTR/BLS, BMI2 BZHI
  // NOTE: These don't exist on Sandy Bridge. Ports are guesses.
  defm : SBWriteResPair<WriteBEXTR, [SBPort05,SBPort1], 2, [1,1], 2>;
  defm : SBWriteResPair<WriteBLS,   [SBPort015], 1>;
  defm : SBWriteResPair<WriteBZHI,  [SBPort1], 1>;
  // Scalar and vector floating point.
  // Entries tagged `Unsupported = 1` describe writes for instructions that
  // Sandy Bridge does not implement (see the note in the file header).

  // x87 constant loads.
  defm : X86WriteRes<WriteFLD0, [SBPort5], 1, [1], 1>;
  defm : X86WriteRes<WriteFLD1, [SBPort0,SBPort5], 1, [1,1], 2>;
  defm : X86WriteRes<WriteFLDC, [SBPort0,SBPort1], 1, [1,1], 2>;

  // FP loads.
  defm : X86WriteRes<WriteFLoad,        [SBPort23], 5, [1], 1>;
  defm : X86WriteRes<WriteFLoadX,       [SBPort23], 6, [1], 1>;
  defm : X86WriteRes<WriteFLoadY,       [SBPort23], 7, [1], 1>;
  defm : X86WriteRes<WriteFMaskedLoad,  [SBPort23,SBPort05], 8, [1,2], 3>;
  defm : X86WriteRes<WriteFMaskedLoadY, [SBPort23,SBPort05], 9, [1,2], 3>;

  // FP stores.
  defm : X86WriteRes<WriteFStore,          [SBPort23,SBPort4], 1, [1,1], 1>;
  defm : X86WriteRes<WriteFStoreX,         [SBPort23,SBPort4], 1, [1,1], 1>;
  defm : X86WriteRes<WriteFStoreY,         [SBPort23,SBPort4], 1, [1,1], 1>;
  defm : X86WriteRes<WriteFStoreNT,        [SBPort23,SBPort4], 1, [1,1], 1>;
  defm : X86WriteRes<WriteFStoreNTX,       [SBPort23,SBPort4], 1, [1,1], 1>;
  defm : X86WriteRes<WriteFStoreNTY,       [SBPort23,SBPort4], 1, [1,1], 1>;
  defm : X86WriteRes<WriteFMaskedStore32,  [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
  defm : X86WriteRes<WriteFMaskedStore32Y, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
  defm : X86WriteRes<WriteFMaskedStore64,  [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
  defm : X86WriteRes<WriteFMaskedStore64Y, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;

  // FP register moves.
  defm : X86WriteRes<WriteFMove,  [SBPort5], 1, [1], 1>;
  defm : X86WriteRes<WriteFMoveX, [SBPort5], 1, [1], 1>;
  defm : X86WriteRes<WriteFMoveY, [SBPort5], 1, [1], 1>;
  defm : X86WriteRes<WriteFMoveZ, [SBPort5], 1, [1], 1>;

  defm : X86WriteRes<WriteEMMS, [SBPort015], 31, [31], 31>;

  // FP add.
  defm : SBWriteResPair<WriteFAdd,    [SBPort1], 3, [1], 1, 6>;
  defm : SBWriteResPair<WriteFAddX,   [SBPort1], 3, [1], 1, 6>;
  defm : SBWriteResPair<WriteFAddY,   [SBPort1], 3, [1], 1, 7>;
  defm : SBWriteResPair<WriteFAddZ,   [SBPort1], 3, [1], 1, 7>; // Unsupported = 1
  defm : SBWriteResPair<WriteFAdd64,  [SBPort1], 3, [1], 1, 6>;
  defm : SBWriteResPair<WriteFAdd64X, [SBPort1], 3, [1], 1, 6>;
  defm : SBWriteResPair<WriteFAdd64Y, [SBPort1], 3, [1], 1, 7>;
  defm : SBWriteResPair<WriteFAdd64Z, [SBPort1], 3, [1], 1, 7>; // Unsupported = 1

  // FP compare.
  defm : SBWriteResPair<WriteFCmp,    [SBPort1], 3, [1], 1, 6>;
  defm : SBWriteResPair<WriteFCmpX,   [SBPort1], 3, [1], 1, 6>;
  defm : SBWriteResPair<WriteFCmpY,   [SBPort1], 3, [1], 1, 7>;
  defm : SBWriteResPair<WriteFCmpZ,   [SBPort1], 3, [1], 1, 7>; // Unsupported = 1
  defm : SBWriteResPair<WriteFCmp64,  [SBPort1], 3, [1], 1, 6>;
  defm : SBWriteResPair<WriteFCmp64X, [SBPort1], 3, [1], 1, 6>;
  defm : SBWriteResPair<WriteFCmp64Y, [SBPort1], 3, [1], 1, 7>;
  defm : SBWriteResPair<WriteFCmp64Z, [SBPort1], 3, [1], 1, 7>; // Unsupported = 1

  defm : SBWriteResPair<WriteFCom,  [SBPort1], 3>;
  defm : SBWriteResPair<WriteFComX, [SBPort1], 3>;

  // FP multiply.
  defm : SBWriteResPair<WriteFMul,    [SBPort0], 5, [1], 1, 6>;
  defm : SBWriteResPair<WriteFMulX,   [SBPort0], 5, [1], 1, 6>;
  defm : SBWriteResPair<WriteFMulY,   [SBPort0], 5, [1], 1, 7>;
  defm : SBWriteResPair<WriteFMulZ,   [SBPort0], 5, [1], 1, 7>; // Unsupported = 1
  defm : SBWriteResPair<WriteFMul64,  [SBPort0], 5, [1], 1, 6>;
  defm : SBWriteResPair<WriteFMul64X, [SBPort0], 5, [1], 1, 6>;
  defm : SBWriteResPair<WriteFMul64Y, [SBPort0], 5, [1], 1, 7>;
  defm : SBWriteResPair<WriteFMul64Z, [SBPort0], 5, [1], 1, 7>; // Unsupported = 1

  // FP divide.
  defm : SBWriteResPair<WriteFDiv,    [SBPort0,SBFPDivider], 14, [1,14], 1, 6>;
  defm : SBWriteResPair<WriteFDivX,   [SBPort0,SBFPDivider], 14, [1,14], 1, 6>;
  defm : SBWriteResPair<WriteFDivY,   [SBPort0,SBPort05,SBFPDivider], 29, [2,1,28], 3, 7>;
  defm : SBWriteResPair<WriteFDivZ,   [SBPort0,SBPort05,SBFPDivider], 29, [2,1,28], 3, 7>; // Unsupported = 1
  defm : SBWriteResPair<WriteFDiv64,  [SBPort0,SBFPDivider], 22, [1,22], 1, 6>;
  defm : SBWriteResPair<WriteFDiv64X, [SBPort0,SBFPDivider], 22, [1,22], 1, 6>;
  defm : SBWriteResPair<WriteFDiv64Y, [SBPort0,SBPort05,SBFPDivider], 45, [2,1,44], 3, 7>;
  defm : SBWriteResPair<WriteFDiv64Z, [SBPort0,SBPort05,SBFPDivider], 45, [2,1,44], 3, 7>; // Unsupported = 1

  // FP reciprocal estimate.
  defm : SBWriteResPair<WriteFRcp,  [SBPort0], 5, [1], 1, 6>;
  defm : SBWriteResPair<WriteFRcpX, [SBPort0], 5, [1], 1, 6>;
  defm : SBWriteResPair<WriteFRcpY, [SBPort0,SBPort05], 7, [2,1], 3, 7>;
  defm : SBWriteResPair<WriteFRcpZ, [SBPort0,SBPort05], 7, [2,1], 3, 7>; // Unsupported = 1

  // FP reciprocal square-root estimate.
  defm : SBWriteResPair<WriteFRsqrt,  [SBPort0], 5, [1], 1, 6>;
  defm : SBWriteResPair<WriteFRsqrtX, [SBPort0], 5, [1], 1, 6>;
  defm : SBWriteResPair<WriteFRsqrtY, [SBPort0,SBPort05], 7, [2,1], 3, 7>;
  defm : SBWriteResPair<WriteFRsqrtZ, [SBPort0,SBPort05], 7, [2,1], 3, 7>; // Unsupported = 1

  // FP square root.
  defm : SBWriteResPair<WriteFSqrt,    [SBPort0,SBFPDivider], 14, [1,14], 1, 6>;
  defm : SBWriteResPair<WriteFSqrtX,   [SBPort0,SBFPDivider], 14, [1,14], 1, 6>;
  defm : SBWriteResPair<WriteFSqrtY,   [SBPort0,SBPort05,SBFPDivider], 29, [2,1,28], 3, 7>;
  defm : SBWriteResPair<WriteFSqrtZ,   [SBPort0,SBPort05,SBFPDivider], 29, [2,1,28], 3, 7>; // Unsupported = 1
  defm : SBWriteResPair<WriteFSqrt64,  [SBPort0,SBFPDivider], 21, [1,21], 1, 6>;
  defm : SBWriteResPair<WriteFSqrt64X, [SBPort0,SBFPDivider], 21, [1,21], 1, 6>;
  defm : SBWriteResPair<WriteFSqrt64Y, [SBPort0,SBPort05,SBFPDivider], 45, [2,1,44], 3, 7>;
  defm : SBWriteResPair<WriteFSqrt64Z, [SBPort0,SBPort05,SBFPDivider], 45, [2,1,44], 3, 7>; // Unsupported = 1
  defm : SBWriteResPair<WriteFSqrt80,  [SBPort0,SBFPDivider], 24, [1,24], 1, 6>;

  // Dot products.
  defm : SBWriteResPair<WriteDPPD,  [SBPort0,SBPort1,SBPort5], 9, [1,1,1], 3, 6>;
  defm : SBWriteResPair<WriteDPPS,  [SBPort0,SBPort1,SBPort5], 12, [1,2,1], 4, 6>;
  defm : SBWriteResPair<WriteDPPSY, [SBPort0,SBPort1,SBPort5], 12, [1,2,1], 4, 7>;

  defm : SBWriteResPair<WriteFSign, [SBPort5], 1>;

  // FP rounding.
  defm : SBWriteResPair<WriteFRnd,  [SBPort1], 3, [1], 1, 6>;
  defm : SBWriteResPair<WriteFRndY, [SBPort1], 3, [1], 1, 7>;
  defm : SBWriteResPair<WriteFRndZ, [SBPort1], 3, [1], 1, 7>; // Unsupported = 1

  // FP logic.
  defm : SBWriteResPair<WriteFLogic,  [SBPort5], 1, [1], 1, 6>;
  defm : SBWriteResPair<WriteFLogicY, [SBPort5], 1, [1], 1, 7>;
  defm : SBWriteResPair<WriteFLogicZ, [SBPort5], 1, [1], 1, 7>; // Unsupported = 1

  // FP test.
  defm : SBWriteResPair<WriteFTest,  [SBPort0], 1, [1], 1, 6>;
  defm : SBWriteResPair<WriteFTestY, [SBPort0], 1, [1], 1, 7>;
  defm : SBWriteResPair<WriteFTestZ, [SBPort0], 1, [1], 1, 7>; // Unsupported = 1

  // FP shuffles.
  defm : SBWriteResPair<WriteFShuffle,     [SBPort5], 1, [1], 1, 6>;
  defm : SBWriteResPair<WriteFShuffleY,    [SBPort5], 1, [1], 1, 7>;
  defm : SBWriteResPair<WriteFShuffleZ,    [SBPort5], 1, [1], 1, 7>; // Unsupported = 1
  defm : SBWriteResPair<WriteFVarShuffle,  [SBPort5], 1, [1], 1, 6>;
  defm : SBWriteResPair<WriteFVarShuffleY, [SBPort5], 1, [1], 1, 7>;
  defm : SBWriteResPair<WriteFVarShuffleZ, [SBPort5], 1, [1], 1, 7>; // Unsupported = 1

  // FP blends.
  defm : SBWriteResPair<WriteFBlend,     [SBPort05], 1, [1], 1, 6>;
  defm : SBWriteResPair<WriteFBlendY,    [SBPort05], 1, [1], 1, 7>;
  defm : SBWriteResPair<WriteFBlendZ,    [SBPort05], 1, [1], 1, 7>; // Unsupported = 1
  defm : SBWriteResPair<WriteFVarBlend,  [SBPort05], 2, [2], 2, 6>;
  defm : SBWriteResPair<WriteFVarBlendY, [SBPort05], 2, [2], 2, 7>;
  defm : SBWriteResPair<WriteFVarBlendZ, [SBPort05], 2, [2], 2, 7>; // Unsupported = 1
  // Conversion between integer and float.
  // Entries tagged `Unsupported = 1` describe writes for instructions that
  // Sandy Bridge does not implement (see the note in the file header).

  // Float -> integer.
  defm : SBWriteResPair<WriteCvtSS2I,  [SBPort0,SBPort1], 5, [1,1], 2>;
  defm : SBWriteResPair<WriteCvtPS2I,  [SBPort1], 3, [1], 1, 6>;
  defm : SBWriteResPair<WriteCvtPS2IY, [SBPort1], 3, [1], 1, 7>;
  defm : SBWriteResPair<WriteCvtPS2IZ, [SBPort1], 3, [1], 1, 7>; // Unsupported = 1
  defm : SBWriteResPair<WriteCvtSD2I,  [SBPort0,SBPort1], 5, [1,1], 2>;
  defm : SBWriteResPair<WriteCvtPD2I,  [SBPort1,SBPort5], 4, [1,1], 2, 6>;
  defm : X86WriteRes<WriteCvtPD2IY,    [SBPort1,SBPort5], 4, [1,1], 2>;
  defm : X86WriteRes<WriteCvtPD2IZ,    [SBPort1,SBPort5], 4, [1,1], 2>; // Unsupported = 1
  defm : X86WriteRes<WriteCvtPD2IYLd,  [SBPort1,SBPort5,SBPort23], 11, [1,1,1], 3>;
  defm : X86WriteRes<WriteCvtPD2IZLd,  [SBPort1,SBPort5,SBPort23], 11, [1,1,1], 3>; // Unsupported = 1

  // Integer -> float.
  defm : X86WriteRes<WriteCvtI2SS,     [SBPort1,SBPort5], 5, [1,2], 3>;
  defm : X86WriteRes<WriteCvtI2SSLd,   [SBPort1,SBPort5,SBPort23], 10, [1,1,1], 3>;
  defm : SBWriteResPair<WriteCvtI2PS,  [SBPort1], 3, [1], 1, 6>;
  defm : SBWriteResPair<WriteCvtI2PSY, [SBPort1], 3, [1], 1, 7>;
  defm : SBWriteResPair<WriteCvtI2PSZ, [SBPort1], 3, [1], 1, 7>; // Unsupported = 1
  defm : X86WriteRes<WriteCvtI2SD,     [SBPort1,SBPort5], 4, [1,1], 2>;
  defm : X86WriteRes<WriteCvtI2PD,     [SBPort1,SBPort5], 4, [1,1], 2>;
  defm : X86WriteRes<WriteCvtI2PDY,    [SBPort1,SBPort5], 4, [1,1], 2>;
  defm : X86WriteRes<WriteCvtI2PDZ,    [SBPort1,SBPort5], 4, [1,1], 2>; // Unsupported = 1
  defm : X86WriteRes<WriteCvtI2SDLd,   [SBPort1,SBPort23], 9, [1,1], 2>;
  defm : X86WriteRes<WriteCvtI2PDLd,   [SBPort1,SBPort5,SBPort23], 10, [1,1,1], 3>;
  defm : X86WriteRes<WriteCvtI2PDYLd,  [SBPort1,SBPort5,SBPort23], 10, [1,1,1], 3>;
  defm : X86WriteRes<WriteCvtI2PDZLd,  [SBPort1,SBPort5,SBPort23], 10, [1,1,1], 3>; // Unsupported = 1

  // Single -> double precision.
  defm : SBWriteResPair<WriteCvtSS2SD, [SBPort0], 1, [1], 1, 6>;
  defm : X86WriteRes<WriteCvtPS2PD,    [SBPort0,SBPort5], 2, [1,1], 2>;
  defm : X86WriteRes<WriteCvtPS2PDY,   [SBPort0,SBPort5], 2, [1,1], 2>;
  defm : X86WriteRes<WriteCvtPS2PDZ,   [SBPort0,SBPort5], 2, [1,1], 2>; // Unsupported = 1
  defm : X86WriteRes<WriteCvtPS2PDLd,  [SBPort0,SBPort23], 7, [1,1], 2>;
  defm : X86WriteRes<WriteCvtPS2PDYLd, [SBPort0,SBPort23], 7, [1,1], 2>;
  defm : X86WriteRes<WriteCvtPS2PDZLd, [SBPort0,SBPort23], 7, [1,1], 2>; // Unsupported = 1

  // Double -> single precision.
  defm : SBWriteResPair<WriteCvtSD2SS,  [SBPort1,SBPort5], 4, [1,1], 2, 6>;
  defm : SBWriteResPair<WriteCvtPD2PS,  [SBPort1,SBPort5], 4, [1,1], 2, 6>;
  defm : SBWriteResPair<WriteCvtPD2PSY, [SBPort1,SBPort5], 4, [1,1], 2, 7>;
  defm : SBWriteResPair<WriteCvtPD2PSZ, [SBPort1,SBPort5], 4, [1,1], 2, 7>; // Unsupported = 1

  // Half <-> single precision.
  defm : SBWriteResPair<WriteCvtPH2PS,  [SBPort1], 3>;
  defm : SBWriteResPair<WriteCvtPH2PSY, [SBPort1], 3>;
  defm : SBWriteResPair<WriteCvtPH2PSZ, [SBPort1], 3>; // Unsupported = 1
  defm : X86WriteRes<WriteCvtPS2PH,     [SBPort1], 3, [1], 1>;
  defm : X86WriteRes<WriteCvtPS2PHY,    [SBPort1], 3, [1], 1>;
  defm : X86WriteRes<WriteCvtPS2PHZ,    [SBPort1], 3, [1], 1>; // Unsupported = 1
  defm : X86WriteRes<WriteCvtPS2PHSt,   [SBPort1, SBPort23, SBPort4], 4, [1,1,1], 1>;
  defm : X86WriteRes<WriteCvtPS2PHYSt,  [SBPort1, SBPort23, SBPort4], 4, [1,1,1], 1>;
  defm : X86WriteRes<WriteCvtPS2PHZSt,  [SBPort1, SBPort23, SBPort4], 4, [1,1,1], 1>; // Unsupported = 1
  // Vector integer operations.
  // Entries tagged `Unsupported = 1` describe writes for instructions that
  // Sandy Bridge does not implement (see the note in the file header).

  // Vector loads.
  defm : X86WriteRes<WriteVecLoad,        [SBPort23], 5, [1], 1>;
  defm : X86WriteRes<WriteVecLoadX,       [SBPort23], 6, [1], 1>;
  defm : X86WriteRes<WriteVecLoadY,       [SBPort23], 7, [1], 1>;
  defm : X86WriteRes<WriteVecLoadNT,      [SBPort23], 6, [1], 1>;
  defm : X86WriteRes<WriteVecLoadNTY,     [SBPort23], 7, [1], 1>;
  defm : X86WriteRes<WriteVecMaskedLoad,  [SBPort23,SBPort05], 8, [1,2], 3>;
  defm : X86WriteRes<WriteVecMaskedLoadY, [SBPort23,SBPort05], 9, [1,2], 3>;

  // Vector stores.
  defm : X86WriteRes<WriteVecStore,          [SBPort23,SBPort4], 1, [1,1], 1>;
  defm : X86WriteRes<WriteVecStoreX,         [SBPort23,SBPort4], 1, [1,1], 1>;
  defm : X86WriteRes<WriteVecStoreY,         [SBPort23,SBPort4], 1, [1,1], 1>;
  defm : X86WriteRes<WriteVecStoreNT,        [SBPort23,SBPort4], 1, [1,1], 1>;
  defm : X86WriteRes<WriteVecStoreNTY,       [SBPort23,SBPort4], 1, [1,1], 1>;
  defm : X86WriteRes<WriteVecMaskedStore32,  [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
  defm : X86WriteRes<WriteVecMaskedStore32Y, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
  defm : X86WriteRes<WriteVecMaskedStore64,  [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
  defm : X86WriteRes<WriteVecMaskedStore64Y, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;

  // Vector register moves, including transfers to and from GPRs.
  defm : X86WriteRes<WriteVecMove,        [SBPort05], 1, [1], 1>;
  defm : X86WriteRes<WriteVecMoveX,       [SBPort015], 1, [1], 1>;
  defm : X86WriteRes<WriteVecMoveY,       [SBPort05], 1, [1], 1>;
  defm : X86WriteRes<WriteVecMoveZ,       [SBPort05], 1, [1], 1>;
  defm : X86WriteRes<WriteVecMoveToGpr,   [SBPort0], 2, [1], 1>;
  defm : X86WriteRes<WriteVecMoveFromGpr, [SBPort5], 1, [1], 1>;

  // Vector logic.
  defm : SBWriteResPair<WriteVecLogic,  [SBPort015], 1, [1], 1, 5>;
  defm : SBWriteResPair<WriteVecLogicX, [SBPort015], 1, [1], 1, 6>;
  defm : SBWriteResPair<WriteVecLogicY, [SBPort015], 1, [1], 1, 7>;
  defm : SBWriteResPair<WriteVecLogicZ, [SBPort015], 1, [1], 1, 7>; // Unsupported = 1

  // Vector test.
  defm : SBWriteResPair<WriteVecTest,  [SBPort0,SBPort5], 2, [1,1], 2, 6>;
  defm : SBWriteResPair<WriteVecTestY, [SBPort0,SBPort5], 2, [1,1], 2, 7>;
  defm : SBWriteResPair<WriteVecTestZ, [SBPort0,SBPort5], 2, [1,1], 2, 7>; // Unsupported = 1

  // Vector ALU.
  defm : SBWriteResPair<WriteVecALU,  [SBPort1], 3, [1], 1, 5>;
  defm : SBWriteResPair<WriteVecALUX, [SBPort15], 1, [1], 1, 6>;
  defm : SBWriteResPair<WriteVecALUY, [SBPort15], 1, [1], 1, 7>;
  defm : SBWriteResPair<WriteVecALUZ, [SBPort15], 1, [1], 1, 7>; // Unsupported = 1

  // Vector integer multiply.
  defm : SBWriteResPair<WriteVecIMul,  [SBPort0], 5, [1], 1, 5>;
  defm : SBWriteResPair<WriteVecIMulX, [SBPort0], 5, [1], 1, 6>;
  defm : SBWriteResPair<WriteVecIMulY, [SBPort0], 5, [1], 1, 7>;
  defm : SBWriteResPair<WriteVecIMulZ, [SBPort0], 5, [1], 1, 7>; // Unsupported = 1
  defm : SBWriteResPair<WritePMULLD,   [SBPort0], 5, [1], 1, 6>;
  // TODO this is probably wrong for 256/512-bit for the "generic" model
  defm : SBWriteResPair<WritePMULLDY,  [SBPort0], 5, [1], 1, 7>;
  defm : SBWriteResPair<WritePMULLDZ,  [SBPort0], 5, [1], 1, 7>; // Unsupported = 1

  // Vector shuffles.
  defm : SBWriteResPair<WriteShuffle,     [SBPort5], 1, [1], 1, 5>;
  defm : SBWriteResPair<WriteShuffleX,    [SBPort15], 1, [1], 1, 6>;
  defm : SBWriteResPair<WriteShuffleY,    [SBPort5], 1, [1], 1, 7>;
  defm : SBWriteResPair<WriteShuffleZ,    [SBPort5], 1, [1], 1, 7>; // Unsupported = 1
  defm : SBWriteResPair<WriteVarShuffle,  [SBPort15], 1, [1], 1, 5>;
  defm : SBWriteResPair<WriteVarShuffleX, [SBPort15], 1, [1], 1, 6>;
  defm : SBWriteResPair<WriteVarShuffleY, [SBPort15], 1, [1], 1, 7>;
  defm : SBWriteResPair<WriteVarShuffleZ, [SBPort15], 1, [1], 1, 7>; // Unsupported = 1

  // Vector blends.
  defm : SBWriteResPair<WriteBlend,     [SBPort15], 1, [1], 1, 6>;
  defm : SBWriteResPair<WriteBlendY,    [SBPort15], 1, [1], 1, 7>;
  defm : SBWriteResPair<WriteBlendZ,    [SBPort15], 1, [1], 1, 7>; // Unsupported = 1
  defm : SBWriteResPair<WriteVarBlend,  [SBPort15], 2, [2], 2, 6>;
  defm : SBWriteResPair<WriteVarBlendY, [SBPort15], 2, [2], 2, 7>;
  defm : SBWriteResPair<WriteVarBlendZ, [SBPort15], 2, [2], 2, 7>; // Unsupported = 1

  // MPSADBW / PSADBW / PHMINPOS.
  defm : SBWriteResPair<WriteMPSAD,    [SBPort0, SBPort15], 7, [1,2], 3, 6>;
  defm : SBWriteResPair<WriteMPSADY,   [SBPort0, SBPort15], 7, [1,2], 3, 7>;
  defm : SBWriteResPair<WriteMPSADZ,   [SBPort0, SBPort15], 7, [1,2], 3, 7>; // Unsupported = 1
  defm : SBWriteResPair<WritePSADBW,   [SBPort0], 5, [1], 1, 5>;
  defm : SBWriteResPair<WritePSADBWX,  [SBPort0], 5, [1], 1, 6>;
  defm : SBWriteResPair<WritePSADBWY,  [SBPort0], 5, [1], 1, 7>;
  defm : SBWriteResPair<WritePSADBWZ,  [SBPort0], 5, [1], 1, 7>; // Unsupported = 1
  defm : SBWriteResPair<WritePHMINPOS, [SBPort0], 5, [1], 1, 6>;
  // Vector integer shifts.
  // Entries tagged `Unsupported = 1` describe writes for instructions that
  // Sandy Bridge does not implement (see the note in the file header).
  defm : SBWriteResPair<WriteVecShift,     [SBPort5], 1, [1], 1, 5>;
  defm : SBWriteResPair<WriteVecShiftX,    [SBPort0,SBPort15], 2, [1,1], 2, 6>;
  defm : SBWriteResPair<WriteVecShiftY,    [SBPort0,SBPort15], 4, [1,1], 2, 7>;
  defm : SBWriteResPair<WriteVecShiftZ,    [SBPort0,SBPort15], 4, [1,1], 2, 7>; // Unsupported = 1

  // Shift by immediate.
  defm : SBWriteResPair<WriteVecShiftImm,  [SBPort5], 1, [1], 1, 5>;
  defm : SBWriteResPair<WriteVecShiftImmX, [SBPort0], 1, [1], 1, 6>;
  defm : SBWriteResPair<WriteVecShiftImmY, [SBPort0], 1, [1], 1, 7>;
  defm : SBWriteResPair<WriteVecShiftImmZ, [SBPort0], 1, [1], 1, 7>; // Unsupported = 1

  // Variable shift.
  defm : SBWriteResPair<WriteVarVecShift,  [SBPort0], 1, [1], 1, 6>;
  defm : SBWriteResPair<WriteVarVecShiftY, [SBPort0], 1, [1], 1, 7>;
  defm : SBWriteResPair<WriteVarVecShiftZ, [SBPort0], 1, [1], 1, 7>; // Unsupported = 1
  // Vector insert/extract operations.
  // Each record leaves ResourceCycles at the WriteRes default and only sets
  // the latency and micro-op count.
  def : WriteRes<WriteVecInsert, [SBPort5,SBPort15]> {
    let Latency = 2;
    let NumMicroOps = 2;
  }
  def : WriteRes<WriteVecInsertLd, [SBPort23,SBPort15]> {
    let Latency = 7;
    let NumMicroOps = 2;
  }
  def : WriteRes<WriteVecExtract, [SBPort0,SBPort15]> {
    let Latency = 3;
    let NumMicroOps = 2;
  }
  def : WriteRes<WriteVecExtractSt, [SBPort4,SBPort23,SBPort15]> {
    let Latency = 5;
    let NumMicroOps = 3;
  }
  ////////////////////////////////////////////////////////////////////////////////
  // Horizontal add/sub instructions.
  // Entries tagged `Unsupported = 1` describe writes for instructions that
  // Sandy Bridge does not implement (see the note in the file header).
  ////////////////////////////////////////////////////////////////////////////////

  // Floating point.
  defm : SBWriteResPair<WriteFHAdd,  [SBPort1,SBPort5], 5, [1,2], 3, 6>;
  defm : SBWriteResPair<WriteFHAddY, [SBPort1,SBPort5], 5, [1,2], 3, 7>;
  defm : SBWriteResPair<WriteFHAddZ, [SBPort1,SBPort5], 5, [1,2], 3, 7>; // Unsupported = 1

  // Integer.
  defm : SBWriteResPair<WritePHAdd,  [SBPort15], 3, [3], 3, 5>;
  defm : SBWriteResPair<WritePHAddX, [SBPort15], 3, [3], 3, 6>;
  defm : SBWriteResPair<WritePHAddY, [SBPort15], 3, [3], 3, 7>;
  defm : SBWriteResPair<WritePHAddZ, [SBPort15], 3, [3], 3, 7>; // Unsupported = 1
  ////////////////////////////////////////////////////////////////////////////////
  // String instructions.
  ////////////////////////////////////////////////////////////////////////////////

  // Packed Compare Implicit Length Strings, Return Mask
  def : WriteRes<WritePCmpIStrM, [SBPort0]> {
    let Latency = 11; let NumMicroOps = 3; let ResourceCycles = [3];
  }
  def : WriteRes<WritePCmpIStrMLd, [SBPort0, SBPort23]> {
    let Latency = 17; let NumMicroOps = 4; let ResourceCycles = [3,1];
  }

  // Packed Compare Explicit Length Strings, Return Mask
  // (NumMicroOps is not overridden for the explicit-length forms.)
  def : WriteRes<WritePCmpEStrM, [SBPort015]> {
    let Latency = 11; let ResourceCycles = [8];
  }
  def : WriteRes<WritePCmpEStrMLd, [SBPort015, SBPort23]> {
    let Latency = 17; let ResourceCycles = [7, 1];
  }

  // Packed Compare Implicit Length Strings, Return Index
  def : WriteRes<WritePCmpIStrI, [SBPort0]> {
    let Latency = 11; let NumMicroOps = 3; let ResourceCycles = [3];
  }
  def : WriteRes<WritePCmpIStrILd, [SBPort0, SBPort23]> {
    let Latency = 17; let NumMicroOps = 4; let ResourceCycles = [3,1];
  }

  // Packed Compare Explicit Length Strings, Return Index
  def : WriteRes<WritePCmpEStrI, [SBPort015]> {
    let Latency = 4; let ResourceCycles = [8];
  }
  def : WriteRes<WritePCmpEStrILd, [SBPort015, SBPort23]> {
    let Latency = 10; let ResourceCycles = [7, 1];
  }
  // MOVMSK Instructions.
  // All four writes use SBPort0; only the MMX form has latency 1.
  def : WriteRes<WriteFMOVMSK,    [SBPort0]> { let Latency = 2; }
  def : WriteRes<WriteVecMOVMSK,  [SBPort0]> { let Latency = 2; }
  def : WriteRes<WriteVecMOVMSKY, [SBPort0]> { let Latency = 2; }
  def : WriteRes<WriteMMXMOVMSK,  [SBPort0]> { let Latency = 1; }
  // AES Instructions.
  // Encrypt/decrypt round.
  def : WriteRes<WriteAESDecEnc, [SBPort5, SBPort015]> {
    let Latency = 7; let NumMicroOps = 2; let ResourceCycles = [1,1];
  }
  def : WriteRes<WriteAESDecEncLd, [SBPort5, SBPort23, SBPort015]> {
    let Latency = 13; let NumMicroOps = 3; let ResourceCycles = [1,1,1];
  }
  // Inverse mix columns.
  def : WriteRes<WriteAESIMC, [SBPort5]> {
    let Latency = 12; let NumMicroOps = 2; let ResourceCycles = [2];
  }
  def : WriteRes<WriteAESIMCLd, [SBPort5, SBPort23]> {
    let Latency = 18; let NumMicroOps = 3; let ResourceCycles = [2,1];
  }
  // Key generation assist (NumMicroOps is not overridden here).
  def : WriteRes<WriteAESKeyGen, [SBPort015]> {
    let Latency = 8; let ResourceCycles = [11];
  }
  def : WriteRes<WriteAESKeyGenLd, [SBPort015, SBPort23]> {
    let Latency = 14; let ResourceCycles = [10, 1];
  }
  // Carry-less multiplication instructions.
  // Register form: 18 cycles of SBPort015, latency 14.
  def : WriteRes<WriteCLMul, [SBPort015]> {
    let Latency = 14; let ResourceCycles = [18];
  }
  // Memory form: 17 cycles of SBPort015 plus one of SBPort23, latency 20.
  def : WriteRes<WriteCLMulLd, [SBPort015, SBPort23]> {
    let Latency = 20; let ResourceCycles = [17, 1];
  }
  // Load/store MXCSR.
  // FIXME: This is probably wrong. Only STMXCSR should require Port4.
  def : WriteRes<WriteLDMXCSR, [SBPort0, SBPort4, SBPort5, SBPort23]> {
    let Latency = 5;
    let NumMicroOps = 4;
    let ResourceCycles = [1,1,1,1];
  }
  def : WriteRes<WriteSTMXCSR, [SBPort0, SBPort4, SBPort5, SBPort23]> {
    let Latency = 5;
    let NumMicroOps = 4;
    let ResourceCycles = [1,1,1,1];
  }

  // System and microcoded writes are given a flat latency of 100.
  def : WriteRes<WriteSystem,     [SBPort015]> { let Latency = 100; }
  def : WriteRes<WriteMicrocoded, [SBPort015]> { let Latency = 100; }
  def : WriteRes<WriteFence, [SBPort23, SBPort4]>;
  // NOPs are given no execution resources.
  def : WriteRes<WriteNop, []>;
  // AVX2/FMA is not supported on that architecture, but we should define the basic
  // scheduling resources anyway.
  // 256-bit shuffle placeholders.
  defm : SBWriteResPair<WriteFShuffle256,    [SBPort5], 1, [1], 1, 7>;
  defm : SBWriteResPair<WriteFVarShuffle256, [SBPort5], 1, [1], 1, 7>;
  defm : SBWriteResPair<WriteShuffle256,     [SBPort5], 1, [1], 1, 7>;
  defm : SBWriteResPair<WriteVPMOV256,       [SBPort5], 1, [1], 1, 7>;
  defm : SBWriteResPair<WriteVarShuffle256,  [SBPort5], 1, [1], 1, 7>;
  // FMA placeholders; the remaining SBWriteResPair arguments keep their defaults.
  defm : SBWriteResPair<WriteFMA,  [SBPort01], 5>;
  defm : SBWriteResPair<WriteFMAX, [SBPort01], 5>;
  defm : SBWriteResPair<WriteFMAY, [SBPort01], 5>;
  defm : SBWriteResPair<WriteFMAZ, [SBPort01], 5>; // Unsupported = 1
  // Remaining SNB instrs.

  // Latency 1, 1 uop on SBPort1.
  def SBWriteResGroup1 : SchedWriteRes<[SBPort1]> {
    let Latency = 1; let NumMicroOps = 1; let ResourceCycles = [1];
  }
  def: InstRW<[SBWriteResGroup1], (instrs COMP_FST0r, COM_FST0r,
                                          UCOM_FPr, UCOM_Fr)>;

  // Latency 1, 1 uop on SBPort5.
  def SBWriteResGroup2 : SchedWriteRes<[SBPort5]> {
    let Latency = 1; let NumMicroOps = 1; let ResourceCycles = [1];
  }
  def: InstRW<[SBWriteResGroup2], (instrs FDECSTP, FINCSTP, FFREE, FFREEP,
                                          FNOP, LD_Frr, ST_Frr, ST_FPrr)>;
  def: InstRW<[SBWriteResGroup2], (instrs RET64)>;

  // Latency 1, 1 uop on SBPort05.
  def SBWriteResGroup4 : SchedWriteRes<[SBPort05]> {
    let Latency = 1; let NumMicroOps = 1; let ResourceCycles = [1];
  }
  def: InstRW<[SBWriteResGroup4], (instrs CDQ, CQO)>;

  // Latency 1, 1 uop on SBPort15.
  def SBWriteResGroup5 : SchedWriteRes<[SBPort15]> {
    let Latency = 1; let NumMicroOps = 1; let ResourceCycles = [1];
  }
  def: InstRW<[SBWriteResGroup5], (instrs MMX_PABSBrr, MMX_PABSDrr,
                                          MMX_PABSWrr, MMX_PADDQrr,
                                          MMX_PALIGNRrri, MMX_PSIGNBrr,
                                          MMX_PSIGNDrr, MMX_PSIGNWrr)>;
  // Latency 2, 2 uops, both on SBPort015.
  def SBWriteResGroup11 : SchedWriteRes<[SBPort015]> {
    let Latency = 2; let NumMicroOps = 2; let ResourceCycles = [2];
  }
  def: InstRW<[SBWriteResGroup11], (instrs SCASB, SCASL, SCASQ, SCASW)>;

  // Latency 2, 2 uops: SBPort0 + SBPort1.
  def SBWriteResGroup12 : SchedWriteRes<[SBPort0, SBPort1]> {
    let Latency = 2; let NumMicroOps = 2; let ResourceCycles = [1,1];
  }
  def: InstRW<[SBWriteResGroup12], (instregex "(V?)(U?)COMI(SD|SS)rr")>;

  // Latency 2, 2 uops: SBPort0 + SBPort015.
  def SBWriteResGroup15 : SchedWriteRes<[SBPort0, SBPort015]> {
    let Latency = 2; let NumMicroOps = 2; let ResourceCycles = [1,1];
  }
  def: InstRW<[SBWriteResGroup15], (instrs CWD, FNSTSW16r)>;

  // Latency 2, 2 uops: SBPort5 + SBPort015.
  def SBWriteResGroup18 : SchedWriteRes<[SBPort5, SBPort015]> {
    let Latency = 2; let NumMicroOps = 2; let ResourceCycles = [1,1];
  }
  def: InstRW<[SBWriteResGroup18], (instrs JCXZ, JECXZ, JRCXZ, MMX_MOVDQ2Qrr)>;
  // Latency 3, 1 uop on SBPort1.
  def SBWriteResGroup21 : SchedWriteRes<[SBPort1]> {
    let Latency = 3; let NumMicroOps = 1; let ResourceCycles = [1];
  }
  def: InstRW<[SBWriteResGroup21], (instrs PUSHFS64)>;

  // Latency 3, 2 uops: SBPort0 + SBPort5.
  def SBWriteResGroup22 : SchedWriteRes<[SBPort0, SBPort5]> {
    let Latency = 3; let NumMicroOps = 2; let ResourceCycles = [1,1];
  }
  def: InstRW<[SBWriteResGroup22], (instregex "(V?)EXTRACTPSrr")>;

  // Rotate-through-carry by one: latency 2, 3 uops.
  def SBWriteResGroup23 : SchedWriteRes<[SBPort05, SBPort015]> {
    let Latency = 2; let NumMicroOps = 3; let ResourceCycles = [2,1];
  }
  def: InstRW<[SBWriteResGroup23], (instrs RCL8r1, RCL16r1, RCL32r1, RCL64r1,
                                           RCR8r1, RCR16r1, RCR32r1, RCR64r1)>;

  // RCR by immediate: latency 3, 8 uops.
  def SBWriteResGroup24 : SchedWriteRes<[SBPort1, SBPort5, SBPort05, SBPort015]> {
    let Latency = 3; let NumMicroOps = 8; let ResourceCycles = [1,1,4,2];
  }
  def: InstRW<[SBWriteResGroup24], (instrs RCR8ri, RCR16ri, RCR32ri, RCR64ri)>;

  // RCL by immediate: same resources as RCR, but latency 4.
  def SBWriteResGroup24b : SchedWriteRes<[SBPort1, SBPort5, SBPort05, SBPort015]> {
    let Latency = 4; let NumMicroOps = 8; let ResourceCycles = [1,1,4,2];
  }
  def: InstRW<[SBWriteResGroup24b], (instrs RCL8ri, RCL16ri, RCL32ri, RCL64ri)>;
  // Latency 7, 3 uops: SBPort23 + 2x SBPort015.
  def SBWriteResGroup25_1 : SchedWriteRes<[SBPort23, SBPort015]> {
    let Latency = 7; let NumMicroOps = 3; let ResourceCycles = [1,2];
  }
  def: InstRW<[SBWriteResGroup25_1], (instrs LEAVE, LEAVE64)>;

  // Latency 3, 3 uops: SBPort0 + SBPort1 + SBPort5.
  def SBWriteResGroup26_2 : SchedWriteRes<[SBPort0, SBPort1, SBPort5]> {
    let Latency = 3; let NumMicroOps = 3; let ResourceCycles = [1,1,1];
  }
  def: InstRW<[SBWriteResGroup26_2], (instrs COM_FIPr, COM_FIr,
                                             UCOM_FIPr, UCOM_FIr)>;

  // Latency 4, 2 uops: SBPort1 + SBPort015.
  def SBWriteResGroup29 : SchedWriteRes<[SBPort1, SBPort015]> {
    let Latency = 4; let NumMicroOps = 2; let ResourceCycles = [1,1];
  }
  def: InstRW<[SBWriteResGroup29], (instrs MOV64sr)>;

  // Latency 4, 4 uops: SBPort5 + 3x SBPort015.
  def SBWriteResGroup29_2 : SchedWriteRes<[SBPort5, SBPort015]> {
    let Latency = 4; let NumMicroOps = 4; let ResourceCycles = [1,3];
  }
  def: InstRW<[SBWriteResGroup29_2], (instrs PAUSE)>;

  // Latency 3, 8 uops.
  def SBWriteResGroup30 : SchedWriteRes<[SBPort1, SBPort5, SBPort015]> {
    let Latency = 3; let NumMicroOps = 8; let ResourceCycles = [1,3,4];
  }
  def: InstRW<[SBWriteResGroup30], (instrs LOOP)>;

  // Latency 4, 12 uops.
  def SBWriteResGroup31 : SchedWriteRes<[SBPort1, SBPort5, SBPort015, SBPort05]> {
    let Latency = 4; let NumMicroOps = 12; let ResourceCycles = [1,3,6,2];
  }
  def: InstRW<[SBWriteResGroup31], (instrs LOOPE, LOOPNE)>;
  // Rotate-through-carry by CL: latency 5, 8 uops on SBPort05.
  def SBWriteResGroup76 : SchedWriteRes<[SBPort05]> {
    let Latency = 5; let NumMicroOps = 8; let ResourceCycles = [8];
  }
  def: InstRW<[SBWriteResGroup76], (instregex "RCL(8|16|32|64)rCL",
                                              "RCR(8|16|32|64)rCL")>;

  // Latency 5, 2 uops: SBPort4 + SBPort23.
  def SBWriteResGroup33 : SchedWriteRes<[SBPort4, SBPort23]> {
    let Latency = 5; let NumMicroOps = 2; let ResourceCycles = [1,1];
  }
  def: InstRW<[SBWriteResGroup33], (instregex "PUSH(16r|32r|64r|64i8)")>;

  // Latency 5, 3 uops: SBPort1 + 2x SBPort5.
  def SBWriteResGroup35 : SchedWriteRes<[SBPort1, SBPort5]> {
    let Latency = 5; let NumMicroOps = 3; let ResourceCycles = [1,2];
  }
  def: InstRW<[SBWriteResGroup35], (instrs CLI)>;

  // Latency 5, 3 uops: SBPort1 + SBPort4 + SBPort23.
  def SBWriteResGroup35_2 : SchedWriteRes<[SBPort1, SBPort4, SBPort23]> {
    let Latency = 5; let NumMicroOps = 3; let ResourceCycles = [1,1,1];
  }
  def: InstRW<[SBWriteResGroup35_2], (instrs PUSHGS64)>;
  def: InstRW<[SBWriteResGroup35_2], (instregex "ISTT_FP(16|32|64)m")>;

  // Latency 5, 3 uops: SBPort4 + SBPort5 + SBPort23.
  def SBWriteResGroup36 : SchedWriteRes<[SBPort4, SBPort5, SBPort23]> {
    let Latency = 5; let NumMicroOps = 3; let ResourceCycles = [1,1,1];
  }
  def: InstRW<[SBWriteResGroup36], (instrs CALL64pcrel32)>;
  def: InstRW<[SBWriteResGroup36], (instregex "CALL(16|32|64)r",
                                              "(V?)EXTRACTPSmr")>;
  // Latency 5, 3 uops: SBPort4 + SBPort23 + SBPort015.
  def SBWriteResGroup40 : SchedWriteRes<[SBPort4, SBPort23, SBPort015]> {
    let Latency = 5; let NumMicroOps = 3; let ResourceCycles = [1,1,1];
  }
  def: InstRW<[SBWriteResGroup40], (instrs STOSB, STOSL, STOSQ, STOSW)>;

  // Latency 5, 4 uops: SBPort5 + 3x SBPort015.
  def SBWriteResGroup41 : SchedWriteRes<[SBPort5, SBPort015]> {
    let Latency = 5; let NumMicroOps = 4; let ResourceCycles = [1,3];
  }
  def: InstRW<[SBWriteResGroup41], (instrs FNINIT)>;

  // Latency 5, 4 uops, one per listed port.
  def SBWriteResGroup45 : SchedWriteRes<[SBPort0, SBPort4, SBPort23, SBPort15]> {
    let Latency = 5; let NumMicroOps = 4; let ResourceCycles = [1,1,1,1];
  }
  def: InstRW<[SBWriteResGroup45], (instregex "(V?)PEXTR(D|Q)mr",
                                              "PUSHF(16|64)")>;

  // Latency 5, 4 uops, one per listed port.
  def SBWriteResGroup46 : SchedWriteRes<[SBPort4, SBPort5, SBPort01, SBPort23]> {
    let Latency = 5; let NumMicroOps = 4; let ResourceCycles = [1,1,1,1];
  }
  def: InstRW<[SBWriteResGroup46], (instregex "CLFLUSH")>;

  // Latency 5, 5 uops (SBPort5 is used for two cycles).
  def SBWriteResGroup47 : SchedWriteRes<[SBPort4, SBPort5, SBPort01, SBPort23]> {
    let Latency = 5; let NumMicroOps = 5; let ResourceCycles = [1,2,1,1];
  }
  def: InstRW<[SBWriteResGroup47], (instregex "FXRSTOR")>;
  // Latency 6, 1 uop on SBPort23.
  def SBWriteResGroup48 : SchedWriteRes<[SBPort23]> {
    let Latency = 6; let NumMicroOps = 1; let ResourceCycles = [1];
  }
  def: InstRW<[SBWriteResGroup48], (instrs VBROADCASTSSrm)>;
  def: InstRW<[SBWriteResGroup48], (instregex "POP(16|32|64)r",
                                              "(V?)MOV64toPQIrm",
                                              "(V?)MOVDDUPrm",
                                              "(V?)MOVDI2PDIrm",
                                              "(V?)MOVQI2PQIrm",
                                              "(V?)MOVSDrm",
                                              "(V?)MOVSHDUPrm",
                                              "(V?)MOVSLDUPrm",
                                              "(V?)MOVSSrm")>;

  // Latency 6, 2 uops: SBPort5 + SBPort23.
  def SBWriteResGroup49 : SchedWriteRes<[SBPort5, SBPort23]> {
    let Latency = 6; let NumMicroOps = 2; let ResourceCycles = [1,1];
  }
  def: InstRW<[SBWriteResGroup49], (instrs MOV16sm)>;

  // Latency 6, 2 uops: SBPort23 + SBPort15.
  def SBWriteResGroup51 : SchedWriteRes<[SBPort23, SBPort15]> {
    let Latency = 6; let NumMicroOps = 2; let ResourceCycles = [1,1];
  }
  def: InstRW<[SBWriteResGroup51], (instrs MMX_PABSBrm, MMX_PABSDrm,
                                           MMX_PABSWrm, MMX_PALIGNRrmi,
                                           MMX_PSIGNBrm, MMX_PSIGNDrm,
                                           MMX_PSIGNWrm)>;

  // Latency 6, 2 uops: SBPort23 + SBPort015.
  def SBWriteResGroup52 : SchedWriteRes<[SBPort23, SBPort015]> {
    let Latency = 6; let NumMicroOps = 2; let ResourceCycles = [1,1];
  }
  def: InstRW<[SBWriteResGroup52], (instrs LODSL, LODSQ)>;

  // Latency 6, 3 uops: SBPort4 + 2x SBPort23.
  def SBWriteResGroup53 : SchedWriteRes<[SBPort4, SBPort23]> {
    let Latency = 6; let NumMicroOps = 3; let ResourceCycles = [1,2];
  }
  def: InstRW<[SBWriteResGroup53], (instregex "ST_F(32|64)m",
                                              "ST_FP(32|64|80)m")>;
  // Latency 7, 1 uop on SBPort23.
  def SBWriteResGroup54 : SchedWriteRes<[SBPort23]> {
    let Latency = 7; let NumMicroOps = 1; let ResourceCycles = [1];
  }
  def: InstRW<[SBWriteResGroup54], (instrs VBROADCASTSDYrm, VBROADCASTSSYrm,
                                           VMOVDDUPYrm, VMOVSHDUPYrm,
                                           VMOVSLDUPYrm)>;

  // Latency 7, 2 uops: SBPort23 + SBPort05.
  def SBWriteResGroup58 : SchedWriteRes<[SBPort23, SBPort05]> {
    let Latency = 7; let NumMicroOps = 2; let ResourceCycles = [1,1];
  }
  def: InstRW<[SBWriteResGroup58], (instrs VINSERTF128rm)>;

  // Latency 7, 2 uops: SBPort23 + SBPort15.
  def SBWriteResGroup59 : SchedWriteRes<[SBPort23, SBPort15]> {
    let Latency = 7; let NumMicroOps = 2; let ResourceCycles = [1,1];
  }
  def: InstRW<[SBWriteResGroup59], (instrs MMX_PADDQrm)>;

  // Latency 7, 3 uops: 2x SBPort5 + SBPort23.
  def SBWriteResGroup62 : SchedWriteRes<[SBPort5, SBPort23]> {
    let Latency = 7; let NumMicroOps = 3; let ResourceCycles = [2,1];
  }
  def: InstRW<[SBWriteResGroup62], (instrs VERRm, VERWm)>;

  // Latency 7, 3 uops: SBPort23 + 2x SBPort015.
  def SBWriteResGroup63 : SchedWriteRes<[SBPort23, SBPort015]> {
    let Latency = 7; let NumMicroOps = 3; let ResourceCycles = [1,2];
  }
  def: InstRW<[SBWriteResGroup63], (instrs LODSB, LODSW)>;

  // Latency 7, 3 uops: SBPort5 + SBPort01 + SBPort23.
  def SBWriteResGroup64 : SchedWriteRes<[SBPort5, SBPort01, SBPort23]> {
    let Latency = 7; let NumMicroOps = 3; let ResourceCycles = [1,1,1];
  }
  def: InstRW<[SBWriteResGroup64], (instrs FARJMP64m)>;
  // Latency 7, 4 uops: SBPort0 + SBPort4 + 2x SBPort23.
  def SBWriteResGroup66 : SchedWriteRes<[SBPort0, SBPort4, SBPort23]> {
    let Latency = 7; let NumMicroOps = 4; let ResourceCycles = [1,1,2];
  }
  def: InstRW<[SBWriteResGroup66], (instrs FNSTSWm)>;

  // Latency 7, 4 uops: SBPort1 + 2x SBPort5 + SBPort015.
  def SBWriteResGroup67 : SchedWriteRes<[SBPort1, SBPort5, SBPort015]> {
    let Latency = 7; let NumMicroOps = 4; let ResourceCycles = [1,2,1];
  }
  def: InstRW<[SBWriteResGroup67], (instregex "SLDT(16|32|64)r",
                                              "STR(16|32|64)r")>;

  // Latency 7, 4 uops: SBPort4 + SBPort5 + 2x SBPort23.
  def SBWriteResGroup68 : SchedWriteRes<[SBPort4, SBPort5, SBPort23]> {
    let Latency = 7; let NumMicroOps = 4; let ResourceCycles = [1,1,2];
  }
  def: InstRW<[SBWriteResGroup68], (instrs FNSTCW16m)>;
  def: InstRW<[SBWriteResGroup68], (instregex "CALL(16|32|64)m")>;

  // Shifts with a memory destination (by 1 or immediate): latency 7, 4 uops.
  def SBWriteResGroup69 : SchedWriteRes<[SBPort4, SBPort23, SBPort05]> {
    let Latency = 7; let NumMicroOps = 4; let ResourceCycles = [1,2,1];
  }
  def: InstRW<[SBWriteResGroup69], (instregex "SAR(8|16|32|64)m(1|i)",
                                              "SHL(8|16|32|64)m(1|i)",
                                              "SHR(8|16|32|64)m(1|i)")>;
  // Latency 8, 3 uops: SBPort0 + SBPort1 + SBPort23.
  def SBWriteResGroup77 : SchedWriteRes<[SBPort0, SBPort1, SBPort23]> {
    let Latency = 8; let NumMicroOps = 3; let ResourceCycles = [1,1,1];
  }
  def: InstRW<[SBWriteResGroup77], (instregex "(V?)(U?)COMI(SD|SS)rm")>;

  // Latency 6, 3 uops; SBPort23 is occupied for two cycles.
  def SBWriteResGroup81 : SchedWriteRes<[SBPort4, SBPort23, SBPort015]> {
    let Latency = 6; let NumMicroOps = 3; let ResourceCycles = [1, 2, 1];
  }
  def: InstRW<[SBWriteResGroup81], (instregex "CMPXCHG(8|16)B")>;

  // Latency 8, 5 uops: 2x SBPort23 + 3x SBPort015.
  def SBWriteResGroup83 : SchedWriteRes<[SBPort23, SBPort015]> {
    let Latency = 8; let NumMicroOps = 5; let ResourceCycles = [2,3];
  }
  def: InstRW<[SBWriteResGroup83], (instrs CMPSB, CMPSL, CMPSQ, CMPSW)>;

  // Latency 8, 5 uops: SBPort4 + 2x SBPort5 + 2x SBPort23.
  def SBWriteResGroup84 : SchedWriteRes<[SBPort4, SBPort5, SBPort23]> {
    let Latency = 8; let NumMicroOps = 5; let ResourceCycles = [1,2,2];
  }
  def: InstRW<[SBWriteResGroup84], (instrs FLDCW16m)>;

  // Rotates with a memory destination (by 1 or immediate): latency 8, 5 uops.
  def SBWriteResGroup85 : SchedWriteRes<[SBPort4, SBPort23, SBPort05]> {
    let Latency = 8; let NumMicroOps = 5; let ResourceCycles = [1,2,2];
  }
  def: InstRW<[SBWriteResGroup85], (instregex "ROL(8|16|32|64)m(1|i)",
                                              "ROR(8|16|32|64)m(1|i)")>;

  // Latency 8, 5 uops: SBPort4 + 2x SBPort23 + 2x SBPort015.
  def SBWriteResGroup86 : SchedWriteRes<[SBPort4, SBPort23, SBPort015]> {
    let Latency = 8; let NumMicroOps = 5; let ResourceCycles = [1,2,2];
  }
  def: InstRW<[SBWriteResGroup86], (instrs MOVSB, MOVSL, MOVSQ, MOVSW)>;
  def: InstRW<[SBWriteResGroup86], (instregex "XADD(8|16|32|64)rm")>;

  // Latency 8, 5 uops; SBPort23 is occupied for two cycles.
  def SBWriteResGroup87 : SchedWriteRes<[SBPort4, SBPort5, SBPort01, SBPort23]> {
    let Latency = 8; let NumMicroOps = 5; let ResourceCycles = [1,1,1,2];
  }
  def: InstRW<[SBWriteResGroup87], (instrs FARCALL64m)>;
  // Latency 9, 3 uops: SBPort5 + SBPort01 + SBPort23.
  def SBWriteResGroup95 : SchedWriteRes<[SBPort5, SBPort01, SBPort23]> {
    let Latency = 9; let NumMicroOps = 3; let ResourceCycles = [1,1,1];
  }
  def: InstRW<[SBWriteResGroup95], (instregex "LD_F(32|64|80)m")>;

  // Latency 9, 4 uops: SBPort1 + SBPort4 + 2x SBPort23.
  def SBWriteResGroup97 : SchedWriteRes<[SBPort1, SBPort4, SBPort23]> {
    let Latency = 9; let NumMicroOps = 4; let ResourceCycles = [1,1,2];
  }
  def: InstRW<[SBWriteResGroup97], (instregex "IST_F(16|32)m",
                                              "IST_FP(16|32|64)m")>;

  // Shifts/rotates by CL with a memory destination: latency 9, 6 uops.
  def SBWriteResGroup97_2 : SchedWriteRes<[SBPort4, SBPort23, SBPort05]> {
    let Latency = 9; let NumMicroOps = 6; let ResourceCycles = [1,2,3];
  }
  def: InstRW<[SBWriteResGroup97_2], (instregex "ROL(8|16|32|64)mCL",
                                                "ROR(8|16|32|64)mCL",
                                                "SAR(8|16|32|64)mCL",
                                                "SHL(8|16|32|64)mCL",
                                                "SHR(8|16|32|64)mCL")>;
  // Read-modify-write ADC/SBB (aliased from WriteADCRMW): latency 9.
  // NumMicroOps is kept equal to the sum of ResourceCycles (1 + 2 + 3 = 6);
  // it previously read 4, which under-counted the uops consumed by this write
  // and disagreed with the neighbouring 6-uop read-modify-write groups.
  def SBWriteResGroup98 : SchedWriteRes<[SBPort4, SBPort23, SBPort015]> {
    let Latency = 9;
    let NumMicroOps = 6;
    let ResourceCycles = [1,2,3];
  }
  def: SchedAlias<WriteADCRMW, SBWriteResGroup98>;
  // ADC/SBB with a memory destination and register source: latency 9.
  // NumMicroOps is kept equal to the sum of ResourceCycles (1 + 2 + 2 + 1 = 6);
  // it previously read 4, which under-counted the uops consumed by this write.
  def SBWriteResGroup99 : SchedWriteRes<[SBPort4, SBPort23, SBPort05, SBPort015]> {
    let Latency = 9;
    let NumMicroOps = 6;
    let ResourceCycles = [1,2,2,1];
  }
  def: InstRW<[SBWriteResGroup99, ReadAfterLd], (instrs ADC8mr, ADC16mr,
                                                        ADC32mr, ADC64mr,
                                                        SBB8mr, SBB16mr,
                                                        SBB32mr, SBB64mr)>;
  // Latency 9, 6 uops spread over five port groups (SBPort23 for two cycles).
  def SBWriteResGroup100 : SchedWriteRes<[SBPort4, SBPort5, SBPort23,
                                          SBPort05, SBPort015]> {
    let Latency = 9; let NumMicroOps = 6; let ResourceCycles = [1,1,2,1,1];
  }
  def : SchedAlias<WriteBitTestRegLd, SBWriteResGroup100>; // TODO - this is incorrect - no RMW
  // Latency 10, 2 uops: SBPort1 + SBPort23.
  def SBWriteResGroup101 : SchedWriteRes<[SBPort1, SBPort23]> {
    let Latency = 10; let NumMicroOps = 2; let ResourceCycles = [1,1];
  }
  def: InstRW<[SBWriteResGroup101], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
                                               "ILD_F(16|32|64)m")>;

  // Latency 11, 2 uops: SBPort0 + SBPort23.
  def SBWriteResGroup104 : SchedWriteRes<[SBPort0, SBPort23]> {
    let Latency = 11; let NumMicroOps = 2; let ResourceCycles = [1,1];
  }
  def: InstRW<[SBWriteResGroup104], (instregex "(V?)PCMPGTQrm")>;

  // Latency 11, 3 uops: 2x SBPort1 + SBPort23.
  def SBWriteResGroup106 : SchedWriteRes<[SBPort1, SBPort23]> {
    let Latency = 11; let NumMicroOps = 3; let ResourceCycles = [2,1];
  }
  def: InstRW<[SBWriteResGroup106], (instregex "FICOM(P?)(16|32)m")>;

  // Rotate-through-carry with a memory destination: latency 11, 11 uops.
  def SBWriteResGroup108 : SchedWriteRes<[SBPort05, SBPort23]> {
    let Latency = 11; let NumMicroOps = 11; let ResourceCycles = [7,4];
  }
  def: InstRW<[SBWriteResGroup108], (instregex "RCL(8|16|32|64)m",
                                               "RCR(8|16|32|64)m")>;
  // Latency 12, 2 uops: SBPort0 + SBPort23.
  def SBWriteResGroup111 : SchedWriteRes<[SBPort0, SBPort23]> {
    let Latency = 12; let NumMicroOps = 2; let ResourceCycles = [1,1];
  }
  def: InstRW<[SBWriteResGroup111], (instregex "MUL_F(32|64)m")>;

  // Latency 13, 3 uops: 2x SBPort1 + SBPort23.
  def SBWriteResGroup114 : SchedWriteRes<[SBPort1, SBPort23]> {
    let Latency = 13; let NumMicroOps = 3; let ResourceCycles = [2,1];
  }
  def: InstRW<[SBWriteResGroup114], (instregex "(ADD|SUB|SUBR)_FI(16|32)m")>;

  // Latency 15, 3 uops: SBPort0 + SBPort1 + SBPort23.
  def SBWriteResGroup119 : SchedWriteRes<[SBPort0, SBPort1, SBPort23]> {
    let Latency = 15; let NumMicroOps = 3; let ResourceCycles = [1,1,1];
  }
  def: InstRW<[SBWriteResGroup119], (instregex "MUL_FI(16|32)m")>;

  // Latency 31, 2 uops: SBPort0 + SBPort23.
  def SBWriteResGroup130 : SchedWriteRes<[SBPort0, SBPort23]> {
    let Latency = 31; let NumMicroOps = 2; let ResourceCycles = [1,1];
  }
  def: InstRW<[SBWriteResGroup130], (instregex "DIV(R?)_F(32|64)m")>;

  // Latency 34, 3 uops: SBPort0 + SBPort1 + SBPort23.
  def SBWriteResGroup131 : SchedWriteRes<[SBPort0, SBPort1, SBPort23]> {
    let Latency = 34; let NumMicroOps = 3; let ResourceCycles = [1,1,1];
  }
  def: InstRW<[SBWriteResGroup131], (instregex "DIV(R?)_FI(16|32)m")>;
  // VZEROALL: latency 9, 20 uops, with two cycles charged to SBPort5.
  def SBWriteResGroupVzeroall : SchedWriteRes<[SBPort5]> {
    let Latency = 9; let NumMicroOps = 20; let ResourceCycles = [2];
  }
  def: InstRW<[SBWriteResGroupVzeroall], (instrs VZEROALL)>;

  // VZEROUPPER: latency 1, 4 uops, and no execution resources listed.
  def SBWriteResGroupVzeroupper : SchedWriteRes<[]> {
    let Latency = 1; let NumMicroOps = 4; let ResourceCycles = [];
  }
  def: InstRW<[SBWriteResGroupVzeroupper], (instrs VZEROUPPER)>;

  // CLC is mapped to WriteZero.
  def: InstRW<[WriteZero], (instrs CLC)>;
  // Instruction variants handled by the renamer. These might not need execution
  // ports in certain conditions.
  // See Agner Fog's "The microarchitecture of Intel, AMD and VIA CPUs",
  // section "Sandy Bridge and Ivy Bridge Pipeline" > "Register allocation and
  // renaming".
  // These can be investigated with llvm-exegesis, e.g.
  // echo 'pxor %mm0, %mm0' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
  // echo 'vxorpd %xmm0, %xmm0, %xmm1' | /tmp/llvm-exegesis -mode=uops -snippets-file=-

  // Write with no execution resources and zero latency.
  def SBWriteZeroLatency : SchedWriteRes<[]> { let Latency = 0; }

  // GPR idioms: zero-latency when ZeroIdiomPredicate holds, WriteALU otherwise.
  def SBWriteZeroIdiom : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
    SchedVar<NoSchedPred,                          [WriteALU]>
  ]>;
  def : InstRW<[SBWriteZeroIdiom], (instrs SUB32rr, SUB64rr, XOR32rr, XOR64rr)>;

  // FP logic idioms: fall back to WriteFLogic.
  def SBWriteFZeroIdiom : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
    SchedVar<NoSchedPred,                          [WriteFLogic]>
  ]>;
  def : InstRW<[SBWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr,
                                            XORPDrr, VXORPDrr)>;

  // Y-suffixed FP logic idioms: fall back to WriteFLogicY.
  def SBWriteFZeroIdiomY : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
    SchedVar<NoSchedPred,                          [WriteFLogicY]>
  ]>;
  def : InstRW<[SBWriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr)>;
  // Vector integer logic idioms: zero-latency when ZeroIdiomPredicate holds,
  // WriteVecLogicX otherwise.
  def SBWriteVZeroIdiomLogicX : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
    SchedVar<NoSchedPred,                          [WriteVecLogicX]>
  ]>;
  def : InstRW<[SBWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr)>;

  // Vector integer ALU idioms: fall back to WriteVecALUX.
  def SBWriteVZeroIdiomALUX : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
    SchedVar<NoSchedPred,                          [WriteVecALUX]>
  ]>;
  def : InstRW<[SBWriteVZeroIdiomALUX], (instrs PSUBBrr,   VPSUBBrr,
                                                PSUBDrr,   VPSUBDrr,
                                                PSUBQrr,   VPSUBQrr,
                                                PSUBWrr,   VPSUBWrr,
                                                PCMPGTBrr, VPCMPGTBrr,
                                                PCMPGTDrr, VPCMPGTDrr,
                                                PCMPGTWrr, VPCMPGTWrr)>;

  // PCMPGTQ when it is not a zero idiom: latency 5, 1 uop on SBPort0.
  def SBWritePCMPGTQ : SchedWriteRes<[SBPort0]> {
    let Latency = 5; let NumMicroOps = 1; let ResourceCycles = [1];
  }
  def SBWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>,
    SchedVar<NoSchedPred,                          [SBWritePCMPGTQ]>
  ]>;
  def : InstRW<[SBWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr)>;
  // CMOVs that use both Z and C flag require an extra uop.
  // Register form: latency 3, 3 uops.
  def SBWriteCMOVA_CMOVBErr : SchedWriteRes<[SBPort05, SBPort015]> {
    let Latency = 3; let NumMicroOps = 3; let ResourceCycles = [2,1];
  }
  // Memory form: latency 8, 4 uops.
  def SBWriteCMOVA_CMOVBErm : SchedWriteRes<[SBPort23, SBPort05, SBPort015]> {
    let Latency = 8; let NumMicroOps = 4; let ResourceCycles = [1,2,1];
  }

  // Use the write above when the predicate matches, else the generic CMOV write.
  def SBCMOVA_CMOVBErr : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<IsCMOVArr_Or_CMOVBErr>, [SBWriteCMOVA_CMOVBErr]>,
    SchedVar<NoSchedPred,                             [WriteCMOV]>
  ]>;
  def SBCMOVA_CMOVBErm : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<IsCMOVArm_Or_CMOVBErm>, [SBWriteCMOVA_CMOVBErm]>,
    SchedVar<NoSchedPred,                             [WriteCMOV.Folded]>
  ]>;

  def : InstRW<[SBCMOVA_CMOVBErr], (instrs CMOV16rr, CMOV32rr, CMOV64rr)>;
  def : InstRW<[SBCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>;
  // SETCCs that use both Z and C flag require an extra uop.
  // Register form: latency 2, 2 uops on SBPort05.
  def SBWriteSETA_SETBEr : SchedWriteRes<[SBPort05]> {
    let Latency = 2; let NumMicroOps = 2; let ResourceCycles = [2];
  }
  // Memory form: latency 3, 4 uops.
  def SBWriteSETA_SETBEm : SchedWriteRes<[SBPort4, SBPort23, SBPort05]> {
    let Latency = 3; let NumMicroOps = 4; let ResourceCycles = [1,1,2];
  }

  // Use the write above when the predicate matches, else the generic SETCC write.
  def SBSETA_SETBErr : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<IsSETAr_Or_SETBEr>, [SBWriteSETA_SETBEr]>,
    SchedVar<NoSchedPred,                         [WriteSETCC]>
  ]>;
  def SBSETA_SETBErm : SchedWriteVariant<[
    SchedVar<MCSchedPredicate<IsSETAm_Or_SETBEm>, [SBWriteSETA_SETBEm]>,
    SchedVar<NoSchedPred,                         [WriteSETCCStore]>
  ]>;

  def : InstRW<[SBSETA_SETBErr], (instrs SETCCr)>;
  def : InstRW<[SBSETA_SETBErm], (instrs SETCCm)>;
  ///////////////////////////////////////////////////////////////////////////////
  // Dependency breaking instructions.
  ///////////////////////////////////////////////////////////////////////////////

  // Opcodes treated as zero idioms when ZeroIdiomPredicate holds. This list is
  // kept in step with the SchedWriteVariant zero-idiom definitions in this
  // model: every opcode given SBWriteZeroLatency there appears here as well,
  // including the ymm XORs covered by SBWriteFZeroIdiomY.
  def : IsZeroIdiomFunction<[
    // GPR Zero-idioms.
    DepBreakingClass<[ SUB32rr, SUB64rr, XOR32rr, XOR64rr ], ZeroIdiomPredicate>,

    // SSE Zero-idioms.
    DepBreakingClass<[
      // fp variants.
      XORPSrr, XORPDrr,

      // int variants.
      PXORrr,
      PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
      PCMPGTBrr, PCMPGTDrr, PCMPGTQrr, PCMPGTWrr
    ], ZeroIdiomPredicate>,

    // AVX Zero-idioms.
    DepBreakingClass<[
      // xmm fp variants.
      VXORPSrr, VXORPDrr,

      // xmm int variants.
      VPXORrr,
      VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
      VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr,

      // ymm variants.
      VXORPSYrr, VXORPDYrr
    ], ZeroIdiomPredicate>,
  ]>;
  1119. } // SchedModel