X86SchedIceLake.td 113 KB


  1. //=- X86SchedIceLake.td - X86 Ice Lake Scheduling ------------*- tablegen -*-=//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file defines the machine model for Ice Lake to support
  10. // instruction scheduling and other instruction cost heuristics.
  11. //
  12. // TODO: This is mainly a copy of X86SchedSkylakeServer.td, but allows us to
  13. // iteratively improve scheduling handling toward better modelling the
  14. // Ice Lake (Sunny/Cypress Cove) microarchitecture.
  15. //
  16. //===----------------------------------------------------------------------===//
  17. def IceLakeModel : SchedMachineModel {
  18. // All x86 instructions are modeled as a single micro-op, and Ice Lake can
  19. // decode 6 instructions per cycle.
  20. let IssueWidth = 6;
  21. let MicroOpBufferSize = 224; // Based on the reorder buffer.
  22. let LoadLatency = 5; // Same 5 cycles used for ReadAfterLd and WriteLoad in this model.
  23. let MispredictPenalty = 14;
  24. // Based on the LSD (loop-stream detector) queue size and benchmarking data.
  25. let LoopMicroOpBufferSize = 50;
  26. // This flag is cleared (0) to allow the scheduler to assign a default model
  27. // to unrecognized opcodes.
  28. let CompleteModel = 0;
  29. }
  30. let SchedModel = IceLakeModel in {
  31. // Ice Lake can issue micro-ops to 10 different ports in one cycle.
  32. // Ports 0, 1, 5, and 6 handle all computation.
  33. // Ports 4 and 9 get the data half of stores. Store data can be available later
  34. // than the store address, but since we don't model the latency of stores, we
  35. // can ignore that.
  36. // Ports 2 and 3 are identical. They handle loads and address calculations.
  37. // Ports 7 and 8 are identical. They handle store address calculations.
  38. def ICXPort0 : ProcResource<1>;
  39. def ICXPort1 : ProcResource<1>;
  40. def ICXPort2 : ProcResource<1>;
  41. def ICXPort3 : ProcResource<1>;
  42. def ICXPort4 : ProcResource<1>;
  43. def ICXPort5 : ProcResource<1>;
  44. def ICXPort6 : ProcResource<1>;
  45. def ICXPort7 : ProcResource<1>;
  46. def ICXPort8 : ProcResource<1>;
  47. def ICXPort9 : ProcResource<1>;
  48. // Many micro-ops are capable of issuing on multiple ports.
  49. def ICXPort01 : ProcResGroup<[ICXPort0, ICXPort1]>;
  50. def ICXPort23 : ProcResGroup<[ICXPort2, ICXPort3]>;
  51. def ICXPort237 : ProcResGroup<[ICXPort2, ICXPort3, ICXPort7]>;
  52. def ICXPort04 : ProcResGroup<[ICXPort0, ICXPort4]>;
  53. def ICXPort05 : ProcResGroup<[ICXPort0, ICXPort5]>;
  54. def ICXPort06 : ProcResGroup<[ICXPort0, ICXPort6]>;
  55. def ICXPort15 : ProcResGroup<[ICXPort1, ICXPort5]>;
  56. def ICXPort16 : ProcResGroup<[ICXPort1, ICXPort6]>;
  57. def ICXPort49 : ProcResGroup<[ICXPort4, ICXPort9]>;
  58. def ICXPort56 : ProcResGroup<[ICXPort5, ICXPort6]>;
  59. def ICXPort78 : ProcResGroup<[ICXPort7, ICXPort8]>;
  60. def ICXPort015 : ProcResGroup<[ICXPort0, ICXPort1, ICXPort5]>;
  61. def ICXPort056 : ProcResGroup<[ICXPort0, ICXPort5, ICXPort6]>;
  62. def ICXPort0156: ProcResGroup<[ICXPort0, ICXPort1, ICXPort5, ICXPort6]>;
  63. def ICXDivider : ProcResource<1>; // Integer division issued on port 0.
  64. // FP division and sqrt on port 0.
  65. def ICXFPDivider : ProcResource<1>;
  66. // 60 Entry Unified Scheduler
  67. def ICXPortAny : ProcResGroup<[ICXPort0, ICXPort1, ICXPort2, ICXPort3, ICXPort4,
  68. ICXPort5, ICXPort6, ICXPort7, ICXPort8, ICXPort9]> {
  69. let BufferSize=60;
  70. }
  71. // Integer loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
  72. // cycles after the memory operand.
  73. def : ReadAdvance<ReadAfterLd, 5>;
  74. // Vector loads are 5/6/7 cycles, so ReadAfterVec*Ld registers needn't be available
  75. // until 5/6/7 cycles after the memory operand.
  76. def : ReadAdvance<ReadAfterVecLd, 5>;
  77. def : ReadAdvance<ReadAfterVecXLd, 6>;
  78. def : ReadAdvance<ReadAfterVecYLd, 7>;
  79. def : ReadAdvance<ReadInt2Fpu, 0>;
  80. // Many SchedWrites are defined in pairs with and without a folded load.
  81. // Instructions with folded loads are usually micro-fused, so they only appear
  82. // as two micro-ops when queued in the reservation station.
  83. // This multiclass defines the resource usage for variants with and without
  84. // folded loads.
  85. multiclass ICXWriteResPair<X86FoldableSchedWrite SchedRW,
  86. list<ProcResourceKind> ExePorts,
  87. int Lat, list<int> Res = [1], int UOps = 1,
  88. int LoadLat = 5, int LoadUOps = 1> {
  89. // Register variant: uses ExePorts for Res cycles (default: a single cycle).
  90. def : WriteRes<SchedRW, ExePorts> {
  91. let Latency = Lat;
  92. let ResourceCycles = Res;
  93. let NumMicroOps = UOps;
  94. }
  95. // Memory variant also uses a cycle on port 2/3, adds LoadLat cycles to the
  96. // latency (default = 5) and LoadUOps micro-ops to the count (default = 1).
  97. def : WriteRes<SchedRW.Folded, !listconcat([ICXPort23], ExePorts)> {
  98. let Latency = !add(Lat, LoadLat);
  99. let ResourceCycles = !listconcat([1], Res);
  100. let NumMicroOps = !add(UOps, LoadUOps);
  101. }
  102. }
  103. // A folded store needs a cycle on port 4 for the store data, and an extra port
  104. // 2/3/7 cycle to recompute the address.
  105. def : WriteRes<WriteRMW, [ICXPort237,ICXPort4]>;
  106. // Arithmetic.
  107. defm : ICXWriteResPair<WriteALU, [ICXPort0156], 1>; // Simple integer ALU op.
  108. defm : ICXWriteResPair<WriteADC, [ICXPort06], 1>; // Integer ALU + flags op.
  109. // Integer multiplication.
  110. defm : ICXWriteResPair<WriteIMul8, [ICXPort1], 3>;
  111. defm : ICXWriteResPair<WriteIMul16, [ICXPort1,ICXPort06,ICXPort0156], 4, [1,1,2], 4>;
  112. defm : X86WriteRes<WriteIMul16Imm, [ICXPort1,ICXPort0156], 4, [1,1], 2>;
  113. defm : X86WriteRes<WriteIMul16ImmLd, [ICXPort1,ICXPort0156,ICXPort23], 8, [1,1,1], 3>;
  114. defm : X86WriteRes<WriteIMul16Reg, [ICXPort1], 3, [1], 1>;
  115. defm : X86WriteRes<WriteIMul16RegLd, [ICXPort1,ICXPort0156,ICXPort23], 8, [1,1,1], 3>;
  116. defm : ICXWriteResPair<WriteIMul32, [ICXPort1,ICXPort06,ICXPort0156], 4, [1,1,1], 3>;
  117. defm : ICXWriteResPair<WriteMULX32, [ICXPort1,ICXPort06,ICXPort0156], 3, [1,1,1], 3>;
  118. defm : ICXWriteResPair<WriteIMul32Imm, [ICXPort1], 3>;
  119. defm : ICXWriteResPair<WriteIMul32Reg, [ICXPort1], 3>;
  120. defm : ICXWriteResPair<WriteIMul64, [ICXPort1,ICXPort5], 4, [1,1], 2>;
  121. defm : ICXWriteResPair<WriteMULX64, [ICXPort1,ICXPort5], 3, [1,1], 2>;
  122. defm : ICXWriteResPair<WriteIMul64Imm, [ICXPort1], 3>;
  123. defm : ICXWriteResPair<WriteIMul64Reg, [ICXPort1], 3>;
  124. def ICXWriteIMulH : WriteRes<WriteIMulH, []> { let Latency = 4; }
  125. def : WriteRes<WriteIMulHLd, []> { // Folded-load form of WriteIMulH; uses no ports.
  126. let Latency = !add(ICXWriteIMulH.Latency, IceLakeModel.LoadLatency); // Use this model's own load latency, not SkylakeServerModel's.
  127. }
  128. defm : X86WriteRes<WriteBSWAP32, [ICXPort15], 1, [1], 1>;
  129. defm : X86WriteRes<WriteBSWAP64, [ICXPort06, ICXPort15], 2, [1,1], 2>;
  130. defm : X86WriteRes<WriteCMPXCHG,[ICXPort06, ICXPort0156], 5, [2,3], 5>;
  131. defm : X86WriteRes<WriteCMPXCHGRMW,[ICXPort23,ICXPort06,ICXPort0156,ICXPort237,ICXPort4], 8, [1,2,1,1,1], 6>;
  132. defm : X86WriteRes<WriteXCHG, [ICXPort0156], 2, [3], 3>;
  133. // TODO: Why isn't the ICXDivider used consistently (e.g. not by the 16/32/64-bit register forms)?
  134. defm : ICXWriteResPair<WriteDiv8, [ICXPort0, ICXDivider], 25, [1,10], 1, 4>;
  135. defm : X86WriteRes<WriteDiv16, [ICXPort0,ICXPort1,ICXPort5,ICXPort6,ICXPort05,ICXPort0156], 76, [7,2,8,3,1,11], 32>;
  136. defm : X86WriteRes<WriteDiv32, [ICXPort0,ICXPort1,ICXPort5,ICXPort6,ICXPort05,ICXPort0156], 76, [7,2,8,3,1,11], 32>;
  137. defm : X86WriteRes<WriteDiv64, [ICXPort0,ICXPort1,ICXPort5,ICXPort6,ICXPort05,ICXPort0156], 76, [7,2,8,3,1,11], 32>;
  138. defm : X86WriteRes<WriteDiv16Ld, [ICXPort0,ICXPort23,ICXDivider], 29, [1,1,10], 2>;
  139. defm : X86WriteRes<WriteDiv32Ld, [ICXPort0,ICXPort23,ICXDivider], 29, [1,1,10], 2>;
  140. defm : X86WriteRes<WriteDiv64Ld, [ICXPort0,ICXPort23,ICXDivider], 29, [1,1,10], 2>;
  141. defm : X86WriteRes<WriteIDiv8, [ICXPort0, ICXDivider], 25, [1,10], 1>;
  142. defm : X86WriteRes<WriteIDiv16, [ICXPort0,ICXPort1,ICXPort5,ICXPort6,ICXPort06,ICXPort0156], 102, [4,2,4,8,14,34], 66>;
  143. defm : X86WriteRes<WriteIDiv32, [ICXPort0,ICXPort1,ICXPort5,ICXPort6,ICXPort06,ICXPort0156], 102, [4,2,4,8,14,34], 66>;
  144. defm : X86WriteRes<WriteIDiv64, [ICXPort0,ICXPort1,ICXPort5,ICXPort6,ICXPort06,ICXPort0156], 102, [4,2,4,8,14,34], 66>;
  145. defm : X86WriteRes<WriteIDiv8Ld, [ICXPort0,ICXPort5,ICXPort23,ICXPort0156], 28, [2,4,1,1], 8>;
  146. defm : X86WriteRes<WriteIDiv16Ld, [ICXPort0,ICXPort5,ICXPort23,ICXPort0156], 28, [2,4,1,1], 8>;
  147. defm : X86WriteRes<WriteIDiv32Ld, [ICXPort0,ICXPort5,ICXPort23,ICXPort0156], 28, [2,4,1,1], 8>;
  148. defm : X86WriteRes<WriteIDiv64Ld, [ICXPort0,ICXPort5,ICXPort23,ICXPort0156], 28, [2,4,1,1], 8>;
  149. defm : ICXWriteResPair<WriteCRC32, [ICXPort1], 3>;
  150. def : WriteRes<WriteLEA, [ICXPort15]>; // LEA instructions can't fold loads.
  151. defm : ICXWriteResPair<WriteCMOV, [ICXPort06], 1, [1], 1>; // Conditional move.
  152. defm : X86WriteRes<WriteFCMOV, [ICXPort1], 3, [1], 1>; // x87 conditional move.
  153. def : WriteRes<WriteSETCC, [ICXPort06]>; // Setcc.
  154. def : WriteRes<WriteSETCCStore, [ICXPort06,ICXPort4,ICXPort237]> {
  155. let Latency = 2;
  156. let NumMicroOps = 3;
  157. }
  158. defm : X86WriteRes<WriteLAHFSAHF, [ICXPort06], 1, [1], 1>;
  159. defm : X86WriteRes<WriteBitTest, [ICXPort06], 1, [1], 1>;
  160. defm : X86WriteRes<WriteBitTestImmLd, [ICXPort06,ICXPort23], 6, [1,1], 2>;
  161. defm : X86WriteRes<WriteBitTestRegLd, [ICXPort0156,ICXPort23], 6, [1,1], 2>;
  162. defm : X86WriteRes<WriteBitTestSet, [ICXPort06], 1, [1], 1>;
  163. defm : X86WriteRes<WriteBitTestSetImmLd, [ICXPort06,ICXPort23], 5, [1,1], 3>;
  164. defm : X86WriteRes<WriteBitTestSetRegLd, [ICXPort0156,ICXPort23], 5, [1,1], 2>;
  165. // Integer shifts and rotates.
  166. defm : ICXWriteResPair<WriteShift, [ICXPort06], 1>;
  167. defm : ICXWriteResPair<WriteShiftCL, [ICXPort06], 3, [3], 3>;
  168. defm : ICXWriteResPair<WriteRotate, [ICXPort06], 1, [1], 1>;
  169. defm : ICXWriteResPair<WriteRotateCL, [ICXPort06], 3, [3], 3>;
  170. // SHLD/SHRD.
  171. defm : X86WriteRes<WriteSHDrri, [ICXPort1], 3, [1], 1>;
  172. defm : X86WriteRes<WriteSHDrrcl,[ICXPort1,ICXPort06,ICXPort0156], 6, [1, 2, 1], 4>;
  173. defm : X86WriteRes<WriteSHDmri, [ICXPort1,ICXPort23,ICXPort237,ICXPort0156], 9, [1, 1, 1, 1], 4>;
  174. defm : X86WriteRes<WriteSHDmrcl,[ICXPort1,ICXPort23,ICXPort237,ICXPort06,ICXPort0156], 11, [1, 1, 1, 2, 1], 6>;
  175. // Bit counts.
  176. defm : ICXWriteResPair<WriteBSF, [ICXPort1], 3>;
  177. defm : ICXWriteResPair<WriteBSR, [ICXPort1], 3>;
  178. defm : ICXWriteResPair<WriteLZCNT, [ICXPort1], 3>;
  179. defm : ICXWriteResPair<WriteTZCNT, [ICXPort1], 3>;
  180. defm : ICXWriteResPair<WritePOPCNT, [ICXPort1], 3>;
  181. // BMI1 BEXTR/BLS, BMI2 BZHI
  182. defm : ICXWriteResPair<WriteBEXTR, [ICXPort06,ICXPort15], 2, [1,1], 2>;
  183. defm : ICXWriteResPair<WriteBLS, [ICXPort15], 1>;
  184. defm : ICXWriteResPair<WriteBZHI, [ICXPort15], 1>;
  185. // Loads, stores, and moves, not folded with other operations.
  186. defm : X86WriteRes<WriteLoad, [ICXPort23], 5, [1], 1>;
  187. defm : X86WriteRes<WriteStore, [ICXPort237, ICXPort4], 1, [1,1], 1>;
  188. defm : X86WriteRes<WriteStoreNT, [ICXPort237, ICXPort4], 1, [1,1], 2>;
  189. defm : X86WriteRes<WriteMove, [ICXPort0156], 1, [1], 1>;
  190. // Model the effect of clobbering the read-write mask operand of the GATHER operation.
  191. // It does not cost anything by itself; it only has latency, matching that of WriteLoad.
  192. defm : X86WriteRes<WriteVecMaskedGatherWriteback, [], 5, [], 0>;
  193. // Idioms that clear a register, like xorps %xmm0, %xmm0.
  194. // These can often bypass execution ports completely.
  195. def : WriteRes<WriteZero, []>;
  196. // Branches don't produce values, so they have no latency, but they still
  197. // consume resources. Indirect branches can fold loads.
  198. defm : ICXWriteResPair<WriteJump, [ICXPort06], 1>;
  199. // Floating point. This covers both scalar and vector operations.
  200. defm : X86WriteRes<WriteFLD0, [ICXPort05], 1, [1], 1>;
  201. defm : X86WriteRes<WriteFLD1, [ICXPort05], 1, [2], 2>;
  202. defm : X86WriteRes<WriteFLDC, [ICXPort05], 1, [2], 2>;
  203. defm : X86WriteRes<WriteFLoad, [ICXPort23], 5, [1], 1>;
  204. defm : X86WriteRes<WriteFLoadX, [ICXPort23], 6, [1], 1>;
  205. defm : X86WriteRes<WriteFLoadY, [ICXPort23], 7, [1], 1>;
  206. defm : X86WriteRes<WriteFMaskedLoad, [ICXPort23,ICXPort015], 7, [1,1], 2>;
  207. defm : X86WriteRes<WriteFMaskedLoadY, [ICXPort23,ICXPort015], 8, [1,1], 2>;
  208. defm : X86WriteRes<WriteFStore, [ICXPort237,ICXPort4], 1, [1,1], 2>;
  209. defm : X86WriteRes<WriteFStoreX, [ICXPort237,ICXPort4], 1, [1,1], 2>;
  210. defm : X86WriteRes<WriteFStoreY, [ICXPort237,ICXPort4], 1, [1,1], 2>;
  211. defm : X86WriteRes<WriteFStoreNT, [ICXPort237,ICXPort4], 1, [1,1], 2>;
  212. defm : X86WriteRes<WriteFStoreNTX, [ICXPort237,ICXPort4], 1, [1,1], 2>;
  213. defm : X86WriteRes<WriteFStoreNTY, [ICXPort237,ICXPort4], 1, [1,1], 2>;
  214. defm : X86WriteRes<WriteFMaskedStore32, [ICXPort237,ICXPort0], 2, [1,1], 2>;
  215. defm : X86WriteRes<WriteFMaskedStore32Y, [ICXPort237,ICXPort0], 2, [1,1], 2>;
  216. defm : X86WriteRes<WriteFMaskedStore64, [ICXPort237,ICXPort0], 2, [1,1], 2>;
  217. defm : X86WriteRes<WriteFMaskedStore64Y, [ICXPort237,ICXPort0], 2, [1,1], 2>;
  218. defm : X86WriteRes<WriteFMove, [ICXPort015], 1, [1], 1>;
  219. defm : X86WriteRes<WriteFMoveX, [ICXPort015], 1, [1], 1>;
  220. defm : X86WriteRes<WriteFMoveY, [ICXPort015], 1, [1], 1>;
  221. defm : X86WriteRes<WriteFMoveZ, [ICXPort05], 1, [1], 1>;
  222. defm : X86WriteRes<WriteEMMS, [ICXPort05,ICXPort0156], 10, [9,1], 10>;
  223. defm : ICXWriteResPair<WriteFAdd, [ICXPort01], 4, [1], 1, 5>; // Floating point add/sub.
  224. defm : ICXWriteResPair<WriteFAddX, [ICXPort01], 4, [1], 1, 6>;
  225. defm : ICXWriteResPair<WriteFAddY, [ICXPort01], 4, [1], 1, 7>;
  226. defm : ICXWriteResPair<WriteFAddZ, [ICXPort05], 4, [1], 1, 7>;
  227. defm : ICXWriteResPair<WriteFAdd64, [ICXPort01], 4, [1], 1, 5>; // Floating point double add/sub.
  228. defm : ICXWriteResPair<WriteFAdd64X, [ICXPort01], 4, [1], 1, 6>;
  229. defm : ICXWriteResPair<WriteFAdd64Y, [ICXPort01], 4, [1], 1, 7>;
  230. defm : ICXWriteResPair<WriteFAdd64Z, [ICXPort05], 4, [1], 1, 7>;
  231. defm : ICXWriteResPair<WriteFCmp, [ICXPort01], 4, [1], 1, 5>; // Floating point compare.
  232. defm : ICXWriteResPair<WriteFCmpX, [ICXPort01], 4, [1], 1, 6>;
  233. defm : ICXWriteResPair<WriteFCmpY, [ICXPort01], 4, [1], 1, 7>;
  234. defm : ICXWriteResPair<WriteFCmpZ, [ICXPort05], 4, [1], 1, 7>;
  235. defm : ICXWriteResPair<WriteFCmp64, [ICXPort01], 4, [1], 1, 5>; // Floating point double compare.
  236. defm : ICXWriteResPair<WriteFCmp64X, [ICXPort01], 4, [1], 1, 6>;
  237. defm : ICXWriteResPair<WriteFCmp64Y, [ICXPort01], 4, [1], 1, 7>;
  238. defm : ICXWriteResPair<WriteFCmp64Z, [ICXPort05], 4, [1], 1, 7>;
  239. defm : ICXWriteResPair<WriteFCom, [ICXPort0], 2>; // Floating point compare to flags (X87).
  240. defm : ICXWriteResPair<WriteFComX, [ICXPort0], 2>; // Floating point compare to flags (SSE).
  241. defm : ICXWriteResPair<WriteFMul, [ICXPort01], 4, [1], 1, 5>; // Floating point multiplication.
  242. defm : ICXWriteResPair<WriteFMulX, [ICXPort01], 4, [1], 1, 6>;
  243. defm : ICXWriteResPair<WriteFMulY, [ICXPort01], 4, [1], 1, 7>;
  244. defm : ICXWriteResPair<WriteFMulZ, [ICXPort05], 4, [1], 1, 7>;
  245. defm : ICXWriteResPair<WriteFMul64, [ICXPort01], 4, [1], 1, 5>; // Floating point double multiplication.
  246. defm : ICXWriteResPair<WriteFMul64X, [ICXPort01], 4, [1], 1, 6>;
  247. defm : ICXWriteResPair<WriteFMul64Y, [ICXPort01], 4, [1], 1, 7>;
  248. defm : ICXWriteResPair<WriteFMul64Z, [ICXPort05], 4, [1], 1, 7>;
  249. defm : ICXWriteResPair<WriteFDiv, [ICXPort0,ICXFPDivider], 11, [1,3], 1, 5>; // 10-14 cycles. // Floating point division.
  250. defm : ICXWriteResPair<WriteFDivX, [ICXPort0,ICXFPDivider], 11, [1,3], 1, 6>; // 10-14 cycles.
  251. defm : ICXWriteResPair<WriteFDivY, [ICXPort0,ICXFPDivider], 11, [1,5], 1, 7>; // 10-14 cycles.
  252. defm : ICXWriteResPair<WriteFDivZ, [ICXPort0,ICXPort5,ICXFPDivider], 18, [2,1,10], 3, 7>; // 10-14 cycles.
  253. defm : ICXWriteResPair<WriteFDiv64, [ICXPort0,ICXFPDivider], 14, [1,4], 1, 5>; // 10-14 cycles. // Floating point division.
  254. defm : ICXWriteResPair<WriteFDiv64X, [ICXPort0,ICXFPDivider], 14, [1,4], 1, 6>; // 10-14 cycles.
  255. defm : ICXWriteResPair<WriteFDiv64Y, [ICXPort0,ICXFPDivider], 14, [1,8], 1, 7>; // 10-14 cycles.
  256. defm : ICXWriteResPair<WriteFDiv64Z, [ICXPort0,ICXPort5,ICXFPDivider], 23, [2,1,16], 3, 7>; // 10-14 cycles.
  257. defm : ICXWriteResPair<WriteFSqrt, [ICXPort0,ICXFPDivider], 12, [1,3], 1, 5>; // Floating point square root.
  258. defm : ICXWriteResPair<WriteFSqrtX, [ICXPort0,ICXFPDivider], 12, [1,3], 1, 6>;
  259. defm : ICXWriteResPair<WriteFSqrtY, [ICXPort0,ICXFPDivider], 12, [1,6], 1, 7>;
  260. defm : ICXWriteResPair<WriteFSqrtZ, [ICXPort0,ICXPort5,ICXFPDivider], 20, [2,1,12], 3, 7>;
  261. defm : ICXWriteResPair<WriteFSqrt64, [ICXPort0,ICXFPDivider], 18, [1,6], 1, 5>; // Floating point double square root.
  262. defm : ICXWriteResPair<WriteFSqrt64X, [ICXPort0,ICXFPDivider], 18, [1,6], 1, 6>;
  263. defm : ICXWriteResPair<WriteFSqrt64Y, [ICXPort0,ICXFPDivider], 18, [1,12],1, 7>;
  264. defm : ICXWriteResPair<WriteFSqrt64Z, [ICXPort0,ICXPort5,ICXFPDivider], 32, [2,1,24], 3, 7>;
  265. defm : ICXWriteResPair<WriteFSqrt80, [ICXPort0,ICXFPDivider], 21, [1,7]>; // Floating point long double square root.
  266. defm : ICXWriteResPair<WriteFRcp, [ICXPort0], 4, [1], 1, 5>; // Floating point reciprocal estimate.
  267. defm : ICXWriteResPair<WriteFRcpX, [ICXPort0], 4, [1], 1, 6>;
  268. defm : ICXWriteResPair<WriteFRcpY, [ICXPort0], 4, [1], 1, 7>;
  269. defm : ICXWriteResPair<WriteFRcpZ, [ICXPort0,ICXPort5], 4, [2,1], 3, 7>;
  270. defm : ICXWriteResPair<WriteFRsqrt, [ICXPort0], 4, [1], 1, 5>; // Floating point reciprocal square root estimate.
  271. defm : ICXWriteResPair<WriteFRsqrtX,[ICXPort0], 4, [1], 1, 6>;
  272. defm : ICXWriteResPair<WriteFRsqrtY,[ICXPort0], 4, [1], 1, 7>;
  273. defm : ICXWriteResPair<WriteFRsqrtZ,[ICXPort0,ICXPort5], 9, [2,1], 3, 7>;
  274. defm : ICXWriteResPair<WriteFMA, [ICXPort01], 4, [1], 1, 5>; // Fused Multiply Add.
  275. defm : ICXWriteResPair<WriteFMAX, [ICXPort01], 4, [1], 1, 6>;
  276. defm : ICXWriteResPair<WriteFMAY, [ICXPort01], 4, [1], 1, 7>;
  277. defm : ICXWriteResPair<WriteFMAZ, [ICXPort05], 4, [1], 1, 7>;
  278. defm : ICXWriteResPair<WriteDPPD, [ICXPort5,ICXPort015], 9, [1,2], 3, 6>; // Floating point double dot product.
  279. defm : ICXWriteResPair<WriteDPPS, [ICXPort5,ICXPort015], 13, [1,3], 4, 6>;
  280. defm : ICXWriteResPair<WriteDPPSY,[ICXPort5,ICXPort015], 13, [1,3], 4, 7>;
  281. defm : ICXWriteResPair<WriteFSign, [ICXPort0], 1>; // Floating point fabs/fchs.
  282. defm : ICXWriteResPair<WriteFRnd, [ICXPort01], 8, [2], 2, 6>; // Floating point rounding.
  283. defm : ICXWriteResPair<WriteFRndY, [ICXPort01], 8, [2], 2, 7>;
  284. defm : ICXWriteResPair<WriteFRndZ, [ICXPort05], 8, [2], 2, 7>;
  285. defm : ICXWriteResPair<WriteFLogic, [ICXPort015], 1, [1], 1, 6>; // Floating point and/or/xor logicals.
  286. defm : ICXWriteResPair<WriteFLogicY, [ICXPort015], 1, [1], 1, 7>;
  287. defm : ICXWriteResPair<WriteFLogicZ, [ICXPort05], 1, [1], 1, 7>;
  288. defm : ICXWriteResPair<WriteFTest, [ICXPort0], 2, [1], 1, 6>; // Floating point TEST instructions.
  289. defm : ICXWriteResPair<WriteFTestY, [ICXPort0], 2, [1], 1, 7>;
  290. defm : ICXWriteResPair<WriteFTestZ, [ICXPort0], 2, [1], 1, 7>;
  291. defm : ICXWriteResPair<WriteFShuffle, [ICXPort15], 1, [1], 1, 6>; // Floating point vector shuffles.
  292. defm : ICXWriteResPair<WriteFShuffleY, [ICXPort15], 1, [1], 1, 7>;
  293. defm : ICXWriteResPair<WriteFShuffleZ, [ICXPort5], 1, [1], 1, 7>;
  294. defm : ICXWriteResPair<WriteFVarShuffle, [ICXPort15], 1, [1], 1, 6>; // Floating point vector variable shuffles.
  295. defm : ICXWriteResPair<WriteFVarShuffleY, [ICXPort15], 1, [1], 1, 7>;
  296. defm : ICXWriteResPair<WriteFVarShuffleZ, [ICXPort5], 1, [1], 1, 7>;
  297. defm : ICXWriteResPair<WriteFBlend, [ICXPort015], 1, [1], 1, 6>; // Floating point vector blends.
  298. defm : ICXWriteResPair<WriteFBlendY,[ICXPort015], 1, [1], 1, 7>;
  299. defm : ICXWriteResPair<WriteFBlendZ,[ICXPort015], 1, [1], 1, 7>;
  300. defm : ICXWriteResPair<WriteFVarBlend, [ICXPort015], 2, [2], 2, 6>; // Fp vector variable blends.
  301. defm : ICXWriteResPair<WriteFVarBlendY,[ICXPort015], 2, [2], 2, 7>;
  302. defm : ICXWriteResPair<WriteFVarBlendZ,[ICXPort015], 2, [2], 2, 7>;
  303. // FMA Scheduling helper class.
  304. // class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
  305. // Vector integer operations.
  306. defm : X86WriteRes<WriteVecLoad, [ICXPort23], 5, [1], 1>;
  307. defm : X86WriteRes<WriteVecLoadX, [ICXPort23], 6, [1], 1>;
  308. defm : X86WriteRes<WriteVecLoadY, [ICXPort23], 7, [1], 1>;
  309. defm : X86WriteRes<WriteVecLoadNT, [ICXPort23], 6, [1], 1>;
  310. defm : X86WriteRes<WriteVecLoadNTY, [ICXPort23], 7, [1], 1>;
  311. defm : X86WriteRes<WriteVecMaskedLoad, [ICXPort23,ICXPort015], 7, [1,1], 2>;
  312. defm : X86WriteRes<WriteVecMaskedLoadY, [ICXPort23,ICXPort015], 8, [1,1], 2>;
  313. defm : X86WriteRes<WriteVecStore, [ICXPort237,ICXPort4], 1, [1,1], 2>;
  314. defm : X86WriteRes<WriteVecStoreX, [ICXPort237,ICXPort4], 1, [1,1], 2>;
  315. defm : X86WriteRes<WriteVecStoreY, [ICXPort237,ICXPort4], 1, [1,1], 2>;
  316. defm : X86WriteRes<WriteVecStoreNT, [ICXPort237,ICXPort4], 1, [1,1], 2>;
  317. defm : X86WriteRes<WriteVecStoreNTY, [ICXPort237,ICXPort4], 1, [1,1], 2>;
  318. defm : X86WriteRes<WriteVecMaskedStore32, [ICXPort237,ICXPort0], 2, [1,1], 2>;
  319. defm : X86WriteRes<WriteVecMaskedStore32Y, [ICXPort237,ICXPort0], 2, [1,1], 2>;
  320. defm : X86WriteRes<WriteVecMaskedStore64, [ICXPort237,ICXPort0], 2, [1,1], 2>;
  321. defm : X86WriteRes<WriteVecMaskedStore64Y, [ICXPort237,ICXPort0], 2, [1,1], 2>;
  322. defm : X86WriteRes<WriteVecMove, [ICXPort05], 1, [1], 1>;
  323. defm : X86WriteRes<WriteVecMoveX, [ICXPort015], 1, [1], 1>;
  324. defm : X86WriteRes<WriteVecMoveY, [ICXPort015], 1, [1], 1>;
  325. defm : X86WriteRes<WriteVecMoveZ, [ICXPort05], 1, [1], 1>;
  326. defm : X86WriteRes<WriteVecMoveToGpr, [ICXPort0], 2, [1], 1>;
  327. defm : X86WriteRes<WriteVecMoveFromGpr, [ICXPort5], 1, [1], 1>;
  328. defm : ICXWriteResPair<WriteVecALU, [ICXPort05], 1, [1], 1, 5>; // Vector integer ALU op, no logicals.
  329. defm : ICXWriteResPair<WriteVecALUX, [ICXPort01], 1, [1], 1, 6>;
  330. defm : ICXWriteResPair<WriteVecALUY, [ICXPort01], 1, [1], 1, 7>;
  331. defm : ICXWriteResPair<WriteVecALUZ, [ICXPort0], 1, [1], 1, 7>;
  332. defm : ICXWriteResPair<WriteVecLogic, [ICXPort05], 1, [1], 1, 5>; // Vector integer and/or/xor.
  333. defm : ICXWriteResPair<WriteVecLogicX,[ICXPort015], 1, [1], 1, 6>;
  334. defm : ICXWriteResPair<WriteVecLogicY,[ICXPort015], 1, [1], 1, 7>;
  335. defm : ICXWriteResPair<WriteVecLogicZ,[ICXPort05], 1, [1], 1, 7>;
  336. defm : ICXWriteResPair<WriteVecTest, [ICXPort0,ICXPort5], 3, [1,1], 2, 6>; // Vector integer TEST instructions.
  337. defm : ICXWriteResPair<WriteVecTestY, [ICXPort0,ICXPort5], 3, [1,1], 2, 7>;
  338. defm : ICXWriteResPair<WriteVecTestZ, [ICXPort0,ICXPort5], 3, [1,1], 2, 7>;
  339. defm : ICXWriteResPair<WriteVecIMul, [ICXPort0], 5, [1], 1, 5>; // Vector integer multiply.
  340. defm : ICXWriteResPair<WriteVecIMulX, [ICXPort01], 5, [1], 1, 6>;
  341. defm : ICXWriteResPair<WriteVecIMulY, [ICXPort01], 5, [1], 1, 7>;
  342. defm : ICXWriteResPair<WriteVecIMulZ, [ICXPort05], 5, [1], 1, 7>;
  343. defm : ICXWriteResPair<WritePMULLD, [ICXPort01], 10, [2], 2, 6>; // Vector PMULLD.
  344. defm : ICXWriteResPair<WritePMULLDY, [ICXPort01], 10, [2], 2, 7>;
  345. defm : ICXWriteResPair<WritePMULLDZ, [ICXPort05], 10, [2], 2, 7>;
  346. defm : ICXWriteResPair<WriteShuffle, [ICXPort5], 1, [1], 1, 5>; // Vector shuffles.
  347. defm : ICXWriteResPair<WriteShuffleX, [ICXPort15], 1, [1], 1, 6>;
  348. defm : ICXWriteResPair<WriteShuffleY, [ICXPort15], 1, [1], 1, 7>;
  349. defm : ICXWriteResPair<WriteShuffleZ, [ICXPort5], 1, [1], 1, 7>;
  350. defm : ICXWriteResPair<WriteVarShuffle, [ICXPort5], 1, [1], 1, 5>; // Vector variable shuffles.
  351. defm : ICXWriteResPair<WriteVarShuffleX, [ICXPort15], 1, [1], 1, 6>;
  352. defm : ICXWriteResPair<WriteVarShuffleY, [ICXPort15], 1, [1], 1, 7>;
  353. defm : ICXWriteResPair<WriteVarShuffleZ, [ICXPort5], 1, [1], 1, 7>;
  354. defm : ICXWriteResPair<WriteBlend, [ICXPort5], 1, [1], 1, 6>; // Vector blends.
  355. defm : ICXWriteResPair<WriteBlendY,[ICXPort5], 1, [1], 1, 7>;
  356. defm : ICXWriteResPair<WriteBlendZ,[ICXPort5], 1, [1], 1, 7>;
  357. defm : ICXWriteResPair<WriteVarBlend, [ICXPort015], 2, [2], 2, 6>; // Vector variable blends.
  358. defm : ICXWriteResPair<WriteVarBlendY,[ICXPort015], 2, [2], 2, 6>;
  359. defm : ICXWriteResPair<WriteVarBlendZ,[ICXPort05], 2, [1], 1, 6>;
  360. defm : ICXWriteResPair<WriteMPSAD, [ICXPort5], 4, [2], 2, 6>; // Vector MPSAD.
  361. defm : ICXWriteResPair<WriteMPSADY, [ICXPort5], 4, [2], 2, 7>;
  362. defm : ICXWriteResPair<WriteMPSADZ, [ICXPort5], 4, [2], 2, 7>;
  363. defm : ICXWriteResPair<WritePSADBW, [ICXPort5], 3, [1], 1, 5>; // Vector PSADBW.
  364. defm : ICXWriteResPair<WritePSADBWX, [ICXPort5], 3, [1], 1, 6>;
  365. defm : ICXWriteResPair<WritePSADBWY, [ICXPort5], 3, [1], 1, 7>;
  366. defm : ICXWriteResPair<WritePSADBWZ, [ICXPort5], 3, [1], 1, 7>; // TODO: 512-bit ops require ports 0/1 to be joined.
  367. defm : ICXWriteResPair<WritePHMINPOS, [ICXPort0], 4, [1], 1, 6>; // Vector PHMINPOS.
  368. // Vector integer shifts.
  369. defm : ICXWriteResPair<WriteVecShift, [ICXPort0], 1, [1], 1, 5>;
  370. defm : X86WriteRes<WriteVecShiftX, [ICXPort5,ICXPort01], 2, [1,1], 2>;
  371. defm : X86WriteRes<WriteVecShiftY, [ICXPort5,ICXPort01], 4, [1,1], 2>;
  372. defm : X86WriteRes<WriteVecShiftZ, [ICXPort5,ICXPort0], 4, [1,1], 2>;
  373. defm : X86WriteRes<WriteVecShiftXLd, [ICXPort01,ICXPort23], 7, [1,1], 2>;
  374. defm : X86WriteRes<WriteVecShiftYLd, [ICXPort01,ICXPort23], 8, [1,1], 2>;
  375. defm : X86WriteRes<WriteVecShiftZLd, [ICXPort0,ICXPort23], 8, [1,1], 2>;
  376. defm : ICXWriteResPair<WriteVecShiftImm, [ICXPort0], 1, [1], 1, 5>;
  377. defm : ICXWriteResPair<WriteVecShiftImmX, [ICXPort01], 1, [1], 1, 6>; // Vector integer immediate shifts.
  378. defm : ICXWriteResPair<WriteVecShiftImmY, [ICXPort01], 1, [1], 1, 7>;
  379. defm : ICXWriteResPair<WriteVecShiftImmZ, [ICXPort0], 1, [1], 1, 7>;
  380. defm : ICXWriteResPair<WriteVarVecShift, [ICXPort01], 1, [1], 1, 6>; // Variable vector shifts.
  381. defm : ICXWriteResPair<WriteVarVecShiftY, [ICXPort01], 1, [1], 1, 7>;
  382. defm : ICXWriteResPair<WriteVarVecShiftZ, [ICXPort0], 1, [1], 1, 7>;
  383. // Vector insert/extract operations.
  384. def : WriteRes<WriteVecInsert, [ICXPort5]> {
  385. let Latency = 2;
  386. let NumMicroOps = 2;
  387. let ResourceCycles = [2];
  388. }
  389. def : WriteRes<WriteVecInsertLd, [ICXPort5,ICXPort23]> {
  390. let Latency = 6;
  391. let NumMicroOps = 2;
  392. }
  393. def: InstRW<[WriteVecInsertLd], (instregex "(V?)MOV(H|L)(PD|PS)rm")>;
  394. def : WriteRes<WriteVecExtract, [ICXPort0,ICXPort5]> {
  395. let Latency = 3;
  396. let NumMicroOps = 2;
  397. }
  398. def : WriteRes<WriteVecExtractSt, [ICXPort4,ICXPort5,ICXPort237]> {
  399. let Latency = 2;
  400. let NumMicroOps = 3;
  401. }
  402. // Conversion between integer and float.
  403. defm : ICXWriteResPair<WriteCvtSS2I, [ICXPort01], 6, [2], 2>; // Needs more work: DD vs DQ.
  404. defm : ICXWriteResPair<WriteCvtPS2I, [ICXPort01], 3>;
  405. defm : ICXWriteResPair<WriteCvtPS2IY, [ICXPort01], 3>;
  406. defm : ICXWriteResPair<WriteCvtPS2IZ, [ICXPort05], 3>;
  407. defm : ICXWriteResPair<WriteCvtSD2I, [ICXPort01], 6, [2], 2>;
  408. defm : ICXWriteResPair<WriteCvtPD2I, [ICXPort01], 3>;
  409. defm : ICXWriteResPair<WriteCvtPD2IY, [ICXPort01], 3>;
  410. defm : ICXWriteResPair<WriteCvtPD2IZ, [ICXPort05], 3>;
  411. defm : ICXWriteResPair<WriteCvtI2SS, [ICXPort1], 4>;
  412. defm : ICXWriteResPair<WriteCvtI2PS, [ICXPort01], 4>;
  413. defm : ICXWriteResPair<WriteCvtI2PSY, [ICXPort01], 4>;
  414. defm : ICXWriteResPair<WriteCvtI2PSZ, [ICXPort05], 4>; // Needs more work: DD vs DQ.
  415. defm : ICXWriteResPair<WriteCvtI2SD, [ICXPort1], 4>;
  416. defm : ICXWriteResPair<WriteCvtI2PD, [ICXPort01], 4>;
  417. defm : ICXWriteResPair<WriteCvtI2PDY, [ICXPort01], 4>;
  418. defm : ICXWriteResPair<WriteCvtI2PDZ, [ICXPort05], 4>;
  419. defm : ICXWriteResPair<WriteCvtSS2SD, [ICXPort1], 3>;
  420. defm : ICXWriteResPair<WriteCvtPS2PD, [ICXPort1], 3>;
  421. defm : ICXWriteResPair<WriteCvtPS2PDY, [ICXPort5,ICXPort01], 3, [1,1], 2>;
  422. defm : ICXWriteResPair<WriteCvtPS2PDZ, [ICXPort05], 3, [2], 2>;
  423. defm : ICXWriteResPair<WriteCvtSD2SS, [ICXPort5,ICXPort01], 5, [1,1], 2, 5>;
  424. defm : ICXWriteResPair<WriteCvtPD2PS, [ICXPort5,ICXPort01], 5, [1,1], 2, 6>;
  425. defm : ICXWriteResPair<WriteCvtPD2PSY, [ICXPort5,ICXPort01], 7, [1,1], 2, 7>;
  426. defm : ICXWriteResPair<WriteCvtPD2PSZ, [ICXPort5,ICXPort0], 7, [1,1], 2, 7>;
  427. defm : X86WriteRes<WriteCvtPH2PS, [ICXPort5,ICXPort01], 5, [1,1], 2>;
  428. defm : X86WriteRes<WriteCvtPH2PSY, [ICXPort5,ICXPort01], 7, [1,1], 2>;
  429. defm : X86WriteRes<WriteCvtPH2PSZ, [ICXPort5,ICXPort0], 7, [1,1], 2>;
  430. defm : X86WriteRes<WriteCvtPH2PSLd, [ICXPort23,ICXPort01], 9, [1,1], 2>;
  431. defm : X86WriteRes<WriteCvtPH2PSYLd, [ICXPort23,ICXPort01], 10, [1,1], 2>;
  432. defm : X86WriteRes<WriteCvtPH2PSZLd, [ICXPort23,ICXPort05], 10, [1,1], 2>;
  433. defm : X86WriteRes<WriteCvtPS2PH, [ICXPort5,ICXPort01], 5, [1,1], 2>;
  434. defm : X86WriteRes<WriteCvtPS2PHY, [ICXPort5,ICXPort01], 7, [1,1], 2>;
  435. defm : X86WriteRes<WriteCvtPS2PHZ, [ICXPort5,ICXPort05], 7, [1,1], 2>;
  436. defm : X86WriteRes<WriteCvtPS2PHSt, [ICXPort4,ICXPort5,ICXPort237,ICXPort01], 6, [1,1,1,1], 4>;
  437. defm : X86WriteRes<WriteCvtPS2PHYSt, [ICXPort4,ICXPort5,ICXPort237,ICXPort01], 8, [1,1,1,1], 4>;
  438. defm : X86WriteRes<WriteCvtPS2PHZSt, [ICXPort4,ICXPort5,ICXPort237,ICXPort05], 8, [1,1,1,1], 4>;
  439. // Strings instructions.
  440. // Packed Compare Implicit Length Strings, Return Mask
  441. def : WriteRes<WritePCmpIStrM, [ICXPort0]> {
  442. let Latency = 10;
  443. let NumMicroOps = 3;
  444. let ResourceCycles = [3];
  445. }
  446. def : WriteRes<WritePCmpIStrMLd, [ICXPort0, ICXPort23]> {
  447. let Latency = 16;
  448. let NumMicroOps = 4;
  449. let ResourceCycles = [3,1];
  450. }
  451. // Packed Compare Explicit Length Strings, Return Mask
  452. def : WriteRes<WritePCmpEStrM, [ICXPort0, ICXPort5, ICXPort015, ICXPort0156]> {
  453. let Latency = 19;
  454. let NumMicroOps = 9;
  455. let ResourceCycles = [4,3,1,1];
  456. }
  457. def : WriteRes<WritePCmpEStrMLd, [ICXPort0, ICXPort5, ICXPort23, ICXPort015, ICXPort0156]> {
  458. let Latency = 25;
  459. let NumMicroOps = 10;
  460. let ResourceCycles = [4,3,1,1,1];
  461. }
  462. // Packed Compare Implicit Length Strings, Return Index
  463. def : WriteRes<WritePCmpIStrI, [ICXPort0]> {
  464. let Latency = 10;
  465. let NumMicroOps = 3;
  466. let ResourceCycles = [3];
  467. }
  468. def : WriteRes<WritePCmpIStrILd, [ICXPort0, ICXPort23]> {
  469. let Latency = 16;
  470. let NumMicroOps = 4;
  471. let ResourceCycles = [3,1];
  472. }
  473. // Packed Compare Explicit Length Strings, Return Index
  // WritePCmpEStrI: 8 uops (4 ICXPort0, 3 ICXPort5, 1 ICXPort0156), latency 18.
  // The Ld form adds one ICXPort23 cycle: 9 uops, latency 24.
  474. def : WriteRes<WritePCmpEStrI, [ICXPort0,ICXPort5,ICXPort0156]> {
  475. let Latency = 18;
  476. let NumMicroOps = 8;
  477. let ResourceCycles = [4,3,1];
  478. }
  479. def : WriteRes<WritePCmpEStrILd, [ICXPort0, ICXPort5, ICXPort23, ICXPort0156]> {
  480. let Latency = 24;
  481. let NumMicroOps = 9;
  482. let ResourceCycles = [4,3,1,1];
  483. }
  484. // MOVMSK Instructions.
  // All four MOVMSK write classes are bound to ICXPort0 with latency 2;
  // NumMicroOps and ResourceCycles are left at the WriteRes defaults.
  485. def : WriteRes<WriteFMOVMSK, [ICXPort0]> { let Latency = 2; }
  486. def : WriteRes<WriteVecMOVMSK, [ICXPort0]> { let Latency = 2; }
  487. def : WriteRes<WriteVecMOVMSKY, [ICXPort0]> { let Latency = 2; }
  488. def : WriteRes<WriteMMXMOVMSK, [ICXPort0]> { let Latency = 2; }
  489. // AES instructions.
  // WriteAESDecEnc: single uop on ICXPort0, latency 4.
  // The Ld form adds one ICXPort23 uop and raises latency to 10.
  490. def : WriteRes<WriteAESDecEnc, [ICXPort0]> { // Decryption, encryption.
  491. let Latency = 4;
  492. let NumMicroOps = 1;
  493. let ResourceCycles = [1];
  494. }
  495. def : WriteRes<WriteAESDecEncLd, [ICXPort0, ICXPort23]> {
  496. let Latency = 10;
  497. let NumMicroOps = 2;
  498. let ResourceCycles = [1,1];
  499. }
  // WriteAESIMC: 2 uops, both on ICXPort0, latency 8.
  // The Ld form adds one ICXPort23 uop (3 uops total), latency 14.
  500. def : WriteRes<WriteAESIMC, [ICXPort0]> { // InvMixColumn.
  501. let Latency = 8;
  502. let NumMicroOps = 2;
  503. let ResourceCycles = [2];
  504. }
  505. def : WriteRes<WriteAESIMCLd, [ICXPort0, ICXPort23]> {
  506. let Latency = 14;
  507. let NumMicroOps = 3;
  508. let ResourceCycles = [2,1];
  509. }
  // WriteAESKeyGen: 11 uops (3 ICXPort0, 6 ICXPort5, 2 ICXPort015), latency 20.
  // The Ld form keeps the uop count at 11: ICXPort015 drops to one cycle and
  // one ICXPort23 cycle is added; latency becomes 25.
  510. def : WriteRes<WriteAESKeyGen, [ICXPort0,ICXPort5,ICXPort015]> { // Key Generation.
  511. let Latency = 20;
  512. let NumMicroOps = 11;
  513. let ResourceCycles = [3,6,2];
  514. }
  515. def : WriteRes<WriteAESKeyGenLd, [ICXPort0,ICXPort5,ICXPort23,ICXPort015]> {
  516. let Latency = 25;
  517. let NumMicroOps = 11;
  518. let ResourceCycles = [3,6,1,1];
  519. }
  520. // Carry-less multiplication instructions.
  // WriteCLMul: single uop on ICXPort5, latency 6.
  // The Ld form adds one ICXPort23 uop and raises latency to 12.
  521. def : WriteRes<WriteCLMul, [ICXPort5]> {
  522. let Latency = 6;
  523. let NumMicroOps = 1;
  524. let ResourceCycles = [1];
  525. }
  526. def : WriteRes<WriteCLMulLd, [ICXPort5, ICXPort23]> {
  527. let Latency = 12;
  528. let NumMicroOps = 2;
  529. let ResourceCycles = [1,1];
  530. }
  531. // Catch-all for expensive system instructions.
  // WriteSystem is given a flat latency of 100 on ICXPort0156; uop count and
  // resource cycles are left at the WriteRes defaults.
  532. def : WriteRes<WriteSystem, [ICXPort0156]> { let Latency = 100; } // def WriteSystem : SchedWrite;
  533. // AVX2.
  // All five 256-bit shuffle/width-changing classes share one ICXPort5 entry.
  // NOTE(review): the trailing arguments "3, [1], 1, 7" are presumably
  // <latency, per-port cycles, uop count, load latency> - confirm against the
  // ICXWriteResPair multiclass, which is defined outside this section.
  534. defm : ICXWriteResPair<WriteFShuffle256, [ICXPort5], 3, [1], 1, 7>; // Fp 256-bit width vector shuffles.
  535. defm : ICXWriteResPair<WriteFVarShuffle256, [ICXPort5], 3, [1], 1, 7>; // Fp 256-bit width vector variable shuffles.
  536. defm : ICXWriteResPair<WriteShuffle256, [ICXPort5], 3, [1], 1, 7>; // 256-bit width vector shuffles.
  537. defm : ICXWriteResPair<WriteVPMOV256, [ICXPort5], 3, [1], 1, 7>; // 256-bit width packed vector width-changing move.
  538. defm : ICXWriteResPair<WriteVarShuffle256, [ICXPort5], 3, [1], 1, 7>; // 256-bit width vector variable shuffles.
  539. // Old microcoded instructions that nobody uses.
  // WriteMicrocoded gets the same flat model as WriteSystem in this file:
  // ICXPort0156 with latency 100 and default uop/cycle counts.
  540. def : WriteRes<WriteMicrocoded, [ICXPort0156]> { let Latency = 100; } // def WriteMicrocoded : SchedWrite;
  541. // Fence instructions.
  // WriteFence occupies ICXPort23 and ICXPort4; latency, uop count and cycles
  // are all left at the WriteRes defaults (no body is given).
  542. def : WriteRes<WriteFence, [ICXPort23, ICXPort4]>;
  543. // Load/store MXCSR.
  // MXCSR load: 3 uops (ICXPort0, ICXPort23, ICXPort0156), latency 7.
  // MXCSR store: 3 uops (ICXPort4, ICXPort5, ICXPort237), latency 2.
  544. def : WriteRes<WriteLDMXCSR, [ICXPort0,ICXPort23,ICXPort0156]> { let Latency = 7; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; }
  545. def : WriteRes<WriteSTMXCSR, [ICXPort4,ICXPort5,ICXPort237]> { let Latency = 2; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; }
  546. // Nop: not very useful, except that it provides a model for nops!
  // WriteNop is modelled with an empty resource list, i.e. it occupies no port.
  547. def : WriteRes<WriteNop, []>;
  548. ////////////////////////////////////////////////////////////////////////////////
  549. // Horizontal add/sub instructions.
  550. ////////////////////////////////////////////////////////////////////////////////
  // Horizontal add/sub pairs: each uses two ICXPort5 cycles plus one cycle on
  // a second port group (ICXPort05 for WritePHAdd, ICXPort015 for the others).
  // NOTE(review): the trailing arguments are presumably <latency, per-port
  // cycles, uop count, load latency> - confirm against ICXWriteResPair.
  551. defm : ICXWriteResPair<WriteFHAdd, [ICXPort5,ICXPort015], 6, [2,1], 3, 6>;
  552. defm : ICXWriteResPair<WriteFHAddY, [ICXPort5,ICXPort015], 6, [2,1], 3, 7>;
  553. defm : ICXWriteResPair<WritePHAdd, [ICXPort5,ICXPort05], 3, [2,1], 3, 5>;
  554. defm : ICXWriteResPair<WritePHAddX, [ICXPort5,ICXPort015], 3, [2,1], 3, 6>;
  555. defm : ICXWriteResPair<WritePHAddY, [ICXPort5,ICXPort015], 3, [2,1], 3, 7>;
  556. // Remaining instrs.
  // ICXWriteResGroup1: 1 uop on ICXPort0, latency 1.
  // Bound below to the K* mask ops, MMX_* register forms and VPMOV*2M
  // opcodes matched by the regex list.
  557. def ICXWriteResGroup1 : SchedWriteRes<[ICXPort0]> {
  558. let Latency = 1;
  559. let NumMicroOps = 1;
  560. let ResourceCycles = [1];
  561. }
  562. def: InstRW<[ICXWriteResGroup1], (instregex "KAND(B|D|Q|W)rr",
  563. "KANDN(B|D|Q|W)rr",
  564. "KMOV(B|D|Q|W)kk",
  565. "KNOT(B|D|Q|W)rr",
  566. "KOR(B|D|Q|W)rr",
  567. "KXNOR(B|D|Q|W)rr",
  568. "KXOR(B|D|Q|W)rr",
  569. "KSET0(B|D|Q|W)", // Same as KXOR
  570. "KSET1(B|D|Q|W)", // Same as KXNOR
  571. "MMX_PADDS(B|W)rr",
  572. "MMX_PADDUS(B|W)rr",
  573. "MMX_PAVG(B|W)rr",
  574. "MMX_PCMPEQ(B|D|W)rr",
  575. "MMX_PCMPGT(B|D|W)rr",
  576. "MMX_P(MAX|MIN)SWrr",
  577. "MMX_P(MAX|MIN)UBrr",
  578. "MMX_PSUBS(B|W)rr",
  579. "MMX_PSUBUS(B|W)rr",
  580. "VPMOVB2M(Z|Z128|Z256)rr",
  581. "VPMOVD2M(Z|Z128|Z256)rr",
  582. "VPMOVQ2M(Z|Z128|Z256)rr",
  583. "VPMOVW2M(Z|Z128|Z256)rr")>;
  // ICXWriteResGroup3: 1 uop on ICXPort5, latency 1.
  // Bound below to x87 compares, KMOV*kr and a set of register-register
  // shuffle/permute style opcodes.
  584. def ICXWriteResGroup3 : SchedWriteRes<[ICXPort5]> {
  585. let Latency = 1;
  586. let NumMicroOps = 1;
  587. let ResourceCycles = [1];
  588. }
  589. def: InstRW<[ICXWriteResGroup3], (instregex "COM(P?)_FST0r",
  590. "KMOV(B|D|Q|W)kr",
  591. "UCOM_F(P?)r",
  592. "VPBROADCAST(D|Q)rr",
  593. "(V?)INSERTPS(Z?)rr",
  594. "(V?)MOV(HL|LH)PS(Z?)rr",
  595. "(V?)MOVDDUP(Y|Z128|Z256)?rr",
  596. "(V?)PALIGNR(Y|Z128|Z256)?rri",
  597. "(V?)PERMIL(PD|PS)(Y|Z128|Z256)?ri",
  598. "(V?)PERMIL(PD|PS)(Y|Z128|Z256)?rr",
  599. "(V?)UNPCK(L|H)(PD|PS)(Y|Z128|Z256)?rr")>;
  // ICXWriteResGroup4: 1 uop on ICXPort6, latency 1; used for JMP(16|32|64)r.
  600. def ICXWriteResGroup4 : SchedWriteRes<[ICXPort6]> {
  601. let Latency = 1;
  602. let NumMicroOps = 1;
  603. let ResourceCycles = [1];
  604. }
  605. def: InstRW<[ICXWriteResGroup4], (instregex "JMP(16|32|64)r")>;
  // ICXWriteResGroup6: 1 uop on ICXPort05, latency 1; used for FINCSTP and FNOP.
  606. def ICXWriteResGroup6 : SchedWriteRes<[ICXPort05]> {
  607. let Latency = 1;
  608. let NumMicroOps = 1;
  609. let ResourceCycles = [1];
  610. }
  611. def: InstRW<[ICXWriteResGroup6], (instrs FINCSTP, FNOP)>;
  // ICXWriteResGroup7: 1 uop on ICXPort06, latency 1; used for CDQ, CQO,
  // CLAC and STAC.
  612. def ICXWriteResGroup7 : SchedWriteRes<[ICXPort06]> {
  613. let Latency = 1;
  614. let NumMicroOps = 1;
  615. let ResourceCycles = [1];
  616. }
  617. def: InstRW<[ICXWriteResGroup7], (instrs CDQ, CQO, CLAC, STAC)>;
  // ICXWriteResGroup8: 1 uop on ICXPort15, latency 1; used for ANDN(32|64)rr.
  618. def ICXWriteResGroup8 : SchedWriteRes<[ICXPort15]> {
  619. let Latency = 1;
  620. let NumMicroOps = 1;
  621. let ResourceCycles = [1];
  622. }
  623. def: InstRW<[ICXWriteResGroup8], (instregex "ANDN(32|64)rr")>;
  // ICXWriteResGroup9: 1 uop on ICXPort015, latency 1.
  // Bound below to register forms of blend, add/sub, scalar move and ternary
  // logic opcodes.
  // NOTE(review): the VPSUB pattern is the only one ending in "rrk" while its
  // VPADD counterpart ends in "rr" - confirm that this is intended.
  624. def ICXWriteResGroup9 : SchedWriteRes<[ICXPort015]> {
  625. let Latency = 1;
  626. let NumMicroOps = 1;
  627. let ResourceCycles = [1];
  628. }
  629. def: InstRW<[ICXWriteResGroup9], (instregex "VBLENDMPD(Z128|Z256)rr",
  630. "VBLENDMPS(Z128|Z256)rr",
  631. "VPADD(B|D|Q|W)(Y|Z|Z128|Z256)rr",
  632. "(V?)PADD(B|D|Q|W)rr",
  633. "(V?)MOV(SD|SS)(Z?)rr",
  634. "VPBLENDD(Y?)rri",
  635. "VPBLENDMB(Z128|Z256)rr",
  636. "VPBLENDMD(Z128|Z256)rr",
  637. "VPBLENDMQ(Z128|Z256)rr",
  638. "VPBLENDMW(Z128|Z256)rr",
  639. "VPSUB(B|D|Q|W)(Y|Z|Z128|Z256)rrk",
  640. "VPTERNLOGD(Z|Z128|Z256)rri",
  641. "VPTERNLOGQ(Z|Z128|Z256)rri")>;
  // ICXWriteResGroup10: 1 uop on ICXPort0156, latency 1; used for the
  // explicitly listed SGDT64m, SIDT64m, SMSW16m, STRm and SYSCALL.
  642. def ICXWriteResGroup10 : SchedWriteRes<[ICXPort0156]> {
  643. let Latency = 1;
  644. let NumMicroOps = 1;
  645. let ResourceCycles = [1];
  646. }
  647. def: InstRW<[ICXWriteResGroup10], (instrs SGDT64m,
  648. SIDT64m,
  649. SMSW16m,
  650. STRm,
  651. SYSCALL)>;
  // ICXWriteResGroup11: 2 uops (ICXPort4 + ICXPort237), latency 1.
  // Bound twice: once by explicit opcode list, once by regex.
  652. def ICXWriteResGroup11 : SchedWriteRes<[ICXPort4,ICXPort237]> {
  653. let Latency = 1;
  654. let NumMicroOps = 2;
  655. let ResourceCycles = [1,1];
  656. }
  657. def: InstRW<[ICXWriteResGroup11], (instrs FBSTPm, VMPTRSTm)>;
  658. def: InstRW<[ICXWriteResGroup11], (instregex "KMOV(B|D|Q|W)mk",
  659. "ST_FP(32|64|80)m")>;
  // ICXWriteResGroup13: 2 uops, both on ICXPort5, latency 2; used for
  // MMX_MOVQ2DQrr.
  660. def ICXWriteResGroup13 : SchedWriteRes<[ICXPort5]> {
  661. let Latency = 2;
  662. let NumMicroOps = 2;
  663. let ResourceCycles = [2];
  664. }
  665. def: InstRW<[ICXWriteResGroup13], (instrs MMX_MOVQ2DQrr)>;
  // ICXWriteResGroup14: 2 uops, both on ICXPort05, latency 2; used for
  // FDECSTP and MMX_MOVDQ2Qrr.
  666. def ICXWriteResGroup14 : SchedWriteRes<[ICXPort05]> {
  667. let Latency = 2;
  668. let NumMicroOps = 2;
  669. let ResourceCycles = [2];
  670. }
  671. def: InstRW<[ICXWriteResGroup14], (instrs FDECSTP,
  672. MMX_MOVDQ2Qrr)>;
  // ICXWriteResGroup17: 2 uops, both on ICXPort0156, latency 2; used for
  // LFENCE, WAIT and XGETBV.
  673. def ICXWriteResGroup17 : SchedWriteRes<[ICXPort0156]> {
  674. let Latency = 2;
  675. let NumMicroOps = 2;
  676. let ResourceCycles = [2];
  677. }
  678. def: InstRW<[ICXWriteResGroup17], (instrs LFENCE,
  679. WAIT,
  680. XGETBV)>;
  // ICXWriteResGroup20: 2 uops (ICXPort6 + ICXPort0156), latency 2; bound to
  // opcodes matched by "CLFLUSH".
  681. def ICXWriteResGroup20 : SchedWriteRes<[ICXPort6,ICXPort0156]> {
  682. let Latency = 2;
  683. let NumMicroOps = 2;
  684. let ResourceCycles = [1,1];
  685. }
  686. def: InstRW<[ICXWriteResGroup20], (instregex "CLFLUSH")>;
  // ICXWriteResGroup21: 2 uops (ICXPort237 + ICXPort0156), latency 2; used
  // for SFENCE.
  687. def ICXWriteResGroup21 : SchedWriteRes<[ICXPort237,ICXPort0156]> {
  688. let Latency = 2;
  689. let NumMicroOps = 2;
  690. let ResourceCycles = [1,1];
  691. }
  692. def: InstRW<[ICXWriteResGroup21], (instrs SFENCE)>;
  // ICXWriteResGroup23: 2 uops (ICXPort06 + ICXPort0156), latency 2.
  // Used for CWD, the J*CXZ branches and the accumulator-immediate ADC/SBB
  // opcodes listed below.
  693. def ICXWriteResGroup23 : SchedWriteRes<[ICXPort06,ICXPort0156]> {
  694. let Latency = 2;
  695. let NumMicroOps = 2;
  696. let ResourceCycles = [1,1];
  697. }
  698. def: InstRW<[ICXWriteResGroup23], (instrs CWD,
  699. JCXZ, JECXZ, JRCXZ,
  700. ADC8i8, SBB8i8,
  701. ADC16i16, SBB16i16,
  702. ADC32i32, SBB32i32,
  703. ADC64i32, SBB64i32)>;
  // ICXWriteResGroup25: 3 uops (ICXPort4, ICXPort6, ICXPort237), latency 2;
  // used for FNSTCW16m.
  704. def ICXWriteResGroup25 : SchedWriteRes<[ICXPort4,ICXPort6,ICXPort237]> {
  705. let Latency = 2;
  706. let NumMicroOps = 3;
  707. let ResourceCycles = [1,1,1];
  708. }
  709. def: InstRW<[ICXWriteResGroup25], (instrs FNSTCW16m)>;
  // ICXWriteResGroup27: 3 uops (ICXPort4, ICXPort237, ICXPort15), latency 2;
  // used for MOVBE(16|32|64)mr.
  710. def ICXWriteResGroup27 : SchedWriteRes<[ICXPort4,ICXPort237,ICXPort15]> {
  711. let Latency = 2;
  712. let NumMicroOps = 3;
  713. let ResourceCycles = [1,1,1];
  714. }
  715. def: InstRW<[ICXWriteResGroup27], (instregex "MOVBE(16|32|64)mr")>;
  // ICXWriteResGroup28: 3 uops (ICXPort4, ICXPort237, ICXPort0156), latency 2.
  // Bound to the listed PUSH and STOS opcodes and to PUSH(16|32|64)rmr.
  716. def ICXWriteResGroup28 : SchedWriteRes<[ICXPort4,ICXPort237,ICXPort0156]> {
  717. let Latency = 2;
  718. let NumMicroOps = 3;
  719. let ResourceCycles = [1,1,1];
  720. }
  721. def: InstRW<[ICXWriteResGroup28], (instrs PUSH16r, PUSH32r, PUSH64r, PUSH64i8,
  722. STOSB, STOSL, STOSQ, STOSW)>;
  723. def: InstRW<[ICXWriteResGroup28], (instregex "PUSH(16|32|64)rmr")>;
  // ICXWriteResGroup29: 5 uops (2 ICXPort4, 2 ICXPort237, 1 ICXPort15),
  // latency 2; used for VMOVDQU8Zmr(b?).
  724. def ICXWriteResGroup29 : SchedWriteRes<[ICXPort4,ICXPort237,ICXPort15]> {
  725. let Latency = 2;
  726. let NumMicroOps = 5;
  727. let ResourceCycles = [2,2,1];
  728. }
  729. def: InstRW<[ICXWriteResGroup29], (instregex "VMOVDQU8Zmr(b?)")>;
  // ICXWriteResGroup30: 1 uop on ICXPort0, latency 3; used for KMOV*rk,
  // KORTEST*rr and KTEST*rr.
  730. def ICXWriteResGroup30 : SchedWriteRes<[ICXPort0]> {
  731. let Latency = 3;
  732. let NumMicroOps = 1;
  733. let ResourceCycles = [1];
  734. }
  735. def: InstRW<[ICXWriteResGroup30], (instregex "KMOV(B|D|Q|W)rk",
  736. "KORTEST(B|D|Q|W)rr",
  737. "KTEST(B|D|Q|W)rr")>;
  // ICXWriteResGroup31: 1 uop on ICXPort1, latency 3; used for the register
  // forms of PDEP and PEXT.
  738. def ICXWriteResGroup31 : SchedWriteRes<[ICXPort1]> {
  739. let Latency = 3;
  740. let NumMicroOps = 1;
  741. let ResourceCycles = [1];
  742. }
  743. def: InstRW<[ICXWriteResGroup31], (instregex "PDEP(32|64)rr",
  744. "PEXT(32|64)rr")>;
  // ICXWriteResGroup32: 1 uop on ICXPort5, latency 3.
  // Bound below to x87 add/sub register forms and several vector
  // align/broadcast/pack/min-max register opcodes.
  745. def ICXWriteResGroup32 : SchedWriteRes<[ICXPort5]> {
  746. let Latency = 3;
  747. let NumMicroOps = 1;
  748. let ResourceCycles = [1];
  749. }
  750. def: InstRW<[ICXWriteResGroup32], (instregex "(ADD|SUB|SUBR)_(FPrST0|FST0r|FrST0)",
  751. "VALIGND(Z|Z128|Z256)rri",
  752. "VALIGNQ(Z|Z128|Z256)rri",
  753. "VPBROADCAST(B|W)rr",
  754. "(V?)PACK(U|S)S(DW|WB)(Y|Z|Z128|Z256)?rr",
  755. "VP(MAX|MIN)(S|U)Q(Z|Z128|Z256)rr")>;
  // ICXWriteResGroup33: 1 uop on ICXPort5, latency 4 (one cycle more than
  // ICXWriteResGroup32, which uses the same port).
  // Bound below to K-register add/shift/unpack and to compare/test/class
  // opcodes matched by the regexes.
  756. def ICXWriteResGroup33 : SchedWriteRes<[ICXPort5]> {
  757. let Latency = 4;
  758. let NumMicroOps = 1;
  759. let ResourceCycles = [1];
  760. }
  761. def: InstRW<[ICXWriteResGroup33], (instregex "KADD(B|D|Q|W)rr",
  762. "KSHIFTL(B|D|Q|W)ri",
  763. "KSHIFTR(B|D|Q|W)ri",
  764. "KUNPCK(BW|DQ|WD)rr",
  765. "VCMPPD(Z|Z128|Z256)rri",
  766. "VCMPPS(Z|Z128|Z256)rri",
  767. "VCMP(SD|SS)Zrr",
  768. "VFPCLASS(PD|PS)(Z|Z128|Z256)rr",
  769. "VFPCLASS(SD|SS)Zrr",
  770. "VPCMPB(Z|Z128|Z256)rri",
  771. "VPCMPD(Z|Z128|Z256)rri",
  772. "VPCMPEQ(B|D|Q|W)(Z|Z128|Z256)rr",
  773. "VPCMPGT(B|D|Q|W)(Z|Z128|Z256)rr",
  774. "VPCMPQ(Z|Z128|Z256)rri",
  775. "VPCMPU(B|D|Q|W)(Z|Z128|Z256)rri",
  776. "VPCMPW(Z|Z128|Z256)rri",
  777. "VPTEST(N?)M(B|D|Q|W)(Z|Z128|Z256)rr")>;
  // ICXWriteResGroup34: 2 uops (ICXPort0 + ICXPort0156), latency 3; used for
  // FNSTSW16r.
  778. def ICXWriteResGroup34 : SchedWriteRes<[ICXPort0,ICXPort0156]> {
  779. let Latency = 3;
  780. let NumMicroOps = 2;
  781. let ResourceCycles = [1,1];
  782. }
  783. def: InstRW<[ICXWriteResGroup34], (instrs FNSTSW16r)>;
  // ICXWriteResGroup37: 3 uops (1 ICXPort0, 2 ICXPort5), latency 3; used for
  // MMX_PH(ADD|SUB)SWrr.
  784. def ICXWriteResGroup37 : SchedWriteRes<[ICXPort0,ICXPort5]> {
  785. let Latency = 3;
  786. let NumMicroOps = 3;
  787. let ResourceCycles = [1,2];
  788. }
  789. def: InstRW<[ICXWriteResGroup37], (instregex "MMX_PH(ADD|SUB)SWrr")>;
  // ICXWriteResGroup38: 3 uops (2 ICXPort5, 1 ICXPort01), latency 3; used for
  // (V?)PH(ADD|SUB)SW(Y?)rr.
  790. def ICXWriteResGroup38 : SchedWriteRes<[ICXPort5,ICXPort01]> {
  791. let Latency = 3;
  792. let NumMicroOps = 3;
  793. let ResourceCycles = [2,1];
  794. }
  795. def: InstRW<[ICXWriteResGroup38], (instregex "(V?)PH(ADD|SUB)SW(Y?)rr")>;
  // ICXWriteResGroup41: 3 uops (2 ICXPort5, 1 ICXPort0156), latency 3; used
  // for the MMX pack register forms listed below.
  796. def ICXWriteResGroup41 : SchedWriteRes<[ICXPort5,ICXPort0156]> {
  797. let Latency = 3;
  798. let NumMicroOps = 3;
  799. let ResourceCycles = [2,1];
  800. }
  801. def: InstRW<[ICXWriteResGroup41], (instrs MMX_PACKSSDWrr,
  802. MMX_PACKSSWBrr,
  803. MMX_PACKUSWBrr)>;
  // ICXWriteResGroup42: 3 uops (1 ICXPort6, 2 ICXPort0156), latency 3; bound
  // to opcodes matched by "CLD".
  804. def ICXWriteResGroup42 : SchedWriteRes<[ICXPort6,ICXPort0156]> {
  805. let Latency = 3;
  806. let NumMicroOps = 3;
  807. let ResourceCycles = [1,2];
  808. }
  809. def: InstRW<[ICXWriteResGroup42], (instregex "CLD")>;
  // ICXWriteResGroup43: 3 uops (1 ICXPort237, 2 ICXPort0156), latency 3; used
  // for MFENCE.
  810. def ICXWriteResGroup43 : SchedWriteRes<[ICXPort237,ICXPort0156]> {
  811. let Latency = 3;
  812. let NumMicroOps = 3;
  813. let ResourceCycles = [1,2];
  814. }
  815. def: InstRW<[ICXWriteResGroup43], (instrs MFENCE)>;
  // RCL/RCR register forms.
  // ICXWriteResGroup44  (rotate by 1): 3 uops (1 ICXPort06, 2 ICXPort0156), latency 2.
  // ICXWriteResGroup44b (RCR by imm):  7 uops (2 ICXPort1, 3 ICXPort06, 2 ICXPort0156), latency 5.
  // ICXWriteResGroup44c (RCL by imm):  same uops/ports as 44b, latency 6.
  816. def ICXWriteResGroup44 : SchedWriteRes<[ICXPort06,ICXPort0156]> {
  817. let Latency = 2;
  818. let NumMicroOps = 3;
  819. let ResourceCycles = [1,2];
  820. }
  821. def: InstRW<[ICXWriteResGroup44], (instrs RCL8r1, RCL16r1, RCL32r1, RCL64r1,
  822. RCR8r1, RCR16r1, RCR32r1, RCR64r1)>;
  823. def ICXWriteResGroup44b : SchedWriteRes<[ICXPort1,ICXPort06,ICXPort0156]> {
  824. let Latency = 5;
  825. let NumMicroOps = 7;
  826. let ResourceCycles = [2,3,2];
  827. }
  828. def: InstRW<[ICXWriteResGroup44b], (instrs RCR8ri, RCR16ri, RCR32ri, RCR64ri)>;
  829. def ICXWriteResGroup44c : SchedWriteRes<[ICXPort1,ICXPort06,ICXPort0156]> {
  830. let Latency = 6;
  831. let NumMicroOps = 7;
  832. let ResourceCycles = [2,3,2];
  833. }
  834. def: InstRW<[ICXWriteResGroup44c], (instrs RCL8ri, RCL16ri, RCL32ri, RCL64ri)>;
  // ICXWriteResGroup45: 3 uops (ICXPort0, ICXPort4, ICXPort237), latency 3;
  // used for FNSTSWm.
  835. def ICXWriteResGroup45 : SchedWriteRes<[ICXPort0,ICXPort4,ICXPort237]> {
  836. let Latency = 3;
  837. let NumMicroOps = 3;
  838. let ResourceCycles = [1,1,1];
  839. }
  840. def: InstRW<[ICXWriteResGroup45], (instrs FNSTSWm)>;
  // ICXWriteResGroup47: 4 uops (ICXPort4, ICXPort6, ICXPort237, ICXPort0156),
  // latency 3; used for CALL(16|32|64)r.
  841. def ICXWriteResGroup47 : SchedWriteRes<[ICXPort4,ICXPort6,ICXPort237,ICXPort0156]> {
  842. let Latency = 3;
  843. let NumMicroOps = 4;
  844. let ResourceCycles = [1,1,1,1];
  845. }
  846. def: InstRW<[ICXWriteResGroup47], (instregex "CALL(16|32|64)r")>;
  // ICXWriteResGroup48: 4 uops (ICXPort4, ICXPort237, ICXPort06, ICXPort0156),
  // latency 3; used for CALL64pcrel32. Differs from ICXWriteResGroup47 only
  // in using ICXPort06 instead of ICXPort6.
  847. def ICXWriteResGroup48 : SchedWriteRes<[ICXPort4,ICXPort237,ICXPort06,ICXPort0156]> {
  848. let Latency = 3;
  849. let NumMicroOps = 4;
  850. let ResourceCycles = [1,1,1,1];
  851. }
  852. def: InstRW<[ICXWriteResGroup48], (instrs CALL64pcrel32)>;
  // ICXWriteResGroup49: 1 uop on ICXPort0, latency 4; used for the x87
  // MUL_(FPrST0|FST0r|FrST0) register forms.
  853. def ICXWriteResGroup49 : SchedWriteRes<[ICXPort0]> {
  854. let Latency = 4;
  855. let NumMicroOps = 1;
  856. let ResourceCycles = [1];
  857. }
  858. def: InstRW<[ICXWriteResGroup49], (instregex "MUL_(FPrST0|FST0r|FrST0)")>;
  // Single-uop, latency-4 register conversions.
  // ICXWriteResGroup50 uses ICXPort01 and covers the 128/256-bit (and legacy)
  // forms; ICXWriteResGroup50z uses ICXPort05 and covers the corresponding
  // Z (no size suffix) opcodes, listed explicitly.
  859. def ICXWriteResGroup50 : SchedWriteRes<[ICXPort01]> {
  860. let Latency = 4;
  861. let NumMicroOps = 1;
  862. let ResourceCycles = [1];
  863. }
  864. def: InstRW<[ICXWriteResGroup50], (instregex "VCVTPD2QQ(Z128|Z256)rr",
  865. "VCVTPD2UQQ(Z128|Z256)rr",
  866. "VCVTPS2DQ(Y|Z128|Z256)rr",
  867. "(V?)CVTPS2DQrr",
  868. "VCVTPS2UDQ(Z128|Z256)rr",
  869. "VCVTTPD2QQ(Z128|Z256)rr",
  870. "VCVTTPD2UQQ(Z128|Z256)rr",
  871. "VCVTTPS2DQ(Z128|Z256)rr",
  872. "(V?)CVTTPS2DQrr",
  873. "VCVTTPS2UDQ(Z128|Z256)rr")>;
  874. def ICXWriteResGroup50z : SchedWriteRes<[ICXPort05]> {
  875. let Latency = 4;
  876. let NumMicroOps = 1;
  877. let ResourceCycles = [1];
  878. }
  879. def: InstRW<[ICXWriteResGroup50z], (instrs VCVTPD2QQZrr,
  880. VCVTPD2UQQZrr,
  881. VCVTPS2DQZrr,
  882. VCVTPS2UDQZrr,
  883. VCVTTPD2QQZrr,
  884. VCVTTPD2UQQZrr,
  885. VCVTTPS2DQZrr,
  886. VCVTTPS2UDQZrr)>;
  // ICXWriteResGroup51: 2 uops, both on ICXPort5, latency 4.
  // Bound below to register forms of the EXPAND and VPMOV* narrowing opcodes.
  887. def ICXWriteResGroup51 : SchedWriteRes<[ICXPort5]> {
  888. let Latency = 4;
  889. let NumMicroOps = 2;
  890. let ResourceCycles = [2];
  891. }
  892. def: InstRW<[ICXWriteResGroup51], (instregex "VEXPANDPD(Z|Z128|Z256)rr",
  893. "VEXPANDPS(Z|Z128|Z256)rr",
  894. "VPEXPANDD(Z|Z128|Z256)rr",
  895. "VPEXPANDQ(Z|Z128|Z256)rr",
  896. "VPMOVDB(Z|Z128|Z256)rr",
  897. "VPMOVDW(Z|Z128|Z256)rr",
  898. "VPMOVQB(Z|Z128|Z256)rr",
  899. "VPMOVQW(Z|Z128|Z256)rr",
  900. "VPMOVSDB(Z|Z128|Z256)rr",
  901. "VPMOVSDW(Z|Z128|Z256)rr",
  902. "VPMOVSQB(Z|Z128|Z256)rr",
  903. "VPMOVSQD(Z|Z128|Z256)rr",
  904. "VPMOVSQW(Z|Z128|Z256)rr",
  905. "VPMOVSWB(Z|Z128|Z256)rr",
  906. "VPMOVUSDB(Z|Z128|Z256)rr",
  907. "VPMOVUSDW(Z|Z128|Z256)rr",
  908. "VPMOVUSQB(Z|Z128|Z256)rr",
  909. "VPMOVUSQD(Z|Z128|Z256)rr",
  910. "VPMOVUSWB(Z|Z128|Z256)rr",
  911. "VPMOVWB(Z|Z128|Z256)rr")>;
  // ICXWriteResGroup54: 3 uops (ICXPort4, ICXPort5, ICXPort237), latency 4.
  // Bound below to x87 integer store forms and VPMOVQD memory-destination
  // forms.
  912. def ICXWriteResGroup54 : SchedWriteRes<[ICXPort4,ICXPort5,ICXPort237]> {
  913. let Latency = 4;
  914. let NumMicroOps = 3;
  915. let ResourceCycles = [1,1,1];
  916. }
  917. def: InstRW<[ICXWriteResGroup54], (instregex "IST(T?)_FP(16|32|64)m",
  918. "IST_F(16|32)m",
  919. "VPMOVQD(Z|Z128|Z256)mr(b?)")>;
  // ICXWriteResGroup55: 4 uops, all on ICXPort0156, latency 4; used for FNCLEX.
  920. def ICXWriteResGroup55 : SchedWriteRes<[ICXPort0156]> {
  921. let Latency = 4;
  922. let NumMicroOps = 4;
  923. let ResourceCycles = [4];
  924. }
  925. def: InstRW<[ICXWriteResGroup55], (instrs FNCLEX)>;
  // ICXWriteResGroup56: 4 uops with an empty port list and latency 0, i.e. the
  // uops are counted but occupy no modelled execution port; used for
  // VZEROUPPER.
  926. def ICXWriteResGroup56 : SchedWriteRes<[]> {
  927. let Latency = 0;
  928. let NumMicroOps = 4;
  929. let ResourceCycles = [];
  930. }
  931. def: InstRW<[ICXWriteResGroup56], (instrs VZEROUPPER)>;
  // ICXWriteResGroup57: 4 uops (1 ICXPort1, 1 ICXPort6, 2 ICXPort0156),
  // latency 4; used for LAR(16|32|64)rr.
  932. def ICXWriteResGroup57 : SchedWriteRes<[ICXPort1,ICXPort6,ICXPort0156]> {
  933. let Latency = 4;
  934. let NumMicroOps = 4;
  935. let ResourceCycles = [1,1,2];
  936. }
  937. def: InstRW<[ICXWriteResGroup57], (instregex "LAR(16|32|64)rr")>;
  // ICXWriteResGroup61: 2 uops (ICXPort5 + ICXPort01), latency 5.
  // Bound below to MMX, scalar and 128-bit register conversion opcodes.
  938. def ICXWriteResGroup61 : SchedWriteRes<[ICXPort5,ICXPort01]> {
  939. let Latency = 5;
  940. let NumMicroOps = 2;
  941. let ResourceCycles = [1,1];
  942. }
  943. def: InstRW<[ICXWriteResGroup61], (instregex "MMX_CVT(T?)PD2PIrr",
  944. "MMX_CVT(T?)PS2PIrr",
  945. "VCVTDQ2PDZ128rr",
  946. "VCVTPD2DQZ128rr",
  947. "(V?)CVT(T?)PD2DQrr",
  948. "VCVTPD2UDQZ128rr",
  949. "VCVTPS2PDZ128rr",
  950. "(V?)CVTPS2PDrr",
  951. "VCVTPS2QQZ128rr",
  952. "VCVTPS2UQQZ128rr",
  953. "VCVTQQ2PSZ128rr",
  954. "(V?)CVTSI(64)?2SDrr",
  955. "VCVTSI2SSZrr",
  956. "(V?)CVTSI2SSrr",
  957. "VCVTSI(64)?2SDZrr",
  958. "VCVTSS2SDZrr",
  959. "(V?)CVTSS2SDrr",
  960. "VCVTTPD2DQZ128rr",
  961. "VCVTTPD2UDQZ128rr",
  962. "VCVTTPS2QQZ128rr",
  963. "VCVTTPS2UQQZ128rr",
  964. "VCVTUDQ2PDZ128rr",
  965. "VCVTUQQ2PSZ128rr",
  966. "VCVTUSI2SSZrr",
  967. "VCVTUSI(64)?2SDZrr")>;
  // ICXWriteResGroup62: 3 uops (2 ICXPort5, 1 ICXPort015), latency 5; used
  // for VPCONFLICTQZ128rr.
  968. def ICXWriteResGroup62 : SchedWriteRes<[ICXPort5,ICXPort015]> {
  969. let Latency = 5;
  970. let NumMicroOps = 3;
  971. let ResourceCycles = [2,1];
  972. }
  973. def: InstRW<[ICXWriteResGroup62], (instregex "VPCONFLICTQZ128rr")>;
  // ICXWriteResGroup63: 3 uops (ICXPort1, ICXPort6, ICXPort06), latency 5;
  // used for STR(16|32|64)r.
  974. def ICXWriteResGroup63 : SchedWriteRes<[ICXPort1,ICXPort6,ICXPort06]> {
  975. let Latency = 5;
  976. let NumMicroOps = 3;
  977. let ResourceCycles = [1,1,1];
  978. }
  979. def: InstRW<[ICXWriteResGroup63], (instregex "STR(16|32|64)r")>;
  // ICXWriteResGroup65: 3 uops (ICXPort4, ICXPort237, ICXPort01), latency 5;
  // used for the VCVTPS2PH Z128/Z256/Z memory-destination forms.
  980. def ICXWriteResGroup65 : SchedWriteRes<[ICXPort4,ICXPort237,ICXPort01]> {
  981. let Latency = 5;
  982. let NumMicroOps = 3;
  983. let ResourceCycles = [1,1,1];
  984. }
  985. def: InstRW<[ICXWriteResGroup65], (instregex "VCVTPS2PHZ128mr(b?)",
  986. "VCVTPS2PHZ256mr(b?)",
  987. "VCVTPS2PHZmr(b?)")>;
  // ICXWriteResGroup66: 4 uops (1 ICXPort4, 2 ICXPort5, 1 ICXPort237),
  // latency 5. Bound below to the memory-destination ("mr") forms of the
  // VPMOV* narrowing opcodes.
  988. def ICXWriteResGroup66 : SchedWriteRes<[ICXPort4,ICXPort5,ICXPort237]> {
  989. let Latency = 5;
  990. let NumMicroOps = 4;
  991. let ResourceCycles = [1,2,1];
  992. }
  993. def: InstRW<[ICXWriteResGroup66], (instregex "VPMOVDB(Z|Z128|Z256)mr(b?)",
  994. "VPMOVDW(Z|Z128|Z256)mr(b?)",
  995. "VPMOVQB(Z|Z128|Z256)mr(b?)",
  996. "VPMOVQW(Z|Z128|Z256)mr(b?)",
  997. "VPMOVSDB(Z|Z128|Z256)mr(b?)",
  998. "VPMOVSDW(Z|Z128|Z256)mr(b?)",
  999. "VPMOVSQB(Z|Z128|Z256)mr(b?)",
  1000. "VPMOVSQD(Z|Z128|Z256)mr(b?)",
  1001. "VPMOVSQW(Z|Z128|Z256)mr(b?)",
  1002. "VPMOVSWB(Z|Z128|Z256)mr(b?)",
  1003. "VPMOVUSDB(Z|Z128|Z256)mr(b?)",
  1004. "VPMOVUSDW(Z|Z128|Z256)mr(b?)",
  1005. "VPMOVUSQB(Z|Z128|Z256)mr(b?)",
  1006. "VPMOVUSQD(Z|Z128|Z256)mr(b?)",
  1007. "VPMOVUSQW(Z|Z128|Z256)mr(b?)",
  1008. "VPMOVUSWB(Z|Z128|Z256)mr(b?)",
  1009. "VPMOVWB(Z|Z128|Z256)mr(b?)")>;
  // ICXWriteResGroup67: 5 uops (1 ICXPort06, 4 ICXPort0156), latency 5; used
  // for XSETBV.
  1010. def ICXWriteResGroup67 : SchedWriteRes<[ICXPort06,ICXPort0156]> {
  1011. let Latency = 5;
  1012. let NumMicroOps = 5;
  1013. let ResourceCycles = [1,4];
  1014. }
  1015. def: InstRW<[ICXWriteResGroup67], (instrs XSETBV)>;
  // ICXWriteResGroup69: 6 uops (1 ICXPort4, 1 ICXPort237, 4 ICXPort0156),
  // latency 5; used for PUSHF(16|64).
  1016. def ICXWriteResGroup69 : SchedWriteRes<[ICXPort4,ICXPort237,ICXPort0156]> {
  1017. let Latency = 5;
  1018. let NumMicroOps = 6;
  1019. let ResourceCycles = [1,1,4];
  1020. }
  1021. def: InstRW<[ICXWriteResGroup69], (instregex "PUSHF(16|64)")>;
  // ICXWriteResGroup71: 1 uop on ICXPort23, latency 6.
  // Bound to the explicitly listed 128-bit broadcast/dup "rm" opcodes.
  1022. def ICXWriteResGroup71 : SchedWriteRes<[ICXPort23]> {
  1023. let Latency = 6;
  1024. let NumMicroOps = 1;
  1025. let ResourceCycles = [1];
  1026. }
  1027. def: InstRW<[ICXWriteResGroup71], (instrs VBROADCASTSSrm,
  1028. VPBROADCASTDrm,
  1029. VPBROADCASTQrm,
  1030. VMOVSHDUPrm,
  1031. VMOVSLDUPrm,
  1032. VMOVDDUPrm,
  1033. MOVSHDUPrm,
  1034. MOVSLDUPrm,
  1035. MOVDDUPrm)>;
  // ICXWriteResGroup72: 2 uops, both on ICXPort5, latency 6.
  // Bound to MMX_CVTPI2PSrr and to the COMPRESS / VPERMW register forms.
  1036. def ICXWriteResGroup72 : SchedWriteRes<[ICXPort5]> {
  1037. let Latency = 6;
  1038. let NumMicroOps = 2;
  1039. let ResourceCycles = [2];
  1040. }
  1041. def: InstRW<[ICXWriteResGroup72], (instrs MMX_CVTPI2PSrr)>;
  1042. def: InstRW<[ICXWriteResGroup72], (instregex "VCOMPRESSPD(Z|Z128|Z256)rr",
  1043. "VCOMPRESSPS(Z|Z128|Z256)rr",
  1044. "VPCOMPRESSD(Z|Z128|Z256)rr",
  1045. "VPCOMPRESSQ(Z|Z128|Z256)rr",
  1046. "VPERMW(Z|Z128|Z256)rr")>;
  // ICXWriteResGroup73: 2 uops (ICXPort0 + ICXPort23), latency 6.
  // Bound to the "rm" forms of the MMX opcodes whose "rr" forms are matched
  // by ICXWriteResGroup1's regexes.
  1047. def ICXWriteResGroup73 : SchedWriteRes<[ICXPort0,ICXPort23]> {
  1048. let Latency = 6;
  1049. let NumMicroOps = 2;
  1050. let ResourceCycles = [1,1];
  1051. }
  1052. def: InstRW<[ICXWriteResGroup73], (instrs MMX_PADDSBrm,
  1053. MMX_PADDSWrm,
  1054. MMX_PADDUSBrm,
  1055. MMX_PADDUSWrm,
  1056. MMX_PAVGBrm,
  1057. MMX_PAVGWrm,
  1058. MMX_PCMPEQBrm,
  1059. MMX_PCMPEQDrm,
  1060. MMX_PCMPEQWrm,
  1061. MMX_PCMPGTBrm,
  1062. MMX_PCMPGTDrm,
  1063. MMX_PCMPGTWrm,
  1064. MMX_PMAXSWrm,
  1065. MMX_PMAXUBrm,
  1066. MMX_PMINSWrm,
  1067. MMX_PMINUBrm,
  1068. MMX_PSUBSBrm,
  1069. MMX_PSUBSWrm,
  1070. MMX_PSUBUSBrm,
  1071. MMX_PSUBUSWrm)>;
  // ICXWriteResGroup76: 2 uops (ICXPort6 + ICXPort23), latency 6; used for
  // FARJMP64m and JMP(16|32|64)m.
  1072. def ICXWriteResGroup76 : SchedWriteRes<[ICXPort6,ICXPort23]> {
  1073. let Latency = 6;
  1074. let NumMicroOps = 2;
  1075. let ResourceCycles = [1,1];
  1076. }
  1077. def: InstRW<[ICXWriteResGroup76], (instrs FARJMP64m)>;
  1078. def: InstRW<[ICXWriteResGroup76], (instregex "JMP(16|32|64)m")>;
  // ICXWriteResGroup79: 2 uops (ICXPort23 + ICXPort15), latency 6; used for
  // ANDN(32|64)rm and MOVBE(16|32|64)rm.
  1079. def ICXWriteResGroup79 : SchedWriteRes<[ICXPort23,ICXPort15]> {
  1080. let Latency = 6;
  1081. let NumMicroOps = 2;
  1082. let ResourceCycles = [1,1];
  1083. }
  1084. def: InstRW<[ICXWriteResGroup79], (instregex "ANDN(32|64)rm",
  1085. "MOVBE(16|32|64)rm")>;
  // ICXWriteResGroup80: 2 uops (ICXPort23 + ICXPort015), latency 6; used for
  // VMOV(64to|QI2)PQIZrm(b?) and VMOVDI2PDIZrm.
  1086. def ICXWriteResGroup80 : SchedWriteRes<[ICXPort23,ICXPort015]> {
  1087. let Latency = 6;
  1088. let NumMicroOps = 2;
  1089. let ResourceCycles = [1,1];
  1090. }
  1091. def: InstRW<[ICXWriteResGroup80], (instregex "VMOV(64to|QI2)PQIZrm(b?)")>;
  1092. def: InstRW<[ICXWriteResGroup80], (instrs VMOVDI2PDIZrm)>;
  // ICXWriteResGroup81: 2 uops (ICXPort23 + ICXPort0156), latency 6; used for
  // POP16r/POP32r/POP64r and POP(16|32|64)rmr.
  1093. def ICXWriteResGroup81 : SchedWriteRes<[ICXPort23,ICXPort0156]> {
  1094. let Latency = 6;
  1095. let NumMicroOps = 2;
  1096. let ResourceCycles = [1,1];
  1097. }
  1098. def: InstRW<[ICXWriteResGroup81], (instrs POP16r, POP32r, POP64r)>;
  1099. def: InstRW<[ICXWriteResGroup81], (instregex "POP(16|32|64)rmr")>;
  // ICXWriteResGroup82: 3 uops (2 ICXPort5, 1 ICXPort01), latency 6; used for
  // the 64-bit-integer-to-SS conversion register forms matched below.
  1100. def ICXWriteResGroup82 : SchedWriteRes<[ICXPort5,ICXPort01]> {
  1101. let Latency = 6;
  1102. let NumMicroOps = 3;
  1103. let ResourceCycles = [2,1];
  1104. }
  1105. def: InstRW<[ICXWriteResGroup82], (instregex "(V?)CVTSI642SSrr",
  1106. "VCVTSI642SSZrr",
  1107. "VCVTUSI642SSZrr")>;
  // ICXWriteResGroup84: 4 uops (ICXPort1, ICXPort6, ICXPort06, ICXPort0156),
  // latency 6; used for SLDT(16|32|64)r.
  1108. def ICXWriteResGroup84 : SchedWriteRes<[ICXPort1,ICXPort6,ICXPort06,ICXPort0156]> {
  1109. let Latency = 6;
  1110. let NumMicroOps = 4;
  1111. let ResourceCycles = [1,1,1,1];
  1112. }
  1113. def: InstRW<[ICXWriteResGroup84], (instregex "SLDT(16|32|64)r")>;
  // ICXWriteResGroup86: 4 uops (ICXPort4, ICXPort23, ICXPort237, ICXPort06),
  // latency 6; used for the memory forms of SAR/SHL/SHR by 1 or immediate.
  1114. def ICXWriteResGroup86 : SchedWriteRes<[ICXPort4,ICXPort23,ICXPort237,ICXPort06]> {
  1115. let Latency = 6;
  1116. let NumMicroOps = 4;
  1117. let ResourceCycles = [1,1,1,1];
  1118. }
  1119. def: InstRW<[ICXWriteResGroup86], (instregex "SAR(8|16|32|64)m(1|i)",
  1120. "SHL(8|16|32|64)m(1|i)",
  1121. "SHR(8|16|32|64)m(1|i)")>;
  // ICXWriteResGroup87: 4 uops (ICXPort4, ICXPort23, ICXPort237, ICXPort0156),
  // latency 6; used for POP(16|32|64)rmm and PUSH(16|32|64)rmm.
  1122. def ICXWriteResGroup87 : SchedWriteRes<[ICXPort4,ICXPort23,ICXPort237,ICXPort0156]> {
  1123. let Latency = 6;
  1124. let NumMicroOps = 4;
  1125. let ResourceCycles = [1,1,1,1];
  1126. }
  1127. def: InstRW<[ICXWriteResGroup87], (instregex "POP(16|32|64)rmm",
  1128. "PUSH(16|32|64)rmm")>;
  // ICXWriteResGroup88: 6 uops (1 ICXPort6, 5 ICXPort0156), latency 6; used
  // for STD.
  1129. def ICXWriteResGroup88 : SchedWriteRes<[ICXPort6,ICXPort0156]> {
  1130. let Latency = 6;
  1131. let NumMicroOps = 6;
  1132. let ResourceCycles = [1,5];
  1133. }
  1134. def: InstRW<[ICXWriteResGroup88], (instrs STD)>;
  // ICXWriteResGroup89: 1 uop on ICXPort23, latency 7 (one cycle more than
  // ICXWriteResGroup71). Bound to LD_F(32|64|80)m and to the listed 256-bit
  // broadcast/dup load opcodes.
  1135. def ICXWriteResGroup89 : SchedWriteRes<[ICXPort23]> {
  1136. let Latency = 7;
  1137. let NumMicroOps = 1;
  1138. let ResourceCycles = [1];
  1139. }
  1140. def: InstRW<[ICXWriteResGroup89], (instregex "LD_F(32|64|80)m")>;
  1141. def: InstRW<[ICXWriteResGroup89], (instrs VBROADCASTF128,
  1142. VBROADCASTI128,
  1143. VBROADCASTSDYrm,
  1144. VBROADCASTSSYrm,
  1145. VMOVDDUPYrm,
  1146. VMOVSHDUPYrm,
  1147. VMOVSLDUPYrm,
  1148. VPBROADCASTDYrm,
  1149. VPBROADCASTQYrm)>;
  // ICXWriteResGroup90: 2 uops (ICXPort01 + ICXPort5), latency 7; used for
  // VCVTDQ2PDYrr.
  // NOTE(review): ICXWriteResGroup93 in this file has the same latency, uop
  // count and ports (listed in the opposite order) - the two look mergeable.
  1150. def ICXWriteResGroup90 : SchedWriteRes<[ICXPort01,ICXPort5]> {
  1151. let Latency = 7;
  1152. let NumMicroOps = 2;
  1153. let ResourceCycles = [1,1];
  1154. }
  1155. def: InstRW<[ICXWriteResGroup90], (instrs VCVTDQ2PDYrr)>;
  // ICXWriteResGroup92: 2 uops (ICXPort5 + ICXPort23), latency 7.
  // Bound below to memory-source forms of scalar moves, byte/word broadcasts
  // and 128-bit shuffle/permute style opcodes.
  1156. def ICXWriteResGroup92 : SchedWriteRes<[ICXPort5,ICXPort23]> {
  1157. let Latency = 7;
  1158. let NumMicroOps = 2;
  1159. let ResourceCycles = [1,1];
  1160. }
  1161. def: InstRW<[ICXWriteResGroup92], (instregex "VMOV(SD|SS)Zrm(b?)",
  1162. "VPBROADCAST(B|W)(Z128)?rm",
  1163. "(V?)INSERTPS(Z?)rm",
  1164. "(V?)PALIGNR(Z128)?rmi",
  1165. "(V?)PERMIL(PD|PS)(Z128)?m(b?)i",
  1166. "(V?)PERMIL(PD|PS)(Z128)?rm",
  1167. "(V?)UNPCK(L|H)(PD|PS)(Z128)?rm")>;
  // Two-uop, latency-7 register conversions.
  // ICXWriteResGroup93 uses ICXPort5 + ICXPort01 and covers the Y/Z256 forms;
  // ICXWriteResGroup93z uses ICXPort5 + ICXPort05 and covers the corresponding
  // Z (no size suffix) opcodes, listed explicitly.
  1168. def ICXWriteResGroup93 : SchedWriteRes<[ICXPort5,ICXPort01]> {
  1169. let Latency = 7;
  1170. let NumMicroOps = 2;
  1171. let ResourceCycles = [1,1];
  1172. }
  1173. def: InstRW<[ICXWriteResGroup93], (instregex "VCVTDQ2PDZ256rr",
  1174. "VCVTPD2DQ(Y|Z256)rr",
  1175. "VCVTPD2UDQZ256rr",
  1176. "VCVTPS2PD(Y|Z256)rr",
  1177. "VCVTPS2QQZ256rr",
  1178. "VCVTPS2UQQZ256rr",
  1179. "VCVTQQ2PSZ256rr",
  1180. "VCVTTPD2DQ(Y|Z256)rr",
  1181. "VCVTTPD2UDQZ256rr",
  1182. "VCVTTPS2QQZ256rr",
  1183. "VCVTTPS2UQQZ256rr",
  1184. "VCVTUDQ2PDZ256rr",
  1185. "VCVTUQQ2PSZ256rr")>;
  1186. def ICXWriteResGroup93z : SchedWriteRes<[ICXPort5,ICXPort05]> {
  1187. let Latency = 7;
  1188. let NumMicroOps = 2;
  1189. let ResourceCycles = [1,1];
  1190. }
  1191. def: InstRW<[ICXWriteResGroup93z], (instrs VCVTDQ2PDZrr,
  1192. VCVTPD2DQZrr,
  1193. VCVTPD2UDQZrr,
  1194. VCVTPS2PDZrr,
  1195. VCVTPS2QQZrr,
  1196. VCVTPS2UQQZrr,
  1197. VCVTQQ2PSZrr,
  1198. VCVTTPD2DQZrr,
  1199. VCVTTPD2UDQZrr,
  1200. VCVTTPS2QQZrr,
  1201. VCVTTPS2UQQZrr,
  1202. VCVTUDQ2PDZrr,
  1203. VCVTUQQ2PSZrr)>;
  // ICXWriteResGroup95: 2 uops (ICXPort23 + ICXPort015), latency 7.
  // The first binding uses the write alone; the second additionally lists
  // ReadAfterVecXLd in the InstRW operand list.
  // NOTE(review): ReadAfterVecXLd is defined outside this section; presumably
  // it models a register source that is read only after the load completes -
  // confirm, and check that plain loads in the regex list (e.g. VMOVAPDZ128rm)
  // are meant to carry it.
  1204. def ICXWriteResGroup95 : SchedWriteRes<[ICXPort23,ICXPort015]> {
  1205. let Latency = 7;
  1206. let NumMicroOps = 2;
  1207. let ResourceCycles = [1,1];
  1208. }
  1209. def: InstRW<[ICXWriteResGroup95], (instrs VMOVNTDQAZ128rm,
  1210. VPBLENDDrmi)>;
  1211. def: InstRW<[ICXWriteResGroup95, ReadAfterVecXLd],
  1212. (instregex "VBLENDMPDZ128rm(b?)",
  1213. "VBLENDMPSZ128rm(b?)",
  1214. "VBROADCASTI32X2Z128rm(b?)",
  1215. "VBROADCASTSSZ128rm(b?)",
  1216. "VINSERT(F|I)128rm",
  1217. "VMOVAPDZ128rm(b?)",
  1218. "VMOVAPSZ128rm(b?)",
  1219. "VMOVDDUPZ128rm(b?)",
  1220. "VMOVDQA32Z128rm(b?)",
  1221. "VMOVDQA64Z128rm(b?)",
  1222. "VMOVDQU16Z128rm(b?)",
  1223. "VMOVDQU32Z128rm(b?)",
  1224. "VMOVDQU64Z128rm(b?)",
  1225. "VMOVDQU8Z128rm(b?)",
  1226. "VMOVSHDUPZ128rm(b?)",
  1227. "VMOVSLDUPZ128rm(b?)",
  1228. "VMOVUPDZ128rm(b?)",
  1229. "VMOVUPSZ128rm(b?)",
  1230. "VPADD(B|D|Q|W)Z128rm(b?)",
  1231. "(V?)PADD(B|D|Q|W)rm",
  1232. "VPBLENDM(B|D|Q|W)Z128rm(b?)",
  1233. "VPBROADCASTDZ128rm(b?)",
  1234. "VPBROADCASTQZ128rm(b?)",
  1235. "VPSUB(B|D|Q|W)Z128rm(b?)",
  1236. "(V?)PSUB(B|D|Q|W)rm",
  1237. "VPTERNLOGDZ128rm(b?)i",
  1238. "VPTERNLOGQZ128rm(b?)i")>;
  // ICXWriteResGroup96: 3 uops (2 ICXPort5, 1 ICXPort23), latency 7; used for
  // the MMX pack "rm" forms listed below.
  1239. def ICXWriteResGroup96 : SchedWriteRes<[ICXPort5,ICXPort23]> {
  1240. let Latency = 7;
  1241. let NumMicroOps = 3;
  1242. let ResourceCycles = [2,1];
  1243. }
  1244. def: InstRW<[ICXWriteResGroup96], (instrs MMX_PACKSSDWrm,
  1245. MMX_PACKSSWBrm,
  1246. MMX_PACKUSWBrm)>;
  // ICXWriteResGroup97: 3 uops (2 ICXPort5, 1 ICXPort015), latency 7; used
  // for the VPERMI2W / VPERMT2W register forms.
  // NOTE(review): unlike most AVX-512 patterns in this file these names carry
  // no "Z" before the width suffix - verify they still match the opcode names
  // in the instruction definitions.
  1247. def ICXWriteResGroup97 : SchedWriteRes<[ICXPort5,ICXPort015]> {
  1248. let Latency = 7;
  1249. let NumMicroOps = 3;
  1250. let ResourceCycles = [2,1];
  1251. }
  1252. def: InstRW<[ICXWriteResGroup97], (instregex "VPERMI2W128rr",
  1253. "VPERMI2W256rr",
  1254. "VPERMI2Wrr",
  1255. "VPERMT2W128rr",
  1256. "VPERMT2W256rr",
  1257. "VPERMT2Wrr")>;
  // ICXWriteResGroup99: 3 uops (1 ICXPort23, 2 ICXPort0156), latency 7; used
  // for LEAVE/LEAVE64 and the SCAS* opcodes.
  1258. def ICXWriteResGroup99 : SchedWriteRes<[ICXPort23,ICXPort0156]> {
  1259. let Latency = 7;
  1260. let NumMicroOps = 3;
  1261. let ResourceCycles = [1,2];
  1262. }
  1263. def: InstRW<[ICXWriteResGroup99], (instrs LEAVE, LEAVE64,
  1264. SCASB, SCASL, SCASQ, SCASW)>;
  // ICXWriteResGroup100: 3 uops (ICXPort0, ICXPort5, ICXPort01), latency 7;
  // used for the SS-to-64-bit-integer conversion register forms matched below.
  1265. def ICXWriteResGroup100 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort01]> {
  1266. let Latency = 7;
  1267. let NumMicroOps = 3;
  1268. let ResourceCycles = [1,1,1];
  1269. }
  1270. def: InstRW<[ICXWriteResGroup100], (instregex "(V?)CVT(T?)SS2SI64(Z?)rr",
  1271. "VCVT(T?)SS2USI64Zrr")>;
  // ICXWriteResGroup101: 3 uops (ICXPort0, ICXPort23, ICXPort05), latency 7;
  // used for FLDCW16m.
  1272. def ICXWriteResGroup101 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort05]> {
  1273. let Latency = 7;
  1274. let NumMicroOps = 3;
  1275. let ResourceCycles = [1,1,1];
  1276. }
  1277. def: InstRW<[ICXWriteResGroup101], (instrs FLDCW16m)>;
  // ICXWriteResGroup103: 3 uops (ICXPort5, ICXPort23, ICXPort0156), latency 7;
  // used for KMOV(B|D|Q|W)km.
  1278. def ICXWriteResGroup103 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort0156]> {
  1279. let Latency = 7;
  1280. let NumMicroOps = 3;
  1281. let ResourceCycles = [1,1,1];
  1282. }
  1283. def: InstRW<[ICXWriteResGroup103], (instregex "KMOV(B|D|Q|W)km")>;
  // ICXWriteResGroup104: 3 uops (ICXPort6, ICXPort23, ICXPort0156), latency 7;
  // used for LRET64 and RET64.
  1284. def ICXWriteResGroup104 : SchedWriteRes<[ICXPort6,ICXPort23,ICXPort0156]> {
  1285. let Latency = 7;
  1286. let NumMicroOps = 3;
  1287. let ResourceCycles = [1,1,1];
  1288. }
  1289. def: InstRW<[ICXWriteResGroup104], (instrs LRET64, RET64)>;
  // ICXWriteResGroup106: 4 uops (1 ICXPort4, 2 ICXPort5, 1 ICXPort237),
  // latency 7; used for the memory-destination forms of the COMPRESS opcodes.
  1290. def ICXWriteResGroup106 : SchedWriteRes<[ICXPort4,ICXPort5,ICXPort237]> {
  1291. let Latency = 7;
  1292. let NumMicroOps = 4;
  1293. let ResourceCycles = [1,2,1];
  1294. }
  1295. def: InstRW<[ICXWriteResGroup106], (instregex "VCOMPRESSPD(Z|Z128|Z256)mr(b?)",
  1296. "VCOMPRESSPS(Z|Z128|Z256)mr(b?)",
  1297. "VPCOMPRESSD(Z|Z128|Z256)mr(b?)",
  1298. "VPCOMPRESSQ(Z|Z128|Z256)mr(b?)")>;
  // ICXWriteResGroup107: 5 uops (ICXPort4, ICXPort23, ICXPort237 once each,
  // ICXPort06 twice), latency 7; used for the memory forms of ROL/ROR by 1 or
  // immediate.
  1299. def ICXWriteResGroup107 : SchedWriteRes<[ICXPort4,ICXPort23,ICXPort237,ICXPort06]> {
  1300. let Latency = 7;
  1301. let NumMicroOps = 5;
  1302. let ResourceCycles = [1,1,1,2];
  1303. }
  1304. def: InstRW<[ICXWriteResGroup107], (instregex "ROL(8|16|32|64)m(1|i)",
  1305. "ROR(8|16|32|64)m(1|i)")>;
  // ICXWriteResGroup107_1: 2 uops, both on ICXPort06, latency 2; used for the
  // register rotate-by-1 opcodes listed below.
  1306. def ICXWriteResGroup107_1 : SchedWriteRes<[ICXPort06]> {
  1307. let Latency = 2;
  1308. let NumMicroOps = 2;
  1309. let ResourceCycles = [2];
  1310. }
  1311. def: InstRW<[ICXWriteResGroup107_1], (instrs ROL8r1, ROL16r1, ROL32r1, ROL64r1,
  1312. ROR8r1, ROR16r1, ROR32r1, ROR64r1)>;
  // ICXWriteResGroup108: 5 uops (ICXPort4, ICXPort23, ICXPort237 once each,
  // ICXPort0156 twice), latency 7; used for XADD(8|16|32|64)rm.
  1313. def ICXWriteResGroup108 : SchedWriteRes<[ICXPort4,ICXPort23,ICXPort237,ICXPort0156]> {
  1314. let Latency = 7;
  1315. let NumMicroOps = 5;
  1316. let ResourceCycles = [1,1,1,2];
  1317. }
  1318. def: InstRW<[ICXWriteResGroup108], (instregex "XADD(8|16|32|64)rm")>;
  1319. def ICXWriteResGroup109 : SchedWriteRes<[ICXPort4,ICXPort6,ICXPort23,ICXPort237,ICXPort0156]> {
  1320. let Latency = 7;
  1321. let NumMicroOps = 5;
  1322. let ResourceCycles = [1,1,1,1,1];
  1323. }
  1324. def: InstRW<[ICXWriteResGroup109], (instregex "CALL(16|32|64)m")>;
  1325. def: InstRW<[ICXWriteResGroup109], (instrs FARCALL64m)>;
  1326. def ICXWriteResGroup110 : SchedWriteRes<[ICXPort0,ICXPort4,ICXPort237,ICXPort0156]> { // Z128 scatter stores with DQ/QQ/DPD/QPD suffixes (see InstRW below).
  1327. let Latency = 7;
  1328. let NumMicroOps = 7;
  1329. let ResourceCycles = [1,2,2,2]; // In resource order: ICXPort0 x1, ICXPort4 x2, ICXPort237 x2, ICXPort0156 x2 (sum = NumMicroOps).
  1330. }
  1331. def: InstRW<[ICXWriteResGroup110], (instrs VPSCATTERDQZ128mr,
  1332. VPSCATTERQQZ128mr,
  1333. VSCATTERDPDZ128mr,
  1334. VSCATTERQPDZ128mr)>;
  1335. def ICXWriteResGroup111 : SchedWriteRes<[ICXPort6,ICXPort06,ICXPort15,ICXPort0156]> {
  1336. let Latency = 7;
  1337. let NumMicroOps = 7;
  1338. let ResourceCycles = [1,3,1,2];
  1339. }
  1340. def: InstRW<[ICXWriteResGroup111], (instrs LOOP)>;
  1341. def ICXWriteResGroup112 : SchedWriteRes<[ICXPort0,ICXPort4,ICXPort237,ICXPort0156]> { // Z256 scatter stores with DQ/QQ/DPD/QPD suffixes (see InstRW below).
  1342. let Latency = 7;
  1343. let NumMicroOps = 11;
  1344. let ResourceCycles = [1,4,4,2]; // ICXPort4 and ICXPort237 are each occupied for 4 cycles (2 in the Z128 group ICXWriteResGroup110); sum = NumMicroOps.
  1345. }
  1346. def: InstRW<[ICXWriteResGroup112], (instrs VPSCATTERDQZ256mr,
  1347. VPSCATTERQQZ256mr,
  1348. VSCATTERDPDZ256mr,
  1349. VSCATTERQPDZ256mr)>;
  1350. def ICXWriteResGroup113 : SchedWriteRes<[ICXPort0,ICXPort4,ICXPort237,ICXPort0156]> { // 512-bit (Z suffix) scatter stores with DQ/QQ/DPD/QPD suffixes (see InstRW below).
  1351. let Latency = 7;
  1352. let NumMicroOps = 19;
  1353. let ResourceCycles = [1,8,8,2]; // ICXPort4 and ICXPort237 are each occupied for 8 cycles (4 in the Z256 group ICXWriteResGroup112); sum = NumMicroOps.
  1354. }
  1355. def: InstRW<[ICXWriteResGroup113], (instrs VPSCATTERDQZmr,
  1356. VPSCATTERQQZmr,
  1357. VSCATTERDPDZmr,
  1358. VSCATTERQPDZmr)>;
  1359. def ICXWriteResGroup114 : SchedWriteRes<[ICXPort0,ICXPort4,ICXPort5,ICXPort237,ICXPort0156]> { // Write for VSCATTERDPSZmr only.
  1360. let Latency = 7; // NOTE(review): ICXWriteResGroup134 (VPSCATTERDDZmr) has the same resources and cycles but Latency = 8 - confirm the difference is intended.
  1361. let NumMicroOps = 36;
  1362. let ResourceCycles = [1,16,1,16,2]; // ICXPort4 and ICXPort237 are each occupied for 16 cycles; sum = NumMicroOps.
  1363. }
  1364. def: InstRW<[ICXWriteResGroup114], (instrs VSCATTERDPSZmr)>;
  1365. def ICXWriteResGroup118 : SchedWriteRes<[ICXPort1,ICXPort23]> {
  1366. let Latency = 8;
  1367. let NumMicroOps = 2;
  1368. let ResourceCycles = [1,1];
  1369. }
  1370. def: InstRW<[ICXWriteResGroup118], (instregex "PDEP(32|64)rm",
  1371. "PEXT(32|64)rm")>;
  1372. def ICXWriteResGroup119 : SchedWriteRes<[ICXPort5,ICXPort23]> {
  1373. let Latency = 8;
  1374. let NumMicroOps = 2;
  1375. let ResourceCycles = [1,1];
  1376. }
  1377. def: InstRW<[ICXWriteResGroup119], (instregex "FCOM(P?)(32|64)m",
  1378. "VPBROADCASTB(Z|Z256)rm(b?)",
  1379. "VPBROADCASTW(Z|Z256)rm(b?)",
  1380. "(V?)PALIGNR(Y|Z256)rmi",
  1381. "(V?)PERMIL(PD|PS)(Y|Z256)m(b?)i",
  1382. "(V?)PERMIL(PD|PS)(Y|Z256)rm",
  1383. "(V?)UNPCK(L|H)(PD|PS)(Y|Z256)rm")>;
  1384. def: InstRW<[ICXWriteResGroup119], (instrs VPBROADCASTBYrm,
  1385. VPBROADCASTWYrm,
  1386. VPMOVSXBDYrm,
  1387. VPMOVSXBQYrm,
  1388. VPMOVSXWQYrm)>;
  1389. def ICXWriteResGroup121 : SchedWriteRes<[ICXPort23,ICXPort015]> {
  1390. let Latency = 8;
  1391. let NumMicroOps = 2;
  1392. let ResourceCycles = [1,1];
  1393. }
  1394. def: InstRW<[ICXWriteResGroup121], (instrs VMOVNTDQAZ256rm,
  1395. VPBLENDDYrmi)>;
  1396. def: InstRW<[ICXWriteResGroup121, ReadAfterVecYLd],
  1397. (instregex "VBLENDMPD(Z|Z256)rm(b?)",
  1398. "VBLENDMPS(Z|Z256)rm(b?)",
  1399. "VBROADCASTF32X2Z256rm(b?)",
  1400. "VBROADCASTF32X2Zrm(b?)",
  1401. "VBROADCASTF32X4Z256rm(b?)",
  1402. "VBROADCASTF32X4rm(b?)",
  1403. "VBROADCASTF32X8rm(b?)",
  1404. "VBROADCASTF64X2Z128rm(b?)",
  1405. "VBROADCASTF64X2rm(b?)",
  1406. "VBROADCASTF64X4rm(b?)",
  1407. "VBROADCASTI32X2Z256rm(b?)",
  1408. "VBROADCASTI32X2Zrm(b?)",
  1409. "VBROADCASTI32X4Z256rm(b?)",
  1410. "VBROADCASTI32X4rm(b?)",
  1411. "VBROADCASTI32X8rm(b?)",
  1412. "VBROADCASTI64X2Z128rm(b?)",
  1413. "VBROADCASTI64X2rm(b?)",
  1414. "VBROADCASTI64X4rm(b?)",
  1415. "VBROADCASTSD(Z|Z256)rm(b?)",
  1416. "VBROADCASTSS(Z|Z256)rm(b?)",
  1417. "VINSERTF32x4(Z|Z256)rm(b?)",
  1418. "VINSERTF32x8Zrm(b?)",
  1419. "VINSERTF64x2(Z|Z256)rm(b?)",
  1420. "VINSERTF64x4Zrm(b?)",
  1421. "VINSERTI32x4(Z|Z256)rm(b?)",
  1422. "VINSERTI32x8Zrm(b?)",
  1423. "VINSERTI64x2(Z|Z256)rm(b?)",
  1424. "VINSERTI64x4Zrm(b?)",
  1425. "VMOVAPD(Z|Z256)rm(b?)",
  1426. "VMOVAPS(Z|Z256)rm(b?)",
  1427. "VMOVDDUP(Z|Z256)rm(b?)",
  1428. "VMOVDQA32(Z|Z256)rm(b?)",
  1429. "VMOVDQA64(Z|Z256)rm(b?)",
  1430. "VMOVDQU16(Z|Z256)rm(b?)",
  1431. "VMOVDQU32(Z|Z256)rm(b?)",
  1432. "VMOVDQU64(Z|Z256)rm(b?)",
  1433. "VMOVDQU8(Z|Z256)rm(b?)",
  1434. "VMOVSHDUP(Z|Z256)rm(b?)",
  1435. "VMOVSLDUP(Z|Z256)rm(b?)",
  1436. "VMOVUPD(Z|Z256)rm(b?)",
  1437. "VMOVUPS(Z|Z256)rm(b?)",
  1438. "VPADD(B|D|Q|W)Yrm",
  1439. "VPADD(B|D|Q|W)(Z|Z256)rm(b?)",
  1440. "VPBLENDM(B|D|Q|W)(Z|Z256)rm(b?)",
  1441. "VPBROADCASTD(Z|Z256)rm(b?)",
  1442. "VPBROADCASTQ(Z|Z256)rm(b?)",
  1443. "VPSUB(B|D|Q|W)Yrm",
  1444. "VPSUB(B|D|Q|W)(Z|Z256)rm(b?)",
  1445. "VPTERNLOGD(Z|Z256)rm(b?)i",
  1446. "VPTERNLOGQ(Z|Z256)rm(b?)i")>;
  1447. def ICXWriteResGroup123 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23]> {
  1448. let Latency = 8;
  1449. let NumMicroOps = 4;
  1450. let ResourceCycles = [1,2,1];
  1451. }
  1452. def: InstRW<[ICXWriteResGroup123], (instregex "MMX_PH(ADD|SUB)SWrm")>;
  1453. def ICXWriteResGroup127 : SchedWriteRes<[ICXPort23,ICXPort237,ICXPort06,ICXPort0156]> {
  1454. let Latency = 8;
  1455. let NumMicroOps = 5;
  1456. let ResourceCycles = [1,1,1,2];
  1457. }
  1458. def: InstRW<[ICXWriteResGroup127], (instregex "RCL(8|16|32|64)m(1|i)",
  1459. "RCR(8|16|32|64)m(1|i)")>;
  1460. def ICXWriteResGroup128 : SchedWriteRes<[ICXPort4,ICXPort23,ICXPort237,ICXPort06]> {
  1461. let Latency = 8;
  1462. let NumMicroOps = 6;
  1463. let ResourceCycles = [1,1,1,3];
  1464. }
  1465. def: InstRW<[ICXWriteResGroup128], (instregex "ROL(8|16|32|64)mCL",
  1466. "ROR(8|16|32|64)mCL",
  1467. "SAR(8|16|32|64)mCL",
  1468. "SHL(8|16|32|64)mCL",
  1469. "SHR(8|16|32|64)mCL")>;
  1470. def ICXWriteResGroup130 : SchedWriteRes<[ICXPort4,ICXPort23,ICXPort237,ICXPort06,ICXPort0156]> { // Resources bound to the WriteADCRMW write via the SchedAlias below.
  1471. let Latency = 8;
  1472. let NumMicroOps = 6;
  1473. let ResourceCycles = [1,1,1,2,1]; // ICXPort06 is occupied for 2 cycles, every other resource for 1; sum = NumMicroOps.
  1474. }
  1475. def: SchedAlias<WriteADCRMW, ICXWriteResGroup130>; // Unlike neighbouring groups, this one is attached through SchedAlias rather than an InstRW instruction list.
  1476. def ICXWriteResGroup131 : SchedWriteRes<[ICXPort0,ICXPort4,ICXPort5,ICXPort237,ICXPort0156]> {
  1477. let Latency = 8;
  1478. let NumMicroOps = 8;
  1479. let ResourceCycles = [1,2,1,2,2];
  1480. }
  1481. def: InstRW<[ICXWriteResGroup131], (instrs VPSCATTERQDZ128mr,
  1482. VPSCATTERQDZ256mr,
  1483. VSCATTERQPSZ128mr,
  1484. VSCATTERQPSZ256mr)>;
  1485. def ICXWriteResGroup132 : SchedWriteRes<[ICXPort0,ICXPort4,ICXPort5,ICXPort237,ICXPort0156]> {
  1486. let Latency = 8;
  1487. let NumMicroOps = 12;
  1488. let ResourceCycles = [1,4,1,4,2];
  1489. }
  1490. def: InstRW<[ICXWriteResGroup132], (instrs VPSCATTERDDZ128mr,
  1491. VSCATTERDPSZ128mr)>;
  1492. def ICXWriteResGroup133 : SchedWriteRes<[ICXPort0,ICXPort4,ICXPort5,ICXPort237,ICXPort0156]> {
  1493. let Latency = 8;
  1494. let NumMicroOps = 20;
  1495. let ResourceCycles = [1,8,1,8,2];
  1496. }
  1497. def: InstRW<[ICXWriteResGroup133], (instrs VPSCATTERDDZ256mr,
  1498. VSCATTERDPSZ256mr)>;
  1499. def ICXWriteResGroup134 : SchedWriteRes<[ICXPort0,ICXPort4,ICXPort5,ICXPort237,ICXPort0156]> {
  1500. let Latency = 8;
  1501. let NumMicroOps = 36;
  1502. let ResourceCycles = [1,16,1,16,2];
  1503. }
  1504. def: InstRW<[ICXWriteResGroup134], (instrs VPSCATTERDDZmr)>;
  1505. def ICXWriteResGroup135 : SchedWriteRes<[ICXPort0,ICXPort23]> {
  1506. let Latency = 9;
  1507. let NumMicroOps = 2;
  1508. let ResourceCycles = [1,1];
  1509. }
  1510. def: InstRW<[ICXWriteResGroup135], (instrs MMX_CVTPI2PSrm)>;
  1511. def ICXWriteResGroup136 : SchedWriteRes<[ICXPort5,ICXPort23]> {
  1512. let Latency = 9;
  1513. let NumMicroOps = 2;
  1514. let ResourceCycles = [1,1];
  1515. }
  1516. def: InstRW<[ICXWriteResGroup136], (instrs VPMOVSXBWYrm,
  1517. VPMOVSXDQYrm,
  1518. VPMOVSXWDYrm,
  1519. VPMOVZXWDYrm)>;
  1520. def: InstRW<[ICXWriteResGroup136], (instregex "VALIGN(D|Q)Z128rm(b?)i",
  1521. "VFPCLASSSDZrm(b?)",
  1522. "VFPCLASSSSZrm(b?)",
  1523. "(V?)PCMPGTQrm",
  1524. "VPERMI2D128rm(b?)",
  1525. "VPERMI2PD128rm(b?)",
  1526. "VPERMI2PS128rm(b?)",
  1527. "VPERMI2Q128rm(b?)",
  1528. "VPERMT2D128rm(b?)",
  1529. "VPERMT2PD128rm(b?)",
  1530. "VPERMT2PS128rm(b?)",
  1531. "VPERMT2Q128rm(b?)",
  1532. "VPMAXSQZ128rm(b?)",
  1533. "VPMAXUQZ128rm(b?)",
  1534. "VPMINSQZ128rm(b?)",
  1535. "VPMINUQZ128rm(b?)")>;
  1536. def ICXWriteResGroup136_2 : SchedWriteRes<[ICXPort5,ICXPort23]> {
  1537. let Latency = 10;
  1538. let NumMicroOps = 2;
  1539. let ResourceCycles = [1,1];
  1540. }
  1541. def: InstRW<[ICXWriteResGroup136_2], (instregex "VCMP(PD|PS)Z128rm(b?)i",
  1542. "VCMP(SD|SS)Zrm",
  1543. "VFPCLASSPDZ128rm(b?)",
  1544. "VFPCLASSPSZ128rm(b?)",
  1545. "VPCMPBZ128rmi(b?)",
  1546. "VPCMPDZ128rmi(b?)",
  1547. "VPCMPEQ(B|D|Q|W)Z128rm(b?)",
  1548. "VPCMPGT(B|D|Q|W)Z128rm(b?)",
  1549. "VPCMPQZ128rmi(b?)",
  1550. "VPCMPU(B|D|Q|W)Z128rmi(b?)",
  1551. "VPCMPWZ128rmi(b?)",
  1552. "(V?)PACK(U|S)S(DW|WB)(Z128)?rm",
  1553. "VPTESTMBZ128rm(b?)",
  1554. "VPTESTMDZ128rm(b?)",
  1555. "VPTESTMQZ128rm(b?)",
  1556. "VPTESTMWZ128rm(b?)",
  1557. "VPTESTNMBZ128rm(b?)",
  1558. "VPTESTNMDZ128rm(b?)",
  1559. "VPTESTNMQZ128rm(b?)",
  1560. "VPTESTNMWZ128rm(b?)")>;
  1561. def ICXWriteResGroup137 : SchedWriteRes<[ICXPort23,ICXPort01]> {
  1562. let Latency = 9;
  1563. let NumMicroOps = 2;
  1564. let ResourceCycles = [1,1];
  1565. }
  1566. def: InstRW<[ICXWriteResGroup137], (instregex "MMX_CVT(T?)PS2PIrm",
  1567. "(V?)CVTPS2PDrm")>;
  1568. def ICXWriteResGroup143 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort23]> {
  1569. let Latency = 9;
  1570. let NumMicroOps = 4;
  1571. let ResourceCycles = [2,1,1];
  1572. }
  1573. def: InstRW<[ICXWriteResGroup143], (instregex "(V?)PHADDSWrm",
  1574. "(V?)PHSUBSWrm")>;
  1575. def ICXWriteResGroup146 : SchedWriteRes<[ICXPort1,ICXPort6,ICXPort23,ICXPort0156]> {
  1576. let Latency = 9;
  1577. let NumMicroOps = 5;
  1578. let ResourceCycles = [1,2,1,1];
  1579. }
  1580. def: InstRW<[ICXWriteResGroup146], (instregex "LAR(16|32|64)rm",
  1581. "LSL(16|32|64)rm")>;
  1582. def ICXWriteResGroup148 : SchedWriteRes<[ICXPort5,ICXPort23]> {
  1583. let Latency = 10;
  1584. let NumMicroOps = 2;
  1585. let ResourceCycles = [1,1];
  1586. }
  1587. def: InstRW<[ICXWriteResGroup148], (instrs VPCMPGTQYrm)>;
  1588. def: InstRW<[ICXWriteResGroup148], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
  1589. "ILD_F(16|32|64)m",
  1590. "VALIGND(Z|Z256)rm(b?)i",
  1591. "VALIGNQ(Z|Z256)rm(b?)i",
  1592. "VPMAXSQ(Z|Z256)rm(b?)",
  1593. "VPMAXUQ(Z|Z256)rm(b?)",
  1594. "VPMINSQ(Z|Z256)rm(b?)",
  1595. "VPMINUQ(Z|Z256)rm(b?)")>;
  1596. def ICXWriteResGroup148_2 : SchedWriteRes<[ICXPort5,ICXPort23]> {
  1597. let Latency = 11;
  1598. let NumMicroOps = 2;
  1599. let ResourceCycles = [1,1];
  1600. }
  1601. def: InstRW<[ICXWriteResGroup148_2], (instregex "VCMPPD(Z|Z256)rm(b?)i",
  1602. "VCMPPS(Z|Z256)rm(b?)i",
  1603. "VFPCLASSPD(Z|Z256)rm(b?)",
  1604. "VFPCLASSPS(Z|Z256)rm(b?)",
  1605. "VPCMPB(Z|Z256)rmi(b?)",
  1606. "VPCMPD(Z|Z256)rmi(b?)",
  1607. "VPCMPEQB(Z|Z256)rm(b?)",
  1608. "VPCMPEQD(Z|Z256)rm(b?)",
  1609. "VPCMPEQQ(Z|Z256)rm(b?)",
  1610. "VPCMPEQW(Z|Z256)rm(b?)",
  1611. "VPCMPGTB(Z|Z256)rm(b?)",
  1612. "VPCMPGTD(Z|Z256)rm(b?)",
  1613. "VPCMPGTQ(Z|Z256)rm(b?)",
  1614. "VPCMPGTW(Z|Z256)rm(b?)",
  1615. "VPCMPQ(Z|Z256)rmi(b?)",
  1616. "VPCMPU(B|D|Q|W)Z256rmi(b?)",
  1617. "VPCMPU(B|D|Q|W)Zrmi(b?)",
  1618. "VPCMPW(Z|Z256)rmi(b?)",
  1619. "(V?)PACK(U|S)S(DW|WB)(Y|Z|Z256)rm",
  1620. "VPTESTM(B|D|Q|W)Z256rm(b?)",
  1621. "VPTESTM(B|D|Q|W)Zrm(b?)",
  1622. "VPTESTNM(B|D|Q|W)Z256rm(b?)",
  1623. "VPTESTNM(B|D|Q|W)Zrm(b?)")>;
  1624. def ICXWriteResGroup149 : SchedWriteRes<[ICXPort23,ICXPort01]> {
  1625. let Latency = 10;
  1626. let NumMicroOps = 2;
  1627. let ResourceCycles = [1,1];
  1628. }
  1629. def: InstRW<[ICXWriteResGroup149], (instregex "VCVTDQ2PDZ128rm(b?)",
  1630. "VCVTDQ2PSZ128rm(b?)",
  1631. "(V?)CVTDQ2PSrm",
  1632. "VCVTPD2QQZ128rm(b?)",
  1633. "VCVTPD2UQQZ128rm(b?)",
  1634. "VCVTPH2PSZ128rm(b?)",
  1635. "VCVTPS2DQZ128rm(b?)",
  1636. "(V?)CVTPS2DQrm",
  1637. "VCVTPS2PDZ128rm(b?)",
  1638. "VCVTPS2QQZ128rm(b?)",
  1639. "VCVTPS2UDQZ128rm(b?)",
  1640. "VCVTPS2UQQZ128rm(b?)",
  1641. "VCVTQQ2PDZ128rm(b?)",
  1642. "VCVTQQ2PSZ128rm(b?)",
  1643. "VCVTSS2SDZrm",
  1644. "(V?)CVTSS2SDrm",
  1645. "VCVTTPD2QQZ128rm(b?)",
  1646. "VCVTTPD2UQQZ128rm(b?)",
  1647. "VCVTTPS2DQZ128rm(b?)",
  1648. "(V?)CVTTPS2DQrm",
  1649. "VCVTTPS2QQZ128rm(b?)",
  1650. "VCVTTPS2UDQZ128rm(b?)",
  1651. "VCVTTPS2UQQZ128rm(b?)",
  1652. "VCVTUDQ2PDZ128rm(b?)",
  1653. "VCVTUDQ2PSZ128rm(b?)",
  1654. "VCVTUQQ2PDZ128rm(b?)",
  1655. "VCVTUQQ2PSZ128rm(b?)")>;
  1656. def ICXWriteResGroup151 : SchedWriteRes<[ICXPort5,ICXPort23]> {
  1657. let Latency = 10;
  1658. let NumMicroOps = 3;
  1659. let ResourceCycles = [2,1];
  1660. }
  1661. def: InstRW<[ICXWriteResGroup151], (instregex "VEXPANDPDZ128rm(b?)",
  1662. "VEXPANDPSZ128rm(b?)",
  1663. "VPEXPANDDZ128rm(b?)",
  1664. "VPEXPANDQZ128rm(b?)")>;
  1665. def ICXWriteResGroup154 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort23]> {
  1666. let Latency = 10;
  1667. let NumMicroOps = 4;
  1668. let ResourceCycles = [2,1,1];
  1669. }
  1670. def: InstRW<[ICXWriteResGroup154], (instrs VPHADDSWYrm,
  1671. VPHSUBSWYrm)>;
  1672. def ICXWriteResGroup157 : SchedWriteRes<[ICXPort4,ICXPort6,ICXPort23,ICXPort237,ICXPort06,ICXPort0156]> {
  1673. let Latency = 10;
  1674. let NumMicroOps = 8;
  1675. let ResourceCycles = [1,1,1,1,1,3];
  1676. }
  1677. def: InstRW<[ICXWriteResGroup157], (instregex "XCHG(8|16|32|64)rm")>;
  1678. def ICXWriteResGroup160 : SchedWriteRes<[ICXPort0,ICXPort23]> {
  1679. let Latency = 11;
  1680. let NumMicroOps = 2;
  1681. let ResourceCycles = [1,1];
  1682. }
  1683. def: InstRW<[ICXWriteResGroup160], (instregex "MUL_F(32|64)m")>;
  1684. def ICXWriteResGroup161 : SchedWriteRes<[ICXPort23,ICXPort01]> {
  1685. let Latency = 11;
  1686. let NumMicroOps = 2;
  1687. let ResourceCycles = [1,1];
  1688. }
  1689. def: InstRW<[ICXWriteResGroup161], (instrs VCVTDQ2PSYrm,
  1690. VCVTPS2PDYrm)>;
  1691. def: InstRW<[ICXWriteResGroup161], (instregex "VCVTDQ2(PD|PS)(Z|Z256)rm(b?)",
  1692. "VCVTPH2PS(Z|Z256)rm(b?)",
  1693. "VCVTPS2PD(Z|Z256)rm(b?)",
  1694. "VCVTQQ2PD(Z|Z256)rm(b?)",
  1695. "VCVTQQ2PSZ256rm(b?)",
  1696. "VCVT(T?)PD2QQ(Z|Z256)rm(b?)",
  1697. "VCVT(T?)PD2UQQ(Z|Z256)rm(b?)",
  1698. "VCVT(T?)PS2DQYrm",
  1699. "VCVT(T?)PS2DQ(Z|Z256)rm(b?)",
  1700. "VCVT(T?)PS2QQZ256rm(b?)",
  1701. "VCVT(T?)PS2UDQ(Z|Z256)rm(b?)",
  1702. "VCVT(T?)PS2UQQZ256rm(b?)",
  1703. "VCVTUDQ2(PD|PS)(Z|Z256)rm(b?)",
  1704. "VCVTUQQ2PD(Z|Z256)rm(b?)",
  1705. "VCVTUQQ2PSZ256rm(b?)")>;
  1706. def ICXWriteResGroup162 : SchedWriteRes<[ICXPort5,ICXPort23]> {
  1707. let Latency = 11;
  1708. let NumMicroOps = 3;
  1709. let ResourceCycles = [2,1];
  1710. }
  1711. def: InstRW<[ICXWriteResGroup162], (instregex "FICOM(P?)(16|32)m",
  1712. "VEXPANDPD(Z|Z256)rm(b?)",
  1713. "VEXPANDPS(Z|Z256)rm(b?)",
  1714. "VPEXPANDD(Z|Z256)rm(b?)",
  1715. "VPEXPANDQ(Z|Z256)rm(b?)")>;
  1716. def ICXWriteResGroup164 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23]> {
  1717. let Latency = 11;
  1718. let NumMicroOps = 3;
  1719. let ResourceCycles = [1,1,1];
  1720. }
  1721. def: InstRW<[ICXWriteResGroup164], (instregex "(V?)CVTDQ2PDrm")>;
  1722. def ICXWriteResGroup166 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort01]> {
  1723. let Latency = 11;
  1724. let NumMicroOps = 3;
  1725. let ResourceCycles = [1,1,1];
  1726. }
  1727. def: InstRW<[ICXWriteResGroup166], (instrs CVTPD2DQrm,
  1728. CVTTPD2DQrm,
  1729. MMX_CVTPD2PIrm,
  1730. MMX_CVTTPD2PIrm)>;
  1731. def ICXWriteResGroup167 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort015]> {
  1732. let Latency = 11;
  1733. let NumMicroOps = 4;
  1734. let ResourceCycles = [2,1,1];
  1735. }
  1736. def: InstRW<[ICXWriteResGroup167], (instregex "VPCONFLICTQZ128rm(b?)")>;
  1737. def ICXWriteResGroup169 : SchedWriteRes<[ICXPort1,ICXPort06,ICXPort0156]> {
  1738. let Latency = 11;
  1739. let NumMicroOps = 7;
  1740. let ResourceCycles = [2,3,2];
  1741. }
  1742. def: InstRW<[ICXWriteResGroup169], (instregex "RCL(16|32|64)rCL",
  1743. "RCR(16|32|64)rCL")>;
  1744. def ICXWriteResGroup170 : SchedWriteRes<[ICXPort1,ICXPort06,ICXPort15,ICXPort0156]> {
  1745. let Latency = 11;
  1746. let NumMicroOps = 9;
  1747. let ResourceCycles = [1,5,1,2];
  1748. }
  1749. def: InstRW<[ICXWriteResGroup170], (instrs RCL8rCL)>;
  1750. def ICXWriteResGroup171 : SchedWriteRes<[ICXPort06,ICXPort0156]> {
  1751. let Latency = 11;
  1752. let NumMicroOps = 11;
  1753. let ResourceCycles = [2,9];
  1754. }
  1755. def: InstRW<[ICXWriteResGroup171], (instrs LOOPE, LOOPNE)>;
  1756. def ICXWriteResGroup174 : SchedWriteRes<[ICXPort01]> { // Register forms of VPMULLQ for Z128/Z256 (see InstRW below).
  1757. let Latency = 15; // Higher than the Latency 11/12 groups on either side, so this group is out of the file's otherwise ascending latency order.
  1758. let NumMicroOps = 3;
  1759. let ResourceCycles = [3]; // ICXPort01 is occupied for 3 cycles, matching NumMicroOps.
  1760. }
  1761. def: InstRW<[ICXWriteResGroup174], (instregex "VPMULLQ(Z128|Z256)rr")>;
  1762. def ICXWriteResGroup174z : SchedWriteRes<[ICXPort0]> { // VPMULLQZrr: same latency and uop count as ICXWriteResGroup174, but restricted to ICXPort0 instead of ICXPort01.
  1763. let Latency = 15;
  1764. let NumMicroOps = 3;
  1765. let ResourceCycles = [3]; // ICXPort0 is occupied for 3 cycles, matching NumMicroOps.
  1766. }
  1767. def: InstRW<[ICXWriteResGroup174z], (instregex "VPMULLQZrr")>;
  1768. def ICXWriteResGroup175 : SchedWriteRes<[ICXPort5,ICXPort23]> {
  1769. let Latency = 12;
  1770. let NumMicroOps = 3;
  1771. let ResourceCycles = [2,1];
  1772. }
  1773. def: InstRW<[ICXWriteResGroup175], (instregex "VPERMWZ128rm(b?)")>;
  1774. def ICXWriteResGroup176 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort01]> {
  1775. let Latency = 12;
  1776. let NumMicroOps = 3;
  1777. let ResourceCycles = [1,1,1];
  1778. }
  1779. def: InstRW<[ICXWriteResGroup176], (instregex "VCVT(T?)SD2USIZrm(b?)",
  1780. "VCVT(T?)SS2USI64Zrm(b?)")>;
  1781. def ICXWriteResGroup177 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort01]> {
  1782. let Latency = 12;
  1783. let NumMicroOps = 3;
  1784. let ResourceCycles = [1,1,1];
  1785. }
  1786. def: InstRW<[ICXWriteResGroup177], (instregex "VCVT(T?)PS2QQZrm(b?)",
  1787. "VCVT(T?)PS2UQQZrm(b?)")>;
  1788. def ICXWriteResGroup180 : SchedWriteRes<[ICXPort5,ICXPort23]> {
  1789. let Latency = 13;
  1790. let NumMicroOps = 3;
  1791. let ResourceCycles = [2,1];
  1792. }
  1793. def: InstRW<[ICXWriteResGroup180], (instregex "(ADD|SUB|SUBR)_FI(16|32)m",
  1794. "VPERMWZ256rm(b?)",
  1795. "VPERMWZrm(b?)")>;
  1796. def ICXWriteResGroup181 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23]> {
  1797. let Latency = 13;
  1798. let NumMicroOps = 3;
  1799. let ResourceCycles = [1,1,1];
  1800. }
  1801. def: InstRW<[ICXWriteResGroup181], (instrs VCVTDQ2PDYrm)>;
  1802. def ICXWriteResGroup183 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort015]> {
  1803. let Latency = 13;
  1804. let NumMicroOps = 4;
  1805. let ResourceCycles = [2,1,1];
  1806. }
  1807. def: InstRW<[ICXWriteResGroup183], (instregex "VPERMI2W128rm(b?)",
  1808. "VPERMT2W128rm(b?)")>;
  1809. def ICXWriteResGroup187 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23]> {
  1810. let Latency = 14;
  1811. let NumMicroOps = 3;
  1812. let ResourceCycles = [1,1,1];
  1813. }
  1814. def: InstRW<[ICXWriteResGroup187], (instregex "MUL_FI(16|32)m")>;
  1815. def ICXWriteResGroup188 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort01]> {
  1816. let Latency = 14;
  1817. let NumMicroOps = 3;
  1818. let ResourceCycles = [1,1,1];
  1819. }
  1820. def: InstRW<[ICXWriteResGroup188], (instregex "VCVTPD2DQZrm(b?)",
  1821. "VCVTPD2UDQZrm(b?)",
  1822. "VCVTQQ2PSZrm(b?)",
  1823. "VCVTTPD2DQZrm(b?)",
  1824. "VCVTTPD2UDQZrm(b?)",
  1825. "VCVTUQQ2PSZrm(b?)")>;
  1826. def ICXWriteResGroup189 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort015]> {
  1827. let Latency = 14;
  1828. let NumMicroOps = 4;
  1829. let ResourceCycles = [2,1,1];
  1830. }
  1831. def: InstRW<[ICXWriteResGroup189], (instregex "VPERMI2W256rm(b?)",
  1832. "VPERMI2Wrm(b?)",
  1833. "VPERMT2W256rm(b?)",
  1834. "VPERMT2Wrm(b?)")>;
  1835. def ICXWriteResGroup190 : SchedWriteRes<[ICXPort1,ICXPort06,ICXPort15,ICXPort0156]> {
  1836. let Latency = 14;
  1837. let NumMicroOps = 10;
  1838. let ResourceCycles = [2,4,1,3];
  1839. }
  1840. def: InstRW<[ICXWriteResGroup190], (instrs RCR8rCL)>;
  1841. def ICXWriteResGroup191 : SchedWriteRes<[ICXPort0]> {
  1842. let Latency = 15;
  1843. let NumMicroOps = 1;
  1844. let ResourceCycles = [1];
  1845. }
  1846. def: InstRW<[ICXWriteResGroup191], (instregex "DIVR_(FPrST0|FST0r|FrST0)")>;
  1847. def ICXWriteResGroup194 : SchedWriteRes<[ICXPort1,ICXPort5,ICXPort01,ICXPort23,ICXPort015]> {
  1848. let Latency = 15;
  1849. let NumMicroOps = 8;
  1850. let ResourceCycles = [1,2,2,1,2];
  1851. }
  1852. def: InstRW<[ICXWriteResGroup194], (instregex "VPCONFLICTDZ128rm(b?)")>;
  1853. def ICXWriteResGroup195 : SchedWriteRes<[ICXPort1,ICXPort23,ICXPort237,ICXPort06,ICXPort15,ICXPort0156]> {
  1854. let Latency = 15;
  1855. let NumMicroOps = 10;
  1856. let ResourceCycles = [1,1,1,5,1,1];
  1857. }
  1858. def: InstRW<[ICXWriteResGroup195], (instregex "RCL(8|16|32|64)mCL")>;
  1859. def ICXWriteResGroup199 : SchedWriteRes<[ICXPort4,ICXPort23,ICXPort237,ICXPort06,ICXPort15,ICXPort0156]> {
  1860. let Latency = 16;
  1861. let NumMicroOps = 14;
  1862. let ResourceCycles = [1,1,1,4,2,5];
  1863. }
  1864. def: InstRW<[ICXWriteResGroup199], (instrs CMPXCHG8B)>;
  1865. def ICXWriteResGroup200 : SchedWriteRes<[ICXPort1, ICXPort05, ICXPort6]> { // Write for VZEROALL only.
  1866. let Latency = 12;
  1867. let NumMicroOps = 34; // NOTE(review): larger than the ResourceCycles sum below (1+4+5 = 10); nearby groups keep the two equal - confirm the remaining uops are intentionally left without a port.
  1868. let ResourceCycles = [1, 4, 5]; // In resource order: ICXPort1 x1, ICXPort05 x4, ICXPort6 x5.
  1869. }
  1870. def: InstRW<[ICXWriteResGroup200], (instrs VZEROALL)>;
  1871. def ICXWriteResGroup202 : SchedWriteRes<[ICXPort0,ICXPort1,ICXPort5,ICXPort6,ICXPort05,ICXPort0156]> {
  1872. let Latency = 17;
  1873. let NumMicroOps = 15;
  1874. let ResourceCycles = [2,1,2,4,2,4];
  1875. }
  1876. def: InstRW<[ICXWriteResGroup202], (instrs XCH_F)>;
  1877. def ICXWriteResGroup205 : SchedWriteRes<[ICXPort23,ICXPort01]> { // Memory-operand form of VPMULLQ for Z128 (register forms use ICXWriteResGroup174).
  1878. let Latency = 21; // Register form (ICXWriteResGroup174) uses Latency = 15.
  1879. let NumMicroOps = 4;
  1880. let ResourceCycles = [1,3]; // 1 cycle on ICXPort23 plus the same 3 cycles on ICXPort01 as the register form.
  1881. }
  1882. def: InstRW<[ICXWriteResGroup205], (instregex "VPMULLQZ128rm(b?)")>;
  1883. def ICXWriteResGroup207 : SchedWriteRes<[ICXPort5,ICXPort6,ICXPort06,ICXPort0156]> {
  1884. let Latency = 18;
  1885. let NumMicroOps = 8;
  1886. let ResourceCycles = [1,1,1,5];
  1887. }
  1888. def: InstRW<[ICXWriteResGroup207], (instrs CPUID, RDTSC)>;
  1889. def ICXWriteResGroup208 : SchedWriteRes<[ICXPort1,ICXPort23,ICXPort237,ICXPort06,ICXPort15,ICXPort0156]> {
  1890. let Latency = 18;
  1891. let NumMicroOps = 11;
  1892. let ResourceCycles = [2,1,1,4,1,2];
  1893. }
  1894. def: InstRW<[ICXWriteResGroup208], (instregex "RCR(8|16|32|64)mCL")>;
  1895. def ICXWriteResGroup211 : SchedWriteRes<[ICXPort23,ICXPort01]> { // Memory-operand form of VPMULLQ for Z256 (register forms use ICXWriteResGroup174).
  1896. let Latency = 22; // One more than the Z128 memory form (ICXWriteResGroup205, Latency = 21).
  1897. let NumMicroOps = 4;
  1898. let ResourceCycles = [1,3]; // 1 cycle on ICXPort23 plus 3 cycles on ICXPort01; sum = NumMicroOps.
  1899. }
  1900. def: InstRW<[ICXWriteResGroup211], (instregex "VPMULLQZ256rm(b?)")>;
  1901. def ICXWriteResGroup211_1 : SchedWriteRes<[ICXPort23,ICXPort0]> { // Memory-operand form of 512-bit VPMULLQ; like ICXWriteResGroup211 but on ICXPort0 instead of ICXPort01.
  1902. let Latency = 22;
  1903. let NumMicroOps = 4;
  1904. let ResourceCycles = [1,3]; // 1 cycle on ICXPort23 plus 3 cycles on ICXPort0; sum = NumMicroOps.
  1905. }
  1906. def: InstRW<[ICXWriteResGroup211_1], (instregex "VPMULLQZrm(b?)")>;
  1907. def ICXWriteResGroup215 : SchedWriteRes<[ICXPort0]> {
  1908. let Latency = 20;
  1909. let NumMicroOps = 1;
  1910. let ResourceCycles = [1];
  1911. }
  1912. def: InstRW<[ICXWriteResGroup215], (instregex "DIV_(FPrST0|FST0r|FrST0)")>;
  1913. def ICXWriteGatherEVEX2 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort015,ICXPort0156]> { // Z128 gathers listed in the InstRW below (presumably the 2-element forms, per the group name).
  1914. let Latency = 17;
  1915. let NumMicroOps = 5; // 2 uops perform multiple loads
  1916. let ResourceCycles = [1,2,1,1]; // ICXPort23 is occupied for 2 cycles; here the sum (5) still equals NumMicroOps.
  1917. }
  1918. def: InstRW<[ICXWriteGatherEVEX2], (instrs VGATHERQPSZ128rm, VPGATHERQDZ128rm,
  1919. VGATHERDPDZ128rm, VPGATHERDQZ128rm,
  1920. VGATHERQPDZ128rm, VPGATHERQQZ128rm)>;
  1921. def ICXWriteGatherEVEX4 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort015,ICXPort0156]> { // Z128/Z256 gathers listed in the InstRW below (presumably the 4-element forms, per the group name).
  1922. let Latency = 19;
  1923. let NumMicroOps = 5; // 2 uops perform multiple loads
  1924. let ResourceCycles = [1,4,1,1]; // ICXPort23 is occupied for 4 cycles, so the sum (7) exceeds NumMicroOps (5); see the NumMicroOps note.
  1925. }
  1926. def: InstRW<[ICXWriteGatherEVEX4], (instrs VGATHERQPSZ256rm, VPGATHERQDZ256rm,
  1927. VGATHERQPDZ256rm, VPGATHERQQZ256rm,
  1928. VGATHERDPSZ128rm, VPGATHERDDZ128rm,
  1929. VGATHERDPDZ256rm, VPGATHERDQZ256rm)>;
  1930. def ICXWriteGatherEVEX8 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort015,ICXPort0156]> { // Z256/Z gathers listed in the InstRW below (presumably the 8-element forms, per the group name).
  1931. let Latency = 21;
  1932. let NumMicroOps = 5; // 2 uops perform multiple loads
  1933. let ResourceCycles = [1,8,1,1]; // ICXPort23 is occupied for 8 cycles, so the sum (11) exceeds NumMicroOps (5); see the NumMicroOps note.
  1934. }
  1935. def: InstRW<[ICXWriteGatherEVEX8], (instrs VGATHERDPSZ256rm, VPGATHERDDZ256rm,
  1936. VGATHERDPDZrm, VPGATHERDQZrm,
  1937. VGATHERQPDZrm, VPGATHERQQZrm,
  1938. VGATHERQPSZrm, VPGATHERQDZrm)>;
  1939. def ICXWriteGatherEVEX16 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort015,ICXPort0156]> { // 512-bit DPS/DD gathers (presumably the 16-element forms, per the group name).
  1940. let Latency = 25;
  1941. let NumMicroOps = 5; // 2 uops perform multiple loads
  1942. let ResourceCycles = [1,16,1,1]; // ICXPort23 is occupied for 16 cycles, so the sum (19) exceeds NumMicroOps (5); see the NumMicroOps note.
  1943. }
  1944. def: InstRW<[ICXWriteGatherEVEX16], (instrs VGATHERDPSZrm, VPGATHERDDZrm)>;
  1945. def ICXWriteResGroup219 : SchedWriteRes<[ICXPort4,ICXPort5,ICXPort6,ICXPort23,ICXPort237,ICXPort06,ICXPort0156]> {
  1946. let Latency = 20;
  1947. let NumMicroOps = 8;
  1948. let ResourceCycles = [1,1,1,1,1,1,2];
  1949. }
  1950. def: InstRW<[ICXWriteResGroup219], (instrs INSB, INSL, INSW)>;
  1951. def ICXWriteResGroup220 : SchedWriteRes<[ICXPort5,ICXPort6,ICXPort0156]> {
  1952. let Latency = 20;
  1953. let NumMicroOps = 10;
  1954. let ResourceCycles = [1,2,7];
  1955. }
  1956. def: InstRW<[ICXWriteResGroup220], (instrs MWAITrr)>;
  1957. def ICXWriteResGroup223 : SchedWriteRes<[ICXPort0,ICXPort23]> {
  1958. let Latency = 22;
  1959. let NumMicroOps = 2;
  1960. let ResourceCycles = [1,1];
  1961. }
  1962. def: InstRW<[ICXWriteResGroup223], (instregex "DIV_F(32|64)m")>;
  1963. def ICXWriteResGroupVEX2 : SchedWriteRes<[ICXPort0, ICXPort23, ICXPort5, ICXPort015]> { // Gathers without a Z suffix listed in the InstRW below (presumably the VEX-encoded 2-element forms).
  1964. let Latency = 18; // One more than ICXWriteGatherEVEX2 (Latency = 17).
  1965. let NumMicroOps = 5; // 2 uops perform multiple loads
  1966. let ResourceCycles = [1,2,1,1]; // ICXPort23 is occupied for 2 cycles; here the sum (5) still equals NumMicroOps.
  1967. }
  1968. def: InstRW<[ICXWriteResGroupVEX2], (instrs VGATHERDPDrm, VPGATHERDQrm,
  1969. VGATHERQPDrm, VPGATHERQQrm,
  1970. VGATHERQPSrm, VPGATHERQDrm)>;
  1971. def ICXWriteResGroupVEX4 : SchedWriteRes<[ICXPort0, ICXPort23, ICXPort5, ICXPort015]> { // Gathers without a Z suffix listed in the InstRW below (presumably the VEX-encoded 4-element forms).
  1972. let Latency = 20; // One more than ICXWriteGatherEVEX4 (Latency = 19).
  1973. let NumMicroOps = 5; // 2 uops perform multiple loads
  1974. let ResourceCycles = [1,4,1,1]; // ICXPort23 is occupied for 4 cycles, so the sum (7) exceeds NumMicroOps (5); see the NumMicroOps note.
  1975. }
  1976. def: InstRW<[ICXWriteResGroupVEX4], (instrs VGATHERDPDYrm, VPGATHERDQYrm,
  1977. VGATHERDPSrm, VPGATHERDDrm,
  1978. VGATHERQPDYrm, VPGATHERQQYrm,
  1979. VGATHERQPSYrm, VPGATHERQDYrm)>;
  1980. def ICXWriteResGroupVEX8 : SchedWriteRes<[ICXPort0, ICXPort23, ICXPort5, ICXPort015]> { // Y-suffixed DPS/DD gathers (presumably the VEX-encoded 8-element forms).
  1981. let Latency = 22; // One more than ICXWriteGatherEVEX8 (Latency = 21).
  1982. let NumMicroOps = 5; // 2 uops perform multiple loads
  1983. let ResourceCycles = [1,8,1,1]; // ICXPort23 is occupied for 8 cycles, so the sum (11) exceeds NumMicroOps (5); see the NumMicroOps note.
  1984. }
  1985. def: InstRW<[ICXWriteResGroupVEX8], (instrs VGATHERDPSYrm, VPGATHERDDYrm)>;
  1986. def ICXWriteResGroup225 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort015]> {
  1987. let Latency = 22;
  1988. let NumMicroOps = 14;
  1989. let ResourceCycles = [5,5,4];
  1990. }
  1991. def: InstRW<[ICXWriteResGroup225], (instregex "VPCONFLICTDZ128rr",
  1992. "VPCONFLICTQZ256rr")>;
  1993. def ICXWriteResGroup228 : SchedWriteRes<[ICXPort0,ICXPort4,ICXPort5,ICXPort23,ICXPort237,ICXPort06,ICXPort0156]> {
  1994. let Latency = 23;
  1995. let NumMicroOps = 19;
  1996. let ResourceCycles = [2,1,4,1,1,4,6];
  1997. }
  1998. def: InstRW<[ICXWriteResGroup228], (instrs CMPXCHG16B)>;
  1999. def ICXWriteResGroup233 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23]> {
  2000. let Latency = 25;
  2001. let NumMicroOps = 3;
  2002. let ResourceCycles = [1,1,1];
  2003. }
  2004. def: InstRW<[ICXWriteResGroup233], (instregex "DIV_FI(16|32)m")>;
  2005. def ICXWriteResGroup239 : SchedWriteRes<[ICXPort0,ICXPort23]> {
  2006. let Latency = 27;
  2007. let NumMicroOps = 2;
  2008. let ResourceCycles = [1,1];
  2009. }
  2010. def: InstRW<[ICXWriteResGroup239], (instregex "DIVR_F(32|64)m")>;
  2011. def ICXWriteResGroup242 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort23,ICXPort015]> {
  2012. let Latency = 29;
  2013. let NumMicroOps = 15;
  2014. let ResourceCycles = [5,5,1,4];
  2015. }
  2016. def: InstRW<[ICXWriteResGroup242], (instregex "VPCONFLICTQZ256rm(b?)")>;
  2017. def ICXWriteResGroup243 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23]> {
  2018. let Latency = 30;
  2019. let NumMicroOps = 3;
  2020. let ResourceCycles = [1,1,1];
  2021. }
  2022. def: InstRW<[ICXWriteResGroup243], (instregex "DIVR_FI(16|32)m")>;
  2023. def ICXWriteResGroup247 : SchedWriteRes<[ICXPort5,ICXPort6,ICXPort23,ICXPort06,ICXPort0156]> {
  2024. let Latency = 35;
  2025. let NumMicroOps = 23;
  2026. let ResourceCycles = [1,5,3,4,10];
  2027. }
  2028. def: InstRW<[ICXWriteResGroup247], (instregex "IN(8|16|32)ri",
  2029. "IN(8|16|32)rr")>;
  2030. def ICXWriteResGroup248 : SchedWriteRes<[ICXPort5,ICXPort6,ICXPort23,ICXPort237,ICXPort06,ICXPort0156]> {
  2031. let Latency = 35;
  2032. let NumMicroOps = 23;
  2033. let ResourceCycles = [1,5,2,1,4,10];
  2034. }
  2035. def: InstRW<[ICXWriteResGroup248], (instregex "OUT(8|16|32)ir",
  2036. "OUT(8|16|32)rr")>;
  2037. def ICXWriteResGroup249 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort015]> {
  2038. let Latency = 37;
  2039. let NumMicroOps = 21;
  2040. let ResourceCycles = [9,7,5];
  2041. }
  2042. def: InstRW<[ICXWriteResGroup249], (instregex "VPCONFLICTDZ256rr",
  2043. "VPCONFLICTQZrr")>;
  2044. def ICXWriteResGroup250 : SchedWriteRes<[ICXPort1,ICXPort6,ICXPort23,ICXPort0156]> {
  2045. let Latency = 37;
  2046. let NumMicroOps = 31;
  2047. let ResourceCycles = [1,8,1,21];
  2048. }
  2049. def: InstRW<[ICXWriteResGroup250], (instregex "XRSTOR(64)?")>;
  2050. def ICXWriteResGroup252 : SchedWriteRes<[ICXPort1,ICXPort4,ICXPort5,ICXPort6,ICXPort23,ICXPort237,ICXPort15,ICXPort0156]> {
  2051. let Latency = 40;
  2052. let NumMicroOps = 18;
  2053. let ResourceCycles = [1,1,2,3,1,1,1,8];
  2054. }
  2055. def: InstRW<[ICXWriteResGroup252], (instrs VMCLEARm)>;
  2056. def ICXWriteResGroup253 : SchedWriteRes<[ICXPort4,ICXPort6,ICXPort23,ICXPort237,ICXPort0156]> {
  2057. let Latency = 41;
  2058. let NumMicroOps = 39;
  2059. let ResourceCycles = [1,10,1,1,26];
  2060. }
  2061. def: InstRW<[ICXWriteResGroup253], (instrs XSAVE64)>;
  2062. def ICXWriteResGroup254 : SchedWriteRes<[ICXPort5,ICXPort0156]> {
  2063. let Latency = 42;
  2064. let NumMicroOps = 22;
  2065. let ResourceCycles = [2,20];
  2066. }
  2067. def: InstRW<[ICXWriteResGroup254], (instrs RDTSCP)>;
  2068. def ICXWriteResGroup255 : SchedWriteRes<[ICXPort4,ICXPort6,ICXPort23,ICXPort237,ICXPort0156]> {
  2069. let Latency = 42;
  2070. let NumMicroOps = 40;
  2071. let ResourceCycles = [1,11,1,1,26];
  2072. }
  2073. def: InstRW<[ICXWriteResGroup255], (instrs XSAVE)>;
  2074. def: InstRW<[ICXWriteResGroup255], (instregex "XSAVEC", "XSAVES", "XSAVEOPT")>;
  2075. def ICXWriteResGroup256 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort23,ICXPort015]> {
  2076. let Latency = 44;
  2077. let NumMicroOps = 22;
  2078. let ResourceCycles = [9,7,1,5];
  2079. }
  2080. def: InstRW<[ICXWriteResGroup256], (instregex "VPCONFLICTDZ256rm(b?)",
  2081. "VPCONFLICTQZrm(b?)")>;
  2082. def ICXWriteResGroup258 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort05,ICXPort06,ICXPort0156]> {
  2083. let Latency = 62;
  2084. let NumMicroOps = 64;
  2085. let ResourceCycles = [2,8,5,10,39];
  2086. }
  2087. def: InstRW<[ICXWriteResGroup258], (instrs FLDENVm)>;
  2088. def ICXWriteResGroup259 : SchedWriteRes<[ICXPort0,ICXPort6,ICXPort23,ICXPort05,ICXPort06,ICXPort15,ICXPort0156]> { // Write resource spanning 7 port resources.
  2089. let Latency = 63;
  2090. let NumMicroOps = 88;
  2091. let ResourceCycles = [4,4,31,1,2,1,45]; // 7 entries, one per resource listed above; total 88 = NumMicroOps.
  2092. }
  2093. def: InstRW<[ICXWriteResGroup259], (instrs FXRSTOR64)>; // Attach this write to FXRSTOR64 only; plain FXRSTOR uses ICXWriteResGroup260.
  2094. def ICXWriteResGroup260 : SchedWriteRes<[ICXPort0,ICXPort6,ICXPort23,ICXPort05,ICXPort06,ICXPort15,ICXPort0156]> { // Same resource list as ICXWriteResGroup259 with different cycle counts.
  2095. let Latency = 63;
  2096. let NumMicroOps = 90;
  2097. let ResourceCycles = [4,2,33,1,2,1,47]; // 7 entries, one per resource listed above; total 90 = NumMicroOps.
  2098. }
  2099. def: InstRW<[ICXWriteResGroup260], (instrs FXRSTOR)>; // Attach this write to FXRSTOR.
  2100. def ICXWriteResGroup261 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort015]> { // Register form; no ICXPort23 entry (compare ICXWriteResGroup262).
  2101. let Latency = 67;
  2102. let NumMicroOps = 35;
  2103. let ResourceCycles = [17,11,7]; // 3 entries, one per resource listed above; total 35 = NumMicroOps.
  2104. }
  2105. def: InstRW<[ICXWriteResGroup261], (instregex "VPCONFLICTDZrr")>; // Regex match on the VPCONFLICTDZrr opcode name.
  2106. def ICXWriteResGroup262 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort23,ICXPort015]> { // ICXWriteResGroup261's resources plus one ICXPort23 cycle.
  2107. let Latency = 74;
  2108. let NumMicroOps = 36;
  2109. let ResourceCycles = [17,11,1,7]; // 4 entries, one per resource listed above; total 36 = NumMicroOps.
  2110. }
  2111. def: InstRW<[ICXWriteResGroup262], (instregex "VPCONFLICTDZrm(b?)")>; // Optional trailing "b" in the pattern.
  2112. def ICXWriteResGroup263 : SchedWriteRes<[ICXPort5,ICXPort05,ICXPort0156]> { // Write resource spanning 3 port resources.
  2113. let Latency = 75;
  2114. let NumMicroOps = 15;
  2115. let ResourceCycles = [6,3,6]; // 3 entries, one per resource listed above; total 15 = NumMicroOps.
  2116. }
  2117. def: InstRW<[ICXWriteResGroup263], (instrs FNINIT)>; // Attach this write to FNINIT.
  2118. def ICXWriteResGroup266 : SchedWriteRes<[ICXPort0,ICXPort1,ICXPort4,ICXPort5,ICXPort6,ICXPort237,ICXPort06,ICXPort0156]> { // Write resource spanning 8 port resources.
  2119. let Latency = 106;
  2120. let NumMicroOps = 100;
  2121. let ResourceCycles = [9,1,11,16,1,11,21,30]; // 8 entries, one per resource listed above; total 100 = NumMicroOps.
  2122. }
  2123. def: InstRW<[ICXWriteResGroup266], (instrs FSTENVm)>; // Attach this write to FSTENVm.
  2124. def ICXWriteResGroup267 : SchedWriteRes<[ICXPort6,ICXPort0156]> { // Few micro-ops (4) but a long latency (140).
  2125. let Latency = 140;
  2126. let NumMicroOps = 4;
  2127. let ResourceCycles = [1,3]; // 2 entries, one per resource listed above; total 4 = NumMicroOps.
  2128. }
  2129. def: InstRW<[ICXWriteResGroup267], (instrs PAUSE)>; // Attach this write to PAUSE.
  2130. def: InstRW<[WriteZero], (instrs CLC)>; // CLC uses the shared WriteZero write (defined outside this section).
  2131. // Instruction variants handled by the renamer. These might not need execution
  2132. // ports in certain conditions.
  2133. // See Agner Fog's "The microarchitecture of Intel, AMD and VIA CPUs",
  2134. // section "Skylake Pipeline" > "Register allocation and renaming".
  2135. // These can be investigated with llvm-exegesis, e.g.
  2136. // echo 'pxor %mm0, %mm0' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
  2137. // echo 'vxorpd %xmm0, %xmm0, %xmm1' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
  2138. def ICXWriteZeroLatency : SchedWriteRes<[]> { // Empty resource list: this write occupies no port resources.
  2139. let Latency = 0; // Used by the zero-idiom variants below as the predicate-true case.
  2140. }
  2141. def ICXWriteZeroIdiom : SchedWriteVariant<[ // GPR variant: choose the write per instruction instance.
  2142. SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>, // Zero idiom: zero-latency write.
  2143. SchedVar<NoSchedPred, [WriteALU]> // Otherwise: the generic WriteALU.
  2144. ]>;
  2145. def : InstRW<[ICXWriteZeroIdiom], (instrs SUB32rr, SUB64rr, // 32/64-bit reg-reg SUB and XOR.
  2146. XOR32rr, XOR64rr)>;
  2147. def ICXWriteFZeroIdiom : SchedWriteVariant<[ // FP-logic variant for the non-Y/non-Z XORPS/XORPD forms listed below.
  2148. SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>, // Zero idiom: zero-latency write.
  2149. SchedVar<NoSchedPred, [WriteFLogic]> // Otherwise: the generic WriteFLogic.
  2150. ]>;
  2151. def : InstRW<[ICXWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr, // SSE, AVX and Z128 reg-reg forms.
  2152. XORPDrr, VXORPDrr,
  2153. VXORPSZ128rr,
  2154. VXORPDZ128rr)>;
  2155. def ICXWriteFZeroIdiomY : SchedWriteVariant<[ // FP-logic variant for the Y/Z256 XORPS/XORPD forms listed below.
  2156. SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>, // Zero idiom: zero-latency write.
  2157. SchedVar<NoSchedPred, [WriteFLogicY]> // Otherwise: the generic WriteFLogicY.
  2158. ]>;
  2159. def : InstRW<[ICXWriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr, // Y and Z256 reg-reg forms.
  2160. VXORPSZ256rr, VXORPDZ256rr)>;
  2161. def ICXWriteFZeroIdiomZ : SchedWriteVariant<[ // FP-logic variant for the Z XORPS/XORPD forms listed below.
  2162. SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>, // Zero idiom: zero-latency write.
  2163. SchedVar<NoSchedPred, [WriteFLogicZ]> // Otherwise: the generic WriteFLogicZ.
  2164. ]>;
  2165. def : InstRW<[ICXWriteFZeroIdiomZ], (instrs VXORPSZrr, VXORPDZrr)>; // Z reg-reg forms.
  2166. def ICXWriteVZeroIdiomLogicX : SchedWriteVariant<[ // Vector-logic variant for the PXOR forms listed below.
  2167. SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>, // Zero idiom: zero-latency write.
  2168. SchedVar<NoSchedPred, [WriteVecLogicX]> // Otherwise: the generic WriteVecLogicX.
  2169. ]>;
  2170. def : InstRW<[ICXWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr, // SSE, AVX and Z128 reg-reg forms.
  2171. VPXORDZ128rr, VPXORQZ128rr)>;
  2172. def ICXWriteVZeroIdiomLogicY : SchedWriteVariant<[ // Vector-logic variant for the Y/Z256 PXOR forms listed below.
  2173. SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>, // Zero idiom: zero-latency write.
  2174. SchedVar<NoSchedPred, [WriteVecLogicY]> // Otherwise: the generic WriteVecLogicY.
  2175. ]>;
  2176. def : InstRW<[ICXWriteVZeroIdiomLogicY], (instrs VPXORYrr, // Y and Z256 reg-reg forms.
  2177. VPXORDZ256rr, VPXORQZ256rr)>;
  2178. def ICXWriteVZeroIdiomLogicZ : SchedWriteVariant<[ // Vector-logic variant for the Z PXOR forms listed below.
  2179. SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>, // Zero idiom: zero-latency write.
  2180. SchedVar<NoSchedPred, [WriteVecLogicZ]> // Otherwise: the generic WriteVecLogicZ.
  2181. ]>;
  2182. def : InstRW<[ICXWriteVZeroIdiomLogicZ], (instrs VPXORDZrr, VPXORQZrr)>; // Z reg-reg forms.
  2183. def ICXWriteVZeroIdiomALUX : SchedWriteVariant<[ // Vector-ALU variant for the byte/dword/word PCMPGT forms listed below.
  2184. SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>, // Zero idiom: zero-latency write.
  2185. SchedVar<NoSchedPred, [WriteVecALUX]> // Otherwise: the generic WriteVecALUX.
  2186. ]>;
  2187. def : InstRW<[ICXWriteVZeroIdiomALUX], (instrs PCMPGTBrr, VPCMPGTBrr, // PCMPGTQ is handled by ICXWriteVZeroIdiomPCMPGTQ instead.
  2188. PCMPGTDrr, VPCMPGTDrr,
  2189. PCMPGTWrr, VPCMPGTWrr)>;
  2190. def ICXWriteVZeroIdiomALUY : SchedWriteVariant<[ // Vector-ALU variant for the Y PCMPGT forms listed below.
  2191. SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>, // Zero idiom: zero-latency write.
  2192. SchedVar<NoSchedPred, [WriteVecALUY]> // Otherwise: the generic WriteVecALUY.
  2193. ]>;
  2194. def : InstRW<[ICXWriteVZeroIdiomALUY], (instrs VPCMPGTBYrr, // Byte/dword/word Y forms; VPCMPGTQYrr is handled separately.
  2195. VPCMPGTDYrr,
  2196. VPCMPGTWYrr)>;
  2197. def ICXWritePSUB : SchedWriteRes<[ICXPort015]> { // Non-idiom write for the PSUB forms below: 1 micro-op on ICXPort015, latency 1.
  2198. let Latency = 1;
  2199. let NumMicroOps = 1;
  2200. let ResourceCycles = [1];
  2201. }
  2202. def ICXWriteVZeroIdiomPSUB : SchedWriteVariant<[ // Variant for all PSUB widths (one shared fallback write, unlike the X/Y/Z splits above).
  2203. SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>, // Zero idiom: zero-latency write.
  2204. SchedVar<NoSchedPred, [ICXWritePSUB]> // Otherwise: the local ICXWritePSUB.
  2205. ]>;
  2206. def : InstRW<[ICXWriteVZeroIdiomPSUB], (instrs PSUBBrr, VPSUBBrr, VPSUBBZ128rr, // SSE, AVX and Z128 forms.
  2207. PSUBDrr, VPSUBDrr, VPSUBDZ128rr,
  2208. PSUBQrr, VPSUBQrr, VPSUBQZ128rr,
  2209. PSUBWrr, VPSUBWrr, VPSUBWZ128rr,
  2210. VPSUBBYrr, VPSUBBZ256rr, // Y and Z256 forms.
  2211. VPSUBDYrr, VPSUBDZ256rr,
  2212. VPSUBQYrr, VPSUBQZ256rr,
  2213. VPSUBWYrr, VPSUBWZ256rr,
  2214. VPSUBBZrr, // Z forms.
  2215. VPSUBDZrr,
  2216. VPSUBQZrr,
  2217. VPSUBWZrr)>;
  2218. def ICXWritePCMPGTQ : SchedWriteRes<[ICXPort5]> { // Non-idiom write for PCMPGTQ: 1 micro-op on ICXPort5, latency 3.
  2219. let Latency = 3;
  2220. let NumMicroOps = 1;
  2221. let ResourceCycles = [1];
  2222. }
  2223. def ICXWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[ // Variant for the PCMPGTQ forms listed below.
  2224. SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>, // Zero idiom: zero-latency write.
  2225. SchedVar<NoSchedPred, [ICXWritePCMPGTQ]> // Otherwise: the local ICXWritePCMPGTQ.
  2226. ]>;
  2227. def : InstRW<[ICXWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr, // SSE, AVX and Y reg-reg forms.
  2228. VPCMPGTQYrr)>;
  2229. // CMOVs that use both Z and C flags require an extra uop.
  2230. def ICXWriteCMOVA_CMOVBErr : SchedWriteRes<[ICXPort06]> { // Register form: 2 micro-ops, both on ICXPort06.
  2231. let Latency = 2;
  2232. let ResourceCycles = [2];
  2233. let NumMicroOps = 2;
  2234. }
  2235. def ICXWriteCMOVA_CMOVBErm : SchedWriteRes<[ICXPort23,ICXPort06]> { // Memory form: adds one ICXPort23 cycle to the register form.
  2236. let Latency = 7;
  2237. let ResourceCycles = [1,2]; // 2 entries, one per resource listed above; total 3 = NumMicroOps.
  2238. let NumMicroOps = 3;
  2239. }
  2240. def ICXCMOVA_CMOVBErr : SchedWriteVariant<[ // Register CMOV: pick the 2-uop write only when the predicate matches.
  2241. SchedVar<MCSchedPredicate<IsCMOVArr_Or_CMOVBErr>, [ICXWriteCMOVA_CMOVBErr]>,
  2242. SchedVar<NoSchedPred, [WriteCMOV]> // Otherwise: the generic WriteCMOV.
  2243. ]>;
  2244. def ICXCMOVA_CMOVBErm : SchedWriteVariant<[ // Memory CMOV: same selection using the rm predicate and write.
  2245. SchedVar<MCSchedPredicate<IsCMOVArm_Or_CMOVBErm>, [ICXWriteCMOVA_CMOVBErm]>,
  2246. SchedVar<NoSchedPred, [WriteCMOV.Folded]> // Otherwise: the Folded form of WriteCMOV.
  2247. ]>;
  2248. def : InstRW<[ICXCMOVA_CMOVBErr], (instrs CMOV16rr, CMOV32rr, CMOV64rr)>; // All register CMOV widths go through the rr variant.
  2249. def : InstRW<[ICXCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>; // All memory CMOV widths go through the rm variant.
  2250. // SETCCs that use both Z and C flags require an extra uop.
  2251. def ICXWriteSETA_SETBEr : SchedWriteRes<[ICXPort06]> { // Register form: 2 micro-ops, both on ICXPort06.
  2252. let Latency = 2;
  2253. let ResourceCycles = [2];
  2254. let NumMicroOps = 2;
  2255. }
  2256. def ICXWriteSETA_SETBEm : SchedWriteRes<[ICXPort4,ICXPort237,ICXPort06]> { // Memory form: adds one cycle each on ICXPort4 and ICXPort237.
  2257. let Latency = 3;
  2258. let ResourceCycles = [1,1,2]; // 3 entries, one per resource listed above; total 4 = NumMicroOps.
  2259. let NumMicroOps = 4;
  2260. }
  2261. def ICXSETA_SETBErr : SchedWriteVariant<[ // Register SETCC: pick the 2-uop write only when the predicate matches.
  2262. SchedVar<MCSchedPredicate<IsSETAr_Or_SETBEr>, [ICXWriteSETA_SETBEr]>,
  2263. SchedVar<NoSchedPred, [WriteSETCC]> // Otherwise: the generic WriteSETCC.
  2264. ]>;
  2265. def ICXSETA_SETBErm : SchedWriteVariant<[ // Memory SETCC: same selection using the memory predicate and write.
  2266. SchedVar<MCSchedPredicate<IsSETAm_Or_SETBEm>, [ICXWriteSETA_SETBEm]>,
  2267. SchedVar<NoSchedPred, [WriteSETCCStore]> // Otherwise: the generic WriteSETCCStore.
  2268. ]>;
  2269. def : InstRW<[ICXSETA_SETBErr], (instrs SETCCr)>; // Register SETCC goes through the rr variant.
  2270. def : InstRW<[ICXSETA_SETBErm], (instrs SETCCm)>; // Memory SETCC goes through the rm variant.
  2271. ///////////////////////////////////////////////////////////////////////////////
  2272. // Dependency breaking instructions.
  2273. ///////////////////////////////////////////////////////////////////////////////
  2274. def : IsZeroIdiomFunction<[ // Three DepBreakingClass groups, all keyed on ZeroIdiomPredicate.
  2275. // GPR Zero-idioms.
  2276. DepBreakingClass<[ SUB32rr, SUB64rr, XOR32rr, XOR64rr ], ZeroIdiomPredicate>, // Same opcodes as the ICXWriteZeroIdiom InstRW.
  2277. // SSE Zero-idioms.
  2278. DepBreakingClass<[
  2279. // fp variants.
  2280. XORPSrr, XORPDrr,
  2281. // int variants.
  2282. PXORrr,
  2283. PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
  2284. PCMPGTBrr, PCMPGTDrr, PCMPGTQrr, PCMPGTWrr
  2285. ], ZeroIdiomPredicate>,
  2286. // AVX Zero-idioms.
  2287. DepBreakingClass<[
  2288. // xmm fp variants.
  2289. VXORPSrr, VXORPDrr,
  2290. // xmm int variants.
  2291. VPXORrr,
  2292. VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
  2293. VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr,
  2294. // ymm variants.
  2295. VXORPSYrr, VXORPDYrr, VPXORYrr,
  2296. VPSUBBYrr, VPSUBWYrr, VPSUBDYrr, VPSUBQYrr,
  2297. VPCMPGTBYrr, VPCMPGTWYrr, VPCMPGTDYrr, VPCMPGTQYrr,
  2298. // zmm variants.
  2299. VXORPSZrr, VXORPDZrr, VPXORDZrr, VPXORQZrr, // This group also holds the Z128/Z256 forms, and no VPCMPGT entries.
  2300. VXORPSZ128rr, VXORPDZ128rr, VPXORDZ128rr, VPXORQZ128rr,
  2301. VXORPSZ256rr, VXORPDZ256rr, VPXORDZ256rr, VPXORQZ256rr,
  2302. VPSUBBZrr, VPSUBWZrr, VPSUBDZrr, VPSUBQZrr,
  2303. VPSUBBZ128rr, VPSUBWZ128rr, VPSUBDZ128rr, VPSUBQZ128rr,
  2304. VPSUBBZ256rr, VPSUBWZ256rr, VPSUBDZ256rr, VPSUBQZ256rr,
  2305. ], ZeroIdiomPredicate>,
  2306. ]>;
  2307. } // SchedModel