X86ScheduleZnver1.td 46 KB


  1. //=- X86ScheduleZnver1.td - X86 Znver1 Scheduling -------------*- tablegen -*-=//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file defines the machine model for Znver1 to support instruction
  10. // scheduling and other instruction cost heuristics.
  11. //
  12. //===----------------------------------------------------------------------===//
  // Top-level machine model record for Znver1.
  def Znver1Model : SchedMachineModel {
    // Zen decodes up to 4 instructions per cycle.
    let IssueWidth = 4;

    // MicroOpBufferSize is sized after the reorder buffer.
    let MicroOpBufferSize = 192;

    let LoadLatency = 4;
    let MispredictPenalty = 17;
    let HighLatency = 25;
    let PostRAScheduler = 1;

    // FIXME: Not every instruction is described by this model yet, so the
    // flag is cleared to mark the model as incomplete.
    let CompleteModel = 0;
  }
  28. let SchedModel = Znver1Model in {
  29. // Zen can issue micro-ops to 10 different units in one cycle.
  30. // These are
  31. // * Four integer ALU units (ZALU0, ZALU1, ZALU2, ZALU3)
  32. // * Two AGU units (ZAGU0, ZAGU1)
  33. // * Four FPU units (ZFPU0, ZFPU1, ZFPU2, ZFPU3)
  // The AGUs feed the load/store queues at two loads and one store per cycle.
  // Integer ALU pipes: one single-unit resource each.
  def ZnALU0 : ProcResource<1>;
  def ZnALU1 : ProcResource<1>;
  def ZnALU2 : ProcResource<1>;
  def ZnALU3 : ProcResource<1>;

  // Address-generation pipes: one single-unit resource each.
  def ZnAGU0 : ProcResource<1>;
  def ZnAGU1 : ProcResource<1>;

  // Floating-point pipes: one single-unit resource each.
  def ZnFPU0 : ProcResource<1>;
  def ZnFPU1 : ProcResource<1>;
  def ZnFPU2 : ProcResource<1>;
  def ZnFPU3 : ProcResource<1>;
  // FPU sub-groups. The digits in each name list the member pipes.
  def ZnFPU013 : ProcResGroup<[ZnFPU0, ZnFPU1, ZnFPU3]>;
  def ZnFPU01  : ProcResGroup<[ZnFPU0, ZnFPU1]>;
  def ZnFPU12  : ProcResGroup<[ZnFPU1, ZnFPU2]>;
  def ZnFPU13  : ProcResGroup<[ZnFPU1, ZnFPU3]>;
  def ZnFPU23  : ProcResGroup<[ZnFPU2, ZnFPU3]>;
  def ZnFPU02  : ProcResGroup<[ZnFPU0, ZnFPU2]>;
  def ZnFPU03  : ProcResGroup<[ZnFPU0, ZnFPU3]>;
  // Unit groupings. A micro-op that may issue to any one of several units is
  // described by naming the group that contains those units.

  // ALU pipes 0 and 3.
  def ZnALU03 : ProcResGroup<[ZnALU0, ZnALU3]>;

  // All four ALU pipes, backed by the 56-entry (14x4) integer scheduler.
  def ZnALU : ProcResGroup<[ZnALU0, ZnALU1, ZnALU2, ZnALU3]> {
    let BufferSize = 56;
  }

  // Both AGU pipes, backed by a 28-entry (14x2) queue. AGUs cannot execute
  // every ALU operation but are relevant for some instructions.
  def ZnAGU : ProcResGroup<[ZnAGU0, ZnAGU1]> {
    let BufferSize = 28;
  }
  // Integer multiplier; multiplications are issued on ALU1.
  def ZnMultiplier : ProcResource<1>;

  // Integer divider; divisions are issued on ALU2.
  def ZnDivider : ProcResource<1>;
  // Integer load-to-use latency: 4 cycles.
  def : ReadAdvance<ReadAfterLd, 4>;

  // Vector load-to-use latency: 8 cycles for each vector read class.
  def : ReadAdvance<ReadAfterVecLd,  8>;
  def : ReadAdvance<ReadAfterVecXLd, 8>;
  def : ReadAdvance<ReadAfterVecYLd, 8>;

  // No advance for the integer-to-FPU read.
  def : ReadAdvance<ReadInt2Fpu, 0>;
  // Integer physical register file: 168 entries holding both the architectural
  // and the speculative versions of the 64-bit integer registers.
  // Reference: "Software Optimization Guide for AMD Family 17h Processors"
  def ZnIntegerPRF : RegisterFile<168, [GR64, CCR]>;
  // All four FPU pipes, backed by the 36-entry (9x4) floating-point scheduler.
  def ZnFPU : ProcResGroup<[ZnFPU0, ZnFPU1, ZnFPU2, ZnFPU3]> {
    let BufferSize = 36;
  }
  // FP register renaming: the Zen FP Retire Queue renames SIMD and FP uOps
  // onto a pool of 160 128-bit registers. Operations on 256-bit data types are
  // cracked into two COPs, hence the cost of 2 listed for VR256.
  // Reference: "Software Optimization Guide for AMD Family 17h Processors"
  def ZnFpuPRF : RegisterFile<160, [VR64, VR128, VR256], [1, 1, 2]>;
  // Retire control unit, shared between integer and FP ops.
  // It tracks up to 192 macro ops in flight and commits, in order, up to
  // 8 macro ops per cycle.
  // Reference: "Software Optimization Guide for AMD Family 17h Processors"
  // In SMT mode each thread gets 96 entries. The conservative value is not
  // used here because there is currently no way to fully model SMT mode, so
  // there is no point in trying.
  def ZnRCU : RetireControlUnit<192, 8>;
  // FIXME: there are 72 read buffers and 44 write buffers.

  // Folded-load helper for the integer units.
  //
  // A folded load is an instruction that loads and also performs an operation,
  // e.g. ADDPD xmm,[mem]. Instructions with folded loads are usually
  // micro-fused, so they only appear as two micro-ops:
  //   a. the load
  //   b. the operation (addpd in the example)
  //
  // Parameters:
  //   SchedRW  - foldable write; SchedRW.Folded names its memory variant.
  //   ExePorts - resources used by the register variant.
  //   Lat      - latency of the register variant.
  //   Res      - ResourceCycles for ExePorts; [] leaves the field empty.
  //   UOps     - micro-op count of the register variant.
  //   LoadLat  - cycles added to Lat for the memory variant (default 4).
  //   LoadUOps - micro-ops added to UOps for the memory variant (default 1).
  multiclass ZnWriteResPair<X86FoldableSchedWrite SchedRW,
                            list<ProcResourceKind> ExePorts,
                            int Lat, list<int> Res = [], int UOps = 1,
                            int LoadLat = 4, int LoadUOps = 1> {
    // Register variant: uses ExePorts exactly as given.
    def : WriteRes<SchedRW, ExePorts> {
      let NumMicroOps = UOps;
      let ResourceCycles = Res;
      let Latency = Lat;
    }

    // Memory variant: ZnAGU is prepended to the resource list. When Res is
    // given, a matching 1-cycle entry for ZnAGU is prepended to it as well.
    def : WriteRes<SchedRW.Folded, !listconcat([ZnAGU], ExePorts)> {
      let NumMicroOps = !add(UOps, LoadUOps);
      let ResourceCycles = !if(!empty(Res), [], !listconcat([1], Res));
      let Latency = !add(Lat, LoadLat);
    }
  }
  // Folded-load helper for the floating-point units.
  //
  // Same shape as the integer helper, but the defaults differ: the memory
  // variant adds 7 cycles of latency and no extra micro-op unless overridden.
  //
  // Parameters:
  //   SchedRW  - foldable write; SchedRW.Folded names its memory variant.
  //   ExePorts - resources used by the register variant.
  //   Lat      - latency of the register variant.
  //   Res      - ResourceCycles for ExePorts; [] leaves the field empty.
  //   UOps     - micro-op count of the register variant.
  //   LoadLat  - cycles added to Lat for the memory variant (default 7).
  //   LoadUOps - micro-ops added to UOps for the memory variant (default 0).
  multiclass ZnWriteResFpuPair<X86FoldableSchedWrite SchedRW,
                               list<ProcResourceKind> ExePorts,
                               int Lat, list<int> Res = [], int UOps = 1,
                               int LoadLat = 7, int LoadUOps = 0> {
    // Register variant: uses ExePorts exactly as given.
    def : WriteRes<SchedRW, ExePorts> {
      let NumMicroOps = UOps;
      let ResourceCycles = Res;
      let Latency = Lat;
    }

    // Memory variant: ZnAGU is prepended to the resource list. When Res is
    // given, a matching 1-cycle entry for ZnAGU is prepended to it as well.
    def : WriteRes<SchedRW.Folded, !listconcat([ZnAGU], ExePorts)> {
      let NumMicroOps = !add(UOps, LoadUOps);
      let ResourceCycles = !if(!empty(Res), [], !listconcat([1], Res));
      let Latency = !add(Lat, LoadLat);
    }
  }
  // WriteRMW is set for instructions with a memory write operation in codegen.
  def : WriteRes<WriteRMW,     [ZnAGU]>;
  def : WriteRes<WriteStore,   [ZnAGU]>;
  def : WriteRes<WriteStoreNT, [ZnAGU]>;
  def : WriteRes<WriteMove,    [ZnALU]>;
  def : WriteRes<WriteLoad,    [ZnAGU]> { let Latency = 4; }

  // Models the effect of clobbering the read-write mask operand of the GATHER
  // operation. It consumes no resources and no micro-ops; it only carries a
  // latency (8 cycles).
  def : WriteRes<WriteVecMaskedGatherWriteback, []> {
    let Latency = 8;
    let NumMicroOps = 0;
  }

  def : WriteRes<WriteZero, []>;
  def : WriteRes<WriteLEA,  [ZnALU]>;
  // Basic integer arithmetic.
  defm : ZnWriteResPair<WriteALU, [ZnALU], 1>;
  defm : ZnWriteResPair<WriteADC, [ZnALU], 1>;
  defm : ZnWriteResPair<WriteIMul8, [ZnALU1, ZnMultiplier], 4>;

  // Byte swap and exchange.
  defm : X86WriteRes<WriteBSWAP32, [ZnALU], 1, [4], 1>;
  defm : X86WriteRes<WriteBSWAP64, [ZnALU], 1, [4], 1>;
  defm : X86WriteRes<WriteCMPXCHG, [ZnALU], 1, [1], 1>;
  defm : X86WriteRes<WriteCMPXCHGRMW, [ZnALU, ZnAGU], 8, [1, 1], 5>;
  defm : X86WriteRes<WriteXCHG, [ZnALU], 1, [2], 2>;

  // Shifts and rotates.
  defm : ZnWriteResPair<WriteShift, [ZnALU], 1>;
  defm : ZnWriteResPair<WriteShiftCL, [ZnALU], 1>;
  defm : ZnWriteResPair<WriteRotate, [ZnALU], 1>;
  defm : ZnWriteResPair<WriteRotateCL, [ZnALU], 1>;

  // Double shifts: only the rri form gets a resource description here; the
  // remaining write classes are declared unsupported.
  defm : X86WriteRes<WriteSHDrri, [ZnALU], 1, [1], 1>;
  defm : X86WriteResUnsupported<WriteSHDrrcl>;
  defm : X86WriteResUnsupported<WriteSHDmri>;
  defm : X86WriteResUnsupported<WriteSHDmrcl>;

  // Jumps, CRC32, conditional moves and sets.
  defm : ZnWriteResPair<WriteJump, [ZnALU], 1>;
  defm : ZnWriteResFpuPair<WriteCRC32, [ZnFPU0], 3>;
  defm : ZnWriteResPair<WriteCMOV, [ZnALU], 1>;
  def : WriteRes<WriteSETCC, [ZnALU]>;
  def : WriteRes<WriteSETCCStore, [ZnALU, ZnAGU]>;

  // Flag transfer and bit tests.
  defm : X86WriteRes<WriteLAHFSAHF, [ZnALU], 2, [1], 2>;
  defm : X86WriteRes<WriteBitTest, [ZnALU], 1, [1], 1>;
  defm : X86WriteRes<WriteBitTestImmLd, [ZnALU, ZnAGU], 5, [1, 1], 2>;
  defm : X86WriteRes<WriteBitTestRegLd, [ZnALU, ZnAGU], 5, [1, 1], 2>;
  defm : X86WriteRes<WriteBitTestSet, [ZnALU], 2, [1], 2>;
  // Bit scan and bit count classes, all on the ALU group.
  defm : ZnWriteResPair<WriteBSF,    [ZnALU], 3, [12], 6, 4, 2>;
  defm : ZnWriteResPair<WriteBSR,    [ZnALU], 4, [16], 6, 4, 2>;
  defm : ZnWriteResPair<WriteLZCNT,  [ZnALU], 2>;
  defm : ZnWriteResPair<WriteTZCNT,  [ZnALU], 2, [2], 2, 4, 0>;
  defm : ZnWriteResPair<WritePOPCNT, [ZnALU], 1>;
  // Miscellaneous COPY pseudo-instructions are scheduled like a move.
  def : InstRW<[WriteMove], (instrs COPY)>;
  // BMI1 BEXTR, BMI2 BZHI (plus the BLS write class).
  defm : ZnWriteResPair<WriteBEXTR, [ZnALU], 1, [1], 1, 4, 1>;
  defm : ZnWriteResPair<WriteBLS,   [ZnALU], 2, [2], 2, 4, 1>;
  defm : ZnWriteResPair<WriteBZHI,  [ZnALU], 1>;
  // DIV: issued on ALU2; the divider stays busy for the full latency.
  defm : ZnWriteResPair<WriteDiv8,  [ZnALU2, ZnDivider], 15, [1, 15], 1>;
  defm : ZnWriteResPair<WriteDiv16, [ZnALU2, ZnDivider], 17, [1, 17], 2>;
  defm : ZnWriteResPair<WriteDiv32, [ZnALU2, ZnDivider], 25, [1, 25], 2>;
  defm : ZnWriteResPair<WriteDiv64, [ZnALU2, ZnDivider], 41, [1, 41], 2>;

  // IDIV: same numbers as the unsigned forms above.
  defm : ZnWriteResPair<WriteIDiv8,  [ZnALU2, ZnDivider], 15, [1, 15], 1>;
  defm : ZnWriteResPair<WriteIDiv16, [ZnALU2, ZnDivider], 17, [1, 17], 2>;
  defm : ZnWriteResPair<WriteIDiv32, [ZnALU2, ZnDivider], 25, [1, 25], 2>;
  defm : ZnWriteResPair<WriteIDiv64, [ZnALU2, ZnDivider], 41, [1, 41], 2>;
  // IMULH: the write for the high half of a multiply result. It contributes
  // latency on the multiplier but no micro-ops of its own.
  def ZnWriteIMulH : WriteRes<WriteIMulH, [ZnMultiplier]> {
    let NumMicroOps = 0;
    let Latency = 3;
  }

  // Load form: reuses the register form's numbers and adds the model's
  // LoadLatency to the latency.
  def : WriteRes<WriteIMulHLd, [ZnMultiplier]> {
    let NumMicroOps = ZnWriteIMulH.NumMicroOps;
    let Latency = !add(ZnWriteIMulH.Latency, Znver1Model.LoadLatency);
  }
  // Floating point operations.

  // Loads.
  defm : X86WriteRes<WriteFLoad,  [ZnAGU], 8, [1], 1>;
  defm : X86WriteRes<WriteFLoadX, [ZnAGU], 8, [1], 1>;
  defm : X86WriteRes<WriteFLoadY, [ZnAGU], 8, [1], 1>;
  defm : X86WriteRes<WriteFMaskedLoad,  [ZnAGU, ZnFPU01], 8, [1, 1], 1>;
  defm : X86WriteRes<WriteFMaskedLoadY, [ZnAGU, ZnFPU01], 8, [1, 2], 2>;

  // Stores.
  defm : X86WriteRes<WriteFStore,  [ZnAGU], 1, [1], 1>;
  defm : X86WriteRes<WriteFStoreX, [ZnAGU], 1, [1], 1>;
  defm : X86WriteRes<WriteFStoreY, [ZnAGU], 1, [1], 1>;
  defm : X86WriteRes<WriteFStoreNT,  [ZnAGU, ZnFPU2], 8, [1, 1], 1>;
  defm : X86WriteRes<WriteFStoreNTX, [ZnAGU], 1, [1], 1>;
  defm : X86WriteRes<WriteFStoreNTY, [ZnAGU], 1, [1], 1>;
  defm : X86WriteRes<WriteFMaskedStore32,  [ZnAGU, ZnFPU01], 4, [1, 1], 1>;
  defm : X86WriteRes<WriteFMaskedStore32Y, [ZnAGU, ZnFPU01], 5, [1, 2], 2>;
  defm : X86WriteRes<WriteFMaskedStore64,  [ZnAGU, ZnFPU01], 4, [1, 1], 1>;
  defm : X86WriteRes<WriteFMaskedStore64Y, [ZnAGU, ZnFPU01], 5, [1, 2], 2>;

  // Register moves.
  defm : X86WriteRes<WriteFMove,  [ZnFPU], 1, [1], 1>;
  defm : X86WriteRes<WriteFMoveX, [ZnFPU], 1, [1], 1>;
  defm : X86WriteRes<WriteFMoveY, [ZnFPU], 1, [1], 1>;
  defm : X86WriteResUnsupported<WriteFMoveZ>;
  // FP add, single precision (pipes 2/3).
  defm : ZnWriteResFpuPair<WriteFAdd,  [ZnFPU23], 3>;
  defm : ZnWriteResFpuPair<WriteFAddX, [ZnFPU23], 3>;
  defm : ZnWriteResFpuPair<WriteFAddY, [ZnFPU23], 3, [2], 2>;
  defm : X86WriteResPairUnsupported<WriteFAddZ>;

  // FP add, double precision.
  defm : ZnWriteResFpuPair<WriteFAdd64,  [ZnFPU23], 3>;
  defm : ZnWriteResFpuPair<WriteFAdd64X, [ZnFPU23], 3>;
  defm : ZnWriteResFpuPair<WriteFAdd64Y, [ZnFPU23], 3, [2], 2>;
  defm : X86WriteResPairUnsupported<WriteFAdd64Z>;

  // FP compare, single precision (pipes 0/1).
  defm : ZnWriteResFpuPair<WriteFCmp,  [ZnFPU01], 1>;
  defm : ZnWriteResFpuPair<WriteFCmpX, [ZnFPU01], 1>;
  defm : ZnWriteResFpuPair<WriteFCmpY, [ZnFPU01], 1, [2], 2>;
  defm : X86WriteResPairUnsupported<WriteFCmpZ>;

  // FP compare, double precision.
  defm : ZnWriteResFpuPair<WriteFCmp64,  [ZnFPU01], 1>;
  defm : ZnWriteResFpuPair<WriteFCmp64X, [ZnFPU01], 1>;
  defm : ZnWriteResFpuPair<WriteFCmp64Y, [ZnFPU01], 1, [2], 2>;
  defm : X86WriteResPairUnsupported<WriteFCmp64Z>;

  // FCom write classes: one cycle on pipes 0/1 plus one on pipe 2.
  defm : ZnWriteResFpuPair<WriteFCom,  [ZnFPU01, ZnFPU2], 3, [1, 1], 2>;
  defm : ZnWriteResFpuPair<WriteFComX, [ZnFPU01, ZnFPU2], 3, [1, 1], 2>;

  // FP blends.
  defm : ZnWriteResFpuPair<WriteFBlend,  [ZnFPU01], 1>;
  defm : ZnWriteResFpuPair<WriteFBlendY, [ZnFPU01], 1>;
  defm : X86WriteResPairUnsupported<WriteFBlendZ>;
  defm : ZnWriteResFpuPair<WriteFVarBlend,  [ZnFPU01], 1>;
  defm : ZnWriteResFpuPair<WriteFVarBlendY, [ZnFPU01], 1, [2], 2>;
  defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
  // Conversions: every supported class uses pipe 3 with a latency of 5.

  // Single precision FP to integer.
  defm : ZnWriteResFpuPair<WriteCvtSS2I,  [ZnFPU3], 5>;
  defm : ZnWriteResFpuPair<WriteCvtPS2I,  [ZnFPU3], 5>;
  defm : ZnWriteResFpuPair<WriteCvtPS2IY, [ZnFPU3], 5>;
  defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;

  // Double precision FP to integer.
  defm : ZnWriteResFpuPair<WriteCvtSD2I,  [ZnFPU3], 5>;
  defm : ZnWriteResFpuPair<WriteCvtPD2I,  [ZnFPU3], 5>;
  defm : ZnWriteResFpuPair<WriteCvtPD2IY, [ZnFPU3], 5>;
  defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;

  // Integer to single precision FP.
  defm : ZnWriteResFpuPair<WriteCvtI2SS,  [ZnFPU3], 5>;
  defm : ZnWriteResFpuPair<WriteCvtI2PS,  [ZnFPU3], 5>;
  defm : ZnWriteResFpuPair<WriteCvtI2PSY, [ZnFPU3], 5>;
  defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;

  // Integer to double precision FP.
  defm : ZnWriteResFpuPair<WriteCvtI2SD,  [ZnFPU3], 5>;
  defm : ZnWriteResFpuPair<WriteCvtI2PD,  [ZnFPU3], 5>;
  defm : ZnWriteResFpuPair<WriteCvtI2PDY, [ZnFPU3], 5>;
  defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
  // FP divide, single precision (pipe 3).
  defm : ZnWriteResFpuPair<WriteFDiv,  [ZnFPU3], 10, [3]>;
  defm : ZnWriteResFpuPair<WriteFDivX, [ZnFPU3], 10, [3]>;
  defm : ZnWriteResFpuPair<WriteFDivY, [ZnFPU3], 10, [6], 2>;
  defm : X86WriteResPairUnsupported<WriteFDivZ>;

  // FP divide, double precision.
  defm : ZnWriteResFpuPair<WriteFDiv64,  [ZnFPU3], 13, [5]>;
  defm : ZnWriteResFpuPair<WriteFDiv64X, [ZnFPU3], 13, [5]>;
  defm : ZnWriteResFpuPair<WriteFDiv64Y, [ZnFPU3], 15, [9], 2>;
  defm : X86WriteResPairUnsupported<WriteFDiv64Z>;

  // Sign manipulation and rounding.
  defm : ZnWriteResFpuPair<WriteFSign, [ZnFPU3], 2>;
  defm : ZnWriteResFpuPair<WriteFRnd,  [ZnFPU3], 4, [1], 1, 7, 1>; // FIXME: Should folds require 1 extra uops?
  defm : ZnWriteResFpuPair<WriteFRndY, [ZnFPU3], 4, [1], 1, 7, 1>; // FIXME: Should folds require 1 extra uops?
  defm : X86WriteResPairUnsupported<WriteFRndZ>;

  // FP logic: any FPU pipe.
  defm : ZnWriteResFpuPair<WriteFLogic,  [ZnFPU], 1>;
  defm : ZnWriteResFpuPair<WriteFLogicY, [ZnFPU], 1, [2], 2>;
  defm : X86WriteResPairUnsupported<WriteFLogicZ>;

  // FP test.
  defm : ZnWriteResFpuPair<WriteFTest,  [ZnFPU12], 2, [2], 1, 7, 1>;
  defm : ZnWriteResFpuPair<WriteFTestY, [ZnFPU12], 4, [4], 3, 7, 2>;
  defm : X86WriteResPairUnsupported<WriteFTestZ>;

  // FP shuffles (pipes 1/2).
  defm : ZnWriteResFpuPair<WriteFShuffle,  [ZnFPU12], 1>;
  defm : ZnWriteResFpuPair<WriteFShuffleY, [ZnFPU12], 1, [2], 2>;
  defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
  defm : ZnWriteResFpuPair<WriteFVarShuffle,  [ZnFPU12], 1>;
  defm : ZnWriteResFpuPair<WriteFVarShuffleY, [ZnFPU12], 1, [2], 2>;
  defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
  // FP multiply, single precision (pipes 0/1).
  defm : ZnWriteResFpuPair<WriteFMul,  [ZnFPU01], 3>;
  defm : ZnWriteResFpuPair<WriteFMulX, [ZnFPU01], 3>;
  defm : ZnWriteResFpuPair<WriteFMulY, [ZnFPU01], 3, [2], 2>;
  defm : X86WriteResPairUnsupported<WriteFMulZ>;

  // FP multiply, double precision.
  defm : ZnWriteResFpuPair<WriteFMul64,  [ZnFPU01], 4>;
  defm : ZnWriteResFpuPair<WriteFMul64X, [ZnFPU01], 4>;
  defm : ZnWriteResFpuPair<WriteFMul64Y, [ZnFPU01], 4, [2], 2>;
  defm : X86WriteResPairUnsupported<WriteFMul64Z>;

  // Fused multiply-add.
  defm : ZnWriteResFpuPair<WriteFMA,  [ZnFPU01], 5>;
  defm : ZnWriteResFpuPair<WriteFMAX, [ZnFPU01], 5>;
  defm : ZnWriteResFpuPair<WriteFMAY, [ZnFPU01], 5, [2], 2>;
  defm : X86WriteResPairUnsupported<WriteFMAZ>;

  // Reciprocal estimate.
  defm : ZnWriteResFpuPair<WriteFRcp,  [ZnFPU01], 5>;
  defm : ZnWriteResFpuPair<WriteFRcpX, [ZnFPU01], 5>;
  defm : ZnWriteResFpuPair<WriteFRcpY, [ZnFPU01], 5, [2], 2>;
  defm : X86WriteResPairUnsupported<WriteFRcpZ>;

  // Reciprocal square-root estimate.
  defm : ZnWriteResFpuPair<WriteFRsqrt,  [ZnFPU01], 5>;
  defm : ZnWriteResFpuPair<WriteFRsqrtX, [ZnFPU01], 5>;
  defm : ZnWriteResFpuPair<WriteFRsqrtY, [ZnFPU01], 5, [2], 2>;
  defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;

  // Square root, single precision (pipe 3).
  defm : ZnWriteResFpuPair<WriteFSqrt,  [ZnFPU3], 14, [5]>;
  defm : ZnWriteResFpuPair<WriteFSqrtX, [ZnFPU3], 14, [5]>;
  defm : ZnWriteResFpuPair<WriteFSqrtY, [ZnFPU3], 14, [10], 2>;
  defm : X86WriteResPairUnsupported<WriteFSqrtZ>;

  // Square root, double precision.
  defm : ZnWriteResFpuPair<WriteFSqrt64,  [ZnFPU3], 20, [8]>;
  defm : ZnWriteResFpuPair<WriteFSqrt64X, [ZnFPU3], 20, [8]>;
  defm : ZnWriteResFpuPair<WriteFSqrt64Y, [ZnFPU3], 20, [16], 2>;
  defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;

  // Square root, 80-bit.
  defm : ZnWriteResFpuPair<WriteFSqrt80, [ZnFPU3], 20, [20]>;

  // 256-bit FP shuffles.
  defm : ZnWriteResFpuPair<WriteFShuffle256,    [ZnFPU12], 2, [2], 2>;
  defm : ZnWriteResFpuPair<WriteFVarShuffle256, [ZnFPU12], 2, [2], 2>;
  // Vector integer operations, which use the FPU units.

  // Loads.
  defm : X86WriteRes<WriteVecLoad,    [ZnAGU], 8, [1], 1>;
  defm : X86WriteRes<WriteVecLoadX,   [ZnAGU], 8, [1], 1>;
  defm : X86WriteRes<WriteVecLoadY,   [ZnAGU], 8, [1], 1>;
  defm : X86WriteRes<WriteVecLoadNT,  [ZnAGU], 8, [1], 1>;
  defm : X86WriteRes<WriteVecLoadNTY, [ZnAGU], 8, [1], 1>;
  defm : X86WriteRes<WriteVecMaskedLoad,  [ZnAGU, ZnFPU01], 8, [1, 2], 2>;
  defm : X86WriteRes<WriteVecMaskedLoadY, [ZnAGU, ZnFPU01], 9, [1, 3], 2>;

  // Stores.
  defm : X86WriteRes<WriteVecStore,    [ZnAGU], 1, [1], 1>;
  defm : X86WriteRes<WriteVecStoreX,   [ZnAGU], 1, [1], 1>;
  defm : X86WriteRes<WriteVecStoreY,   [ZnAGU], 1, [1], 1>;
  defm : X86WriteRes<WriteVecStoreNT,  [ZnAGU], 1, [1], 1>;
  defm : X86WriteRes<WriteVecStoreNTY, [ZnAGU], 1, [1], 1>;
  defm : X86WriteRes<WriteVecMaskedStore32,  [ZnAGU, ZnFPU01], 4, [1, 1], 1>;
  defm : X86WriteRes<WriteVecMaskedStore32Y, [ZnAGU, ZnFPU01], 5, [1, 2], 2>;
  defm : X86WriteRes<WriteVecMaskedStore64,  [ZnAGU, ZnFPU01], 4, [1, 1], 1>;
  defm : X86WriteRes<WriteVecMaskedStore64Y, [ZnAGU, ZnFPU01], 5, [1, 2], 2>;

  // Register moves, including transfers to and from general-purpose registers.
  defm : X86WriteRes<WriteVecMove,  [ZnFPU], 1, [1], 1>;
  defm : X86WriteRes<WriteVecMoveX, [ZnFPU], 1, [1], 1>;
  defm : X86WriteRes<WriteVecMoveY, [ZnFPU], 2, [1], 2>;
  defm : X86WriteResUnsupported<WriteVecMoveZ>;
  defm : X86WriteRes<WriteVecMoveToGpr,   [ZnFPU2], 2, [1], 1>;
  defm : X86WriteRes<WriteVecMoveFromGpr, [ZnFPU2], 3, [1], 1>;

  // EMMS.
  defm : X86WriteRes<WriteEMMS, [ZnFPU], 2, [1], 1>;
  // Vector shifts by register count (pipe 2).
  defm : ZnWriteResFpuPair<WriteVecShift,  [ZnFPU2], 1>;
  defm : ZnWriteResFpuPair<WriteVecShiftX, [ZnFPU2], 1>;
  defm : ZnWriteResFpuPair<WriteVecShiftY, [ZnFPU2], 1, [2], 2>;
  defm : X86WriteResPairUnsupported<WriteVecShiftZ>;

  // Vector shifts by immediate.
  defm : ZnWriteResFpuPair<WriteVecShiftImm,  [ZnFPU2], 1>;
  defm : ZnWriteResFpuPair<WriteVecShiftImmX, [ZnFPU2], 1>;
  defm : ZnWriteResFpuPair<WriteVecShiftImmY, [ZnFPU2], 1, [2], 2>;
  defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;

  // Variable vector shifts (pipe 1).
  defm : ZnWriteResFpuPair<WriteVarVecShift,  [ZnFPU1], 3, [2], 1>;
  defm : ZnWriteResFpuPair<WriteVarVecShiftY, [ZnFPU1], 3, [4], 2>;
  defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;

  // Vector logic: any FPU pipe.
  defm : ZnWriteResFpuPair<WriteVecLogic,  [ZnFPU], 1>;
  defm : ZnWriteResFpuPair<WriteVecLogicX, [ZnFPU], 1>;
  defm : ZnWriteResFpuPair<WriteVecLogicY, [ZnFPU], 1, [2], 2>;
  defm : X86WriteResPairUnsupported<WriteVecLogicZ>;

  // Vector test.
  defm : ZnWriteResFpuPair<WriteVecTest,  [ZnFPU12], 2, [2], 1, 7, 1>;
  defm : ZnWriteResFpuPair<WriteVecTestY, [ZnFPU12], 4, [4], 3, 7, 2>;
  defm : X86WriteResPairUnsupported<WriteVecTestZ>;

  // Vector integer ALU (pipes 0/1/3).
  defm : ZnWriteResFpuPair<WriteVecALU,  [ZnFPU013], 1>;
  defm : ZnWriteResFpuPair<WriteVecALUX, [ZnFPU013], 1>;
  defm : ZnWriteResFpuPair<WriteVecALUY, [ZnFPU013], 1, [2], 2>;
  defm : X86WriteResPairUnsupported<WriteVecALUZ>;

  // Vector integer multiply (pipe 0).
  defm : ZnWriteResFpuPair<WriteVecIMul,  [ZnFPU0], 4>;
  defm : ZnWriteResFpuPair<WriteVecIMulX, [ZnFPU0], 4>;
  defm : ZnWriteResFpuPair<WriteVecIMulY, [ZnFPU0], 4, [2], 2>;
  defm : X86WriteResPairUnsupported<WriteVecIMulZ>;

  // PMULLD.
  defm : ZnWriteResFpuPair<WritePMULLD,  [ZnFPU0], 4, [2]>;
  defm : ZnWriteResFpuPair<WritePMULLDY, [ZnFPU0], 4, [4], 2>;
  defm : X86WriteResPairUnsupported<WritePMULLDZ>;
  // Vector shuffles (pipes 1/2).
  defm : ZnWriteResFpuPair<WriteShuffle,  [ZnFPU12], 1>;
  defm : ZnWriteResFpuPair<WriteShuffleX, [ZnFPU12], 1>;
  defm : ZnWriteResFpuPair<WriteShuffleY, [ZnFPU12], 1, [2], 2>;
  defm : X86WriteResPairUnsupported<WriteShuffleZ>;

  // Variable vector shuffles.
  defm : ZnWriteResFpuPair<WriteVarShuffle,  [ZnFPU12], 1>;
  defm : ZnWriteResFpuPair<WriteVarShuffleX, [ZnFPU12], 1>;
  defm : ZnWriteResFpuPair<WriteVarShuffleY, [ZnFPU12], 1, [2], 2>;
  defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;

  // Vector blends.
  defm : ZnWriteResFpuPair<WriteBlend,  [ZnFPU013], 1>;
  defm : ZnWriteResFpuPair<WriteBlendY, [ZnFPU013], 1, [2], 2>;
  defm : X86WriteResPairUnsupported<WriteBlendZ>;
  defm : ZnWriteResFpuPair<WriteVarBlend,  [ZnFPU0], 1>;
  defm : ZnWriteResFpuPair<WriteVarBlendY, [ZnFPU0], 1, [2], 2>;
  defm : X86WriteResPairUnsupported<WriteVarBlendZ>;

  // 256-bit shuffles and VPMOV.
  defm : ZnWriteResFpuPair<WriteShuffle256,    [ZnFPU12], 2, [2], 2>;
  defm : ZnWriteResFpuPair<WriteVPMOV256,      [ZnFPU12], 1, [4], 3>;
  defm : ZnWriteResFpuPair<WriteVarShuffle256, [ZnFPU12], 2, [2], 2>;

  // PSADBW (pipe 0).
  defm : ZnWriteResFpuPair<WritePSADBW,  [ZnFPU0], 3>;
  defm : ZnWriteResFpuPair<WritePSADBWX, [ZnFPU0], 3>;
  defm : ZnWriteResFpuPair<WritePSADBWY, [ZnFPU0], 3, [2], 2>;
  defm : X86WriteResPairUnsupported<WritePSADBWZ>;

  // PHMINPOS.
  defm : ZnWriteResFpuPair<WritePHMINPOS, [ZnFPU0], 4>;
  // Vector insert/extract operations.
  defm : ZnWriteResFpuPair<WriteVecInsert, [ZnFPU], 1>;

  // Extract to register.
  def : WriteRes<WriteVecExtract, [ZnFPU12, ZnFPU2]> {
    let ResourceCycles = [1, 2];
    let Latency = 2;
  }

  // Extract and store: adds the AGU and a second micro-op.
  def : WriteRes<WriteVecExtractSt, [ZnAGU, ZnFPU12, ZnFPU2]> {
    let ResourceCycles = [1, 2, 3];
    let NumMicroOps = 2;
    let Latency = 5;
  }
  // MOVMSK instructions: all on FPU pipe 2.
  def : WriteRes<WriteFMOVMSK,   [ZnFPU2]>;
  def : WriteRes<WriteMMXMOVMSK, [ZnFPU2]>;
  def : WriteRes<WriteVecMOVMSK, [ZnFPU2]>;

  // The Y form takes two micro-ops and holds the pipe for two cycles.
  def : WriteRes<WriteVecMOVMSKY, [ZnFPU2]> {
    let ResourceCycles = [2];
    let Latency = 2;
    let NumMicroOps = 2;
  }
  // AES instructions (pipes 0/1, latency 4).
  defm : ZnWriteResFpuPair<WriteAESDecEnc, [ZnFPU01], 4>;
  defm : ZnWriteResFpuPair<WriteAESIMC,    [ZnFPU01], 4>;
  defm : ZnWriteResFpuPair<WriteAESKeyGen, [ZnFPU01], 4>;

  // Fences use the AGU group; NOPs consume no resources.
  def : WriteRes<WriteFence, [ZnAGU]>;
  def : WriteRes<WriteNop, []>;
  // Microcoded instructions: no resources are named, only a latency of 100.
  def ZnWriteMicrocoded : SchedWriteRes<[]> { let Latency = 100; }

  // Generic write classes mapped onto the microcoded write.
  def : SchedAlias<WriteMicrocoded, ZnWriteMicrocoded>;
  def : SchedAlias<WriteFCMOV,      ZnWriteMicrocoded>;
  def : SchedAlias<WriteSystem,     ZnWriteMicrocoded>;

  // MPSAD and carry-less multiply, register and load forms.
  def : SchedAlias<WriteMPSAD,    ZnWriteMicrocoded>;
  def : SchedAlias<WriteMPSADY,   ZnWriteMicrocoded>;
  def : SchedAlias<WriteMPSADLd,  ZnWriteMicrocoded>;
  def : SchedAlias<WriteMPSADYLd, ZnWriteMicrocoded>;
  def : SchedAlias<WriteCLMul,    ZnWriteMicrocoded>;
  def : SchedAlias<WriteCLMulLd,  ZnWriteMicrocoded>;

  // String compare, register and load forms.
  def : SchedAlias<WritePCmpIStrM,   ZnWriteMicrocoded>;
  def : SchedAlias<WritePCmpIStrMLd, ZnWriteMicrocoded>;
  def : SchedAlias<WritePCmpEStrI,   ZnWriteMicrocoded>;
  def : SchedAlias<WritePCmpEStrILd, ZnWriteMicrocoded>;
  def : SchedAlias<WritePCmpEStrM,   ZnWriteMicrocoded>;
  def : SchedAlias<WritePCmpEStrMLd, ZnWriteMicrocoded>;
  def : SchedAlias<WritePCmpIStrI,   ZnWriteMicrocoded>;
  def : SchedAlias<WritePCmpIStrILd, ZnWriteMicrocoded>;

  // MXCSR load/store.
  def : SchedAlias<WriteLDMXCSR, ZnWriteMicrocoded>;
  def : SchedAlias<WriteSTMXCSR, ZnWriteMicrocoded>;
  453. //=== Regex based InstRW ===//
  // Notation:
  // - r: register.
  // - m: memory.
  // - i: immediate.
  // - mm: 64 bit mmx register.
  // - x: 128 bit xmm register.
  // - (x)mm: mmx or xmm register.
  // - y: 256 bit ymm register.
  // - v: any vector register.
  //=== Integer Instructions ===//
  //-- Move instructions --//

  // MOV r16,m: scheduled as a folded-load ALU operation.
  def : InstRW<[WriteALULd, ReadAfterLd], (instrs MOV16rm)>;
  // XCHG r,m: two micro-ops across the AGU and ALU groups.
  def ZnWriteXCHGrm : SchedWriteRes<[ZnAGU, ZnALU]> {
    let NumMicroOps = 2;
    let Latency = 5;
  }
  def : InstRW<[ZnWriteXCHGrm, ReadAfterLd], (instregex "XCHG(8|16|32|64)rm")>;

  // XLAT is treated as microcoded.
  def : InstRW<[WriteMicrocoded], (instrs XLAT)>;
  // POP16: two micro-ops on the AGU group.
  def ZnWritePop16r : SchedWriteRes<[ZnAGU]> {
    let NumMicroOps = 2;
    let Latency = 5;
  }
  def : InstRW<[ZnWritePop16r], (instrs POP16rmm)>;

  // POPF and POPA are treated as microcoded.
  def : InstRW<[WriteMicrocoded], (instregex "POPF(16|32)")>;
  def : InstRW<[WriteMicrocoded], (instregex "POPA(16|32)")>;
  // PUSH.
  // The register form keeps the default values; only the memory form is
  // overridden here.
  def ZnWritePUSH : SchedWriteRes<[ZnAGU]> { let Latency = 4; }
  def : InstRW<[ZnWritePUSH], (instregex "PUSH(16|32)rmm")>;

  // PUSHF is treated as microcoded.
  def : InstRW<[WriteMicrocoded], (instregex "PUSHF(16|32)")>;

  // PUSHA.
  def ZnWritePushA : SchedWriteRes<[ZnAGU]> { let Latency = 8; }
  def : InstRW<[ZnWritePushA], (instregex "PUSHA(16|32)")>;
  // LAHF is treated as microcoded.
  def : InstRW<[WriteMicrocoded], (instrs LAHF)>;
  // MOVBE: one write shared by the load and the store forms.
  def ZnWriteMOVBE : SchedWriteRes<[ZnAGU, ZnALU]> { let Latency = 5; }

  // r,m (16/32/64-bit).
  def : InstRW<[ZnWriteMOVBE, ReadAfterLd], (instregex "MOVBE(16|32|64)rm")>;

  // m,r (16/32/64-bit).
  def : InstRW<[ZnWriteMOVBE], (instregex "MOVBE(16|32|64)mr")>;
  //-- Arithmetic instructions --//

  // ADD SUB with a memory destination (m,r / m,i).
  def : InstRW<[WriteALULd],
               (instregex "(ADD|SUB)(8|16|32|64)m(r|i)",
                          "(ADD|SUB)(8|16|32|64)mi8",
                          "(ADD|SUB)64mi32")>;

  // ADC SBB with a memory destination (m,r / m,i).
  def : InstRW<[WriteALULd],
               (instregex "(ADC|SBB)(8|16|32|64)m(r|i)",
                          "(ADC|SBB)(16|32|64)mi8",
                          "(ADC|SBB)64mi32")>;

  // INC DEC NOT NEG on a memory operand.
  def : InstRW<[WriteALULd], (instregex "(INC|DEC|NOT|NEG)(8|16|32|64)m")>;
  // MUL IMUL.

  // r16: issued on ALU1 and the multiplier.
  def ZnWriteMul16 : SchedWriteRes<[ZnALU1, ZnMultiplier]> {
    let Latency = 3;
  }
  def : SchedAlias<WriteIMul16, ZnWriteMul16>;
  def : SchedAlias<WriteIMul16Imm, ZnWriteMul16>; // TODO: is this right?
  def : SchedAlias<WriteIMul16Reg, ZnWriteMul16>; // TODO: is this right?

  // m16: same as the register form plus the AGU and the load latency.
  def ZnWriteMul16Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
    let Latency = 8;
  }
  def : SchedAlias<WriteIMul16Ld, ZnWriteMul16Ld>;
  // The Imm/Reg load forms used to alias the register write, which dropped the
  // AGU and the load latency. They now use the load write like WriteIMul16Ld.
  def : SchedAlias<WriteIMul16ImmLd, ZnWriteMul16Ld>;
  def : SchedAlias<WriteIMul16RegLd, ZnWriteMul16Ld>;
  // r32: issued on ALU1 and the multiplier.
  def ZnWriteMul32 : SchedWriteRes<[ZnALU1, ZnMultiplier]> {
    let Latency = 3;
  }
  def : SchedAlias<WriteIMul32, ZnWriteMul32>;
  def : SchedAlias<WriteIMul32Imm, ZnWriteMul32>; // TODO: is this right?
  def : SchedAlias<WriteIMul32Reg, ZnWriteMul32>; // TODO: is this right?

  // m32: same as the register form plus the AGU and the load latency.
  def ZnWriteMul32Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
    let Latency = 8;
  }
  def : SchedAlias<WriteIMul32Ld, ZnWriteMul32Ld>;
  // The Imm/Reg load forms used to alias the register write, which dropped the
  // AGU and the load latency. They now use the load write like WriteIMul32Ld.
  def : SchedAlias<WriteIMul32ImmLd, ZnWriteMul32Ld>;
  def : SchedAlias<WriteIMul32RegLd, ZnWriteMul32Ld>;
  // r64: two micro-ops on ALU1 and the multiplier.
  def ZnWriteMul64 : SchedWriteRes<[ZnALU1, ZnMultiplier]> {
    let Latency = 4;
    let NumMicroOps = 2;
  }
  def : SchedAlias<WriteIMul64, ZnWriteMul64>;
  def : SchedAlias<WriteIMul64Imm, ZnWriteMul64>; // TODO: is this right?
  def : SchedAlias<WriteIMul64Reg, ZnWriteMul64>; // TODO: is this right?

  // m64: same as the register form plus the AGU and the load latency.
  def ZnWriteMul64Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
    let Latency = 9;
    let NumMicroOps = 2;
  }
  def : SchedAlias<WriteIMul64Ld, ZnWriteMul64Ld>;
  // The Imm/Reg load forms used to alias the register write, which dropped the
  // AGU and the load latency. They now use the load write like WriteIMul64Ld.
  def : SchedAlias<WriteIMul64ImmLd, ZnWriteMul64Ld>;
  def : SchedAlias<WriteIMul64RegLd, ZnWriteMul64Ld>;
  // MULX.
  // Numbers are based on the AMD SOG for Family 17h - Instruction Latencies.
  // The memory forms add 5 cycles of latency and no extra micro-op.
  defm : ZnWriteResPair<WriteMULX32, [ZnALU1, ZnMultiplier], 3, [1, 1], 1, 5, 0>;
  defm : ZnWriteResPair<WriteMULX64, [ZnALU1, ZnMultiplier], 3, [1, 1], 1, 5, 0>;
  //-- Control transfer instructions --//

  // J(E|R)CXZ: ALU pipes 0/3 with default values.
  def ZnWriteJCXZ : SchedWriteRes<[ZnALU03]>;
  def : InstRW<[ZnWriteJCXZ], (instrs JCXZ, JECXZ, JRCXZ)>;

  // LOOP.
  def ZnWriteLOOP : SchedWriteRes<[ZnALU03]>;
  def : InstRW<[ZnWriteLOOP], (instrs LOOP)>;

  // LOOP(N)E, LOOP(N)Z.
  def ZnWriteLOOPE : SchedWriteRes<[ZnALU03]>;
  def : InstRW<[ZnWriteLOOPE], (instrs LOOPE, LOOPNE)>;

  // CALL.
  // The register form uses the AGU plus ALU pipes 0/3; the memory form is
  // treated as microcoded.
  def ZnWriteCALLr : SchedWriteRes<[ZnAGU, ZnALU03]>;
  def : InstRW<[ZnWriteCALLr], (instregex "CALL(16|32)r")>;
  def : InstRW<[WriteMicrocoded], (instregex "CALL(16|32)m")>;

  // RET (also LRET and IRET): two micro-ops on ALU pipes 0/3.
  def ZnWriteRET : SchedWriteRes<[ZnALU03]> { let NumMicroOps = 2; }
  def : InstRW<[ZnWriteRET],
               (instregex "RET(16|32|64)", "LRET(16|32|64)", "IRET(16|32|64)")>;
  //-- Logic instructions --//

  // AND OR XOR with a memory destination (m,r / m,i).
  def : InstRW<[WriteALULd],
               (instregex "(AND|OR|XOR)(8|16|32|64)m(r|i)",
                          "(AND|OR|XOR)(8|16|32|64)mi8",
                          "(AND|OR|XOR)64mi32")>;
  // ALU latency variants.
  // Register form: ALU group, latency 2.
  def ZnWriteALULat2 : SchedWriteRes<[ZnALU]> { let Latency = 2; }
  // Load form: AGU and ALU groups, latency 6.
  def ZnWriteALULat2Ld : SchedWriteRes<[ZnAGU, ZnALU]> { let Latency = 6; }
  // BTR BTS BTC with a memory operand (m,r / m,i): two micro-ops across the
  // AGU and ALU groups.
  def ZnWriteBTRSCm : SchedWriteRes<[ZnAGU, ZnALU]> {
    let NumMicroOps = 2;
    let Latency = 6;
  }
  // Both read-modify-write bit-test-and-set classes use this write.
  def : SchedAlias<WriteBitTestSetImmRMW, ZnWriteBTRSCm>;
  def : SchedAlias<WriteBitTestSetRegRMW, ZnWriteBTRSCm>;
  // PDEP PEXT: both forms are treated as microcoded.
  // r,r,r.
  def : InstRW<[WriteMicrocoded], (instregex "PDEP(32|64)rr", "PEXT(32|64)rr")>;
  // r,r,m.
  def : InstRW<[WriteMicrocoded], (instregex "PDEP(32|64)rm", "PEXT(32|64)rm")>;
  622. // RCR RCL.
  623. // m,i.
  624. def : InstRW<[WriteMicrocoded], (instregex "RC(R|L)(8|16|32|64)m(1|i|CL)")>;
  625. // SHR SHL SAR.
  626. // m,i / m,1.
  627. def : InstRW<[WriteShiftLd], (instregex "S(A|H)(R|L)(8|16|32|64)m(i|1)")>; // Memory shifts by immediate or by one.
  628. // SHRD SHLD.
  629. // m,r,i8.
  630. def : InstRW<[WriteShiftLd], (instregex "SH(R|L)D(16|32|64)mri8")>; // Memory double shift by imm8.
  631. // r,r,cl.
  632. def : InstRW<[WriteMicrocoded], (instregex "SH(R|L)D(16|32|64)rrCL")>; // CL-count forms are mapped to WriteMicrocoded.
  633. // m,r,cl.
  634. def : InstRW<[WriteMicrocoded], (instregex "SH(R|L)D(16|32|64)mrCL")>;
  635. //-- Misc instructions --//
  636. // CMPXCHG8B.
  637. def ZnWriteCMPXCHG8B : SchedWriteRes<[ZnAGU, ZnALU]> { // AGU + ALU, 18 micro-ops; Latency is not overridden.
  638. let NumMicroOps = 18;
  639. }
  640. def : InstRW<[ZnWriteCMPXCHG8B], (instrs CMPXCHG8B)>;
  641. def : InstRW<[WriteMicrocoded], (instrs CMPXCHG16B)>; // CMPXCHG16B: mapped to WriteMicrocoded instead.
  642. // LEAVE
  643. def ZnWriteLEAVE : SchedWriteRes<[ZnALU, ZnAGU]> { // ALU + AGU, latency 8, two micro-ops.
  644. let Latency = 8;
  645. let NumMicroOps = 2;
  646. }
  647. def : InstRW<[ZnWriteLEAVE], (instregex "LEAVE")>; // Regex form, so every opcode whose name starts with LEAVE is covered.
  648. // PAUSE.
  649. def : InstRW<[WriteMicrocoded], (instrs PAUSE)>; // Mapped to WriteMicrocoded.
  650. // XADD.
  651. def ZnXADD : SchedWriteRes<[ZnALU]>; // r,r: ZnALU only, no overrides.
  652. def : InstRW<[ZnXADD], (instregex "XADD(8|16|32|64)rr")>;
  653. def : InstRW<[WriteMicrocoded], (instregex "XADD(8|16|32|64)rm")>; // m,r: memory form is mapped to WriteMicrocoded.
  654. //=== Floating Point x87 Instructions ===//
  655. //-- Move instructions --//
  656. def ZnWriteFLDr : SchedWriteRes<[ZnFPU13]> ; // Register FLD: ZnFPU13, no overrides.
  657. def ZnWriteSTr: SchedWriteRes<[ZnFPU23]> { // Register FST(P): ZnFPU23, latency 5, two micro-ops.
  658. let Latency = 5;
  659. let NumMicroOps = 2;
  660. }
  661. // LD_F.
  662. // r.
  663. def : InstRW<[ZnWriteFLDr], (instrs LD_Frr)>;
  664. // m80.
  665. def ZnWriteLD_F80m : SchedWriteRes<[ZnAGU, ZnFPU13]> { // 80-bit load: AGU + ZnFPU13, two micro-ops; Latency is not overridden.
  666. let NumMicroOps = 2;
  667. }
  668. def : InstRW<[ZnWriteLD_F80m], (instrs LD_F80m)>;
  669. // FST(P).
  670. // r.
  671. def : InstRW<[ZnWriteSTr], (instregex "ST_(F|FP)rr")>;
  672. // m80.
  673. def ZnWriteST_FP80m : SchedWriteRes<[ZnAGU, ZnFPU23]> { // 80-bit store-and-pop: AGU + ZnFPU23, latency 5.
  674. let Latency = 5;
  675. }
  676. def : InstRW<[ZnWriteST_FP80m], (instrs ST_FP80m)>;
  677. def ZnWriteFXCH : SchedWriteRes<[ZnFPU]>; // Any FPU pipe (ZnFPU group), no overrides.
  678. // FXCH.
  679. def : InstRW<[ZnWriteFXCH], (instrs XCH_F)>;
  680. // FILD.
  681. def ZnWriteFILD : SchedWriteRes<[ZnAGU, ZnFPU3]> { // Integer load: AGU + ZnFPU3, latency 11, two micro-ops.
  682. let Latency = 11;
  683. let NumMicroOps = 2;
  684. }
  685. def : InstRW<[ZnWriteFILD], (instregex "ILD_F(16|32|64)m")>;
  686. // FIST(P) FISTTP.
  687. def ZnWriteFIST : SchedWriteRes<[ZnAGU, ZnFPU23]> { // Integer store: AGU + ZnFPU23, latency 12.
  688. let Latency = 12;
  689. }
  690. def : InstRW<[ZnWriteFIST], (instregex "IS(T|TT)_(F|FP)(16|32|64)m")>;
  691. def ZnWriteFPU13 : SchedWriteRes<[ZnAGU, ZnFPU13]> { // AGU + ZnFPU13, latency 8.
  692. let Latency = 8;
  693. }
  694. def ZnWriteFPU3 : SchedWriteRes<[ZnAGU, ZnFPU3]> { // AGU + ZnFPU3, latency 11.
  695. let Latency = 11;
  696. }
  697. // FLDZ.
  698. def : SchedAlias<WriteFLD0, ZnWriteFPU13>;
  699. // FLD1.
  700. def : SchedAlias<WriteFLD1, ZnWriteFPU3>;
  701. // FLDPI FLDL2E etc.
  702. def : SchedAlias<WriteFLDC, ZnWriteFPU3>;
  703. // FNSTSW.
  704. // AX.
  705. def : InstRW<[WriteMicrocoded], (instrs FNSTSW16r)>; // Status word to AX: mapped to WriteMicrocoded.
  706. // FLDCW.
  707. def : InstRW<[WriteMicrocoded], (instrs FLDCW16m)>; // Control word load: mapped to WriteMicrocoded.
  708. // FNSTCW.
  709. def : InstRW<[WriteMicrocoded], (instrs FNSTCW16m)>; // Control word store: mapped to WriteMicrocoded.
  710. // FINCSTP FDECSTP.
  711. def : InstRW<[ZnWriteFPU3], (instrs FINCSTP, FDECSTP)>; // NOTE(review): ZnWriteFPU3 lists ZnAGU although these opcodes are not memory forms - confirm.
  712. // FFREE.
  713. def : InstRW<[ZnWriteFPU3], (instregex "FFREE")>;
  714. //-- Arithmetic instructions --//
  715. def ZnWriteFPU3Lat1 : SchedWriteRes<[ZnFPU3]> ; // ZnFPU3 only; Latency is left at the SchedWriteRes default.
  716. def ZnWriteFPU0Lat1 : SchedWriteRes<[ZnFPU0]> ; // ZnFPU0 only; Latency is left at the SchedWriteRes default.
  717. def ZnWriteFPU0Lat1Ld : SchedWriteRes<[ZnAGU, ZnFPU0]> { // Load form: adds ZnAGU, latency 8.
  718. let Latency = 8;
  719. }
  720. // FCHS.
  721. def : InstRW<[ZnWriteFPU3Lat1], (instregex "CHS_F")>;
  722. // FCOM(P) FUCOM(P).
  723. // r.
  724. def : InstRW<[ZnWriteFPU0Lat1], (instregex "COM(P?)_FST0r", "UCOM_F(P?)r")>;
  725. // m.
  726. def : InstRW<[ZnWriteFPU0Lat1Ld], (instregex "FCOM(P?)(32|64)m")>;
  727. // FCOMPP FUCOMPP.
  728. // r.
  729. def : InstRW<[ZnWriteFPU0Lat1], (instrs FCOMPP, UCOM_FPPr)>;
  730. def ZnWriteFPU02 : SchedWriteRes<[ZnAGU, ZnFPU02]> // AGU + ZnFPU02, latency 9.
  731. {
  732. let Latency = 9;
  733. }
  734. // FCOMI(P) FUCOMI(P).
  735. // r. (All opcodes listed below are register forms.)
  736. def : InstRW<[ZnWriteFPU02], (instrs COM_FIPr, COM_FIr, UCOM_FIPr, UCOM_FIr)>; // NOTE(review): the write lists ZnAGU for register-only opcodes - confirm.
  737. def ZnWriteFPU03 : SchedWriteRes<[ZnAGU, ZnFPU03]> // AGU for 1 cycle, ZnFPU03 for 3; latency 12, two micro-ops.
  738. {
  739. let Latency = 12;
  740. let NumMicroOps = 2;
  741. let ResourceCycles = [1,3];
  742. }
  743. // FICOM(P).
  744. def : InstRW<[ZnWriteFPU03], (instregex "FICOM(P?)(16|32)m")>;
  745. // FTST.
  746. def : InstRW<[ZnWriteFPU0Lat1], (instregex "TST_F")>;
  747. // FXAM.
  748. def : InstRW<[ZnWriteFPU3Lat1], (instrs XAM_F)>;
  749. // FNOP.
  750. def : InstRW<[ZnWriteFPU0Lat1], (instrs FNOP)>;
  751. // WAIT.
  752. def : InstRW<[ZnWriteFPU0Lat1], (instrs WAIT)>;
  753. //=== Integer MMX and XMM Instructions ===//
  754. def ZnWriteFPU013 : SchedWriteRes<[ZnFPU013]> ; // ZnFPU013 only, no overrides.
  755. def ZnWriteFPU013m : SchedWriteRes<[ZnAGU, ZnFPU013]> { // Memory form: adds ZnAGU, latency 8, two micro-ops.
  756. let Latency = 8;
  757. let NumMicroOps = 2;
  758. }
  759. def ZnWriteFPU01 : SchedWriteRes<[ZnFPU01]> ; // ZnFPU01 only, no overrides.
  760. def ZnWriteFPU01Y : SchedWriteRes<[ZnFPU01]> { // Used for the ymm form below: same resource, two micro-ops.
  761. let NumMicroOps = 2;
  762. }
  763. // VPBLENDD.
  764. // v,v,v,i.
  765. def : InstRW<[ZnWriteFPU01], (instrs VPBLENDDrri)>;
  766. // ymm
  767. def : InstRW<[ZnWriteFPU01Y], (instrs VPBLENDDYrri)>;
  768. // v,v,m,i
  769. def ZnWriteFPU01Op2 : SchedWriteRes<[ZnAGU, ZnFPU01]> { // xmm load form: AGU 1 cycle, ZnFPU01 2 cycles, latency 8.
  770. let NumMicroOps = 2;
  771. let Latency = 8;
  772. let ResourceCycles = [1, 2];
  773. }
  774. def ZnWriteFPU01Op2Y : SchedWriteRes<[ZnAGU, ZnFPU01]> { // ymm load form: AGU 1 cycle, ZnFPU01 3 cycles, latency 9.
  775. let NumMicroOps = 2;
  776. let Latency = 9;
  777. let ResourceCycles = [1, 3];
  778. }
  779. def : InstRW<[ZnWriteFPU01Op2], (instrs VPBLENDDrmi)>;
  780. def : InstRW<[ZnWriteFPU01Op2Y], (instrs VPBLENDDYrmi)>;
  781. // MASKMOVQ.
  782. def : InstRW<[WriteMicrocoded], (instregex "MMX_MASKMOVQ(64)?")>; // Mapped to WriteMicrocoded.
  783. // MASKMOVDQU.
  784. def : InstRW<[WriteMicrocoded], (instregex "(V?)MASKMOVDQU(64)?")>; // SSE and VEX forms.
  785. // VPMASKMOVD.
  786. // v,v,m (xmm and ymm load forms).
  787. def : InstRW<[WriteMicrocoded], // NOTE(review): only the D load form is listed; the store regex below covers D and Q - confirm.
  788. (instregex "VPMASKMOVD(Y?)rm")>;
  789. // m, v,v.
  790. def : InstRW<[WriteMicrocoded], (instregex "VPMASKMOV(D|Q)(Y?)mr")>;
  791. // VPBROADCAST B/W.
  792. // x, m8/16.
  793. def ZnWriteVPBROADCAST128Ld : SchedWriteRes<[ZnAGU, ZnFPU12]> { // AGU 1 cycle, ZnFPU12 2 cycles; latency 8, two micro-ops.
  794. let Latency = 8;
  795. let NumMicroOps = 2;
  796. let ResourceCycles = [1, 2];
  797. }
  798. def : InstRW<[ZnWriteVPBROADCAST128Ld],
  799. (instregex "VPBROADCAST(B|W)rm")>;
  800. // y, m8/16
  801. def ZnWriteVPBROADCAST256Ld : SchedWriteRes<[ZnAGU, ZnFPU1]> { // Same numbers as the 128-bit write, but on ZnFPU1 rather than ZnFPU12.
  802. let Latency = 8;
  803. let NumMicroOps = 2;
  804. let ResourceCycles = [1, 2];
  805. }
  806. def : InstRW<[ZnWriteVPBROADCAST256Ld],
  807. (instregex "VPBROADCAST(B|W)Yrm")>;
  808. // VPGATHER.
  809. def : InstRW<[WriteMicrocoded], (instregex "VPGATHER(Q|D)(Q|D)(Y?)rm")>; // All index/element width combinations, xmm and ymm.
  810. //-- Arithmetic instructions --//
  811. // HADD, HSUB PS/PD
  812. // PHADD|PHSUB (S) W/D.
  813. defm : ZnWriteResFpuPair<WriteFHAdd, [], 7>; // Empty resource list; 7 is presumably the latency - confirm against ZnWriteResFpuPair.
  814. defm : ZnWriteResFpuPair<WriteFHAddY, [], 7>; // ymm FP horizontal add: same arguments as the xmm form.
  815. defm : ZnWriteResFpuPair<WritePHAdd, [], 3>; // Integer horizontal add: empty resource list, value 3.
  816. defm : ZnWriteResFpuPair<WritePHAddX, [], 3>;
  817. defm : ZnWriteResFpuPair<WritePHAddY, [], 3>;
  818. // PCMPGTQ.
  819. def ZnWritePCMPGTQr : SchedWriteRes<[ZnFPU03]>; // Register form (xmm and ymm): ZnFPU03, no overrides.
  820. def : InstRW<[ZnWritePCMPGTQr], (instregex "(V?)PCMPGTQ(Y?)rr")>;
  821. // x <- x,m.
  822. def ZnWritePCMPGTQm : SchedWriteRes<[ZnAGU, ZnFPU03]> { // xmm load form: adds ZnAGU, latency 8.
  823. let Latency = 8;
  824. }
  825. // ymm.
  826. def ZnWritePCMPGTQYm : SchedWriteRes<[ZnAGU, ZnFPU03]> { // ymm load form: two micro-ops, ZnFPU03 held for 2 cycles.
  827. let Latency = 8;
  828. let NumMicroOps = 2;
  829. let ResourceCycles = [1,2];
  830. }
  831. def : InstRW<[ZnWritePCMPGTQm], (instregex "(V?)PCMPGTQrm")>;
  832. def : InstRW<[ZnWritePCMPGTQYm], (instrs VPCMPGTQYrm)>;
  833. //=== Floating Point XMM and YMM Instructions ===//
  834. //-- Move instructions --//
  835. // VPERM2F128 / VPERM2I128.
  836. def : InstRW<[WriteMicrocoded], (instrs VPERM2F128rr, // Register forms: mapped to WriteMicrocoded.
  837. VPERM2I128rr)>;
  838. def : InstRW<[WriteMicrocoded], (instrs VPERM2F128rm, // Load forms: same class.
  839. VPERM2I128rm)>;
  840. def ZnWriteBROADCAST : SchedWriteRes<[ZnAGU, ZnFPU13]> { // AGU + ZnFPU13, latency 8, two micro-ops.
  841. let NumMicroOps = 2;
  842. let Latency = 8;
  843. }
  844. // VBROADCASTF128 / VBROADCASTI128.
  845. def : InstRW<[ZnWriteBROADCAST], (instrs VBROADCASTF128,
  846. VBROADCASTI128)>;
  847. // EXTRACTPS.
  848. // r32,x,i.
  849. def ZnWriteEXTRACTPSr : SchedWriteRes<[ZnFPU12, ZnFPU2]> { // ZnFPU12 for 1 cycle, ZnFPU2 for 2; latency 2, two micro-ops.
  850. let Latency = 2;
  851. let NumMicroOps = 2;
  852. let ResourceCycles = [1, 2];
  853. }
  854. def : InstRW<[ZnWriteEXTRACTPSr], (instregex "(V?)EXTRACTPSrr")>;
  855. def ZnWriteEXTRACTPSm : SchedWriteRes<[ZnAGU,ZnFPU12, ZnFPU2]> { // Memory-destination form: adds ZnAGU, latency 5.
  856. let Latency = 5;
  857. let NumMicroOps = 2;
  858. let ResourceCycles = [5, 1, 2]; // NOTE(review): ZnAGU is held for 5 cycles here; other writes in this section use 1 - confirm.
  859. }
  860. // m32,x,i.
  861. def : InstRW<[ZnWriteEXTRACTPSm], (instregex "(V?)EXTRACTPSmr")>;
  862. // VEXTRACTF128 / VEXTRACTI128.
  863. // x,y,i.
  864. def : InstRW<[ZnWriteFPU013], (instrs VEXTRACTF128rr, // Register destination: ZnWriteFPU013.
  865. VEXTRACTI128rr)>;
  866. // m128,y,i.
  867. def : InstRW<[ZnWriteFPU013m], (instrs VEXTRACTF128mr, // Memory destination: ZnWriteFPU013m.
  868. VEXTRACTI128mr)>;
  869. def ZnWriteVINSERT128r: SchedWriteRes<[ZnFPU013]> { // Register insert: ZnFPU013 held for 2 cycles, latency 2.
  870. let Latency = 2;
  871. let ResourceCycles = [2];
  872. }
  873. def ZnWriteVINSERT128Ld: SchedWriteRes<[ZnAGU,ZnFPU013]> { // Load insert: adds ZnAGU, latency 9, two micro-ops.
  874. let Latency = 9;
  875. let NumMicroOps = 2;
  876. let ResourceCycles = [1, 2];
  877. }
  878. // VINSERTF128 / VINSERTI128.
  879. // y,y,x,i.
  880. def : InstRW<[ZnWriteVINSERT128r], (instrs VINSERTF128rr,
  881. VINSERTI128rr)>;
  882. def : InstRW<[ZnWriteVINSERT128Ld], (instrs VINSERTF128rm, // y,y,m128,i.
  883. VINSERTI128rm)>;
  884. // VGATHER.
  885. def : InstRW<[WriteMicrocoded], (instregex "VGATHER(Q|D)(PD|PS)(Y?)rm")>; // All FP gathers are mapped to WriteMicrocoded.
  886. //-- Conversion instructions --//
  887. def ZnWriteCVTPD2PSr: SchedWriteRes<[ZnFPU3]> { // xmm register form: ZnFPU3, latency 4.
  888. let Latency = 4;
  889. }
  890. def ZnWriteCVTPD2PSYr: SchedWriteRes<[ZnFPU3]> { // ymm register form: ZnFPU3 held 2 cycles, latency 5, two micro-ops.
  891. let Latency = 5;
  892. let NumMicroOps = 2;
  893. let ResourceCycles = [2];
  894. }
  895. // CVTPD2PS.
  896. // x,x.
  897. def : SchedAlias<WriteCvtPD2PS, ZnWriteCVTPD2PSr>;
  898. // y,y.
  899. def : SchedAlias<WriteCvtPD2PSY, ZnWriteCVTPD2PSYr>;
  900. // z,z.
  901. defm : X86WriteResUnsupported<WriteCvtPD2PSZ>; // 512-bit class is marked unsupported in this model.
  902. def ZnWriteCVTPD2PSLd: SchedWriteRes<[ZnAGU,ZnFPU3]> { // xmm load form: adds ZnAGU, latency 11.
  903. let Latency = 11;
  904. }
  905. // x,m128.
  906. def : SchedAlias<WriteCvtPD2PSLd, ZnWriteCVTPD2PSLd>;
  907. // x,m256.
  908. def ZnWriteCVTPD2PSYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> { // 256-bit load form: two micro-ops, ZnFPU3 held 2 cycles, latency 11.
  909. let Latency = 11;
  910. let NumMicroOps = 2;
  911. let ResourceCycles = [1,2];
  912. }
  913. def : SchedAlias<WriteCvtPD2PSYLd, ZnWriteCVTPD2PSYLd>;
  914. // z,m512
  915. defm : X86WriteResUnsupported<WriteCvtPD2PSZLd>; // 512-bit load class is marked unsupported in this model.
  916. // CVTSD2SS.
  917. // x,x.
  918. // Same as ZnWriteCVTPD2PSr
  919. def : SchedAlias<WriteCvtSD2SS, ZnWriteCVTPD2PSr>;
  920. // x,m64.
  921. def : SchedAlias<WriteCvtSD2SSLd, ZnWriteCVTPD2PSLd>; // Reuses the CVTPD2PS load write.
  922. // CVTPS2PD.
  923. // x,x.
  924. def ZnWriteCVTPS2PDr : SchedWriteRes<[ZnFPU3]> { // xmm register form: ZnFPU3, latency 3.
  925. let Latency = 3;
  926. }
  927. def : SchedAlias<WriteCvtPS2PD, ZnWriteCVTPS2PDr>;
  928. // x,m64.
  929. // y,m128.
  930. def ZnWriteCVTPS2PDLd : SchedWriteRes<[ZnAGU, ZnFPU3]> { // Load form shared by xmm and ymm: latency 10, two micro-ops.
  931. let Latency = 10;
  932. let NumMicroOps = 2;
  933. }
  934. def : SchedAlias<WriteCvtPS2PDLd, ZnWriteCVTPS2PDLd>;
  935. def : SchedAlias<WriteCvtPS2PDYLd, ZnWriteCVTPS2PDLd>;
  936. defm : X86WriteResUnsupported<WriteCvtPS2PDZLd>; // 512-bit load class is marked unsupported in this model.
  937. // y,x.
  938. def ZnWriteVCVTPS2PDY : SchedWriteRes<[ZnFPU3]> { // ymm register form: same resource and latency as ZnWriteCVTPS2PDr.
  939. let Latency = 3;
  940. }
  941. def : SchedAlias<WriteCvtPS2PDY, ZnWriteVCVTPS2PDY>;
  942. defm : X86WriteResUnsupported<WriteCvtPS2PDZ>; // 512-bit class is marked unsupported in this model.
  943. // CVTSS2SD.
  944. // x,x.
  945. def ZnWriteCVTSS2SDr : SchedWriteRes<[ZnFPU3]> { // Register form: ZnFPU3, latency 4.
  946. let Latency = 4;
  947. }
  948. def : SchedAlias<WriteCvtSS2SD, ZnWriteCVTSS2SDr>;
  949. // x,m32.
  950. def ZnWriteCVTSS2SDLd : SchedWriteRes<[ZnAGU, ZnFPU3]> { // Load form: AGU 1 cycle, ZnFPU3 2 cycles; latency 11, two micro-ops.
  951. let Latency = 11;
  952. let NumMicroOps = 2;
  953. let ResourceCycles = [1, 2];
  954. }
  955. def : SchedAlias<WriteCvtSS2SDLd, ZnWriteCVTSS2SDLd>;
  956. def ZnWriteCVTDQ2PDr: SchedWriteRes<[ZnFPU12,ZnFPU3]> { // ZnFPU12 + ZnFPU3, latency 5.
  957. let Latency = 5;
  958. }
  959. // CVTDQ2PD.
  960. // x,x.
  961. def : InstRW<[ZnWriteCVTDQ2PDr], (instregex "(V)?CVTDQ2PDrr")>;
  962. // Same as xmm
  963. // y,x.
  964. def : InstRW<[ZnWriteCVTDQ2PDr], (instrs VCVTDQ2PDYrr)>;
  965. def ZnWriteCVTPD2DQr: SchedWriteRes<[ZnFPU12, ZnFPU3]> { // Same resources and latency as ZnWriteCVTDQ2PDr, kept as a separate write.
  966. let Latency = 5;
  967. }
  968. // CVT(T)PD2DQ.
  969. // x,x.
  970. def : InstRW<[ZnWriteCVTPD2DQr], (instregex "(V?)CVT(T?)PD2DQrr")>;
  971. def ZnWriteCVTPD2DQLd: SchedWriteRes<[ZnAGU,ZnFPU12,ZnFPU3]> { // Load form: adds ZnAGU, latency 12, two micro-ops.
  972. let Latency = 12;
  973. let NumMicroOps = 2;
  974. }
  975. // x,m128.
  976. def : InstRW<[ZnWriteCVTPD2DQLd], (instregex "(V?)CVT(T?)PD2DQrm")>;
  977. // same as xmm handling
  978. // x,y.
  979. def : InstRW<[ZnWriteCVTPD2DQr], (instregex "VCVT(T?)PD2DQYrr")>;
  980. // x,m256.
  981. def : InstRW<[ZnWriteCVTPD2DQLd], (instregex "VCVT(T?)PD2DQYrm")>;
  982. def ZnWriteCVTPS2PIr: SchedWriteRes<[ZnFPU3]> { // ZnFPU3, latency 4.
  983. let Latency = 4;
  984. }
  985. // CVT(T)PS2PI.
  986. // mm,x.
  987. def : InstRW<[ZnWriteCVTPS2PIr], (instregex "MMX_CVT(T?)PS2PIrr")>;
  988. // CVTPI2PD.
  989. // x,mm.
  990. def : InstRW<[ZnWriteCVTPS2PDr], (instrs MMX_CVTPI2PDrr)>; // Reuses the CVTPS2PD register write.
  991. // CVT(T)PD2PI.
  992. // mm,x.
  993. def : InstRW<[ZnWriteCVTPS2PIr], (instregex "MMX_CVT(T?)PD2PIrr")>; // Reuses the CVT(T)PS2PI write.
  994. def ZnWriteCVSTSI2SSr: SchedWriteRes<[ZnFPU3]> { // ZnFPU3, latency 5. NOTE(review): not referenced anywhere in this section - confirm it is used elsewhere.
  995. let Latency = 5;
  996. }
  997. // same as CVTPD2DQr
  998. // CVT(T)SS2SI.
  999. // r32/64,x.
  1000. def : InstRW<[ZnWriteCVTPD2DQr], (instregex "(V?)CVT(T?)SS2SI(64)?rr")>;
  1001. // same as CVTPD2DQLd
  1002. // r32/64,m32.
  1003. def : InstRW<[ZnWriteCVTPD2DQLd], (instregex "(V?)CVT(T?)SS2SI(64)?rm")>;
  1004. def ZnWriteCVSTSI2SDr: SchedWriteRes<[ZnFPU013, ZnFPU3]> { // ZnFPU013 + ZnFPU3, latency 5.
  1005. let Latency = 5;
  1006. }
  1007. // CVTSI2SD.
  1008. // x,r32/64.
  1009. def : InstRW<[ZnWriteCVSTSI2SDr], (instregex "(V?)CVTSI(64)?2SDrr")>;
  1010. def ZnWriteCVSTSI2SIr: SchedWriteRes<[ZnFPU3, ZnFPU2]> { // Applied to CVT(T)SD2SI below despite the name: ZnFPU3 + ZnFPU2, latency 5.
  1011. let Latency = 5;
  1012. }
  1013. def ZnWriteCVSTSI2SILd: SchedWriteRes<[ZnAGU, ZnFPU3, ZnFPU2]> { // Load form: adds ZnAGU, latency 12.
  1014. let Latency = 12;
  1015. }
  1016. // CVT(T)SD2SI.
  1017. // r32/64,x.
  1018. def : InstRW<[ZnWriteCVSTSI2SIr], (instregex "(V?)CVT(T?)SD2SI(64)?rr")>;
  1019. // r32/64,m64.
  1020. def : InstRW<[ZnWriteCVSTSI2SILd], (instregex "(V?)CVT(T?)SD2SI(64)?rm")>;
  1021. // VCVTPS2PH.
  1022. // x,v,i.
  1023. def : SchedAlias<WriteCvtPS2PH, ZnWriteMicrocoded>; // xmm register form: aliased to ZnWriteMicrocoded.
  1024. def : SchedAlias<WriteCvtPS2PHY, ZnWriteMicrocoded>; // ymm register form.
  1025. defm : X86WriteResUnsupported<WriteCvtPS2PHZ>; // 512-bit class is marked unsupported in this model.
  1026. // m,v,i.
  1027. def : SchedAlias<WriteCvtPS2PHSt, ZnWriteMicrocoded>; // xmm store form.
  1028. def : SchedAlias<WriteCvtPS2PHYSt, ZnWriteMicrocoded>; // ymm store form.
  1029. defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
  1030. // VCVTPH2PS.
  1031. // v,x.
  1032. def : SchedAlias<WriteCvtPH2PS, ZnWriteMicrocoded>; // xmm register form.
  1033. def : SchedAlias<WriteCvtPH2PSY, ZnWriteMicrocoded>; // ymm register form.
  1034. defm : X86WriteResUnsupported<WriteCvtPH2PSZ>;
  1035. // v,m.
  1036. def : SchedAlias<WriteCvtPH2PSLd, ZnWriteMicrocoded>; // xmm load form.
  1037. def : SchedAlias<WriteCvtPH2PSYLd, ZnWriteMicrocoded>; // ymm load form.
  1038. defm : X86WriteResUnsupported<WriteCvtPH2PSZLd>;
  1039. //-- SSE4A instructions --//
  1040. // EXTRQ
  1041. def ZnWriteEXTRQ: SchedWriteRes<[ZnFPU12, ZnFPU2]> { // ZnFPU12 + ZnFPU2, latency 2.
  1042. let Latency = 2;
  1043. }
  1044. def : InstRW<[ZnWriteEXTRQ], (instregex "EXTRQ")>; // Regex form: every opcode whose name starts with EXTRQ.
  1045. // INSERTQ
  1046. def ZnWriteINSERTQ: SchedWriteRes<[ZnFPU03,ZnFPU1]> { // ZnFPU03 + ZnFPU1, latency 4.
  1047. let Latency = 4;
  1048. }
  1049. def : InstRW<[ZnWriteINSERTQ], (instregex "INSERTQ")>; // Regex form: every opcode whose name starts with INSERTQ.
  1050. //-- SHA instructions --//
  1051. // SHA256MSG2
  1052. def : InstRW<[WriteMicrocoded], (instregex "SHA256MSG2(Y?)r(r|m)")>; // Register and load forms are mapped to WriteMicrocoded.
  1053. // SHA1MSG1, SHA256MSG1
  1054. // x,x.
  1055. def ZnWriteSHA1MSG1r : SchedWriteRes<[ZnFPU12]> { // ZnFPU12 held for 2 cycles, latency 2.
  1056. let Latency = 2;
  1057. let ResourceCycles = [2];
  1058. }
  1059. def : InstRW<[ZnWriteSHA1MSG1r], (instregex "SHA(1|256)MSG1rr")>;
  1060. // x,m.
  1061. def ZnWriteSHA1MSG1Ld : SchedWriteRes<[ZnAGU, ZnFPU12]> { // Load form: AGU 1 cycle, ZnFPU12 2 cycles, latency 9.
  1062. let Latency = 9;
  1063. let ResourceCycles = [1,2];
  1064. }
  1065. def : InstRW<[ZnWriteSHA1MSG1Ld], (instregex "SHA(1|256)MSG1rm")>;
  1066. // SHA1MSG2
  1067. // x,x.
  1068. def ZnWriteSHA1MSG2r : SchedWriteRes<[ZnFPU12]> ; // ZnFPU12 only, no overrides.
  1069. def : InstRW<[ZnWriteSHA1MSG2r], (instrs SHA1MSG2rr)>;
  1070. // x,m.
  1071. def ZnWriteSHA1MSG2Ld : SchedWriteRes<[ZnAGU, ZnFPU12]> { // Load form: adds ZnAGU, latency 8.
  1072. let Latency = 8;
  1073. }
  1074. def : InstRW<[ZnWriteSHA1MSG2Ld], (instrs SHA1MSG2rm)>;
  1075. // SHA1NEXTE
  1076. // x,x.
  1077. def ZnWriteSHA1NEXTEr : SchedWriteRes<[ZnFPU1]> ; // ZnFPU1 only, no overrides.
  1078. def : InstRW<[ZnWriteSHA1NEXTEr], (instrs SHA1NEXTErr)>;
  1079. // x,m.
  1080. def ZnWriteSHA1NEXTELd : SchedWriteRes<[ZnAGU, ZnFPU1]> { // Load form: adds ZnAGU, latency 8.
  1081. let Latency = 8;
  1082. }
  1083. def : InstRW<[ZnWriteSHA1NEXTELd], (instrs SHA1NEXTErm)>;
  1084. // SHA1RNDS4
  1085. // x,x.
  1086. def ZnWriteSHA1RNDS4r : SchedWriteRes<[ZnFPU1]> { // ZnFPU1, latency 6.
  1087. let Latency = 6;
  1088. }
  1089. def : InstRW<[ZnWriteSHA1RNDS4r], (instrs SHA1RNDS4rri)>;
  1090. // x,m.
  1091. def ZnWriteSHA1RNDS4Ld : SchedWriteRes<[ZnAGU, ZnFPU1]> { // Load form: adds ZnAGU, latency 13.
  1092. let Latency = 13;
  1093. }
  1094. def : InstRW<[ZnWriteSHA1RNDS4Ld], (instrs SHA1RNDS4rmi)>;
  1095. // SHA256RNDS2
  1096. // x,x.
  1097. def ZnWriteSHA256RNDS2r : SchedWriteRes<[ZnFPU1]> { // ZnFPU1, latency 4.
  1098. let Latency = 4;
  1099. }
  1100. def : InstRW<[ZnWriteSHA256RNDS2r], (instrs SHA256RNDS2rr)>;
  1101. // x,m.
  1102. def ZnWriteSHA256RNDS2Ld : SchedWriteRes<[ZnAGU, ZnFPU1]> { // Load form: adds ZnAGU, latency 11.
  1103. let Latency = 11;
  1104. }
  1105. def : InstRW<[ZnWriteSHA256RNDS2Ld], (instrs SHA256RNDS2rm)>;
  1106. //-- Arithmetic instructions --//
  1107. // DPPS.
  1108. // x,x,i / v,v,v,i.
  1109. def : SchedAlias<WriteDPPS, ZnWriteMicrocoded>; // xmm register form: aliased to ZnWriteMicrocoded.
  1110. def : SchedAlias<WriteDPPSY, ZnWriteMicrocoded>; // ymm register form.
  1111. // x,m,i / v,v,m,i.
  1112. def : SchedAlias<WriteDPPSLd, ZnWriteMicrocoded>; // xmm load form.
  1113. def : SchedAlias<WriteDPPSYLd,ZnWriteMicrocoded>; // ymm load form.
  1114. // DPPD.
  1115. // x,x,i.
  1116. def : SchedAlias<WriteDPPD, ZnWriteMicrocoded>;
  1117. // x,m,i.
  1118. def : SchedAlias<WriteDPPDLd, ZnWriteMicrocoded>;
  1119. //-- Other instructions --//
  1120. // VZEROUPPER.
  1121. def : InstRW<[WriteMicrocoded], (instrs VZEROUPPER)>; // Mapped to WriteMicrocoded.
  1122. // VZEROALL.
  1123. def : InstRW<[WriteMicrocoded], (instrs VZEROALL)>; // Mapped to WriteMicrocoded.
  1124. ///////////////////////////////////////////////////////////////////////////////
  1125. // Dependency breaking instructions.
  1126. ///////////////////////////////////////////////////////////////////////////////
  1127. def : IsZeroIdiomFunction<[ // Opcode groups treated as zero idioms when ZeroIdiomPredicate holds.
  1128. // GPR Zero-idioms.
  1129. DepBreakingClass<[ // Only the 32/64-bit register forms are listed.
  1130. SUB32rr, SUB64rr,
  1131. XOR32rr, XOR64rr
  1132. ], ZeroIdiomPredicate>, // ZeroIdiomPredicate is defined outside this section; presumably a same-register check - confirm.
  1133. // MMX Zero-idioms.
  1134. DepBreakingClass<[
  1135. MMX_PXORrr, MMX_PANDNrr, MMX_PSUBBrr,
  1136. MMX_PSUBDrr, MMX_PSUBQrr, MMX_PSUBWrr,
  1137. MMX_PSUBSBrr, MMX_PSUBSWrr, MMX_PSUBUSBrr, MMX_PSUBUSWrr,
  1138. MMX_PCMPGTBrr, MMX_PCMPGTDrr, MMX_PCMPGTWrr
  1139. ], ZeroIdiomPredicate>,
  1140. // SSE Zero-idioms.
  1141. DepBreakingClass<[
  1142. // fp variants.
  1143. XORPSrr, XORPDrr, ANDNPSrr, ANDNPDrr,
  1144. // int variants.
  1145. PXORrr, PANDNrr,
  1146. PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
  1147. PCMPGTBrr, PCMPGTDrr, PCMPGTQrr, PCMPGTWrr
  1148. ], ZeroIdiomPredicate>,
  1149. // AVX XMM Zero-idioms.
  1150. DepBreakingClass<[
  1151. // fp variants.
  1152. VXORPSrr, VXORPDrr, VANDNPSrr, VANDNPDrr,
  1153. // int variants.
  1154. VPXORrr, VPANDNrr,
  1155. VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
  1156. VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr
  1157. ], ZeroIdiomPredicate>,
  1158. // AVX YMM Zero-idioms.
  1159. DepBreakingClass<[
  1160. // fp variants
  1161. VXORPSYrr, VXORPDYrr, VANDNPSYrr, VANDNPDYrr,
  1162. // int variants
  1163. VPXORYrr, VPANDNYrr,
  1164. VPSUBBYrr, VPSUBWYrr, VPSUBDYrr, VPSUBQYrr,
  1165. VPCMPGTBYrr, VPCMPGTWYrr, VPCMPGTDYrr, VPCMPGTQYrr
  1166. ], ZeroIdiomPredicate>
  1167. ]>;
  1168. def : IsDepBreakingFunction<[ // Opcode groups registered as dependency-breaking; none of them appears in the zero-idiom list in this file.
  1169. // GPR
  1170. DepBreakingClass<[ SBB32rr, SBB64rr ], ZeroIdiomPredicate>,
  1171. DepBreakingClass<[ CMP32rr, CMP64rr ], CheckSameRegOperand<0, 1> >, // CMP uses its own check: operands 0 and 1 must be the same register.
  1172. // MMX
  1173. DepBreakingClass<[
  1174. MMX_PCMPEQBrr, MMX_PCMPEQWrr, MMX_PCMPEQDrr
  1175. ], ZeroIdiomPredicate>,
  1176. // SSE
  1177. DepBreakingClass<[
  1178. PCMPEQBrr, PCMPEQWrr, PCMPEQDrr, PCMPEQQrr
  1179. ], ZeroIdiomPredicate>,
  1180. // AVX XMM
  1181. DepBreakingClass<[
  1182. VPCMPEQBrr, VPCMPEQWrr, VPCMPEQDrr, VPCMPEQQrr
  1183. ], ZeroIdiomPredicate>,
  1184. // AVX YMM
  1185. DepBreakingClass<[
  1186. VPCMPEQBYrr, VPCMPEQWYrr, VPCMPEQDYrr, VPCMPEQQYrr
  1187. ], ZeroIdiomPredicate>, // The trailing comma after the last list element is kept exactly as in the original.
  1188. ]>;
  1189. } // SchedModel