X86ScheduleZnver1.td 51 KB


  1. //=- X86ScheduleZnver1.td - X86 Znver1 Scheduling -------------*- tablegen -*-=//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file defines the machine model for Znver1 to support instruction
  10. // scheduling and other instruction cost heuristics.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. def Znver1Model : SchedMachineModel { // Machine model record for Znver1.
  14. // Zen can decode 4 instructions per cycle.
  15. let IssueWidth = 4;
  16. // MicroOpBufferSize is based on the size of the reorder buffer.
  17. let MicroOpBufferSize = 192;
  18. let LoadLatency = 4; // Also read back as Znver1Model.LoadLatency by the WriteIMulHLd definition.
  19. let MispredictPenalty = 17;
  20. let HighLatency = 25;
  21. let PostRAScheduler = 1;
  22. // FIXME: The model is incomplete; not every instruction has been
  23. // given scheduling information yet.
  24. // CompleteModel is therefore cleared so that the model is
  25. // declared incomplete.
  26. let CompleteModel = 0;
  27. }
  28. let SchedModel = Znver1Model in {
  29. // Zen can issue micro-ops to 10 different units in one cycle.
  30. // These are
  31. // * Four integer ALU units (ZALU0, ZALU1, ZALU2, ZALU3)
  32. // * Two AGU units (ZAGU0, ZAGU1)
  33. // * Four FPU units (ZFPU0, ZFPU1, ZFPU2, ZFPU3)
  34. // AGUs feed the load/store queues at two loads and one store per cycle.
  35. // The four integer ALU units.
  36. def ZnALU0 : ProcResource<1>;
  37. def ZnALU1 : ProcResource<1>;
  38. def ZnALU2 : ProcResource<1>;
  39. def ZnALU3 : ProcResource<1>;
  40. // The two AGU units.
  41. def ZnAGU0 : ProcResource<1>;
  42. def ZnAGU1 : ProcResource<1>;
  43. // The four FPU units.
  44. def ZnFPU0 : ProcResource<1>;
  45. def ZnFPU1 : ProcResource<1>;
  46. def ZnFPU2 : ProcResource<1>;
  47. def ZnFPU3 : ProcResource<1>;
  48. // FPU groupings; the digits in each name list the FPU units in the group.
  49. def ZnFPU013 : ProcResGroup<[ZnFPU0, ZnFPU1, ZnFPU3]>;
  50. def ZnFPU01 : ProcResGroup<[ZnFPU0, ZnFPU1]>;
  51. def ZnFPU12 : ProcResGroup<[ZnFPU1, ZnFPU2]>;
  52. def ZnFPU13 : ProcResGroup<[ZnFPU1, ZnFPU3]>;
  53. def ZnFPU23 : ProcResGroup<[ZnFPU2, ZnFPU3]>;
  54. def ZnFPU02 : ProcResGroup<[ZnFPU0, ZnFPU2]>;
  55. def ZnFPU03 : ProcResGroup<[ZnFPU0, ZnFPU3]>;
  56. // Below are the groupings of the units.
  57. // Micro-ops that can be issued to more than one unit are modelled this way.
  58. // ALU grouping
  59. // ZnALU03 - grouping of ALU0 and ALU3
  60. def ZnALU03: ProcResGroup<[ZnALU0, ZnALU3]>;
  61. // 56 Entry (14x4 entries) Int Scheduler
  62. def ZnALU : ProcResGroup<[ZnALU0, ZnALU1, ZnALU2, ZnALU3]> {
  63. let BufferSize=56;
  64. }
  65. // 28 Entry (14x2) AGU group. AGUs can't be used for all ALU operations
  66. // but are relevant for some instructions.
  67. def ZnAGU : ProcResGroup<[ZnAGU0, ZnAGU1]> {
  68. let BufferSize=28;
  69. }
  70. // Integer multiplication is issued on ALU1.
  71. def ZnMultiplier : ProcResource<1>;
  72. // Integer division is issued on ALU2.
  73. def ZnDivider : ProcResource<1>;
  74. // The 4-cycle integer load-to-use latency is captured here.
  75. def : ReadAdvance<ReadAfterLd, 4>;
  76. // The 8-cycle vector load-to-use latency is captured here.
  77. def : ReadAdvance<ReadAfterVecLd, 8>;
  78. def : ReadAdvance<ReadAfterVecXLd, 8>;
  79. def : ReadAdvance<ReadAfterVecYLd, 8>;
  80. def : ReadAdvance<ReadInt2Fpu, 0>; // No read advance for ReadInt2Fpu.
  81. // The Integer PRF for Zen is 168 entries, and it holds the architectural and
  82. // speculative versions of the 64-bit integer registers.
  83. // Reference: "Software Optimization Guide for AMD Family 17h Processors"
  84. def ZnIntegerPRF : RegisterFile<168, [GR64, CCR]>;
  85. // 36 Entry (9x4 entries) floating-point Scheduler
  86. def ZnFPU : ProcResGroup<[ZnFPU0, ZnFPU1, ZnFPU2, ZnFPU3]> {
  87. let BufferSize=36;
  88. }
  89. // The Zen FP Retire Queue renames SIMD and FP uOps onto a pool of 160 128-bit
  90. // registers. Operations on 256-bit data types are cracked into two COPs.
  91. // Reference: "Software Optimization Guide for AMD Family 17h Processors"
  92. def ZnFpuPRF: RegisterFile<160, [VR64, VR128, VR256], [1, 1, 2]>; // VR256 is listed with a cost of 2, the other classes with 1.
  93. // The unit can track up to 192 macro ops in-flight.
  94. // The retire unit handles in-order commit of up to 8 macro ops per cycle.
  95. // Reference: "Software Optimization Guide for AMD Family 17h Processors"
  96. // To be noted, the retire unit is shared between integer and FP ops.
  97. // In SMT mode it is 96 entries per thread. But, we do not use the conservative
  98. // value here because there is currently no way to fully model the SMT mode,
  99. // so there is no point in trying.
  100. def ZnRCU : RetireControlUnit<192, 8>;
  101. // FIXME: there are 72 read buffers and 44 write buffers.
  102. // (a folded load is an instruction that loads and does some operation)
  103. // Ex: ADDPD xmm,[mem]-> This instruction has two micro-ops
  104. // Instructions with folded loads are usually micro-fused, so they only appear
  105. // as two micro-ops.
  106. // a. load and
  107. // b. addpd
  108. // This multiclass defines a register / folded-load pair of writes for the integer units.
  109. multiclass ZnWriteResPair<X86FoldableSchedWrite SchedRW,
  110. list<ProcResourceKind> ExePorts,
  111. int Lat, list<int> Res = [], int UOps = 1,
  112. int LoadLat = 4, int LoadUOps = 1> {
  113. // Register variant: uses ExePorts with latency Lat, resource cycles Res and UOps micro-ops.
  114. def : WriteRes<SchedRW, ExePorts> {
  115. let Latency = Lat;
  116. let ResourceCycles = Res;
  117. let NumMicroOps = UOps;
  118. }
  119. // Memory variant (SchedRW.Folded): ZnAGU is prepended to ExePorts and
  120. // LoadLat cycles are added to the latency (default = 4).
  121. def : WriteRes<SchedRW.Folded, !listconcat([ZnAGU], ExePorts)> {
  122. let Latency = !add(Lat, LoadLat);
  123. let ResourceCycles = !if(!empty(Res), [], !listconcat([1], Res)); // 1 cycle on ZnAGU is prepended; stays empty when Res is empty.
  124. let NumMicroOps = !add(UOps, LoadUOps); // LoadUOps defaults to 1 here.
  125. }
  126. }
  127. // This multiclass defines a register / folded-load pair of writes for the floating point units.
  128. multiclass ZnWriteResFpuPair<X86FoldableSchedWrite SchedRW,
  129. list<ProcResourceKind> ExePorts,
  130. int Lat, list<int> Res = [], int UOps = 1,
  131. int LoadLat = 7, int LoadUOps = 0> {
  132. // Register variant: uses ExePorts with latency Lat, resource cycles Res and UOps micro-ops.
  133. def : WriteRes<SchedRW, ExePorts> {
  134. let Latency = Lat;
  135. let ResourceCycles = Res;
  136. let NumMicroOps = UOps;
  137. }
  138. // Memory variant (SchedRW.Folded): ZnAGU is prepended to ExePorts and
  139. // LoadLat cycles are added to the latency (default = 7).
  140. def : WriteRes<SchedRW.Folded, !listconcat([ZnAGU], ExePorts)> {
  141. let Latency = !add(Lat, LoadLat);
  142. let ResourceCycles = !if(!empty(Res), [], !listconcat([1], Res)); // 1 cycle on ZnAGU is prepended; stays empty when Res is empty.
  143. let NumMicroOps = !add(UOps, LoadUOps); // LoadUOps defaults to 0 here, unlike ZnWriteResPair.
  144. }
  145. }
  146. // WriteRMW is set for instructions with a memory write
  147. // operation in codegen.
  148. def : WriteRes<WriteRMW, [ZnAGU]>;
  149. def : WriteRes<WriteStore, [ZnAGU]>;
  150. def : WriteRes<WriteStoreNT, [ZnAGU]>;
  151. def : WriteRes<WriteMove, [ZnALU]>;
  152. def : WriteRes<WriteLoad, [ZnAGU]> { let Latency = 8; }
  153. // Model the effect of clobbering the read-write mask operand of the GATHER operation.
  154. // Does not cost anything by itself, only has latency, matching that of WriteLoad.
  155. def : WriteRes<WriteVecMaskedGatherWriteback, []> { let Latency = 8; let NumMicroOps = 0; }
  156. def : WriteRes<WriteZero, []>; // No resources are consumed.
  157. def : WriteRes<WriteLEA, [ZnALU]>;
  158. defm : ZnWriteResPair<WriteALU, [ZnALU], 1>;
  159. defm : ZnWriteResPair<WriteADC, [ZnALU], 1>;
  160. defm : ZnWriteResPair<WriteIMul8, [ZnALU1, ZnMultiplier], 4>; // The 16/32/64-bit IMUL writes are set up with SchedAlias later in this file.
  161. //defm : ZnWriteResPair<WriteIMul16, [ZnALU1, ZnMultiplier], 4>;
  162. //defm : ZnWriteResPair<WriteIMul16Imm, [ZnALU1, ZnMultiplier], 4>;
  163. //defm : ZnWriteResPair<WriteIMul16Reg, [ZnALU1, ZnMultiplier], 4>;
  164. //defm : ZnWriteResPair<WriteIMul32, [ZnALU1, ZnMultiplier], 4>;
  165. //defm : ZnWriteResPair<WriteIMul32Imm, [ZnALU1, ZnMultiplier], 4>;
  166. //defm : ZnWriteResPair<WriteIMul32Reg, [ZnALU1, ZnMultiplier], 4>;
  167. //defm : ZnWriteResPair<WriteIMul64, [ZnALU1, ZnMultiplier], 4, [1,1], 2>;
  168. //defm : ZnWriteResPair<WriteIMul64Imm, [ZnALU1, ZnMultiplier], 4, [1,1], 2>;
  169. //defm : ZnWriteResPair<WriteIMul64Reg, [ZnALU1, ZnMultiplier], 4, [1,1], 2>;
  170. defm : X86WriteRes<WriteBSWAP32, [ZnALU], 1, [4], 1>;
  171. defm : X86WriteRes<WriteBSWAP64, [ZnALU], 1, [4], 1>;
  172. defm : X86WriteRes<WriteCMPXCHG, [ZnALU], 1, [1], 1>;
  173. defm : X86WriteRes<WriteCMPXCHGRMW,[ZnALU,ZnAGU], 8, [1,1], 5>;
  174. defm : X86WriteRes<WriteXCHG, [ZnALU], 1, [2], 2>;
  175. defm : ZnWriteResPair<WriteShift, [ZnALU], 1>;
  176. defm : ZnWriteResPair<WriteShiftCL, [ZnALU], 1>;
  177. defm : ZnWriteResPair<WriteRotate, [ZnALU], 1>;
  178. defm : ZnWriteResPair<WriteRotateCL, [ZnALU], 1>;
  179. defm : X86WriteRes<WriteSHDrri, [ZnALU], 1, [1], 1>;
  180. defm : X86WriteResUnsupported<WriteSHDrrcl>;
  181. defm : X86WriteResUnsupported<WriteSHDmri>;
  182. defm : X86WriteResUnsupported<WriteSHDmrcl>;
  183. defm : ZnWriteResPair<WriteJump, [ZnALU], 1>;
  184. defm : ZnWriteResFpuPair<WriteCRC32, [ZnFPU0], 3>;
  185. defm : ZnWriteResPair<WriteCMOV, [ZnALU], 1>;
  186. def : WriteRes<WriteSETCC, [ZnALU]>;
  187. def : WriteRes<WriteSETCCStore, [ZnALU, ZnAGU]>;
  188. defm : X86WriteRes<WriteLAHFSAHF, [ZnALU], 2, [1], 2>;
  189. defm : X86WriteRes<WriteBitTest, [ZnALU], 1, [1], 1>;
  190. defm : X86WriteRes<WriteBitTestImmLd, [ZnALU,ZnAGU], 5, [1,1], 2>;
  191. defm : X86WriteRes<WriteBitTestRegLd, [ZnALU,ZnAGU], 5, [1,1], 2>;
  192. defm : X86WriteRes<WriteBitTestSet, [ZnALU], 2, [1], 2>;
  193. //defm : X86WriteRes<WriteBitTestSetImmLd, [ZnALU,ZnAGU], 5, [1,1], 2>;
  194. //defm : X86WriteRes<WriteBitTestSetRegLd, [ZnALU,ZnAGU], 5, [1,1], 2>;
  195. // Bit counts.
  196. defm : ZnWriteResPair<WriteBSF, [ZnALU], 3>;
  197. defm : ZnWriteResPair<WriteBSR, [ZnALU], 3>;
  198. defm : ZnWriteResPair<WriteLZCNT, [ZnALU], 2>;
  199. defm : ZnWriteResPair<WriteTZCNT, [ZnALU], 2>;
  200. defm : ZnWriteResPair<WritePOPCNT, [ZnALU], 1>;
  201. // Treat misc copies as a move.
  202. def : InstRW<[WriteMove], (instrs COPY)>;
  203. // BMI1 BEXTR/BLS, BMI2 BZHI
  204. defm : ZnWriteResPair<WriteBEXTR, [ZnALU], 1>;
  205. //defm : ZnWriteResPair<WriteBLS, [ZnALU], 2>; // WriteBLS/WriteBLSLd are set up with SchedAlias later in this file.
  206. defm : ZnWriteResPair<WriteBZHI, [ZnALU], 1>;
  207. // DIV and IDIV: the divider is held for as many cycles as the latency.
  208. defm : ZnWriteResPair<WriteDiv8, [ZnALU2, ZnDivider], 15, [1,15], 1>;
  209. defm : ZnWriteResPair<WriteDiv16, [ZnALU2, ZnDivider], 17, [1,17], 2>;
  210. defm : ZnWriteResPair<WriteDiv32, [ZnALU2, ZnDivider], 25, [1,25], 2>;
  211. defm : ZnWriteResPair<WriteDiv64, [ZnALU2, ZnDivider], 41, [1,41], 2>;
  212. defm : ZnWriteResPair<WriteIDiv8, [ZnALU2, ZnDivider], 15, [1,15], 1>;
  213. defm : ZnWriteResPair<WriteIDiv16, [ZnALU2, ZnDivider], 17, [1,17], 2>;
  214. defm : ZnWriteResPair<WriteIDiv32, [ZnALU2, ZnDivider], 25, [1,25], 2>;
  215. defm : ZnWriteResPair<WriteIDiv64, [ZnALU2, ZnDivider], 41, [1,41], 2>;
  216. // IMULH: named so that the load variant below can reuse its fields.
  217. def ZnWriteIMulH : WriteRes<WriteIMulH, [ZnMultiplier]>{
  218. let Latency = 3;
  219. let NumMicroOps = 0;
  220. }
  221. def : WriteRes<WriteIMulHLd, [ZnMultiplier]> { // Load variant.
  222. let Latency = !add(ZnWriteIMulH.Latency, Znver1Model.LoadLatency); // IMULH latency plus the model's LoadLatency.
  223. let NumMicroOps = ZnWriteIMulH.NumMicroOps; // Same micro-op count as the register variant.
  224. }
  225. // Floating point operations: loads, stores and moves.
  226. defm : X86WriteRes<WriteFLoad, [ZnAGU], 8, [1], 1>;
  227. defm : X86WriteRes<WriteFLoadX, [ZnAGU], 8, [1], 1>;
  228. defm : X86WriteRes<WriteFLoadY, [ZnAGU], 8, [1], 1>;
  229. defm : X86WriteRes<WriteFMaskedLoad, [ZnAGU,ZnFPU01], 8, [1,1], 1>;
  230. defm : X86WriteRes<WriteFMaskedLoadY, [ZnAGU,ZnFPU01], 8, [1,2], 2>;
  231. defm : X86WriteRes<WriteFStore, [ZnAGU], 1, [1], 1>;
  232. defm : X86WriteRes<WriteFStoreX, [ZnAGU], 1, [1], 1>;
  233. defm : X86WriteRes<WriteFStoreY, [ZnAGU], 1, [1], 1>;
  234. defm : X86WriteRes<WriteFStoreNT, [ZnAGU,ZnFPU2], 8, [1,1], 1>;
  235. defm : X86WriteRes<WriteFStoreNTX, [ZnAGU], 1, [1], 1>;
  236. defm : X86WriteRes<WriteFStoreNTY, [ZnAGU], 1, [1], 1>;
  237. defm : X86WriteRes<WriteFMaskedStore32, [ZnAGU,ZnFPU01], 4, [1,1], 1>;
  238. defm : X86WriteRes<WriteFMaskedStore32Y, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
  239. defm : X86WriteRes<WriteFMaskedStore64, [ZnAGU,ZnFPU01], 4, [1,1], 1>;
  240. defm : X86WriteRes<WriteFMaskedStore64Y, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
  241. defm : X86WriteRes<WriteFMove, [ZnFPU], 1, [1], 1>;
  242. defm : X86WriteRes<WriteFMoveX, [ZnFPU], 1, [1], 1>;
  243. defm : X86WriteRes<WriteFMoveY, [ZnFPU], 1, [1], 1>;
  244. defm : X86WriteResUnsupported<WriteFMoveZ>; // The Z variant is marked unsupported.
  245. defm : ZnWriteResFpuPair<WriteFAdd, [ZnFPU0], 3>; // FP arithmetic; every *Z write class below is marked unsupported.
  246. defm : ZnWriteResFpuPair<WriteFAddX, [ZnFPU0], 3>;
  247. defm : ZnWriteResFpuPair<WriteFAddY, [ZnFPU0], 3>;
  248. defm : X86WriteResPairUnsupported<WriteFAddZ>;
  249. defm : ZnWriteResFpuPair<WriteFAdd64, [ZnFPU0], 3>;
  250. defm : ZnWriteResFpuPair<WriteFAdd64X, [ZnFPU0], 3>;
  251. defm : ZnWriteResFpuPair<WriteFAdd64Y, [ZnFPU0], 3>;
  252. defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
  253. defm : ZnWriteResFpuPair<WriteFCmp, [ZnFPU0], 3>;
  254. defm : ZnWriteResFpuPair<WriteFCmpX, [ZnFPU0], 3>;
  255. defm : ZnWriteResFpuPair<WriteFCmpY, [ZnFPU0], 3>;
  256. defm : X86WriteResPairUnsupported<WriteFCmpZ>;
  257. defm : ZnWriteResFpuPair<WriteFCmp64, [ZnFPU0], 3>;
  258. defm : ZnWriteResFpuPair<WriteFCmp64X, [ZnFPU0], 3>;
  259. defm : ZnWriteResFpuPair<WriteFCmp64Y, [ZnFPU0], 3>;
  260. defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
  261. defm : ZnWriteResFpuPair<WriteFCom, [ZnFPU0], 3>;
  262. defm : ZnWriteResFpuPair<WriteFComX, [ZnFPU0], 3>;
  263. defm : ZnWriteResFpuPair<WriteFBlend, [ZnFPU01], 1>;
  264. defm : ZnWriteResFpuPair<WriteFBlendY, [ZnFPU01], 1>;
  265. defm : X86WriteResPairUnsupported<WriteFBlendZ>;
  266. defm : ZnWriteResFpuPair<WriteFVarBlend, [ZnFPU01], 1>;
  267. defm : ZnWriteResFpuPair<WriteFVarBlendY,[ZnFPU01], 1>;
  268. defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
  269. defm : ZnWriteResFpuPair<WriteVarBlend, [ZnFPU0], 1>;
  270. defm : ZnWriteResFpuPair<WriteVarBlendY, [ZnFPU0], 1>;
  271. defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
  272. defm : ZnWriteResFpuPair<WriteCvtSS2I, [ZnFPU3], 5>;
  273. defm : ZnWriteResFpuPair<WriteCvtPS2I, [ZnFPU3], 5>;
  274. defm : ZnWriteResFpuPair<WriteCvtPS2IY, [ZnFPU3], 5>;
  275. defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
  276. defm : ZnWriteResFpuPair<WriteCvtSD2I, [ZnFPU3], 5>;
  277. defm : ZnWriteResFpuPair<WriteCvtPD2I, [ZnFPU3], 5>;
  278. defm : ZnWriteResFpuPair<WriteCvtPD2IY, [ZnFPU3], 5>;
  279. defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
  280. defm : ZnWriteResFpuPair<WriteCvtI2SS, [ZnFPU3], 5>;
  281. defm : ZnWriteResFpuPair<WriteCvtI2PS, [ZnFPU3], 5>;
  282. defm : ZnWriteResFpuPair<WriteCvtI2PSY, [ZnFPU3], 5>;
  283. defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
  284. defm : ZnWriteResFpuPair<WriteCvtI2SD, [ZnFPU3], 5>;
  285. defm : ZnWriteResFpuPair<WriteCvtI2PD, [ZnFPU3], 5>;
  286. defm : ZnWriteResFpuPair<WriteCvtI2PDY, [ZnFPU3], 5>;
  287. defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
  288. defm : ZnWriteResFpuPair<WriteFDiv, [ZnFPU3], 15>;
  289. defm : ZnWriteResFpuPair<WriteFDivX, [ZnFPU3], 15>;
  290. //defm : ZnWriteResFpuPair<WriteFDivY, [ZnFPU3], 15>;
  291. defm : X86WriteResPairUnsupported<WriteFDivZ>;
  292. defm : ZnWriteResFpuPair<WriteFDiv64, [ZnFPU3], 15>;
  293. defm : ZnWriteResFpuPair<WriteFDiv64X, [ZnFPU3], 15>;
  294. //defm : ZnWriteResFpuPair<WriteFDiv64Y, [ZnFPU3], 15>;
  295. defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
  296. defm : ZnWriteResFpuPair<WriteFSign, [ZnFPU3], 2>;
  297. defm : ZnWriteResFpuPair<WriteFRnd, [ZnFPU3], 4, [1], 1, 7, 1>; // FIXME: Should folds require 1 extra uop?
  298. defm : ZnWriteResFpuPair<WriteFRndY, [ZnFPU3], 4, [1], 1, 7, 1>; // FIXME: Should folds require 1 extra uop?
  299. defm : X86WriteResPairUnsupported<WriteFRndZ>;
  300. defm : ZnWriteResFpuPair<WriteFLogic, [ZnFPU], 1>;
  301. defm : ZnWriteResFpuPair<WriteFLogicY, [ZnFPU], 1>;
  302. defm : X86WriteResPairUnsupported<WriteFLogicZ>;
  303. defm : ZnWriteResFpuPair<WriteFTest, [ZnFPU], 1>;
  304. defm : ZnWriteResFpuPair<WriteFTestY, [ZnFPU], 1>;
  305. defm : X86WriteResPairUnsupported<WriteFTestZ>;
  306. defm : ZnWriteResFpuPair<WriteFShuffle, [ZnFPU12], 1>;
  307. defm : ZnWriteResFpuPair<WriteFShuffleY, [ZnFPU12], 1>;
  308. defm : X86WriteResPairUnsupported<WriteFShuffleZ>;
  309. defm : ZnWriteResFpuPair<WriteFVarShuffle, [ZnFPU12], 1>;
  310. defm : ZnWriteResFpuPair<WriteFVarShuffleY,[ZnFPU12], 1>;
  311. defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
  312. defm : ZnWriteResFpuPair<WriteFMul, [ZnFPU01], 3, [1], 1, 7, 1>;
  313. defm : ZnWriteResFpuPair<WriteFMulX, [ZnFPU01], 3, [1], 1, 7, 1>;
  314. defm : ZnWriteResFpuPair<WriteFMulY, [ZnFPU01], 4, [1], 1, 7, 1>;
  315. defm : X86WriteResPairUnsupported<WriteFMulZ>;
  316. defm : ZnWriteResFpuPair<WriteFMul64, [ZnFPU01], 3, [1], 1, 7, 1>;
  317. defm : ZnWriteResFpuPair<WriteFMul64X, [ZnFPU01], 3, [1], 1, 7, 1>;
  318. defm : ZnWriteResFpuPair<WriteFMul64Y, [ZnFPU01], 4, [1], 1, 7, 1>;
  319. defm : X86WriteResPairUnsupported<WriteFMul64Z>;
  320. defm : ZnWriteResFpuPair<WriteFMA, [ZnFPU03], 5>;
  321. defm : ZnWriteResFpuPair<WriteFMAX, [ZnFPU03], 5>;
  322. defm : ZnWriteResFpuPair<WriteFMAY, [ZnFPU03], 5>;
  323. defm : X86WriteResPairUnsupported<WriteFMAZ>;
  324. defm : ZnWriteResFpuPair<WriteFRcp, [ZnFPU01], 5>;
  325. defm : ZnWriteResFpuPair<WriteFRcpX, [ZnFPU01], 5>;
  326. defm : ZnWriteResFpuPair<WriteFRcpY, [ZnFPU01], 5, [1], 1, 7, 2>;
  327. defm : X86WriteResPairUnsupported<WriteFRcpZ>;
  328. //defm : ZnWriteResFpuPair<WriteFRsqrt, [ZnFPU02], 5>;
  329. defm : ZnWriteResFpuPair<WriteFRsqrtX, [ZnFPU01], 5, [1], 1, 7, 1>;
  330. //defm : ZnWriteResFpuPair<WriteFRsqrtY, [ZnFPU01], 5, [2], 2>;
  331. defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
  332. defm : ZnWriteResFpuPair<WriteFSqrt, [ZnFPU3], 20, [20]>; // Square roots hold ZnFPU3 for as many cycles as the latency.
  333. defm : ZnWriteResFpuPair<WriteFSqrtX, [ZnFPU3], 20, [20]>;
  334. defm : ZnWriteResFpuPair<WriteFSqrtY, [ZnFPU3], 28, [28], 1, 7, 1>;
  335. defm : X86WriteResPairUnsupported<WriteFSqrtZ>;
  336. defm : ZnWriteResFpuPair<WriteFSqrt64, [ZnFPU3], 20, [20]>;
  337. defm : ZnWriteResFpuPair<WriteFSqrt64X, [ZnFPU3], 20, [20]>;
  338. defm : ZnWriteResFpuPair<WriteFSqrt64Y, [ZnFPU3], 40, [40], 1, 7, 1>;
  339. defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
  340. defm : ZnWriteResFpuPair<WriteFSqrt80, [ZnFPU3], 20, [20]>;
  341. // Vector integer operations, which use the FPU units.
  342. defm : X86WriteRes<WriteVecLoad, [ZnAGU], 8, [1], 1>;
  343. defm : X86WriteRes<WriteVecLoadX, [ZnAGU], 8, [1], 1>;
  344. defm : X86WriteRes<WriteVecLoadY, [ZnAGU], 8, [1], 1>;
  345. defm : X86WriteRes<WriteVecLoadNT, [ZnAGU], 8, [1], 1>;
  346. defm : X86WriteRes<WriteVecLoadNTY, [ZnAGU], 8, [1], 1>;
  347. defm : X86WriteRes<WriteVecMaskedLoad, [ZnAGU,ZnFPU01], 8, [1,2], 2>;
  348. defm : X86WriteRes<WriteVecMaskedLoadY, [ZnAGU,ZnFPU01], 9, [1,3], 2>;
  349. defm : X86WriteRes<WriteVecStore, [ZnAGU], 1, [1], 1>;
  350. defm : X86WriteRes<WriteVecStoreX, [ZnAGU], 1, [1], 1>;
  351. defm : X86WriteRes<WriteVecStoreY, [ZnAGU], 1, [1], 1>;
  352. defm : X86WriteRes<WriteVecStoreNT, [ZnAGU], 1, [1], 1>;
  353. defm : X86WriteRes<WriteVecStoreNTY, [ZnAGU], 1, [1], 1>;
  354. defm : X86WriteRes<WriteVecMaskedStore32, [ZnAGU,ZnFPU01], 4, [1,1], 1>;
  355. defm : X86WriteRes<WriteVecMaskedStore32Y, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
  356. defm : X86WriteRes<WriteVecMaskedStore64, [ZnAGU,ZnFPU01], 4, [1,1], 1>;
  357. defm : X86WriteRes<WriteVecMaskedStore64Y, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
  358. defm : X86WriteRes<WriteVecMove, [ZnFPU], 1, [1], 1>;
  359. defm : X86WriteRes<WriteVecMoveX, [ZnFPU], 1, [1], 1>;
  360. defm : X86WriteRes<WriteVecMoveY, [ZnFPU], 2, [1], 2>;
  361. defm : X86WriteResUnsupported<WriteVecMoveZ>; // The Z variant is marked unsupported.
  362. defm : X86WriteRes<WriteVecMoveToGpr, [ZnFPU2], 2, [1], 1>;
  363. defm : X86WriteRes<WriteVecMoveFromGpr, [ZnFPU2], 3, [1], 1>;
  364. defm : X86WriteRes<WriteEMMS, [ZnFPU], 2, [1], 1>;
  365. defm : ZnWriteResFpuPair<WriteVecShift, [ZnFPU], 1>; // Vector integer ALU ops; every *Z write class below is marked unsupported.
  366. defm : ZnWriteResFpuPair<WriteVecShiftX, [ZnFPU2], 1>;
  367. defm : ZnWriteResFpuPair<WriteVecShiftY, [ZnFPU2], 2>;
  368. defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
  369. defm : ZnWriteResFpuPair<WriteVecShiftImm, [ZnFPU], 1>;
  370. defm : ZnWriteResFpuPair<WriteVecShiftImmX, [ZnFPU], 1>;
  371. defm : ZnWriteResFpuPair<WriteVecShiftImmY, [ZnFPU], 1>;
  372. defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>;
  373. defm : ZnWriteResFpuPair<WriteVecLogic, [ZnFPU], 1>;
  374. defm : ZnWriteResFpuPair<WriteVecLogicX, [ZnFPU], 1>;
  375. defm : ZnWriteResFpuPair<WriteVecLogicY, [ZnFPU], 1>;
  376. defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
  377. defm : ZnWriteResFpuPair<WriteVecTest, [ZnFPU12], 1, [2], 1, 7, 1>;
  378. defm : ZnWriteResFpuPair<WriteVecTestY, [ZnFPU12], 1, [2], 1, 7, 1>;
  379. defm : X86WriteResPairUnsupported<WriteVecTestZ>;
  380. defm : ZnWriteResFpuPair<WriteVecALU, [ZnFPU], 1>;
  381. defm : ZnWriteResFpuPair<WriteVecALUX, [ZnFPU], 1>;
  382. defm : ZnWriteResFpuPair<WriteVecALUY, [ZnFPU], 1>;
  383. defm : X86WriteResPairUnsupported<WriteVecALUZ>;
  384. defm : ZnWriteResFpuPair<WriteVecIMul, [ZnFPU0], 4>;
  385. defm : ZnWriteResFpuPair<WriteVecIMulX, [ZnFPU0], 4>;
  386. defm : ZnWriteResFpuPair<WriteVecIMulY, [ZnFPU0], 4>;
  387. defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
  388. defm : ZnWriteResFpuPair<WritePMULLD, [ZnFPU0], 4, [1], 1, 7, 1>; // FIXME
  389. defm : ZnWriteResFpuPair<WritePMULLDY, [ZnFPU0], 5, [2], 1, 7, 1>; // FIXME
  390. defm : X86WriteResPairUnsupported<WritePMULLDZ>;
  391. defm : ZnWriteResFpuPair<WriteShuffle, [ZnFPU], 1>;
  392. defm : ZnWriteResFpuPair<WriteShuffleX, [ZnFPU], 1>;
  393. defm : ZnWriteResFpuPair<WriteShuffleY, [ZnFPU], 1>;
  394. defm : X86WriteResPairUnsupported<WriteShuffleZ>;
  395. defm : ZnWriteResFpuPair<WriteVarShuffle, [ZnFPU], 1>;
  396. defm : ZnWriteResFpuPair<WriteVarShuffleX,[ZnFPU], 1>;
  397. defm : ZnWriteResFpuPair<WriteVarShuffleY,[ZnFPU], 1>;
  398. defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
  399. defm : ZnWriteResFpuPair<WriteBlend, [ZnFPU01], 1>;
  400. defm : ZnWriteResFpuPair<WriteBlendY, [ZnFPU01], 1>;
  401. defm : X86WriteResPairUnsupported<WriteBlendZ>;
  402. defm : ZnWriteResFpuPair<WriteShuffle256, [ZnFPU], 2>;
  403. defm : ZnWriteResFpuPair<WriteVPMOV256, [ZnFPU12], 1, [1], 2>;
  404. defm : ZnWriteResFpuPair<WriteVarShuffle256, [ZnFPU], 2>;
  405. defm : ZnWriteResFpuPair<WritePSADBW, [ZnFPU0], 3>;
  406. defm : ZnWriteResFpuPair<WritePSADBWX, [ZnFPU0], 3>;
  407. defm : ZnWriteResFpuPair<WritePSADBWY, [ZnFPU0], 3>;
  408. defm : X86WriteResPairUnsupported<WritePSADBWZ>;
  409. defm : ZnWriteResFpuPair<WritePHMINPOS, [ZnFPU0], 4>;
  410. // Variable vector shift operations.
  411. defm : ZnWriteResFpuPair<WriteVarVecShift, [ZnFPU12], 1>;
  412. defm : ZnWriteResFpuPair<WriteVarVecShiftY, [ZnFPU12], 1>;
  413. defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>;
  414. // Vector insert/extract operations.
  415. defm : ZnWriteResFpuPair<WriteVecInsert, [ZnFPU], 1>;
  416. def : WriteRes<WriteVecExtract, [ZnFPU12, ZnFPU2]> {
  417. let Latency = 2;
  418. let ResourceCycles = [1, 2];
  419. }
  420. def : WriteRes<WriteVecExtractSt, [ZnAGU, ZnFPU12, ZnFPU2]> { // Store form: adds ZnAGU in front of the extract resources.
  421. let Latency = 5;
  422. let NumMicroOps = 2;
  423. let ResourceCycles = [1, 2, 3];
  424. }
  425. // MOVMSK instructions.
  426. def : WriteRes<WriteFMOVMSK, [ZnFPU2]>;
  427. def : WriteRes<WriteMMXMOVMSK, [ZnFPU2]>;
  428. def : WriteRes<WriteVecMOVMSK, [ZnFPU2]>;
  429. def : WriteRes<WriteVecMOVMSKY, [ZnFPU2]> {
  430. let NumMicroOps = 2;
  431. let Latency = 2;
  432. let ResourceCycles = [2];
  433. }
  434. // AES instructions.
  435. defm : ZnWriteResFpuPair<WriteAESDecEnc, [ZnFPU01], 4>;
  436. defm : ZnWriteResFpuPair<WriteAESIMC, [ZnFPU01], 4>;
  437. defm : ZnWriteResFpuPair<WriteAESKeyGen, [ZnFPU01], 4>;
  438. def : WriteRes<WriteFence, [ZnAGU]>;
  439. def : WriteRes<WriteNop, []>;
  440. // The following instructions with latency=100 are microcoded.
  441. // A long latency is set so as to block the entire pipeline.
  442. defm : ZnWriteResFpuPair<WriteFShuffle256, [ZnFPU], 100>;
  443. defm : ZnWriteResFpuPair<WriteFVarShuffle256, [ZnFPU], 100>;
  444. // Microcoded instructions: no resources are listed, only a latency of 100.
  445. def ZnWriteMicrocoded : SchedWriteRes<[]> {
  446. let Latency = 100;
  447. }
  448. def : SchedAlias<WriteMicrocoded, ZnWriteMicrocoded>;
  449. def : SchedAlias<WriteFCMOV, ZnWriteMicrocoded>;
  450. def : SchedAlias<WriteSystem, ZnWriteMicrocoded>;
  451. def : SchedAlias<WriteMPSAD, ZnWriteMicrocoded>;
  452. def : SchedAlias<WriteMPSADY, ZnWriteMicrocoded>;
  453. def : SchedAlias<WriteMPSADLd, ZnWriteMicrocoded>;
  454. def : SchedAlias<WriteMPSADYLd, ZnWriteMicrocoded>;
  455. def : SchedAlias<WriteCLMul, ZnWriteMicrocoded>;
  456. def : SchedAlias<WriteCLMulLd, ZnWriteMicrocoded>;
  457. def : SchedAlias<WritePCmpIStrM, ZnWriteMicrocoded>;
  458. def : SchedAlias<WritePCmpIStrMLd, ZnWriteMicrocoded>;
  459. def : SchedAlias<WritePCmpEStrI, ZnWriteMicrocoded>;
  460. def : SchedAlias<WritePCmpEStrILd, ZnWriteMicrocoded>;
  461. def : SchedAlias<WritePCmpEStrM, ZnWriteMicrocoded>;
  462. def : SchedAlias<WritePCmpEStrMLd, ZnWriteMicrocoded>;
  463. def : SchedAlias<WritePCmpIStrI, ZnWriteMicrocoded>;
  464. def : SchedAlias<WritePCmpIStrILd, ZnWriteMicrocoded>;
  465. def : SchedAlias<WriteLDMXCSR, ZnWriteMicrocoded>;
  466. def : SchedAlias<WriteSTMXCSR, ZnWriteMicrocoded>;
  467. //=== Regex based InstRW ===//
  468. // Notation:
  469. // - r: register.
  470. // - m: memory.
  471. // - i: immediate.
  472. // - mm: 64 bit mmx register.
  473. // - x: 128 bit xmm register.
  474. // - (x)mm: mmx or xmm register.
  475. // - y: 256 bit ymm register.
  476. // - v: any vector register.
  477. //=== Integer Instructions ===//
  478. //-- Move instructions --//
  479. // MOV.
  480. // r16,m.
  481. def : InstRW<[WriteALULd, ReadAfterLd], (instrs MOV16rm)>;
  482. // MOVSX, MOVZX.
  483. // r32,m8/m16.
  484. def : InstRW<[WriteLoad], (instregex "MOV(S|Z)X32rm(8|16)")>;
  485. // XCHG.
  486. // r,m.
  487. def ZnWriteXCHGrm : SchedWriteRes<[ZnAGU, ZnALU]> {
  488. let Latency = 5;
  489. let NumMicroOps = 2;
  490. }
  491. def : InstRW<[ZnWriteXCHGrm, ReadAfterLd], (instregex "XCHG(8|16|32|64)rm")>;
  492. def : InstRW<[WriteMicrocoded], (instrs XLAT)>; // XLAT is treated as microcoded.
  493. // POP16.
  494. // NOTE(review): originally labelled "r.", but the instruction matched below is POP16rmm - confirm.
  495. def ZnWritePop16r : SchedWriteRes<[ZnAGU]>{
  496. let Latency = 5;
  497. let NumMicroOps = 2;
  498. }
  499. def : InstRW<[ZnWritePop16r], (instrs POP16rmm)>;
  500. def : InstRW<[WriteMicrocoded], (instregex "POPF(16|32)")>;
  501. def : InstRW<[WriteMicrocoded], (instregex "POPA(16|32)")>;
  502. // PUSH.
  503. // r. Has default values.
  504. // m.
  505. def ZnWritePUSH : SchedWriteRes<[ZnAGU]>{
  506. let Latency = 4;
  507. }
  508. def : InstRW<[ZnWritePUSH], (instregex "PUSH(16|32)rmm")>;
  509. // PUSHF.
  510. def : InstRW<[WriteMicrocoded], (instregex "PUSHF(16|32)")>;
  511. // PUSHA.
  512. def ZnWritePushA : SchedWriteRes<[ZnAGU]> {
  513. let Latency = 8;
  514. }
  515. def : InstRW<[ZnWritePushA], (instregex "PUSHA(16|32)")>;
  516. // LAHF.
  517. def : InstRW<[WriteMicrocoded], (instrs LAHF)>;
  518. // MOVBE.
  519. // r,m.
  520. def ZnWriteMOVBE : SchedWriteRes<[ZnAGU, ZnALU]> {
  521. let Latency = 5;
  522. }
  523. def : InstRW<[ZnWriteMOVBE, ReadAfterLd], (instregex "MOVBE(16|32|64)rm")>;
  524. // m,r. (16-, 32- and 64-bit forms share the same write.)
  525. def : InstRW<[ZnWriteMOVBE], (instregex "MOVBE(16|32|64)mr")>;
  526. //-- Arithmetic instructions --//
  527. // ADD SUB.
  528. // m,r/i.
  529. def : InstRW<[WriteALULd], (instregex "(ADD|SUB)(8|16|32|64)m(r|i)",
  530. "(ADD|SUB)(8|16|32|64)mi8",
  531. "(ADD|SUB)64mi32")>;
  532. // ADC SBB.
  533. // m,r/i.
  534. def : InstRW<[WriteALULd],
  535. (instregex "(ADC|SBB)(8|16|32|64)m(r|i)",
  536. "(ADC|SBB)(16|32|64)mi8",
  537. "(ADC|SBB)64mi32")>;
  538. // INC DEC NOT NEG.
  539. // m.
  540. def : InstRW<[WriteALULd],
  541. (instregex "(INC|DEC|NOT|NEG)(8|16|32|64)m")>;
  542. // MUL IMUL.
  543. // r16.
  544. def ZnWriteMul16 : SchedWriteRes<[ZnALU1, ZnMultiplier]> { // Register form: ALU1 together with the multiplier.
  545. let Latency = 3;
  546. }
  547. def : SchedAlias<WriteIMul16, ZnWriteMul16>;
  548. def : SchedAlias<WriteIMul16Imm, ZnWriteMul16>; // TODO: is this right?
  549. def : SchedAlias<WriteIMul16Reg, ZnWriteMul16>; // TODO: is this right?
  550. def : SchedAlias<WriteIMul16ImmLd, ZnWriteMul16>; // TODO: this is definitely wrong but matches what the instregex did.
  551. def : SchedAlias<WriteIMul16RegLd, ZnWriteMul16>; // TODO: this is definitely wrong but matches what the instregex did.
  552. // m16.
  553. def ZnWriteMul16Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> { // Memory form: adds ZnAGU.
  554. let Latency = 8;
  555. }
  556. def : SchedAlias<WriteIMul16Ld, ZnWriteMul16Ld>;
  557. // r32.
  558. def ZnWriteMul32 : SchedWriteRes<[ZnALU1, ZnMultiplier]> {
  559. let Latency = 3;
  560. }
  561. def : SchedAlias<WriteIMul32, ZnWriteMul32>;
  562. def : SchedAlias<WriteIMul32Imm, ZnWriteMul32>; // TODO: is this right?
  563. def : SchedAlias<WriteIMul32Reg, ZnWriteMul32>; // TODO: is this right?
  564. def : SchedAlias<WriteIMul32ImmLd, ZnWriteMul32>; // TODO: this is definitely wrong but matches what the instregex did.
  565. def : SchedAlias<WriteIMul32RegLd, ZnWriteMul32>; // TODO: this is definitely wrong but matches what the instregex did.
  566. // m32.
  567. def ZnWriteMul32Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
  568. let Latency = 8;
  569. }
  570. def : SchedAlias<WriteIMul32Ld, ZnWriteMul32Ld>;
  571. // r64.
  572. def ZnWriteMul64 : SchedWriteRes<[ZnALU1, ZnMultiplier]> {
  573. let Latency = 4;
  574. let NumMicroOps = 2;
  575. }
  576. def : SchedAlias<WriteIMul64, ZnWriteMul64>;
  577. def : SchedAlias<WriteIMul64Imm, ZnWriteMul64>; // TODO: is this right?
  578. def : SchedAlias<WriteIMul64Reg, ZnWriteMul64>; // TODO: is this right?
  579. def : SchedAlias<WriteIMul64ImmLd, ZnWriteMul64>; // TODO: this is definitely wrong but matches what the instregex did.
  580. def : SchedAlias<WriteIMul64RegLd, ZnWriteMul64>; // TODO: this is definitely wrong but matches what the instregex did.
  581. // m64.
  582. def ZnWriteMul64Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
  583. let Latency = 9;
  584. let NumMicroOps = 2;
  585. }
  586. def : SchedAlias<WriteIMul64Ld, ZnWriteMul64Ld>;
  587. // MULX
  588. // Numbers are based on the AMD SOG for Family 17h - Instruction Latencies.
  589. defm : ZnWriteResPair<WriteMULX32, [ZnALU1, ZnMultiplier], 3, [1, 1], 1, 5, 0>; // Folded form: LoadLat = 5, LoadUOps = 0.
  590. defm : ZnWriteResPair<WriteMULX64, [ZnALU1, ZnMultiplier], 3, [1, 1], 1, 5, 0>; // Same numbers as the 32-bit form.
  591. //-- Control transfer instructions --//
  592. // J(E|R)CXZ.
  593. def ZnWriteJCXZ : SchedWriteRes<[ZnALU03]>;
  594. def : InstRW<[ZnWriteJCXZ], (instrs JCXZ, JECXZ, JRCXZ)>;
  595. // INTO.
  596. def : InstRW<[WriteMicrocoded], (instrs INTO)>;
  597. // LOOP.
  598. def ZnWriteLOOP : SchedWriteRes<[ZnALU03]>;
  599. def : InstRW<[ZnWriteLOOP], (instrs LOOP)>;
  600. // LOOP(N)E, LOOP(N)Z.
  601. def ZnWriteLOOPE : SchedWriteRes<[ZnALU03]>;
  602. def : InstRW<[ZnWriteLOOPE], (instrs LOOPE, LOOPNE)>;
  603. // CALL.
  604. // r.
  605. def ZnWriteCALLr : SchedWriteRes<[ZnAGU, ZnALU03]>;
  606. def : InstRW<[ZnWriteCALLr], (instregex "CALL(16|32)r")>;
  607. def : InstRW<[WriteMicrocoded], (instregex "CALL(16|32)m")>; // m: treated as microcoded.
  608. // RET.
  609. def ZnWriteRET : SchedWriteRes<[ZnALU03]> {
  610. let NumMicroOps = 2;
  611. }
  612. def : InstRW<[ZnWriteRET], (instregex "RET(16|32|64)", "LRET(16|32|64)", // RET, LRET and IRET share this write.
  613. "IRET(16|32|64)")>;
  614. //-- Logic instructions --//
  615. // AND OR XOR.
  616. // m,r/i.
  617. def : InstRW<[WriteALULd],
  618. (instregex "(AND|OR|XOR)(8|16|32|64)m(r|i)",
  619. "(AND|OR|XOR)(8|16|32|64)mi8", "(AND|OR|XOR)64mi32")>;
  620. // Define ALU latency variants
  621. def ZnWriteALULat2 : SchedWriteRes<[ZnALU]> {
  622. let Latency = 2;
  623. }
  624. def ZnWriteALULat2Ld : SchedWriteRes<[ZnAGU, ZnALU]> {
  625. let Latency = 6;
  626. }
  627. // BTR BTS BTC.
  628. // m,r,i.
  629. def ZnWriteBTRSCm : SchedWriteRes<[ZnAGU, ZnALU]> {
  630. let Latency = 6;
  631. let NumMicroOps = 2;
  632. }
  633. // m,r / m,i: the read-modify-write forms use the write defined above.
  634. def : SchedAlias<WriteBitTestSetImmRMW, ZnWriteBTRSCm>;
  635. def : SchedAlias<WriteBitTestSetRegRMW, ZnWriteBTRSCm>;
  636. // BLSI BLSMSK BLSR.
  637. // r,r.
  638. def : SchedAlias<WriteBLS, ZnWriteALULat2>;
  639. // r,m.
  640. def : SchedAlias<WriteBLSLd, ZnWriteALULat2Ld>;
  641. // CLD STD.
  642. def : InstRW<[WriteALU], (instrs STD, CLD)>;
  643. // PDEP PEXT.
  644. // r,r,r.
  645. def : InstRW<[WriteMicrocoded], (instregex "PDEP(32|64)rr", "PEXT(32|64)rr")>;
  646. // r,r,m.
  647. def : InstRW<[WriteMicrocoded], (instregex "PDEP(32|64)rm", "PEXT(32|64)rm")>;
  648. // RCR RCL.
  649. // m,i.
  650. def : InstRW<[WriteMicrocoded], (instregex "RC(R|L)(8|16|32|64)m(1|i|CL)")>;
  651. // SHR SHL SAR.
  652. // m,i.
  653. def : InstRW<[WriteShiftLd], (instregex "S(A|H)(R|L)(8|16|32|64)m(i|1)")>;
  654. // SHRD SHLD.
  655. // m,r,i.
  656. def : InstRW<[WriteShiftLd], (instregex "SH(R|L)D(16|32|64)mri8")>;
  657. // r,r,cl.
  658. def : InstRW<[WriteMicrocoded], (instregex "SH(R|L)D(16|32|64)rrCL")>;
  659. // m,r,cl.
  660. def : InstRW<[WriteMicrocoded], (instregex "SH(R|L)D(16|32|64)mrCL")>;
  661. //-- Misc instructions --//
  662. // CMPXCHG8B.
  663. def ZnWriteCMPXCHG8B : SchedWriteRes<[ZnAGU, ZnALU]> {
  664. let NumMicroOps = 18;
  665. }
  666. def : InstRW<[ZnWriteCMPXCHG8B], (instrs CMPXCHG8B)>;
  667. def : InstRW<[WriteMicrocoded], (instrs CMPXCHG16B)>;
  668. // LEAVE
  669. def ZnWriteLEAVE : SchedWriteRes<[ZnALU, ZnAGU]> {
  670. let Latency = 8;
  671. let NumMicroOps = 2;
  672. }
  673. def : InstRW<[ZnWriteLEAVE], (instregex "LEAVE")>;
  674. // PAUSE.
  675. def : InstRW<[WriteMicrocoded], (instrs PAUSE)>;
  676. // RDTSC.
  677. def : InstRW<[WriteMicrocoded], (instregex "RDTSC")>;
  678. // RDPMC.
  679. def : InstRW<[WriteMicrocoded], (instrs RDPMC)>;
  680. // RDRAND.
  681. def : InstRW<[WriteMicrocoded], (instrs RDRAND16r, RDRAND32r, RDRAND64r)>;
  682. // XGETBV.
  683. def : InstRW<[WriteMicrocoded], (instrs XGETBV)>;
  684. //-- String instructions --//
  685. // CMPS.
  686. def : InstRW<[WriteMicrocoded], (instregex "CMPS(B|L|Q|W)")>;
  687. // LODSB/W.
  688. def : InstRW<[WriteMicrocoded], (instregex "LODS(B|W)")>;
  689. // LODSD/Q.
  690. def : InstRW<[WriteMicrocoded], (instregex "LODS(L|Q)")>;
  691. // MOVS.
  692. def : InstRW<[WriteMicrocoded], (instregex "MOVS(B|L|Q|W)")>;
  693. // SCAS.
  694. def : InstRW<[WriteMicrocoded], (instregex "SCAS(B|W|L|Q)")>;
  695. // STOS
  696. def : InstRW<[WriteMicrocoded], (instregex "STOS(B|L|Q|W)")>;
  697. // XADD.
  698. def ZnXADD : SchedWriteRes<[ZnALU]>;
  699. def : InstRW<[ZnXADD], (instregex "XADD(8|16|32|64)rr")>;
  700. def : InstRW<[WriteMicrocoded], (instregex "XADD(8|16|32|64)rm")>;
  701. //=== Floating Point x87 Instructions ===//
  702. //-- Move instructions --//
  703. def ZnWriteFLDr : SchedWriteRes<[ZnFPU13]> ;
  704. def ZnWriteSTr: SchedWriteRes<[ZnFPU23]> {
  705. let Latency = 5;
  706. let NumMicroOps = 2;
  707. }
  708. // LD_F.
  709. // r.
  710. def : InstRW<[ZnWriteFLDr], (instrs LD_Frr)>;
  711. // m.
  712. def ZnWriteLD_F80m : SchedWriteRes<[ZnAGU, ZnFPU13]> {
  713. let NumMicroOps = 2;
  714. }
  715. def : InstRW<[ZnWriteLD_F80m], (instrs LD_F80m)>;
  716. // FBLD.
  717. def : InstRW<[WriteMicrocoded], (instrs FBLDm)>;
  718. // FST(P).
  719. // r.
  720. def : InstRW<[ZnWriteSTr], (instregex "ST_(F|FP)rr")>;
  721. // m80.
  722. def ZnWriteST_FP80m : SchedWriteRes<[ZnAGU, ZnFPU23]> {
  723. let Latency = 5;
  724. }
  725. def : InstRW<[ZnWriteST_FP80m], (instrs ST_FP80m)>;
  726. // FBSTP.
  727. // m80.
  728. def : InstRW<[WriteMicrocoded], (instrs FBSTPm)>;
  729. def ZnWriteFXCH : SchedWriteRes<[ZnFPU]>;
  730. // FXCH.
  731. def : InstRW<[ZnWriteFXCH], (instrs XCH_F)>;
  732. // FILD.
  733. def ZnWriteFILD : SchedWriteRes<[ZnAGU, ZnFPU3]> {
  734. let Latency = 11;
  735. let NumMicroOps = 2;
  736. }
  737. def : InstRW<[ZnWriteFILD], (instregex "ILD_F(16|32|64)m")>;
  738. // FIST(P) FISTTP.
  739. def ZnWriteFIST : SchedWriteRes<[ZnAGU, ZnFPU23]> {
  740. let Latency = 12;
  741. }
  742. def : InstRW<[ZnWriteFIST], (instregex "IS(T|TT)_(F|FP)(16|32|64)m")>;
  743. def ZnWriteFPU13 : SchedWriteRes<[ZnAGU, ZnFPU13]> {
  744. let Latency = 8;
  745. }
  746. def ZnWriteFPU3 : SchedWriteRes<[ZnAGU, ZnFPU3]> {
  747. let Latency = 11;
  748. }
  749. // FLDZ.
  750. def : SchedAlias<WriteFLD0, ZnWriteFPU13>;
  751. // FLD1.
  752. def : SchedAlias<WriteFLD1, ZnWriteFPU3>;
  753. // FLDPI FLDL2E etc.
  754. def : SchedAlias<WriteFLDC, ZnWriteFPU3>;
  755. // FNSTSW.
  756. // AX.
  757. def : InstRW<[WriteMicrocoded], (instrs FNSTSW16r)>;
  758. // m16.
  759. def : InstRW<[WriteMicrocoded], (instrs FNSTSWm)>;
  760. // FLDCW.
  761. def : InstRW<[WriteMicrocoded], (instrs FLDCW16m)>;
  762. // FNSTCW.
  763. def : InstRW<[WriteMicrocoded], (instrs FNSTCW16m)>;
  764. // FINCSTP FDECSTP.
  765. def : InstRW<[ZnWriteFPU3], (instrs FINCSTP, FDECSTP)>;
  766. // FFREE.
  767. def : InstRW<[ZnWriteFPU3], (instregex "FFREE")>;
  768. // FNSAVE.
  769. def : InstRW<[WriteMicrocoded], (instrs FSAVEm)>;
  770. // FRSTOR.
  771. def : InstRW<[WriteMicrocoded], (instrs FRSTORm)>;
  772. //-- Arithmetic instructions --//
  773. def ZnWriteFPU3Lat1 : SchedWriteRes<[ZnFPU3]> ;
  774. def ZnWriteFPU0Lat1 : SchedWriteRes<[ZnFPU0]> ;
  775. def ZnWriteFPU0Lat1Ld : SchedWriteRes<[ZnAGU, ZnFPU0]> {
  776. let Latency = 8;
  777. }
  778. // FCHS.
  779. def : InstRW<[ZnWriteFPU3Lat1], (instregex "CHS_F")>;
  780. // FCOM(P) FUCOM(P).
  781. // r.
  782. def : InstRW<[ZnWriteFPU0Lat1], (instregex "COM(P?)_FST0r", "UCOM_F(P?)r")>;
  783. // m.
  784. def : InstRW<[ZnWriteFPU0Lat1Ld], (instregex "FCOM(P?)(32|64)m")>;
  785. // FCOMPP FUCOMPP.
  786. // r.
  787. def : InstRW<[ZnWriteFPU0Lat1], (instrs FCOMPP, UCOM_FPPr)>;
  788. def ZnWriteFPU02 : SchedWriteRes<[ZnAGU, ZnFPU02]>
  789. {
  790. let Latency = 9;
  791. }
  792. // FCOMI(P) FUCOMI(P).
  793. // r.
  794. def : InstRW<[ZnWriteFPU02], (instrs COM_FIPr, COM_FIr, UCOM_FIPr, UCOM_FIr)>;
  795. def ZnWriteFPU03 : SchedWriteRes<[ZnAGU, ZnFPU03]>
  796. {
  797. let Latency = 12;
  798. let NumMicroOps = 2;
  799. let ResourceCycles = [1,3];
  800. }
  801. // FICOM(P).
  802. def : InstRW<[ZnWriteFPU03], (instregex "FICOM(P?)(16|32)m")>;
  803. // FTST.
  804. def : InstRW<[ZnWriteFPU0Lat1], (instregex "TST_F")>;
  805. // FXAM.
  806. def : InstRW<[ZnWriteFPU3Lat1], (instrs XAM_F)>;
  807. // FPREM.
  808. def : InstRW<[WriteMicrocoded], (instrs FPREM)>;
  809. // FPREM1.
  810. def : InstRW<[WriteMicrocoded], (instrs FPREM1)>;
  811. // FRNDINT.
  812. def : InstRW<[WriteMicrocoded], (instrs FRNDINT)>;
  813. // FSCALE.
  814. def : InstRW<[WriteMicrocoded], (instrs FSCALE)>;
  815. // FXTRACT.
  816. def : InstRW<[WriteMicrocoded], (instrs FXTRACT)>;
  817. // FNOP.
  818. def : InstRW<[ZnWriteFPU0Lat1], (instrs FNOP)>;
  819. // WAIT.
  820. def : InstRW<[ZnWriteFPU0Lat1], (instrs WAIT)>;
  821. // FNCLEX.
  822. def : InstRW<[WriteMicrocoded], (instrs FNCLEX)>;
  823. // FNINIT.
  824. def : InstRW<[WriteMicrocoded], (instrs FNINIT)>;
  825. //=== Integer MMX and XMM Instructions ===//
  826. // PACKSSWB/DW.
  827. // mm <- mm.
  828. def ZnWriteFPU12 : SchedWriteRes<[ZnFPU12]> ;
  829. def ZnWriteFPU12Y : SchedWriteRes<[ZnFPU12]> {
  830. let NumMicroOps = 2;
  831. }
  832. def ZnWriteFPU12m : SchedWriteRes<[ZnAGU, ZnFPU12]> ;
  833. def ZnWriteFPU12Ym : SchedWriteRes<[ZnAGU, ZnFPU12]> {
  834. let Latency = 8;
  835. let NumMicroOps = 2;
  836. }
  837. def : InstRW<[ZnWriteFPU12], (instrs MMX_PACKSSDWrr,
  838. MMX_PACKSSWBrr,
  839. MMX_PACKUSWBrr)>;
  840. def : InstRW<[ZnWriteFPU12m], (instrs MMX_PACKSSDWrm,
  841. MMX_PACKSSWBrm,
  842. MMX_PACKUSWBrm)>;
  843. def ZnWriteFPU013 : SchedWriteRes<[ZnFPU013]> ;
  844. def ZnWriteFPU013Y : SchedWriteRes<[ZnFPU013]> {
  845. let Latency = 2;
  846. }
  847. def ZnWriteFPU013m : SchedWriteRes<[ZnAGU, ZnFPU013]> {
  848. let Latency = 8;
  849. let NumMicroOps = 2;
  850. }
  851. def ZnWriteFPU013Ld : SchedWriteRes<[ZnAGU, ZnFPU013]> {
  852. let Latency = 8;
  853. let NumMicroOps = 2;
  854. }
  855. def ZnWriteFPU013LdY : SchedWriteRes<[ZnAGU, ZnFPU013]> {
  856. let Latency = 9;
  857. let NumMicroOps = 2;
  858. }
  859. // PBLENDW.
  860. // x,x,i / v,v,v,i
  861. def : InstRW<[ZnWriteFPU013], (instregex "(V?)PBLENDWrri")>;
  862. // ymm
  863. def : InstRW<[ZnWriteFPU013Y], (instrs VPBLENDWYrri)>;
  864. // x,m,i / v,v,m,i
  865. def : InstRW<[ZnWriteFPU013Ld], (instregex "(V?)PBLENDWrmi")>;
  866. // y,m,i
  867. def : InstRW<[ZnWriteFPU013LdY], (instrs VPBLENDWYrmi)>;
  868. def ZnWriteFPU01 : SchedWriteRes<[ZnFPU01]> ;
  869. def ZnWriteFPU01Y : SchedWriteRes<[ZnFPU01]> {
  870. let NumMicroOps = 2;
  871. }
  872. // VPBLENDD.
  873. // v,v,v,i.
  874. def : InstRW<[ZnWriteFPU01], (instrs VPBLENDDrri)>;
  875. // ymm
  876. def : InstRW<[ZnWriteFPU01Y], (instrs VPBLENDDYrri)>;
  877. // v,v,m,i
  878. def ZnWriteFPU01Op2 : SchedWriteRes<[ZnAGU, ZnFPU01]> {
  879. let NumMicroOps = 2;
  880. let Latency = 8;
  881. let ResourceCycles = [1, 2];
  882. }
  883. def ZnWriteFPU01Op2Y : SchedWriteRes<[ZnAGU, ZnFPU01]> {
  884. let NumMicroOps = 2;
  885. let Latency = 9;
  886. let ResourceCycles = [1, 3];
  887. }
  888. def : InstRW<[ZnWriteFPU01Op2], (instrs VPBLENDDrmi)>;
  889. def : InstRW<[ZnWriteFPU01Op2Y], (instrs VPBLENDDYrmi)>;
  890. // MASKMOVQ.
  891. def : InstRW<[WriteMicrocoded], (instregex "MMX_MASKMOVQ(64)?")>;
  892. // MASKMOVDQU.
  893. def : InstRW<[WriteMicrocoded], (instregex "(V?)MASKMOVDQU(64)?")>;
  894. // VPMASKMOVD.
  895. // ymm
  896. def : InstRW<[WriteMicrocoded],
  897. (instregex "VPMASKMOVD(Y?)rm")>;
  898. // m, v,v.
  899. def : InstRW<[WriteMicrocoded], (instregex "VPMASKMOV(D|Q)(Y?)mr")>;
  900. // VPBROADCAST B/W.
  901. // x, m8/16.
  902. def ZnWriteVPBROADCAST128Ld : SchedWriteRes<[ZnAGU, ZnFPU12]> {
  903. let Latency = 8;
  904. let NumMicroOps = 2;
  905. let ResourceCycles = [1, 2];
  906. }
  907. def : InstRW<[ZnWriteVPBROADCAST128Ld],
  908. (instregex "VPBROADCAST(B|W)rm")>;
  909. // y, m8/16
  910. def ZnWriteVPBROADCAST256Ld : SchedWriteRes<[ZnAGU, ZnFPU1]> {
  911. let Latency = 8;
  912. let NumMicroOps = 2;
  913. let ResourceCycles = [1, 2];
  914. }
  915. def : InstRW<[ZnWriteVPBROADCAST256Ld],
  916. (instregex "VPBROADCAST(B|W)Yrm")>;
  917. // VPGATHER.
  918. def : InstRW<[WriteMicrocoded], (instregex "VPGATHER(Q|D)(Q|D)(Y?)rm")>;
  919. //-- Arithmetic instructions --//
  920. // HADD, HSUB PS/PD
  921. // PHADD|PHSUB (S) W/D.
  922. def : SchedAlias<WritePHAdd, ZnWriteMicrocoded>;
  923. def : SchedAlias<WritePHAddLd, ZnWriteMicrocoded>;
  924. def : SchedAlias<WritePHAddX, ZnWriteMicrocoded>;
  925. def : SchedAlias<WritePHAddXLd, ZnWriteMicrocoded>;
  926. def : SchedAlias<WritePHAddY, ZnWriteMicrocoded>;
  927. def : SchedAlias<WritePHAddYLd, ZnWriteMicrocoded>;
  928. // PCMPGTQ.
  929. def ZnWritePCMPGTQr : SchedWriteRes<[ZnFPU03]>;
  930. def : InstRW<[ZnWritePCMPGTQr], (instregex "(V?)PCMPGTQ(Y?)rr")>;
  931. // x <- x,m.
  932. def ZnWritePCMPGTQm : SchedWriteRes<[ZnAGU, ZnFPU03]> {
  933. let Latency = 8;
  934. }
  935. // ymm.
  936. def ZnWritePCMPGTQYm : SchedWriteRes<[ZnAGU, ZnFPU03]> {
  937. let Latency = 8;
  938. let NumMicroOps = 2;
  939. let ResourceCycles = [1,2];
  940. }
  941. def : InstRW<[ZnWritePCMPGTQm], (instregex "(V?)PCMPGTQrm")>;
  942. def : InstRW<[ZnWritePCMPGTQYm], (instrs VPCMPGTQYrm)>;
  943. //-- Logic instructions --//
  944. // PSLL,PSRL,PSRA W/D/Q.
  945. // x,x / v,v,x.
  946. def ZnWritePShift : SchedWriteRes<[ZnFPU2]> ;
  947. def ZnWritePShiftY : SchedWriteRes<[ZnFPU2]> {
  948. let Latency = 2;
  949. }
  950. // PSLL,PSRL DQ.
  951. def : InstRW<[ZnWritePShift], (instregex "(V?)PS(R|L)LDQri")>;
  952. def : InstRW<[ZnWritePShiftY], (instregex "(V?)PS(R|L)LDQYri")>;
  953. //=== Floating Point XMM and YMM Instructions ===//
  954. //-- Move instructions --//
  955. // VPERM2F128.
  956. def : InstRW<[WriteMicrocoded], (instrs VPERM2F128rr)>;
  957. def : InstRW<[WriteMicrocoded], (instrs VPERM2F128rm)>;
  958. def ZnWriteBROADCAST : SchedWriteRes<[ZnAGU, ZnFPU13]> {
  959. let NumMicroOps = 2;
  960. let Latency = 8;
  961. }
  962. // VBROADCASTF128.
  963. def : InstRW<[ZnWriteBROADCAST], (instrs VBROADCASTF128)>;
  964. // EXTRACTPS.
  965. // r32,x,i.
  966. def ZnWriteEXTRACTPSr : SchedWriteRes<[ZnFPU12, ZnFPU2]> {
  967. let Latency = 2;
  968. let NumMicroOps = 2;
  969. let ResourceCycles = [1, 2];
  970. }
  971. def : InstRW<[ZnWriteEXTRACTPSr], (instregex "(V?)EXTRACTPSrr")>;
  972. def ZnWriteEXTRACTPSm : SchedWriteRes<[ZnAGU,ZnFPU12, ZnFPU2]> {
  973. let Latency = 5;
  974. let NumMicroOps = 2;
  975. let ResourceCycles = [5, 1, 2]; // NOTE(review): ZnAGU is held for 5 cycles (same number as Latency); other load/store writes in this file use 1 for ZnAGU - confirm.
  976. }
  977. // m32,x,i.
  978. def : InstRW<[ZnWriteEXTRACTPSm], (instregex "(V?)EXTRACTPSmr")>;
  979. // VEXTRACTF128.
  980. // x,y,i.
  981. def : InstRW<[ZnWriteFPU013], (instrs VEXTRACTF128rr)>;
  982. // m128,y,i.
  983. def : InstRW<[ZnWriteFPU013m], (instrs VEXTRACTF128mr)>;
  984. def ZnWriteVINSERT128r: SchedWriteRes<[ZnFPU013]> {
  985. let Latency = 2;
  986. let ResourceCycles = [2];
  987. }
  988. def ZnWriteVINSERT128Ld: SchedWriteRes<[ZnAGU,ZnFPU013]> {
  989. let Latency = 9;
  990. let NumMicroOps = 2;
  991. let ResourceCycles = [1, 2];
  992. }
  993. // VINSERTF128.
  994. // y,y,x,i.
  995. def : InstRW<[ZnWriteVINSERT128r], (instrs VINSERTF128rr)>;
  996. def : InstRW<[ZnWriteVINSERT128Ld], (instrs VINSERTF128rm)>;
  997. // VGATHER.
  998. def : InstRW<[WriteMicrocoded], (instregex "VGATHER(Q|D)(PD|PS)(Y?)rm")>;
  999. //-- Conversion instructions --//
  1000. def ZnWriteCVTPD2PSr: SchedWriteRes<[ZnFPU3]> {
  1001. let Latency = 4;
  1002. }
  1003. def ZnWriteCVTPD2PSYr: SchedWriteRes<[ZnFPU3]> {
  1004. let Latency = 5;
  1005. }
  1006. // CVTPD2PS.
  1007. // x,x.
  1008. def : SchedAlias<WriteCvtPD2PS, ZnWriteCVTPD2PSr>;
  1009. // y,y.
  1010. def : SchedAlias<WriteCvtPD2PSY, ZnWriteCVTPD2PSYr>;
  1011. // z,z.
  1012. defm : X86WriteResUnsupported<WriteCvtPD2PSZ>;
  1013. def ZnWriteCVTPD2PSLd: SchedWriteRes<[ZnAGU,ZnFPU03]> {
  1014. let Latency = 11;
  1015. let NumMicroOps = 2;
  1016. let ResourceCycles = [1,2];
  1017. }
  1018. // x,m128.
  1019. def : SchedAlias<WriteCvtPD2PSLd, ZnWriteCVTPD2PSLd>;
  1020. // x,m256.
  1021. def ZnWriteCVTPD2PSYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
  1022. let Latency = 11;
  1023. }
  1024. def : SchedAlias<WriteCvtPD2PSYLd, ZnWriteCVTPD2PSYLd>;
  1025. // z,m512
  1026. defm : X86WriteResUnsupported<WriteCvtPD2PSZLd>;
  1027. // CVTSD2SS.
  1028. // x,x.
  1029. // Same as WriteCVTPD2PSr
  1030. def : SchedAlias<WriteCvtSD2SS, ZnWriteCVTPD2PSr>;
  1031. // x,m64.
  1032. def : SchedAlias<WriteCvtSD2SSLd, ZnWriteCVTPD2PSLd>;
  1033. // CVTPS2PD.
  1034. // x,x.
  1035. def ZnWriteCVTPS2PDr : SchedWriteRes<[ZnFPU3]> {
  1036. let Latency = 3;
  1037. }
  1038. def : SchedAlias<WriteCvtPS2PD, ZnWriteCVTPS2PDr>;
  1039. // x,m64.
  1040. // y,m128.
  1041. def ZnWriteCVTPS2PDLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
  1042. let Latency = 10;
  1043. let NumMicroOps = 2;
  1044. }
  1045. def : SchedAlias<WriteCvtPS2PDLd, ZnWriteCVTPS2PDLd>;
  1046. def : SchedAlias<WriteCvtPS2PDYLd, ZnWriteCVTPS2PDLd>;
  1047. defm : X86WriteResUnsupported<WriteCvtPS2PDZLd>;
  1048. // y,x.
  1049. def ZnWriteVCVTPS2PDY : SchedWriteRes<[ZnFPU3]> {
  1050. let Latency = 3;
  1051. }
  1052. def : SchedAlias<WriteCvtPS2PDY, ZnWriteVCVTPS2PDY>;
  1053. defm : X86WriteResUnsupported<WriteCvtPS2PDZ>;
  1054. // CVTSS2SD.
  1055. // x,x.
  1056. def ZnWriteCVTSS2SDr : SchedWriteRes<[ZnFPU3]> {
  1057. let Latency = 4;
  1058. }
  1059. def : SchedAlias<WriteCvtSS2SD, ZnWriteCVTSS2SDr>;
  1060. // x,m32.
  1061. def ZnWriteCVTSS2SDLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
  1062. let Latency = 11;
  1063. let NumMicroOps = 2;
  1064. let ResourceCycles = [1, 2];
  1065. }
  1066. def : SchedAlias<WriteCvtSS2SDLd, ZnWriteCVTSS2SDLd>;
  1067. def ZnWriteCVTDQ2PDr: SchedWriteRes<[ZnFPU12,ZnFPU3]> {
  1068. let Latency = 5;
  1069. }
  1070. // CVTDQ2PD.
  1071. // x,x.
  1072. def : InstRW<[ZnWriteCVTDQ2PDr], (instregex "(V)?CVTDQ2PDrr")>;
  1073. // Same as xmm
  1074. // y,x.
  1075. def : InstRW<[ZnWriteCVTDQ2PDr], (instrs VCVTDQ2PDYrr)>;
  1076. def ZnWriteCVTPD2DQr: SchedWriteRes<[ZnFPU12, ZnFPU3]> { // Register-form write for CVT(T)PD2DQ; same resources and latency as ZnWriteCVTDQ2PDr.
  1077. let Latency = 5;
  1078. }
  1079. // CVT(T)PD2DQ.
  1080. // x,x: use the write defined for this instruction (previously referenced ZnWriteCVTDQ2PDr, which is identical in resources and latency).
  1081. def : InstRW<[ZnWriteCVTPD2DQr], (instregex "(V?)CVT(T?)PD2DQrr")>;
  1082. def ZnWriteCVTPD2DQLd: SchedWriteRes<[ZnAGU,ZnFPU12,ZnFPU3]> {
  1083. let Latency = 12;
  1084. let NumMicroOps = 2;
  1085. }
  1086. // x,m128.
  1087. def : InstRW<[ZnWriteCVTPD2DQLd], (instregex "(V?)CVT(T?)PD2DQrm")>;
  1088. // same as xmm handling
  1089. // x,y.
  1090. def : InstRW<[ZnWriteCVTPD2DQr], (instregex "VCVT(T?)PD2DQYrr")>;
  1091. // x,m256.
  1092. def : InstRW<[ZnWriteCVTPD2DQLd], (instregex "VCVT(T?)PD2DQYrm")>;
  1093. def ZnWriteCVTPS2PIr: SchedWriteRes<[ZnFPU3]> {
  1094. let Latency = 4;
  1095. }
  1096. // CVT(T)PS2PI.
  1097. // mm,x.
  1098. def : InstRW<[ZnWriteCVTPS2PIr], (instregex "MMX_CVT(T?)PS2PIrr")>;
  1099. // CVTPI2PD.
  1100. // x,mm.
  1101. def : InstRW<[ZnWriteCVTPS2PDr], (instrs MMX_CVTPI2PDrr)>;
  1102. // CVT(T)PD2PI.
  1103. // mm,x.
  1104. def : InstRW<[ZnWriteCVTPS2PIr], (instregex "MMX_CVT(T?)PD2PIrr")>;
  1105. def ZnWriteCVSTSI2SSr: SchedWriteRes<[ZnFPU3]> { // NOTE(review): no user of this write is visible in this part of the file - confirm it is referenced elsewhere.
  1106. let Latency = 5;
  1107. }
  1108. // same as CVTPD2DQr
  1109. // CVT(T)SS2SI.
  1110. // r32,x.
  1111. def : InstRW<[ZnWriteCVTPD2DQr], (instregex "(V?)CVT(T?)SS2SI(64)?rr")>;
  1112. // same as CVTPD2DQm
  1113. // r32,m32.
  1114. def : InstRW<[ZnWriteCVTPD2DQLd], (instregex "(V?)CVT(T?)SS2SI(64)?rm")>;
  1115. def ZnWriteCVSTSI2SDr: SchedWriteRes<[ZnFPU013, ZnFPU3]> {
  1116. let Latency = 5;
  1117. }
  1118. // CVTSI2SD.
  1119. // x,r32/64.
  1120. def : InstRW<[ZnWriteCVSTSI2SDr], (instregex "(V?)CVTSI(64)?2SDrr")>;
  1121. def ZnWriteCVSTSI2SIr: SchedWriteRes<[ZnFPU3, ZnFPU2]> {
  1122. let Latency = 5;
  1123. }
  1124. def ZnWriteCVSTSI2SILd: SchedWriteRes<[ZnAGU, ZnFPU3, ZnFPU2]> {
  1125. let Latency = 12;
  1126. }
  1127. // CVTSD2SI.
  1128. // r32/64
  1129. def : InstRW<[ZnWriteCVSTSI2SIr], (instregex "(V?)CVT(T?)SD2SI(64)?rr")>;
  1130. // r32/64,m64.
  1131. def : InstRW<[ZnWriteCVSTSI2SILd], (instregex "(V?)CVT(T?)SD2SI(64)?rm")>;
  1132. // VCVTPS2PH.
  1133. // x,v,i.
  1134. def : SchedAlias<WriteCvtPS2PH, ZnWriteMicrocoded>;
  1135. def : SchedAlias<WriteCvtPS2PHY, ZnWriteMicrocoded>;
  1136. defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
  1137. // m,v,i.
  1138. def : SchedAlias<WriteCvtPS2PHSt, ZnWriteMicrocoded>;
  1139. def : SchedAlias<WriteCvtPS2PHYSt, ZnWriteMicrocoded>;
  1140. defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
  1141. // VCVTPH2PS.
  1142. // v,x.
  1143. def : SchedAlias<WriteCvtPH2PS, ZnWriteMicrocoded>;
  1144. def : SchedAlias<WriteCvtPH2PSY, ZnWriteMicrocoded>;
  1145. defm : X86WriteResUnsupported<WriteCvtPH2PSZ>;
  1146. // v,m.
  1147. def : SchedAlias<WriteCvtPH2PSLd, ZnWriteMicrocoded>;
  1148. def : SchedAlias<WriteCvtPH2PSYLd, ZnWriteMicrocoded>;
  1149. defm : X86WriteResUnsupported<WriteCvtPH2PSZLd>;
  1150. //-- SSE4A instructions --//
  1151. // EXTRQ
  1152. def ZnWriteEXTRQ: SchedWriteRes<[ZnFPU12, ZnFPU2]> {
  1153. let Latency = 2;
  1154. }
  1155. def : InstRW<[ZnWriteEXTRQ], (instregex "EXTRQ")>;
  1156. // INSERTQ
  1157. def ZnWriteINSERTQ: SchedWriteRes<[ZnFPU03,ZnFPU1]> {
  1158. let Latency = 4;
  1159. }
  1160. def : InstRW<[ZnWriteINSERTQ], (instregex "INSERTQ")>;
  1161. //-- SHA instructions --//
  1162. // SHA256MSG2
  1163. def : InstRW<[WriteMicrocoded], (instregex "SHA256MSG2(Y?)r(r|m)")>;
  1164. // SHA1MSG1, SHA256MSG1
  1165. // x,x.
  1166. def ZnWriteSHA1MSG1r : SchedWriteRes<[ZnFPU12]> {
  1167. let Latency = 2;
  1168. let ResourceCycles = [2];
  1169. }
  1170. def : InstRW<[ZnWriteSHA1MSG1r], (instregex "SHA(1|256)MSG1rr")>;
  1171. // x,m.
  1172. def ZnWriteSHA1MSG1Ld : SchedWriteRes<[ZnAGU, ZnFPU12]> {
  1173. let Latency = 9;
  1174. let ResourceCycles = [1,2];
  1175. }
  1176. def : InstRW<[ZnWriteSHA1MSG1Ld], (instregex "SHA(1|256)MSG1rm")>;
  1177. // SHA1MSG2
  1178. // x,x.
  1179. def ZnWriteSHA1MSG2r : SchedWriteRes<[ZnFPU12]> ;
  1180. def : InstRW<[ZnWriteSHA1MSG2r], (instrs SHA1MSG2rr)>;
  1181. // x,m.
  1182. def ZnWriteSHA1MSG2Ld : SchedWriteRes<[ZnAGU, ZnFPU12]> {
  1183. let Latency = 8;
  1184. }
  1185. def : InstRW<[ZnWriteSHA1MSG2Ld], (instrs SHA1MSG2rm)>;
  1186. // SHA1NEXTE
  1187. // x,x.
  1188. def ZnWriteSHA1NEXTEr : SchedWriteRes<[ZnFPU1]> ;
  1189. def : InstRW<[ZnWriteSHA1NEXTEr], (instrs SHA1NEXTErr)>;
  1190. // x,m.
  1191. def ZnWriteSHA1NEXTELd : SchedWriteRes<[ZnAGU, ZnFPU1]> {
  1192. let Latency = 8;
  1193. }
  1194. def : InstRW<[ZnWriteSHA1NEXTELd], (instrs SHA1NEXTErm)>;
  1195. // SHA1RNDS4
  1196. // x,x.
  1197. def ZnWriteSHA1RNDS4r : SchedWriteRes<[ZnFPU1]> {
  1198. let Latency = 6;
  1199. }
  1200. def : InstRW<[ZnWriteSHA1RNDS4r], (instrs SHA1RNDS4rri)>;
  1201. // x,m.
  1202. def ZnWriteSHA1RNDS4Ld : SchedWriteRes<[ZnAGU, ZnFPU1]> {
  1203. let Latency = 13;
  1204. }
  1205. def : InstRW<[ZnWriteSHA1RNDS4Ld], (instrs SHA1RNDS4rmi)>;
  1206. // SHA256RNDS2
  1207. // x,x.
  1208. def ZnWriteSHA256RNDS2r : SchedWriteRes<[ZnFPU1]> {
  1209. let Latency = 4;
  1210. }
  1211. def : InstRW<[ZnWriteSHA256RNDS2r], (instrs SHA256RNDS2rr)>;
  1212. // x,m.
  1213. def ZnWriteSHA256RNDS2Ld : SchedWriteRes<[ZnAGU, ZnFPU1]> {
  1214. let Latency = 11;
  1215. }
  1216. def : InstRW<[ZnWriteSHA256RNDS2Ld], (instrs SHA256RNDS2rm)>;
  1217. //-- Arithmetic instructions --//
  1218. // HADD, HSUB PS/PD
  1219. def : SchedAlias<WriteFHAdd, ZnWriteMicrocoded>;
  1220. def : SchedAlias<WriteFHAddLd, ZnWriteMicrocoded>;
  1221. def : SchedAlias<WriteFHAddY, ZnWriteMicrocoded>;
  1222. def : SchedAlias<WriteFHAddYLd, ZnWriteMicrocoded>;
  1223. // VDIVPS (256-bit): mapped onto WriteFDivY / WriteFDivYLd below.
  1224. // TODO - convert to ZnWriteResFpuPair
  1225. // y,y,y.
  1226. def ZnWriteVDIVPSYr : SchedWriteRes<[ZnFPU3]> {
  1227. let Latency = 12;
  1228. let ResourceCycles = [12]; // ZnFPU3 is held for the full latency.
  1229. }
  1230. def : SchedAlias<WriteFDivY, ZnWriteVDIVPSYr>;
  1231. // y,y,m256.
  1232. def ZnWriteVDIVPSYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
  1233. let Latency = 19;
  1234. let NumMicroOps = 2;
  1235. let ResourceCycles = [1, 19]; // NOTE(review): ZnFPU3 is held 19 cycles here vs. 12 in the register form - confirm the extra load latency should count as FPU occupancy.
  1236. }
  1237. def : SchedAlias<WriteFDivYLd, ZnWriteVDIVPSYLd>;
  1238. // VDIVPD (256-bit): mapped onto WriteFDiv64Y / WriteFDiv64YLd below.
  1239. // TODO - convert to ZnWriteResFpuPair
  1240. // y,y,y.
  1241. def ZnWriteVDIVPDY : SchedWriteRes<[ZnFPU3]> {
  1242. let Latency = 15;
  1243. let ResourceCycles = [15]; // ZnFPU3 is held for the full latency.
  1244. }
  1245. def : SchedAlias<WriteFDiv64Y, ZnWriteVDIVPDY>;
  1246. // y,y,m256.
  1247. def ZnWriteVDIVPDYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> {
  1248. let Latency = 22;
  1249. let NumMicroOps = 2;
  1250. let ResourceCycles = [1,22]; // NOTE(review): ZnFPU3 is held 22 cycles here vs. 15 in the register form - confirm the extra load latency should count as FPU occupancy.
  1251. }
  1252. def : SchedAlias<WriteFDiv64YLd, ZnWriteVDIVPDYLd>;
  1253. // DPPS.
  1254. // x,x,i / v,v,v,i.
  1255. def : SchedAlias<WriteDPPS, ZnWriteMicrocoded>;
  1256. def : SchedAlias<WriteDPPSY, ZnWriteMicrocoded>;
  1257. // x,m,i / v,v,m,i.
  1258. def : SchedAlias<WriteDPPSLd, ZnWriteMicrocoded>;
  1259. def : SchedAlias<WriteDPPSYLd,ZnWriteMicrocoded>;
  1260. // DPPD.
  1261. // x,x,i.
  1262. def : SchedAlias<WriteDPPD, ZnWriteMicrocoded>;
  1263. // x,m,i.
  1264. def : SchedAlias<WriteDPPDLd, ZnWriteMicrocoded>;
  1265. // RSQRTSS
  1266. // TODO - convert to ZnWriteResFpuPair
  1267. // x,x.
  1268. def ZnWriteRSQRTSSr : SchedWriteRes<[ZnFPU02]> {
  1269. let Latency = 5;
  1270. }
  1271. def : SchedAlias<WriteFRsqrt, ZnWriteRSQRTSSr>;
  1272. // x,m128.
  1273. def ZnWriteRSQRTSSLd: SchedWriteRes<[ZnAGU, ZnFPU02]> {
  1274. let Latency = 12;
  1275. let NumMicroOps = 2;
  1276. let ResourceCycles = [1,2]; // FIXME: Is this right?
  1277. }
  1278. def : SchedAlias<WriteFRsqrtLd, ZnWriteRSQRTSSLd>;
  1279. // RSQRTPS
  1280. // TODO - convert to ZnWriteResFpuPair
  1281. // y,y.
  1282. def ZnWriteRSQRTPSYr : SchedWriteRes<[ZnFPU01]> {
  1283. let Latency = 5;
  1284. let NumMicroOps = 2;
  1285. let ResourceCycles = [2];
  1286. }
  1287. def : SchedAlias<WriteFRsqrtY, ZnWriteRSQRTPSYr>;
  1288. // y,m256.
  1289. def ZnWriteRSQRTPSYLd : SchedWriteRes<[ZnAGU, ZnFPU01]> { // NOTE(review): no ResourceCycles given, unlike ZnWriteRSQRTPSYr ([2]) - confirm this is intended.
  1290. let Latency = 12;
  1291. let NumMicroOps = 2;
  1292. }
  1293. def : SchedAlias<WriteFRsqrtYLd, ZnWriteRSQRTPSYLd>;
  1294. //-- Other instructions --//
  1295. // VZEROUPPER.
  1296. def : InstRW<[WriteMicrocoded], (instrs VZEROUPPER)>;
  1297. // VZEROALL.
  1298. def : InstRW<[WriteMicrocoded], (instrs VZEROALL)>;
  1299. ///////////////////////////////////////////////////////////////////////////////
  1300. // Dependency breaking instructions.
  1301. ///////////////////////////////////////////////////////////////////////////////
  1302. def : IsZeroIdiomFunction<[
  1303. // GPR Zero-idioms.
  1304. DepBreakingClass<[
  1305. SUB32rr, SUB64rr,
  1306. XOR32rr, XOR64rr
  1307. ], ZeroIdiomPredicate>,
  1308. // MMX Zero-idioms.
  1309. DepBreakingClass<[
  1310. MMX_PXORrr, MMX_PANDNrr, MMX_PSUBBrr,
  1311. MMX_PSUBDrr, MMX_PSUBQrr, MMX_PSUBWrr,
  1312. MMX_PSUBSBrr, MMX_PSUBSWrr, MMX_PSUBUSBrr, MMX_PSUBUSWrr,
  1313. MMX_PCMPGTBrr, MMX_PCMPGTDrr, MMX_PCMPGTWrr
  1314. ], ZeroIdiomPredicate>,
  1315. // SSE Zero-idioms.
  1316. DepBreakingClass<[
  1317. // fp variants.
  1318. XORPSrr, XORPDrr, ANDNPSrr, ANDNPDrr,
  1319. // int variants.
  1320. PXORrr, PANDNrr,
  1321. PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
  1322. PCMPGTBrr, PCMPGTDrr, PCMPGTQrr, PCMPGTWrr
  1323. ], ZeroIdiomPredicate>,
  1324. // AVX XMM Zero-idioms.
  1325. DepBreakingClass<[
  1326. // fp variants.
  1327. VXORPSrr, VXORPDrr, VANDNPSrr, VANDNPDrr,
  1328. // int variants.
  1329. VPXORrr, VPANDNrr,
  1330. VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
  1331. VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr
  1332. ], ZeroIdiomPredicate>,
  1333. // AVX YMM Zero-idioms.
  1334. DepBreakingClass<[
  1335. // fp variants
  1336. VXORPSYrr, VXORPDYrr, VANDNPSYrr, VANDNPDYrr,
  1337. // int variants
  1338. VPXORYrr, VPANDNYrr,
  1339. VPSUBBYrr, VPSUBWYrr, VPSUBDYrr, VPSUBQYrr,
  1340. VPCMPGTBYrr, VPCMPGTWYrr, VPCMPGTDYrr, VPCMPGTQYrr
  1341. ], ZeroIdiomPredicate>
  1342. ]>;
  1343. def : IsDepBreakingFunction<[
  1344. // GPR
  1345. DepBreakingClass<[ SBB32rr, SBB64rr ], ZeroIdiomPredicate>,
  1346. DepBreakingClass<[ CMP32rr, CMP64rr ], CheckSameRegOperand<0, 1> >,
  1347. // MMX
  1348. DepBreakingClass<[
  1349. MMX_PCMPEQBrr, MMX_PCMPEQWrr, MMX_PCMPEQDrr
  1350. ], ZeroIdiomPredicate>,
  1351. // SSE
  1352. DepBreakingClass<[
  1353. PCMPEQBrr, PCMPEQWrr, PCMPEQDrr, PCMPEQQrr
  1354. ], ZeroIdiomPredicate>,
  1355. // AVX XMM
  1356. DepBreakingClass<[
  1357. VPCMPEQBrr, VPCMPEQWrr, VPCMPEQDrr, VPCMPEQQrr
  1358. ], ZeroIdiomPredicate>,
  1359. // AVX YMM
  1360. DepBreakingClass<[
  1361. VPCMPEQBYrr, VPCMPEQWYrr, VPCMPEQDYrr, VPCMPEQQYrr
  1362. ], ZeroIdiomPredicate>,
  1363. ]>;
  1364. } // SchedModel