PPCScheduleP7.td 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404
  1. //===-- PPCScheduleP7.td - PPC P7 Scheduling Definitions ---*- tablegen -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file defines the itinerary class data for the POWER7 processor.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. // Primary reference:
  13. // IBM POWER7 multicore server processor
  14. // B. Sinharoy, et al.
  15. // IBM J. Res. & Dev. (55) 3. May/June 2011.
  16. // Scheduling for the P7 involves tracking two types of resources:
  17. // 1. The dispatch bundle slots
  18. // 2. The functional unit resources
  19. // Dispatch units:
  20. def P7_DU1 : FuncUnit; // Dispatch slot 1
  21. def P7_DU2 : FuncUnit; // Dispatch slot 2
  22. def P7_DU3 : FuncUnit; // Dispatch slot 3
  23. def P7_DU4 : FuncUnit; // Dispatch slot 4
  24. def P7_DU5 : FuncUnit; // Dispatch slot 5 (used by the BRU itineraries, e.g. IIC_BrB)
  25. def P7_DU6 : FuncUnit; // Dispatch slot 6 (used by the BRU itineraries, e.g. IIC_BrB)
  26. def P7_LS1 : FuncUnit; // Load/Store pipeline 1
  27. def P7_LS2 : FuncUnit; // Load/Store pipeline 2
  28. def P7_FX1 : FuncUnit; // FX (fixed-point) pipeline 1
  29. def P7_FX2 : FuncUnit; // FX (fixed-point) pipeline 2
  30. // VS pipeline 1 (vector integer ops. are always issued here)
  31. def P7_VS1 : FuncUnit; // VS pipeline 1
  32. // VS pipeline 2 (128-bit stores and permutes are issued here)
  33. def P7_VS2 : FuncUnit; // VS pipeline 2
  34. def P7_CRU : FuncUnit; // CR unit (CR logicals and move-from-SPRs)
  35. def P7_BRU : FuncUnit; // BR (branch) unit
  36. // Notes:
  37. // Each LSU pipeline can also execute FX add and logical instructions.
  38. // Each LSU pipeline can complete a load or store in one cycle.
  39. //
  40. // Each store is broken into two parts, AGEN goes to the LSU while a
  41. // "data steering" op. goes to the FXU or VSU.
  42. //
  43. // FX loads have a two cycle load-to-use latency (so one "bubble" cycle).
  44. // VSU loads have a three cycle load-to-use latency (so two "bubble" cycles).
  45. //
  46. // Frequent FX ops. take only one cycle and results can be used again in the
  47. // next cycle (there is a self-bypass). Getting results from the other FX
  48. // pipeline takes an additional cycle.
  49. //
  50. // The VSU XS is similar to the POWER6, but with a pipeline length of 2 cycles
  51. // (instead of 3 cycles on the POWER6). VSU XS handles vector FX-style ops.
  52. // Dispatch of an instruction to VS1 that uses four single prec. inputs
  53. // (either to a float or XC op.) prevents dispatch in that cycle to VS2 of any
  54. // floating point instruction.
  55. //
  56. // The VSU PM is similar to the POWER6, but with a pipeline length of 3 cycles
  57. // (instead of 4 cycles on the POWER6). vsel is handled by the PM pipeline
  58. // (unlike on the POWER6).
  59. //
  60. // FMA from the VSUs can forward results in 6 cycles. VS1 XS and vector FP
  61. // share the same write-back, and have a 5-cycle latency difference, so the
  62. // IFU/IDU will not dispatch an XS instruction 5 cycles after a vector FP
  63. // op. has been dispatched to VS1.
  64. //
  65. // Three cycles after an L1 cache hit, a dependent VSU instruction can issue.
  66. //
  67. // Instruction dispatch groups have (at most) four non-branch instructions, and
  68. // two branches. Unlike on the POWER4/5, a branch does not automatically
  69. // end the dispatch group, but a second branch must be the last in the group.
  70. def P7Itineraries : ProcessorItineraries< // POWER7 itinerary table: for each IIC_* class, the dispatch/execute stages and the operand cycles.
  71. [P7_DU1, P7_DU2, P7_DU3, P7_DU4, P7_DU5, P7_DU6,
  72. P7_LS1, P7_LS2, P7_FX1, P7_FX2, P7_VS1, P7_VS2, P7_CRU, P7_BRU], [], [ // every unit used below; the empty second list is presumably the bypass list (confirm in TargetItinerary.td)
  73. InstrItinData<IIC_IntSimple , [InstrStage<1, [P7_DU1, P7_DU2,
  74. P7_DU3, P7_DU4], 0>, // NOTE(review): the trailing 0 looks like InstrStage's time increment (next stage starts in the same cycle) — confirm
  75. InstrStage<1, [P7_FX1, P7_FX2,
  76. P7_LS1, P7_LS2]>], // simple ops may issue on either an FX or an LS pipeline
  77. [1, 1, 1]>, // operand-cycle list (presumably result first, then sources — confirm)
  78. InstrItinData<IIC_IntGeneral , [InstrStage<1, [P7_DU1, P7_DU2,
  79. P7_DU3, P7_DU4], 0>,
  80. InstrStage<1, [P7_FX1, P7_FX2]>],
  81. [1, 1, 1]>,
  82. InstrItinData<IIC_IntISEL, [InstrStage<1, [P7_DU1], 0>, // IIC_IntISEL: restricted to DU1 and lists both an FX and a BRU stage
  83. InstrStage<1, [P7_FX1, P7_FX2], 0>,
  84. InstrStage<1, [P7_BRU]>],
  85. [1, 1, 1, 1]>,
  86. InstrItinData<IIC_IntCompare , [InstrStage<1, [P7_DU1, P7_DU2,
  87. P7_DU3, P7_DU4], 0>,
  88. InstrStage<1, [P7_FX1, P7_FX2]>],
  89. [1, 1, 1]>,
  90. // FIXME: Add record-form itinerary data.
  91. InstrItinData<IIC_IntDivW , [InstrStage<1, [P7_DU1], 0>, // divides list DU1 and DU2 as separate stages
  92. InstrStage<1, [P7_DU2], 0>,
  93. InstrStage<36, [P7_FX1, P7_FX2]>], // 36-cycle stage on one FX pipeline
  94. [36, 1, 1]>,
  95. InstrItinData<IIC_IntDivD , [InstrStage<1, [P7_DU1], 0>,
  96. InstrStage<1, [P7_DU2], 0>,
  97. InstrStage<68, [P7_FX1, P7_FX2]>], // 68-cycle stage on one FX pipeline
  98. [68, 1, 1]>,
  99. InstrItinData<IIC_IntMulHW , [InstrStage<1, [P7_DU1, P7_DU2,
  100. P7_DU3, P7_DU4], 0>,
  101. InstrStage<1, [P7_FX1, P7_FX2]>],
  102. [4, 1, 1]>,
  103. InstrItinData<IIC_IntMulHWU , [InstrStage<1, [P7_DU1, P7_DU2,
  104. P7_DU3, P7_DU4], 0>,
  105. InstrStage<1, [P7_FX1, P7_FX2]>],
  106. [4, 1, 1]>,
  107. InstrItinData<IIC_IntMulHD , [InstrStage<1, [P7_DU1, P7_DU2,
  108. P7_DU3, P7_DU4], 0>,
  109. InstrStage<1, [P7_FX1, P7_FX2]>],
  110. [4, 1, 1]>,
  111. InstrItinData<IIC_IntMulLI , [InstrStage<1, [P7_DU1, P7_DU2,
  112. P7_DU3, P7_DU4], 0>,
  113. InstrStage<1, [P7_FX1, P7_FX2]>],
  114. [4, 1, 1]>,
  115. InstrItinData<IIC_IntRotate , [InstrStage<1, [P7_DU1, P7_DU2,
  116. P7_DU3, P7_DU4], 0>,
  117. InstrStage<1, [P7_FX1, P7_FX2]>],
  118. [1, 1, 1]>,
  119. InstrItinData<IIC_IntRotateD , [InstrStage<1, [P7_DU1, P7_DU2,
  120. P7_DU3, P7_DU4], 0>,
  121. InstrStage<1, [P7_FX1, P7_FX2]>],
  122. [1, 1, 1]>,
  123. InstrItinData<IIC_IntRotateDI , [InstrStage<1, [P7_DU1, P7_DU2,
  124. P7_DU3, P7_DU4], 0>,
  125. InstrStage<1, [P7_FX1, P7_FX2]>],
  126. [1, 1, 1]>,
  127. InstrItinData<IIC_IntShift , [InstrStage<1, [P7_DU1, P7_DU2,
  128. P7_DU3, P7_DU4], 0>,
  129. InstrStage<1, [P7_FX1, P7_FX2]>],
  130. [1, 1, 1]>,
  131. InstrItinData<IIC_IntTrapW , [InstrStage<1, [P7_DU1, P7_DU2,
  132. P7_DU3, P7_DU4], 0>,
  133. InstrStage<1, [P7_FX1, P7_FX2]>],
  134. [1, 1]>,
  135. InstrItinData<IIC_IntTrapD , [InstrStage<1, [P7_DU1, P7_DU2,
  136. P7_DU3, P7_DU4], 0>,
  137. InstrStage<1, [P7_FX1, P7_FX2]>],
  138. [1, 1]>,
  139. InstrItinData<IIC_BrB , [InstrStage<1, [P7_DU5, P7_DU6], 0>, // branch class: dispatches through DU5/DU6 to the BRU
  140. InstrStage<1, [P7_BRU]>],
  141. [3, 1, 1]>,
  142. InstrItinData<IIC_BrCR , [InstrStage<1, [P7_DU1], 0>, // IIC_BrCR: DU1 only, executes on the CRU
  143. InstrStage<1, [P7_CRU]>],
  144. [3, 1, 1]>,
  145. InstrItinData<IIC_BrMCR , [InstrStage<1, [P7_DU5, P7_DU6], 0>,
  146. InstrStage<1, [P7_BRU]>],
  147. [3, 1, 1]>,
  148. InstrItinData<IIC_BrMCRX , [InstrStage<1, [P7_DU5, P7_DU6], 0>, // NOTE(review): IIC_BrMCRX is listed again further down (the "mtcr" entry) with different stages — confirm which one is intended
  149. InstrStage<1, [P7_BRU]>],
  150. [3, 1, 1]>,
  151. InstrItinData<IIC_LdStLoad , [InstrStage<1, [P7_DU1, P7_DU2,
  152. P7_DU3, P7_DU4], 0>,
  153. InstrStage<1, [P7_LS1, P7_LS2]>],
  154. [2, 1, 1]>,
  155. InstrItinData<IIC_LdStLoadUpd , [InstrStage<1, [P7_DU1], 0>, // update form: DU1 and DU2 listed as separate stages, plus an FX stage next to the LS stage
  156. InstrStage<1, [P7_DU2], 0>,
  157. InstrStage<1, [P7_LS1, P7_LS2], 0>,
  158. InstrStage<1, [P7_FX1, P7_FX2]>],
  159. [2, 2, 1, 1]>,
  160. InstrItinData<IIC_LdStLoadUpdX, [InstrStage<1, [P7_DU1], 0>, // indexed update form: DU1 through DU4 each listed as its own stage
  161. InstrStage<1, [P7_DU2], 0>,
  162. InstrStage<1, [P7_DU3], 0>,
  163. InstrStage<1, [P7_DU4], 0>,
  164. InstrStage<1, [P7_FX1, P7_FX2]>,
  165. InstrStage<1, [P7_LS1, P7_LS2], 0>,
  166. InstrStage<1, [P7_FX1, P7_FX2]>],
  167. [3, 3, 1, 1]>,
  168. InstrItinData<IIC_LdStLD , [InstrStage<1, [P7_DU1, P7_DU2,
  169. P7_DU3, P7_DU4], 0>,
  170. InstrStage<1, [P7_LS1, P7_LS2]>],
  171. [2, 1, 1]>,
  172. InstrItinData<IIC_LdStLDU , [InstrStage<1, [P7_DU1], 0>,
  173. InstrStage<1, [P7_DU2], 0>,
  174. InstrStage<1, [P7_LS1, P7_LS2], 0>,
  175. InstrStage<1, [P7_FX1, P7_FX2]>],
  176. [2, 2, 1, 1]>,
  177. InstrItinData<IIC_LdStLDUX , [InstrStage<1, [P7_DU1], 0>,
  178. InstrStage<1, [P7_DU2], 0>,
  179. InstrStage<1, [P7_DU3], 0>,
  180. InstrStage<1, [P7_DU4], 0>,
  181. InstrStage<1, [P7_FX1, P7_FX2]>,
  182. InstrStage<1, [P7_LS1, P7_LS2], 0>,
  183. InstrStage<1, [P7_FX1, P7_FX2]>],
  184. [3, 3, 1, 1]>,
  185. InstrItinData<IIC_LdStLFD , [InstrStage<1, [P7_DU1, P7_DU2,
  186. P7_DU3, P7_DU4], 0>,
  187. InstrStage<1, [P7_LS1, P7_LS2]>],
  188. [3, 1, 1]>, // first operand cycle is 3 here versus 2 for IIC_LdStLoad / IIC_LdStLD
  189. InstrItinData<IIC_LdStLVecX , [InstrStage<1, [P7_DU1, P7_DU2,
  190. P7_DU3, P7_DU4], 0>,
  191. InstrStage<1, [P7_LS1, P7_LS2]>],
  192. [3, 1, 1]>,
  193. InstrItinData<IIC_LdStLFDU , [InstrStage<1, [P7_DU1], 0>,
  194. InstrStage<1, [P7_DU2], 0>,
  195. InstrStage<1, [P7_LS1, P7_LS2], 0>,
  196. InstrStage<1, [P7_FX1, P7_FX2]>],
  197. [3, 3, 1, 1]>,
  198. InstrItinData<IIC_LdStLFDUX , [InstrStage<1, [P7_DU1], 0>,
  199. InstrStage<1, [P7_DU2], 0>,
  200. InstrStage<1, [P7_LS1, P7_LS2], 0>,
  201. InstrStage<1, [P7_FX1, P7_FX2]>],
  202. [3, 3, 1, 1]>,
  203. InstrItinData<IIC_LdStLHA , [InstrStage<1, [P7_DU1], 0>, // IIC_LdStLHA: an LS stage followed by an FX stage
  204. InstrStage<1, [P7_DU2], 0>,
  205. InstrStage<1, [P7_LS1, P7_LS2]>,
  206. InstrStage<1, [P7_FX1, P7_FX2]>],
  207. [3, 1, 1]>,
  208. InstrItinData<IIC_LdStLHAU , [InstrStage<1, [P7_DU1], 0>,
  209. InstrStage<1, [P7_DU2], 0>,
  210. InstrStage<1, [P7_LS1, P7_LS2], 0>,
  211. InstrStage<1, [P7_FX1, P7_FX2]>,
  212. InstrStage<1, [P7_FX1, P7_FX2]>],
  213. [4, 4, 1, 1]>,
  214. InstrItinData<IIC_LdStLHAUX , [InstrStage<1, [P7_DU1], 0>,
  215. InstrStage<1, [P7_DU2], 0>,
  216. InstrStage<1, [P7_DU3], 0>,
  217. InstrStage<1, [P7_DU4], 0>,
  218. InstrStage<1, [P7_FX1, P7_FX2]>,
  219. InstrStage<1, [P7_LS1, P7_LS2], 0>,
  220. InstrStage<1, [P7_FX1, P7_FX2]>,
  221. InstrStage<1, [P7_FX1, P7_FX2]>],
  222. [4, 4, 1, 1]>,
  223. InstrItinData<IIC_LdStLWA , [InstrStage<1, [P7_DU1], 0>,
  224. InstrStage<1, [P7_DU2], 0>,
  225. InstrStage<1, [P7_LS1, P7_LS2]>,
  226. InstrStage<1, [P7_FX1, P7_FX2]>],
  227. [3, 1, 1]>,
  228. InstrItinData<IIC_LdStLWARX, [InstrStage<1, [P7_DU1], 0>, // IIC_LdStLWARX: DU1 through DU4 each listed as its own stage
  229. InstrStage<1, [P7_DU2], 0>,
  230. InstrStage<1, [P7_DU3], 0>,
  231. InstrStage<1, [P7_DU4], 0>,
  232. InstrStage<1, [P7_LS1, P7_LS2]>],
  233. [3, 1, 1]>,
  234. InstrItinData<IIC_LdStLDARX, [InstrStage<1, [P7_DU1], 0>,
  235. InstrStage<1, [P7_DU2], 0>,
  236. InstrStage<1, [P7_DU3], 0>,
  237. InstrStage<1, [P7_DU4], 0>,
  238. InstrStage<1, [P7_LS1, P7_LS2]>],
  239. [3, 1, 1]>,
  240. InstrItinData<IIC_LdStLMW , [InstrStage<1, [P7_DU1, P7_DU2,
  241. P7_DU3, P7_DU4], 0>,
  242. InstrStage<1, [P7_LS1, P7_LS2]>],
  243. [2, 1, 1]>,
  244. InstrItinData<IIC_LdStStore , [InstrStage<1, [P7_DU1, P7_DU2,
  245. P7_DU3, P7_DU4], 0>,
  246. InstrStage<1, [P7_LS1, P7_LS2], 0>, // stores list an LS stage and an FX stage (AGEN + data steering, per the notes above the table)
  247. InstrStage<1, [P7_FX1, P7_FX2]>],
  248. [1, 1, 1]>,
  249. InstrItinData<IIC_LdStSTD , [InstrStage<1, [P7_DU1, P7_DU2,
  250. P7_DU3, P7_DU4], 0>,
  251. InstrStage<1, [P7_LS1, P7_LS2], 0>,
  252. InstrStage<1, [P7_FX1, P7_FX2]>],
  253. [1, 1, 1]>,
  254. InstrItinData<IIC_LdStSTU , [InstrStage<1, [P7_DU1], 0>,
  255. InstrStage<1, [P7_DU2], 0>,
  256. InstrStage<1, [P7_LS1, P7_LS2], 0>,
  257. InstrStage<1, [P7_FX1, P7_FX2]>,
  258. InstrStage<1, [P7_FX1, P7_FX2]>],
  259. [2, 1, 1, 1]>,
  260. InstrItinData<IIC_LdStSTUX , [InstrStage<1, [P7_DU1], 0>,
  261. InstrStage<1, [P7_DU2], 0>,
  262. InstrStage<1, [P7_DU3], 0>,
  263. InstrStage<1, [P7_DU4], 0>,
  264. InstrStage<1, [P7_LS1, P7_LS2], 0>,
  265. InstrStage<1, [P7_FX1, P7_FX2]>,
  266. InstrStage<1, [P7_FX1, P7_FX2]>],
  267. [2, 1, 1, 1]>,
  268. InstrItinData<IIC_LdStSTFD , [InstrStage<1, [P7_DU1, P7_DU2,
  269. P7_DU3, P7_DU4], 0>,
  270. InstrStage<1, [P7_LS1, P7_LS2], 0>,
  271. InstrStage<1, [P7_VS1, P7_VS2]>], // FP store: the second stage is on a VS pipeline instead of FX
  272. [1, 1, 1]>,
  273. InstrItinData<IIC_LdStSTFDU , [InstrStage<1, [P7_DU1], 0>,
  274. InstrStage<1, [P7_DU2], 0>,
  275. InstrStage<1, [P7_LS1, P7_LS2], 0>,
  276. InstrStage<1, [P7_FX1, P7_FX2], 0>,
  277. InstrStage<1, [P7_VS1, P7_VS2]>],
  278. [2, 1, 1, 1]>,
  279. InstrItinData<IIC_LdStSTVEBX , [InstrStage<1, [P7_DU1, P7_DU2,
  280. P7_DU3, P7_DU4], 0>,
  281. InstrStage<1, [P7_LS1, P7_LS2], 0>,
  282. InstrStage<1, [P7_VS2]>], // VS2 only
  283. [1, 1, 1]>,
  284. InstrItinData<IIC_LdStSTDCX , [InstrStage<1, [P7_DU1], 0>,
  285. InstrStage<1, [P7_DU2], 0>,
  286. InstrStage<1, [P7_DU3], 0>,
  287. InstrStage<1, [P7_DU4], 0>,
  288. InstrStage<1, [P7_LS1, P7_LS2]>],
  289. [1, 1, 1]>,
  290. InstrItinData<IIC_LdStSTWCX , [InstrStage<1, [P7_DU1], 0>,
  291. InstrStage<1, [P7_DU2], 0>,
  292. InstrStage<1, [P7_DU3], 0>,
  293. InstrStage<1, [P7_DU4], 0>,
  294. InstrStage<1, [P7_LS1, P7_LS2]>],
  295. [1, 1, 1]>,
  296. InstrItinData<IIC_BrMCRX , [InstrStage<1, [P7_DU1], 0>, // NOTE(review): second IIC_BrMCRX entry (the earlier one uses DU5/DU6 + BRU) — confirm which one is intended
  297. InstrStage<1, [P7_DU2], 0>,
  298. InstrStage<1, [P7_DU3], 0>,
  299. InstrStage<1, [P7_DU4], 0>,
  300. InstrStage<1, [P7_CRU]>,
  301. InstrStage<1, [P7_FX1, P7_FX2]>],
  302. [3, 1]>, // mtcr
  303. InstrItinData<IIC_SprMFCR , [InstrStage<1, [P7_DU1], 0>,
  304. InstrStage<1, [P7_CRU]>],
  305. [6, 1]>,
  306. InstrItinData<IIC_SprMFCRF , [InstrStage<1, [P7_DU1], 0>,
  307. InstrStage<1, [P7_CRU]>],
  308. [3, 1]>,
  309. InstrItinData<IIC_SprMTSPR , [InstrStage<1, [P7_DU1], 0>,
  310. InstrStage<1, [P7_FX1]>], // FX1 only (FX2 is not listed)
  311. [4, 1]>, // mtctr
  312. InstrItinData<IIC_FPGeneral , [InstrStage<1, [P7_DU1, P7_DU2,
  313. P7_DU3, P7_DU4], 0>,
  314. InstrStage<1, [P7_VS1, P7_VS2]>], // scalar FP classes issue on either VS pipeline
  315. [5, 1, 1]>,
  316. InstrItinData<IIC_FPAddSub , [InstrStage<1, [P7_DU1, P7_DU2,
  317. P7_DU3, P7_DU4], 0>,
  318. InstrStage<1, [P7_VS1, P7_VS2]>],
  319. [5, 1, 1]>,
  320. InstrItinData<IIC_FPCompare , [InstrStage<1, [P7_DU1, P7_DU2,
  321. P7_DU3, P7_DU4], 0>,
  322. InstrStage<1, [P7_VS1, P7_VS2]>],
  323. [8, 1, 1]>,
  324. InstrItinData<IIC_FPDivD , [InstrStage<1, [P7_DU1, P7_DU2,
  325. P7_DU3, P7_DU4], 0>,
  326. InstrStage<1, [P7_VS1, P7_VS2]>], // 1-cycle stage even though the first operand cycle is 33 (unlike the integer divides above)
  327. [33, 1, 1]>,
  328. InstrItinData<IIC_FPDivS , [InstrStage<1, [P7_DU1, P7_DU2,
  329. P7_DU3, P7_DU4], 0>,
  330. InstrStage<1, [P7_VS1, P7_VS2]>],
  331. [27, 1, 1]>,
  332. InstrItinData<IIC_FPSqrtD , [InstrStage<1, [P7_DU1, P7_DU2,
  333. P7_DU3, P7_DU4], 0>,
  334. InstrStage<1, [P7_VS1, P7_VS2]>],
  335. [44, 1, 1]>,
  336. InstrItinData<IIC_FPSqrtS , [InstrStage<1, [P7_DU1, P7_DU2,
  337. P7_DU3, P7_DU4], 0>,
  338. InstrStage<1, [P7_VS1, P7_VS2]>],
  339. [32, 1, 1]>,
  340. InstrItinData<IIC_FPFused , [InstrStage<1, [P7_DU1, P7_DU2,
  341. P7_DU3, P7_DU4], 0>,
  342. InstrStage<1, [P7_VS1, P7_VS2]>],
  343. [5, 1, 1, 1]>,
  344. InstrItinData<IIC_FPRes , [InstrStage<1, [P7_DU1, P7_DU2,
  345. P7_DU3, P7_DU4], 0>,
  346. InstrStage<1, [P7_VS1, P7_VS2]>],
  347. [5, 1, 1]>,
  348. InstrItinData<IIC_VecGeneral , [InstrStage<1, [P7_DU1], 0>, // vector classes below list DU1 only (IIC_VecPerm also lists DU2)
  349. InstrStage<1, [P7_VS1]>], // VS1 only
  350. [2, 1, 1]>,
  351. InstrItinData<IIC_VecVSL , [InstrStage<1, [P7_DU1], 0>,
  352. InstrStage<1, [P7_VS1]>],
  353. [2, 1, 1]>,
  354. InstrItinData<IIC_VecVSR , [InstrStage<1, [P7_DU1], 0>,
  355. InstrStage<1, [P7_VS1]>],
  356. [2, 1, 1]>,
  357. InstrItinData<IIC_VecFP , [InstrStage<1, [P7_DU1], 0>,
  358. InstrStage<1, [P7_VS1, P7_VS2]>],
  359. [6, 1, 1]>,
  360. InstrItinData<IIC_VecFPCompare, [InstrStage<1, [P7_DU1], 0>,
  361. InstrStage<1, [P7_VS1, P7_VS2]>],
  362. [6, 1, 1]>,
  363. InstrItinData<IIC_VecFPRound , [InstrStage<1, [P7_DU1], 0>,
  364. InstrStage<1, [P7_VS1, P7_VS2]>],
  365. [6, 1, 1]>,
  366. InstrItinData<IIC_VecComplex , [InstrStage<1, [P7_DU1], 0>,
  367. InstrStage<1, [P7_VS1]>], // VS1 only
  368. [7, 1, 1]>,
  369. InstrItinData<IIC_VecPerm , [InstrStage<1, [P7_DU1, P7_DU2], 0>,
  370. InstrStage<1, [P7_VS2]>], // VS2 only
  371. [3, 1, 1]>
  372. ]>;
  373. // ===---------------------------------------------------------------------===//
  374. // P7 machine model for scheduling and other instruction cost heuristics.
  375. def P7Model : SchedMachineModel { // Machine model for the P7: scheduler parameters plus a link to the P7Itineraries table.
  376. let IssueWidth = 6; // 4 (non-branch) instructions are dispatched per cycle.
  377. // Note that the dispatch bundle size is 6 (including
  378. // branches), but the total internal issue bandwidth per
  379. // cycle (from all queues) is 8.
  380. let LoadLatency = 3; // Optimistic load latency assuming bypass.
  381. // This is overridden by OperandCycles if the
  382. // Itineraries are queried instead.
  383. let MispredictPenalty = 16; // NOTE(review): presumably expressed in cycles — confirm against TargetSchedule.td
  384. // Try to make sure we have at least 10 dispatch groups in a loop.
  385. let LoopMicroOpBufferSize = 40; // 40 = 10 groups x 4 non-branch instructions per group
  386. let CompleteModel = 0; // NOTE(review): presumably marks the model as not covering every instruction — confirm
  387. let Itineraries = P7Itineraries; // stage/latency data comes from the P7Itineraries table in this file
  388. }