PPCScheduleP8.td 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413
  1. //===-- PPCScheduleP8.td - PPC P8 Scheduling Definitions ---*- tablegen -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file defines the itinerary class data for the POWER8 processor.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. // Scheduling for the P8 involves tracking two types of resources:
  13. // 1. The dispatch bundle slots
  14. // 2. The functional unit resources
  15. // Dispatch units (DU1-DU6 serve non-branch instructions):
  16. def P8_DU1 : FuncUnit;
  17. def P8_DU2 : FuncUnit;
  18. def P8_DU3 : FuncUnit;
  19. def P8_DU4 : FuncUnit;
  20. def P8_DU5 : FuncUnit;
  21. def P8_DU6 : FuncUnit;
  22. def P8_DU7 : FuncUnit; // Only branch instructions will use DU7,DU8
  23. def P8_DU8 : FuncUnit;
  24. // Execution units: 10 insns per cycle (2-LU, 2-LSU, 2-FXU, 2-FPU, 1-CRU, 1-BRU).
  25. def P8_LU1 : FuncUnit; // Loads or fixed-point operations 1
  26. def P8_LU2 : FuncUnit; // Loads or fixed-point operations 2
  27. // Load/Store pipelines can handle stores, fixed-point loads, and simple
  28. // fixed-point operations.
  29. def P8_LSU1 : FuncUnit; // Load/Store pipeline 1
  30. def P8_LSU2 : FuncUnit; // Load/Store pipeline 2
  31. // Fixed-point units
  32. def P8_FXU1 : FuncUnit; // FX pipeline 1
  33. def P8_FXU2 : FuncUnit; // FX pipeline 2
  34. // The Floating-Point Unit (FPU) and Vector Media Extension (VMX) units
  35. // are combined on P7 and newer into a Vector Scalar Unit (VSU).
  36. // The P8 instruction latency documents still refer to the unit as the
  37. // FPU, so keep in mind that FPU==VSU.
  38. // In contrast to the P7, the VMX units on P8 are symmetric, so there is no
  39. // need to split vector integer ops or 128-bit load/store/perms to specific units.
  40. def P8_FPU1 : FuncUnit; // VS pipeline 1
  41. def P8_FPU2 : FuncUnit; // VS pipeline 2
  42. def P8_CRU : FuncUnit; // CR unit (CR logicals and move-from-SPRs)
  43. def P8_BRU : FuncUnit; // BR unit
  44. def P8Itineraries : ProcessorItineraries< // Args: functional units, (empty) bypass list, itinerary data.
  45. [P8_DU1, P8_DU2, P8_DU3, P8_DU4, P8_DU5, P8_DU6, P8_DU7, P8_DU8,
  46. P8_LU1, P8_LU2, P8_LSU1, P8_LSU2, P8_FXU1, P8_FXU2,
  47. P8_FPU1, P8_FPU2, P8_CRU, P8_BRU], [], [
  48. InstrItinData<IIC_IntSimple , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
  49. P8_DU4, P8_DU5, P8_DU6], 0>, // TimeInc 0: the next stage starts in the same cycle
  50. InstrStage<1, [P8_FXU1, P8_FXU2,
  51. P8_LU1, P8_LU2,
  52. P8_LSU1, P8_LSU2]>],
  53. [1, 1, 1]>, // operand cycles for this itinerary class
  54. InstrItinData<IIC_IntGeneral , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
  55. P8_DU4, P8_DU5, P8_DU6], 0>,
  56. InstrStage<1, [P8_FXU1, P8_FXU2, P8_LU1,
  57. P8_LU2, P8_LSU1, P8_LSU2]>],
  58. [1, 1, 1]>,
  59. InstrItinData<IIC_IntISEL, [InstrStage<1, [P8_DU1], 0>,
  60. InstrStage<1, [P8_FXU1, P8_FXU2], 0>,
  61. InstrStage<1, [P8_BRU]>],
  62. [1, 1, 1, 1]>,
  63. InstrItinData<IIC_IntCompare , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
  64. P8_DU4, P8_DU5, P8_DU6], 0>,
  65. InstrStage<1, [P8_FXU1, P8_FXU2]>],
  66. [1, 1, 1]>,
  67. InstrItinData<IIC_IntDivW , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
  68. P8_DU4, P8_DU5, P8_DU6], 0>,
  69. InstrStage<15, [P8_FXU1, P8_FXU2]>],
  70. [15, 1, 1]>,
  71. InstrItinData<IIC_IntDivD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
  72. P8_DU4, P8_DU5, P8_DU6], 0>,
  73. InstrStage<23, [P8_FXU1, P8_FXU2]>],
  74. [23, 1, 1]>,
  75. InstrItinData<IIC_IntMulHW , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
  76. P8_DU4, P8_DU5, P8_DU6], 0>,
  77. InstrStage<1, [P8_FXU1, P8_FXU2]>],
  78. [4, 1, 1]>,
  79. InstrItinData<IIC_IntMulHWU , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
  80. P8_DU4, P8_DU5, P8_DU6], 0>,
  81. InstrStage<1, [P8_FXU1, P8_FXU2]>],
  82. [4, 1, 1]>,
  83. InstrItinData<IIC_IntMulHD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
  84. P8_DU4, P8_DU5, P8_DU6], 0>,
  85. InstrStage<1, [P8_FXU1, P8_FXU2]>],
  86. [4, 1, 1]>,
  87. InstrItinData<IIC_IntMulLI , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
  88. P8_DU4, P8_DU5, P8_DU6], 0>,
  89. InstrStage<1, [P8_FXU1, P8_FXU2]>],
  90. [4, 1, 1]>,
  91. InstrItinData<IIC_IntRotate , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
  92. P8_DU4, P8_DU5, P8_DU6], 0>,
  93. InstrStage<1, [P8_FXU1, P8_FXU2]>],
  94. [1, 1, 1]>,
  95. InstrItinData<IIC_IntRotateD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
  96. P8_DU4, P8_DU5, P8_DU6], 0>,
  97. InstrStage<1, [P8_FXU1, P8_FXU2]>],
  98. [1, 1, 1]>,
  99. InstrItinData<IIC_IntRotateDI , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
  100. P8_DU4, P8_DU5, P8_DU6], 0>,
  101. InstrStage<1, [P8_FXU1, P8_FXU2]>],
  102. [1, 1, 1]>,
  103. InstrItinData<IIC_IntShift , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
  104. P8_DU4, P8_DU5, P8_DU6], 0>,
  105. InstrStage<1, [P8_FXU1, P8_FXU2]>],
  106. [1, 1, 1]>,
  107. InstrItinData<IIC_IntTrapW , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
  108. P8_DU4, P8_DU5, P8_DU6], 0>,
  109. InstrStage<1, [P8_FXU1, P8_FXU2]>],
  110. [1, 1]>,
  111. InstrItinData<IIC_IntTrapD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
  112. P8_DU4, P8_DU5, P8_DU6], 0>,
  113. InstrStage<1, [P8_FXU1, P8_FXU2]>],
  114. [1, 1]>,
  115. InstrItinData<IIC_BrB , [InstrStage<1, [P8_DU7, P8_DU8], 0>,
  116. InstrStage<1, [P8_BRU]>],
  117. [3, 1, 1]>,
  118. // FIXME - the Br* groups below are not branch related, so should probably
  119. // be renamed.
  120. // IIC_BrCR consists of the cr* instructions (crand, crnor, creqv, etc.)
  121. // and should be 'First' in dispatch.
  122. InstrItinData<IIC_BrCR , [InstrStage<1, [P8_DU1], 0>,
  123. InstrStage<1, [P8_CRU]>],
  124. [3, 1, 1]>,
  125. // IIC_BrMCR consists of the mcrf instruction.
  126. InstrItinData<IIC_BrMCR , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
  127. P8_DU4, P8_DU5, P8_DU6], 0>,
  128. InstrStage<1, [P8_CRU]>],
  129. [3, 1, 1]>,
  130. // IIC_BrMCRX consists of mcrxr (obsolete instruction) and mtcrf, which
  131. // should be first in the dispatch group.
  132. InstrItinData<IIC_BrMCRX , [InstrStage<1, [P8_DU1], 0>,
  133. InstrStage<1, [P8_FXU1, P8_FXU2]>],
  134. [3, 1, 1]>,
  135. InstrItinData<IIC_BrMCRX , [InstrStage<1, [P8_DU1], 0>, // NOTE(review): second IIC_BrMCRX entry; same stages, different operand cycles - confirm which is intended
  136. InstrStage<1, [P8_FXU1, P8_FXU2]>],
  137. [3, 1]>,
  138. InstrItinData<IIC_LdStLoad , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
  139. P8_DU4, P8_DU5, P8_DU6], 0>,
  140. InstrStage<1, [P8_LSU1, P8_LSU2,
  141. P8_LU1, P8_LU2]>],
  142. [2, 1, 1]>,
  143. InstrItinData<IIC_LdStLoadUpd , [InstrStage<1, [P8_DU1], 0>,
  144. InstrStage<1, [P8_DU2], 0>,
  145. InstrStage<1, [P8_LSU1, P8_LSU2,
  146. P8_LU1, P8_LU2 ], 0>,
  147. InstrStage<1, [P8_FXU1, P8_FXU2]>],
  148. [2, 2, 1, 1]>,
  149. // Update-Indexed form loads/stores are no longer first and last in the
  150. // dispatch group. They are simply cracked, so require DU1,DU2.
  151. InstrItinData<IIC_LdStLoadUpdX, [InstrStage<1, [P8_DU1], 0>,
  152. InstrStage<1, [P8_DU2], 0>,
  153. InstrStage<1, [P8_LSU1, P8_LSU2,
  154. P8_LU1, P8_LU2], 0>,
  155. InstrStage<1, [P8_FXU1, P8_FXU2]>],
  156. [3, 3, 1, 1]>,
  157. InstrItinData<IIC_LdStLD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
  158. P8_DU4, P8_DU5, P8_DU6], 0>,
  159. InstrStage<1, [P8_LSU1, P8_LSU2,
  160. P8_LU1, P8_LU2]>],
  161. [2, 1, 1]>,
  162. InstrItinData<IIC_LdStLDU , [InstrStage<1, [P8_DU1], 0>,
  163. InstrStage<1, [P8_DU2], 0>,
  164. InstrStage<1, [P8_LSU1, P8_LSU2,
  165. P8_LU1, P8_LU2], 0>,
  166. InstrStage<1, [P8_FXU1, P8_FXU2]>],
  167. [2, 2, 1, 1]>,
  168. InstrItinData<IIC_LdStLDUX , [InstrStage<1, [P8_DU1], 0>,
  169. InstrStage<1, [P8_DU2], 0>,
  170. InstrStage<1, [P8_LSU1, P8_LSU2,
  171. P8_LU1, P8_LU2], 0>,
  172. InstrStage<1, [P8_FXU1, P8_FXU2]>],
  173. [3, 3, 1, 1]>,
  174. InstrItinData<IIC_LdStLFD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
  175. P8_DU4, P8_DU5, P8_DU6], 0>,
  176. InstrStage<1, [P8_LU1, P8_LU2]>],
  177. [3, 1, 1]>,
  178. InstrItinData<IIC_LdStLVecX , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
  179. P8_DU4, P8_DU5, P8_DU6], 0>,
  180. InstrStage<1, [P8_LU1, P8_LU2]>],
  181. [3, 1, 1]>,
  182. InstrItinData<IIC_LdStLFDU , [InstrStage<1, [P8_DU1], 0>,
  183. InstrStage<1, [P8_DU2], 0>,
  184. InstrStage<1, [P8_LU1, P8_LU2], 0>,
  185. InstrStage<1, [P8_FXU1, P8_FXU2]>],
  186. [3, 3, 1, 1]>,
  187. InstrItinData<IIC_LdStLFDUX , [InstrStage<1, [P8_DU1], 0>,
  188. InstrStage<1, [P8_DU2], 0>,
  189. InstrStage<1, [P8_LU1, P8_LU2], 0>,
  190. InstrStage<1, [P8_FXU1, P8_FXU2]>],
  191. [3, 3, 1, 1]>,
  192. InstrItinData<IIC_LdStLHA , [InstrStage<1, [P8_DU1], 0>,
  193. InstrStage<1, [P8_DU2], 0>,
  194. InstrStage<1, [P8_LSU1, P8_LSU2,
  195. P8_LU1, P8_LU2], 0>,
  196. InstrStage<1, [P8_FXU1, P8_FXU2,
  197. P8_LU1, P8_LU2]>],
  198. [3, 1, 1]>,
  199. InstrItinData<IIC_LdStLHAU , [InstrStage<1, [P8_DU1], 0>,
  200. InstrStage<1, [P8_DU2], 0>,
  201. InstrStage<1, [P8_LSU1, P8_LSU2,
  202. P8_LU1, P8_LU2], 0>,
  203. InstrStage<1, [P8_FXU1, P8_FXU2]>,
  204. InstrStage<1, [P8_FXU1, P8_FXU2]>],
  205. [4, 4, 1, 1]>,
  206. // first+last in dispatch group.
  207. InstrItinData<IIC_LdStLHAUX , [InstrStage<1, [P8_DU1], 0>,
  208. InstrStage<1, [P8_DU2], 0>,
  209. InstrStage<1, [P8_DU3], 0>,
  210. InstrStage<1, [P8_DU4], 0>,
  211. InstrStage<1, [P8_DU5], 0>,
  212. InstrStage<1, [P8_DU6], 0>,
  213. InstrStage<1, [P8_LSU1, P8_LSU2,
  214. P8_LU1, P8_LU2], 0>,
  215. InstrStage<1, [P8_FXU1, P8_FXU2]>,
  216. InstrStage<1, [P8_FXU1, P8_FXU2]>],
  217. [4, 4, 1, 1]>,
  218. InstrItinData<IIC_LdStLWA , [InstrStage<1, [P8_DU1], 0>,
  219. InstrStage<1, [P8_DU2], 0>,
  220. InstrStage<1, [P8_LSU1, P8_LSU2,
  221. P8_LU1, P8_LU2]>,
  222. InstrStage<1, [P8_FXU1, P8_FXU2]>],
  223. [3, 1, 1]>,
  224. InstrItinData<IIC_LdStLWARX, [InstrStage<1, [P8_DU1], 0>,
  225. InstrStage<1, [P8_DU2], 0>,
  226. InstrStage<1, [P8_DU3], 0>,
  227. InstrStage<1, [P8_DU4], 0>,
  228. InstrStage<1, [P8_LSU1, P8_LSU2,
  229. P8_LU1, P8_LU2]>],
  230. [3, 1, 1]>,
  231. // first+last
  232. InstrItinData<IIC_LdStLDARX, [InstrStage<1, [P8_DU1], 0>,
  233. InstrStage<1, [P8_DU2], 0>,
  234. InstrStage<1, [P8_DU3], 0>,
  235. InstrStage<1, [P8_DU4], 0>,
  236. InstrStage<1, [P8_DU5], 0>,
  237. InstrStage<1, [P8_DU6], 0>,
  238. InstrStage<1, [P8_LSU1, P8_LSU2,
  239. P8_LU1, P8_LU2]>],
  240. [3, 1, 1]>,
  241. InstrItinData<IIC_LdStLMW , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
  242. P8_DU4, P8_DU5, P8_DU6], 0>,
  243. InstrStage<1, [P8_LSU1, P8_LSU2,
  244. P8_LU1, P8_LU2]>],
  245. [2, 1, 1]>,
  246. // Stores are dual-issued from the issue queue, so may only take up one
  247. // dispatch slot. The instruction will be broken into two IOPS. The agen
  248. // op is issued to the LSU, and the data op (register fetch) is issued
  249. // to either the LU (GPR store) or the VSU (FPR store).
  250. InstrItinData<IIC_LdStStore , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
  251. P8_DU4, P8_DU5, P8_DU6], 0>,
  252. InstrStage<1, [P8_LSU1, P8_LSU2]>,
  253. InstrStage<1, [P8_LU1, P8_LU2]>],
  254. [1, 1, 1]>,
  255. InstrItinData<IIC_LdStSTD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
  256. P8_DU4, P8_DU5, P8_DU6], 0>,
  257. InstrStage<1, [P8_LU1, P8_LU2,
  258. P8_LSU1, P8_LSU2]>], // comma required: without it the next list parses as a slice of the stage list
  259. [1, 1, 1]>,
  260. InstrItinData<IIC_LdStSTU , [InstrStage<1, [P8_DU1], 0>,
  261. InstrStage<1, [P8_DU2], 0>,
  262. InstrStage<1, [P8_LU1, P8_LU2,
  263. P8_LSU1, P8_LSU2], 0>,
  264. InstrStage<1, [P8_FXU1, P8_FXU2]>],
  265. [2, 1, 1, 1]>,
  266. // First+last
  267. InstrItinData<IIC_LdStSTUX , [InstrStage<1, [P8_DU1], 0>,
  268. InstrStage<1, [P8_DU2], 0>,
  269. InstrStage<1, [P8_DU3], 0>,
  270. InstrStage<1, [P8_DU4], 0>,
  271. InstrStage<1, [P8_DU5], 0>,
  272. InstrStage<1, [P8_DU6], 0>,
  273. InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
  274. InstrStage<1, [P8_FXU1, P8_FXU2]>,
  275. InstrStage<1, [P8_FXU1, P8_FXU2]>],
  276. [2, 1, 1, 1]>,
  277. InstrItinData<IIC_LdStSTFD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
  278. P8_DU4, P8_DU5, P8_DU6], 0>,
  279. InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
  280. InstrStage<1, [P8_FPU1, P8_FPU2]>],
  281. [1, 1, 1]>,
  282. InstrItinData<IIC_LdStSTFDU , [InstrStage<1, [P8_DU1], 0>,
  283. InstrStage<1, [P8_DU2], 0>,
  284. InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
  285. InstrStage<1, [P8_FXU1, P8_FXU2], 0>,
  286. InstrStage<1, [P8_FPU1, P8_FPU2]>],
  287. [2, 1, 1, 1]>,
  288. InstrItinData<IIC_LdStSTVEBX , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
  289. P8_DU4, P8_DU5, P8_DU6], 0>,
  290. InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
  291. InstrStage<1, [P8_FPU1, P8_FPU2]>],
  292. [1, 1, 1]>,
  293. InstrItinData<IIC_LdStSTDCX , [InstrStage<1, [P8_DU1], 0>,
  294. InstrStage<1, [P8_DU2], 0>,
  295. InstrStage<1, [P8_DU3], 0>,
  296. InstrStage<1, [P8_DU4], 0>,
  297. InstrStage<1, [P8_DU5], 0>,
  298. InstrStage<1, [P8_DU6], 0>,
  299. InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
  300. InstrStage<1, [P8_LU1, P8_LU2]>],
  301. [1, 1, 1]>,
  302. InstrItinData<IIC_LdStSTWCX , [InstrStage<1, [P8_DU1], 0>,
  303. InstrStage<1, [P8_DU2], 0>,
  304. InstrStage<1, [P8_DU3], 0>,
  305. InstrStage<1, [P8_DU4], 0>,
  306. InstrStage<1, [P8_DU5], 0>,
  307. InstrStage<1, [P8_DU6], 0>,
  308. InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
  309. InstrStage<1, [P8_LU1, P8_LU2]>],
  310. [1, 1, 1]>,
  311. InstrItinData<IIC_SprMFCR , [InstrStage<1, [P8_DU1], 0>,
  312. InstrStage<1, [P8_CRU]>],
  313. [6, 1]>,
  314. InstrItinData<IIC_SprMFCRF , [InstrStage<1, [P8_DU1], 0>,
  315. InstrStage<1, [P8_CRU]>],
  316. [3, 1]>,
  317. InstrItinData<IIC_SprMTSPR , [InstrStage<1, [P8_DU1], 0>,
  318. InstrStage<1, [P8_FXU1, P8_FXU2]>],
  319. [4, 1]>, // mtctr
  320. InstrItinData<IIC_FPGeneral , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
  321. P8_DU4, P8_DU5, P8_DU6], 0>,
  322. InstrStage<1, [P8_FPU1, P8_FPU2]>],
  323. [5, 1, 1]>,
  324. InstrItinData<IIC_FPAddSub , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
  325. P8_DU4, P8_DU5, P8_DU6], 0>,
  326. InstrStage<1, [P8_FPU1, P8_FPU2]>],
  327. [5, 1, 1]>,
  328. InstrItinData<IIC_FPCompare , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
  329. P8_DU4, P8_DU5, P8_DU6], 0>,
  330. InstrStage<1, [P8_FPU1, P8_FPU2]>],
  331. [8, 1, 1]>,
  332. InstrItinData<IIC_FPDivD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
  333. P8_DU4, P8_DU5, P8_DU6], 0>,
  334. InstrStage<1, [P8_FPU1, P8_FPU2]>],
  335. [33, 1, 1]>,
  336. InstrItinData<IIC_FPDivS , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
  337. P8_DU4, P8_DU5, P8_DU6], 0>,
  338. InstrStage<1, [P8_FPU1, P8_FPU2]>],
  339. [27, 1, 1]>,
  340. InstrItinData<IIC_FPSqrtD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
  341. P8_DU4, P8_DU5, P8_DU6], 0>,
  342. InstrStage<1, [P8_FPU1, P8_FPU2]>],
  343. [44, 1, 1]>,
  344. InstrItinData<IIC_FPSqrtS , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
  345. P8_DU4, P8_DU5, P8_DU6], 0>,
  346. InstrStage<1, [P8_FPU1, P8_FPU2]>],
  347. [32, 1, 1]>,
  348. InstrItinData<IIC_FPFused , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
  349. P8_DU4, P8_DU5, P8_DU6], 0>,
  350. InstrStage<1, [P8_FPU1, P8_FPU2]>],
  351. [5, 1, 1, 1]>,
  352. InstrItinData<IIC_FPRes , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
  353. P8_DU4, P8_DU5, P8_DU6], 0>,
  354. InstrStage<1, [P8_FPU1, P8_FPU2]>],
  355. [5, 1, 1]>,
  356. InstrItinData<IIC_VecGeneral , [InstrStage<1, [P8_DU1], 0>,
  357. InstrStage<1, [P8_FPU1, P8_FPU2]>],
  358. [2, 1, 1]>,
  359. InstrItinData<IIC_VecVSL , [InstrStage<1, [P8_DU1], 0>,
  360. InstrStage<1, [P8_FPU1, P8_FPU2]>],
  361. [2, 1, 1]>,
  362. InstrItinData<IIC_VecVSR , [InstrStage<1, [P8_DU1], 0>,
  363. InstrStage<1, [P8_FPU1, P8_FPU2]>],
  364. [2, 1, 1]>,
  365. InstrItinData<IIC_VecFP , [InstrStage<1, [P8_DU1], 0>,
  366. InstrStage<1, [P8_FPU1, P8_FPU2]>],
  367. [6, 1, 1]>,
  368. InstrItinData<IIC_VecFPCompare, [InstrStage<1, [P8_DU1], 0>,
  369. InstrStage<1, [P8_FPU1, P8_FPU2]>],
  370. [6, 1, 1]>,
  371. InstrItinData<IIC_VecFPRound , [InstrStage<1, [P8_DU1], 0>,
  372. InstrStage<1, [P8_FPU1, P8_FPU2]>],
  373. [6, 1, 1]>,
  374. InstrItinData<IIC_VecComplex , [InstrStage<1, [P8_DU1], 0>,
  375. InstrStage<1, [P8_FPU1, P8_FPU2]>],
  376. [7, 1, 1]>,
  377. InstrItinData<IIC_VecPerm , [InstrStage<1, [P8_DU1, P8_DU2], 0>,
  378. InstrStage<1, [P8_FPU1, P8_FPU2]>],
  379. [3, 1, 1]>
  380. ]>;
  381. // ===---------------------------------------------------------------------===//
  382. // P8 machine model for scheduling and other instruction cost heuristics.
  383. // P8 has an 8 insn dispatch group (6 non-branch, 2 branch) and can issue up
  384. // to 10 insns per cycle (2-LU, 2-LSU, 2-FXU, 2-FPU, 1-CRU, 1-BRU).
  385. def P8Model : SchedMachineModel {
  386. let Itineraries = P8Itineraries;
  387. let CompleteModel = 0;
  388. // Dispatch: up to 8 instructions per cycle, i.e. up to six non-branch
  389. // instructions plus up to two branches in a dispatch group.
  390. let IssueWidth = 8;
  391. // Optimistic load latency assuming bypass. This is overridden by
  392. // OperandCycles if the Itineraries are queried instead.
  393. let LoadLatency = 3;
  394. let MispredictPenalty = 16;
  395. // Try to make sure we have at least 10 dispatch groups in a loop.
  396. let LoopMicroOpBufferSize = 60;
  397. }