PPCScheduleP10.td 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416
  1. //===--- PPCScheduleP10.td - P10 Scheduling Definitions -*- tablegen -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. // Automatically generated file, do not edit!
  9. //
  10. // This file defines the resources required by P10 instructions.
  11. //===----------------------------------------------------------------------===//
  // Operand-read classes used to model pipeline forwarding, one per consuming
  // pipeline. They are declared here as plain SchedRead records; each of them
  // is bound to a read-advance definition by a SchedAlias later in this file.
  def P10BR_Read  : SchedRead;
  def P10DF_Read  : SchedRead;
  def P10DV_Read  : SchedRead;
  def P10DX_Read  : SchedRead;
  def P10F2_Read  : SchedRead;
  def P10FX_Read  : SchedRead;
  def P10LD_Read  : SchedRead;
  def P10MU_Read  : SchedRead;
  def P10PM_Read  : SchedRead;
  def P10ST_Read  : SchedRead;
  def P10SX_Read  : SchedRead;
  def P10vMU_Read : SchedRead;
  // Scheduling machine model for P10.
  def P10Model : SchedMachineModel {
    // Issue width of 8 (the same count as the P10_ANY_SLOT dispatch resource).
    let IssueWidth = 8;
    let CompleteModel = 1;

    // TODO - Need to be updated according to P10 UM.
    let MicroOpBufferSize = 44;

    // TODO - tune this on real HW once it arrives. For now, we will use the
    // same value as we do on P9.
    let LoopMicroOpBufferSize = 60;

    // Do not support SPE (Signal Processing Engine) on Power 10. The list also
    // names the IsE500, IsBookE and IsISAFuture predicates.
    let UnsupportedFeatures = [
      HasSPE,
      IsE500,
      IsBookE,
      IsISAFuture
    ];
  }
  36. let SchedModel = P10Model in {
  // ***************** Processor Resources *****************

  // Pipeline Groups
  // ProcResource<N> declares a pipeline group with N units.
  def P10_BF : ProcResource<4>;  // Binary Floating Point: four pipelines.
  def P10_BR : ProcResource<2>;  // Branch: two pipelines.
  def P10_CY : ProcResource<4>;  // Crypto: four pipelines.
  def P10_DF : ProcResource<1>;  // Decimal Floating Point: one pipeline.
  def P10_DV : ProcResource<2>;  // Fixed-point divide (DIV): two pipelines.
  def P10_DX : ProcResource<2>;  // 128-bit fixed-point and BCD: two pipelines.
  def P10_FX : ProcResource<4>;  // ALU: four pipelines.
  def P10_LD : ProcResource<2>;  // Load: two pipelines.
  def P10_MM : ProcResource<2>;  // 512-bit SIMD matrix multiply engine: two
                                 // pipelines.
  def P10_PM : ProcResource<4>;  // 128-bit permute (PM): four pipelines.
  def P10_ST : ProcResource<2>;  // ST-D: two pipelines.
  def P10_SX : ProcResource<2>;  // Simple Fixed-point (SFX): two pipelines.
  // Dispatch Groups

  // Dispatch to any of the eight slots.
  def P10_ANY_SLOT : ProcResource<8>;

  // Dispatch to the four even slots; P10_ANY_SLOT is the Super resource.
  def P10_EVEN_SLOT : ProcResource<4> {
    let Super = P10_ANY_SLOT;
  }

  // Dispatch to the four odd slots; P10_ANY_SLOT is the Super resource.
  def P10_ODD_SLOT : ProcResource<4> {
    let Super = P10_ANY_SLOT;
  }
  // Dispatch Rules
  // Every dispatch write is given NumMicroOps = 0 and Latency = 1; it only
  // names the dispatch-slot resource(s) it uses.

  // Dispatch Rule '-'
  def P10W_DISP_ANY : SchedWriteRes<[P10_ANY_SLOT]> {
    let NumMicroOps = 0;
    let Latency = 1;
  }

  // Dispatch Rule '-', even slot
  def P10W_DISP_EVEN : SchedWriteRes<[P10_EVEN_SLOT]> {
    let NumMicroOps = 0;
    let Latency = 1;
  }

  // Dispatch Rule 'P' (uses one even and one odd slot)
  def P10W_DISP_PAIR : SchedWriteRes<[P10_EVEN_SLOT, P10_ODD_SLOT]> {
    let NumMicroOps = 0;
    let Latency = 1;
  }
  // ***************** SchedWriteRes Definitions *****************

  // BF pipeline writes: latency ranges from 7 to 36 cycles.
  // Where ResourceCycles is set, the operation keeps the pipeline busy for
  // that many cycles (up to 10 for BF).
  let Latency = 7 in
  def P10W_BF_7C : SchedWriteRes<[P10_BF]>;

  let ResourceCycles = [ 5 ], Latency = 22 in
  def P10W_BF_22C : SchedWriteRes<[P10_BF]>;

  let ResourceCycles = [ 8 ], Latency = 24 in
  def P10W_BF_24C : SchedWriteRes<[P10_BF]>;

  let ResourceCycles = [ 5 ], Latency = 26 in
  def P10W_BF_26C : SchedWriteRes<[P10_BF]>;

  let ResourceCycles = [ 7 ], Latency = 27 in
  def P10W_BF_27C : SchedWriteRes<[P10_BF]>;

  let ResourceCycles = [ 10 ], Latency = 36 in
  def P10W_BF_36C : SchedWriteRes<[P10_BF]>;
  // BR pipeline write: 2 cycles to complete.
  let Latency = 2 in
  def P10W_BR_2C : SchedWriteRes<[P10_BR]>;

  // CY pipeline write: 7 cycles to complete.
  let Latency = 7 in
  def P10W_CY_7C : SchedWriteRes<[P10_CY]>;
  // DF pipeline writes: latency ranges from 13 to 174 cycles.
  // Where ResourceCycles is set, the operation keeps the pipeline busy for
  // that many cycles (up to 67 for DF).
  let Latency = 13 in
  def P10W_DF_13C : SchedWriteRes<[P10_DF]>;

  let ResourceCycles = [ 16 ], Latency = 24 in
  def P10W_DF_24C : SchedWriteRes<[P10_DF]>;

  let ResourceCycles = [ 17 ], Latency = 25 in
  def P10W_DF_25C : SchedWriteRes<[P10_DF]>;

  let ResourceCycles = [ 18 ], Latency = 26 in
  def P10W_DF_26C : SchedWriteRes<[P10_DF]>;

  let ResourceCycles = [ 22 ], Latency = 32 in
  def P10W_DF_32C : SchedWriteRes<[P10_DF]>;

  let ResourceCycles = [ 25 ], Latency = 33 in
  def P10W_DF_33C : SchedWriteRes<[P10_DF]>;

  let ResourceCycles = [ 25 ], Latency = 34 in
  def P10W_DF_34C : SchedWriteRes<[P10_DF]>;

  let ResourceCycles = [ 30 ], Latency = 38 in
  def P10W_DF_38C : SchedWriteRes<[P10_DF]>;

  let ResourceCycles = [ 17 ], Latency = 40 in
  def P10W_DF_40C : SchedWriteRes<[P10_DF]>;

  let ResourceCycles = [ 34 ], Latency = 43 in
  def P10W_DF_43C : SchedWriteRes<[P10_DF]>;

  let ResourceCycles = [ 49 ], Latency = 59 in
  def P10W_DF_59C : SchedWriteRes<[P10_DF]>;

  let ResourceCycles = [ 12 ], Latency = 61 in
  def P10W_DF_61C : SchedWriteRes<[P10_DF]>;

  let ResourceCycles = [ 15 ], Latency = 68 in
  def P10W_DF_68C : SchedWriteRes<[P10_DF]>;

  let ResourceCycles = [ 67 ], Latency = 77 in
  def P10W_DF_77C : SchedWriteRes<[P10_DF]>;

  let ResourceCycles = [ 12 ], Latency = 87 in
  def P10W_DF_87C : SchedWriteRes<[P10_DF]>;

  let ResourceCycles = [ 32 ], Latency = 100 in
  def P10W_DF_100C : SchedWriteRes<[P10_DF]>;

  let ResourceCycles = [ 33 ], Latency = 174 in
  def P10W_DF_174C : SchedWriteRes<[P10_DF]>;
  // DV pipeline writes: latency ranges from 20 to 83 cycles.
  // Every DV write sets ResourceCycles; the operation keeps the pipeline busy
  // for that many cycles (10, 21 or 33).
  let ResourceCycles = [ 10 ], Latency = 20 in
  def P10W_DV_20C : SchedWriteRes<[P10_DV]>;

  let ResourceCycles = [ 10 ], Latency = 25 in
  def P10W_DV_25C : SchedWriteRes<[P10_DV]>;

  let ResourceCycles = [ 10 ], Latency = 27 in
  def P10W_DV_27C : SchedWriteRes<[P10_DV]>;

  let ResourceCycles = [ 10 ], Latency = 41 in
  def P10W_DV_41C : SchedWriteRes<[P10_DV]>;

  let ResourceCycles = [ 21 ], Latency = 43 in
  def P10W_DV_43C : SchedWriteRes<[P10_DV]>;

  let ResourceCycles = [ 21 ], Latency = 47 in
  def P10W_DV_47C : SchedWriteRes<[P10_DV]>;

  let ResourceCycles = [ 33 ], Latency = 54 in
  def P10W_DV_54C : SchedWriteRes<[P10_DV]>;

  let ResourceCycles = [ 33 ], Latency = 60 in
  def P10W_DV_60C : SchedWriteRes<[P10_DV]>;

  let ResourceCycles = [ 21 ], Latency = 75 in
  def P10W_DV_75C : SchedWriteRes<[P10_DV]>;

  let ResourceCycles = [ 33 ], Latency = 83 in
  def P10W_DV_83C : SchedWriteRes<[P10_DV]>;
  // DX pipeline write: 5 cycles to complete.
  let Latency = 5 in
  def P10W_DX_5C : SchedWriteRes<[P10_DX]>;

  // F2 write: 4 cycles to complete. Modeled on the P10_FX resource.
  let Latency = 4 in
  def P10W_F2_4C : SchedWriteRes<[P10_FX]>;

  // FX pipeline writes: 2 or 3 cycles to complete.
  let Latency = 2 in
  def P10W_FX_2C : SchedWriteRes<[P10_FX]>;

  let Latency = 3 in
  def P10W_FX_3C : SchedWriteRes<[P10_FX]>;

  // LD pipeline write: 6 cycles to complete.
  let Latency = 6 in
  def P10W_LD_6C : SchedWriteRes<[P10_LD]>;

  // MF write: 13 cycles to complete. Modeled on the P10_SX resource.
  let Latency = 13 in
  def P10W_MF_13C : SchedWriteRes<[P10_SX]>;

  // MFL write: 13 cycles to complete. Modeled on the P10_SX resource.
  let Latency = 13 in
  def P10W_MFL_13C : SchedWriteRes<[P10_SX]>;

  // MM pipeline write: 10 cycles to complete.
  let Latency = 10 in
  def P10W_MM_10C : SchedWriteRes<[P10_MM]>;

  // MU write: 5 cycles to complete. Modeled on the P10_BF resource.
  let Latency = 5 in
  def P10W_MU_5C : SchedWriteRes<[P10_BF]>;

  // PM pipeline write: 4 cycles to complete.
  let Latency = 4 in
  def P10W_PM_4C : SchedWriteRes<[P10_PM]>;

  // ST pipeline write: 3 cycles to complete.
  let Latency = 3 in
  def P10W_ST_3C : SchedWriteRes<[P10_ST]>;

  // SX pipeline writes: 0 or 3 cycles to complete.
  let Latency = 0 in
  def P10W_SX : SchedWriteRes<[P10_SX]>;

  let Latency = 3 in
  def P10W_SX_3C : SchedWriteRes<[P10_SX]>;

  // vMU write: 7 cycles to complete. Modeled on the P10_BF resource.
  let Latency = 7 in
  def P10W_vMU_7C : SchedWriteRes<[P10_BF]>;
  // ***************** Read Advance Definitions *****************

  // Modeling pipeline forwarding logic.
  // P10<pipe>_Read_<N>C saves N cycles when the <pipe> pipeline reads data
  // produced by one of the writes in its list.
  def P10BF_Read_1C
      : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_CY_7C,
                             P10W_DF_13C, P10W_MM_10C]>;
  def P10BF_Read_2C
      : SchedReadAdvance<2, [P10W_BF_7C]>;
  def P10BR_Read_1C
      : SchedReadAdvance<1, [P10W_FX_3C, P10W_F2_4C]>;
  def P10CY_Read_1C
      : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_BF_7C,
                             P10W_DF_13C, P10W_MM_10C]>;
  def P10CY_Read_3C
      : SchedReadAdvance<3, [P10W_CY_7C]>;
  def P10DF_Read_1C
      : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_BF_7C,
                             P10W_CY_7C, P10W_DF_13C, P10W_MM_10C]>;
  def P10DV_Read_1C
      : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_BF_7C,
                             P10W_CY_7C, P10W_DF_13C, P10W_MM_10C]>;
  def P10DX_Read_1C
      : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_BF_7C,
                             P10W_CY_7C, P10W_DF_13C, P10W_MM_10C]>;
  def P10F2_Read_1C
      : SchedReadAdvance<1, [P10W_ST_3C, P10W_SX_3C, P10W_FX_3C, P10W_F2_4C,
                             P10W_PM_4C]>;
  def P10FX_Read_1C
      : SchedReadAdvance<1, [P10W_ST_3C, P10W_SX_3C, P10W_FX_3C, P10W_F2_4C,
                             P10W_PM_4C]>;
  def P10LD_Read_1C
      : SchedReadAdvance<1, [P10W_ST_3C, P10W_SX_3C, P10W_FX_3C, P10W_F2_4C]>;
  def P10MM_Read_1C
      : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_BF_7C,
                             P10W_CY_7C, P10W_DF_13C]>;
  def P10MM_Read_6C
      : SchedReadAdvance<6, [P10W_MM_10C]>;
  def P10MU_Read_1C
      : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_DF_13C]>;
  def P10PM_Read_1C
      : SchedReadAdvance<1, [P10W_ST_3C, P10W_SX_3C, P10W_FX_3C, P10W_F2_4C,
                             P10W_PM_4C]>;
  def P10ST_Read_1C
      : SchedReadAdvance<1, [P10W_ST_3C, P10W_SX_3C, P10W_FX_3C, P10W_F2_4C,
                             P10W_PM_4C]>;
  def P10SX_Read_1C
      : SchedReadAdvance<1, [P10W_ST_3C, P10W_SX_3C, P10W_FX_3C, P10W_F2_4C,
                             P10W_PM_4C, P10W_MM_10C]>;
  def P10vMU_Read_1C
      : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_BF_7C,
                             P10W_CY_7C, P10W_DF_13C, P10W_MM_10C]>;
  // The three reads below pick a read-advance through a predicate. The
  // *_Pred predicates are not defined in this file section; the variants are
  // listed with the predicated case first and NoSchedPred as the fallback.

  // BF reading from BF saves 2 cycles.
  // BF reading from DX, MU, vMU, CY, DF or MM saves 1 cycle.
  def P10BF_Read
      : SchedReadVariant<[SchedVar<P10W_BF_7C_Pred, [P10BF_Read_2C]>,
                          SchedVar<NoSchedPred, [P10BF_Read_1C]>]>;

  // CY reading from CY saves 3 cycles.
  // CY reading from DX, MU, vMU, BF, DF or MM saves 1 cycle.
  def P10CY_Read
      : SchedReadVariant<[SchedVar<P10W_CY_7C_Pred, [P10CY_Read_3C]>,
                          SchedVar<NoSchedPred, [P10CY_Read_1C]>]>;

  // MM reading from MM saves 6 cycles.
  // MM reading from DX, MU, vMU, BF, CY or DF saves 1 cycle.
  def P10MM_Read
      : SchedReadVariant<[SchedVar<P10W_MM_10C_Pred, [P10MM_Read_6C]>,
                          SchedVar<NoSchedPred, [P10MM_Read_1C]>]>;
  // Bind each remaining per-pipeline read to its 1-cycle read-advance.

  // BR saves 1 cycle when reading from FX, F2.
  def : SchedAlias<P10BR_Read,  P10BR_Read_1C>;

  // DF saves 1 cycle when reading from DX, MU, vMU, BF, CY, DF, MM.
  def : SchedAlias<P10DF_Read,  P10DF_Read_1C>;

  // DV saves 1 cycle when reading from DX, MU, vMU, BF, CY, DF, MM.
  def : SchedAlias<P10DV_Read,  P10DV_Read_1C>;

  // DX saves 1 cycle when reading from DX, MU, vMU, BF, CY, DF, MM.
  def : SchedAlias<P10DX_Read,  P10DX_Read_1C>;

  // F2 saves 1 cycle when reading from ST, SX, FX, F2, PM.
  def : SchedAlias<P10F2_Read,  P10F2_Read_1C>;

  // FX saves 1 cycle when reading from ST, SX, FX, F2, PM.
  def : SchedAlias<P10FX_Read,  P10FX_Read_1C>;

  // LD saves 1 cycle when reading from ST, SX, FX, F2.
  def : SchedAlias<P10LD_Read,  P10LD_Read_1C>;

  // MU saves 1 cycle when reading from DX, MU, DF.
  def : SchedAlias<P10MU_Read,  P10MU_Read_1C>;

  // PM saves 1 cycle when reading from ST, SX, FX, F2, PM.
  def : SchedAlias<P10PM_Read,  P10PM_Read_1C>;

  // ST saves 1 cycle when reading from ST, SX, FX, F2, PM.
  def : SchedAlias<P10ST_Read,  P10ST_Read_1C>;

  // SX saves 1 cycle when reading from ST, SX, FX, F2, PM, MM.
  def : SchedAlias<P10SX_Read,  P10SX_Read_1C>;

  // vMU saves 1 cycle when reading from DX, MU, vMU, BF, CY, DF, MM.
  def : SchedAlias<P10vMU_Read, P10vMU_Read_1C>;
  330. include "P10InstrResources.td"
  331. }