AArch64SchedAmpere1.td 43 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138
  1. //=- AArch64SchedAmpere1.td - Ampere-1 scheduling def -----*- tablegen -*-=//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file defines the machine model for the Ampere Computing Ampere-1 to
  10. // support instruction scheduling and other instruction cost heuristics.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. // The Ampere-1 core is an out-of-order micro-architecture. The front
  14. // end has branch prediction, with a 10-cycle recovery time from a
  15. // mispredicted branch. Instructions coming out of the front end are
  16. // decoded into internal micro-ops (uops).
  // Machine model record for Ampere-1; the values below parameterise the
  // generic scheduler.
  def Ampere1Model : SchedMachineModel {
    // Front end: 4-way decode and dispatch.
    let IssueWidth = 4;
    // Micro-op re-order buffer size.
    let MicroOpBufferSize = 174;
    // Instruction queue size.
    let LoopMicroOpBufferSize = 32;

    // Optimistic load latency.
    let LoadLatency = 4;
    // Branch mispredict penalty.
    let MispredictPenalty = 10;

    let CompleteModel = 1;

    // Features this model does not describe: SVE, SME, PA and MTE.
    list<Predicate> UnsupportedFeatures =
        !listconcat(SVEUnsupported.F, SMEUnsupported.F, PAUnsupported.F,
                    [HasMTE]);
  }
  29. let SchedModel = Ampere1Model in {
  30. //===----------------------------------------------------------------------===//
  // Define each kind of processor resource and the number available on
  // Ampere-1.
  //
  // Ampere-1 has 12 pipelines that are fed by 8 independent schedulers
  // (4 integer, 2 FP, and 2 memory). The integer and FP schedulers can each
  // issue one uop per cycle, while the memory schedulers can each issue one
  // load and one store address calculation per cycle.

  // Integer single-cycle, branch, and flags r/w.
  def Ampere1UnitA : ProcResource<2>;
  // Integer single-cycle, and complex shifts.
  def Ampere1UnitB : ProcResource<2>;
  // Integer multi-cycle.
  def Ampere1UnitBS : ProcResource<1>;
  // Load.
  def Ampere1UnitL : ProcResource<2>;
  // Store address calculation.
  def Ampere1UnitS : ProcResource<2>;
  // FP and vector operations, and flag write.
  def Ampere1UnitX : ProcResource<1>;
  // FP and vector operations, and crypto.
  def Ampere1UnitY : ProcResource<1>;
  // FP store data and FP-to-integer moves.
  def Ampere1UnitZ : ProcResource<1>;

  // Groups for uops that may issue to either member unit.
  def Ampere1UnitAB : ProcResGroup<[Ampere1UnitA, Ampere1UnitB]>;
  def Ampere1UnitXY : ProcResGroup<[Ampere1UnitX, Ampere1UnitY]>;
  46. //===----------------------------------------------------------------------===//
  // Define customized scheduler read/write types specific to the Ampere-1.
  // Names follow Ampere1Write_<latency>cyc_<count><unit>...; NumMicroOps
  // equals the number of resources listed.

  // Single-cycle writes: the latency is supplied once by the enclosing let.
  let Latency = 1 in {
    def Ampere1Write_1cyc_1A : SchedWriteRes<[Ampere1UnitA]> {
      let NumMicroOps = 1;
    }
    def Ampere1Write_1cyc_2A : SchedWriteRes<[Ampere1UnitA, Ampere1UnitA]> {
      let NumMicroOps = 2;
    }
    def Ampere1Write_1cyc_1B : SchedWriteRes<[Ampere1UnitB]> {
      let NumMicroOps = 1;
    }
    def Ampere1Write_1cyc_1AB : SchedWriteRes<[Ampere1UnitAB]> {
      let NumMicroOps = 1;
    }
    def Ampere1Write_1cyc_1L : SchedWriteRes<[Ampere1UnitL]> {
      let NumMicroOps = 1;
    }
    def Ampere1Write_1cyc_1S : SchedWriteRes<[Ampere1UnitS]> {
      let NumMicroOps = 1;
    }
    def Ampere1Write_1cyc_2S : SchedWriteRes<[Ampere1UnitS, Ampere1UnitS]> {
      let NumMicroOps = 2;
    }
  }
  // Two-cycle writes: the latency is supplied once by the enclosing let.
  let Latency = 2 in {
    def Ampere1Write_2cyc_1Y : SchedWriteRes<[Ampere1UnitY]> {
      let NumMicroOps = 1;
    }
    def Ampere1Write_2cyc_2AB
        : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitAB]> {
      let NumMicroOps = 2;
    }
    def Ampere1Write_2cyc_1B_1AB
        : SchedWriteRes<[Ampere1UnitB, Ampere1UnitAB]> {
      let NumMicroOps = 2;
    }
    def Ampere1Write_2cyc_1B_1A
        : SchedWriteRes<[Ampere1UnitB, Ampere1UnitA]> {
      let NumMicroOps = 2;
    }
    def Ampere1Write_2cyc_1AB_1A
        : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitA]> {
      let NumMicroOps = 2;
    }
    def Ampere1Write_2cyc_1AB_1L
        : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitL]> {
      let NumMicroOps = 2;
    }
    def Ampere1Write_2cyc_1AB_2S
        : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitS, Ampere1UnitS]> {
      let NumMicroOps = 3;
    }
    def Ampere1Write_2cyc_1AB_1S_1Z
        : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitS, Ampere1UnitZ]> {
      let NumMicroOps = 3;
    }
    def Ampere1Write_2cyc_1B_1S
        : SchedWriteRes<[Ampere1UnitB, Ampere1UnitS]> {
      let NumMicroOps = 2;
    }
    def Ampere1Write_2cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
      let NumMicroOps = 1;
    }
    def Ampere1Write_2cyc_1S_1Z
        : SchedWriteRes<[Ampere1UnitS, Ampere1UnitZ]> {
      let NumMicroOps = 2;
    }
  }
  // Three-cycle writes. As with every other Ampere1Write_<N>cyc_* record in
  // this file, the latency encoded in the name is the value assigned to
  // Latency.
  def Ampere1Write_3cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> {
    let Latency = 3;
    let NumMicroOps = 1;
  }

  def Ampere1Write_3cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
    let Latency = 3;
    let NumMicroOps = 1;
  }

  // The three records below previously set Latency = 2, contradicting their
  // _3cyc_ names. For the store case the surrounding mappings confirm the
  // intent: ST1Onev uses a 2-cycle write, ST1Twov uses
  // Ampere1Write_3cyc_2S_2Z, ST1Threev a 4-cycle write and ST1Fourv a
  // 5-cycle write.
  def Ampere1Write_3cyc_1B_1S_1AB
      : SchedWriteRes<[Ampere1UnitB, Ampere1UnitS, Ampere1UnitAB]> {
    let Latency = 3;
    let NumMicroOps = 3;
  }

  def Ampere1Write_3cyc_1S_2Z
      : SchedWriteRes<[Ampere1UnitS, Ampere1UnitZ, Ampere1UnitZ]> {
    let Latency = 3;
    let NumMicroOps = 3;
  }

  def Ampere1Write_3cyc_2S_2Z
      : SchedWriteRes<[Ampere1UnitS, Ampere1UnitS,
                       Ampere1UnitZ, Ampere1UnitZ]> {
    let Latency = 3;
    let NumMicroOps = 4;
  }
  // Four-cycle writes: the latency is supplied once by the enclosing let.
  let Latency = 4 in {
    def Ampere1Write_4cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> {
      let NumMicroOps = 1;
    }
    def Ampere1Write_4cyc_1L : SchedWriteRes<[Ampere1UnitL]> {
      let NumMicroOps = 1;
    }
    def Ampere1Write_4cyc_1X : SchedWriteRes<[Ampere1UnitX]> {
      let NumMicroOps = 1;
    }
    def Ampere1Write_4cyc_1Y : SchedWriteRes<[Ampere1UnitY]> {
      let NumMicroOps = 1;
    }
    def Ampere1Write_4cyc_1Z : SchedWriteRes<[Ampere1UnitZ]> {
      let NumMicroOps = 1;
    }
    def Ampere1Write_4cyc_2L : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL]> {
      let NumMicroOps = 2;
    }
    def Ampere1Write_4cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
      let NumMicroOps = 1;
    }
    def Ampere1Write_4cyc_2XY
        : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> {
      let NumMicroOps = 2;
    }
    def Ampere1Write_4cyc_1XY_1S_1Z
        : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitS, Ampere1UnitZ]> {
      let NumMicroOps = 3;
    }
    def Ampere1Write_4cyc_3S_3Z
        : SchedWriteRes<[Ampere1UnitS, Ampere1UnitS, Ampere1UnitS,
                         Ampere1UnitZ, Ampere1UnitZ, Ampere1UnitZ]> {
      let NumMicroOps = 6;
    }
  }
  // Five-cycle writes: the latency is supplied once by the enclosing let.
  let Latency = 5 in {
    def Ampere1Write_5cyc_1AB_1L
        : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitL]> {
      let NumMicroOps = 2;
    }
    def Ampere1Write_5cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> {
      let NumMicroOps = 1;
    }
    def Ampere1Write_5cyc_1X : SchedWriteRes<[Ampere1UnitX]> {
      let NumMicroOps = 1;
    }
    def Ampere1Write_5cyc_1L : SchedWriteRes<[Ampere1UnitL]> {
      let NumMicroOps = 1;
    }
    def Ampere1Write_5cyc_2L : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL]> {
      let NumMicroOps = 2;
    }
    def Ampere1Write_5cyc_1L_1BS
        : SchedWriteRes<[Ampere1UnitL, Ampere1UnitBS]> {
      let NumMicroOps = 2;
    }
    def Ampere1Write_5cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
      let NumMicroOps = 1;
    }
    def Ampere1Write_5cyc_2XY
        : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> {
      let NumMicroOps = 2;
    }
    def Ampere1Write_5cyc_4S_4Z
        : SchedWriteRes<[Ampere1UnitS, Ampere1UnitS,
                         Ampere1UnitS, Ampere1UnitS,
                         Ampere1UnitZ, Ampere1UnitZ,
                         Ampere1UnitZ, Ampere1UnitZ]> {
      let NumMicroOps = 8;
    }
    def Ampere1Write_5cyc_2XY_2S_2Z
        : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
                         Ampere1UnitS, Ampere1UnitS,
                         Ampere1UnitZ, Ampere1UnitZ]> {
      let NumMicroOps = 6;
    }
  }
  // Six-cycle writes: the latency is supplied once by the enclosing let.
  let Latency = 6 in {
    def Ampere1Write_6cyc_2XY_2S_2Z
        : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
                         Ampere1UnitS, Ampere1UnitS,
                         Ampere1UnitZ, Ampere1UnitZ]> {
      let NumMicroOps = 6;
    }
    def Ampere1Write_6cyc_3XY_3S_3Z
        : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY,
                         Ampere1UnitS, Ampere1UnitS, Ampere1UnitS,
                         Ampere1UnitZ, Ampere1UnitZ, Ampere1UnitZ]> {
      let NumMicroOps = 9;
    }
    def Ampere1Write_6cyc_1AB_1L
        : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitL]> {
      let NumMicroOps = 2;
    }
    def Ampere1Write_6cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
      let NumMicroOps = 1;
    }
    def Ampere1Write_6cyc_2XY
        : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> {
      let NumMicroOps = 2;
    }
    def Ampere1Write_6cyc_3XY
        : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> {
      let NumMicroOps = 3;
    }
    def Ampere1Write_6cyc_3L
        : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL, Ampere1UnitL]> {
      let NumMicroOps = 3;
    }
    def Ampere1Write_6cyc_4L
        : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
                         Ampere1UnitL, Ampere1UnitL]> {
      let NumMicroOps = 4;
    }
    def Ampere1Write_6cyc_1XY_1Z
        : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitZ]> {
      let NumMicroOps = 2;
    }
  }
  // Seven-cycle writes: the latency is supplied once by the enclosing let.
  let Latency = 7 in {
    def Ampere1Write_7cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> {
      let NumMicroOps = 1;
    }
    def Ampere1Write_7cyc_1BS_1XY
        : SchedWriteRes<[Ampere1UnitBS, Ampere1UnitXY]> {
      let NumMicroOps = 2;
    }
    def Ampere1Write_7cyc_1L_1XY
        : SchedWriteRes<[Ampere1UnitL, Ampere1UnitXY]> {
      let NumMicroOps = 2;
    }
    def Ampere1Write_7cyc_2L_2XY
        : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
                         Ampere1UnitXY, Ampere1UnitXY]> {
      let NumMicroOps = 4;
    }
    def Ampere1Write_7cyc_2XY
        : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> {
      let NumMicroOps = 2;
    }
    def Ampere1Write_7cyc_4XY_4S_4Z
        : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
                         Ampere1UnitXY, Ampere1UnitXY,
                         Ampere1UnitS, Ampere1UnitS,
                         Ampere1UnitS, Ampere1UnitS,
                         Ampere1UnitZ, Ampere1UnitZ,
                         Ampere1UnitZ, Ampere1UnitZ]> {
      let NumMicroOps = 12;
    }
  }
  // Eight-cycle writes: the latency is supplied once by the enclosing let.
  let Latency = 8 in {
    def Ampere1Write_8cyc_1BS_1A
        : SchedWriteRes<[Ampere1UnitBS, Ampere1UnitA]> {
      let NumMicroOps = 2;
    }
    def Ampere1Write_8cyc_1BS_2A
        : SchedWriteRes<[Ampere1UnitBS, Ampere1UnitA, Ampere1UnitA]> {
      let NumMicroOps = 3;
    }
    def Ampere1Write_8cyc_2XY
        : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> {
      let NumMicroOps = 2;
    }
    def Ampere1Write_8cyc_4XY
        : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
                         Ampere1UnitXY, Ampere1UnitXY]> {
      let NumMicroOps = 4;
    }
    def Ampere1Write_8cyc_3L_3XY
        : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL, Ampere1UnitL,
                         Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> {
      let NumMicroOps = 6;
    }
    def Ampere1Write_8cyc_4L_4XY
        : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
                         Ampere1UnitL, Ampere1UnitL,
                         Ampere1UnitXY, Ampere1UnitXY,
                         Ampere1UnitXY, Ampere1UnitXY]> {
      let NumMicroOps = 8;
    }
  }
  // Nine-cycle writes: the latency is supplied once by the enclosing let.
  let Latency = 9 in {
    def Ampere1Write_9cyc_3L_3XY
        : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL, Ampere1UnitL,
                         Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> {
      let NumMicroOps = 6;
    }
    def Ampere1Write_9cyc_4L_4XY
        : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
                         Ampere1UnitL, Ampere1UnitL,
                         Ampere1UnitXY, Ampere1UnitXY,
                         Ampere1UnitXY, Ampere1UnitXY]> {
      let NumMicroOps = 8;
    }
    def Ampere1Write_9cyc_3XY
        : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> {
      let NumMicroOps = 3;
    }
    def Ampere1Write_9cyc_2L_3XY
        : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
                         Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> {
      let NumMicroOps = 5;
    }
    def Ampere1Write_9cyc_6XY_4S_4Z
        : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
                         Ampere1UnitXY, Ampere1UnitXY,
                         Ampere1UnitXY, Ampere1UnitXY,
                         Ampere1UnitS, Ampere1UnitS,
                         Ampere1UnitS, Ampere1UnitS,
                         Ampere1UnitZ, Ampere1UnitZ,
                         Ampere1UnitZ, Ampere1UnitZ]> {
      let NumMicroOps = 14;
    }
    def Ampere1Write_9cyc_8XY_4S_4Z
        : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
                         Ampere1UnitXY, Ampere1UnitXY,
                         Ampere1UnitXY, Ampere1UnitXY,
                         Ampere1UnitXY, Ampere1UnitXY,
                         Ampere1UnitS, Ampere1UnitS,
                         Ampere1UnitS, Ampere1UnitS,
                         Ampere1UnitZ, Ampere1UnitZ,
                         Ampere1UnitZ, Ampere1UnitZ]> {
      let NumMicroOps = 16;
    }
  }
  // Ten-cycle writes: the latency is supplied once by the enclosing let.
  let Latency = 10 in {
    def Ampere1Write_10cyc_2XY
        : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> {
      let NumMicroOps = 2;
    }
    def Ampere1Write_10cyc_1XY_1Z
        : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitZ]> {
      let NumMicroOps = 2;
    }
    def Ampere1Write_10cyc_1X_1Z
        : SchedWriteRes<[Ampere1UnitX, Ampere1UnitZ]> {
      let NumMicroOps = 2;
    }
    def Ampere1Write_10cyc_3L_3XY
        : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL, Ampere1UnitL,
                         Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> {
      let NumMicroOps = 6;
    }
    def Ampere1Write_10cyc_1A_1BS_1X
        : SchedWriteRes<[Ampere1UnitA, Ampere1UnitBS, Ampere1UnitX]> {
      let NumMicroOps = 3;
    }
    def Ampere1Write_10cyc_1A_1BS_1XY
        : SchedWriteRes<[Ampere1UnitA, Ampere1UnitBS, Ampere1UnitXY]> {
      let NumMicroOps = 3;
    }
  }
  // Eleven-cycle writes: the latency is supplied once by the enclosing let.
  let Latency = 11 in {
    def Ampere1Write_11cyc_1BS_1L
        : SchedWriteRes<[Ampere1UnitBS, Ampere1UnitL]> {
      let NumMicroOps = 2;
    }
    def Ampere1Write_11cyc_1A_1BS_1X
        : SchedWriteRes<[Ampere1UnitA, Ampere1UnitBS, Ampere1UnitX]> {
      let NumMicroOps = 3;
    }
    def Ampere1Write_11cyc_1A_1BS_1XY
        : SchedWriteRes<[Ampere1UnitA, Ampere1UnitBS, Ampere1UnitXY]> {
      let NumMicroOps = 3;
    }
    def Ampere1Write_11cyc_4L_8XY
        : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
                         Ampere1UnitL, Ampere1UnitL,
                         Ampere1UnitXY, Ampere1UnitXY,
                         Ampere1UnitXY, Ampere1UnitXY,
                         Ampere1UnitXY, Ampere1UnitXY,
                         Ampere1UnitXY, Ampere1UnitXY]> {
      let NumMicroOps = 12;
    }
  }
  // Twelve-cycle writes: the latency is supplied once by the enclosing let.
  let Latency = 12 in {
    def Ampere1Write_12cyc_4L_8XY
        : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
                         Ampere1UnitL, Ampere1UnitL,
                         Ampere1UnitXY, Ampere1UnitXY,
                         Ampere1UnitXY, Ampere1UnitXY,
                         Ampere1UnitXY, Ampere1UnitXY,
                         Ampere1UnitXY, Ampere1UnitXY]> {
      let NumMicroOps = 12;
    }
    def Ampere1Write_12cyc_3XY
        : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> {
      let NumMicroOps = 3;
    }
    def Ampere1Write_12cyc_4XY
        : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
                         Ampere1UnitXY, Ampere1UnitXY]> {
      let NumMicroOps = 4;
    }
  }
  // Long-latency writes. Each is a single micro-op on one resource, so the
  // shared micro-op count is hoisted and only the latency varies.
  let NumMicroOps = 1 in {
    def Ampere1Write_18cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> {
      let Latency = 18;
    }
    def Ampere1Write_19cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
      let Latency = 19;
    }
    def Ampere1Write_25cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
      let Latency = 25;
    }
    def Ampere1Write_32cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
      let Latency = 32;
    }
    def Ampere1Write_34cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> {
      let Latency = 34;
    }
    def Ampere1Write_34cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
      let Latency = 34;
    }
    def Ampere1Write_39cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
      let Latency = 39;
    }
    def Ampere1Write_62cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
      let Latency = 62;
    }
  }
  // Basic arithmetic is modelled with predicate-selected variants:
  //  * extended-register forms are two uops, each free to use Unit A or B;
  //  * short shifts (LSL shift <= 4) are a single uop;
  //  * everything else falls through to the NoSchedPred entry.
  def Ampere1Write_Arith : SchedWriteVariant<[
      SchedVar<RegExtendedPred, [Ampere1Write_2cyc_2AB]>,
      SchedVar<AmpereCheapLSL,  [Ampere1Write_1cyc_1AB]>,
      SchedVar<NoSchedPred,     [Ampere1Write_2cyc_1B_1AB]>
  ]>;

  // Flag-setting counterpart: the same predicates, but every variant selects
  // a write that includes a Unit A uop.
  def Ampere1Write_ArithFlagsetting : SchedWriteVariant<[
      SchedVar<RegExtendedPred, [Ampere1Write_2cyc_1AB_1A]>,
      SchedVar<AmpereCheapLSL,  [Ampere1Write_1cyc_1A]>,
      SchedVar<NoSchedPred,     [Ampere1Write_2cyc_1B_1A]>
  ]>;
  480. //===----------------------------------------------------------------------===//
  // Map the target-defined scheduler read/write resources and latencies for
  // Ampere-1. This provides a coarse model, which is then specialised below.

  // MOVN, MOVZ
  def : WriteRes<WriteImm, [Ampere1UnitAB]>;
  // ALU
  def : WriteRes<WriteI, [Ampere1UnitAB]>;

  // Two-uop, two-cycle ALU forms.
  let Latency = 2, NumMicroOps = 2 in {
    // ALU of Shifted-Reg
    def : WriteRes<WriteISReg, [Ampere1UnitB, Ampere1UnitA]>;
    // ALU of Extended-Reg
    def : WriteRes<WriteIEReg, [Ampere1UnitAB, Ampere1UnitA]>;
  }

  // EXTR shifts a reg pair
  def : WriteRes<WriteExtr, [Ampere1UnitB]>;
  // Shift/Scale
  def : WriteRes<WriteIS, [Ampere1UnitB]>;

  // 32-bit Divide
  def : WriteRes<WriteID32, [Ampere1UnitBS]> { let Latency = 18; }
  // 64-bit Divide
  def : WriteRes<WriteID64, [Ampere1UnitBS]> { let Latency = 34; }
  // 32-bit Multiply
  def : WriteRes<WriteIM32, [Ampere1UnitBS]> { let Latency = 3; }
  // 64-bit Multiply
  def : WriteRes<WriteIM64, [Ampere1UnitBS]> { let Latency = 3; }

  // Branches
  def : WriteRes<WriteBr, [Ampere1UnitA]>;
  def : WriteRes<WriteBrReg, [Ampere1UnitA, Ampere1UnitA]>;
  // Coarse load/store mapping.

  // Load from base addr plus immediate offset.
  def : WriteRes<WriteLD, [Ampere1UnitL]> {
    let Latency = 4;
  }

  // Store to base addr plus immediate offset.
  def : WriteRes<WriteST, [Ampere1UnitS]> {
    let Latency = 1;
  }

  // Store a register pair.
  def : WriteRes<WriteSTP, [Ampere1UnitS, Ampere1UnitS]> {
    let Latency = 1;
    let NumMicroOps = 2;
  }

  def : WriteRes<WriteAdr, [Ampere1UnitAB]>;

  // Load from a register index (maybe scaled): one address-generation uop on
  // A/B plus one load uop. This previously named Ampere1UnitS (store address
  // calculation), leaving the load without a load pipeline; it now uses
  // Ampere1UnitL, matching Ampere1Write_5cyc_1AB_1L.
  // NOTE(review): confirm against the Ampere-1 pipeline documentation.
  def : WriteRes<WriteLDIdx, [Ampere1UnitAB, Ampere1UnitL]> {
    let Latency = 5;
    let NumMicroOps = 2;
  }

  // Store to a register index (maybe scaled).
  def : WriteRes<WriteSTIdx, [Ampere1UnitS, Ampere1UnitS]> {
    let Latency = 1;
    let NumMicroOps = 2;
  }
  // Coarse FP and vector mapping.

  // General floating-point ops.
  def : WriteRes<WriteF, [Ampere1UnitXY]> { let Latency = 2; }
  // Floating-point compare.
  def : WriteRes<WriteFCmp, [Ampere1UnitX]> { let Latency = 5; }
  // Float conversion.
  def : WriteRes<WriteFCvt, [Ampere1UnitXY]> { let Latency = 6; }
  // Float-int register copy (no latency is set here).
  def : WriteRes<WriteFCopy, [Ampere1UnitXY]> {}
  // Floating-point immediate.
  def : WriteRes<WriteFImm, [Ampere1UnitXY]> { let Latency = 2; }
  // Floating-point multiply.
  def : WriteRes<WriteFMul, [Ampere1UnitXY]> { let Latency = 5; }
  // Floating-point division.
  def : WriteRes<WriteFDiv, [Ampere1UnitXY]> { let Latency = 34; }

  // 64bit Vector D ops.
  def : WriteRes<WriteVd, [Ampere1UnitXY]> { let Latency = 3; }
  // 128bit Vector Q ops.
  def : WriteRes<WriteVq, [Ampere1UnitXY]> { let Latency = 3; }

  // Vector loads.
  def : WriteRes<WriteVLD, [Ampere1UnitL, Ampere1UnitL]> { let Latency = 5; }
  // Vector stores.
  def : WriteRes<WriteVST, [Ampere1UnitS, Ampere1UnitZ]> { let Latency = 2; }
  // Writes that are not tied to any processor resource.

  // Atomics are marked unsupported in this model.
  def : WriteRes<WriteAtomic, []> {
    let Unsupported = 1;
  }

  // System, barrier and hint writes share a one-cycle latency.
  let Latency = 1 in {
    def : WriteRes<WriteSys, []>;
    def : WriteRes<WriteBarrier, []>;
    def : WriteRes<WriteHint, []>;
  }

  // The second register of a load-pair: LDP,LDPSW,LDNP,LDXP,LDAXP
  def : WriteRes<WriteLDHi, []> {
    let Latency = 4;
  }
  // Forwarding logic.
  // Every read below has an advance of 0 except ReadIMA, which has an advance
  // of 1 restricted to WriteIM32/WriteIM64 producers. The records are emitted
  // in the same order as before.
  foreach Rd = [ReadI, ReadISReg, ReadIEReg, ReadIM] in
    def : ReadAdvance<Rd, 0>;

  def : ReadAdvance<ReadIMA, 1, [WriteIM32, WriteIM64]>;

  foreach Rd = [ReadID, ReadExtrHi, ReadST, ReadAdrBase, ReadVLD] in
    def : ReadAdvance<Rd, 0>;
  578. //===----------------------------------------------------------------------===//
  // Specialising the scheduling model further for Ampere-1.

  // Register copies.
  def : InstRW<[Ampere1Write_1cyc_1AB], (instrs COPY)>;

  // Branch instructions
  // -- direct, conditional, compare-and-branch, test-and-branch, and return:
  //    one uop on Unit A
  def : InstRW<[Ampere1Write_1cyc_1A],
               (instrs Bcc, BL, RET,
                       CBZW, CBZX, CBNZW, CBNZX,
                       TBZW, TBZX, TBNZW, TBNZX)>;
  // -- BLR: two uops on Unit A
  def : InstRW<[Ampere1Write_1cyc_2A], (instrs BLR)>;
  // Cryptography instructions

  // -- AES encryption/decryption, including the mix-columns forms
  def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^AES[DE]")>;
  def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^AESI?MC")>;

  // -- Polynomial multiplication
  def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^PMUL", "^PMULL")>;

  // -- SHA-1
  // ---- choose/majority/parity
  def : InstRW<[Ampere1Write_4cyc_1X], (instregex "^SHA1[CMP]")>;
  // ---- hash/schedule update
  def : InstRW<[Ampere1Write_2cyc_1Y], (instregex "^SHA1SU[01]")>;
  def : InstRW<[Ampere1Write_2cyc_1Y], (instregex "^SHA1H")>;

  // -- SHA-256
  // ---- hash
  def : InstRW<[Ampere1Write_4cyc_1X], (instregex "^SHA256(H|H2)")>;
  // ---- schedule update
  def : InstRW<[Ampere1Write_4cyc_1Y], (instregex "^SHA256SU[01]")>;

  // -- SHA-512
  // ---- hash
  def : InstRW<[Ampere1Write_4cyc_1X], (instregex "^SHA512(H|H2)")>;
  // ---- schedule update
  def : InstRW<[Ampere1Write_4cyc_1Y], (instregex "^SHA512SU[01]")>;

  // -- SHA-3 instructions
  def : InstRW<[Ampere1Write_2cyc_1XY],
               (instregex "^BCAX", "^EOR3", "^RAX1", "^XAR")>;
  // FP and vector load instructions

  // LD1: load 1-element structures.
  // -- replicate to all lanes (every size except 1D): load uop + XY uop
  def : InstRW<[Ampere1Write_7cyc_1L_1XY],
               (instregex "^LD1Rv(8b|4h|2s|16b|8h|4s|2d)")>;
  // -- to one lane: load uop + XY uop
  def : InstRW<[Ampere1Write_7cyc_1L_1XY], (instregex "^LD1i(8|16|32|64)")>;
  // -- replicate to all lanes, 1D size: load uop only
  def : InstRW<[Ampere1Write_5cyc_1L], (instregex "^LD1Rv1d")>;
  // -- multiple structures to 1 register
  def : InstRW<[Ampere1Write_5cyc_1L],
               (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)")>;
  // -- multiple structures to 2 registers
  def : InstRW<[Ampere1Write_5cyc_2L],
               (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)")>;
  // -- multiple structures to 3 registers
  def : InstRW<[Ampere1Write_6cyc_3L],
               (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)")>;
  // -- multiple structures to 4 registers
  def : InstRW<[Ampere1Write_6cyc_4L],
               (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)")>;
  // LD2: load 2-element structures into 2 registers.
  // -- replicate to all lanes, 1D size
  def : InstRW<[Ampere1Write_5cyc_2L], (instregex "^LD2Rv1d")>;
  // -- replicate to all lanes, other sizes
  def : InstRW<[Ampere1Write_7cyc_2L_2XY],
               (instregex "^LD2Rv(8b|4h|2s|16b|8h|4s|2d)")>;
  // -- to one lane
  def : InstRW<[Ampere1Write_7cyc_2L_2XY], (instregex "^LD2i(8|16|32|64)")>;
  // -- multiple structures, 16B/8H/4S/2D size
  def : InstRW<[Ampere1Write_7cyc_2L_2XY],
               (instregex "^LD2Twov(16b|8h|4s|2d)")>;
  // -- multiple structures, 8B/4H/2S size
  def : InstRW<[Ampere1Write_9cyc_2L_3XY], (instregex "^LD2Twov(8b|4h|2s)")>;
  // LD3: load 3-element structures into 3 registers.
  // -- replicate to all lanes, 1D size
  def : InstRW<[Ampere1Write_6cyc_3L], (instregex "^LD3Rv1d")>;
  // -- replicate to all lanes, other sizes
  def : InstRW<[Ampere1Write_8cyc_3L_3XY],
               (instregex "^LD3Rv(8b|4h|2s|16b|8h|4s|2d)")>;
  // -- to one lane
  def : InstRW<[Ampere1Write_8cyc_3L_3XY], (instregex "^LD3i(8|16|32|64)")>;
  // -- multiple structures, 16B/8H/4S sizes
  def : InstRW<[Ampere1Write_9cyc_3L_3XY],
               (instregex "^LD3Threev(16b|8h|4s)")>;
  // -- multiple structures, 2D size
  def : InstRW<[Ampere1Write_8cyc_3L_3XY], (instregex "^LD3Threev2d")>;
  // -- multiple structures, 8B/4H/2S sizes
  def : InstRW<[Ampere1Write_10cyc_3L_3XY],
               (instregex "^LD3Threev(8b|4h|2s)")>;
  // LD4: load 4-element structures into 4 registers.
  // -- replicate to all lanes, 1D size
  def : InstRW<[Ampere1Write_6cyc_4L], (instregex "^LD4Rv1d")>;
  // -- replicate to all lanes, other sizes
  def : InstRW<[Ampere1Write_8cyc_4L_4XY],
               (instregex "^LD4Rv(8b|4h|2s|16b|8h|4s|2d)")>;
  // -- to one lane
  // NOTE(review): the LD1i/LD2i/LD3i mappings all include XY uops, whereas
  // this one is load-only; confirm that 6cyc_4L (rather than 8cyc_4L_4XY) is
  // intended.
  def : InstRW<[Ampere1Write_6cyc_4L], (instregex "^LD4i(8|16|32|64)")>;
  // -- multiple structures, 2D size
  def : InstRW<[Ampere1Write_9cyc_4L_4XY], (instregex "^LD4Fourv2d")>;
  // -- multiple structures, 2S size
  def : InstRW<[Ampere1Write_12cyc_4L_8XY], (instregex "^LD4Fourv2s")>;
  // -- multiple structures, other sizes
  def : InstRW<[Ampere1Write_11cyc_4L_8XY],
               (instregex "^LD4Fourv(8b|4h|16b|8h|4s)")>;
  // FP/SIMD register loads.
  // -- pair, Q-form: two load uops
  def : InstRW<[Ampere1Write_5cyc_2L], (instregex "LDN?PQ")>;
  // -- pair, S/D-form: one load uop plus one BS uop
  def : InstRW<[Ampere1Write_5cyc_1L_1BS], (instregex "LDN?P(S|D)")>;
  // -- single register
  def : InstRW<[Ampere1Write_5cyc_1L], (instregex "LDU?R[BHSDQ]i")>;
  // -- single register, sign-extended register offset: A/B uop + load uop
  def : InstRW<[Ampere1Write_6cyc_1AB_1L],
               (instregex "LDR[BHSDQ]ro(W|X)")>;
  // FP and vector store instructions
  // -- ST1, single lane of one register
  def : InstRW<[Ampere1Write_4cyc_1XY_1S_1Z],
               (instregex "^ST1i(8|16|32|64)")>;
  // -- ST1, whole registers: one register
  def : InstRW<[Ampere1Write_2cyc_1S_1Z],
               (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)")>;
  // -- ST1, whole registers: two registers
  def : InstRW<[Ampere1Write_3cyc_2S_2Z],
               (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)")>;
  // -- ST1, whole registers: three registers
  def : InstRW<[Ampere1Write_4cyc_3S_3Z],
               (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)")>;
  // -- ST1, whole registers: four registers
  def : InstRW<[Ampere1Write_5cyc_4S_4Z],
               (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)")>;
  // Interleaving structure stores (ST2 / ST3 / ST4).
  // -- ST2, single lane of 2 registers
  def : InstRW<[Ampere1Write_5cyc_2XY_2S_2Z],
               (instregex "^ST2i(8|16|32|64)")>;
  // -- ST2, 2 registers: 16B / 8H / 4S / 2D arrangements
  def : InstRW<[Ampere1Write_5cyc_2XY_2S_2Z],
               (instregex "^ST2Twov(16b|8h|4s|2d)")>;
  // -- ST2, 2 registers: 8B / 4H / 2S arrangements
  def : InstRW<[Ampere1Write_6cyc_2XY_2S_2Z],
               (instregex "^ST2Twov(8b|4h|2s)")>;
  // -- ST3, single lane of 3 registers
  def : InstRW<[Ampere1Write_6cyc_3XY_3S_3Z],
               (instregex "^ST3i(8|16|32|64)")>;
  // -- ST3, 3 registers: every arrangement named in the pattern
  def : InstRW<[Ampere1Write_6cyc_3XY_3S_3Z],
               (instregex "^ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)")>;
  // -- ST4, single lane of 4 registers
  def : InstRW<[Ampere1Write_7cyc_4XY_4S_4Z],
               (instregex "^ST4i(8|16|32|64)")>;
  // -- ST4, 4 registers: 16B / 8H / 4S arrangements
  def : InstRW<[Ampere1Write_9cyc_8XY_4S_4Z],
               (instregex "^ST4Fourv(16b|8h|4s)")>;
  // -- ST4, 4 registers: 2D arrangement
  def : InstRW<[Ampere1Write_7cyc_4XY_4S_4Z],
               (instregex "^ST4Fourv2d")>;
  // -- ST4, 4 registers: 8B / 4H / 2S arrangements
  def : InstRW<[Ampere1Write_9cyc_6XY_4S_4Z],
               (instregex "^ST4Fourv(8b|4h|2s)")>;
  // FP/vector pair and single-register stores.
  // -- pair store (STP / STNP) of Q registers
  def : InstRW<[Ampere1Write_3cyc_2S_2Z],
               (instregex "^STN?PQ")>;
  // -- pair store (STP / STNP) of S or D registers
  def : InstRW<[Ampere1Write_3cyc_1S_2Z],
               (instregex "^STN?P[SD]")>;
  // -- register store; selects ST[U]R<B|H|S|D|Q> names ending in "ui" or "i"
  def : InstRW<[Ampere1Write_2cyc_1S_1Z],
               (instregex "^STU?R[BHSDQ](ui|i)")>;
  // -- register store with a register offset (roX / roW addressing forms)
  def : InstRW<[Ampere1Write_2cyc_1AB_1S_1Z],
               (instregex "^STR[BHSDQ]ro[XW]")>;
  // FP data processing, bfloat16 format
  // -- scalar convert
  def : InstRW<[Ampere1Write_5cyc_1XY],
               (instrs BFCVT)>;
  // -- narrowing converts
  def : InstRW<[Ampere1Write_7cyc_2XY],
               (instrs BFCVTN, BFCVTN2)>;
  // -- dot product (opcode names starting with BFDOTv or BF16DOT)
  def : InstRW<[Ampere1Write_2cyc_1XY],
               (instregex "^BFDOTv", "^BF16DOT")>;
  // -- matrix multiply-accumulate
  def : InstRW<[Ampere1Write_4cyc_2XY],
               (instrs BFMMLA)>;
  // -- widening multiply-accumulate (opcode names starting with BFMLAL)
  def : InstRW<[Ampere1Write_5cyc_1XY],
               (instregex "^BFMLAL")>;
  // FP data processing, scalar/vector, half precision
  // -- absolute difference / absolute value
  def : InstRW<[Ampere1Write_4cyc_1XY],
               (instregex "^F(ABD|ABS)v.[fi]16")>;
  // -- add, pairwise add, complex add, negate, negated multiply, subtract
  def : InstRW<[Ampere1Write_4cyc_1XY],
               (instregex "^F(ADD|ADDP|CADD|NEG|NMUL|SUB)v.[fi]16")>;
  // -- compares producing a mask: "v"-prefixed names, then the plain "16" names
  def : InstRW<[Ampere1Write_4cyc_1XY],
               (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v.[fi]16")>;
  def : InstRW<[Ampere1Write_4cyc_1XY],
               (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)16")>;
  // -- compare (FCMP / FCMPE)
  def : InstRW<[Ampere1Write_4cyc_1X],
               (instregex "^FCMPE?H")>;
  // -- conditional compare (FCCMP / FCCMPE)
  def : InstRW<[Ampere1Write_10cyc_1A_1BS_1X],
               (instregex "^FCCMPE?H")>;
  // -- conditional select
  def : InstRW<[Ampere1Write_10cyc_1A_1BS_1XY],
               (instregex "^FCSELH")>;
  // -- FP -> integer and integer -> FP converts ("v"-prefixed names)
  def : InstRW<[Ampere1Write_4cyc_1XY],
               (instregex "^FCVT[AMNPZ][SU]v.[if]16")>;
  def : InstRW<[Ampere1Write_4cyc_1XY],
               (instregex "^[SU]CVTFv.[fi]16")>;
  // -- divide
  def : InstRW<[Ampere1Write_25cyc_1XY],
               (instregex "^FDIVv.[if]16", "FDIVH")>;
  // -- max/min, element-wise and pairwise
  def : InstRW<[Ampere1Write_4cyc_1XY],
               (instregex "^F(MAX|MIN)(NM)?P?v.[if]16")>;
  // -- max/min across vector: v4 names, then v8 names
  def : InstRW<[Ampere1Write_8cyc_2XY],
               (instregex "^F(MAX|MIN)(NM)?Vv4[if]16")>;
  def : InstRW<[Ampere1Write_12cyc_3XY],
               (instregex "^F(MAX|MIN)(NM)?Vv8[if]16")>;
  // -- multiply / multiply extended
  def : InstRW<[Ampere1Write_4cyc_1XY],
               (instregex "^FMULX?v.[if]16")>;
  def : InstRW<[Ampere1Write_4cyc_1XY],
               (instrs FMULX16)>;
  // -- fused multiply-add family, three-register scalar names
  def : InstRW<[Ampere1Write_4cyc_1XY],
               (instregex "^FN?M(ADD|SUB)[H]rrr")>;
  // -- multiply-accumulate / multiply-subtract ("v"-prefixed names)
  def : InstRW<[Ampere1Write_4cyc_1XY],
               (instregex "^FML[AS]v.[if]16")>;
  // -- reciprocal exponent
  def : InstRW<[Ampere1Write_4cyc_1XY],
               (instregex "^FRECPXv.[if]16")>;
  // -- reciprocal / reciprocal-square-root step
  def : InstRW<[Ampere1Write_4cyc_1XY],
               (instregex "^F(RECP|RSQRT)S16")>;
  // -- round to integral
  def : InstRW<[Ampere1Write_4cyc_1XY],
               (instregex "^FRINT[AIMNPXZ]v.[if]16")>;
  // -- square root
  def : InstRW<[Ampere1Write_39cyc_1XY],
               (instregex "^FSQRTv.f16", "^FSQRTHr")>;
  // FP data processing, scalar/vector, single/double precision
  // -- absolute difference / absolute value
  def : InstRW<[Ampere1Write_5cyc_1XY],
               (instregex "^F(ABD|ABS)v.[fi](32|64)")>;
  // -- add, pairwise add, complex add, negate, negated multiply, subtract
  def : InstRW<[Ampere1Write_5cyc_1XY],
               (instregex "^F(ADD|ADDP|CADD|NEG|NMUL|SUB)v.[fi](32|64)")>;
  // -- compares producing a mask: "v"-prefixed names, then the plain 32/64 names
  def : InstRW<[Ampere1Write_5cyc_1XY],
               (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v.[fi](32|64)")>;
  def : InstRW<[Ampere1Write_5cyc_1XY],
               (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)(32|64)")>;
  // -- compare (FCMP / FCMPE)
  def : InstRW<[Ampere1Write_5cyc_1X],
               (instregex "^FCMPE?(S|D)")>;
  // -- conditional compare (FCCMP / FCCMPE)
  def : InstRW<[Ampere1Write_11cyc_1A_1BS_1X],
               (instregex "^FCCMPE?(S|D)")>;
  // -- conditional select
  def : InstRW<[Ampere1Write_11cyc_1A_1BS_1XY],
               (instregex "^FCSEL(S|D)")>;
  // -- FP -> integer and integer -> FP converts ("v"-prefixed names)
  def : InstRW<[Ampere1Write_6cyc_1XY],
               (instregex "^FCVT[AMNPZ][SU]v.[if](32|64)")>;
  def : InstRW<[Ampere1Write_6cyc_1XY],
               (instregex "^[SU]CVTFv.[fi](32|64)")>;
  // -- divide: 64-bit element names, then 32-bit element names
  def : InstRW<[Ampere1Write_34cyc_1XY],
               (instregex "^FDIVv.[if](64)", "FDIVD")>;
  def : InstRW<[Ampere1Write_19cyc_1XY],
               (instregex "^FDIVv.[if](32)", "FDIVS")>;
  // -- max/min, element-wise and pairwise
  def : InstRW<[Ampere1Write_5cyc_1XY],
               (instregex "^F(MAX|MIN)(NM)?P?v.[if](32|64)")>;
  // -- max/min across vector
  def : InstRW<[Ampere1Write_10cyc_2XY],
               (instregex "^F(MAX|MIN)(NM)?Vv.[if](32|64)")>;
  // -- multiply / multiply extended
  def : InstRW<[Ampere1Write_6cyc_1XY],
               (instregex "^FMULX?v.[if](32|64)")>;
  def : InstRW<[Ampere1Write_6cyc_1XY],
               (instrs FMULX32, FMULX64)>;
  // -- fused multiply-add family, three-register scalar names
  def : InstRW<[Ampere1Write_5cyc_1XY],
               (instregex "^FN?M(ADD|SUB)[SD]rrr")>;
  // -- multiply-accumulate / multiply-subtract ("v"-prefixed names)
  def : InstRW<[Ampere1Write_5cyc_1XY],
               (instregex "^FML[AS]v.[if](32|64)")>;
  // -- reciprocal exponent
  def : InstRW<[Ampere1Write_5cyc_1XY],
               (instregex "^FRECPXv.[if](32|64)")>;
  // -- reciprocal / reciprocal-square-root step
  def : InstRW<[Ampere1Write_6cyc_1XY],
               (instregex "^F(RECP|RSQRT)S(32|64)")>;
  // -- round to integral: the A/I/M/N/P/X/Z variants, then FRINT32* / FRINT64*
  def : InstRW<[Ampere1Write_6cyc_1XY],
               (instregex "^FRINT[AIMNPXZ]v.[if](32|64)")>;
  def : InstRW<[Ampere1Write_6cyc_1XY],
               (instregex "^FRINT(32|64)")>;
  // -- square root: 64-bit element names, then 32-bit element names
  def : InstRW<[Ampere1Write_62cyc_1XY],
               (instregex "^FSQRTv.f64", "^FSQRTDr")>;
  def : InstRW<[Ampere1Write_32cyc_1XY],
               (instregex "^FSQRTv.f32", "^FSQRTSr")>;
  // FP miscellaneous instructions
  // -- FP to integer converts whose names end in <S|U><X|W><H|S|D>r
  def : InstRW<[Ampere1Write_10cyc_1XY_1Z],
               (instregex "^FCVT[AMNPZ][SU][SU][XW][HSD]r")>;
  // -- FP precision converts (FCVT<H|S|D>Hr, then FCVT<H|S|D><S|D>r)
  def : InstRW<[Ampere1Write_5cyc_1XY],
               (instregex "^FCVT[HSD]Hr")>;
  def : InstRW<[Ampere1Write_6cyc_1XY],
               (instregex "^FCVT[HSD][SD]r")>;
  // -- vector lengthening convert
  def : InstRW<[Ampere1Write_6cyc_1XY],
               (instregex "^FCVTLv")>;
  // -- vector narrowing converts (FCVTN / FCVTXN)
  def : InstRW<[Ampere1Write_8cyc_2XY],
               (instregex "^FCVT(N|XN)v")>;
  // -- JavaScript convert
  def : InstRW<[Ampere1Write_10cyc_1X_1Z],
               (instrs FJCVTZS)>;
  // -- FMOV between the general-purpose and FP/vector register files.
  //
  // The AArch64 backend names these opcodes SOURCE-first: FMOVWSr copies a
  // W register into an S register, FMOVSWr copies S into W, FMOVXDHighr
  // writes the upper half of a vector from X, and FMOVDXHighr reads it back
  // into X. The previous revision had the two directions exchanged, which
  // put GPR->vector moves on the Z-based writes and vector->GPR moves on the
  // BS-based writes. That contradicts the other transfers in this model:
  // DUP-from-GPR uses 5cyc_1BS, INS-from-GPR uses 7cyc_1BS_1XY, and
  // SMOV/UMOV (vector -> GPR) uses 6cyc_1XY_1Z.
  // NOTE(review): direction derived from the opcode definitions and the
  // sibling entries in this file; confirm against the Ampere1 latency tables.
  //    GPR -> FP register
  def : InstRW<[Ampere1Write_5cyc_1BS],
               (instregex "^FMOV[WX][HSD]r")>;
  //    GPR -> upper 64 bits of a vector register
  def : InstRW<[Ampere1Write_7cyc_1BS_1XY],
               (instregex "^FMOVXDHighr")>;
  //    FP register / immediate -> FP register
  def : InstRW<[Ampere1Write_2cyc_1XY],
               (instregex "^FMOV[HSD][ri]")>;
  //    upper 64 bits of a vector register -> GPR
  def : InstRW<[Ampere1Write_6cyc_1XY_1Z],
               (instregex "^FMOVDXHighr")>;
  //    FP register -> GPR
  def : InstRW<[Ampere1Write_4cyc_1Z],
               (instregex "^FMOV[HSD][WX]r")>;
  // Integer arithmetic and logical instructions
  // -- add/subtract with carry
  def : InstRW<[Ampere1Write_1cyc_1A],
               (instregex "ADC(W|X)r", "SBC(W|X)r")>;
  // -- add/subtract and bitwise logical, non-flag-setting names
  def : InstRW<[Ampere1Write_Arith],
               (instregex "(ADD|AND|BIC|EON|EOR|ORN|ORR|SUB)(W|X)r")>;
  // -- add/subtract and bitwise logical, flag-setting ("S") names
  def : InstRW<[Ampere1Write_ArithFlagsetting],
               (instregex "(ADD|AND|BIC|SUB)S(W|X)r")>;
  // -- add/subtract with carry, flag-setting
  def : InstRW<[Ampere1Write_1cyc_1A],
               (instregex "(ADC|SBC)S(W|X)r")>;
  // -- rotate, mask, insert flags
  def : InstRW<[Ampere1Write_1cyc_1A],
               (instrs RMIF)>;
  // -- conditional compare
  def : InstRW<[Ampere1Write_1cyc_1A],
               (instregex "(CCMN|CCMP)(X|W)")>;
  // -- conditional select family
  def : InstRW<[Ampere1Write_1cyc_1A],
               (instregex "(CSEL|CSINC|CSINV|CSNEG)(X|W)")>;
  // -- divide, 32-bit then 64-bit
  def : InstRW<[Ampere1Write_18cyc_1BS],
               (instrs SDIVWr, UDIVWr)>;
  def : InstRW<[Ampere1Write_34cyc_1BS],
               (instrs SDIVXr, UDIVXr)>;
  // -- multiply high
  def : InstRW<[Ampere1Write_3cyc_1BS],
               (instregex "(S|U)MULHr")>;
  // -- multiply-add / multiply-subtract
  // NOTE(review): the pattern needs an "r" directly after M(ADD|SUB)[L], so a
  // name carrying a W/X size letter there (MADDWrrr-style) would not be
  // selected and only the long forms appear to match; confirm whether the
  // plain 32/64-bit forms were meant to be covered here.
  def : InstRW<[Ampere1Write_4cyc_1BS],
               (instregex "(S|U)?M(ADD|SUB)L?r")>;
  // Integer load instructions
  // -- pair loads (LDNP / LDP / LDPSW)
  def : InstRW<[Ampere1Write_4cyc_2L],
               (instregex "(LDNP|LDP|LDPSW)(X|W)")>;
  // -- register load, "ui" addressing form
  // NOTE(review): the size letters B/D/H/Q/S here differ from the
  // BB/HH/W/X letters used by the LDUR/LDTR entries below; confirm that this
  // is the intended instruction set for this section.
  def : InstRW<[Ampere1Write_4cyc_1L],
               (instregex "LDR(B|D|H|Q|S)ui")>;
  // -- register load, literal ("l") form
  def : InstRW<[Ampere1Write_4cyc_1L],
               (instregex "LDR(D|Q|W|X)l")>;
  // -- LDTR / LDTRS forms
  def : InstRW<[Ampere1Write_4cyc_1L],
               (instregex "LDTR(B|H|W|X)i")>;
  def : InstRW<[Ampere1Write_4cyc_1L],
               (instregex "LDTRS(BW|BX|HW|HX|W)i")>;
  // -- LDUR / LDURS forms
  def : InstRW<[Ampere1Write_4cyc_1L],
               (instregex "LDUR(BB|HH|X|W)i")>;
  def : InstRW<[Ampere1Write_4cyc_1L],
               (instregex "LDURS(BW|BX|HW|HX|W)i")>;
  // -- register load with a register offset (roW / roX addressing forms)
  def : InstRW<[Ampere1Write_5cyc_1AB_1L],
               (instregex "LDR(HH|SHW|SHX|W|X)ro(W|X)")>;
  // -- prefetch: literal, "ui" and PRFUM forms.
  // The list previously named PRFUMi twice and never mentioned PRFMui, so
  // that opcode was not given this write; the duplicate is replaced by the
  // missing opcode.
  def : InstRW<[Ampere1Write_1cyc_1L],
               (instrs PRFMl, PRFMui, PRFUMi)>;
  // -- prefetch, register-offset forms
  def : InstRW<[Ampere1Write_2cyc_1AB_1L],
               (instrs PRFMroW, PRFMroX)>;
  // Integer miscellaneous instructions
  // -- PC-relative address generation
  def : InstRW<[Ampere1Write_1cyc_1A],
               (instrs ADR, ADRP)>;
  // -- extract register
  def : InstRW<[Ampere1Write_1cyc_1B],
               (instregex "EXTR(W|X)")>;
  // -- bitfield move (BFM / SBFM / UBFM)
  def : InstRW<[Ampere1Write_1cyc_1B],
               (instregex "(S|U)?BFM(W|X)")>;
  // -- CRC32 / CRC32C
  def : InstRW<[Ampere1Write_3cyc_1BS],
               (instregex "^CRC32C?[BHWX]")>;
  // -- count leading sign bits
  def : InstRW<[Ampere1Write_1cyc_1B],
               (instregex "CLS(W|X)")>;
  // -- set flags from a byte / halfword
  def : InstRW<[Ampere1Write_1cyc_1A],
               (instrs SETF8, SETF16)>;
  // -- move wide immediate (MOVK / MOVN / MOVZ)
  def : InstRW<[Ampere1Write_1cyc_1AB],
               (instrs MOVKWi, MOVKXi, MOVNWi, MOVNXi, MOVZWi, MOVZXi)>;
  // -- bit and byte reversal
  def : InstRW<[Ampere1Write_1cyc_1B],
               (instregex "(RBIT|REV|REV16)(W|X)r", "REV32Xr")>;
  // -- variable shifts and rotate
  def : InstRW<[Ampere1Write_1cyc_1B],
               (instregex "(ASR|LSL|LSR|ROR)V(W|X)r")>;
  // Integer store instructions
  // -- STNP pair store
  def : InstRW<[Ampere1Write_1cyc_2S],
               (instregex "STNP(X|W)i")>;
  // -- pair store, plain immediate-offset opcodes
  def : InstRW<[Ampere1Write_2cyc_1B_1S],
               (instrs STPWi, STPXi)>;
  // -- pair store, pre/post-indexed
  def : InstRW<[Ampere1Write_3cyc_1B_1S_1AB],
               (instregex "STP(W|X)(pre|post)")>;
  // -- STTR forms
  def : InstRW<[Ampere1Write_1cyc_1S],
               (instrs STTRBi, STTRHi, STTRWi, STTRXi)>;
  // -- STUR forms and STR "ui" forms.
  // The STUR pattern used to appear twice in this list; the second copy
  // selected nothing new and has been dropped.
  // NOTE(review): the duplicate may have been a typo for another pattern
  // (e.g. the byte/halfword STR..ui names, which the pattern below does not
  // select); confirm.
  def : InstRW<[Ampere1Write_1cyc_1S],
               (instregex "STUR(BB|HH|X|W)i",
                          "STR(X|W)ui")>;
  // -- register-offset store, roX forms
  def : InstRW<[Ampere1Write_1cyc_2S],
               (instrs STRWroX, STRXroX)>;
  // -- register-offset store, roW forms
  def : InstRW<[Ampere1Write_2cyc_1AB_2S],
               (instrs STRWroW, STRXroW)>;
  // Pointer authentication
  // The AUT* entry below is disabled (left commented out).
  //def : InstRW<[Ampere1Write_7cyc_1BS],
  //        (instrs AUTIAZ, AUTIBZ, AUTIASP, AUTIBSP, AUTIA1716, AUTIB1716)>;
  // -- authenticating branch, return and exception return
  def : InstRW<[Ampere1Write_8cyc_1BS_1A],
               (instregex "BRA(A|AZ|B|BZ)", "RETA(A|B)", "ERETA(A|B)")>;
  // -- authenticating branch with link
  def : InstRW<[Ampere1Write_8cyc_1BS_2A],
               (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ)>;
  // The PAC* entry below is disabled (left commented out).
  //def : InstRW<[Ampere1Write_7cyc_1BS],
  //        (instrs PACIAZ, PACIBZ, PACIASP, PACIBSP, PACIA1716, PACIB1716)>;
  // -- authenticating load (LDRAA / LDRAB)
  def : InstRW<[Ampere1Write_11cyc_1BS_1L],
               (instregex "^LDRA(A|B)")>;
  // -- strip pointer authentication code
  def : InstRW<[Ampere1Write_7cyc_1BS],
               (instrs XPACD, XPACI)>;
  // Vector integer instructions
  // -- absolute difference
  def : InstRW<[Ampere1Write_3cyc_1XY],
               (instregex "^SABAv", "^SABALv", "^SABDv", "^SABDLv",
                          "^UABAv", "^UABALv", "^UABDv", "^UABDLv")>;
  // -- arithmetic
  def : InstRW<[Ampere1Write_3cyc_1XY],
               (instregex "^ABSv", "^(ADD|SUB)v", "^SADDLv", "^SADDW", "SHADD",
                          "SHSUB", "^SRHADD", "^URHADD", "SSUBL", "SSUBW",
                          "^UADDLv", "^UADDW", "UHADD", "UHSUB", "USUBL", "USUBW")>;
  // -- arithmetic, horizontal, v16i8 sources
  def : InstRW<[Ampere1Write_12cyc_4XY],
               (instregex "^ADDVv16i8v", "^SADDLVv16i8v", "^UADDLVv16i8v")>;
  def : InstRW<[Ampere1Write_12cyc_4XY],
               (instregex "^[SU](MIN|MAX)Vv16i8v")>;
  // -- arithmetic, horizontal: add-across on v8i8 / v4i16 / v2i32 names and
  //    min/max-across on v4i16 / v4i32 names
  // NOTE(review): the add and min/max patterns in this group and the next
  // one do not select the same arrangements (v8i8 and v4i32 land in
  // different groups for add vs. min/max); confirm against the Ampere1
  // latency tables.
  def : InstRW<[Ampere1Write_6cyc_2XY],
               (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v")>;
  def : InstRW<[Ampere1Write_6cyc_2XY],
               (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v")>;
  // -- arithmetic, horizontal: add-across on v8i16 / v4i32 names and
  //    min/max-across on v8i8 / v8i16 names
  def : InstRW<[Ampere1Write_9cyc_3XY],
               (instregex "^[SU]?ADDL?V(v8i16|v4i32)v")>;
  def : InstRW<[Ampere1Write_9cyc_3XY],
               (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v")>;
  // -- arithmetic, narrowing (plain, then rounding)
  def : InstRW<[Ampere1Write_5cyc_2XY],
               (instregex "(ADD|SUB)HNv.*")>;
  def : InstRW<[Ampere1Write_5cyc_2XY],
               (instregex "(RADD|RSUB)HNv.*")>;
  // -- arithmetic, pairwise
  def : InstRW<[Ampere1Write_3cyc_1XY],
               (instregex "^ADDPv", "^SADALP", "^UADALP", "^SADDLPv", "^UADDLPv")>;
  // -- arithmetic, saturating
  def : InstRW<[Ampere1Write_3cyc_1XY],
               (instregex "^SQADD", "^SQSUB", "^SUQADD", "^UQADD", "^UQSUB", "^USQADD")>;
  // -- bit count (CLS / CLZ / CNT)
  def : InstRW<[Ampere1Write_2cyc_1XY],
               (instregex "^(CLS|CLZ|CNT)v")>;
  // -- compare
  def : InstRW<[Ampere1Write_3cyc_1XY],
               (instregex "^CMEQv", "^CMGEv", "^CMGTv", "^CMLEv", "^CMLTv",
                          "^CMHIv", "^CMHSv")>;
  // -- compare non-zero (CMTST)
  def : InstRW<[Ampere1Write_2cyc_1XY],
               (instregex "^CMTSTv")>;
  // -- dot product
  def : InstRW<[Ampere1Write_3cyc_1XY],
               (instregex "^(S|SU|U|US)DOTv")>;
  // -- fp reciprocal estimate
  def : InstRW<[Ampere1Write_5cyc_1XY],
               (instregex "^FRECPEv", "^FRSQRTEv")>;
  // -- integer reciprocal estimate
  def : InstRW<[Ampere1Write_5cyc_1XY],
               (instregex "^URECPEv", "^URSQRTEv")>;
  // -- logical
  def : InstRW<[Ampere1Write_2cyc_1XY],
               (instregex "^ANDv", "^BICv", "^EORv", "^ORRv", "^ORNv", "^NOTv")>;
  // -- logical, narrowing
  def : InstRW<[Ampere1Write_5cyc_2XY],
               (instregex "RSHRNv",
                          "SHRNv", "SQSHRNv", "SQSHRUNv",
                          "UQXTNv")>;
  // -- matrix multiply
  def : InstRW<[Ampere1Write_6cyc_2XY],
               (instrs SMMLA, UMMLA, USMMLA)>;
  // -- max/min, element-wise then pairwise
  def : InstRW<[Ampere1Write_3cyc_1XY],
               (instregex "^SMAXv", "^SMINv", "^UMAXv", "^UMINv")>;
  def : InstRW<[Ampere1Write_3cyc_1XY],
               (instregex "^SMAXPv", "^SMINPv", "^UMAXPv", "^UMINPv")>;
  // -- move immediate (MOVI / MVNI)
  def : InstRW<[Ampere1Write_2cyc_1XY],
               (instregex "^MOVIv", "^MVNIv")>;
  // -- multiply
  def : InstRW<[Ampere1Write_3cyc_1XY],
               (instregex "MULv", "SMULLv", "UMULLv", "SQDMUL(H|L)v", "SQRDMULHv")>;
  // -- multiply accumulate
  def : InstRW<[Ampere1Write_3cyc_1XY],
               (instregex "MLAv", "MLSv", "(S|U|SQD)(MLAL|MLSL)v", "SQRDML(A|S)Hv")>;
  // -- negation, saturating (also covers saturating absolute value)
  def : InstRW<[Ampere1Write_2cyc_1XY],
               (instregex "^SQABS", "^SQNEG")>;
  // -- reverse bits/bytes
  def : InstRW<[Ampere1Write_2cyc_1XY],
               (instregex "^RBITv", "^REV16v", "^REV32v", "^REV64v")>;
  // -- shift (SSHL / USHL on the v16i8, v8i16, v4i32 and v2i64 names only)
  def : InstRW<[Ampere1Write_3cyc_1XY],
               (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>;
  // -- shift and accumulate
  def : InstRW<[Ampere1Write_3cyc_1XY],
               (instregex "SRSRAv", "SSRAv", "URSRAv", "USRAv")>;
  // -- shift, saturating (the list also names the saturating extract-narrow
  //    opcodes SQXTN / SQXTUN)
  def : InstRW<[Ampere1Write_3cyc_1XY],
               (instregex "^SQRSHLv", "^SQRSHRNv", "^SQRSHRUNv", "^SQSHL", "^SQSHLU",
                          "^SQXTNv", "^SQXTUNv", "^UQSHRNv", "UQRSHRNv", "^UQRSHL",
                          "^UQSHL")>;
  // Vector miscellaneous instructions
  // -- duplicate element (DUP names containing "lane")
  def : InstRW<[Ampere1Write_2cyc_1XY],
               (instregex "^DUPv.+lane")>;
  // -- duplicate from GPR (DUP names containing "gpr")
  def : InstRW<[Ampere1Write_5cyc_1BS],
               (instregex "^DUPv.+gpr")>;
  // -- extract narrow
  def : InstRW<[Ampere1Write_2cyc_1XY],
               (instregex "^XTNv")>;
  // -- EXT, and INS from another vector lane
  def : InstRW<[Ampere1Write_2cyc_1XY],
               (instregex "^EXTv", "^INSv.+lane")>;
  // -- move FP immediate
  def : InstRW<[Ampere1Write_2cyc_1XY],
               (instregex "^FMOVv")>;
  // -- move element to GPR (SMOV / UMOV)
  def : InstRW<[Ampere1Write_6cyc_1XY_1Z],
               (instregex "(S|U)MOVv")>;
  // -- move from GPR to any element (INS names containing "gpr")
  def : InstRW<[Ampere1Write_7cyc_1BS_1XY],
               (instregex "^INSv.+gpr")>;
  // -- table lookup (TBL / TBX), grouped by the One/Two/Three/Four suffix
  def : InstRW<[Ampere1Write_2cyc_1XY],
               (instrs TBLv8i8One, TBLv16i8One, TBXv8i8One, TBXv16i8One)>;
  def : InstRW<[Ampere1Write_4cyc_2XY],
               (instrs TBLv8i8Two, TBLv16i8Two, TBXv8i8Two, TBXv16i8Two)>;
  def : InstRW<[Ampere1Write_6cyc_3XY],
               (instrs TBLv8i8Three, TBLv16i8Three, TBXv8i8Three, TBXv16i8Three)>;
  def : InstRW<[Ampere1Write_8cyc_4XY],
               (instrs TBLv8i8Four, TBLv16i8Four, TBXv8i8Four, TBXv16i8Four)>;
  // -- transpose (TRN1 / TRN2) and unzip (UZP1 / UZP2)
  def : InstRW<[Ampere1Write_2cyc_1XY],
               (instregex "^TRN1v", "^TRN2v", "^UZP1v", "^UZP2v")>;
  // -- zip (ZIP1 / ZIP2)
  def : InstRW<[Ampere1Write_2cyc_1XY],
               (instregex "^ZIP1v", "^ZIP2v")>;
  1013. } // SchedModel = Ampere1Model