AArch64SchedA64FX.td 84 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613
  1. //=- AArch64SchedA64FX.td - Fujitsu A64FX Scheduling Defs -*- tablegen -*-=//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file defines the scheduling model for the Fujitsu A64FX processors.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. def A64FXModel : SchedMachineModel {
  13. let IssueWidth = 6; // 6 micro-ops dispatched at a time.
  14. let MicroOpBufferSize = 180; // 180 entries in micro-op re-order buffer.
  15. let LoadLatency = 5; // Optimistic load latency.
  16. let MispredictPenalty = 12; // Extra cycles for mispredicted branch.
  17. // Determined via a mix of micro-arch details and experimentation.
  18. let LoopMicroOpBufferSize = 128;
  19. let PostRAScheduler = 1; // Using PostRA sched.
  20. let CompleteModel = 1;
  21. list<Predicate> UnsupportedFeatures =
  22. [HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3, HasSVE2BitPerm, HasPAuth,
  23. HasSVE2orSME, HasMTE, HasMatMulInt8, HasBF16, HasSME2, HasSME2p1, HasSVE2p1,
  24. HasSVE2p1_or_HasSME2p1, HasSMEF16F16];
  25. let FullInstRWOverlapCheck = 0;
  26. }
  27. let SchedModel = A64FXModel in {
  28. // Define the issue ports.
  29. // A64FXIP*
  30. // Port 0
  31. def A64FXIPFLA : ProcResource<1>;
  32. // Port 1
  33. def A64FXIPPR : ProcResource<1>;
  34. // Port 2
  35. def A64FXIPEXA : ProcResource<1>;
  36. // Port 3
  37. def A64FXIPFLB : ProcResource<1>;
  38. // Port 4
  39. def A64FXIPEXB : ProcResource<1>;
  40. // Port 5
  41. def A64FXIPEAGA : ProcResource<1>;
  42. // Port 6
  43. def A64FXIPEAGB : ProcResource<1>;
  44. // Port 7
  45. def A64FXIPBR : ProcResource<1>;
  46. // Define groups for the functional units on each issue port. Each group
  47. // created will be used by a WriteRes later on.
  48. def A64FXGI7 : ProcResGroup<[A64FXIPBR]>;
  49. def A64FXGI0 : ProcResGroup<[A64FXIPFLA]>;
  50. def A64FXGI1 : ProcResGroup<[A64FXIPPR]>;
  51. def A64FXGI2 : ProcResGroup<[A64FXIPEXA]>;
  52. def A64FXGI3 : ProcResGroup<[A64FXIPFLB]>;
  53. def A64FXGI4 : ProcResGroup<[A64FXIPEXB]>;
  54. def A64FXGI5 : ProcResGroup<[A64FXIPEAGA]>;
  55. def A64FXGI6 : ProcResGroup<[A64FXIPEAGB]>;
  56. def A64FXGI03 : ProcResGroup<[A64FXIPFLA, A64FXIPFLB]>;
  57. def A64FXGI01 : ProcResGroup<[A64FXIPFLA, A64FXIPPR]>;
  58. def A64FXGI24 : ProcResGroup<[A64FXIPEXA, A64FXIPEXB]>;
  59. def A64FXGI56 : ProcResGroup<[A64FXIPEAGA, A64FXIPEAGB]>;
  60. def A64FXGI056 : ProcResGroup<[A64FXIPFLA, A64FXIPEAGA, A64FXIPEAGB]>;
  61. def A64FXGI2456 : ProcResGroup<[A64FXIPEXA, A64FXIPEXB, A64FXIPEAGA, A64FXIPEAGB]>;
  62. def A64FXAny : ProcResGroup<[A64FXIPFLA, A64FXIPPR, A64FXIPEXA, A64FXIPFLB,
  63. A64FXIPEXB, A64FXIPEAGA, A64FXIPEAGB, A64FXIPBR]>;
  64. def A64FXWrite_1Cyc_GI7 : SchedWriteRes<[A64FXGI7]> {
  65. let Latency = 1;
  66. }
  67. def A64FXWrite_2Cyc_GI0 : SchedWriteRes<[A64FXGI0]> {
  68. let Latency = 2;
  69. }
  70. def A64FXWrite_4Cyc_GI0 : SchedWriteRes<[A64FXGI0]> {
  71. let Latency = 4;
  72. }
  73. def A64FXWrite_6Cyc_GI0 : SchedWriteRes<[A64FXGI0]> {
  74. let Latency = 6;
  75. }
  76. def A64FXWrite_8Cyc_GI0 : SchedWriteRes<[A64FXGI0]> {
  77. let Latency = 8;
  78. }
  79. def A64FXWrite_9Cyc_GI0 : SchedWriteRes<[A64FXGI0]> {
  80. let Latency = 9;
  81. }
  82. def A64FXWrite_3Cyc_GI1 : SchedWriteRes<[A64FXGI1]> {
  83. let Latency = 3;
  84. }
  85. def A64FXWrite_5Cyc_GI2 : SchedWriteRes<[A64FXGI2]> {
  86. let Latency = 5;
  87. }
  88. def A64FXWrite_4Cyc_GI3 : SchedWriteRes<[A64FXGI3]> {
  89. let Latency = 4;
  90. }
  91. def A64FXWrite_6Cyc_GI3 : SchedWriteRes<[A64FXGI3]> {
  92. let Latency = 6;
  93. }
  94. def A64FXWrite_4Cyc_GI03 : SchedWriteRes<[A64FXGI03]> {
  95. let Latency = 4;
  96. }
  97. def A64FXWrite_8Cyc_GI03 : SchedWriteRes<[A64FXGI03]> {
  98. let Latency = 8;
  99. }
  100. def A64FXWrite_9Cyc_GI03 : SchedWriteRes<[A64FXGI03]> {
  101. let Latency = 9;
  102. }
  103. def A64FXWrite_10Cyc_GI4 : SchedWriteRes<[A64FXGI4]> {
  104. let Latency = 10;
  105. }
  106. def A64FXWrite_12Cyc_GI4 : SchedWriteRes<[A64FXGI4]> {
  107. let Latency = 12;
  108. }
  109. def A64FXWrite_20Cyc_GI4 : SchedWriteRes<[A64FXGI4]> {
  110. let Latency = 20;
  111. }
  112. def A64FXWrite_5Cyc_GI5 : SchedWriteRes<[A64FXGI5]> {
  113. let Latency = 5;
  114. }
  115. def A64FXWrite_11Cyc_GI5 : SchedWriteRes<[A64FXGI5]> {
  116. let Latency = 11;
  117. }
  118. def A64FXWrite_5Cyc_GI6 : SchedWriteRes<[A64FXGI6]> {
  119. let Latency = 5;
  120. }
  121. def A64FXWrite_1Cyc_GI24 : SchedWriteRes<[A64FXGI24]> {
  122. let Latency = 1;
  123. }
  124. def A64FXWrite_2Cyc_GI24 : SchedWriteRes<[A64FXGI24]> {
  125. let Latency = 2;
  126. }
  127. def A64FXWrite_4Cyc_NGI24 : SchedWriteRes<[A64FXGI24]> {
  128. let Latency = 4;
  129. let NumMicroOps = 4;
  130. }
  131. def A64FXWrite_1Cyc_GI56 : SchedWriteRes<[A64FXGI56]> {
  132. let Latency = 1;
  133. }
  134. def A64FXWrite_5Cyc_GI56 : SchedWriteRes<[A64FXGI56]> {
  135. let Latency = 5;
  136. }
  137. def A64FXWrite_8Cyc_GI56 : SchedWriteRes<[A64FXGI56]> {
  138. let Latency = 8;
  139. }
  140. def A64FXWrite_11Cyc_GI56 : SchedWriteRes<[A64FXGI56]> {
  141. let Latency = 11;
  142. }
  143. def A64FXWrite_LDNP: SchedWriteRes<[A64FXGI56]> {
  144. let Latency = 5;
  145. let NumMicroOps = 2;
  146. }
  147. def A64FXWrite_LDP01: SchedWriteRes<[A64FXGI2456]> {
  148. let Latency = 5;
  149. let NumMicroOps = 3;
  150. }
  151. def A64FXWrite_LDR01: SchedWriteRes<[A64FXGI2456]> {
  152. let Latency = 5;
  153. let NumMicroOps = 2;
  154. }
  155. def A64FXWrite_LD102: SchedWriteRes<[A64FXGI56]> {
  156. let Latency = 8;
  157. let NumMicroOps = 2;
  158. }
  159. def A64FXWrite_LD103: SchedWriteRes<[A64FXGI56]> {
  160. let Latency = 11;
  161. let NumMicroOps = 2;
  162. }
  163. def A64FXWrite_LD104: SchedWriteRes<[A64FXGI56]> {
  164. let Latency = 8;
  165. let NumMicroOps = 3;
  166. }
  167. def A64FXWrite_LD105: SchedWriteRes<[A64FXGI56]> {
  168. let Latency = 11;
  169. let NumMicroOps = 3;
  170. }
  171. def A64FXWrite_LD106: SchedWriteRes<[A64FXGI56]> {
  172. let Latency = 8;
  173. let NumMicroOps = 4;
  174. }
  175. def A64FXWrite_LD107: SchedWriteRes<[A64FXGI56]> {
  176. let Latency = 11;
  177. let NumMicroOps = 4;
  178. }
  179. def A64FXWrite_LD108: SchedWriteRes<[A64FXGI56]> {
  180. let Latency = 8;
  181. let NumMicroOps = 2;
  182. }
  183. def A64FXWrite_LD109: SchedWriteRes<[A64FXGI56]> {
  184. let Latency = 11;
  185. let NumMicroOps = 2;
  186. }
  187. def A64FXWrite_LD110: SchedWriteRes<[A64FXGI56]> {
  188. let Latency = 8;
  189. let NumMicroOps = 3;
  190. }
  191. def A64FXWrite_LD111: SchedWriteRes<[A64FXGI56]> {
  192. let Latency = 11;
  193. let NumMicroOps = 3;
  194. }
  195. def A64FXWrite_LD112: SchedWriteRes<[A64FXGI56]> {
  196. let Latency = 8;
  197. let NumMicroOps = 4;
  198. }
  199. def A64FXWrite_LD113: SchedWriteRes<[A64FXGI56]> {
  200. let Latency = 11;
  201. let NumMicroOps = 4;
  202. }
  203. def A64FXWrite_LD114: SchedWriteRes<[A64FXGI56]> {
  204. let Latency = 8;
  205. let NumMicroOps = 5;
  206. }
  207. def A64FXWrite_LD115: SchedWriteRes<[A64FXGI56]> {
  208. let Latency = 11;
  209. let NumMicroOps = 5;
  210. }
  211. def A64FXWrite_LD1I0: SchedWriteRes<[A64FXGI056]> {
  212. let Latency = 8;
  213. let NumMicroOps = 2;
  214. }
  215. def A64FXWrite_LD1I1: SchedWriteRes<[A64FXGI056]> {
  216. let Latency = 8;
  217. let NumMicroOps = 3;
  218. }
  219. def A64FXWrite_LD2I0: SchedWriteRes<[A64FXGI056]> {
  220. let Latency = 8;
  221. let NumMicroOps = 4;
  222. }
  223. def A64FXWrite_LD2I1: SchedWriteRes<[A64FXGI056]> {
  224. let Latency = 8;
  225. let NumMicroOps = 5;
  226. }
  227. def A64FXWrite_LD3I0: SchedWriteRes<[A64FXGI056]> {
  228. let Latency = 8;
  229. let NumMicroOps = 6;
  230. }
  231. def A64FXWrite_LD3I1: SchedWriteRes<[A64FXGI056]> {
  232. let Latency = 8;
  233. let NumMicroOps = 7;
  234. }
  235. def A64FXWrite_LD4I0: SchedWriteRes<[A64FXGI056]> {
  236. let Latency = 8;
  237. let NumMicroOps = 8;
  238. }
  239. def A64FXWrite_LD4I1: SchedWriteRes<[A64FXGI056]> {
  240. let Latency = 8;
  241. let NumMicroOps = 9;
  242. }
  243. def A64FXWrite_1Cyc_GI2456 : SchedWriteRes<[A64FXGI2456]> {
  244. let Latency = 1;
  245. }
  246. def A64FXWrite_FMOV_GV : SchedWriteRes<[A64FXGI03]> {
  247. let Latency = 10;
  248. }
  249. def A64FXWrite_FMOV_VG14 : SchedWriteRes<[A64FXGI03]> {
  250. let Latency = 14;
  251. }
  252. def A64FXWrite_ADDLV : SchedWriteRes<[A64FXGI03]> {
  253. let Latency = 12;
  254. }
  255. def A64FXWrite_MULLE : SchedWriteRes<[A64FXGI03]> {
  256. let Latency = 14;
  257. }
  258. def A64FXWrite_MULLV : SchedWriteRes<[A64FXGI03]> {
  259. let Latency = 14;
  260. }
  261. def A64FXWrite_MADDL : SchedWriteRes<[A64FXGI03]> {
  262. let Latency = 6;
  263. }
  264. def A64FXWrite_ABA : SchedWriteRes<[A64FXGI03]> {
  265. let Latency = 8;
  266. }
  267. def A64FXWrite_ABAL : SchedWriteRes<[A64FXGI03]> {
  268. let Latency = 10;
  269. }
  270. def A64FXWrite_ADDLV1 : SchedWriteRes<[A64FXGI03]> {
  271. let Latency = 12;
  272. let NumMicroOps = 6;
  273. }
  274. def A64FXWrite_MINMAXV : SchedWriteRes<[A64FXGI03]> {
  275. let Latency = 14;
  276. let NumMicroOps = 6;
  277. }
  278. def A64FXWrite_SQRDMULH : SchedWriteRes<[A64FXGI03]> {
  279. let Latency = 9;
  280. }
  281. def A64FXWrite_PMUL : SchedWriteRes<[A64FXGI03]> {
  282. let Latency = 8;
  283. }
  284. def A64FXWrite_SRSRAV : SchedWriteRes<[A64FXGI03]> {
  285. let Latency = 8;
  286. let NumMicroOps = 3;
  287. }
  288. def A64FXWrite_SSRAV : SchedWriteRes<[A64FXGI03]> {
  289. let Latency = 8;
  290. let NumMicroOps = 2;
  291. }
  292. def A64FXWrite_RSHRN : SchedWriteRes<[A64FXGI03]> {
  293. let Latency = 10;
  294. let NumMicroOps = 3;
  295. }
  296. def A64FXWrite_SHRN : SchedWriteRes<[A64FXGI03]> {
  297. let Latency = 10;
  298. let NumMicroOps = 2;
  299. }
  300. def A64FXWrite_ADDP : SchedWriteRes<[A64FXGI03]> {
  301. let Latency = 10;
  302. let NumMicroOps = 3;
  303. }
  304. def A64FXWrite_FMULXE : SchedWriteRes<[A64FXGI03]> {
  305. let Latency = 15;
  306. let NumMicroOps = 2;
  307. }
  308. def A64FXWrite_FADDPV : SchedWriteRes<[A64FXGI03]> {
  309. let Latency = 15;
  310. let NumMicroOps = 3;
  311. }
  312. def A64FXWrite_SADALP : SchedWriteRes<[A64FXGI03]> {
  313. let Latency = 10;
  314. let NumMicroOps = 3;
  315. }
  316. def A64FXWrite_SADDLP : SchedWriteRes<[A64FXGI03]> {
  317. let Latency = 10;
  318. let NumMicroOps = 2;
  319. }
  320. def A64FXWrite_FCVTXNV : SchedWriteRes<[A64FXGI03]> {
  321. let Latency = 15;
  322. let NumMicroOps = 2;
  323. }
  324. def A64FXWrite_FMAXVVH : SchedWriteRes<[A64FXGI03]> {
  325. let Latency = 14;
  326. let NumMicroOps = 7;
  327. }
  328. def A64FXWrite_BIF : SchedWriteRes<[A64FXGI03]> {
  329. let Latency = 5;
  330. }
  331. def A64FXWrite_DUPGENERAL : SchedWriteRes<[A64FXGI03]> {
  332. let Latency = 10;
  333. }
  334. def A64FXWrite_SHA00 : SchedWriteRes<[A64FXGI0]> {
  335. let Latency = 9;
  336. }
  337. def A64FXWrite_SHA01 : SchedWriteRes<[A64FXGI0]> {
  338. let Latency = 12;
  339. }
  340. def A64FXWrite_SMOV : SchedWriteRes<[A64FXGI03]> {
  341. let Latency = 25;
  342. }
  343. def A64FXWrite_TBX1 : SchedWriteRes<[A64FXGI03]> {
  344. let Latency = 10;
  345. let NumMicroOps = 3;
  346. }
  347. def A64FXWrite_TBX2 : SchedWriteRes<[A64FXGI03]> {
  348. let Latency = 10;
  349. let NumMicroOps = 5;
  350. }
  351. def A64FXWrite_TBX3 : SchedWriteRes<[A64FXGI03]> {
  352. let Latency = 10;
  353. let NumMicroOps = 7;
  354. }
  355. def A64FXWrite_TBX4 : SchedWriteRes<[A64FXGI03]> {
  356. let Latency = 10;
  357. let NumMicroOps = 9;
  358. }
  359. def A64FXWrite_PREF0: SchedWriteRes<[A64FXGI56]> {
  360. let Latency = 0;
  361. }
  362. def A64FXWrite_PREF1: SchedWriteRes<[A64FXGI56]> {
  363. let Latency = 0;
  364. }
  365. def A64FXWrite_SWP: SchedWriteRes<[A64FXGI56]> {
  366. let Latency = 0;
  367. }
  368. def A64FXWrite_STUR: SchedWriteRes<[A64FXGI56]> {
  369. let Latency = 0;
  370. }
  371. def A64FXWrite_STNP: SchedWriteRes<[A64FXGI56]> {
  372. let Latency = 0;
  373. }
  374. def A64FXWrite_STP01: SchedWriteRes<[A64FXGI56]> {
  375. let Latency = 0;
  376. }
  377. def A64FXWrite_ST10: SchedWriteRes<[A64FXGI56]> {
  378. let Latency = 0;
  379. }
  380. def A64FXWrite_ST11: SchedWriteRes<[A64FXGI56]> {
  381. let Latency = 0;
  382. }
  383. def A64FXWrite_ST12: SchedWriteRes<[A64FXGI56]> {
  384. let Latency = 0;
  385. }
  386. def A64FXWrite_ST13: SchedWriteRes<[A64FXGI56]> {
  387. let Latency = 0;
  388. }
  389. def A64FXWrite_ST14: SchedWriteRes<[A64FXGI56]> {
  390. let Latency = 1;
  391. }
  392. def A64FXWrite_ST15: SchedWriteRes<[A64FXGI56]> {
  393. let Latency = 1;
  394. }
  395. def A64FXWrite_ST16: SchedWriteRes<[A64FXGI56]> {
  396. let Latency = 1;
  397. }
  398. def A64FXWrite_ST17: SchedWriteRes<[A64FXGI56]> {
  399. let Latency = 1;
  400. }
  401. def A64FXWrite_CAS: SchedWriteRes<[A64FXGI56]> {
  402. let Latency = 7;
  403. }
  404. // Define commonly used read types.
  405. // No forwarding is provided for these types.
  406. def : ReadAdvance<ReadI, 0>;
  407. def : ReadAdvance<ReadISReg, 0>;
  408. def : ReadAdvance<ReadIEReg, 0>;
  409. def : ReadAdvance<ReadIM, 0>;
  410. def : ReadAdvance<ReadIMA, 0>;
  411. def : ReadAdvance<ReadID, 0>;
  412. def : ReadAdvance<ReadExtrHi, 0>;
  413. def : ReadAdvance<ReadAdrBase, 0>;
  414. def : ReadAdvance<ReadST, 0>;
  415. def : ReadAdvance<ReadVLD, 0>;
  416. //===----------------------------------------------------------------------===//
  417. // 3. Instruction Tables.
  418. //---
  419. // 3.1 Branch Instructions
  420. //---
  421. // Branch, immed
  422. // Branch and link, immed
  423. // Compare and branch
  424. def : WriteRes<WriteBr, [A64FXGI7]> {
  425. let Latency = 1;
  426. }
  427. // Branch, register
  428. // Branch and link, register != LR
  429. // Branch and link, register = LR
  430. def : WriteRes<WriteBrReg, [A64FXGI7]> {
  431. let Latency = 1;
  432. }
  433. def : WriteRes<WriteSys, []> { let Latency = 1; }
  434. def : WriteRes<WriteBarrier, []> { let Latency = 1; }
  435. def : WriteRes<WriteHint, []> { let Latency = 1; }
  436. def : WriteRes<WriteAtomic, []> {
  437. let Latency = 4;
  438. }
  439. //---
  440. // Branch
  441. //---
  442. def : InstRW<[A64FXWrite_1Cyc_GI7], (instrs B, BL, BR, BLR)>;
  443. def : InstRW<[A64FXWrite_1Cyc_GI7], (instrs RET)>;
  444. def : InstRW<[A64FXWrite_1Cyc_GI7], (instregex "^B..$")>;
  445. def : InstRW<[A64FXWrite_1Cyc_GI7],
  446. (instregex "^CBZ", "^CBNZ", "^TBZ", "^TBNZ")>;
  447. //---
  448. // 3.2 Arithmetic and Logical Instructions
  449. // 3.3 Move and Shift Instructions
  450. //---
  451. // ALU, basic
  452. // Conditional compare
  453. // Conditional select
  454. // Address generation
  455. def : WriteRes<WriteI, [A64FXGI2456]> {
  456. let Latency = 1;
  457. }
  458. def : InstRW<[WriteI],
  459. (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?",
  460. "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)",
  461. "ADC(W|X)r",
  462. "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)",
  463. "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)",
  464. "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)",
  465. "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r",
  466. "SBCS(W|X)r", "CCMN(W|X)(i|r)",
  467. "CCMP(W|X)(i|r)", "CSEL(W|X)r",
  468. "CSINC(W|X)r", "CSINV(W|X)r",
  469. "CSNEG(W|X)r")>;
  470. def : InstRW<[WriteI], (instrs COPY)>;
  471. // ALU, extend and/or shift
  472. def : WriteRes<WriteISReg, [A64FXGI2456]> {
  473. let Latency = 2;
  474. }
  475. def : InstRW<[WriteISReg],
  476. (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?",
  477. "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)",
  478. "ADC(W|X)r",
  479. "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)",
  480. "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)",
  481. "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)",
  482. "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r",
  483. "SBCS(W|X)r", "CCMN(W|X)(i|r)",
  484. "CCMP(W|X)(i|r)", "CSEL(W|X)r",
  485. "CSINC(W|X)r", "CSINV(W|X)r",
  486. "CSNEG(W|X)r")>;
  487. def : WriteRes<WriteIEReg, [A64FXGI2456]> {
  488. let Latency = 1;
  489. }
  490. def : InstRW<[WriteIEReg],
  491. (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?",
  492. "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)",
  493. "ADC(W|X)r",
  494. "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)",
  495. "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)",
  496. "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)",
  497. "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r",
  498. "SBCS(W|X)r", "CCMN(W|X)(i|r)",
  499. "CCMP(W|X)(i|r)", "CSEL(W|X)r",
  500. "CSINC(W|X)r", "CSINV(W|X)r",
  501. "CSNEG(W|X)r")>;
  502. // Move immed
  503. def : WriteRes<WriteImm, [A64FXGI2456]> {
  504. let Latency = 1;
  505. }
  506. def : InstRW<[A64FXWrite_1Cyc_GI2456],
  507. (instrs MOVKWi, MOVKXi, MOVNWi, MOVNXi, MOVZWi, MOVZXi)>;
  508. def : InstRW<[A64FXWrite_2Cyc_GI24],
  509. (instrs ASRVWr, ASRVXr, LSLVWr, LSLVXr, RORVWr, RORVXr)>;
  510. // Variable shift
  511. def : WriteRes<WriteIS, [A64FXGI2456]> {
  512. let Latency = 1;
  513. }
  514. //---
  515. // 3.4 Divide and Multiply Instructions
  516. //---
  517. // Divide, W-form
  518. def : WriteRes<WriteID32, [A64FXGI4]> {
  519. let Latency = 39;
  520. let ResourceCycles = [39];
  521. }
  522. // Divide, X-form
  523. def : WriteRes<WriteID64, [A64FXGI4]> {
  524. let Latency = 23;
  525. let ResourceCycles = [23];
  526. }
  527. // Multiply accumulate, W-form
  528. def : WriteRes<WriteIM32, [A64FXGI2456]> {
  529. let Latency = 5;
  530. }
  531. // Multiply accumulate, X-form
  532. def : WriteRes<WriteIM64, [A64FXGI2456]> {
  533. let Latency = 5;
  534. }
  535. def : InstRW<[WriteIM32], (instrs MADDWrrr, MSUBWrrr)>;
  536. def : InstRW<[WriteIM32], (instrs MADDXrrr, MSUBXrrr)>;
  537. def : InstRW<[A64FXWrite_MADDL],
  538. (instregex "(S|U)(MADDL|MSUBL)rrr")>;
  539. def : InstRW<[WriteID32], (instrs SDIVWr, UDIVWr)>;
  540. def : InstRW<[WriteID64], (instrs SDIVXr, UDIVXr)>;
  541. // Bitfield extract, two reg
  542. def : WriteRes<WriteExtr, [A64FXGI2456]> {
  543. let Latency = 1;
  544. }
  545. // Multiply high
  546. def : InstRW<[A64FXWrite_5Cyc_GI2], (instrs SMULHrr, UMULHrr)>;
  547. // Miscellaneous Data-Processing Instructions
  548. // Bitfield extract
  549. def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs EXTRWrri, EXTRXrri)>;
  550. // Bitifield move - basic
  551. def : InstRW<[A64FXWrite_1Cyc_GI24],
  552. (instrs SBFMWri, SBFMXri, UBFMWri, UBFMXri)>;
  553. // Bitfield move, insert
  554. def : InstRW<[A64FXWrite_4Cyc_NGI24], (instregex "^BFM")>;
  555. def : InstRW<[A64FXWrite_1Cyc_GI24], (instregex "(S|U)?BFM.*")>;
  556. // Count leading
  557. def : InstRW<[A64FXWrite_2Cyc_GI0], (instregex "^CLS(W|X)r$",
  558. "^CLZ(W|X)r$")>;
  559. // Reverse bits
  560. def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs RBITWr, RBITXr)>;
  561. // Cryptography Extensions
  562. def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^AES[DE]")>;
  563. def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^AESI?MC")>;
  564. def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^PMULL")>;
  565. def : InstRW<[A64FXWrite_SHA00], (instregex "^SHA1SU0")>;
  566. def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^SHA1(H|SU1)")>;
  567. def : InstRW<[A64FXWrite_SHA01], (instregex "^SHA1[CMP]")>;
  568. def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^SHA256SU0")>;
  569. def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^SHA256SU1")>;
  570. def : InstRW<[A64FXWrite_SHA01], (instregex "^SHA256(H|H2)")>;
  571. // CRC Instructions
  572. def : InstRW<[A64FXWrite_10Cyc_GI4], (instrs CRC32Brr, CRC32Hrr)>;
  573. def : InstRW<[A64FXWrite_12Cyc_GI4], (instrs CRC32Wrr)>;
  574. def : InstRW<[A64FXWrite_20Cyc_GI4], (instrs CRC32Xrr)>;
  575. def : InstRW<[A64FXWrite_10Cyc_GI4], (instrs CRC32CBrr, CRC32CHrr)>;
  576. def : InstRW<[A64FXWrite_12Cyc_GI4], (instrs CRC32CWrr)>;
  577. def : InstRW<[A64FXWrite_20Cyc_GI4], (instrs CRC32CXrr)>;
  578. // Reverse bits/bytes
  579. // NOTE: Handled by WriteI.
  580. //---
  581. // 3.6 Load Instructions
  582. // 3.10 FP Load Instructions
  583. //---
  584. // Load register, literal
  585. // Load register, unscaled immed
  586. // Load register, immed unprivileged
  587. // Load register, unsigned immed
  588. def : WriteRes<WriteLD, [A64FXGI56]> {
  589. let Latency = 4;
  590. }
  591. // Load register, immed post-index
  592. // NOTE: Handled by WriteLD, WriteI.
  593. // Load register, immed pre-index
  594. // NOTE: Handled by WriteLD, WriteAdr.
  595. def : WriteRes<WriteAdr, [A64FXGI2456]> {
  596. let Latency = 1;
  597. }
  598. // Load pair, immed offset, normal
  599. // Load pair, immed offset, signed words, base != SP
  600. // Load pair, immed offset signed words, base = SP
  601. // LDP only breaks into *one* LS micro-op. Thus
  602. // the resources are handled by WriteLD.
  603. def : WriteRes<WriteLDHi, []> {
  604. let Latency = 5;
  605. }
  606. // Load register offset, basic
  607. // Load register, register offset, scale by 4/8
  608. // Load register, register offset, scale by 2
  609. // Load register offset, extend
  610. // Load register, register offset, extend, scale by 4/8
  611. // Load register, register offset, extend, scale by 2
  612. def A64FXWriteLDIdx : SchedWriteVariant<[
  613. SchedVar<ScaledIdxPred, [A64FXWrite_1Cyc_GI56]>,
  614. SchedVar<NoSchedPred, [A64FXWrite_1Cyc_GI56]>]>;
  615. def : SchedAlias<WriteLDIdx, A64FXWriteLDIdx>;
  616. def A64FXReadAdrBase : SchedReadVariant<[
  617. SchedVar<ScaledIdxPred, [ReadDefault]>,
  618. SchedVar<NoSchedPred, [ReadDefault]>]>;
  619. def : SchedAlias<ReadAdrBase, A64FXReadAdrBase>;
  // Load pair, immed pre-index, normal
  // Load pair, immed pre-index, signed words
  // Load pair, immed post-index, normal
  // Load pair, immed post-index, signed words
  //   NOTE: Handled by WriteLD, WriteLDHi, WriteAdr.

  // Load pair / load non-temporal pair, immediate offset. The first
  // destination uses A64FXWrite_LDNP and the second uses WriteLDHi.
  // One InstRW record is emitted per opcode.
  foreach Opc = [LDNPDi, LDNPQi, LDNPSi, LDNPWi, LDNPXi,
                 LDPDi, LDPQi, LDPSi, LDPSWi, LDPWi, LDPXi] in
    def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs Opc)>;
  // Load register, unsigned immediate (B/D/H/Q/S forms): A64FXWrite_5Cyc_GI56.
  foreach Opc = [LDRBui, LDRDui, LDRHui, LDRQui, LDRSui] in
    def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs Opc)>;

  // Load register, PC-relative literal: A64FXWrite_5Cyc_GI6.
  foreach Opc = [LDRDl, LDRQl, LDRWl, LDRXl] in
    def : InstRW<[A64FXWrite_5Cyc_GI6], (instrs Opc)>;

  // Load register, unprivileged (LDTR*): A64FXWrite_5Cyc_GI56.
  foreach Opc = [LDTRBi, LDTRHi, LDTRWi, LDTRXi,
                 LDTRSBWi, LDTRSBXi, LDTRSHWi, LDTRSHXi, LDTRSWi] in
    def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs Opc)>;
  // Pre-/post-indexed loads.
  //
  // Each opcode below gets exactly one InstRW record. Repeated entries with
  // identical operands (including a doubled LDPWpre standing where LDPXpre
  // belongs) are collapsed; the write list attached to every opcode is
  // unchanged.

  // Load pair, immed pre-index / post-index.
  foreach Opc = [LDPDpre,  LDPQpre,  LDPSpre,  LDPWpre,  LDPXpre,
                 LDPDpost, LDPQpost, LDPSpost, LDPWpost, LDPXpost] in
    def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], (instrs Opc)>;

  // Load register, immed pre-index, plus the sign-extending and
  // byte/halfword (BB/HH) post-index forms, which use WriteAdr as well.
  foreach Opc = [LDRBpre, LDRDpre, LDRHpre, LDRQpre, LDRSpre, LDRWpre, LDRXpre,
                 LDRSBWpre, LDRSBXpre, LDRSBWpost, LDRSBXpost,
                 LDRSHWpre, LDRSHXpre, LDRSHWpost, LDRSHXpost,
                 LDRBBpre, LDRBBpost, LDRHHpre, LDRHHpost] in
    def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs Opc)>;

  // Load register, immed post-index (B/D/H/Q/S/W/X forms); these pair the
  // load write with WriteI.
  foreach Opc = [LDRBpost, LDRDpost, LDRHpost, LDRQpost,
                 LDRSpost, LDRWpost, LDRXpost] in
    def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs Opc)>;
  // Load register, register offset (W- and X-register index forms).
  //
  // Every opcode uses A64FXWrite_5Cyc_GI56 with ReadAdrBase for the base
  // address. Each opcode gets exactly one InstRW record; repeated identical
  // entries (LDRBroW appeared three times, the rest twice) are collapsed.
  foreach Opc = [LDRBroW, LDRDroW, LDRHroW, LDRHHroW, LDRQroW,
                 LDRSroW, LDRSHWroW, LDRSHXroW, LDRWroW, LDRXroW,
                 LDRBroX, LDRDroX, LDRHroX, LDRHHroX, LDRQroX,
                 LDRSroX, LDRSHWroX, LDRSHXroX, LDRWroX, LDRXroX] in
    def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs Opc)>;
  // Load register, unscaled immediate (LDUR*): A64FXWrite_5Cyc_GI56.
  // One InstRW record is emitted per opcode.
  foreach Opc = [LDURBi, LDURBBi, LDURDi, LDURHi, LDURHHi, LDURQi, LDURSi,
                 LDURXi, LDURSBWi, LDURSBXi, LDURSHWi, LDURSHXi, LDURSWi] in
    def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs Opc)>;
  //---
  // Prefetch
  //---
  // The literal form uses A64FXWrite_PREF0; the unscaled, unsigned-immediate
  // and register-offset forms use A64FXWrite_PREF1.
  def : InstRW<[A64FXWrite_PREF0], (instrs PRFMl)>;

  foreach Opc = [PRFUMi, PRFMui, PRFMroW, PRFMroX] in
    def : InstRW<[A64FXWrite_PREF1], (instrs Opc)>;
  //--
  // 3.7 Store Instructions
  // 3.11 FP Store Instructions
  //--

  // Default resources for the generic WriteST class, which covers:
  //   - Store register, unscaled immed
  //   - Store register, immed unprivileged
  //   - Store register, unsigned immed
  // 1-cycle latency on A64FXGI56.
  let Latency = 1 in
  def : WriteRes<WriteST, [A64FXGI56]>;
  // Store register, immed post-index
  //   NOTE: Handled by WriteAdr, WriteST, ReadAdrBase
  // Store register, immed pre-index
  //   NOTE: Handled by WriteAdr, WriteST
  //
  // Register-offset stores (generic WriteSTIdx class):
  //   - Store register, register offset, basic
  //   - Store register, register offset, scaled by 4/8
  //   - Store register, register offset, scaled by 2
  //   - Store register, register offset, extend
  //   - Store register, register offset, extend, scale by 4/8
  //   - Store register, register offset, extend, scale by 1
  // 1-cycle latency; consumes both A64FXGI56 and A64FXGI2456.
  let Latency = 1 in
  def : WriteRes<WriteSTIdx, [A64FXGI56, A64FXGI2456]>;
  // Store pair, immed offset, W-form
  // Store pair, immed offset, X-form
  //
  // Generic WriteSTP class: 1-cycle latency on A64FXGI56.
  let Latency = 1 in
  def : WriteRes<WriteSTP, [A64FXGI56]>;

  // Store pair, immed post-index, W-form
  // Store pair, immed post-index, X-form
  // Store pair, immed pre-index, W-form
  // Store pair, immed pre-index, X-form
  //   NOTE: Handled by WriteAdr, WriteSTP.
  // Store register, unscaled immediate (STUR*): A64FXWrite_STUR.
  foreach Opc = [STURBi, STURBBi, STURDi, STURHi, STURHHi,
                 STURQi, STURSi, STURWi, STURXi] in
    def : InstRW<[A64FXWrite_STUR], (instrs Opc)>;

  // Store register, unprivileged (STTR*): WriteAdr followed by
  // A64FXWrite_STUR.
  foreach Opc = [STTRBi, STTRHi, STTRWi, STTRXi] in
    def : InstRW<[WriteAdr, A64FXWrite_STUR], (instrs Opc)>;
  // Store pair / store non-temporal pair, immediate offset (D/Q/X/W forms):
  // A64FXWrite_STNP. One InstRW record is emitted per opcode.
  foreach Opc = [STNPDi, STNPQi, STNPXi, STNPWi,
                 STPDi,  STPQi,  STPXi,  STPWi] in
    def : InstRW<[A64FXWrite_STNP], (instrs Opc)>;
  // Store register, unsigned immediate (B/D/H/Q/X/W forms): A64FXWrite_STUR.
  // Each opcode gets exactly one InstRW record; the identical second entry
  // that followed every opcode is dropped.
  foreach Opc = [STRBui, STRDui, STRHui, STRQui, STRXui, STRWui] in
    def : InstRW<[A64FXWrite_STUR], (instrs Opc)>;
  // Store pair, immed pre-index / post-index (D/Q/S/W/X forms).
  //
  // Each pre/post pair carries two InstRW records, in this order: one with
  // A64FXWrite_STP01 alone and one that also lists ReadAdrBase. A verbatim
  // repeat of the same two records is dropped; the relative order of the
  // remaining two is preserved.
  // NOTE(review): the two remaining records overlap on the same opcodes and
  // differ only in ReadAdrBase -- confirm which one is intended and remove
  // the other.
  foreach Insts = [(instrs STPDpre, STPDpost),
                   (instrs STPQpre, STPQpost),
                   (instrs STPSpre, STPSpost),
                   (instrs STPWpre, STPWpost),
                   (instrs STPXpre, STPXpost)] in {
    def : InstRW<[A64FXWrite_STP01], Insts>;
    def : InstRW<[A64FXWrite_STP01, ReadAdrBase], Insts>;
  }
  // Store register, immed pre-index / post-index
  // (B/BB/D/H/HH/Q/S/W/X forms).
  //
  // Each pre/post pair carries two InstRW records, in this order: one with
  // [WriteAdr, A64FXWrite_STP01] and one that also lists ReadAdrBase. A
  // verbatim repeat of the same two records is dropped; the relative order of
  // the remaining two is preserved.
  // NOTE(review): the two remaining records overlap on the same opcodes and
  // differ only in ReadAdrBase -- confirm which one is intended and remove
  // the other.
  foreach Insts = [(instrs STRBpre,  STRBpost),
                   (instrs STRBBpre, STRBBpost),
                   (instrs STRDpre,  STRDpost),
                   (instrs STRHpre,  STRHpost),
                   (instrs STRHHpre, STRHHpost),
                   (instrs STRQpre,  STRQpost),
                   (instrs STRSpre,  STRSpost),
                   (instrs STRWpre,  STRWpost),
                   (instrs STRXpre,  STRXpost)] in {
    def : InstRW<[WriteAdr, A64FXWrite_STP01], Insts>;
    def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase], Insts>;
  }
  // Store register, register offset (W- and X-register index forms):
  // A64FXWrite_STUR with ReadAdrBase for the base address.
  // Each roW/roX pair gets exactly one InstRW record; the identical second
  // entry that followed every pair is dropped.
  foreach Insts = [(instrs STRBroW,  STRBroX),
                   (instrs STRBBroW, STRBBroX),
                   (instrs STRDroW,  STRDroX),
                   (instrs STRHroW,  STRHroX),
                   (instrs STRHHroW, STRHHroX),
                   (instrs STRQroW,  STRQroX),
                   (instrs STRSroW,  STRSroX),
                   (instrs STRWroW,  STRWroX),
                   (instrs STRXroW,  STRXroX)] in
    def : InstRW<[A64FXWrite_STUR, ReadAdrBase], Insts>;
  //---
  // 3.8 FP Data Processing Instructions
  //---

  // Generic WriteF class, covering:
  //   - FP absolute value
  //   - FP min/max
  //   - FP negate
  // 4-cycle latency; holds A64FXGI03 for 2 cycles.
  let Latency = 4, ResourceCycles = [2] in
  def : WriteRes<WriteF, [A64FXGI03]>;
  // FP arithmetic: scalar FADD/FSUB, D and H register forms, use
  // A64FXWrite_4Cyc_GI03. One InstRW record per add/sub pair.
  foreach Insts = [(instrs FADDDrr, FADDHrr),
                   (instrs FSUBDrr, FSUBHrr)] in
    def : InstRW<[A64FXWrite_4Cyc_GI03], Insts>;
  // FP compare (generic WriteFCmp class): 4-cycle latency; holds A64FXGI03
  // for 2 cycles.
  let Latency = 4, ResourceCycles = [2] in
  def : WriteRes<WriteFCmp, [A64FXGI03]>;
  // FP Div, Sqrt (generic WriteFDiv class): 43-cycle latency on A64FXGI0.
  // The S-form and D-form opcodes are refined by explicit InstRW entries.
  let Latency = 43 in
  def : WriteRes<WriteFDiv, [A64FXGI0]>;
  // Named divide / square-root writes, all issued on A64FXGI0 and grouped
  // here by latency.

  // 38 cycles.
  let Latency = 38 in
  def A64FXXWriteFDiv : SchedWriteRes<[A64FXGI0]>;

  // 29 cycles: single-precision divide and square root.
  let Latency = 29 in {
    def A64FXXWriteFDivSP  : SchedWriteRes<[A64FXGI0]>;
    def A64FXXWriteFSqrtSP : SchedWriteRes<[A64FXGI0]>;
  }

  // 43 cycles: double-precision divide and square root.
  let Latency = 43 in {
    def A64FXXWriteFDivDP  : SchedWriteRes<[A64FXGI0]>;
    def A64FXXWriteFSqrtDP : SchedWriteRes<[A64FXGI0]>;
  }
  // FP divide and square root.
  //
  // The entries below overlap one another (the same opcode is matched both by
  // name and by regex), so the order of the records is kept exactly as is.
  // NOTE(review): the unanchored-prefix patterns "^.*SQRT.*32$" and
  // "^.*SQRT.*64$" match any opcode whose name contains SQRT and ends in
  // 32/64, not only FSQRT -- confirm that this breadth is intended.

  // FP divide, S-form
  // FP square root, S-form
  def : InstRW<[A64FXXWriteFDivSP],  (instrs FDIVSrr)>;
  def : InstRW<[A64FXXWriteFSqrtSP], (instrs FSQRTSr)>;
  def : InstRW<[A64FXXWriteFDivSP],  (instregex "^FDIVv.*32$")>;
  def : InstRW<[A64FXXWriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
  def : InstRW<[A64FXXWriteFDivSP],  (instregex "^FDIVSrr")>;
  def : InstRW<[A64FXXWriteFSqrtSP], (instregex "^FSQRTSr")>;

  // FP divide, D-form
  // FP square root, D-form
  def : InstRW<[A64FXXWriteFDivDP],  (instrs FDIVDrr)>;
  def : InstRW<[A64FXXWriteFSqrtDP], (instrs FSQRTDr)>;
  def : InstRW<[A64FXXWriteFDivDP],  (instregex "^FDIVv.*64$")>;
  def : InstRW<[A64FXXWriteFSqrtDP], (instregex "^.*SQRT.*64$")>;
  def : InstRW<[A64FXXWriteFDivDP],  (instregex "^FDIVDrr")>;
  def : InstRW<[A64FXXWriteFSqrtDP], (instregex "^FSQRTDr")>;
  // FP round to integral (scalar S/D register forms): A64FXWrite_9Cyc_GI03.
  def : InstRW<[A64FXWrite_9Cyc_GI03],
               (instregex "^FRINT(A|I|M|N|P|X|Z)(Sr|Dr)")>;

  // FP select: A64FXWrite_4Cyc_GI03.
  def : InstRW<[A64FXWrite_4Cyc_GI03],
               (instregex "^FCSEL")>;
  //---
  // 3.9 FP Miscellaneous Instructions
  //---

  // Generic WriteFCvt class, covering:
  //   - FP convert, from vec to vec reg
  //   - FP convert, from gen to vec reg
  //   - FP convert, from vec to gen reg
  // 9-cycle latency; holds A64FXGI03 for 2 cycles.
  let Latency = 9, ResourceCycles = [2] in
  def : WriteRes<WriteFCvt, [A64FXGI03]>;
  // Generic WriteFImm class, covering:
  //   - FP move, immed
  //   - FP move, register
  // 4-cycle latency; holds A64FXGI0 for 2 cycles.
  let Latency = 4, ResourceCycles = [2] in
  def : WriteRes<WriteFImm, [A64FXGI0]>;
  // Generic WriteFCopy class, covering:
  //   - FP transfer, from gen to vec reg
  //   - FP transfer, from vec to gen reg
  // 4-cycle latency; holds A64FXGI0 for 2 cycles.
  let Latency = 4, ResourceCycles = [2] in
  def : WriteRes<WriteFCopy, [A64FXGI0]>;

  // The "high half" FMOV transfers have dedicated writes.
  def : InstRW<[A64FXWrite_FMOV_GV],   (instrs FMOVXDHighr)>;
  def : InstRW<[A64FXWrite_FMOV_VG14], (instrs FMOVDXHighr)>;
  //---
  // 3.12 ASIMD Integer Instructions
  //---

  // Generic WriteVd (D-form) and WriteVq (Q-form) classes. Both have a
  // 4-cycle latency on A64FXGI03 and act as the default for:
  //   - ASIMD absolute diff, D-form
  //   - ASIMD absolute diff, Q-form
  //   - ASIMD absolute diff accum, D-form
  //   - ASIMD absolute diff accum, Q-form
  //   - ASIMD absolute diff accum long
  //   - ASIMD absolute diff long
  //   - ASIMD arith, basic
  //   - ASIMD arith, complex
  //   - ASIMD compare
  //   - ASIMD logical (AND, BIC, EOR)
  //   - ASIMD max/min, basic
  //   - ASIMD max/min, reduce, 4H/4S
  //   - ASIMD max/min, reduce, 8B/8H
  //   - ASIMD max/min, reduce, 16B
  //   - ASIMD multiply, D-form
  //   - ASIMD multiply, Q-form
  //   - ASIMD multiply accumulate long
  //   - ASIMD multiply accumulate saturating long
  //   - ASIMD multiply long
  //   - ASIMD pairwise add and accumulate
  //   - ASIMD shift accumulate
  //   - ASIMD shift by immed, basic
  //   - ASIMD shift by immed and insert, basic, D-form
  //   - ASIMD shift by immed and insert, basic, Q-form
  //   - ASIMD shift by immed, complex
  //   - ASIMD shift by register, basic, D-form
  //   - ASIMD shift by register, basic, Q-form
  //   - ASIMD shift by register, complex, D-form
  //   - ASIMD shift by register, complex, Q-form
  // Many of these are refined by explicit InstRW entries in this section.
  let Latency = 4 in {
    def : WriteRes<WriteVd, [A64FXGI03]>;
    def : WriteRes<WriteVq, [A64FXGI03]>;
  }
  // ASIMD logical (AND, BIC, EOR, ORR, ORN, NOT): A64FXWrite_4Cyc_GI03.
  def : InstRW<[A64FXWrite_4Cyc_GI03],
               (instregex "^ANDv",
                          "^BICv",
                          "^EORv",
                          "^ORRv",
                          "^ORNv",
                          "^NOTv")>;

  // ASIMD arith, reduce (ADDV and the widening SADDLV/UADDLV).
  def : InstRW<[A64FXWrite_ADDLV],
               (instregex "^ADDVv",
                          "^SADDLVv",
                          "^UADDLVv")>;

  // ASIMD multiply long (signed, unsigned, saturating doubling) ...
  def : InstRW<[A64FXWrite_MULLE],
               (instregex "^(S|U|SQD)MULL")>;
  // ... and the vector multiply(-accumulate/-subtract) long forms.
  def : InstRW<[A64FXWrite_MULLV],
               (instregex "(S|U|SQD)(MLAL|MLSL|MULL)v.*")>;

  // ASIMD polynomial multiply long, 8-bit and 64-bit element forms.
  def : InstRW<[A64FXWrite_8Cyc_GI03],
               (instregex "^PMULL(v8i8|v16i8)")>;
  def : InstRW<[A64FXWrite_8Cyc_GI03],
               (instregex "^PMULL(v1i64|v2i64)")>;
  // ASIMD absolute diff accum, D-form (8B/4H/2S).
  def : InstRW<[A64FXWrite_ABA],
               (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$")>;
  // ASIMD absolute diff accum, Q-form (16B/8H/4S).
  def : InstRW<[A64FXWrite_ABA],
               (instregex "^[SU]ABA(v16i8|v8i16|v4i32)$")>;
  // ASIMD absolute diff accum long.
  def : InstRW<[A64FXWrite_ABAL],
               (instregex "^[SU]ABAL")>;

  // ASIMD arith, reduce: 8B/4H/2S sources.
  def : InstRW<[A64FXWrite_ADDLV1],
               (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>;
  // ASIMD arith, reduce: 8H/4S sources.
  def : InstRW<[A64FXWrite_ADDLV1],
               (instregex "^[SU]?ADDL?V(v8i16|v4i32)v$")>;
  // ASIMD arith, reduce: 16B sources.
  def : InstRW<[A64FXWrite_ADDLV1],
               (instregex "^[SU]?ADDL?Vv16i8v$")>;

  // ASIMD max/min, reduce: 4H/4S sources.
  def : InstRW<[A64FXWrite_MINMAXV],
               (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v$")>;
  // ASIMD max/min, reduce: 8B/8H sources.
  def : InstRW<[A64FXWrite_MINMAXV],
               (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")>;
  // ASIMD max/min, reduce: 16B sources.
  def : InstRW<[A64FXWrite_MINMAXV],
               (instregex "^[SU](MIN|MAX)Vv16i8v$")>;
  // ASIMD multiply, D-form and scalar element forms (MUL, PMUL, SQDMUL*,
  // SQRDMUL*), optionally indexed.
  def : InstRW<[A64FXWrite_PMUL],
               (instregex "^(P?MUL|SQR?DMUL)" #
                          "(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)" #
                          "(_indexed)?$")>;

  // ASIMD multiply, Q-form (MUL, PMUL), optionally indexed.
  def : InstRW<[A64FXWrite_PMUL],
               (instregex "^(P?MUL)(v16i8|v8i16|v4i32)(_indexed)?$")>;

  // ASIMD saturating (rounding) doubling multiply high, Q-form.
  def : InstRW<[A64FXWrite_SQRDMULH],
               (instregex "^(SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>;

  // ASIMD multiply accumulate (MLA/MLS), D-form.
  def : InstRW<[A64FXWrite_9Cyc_GI03],
               (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$")>;

  // ASIMD multiply accumulate (MLA/MLS), Q-form.
  def : InstRW<[A64FXWrite_9Cyc_GI03],
               (instregex "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>;
  // ASIMD shift accumulate: rounding forms, then plain forms.
  def : InstRW<[A64FXWrite_SRSRAV],
               (instregex "SRSRAv", "URSRAv")>;
  def : InstRW<[A64FXWrite_SSRAV],
               (instregex "SSRAv", "USRAv")>;

  // ASIMD shift by immed, basic: narrowing shifts (rounding, then plain) and
  // saturating extract-narrow.
  def : InstRW<[A64FXWrite_RSHRN],
               (instregex "RSHRNv", "SQRSHRNv", "SQRSHRUNv", "UQRSHRNv")>;
  def : InstRW<[A64FXWrite_SHRN],
               (instregex "SHRNv", "SQSHRNv", "SQSHRUNv", "UQSHRNv")>;
  def : InstRW<[A64FXWrite_6Cyc_GI3],
               (instregex "SQXTNv", "SQXTUNv", "UQXTNv")>;

  // ASIMD shift by immed, complex.
  def : InstRW<[A64FXWrite_ABA],
               (instregex "^[SU]?(Q|R){1,2}SHR")>;
  def : InstRW<[A64FXWrite_6Cyc_GI3],
               (instregex "^SQSHLU")>;

  // ASIMD shift by register, basic, Q-form.
  def : InstRW<[A64FXWrite_6Cyc_GI3],
               (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>;

  // ASIMD shift by register, complex, D-form and scalar forms.
  def : InstRW<[A64FXWrite_6Cyc_GI3],
               (instregex "^[SU][QR]{1,2}SHL" #
                          "(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|b|d|h|s)")>;

  // ASIMD shift by register, complex, Q-form.
  def : InstRW<[A64FXWrite_6Cyc_GI3],
               (instregex "^[SU][QR]{1,2}SHL(v16i8|v8i16|v4i32|v2i64)")>;
  // ASIMD Arithmetic
  //
  // Several patterns in this group overlap with each other and with later
  // entries, so the order of the records is kept as is.

  // Integer add/sub, D-form then Q-form.
  def : InstRW<[A64FXWrite_4Cyc_GI03],
               (instregex "(ADD|SUB)(v8i8|v4i16|v2i32|v1i64)")>;
  def : InstRW<[A64FXWrite_4Cyc_GI03],
               (instregex "(ADD|SUB)(v16i8|v8i16|v4i32|v2i64)")>;

  // Add/sub returning high narrow, plain and rounding.
  def : InstRW<[A64FXWrite_SHRN],
               (instregex "(ADD|SUB)HNv.*")>;
  def : InstRW<[A64FXWrite_RSHRN],
               (instregex "(RADD|RSUB)HNv.*")>;

  // Saturating / rounding-halving arithmetic.
  def : InstRW<[A64FXWrite_4Cyc_GI03],
               (instregex "^SQADD", "^SQNEG", "^SQSUB", "^SRHADD",
                          "^SUQADD", "^UQADD", "^UQSUB", "^URHADD",
                          "^USQADD")>;

  // Pairwise add, Q-form.
  def : InstRW<[A64FXWrite_ADDP],
               (instregex "ADDP(v16i8|v8i16|v4i32|v2i64)")>;

  // Logical ops; the pattern also names the Xr* scalar register forms.
  def : InstRW<[A64FXWrite_4Cyc_GI03],
               (instregex "((AND|ORN|EOR|EON)S?(Xr[rsi]|v16i8|v8i16|v4i32)|" #
                          "(ORR|BIC)S?(Xr[rs]|v16i8|v8i16|v4i32))")>;

  // Count leading sign/zero bits and population count, Q-form.
  def : InstRW<[A64FXWrite_4Cyc_GI0],
               (instregex "(CLS|CLZ|CNT)(v4i32|v8i16|v16i8)")>;
  // Pairwise add-long, accumulating and non-accumulating.
  def : InstRW<[A64FXWrite_SADALP],
               (instregex "^SADALP", "^UADALP")>;
  def : InstRW<[A64FXWrite_SADDLP],
               (instregex "^SADDLPv", "^UADDLPv")>;

  // Across-vector add-long and across-vector add/max/min.
  def : InstRW<[A64FXWrite_ADDLV1],
               (instregex "^SADDLV", "^UADDLV")>;
  def : InstRW<[A64FXWrite_MINMAXV],
               (instregex "^ADDVv", "^SMAXVv", "^UMAXVv", "^SMINVv",
                          "^UMINVv")>;

  // Absolute difference and accumulate (including long forms).
  def : InstRW<[A64FXWrite_ABA],
               (instregex "^SABAv", "^UABAv", "^SABALv", "^UABALv")>;

  // Saturating add/sub.
  def : InstRW<[A64FXWrite_4Cyc_GI03],
               (instregex "^SQADDv", "^SQSUBv", "^UQADDv", "^UQSUBv")>;
  def : InstRW<[A64FXWrite_4Cyc_GI03],
               (instregex "^SUQADDv", "^USQADDv")>;

  // Add/sub returning high narrow, plain and rounding.
  def : InstRW<[A64FXWrite_SHRN],
               (instregex "^ADDHNv", "^SUBHNv")>;
  def : InstRW<[A64FXWrite_RSHRN],
               (instregex "^RADDHNv", "^RSUBHNv")>;

  // Saturating abs/neg/add/sub and rounding-halving add.
  def : InstRW<[A64FXWrite_4Cyc_GI03],
               (instregex "^SQABS", "^SQADD", "^SQNEG", "^SQSUB",
                          "^SRHADD", "^SUQADD", "^UQADD", "^UQSUB",
                          "^URHADD", "^USQADD")>;

  // Integer compares.
  def : InstRW<[A64FXWrite_4Cyc_GI03],
               (instregex "^CMEQv", "^CMGEv", "^CMGTv", "^CMLEv",
                          "^CMLTv", "^CMHIv", "^CMHSv")>;

  // Element-wise max/min, then pairwise max/min.
  def : InstRW<[A64FXWrite_MINMAXV],
               (instregex "^SMAXv", "^SMINv", "^UMAXv", "^UMINv")>;
  def : InstRW<[A64FXWrite_ADDP],
               (instregex "^SMAXPv", "^SMINPv", "^UMAXPv", "^UMINPv")>;

  // Absolute difference, then absolute difference long.
  def : InstRW<[A64FXWrite_4Cyc_GI03],
               (instregex "^SABDv", "^UABDv")>;
  def : InstRW<[A64FXWrite_TBX1],
               (instregex "^SABDLv", "^UABDLv")>;
  //---
  // 3.13 ASIMD Floating-point Instructions
  //---

  // Generic WriteFMul class: 9-cycle latency on A64FXGI03.
  let Latency = 9 in
  def : WriteRes<WriteFMul, [A64FXGI03]>;
  // ASIMD FP absolute value
  def : InstRW<[A64FXWrite_4Cyc_GI03],
               (instregex "^FABSv")>;

  // ASIMD FP arith, normal, D-form
  // ASIMD FP arith, normal, Q-form
  def : InstRW<[A64FXWrite_9Cyc_GI03],
               (instregex "^FABDv", "^FADDv", "^FSUBv")>;

  // ASIMD FP arith, pairwise, D-form
  // ASIMD FP arith, pairwise, Q-form
  def : InstRW<[A64FXWrite_FADDPV],
               (instregex "^FADDPv")>;

  // ASIMD FP compare, D-form
  // ASIMD FP compare, Q-form
  // Absolute compares first, then the ordinary compares.
  def : InstRW<[A64FXWrite_4Cyc_GI03],
               (instregex "^FACGEv", "^FACGTv")>;
  def : InstRW<[A64FXWrite_4Cyc_GI03],
               (instregex "^FCMEQv", "^FCMGEv", "^FCMGTv", "^FCMLEv",
                          "^FCMLTv")>;

  // ASIMD FP round, D-form
  def : InstRW<[A64FXWrite_9Cyc_GI03],
               (instregex "^FRINT[AIMNPXZ](v2f32)")>;
  // ASIMD FP round, Q-form
  def : InstRW<[A64FXWrite_9Cyc_GI03],
               (instregex "^FRINT[AIMNPXZ](v4f32|v2f64)")>;
  1308. // ASIMD FP convert, long
  1309. // ASIMD FP convert, narrow
  1310. // ASIMD FP convert, other, D-form
  1311. // ASIMD FP convert, other, Q-form
  1312. // ASIMD FP convert, long and narrow
  1313. def : InstRW<[A64FXWrite_FCVTXNV], (instregex "^FCVT(L|N|XN)v")>;
  1314. // ASIMD FP convert, other, D-form
  1315. def : InstRW<[A64FXWrite_FCVTXNV],
  1316. (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v2f32|v1i32|v2i32|v1i64)")>;
  1317. // ASIMD FP convert, other, Q-form
  1318. def : InstRW<[A64FXWrite_FCVTXNV],
  1319. (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v4f32|v2f64|v4i32|v2i64)")>;
  1320. // ASIMD FP divide, D-form, F32
  1321. def : InstRW<[A64FXXWriteFDivSP], (instrs FDIVv2f32)>;
  1322. def : InstRW<[A64FXXWriteFDivSP], (instregex "FDIVv2f32")>;
  1323. // ASIMD FP divide, Q-form, F32
  1324. def : InstRW<[A64FXXWriteFDiv], (instrs FDIVv4f32)>;
  1325. def : InstRW<[A64FXXWriteFDiv], (instregex "FDIVv4f32")>;
  1326. // ASIMD FP divide, Q-form, F64
  1327. def : InstRW<[A64FXXWriteFDivDP], (instrs FDIVv2f64)>;
  1328. def : InstRW<[A64FXXWriteFDivDP], (instregex "FDIVv2f64")>;
  1329. // ASIMD FP max/min, normal, D-form
  1330. // ASIMD FP max/min, normal, Q-form
  1331. def : InstRW<[A64FXWrite_4Cyc_GI0], (instregex "^FMAXv", "^FMAXNMv",
  1332. "^FMINv", "^FMINNMv")>;
  1333. // ASIMD FP max/min, pairwise, D-form
  1334. // ASIMD FP max/min, pairwise, Q-form
  1335. def : InstRW<[A64FXWrite_ADDP], (instregex "^FMAXPv", "^FMAXNMPv",
  1336. "^FMINPv", "^FMINNMPv")>;
  1337. // ASIMD FP max/min, reduce
  1338. def : InstRW<[A64FXWrite_FMAXVVH], (instregex "^FMAXVv", "^FMAXNMVv",
  1339. "^FMINVv", "^FMINNMVv")>;
  1340. // ASIMD FP multiply, D-form, FZ
  1341. // ASIMD FP multiply, D-form, no FZ
  1342. // ASIMD FP multiply, Q-form, FZ
  1343. // ASIMD FP multiply, Q-form, no FZ
  1344. def : InstRW<[A64FXWrite_9Cyc_GI03], (instregex "^FMULv", "^FMULXv")>;
  1345. def : InstRW<[A64FXWrite_FMULXE],
  1346. (instregex "^FMULX?(v2f32|v1i32|v2i32|v1i64|32|64)")>;
  1347. def : InstRW<[A64FXWrite_FMULXE],
  1348. (instregex "^FMULX?(v4f32|v2f64|v4i32|v2i64)")>;
  1349. // ASIMD FP multiply accumulate, Dform, FZ
  1350. // ASIMD FP multiply accumulate, Dform, no FZ
  1351. // ASIMD FP multiply accumulate, Qform, FZ
  1352. // ASIMD FP multiply accumulate, Qform, no FZ
  1353. def : InstRW<[A64FXWrite_9Cyc_GI03], (instregex "^FMLAv", "^FMLSv")>;
  1354. def : InstRW<[A64FXWrite_FMULXE],
  1355. (instregex "^FML[AS](v2f32|v1i32|v2i32|v1i64)")>;
  1356. def : InstRW<[A64FXWrite_FMULXE],
  1357. (instregex "^FML[AS](v4f32|v2f64|v4i32|v2i64)")>;
  1358. // ASIMD FP negate
  1359. def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FNEGv")>;
  1360. //--
  1361. // 3.14 ASIMD Miscellaneous Instructions
  1362. //--
  1363. // ASIMD bit reverse
  1364. def : InstRW<[A64FXWrite_1Cyc_GI2456], (instregex "^RBITv")>;
  1365. // ASIMD bitwise insert, D-form
  1366. // ASIMD bitwise insert, Q-form
  1367. def : InstRW<[A64FXWrite_BIF],
  1368. (instregex "^BIFv", "^BITv", "^BSLv")>;
  1369. // ASIMD count, D-form
  1370. // ASIMD count, Q-form
  1371. def : InstRW<[A64FXWrite_4Cyc_GI0],
  1372. (instregex "^CLSv", "^CLZv", "^CNTv")>;
  1373. // ASIMD duplicate, gen reg
  1374. // ASIMD duplicate, element
  // NOTE(review): "^DUPv" (A64FXWrite_DUPGENERAL) also covers every name matched
  // by "^DUPv.+gpr" (A64FXWrite_6Cyc_GI0) two lines below; confirm which write
  // class the gpr forms are meant to receive.
  1375. def : InstRW<[A64FXWrite_DUPGENERAL], (instregex "^DUPv")>;
  1376. def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^DUP(i8|i16|i32|i64)$")>;
  1377. def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^DUPv.+gpr")>;
  1378. // ASIMD extract
  1379. def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^EXTv")>;
  1380. // ASIMD extract narrow
  1381. def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^XTNv")>;
  1382. // ASIMD extract narrow, saturating
  1383. def : InstRW<[A64FXWrite_6Cyc_GI3],
  1384. (instregex "^SQXTNv", "^SQXTUNv", "^UQXTNv")>;
  1385. // ASIMD insert, element to element
  1386. def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^INSv")>;
  1387. // ASIMD transfer, element to gen reg
  1388. def : InstRW<[A64FXWrite_SMOV], (instregex "^[SU]MOVv")>;
  1389. // ASIMD move, integer immed
  1390. def : InstRW<[A64FXWrite_4Cyc_GI0], (instregex "^MOVIv")>;
  1391. // ASIMD move, FP immed
  1392. def : InstRW<[A64FXWrite_4Cyc_GI0], (instregex "^FMOVv")>;
  // Table lookups: one entry per table-register count (One..Four), for both the
  // 8i8 (D-form) and 16i8 (Q-form) variants.
  1393. // ASIMD table lookup, D-form
  1394. def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^TBLv8i8One")>;
  1395. def : InstRW<[A64FXWrite_TBX1], (instregex "^TBLv8i8Two")>;
  1396. def : InstRW<[A64FXWrite_TBX2], (instregex "^TBLv8i8Three")>;
  1397. def : InstRW<[A64FXWrite_TBX3], (instregex "^TBLv8i8Four")>;
  1398. def : InstRW<[A64FXWrite_TBX1], (instregex "^TBXv8i8One")>;
  1399. def : InstRW<[A64FXWrite_TBX2], (instregex "^TBXv8i8Two")>;
  1400. def : InstRW<[A64FXWrite_TBX3], (instregex "^TBXv8i8Three")>;
  1401. def : InstRW<[A64FXWrite_TBX4], (instregex "^TBXv8i8Four")>;
  1402. // ASIMD table lookup, Q-form
  1403. def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^TBLv16i8One")>;
  1404. def : InstRW<[A64FXWrite_TBX1], (instregex "^TBLv16i8Two")>;
  1405. def : InstRW<[A64FXWrite_TBX2], (instregex "^TBLv16i8Three")>;
  1406. def : InstRW<[A64FXWrite_TBX3], (instregex "^TBLv16i8Four")>;
  1407. def : InstRW<[A64FXWrite_TBX1], (instregex "^TBXv16i8One")>;
  1408. def : InstRW<[A64FXWrite_TBX2], (instregex "^TBXv16i8Two")>;
  1409. def : InstRW<[A64FXWrite_TBX3], (instregex "^TBXv16i8Three")>;
  1410. def : InstRW<[A64FXWrite_TBX4], (instregex "^TBXv16i8Four")>;
  1411. // ASIMD unzip/zip
  1412. def : InstRW<[A64FXWrite_6Cyc_GI0],
  1413. (instregex "^UZP1", "^UZP2", "^ZIP1", "^ZIP2")>;
  1414. // ASIMD reciprocal estimate, D-form
  1415. // ASIMD reciprocal estimate, Q-form
  1416. def : InstRW<[A64FXWrite_4Cyc_GI03],
  1417. (instregex "^FRECPEv", "^FRECPXv", "^URECPEv",
  1418. "^FRSQRTEv", "^URSQRTEv")>;
  1419. // ASIMD reciprocal step, D-form, FZ
  1420. // ASIMD reciprocal step, D-form, no FZ
  1421. // ASIMD reciprocal step, Q-form, FZ
  1422. // ASIMD reciprocal step, Q-form, no FZ
  1423. def : InstRW<[A64FXWrite_9Cyc_GI0], (instregex "^FRECPSv", "^FRSQRTSv")>;
  1424. // ASIMD reverse
  1425. def : InstRW<[A64FXWrite_4Cyc_GI03],
  1426. (instregex "^REV16v", "^REV32v", "^REV64v")>;
  1427. // ASIMD table lookup, D-form
  1428. // ASIMD table lookup, Q-form
  // NOTE(review): this generic "^TBLv"/"^TBXv" entry (A64FXWrite_6Cyc_GI0)
  // covers every name handled by the per-register-count TBL/TBX entries above,
  // which use different write classes. Confirm it does not override them.
  1429. def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^TBLv", "^TBXv")>;
  // NOTE(review): the next two [SU]MOVv entries and the "^INSv" entry repeat
  // mappings already made earlier in this section with the same write classes.
  1430. // ASIMD transfer, element to word or word
  1431. def : InstRW<[A64FXWrite_SMOV], (instregex "^[SU]MOVv")>;
  1432. // ASIMD transfer, element to gen reg
  1433. def : InstRW<[A64FXWrite_SMOV], (instregex "(S|U)MOVv.*")>;
  1434. // ASIMD transfer gen reg to element
  1435. def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^INSv")>;
  1436. // ASIMD transpose
  // The "^UZP1v"/"^UZP2v" and "^ZIP1v"/"^ZIP2v" names below are also covered by
  // the earlier "^UZP1", "^UZP2", "^ZIP1", "^ZIP2" entry (same write class).
  1437. def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^TRN1v", "^TRN2v",
  1438. "^UZP1v", "^UZP2v")>;
  1439. // ASIMD unzip/zip
  1440. def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^ZIP1v", "^ZIP2v")>;
  1441. //--
  1442. // 3.15 ASIMD Load Instructions
  1443. //--
  // Pattern used throughout this section: the base form gets a single A64FX
  // write class; the matching "_POST" form gets its own write class plus
  // WriteAdr as a second write.
  1444. // ASIMD load, 1 element, multiple, 1 reg, D-form
  1445. // ASIMD load, 1 element, multiple, 1 reg, Q-form
  1446. def : InstRW<[A64FXWrite_8Cyc_GI56],
  1447. (instregex "^LD1Onev(8b|4h|2s|1d|2d)$")>;
  1448. def : InstRW<[A64FXWrite_11Cyc_GI56],
  1449. (instregex "^LD1Onev(16b|8h|4s)$")>;
  1450. def : InstRW<[A64FXWrite_LD108, WriteAdr],
  1451. (instregex "^LD1Onev(8b|4h|2s|1d|2d)_POST$")>;
  1452. def : InstRW<[A64FXWrite_LD109, WriteAdr],
  1453. (instregex "^LD1Onev(16b|8h|4s)_POST$")>;
  1454. // ASIMD load, 1 element, multiple, 2 reg, D-form
  1455. // ASIMD load, 1 element, multiple, 2 reg, Q-form
  1456. def : InstRW<[A64FXWrite_LD102],
  1457. (instregex "^LD1Twov(8b|4h|2s|1d|2d)$")>;
  1458. def : InstRW<[A64FXWrite_LD103],
  1459. (instregex "^LD1Twov(16b|8h|4s)$")>;
  1460. def : InstRW<[A64FXWrite_LD110, WriteAdr],
  1461. (instregex "^LD1Twov(8b|4h|2s|1d|2d)_POST$")>;
  1462. def : InstRW<[A64FXWrite_LD111, WriteAdr],
  1463. (instregex "^LD1Twov(16b|8h|4s)_POST$")>;
  1464. // ASIMD load, 1 element, multiple, 3 reg, D-form
  1465. // ASIMD load, 1 element, multiple, 3 reg, Q-form
  1466. def : InstRW<[A64FXWrite_LD104],
  1467. (instregex "^LD1Threev(8b|4h|2s|1d|2d)$")>;
  1468. def : InstRW<[A64FXWrite_LD105],
  1469. (instregex "^LD1Threev(16b|8h|4s)$")>;
  1470. def : InstRW<[A64FXWrite_LD112, WriteAdr],
  1471. (instregex "^LD1Threev(8b|4h|2s|1d|2d)_POST$")>;
  1472. def : InstRW<[A64FXWrite_LD113, WriteAdr],
  1473. (instregex "^LD1Threev(16b|8h|4s)_POST$")>;
  1474. // ASIMD load, 1 element, multiple, 4 reg, D-form
  1475. // ASIMD load, 1 element, multiple, 4 reg, Q-form
  1476. def : InstRW<[A64FXWrite_LD106],
  1477. (instregex "^LD1Fourv(8b|4h|2s|1d|2d)$")>;
  1478. def : InstRW<[A64FXWrite_LD107],
  1479. (instregex "^LD1Fourv(16b|8h|4s)$")>;
  1480. def : InstRW<[A64FXWrite_LD114, WriteAdr],
  1481. (instregex "^LD1Fourv(8b|4h|2s|1d|2d)_POST$")>;
  1482. def : InstRW<[A64FXWrite_LD115, WriteAdr],
  1483. (instregex "^LD1Fourv(16b|8h|4s)_POST$")>;
  1484. // ASIMD load, 1 element, one lane, B/H/S
  1485. // ASIMD load, 1 element, one lane, D
  1486. def : InstRW<[A64FXWrite_LD1I0], (instregex "^LD1i(8|16|32|64)$")>;
  1487. def : InstRW<[A64FXWrite_LD1I1, WriteAdr],
  1488. (instregex "^LD1i(8|16|32|64)_POST$")>;
  1489. // ASIMD load, 1 element, all lanes, D-form, B/H/S
  1490. // ASIMD load, 1 element, all lanes, D-form, D
  1491. // ASIMD load, 1 element, all lanes, Q-form
  1492. def : InstRW<[A64FXWrite_8Cyc_GI03],
  1493. (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
  1494. def : InstRW<[A64FXWrite_LD108, WriteAdr],
  1495. (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
  1496. // ASIMD load, 2 element, multiple, D-form, B/H/S
  1497. // ASIMD load, 2 element, multiple, Q-form, D
  1498. def : InstRW<[A64FXWrite_LD103],
  1499. (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)$")>;
  1500. def : InstRW<[A64FXWrite_LD111, WriteAdr],
  1501. (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
  1502. // ASIMD load, 2 element, one lane, B/H
  1503. // ASIMD load, 2 element, one lane, S
  1504. // ASIMD load, 2 element, one lane, D
  1505. def : InstRW<[A64FXWrite_LD2I0], (instregex "^LD2i(8|16|32|64)$")>;
  1506. def : InstRW<[A64FXWrite_LD2I1, WriteAdr],
  1507. (instregex "^LD2i(8|16|32|64)_POST$")>;
  1508. // ASIMD load, 2 element, all lanes, D-form, B/H/S
  1509. // ASIMD load, 2 element, all lanes, D-form, D
  1510. // ASIMD load, 2 element, all lanes, Q-form
  1511. def : InstRW<[A64FXWrite_LD102],
  1512. (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
  1513. def : InstRW<[A64FXWrite_LD110, WriteAdr],
  1514. (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
  1515. // ASIMD load, 3 element, multiple, D-form, B/H/S
  1516. // ASIMD load, 3 element, multiple, Q-form, B/H/S
  1517. // ASIMD load, 3 element, multiple, Q-form, D
  1518. def : InstRW<[A64FXWrite_LD105],
  1519. (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)$")>;
  1520. def : InstRW<[A64FXWrite_LD113, WriteAdr],
  1521. (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
  1522. // ASIMD load, 3 element, one lane, B/H
  1523. // ASIMD load, 3 element, one lane, S
  1524. // ASIMD load, 3 element, one lane, D
  1525. def : InstRW<[A64FXWrite_LD3I0], (instregex "^LD3i(8|16|32|64)$")>;
  1526. def : InstRW<[A64FXWrite_LD3I1, WriteAdr],
  1527. (instregex "^LD3i(8|16|32|64)_POST$")>;
  1528. // ASIMD load, 3 element, all lanes, D-form, B/H/S
  1529. // ASIMD load, 3 element, all lanes, D-form, D
  1530. // ASIMD load, 3 element, all lanes, Q-form, B/H/S
  1531. // ASIMD load, 3 element, all lanes, Q-form, D
  1532. def : InstRW<[A64FXWrite_LD104],
  1533. (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
  1534. def : InstRW<[A64FXWrite_LD112, WriteAdr],
  1535. (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
  1536. // ASIMD load, 4 element, multiple, D-form, B/H/S
  1537. // ASIMD load, 4 element, multiple, Q-form, B/H/S
  1538. // ASIMD load, 4 element, multiple, Q-form, D
  1539. def : InstRW<[A64FXWrite_LD107],
  1540. (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>;
  1541. def : InstRW<[A64FXWrite_LD115, WriteAdr],
  1542. (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
  1543. // ASIMD load, 4 element, one lane, B/H
  1544. // ASIMD load, 4 element, one lane, S
  1545. // ASIMD load, 4 element, one lane, D
  1546. def : InstRW<[A64FXWrite_LD4I0], (instregex "^LD4i(8|16|32|64)$")>;
  1547. def : InstRW<[A64FXWrite_LD4I1, WriteAdr],
  1548. (instregex "^LD4i(8|16|32|64)_POST$")>;
  1549. // ASIMD load, 4 element, all lanes, D-form, B/H/S
  1550. // ASIMD load, 4 element, all lanes, D-form, D
  1551. // ASIMD load, 4 element, all lanes, Q-form, B/H/S
  1552. // ASIMD load, 4 element, all lanes, Q-form, D
  1553. def : InstRW<[A64FXWrite_LD106],
  1554. (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
  1555. def : InstRW<[A64FXWrite_LD114, WriteAdr],
  1556. (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
  1557. //--
  1558. // 3.16 ASIMD Store Instructions
  1559. //--
  // Write classes in this section follow the number of registers/elements:
  // ST10/ST11/ST12/ST13 for the base forms with one to four registers, and
  // ST14/ST15/ST16/ST17 (plus WriteAdr) for the corresponding "_POST" forms.
  1560. // ASIMD store, 1 element, multiple, 1 reg, D-form
  1561. // ASIMD store, 1 element, multiple, 1 reg, Q-form
  1562. def : InstRW<[A64FXWrite_ST10],
  1563. (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
  1564. def : InstRW<[A64FXWrite_ST14, WriteAdr],
  1565. (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
  1566. // ASIMD store, 1 element, multiple, 2 reg, D-form
  1567. // ASIMD store, 1 element, multiple, 2 reg, Q-form
  1568. def : InstRW<[A64FXWrite_ST11],
  1569. (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
  1570. def : InstRW<[A64FXWrite_ST15, WriteAdr],
  1571. (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
  1572. // ASIMD store, 1 element, multiple, 3 reg, D-form
  1573. // ASIMD store, 1 element, multiple, 3 reg, Q-form
  1574. def : InstRW<[A64FXWrite_ST12],
  1575. (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
  1576. def : InstRW<[A64FXWrite_ST16, WriteAdr],
  1577. (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
  1578. // ASIMD store, 1 element, multiple, 4 reg, D-form
  1579. // ASIMD store, 1 element, multiple, 4 reg, Q-form
  1580. def : InstRW<[A64FXWrite_ST13],
  1581. (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
  1582. def : InstRW<[A64FXWrite_ST17, WriteAdr],
  1583. (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
  1584. // ASIMD store, 1 element, one lane, B/H/S
  1585. // ASIMD store, 1 element, one lane, D
  1586. def : InstRW<[A64FXWrite_ST10],
  1587. (instregex "^ST1i(8|16|32|64)$")>;
  1588. def : InstRW<[A64FXWrite_ST14, WriteAdr],
  1589. (instregex "^ST1i(8|16|32|64)_POST$")>;
  1590. // ASIMD store, 2 element, multiple, D-form, B/H/S
  1591. // ASIMD store, 2 element, multiple, Q-form, B/H/S
  1592. // ASIMD store, 2 element, multiple, Q-form, D
  1593. def : InstRW<[A64FXWrite_ST11],
  1594. (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)$")>;
  1595. def : InstRW<[A64FXWrite_ST15, WriteAdr],
  1596. (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
  1597. // ASIMD store, 2 element, one lane, B/H/S
  1598. // ASIMD store, 2 element, one lane, D
  1599. def : InstRW<[A64FXWrite_ST11],
  1600. (instregex "^ST2i(8|16|32|64)$")>;
  1601. def : InstRW<[A64FXWrite_ST15, WriteAdr],
  1602. (instregex "^ST2i(8|16|32|64)_POST$")>;
  1603. // ASIMD store, 3 element, multiple, D-form, B/H/S
  1604. // ASIMD store, 3 element, multiple, Q-form, B/H/S
  1605. // ASIMD store, 3 element, multiple, Q-form, D
  1606. def : InstRW<[A64FXWrite_ST12],
  1607. (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)$")>;
  1608. def : InstRW<[A64FXWrite_ST16, WriteAdr],
  1609. (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
  1610. // ASIMD store, 3 element, one lane, B/H
  1611. // ASIMD store, 3 element, one lane, S
  1612. // ASIMD store, 3 element, one lane, D
  1613. def : InstRW<[A64FXWrite_ST12], (instregex "^ST3i(8|16|32|64)$")>;
  1614. def : InstRW<[A64FXWrite_ST16, WriteAdr],
  1615. (instregex "^ST3i(8|16|32|64)_POST$")>;
  1616. // ASIMD store, 4 element, multiple, D-form, B/H/S
  1617. // ASIMD store, 4 element, multiple, Q-form, B/H/S
  1618. // ASIMD store, 4 element, multiple, Q-form, D
  1619. def : InstRW<[A64FXWrite_ST13],
  1620. (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>;
  1621. def : InstRW<[A64FXWrite_ST17, WriteAdr],
  1622. (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
  1623. // ASIMD store, 4 element, one lane, B/H
  1624. // ASIMD store, 4 element, one lane, S
  1625. // ASIMD store, 4 element, one lane, D
  1626. def : InstRW<[A64FXWrite_ST13], (instregex "^ST4i(8|16|32|64)$")>;
  1627. def : InstRW<[A64FXWrite_ST17, WriteAdr],
  1628. (instregex "^ST4i(8|16|32|64)_POST$")>;
  1629. // V8.1a Atomics (LSE)
  // Every entry pairs an A64FX write class with WriteAtomic. Instructions are
  // listed explicitly via `instrs`, grouped by operation and by the plain, A,
  // L and AL name suffixes, each in B/H/W/X sizes.
  // Compare-and-swap
  1630. def : InstRW<[A64FXWrite_CAS, WriteAtomic],
  1631. (instrs CASB, CASH, CASW, CASX)>;
  1632. def : InstRW<[A64FXWrite_CAS, WriteAtomic],
  1633. (instrs CASAB, CASAH, CASAW, CASAX)>;
  1634. def : InstRW<[A64FXWrite_CAS, WriteAtomic],
  1635. (instrs CASLB, CASLH, CASLW, CASLX)>;
  1636. def : InstRW<[A64FXWrite_CAS, WriteAtomic],
  1637. (instrs CASALB, CASALH, CASALW, CASALX)>;
  // LDLAR and the LD<op> atomic read-modify-write families share
  // A64FXWrite_5Cyc_GI5.
  1638. def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
  1639. (instrs LDLARB, LDLARH, LDLARW, LDLARX)>;
  1640. def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
  1641. (instrs LDADDB, LDADDH, LDADDW, LDADDX)>;
  1642. def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
  1643. (instrs LDADDAB, LDADDAH, LDADDAW, LDADDAX)>;
  1644. def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
  1645. (instrs LDADDLB, LDADDLH, LDADDLW, LDADDLX)>;
  1646. def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
  1647. (instrs LDADDALB, LDADDALH, LDADDALW, LDADDALX)>;
  1648. def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
  1649. (instrs LDCLRB, LDCLRH, LDCLRW, LDCLRX)>;
  1650. def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
  1651. (instrs LDCLRAB, LDCLRAH, LDCLRAW, LDCLRAX)>;
  1652. def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
  1653. (instrs LDCLRLB, LDCLRLH, LDCLRLW, LDCLRLX)>;
  1654. def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
  1655. (instrs LDCLRALB, LDCLRALH, LDCLRALW, LDCLRALX)>;
  1656. def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
  1657. (instrs LDEORB, LDEORH, LDEORW, LDEORX)>;
  1658. def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
  1659. (instrs LDEORAB, LDEORAH, LDEORAW, LDEORAX)>;
  1660. def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
  1661. (instrs LDEORLB, LDEORLH, LDEORLW, LDEORLX)>;
  1662. def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
  1663. (instrs LDEORALB, LDEORALH, LDEORALW, LDEORALX)>;
  1664. def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
  1665. (instrs LDSETB, LDSETH, LDSETW, LDSETX)>;
  1666. def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
  1667. (instrs LDSETAB, LDSETAH, LDSETAW, LDSETAX)>;
  1668. def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
  1669. (instrs LDSETLB, LDSETLH, LDSETLW, LDSETLX)>;
  1670. def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
  1671. (instrs LDSETALB, LDSETALH, LDSETALW, LDSETALX)>;
  // Signed/unsigned max/min atomics: all four suffix variants in one entry each.
  1672. def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
  1673. (instrs LDSMAXB, LDSMAXH, LDSMAXW, LDSMAXX,
  1674. LDSMAXAB, LDSMAXAH, LDSMAXAW, LDSMAXAX,
  1675. LDSMAXLB, LDSMAXLH, LDSMAXLW, LDSMAXLX,
  1676. LDSMAXALB, LDSMAXALH, LDSMAXALW, LDSMAXALX)>;
  1677. def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
  1678. (instrs LDSMINB, LDSMINH, LDSMINW, LDSMINX,
  1679. LDSMINAB, LDSMINAH, LDSMINAW, LDSMINAX,
  1680. LDSMINLB, LDSMINLH, LDSMINLW, LDSMINLX,
  1681. LDSMINALB, LDSMINALH, LDSMINALW, LDSMINALX)>;
  1682. def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
  1683. (instrs LDUMAXB, LDUMAXH, LDUMAXW, LDUMAXX,
  1684. LDUMAXAB, LDUMAXAH, LDUMAXAW, LDUMAXAX,
  1685. LDUMAXLB, LDUMAXLH, LDUMAXLW, LDUMAXLX,
  1686. LDUMAXALB, LDUMAXALH, LDUMAXALW, LDUMAXALX)>;
  1687. def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
  1688. (instrs LDUMINB, LDUMINH, LDUMINW, LDUMINX,
  1689. LDUMINAB, LDUMINAH, LDUMINAW, LDUMINAX,
  1690. LDUMINLB, LDUMINLH, LDUMINLW, LDUMINLX,
  1691. LDUMINALB, LDUMINALH, LDUMINALW, LDUMINALX)>;
  // Swap
  1692. def : InstRW<[A64FXWrite_SWP, WriteAtomic],
  1693. (instrs SWPB, SWPH, SWPW, SWPX)>;
  1694. def : InstRW<[A64FXWrite_SWP, WriteAtomic],
  1695. (instrs SWPAB, SWPAH, SWPAW, SWPAX)>;
  1696. def : InstRW<[A64FXWrite_SWP, WriteAtomic],
  1697. (instrs SWPLB, SWPLH, SWPLW, SWPLX)>;
  1698. def : InstRW<[A64FXWrite_SWP, WriteAtomic],
  1699. (instrs SWPALB, SWPALH, SWPALW, SWPALX)>;
  // STLLR reuses the A64FXWrite_STUR write class.
  1700. def : InstRW<[A64FXWrite_STUR, WriteAtomic],
  1701. (instrs STLLRB, STLLRH, STLLRW, STLLRX)>;
  1702. // SVE instructions
  1703. // The modeling method for SVE instructions is more accurate than others.
  1704. // TODO: modify the model of other instructions similarly.
  // The entries below group SVE instructions by shared write class. Several
  // patterns rely on character classes to keep groups disjoint, e.g. "^AND_ZI"
  // here versus "^AND_Z[^I]" in the A64FXWrite_4Cyc_GI03 group.
  1705. def : InstRW<[A64FXWrite_4Cyc_GI0],
  1706. (instregex "^AND_ZI", "^CL[SZ]_Z", "^CPY_ZP[mz]I", "^DUP_ZZ?I", "^DUPM_Z",
  1707. "^EOR_ZI", "^ORR_ZI", "^FCM(EQ|GT|GE|LT|LE|NE|UO)_P",
  1708. "^FCPY_Z", "^F(MAX|MIN).*I_", "^NEG_Z", "^[SU](MAX|MIN)_ZI",
  1709. "^SUBR?_ZI")>;
  1710. def : InstRW<[A64FXWrite_6Cyc_GI0],
  1711. (instregex "^CLAST[AB]_[VZ]", "^COMPACT_Z", "^CPY_ZPmV", "^DUP_ZR",
  1712. "^EXT_Z", "^FDUP_Z", "^INSR_ZV", "^LAST[AB]_V", "^REV_Z",
  1713. "^SPLICE_Z", "^[SU]UNPK(HI|LO)_Z", "^TBL_Z", "^TRN[12]_Z")>;
  1714. def : InstRW<[A64FXWrite_9Cyc_GI0],
  1715. (instregex "^F(ADD|SUBR?)_.*I_", "^FRECPS_Z", "^FRSQRTS_Z",
  1716. "^INDEX_II_[SD]", "^MUL_ZI")>;
  1717. def : InstRW<[A64FXWrite_4Cyc_GI3],
  1718. (instregex "^CNT_Z")>;
  1719. def : InstRW<[A64FXWrite_4Cyc_GI03],
  1720. (instregex "^ABS_Z", "^ADD_Z", "^AND_Z[^I]", "^ASRR?_(WIDE_)?Z",
  1721. "^BIC_Z", "^ADR_[SU]XTW_Z", "^CNOT_Z", "^DEC[BHWD]_Z",
  1722. "^EOR_Z[^I]", "^INC[BHWD]_Z", "^ORR_Z[^I]", "^FABS_Z",
  1723. "^FACG[ET]_P", "^FEXPA_Z", "^F(MAX|MIN)[^V]*Z_",
  1724. "^FNEG_Z", "^FRECP[EX]_Z", "^FRSQRTE_Z", "^FTSSEL_Z",
  1725. "^LS[LR]R?(_WIDE)?_Z", "^NOT_Z", "^RBIT_Z", "^REV[BHW]_Z", "^SABD_Z",
  1726. "^SEL_Z", "^[SU](MAX|MIN)_ZP", "^[SU]Q(INC|DEC)[^P]_Z",
  1727. "^SUBR?_Z[^I]", "^[SU]XT._Z", "^UABD_Z")>;
  1728. def : InstRW<[A64FXWrite_9Cyc_GI03 ],
  1729. (instregex "^FABD_Z", "^F(ADD|SUBR?)_.*Z_", "^FN?(MAD|MLA|MLS|MSB)_ZP",
  1730. "^FMUL_(ZP|ZZZ_)", "^FMULX_Z", "^FCVT(ZS|ZU)?_Z",
  1731. "^FRINT._Z", "^FSCALE_Z", "^FTMAD_Z", "^FTSMUL_Z",
  1732. "^MAD_Z", "^MLA_Z", "^MLS_Z", "^MSB_Z", "^MUL_ZP",
  1733. "^[SU]CVTF_Z", "^[SU]DOT_ZZZ_", "^[SU]MULH_Z")>;
  // Predicate-register operations
  1734. def : InstRW<[A64FXWrite_3Cyc_GI1],
  1735. (instregex "^ANDS?_P", "^BICS?_P", "^BRK.*_P", "^EORS?_P", "^ORRS?_P",
  1736. "^NANDS?_P", "^NORS?_P", "^ORNS?_P", "^PFALSE", "^PNEXT",
  1737. "^PFIRST", "^PTEST", "^PTRUES?", "^PUNPK(HI|LO)",
  1738. "^RDFFRS?", "^REV_P", "^SEL_P", "^TRN[12]_P")>;
  1739. def : InstRW<[A64FXWrite_1Cyc_GI24],
  1740. (instregex "^ADD[PV]L", "^CNT[BHWD]_X", "^DEC[BHWD]_X", "^INC[BHWD]_X",
  1741. "^RDVLI")>;
  // SVE register fills and contiguous/replicating loads
  1742. def : InstRW<[A64FXWrite_11Cyc_GI5],
  1743. (instregex "^LDR_[PZ]XI")>;
  1744. def : InstRW<[A64FXWrite_11Cyc_GI56],
  1745. (instregex "^LD(NF|FF|NT)?1R?S?[BHSWDQ]")>;
  // Write class with an empty resource list, used for SETFFR and MOVPRFX.
  1746. def A64FXWrite_None : SchedWriteRes<[]> {
  1747. }
  1748. def : InstRW<[A64FXWrite_None], (instregex "^SETFFR", "^MOVPRFX")>;
  // Indexed SVE FMLA/FMLS/FMUL: latency 15, 2 micro-ops, A64FXGI03 held for
  // 2 cycles.
  1749. def A64FXWrite_FMAIndexed : SchedWriteRes<[A64FXGI03]> {
  1750. let Latency = 15;
  1751. let NumMicroOps = 2;
  1752. let ResourceCycles = [2];
  1753. }
  1754. def : InstRW<[A64FXWrite_FMAIndexed], (instregex "^F(MLA|MLS|MUL)_ZZZI")>;
  // SVE ADR with LSL: latency 5, 2 micro-ops, A64FXGI0 held for 2 cycles.
  1755. def A64FXWrite_ADR_LSL_Z : SchedWriteRes<[A64FXGI0]> {
  1756. let Latency = 5;
  1757. let NumMicroOps = 2;
  1758. let ResourceCycles = [2];
  1759. }
  1760. def : InstRW<[A64FXWrite_ADR_LSL_Z], (instregex "^ADR_LSL_Z")>;
  // SVE ASRD: latency 8, 2 micro-ops, using A64FXGI0 and A64FXGI01.
  1761. def A64FXWrite_ASRD : SchedWriteRes<[A64FXGI0, A64FXGI01]> {
  1762. let Latency = 8;
  1763. let NumMicroOps = 2;
  1764. }
  1765. def : InstRW<[A64FXWrite_ASRD], (instregex "^ASRD_Z")>;
  // SVE integer/logical reductions (ANDV, EORV, ORV, SADDV, SMAXV, SMINV, UADDV,
  // UMAXV, UMINV), one write class per element size. From B to D each step
  // lowers Latency by 4 and NumMicroOps/ResourceCycles by 1
  // (46/10, 42/9, 38/8, 34/7), all on A64FXGI03.
  1766. def A64FXWrite_Reduction4CycB : SchedWriteRes<[A64FXGI03]> {
  1767. let Latency = 46;
  1768. let NumMicroOps = 10;
  1769. let ResourceCycles = [10];
  1770. }
  1771. def : InstRW<[A64FXWrite_Reduction4CycB],
  1772. (instregex "^(AND|EOR|OR|SADD|SMAX|SMIN|UADD|UMAX|UMIN)V_VPZ_B")>;
  1773. def A64FXWrite_Reduction4CycH : SchedWriteRes<[A64FXGI03]> {
  1774. let Latency = 42;
  1775. let NumMicroOps = 9;
  1776. let ResourceCycles = [9];
  1777. }
  1778. def : InstRW<[A64FXWrite_Reduction4CycH],
  1779. (instregex "^(AND|EOR|OR|SADD|SMAX|SMIN|UADD|UMAX|UMIN)V_VPZ_H")>;
  1780. def A64FXWrite_Reduction4CycS : SchedWriteRes<[A64FXGI03]> {
  1781. let Latency = 38;
  1782. let NumMicroOps = 8;
  1783. let ResourceCycles = [8];
  1784. }
  1785. def : InstRW<[A64FXWrite_Reduction4CycS],
  1786. (instregex "^(AND|EOR|OR|SADD|SMAX|SMIN|UADD|UMAX|UMIN)V_VPZ_S")>;
  1787. def A64FXWrite_Reduction4CycD : SchedWriteRes<[A64FXGI03]> {
  1788. let Latency = 34;
  1789. let NumMicroOps = 7;
  1790. let ResourceCycles = [7];
  1791. }
  1792. def : InstRW<[A64FXWrite_Reduction4CycD],
  1793. (instregex "^(AND|EOR|OR|SADD|SMAX|SMIN|UADD|UMAX|UMIN)V_VPZ_D")>;
  // CLASTA/CLASTB to a general register: latency 29 on A64FXGI0 and A64FXGI56.
  1794. def A64FXWrite_CLAST_R : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  1795. let Latency = 29;
  1796. }
  1797. def : InstRW<[A64FXWrite_CLAST_R], (instregex "^CLAST[AB]_R")>;
  // SVE integer compares writing a predicate: latency 4 on A64FXGI0 and A64FXGI1.
  1798. def A64FXWrite_CMP : SchedWriteRes<[A64FXGI0, A64FXGI1]> {
  1799. let Latency = 4;
  1800. }
  1801. def : InstRW<[A64FXWrite_CMP], (instregex "^CMP.*_P")>;
  // CNTP to a general register: latency 6 on A64FXGI1 and A64FXGI2.
  1802. def A64FXWrite_CNTP : SchedWriteRes<[A64FXGI1, A64FXGI2]> {
  1803. let Latency = 6;
  1804. }
  1805. def : InstRW<[A64FXWrite_CNTP], (instregex "^CNTP_X")>;
  // CPY from a scalar register (CPY_ZPmR): latency 8 on A64FXGI0 and A64FXGI2.
  1806. def A64FXWrite_CPYScalar : SchedWriteRes<[A64FXGI0, A64FXGI2]> {
  1807. let Latency = 8;
  1808. }
  1809. def : InstRW<[A64FXWrite_CPYScalar], (instregex "^CPY_ZPmR")>;
  // CTERM: latency 2, A64FXGI24 held for 2 cycles.
  1810. def A64FXWrite_CTERM : SchedWriteRes<[A64FXGI24]> {
  1811. let Latency = 2;
  1812. let ResourceCycles = [2];
  1813. }
  1814. def : InstRW<[A64FXWrite_CTERM], (instregex "^CTERM")>;
  // INCP/DECP on a general register: latency 7, 2 micro-ops across
  // A64FXGI1, A64FXGI2 and A64FXGI4.
  1815. def A64FXWrite_INCPScalar : SchedWriteRes<[A64FXGI1, A64FXGI2, A64FXGI4]> {
  1816. let Latency = 7;
  1817. let NumMicroOps = 2;
  1818. }
  1819. def : InstRW<[A64FXWrite_INCPScalar], (instregex "^DECP_X", "^INCP_X")>;
  // INCP/DECP on a vector register: latency 12 on A64FXGI0 and A64FXGI1.
  1820. def A64FXWrite_INCPVector : SchedWriteRes<[A64FXGI0, A64FXGI1]> {
  1821. let Latency = 12;
  1822. }
  1823. def : InstRW<[A64FXWrite_INCPVector], (instregex "^DECP_Z", "^INCP_Z")>;
  // SVE FADDV reductions, one write class per element size (H/S/D), all on
  // A64FXGI03. Latency and micro-op count shrink as the element size grows.
  1824. def A64FXWrite_FADDVH : SchedWriteRes<[A64FXGI03]> {
  1825. let Latency = 75;
  1826. let NumMicroOps = 11;
  1827. let ResourceCycles = [11];
  1828. }
  1829. def : InstRW<[A64FXWrite_FADDVH], (instrs FADDV_VPZ_H)>;
  1830. def A64FXWrite_FADDVS : SchedWriteRes<[A64FXGI03]> {
  1831. let Latency = 60;
  1832. let NumMicroOps = 9;
  1833. let ResourceCycles = [9];
  1834. }
  1835. def : InstRW<[A64FXWrite_FADDVS], (instrs FADDV_VPZ_S)>;
  1836. def A64FXWrite_FADDVD : SchedWriteRes<[A64FXGI03]> {
  1837. let Latency = 45;
  1838. let NumMicroOps = 7;
  1839. let ResourceCycles = [7];
  1840. }
  1841. def : InstRW<[A64FXWrite_FADDVD], (instrs FADDV_VPZ_D)>;
  // SVE FADDA, one write class per element size. Micro-op counts are 63, 31
  // and 15 for H, S and D, with latencies 468, 228 and 108.
  1842. def A64FXWrite_FADDAH : SchedWriteRes<[A64FXGI03]> {
  1843. let Latency = 468;
  1844. let NumMicroOps = 63;
  1845. let ResourceCycles = [63];
  1846. }
  1847. def : InstRW<[A64FXWrite_FADDAH], (instrs FADDA_VPZ_H)>;
  1848. def A64FXWrite_FADDAS : SchedWriteRes<[A64FXGI03]> {
  1849. let Latency = 228;
  1850. let NumMicroOps = 31;
  1851. let ResourceCycles = [31];
  1852. }
  1853. def : InstRW<[A64FXWrite_FADDAS], (instrs FADDA_VPZ_S)>;
  1854. def A64FXWrite_FADDAD : SchedWriteRes<[A64FXGI03]> {
  1855. let Latency = 108;
  1856. let NumMicroOps = 15;
  1857. let ResourceCycles = [15];
  1858. }
  1859. def : InstRW<[A64FXWrite_FADDAD], (instrs FADDA_VPZ_D)>;
  // SVE FCADD: latency 15, 2 micro-ops on A64FXGI0 and A64FXGI3.
  1860. def A64FXWrite_FCADDZ : SchedWriteRes<[A64FXGI0, A64FXGI3]> {
  1861. let Latency = 15;
  1862. let NumMicroOps = 2;
  1863. }
  1864. def : InstRW<[A64FXWrite_FCADDZ], (instregex "^FCADD_Z")>;
  // SVE FCMLA: latency 15, 3 micro-ops, A64FXGI03 held for 3 cycles.
  1865. def A64FXWrite_FCMLAZ : SchedWriteRes<[A64FXGI03]> {
  1866. let Latency = 15;
  1867. let NumMicroOps = 3;
  1868. let ResourceCycles = [3];
  1869. }
  1870. def : InstRW<[A64FXWrite_FCMLAZ], (instregex "^FCMLA_Z")>;
  // SVE FDIV/FDIVR/FSQRT per element size. ResourceCycles equals Latency in
  // each class, so A64FXGI0 is held for the whole operation.
  1871. def A64FXWrite_FDIVH : SchedWriteRes<[A64FXGI0]> {
  1872. let Latency = 134;
  1873. let ResourceCycles = [134];
  1874. }
  1875. def : InstRW<[A64FXWrite_FDIVH], (instregex "^F(DIVR?|SQRT)_Z.*_H")>;
  1876. def A64FXWrite_FDIVS : SchedWriteRes<[A64FXGI0]> {
  1877. let Latency = 98;
  1878. let ResourceCycles = [98];
  1879. }
  1880. def : InstRW<[A64FXWrite_FDIVS], (instregex "^F(DIVR?|SQRT)_Z.*_S")>;
  1881. def A64FXWrite_FDIVD : SchedWriteRes<[A64FXGI0]> {
  1882. let Latency = 154;
  1883. let ResourceCycles = [154];
  1884. }
  1885. def : InstRW<[A64FXWrite_FDIVD], (instregex "^F(DIVR?|SQRT)_Z.*_D")>;
  // SVE FMAXV/FMINV/FMAXNMV/FMINNMV reductions on H elements: latency 54,
  // 11 micro-ops, A64FXGI03 held for 11 cycles.
  1886. def A64FXWrite_FMAXVH : SchedWriteRes<[A64FXGI03]> {
  1887. let Latency = 54;
  1888. let NumMicroOps = 11;
  1889. let ResourceCycles = [11];
  1890. }
  1891. def : InstRW<[A64FXWrite_FMAXVH], (instregex "^F(MAX|MIN)(NM)?V_VPZ_H")>;
  // Same reductions on S elements: latency 44, 9 micro-ops.
  1892. def A64FXWrite_FMAXVS : SchedWriteRes<[A64FXGI03]> {
  1893. let Latency = 44;
  1894. let NumMicroOps = 9;
  1895. let ResourceCycles = [9];
  1896. }
  1897. def : InstRW<[A64FXWrite_FMAXVS], (instregex "^F(MAX|MIN)(NM)?V_VPZ_S")>;
  // SVE FMAXV/FMINV/FMAXNMV/FMINNMV reductions on D elements: latency 34,
  // 7 micro-ops, A64FXGI03 held for 7 cycles.
  1898. def A64FXWrite_FMAXVD : SchedWriteRes<[A64FXGI03]> {
  1899. let Latency = 34;
  1900. let NumMicroOps = 7;
  1901. let ResourceCycles = [7];
  1902. }
  // Bind the D-element reductions to the D-specific write class defined above.
  // Previously this used A64FXWrite_FMAXVH, which left A64FXWrite_FMAXVD unused
  // and modelled the D forms with the H-element cost.
  1903. def : InstRW<[A64FXWrite_FMAXVD], (instregex "^F(MAX|MIN)(NM)?V_VPZ_D")>;
  // SVE INDEX write classes, split by operand kinds (R = register, I = immediate
  // in the instruction-name suffix) and element size. The INDEX_II_[SD] forms
  // are not defined here; they are mapped to A64FXWrite_9Cyc_GI0 in the SVE
  // regex list.
  1904. def A64FXWrite_INDEX_RI_BH : SchedWriteRes<[A64FXGI0, A64FXGI2]> {
  1905. let Latency = 17;
  1906. let NumMicroOps = 2;
  1907. let ResourceCycles = [2, 2];
  1908. }
  1909. def : InstRW<[A64FXWrite_INDEX_RI_BH], (instregex "^INDEX_(RI|IR)_[BH]")>;
  1910. def A64FXWrite_INDEX_RI_SD : SchedWriteRes<[A64FXGI0, A64FXGI2]> {
  1911. let Latency = 13;
  1912. let NumMicroOps = 1;
  1913. }
  1914. def : InstRW<[A64FXWrite_INDEX_RI_SD], (instregex "^INDEX_(RI|IR)_[SD]")>;
  1915. def A64FXWrite_INDEX_II_BH : SchedWriteRes<[A64FXGI0]> {
  1916. let Latency = 13;
  1917. let NumMicroOps = 2;
  1918. let ResourceCycles = [2];
  1919. }
  1920. def : InstRW<[A64FXWrite_INDEX_II_BH], (instregex "^INDEX_II_[BH]")>;
  1921. def A64FXWrite_INDEX_RR_BH : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI3]> {
  1922. let Latency = 17;
  1923. let NumMicroOps = 3;
  1924. let ResourceCycles = [2, 2, 1];
  1925. }
  1926. def : InstRW<[A64FXWrite_INDEX_RR_BH], (instregex "^INDEX_RR_[BH]")>;
  1927. def A64FXWrite_INDEX_RR_SD : SchedWriteRes<[A64FXGI0, A64FXGI2]> {
  1928. let Latency = 17;
  1929. let NumMicroOps = 2;
  1930. let ResourceCycles = [2, 1];
  1931. }
  1932. def : InstRW<[A64FXWrite_INDEX_RR_SD], (instregex "^INDEX_RR_[SD]")>;
  // INSR from a general register: latency 10 on A64FXGI0 and A64FXGI2.
  1933. def A64FXWrite_INSR_ZR : SchedWriteRes<[A64FXGI0, A64FXGI2]> {
  1934. let Latency = 10;
  1935. }
  1936. def : InstRW<[A64FXWrite_INSR_ZR], (instregex "^INSR_ZR")>;
  // LASTA/LASTB to a general register: latency 25 on A64FXGI0 and A64FXGI56.
  1937. def A64FXWrite_LAST_R : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  1938. let Latency = 25;
  1939. }
  // Bind LAST[AB]_R to the write class defined just above. Previously this
  // used A64FXWrite_CLAST_R (latency 29), leaving A64FXWrite_LAST_R unused.
  1940. def : InstRW<[A64FXWrite_LAST_R], (instregex "^LAST[AB]_R")>;
  1941. def A64FXWrite_GLD_S_ZI : SchedWriteRes<[A64FXGI0, A64FXGI5, A64FXGI6]> {
  1942. let Latency = 19;
  1943. let ResourceCycles = [2, 4, 4];
  1944. }
  1945. def : InstRW<[A64FXWrite_GLD_S_ZI],
  1946. (instregex "^GLD(FF)?1W_IMM", "^GLD(FF)?1S?[BHW]_S_IMM")>;
  // Scheduling entry for opcodes matching ^GLD(FF)?1D_IMM and
  // ^GLD(FF)?1S?[BHW]_D_IMM.
  // Latency 16; cycles held: A64FXGI0 = 1, A64FXGI5 = 2, A64FXGI6 = 2.
  def A64FXWrite_GLD_D_ZI
      : SchedWriteRes<[A64FXGI0, A64FXGI5, A64FXGI6]> {
    let Latency = 16;
    let ResourceCycles = [1, 2, 2];
  }
  def : InstRW<[A64FXWrite_GLD_D_ZI],
               (instregex "^GLD(FF)?1D_IMM",
                          "^GLD(FF)?1S?[BHW]_D_IMM")>;
  // Scheduling entry for opcodes matching ^GLD(FF)?1W_[^DI] and
  // ^GLD(FF)?1S?[BHW]_S_[^I] (the character classes exclude the _IMM names
  // and, for the W pattern, the _D names).
  // Latency 23; cycles held: A64FXGI0 = 2, A64FXGI2 = 1, A64FXGI5 = 4,
  // A64FXGI6 = 4.
  def A64FXWrite_GLD_S_RZ
      : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI5, A64FXGI6]> {
    let Latency = 23;
    let ResourceCycles = [2, 1, 4, 4];
  }
  def : InstRW<[A64FXWrite_GLD_S_RZ],
               (instregex "^GLD(FF)?1W_[^DI]",
                          "^GLD(FF)?1S?[BHW]_S_[^I]")>;
  // Scheduling entry for the GLD(FF)1 D-form opcodes whose names do not
  // continue with _I...: both the suffixed names (_[^I]...) and the bare
  // names (anchored with $) are listed.
  // Latency 20; cycles held: A64FXGI0 = 1, A64FXGI2 = 1, A64FXGI5 = 2,
  // A64FXGI6 = 2.
  def A64FXWrite_GLD_D_RZ
      : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI5, A64FXGI6]> {
    let Latency = 20;
    let ResourceCycles = [1, 1, 2, 2];
  }
  def : InstRW<[A64FXWrite_GLD_D_RZ],
               (instregex "^GLD(FF)?1D_[^I]",
                          "^GLD(FF)?1D$",
                          "^GLD(FF)?1S?[BHW]_D_[^I]",
                          "^GLD(FF)?1S?[BHW]_D$")>;
  // Scheduling entry for opcodes matching ^LD2[BH].
  // Latency 15, 3 micro-ops; A64FXGI56 is held for 9 cycles.
  def A64FXWrite_LD2_BH : SchedWriteRes<[A64FXGI56]> {
    let Latency = 15;
    let NumMicroOps = 3;
    let ResourceCycles = [9];
  }
  def : InstRW<[A64FXWrite_LD2_BH],
               (instregex "^LD2[BH]")>;
  // Scheduling entry for opcodes matching ^LD2[WD]_IMM.
  // Latency 11, 2 micro-ops; A64FXGI56 is held for 2 cycles.
  def A64FXWrite_LD2_WD_IMM : SchedWriteRes<[A64FXGI56]> {
    let Latency = 11;
    let NumMicroOps = 2;
    let ResourceCycles = [2];
  }
  def : InstRW<[A64FXWrite_LD2_WD_IMM],
               (instregex "^LD2[WD]_IMM")>;
  // Scheduling entry for opcodes matching ^LD2[WD]$ (bare names only).
  // Latency 12, 3 micro-ops; A64FXGI56 is held for 3 cycles.
  def A64FXWrite_LD2_WD : SchedWriteRes<[A64FXGI56]> {
    let Latency = 12;
    let NumMicroOps = 3;
    let ResourceCycles = [3];
  }
  def : InstRW<[A64FXWrite_LD2_WD],
               (instregex "^LD2[WD]$")>;
  // Scheduling entry for opcodes matching ^LD3[BH].
  // Latency 15, 4 micro-ops; A64FXGI56 is held for 13 cycles.
  def A64FXWrite_LD3_BH : SchedWriteRes<[A64FXGI56]> {
    let Latency = 15;
    let NumMicroOps = 4;
    let ResourceCycles = [13];
  }
  def : InstRW<[A64FXWrite_LD3_BH],
               (instregex "^LD3[BH]")>;
  // Scheduling entry for opcodes matching ^LD3[WD]_IMM.
  // Latency 11, 3 micro-ops; A64FXGI56 is held for 3 cycles.
  def A64FXWrite_LD3_WD_IMM : SchedWriteRes<[A64FXGI56]> {
    let Latency = 11;
    let NumMicroOps = 3;
    let ResourceCycles = [3];
  }
  def : InstRW<[A64FXWrite_LD3_WD_IMM],
               (instregex "^LD3[WD]_IMM")>;
  // Scheduling entry for opcodes matching ^LD3[WD]$ (bare names only).
  // Latency 12, 4 micro-ops; A64FXGI56 is held for 4 cycles.
  def A64FXWrite_LD3_WD : SchedWriteRes<[A64FXGI56]> {
    let Latency = 12;
    let NumMicroOps = 4;
    let ResourceCycles = [4];
  }
  def : InstRW<[A64FXWrite_LD3_WD],
               (instregex "^LD3[WD]$")>;
  // Scheduling entry for opcodes matching ^LD4[BH].
  // Latency 15, 5 micro-ops; A64FXGI56 is held for 17 cycles.
  def A64FXWrite_LD4_BH : SchedWriteRes<[A64FXGI56]> {
    let Latency = 15;
    let NumMicroOps = 5;
    let ResourceCycles = [17];
  }
  def : InstRW<[A64FXWrite_LD4_BH],
               (instregex "^LD4[BH]")>;
  // Scheduling entry for opcodes matching ^LD4[WD]_IMM.
  // Latency 11, 4 micro-ops; A64FXGI56 is held for 4 cycles.
  def A64FXWrite_LD4_WD_IMM : SchedWriteRes<[A64FXGI56]> {
    let Latency = 11;
    let NumMicroOps = 4;
    let ResourceCycles = [4];
  }
  def : InstRW<[A64FXWrite_LD4_WD_IMM],
               (instregex "^LD4[WD]_IMM")>;
  // Scheduling entry for opcodes matching ^LD4[WD]$ (bare names only).
  // Latency 12, 5 micro-ops; A64FXGI56 is held for 5 cycles.
  def A64FXWrite_LD4_WD : SchedWriteRes<[A64FXGI56]> {
    let Latency = 12;
    let NumMicroOps = 5;
    let ResourceCycles = [5];
  }
  def : InstRW<[A64FXWrite_LD4_WD],
               (instregex "^LD4[WD]$")>;
  // Scheduling entry for opcodes matching ^PRF._PR.
  // Uses A64FXGI56 and overrides no SchedWriteRes field.
  def A64FXWrite_PRF : SchedWriteRes<[A64FXGI56]> {
  }
  def : InstRW<[A64FXWrite_PRF],
               (instregex "^PRF._PR")>;
  // Scheduling entry for opcodes matching ^PRF._S_[^P].
  // Cycles held: A64FXGI0 = 2, A64FXGI2 = 1, A64FXGI56 = 4. Latency is not
  // overridden.
  def A64FXWrite_PRF_W_RZ
      : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI56]> {
    let ResourceCycles = [2, 1, 4];
  }
  def : InstRW<[A64FXWrite_PRF_W_RZ],
               (instregex "^PRF._S_[^P]")>;
  // Scheduling entry for opcodes matching ^PRF._S_PZI.
  // Cycles held: A64FXGI0 = 2, A64FXGI56 = 4. Latency is not overridden.
  def A64FXWrite_PRF_W_ZI : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
    let ResourceCycles = [2, 4];
  }
  def : InstRW<[A64FXWrite_PRF_W_ZI],
               (instregex "^PRF._S_PZI")>;
  // Scheduling entry for opcodes matching ^PRF._D_[^P].
  // Cycles held: A64FXGI0 = 1, A64FXGI2 = 1, A64FXGI56 = 2. Latency is not
  // overridden.
  def A64FXWrite_PRF_D_RZ
      : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI56]> {
    let ResourceCycles = [1, 1, 2];
  }
  def : InstRW<[A64FXWrite_PRF_D_RZ],
               (instregex "^PRF._D_[^P]")>;
  // Scheduling entry for opcodes matching ^PRF._D_PZI.
  // Cycles held: A64FXGI0 = 1, A64FXGI56 = 2. Latency is not overridden.
  def A64FXWrite_PRF_D_ZI : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
    let ResourceCycles = [1, 2];
  }
  def : InstRW<[A64FXWrite_PRF_D_ZI],
               (instregex "^PRF._D_PZI")>;
  // Scheduling entry for opcodes matching ^[SU]DIVR?.*_S.
  // Latency 114; A64FXGI0 is held for the same 114 cycles.
  def A64FXWrite_SDIV_S : SchedWriteRes<[A64FXGI0]> {
    let Latency = 114;
    let ResourceCycles = [114];
  }
  def : InstRW<[A64FXWrite_SDIV_S],
               (instregex "^[SU]DIVR?.*_S")>;
  // Scheduling entry for opcodes matching ^[SU]DIVR?.*_D.
  // Latency 178; A64FXGI0 is held for the same 178 cycles.
  def A64FXWrite_SDIV_D : SchedWriteRes<[A64FXGI0]> {
    let Latency = 178;
    let ResourceCycles = [178];
  }
  def : InstRW<[A64FXWrite_SDIV_D],
               (instregex "^[SU]DIVR?.*_D")>;
  // Scheduling entry for opcodes matching ^[SU]DOT_ZZZI.
  // Latency 15, 2 micro-ops on A64FXGI0 + A64FXGI3; ResourceCycles is not
  // overridden.
  def A64FXWrite_SDOT_I : SchedWriteRes<[A64FXGI0, A64FXGI3]> {
    let Latency = 15;
    let NumMicroOps = 2;
  }
  def : InstRW<[A64FXWrite_SDOT_I],
               (instregex "^[SU]DOT_ZZZI")>;
  // Scheduling entry for opcodes matching ^[SU]Q(INC|DEC)[BHWD]_[WX].
  // Latency 2; A64FXGI24 is held for 2 cycles.
  def A64FXWrite_SQINC_Scalar : SchedWriteRes<[A64FXGI24]> {
    let Latency = 2;
    let ResourceCycles = [2];
  }
  def : InstRW<[A64FXWrite_SQINC_Scalar],
               (instregex "^[SU]Q(INC|DEC)[BHWD]_[WX]")>;
  // Scheduling entry for opcodes matching ^[SU]Q(INC|DEC)P_[WX].
  // Latency 6, 2 micro-ops; cycles held: A64FXGI24 = 3, A64FXGI3 = 1.
  def A64FXWrite_SQINCP_X : SchedWriteRes<[A64FXGI24, A64FXGI3]> {
    let Latency = 6;
    let NumMicroOps = 2;
    let ResourceCycles = [3, 1];
  }
  def : InstRW<[A64FXWrite_SQINCP_X],
               (instregex "^[SU]Q(INC|DEC)P_[WX]")>;
  // Scheduling entry for opcodes matching ^[SU]Q(INC|DEC)P_Z.
  // Latency 12 on A64FXGI24 + A64FXGI3; no other field is overridden.
  def A64FXWrite_SQINCP_Z : SchedWriteRes<[A64FXGI24, A64FXGI3]> {
    let Latency = 12;
  }
  def : InstRW<[A64FXWrite_SQINCP_Z],
               (instregex "^[SU]Q(INC|DEC)P_Z")>;
  // Scheduling entry for opcodes matching ^ST(NT)?1[BHWD].
  // Latency 11 on A64FXGI0 + A64FXGI56; no other field is overridden.
  def A64FXWrite_ST1 : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
    let Latency = 11;
  }
  def : InstRW<[A64FXWrite_ST1],
               (instregex "^ST(NT)?1[BHWD]")>;
  // Scheduling entry for opcodes matching ^SST1[BH]_S(_[^I]|$) and
  // ^SST1W(_[^ID]|$), i.e. names that do not continue with _I... (and, for
  // SST1W, not with _D... either).
  // Latency 20, 8 micro-ops; each of A64FXGI0, A64FXGI2, A64FXGI5 and
  // A64FXGI6 is held for 8 cycles.
  def A64FXWrite_SST1_W_RZ
      : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI5, A64FXGI6]> {
    let Latency = 20;
    let NumMicroOps = 8;
    let ResourceCycles = [8, 8, 8, 8];
  }
  def : InstRW<[A64FXWrite_SST1_W_RZ],
               (instregex "^SST1[BH]_S(_[^I]|$)",
                          "^SST1W(_[^ID]|$)")>;
  // Scheduling entry for opcodes matching ^SST1[BHW]_D(_[^I]|$) and
  // ^SST1D(_[^I]|$), i.e. names that do not continue with _I....
  // Latency 20, 4 micro-ops; each of A64FXGI0, A64FXGI2, A64FXGI5 and
  // A64FXGI6 is held for 4 cycles.
  def A64FXWrite_SST1_D_RZ
      : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI5, A64FXGI6]> {
    let Latency = 20;
    let NumMicroOps = 4;
    let ResourceCycles = [4, 4, 4, 4];
  }
  def : InstRW<[A64FXWrite_SST1_D_RZ],
               (instregex "^SST1[BHW]_D(_[^I]|$)",
                          "^SST1D(_[^I]|$)")>;
  // Scheduling entry for opcodes matching ^SST1[BH]_S_I and ^SST1W_I.
  // Latency 16, 8 micro-ops; cycles held: A64FXGI0 = 12, A64FXGI5 = 8,
  // A64FXGI6 = 8.
  def A64FXWrite_SST1_W_ZI
      : SchedWriteRes<[A64FXGI0, A64FXGI5, A64FXGI6]> {
    let Latency = 16;
    let NumMicroOps = 8;
    let ResourceCycles = [12, 8, 8];
  }
  def : InstRW<[A64FXWrite_SST1_W_ZI],
               (instregex "^SST1[BH]_S_I",
                          "^SST1W_I")>;
  // Scheduling entry for opcodes matching ^SST1[BHW]_D_I and ^SST1D_I.
  // Latency 16, 4 micro-ops; each of A64FXGI0, A64FXGI5 and A64FXGI6 is held
  // for 4 cycles.
  def A64FXWrite_SST1_D_ZI
      : SchedWriteRes<[A64FXGI0, A64FXGI5, A64FXGI6]> {
    let Latency = 16;
    let NumMicroOps = 4;
    let ResourceCycles = [4, 4, 4];
  }
  def : InstRW<[A64FXWrite_SST1_D_ZI],
               (instregex "^SST1[BHW]_D_I",
                          "^SST1D_I")>;
  // Scheduling entry for opcodes matching ^ST2[BH].
  // Latency 12, 3 micro-ops; cycles held: A64FXGI0 = 8, A64FXGI56 = 9.
  def A64FXWrite_ST2_BH : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
    let Latency = 12;
    let NumMicroOps = 3;
    let ResourceCycles = [8, 9];
  }
  def : InstRW<[A64FXWrite_ST2_BH],
               (instregex "^ST2[BH]")>;
  // ST2W / ST2D scheduling, split by opcode-name suffix.
  //
  // The _RI write is bound to the `_IMM` opcodes and the _RR write to the
  // bare opcodes. This mirrors the LD2[WD] entries in this file, where the
  // `_IMM` names get the cheaper cost (latency 11, 2 micro-ops) and the bare
  // names the costlier one (latency 12, 3 micro-ops). The two patterns were
  // previously attached the other way round, contradicting the _RI/_RR names.
  // NOTE(review): presumably _RI = register+immediate and _RR =
  // register+register addressing; confirm against the A64FX
  // microarchitecture manual.

  // `_IMM` opcodes: latency 11, 2 micro-ops; A64FXGI0 = 2, A64FXGI56 = 2.
  def A64FXWrite_ST2_WD_RI : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
    let Latency = 11;
    let NumMicroOps = 2;
    let ResourceCycles = [2, 2];
  }
  def : InstRW<[A64FXWrite_ST2_WD_RI],
               (instregex "^ST2[WD]_IMM")>;

  // Bare opcodes: latency 12, 3 micro-ops; A64FXGI0 = 2, A64FXGI56 = 3.
  def A64FXWrite_ST2_WD_RR : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
    let Latency = 12;
    let NumMicroOps = 3;
    let ResourceCycles = [2, 3];
  }
  def : InstRW<[A64FXWrite_ST2_WD_RR],
               (instregex "^ST2[WD]$")>;
  // Scheduling entry for opcodes matching ^ST3[BH].
  // Latency 15, 4 micro-ops; cycles held: A64FXGI0 = 12, A64FXGI56 = 13.
  def A64FXWrite_ST3_BH : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
    let Latency = 15;
    let NumMicroOps = 4;
    let ResourceCycles = [12, 13];
  }
  def : InstRW<[A64FXWrite_ST3_BH],
               (instregex "^ST3[BH]")>;
  // ST3W / ST3D scheduling, split by opcode-name suffix.
  //
  // The _RI write is bound to the `_IMM` opcodes and the _RR write to the
  // bare opcodes. This mirrors the LD3[WD] entries in this file, where the
  // `_IMM` names get the cheaper cost (latency 11, 3 micro-ops) and the bare
  // names the costlier one (latency 12, 4 micro-ops). The two patterns were
  // previously attached the other way round, contradicting the _RI/_RR names.
  // NOTE(review): presumably _RI = register+immediate and _RR =
  // register+register addressing; confirm against the A64FX
  // microarchitecture manual.

  // `_IMM` opcodes: latency 11, 3 micro-ops; A64FXGI0 = 3, A64FXGI56 = 3.
  def A64FXWrite_ST3_WD_RI : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
    let Latency = 11;
    let NumMicroOps = 3;
    let ResourceCycles = [3, 3];
  }
  def : InstRW<[A64FXWrite_ST3_WD_RI],
               (instregex "^ST3[WD]_IMM")>;

  // Bare opcodes: latency 12, 4 micro-ops; A64FXGI0 = 3, A64FXGI56 = 4.
  def A64FXWrite_ST3_WD_RR : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
    let Latency = 12;
    let NumMicroOps = 4;
    let ResourceCycles = [3, 4];
  }
  def : InstRW<[A64FXWrite_ST3_WD_RR],
               (instregex "^ST3[WD]$")>;
  // Scheduling entry for opcodes matching ^ST4[BH].
  // Latency 15, 5 micro-ops; cycles held: A64FXGI0 = 16, A64FXGI56 = 17.
  def A64FXWrite_ST4_BH : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
    let Latency = 15;
    let NumMicroOps = 5;
    let ResourceCycles = [16, 17];
  }
  def : InstRW<[A64FXWrite_ST4_BH],
               (instregex "^ST4[BH]")>;
  // ST4W / ST4D scheduling, split by opcode-name suffix.
  //
  // The _RI write is bound to the `_IMM` opcodes and the _RR write to the
  // bare opcodes. This mirrors the LD4[WD] entries in this file, where the
  // `_IMM` names get the cheaper cost (latency 11, 4 micro-ops) and the bare
  // names the costlier one (latency 12, 5 micro-ops). The two patterns were
  // previously attached the other way round, contradicting the _RI/_RR names.
  // NOTE(review): presumably _RI = register+immediate and _RR =
  // register+register addressing; confirm against the A64FX
  // microarchitecture manual.

  // `_IMM` opcodes: latency 11, 4 micro-ops; A64FXGI0 = 4, A64FXGI56 = 4.
  def A64FXWrite_ST4_WD_RI : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
    let Latency = 11;
    let NumMicroOps = 4;
    let ResourceCycles = [4, 4];
  }
  def : InstRW<[A64FXWrite_ST4_WD_RI],
               (instregex "^ST4[WD]_IMM")>;

  // Bare opcodes: latency 12, 5 micro-ops; A64FXGI0 = 4, A64FXGI56 = 5.
  def A64FXWrite_ST4_WD_RR : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
    let Latency = 12;
    let NumMicroOps = 5;
    let ResourceCycles = [4, 5];
  }
  def : InstRW<[A64FXWrite_ST4_WD_RR],
               (instregex "^ST4[WD]$")>;
  // Scheduling entry for the single opcode STR_PXI (listed by name with
  // `instrs`, not by regex).
  // Latency 11 on A64FXGI3 + A64FXGI5; no other field is overridden.
  def A64FXWrite_STR_P : SchedWriteRes<[A64FXGI3, A64FXGI5]> {
    let Latency = 11;
  }
  def : InstRW<[A64FXWrite_STR_P],
               (instrs STR_PXI)>;
  // Scheduling entry for the single opcode STR_ZXI (listed by name with
  // `instrs`, not by regex).
  // Latency 11 on A64FXGI0 + A64FXGI5; no other field is overridden.
  def A64FXWrite_STR_Z : SchedWriteRes<[A64FXGI0, A64FXGI5]> {
    let Latency = 11;
  }
  def : InstRW<[A64FXWrite_STR_Z],
               (instrs STR_ZXI)>;
  // Scheduling entry for opcodes matching ^WHILEL._P.
  // Latency 4 on A64FXGI3 + A64FXGI5; no other field is overridden.
  def A64FXWrite_WHILE : SchedWriteRes<[A64FXGI3, A64FXGI5]> {
    let Latency = 4;
  }
  def : InstRW<[A64FXWrite_WHILE],
               (instregex "^WHILEL._P")>;
  // Scheduling entry for the single opcode WRFFR (listed by name with
  // `instrs`, not by regex).
  // Latency 3, 2 micro-ops on A64FXGI3 + A64FXGI5; ResourceCycles is not
  // overridden.
  def A64FXWrite_WRFFR : SchedWriteRes<[A64FXGI3, A64FXGI5]> {
    let Latency = 3;
    let NumMicroOps = 2;
  }
  def : InstRW<[A64FXWrite_WRFFR],
               (instrs WRFFR)>;
  2172. } // SchedModel = A64FXModel