AArch64SchedNeoverseN2.td 86 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280
  1. //=- AArch64SchedNeoverseN2.td - NeoverseN2 Scheduling Defs --*- tablegen -*-=//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file defines the scheduling model for the Arm Neoverse N2 processors.
  10. //
  11. //===----------------------------------------------------------------------===//
  // Top-level machine model record for the Neoverse N2. The
  // `let SchedModel = NeoverseN2Model in { ... }` scope further down attaches
  // the processor resources and write types to this record.
  12. def NeoverseN2Model : SchedMachineModel {
  13. let IssueWidth = 10; // Micro-ops dispatched at a time.
  14. let MicroOpBufferSize = 160; // Entries in micro-op re-order buffer.
  15. let LoadLatency = 4; // Optimistic load latency.
  16. let MispredictPenalty = 10; // Extra cycles for mispredicted branch.
  17. let LoopMicroOpBufferSize = 16; // NOTE: Copied from Cortex-A57.
  18. let CompleteModel = 1;
  // Predicate list = everything in SMEUnsupported.F plus HasSVE2p1.
  19. list<Predicate> UnsupportedFeatures = !listconcat(SMEUnsupported.F,
  20. [HasSVE2p1]);
  21. }
  22. //===----------------------------------------------------------------------===//
  23. // Define each kind of processor resource and number available on Neoverse N2.
  24. // Instructions are first fetched and then decoded into internal macro-ops
  25. // (MOPs). From there, the MOPs proceed through register renaming and dispatch
  26. // stages. A MOP can be split into two micro-ops further down the pipeline
  27. // after the decode stage. Once dispatched, micro-ops wait for their operands
  28. // and issue out-of-order to one of thirteen issue pipelines. Each issue
  29. // pipeline can accept one micro-op per cycle.
  // Everything inside this scope is bound to NeoverseN2Model.
  30. let SchedModel = NeoverseN2Model in {
  31. // Define the (13) issue ports.
  // Unit counts: B(2) + S(2) + M0(1) + M1(1) + L01(2) + L2(1) + D(2) + V0(1)
  // + V1(1) = 13.
  32. def N2UnitB : ProcResource<2>; // Branch 0/1
  33. def N2UnitS : ProcResource<2>; // Integer single Cycle 0/1
  34. def N2UnitM0 : ProcResource<1>; // Integer multicycle 0
  35. def N2UnitM1 : ProcResource<1>; // Integer multicycle 1
  36. def N2UnitL01 : ProcResource<2>; // Load/Store 0/1
  37. def N2UnitL2 : ProcResource<1>; // Load 2
  38. def N2UnitD : ProcResource<2>; // Store data 0/1
  39. def N2UnitV0 : ProcResource<1>; // FP/ASIMD 0
  40. def N2UnitV1 : ProcResource<1>; // FP/ASIMD 1
  // Groups name a set of the units above; a write type that lists a group
  // names the set rather than one specific unit.
  41. def N2UnitV : ProcResGroup<[N2UnitV0, N2UnitV1]>; // FP/ASIMD 0/1
  42. def N2UnitM : ProcResGroup<[N2UnitM0, N2UnitM1]>; // Integer single/multicycle 0/1
  43. def N2UnitL : ProcResGroup<[N2UnitL01, N2UnitL2]>; // Load/Store 0/1 and Load 2
  44. def N2UnitI : ProcResGroup<[N2UnitS, N2UnitM0, N2UnitM1]>; // Integer single cycle 0/1 and single/multicycle 0/1
  45. // Define commonly used read types.
  46. // No forwarding is provided for these types.
  // Every read type below is given an advance of 0 cycles.
  47. def : ReadAdvance<ReadI, 0>;
  48. def : ReadAdvance<ReadISReg, 0>;
  49. def : ReadAdvance<ReadIEReg, 0>;
  50. def : ReadAdvance<ReadIM, 0>;
  51. def : ReadAdvance<ReadIMA, 0>;
  52. def : ReadAdvance<ReadID, 0>;
  53. def : ReadAdvance<ReadExtrHi, 0>;
  54. def : ReadAdvance<ReadAdrBase, 0>;
  55. def : ReadAdvance<ReadST, 0>;
  56. def : ReadAdvance<ReadVLD, 0>;
  // Write types with an empty resource list: WriteAtomic is flagged
  // Unsupported; the others only carry a latency.
  57. def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
  58. def : WriteRes<WriteBarrier, []> { let Latency = 1; }
  59. def : WriteRes<WriteHint, []> { let Latency = 1; }
  60. def : WriteRes<WriteLDHi, []> { let Latency = 4; }
  61. //===----------------------------------------------------------------------===//
  62. // Define customized scheduler read/write types specific to the Neoverse N2.
  63. //===----------------------------------------------------------------------===//
  64. // Define generic 1 micro-op types
  // Naming convention used throughout: N2Write_<latency>cyc_<count><unit>...,
  // where <latency> equals the def's Latency and each <count><unit> pair
  // corresponds to entries in the resource list (unit name minus "N2Unit").
  // Some M0/V0 types additionally set ResourceCycles equal to the latency.
  65. def N2Write_1cyc_1B : SchedWriteRes<[N2UnitB]> { let Latency = 1; }
  66. def N2Write_1cyc_1I : SchedWriteRes<[N2UnitI]> { let Latency = 1; }
  67. def N2Write_1cyc_1M : SchedWriteRes<[N2UnitM]> { let Latency = 1; }
  68. def N2Write_1cyc_1M0 : SchedWriteRes<[N2UnitM0]> { let Latency = 1; }
  69. def N2Write_1cyc_1L01 : SchedWriteRes<[N2UnitL01]> { let Latency = 1; }
  70. def N2Write_2cyc_1M : SchedWriteRes<[N2UnitM]> { let Latency = 2; }
  71. def N2Write_3cyc_1M : SchedWriteRes<[N2UnitM]> { let Latency = 3; }
  72. def N2Write_2cyc_1M0 : SchedWriteRes<[N2UnitM0]> { let Latency = 2;
  73. let ResourceCycles = [2]; }
  74. def N2Write_3cyc_1M0 : SchedWriteRes<[N2UnitM0]> { let Latency = 3;
  75. let ResourceCycles = [3]; }
  76. def N2Write_5cyc_1M0 : SchedWriteRes<[N2UnitM0]> { let Latency = 5;
  77. let ResourceCycles = [5]; }
  78. def N2Write_12cyc_1M0 : SchedWriteRes<[N2UnitM0]> { let Latency = 12;
  79. let ResourceCycles = [12]; }
  80. def N2Write_20cyc_1M0 : SchedWriteRes<[N2UnitM0]> { let Latency = 20;
  81. let ResourceCycles = [20]; }
  82. def N2Write_4cyc_1L : SchedWriteRes<[N2UnitL]> { let Latency = 4; }
  83. def N2Write_6cyc_1L : SchedWriteRes<[N2UnitL]> { let Latency = 6; }
  84. def N2Write_2cyc_1V : SchedWriteRes<[N2UnitV]> { let Latency = 2; }
  85. def N2Write_3cyc_1V : SchedWriteRes<[N2UnitV]> { let Latency = 3; }
  86. def N2Write_4cyc_1V : SchedWriteRes<[N2UnitV]> { let Latency = 4; }
  87. def N2Write_5cyc_1V : SchedWriteRes<[N2UnitV]> { let Latency = 5; }
  88. def N2Write_12cyc_1V : SchedWriteRes<[N2UnitV]> { let Latency = 12; }
  89. def N2Write_2cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 2; }
  90. def N2Write_3cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 3; }
  91. def N2Write_4cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 4; }
  // Only the 7-cycle V0 type sets ResourceCycles; the longer V0 types below
  // do not.
  92. def N2Write_7cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 7;
  93. let ResourceCycles = [7]; }
  94. def N2Write_9cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 9; }
  95. def N2Write_10cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 10; }
  96. def N2Write_12cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 12; }
  97. def N2Write_13cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 13; }
  98. def N2Write_15cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 15; }
  99. def N2Write_16cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 16; }
  100. def N2Write_20cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 20; }
  101. def N2Write_2cyc_1V1 : SchedWriteRes<[N2UnitV1]> { let Latency = 2; }
  102. def N2Write_3cyc_1V1 : SchedWriteRes<[N2UnitV1]> { let Latency = 3; }
  103. def N2Write_4cyc_1V1 : SchedWriteRes<[N2UnitV1]> { let Latency = 4; }
  104. def N2Write_6cyc_1V1 : SchedWriteRes<[N2UnitV1]> { let Latency = 6; }
  105. def N2Write_10cyc_1V1 : SchedWriteRes<[N2UnitV1]> { let Latency = 10; }
  106. def N2Write_6cyc_1L01 : SchedWriteRes<[N2UnitL01]> { let Latency = 6; }
  107. //===----------------------------------------------------------------------===//
  108. // Define generic 2 micro-op types
  // Each def lists two resources and sets NumMicroOps = 2. The 10/13/15/16
  // cycle 2V0 types also set per-resource ResourceCycles that sum to the
  // latency.
  109. def N2Write_1cyc_1B_1S : SchedWriteRes<[N2UnitB, N2UnitS]> {
  110. let Latency = 1;
  111. let NumMicroOps = 2;
  112. }
  113. def N2Write_6cyc_1M0_1B : SchedWriteRes<[N2UnitM0, N2UnitB]> {
  114. let Latency = 6;
  115. let NumMicroOps = 2;
  116. }
  117. def N2Write_9cyc_1M0_1L : SchedWriteRes<[N2UnitM0, N2UnitL]> {
  118. let Latency = 9;
  119. let NumMicroOps = 2;
  120. }
  121. def N2Write_3cyc_1I_1M : SchedWriteRes<[N2UnitI, N2UnitM]> {
  122. let Latency = 3;
  123. let NumMicroOps = 2;
  124. }
  125. def N2Write_4cyc_1I_1L : SchedWriteRes<[N2UnitI, N2UnitL]> {
  126. let Latency = 4;
  127. let NumMicroOps = 2;
  128. }
  129. def N2Write_5cyc_1I_1L : SchedWriteRes<[N2UnitI, N2UnitL]> {
  130. let Latency = 5;
  131. let NumMicroOps = 2;
  132. }
  133. def N2Write_6cyc_1I_1L : SchedWriteRes<[N2UnitI, N2UnitL]> {
  134. let Latency = 6;
  135. let NumMicroOps = 2;
  136. }
  137. def N2Write_7cyc_1I_1L : SchedWriteRes<[N2UnitI, N2UnitL]> {
  138. let Latency = 7;
  139. let NumMicroOps = 2;
  140. }
  141. def N2Write_1cyc_1L01_1D : SchedWriteRes<[N2UnitL01, N2UnitD]> {
  142. let Latency = 1;
  143. let NumMicroOps = 2;
  144. }
  145. def N2Write_5cyc_1M0_1V : SchedWriteRes<[N2UnitM0, N2UnitV]> {
  146. let Latency = 5;
  147. let NumMicroOps = 2;
  148. }
  149. def N2Write_2cyc_1L01_1V : SchedWriteRes<[N2UnitL01, N2UnitV]> {
  150. let Latency = 2;
  151. let NumMicroOps = 2;
  152. }
  153. def N2Write_4cyc_1V1_1V : SchedWriteRes<[N2UnitV1, N2UnitV]> {
  154. let Latency = 4;
  155. let NumMicroOps = 2;
  156. }
  157. def N2Write_4cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
  158. let Latency = 4;
  159. let NumMicroOps = 2;
  160. }
  161. def N2Write_10cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
  162. let Latency = 10;
  163. let NumMicroOps = 2;
  164. let ResourceCycles = [5, 5];
  165. }
  166. def N2Write_13cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
  167. let Latency = 13;
  168. let NumMicroOps = 2;
  169. let ResourceCycles = [6, 7];
  170. }
  171. def N2Write_15cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
  172. let Latency = 15;
  173. let NumMicroOps = 2;
  174. let ResourceCycles = [7, 8];
  175. }
  176. def N2Write_16cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
  177. let Latency = 16;
  178. let NumMicroOps = 2;
  179. let ResourceCycles = [8, 8];
  180. }
  181. def N2Write_4cyc_2V : SchedWriteRes<[N2UnitV, N2UnitV]> {
  182. let Latency = 4;
  183. let NumMicroOps = 2;
  184. }
  185. def N2Write_6cyc_2V : SchedWriteRes<[N2UnitV, N2UnitV]> {
  186. let Latency = 6;
  187. let NumMicroOps = 2;
  188. }
  189. def N2Write_6cyc_2L : SchedWriteRes<[N2UnitL, N2UnitL]> {
  190. let Latency = 6;
  191. let NumMicroOps = 2;
  192. }
  193. def N2Write_8cyc_1L_1V : SchedWriteRes<[N2UnitL, N2UnitV]> {
  194. let Latency = 8;
  195. let NumMicroOps = 2;
  196. }
  197. def N2Write_4cyc_1L01_1V : SchedWriteRes<[N2UnitL01, N2UnitV]> {
  198. let Latency = 4;
  199. let NumMicroOps = 2;
  200. }
  201. def N2Write_3cyc_1M0_1M : SchedWriteRes<[N2UnitM0, N2UnitM]> {
  202. let Latency = 3;
  203. let NumMicroOps = 2;
  204. }
  205. def N2Write_2cyc_1M0_1M : SchedWriteRes<[N2UnitM0, N2UnitM]> {
  206. let Latency = 2;
  207. let NumMicroOps = 2;
  208. }
  209. def N2Write_6cyc_2V1 : SchedWriteRes<[N2UnitV1, N2UnitV1]> {
  210. let Latency = 6;
  211. let NumMicroOps = 2;
  212. }
  213. def N2Write_4cyc_1V0_1M : SchedWriteRes<[N2UnitV0, N2UnitM]> {
  214. let Latency = 4;
  215. let NumMicroOps = 2;
  216. }
  217. def N2Write_5cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
  218. let Latency = 5;
  219. let NumMicroOps = 2;
  220. }
  221. def N2Write_5cyc_1V1_1M0 : SchedWriteRes<[N2UnitV1, N2UnitM0]> {
  222. let Latency = 5;
  223. let NumMicroOps = 2;
  224. }
  225. def N2Write_7cyc_1M0_1V0 : SchedWriteRes<[N2UnitM0, N2UnitV0]> {
  226. let Latency = 7;
  227. let NumMicroOps = 2;
  228. }
  229. def N2Write_2cyc_1V0_1M : SchedWriteRes<[N2UnitV0, N2UnitM]> {
  230. let Latency = 2;
  231. let NumMicroOps = 2;
  232. }
  233. def N2Write_6cyc_1V_1V1 : SchedWriteRes<[N2UnitV, N2UnitV1]> {
  234. let Latency = 6;
  235. let NumMicroOps = 2;
  236. }
  237. def N2Write_6cyc_1L_1M : SchedWriteRes<[N2UnitL, N2UnitM]> {
  238. let Latency = 6;
  239. let NumMicroOps = 2;
  240. }
  241. def N2Write_6cyc_1L_1S : SchedWriteRes<[N2UnitL, N2UnitS]> {
  242. let Latency = 6;
  243. let NumMicroOps = 2;
  244. }
  245. def N2Write_9cyc_1L_1V : SchedWriteRes<[N2UnitL, N2UnitV]> {
  246. let Latency = 9;
  247. let NumMicroOps = 2;
  248. }
  249. def N2Write_4cyc_2V1 : SchedWriteRes<[N2UnitV1, N2UnitV1]> {
  250. let Latency = 4;
  251. let NumMicroOps = 2;
  252. }
  253. //===----------------------------------------------------------------------===//
  254. // Define generic 3 micro-op types
  // Each def lists three resources and sets NumMicroOps = 3.
  255. def N2Write_1cyc_1L01_1D_1I : SchedWriteRes<[N2UnitL01, N2UnitD, N2UnitI]> {
  256. let Latency = 1;
  257. let NumMicroOps = 3;
  258. }
  259. def N2Write_2cyc_1L01_1V_1I : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitI]> {
  260. let Latency = 2;
  261. let NumMicroOps = 3;
  262. }
  263. def N2Write_2cyc_1L01_2V : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitV]> {
  264. let Latency = 2;
  265. let NumMicroOps = 3;
  266. }
  267. def N2Write_7cyc_1M_1M0_1V : SchedWriteRes<[N2UnitM, N2UnitM0, N2UnitV]> {
  268. let Latency = 7;
  269. let NumMicroOps = 3;
  270. }
  271. def N2Write_8cyc_1M0_1V1_1V : SchedWriteRes<[N2UnitM0, N2UnitV1, N2UnitV]> {
  272. let Latency = 8;
  273. let NumMicroOps = 3;
  274. }
  // Three micro-ops, one each on the V, L and S units, as the 1V_1L_1S name
  // states. The third resource used to be a second N2UnitL, which
  // double-counted load-pipe pressure and never charged the S pipe.
  275. def N2Write_10cyc_1V_1L_1S : SchedWriteRes<[N2UnitV, N2UnitL, N2UnitS]> {
  276. let Latency = 10;
  277. let NumMicroOps = 3;
  278. }
  // Remaining 3 micro-op types; each lists three resources and sets
  // NumMicroOps = 3.
  279. def N2Write_2cyc_1L01_1S_1V : SchedWriteRes<[N2UnitL01, N2UnitS, N2UnitV]> {
  280. let Latency = 2;
  281. let NumMicroOps = 3;
  282. }
  283. def N2Write_4cyc_1L01_1S_1V : SchedWriteRes<[N2UnitL01, N2UnitS, N2UnitV]> {
  284. let Latency = 4;
  285. let NumMicroOps = 3;
  286. }
  287. def N2Write_6cyc_3L : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL]> {
  288. let Latency = 6;
  289. let NumMicroOps = 3;
  290. }
  291. def N2Write_8cyc_1L_2V : SchedWriteRes<[N2UnitL, N2UnitV, N2UnitV]> {
  292. let Latency = 8;
  293. let NumMicroOps = 3;
  294. }
  295. //===----------------------------------------------------------------------===//
  296. // Define generic 4 micro-op types
  // Each def lists four resources and sets NumMicroOps = 4.
  297. def N2Write_2cyc_1L01_2V_1I : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitV,
  298. N2UnitI]> {
  299. let Latency = 2;
  300. let NumMicroOps = 4;
  301. }
  302. def N2Write_6cyc_4V0 : SchedWriteRes<[N2UnitV0, N2UnitV0, N2UnitV0, N2UnitV0]> {
  303. let Latency = 6;
  304. let NumMicroOps = 4;
  305. }
  306. def N2Write_4cyc_4V : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
  307. let Latency = 4;
  308. let NumMicroOps = 4;
  309. }
  310. def N2Write_6cyc_4V : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
  311. let Latency = 6;
  312. let NumMicroOps = 4;
  313. }
  314. def N2Write_8cyc_2L_2V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV]> {
  315. let Latency = 8;
  316. let NumMicroOps = 4;
  317. }
  318. def N2Write_9cyc_2L_2V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV]> {
  319. let Latency = 9;
  320. let NumMicroOps = 4;
  321. }
  322. def N2Write_2cyc_2L01_2V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitV,
  323. N2UnitV]> {
  324. let Latency = 2;
  325. let NumMicroOps = 4;
  326. }
  327. def N2Write_4cyc_2L01_2V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitV,
  328. N2UnitV]> {
  329. let Latency = 4;
  330. let NumMicroOps = 4;
  331. }
  332. def N2Write_5cyc_2L01_2V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitV,
  333. N2UnitV]> {
  334. let Latency = 5;
  335. let NumMicroOps = 4;
  336. }
  337. def N2Write_8cyc_2M0_2V0 : SchedWriteRes<[N2UnitM0, N2UnitM0, N2UnitV0,
  338. N2UnitV0]> {
  339. let Latency = 8;
  340. let NumMicroOps = 4;
  341. }
  342. def N2Write_11cyc_2V_2V1 : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV1,
  343. N2UnitV1]> {
  344. let Latency = 11;
  345. let NumMicroOps = 4;
  346. }
  347. def N2Write_9cyc_2V_2V1 : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV1,
  348. N2UnitV1]> {
  349. let Latency = 9;
  350. let NumMicroOps = 4;
  351. }
  352. def N2Write_8cyc_2V_2V1 : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV1,
  353. N2UnitV1]> {
  354. let Latency = 8;
  355. let NumMicroOps = 4;
  356. }
  // Four micro-ops: two on the L group and two on V1, as the 2L_2V1 name
  // states. The list previously read [V, V, V1, V1] (identical to the 2V_2V1
  // types above), so no load pipe was charged for this write.
  357. def N2Write_10cyc_2L_2V1 : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV1,
  358. N2UnitV1]> {
  359. let Latency = 10;
  360. let NumMicroOps = 4;
  361. }
  // Remaining 4 micro-op types; each lists four resources and sets
  // NumMicroOps = 4.
  362. def N2Write_10cyc_2L_2V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV]> {
  363. let Latency = 10;
  364. let NumMicroOps = 4;
  365. }
  366. def N2Write_4cyc_2M0_2M : SchedWriteRes<[N2UnitM0, N2UnitM0, N2UnitM,
  367. N2UnitM]> {
  368. let Latency = 4;
  369. let NumMicroOps = 4;
  370. }
  371. def N2Write_6cyc_2I_2L : SchedWriteRes<[N2UnitI, N2UnitI, N2UnitL, N2UnitL]> {
  372. let Latency = 6;
  373. let NumMicroOps = 4;
  374. }
  375. def N2Write_7cyc_4L : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, N2UnitL]> {
  376. let Latency = 7;
  377. let NumMicroOps = 4;
  378. }
  379. //===----------------------------------------------------------------------===//
  380. // Define generic 5 micro-op types
  // Each def lists five resources and sets NumMicroOps = 5.
  381. def N2Write_2cyc_1L01_2V_2I : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitV,
  382. N2UnitI, N2UnitI]> {
  383. let Latency = 2;
  384. let NumMicroOps = 5;
  385. }
  386. def N2Write_8cyc_2L_3V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV,
  387. N2UnitV]> {
  388. let Latency = 8;
  389. let NumMicroOps = 5;
  390. }
  391. //===----------------------------------------------------------------------===//
  392. // Define generic 6 micro-op types
  // Each def lists six resources and sets NumMicroOps = 6.
  393. def N2Write_8cyc_3L_3V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL,
  394. N2UnitV, N2UnitV, N2UnitV]> {
  395. let Latency = 8;
  396. let NumMicroOps = 6;
  397. }
  398. def N2Write_2cyc_3L01_3V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
  399. N2UnitV, N2UnitV, N2UnitV]> {
  400. let Latency = 2;
  401. let NumMicroOps = 6;
  402. }
  403. def N2Write_6cyc_3L01_3V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
  404. N2UnitV, N2UnitV, N2UnitV]> {
  405. let Latency = 6;
  406. let NumMicroOps = 6;
  407. }
  408. def N2Write_4cyc_3L01_3V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
  409. N2UnitV, N2UnitV, N2UnitV]> {
  410. let Latency = 4;
  411. let NumMicroOps = 6;
  412. }
  413. def N2Write_10cyc_2L_2V_2S : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV,
  414. N2UnitS, N2UnitS]> {
  415. let Latency = 10;
  416. let NumMicroOps = 6;
  417. }
  418. //===----------------------------------------------------------------------===//
  419. // Define generic 7 micro-op types
  // Three L-group entries plus four V-group entries; NumMicroOps = 7.
  420. def N2Write_8cyc_3L_4V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL,
  421. N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
  422. let Latency = 8;
  423. let NumMicroOps = 7;
  424. }
  425. //===----------------------------------------------------------------------===//
  426. // Define generic 8 micro-op types
  // Each def lists eight resources and sets NumMicroOps = 8.
  427. def N2Write_6cyc_8V : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV, N2UnitV,
  428. N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
  429. let Latency = 6;
  430. let NumMicroOps = 8;
  431. }
  432. def N2Write_2cyc_4L01_4V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
  433. N2UnitL01, N2UnitV, N2UnitV, N2UnitV,
  434. N2UnitV]> {
  435. let Latency = 2;
  436. let NumMicroOps = 8;
  437. }
  438. def N2Write_5cyc_4L01_4V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
  439. N2UnitL01, N2UnitV, N2UnitV, N2UnitV,
  440. N2UnitV]> {
  441. let Latency = 5;
  442. let NumMicroOps = 8;
  443. }
  444. def N2Write_8cyc_4L_4V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, N2UnitL,
  445. N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
  446. let Latency = 8;
  447. let NumMicroOps = 8;
  448. }
  449. def N2Write_9cyc_4L_4V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, N2UnitL,
  450. N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
  451. let Latency = 9;
  452. let NumMicroOps = 8;
  453. }
  454. //===----------------------------------------------------------------------===//
  455. // Define generic 10 micro-op types
  // In this and the following sections, the number of listed resources
  // equals NumMicroOps (10, 12, 15, 18 and 27 respectively).
  456. def N2Write_7cyc_5L01_5V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
  457. N2UnitL01, N2UnitL01, N2UnitV,
  458. N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
  459. let Latency = 7;
  460. let NumMicroOps = 10;
  461. }
  462. //===----------------------------------------------------------------------===//
  463. // Define generic 12 micro-op types
  464. def N2Write_7cyc_6L01_6V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
  465. N2UnitL01, N2UnitL01, N2UnitL01,
  466. N2UnitV, N2UnitV, N2UnitV, N2UnitV,
  467. N2UnitV, N2UnitV]> {
  468. let Latency = 7;
  469. let NumMicroOps = 12;
  470. }
  471. //===----------------------------------------------------------------------===//
  472. // Define generic 15 micro-op types
  473. def N2Write_7cyc_5L01_5S_5V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
  474. N2UnitL01, N2UnitL01, N2UnitS,
  475. N2UnitS, N2UnitS, N2UnitS,
  476. N2UnitS, N2UnitV, N2UnitV,
  477. N2UnitV, N2UnitV, N2UnitV]> {
  478. let Latency = 7;
  479. let NumMicroOps = 15;
  480. }
  481. //===----------------------------------------------------------------------===//
  482. // Define generic 18 micro-op types
  483. def N2Write_11cyc_9L01_9V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
  484. N2UnitL01, N2UnitL01, N2UnitL01,
  485. N2UnitL01, N2UnitL01, N2UnitL01,
  486. N2UnitV, N2UnitV, N2UnitV,
  487. N2UnitV, N2UnitV, N2UnitV,
  488. N2UnitV, N2UnitV, N2UnitV]> {
  489. let Latency = 11;
  490. let NumMicroOps = 18;
  491. }
  492. //===----------------------------------------------------------------------===//
  493. // Define generic 27 micro-op types
  494. def N2Write_11cyc_9L01_9S_9V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
  495. N2UnitL01, N2UnitL01, N2UnitL01,
  496. N2UnitL01, N2UnitL01, N2UnitL01,
  497. N2UnitS, N2UnitS, N2UnitS,
  498. N2UnitS, N2UnitS, N2UnitS,
  499. N2UnitS, N2UnitS, N2UnitS,
  500. N2UnitV, N2UnitV, N2UnitV,
  501. N2UnitV, N2UnitV, N2UnitV,
  502. N2UnitV, N2UnitV, N2UnitV]> {
  503. let Latency = 11;
  504. let NumMicroOps = 27;
  505. }
  506. // Miscellaneous
  507. // -----------------------------------------------------------------------------
  // COPY is given the generic WriteI type (aliased to N2Write_1cyc_1I in the
  // arithmetic section).
  508. def : InstRW<[WriteI], (instrs COPY)>;
  509. // Branch Instructions
  510. // -----------------------------------------------------------------------------
  511. // Branch, immed
  512. // Compare and branch
  513. def : SchedAlias<WriteBr, N2Write_1cyc_1B>;
  514. // Branch, register
  515. def : SchedAlias<WriteBrReg, N2Write_1cyc_1B>;
  516. // Branch and link, immed
  517. // Branch and link, register
  // BL/BLR override the aliases above with a 2 micro-op type (B + S units).
  518. def : InstRW<[N2Write_1cyc_1B_1S], (instrs BL, BLR)>;
  519. // Arithmetic and Logical Instructions
  520. // -----------------------------------------------------------------------------
  521. // ALU, basic
  522. // ALU, basic, flagset
  523. def : SchedAlias<WriteI, N2Write_1cyc_1I>;
  524. // ALU, extend and shift
  // Both the shifted-register and extended-register write types map to the
  // same 2-cycle M-group type.
  525. def : SchedAlias<WriteISReg, N2Write_2cyc_1M>;
  526. def : SchedAlias<WriteIEReg, N2Write_2cyc_1M>;
  527. // Arithmetic, immediate to logical address tag
  528. def : InstRW<[N2Write_2cyc_1M], (instrs ADDG, SUBG)>;
  529. // Convert floating-point condition flags
  530. // Flag manipulation instructions
  // WriteSys consumes no modeled resource; it only has a 1-cycle latency.
  531. def : WriteRes<WriteSys, []> { let Latency = 1; }
  532. // Insert Random Tags
  533. def : InstRW<[N2Write_2cyc_1M], (instrs IRG, IRGstack)>;
  534. // Insert Tag Mask
  535. // Subtract Pointer
  536. // Subtract Pointer, flagset
  537. def : InstRW<[N2Write_1cyc_1I], (instrs GMI, SUBP, SUBPS)>;
  538. // Move and shift instructions
  539. // -----------------------------------------------------------------------------
  540. def : SchedAlias<WriteImm, N2Write_1cyc_1I>;
  541. // Divide and Multiply Instructions
  542. // -----------------------------------------------------------------------------
  543. // SDIV, UDIV
  // The divide types are pinned to M0 and hold it for the full latency
  // (see ResourceCycles on N2Write_12cyc_1M0 / N2Write_20cyc_1M0).
  544. def : SchedAlias<WriteID32, N2Write_12cyc_1M0>;
  545. def : SchedAlias<WriteID64, N2Write_20cyc_1M0>;
  // WriteIM32/WriteIM64 (presumably the 32/64-bit integer multiply writes)
  // are defined directly: 2 cycles on the M group.
  546. def : WriteRes<WriteIM32, [N2UnitM]> { let Latency = 2; }
  547. def : WriteRes<WriteIM64, [N2UnitM]> { let Latency = 2; }
  548. // Multiply high
  549. def : InstRW<[N2Write_3cyc_1M], (instrs SMULHrr, UMULHrr)>;
  550. // Pointer Authentication Instructions (v8.3 PAC)
  551. // -----------------------------------------------------------------------------
  552. // Authenticate data address
  553. // Authenticate instruction address
  554. // Compute pointer authentication code for data address
  555. // Compute pointer authentication code, using generic key
  556. // Compute pointer authentication code for instruction address
  // The two regexes match every opcode whose name begins with AUT or PAC.
  557. def : InstRW<[N2Write_5cyc_1M0], (instregex "^AUT", "^PAC")>;
  558. // Branch and link, register, with pointer authentication
  559. // Branch, register, with pointer authentication
  560. // Branch, return, with pointer authentication
  561. def : InstRW<[N2Write_6cyc_1M0_1B], (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ, BRAA,
  562. BRAAZ, BRAB, BRABZ, RETAA, RETAB,
  563. ERETAA, ERETAB)>;
  564. // Load register, with pointer authentication
  565. def : InstRW<[N2Write_9cyc_1M0_1L], (instregex "^LDRA[AB](indexed|writeback)")>;
  566. // Strip pointer authentication code
  567. def : InstRW<[N2Write_2cyc_1M0], (instrs XPACD, XPACI, XPACLRI)>;
  // Miscellaneous data-processing instructions
  // -----------------------------------------------------------------------------

  // Bitfield extract, one reg
  // Bitfield extract, two regs
  // NOTE: EXTR with both source operands equal (the "one reg" case) is not
  // modelled separately; both cases share the same write below.
  def : SchedAlias<WriteExtr, N2Write_3cyc_1I_1M>;
  def : InstRW<[N2Write_3cyc_1I_1M],
               (instrs EXTRWrri, EXTRXrri)>;

  // Bitfield move, basic
  def : SchedAlias<WriteIS, N2Write_1cyc_1I>;

  // Bitfield move, insert
  def : InstRW<[N2Write_2cyc_1M],
               (instregex "^BFM[WX]ri$")>;
  // Load instructions
  // -----------------------------------------------------------------------------

  def : SchedAlias<WriteLD,    N2Write_4cyc_1L>;
  def : SchedAlias<WriteLDIdx, N2Write_4cyc_1I_1L>;

  // Load pair, signed immed offset, signed words
  def : InstRW<[N2Write_5cyc_1M0, WriteLDHi],
               (instrs LDPSWi)>;
  // Load pair, immed post-index or immed pre-index, signed words
  // WriteAdr is listed first so that it pairs with the written-back base
  // register rather than with one of the loaded registers.
  // NOTE(review): assumes the pre/post-index forms declare the base writeback
  // as their first output operand (the store pre/post-index entries in this
  // file already list WriteAdr first) - confirm against AArch64InstrFormats.td.
  def : InstRW<[WriteAdr, N2Write_5cyc_1M0, WriteLDHi],
               (instregex "^LDPSW(post|pre)$")>;
  // Store instructions
  // -----------------------------------------------------------------------------

  def : SchedAlias<WriteST,    N2Write_1cyc_1L01_1D>;
  def : SchedAlias<WriteSTIdx, N2Write_1cyc_1L01_1D_1I>;
  def : SchedAlias<WriteSTP,   N2Write_1cyc_1L01_1D>;

  // Address writeback; this mapping was copied from A57.
  def : SchedAlias<WriteAdr,   N2Write_1cyc_1I>;
  // Tag load instructions
  // -----------------------------------------------------------------------------

  // Load allocation tag
  // Load multiple allocation tags
  def : InstRW<[N2Write_4cyc_1L],
               (instrs LDG, LDGM)>;
  // Tag store instructions
  // -----------------------------------------------------------------------------

  // Store allocation tags to one or two granules, post-index
  // Store allocation tags to one or two granules, pre-index
  // Store allocation tag to one or two granules, zeroing, post-index
  // Store Allocation Tag to one or two granules, zeroing, pre-index
  // Store allocation tag and reg pair to memory, post-Index
  // Store allocation tag and reg pair to memory, pre-Index
  def : InstRW<[N2Write_1cyc_1L01_1D_1I],
               (instrs STGPreIndex,   STGPostIndex,
                       ST2GPreIndex,  ST2GPostIndex,
                       STZGPreIndex,  STZGPostIndex,
                       STZ2GPreIndex, STZ2GPostIndex,
                       STGPpre,       STGPpost)>;

  // Store allocation tags to one or two granules, signed offset
  // Store allocation tag to two granules, zeroing, signed offset
  // Store allocation tag and reg pair to memory, signed offset
  // Store multiple allocation tags
  def : InstRW<[N2Write_1cyc_1L01_1D],
               (instrs STGOffset, ST2GOffset, STZGOffset, STZ2GOffset,
                       STGPi, STGM, STZGM)>;
  // FP data processing instructions
  // -----------------------------------------------------------------------------

  // FP absolute value
  // FP arithmetic
  // FP min/max
  // FP negate
  // FP select
  def : SchedAlias<WriteF, N2Write_2cyc_1V>;

  // FP compare
  def : SchedAlias<WriteFCmp, N2Write_2cyc_1V0>;

  // FP divide, square root
  def : SchedAlias<WriteFDiv, N2Write_7cyc_1V0>;

  // FP divide, H-form
  def : InstRW<[N2Write_7cyc_1V0],  (instrs FDIVHrr)>;
  // FP divide, S-form
  def : InstRW<[N2Write_10cyc_1V0], (instrs FDIVSrr)>;
  // FP divide, D-form
  def : InstRW<[N2Write_15cyc_1V0], (instrs FDIVDrr)>;

  // FP square root, H-form
  def : InstRW<[N2Write_7cyc_1V0],  (instrs FSQRTHr)>;
  // FP square root, S-form
  def : InstRW<[N2Write_9cyc_1V0],  (instrs FSQRTSr)>;
  // FP square root, D-form
  def : InstRW<[N2Write_16cyc_1V0], (instrs FSQRTDr)>;

  // FP multiply
  def : WriteRes<WriteFMul, [N2UnitV]> {
    let Latency = 3;
  }

  // FP multiply accumulate
  def : InstRW<[N2Write_4cyc_1V],
               (instregex "^FN?M(ADD|SUB)[HSD]rrr$")>;

  // FP round to integral
  def : InstRW<[N2Write_3cyc_1V0],
               (instregex "^FRINT[AIMNPXZ][HSD]r$",
                          "^FRINT(32|64)[XZ][SD]r$")>;
  // FP miscellaneous instructions
  // -----------------------------------------------------------------------------

  // FP convert, from gen to vec reg
  def : InstRW<[N2Write_3cyc_1M0],
               (instregex "^[SU]CVTF[SU][WX][HSD]ri$")>;

  // FP convert, from vec to gen reg
  def : InstRW<[N2Write_3cyc_1V],
               (instregex "^FCVT[AMNPZ][SU][SU][WX][HSD]r$")>;

  // FP convert, Javascript from vec to gen reg
  // FP convert, from vec to vec reg
  def : SchedAlias<WriteFCvt, N2Write_3cyc_1V0>;

  // FP move, immed
  // FP move, register
  def : SchedAlias<WriteFImm, N2Write_2cyc_1V>;

  // FP transfer, from gen to low half of vec reg
  def : InstRW<[N2Write_3cyc_1M0],
               (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr,
                       FMOVHWr, FMOVHXr, FMOVSWr, FMOVDXr)>;

  // FP transfer, from gen to high half of vec reg
  def : InstRW<[N2Write_5cyc_1M0_1V],
               (instrs FMOVXDHighr)>;

  // FP transfer, from vec to gen reg
  def : SchedAlias<WriteFCopy, N2Write_2cyc_1V>;
  // FP load instructions
  // -----------------------------------------------------------------------------

  // Load vector reg, literal, S/D/Q forms
  // Load vector reg, unscaled immed
  def : InstRW<[N2Write_6cyc_1L],
               (instregex "^LDR[SDQ]l$",
                          "^LDUR[BHSDQ]i$")>;
  // Both writeback forms use WriteAdr for the base-register update and list it
  // first, so the load write pairs with the loaded register. The post-index
  // entry previously used WriteI where the pre-index entry used WriteAdr.
  // NOTE(review): assumes the pre/post-index loads declare the base writeback
  // as their first output operand (the store pre/post-index entries in this
  // file already list WriteAdr first) - confirm against AArch64InstrFormats.td.

  // Load vector reg, immed post-index
  def : InstRW<[WriteAdr, N2Write_6cyc_1I_1L],
               (instregex "^LDR[BHSDQ]post$")>;

  // Load vector reg, immed pre-index
  def : InstRW<[WriteAdr, N2Write_6cyc_1I_1L],
               (instregex "^LDR[BHSDQ]pre$")>;
  // Load vector reg, unsigned immed
  def : InstRW<[N2Write_6cyc_1L],
               (instregex "^LDR[BHSDQ]ui$")>;

  // Load vector reg, register offset, basic
  // Load vector reg, register offset, scale, S/D-form
  // Load vector reg, register offset, extend
  // Load vector reg, register offset, extend, scale, S/D-form
  def : InstRW<[N2Write_6cyc_1L, ReadAdrBase],
               (instregex "^LDR[BSD]ro[WX]$")>;

  // Load vector reg, register offset, scale, H/Q-form
  // Load vector reg, register offset, extend, scale, H/Q-form
  def : InstRW<[N2Write_7cyc_1I_1L, ReadAdrBase],
               (instregex "^LDR[HQ]ro[WX]$")>;

  // Load vector pair, immed offset, S/D-form
  def : InstRW<[N2Write_6cyc_1L, WriteLDHi],
               (instregex "^LDN?P[SD]i$")>;

  // Load vector pair, immed offset, Q-form
  def : InstRW<[N2Write_6cyc_2L, WriteLDHi],
               (instrs LDPQi, LDNPQi)>;
  // WriteAdr is listed first in both entries below so that it pairs with the
  // written-back base register; the load write and WriteLDHi then pair with
  // the two loaded registers.
  // NOTE(review): assumes the pre/post-index pair loads declare the base
  // writeback as their first output operand (the store-pair pre/post-index
  // entry in this file already lists WriteAdr first) - confirm against
  // AArch64InstrFormats.td.

  // Load vector pair, immed post-index, S/D-form
  // Load vector pair, immed pre-index, S/D-form
  def : InstRW<[WriteAdr, N2Write_6cyc_1I_1L, WriteLDHi],
               (instregex "^LDP[SD](pre|post)$")>;

  // Load vector pair, immed post-index, Q-form
  // Load vector pair, immed pre-index, Q-form
  def : InstRW<[WriteAdr, N2Write_6cyc_2I_2L, WriteLDHi],
               (instrs LDPQpost, LDPQpre)>;
  // FP store instructions
  // -----------------------------------------------------------------------------

  // Store vector reg, unscaled immed, B/H/S/D-form
  // Store vector reg, unscaled immed, Q-form
  def : InstRW<[N2Write_2cyc_1L01_1V],
               (instregex "^STUR[BHSDQ]i$")>;

  // Store vector reg, immed post-index, B/H/S/D-form
  // Store vector reg, immed post-index, Q-form
  // Store vector reg, immed pre-index, B/H/S/D-form
  // Store vector reg, immed pre-index, Q-form
  def : InstRW<[WriteAdr, N2Write_2cyc_1L01_1V_1I, ReadAdrBase],
               (instregex "^STR[BHSDQ](pre|post)$")>;

  // Store vector reg, unsigned immed, B/H/S/D-form
  // Store vector reg, unsigned immed, Q-form
  def : InstRW<[N2Write_2cyc_1L01_1V],
               (instregex "^STR[BHSDQ]ui$")>;

  // Store vector reg, register offset, basic, B/H/S/D-form
  // Store vector reg, register offset, basic, Q-form
  // Store vector reg, register offset, scale, S/D-form
  // Store vector reg, register offset, extend, B/H/S/D-form
  // Store vector reg, register offset, extend, Q-form
  // Store vector reg, register offset, extend, scale, S/D-form
  def : InstRW<[N2Write_2cyc_1L01_1V, ReadAdrBase],
               (instregex "^STR[BSD]ro[WX]$")>;

  // Store vector reg, register offset, scale, H-form
  // Store vector reg, register offset, scale, Q-form
  // Store vector reg, register offset, extend, scale, H-form
  // Store vector reg, register offset, extend, scale, Q-form
  def : InstRW<[N2Write_2cyc_1L01_1V, ReadAdrBase],
               (instregex "^STR[HQ]ro[WX]$")>;

  // Store vector pair, immed offset, S-form
  // Store vector pair, immed offset, D-form
  def : InstRW<[N2Write_2cyc_1L01_1V],
               (instregex "^STN?P[SD]i$")>;

  // Store vector pair, immed offset, Q-form
  def : InstRW<[N2Write_2cyc_1L01_2V],
               (instrs STPQi, STNPQi)>;

  // Store vector pair, immed post-index, S-form
  // Store vector pair, immed post-index, D-form
  // Store vector pair, immed pre-index, S-form
  // Store vector pair, immed pre-index, D-form
  def : InstRW<[WriteAdr, N2Write_2cyc_1L01_1V_1I],
               (instregex "^STP[SD](pre|post)$")>;

  // Store vector pair, immed post-index, Q-form
  def : InstRW<[N2Write_2cyc_1L01_2V_1I],
               (instrs STPQpost)>;

  // Store vector pair, immed pre-index, Q-form
  def : InstRW<[N2Write_2cyc_1L01_2V_2I],
               (instrs STPQpre)>;
  // ASIMD integer instructions
  // -----------------------------------------------------------------------------

  // ASIMD absolute diff
  // ASIMD absolute diff long
  // ASIMD arith, basic
  // ASIMD arith, complex
  // ASIMD arith, pair-wise
  // ASIMD compare
  // ASIMD logical
  // ASIMD max/min, basic and pair-wise
  def : SchedAlias<WriteVd, N2Write_2cyc_1V>;
  def : SchedAlias<WriteVq, N2Write_2cyc_1V>;

  // ASIMD absolute diff accum
  // ASIMD absolute diff accum long
  def : InstRW<[N2Write_4cyc_1V1],
               (instregex "^SABAv",
                          "^UABAv",
                          "^SABALv",
                          "^UABALv")>;

  // ASIMD arith, reduce, 4H/4S
  def : InstRW<[N2Write_2cyc_1V1],
               (instregex "^(ADDV|[SU]ADDLV)v4(i16|i32)v$")>;

  // ASIMD arith, reduce, 8B/8H
  def : InstRW<[N2Write_4cyc_1V1_1V],
               (instregex "^(ADDV|[SU]ADDLV)v8(i8|i16)v$")>;

  // ASIMD arith, reduce, 16B
  def : InstRW<[N2Write_4cyc_1V1],
               (instrs ADDVv16i8v, SADDLVv16i8v, UADDLVv16i8v)>;

  // ASIMD dot product
  // ASIMD dot product using signed and unsigned integers
  def : InstRW<[N2Write_3cyc_1V],
               (instregex "^([SU]|SU|US)DOT(lane)?(v8|v16)i8$")>;

  // ASIMD matrix multiply-accumulate
  def : InstRW<[N2Write_3cyc_1V],
               (instrs SMMLA, UMMLA, USMMLA)>;
  // ASIMD max/min, reduce, 4H/4S
  def : InstRW<[N2Write_2cyc_1V1],
               (instregex "^[SU](MAX|MIN)Vv4i16v$",
                          "^[SU](MAX|MIN)Vv4i32v$")>;

  // ASIMD max/min, reduce, 8B/8H
  def : InstRW<[N2Write_4cyc_1V1_1V],
               (instregex "^[SU](MAX|MIN)Vv8i8v$",
                          "^[SU](MAX|MIN)Vv8i16v$")>;

  // ASIMD max/min, reduce, 16B
  // The pattern carries an explicit '^' anchor, like the 4H/4S and 8B/8H
  // patterns above.
  def : InstRW<[N2Write_4cyc_2V1],
               (instregex "^[SU](MAX|MIN)Vv16i8v$")>;
  // ASIMD multiply
  def : InstRW<[N2Write_4cyc_1V0],
               (instregex "^MULv", "^SQ(R)?DMULHv")>;

  // ASIMD multiply accumulate
  def : InstRW<[N2Write_4cyc_1V0],
               (instregex "^MLAv", "^MLSv")>;

  // ASIMD multiply accumulate high
  def : InstRW<[N2Write_4cyc_1V0],
               (instregex "^SQRDMLAHv", "^SQRDMLSHv")>;

  // ASIMD multiply accumulate long
  def : InstRW<[N2Write_4cyc_1V0],
               (instregex "^[SU]MLALv", "^[SU]MLSLv")>;

  // ASIMD multiply accumulate saturating long
  def : InstRW<[N2Write_4cyc_1V0],
               (instregex "^SQDMLALv", "^SQDMLSLv")>;

  // ASIMD multiply/multiply long (8x8) polynomial, D-form
  // ASIMD multiply/multiply long (8x8) polynomial, Q-form
  def : InstRW<[N2Write_3cyc_1V0],
               (instregex "^PMULL?(v8i8|v16i8)$")>;

  // ASIMD multiply long
  def : InstRW<[N2Write_3cyc_1V],
               (instregex "^[SU]MULLv", "^SQDMULLv")>;

  // ASIMD pairwise add and accumulate long
  def : InstRW<[N2Write_4cyc_1V1],
               (instregex "^[SU]ADALPv")>;

  // ASIMD shift accumulate
  def : InstRW<[N2Write_4cyc_1V1],
               (instregex "^[SU]SRAv", "^[SU]RSRAv")>;

  // ASIMD shift by immed, basic
  def : InstRW<[N2Write_2cyc_1V1],
               (instregex "^SHLv",
                          "^SHLLv",
                          "^SHRNv",
                          "^SSHLLv",
                          "^SSHRv",
                          "^USHLLv",
                          "^USHRv")>;

  // ASIMD shift by immed and insert, basic
  def : InstRW<[N2Write_2cyc_1V1],
               (instregex "^SLIv", "^SRIv")>;

  // ASIMD shift by immed, complex
  def : InstRW<[N2Write_4cyc_1V1],
               (instregex "^RSHRNv",
                          "^SQRSHRNv",
                          "^SQRSHRUNv",
                          "^(SQSHLU?|UQSHL)[bhsd]$",
                          "^(SQSHLU?|UQSHL)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)_shift$",
                          "^SQSHRNv",
                          "^SQSHRUNv",
                          "^SRSHRv",
                          "^UQRSHRNv",
                          "^UQSHRNv",
                          "^URSHRv")>;

  // ASIMD shift by register, basic
  def : InstRW<[N2Write_2cyc_1V1],
               (instregex "^[SU]SHLv")>;

  // ASIMD shift by register, complex
  def : InstRW<[N2Write_4cyc_1V1],
               (instregex "^[SU]RSHLv",
                          "^[SU]QRSHLv",
                          "^[SU]QSHL(v1i8|v1i16|v1i32|v1i64|v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)$")>;
  // ASIMD floating-point instructions
  // -----------------------------------------------------------------------------

  // ASIMD FP absolute value/difference
  // ASIMD FP arith, normal
  // ASIMD FP compare
  // ASIMD FP complex add
  // ASIMD FP max/min, normal
  // ASIMD FP max/min, pairwise
  // ASIMD FP negate
  // Handled by SchedAlias<WriteV[dq], ...>

  // ASIMD FP complex multiply add
  def : InstRW<[N2Write_4cyc_1V],
               (instregex "^FCMLAv")>;

  // ASIMD FP convert, long (F16 to F32)
  def : InstRW<[N2Write_4cyc_2V0],
               (instregex "^FCVTL(v4|v8)i16")>;

  // ASIMD FP convert, long (F32 to F64)
  def : InstRW<[N2Write_3cyc_1V0],
               (instregex "^FCVTL(v2|v4)i32")>;

  // ASIMD FP convert, narrow (F32 to F16)
  def : InstRW<[N2Write_4cyc_2V0],
               (instregex "^FCVTN(v4|v8)i16")>;

  // ASIMD FP convert, narrow (F64 to F32)
  def : InstRW<[N2Write_3cyc_1V0],
               (instregex "^FCVTN(v2|v4)i32",
                          "^FCVTXN(v2|v4)f32")>;

  // ASIMD FP convert, other, D-form F32 and Q-form F64
  def : InstRW<[N2Write_3cyc_1V0],
               (instregex "^[FSU]CVT[AMNPZ][SU]v2f(32|64)$",
                          "^[SU]CVTFv2f(32|64)$")>;

  // ASIMD FP convert, other, D-form F16 and Q-form F32
  def : InstRW<[N2Write_4cyc_2V0],
               (instregex "^[FSU]CVT[AMNPZ][SU]v4f(16|32)$",
                          "^[SU]CVTFv4f(16|32)$")>;

  // ASIMD FP convert, other, Q-form F16
  def : InstRW<[N2Write_6cyc_4V0],
               (instregex "^[FSU]CVT[AMNPZ][SU]v8f16$",
                          "^[SU]CVTFv8f16$")>;

  // ASIMD FP divide, D-form, F16
  def : InstRW<[N2Write_7cyc_1V0],  (instrs FDIVv4f16)>;
  // ASIMD FP divide, D-form, F32
  def : InstRW<[N2Write_10cyc_2V0], (instrs FDIVv2f32)>;
  // ASIMD FP divide, Q-form, F16
  def : InstRW<[N2Write_13cyc_2V0], (instrs FDIVv8f16)>;
  // ASIMD FP divide, Q-form, F32
  def : InstRW<[N2Write_10cyc_2V0], (instrs FDIVv4f32)>;
  // ASIMD FP divide, Q-form, F64
  def : InstRW<[N2Write_15cyc_2V0], (instrs FDIVv2f64)>;

  // ASIMD FP max/min, reduce, F32 and D-form F16
  def : InstRW<[N2Write_4cyc_1V],
               (instregex "^(FMAX|FMIN)(NM)?Vv4(i16|i32)v$")>;

  // ASIMD FP max/min, reduce, Q-form F16
  def : InstRW<[N2Write_6cyc_2V],
               (instregex "^(FMAX|FMIN)(NM)?Vv8i16v$")>;

  // ASIMD FP multiply
  def : InstRW<[N2Write_3cyc_1V],
               (instregex "^FMULv", "^FMULXv")>;

  // ASIMD FP multiply accumulate
  def : InstRW<[N2Write_4cyc_1V],
               (instregex "^FMLAv", "^FMLSv")>;

  // ASIMD FP multiply accumulate long
  def : InstRW<[N2Write_5cyc_1V],
               (instregex "^FMLALv", "^FMLSLv")>;
  // ASIMD FP round, D-form F32 and Q-form F64
  // The second pattern covers the FRINT32/FRINT64 [XZ] two-lane forms. It
  // previously read "[32|64)", which is parsed as a bracket expression and so
  // never matched those names; it now uses the "(32|64)" group, as the
  // four-lane pattern below does.
  def : InstRW<[N2Write_3cyc_1V0],
               (instregex "^FRINT[AIMNPXZ]v2f(32|64)$",
                          "^FRINT(32|64)[XZ]v2f(32|64)$")>;

  // ASIMD FP round, D-form F16 and Q-form F32
  def : InstRW<[N2Write_4cyc_2V0],
               (instregex "^FRINT[AIMNPXZ]v4f(16|32)$",
                          "^FRINT(32|64)[XZ]v4f32$")>;

  // ASIMD FP round, Q-form F16
  def : InstRW<[N2Write_6cyc_4V0],
               (instregex "^FRINT[AIMNPXZ]v8f16$")>;
  // ASIMD FP square root, D-form, F16
  def : InstRW<[N2Write_7cyc_1V0],
               (instrs FSQRTv4f16)>;

  // ASIMD FP square root, D-form, F32
  def : InstRW<[N2Write_10cyc_2V0],
               (instrs FSQRTv2f32)>;

  // ASIMD FP square root, Q-form, F16
  def : InstRW<[N2Write_13cyc_2V0],
               (instrs FSQRTv8f16)>;

  // ASIMD FP square root, Q-form, F32
  def : InstRW<[N2Write_10cyc_2V0],
               (instrs FSQRTv4f32)>;

  // ASIMD FP square root, Q-form, F64
  def : InstRW<[N2Write_16cyc_2V0],
               (instrs FSQRTv2f64)>;
  // ASIMD BFloat16 (BF16) instructions
  // -----------------------------------------------------------------------------

  // ASIMD convert, F32 to BF16
  def : InstRW<[N2Write_4cyc_1V0],
               (instrs BFCVTN, BFCVTN2)>;

  // ASIMD dot product
  def : InstRW<[N2Write_4cyc_1V],
               (instrs BFDOTv4bf16, BFDOTv8bf16)>;

  // ASIMD matrix multiply accumulate
  def : InstRW<[N2Write_5cyc_1V],
               (instrs BFMMLA)>;

  // ASIMD multiply accumulate long
  def : InstRW<[N2Write_4cyc_1V],
               (instrs BFMLALB, BFMLALBIdx, BFMLALT, BFMLALTIdx)>;

  // Scalar convert, F32 to BF16
  def : InstRW<[N2Write_3cyc_1V0],
               (instrs BFCVT)>;
  // ASIMD miscellaneous instructions
  // -----------------------------------------------------------------------------

  // ASIMD bit reverse
  // ASIMD bitwise insert
  // ASIMD count
  // ASIMD duplicate, element
  // ASIMD extract
  // ASIMD extract narrow
  // ASIMD insert, element to element
  // ASIMD move, FP immed
  // ASIMD move, integer immed
  // ASIMD reverse
  // ASIMD table lookup, 1 or 2 table regs
  // ASIMD table lookup extension, 1 table reg
  // ASIMD transfer, element to gen reg
  // ASIMD transpose
  // ASIMD unzip/zip
  // Handled by SchedAlias<WriteV[dq], ...>

  // ASIMD duplicate, gen reg
  def : InstRW<[N2Write_3cyc_1M0],
               (instregex "^DUPv.+gpr")>;

  // ASIMD extract narrow, saturating
  def : InstRW<[N2Write_4cyc_1V1],
               (instregex "^[SU]QXTNv", "^SQXTUNv")>;

  // ASIMD reciprocal and square root estimate, D-form U32
  def : InstRW<[N2Write_3cyc_1V0],
               (instrs URECPEv2i32, URSQRTEv2i32)>;

  // ASIMD reciprocal and square root estimate, Q-form U32
  def : InstRW<[N2Write_4cyc_2V0],
               (instrs URECPEv4i32, URSQRTEv4i32)>;

  // ASIMD reciprocal and square root estimate, D-form F32 and scalar forms
  def : InstRW<[N2Write_3cyc_1V0],
               (instrs FRECPEv1f16,  FRECPEv1i32,
                       FRECPEv1i64,  FRECPEv2f32,
                       FRSQRTEv1f16, FRSQRTEv1i32,
                       FRSQRTEv1i64, FRSQRTEv2f32)>;

  // ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32
  def : InstRW<[N2Write_4cyc_2V0],
               (instrs FRECPEv4f16,  FRECPEv4f32,
                       FRSQRTEv4f16, FRSQRTEv4f32)>;

  // ASIMD reciprocal and square root estimate, Q-form F16
  def : InstRW<[N2Write_6cyc_4V0],
               (instrs FRECPEv8f16, FRSQRTEv8f16)>;

  // ASIMD reciprocal exponent
  def : InstRW<[N2Write_3cyc_1V0],
               (instregex "^FRECPXv")>;

  // ASIMD reciprocal step
  def : InstRW<[N2Write_4cyc_1V],
               (instregex "^FRECPSv", "^FRSQRTSv")>;

  // ASIMD table lookup, 3 table regs
  def : InstRW<[N2Write_4cyc_2V],
               (instrs TBLv8i8Three, TBLv16i8Three)>;

  // ASIMD table lookup, 4 table regs
  def : InstRW<[N2Write_4cyc_4V],
               (instrs TBLv8i8Four, TBLv16i8Four)>;

  // ASIMD table lookup extension, 2 table reg
  def : InstRW<[N2Write_4cyc_2V],
               (instrs TBXv8i8Two, TBXv16i8Two)>;

  // ASIMD table lookup extension, 3 table reg
  def : InstRW<[N2Write_6cyc_4V],
               (instrs TBXv8i8Three, TBXv16i8Three)>;

  // ASIMD table lookup extension, 4 table reg
  def : InstRW<[N2Write_6cyc_8V],
               (instrs TBXv8i8Four, TBXv16i8Four)>;

  // ASIMD transfer, gen reg to element
  def : InstRW<[N2Write_5cyc_1M0_1V],
               (instregex "^INSv")>;
  // ASIMD load instructions
  // -----------------------------------------------------------------------------
  // Each addressing form is followed by its post-index (_POST) variant, which
  // appends WriteAdr to the write list.
  // NOTE(review): the _POST entries list WriteAdr after the data write, while
  // the scalar FP store pre/post-index entries in this file list WriteAdr
  // first - confirm which order matches the instructions' output operands.

  // ASIMD load, 1 element, multiple, 1 reg, D-form
  def : InstRW<[N2Write_6cyc_1L],
               (instregex "^LD1Onev(8b|4h|2s|1d)$")>;
  def : InstRW<[N2Write_6cyc_1L, WriteAdr],
               (instregex "^LD1Onev(8b|4h|2s|1d)_POST$")>;

  // ASIMD load, 1 element, multiple, 1 reg, Q-form
  def : InstRW<[N2Write_6cyc_1L],
               (instregex "^LD1Onev(16b|8h|4s|2d)$")>;
  def : InstRW<[N2Write_6cyc_1L, WriteAdr],
               (instregex "^LD1Onev(16b|8h|4s|2d)_POST$")>;

  // ASIMD load, 1 element, multiple, 2 reg, D-form
  def : InstRW<[N2Write_6cyc_2L],
               (instregex "^LD1Twov(8b|4h|2s|1d)$")>;
  def : InstRW<[N2Write_6cyc_2L, WriteAdr],
               (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>;

  // ASIMD load, 1 element, multiple, 2 reg, Q-form
  def : InstRW<[N2Write_6cyc_2L],
               (instregex "^LD1Twov(16b|8h|4s|2d)$")>;
  def : InstRW<[N2Write_6cyc_2L, WriteAdr],
               (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>;

  // ASIMD load, 1 element, multiple, 3 reg, D-form
  def : InstRW<[N2Write_6cyc_3L],
               (instregex "^LD1Threev(8b|4h|2s|1d)$")>;
  def : InstRW<[N2Write_6cyc_3L, WriteAdr],
               (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>;

  // ASIMD load, 1 element, multiple, 3 reg, Q-form
  def : InstRW<[N2Write_6cyc_3L],
               (instregex "^LD1Threev(16b|8h|4s|2d)$")>;
  def : InstRW<[N2Write_6cyc_3L, WriteAdr],
               (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>;

  // ASIMD load, 1 element, multiple, 4 reg, D-form
  def : InstRW<[N2Write_7cyc_4L],
               (instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
  def : InstRW<[N2Write_7cyc_4L, WriteAdr],
               (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;

  // ASIMD load, 1 element, multiple, 4 reg, Q-form
  def : InstRW<[N2Write_7cyc_4L],
               (instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
  def : InstRW<[N2Write_7cyc_4L, WriteAdr],
               (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;
  // ASIMD load, 1 element, one lane, B/H/S
  // ASIMD load, 1 element, one lane, D
  def : InstRW<[N2Write_8cyc_1L_1V],
               (instregex "LD1i(8|16|32|64)$")>;
  def : InstRW<[N2Write_8cyc_1L_1V, WriteAdr],
               (instregex "LD1i(8|16|32|64)_POST$")>;

  // ASIMD load, 1 element, all lanes, D-form, B/H/S
  // ASIMD load, 1 element, all lanes, D-form, D
  def : InstRW<[N2Write_8cyc_1L_1V],
               (instregex "LD1Rv(8b|4h|2s|1d)$")>;
  def : InstRW<[N2Write_8cyc_1L_1V, WriteAdr],
               (instregex "LD1Rv(8b|4h|2s|1d)_POST$")>;

  // ASIMD load, 1 element, all lanes, Q-form
  def : InstRW<[N2Write_8cyc_1L_1V],
               (instregex "LD1Rv(16b|8h|4s|2d)$")>;
  def : InstRW<[N2Write_8cyc_1L_1V, WriteAdr],
               (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>;
  // ASIMD load, 2 element, multiple, D-form, B/H/S
  def : InstRW<[N2Write_8cyc_1L_2V],
               (instregex "LD2Twov(8b|4h|2s)$")>;
  def : InstRW<[N2Write_8cyc_1L_2V, WriteAdr],
               (instregex "LD2Twov(8b|4h|2s)_POST$")>;

  // ASIMD load, 2 element, multiple, Q-form, B/H/S
  // ASIMD load, 2 element, multiple, Q-form, D
  def : InstRW<[N2Write_8cyc_2L_2V],
               (instregex "LD2Twov(16b|8h|4s|2d)$")>;
  def : InstRW<[N2Write_8cyc_2L_2V, WriteAdr],
               (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>;

  // ASIMD load, 2 element, one lane, B/H
  // ASIMD load, 2 element, one lane, S
  // ASIMD load, 2 element, one lane, D
  def : InstRW<[N2Write_8cyc_1L_2V],
               (instregex "LD2i(8|16|32|64)$")>;
  def : InstRW<[N2Write_8cyc_1L_2V, WriteAdr],
               (instregex "LD2i(8|16|32|64)_POST$")>;

  // ASIMD load, 2 element, all lanes, D-form, B/H/S
  // ASIMD load, 2 element, all lanes, D-form, D
  def : InstRW<[N2Write_8cyc_1L_2V],
               (instregex "LD2Rv(8b|4h|2s|1d)$")>;
  def : InstRW<[N2Write_8cyc_1L_2V, WriteAdr],
               (instregex "LD2Rv(8b|4h|2s|1d)_POST$")>;

  // ASIMD load, 2 element, all lanes, Q-form
  def : InstRW<[N2Write_8cyc_1L_2V],
               (instregex "LD2Rv(16b|8h|4s|2d)$")>;
  def : InstRW<[N2Write_8cyc_1L_2V, WriteAdr],
               (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>;
  // ASIMD load, 3 element, multiple, D-form, B/H/S
  def : InstRW<[N2Write_8cyc_2L_3V],
               (instregex "LD3Threev(8b|4h|2s)$")>;
  def : InstRW<[N2Write_8cyc_2L_3V, WriteAdr],
               (instregex "LD3Threev(8b|4h|2s)_POST$")>;

  // ASIMD load, 3 element, multiple, Q-form, B/H/S
  def : InstRW<[N2Write_8cyc_3L_3V],
               (instregex "LD3Threev(16b|8h|4s)$")>;
  def : InstRW<[N2Write_8cyc_3L_3V, WriteAdr],
               (instregex "LD3Threev(16b|8h|4s)_POST$")>;

  // ASIMD load, 3 element, multiple, Q-form, D
  def : InstRW<[N2Write_8cyc_3L_3V],
               (instregex "LD3Threev(2d)$")>;
  def : InstRW<[N2Write_8cyc_3L_3V, WriteAdr],
               (instregex "LD3Threev(2d)_POST$")>;

  // ASIMD load, 3 element, one lane, B/H
  // ASIMD load, 3 element, one lane, S
  // ASIMD load, 3 element, one lane, D
  def : InstRW<[N2Write_8cyc_2L_3V],
               (instregex "LD3i(8|16|32|64)$")>;
  def : InstRW<[N2Write_8cyc_2L_3V, WriteAdr],
               (instregex "LD3i(8|16|32|64)_POST$")>;

  // ASIMD load, 3 element, all lanes, D-form, B/H/S
  // ASIMD load, 3 element, all lanes, D-form, D
  def : InstRW<[N2Write_8cyc_2L_3V],
               (instregex "LD3Rv(8b|4h|2s|1d)$")>;
  def : InstRW<[N2Write_8cyc_2L_3V, WriteAdr],
               (instregex "LD3Rv(8b|4h|2s|1d)_POST$")>;

  // ASIMD load, 3 element, all lanes, Q-form, B/H/S
  // ASIMD load, 3 element, all lanes, Q-form, D
  def : InstRW<[N2Write_8cyc_3L_3V],
               (instregex "LD3Rv(16b|8h|4s|2d)$")>;
  def : InstRW<[N2Write_8cyc_3L_3V, WriteAdr],
               (instregex "LD3Rv(16b|8h|4s|2d)_POST$")>;
  // ASIMD load, 4 element, multiple, D-form, B/H/S
  def : InstRW<[N2Write_8cyc_3L_4V],
               (instregex "LD4Fourv(8b|4h|2s)$")>;
  def : InstRW<[N2Write_8cyc_3L_4V, WriteAdr],
               (instregex "LD4Fourv(8b|4h|2s)_POST$")>;

  // ASIMD load, 4 element, multiple, Q-form, B/H/S
  // ASIMD load, 4 element, multiple, Q-form, D
  def : InstRW<[N2Write_9cyc_4L_4V],
               (instregex "LD4Fourv(16b|8h|4s|2d)$")>;
  def : InstRW<[N2Write_9cyc_4L_4V, WriteAdr],
               (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>;

  // ASIMD load, 4 element, one lane, B/H
  // ASIMD load, 4 element, one lane, S
  // ASIMD load, 4 element, one lane, D
  def : InstRW<[N2Write_8cyc_3L_4V],
               (instregex "LD4i(8|16|32|64)$")>;
  def : InstRW<[N2Write_8cyc_3L_4V, WriteAdr],
               (instregex "LD4i(8|16|32|64)_POST$")>;

  // ASIMD load, 4 element, all lanes, D-form, B/H/S
  // ASIMD load, 4 element, all lanes, D-form, D
  def : InstRW<[N2Write_8cyc_3L_4V],
               (instregex "LD4Rv(8b|4h|2s|1d)$")>;
  def : InstRW<[N2Write_8cyc_3L_4V, WriteAdr],
               (instregex "LD4Rv(8b|4h|2s|1d)_POST$")>;

  // ASIMD load, 4 element, all lanes, Q-form, B/H/S
  // ASIMD load, 4 element, all lanes, Q-form, D
  def : InstRW<[N2Write_8cyc_4L_4V],
               (instregex "LD4Rv(16b|8h|4s|2d)$")>;
  def : InstRW<[N2Write_8cyc_4L_4V, WriteAdr],
               (instregex "LD4Rv(16b|8h|4s|2d)_POST$")>;
  // ASIMD store instructions
  // -----------------------------------------------------------------------------
  // Each addressing form is followed by its post-index (_POST) variant, which
  // appends WriteAdr to the write list.

  // ASIMD store, 1 element, multiple, 1 reg, D-form
  def : InstRW<[N2Write_2cyc_1L01_1V],
               (instregex "ST1Onev(8b|4h|2s|1d)$")>;
  def : InstRW<[N2Write_2cyc_1L01_1V, WriteAdr],
               (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>;

  // ASIMD store, 1 element, multiple, 1 reg, Q-form
  def : InstRW<[N2Write_2cyc_1L01_1V],
               (instregex "ST1Onev(16b|8h|4s|2d)$")>;
  def : InstRW<[N2Write_2cyc_1L01_1V, WriteAdr],
               (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>;

  // ASIMD store, 1 element, multiple, 2 reg, D-form
  def : InstRW<[N2Write_2cyc_1L01_1V],
               (instregex "ST1Twov(8b|4h|2s|1d)$")>;
  def : InstRW<[N2Write_2cyc_1L01_1V, WriteAdr],
               (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>;

  // ASIMD store, 1 element, multiple, 2 reg, Q-form
  def : InstRW<[N2Write_2cyc_2L01_2V],
               (instregex "ST1Twov(16b|8h|4s|2d)$")>;
  def : InstRW<[N2Write_2cyc_2L01_2V, WriteAdr],
               (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>;

  // ASIMD store, 1 element, multiple, 3 reg, D-form
  def : InstRW<[N2Write_2cyc_2L01_2V],
               (instregex "ST1Threev(8b|4h|2s|1d)$")>;
  def : InstRW<[N2Write_2cyc_2L01_2V, WriteAdr],
               (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>;

  // ASIMD store, 1 element, multiple, 3 reg, Q-form
  def : InstRW<[N2Write_2cyc_3L01_3V],
               (instregex "ST1Threev(16b|8h|4s|2d)$")>;
  def : InstRW<[N2Write_2cyc_3L01_3V, WriteAdr],
               (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>;

  // ASIMD store, 1 element, multiple, 4 reg, D-form
  def : InstRW<[N2Write_2cyc_2L01_2V],
               (instregex "ST1Fourv(8b|4h|2s|1d)$")>;
  def : InstRW<[N2Write_2cyc_2L01_2V, WriteAdr],
               (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>;

  // ASIMD store, 1 element, multiple, 4 reg, Q-form
  def : InstRW<[N2Write_2cyc_4L01_4V],
               (instregex "ST1Fourv(16b|8h|4s|2d)$")>;
  def : InstRW<[N2Write_2cyc_4L01_4V, WriteAdr],
               (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>;

  // ASIMD store, 1 element, one lane, B/H/S
  // ASIMD store, 1 element, one lane, D
  def : InstRW<[N2Write_4cyc_1L01_1V],
               (instregex "ST1i(8|16|32|64)$")>;
  def : InstRW<[N2Write_4cyc_1L01_1V, WriteAdr],
               (instregex "ST1i(8|16|32|64)_POST$")>;
  // ASIMD store, 2 element, multiple, D-form, B/H/S
  def : InstRW<[N2Write_4cyc_1L01_1V],
               (instregex "ST2Twov(8b|4h|2s)$")>;
  def : InstRW<[N2Write_4cyc_1L01_1V, WriteAdr],
               (instregex "ST2Twov(8b|4h|2s)_POST$")>;

  // ASIMD store, 2 element, multiple, Q-form, B/H/S
  // ASIMD store, 2 element, multiple, Q-form, D
  def : InstRW<[N2Write_4cyc_2L01_2V],
               (instregex "ST2Twov(16b|8h|4s|2d)$")>;
  def : InstRW<[N2Write_4cyc_2L01_2V, WriteAdr],
               (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;

  // ASIMD store, 2 element, one lane, B/H/S
  // ASIMD store, 2 element, one lane, D
  def : InstRW<[N2Write_4cyc_1L01_1V],
               (instregex "ST2i(8|16|32|64)$")>;
  def : InstRW<[N2Write_4cyc_1L01_1V, WriteAdr],
               (instregex "ST2i(8|16|32|64)_POST$")>;
  // ASIMD store, 3 element, multiple, D-form, B/H/S
  def : InstRW<[N2Write_5cyc_2L01_2V],
               (instregex "ST3Threev(8b|4h|2s)$")>;
  def : InstRW<[N2Write_5cyc_2L01_2V, WriteAdr],
               (instregex "ST3Threev(8b|4h|2s)_POST$")>;

  // ASIMD store, 3 element, multiple, Q-form, B/H/S
  // ASIMD store, 3 element, multiple, Q-form, D
  def : InstRW<[N2Write_6cyc_3L01_3V],
               (instregex "ST3Threev(16b|8h|4s|2d)$")>;
  def : InstRW<[N2Write_6cyc_3L01_3V, WriteAdr],
               (instregex "ST3Threev(16b|8h|4s|2d)_POST$")>;

  // ASIMD store, 3 element, one lane, B/H
  // ASIMD store, 3 element, one lane, S
  // ASIMD store, 3 element, one lane, D
  def : InstRW<[N2Write_6cyc_3L01_3V],
               (instregex "ST3i(8|16|32|64)$")>;
  def : InstRW<[N2Write_6cyc_3L01_3V, WriteAdr],
               (instregex "ST3i(8|16|32|64)_POST$")>;
  // ASIMD store, 4 element, multiple, D-form, B/H/S
  def : InstRW<[N2Write_6cyc_3L01_3V],
               (instregex "ST4Fourv(8b|4h|2s)$")>;
  def : InstRW<[N2Write_6cyc_3L01_3V, WriteAdr],
               (instregex "ST4Fourv(8b|4h|2s)_POST$")>;

  // ASIMD store, 4 element, multiple, Q-form, B/H/S
  def : InstRW<[N2Write_7cyc_6L01_6V],
               (instregex "ST4Fourv(16b|8h|4s)$")>;
  def : InstRW<[N2Write_7cyc_6L01_6V, WriteAdr],
               (instregex "ST4Fourv(16b|8h|4s)_POST$")>;

  // ASIMD store, 4 element, multiple, Q-form, D
  def : InstRW<[N2Write_5cyc_4L01_4V],
               (instregex "ST4Fourv(2d)$")>;
  def : InstRW<[N2Write_5cyc_4L01_4V, WriteAdr],
               (instregex "ST4Fourv(2d)_POST$")>;

  // ASIMD store, 4 element, one lane, B/H/S
  def : InstRW<[N2Write_6cyc_3L01_3V],
               (instregex "ST4i(8|16|32)$")>;
  def : InstRW<[N2Write_6cyc_3L01_3V, WriteAdr],
               (instregex "ST4i(8|16|32)_POST$")>;

  // ASIMD store, 4 element, one lane, D
  def : InstRW<[N2Write_4cyc_3L01_3V],
               (instregex "ST4i(64)$")>;
  def : InstRW<[N2Write_4cyc_3L01_3V, WriteAdr],
               (instregex "ST4i(64)_POST$")>;
  // Cryptography extensions
  // -----------------------------------------------------------------------------

  // Crypto AES ops
  def : InstRW<[N2Write_2cyc_1V],
               (instregex "^AES[DE]rr$",
                          "^AESI?MCrr")>;

  // Crypto polynomial (64x64) multiply long
  def : InstRW<[N2Write_2cyc_1V0],
               (instrs PMULLv1i64,
                       PMULLv2i64)>;

  // Crypto SHA1 hash acceleration op (SHA1H) and
  // Crypto SHA1 schedule acceleration ops (SHA1SU0/SHA1SU1)
  def : InstRW<[N2Write_2cyc_1V0],
               (instregex "^SHA1(H|SU0|SU1)")>;

  // Crypto SHA1 hash acceleration ops (SHA1C/M/P) and
  // Crypto SHA256 hash acceleration ops
  def : InstRW<[N2Write_4cyc_1V0],
               (instregex "^SHA1[CMP]",
                          "^SHA256H2?")>;

  // Crypto SHA256 schedule acceleration ops
  def : InstRW<[N2Write_2cyc_1V0],
               (instregex "^SHA256SU[01]")>;

  // Crypto SHA512 hash acceleration ops
  def : InstRW<[N2Write_2cyc_1V0],
               (instregex "^SHA512(H|H2|SU0|SU1)")>;

  // Crypto SHA3 ops
  def : InstRW<[N2Write_2cyc_1V0],
               (instrs BCAX,
                       EOR3,
                       RAX1,
                       XAR)>;

  // Crypto SM3 ops
  def : InstRW<[N2Write_2cyc_1V0],
               (instregex "^SM3PARTW[12]$",
                          "^SM3SS1$",
                          "^SM3TT[12][AB]$")>;

  // Crypto SM4 ops
  def : InstRW<[N2Write_4cyc_1V0],
               (instrs SM4E,
                       SM4ENCKEY)>;
  // CRC
  // -----------------------------------------------------------------------------
  // Every opcode whose name begins with CRC32 gets the same write class.
  def : InstRW<[N2Write_2cyc_1M0],
               (instregex "^CRC32")>;
  // SVE Predicate instructions
  // -----------------------------------------------------------------------------

  // Loop control, based on predicate
  def : InstRW<[N2Write_2cyc_1M],
               (instrs BRKA_PPmP,
                       BRKA_PPzP,
                       BRKB_PPmP,
                       BRKB_PPzP)>;

  // Loop control, based on predicate and flag setting
  def : InstRW<[N2Write_3cyc_1M],
               (instrs BRKAS_PPzP,
                       BRKBS_PPzP)>;

  // Loop control, propagating
  def : InstRW<[N2Write_2cyc_1M0],
               (instrs BRKN_PPzP,
                       BRKPA_PPzPP,
                       BRKPB_PPzPP)>;

  // Loop control, propagating and flag setting
  def : InstRW<[N2Write_3cyc_1M0_1M],
               (instrs BRKNS_PPzP,
                       BRKPAS_PPzPP,
                       BRKPBS_PPzPP)>;

  // Loop control, based on GPR (WHILE<cc> compares, then WHILERW/WHILEWR)
  def : InstRW<[N2Write_3cyc_1M],
               (instregex "^WHILE(GE|GT|HI|HS|LE|LO|LS|LT)_P(WW|XX)_[BHSD]$")>;
  def : InstRW<[N2Write_3cyc_1M],
               (instregex "^WHILE(RW|WR)_PXX_[BHSD]$")>;

  // Loop terminate
  def : InstRW<[N2Write_1cyc_1M],
               (instregex "^CTERM(EQ|NE)_(WW|XX)$")>;
  // Predicate counting scalar
  // (ADDPL/ADDVL/RDVL by name, then the element-count CNT/INC/DEC family.)
  def : InstRW<[N2Write_2cyc_1M],
               (instrs ADDPL_XXI,
                       ADDVL_XXI,
                       RDVLI_XI)>;
  def : InstRW<[N2Write_2cyc_1M],
               (instregex "^(CNT|DEC|INC|SQDEC|SQINC|UQDEC|UQINC)[BHWD]_XPiI$",
                          "^SQ(DEC|INC)[BHWD]_XPiWdI$",
                          "^(UQDEC|UQINC)[BHWD]_WPiI$")>;

  // Predicate counting scalar, active predicate
  def : InstRW<[N2Write_2cyc_1M],
               (instregex "^CNTP_XPP_[BHSD]$",
                          "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_XP_[BHSD]$",
                          "^(UQDEC|UQINC)P_WP_[BHSD]$",
                          "^(SQDEC|SQINC|UQDEC|UQINC)P_XPWd_[BHSD]$")>;

  // Predicate counting vector, active predicate
  def : InstRW<[N2Write_7cyc_1M_1M0_1V],
               (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_ZP_[HSD]$")>;
  // Predicate logical
  def : InstRW<[N2Write_1cyc_1M0],
               (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP$")>;

  // Predicate logical, flag setting
  def : InstRW<[N2Write_2cyc_1M0_1M],
               (instregex "^(ANDS|BICS|EORS|NANDS|NORS|ORNS|ORRS)_PPzPP$")>;

  // Predicate reverse
  def : InstRW<[N2Write_2cyc_1M],
               (instregex "^REV_PP_[BHSD]$")>;

  // Predicate select
  def : InstRW<[N2Write_1cyc_1M0],
               (instrs SEL_PPPP)>;

  // Predicate set
  def : InstRW<[N2Write_2cyc_1M],
               (instregex "^PFALSE$",
                          "^PTRUE_[BHSD]$")>;

  // Predicate set/initialize, set flags
  def : InstRW<[N2Write_3cyc_1M],
               (instregex "^PTRUES_[BHSD]$")>;

  // Predicate find first/next
  def : InstRW<[N2Write_3cyc_1M],
               (instregex "^PFIRST_B$",
                          "^PNEXT_[BHSD]$")>;

  // Predicate test
  def : InstRW<[N2Write_1cyc_1M],
               (instrs PTEST_PP)>;

  // Predicate transpose
  def : InstRW<[N2Write_2cyc_1M],
               (instregex "^TRN[12]_PPP_[BHSDQ]$")>;

  // Predicate unpack and widen
  def : InstRW<[N2Write_2cyc_1M],
               (instrs PUNPKHI_PP,
                       PUNPKLO_PP)>;

  // Predicate zip/unzip
  def : InstRW<[N2Write_2cyc_1M],
               (instregex "^(ZIP|UZP)[12]_PPP_[BHSDQ]$")>;
  // SVE integer instructions
  // -----------------------------------------------------------------------------

  // Arithmetic, absolute diff
  def : InstRW<[N2Write_2cyc_1V],
               (instregex "^[SU]ABD_ZPmZ_[BHSD]$")>;

  // Arithmetic, absolute diff accum
  def : InstRW<[N2Write_4cyc_1V1],
               (instregex "^[SU]ABA_ZZZ_[BHSD]$")>;

  // Arithmetic, absolute diff accum long
  def : InstRW<[N2Write_4cyc_1V1],
               (instregex "^[SU]ABAL[TB]_ZZZ_[HSD]$")>;

  // Arithmetic, absolute diff long
  def : InstRW<[N2Write_2cyc_1V],
               (instregex "^[SU]ABDL[TB]_ZZZ_[HSD]$")>;

  // Arithmetic, basic
  def : InstRW<[N2Write_2cyc_1V],
               (instregex "^(ABS|ADD|CNOT|NEG|SUB|SUBR)_ZPmZ_[BHSD]$",
                          "^(ADD|SUB)_ZZZ_[BHSD]$",
                          "^(ADD|SUB|SUBR)_ZI_[BHSD]$",
                          "^ADR_[SU]XTW_ZZZ_D_[0123]$",
                          "^ADR_LSL_ZZZ_[SD]_[0123]$",
                          "^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]$",
                          "^SADDLBT_ZZZ_[HSD]$",
                          "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]$",
                          "^SSUBL(BT|TB)_ZZZ_[HSD]$")>;

  // Arithmetic, complex
  def : InstRW<[N2Write_2cyc_1V],
               (instregex "^R?(ADD|SUB)HN[BT]_ZZZ_[BHS]$",
                          "^SQ(ABS|ADD|NEG|SUB|SUBR)_ZPmZ_[BHSD]$",
                          "^[SU]Q(ADD|SUB)_ZZZ_[BHSD]$",
                          "^[SU]Q(ADD|SUB)_ZI_[BHSD]$",
                          "^(SRH|SUQ|UQ|USQ|URH)ADD_ZPmZ_[BHSD]$",
                          "^(UQSUB|UQSUBR)_ZPmZ_[BHSD]$")>;

  // Arithmetic, large integer
  def : InstRW<[N2Write_2cyc_1V],
               (instregex "^(AD|SB)CL[BT]_ZZZ_[SD]$")>;

  // Arithmetic, pairwise add
  def : InstRW<[N2Write_2cyc_1V],
               (instregex "^ADDP_ZPmZ_[BHSD]$")>;

  // Arithmetic, pairwise add and accum long
  def : InstRW<[N2Write_4cyc_1V1],
               (instregex "^[SU]ADALP_ZPmZ_[HSD]$")>;
  // Arithmetic, shift
  def : InstRW<[N2Write_2cyc_1V1],
               (instregex "^(ASR|LSL|LSR)_WIDE_ZPmZ_[BHS]$",
                          "^(ASR|LSL|LSR)_WIDE_ZZZ_[BHS]$",
                          "^(ASR|LSL|LSR)_ZPmI_[BHSD]$",
                          "^(ASR|LSL|LSR)_ZPmZ_[BHSD]$",
                          "^(ASR|LSL|LSR)_ZZI_[BHSD]$",
                          "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]$")>;

  // Arithmetic, shift and accumulate
  def : InstRW<[N2Write_4cyc_1V1],
               (instregex "^(SRSRA|SSRA|URSRA|USRA)_ZZI_[BHSD]$")>;

  // Arithmetic, shift by immediate
  // Arithmetic, shift by immediate and insert (SLI/SRI)
  def : InstRW<[N2Write_2cyc_1V1],
               (instregex "^(SHRNB|SHRNT|SSHLLB|SSHLLT|USHLLB|USHLLT|SLI|SRI)_ZZI_[BHSD]$")>;

  // Arithmetic, shift complex
  def : InstRW<[N2Write_4cyc_1V1],
               (instregex "^(SQ)?RSHRU?N[BT]_ZZI_[BHS]$",
                          "^(SQRSHL|SQRSHLR|SQSHL|SQSHLR|UQRSHL|UQRSHLR|UQSHL|UQSHLR)_ZPmZ_[BHSD]$",
                          "^(SQSHL|SQSHLU|UQSHL)_ZPmI_[BHSD]$",
                          "^SQSHRU?N[BT]_ZZI_[BHS]$",
                          "^UQR?SHRN[BT]_ZZI_[BHS]$")>;

  // Arithmetic, shift right for divide
  def : InstRW<[N2Write_4cyc_1V1],
               (instregex "^ASRD_ZPmI_[BHSD]$")>;

  // Arithmetic, shift rounding
  def : InstRW<[N2Write_4cyc_1V1],
               (instregex "^(SRSHL|SRSHLR|URSHL|URSHLR)_ZPmZ_[BHSD]$",
                          "^[SU]RSHR_ZPmI_[BHSD]$")>;
  // Bit manipulation
  def : InstRW<[N2Write_6cyc_2V1],
               (instregex "^(BDEP|BEXT|BGRP)_ZZZ_[BHSD]$")>;

  // Bitwise select
  def : InstRW<[N2Write_2cyc_1V],
               (instregex "^(BSL|BSL1N|BSL2N|NBSL)_ZZZZ$")>;

  // Count/reverse bits
  def : InstRW<[N2Write_2cyc_1V],
               (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]$")>;

  // Broadcast logical bitmask immediate to vector
  def : InstRW<[N2Write_2cyc_1V],
               (instrs DUPM_ZI)>;

  // Compare and set flags
  def : InstRW<[N2Write_4cyc_1V0_1M],
               (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]$",
                          "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]$")>;

  // Complex add
  def : InstRW<[N2Write_2cyc_1V],
               (instregex "^(SQ)?CADD_ZZI_[BHSD]$")>;

  // Complex dot product 8-bit element
  def : InstRW<[N2Write_3cyc_1V],
               (instrs CDOT_ZZZ_S,
                       CDOT_ZZZI_S)>;

  // Complex dot product 16-bit element
  def : InstRW<[N2Write_4cyc_1V0],
               (instrs CDOT_ZZZ_D,
                       CDOT_ZZZI_D)>;

  // Complex multiply-add B, H, S element size
  def : InstRW<[N2Write_4cyc_1V0],
               (instregex "^CMLA_ZZZ_[BHS]$",
                          "^CMLA_ZZZI_[HS]$")>;

  // Complex multiply-add D element size
  def : InstRW<[N2Write_5cyc_2V0],
               (instrs CMLA_ZZZ_D)>;
  // Conditional extract operations, scalar form
  def : InstRW<[N2Write_8cyc_1M0_1V1_1V],
               (instregex "^CLAST[AB]_RPZ_[BHSD]$")>;

  // Conditional extract operations, SIMD&FP scalar and vector forms
  // (COMPACT and SPLICE are bound to the same write class.)
  def : InstRW<[N2Write_3cyc_1V1],
               (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]$",
                          "^COMPACT_ZPZ_[SD]$",
                          "^SPLICE_ZPZZ?_[BHSD]$")>;

  // Convert to floating point, 64b to float or convert to double
  def : InstRW<[N2Write_3cyc_1V0],
               (instregex "^[SU]CVTF_ZPmZ_Dto[SD]$")>;

  // Convert to floating point, 64b to half
  def : InstRW<[N2Write_3cyc_1V0],
               (instregex "^[SU]CVTF_ZPmZ_DtoH$")>;

  // Convert to floating point, 32b to single or half
  def : InstRW<[N2Write_4cyc_2V0],
               (instregex "^[SU]CVTF_ZPmZ_Sto[HS]$")>;

  // Convert to floating point, 32b to double
  def : InstRW<[N2Write_3cyc_1V0],
               (instregex "^[SU]CVTF_ZPmZ_StoD$")>;

  // Convert to floating point, 16b to half
  def : InstRW<[N2Write_6cyc_4V0],
               (instregex "^[SU]CVTF_ZPmZ_HtoH$")>;

  // Copy, scalar
  def : InstRW<[N2Write_5cyc_1M0_1V],
               (instregex "^CPY_ZPmR_[BHSD]$")>;

  // Copy, scalar SIMD&FP or imm
  def : InstRW<[N2Write_2cyc_1V],
               (instregex "^CPY_ZPm[IV]_[BHSD]$",
                          "^CPY_ZPzI_[BHSD]$")>;
  // Divides, 32 bit
  def : InstRW<[N2Write_12cyc_1V0],
               (instregex "^[SU]DIVR?_ZPmZ_S$")>;

  // Divides, 64 bit
  def : InstRW<[N2Write_20cyc_1V0],
               (instregex "^[SU]DIVR?_ZPmZ_D$")>;

  // Dot product, 8 bit
  def : InstRW<[N2Write_3cyc_1V],
               (instregex "^[SU]DOT_ZZZI?_S$")>;

  // Dot product, 8 bit, using signed and unsigned integers
  def : InstRW<[N2Write_3cyc_1V],
               (instrs SUDOT_ZZZI,
                       USDOT_ZZZI,
                       USDOT_ZZZ)>;

  // Dot product, 16 bit
  def : InstRW<[N2Write_4cyc_1V0],
               (instregex "^[SU]DOT_ZZZI?_D$")>;

  // Duplicate, immediate and indexed form
  def : InstRW<[N2Write_2cyc_1V],
               (instregex "^DUP_ZI_[BHSD]$",
                          "^DUP_ZZI_[BHSDQ]$")>;

  // Duplicate, scalar form
  def : InstRW<[N2Write_3cyc_1M0],
               (instregex "^DUP_ZR_[BHSD]$")>;

  // Extend, sign or zero
  def : InstRW<[N2Write_2cyc_1V1],
               (instregex "^[SU]XTB_ZPmZ_[HSD]$",
                          "^[SU]XTH_ZPmZ_[SD]$",
                          "^[SU]XTW_ZPmZ_[D]$")>;
  // Extract
  def : InstRW<[N2Write_2cyc_1V],
               (instrs EXT_ZZI,
                       EXT_ZZI_B)>;

  // Extract narrow saturating
  def : InstRW<[N2Write_4cyc_1V1],
               (instregex "^[SU]QXTN[BT]_ZZ_[BHS]$",
                          "^SQXTUN[BT]_ZZ_[BHS]$")>;

  // Extract/insert operation, SIMD and FP scalar form
  def : InstRW<[N2Write_3cyc_1V1],
               (instregex "^LAST[AB]_VPZ_[BHSD]$",
                          "^INSR_ZV_[BHSD]$")>;

  // Extract/insert operation, scalar
  def : InstRW<[N2Write_5cyc_1V1_1M0],
               (instregex "^LAST[AB]_RPZ_[BHSD]$",
                          "^INSR_ZR_[BHSD]$")>;

  // Histogram operations
  def : InstRW<[N2Write_2cyc_1V],
               (instregex "^HISTCNT_ZPzZZ_[SD]$",
                          "^HISTSEG_ZZZ$")>;

  // Horizontal operations, B, H, S form, immediate operands only
  def : InstRW<[N2Write_4cyc_1V0],
               (instregex "^INDEX_II_[BHS]$")>;

  // Horizontal operations, B, H, S form: scalar + immediate operands,
  // scalar operands only, or immediate + scalar operands
  def : InstRW<[N2Write_7cyc_1M0_1V0],
               (instregex "^INDEX_(IR|RI|RR)_[BHS]$")>;

  // Horizontal operations, D form, immediate operands only
  def : InstRW<[N2Write_5cyc_2V0],
               (instrs INDEX_II_D)>;

  // Horizontal operations, D form: scalar + immediate operands,
  // scalar operands only, or immediate + scalar operands
  def : InstRW<[N2Write_8cyc_2M0_2V0],
               (instregex "^INDEX_(IR|RI|RR)_D$")>;
  // Logical
  def : InstRW<[N2Write_2cyc_1V],
               (instregex "^(AND|EOR|ORR)_ZI$",
                          "^(AND|BIC|EOR|EOR(BT|TB)?|ORR)_ZZZ$",
                          "^EOR(BT|TB)_ZZZ_[BHSD]$",
                          "^(AND|BIC|EOR|NOT|ORR)_ZPmZ_[BHSD]$")>;

  // Max/min, basic and pairwise
  def : InstRW<[N2Write_2cyc_1V],
               (instregex "^[SU](MAX|MIN)_ZI_[BHSD]$",
                          "^[SU](MAX|MIN)P?_ZPmZ_[BHSD]$")>;

  // Matching operations
  def : InstRW<[N2Write_2cyc_1V0_1M],
               (instregex "^N?MATCH_PPzZZ_[BH]$")>;

  // Matrix multiply-accumulate
  def : InstRW<[N2Write_3cyc_1V],
               (instrs SMMLA_ZZZ,
                       UMMLA_ZZZ,
                       USMMLA_ZZZ)>;

  // Move prefix
  def : InstRW<[N2Write_2cyc_1V],
               (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]$",
                          "^MOVPRFX_ZZ$")>;
  // Multiply, B, H, S element size
  def : InstRW<[N2Write_4cyc_1V0],
               (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]$",
                          "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]$")>;

  // Multiply, D element size
  def : InstRW<[N2Write_5cyc_2V0],
               (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D$",
                          "^[SU]MULH_(ZPmZ|ZZZ)_D$")>;

  // Multiply long
  def : InstRW<[N2Write_4cyc_1V0],
               (instregex "^[SU]MULL[BT]_ZZZI_[SD]$",
                          "^[SU]MULL[BT]_ZZZ_[HSD]$")>;

  // Multiply accumulate, B, H, S element size
  def : InstRW<[N2Write_4cyc_1V0],
               (instregex "^ML[AS]_ZZZI_[BHS]$",
                          "^(ML[AS]|MAD|MSB)_ZPmZZ_[BHS]$")>;

  // Multiply accumulate, D element size
  def : InstRW<[N2Write_5cyc_2V0],
               (instregex "^ML[AS]_ZZZI_D$",
                          "^(ML[AS]|MAD|MSB)_ZPmZZ_D$")>;

  // Multiply accumulate long
  def : InstRW<[N2Write_4cyc_1V0],
               (instregex "^[SU]ML[AS]L[BT]_ZZZ_[HSD]$",
                          "^[SU]ML[AS]L[BT]_ZZZI_[SD]$")>;

  // Multiply accumulate saturating doubling long regular
  def : InstRW<[N2Write_4cyc_1V0],
               (instregex "^SQDML[AS](LB|LT|LBT)_ZZZ_[HSD]$",
                          "^SQDML[AS](LB|LT)_ZZZI_[SD]$")>;

  // Multiply saturating doubling high, B, H, S element size
  def : InstRW<[N2Write_4cyc_1V0],
               (instregex "^SQDMULH_ZZZ_[BHS]$",
                          "^SQDMULH_ZZZI_[HS]$")>;

  // Multiply saturating doubling high, D element size
  def : InstRW<[N2Write_5cyc_2V0],
               (instrs SQDMULH_ZZZ_D,
                       SQDMULH_ZZZI_D)>;

  // Multiply saturating doubling long
  def : InstRW<[N2Write_4cyc_1V0],
               (instregex "^SQDMULL[BT]_ZZZ_[HSD]$",
                          "^SQDMULL[BT]_ZZZI_[SD]$")>;
  // Multiply saturating rounding doubling regular/complex accumulate,
  // B, H, S element size
  def : InstRW<[N2Write_4cyc_1V0],
               (instregex "^SQRDML[AS]H_ZZZ_[BHS]$",
                          "^SQRDCMLAH_ZZZ_[BHS]$",
                          "^SQRDML[AS]H_ZZZI_[HS]$",
                          "^SQRDCMLAH_ZZZI_[HS]$")>;

  // Multiply saturating rounding doubling regular/complex accumulate,
  // D element size
  def : InstRW<[N2Write_5cyc_2V0],
               (instregex "^SQRDML[AS]H_ZZZI?_D$",
                          "^SQRDCMLAH_ZZZ_D$")>;

  // Multiply saturating rounding doubling regular/complex, B, H, S element size
  def : InstRW<[N2Write_4cyc_1V0],
               (instregex "^SQRDMULH_ZZZ_[BHS]$",
                          "^SQRDMULH_ZZZI_[HS]$")>;

  // Multiply saturating rounding doubling regular/complex, D element size
  def : InstRW<[N2Write_5cyc_2V0],
               (instregex "^SQRDMULH_ZZZI?_D$")>;

  // Multiply/multiply long, (8x8) polynomial
  def : InstRW<[N2Write_2cyc_1V0],
               (instregex "^PMUL_ZZZ_B$",
                          "^PMULL[BT]_ZZZ_[HDQ]$")>;

  // Predicate counting vector
  def : InstRW<[N2Write_2cyc_1V0],
               (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)[HWD]_ZPiI$")>;

  // Reciprocal estimate
  def : InstRW<[N2Write_4cyc_2V0],
               (instrs URECPE_ZPmZ_S,
                       URSQRTE_ZPmZ_S)>;
  // Reduction, arithmetic, B form
  // NOTE: the four arithmetic-reduction patterns below have no trailing '$',
  // so they match on the name prefix only.
  def : InstRW<[N2Write_11cyc_2V_2V1],
               (instregex "^[SU](ADD|MAX|MIN)V_VPZ_B")>;

  // Reduction, arithmetic, H form
  def : InstRW<[N2Write_9cyc_2V_2V1],
               (instregex "^[SU](ADD|MAX|MIN)V_VPZ_H")>;

  // Reduction, arithmetic, S form
  def : InstRW<[N2Write_8cyc_2V_2V1],
               (instregex "^[SU](ADD|MAX|MIN)V_VPZ_S")>;

  // Reduction, arithmetic, D form
  def : InstRW<[N2Write_8cyc_2V_2V1],
               (instregex "^[SU](ADD|MAX|MIN)V_VPZ_D")>;

  // Reduction, logical
  def : InstRW<[N2Write_6cyc_1V_1V1],
               (instregex "^(ANDV|EORV|ORV)_VPZ_[BHSD]$")>;

  // Reverse, vector
  def : InstRW<[N2Write_2cyc_1V],
               (instregex "^REV_ZZ_[BHSD]$",
                          "^REVB_ZPmZ_[HSD]$",
                          "^REVH_ZPmZ_[SD]$",
                          "^REVW_ZPmZ_D$")>;

  // Select, vector form
  def : InstRW<[N2Write_2cyc_1V],
               (instregex "^SEL_ZPZZ_[BHSD]$")>;

  // Table lookup
  def : InstRW<[N2Write_2cyc_1V],
               (instregex "^TBL_ZZZZ?_[BHSD]$")>;

  // Table lookup extension
  def : InstRW<[N2Write_2cyc_1V],
               (instregex "^TBX_ZZZ_[BHSD]$")>;

  // Transpose, vector form
  def : InstRW<[N2Write_2cyc_1V],
               (instregex "^TRN[12]_ZZZ_[BHSDQ]$")>;

  // Unpack and extend
  def : InstRW<[N2Write_2cyc_1V],
               (instregex "^[SU]UNPK(HI|LO)_ZZ_[HSD]$")>;

  // Zip/unzip
  def : InstRW<[N2Write_2cyc_1V],
               (instregex "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]$")>;
  // SVE floating-point instructions
  // -----------------------------------------------------------------------------

  // Floating point absolute value/difference
  def : InstRW<[N2Write_2cyc_1V],
               (instregex "^FAB[SD]_ZPmZ_[HSD]$")>;

  // Floating point arithmetic
  def : InstRW<[N2Write_2cyc_1V],
               (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]$",
                          "^FADDP_ZPmZZ_[HSD]$",
                          "^FNEG_ZPmZ_[HSD]$",
                          "^FSUBR_ZPm[IZ]_[HSD]$")>;

  // Floating point associative add, F16
  def : InstRW<[N2Write_10cyc_1V1],
               (instrs FADDA_VPZ_H)>;

  // Floating point associative add, F32
  def : InstRW<[N2Write_6cyc_1V1],
               (instrs FADDA_VPZ_S)>;

  // Floating point associative add, F64
  def : InstRW<[N2Write_4cyc_1V],
               (instrs FADDA_VPZ_D)>;

  // Floating point compare
  def : InstRW<[N2Write_2cyc_1V0],
               (instregex "^FACG[ET]_PPzZZ_[HSD]$",
                          "^FCM(EQ|GE|GT|NE)_PPzZ[0Z]_[HSD]$",
                          "^FCM(LE|LT)_PPzZ0_[HSD]$",
                          "^FCMUO_PPzZZ_[HSD]$")>;

  // Floating point complex add
  def : InstRW<[N2Write_3cyc_1V],
               (instregex "^FCADD_ZPmZ_[HSD]$")>;

  // Floating point complex multiply add
  def : InstRW<[N2Write_5cyc_1V],
               (instregex "^FCMLA_ZPmZZ_[HSD]$",
                          "^FCMLA_ZZZI_[HS]$")>;
  // Floating point convert, long or narrow (F16 to F32 or F32 to F16)
  def : InstRW<[N2Write_4cyc_2V0],
               (instregex "^FCVT_ZPmZ_(HtoS|StoH)$",
                          "^FCVTLT_ZPmZ_HtoS$",
                          "^FCVTNT_ZPmZ_StoH$")>;

  // Floating point convert, long or narrow
  // (F16 to F64, F32 to F64, F64 to F32 or F64 to F16)
  def : InstRW<[N2Write_3cyc_1V0],
               (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)$",
                          "^FCVTLT_ZPmZ_StoD$",
                          "^FCVTNT_ZPmZ_DtoS$")>;

  // Floating point convert, round to odd
  def : InstRW<[N2Write_3cyc_1V0],
               (instrs FCVTX_ZPmZ_DtoS,
                       FCVTXNT_ZPmZ_DtoS)>;

  // Floating point base2 log, F16
  def : InstRW<[N2Write_6cyc_4V0],
               (instrs FLOGB_ZPmZ_H)>;

  // Floating point base2 log, F32
  def : InstRW<[N2Write_4cyc_2V0],
               (instrs FLOGB_ZPmZ_S)>;

  // Floating point base2 log, F64
  def : InstRW<[N2Write_3cyc_1V0],
               (instrs FLOGB_ZPmZ_D)>;

  // Floating point convert to integer, F16
  def : InstRW<[N2Write_6cyc_4V0],
               (instregex "^FCVTZ[SU]_ZPmZ_HtoH$")>;

  // Floating point convert to integer, F32
  def : InstRW<[N2Write_4cyc_2V0],
               (instregex "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)$")>;

  // Floating point convert to integer, F64
  def : InstRW<[N2Write_3cyc_1V0],
               (instregex "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)$")>;

  // Floating point copy
  def : InstRW<[N2Write_2cyc_1V],
               (instregex "^FCPY_ZPmI_[HSD]$",
                          "^FDUP_ZI_[HSD]$")>;
  // Floating point divide, F16
  def : InstRW<[N2Write_13cyc_1V0],
               (instregex "^FDIVR?_ZPmZ_H$")>;

  // Floating point divide, F32
  def : InstRW<[N2Write_10cyc_1V0],
               (instregex "^FDIVR?_ZPmZ_S$")>;

  // Floating point divide, F64
  def : InstRW<[N2Write_15cyc_1V0],
               (instregex "^FDIVR?_ZPmZ_D$")>;

  // Floating point min/max pairwise
  def : InstRW<[N2Write_2cyc_1V],
               (instregex "^F(MAX|MIN)(NM)?P_ZPmZZ_[HSD]$")>;

  // Floating point min/max
  def : InstRW<[N2Write_2cyc_1V],
               (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]$")>;

  // Floating point multiply
  def : InstRW<[N2Write_3cyc_1V],
               (instregex "^(FSCALE|FMULX)_ZPmZ_[HSD]$",
                          "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]$")>;

  // Floating point multiply accumulate
  def : InstRW<[N2Write_4cyc_1V],
               (instregex "^FML[AS]_(ZPmZZ|ZZZI)_[HSD]$",
                          "^(FMAD|FNMAD|FNML[AS]|FN?MSB)_ZPmZZ_[HSD]$")>;

  // Floating point multiply add/sub accumulate long
  def : InstRW<[N2Write_4cyc_1V],
               (instregex "^FML[AS]L[BT]_ZZZI?_SHH$")>;

  // Floating point reciprocal estimate, F16
  def : InstRW<[N2Write_6cyc_4V0],
               (instrs FRECPE_ZZ_H,
                       FRECPX_ZPmZ_H,
                       FRSQRTE_ZZ_H)>;

  // Floating point reciprocal estimate, F32
  def : InstRW<[N2Write_4cyc_2V0],
               (instrs FRECPE_ZZ_S,
                       FRECPX_ZPmZ_S,
                       FRSQRTE_ZZ_S)>;

  // Floating point reciprocal estimate, F64
  def : InstRW<[N2Write_3cyc_1V0],
               (instrs FRECPE_ZZ_D,
                       FRECPX_ZPmZ_D,
                       FRSQRTE_ZZ_D)>;

  // Floating point reciprocal step
  def : InstRW<[N2Write_4cyc_1V0],
               (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]$")>;
  // Floating point reduction, F16
  def : InstRW<[N2Write_6cyc_2V],
               (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_H$")>;

  // Floating point reduction, F32
  def : InstRW<[N2Write_4cyc_1V],
               (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_S$")>;

  // Floating point reduction, F64
  def : InstRW<[N2Write_2cyc_1V],
               (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_D$")>;

  // Floating point round to integral, F16
  def : InstRW<[N2Write_6cyc_4V0],
               (instregex "^FRINT[AIMNPXZ]_ZPmZ_H$")>;

  // Floating point round to integral, F32
  def : InstRW<[N2Write_4cyc_2V0],
               (instregex "^FRINT[AIMNPXZ]_ZPmZ_S$")>;

  // Floating point round to integral, F64
  def : InstRW<[N2Write_3cyc_1V0],
               (instregex "^FRINT[AIMNPXZ]_ZPmZ_D$")>;

  // Floating point square root, F16
  def : InstRW<[N2Write_13cyc_1V0],
               (instrs FSQRT_ZPmZ_H)>;

  // Floating point square root, F32
  def : InstRW<[N2Write_10cyc_1V0],
               (instrs FSQRT_ZPmZ_S)>;

  // Floating point square root, F64
  def : InstRW<[N2Write_16cyc_1V0],
               (instrs FSQRT_ZPmZ_D)>;

  // Floating point trigonometric exponentiation
  def : InstRW<[N2Write_3cyc_1V1],
               (instregex "^FEXPA_ZZ_[HSD]$")>;

  // Floating point trigonometric multiply add
  def : InstRW<[N2Write_4cyc_1V],
               (instregex "^FTMAD_ZZI_[HSD]$")>;

  // Floating point trigonometric, miscellaneous
  def : InstRW<[N2Write_3cyc_1V],
               (instregex "^FTS(MUL|SEL)_ZZZ_[HSD]$")>;
  // SVE BFloat16 (BF16) instructions
  // -----------------------------------------------------------------------------

  // Convert, F32 to BF16
  def : InstRW<[N2Write_3cyc_1V0],
               (instrs BFCVT_ZPmZ,
                       BFCVTNT_ZPmZ)>;

  // Dot product
  def : InstRW<[N2Write_4cyc_1V],
               (instrs BFDOT_ZZI,
                       BFDOT_ZZZ)>;

  // Matrix multiply accumulate
  def : InstRW<[N2Write_5cyc_1V],
               (instrs BFMMLA_ZZZ)>;

  // Multiply accumulate long
  def : InstRW<[N2Write_4cyc_1V],
               (instregex "^BFMLAL[BT]_ZZZ(I)?$")>;
  // SVE Load instructions
  // -----------------------------------------------------------------------------

  // Load vector
  def : InstRW<[N2Write_6cyc_1L],
               (instrs LDR_ZXI)>;

  // Load predicate
  def : InstRW<[N2Write_6cyc_1L_1M],
               (instrs LDR_PXI)>;

  // Contiguous load, scalar + imm
  def : InstRW<[N2Write_6cyc_1L],
               (instregex "^LD1[BHWD]_IMM_REAL$",
                          "^LD1S?B_[HSD]_IMM_REAL$",
                          "^LD1S?H_[SD]_IMM_REAL$",
                          "^LD1S?W_D_IMM_REAL$")>;

  // Contiguous load, scalar + scalar
  def : InstRW<[N2Write_6cyc_1L01],
               (instregex "^LD1[BHWD]$",
                          "^LD1S?B_[HSD]$",
                          "^LD1S?H_[SD]$",
                          "^LD1S?W_D$")>;

  // Contiguous load broadcast, scalar + imm
  def : InstRW<[N2Write_6cyc_1L],
               (instregex "^LD1R[BHWD]_IMM$",
                          "^LD1RSW_IMM$",
                          "^LD1RS?B_[HSD]_IMM$",
                          "^LD1RS?H_[SD]_IMM$",
                          "^LD1RS?W_D_IMM$",
                          "^LD1RQ_[BHWD]_IMM$")>;

  // Contiguous load broadcast, scalar + scalar
  def : InstRW<[N2Write_6cyc_1L],
               (instregex "^LD1RQ_[BHWD]$")>;

  // Non temporal load, scalar + imm
  def : InstRW<[N2Write_6cyc_1L],
               (instregex "^LDNT1[BHWD]_ZRI$")>;

  // Non temporal load, scalar + scalar
  def : InstRW<[N2Write_6cyc_1L_1S],
               (instregex "^LDNT1[BHWD]_ZRR$")>;
  // Non temporal gather load, vector + scalar 32-bit element size
  def : InstRW<[N2Write_9cyc_1L_1V],
               (instregex "^LDNT1[BHW]_ZZR_S_REAL$",
                          "^LDNT1S[BH]_ZZR_S_REAL$")>;

  // Non temporal gather load, vector + scalar 64-bit element size
  // (B/H/W forms by pattern, the D form by name; same write class.)
  def : InstRW<[N2Write_10cyc_2L_2V1],
               (instregex "^LDNT1S?[BHW]_ZZR_D_REAL$")>;
  def : InstRW<[N2Write_10cyc_2L_2V1],
               (instrs LDNT1D_ZZR_D_REAL)>;

  // Contiguous first faulting load, scalar + scalar
  def : InstRW<[N2Write_6cyc_1L_1S],
               (instregex "^LDFF1[BHWD]_REAL$",
                          "^LDFF1S?B_[HSD]_REAL$",
                          "^LDFF1S?H_[SD]_REAL$",
                          "^LDFF1S?W_D_REAL$")>;

  // Contiguous non faulting load, scalar + imm
  def : InstRW<[N2Write_6cyc_1L],
               (instregex "^LDNF1[BHWD]_IMM_REAL$",
                          "^LDNF1S?B_[HSD]_IMM_REAL$",
                          "^LDNF1S?H_[SD]_IMM_REAL$",
                          "^LDNF1S?W_D_IMM_REAL$")>;
  // Contiguous Load two structures to two vectors, scalar + imm
  def : InstRW<[N2Write_8cyc_1L_1V],
               (instregex "^LD2[BHWD]_IMM$")>;

  // Contiguous Load two structures to two vectors, scalar + scalar
  def : InstRW<[N2Write_9cyc_1L_1V],
               (instregex "^LD2[BHWD]$")>;

  // Contiguous Load three structures to three vectors, scalar + imm
  def : InstRW<[N2Write_9cyc_1L_1V],
               (instregex "^LD3[BHWD]_IMM$")>;

  // Contiguous Load three structures to three vectors, scalar + scalar
  def : InstRW<[N2Write_10cyc_1V_1L_1S],
               (instregex "^LD3[BHWD]$")>;

  // Contiguous Load four structures to four vectors, scalar + imm
  def : InstRW<[N2Write_9cyc_2L_2V],
               (instregex "^LD4[BHWD]_IMM$")>;

  // Contiguous Load four structures to four vectors, scalar + scalar
  def : InstRW<[N2Write_10cyc_2L_2V_2S],
               (instregex "^LD4[BHWD]$")>;
  // Gather load, vector + imm, 32-bit element size
  // (The optional FF in each pattern also covers the first-faulting forms.)
  def : InstRW<[N2Write_9cyc_1L_1V],
               (instregex "^GLD(FF)?1S?[BH]_S_IMM_REAL$",
                          "^GLD(FF)?1W_IMM_REAL$")>;

  // Gather load, vector + imm, 64-bit element size
  def : InstRW<[N2Write_9cyc_2L_2V],
               (instregex "^GLD(FF)?1S?[BHW]_D_IMM_REAL$",
                          "^GLD(FF)?1D_IMM_REAL$")>;

  // Gather load, 64-bit element size
  def : InstRW<[N2Write_9cyc_2L_2V],
               (instregex "^GLD(FF)?1S?[BHW]_D_[SU]XTW_(SCALED_)?REAL$",
                          "^GLD(FF)?1S?[BHW]_D_(SCALED_)?REAL$",
                          "^GLD(FF)?1D_[SU]XTW_(SCALED_)?REAL$",
                          "^GLD(FF)?1D_(SCALED_)?REAL$")>;
  // Gather load, 32-bit scaled offset
  // Both patterns are end-anchored with '$' so each selects exactly the named
  // opcodes, consistent with the other gather-load patterns in this section.
  // An unanchored instregex matches on the name prefix only and would also
  // pick up any longer opcode name that happens to share the prefix.
  def : InstRW<[N2Write_10cyc_2L_2V],
               (instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED_REAL$",
                          "^GLD(FF)?1W_[SU]XTW_SCALED_REAL$")>;
  // Gather load, 32-bit unpacked unscaled offset
  def : InstRW<[N2Write_9cyc_1L_1V],
               (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW_REAL$",
                          "^GLD(FF)?1W_[SU]XTW_REAL$")>;
  // SVE Store instructions
  // -----------------------------------------------------------------------------

  // Store from predicate reg
  def : InstRW<[N2Write_1cyc_1L01],
               (instrs STR_PXI)>;

  // Store from vector reg
  def : InstRW<[N2Write_2cyc_1L01_1V],
               (instrs STR_ZXI)>;

  // Contiguous store, scalar + imm
  def : InstRW<[N2Write_2cyc_1L01_1V],
               (instregex "^ST1[BHWD]_IMM$",
                          "^ST1B_[HSD]_IMM$",
                          "^ST1H_[SD]_IMM$",
                          "^ST1W_D_IMM$")>;

  // Contiguous store, scalar + scalar
  // The ST1H forms get a separate write class (with an extra _1S component
  // in its name) from the B/W/D forms.
  def : InstRW<[N2Write_2cyc_1L01_1S_1V],
               (instregex "^ST1H(_[SD])?$")>;
  def : InstRW<[N2Write_2cyc_1L01_1V],
               (instregex "^ST1[BWD]$",
                          "^ST1B_[HSD]$",
                          "^ST1W_D$")>;
  // Contiguous store two structures from two vectors, scalar + imm
  def : InstRW<[N2Write_4cyc_1L01_1V],
               (instregex "^ST2[BHWD]_IMM$")>;

  // Contiguous store two structures from two vectors, scalar + scalar (ST2H)
  def : InstRW<[N2Write_4cyc_1L01_1S_1V],
               (instrs ST2H)>;

  // Contiguous store two structures from two vectors, scalar + scalar (B/W/D)
  def : InstRW<[N2Write_4cyc_1L01_1V],
               (instregex "^ST2[BWD]$")>;

  // Contiguous store three structures from three vectors, scalar + imm
  def : InstRW<[N2Write_7cyc_5L01_5V],
               (instregex "^ST3[BHWD]_IMM$")>;

  // Contiguous store three structures from three vectors, scalar + scalar
  // (ST3H)
  def : InstRW<[N2Write_7cyc_5L01_5S_5V],
               (instrs ST3H)>;

  // Contiguous store three structures from three vectors, scalar + scalar
  // (B/W/D)
  def : InstRW<[N2Write_7cyc_5L01_5S_5V],
               (instregex "^ST3[BWD]$")>;

  // Contiguous store four structures from four vectors, scalar + imm
  def : InstRW<[N2Write_11cyc_9L01_9V],
               (instregex "^ST4[BHWD]_IMM$")>;

  // Contiguous store four structures from four vectors, scalar + scalar (ST4H)
  def : InstRW<[N2Write_11cyc_9L01_9S_9V],
               (instrs ST4H)>;

  // Contiguous store four structures from four vectors, scalar + scalar (B/W/D)
  def : InstRW<[N2Write_11cyc_9L01_9S_9V],
               (instregex "^ST4[BWD]$")>;
  // Non temporal store, scalar + imm
  def : InstRW<[N2Write_2cyc_1L01_1V],
               (instregex "^STNT1[BHWD]_ZRI$")>;

  // Non temporal store, scalar + scalar (H form listed separately from B/W/D)
  def : InstRW<[N2Write_2cyc_1L01_1S_1V],
               (instrs STNT1H_ZRR)>;
  def : InstRW<[N2Write_2cyc_1L01_1V],
               (instregex "^STNT1[BWD]_ZRR$")>;

  // Scatter non temporal store, vector + scalar 32-bit element size
  // NOTE: this pattern and the next have no trailing '$' (prefix match).
  def : InstRW<[N2Write_4cyc_2L01_2V],
               (instregex "^STNT1[BHW]_ZZR_S")>;

  // Scatter non temporal store, vector + scalar 64-bit element size
  def : InstRW<[N2Write_2cyc_1L01_1V],
               (instregex "^STNT1[BHWD]_ZZR_D")>;
  // Scatter store, vector + imm addressing.
  //   32-bit element size
  def : InstRW<[N2Write_4cyc_2L01_2V],
               (instregex "^SST1[BH]_S_IMM$", "^SST1W_IMM$")>;
  //   64-bit element size
  def : InstRW<[N2Write_2cyc_1L01_1V],
               (instregex "^SST1[BHW]_D_IMM$", "^SST1D_IMM$")>;

  // Scatter store with a 32-bit ([SU]XTW-extended) offset.
  //   scaled offset
  def : InstRW<[N2Write_4cyc_2L01_2V],
               (instregex "^SST1(H_S|W)_[SU]XTW_SCALED$")>;
  //   unpacked, unscaled offset
  def : InstRW<[N2Write_2cyc_1L01_1V],
               (instregex "^SST1[BHW]_D_[SU]XTW$", "^SST1D_[SU]XTW$")>;
  //   unpacked, scaled offset
  def : InstRW<[N2Write_2cyc_1L01_1V],
               (instregex "^SST1[HW]_D_[SU]XTW_SCALED$",
                          "^SST1D_[SU]XTW_SCALED$")>;
  //   unscaled offset
  def : InstRW<[N2Write_4cyc_2L01_2V],
               (instregex "^SST1[BH]_S_[SU]XTW$", "^SST1W_[SU]XTW$")>;

  // Scatter store with a 64-bit offset.
  //   scaled offset
  def : InstRW<[N2Write_2cyc_1L01_1V],
               (instregex "^SST1[HW]_D_SCALED$", "^SST1D_SCALED$")>;
  //   unscaled offset
  def : InstRW<[N2Write_2cyc_1L01_1V],
               (instregex "^SST1[BHW]_D$", "^SST1D$")>;
  // SVE Miscellaneous instructions
  // -----------------------------------------------------------------------------

  // First-fault register (FFR) reads.
  //   unpredicated read
  def : InstRW<[N2Write_2cyc_1M0],
               (instrs RDFFR_P_REAL)>;
  //   predicated read
  def : InstRW<[N2Write_3cyc_1M0_1M],
               (instrs RDFFR_PPz_REAL)>;
  //   read that also sets the flags
  def : InstRW<[N2Write_4cyc_2M0_2M],
               (instrs RDFFRS_PPz)>;

  // First-fault register updates: SETFFR (set) and WRFFR (write) share one
  // write type.
  def : InstRW<[N2Write_2cyc_1M0],
               (instrs SETFFR, WRFFR)>;

  // Prefetch. The pattern has no trailing '$', so every instruction whose
  // name starts with PRFB / PRFH / PRFW / PRFD is covered.
  def : InstRW<[N2Write_4cyc_1L],
               (instregex "^PRF[BHWD]")>;
  // SVE Cryptographic instructions
  // -----------------------------------------------------------------------------

  // AES: AESD / AESE, and AESMC / AESIMC.
  def : InstRW<[N2Write_2cyc_1V],
               (instregex "^AES[DE]_ZZZ_B$", "^AESI?MC_ZZ_B$")>;

  // SHA3: BCAX, EOR3, RAX1 and XAR.
  def : InstRW<[N2Write_2cyc_1V0],
               (instregex "^(BCAX|EOR3)_ZZZZ$",
                          "^RAX1_ZZZ_D$",
                          "^XAR_ZZZI_[BHSD]$")>;

  // SM4: SM4E and SM4EKEY.
  def : InstRW<[N2Write_4cyc_1V0],
               (instregex "^SM4E(KEY)?_ZZZ_S$")>;
  1744. }