ARMScheduleA57.td 63 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497
  1. //=- ARMScheduleA57.td - ARM Cortex-A57 Scheduling Defs -----*- tablegen -*-=//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file defines the machine model for ARM Cortex-A57 to support
  10. // instruction scheduling and other instruction cost heuristics.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. //===----------------------------------------------------------------------===//
  14. // *** Common description and scheduling model parameters taken from AArch64 ***
  15. // The Cortex-A57 is a traditional superscalar microprocessor with a
  16. // conservative 3-wide in-order stage for decode and dispatch. Combined with the
  17. // much wider out-of-order issue stage, this produced a need to carefully
  18. // schedule micro-ops so that all three decoded each cycle are successfully
  19. // issued as the reservation station(s) simply don't stay occupied for long.
  20. // Therefore, IssueWidth is set to the narrower of the two at three, while still
  21. // modeling the machine as out-of-order.
  22. def IsCPSRDefinedAndPredicated : CheckAll<[IsCPSRDefined, IsPredicated]>;
  23. def IsCPSRDefinedAndPredicatedPred :
  24. MCSchedPredicate<IsCPSRDefinedAndPredicated>;
  25. // Cortex A57 rev. r1p0 or later (false = r0px)
  26. def IsR1P0AndLaterPred : MCSchedPredicate<FalsePred>;
  27. def IsLdrAm3RegOffPred : MCSchedPredicate<CheckInvalidRegOperand<2>>;
  28. def IsLdrAm3RegOffPredX2 : MCSchedPredicate<CheckInvalidRegOperand<3>>;
  29. def IsLdrAm3RegOffPredX3 : MCSchedPredicate<CheckInvalidRegOperand<4>>;
  30. // If Addrmode3 contains "minus register"
  31. class Am3NegativeRegOffset<int n> : MCSchedPredicate<CheckAll<[
  32. CheckValidRegOperand<n>,
  33. CheckAM3OpSub<!add(n, 1)>]>>;
  34. def IsLdrAm3NegRegOffPred : Am3NegativeRegOffset<2>;
  35. def IsLdrAm3NegRegOffPredX2 : Am3NegativeRegOffset<3>;
  36. def IsLdrAm3NegRegOffPredX3 : Am3NegativeRegOffset<4>;
  37. // Load, scaled register offset, not plus LSL2
  38. class ScaledRegNotPlusLsl2<int n> : CheckNot<
  39. CheckAny<[
  40. CheckAM2NoShift<n>,
  41. CheckAll<[
  42. CheckAM2OpAdd<n>,
  43. CheckAM2ShiftLSL<n>,
  44. CheckAM2Offset<n, 2>
  45. ]>
  46. ]>
  47. >;
  48. def IsLdstsoScaledNotOptimalPredX0 : MCSchedPredicate<ScaledRegNotPlusLsl2<2>>;
  49. def IsLdstsoScaledNotOptimalPred : MCSchedPredicate<ScaledRegNotPlusLsl2<3>>;
  50. def IsLdstsoScaledNotOptimalPredX2 : MCSchedPredicate<ScaledRegNotPlusLsl2<4>>;
  51. def IsLdstsoScaledPredX2 : MCSchedPredicate<CheckNot<CheckAM2NoShift<4>>>;
  52. def IsLdstsoMinusRegPredX0 : MCSchedPredicate<CheckAM2OpSub<2>>;
  53. def IsLdstsoMinusRegPred : MCSchedPredicate<CheckAM2OpSub<3>>;
  54. def IsLdstsoMinusRegPredX2 : MCSchedPredicate<CheckAM2OpSub<4>>;
  55. class A57WriteLMOpsListType<list<SchedWriteRes> writes> {
  56. list <SchedWriteRes> Writes = writes;
  57. SchedMachineModel SchedModel = ?;
  58. }
  59. // *** Common description and scheduling model parameters taken from AArch64 ***
  60. // (AArch64SchedA57.td)
  61. def CortexA57Model : SchedMachineModel {
  62. let IssueWidth = 3; // 3-way decode and dispatch
  63. let MicroOpBufferSize = 128; // 128 micro-op re-order buffer
  64. let LoadLatency = 4; // Optimistic load latency
  65. let MispredictPenalty = 16; // Fetch + Decode/Rename/Dispatch + Branch
  66. // Enable partial & runtime unrolling.
  67. let LoopMicroOpBufferSize = 16;
  68. let CompleteModel = 1;
  69. // FIXME: Remove when all errors have been fixed.
  70. let FullInstRWOverlapCheck = 0;
  71. let UnsupportedFeatures = [HasV8_1MMainline, HasMVEInt, HasMVEFloat, IsMClass,
  72. HasFPRegsV8_1M, HasFP16FML, HasMatMulInt8, HasBF16];
  73. }
  74. //===----------------------------------------------------------------------===//
  75. // Define each kind of processor resource and number available on Cortex-A57.
  76. // Cortex A-57 has 8 pipelines that each has its own 8-entry queue where
  77. // micro-ops wait for their operands and then issue out-of-order.
  78. def A57UnitB : ProcResource<1>; // Type B micro-ops
  79. def A57UnitI : ProcResource<2>; // Type I micro-ops
  80. def A57UnitM : ProcResource<1>; // Type M micro-ops
  81. def A57UnitL : ProcResource<1>; // Type L micro-ops
  82. def A57UnitS : ProcResource<1>; // Type S micro-ops
  83. def A57UnitX : ProcResource<1>; // Type X micro-ops (F1)
  84. def A57UnitW : ProcResource<1>; // Type W micro-ops (F0)
  85. let SchedModel = CortexA57Model in {
  86. def A57UnitV : ProcResGroup<[A57UnitX, A57UnitW]>; // Type V micro-ops
  87. }
  88. let SchedModel = CortexA57Model in {
  89. //===----------------------------------------------------------------------===//
  90. // Define customized scheduler read/write types specific to the Cortex-A57.
  91. include "ARMScheduleA57WriteRes.td"
  92. // To have "CompleteModel = 1", support of pseudos and special instructions
  93. def : InstRW<[WriteNoop], (instregex "(t)?BKPT$", "(t2)?CDP(2)?$",
  94. "(t2)?CLREX$", "CONSTPOOL_ENTRY$", "COPY_STRUCT_BYVAL_I32$",
  95. "(t2)?CPS[123]p$", "(t2)?DBG$", "(t2)?DMB$", "(t2)?DSB$", "ERET$",
  96. "(t2|t)?HINT$", "(t)?HLT$", "(t2)?HVC$", "(t2)?ISB$", "ITasm$",
  97. "(t2)?RFE(DA|DB|IA|IB)", "(t)?SETEND", "(t2)?SETPAN", "(t2)?SMC", "SPACE",
  98. "(t2)?SRS(DA|DB|IA|IB)", "SWP(B)?", "t?TRAP", "(t2|t)?UDF$", "t2DCPS", "t2SG",
  99. "t2TT", "tCPS", "CMP_SWAP", "t?SVC", "t2IT", "t__brkdiv0")>;
  100. def : InstRW<[WriteNoop], (instregex "VMRS", "VMSR", "FMSTAT")>;
  101. // Specific memory instrs
  102. def : InstRW<[WriteNoop, WriteNoop], (instregex "(t2)?LDA", "(t2)?LDC", "(t2)?STC",
  103. "(t2)?STL", "(t2)?LDREX", "(t2)?STREX", "MEMCPY")>;
  104. // coprocessor moves
  105. def : InstRW<[WriteNoop, WriteNoop], (instregex
  106. "(t2)?MCR(2|R|R2)?$", "(t2)?MRC(2)?$",
  107. "(t2)?MRRC(2)?$", "(t2)?MRS(banked|sys|_AR|_M|sys_AR)?$",
  108. "(t2)?MSR(banked|i|_AR|_M)?$")>;
  109. // Deprecated instructions
  110. def : InstRW<[WriteNoop], (instregex "FLDM", "FSTM")>;
  111. // Pseudos
  112. def : InstRW<[WriteNoop], (instregex "(t2)?ABS$",
  113. "(t)?ADJCALLSTACKDOWN$", "(t)?ADJCALLSTACKUP$", "(t2|t)?Int_eh_sjlj",
  114. "tLDRpci_pic", "(t2)?SUBS_PC_LR",
  115. "JUMPTABLE", "tInt_WIN_eh_sjlj_longjmp",
  116. "VLD(1|2)LN(d|q)(WB_fixed_|WB_register_)?Asm",
  117. "VLD(3|4)(DUP|LN)?(d|q)(WB_fixed_|WB_register_)?Asm",
  118. "VST(1|2)LN(d|q)(WB_fixed_|WB_register_)?Asm",
  119. "VST(3|4)(DUP|LN)?(d|q)(WB_fixed_|WB_register_)?Asm",
  120. "WIN__CHKSTK", "WIN__DBZCHK")>;
  121. // Miscellaneous
  122. // -----------------------------------------------------------------------------
  123. def : InstRW<[A57Write_1cyc_1I], (instrs COPY)>;
  124. // --- 3.2 Branch Instructions ---
  125. // B, BX, BL, BLX (imm, reg != LR, reg == LR), CBZ, CBNZ
  126. def : InstRW<[A57Write_1cyc_1B], (instregex "(t2|t)?B$", "t?BX", "(t2|t)?Bcc$",
  127. "t?TAILJMP(d|r)", "TCRETURN(d|r)i", "tBfar", "tCBN?Z")>;
  128. def : InstRW<[A57Write_1cyc_1B_1I],
  129. (instregex "t?BL$", "BL_pred$", "t?BLXi", "t?TPsoft")>;
  130. def : InstRW<[A57Write_2cyc_1B_1I], (instregex "BLX", "tBLX(NS)?r")>;
  131. // Pseudos
  132. def : InstRW<[A57Write_2cyc_1B_1I], (instregex "BCCi64", "BCCZi64")>;
  133. def : InstRW<[A57Write_3cyc_1B_1I], (instregex "BR_JTadd", "t?BR_JTr",
  134. "t2BR_JT", "t2BXJ", "(t2)?TB(B|H)(_JT)?$", "tBRIND")>;
  135. def : InstRW<[A57Write_6cyc_1B_1L], (instregex "BR_JTm")>;
  136. // --- 3.3 Arithmetic and Logical Instructions ---
  137. // ADD{S}, ADC{S}, ADR, AND{S}, BIC{S}, CMN, CMP, EOR{S}, ORN{S}, ORR{S},
  138. // RSB{S}, RSC{S}, SUB{S}, SBC{S}, TEQ, TST
  139. def : InstRW<[A57Write_1cyc_1I], (instregex "tADDframe")>;
  140. // Check branch forms of ALU ops:
  141. // check reg 0 for ARM_AM::PC
  142. // if so adds 2 cyc to latency, 1 uop, 1 res cycle for A57UnitB
  143. class A57BranchForm<SchedWriteRes non_br> :
  144. BranchWriteRes<2, 1, [A57UnitB], [1], non_br>;
  145. // shift by register, conditional or unconditional
  146. // TODO: according to the doc, conditional uses I0/I1, unconditional uses M
  147. // Why more complex instruction uses more simple pipeline?
  148. // May be an error in doc.
  149. def A57WriteALUsr : SchedWriteVariant<[
  150. SchedVar<IsPredicatedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1I>>]>,
  151. SchedVar<NoSchedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>]>
  152. ]>;
  153. def A57WriteALUSsr : SchedWriteVariant<[
  154. SchedVar<IsPredicatedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1I>>]>,
  155. SchedVar<NoSchedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>]>
  156. ]>;
  157. def A57ReadALUsr : SchedReadVariant<[
  158. SchedVar<IsPredicatedPred, [ReadDefault]>,
  159. SchedVar<NoSchedPred, [ReadDefault]>
  160. ]>;
  161. def : SchedAlias<WriteALUsi, CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>>;
  162. def : SchedAlias<WriteALUsr, A57WriteALUsr>;
  163. def : SchedAlias<WriteALUSsr, A57WriteALUSsr>;
  164. def : SchedAlias<ReadALUsr, A57ReadALUsr>;
  165. def A57WriteCMPsr : SchedWriteVariant<[
  166. SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>,
  167. SchedVar<NoSchedPred, [A57Write_2cyc_1M]>
  168. ]>;
  169. def : SchedAlias<WriteCMP, A57Write_1cyc_1I>;
  170. def : SchedAlias<WriteCMPsi, A57Write_2cyc_1M>;
  171. def : SchedAlias<WriteCMPsr, A57WriteCMPsr>;
  172. // --- 3.4 Move and Shift Instructions ---
  173. // Move, basic
  174. // MOV{S}, MOVW, MVN{S}
  175. def : InstRW<[A57Write_1cyc_1I], (instregex "MOV(r|i|i16|r_TC)",
  176. "(t2)?MVN(CC)?(r|i)", "BMOVPCB_CALL", "BMOVPCRX_CALL",
  177. "MOVCC(r|i|i16|i32imm)", "tMOV", "tMVN")>;
  178. // Move, shift by immed, setflags/no setflags
  179. // (ASR, LSL, LSR, ROR, RRX)=MOVsi, MVN
  180. // setflags = isCPSRDefined
  181. def A57WriteMOVsi : SchedWriteVariant<[
  182. SchedVar<IsCPSRDefinedPred, [A57Write_2cyc_1M]>,
  183. SchedVar<NoSchedPred, [A57Write_1cyc_1I]>
  184. ]>;
  185. def : InstRW<[A57WriteMOVsi], (instregex "MOV(CC)?si", "MVNsi",
  186. "ASRi", "(t2|t)ASRri", "LSRi", "(t2|t)LSRri", "LSLi", "(t2|t)LSLri", "RORi",
  187. "(t2|t)RORri", "(t2)?RRX", "t2MOV", "tROR")>;
  188. // shift by register, conditional or unconditional, setflags/no setflags
  189. def A57WriteMOVsr : SchedWriteVariant<[
  190. SchedVar<IsCPSRDefinedAndPredicatedPred, [A57Write_2cyc_1I]>,
  191. SchedVar<IsCPSRDefinedPred, [A57Write_2cyc_1M]>,
  192. SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>,
  193. SchedVar<NoSchedPred, [A57Write_1cyc_1I]>
  194. ]>;
  195. def : InstRW<[A57WriteMOVsr], (instregex "MOV(CC)?sr", "MVNsr", "t2MVNs",
  196. "ASRr", "(t2|t)ASRrr", "LSRr", "(t2|t)LSRrr", "LSLr", "(t2|t)?LSLrr", "RORr",
  197. "(t2|t)RORrr")>;
  198. // Move, top
  199. // MOVT - A57Write_2cyc_1M for r0px, A57Write_1cyc_1I for r1p0 and later
  200. def A57WriteMOVT : SchedWriteVariant<[
  201. SchedVar<IsR1P0AndLaterPred, [A57Write_1cyc_1I]>,
  202. SchedVar<NoSchedPred, [A57Write_2cyc_1M]>
  203. ]>;
  204. def : InstRW<[A57WriteMOVT], (instregex "MOVTi16")>;
  205. def A57WriteI2pc :
  206. WriteSequence<[A57Write_1cyc_1I, A57Write_1cyc_1I, A57Write_1cyc_1I]>;
  207. def A57WriteI2ld :
  208. WriteSequence<[A57Write_1cyc_1I, A57Write_1cyc_1I, A57Write_4cyc_1L]>;
  209. def : InstRW< [A57WriteI2pc], (instregex "MOV_ga_pcrel")>;
  210. def : InstRW< [A57WriteI2ld], (instregex "MOV_ga_pcrel_ldr")>;
  211. // +2cyc for branch forms
  212. def : InstRW<[A57Write_3cyc_1I], (instregex "MOVPC(LR|RX)")>;
  213. // --- 3.5 Divide and Multiply Instructions ---
  214. // Divide: SDIV, UDIV
  215. // latency from documentration: 4 ­‐ 20, maximum taken
  216. def : SchedAlias<WriteDIV, A57Write_20cyc_1M>;
  217. // Multiply: tMul not bound to common WriteRes types
  218. def : InstRW<[A57Write_3cyc_1M], (instregex "tMUL")>;
  219. def : SchedAlias<WriteMUL16, A57Write_3cyc_1M>;
  220. def : SchedAlias<WriteMUL32, A57Write_3cyc_1M>;
  221. def : ReadAdvance<ReadMUL, 0>;
  222. // Multiply accumulate: MLA, MLS, SMLABB, SMLABT, SMLATB, SMLATT, SMLAWB,
  223. // SMLAWT, SMLAD{X}, SMLSD{X}, SMMLA{R}, SMMLS{R}
  224. // Multiply-accumulate pipelines support late-forwarding of accumulate operands
  225. // from similar μops, allowing a typical sequence of multiply-accumulate μops
  226. // to issue one every 1 cycle (sched advance = 2).
  227. def A57WriteMLA : SchedWriteRes<[A57UnitM]> { let Latency = 3; }
  228. def A57WriteMLAL : SchedWriteVariant<[
  229. SchedVar<IsCPSRDefinedPred, [A57Write_5cyc_1I_1M]>,
  230. SchedVar<NoSchedPred, [A57Write_4cyc_1M]>
  231. ]>;
  232. def A57ReadMLA : SchedReadAdvance<2, [A57WriteMLA, A57WriteMLAL]>;
  233. def : InstRW<[A57WriteMLA],
  234. (instregex "t2SMLAD", "t2SMLADX", "t2SMLSD", "t2SMLSDX")>;
  235. def : SchedAlias<WriteMAC16, A57WriteMLA>;
  236. def : SchedAlias<WriteMAC32, A57WriteMLA>;
  237. def : SchedAlias<ReadMAC, A57ReadMLA>;
  238. def : SchedAlias<WriteMAC64Lo, A57WriteMLAL>;
  239. def : SchedAlias<WriteMAC64Hi, A57WriteMLAL>;
  240. // Multiply long: SMULL, UMULL
  241. def : SchedAlias<WriteMUL64Lo, A57Write_4cyc_1M>;
  242. def : SchedAlias<WriteMUL64Hi, A57Write_4cyc_1M>;
  243. // --- 3.6 Saturating and Parallel Arithmetic Instructions ---
  244. // Parallel arith
  245. // SADD16, SADD8, SSUB16, SSUB8, UADD16, UADD8, USUB16, USUB8
  246. // Conditional GE-setting instructions require three extra μops
  247. // and two additional cycles to conditionally update the GE field.
  248. def A57WriteParArith : SchedWriteVariant<[
  249. SchedVar<IsPredicatedPred, [A57Write_4cyc_1I_1M]>,
  250. SchedVar<NoSchedPred, [A57Write_2cyc_1I_1M]>
  251. ]>;
  252. def : InstRW< [A57WriteParArith], (instregex
  253. "(t2)?SADD(16|8)", "(t2)?SSUB(16|8)",
  254. "(t2)?UADD(16|8)", "(t2)?USUB(16|8)")>;
  255. // Parallel arith with exchange: SASX, SSAX, UASX, USAX
  256. def A57WriteParArithExch : SchedWriteVariant<[
  257. SchedVar<IsPredicatedPred, [A57Write_5cyc_1I_1M]>,
  258. SchedVar<NoSchedPred, [A57Write_3cyc_1I_1M]>
  259. ]>;
  260. def : InstRW<[A57WriteParArithExch],
  261. (instregex "(t2)?SASX", "(t2)?SSAX", "(t2)?UASX", "(t2)?USAX")>;
  262. // Parallel halving arith
  263. // SHADD16, SHADD8, SHSUB16, SHSUB8, UHADD16, UHADD8, UHSUB16, UHSUB8
  264. def : InstRW<[A57Write_2cyc_1M], (instregex
  265. "(t2)?SHADD(16|8)", "(t2)?SHSUB(16|8)",
  266. "(t2)?UHADD(16|8)", "(t2)?UHSUB(16|8)")>;
  267. // Parallel halving arith with exchange
  268. // SHASX, SHSAX, UHASX, UHSAX
  269. def : InstRW<[A57Write_3cyc_1I_1M], (instregex "(t2)?SHASX", "(t2)?SHSAX",
  270. "(t2)?UHASX", "(t2)?UHSAX")>;
  271. // Parallel saturating arith
  272. // QADD16, QADD8, QSUB16, QSUB8, UQADD16, UQADD8, UQSUB16, UQSUB8
  273. def : InstRW<[A57Write_2cyc_1M], (instregex "QADD(16|8)", "QSUB(16|8)",
  274. "UQADD(16|8)", "UQSUB(16|8)", "t2(U?)QADD", "t2(U?)QSUB")>;
  275. // Parallel saturating arith with exchange
  276. // QASX, QSAX, UQASX, UQSAX
  277. def : InstRW<[A57Write_3cyc_1I_1M], (instregex "(t2)?QASX", "(t2)?QSAX",
  278. "(t2)?UQASX", "(t2)?UQSAX")>;
  279. // Saturate: SSAT, SSAT16, USAT, USAT16
  280. def : InstRW<[A57Write_2cyc_1M],
  281. (instregex "(t2)?SSAT(16)?", "(t2)?USAT(16)?")>;
  282. // Saturating arith: QADD, QSUB
  283. def : InstRW<[A57Write_2cyc_1M], (instregex "QADD$", "QSUB$")>;
  284. // Saturating doubling arith: QDADD, QDSUB
  285. def : InstRW<[A57Write_3cyc_1I_1M], (instregex "(t2)?QDADD", "(t2)?QDSUB")>;
  286. // --- 3.7 Miscellaneous Data-Processing Instructions ---
  287. // Bit field extract: SBFX, UBFX
  288. def : InstRW<[A57Write_1cyc_1I], (instregex "(t2)?SBFX", "(t2)?UBFX")>;
  289. // Bit field insert/clear: BFI, BFC
  290. def : InstRW<[A57Write_2cyc_1M], (instregex "(t2)?BFI", "(t2)?BFC")>;
  291. // Select bytes, conditional/unconditional
  292. def A57WriteSEL : SchedWriteVariant<[
  293. SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>,
  294. SchedVar<NoSchedPred, [A57Write_1cyc_1I]>
  295. ]>;
  296. def : InstRW<[A57WriteSEL], (instregex "(t2)?SEL")>;
  297. // Sign/zero extend, normal: SXTB, SXTH, UXTB, UXTH
  298. def : InstRW<[A57Write_1cyc_1I],
  299. (instregex "(t2|t)?SXT(B|H)$", "(t2|t)?UXT(B|H)$")>;
  300. // Sign/zero extend and add, normal: SXTAB, SXTAH, UXTAB, UXTAH
  301. def : InstRW<[A57Write_2cyc_1M],
  302. (instregex "(t2)?SXTA(B|H)$", "(t2)?UXTA(B|H)$")>;
  303. // Sign/zero extend and add, parallel: SXTAB16, UXTAB16
  304. def : InstRW<[A57Write_4cyc_1M], (instregex "(t2)?SXTAB16", "(t2)?UXTAB16")>;
  305. // Sum of absolute differences: USAD8, USADA8
  306. def : InstRW<[A57Write_3cyc_1M], (instregex "(t2)?USAD8", "(t2)?USADA8")>;
  307. // --- 3.8 Load Instructions ---
  308. // Load, immed offset
  309. // LDR and LDRB have LDRi12 and LDRBi12 forms for immediate
  310. def : InstRW<[A57Write_4cyc_1L], (instregex "LDRi12", "LDRBi12",
  311. "LDRcp", "(t2|t)?LDRConstPool", "LDRLIT_ga_(pcrel|abs)",
  312. "PICLDR", "tLDR")>;
  313. def : InstRW<[A57Write_4cyc_1L],
  314. (instregex "t2LDRS?(B|H)?(pcrel|T|i8|i12|pci|pci_pic|s)?$")>;
  315. // For "Load, register offset, minus" we need +1cyc, +1I
  316. def A57WriteLdrAm3 : SchedWriteVariant<[
  317. SchedVar<IsLdrAm3NegRegOffPred, [A57Write_5cyc_1I_1L]>,
  318. SchedVar<NoSchedPred, [A57Write_4cyc_1L]>
  319. ]>;
  320. def : InstRW<[A57WriteLdrAm3], (instregex "LDR(H|SH|SB)$")>;
  321. def A57WriteLdrAm3X2 : SchedWriteVariant<[
  322. SchedVar<IsLdrAm3NegRegOffPredX2, [A57Write_5cyc_1I_1L]>,
  323. SchedVar<NoSchedPred, [A57Write_4cyc_1L]>
  324. ]>;
  325. def : InstRW<[A57WriteLdrAm3X2, A57WriteLdrAm3X2], (instregex "LDRD$")>;
  326. def : InstRW<[A57Write_4cyc_1L, A57Write_4cyc_1L], (instregex "t2LDRDi8")>;
  327. def A57WriteLdrAmLDSTSO : SchedWriteVariant<[
  328. SchedVar<IsLdstsoScaledNotOptimalPred, [A57Write_5cyc_1I_1L]>,
  329. SchedVar<IsLdstsoMinusRegPred, [A57Write_5cyc_1I_1L]>,
  330. SchedVar<NoSchedPred, [A57Write_4cyc_1L]>
  331. ]>;
  332. def : InstRW<[A57WriteLdrAmLDSTSO], (instregex "LDRrs", "LDRBrs")>;
  333. def A57WrBackOne : SchedWriteRes<[]> {
  334. let Latency = 1;
  335. let NumMicroOps = 0;
  336. }
  337. def A57WrBackTwo : SchedWriteRes<[]> {
  338. let Latency = 2;
  339. let NumMicroOps = 0;
  340. }
  341. def A57WrBackThree : SchedWriteRes<[]> {
  342. let Latency = 3;
  343. let NumMicroOps = 0;
  344. }
  345. // --- LDR pre-indexed ---
  346. // Load, immed pre-indexed (4 cyc for load result, 1 cyc for Base update)
  347. def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackOne], (instregex "LDR_PRE_IMM",
  348. "LDRB_PRE_IMM", "t2LDRB_PRE")>;
  349. // Load, register pre-indexed (4 cyc for load result, 2 cyc for Base update)
  350. // (5 cyc load result for not-lsl2 scaled)
  351. def A57WriteLdrAmLDSTSOPre : SchedWriteVariant<[
  352. SchedVar<IsLdstsoScaledNotOptimalPredX2, [A57Write_5cyc_1I_1L]>,
  353. SchedVar<NoSchedPred, [A57Write_4cyc_1L_1I]>
  354. ]>;
  355. def : InstRW<[A57WriteLdrAmLDSTSOPre, A57WrBackTwo],
  356. (instregex "LDR_PRE_REG", "LDRB_PRE_REG")>;
  357. def A57WriteLdrAm3PreWrBack : SchedWriteVariant<[
  358. SchedVar<IsLdrAm3RegOffPredX2, [A57WrBackTwo]>,
  359. SchedVar<NoSchedPred, [A57WrBackOne]>
  360. ]>;
  361. def : InstRW<[A57Write_4cyc_1L, A57WriteLdrAm3PreWrBack],
  362. (instregex "LDR(H|SH|SB)_PRE")>;
  363. def : InstRW<[A57Write_4cyc_1L, A57WrBackOne],
  364. (instregex "t2LDR(H|SH|SB)?_PRE")>;
  365. // LDRD pre-indexed: 5(2) cyc for reg, 4(1) cyc for imm.
  366. def A57WriteLdrDAm3Pre : SchedWriteVariant<[
  367. SchedVar<IsLdrAm3RegOffPredX3, [A57Write_5cyc_1I_1L]>,
  368. SchedVar<NoSchedPred, [A57Write_4cyc_1L_1I]>
  369. ]>;
  370. def A57WriteLdrDAm3PreWrBack : SchedWriteVariant<[
  371. SchedVar<IsLdrAm3RegOffPredX3, [A57WrBackTwo]>,
  372. SchedVar<NoSchedPred, [A57WrBackOne]>
  373. ]>;
  374. def : InstRW<[A57WriteLdrDAm3Pre, A57WriteLdrDAm3Pre, A57WriteLdrDAm3PreWrBack],
  375. (instregex "LDRD_PRE")>;
  376. def : InstRW<[A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I, A57WrBackOne],
  377. (instregex "t2LDRD_PRE")>;
  378. // --- LDR post-indexed ---
  379. def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackOne], (instregex "LDR(T?)_POST_IMM",
  380. "LDRB(T?)_POST_IMM", "LDR(SB|H|SH)Ti", "t2LDRB_POST")>;
  381. def A57WriteLdrAm3PostWrBack : SchedWriteVariant<[
  382. SchedVar<IsLdrAm3RegOffPred, [A57WrBackTwo]>,
  383. SchedVar<NoSchedPred, [A57WrBackOne]>
  384. ]>;
  385. def : InstRW<[A57Write_4cyc_1L_1I, A57WriteLdrAm3PostWrBack],
  386. (instregex "LDR(H|SH|SB)_POST")>;
  387. def : InstRW<[A57Write_4cyc_1L, A57WrBackOne],
  388. (instregex "t2LDR(H|SH|SB)?_POST")>;
  389. def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackTwo], (instregex "LDR_POST_REG",
  390. "LDRB_POST_REG", "LDR(B?)T_POST$")>;
  391. def A57WriteLdrTRegPost : SchedWriteVariant<[
  392. SchedVar<IsLdstsoScaledPredX2, [A57Write_4cyc_1I_1L_1M]>,
  393. SchedVar<NoSchedPred, [A57Write_4cyc_1L_1I]>
  394. ]>;
  395. def A57WriteLdrTRegPostWrBack : SchedWriteVariant<[
  396. SchedVar<IsLdstsoScaledPredX2, [A57WrBackThree]>,
  397. SchedVar<NoSchedPred, [A57WrBackTwo]>
  398. ]>;
  399. // 4(3) "I0/I1,L,M" for scaled register, otherwise 4(2) "I0/I1,L"
  400. def : InstRW<[A57WriteLdrTRegPost, A57WriteLdrTRegPostWrBack],
  401. (instregex "LDRT_POST_REG", "LDRBT_POST_REG")>;
  402. def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackTwo], (instregex "LDR(SB|H|SH)Tr")>;
  403. def A57WriteLdrAm3PostWrBackX3 : SchedWriteVariant<[
  404. SchedVar<IsLdrAm3RegOffPredX3, [A57WrBackTwo]>,
  405. SchedVar<NoSchedPred, [A57WrBackOne]>
  406. ]>;
  407. // LDRD post-indexed: 4(2) cyc for reg, 4(1) cyc for imm.
  408. def : InstRW<[A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I,
  409. A57WriteLdrAm3PostWrBackX3], (instregex "LDRD_POST")>;
  410. def : InstRW<[A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I, A57WrBackOne],
  411. (instregex "t2LDRD_POST")>;
  412. // --- Preload instructions ---
  413. // Preload, immed offset
  414. def : InstRW<[A57Write_4cyc_1L], (instregex "(t2)?PLDi12", "(t2)?PLDWi12",
  415. "t2PLDW?(i8|pci|s)", "(t2)?PLI")>;
  416. // Preload, register offset,
  417. // 5cyc "I0/I1,L" for minus reg or scaled not plus lsl2
  418. // otherwise 4cyc "L"
  419. def A57WritePLD : SchedWriteVariant<[
  420. SchedVar<IsLdstsoScaledNotOptimalPredX0, [A57Write_5cyc_1I_1L]>,
  421. SchedVar<IsLdstsoMinusRegPredX0, [A57Write_5cyc_1I_1L]>,
  422. SchedVar<NoSchedPred, [A57Write_4cyc_1L]>
  423. ]>;
  424. def : InstRW<[A57WritePLD], (instregex "PLDrs", "PLDWrs")>;
  425. // --- Load multiple instructions ---
  426. foreach NumAddr = 1-8 in {
  427. def A57LMAddrPred#NumAddr : MCSchedPredicate<CheckAny<[
  428. CheckNumOperands<!add(!shl(NumAddr, 1), 2)>,
  429. CheckNumOperands<!add(!shl(NumAddr, 1), 3)>]>>;
  430. def A57LMAddrUpdPred#NumAddr : MCSchedPredicate<CheckAny<[
  431. CheckNumOperands<!add(!shl(NumAddr, 1), 3)>,
  432. CheckNumOperands<!add(!shl(NumAddr, 1), 4)>]>>;
  433. }
  434. def A57LDMOpsListNoregin : A57WriteLMOpsListType<
  435. [A57Write_3cyc_1L, A57Write_3cyc_1L,
  436. A57Write_4cyc_1L, A57Write_4cyc_1L,
  437. A57Write_5cyc_1L, A57Write_5cyc_1L,
  438. A57Write_6cyc_1L, A57Write_6cyc_1L,
  439. A57Write_7cyc_1L, A57Write_7cyc_1L,
  440. A57Write_8cyc_1L, A57Write_8cyc_1L,
  441. A57Write_9cyc_1L, A57Write_9cyc_1L,
  442. A57Write_10cyc_1L, A57Write_10cyc_1L]>;
  443. def A57WriteLDMnoreginlist : SchedWriteVariant<[
  444. SchedVar<A57LMAddrPred1, A57LDMOpsListNoregin.Writes[0-1]>,
  445. SchedVar<A57LMAddrPred2, A57LDMOpsListNoregin.Writes[0-3]>,
  446. SchedVar<A57LMAddrPred3, A57LDMOpsListNoregin.Writes[0-5]>,
  447. SchedVar<A57LMAddrPred4, A57LDMOpsListNoregin.Writes[0-7]>,
  448. SchedVar<A57LMAddrPred5, A57LDMOpsListNoregin.Writes[0-9]>,
  449. SchedVar<A57LMAddrPred6, A57LDMOpsListNoregin.Writes[0-11]>,
  450. SchedVar<A57LMAddrPred7, A57LDMOpsListNoregin.Writes[0-13]>,
  451. SchedVar<A57LMAddrPred8, A57LDMOpsListNoregin.Writes[0-15]>,
  452. SchedVar<NoSchedPred, A57LDMOpsListNoregin.Writes[0-15]>
  453. ]> { let Variadic=1; }
  454. def A57LDMOpsListRegin : A57WriteLMOpsListType<
  455. [A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I,
  456. A57Write_5cyc_1L_1I, A57Write_5cyc_1L_1I,
  457. A57Write_6cyc_1L_1I, A57Write_6cyc_1L_1I,
  458. A57Write_7cyc_1L_1I, A57Write_7cyc_1L_1I,
  459. A57Write_8cyc_1L_1I, A57Write_8cyc_1L_1I,
  460. A57Write_9cyc_1L_1I, A57Write_9cyc_1L_1I,
  461. A57Write_10cyc_1L_1I, A57Write_10cyc_1L_1I,
  462. A57Write_11cyc_1L_1I, A57Write_11cyc_1L_1I]>;
  463. def A57WriteLDMreginlist : SchedWriteVariant<[
  464. SchedVar<A57LMAddrPred1, A57LDMOpsListRegin.Writes[0-1]>,
  465. SchedVar<A57LMAddrPred2, A57LDMOpsListRegin.Writes[0-3]>,
  466. SchedVar<A57LMAddrPred3, A57LDMOpsListRegin.Writes[0-5]>,
  467. SchedVar<A57LMAddrPred4, A57LDMOpsListRegin.Writes[0-7]>,
  468. SchedVar<A57LMAddrPred5, A57LDMOpsListRegin.Writes[0-9]>,
  469. SchedVar<A57LMAddrPred6, A57LDMOpsListRegin.Writes[0-11]>,
  470. SchedVar<A57LMAddrPred7, A57LDMOpsListRegin.Writes[0-13]>,
  471. SchedVar<A57LMAddrPred8, A57LDMOpsListRegin.Writes[0-15]>,
  472. SchedVar<NoSchedPred, A57LDMOpsListRegin.Writes[0-15]>
  473. ]> { let Variadic=1; }
  474. def A57LDMOpsList_Upd : A57WriteLMOpsListType<
  475. [A57WrBackOne,
  476. A57Write_3cyc_1L_1I, A57Write_3cyc_1L_1I,
  477. A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I,
  478. A57Write_5cyc_1L_1I, A57Write_5cyc_1L_1I,
  479. A57Write_6cyc_1L_1I, A57Write_6cyc_1L_1I,
  480. A57Write_7cyc_1L_1I, A57Write_7cyc_1L_1I,
  481. A57Write_8cyc_1L_1I, A57Write_8cyc_1L_1I,
  482. A57Write_9cyc_1L_1I, A57Write_9cyc_1L_1I,
  483. A57Write_10cyc_1L_1I, A57Write_10cyc_1L_1I]>;
  484. def A57WriteLDM_Upd : SchedWriteVariant<[
  485. SchedVar<A57LMAddrUpdPred1, A57LDMOpsList_Upd.Writes[0-2]>,
  486. SchedVar<A57LMAddrUpdPred2, A57LDMOpsList_Upd.Writes[0-4]>,
  487. SchedVar<A57LMAddrUpdPred3, A57LDMOpsList_Upd.Writes[0-6]>,
  488. SchedVar<A57LMAddrUpdPred4, A57LDMOpsList_Upd.Writes[0-8]>,
  489. SchedVar<A57LMAddrUpdPred5, A57LDMOpsList_Upd.Writes[0-10]>,
  490. SchedVar<A57LMAddrUpdPred6, A57LDMOpsList_Upd.Writes[0-12]>,
  491. SchedVar<A57LMAddrUpdPred7, A57LDMOpsList_Upd.Writes[0-14]>,
  492. SchedVar<A57LMAddrUpdPred8, A57LDMOpsList_Upd.Writes[0-16]>,
  493. SchedVar<NoSchedPred, A57LDMOpsList_Upd.Writes[0-16]>
  494. ]> { let Variadic=1; }
  495. def A57WriteLDM : SchedWriteVariant<[
  496. SchedVar<IsLDMBaseRegInListPred, [A57WriteLDMreginlist]>,
  497. SchedVar<NoSchedPred, [A57WriteLDMnoreginlist]>
  498. ]> { let Variadic=1; }
  499. def : InstRW<[A57WriteLDM], (instregex "(t|t2|sys)?LDM(IA|DA|DB|IB)$")>;
  500. // TODO: no writeback latency defined in documentation (implemented as 1 cyc)
  501. def : InstRW<[A57WriteLDM_Upd],
  502. (instregex "(t|t2|sys)?LDM(IA_UPD|DA_UPD|DB_UPD|IB_UPD|IA_RET)", "tPOP")>;
  503. def : InstRW<[A57Write_5cyc_1L], (instregex "VLLDM")>;
  // --- 3.9 Store Instructions ---

  // Store, immed offset: 1cyc on the S unit.
  def : InstRW<[A57Write_1cyc_1S],
               (instregex "STRi12",
                          "STRBi12",
                          "PICSTR",
                          "t2STR(B?)(T|i12|i8|s)",
                          "t2STRDi8",
                          "t2STRH(i12|i8|s)",
                          "tSTR")>;
  // Store, register offset.
  // A minus register offset, or a scaled offset that is not "plus lsl2",
  // costs 3cyc "I0/I1, S"; every other case is 1cyc S.
  def A57WriteStrAmLDSTSO : SchedWriteVariant<[
    SchedVar<IsLdstsoScaledNotOptimalPred, [A57Write_3cyc_1I_1S]>,
    SchedVar<IsLdstsoMinusRegPred,         [A57Write_3cyc_1I_1S]>,
    SchedVar<NoSchedPred,                  [A57Write_1cyc_1S]>
  ]>;
  def : InstRW<[A57WriteStrAmLDSTSO],
               (instregex "STRrs", "STRBrs")>;
  // STRH, STRD: 3cyc "I0/I1, S" for a minus register offset,
  // 1cyc S for an immediate or a plus register offset.
  def A57WriteStrAm3 : SchedWriteVariant<[
    SchedVar<IsLdrAm3NegRegOffPred, [A57Write_3cyc_1I_1S]>,
    SchedVar<NoSchedPred,           [A57Write_1cyc_1S]>
  ]>;
  def : InstRW<[A57WriteStrAm3], (instregex "STRH$")>;

  // Same selection for STRD, using the X2 flavour of the predicate.
  def A57WriteStrAm3X2 : SchedWriteVariant<[
    SchedVar<IsLdrAm3NegRegOffPredX2, [A57Write_3cyc_1I_1S]>,
    SchedVar<NoSchedPred,             [A57Write_1cyc_1S]>
  ]>;
  def : InstRW<[A57WriteStrAm3X2], (instregex "STRD$")>;
  // Store, immed pre-indexed (1cyc "S, I0/I1", 1cyc writeback).
  // The writeback write is listed first, then the store write.
  def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I],
               (instregex "STR_PRE_IMM",
                          "STRB_PRE_IMM",
                          "STR(B)?(r|i)_preidx",
                          "(t2)?STRH_(preidx|PRE)",
                          "t2STR(B?)_(PRE|preidx)",
                          "t2STRD_PRE")>;
  // Store, register pre-indexed -- latency(writeback latency) "pipelines":
  //   1(1) "S, I0/I1" for plus reg
  //   3(2) "I0/I1, S" for minus reg
  //   1(2) "S, M"     for scaled plus lsl2
  //   3(2) "I0/I1, S" for other scaled
  // Store write: variants are checked in the order listed.
  def A57WriteStrAmLDSTSOPre : SchedWriteVariant<[
    SchedVar<IsLdstsoScaledNotOptimalPredX2, [A57Write_3cyc_1I_1S]>,
    SchedVar<IsLdstsoMinusRegPredX2,         [A57Write_3cyc_1I_1S]>,
    SchedVar<IsLdstsoScaledPredX2,           [A57Write_1cyc_1S_1M]>,
    SchedVar<NoSchedPred,                    [A57Write_1cyc_1S_1I]>
  ]>;

  // Matching writeback write: 2cyc for any scaled or minus-register form,
  // 1cyc otherwise.
  def A57WriteStrAmLDSTSOPreWrBack : SchedWriteVariant<[
    SchedVar<IsLdstsoScaledPredX2,   [A57WrBackTwo]>,
    SchedVar<IsLdstsoMinusRegPredX2, [A57WrBackTwo]>,
    SchedVar<NoSchedPred,            [A57WrBackOne]>
  ]>;

  def : InstRW<[A57WriteStrAmLDSTSOPreWrBack, A57WriteStrAmLDSTSOPre],
               (instregex "STR_PRE_REG", "STRB_PRE_REG")>;
  // Pre-indexed STRH/STRD (STRH_PRE, STRD_PRE):
  //   1(1) "S, I0/I1" for imm or reg plus
  //   3(2) "I0/I1, S" for reg minus

  // STRH_PRE: store write and writeback write, keyed on the X2 predicate.
  def A57WriteStrAm3PreX2 : SchedWriteVariant<[
    SchedVar<IsLdrAm3NegRegOffPredX2, [A57Write_3cyc_1I_1S]>,
    SchedVar<NoSchedPred,             [A57Write_1cyc_1S_1I]>
  ]>;
  def A57WriteStrAm3PreWrBackX2 : SchedWriteVariant<[
    SchedVar<IsLdrAm3NegRegOffPredX2, [A57WrBackTwo]>,
    SchedVar<NoSchedPred,             [A57WrBackOne]>
  ]>;
  def : InstRW<[A57WriteStrAm3PreWrBackX2, A57WriteStrAm3PreX2],
               (instregex "STRH_PRE")>;

  // STRD_PRE: same shape, keyed on the X3 predicate.
  def A57WriteStrAm3PreX3 : SchedWriteVariant<[
    SchedVar<IsLdrAm3NegRegOffPredX3, [A57Write_3cyc_1I_1S]>,
    SchedVar<NoSchedPred,             [A57Write_1cyc_1S_1I]>
  ]>;
  def A57WriteStrAm3PreWrBackX3 : SchedWriteVariant<[
    SchedVar<IsLdrAm3NegRegOffPredX3, [A57WrBackTwo]>,
    SchedVar<NoSchedPred,             [A57WrBackOne]>
  ]>;
  def : InstRW<[A57WriteStrAm3PreWrBackX3, A57WriteStrAm3PreX3],
               (instregex "STRD_PRE")>;
  // STR/STRB immediate post-indexed (and t2 post-indexed):
  // 1cyc "S, I0/I1" store write with a 1cyc writeback write.
  def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I],
               (instregex "STR(T?)_POST_IMM",
                          "STRB(T?)_POST_IMM",
                          "t2STR(B?)_POST")>;

  // 1(2) "S, M" for STR/STRB register post-indexed (both scaled or not)
  def : InstRW<[A57WrBackTwo, A57Write_1cyc_1S_1M],
               (instregex "STR(T?)_POST_REG",
                          "STRB(T?)_POST_REG",
                          "STR(B?)T_POST$")>;

  // Post-indexed STRH/STRD (STRH_POST, STRD_POST), STRHTi, STRHTr:
  // 1(1) "S, I0/I1" both for reg or imm
  def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I],
               (instregex "(t2)?STR(H|D)_POST",
                          "STRHT(i|r)",
                          "t2STRHT")>;
  // --- Store multiple instructions ---
  // TODO: no writeback latency defined in documentation

  // STM without writeback: N cycles on S when A57LMAddrPred<N> matches,
  // 2 cycles when none of the predicates match.
  def A57WriteSTM : SchedWriteVariant<[
    SchedVar<A57LMAddrPred1, [A57Write_1cyc_1S]>,
    SchedVar<A57LMAddrPred2, [A57Write_2cyc_1S]>,
    SchedVar<A57LMAddrPred3, [A57Write_3cyc_1S]>,
    SchedVar<A57LMAddrPred4, [A57Write_4cyc_1S]>,
    SchedVar<A57LMAddrPred5, [A57Write_5cyc_1S]>,
    SchedVar<A57LMAddrPred6, [A57Write_6cyc_1S]>,
    SchedVar<A57LMAddrPred7, [A57Write_7cyc_1S]>,
    SchedVar<A57LMAddrPred8, [A57Write_8cyc_1S]>,
    SchedVar<NoSchedPred,    [A57Write_2cyc_1S]>
  ]>;

  // STM with writeback: same latencies, with the I unit added to each write.
  def A57WriteSTM_Upd : SchedWriteVariant<[
    SchedVar<A57LMAddrPred1, [A57Write_1cyc_1S_1I]>,
    SchedVar<A57LMAddrPred2, [A57Write_2cyc_1S_1I]>,
    SchedVar<A57LMAddrPred3, [A57Write_3cyc_1S_1I]>,
    SchedVar<A57LMAddrPred4, [A57Write_4cyc_1S_1I]>,
    SchedVar<A57LMAddrPred5, [A57Write_5cyc_1S_1I]>,
    SchedVar<A57LMAddrPred6, [A57Write_6cyc_1S_1I]>,
    SchedVar<A57LMAddrPred7, [A57Write_7cyc_1S_1I]>,
    SchedVar<A57LMAddrPred8, [A57Write_8cyc_1S_1I]>,
    SchedVar<NoSchedPred,    [A57Write_2cyc_1S_1I]>
  ]>;

  def : InstRW<[A57WriteSTM],
               (instregex "(t2|sys|t)?STM(IA|DA|DB|IB)$")>;
  def : InstRW<[A57WrBackOne, A57WriteSTM_Upd],
               (instregex "(t2|sys|t)?STM(IA_UPD|DA_UPD|DB_UPD|IB_UPD)",
                          "tPUSH")>;

  // VLSTM: 5cyc on the S unit.
  def : InstRW<[A57Write_5cyc_1S], (instregex "VLSTM")>;
  // --- 3.10 FP Data Processing Instructions ---

  // Generic FP ALU writes map to 5cyc on the V unit.
  def : SchedAlias<WriteFPALU32, A57Write_5cyc_1V>;
  def : SchedAlias<WriteFPALU64, A57Write_5cyc_1V>;

  // FP absolute value: 3cyc on V.
  def : InstRW<[A57Write_3cyc_1V], (instregex "VABS(S|D|H)")>;

  // fp compare - 3cyc F1 for unconditional, 6cyc "F0/F1, F1" for conditional
  def A57WriteVcmp : SchedWriteVariant<[
    SchedVar<IsPredicatedPred, [A57Write_6cyc_1V_1X]>,
    SchedVar<NoSchedPred,      [A57Write_3cyc_1X]>
  ]>;
  def : InstRW<[A57WriteVcmp],
               (instregex "VCMP(D|S|H|ZD|ZS|ZH)$",
                          "VCMPE(D|S|H|ZD|ZS|ZH)")>;
  // fp convert: 5cyc on V.
  def : InstRW<[A57Write_5cyc_1V],
               (instregex "VCVT(A|N|P|M)(SH|UH|SS|US|SD|UD)",
                          "VCVT(BDH|THD|TDH)")>;
  def : InstRW<[A57Write_5cyc_1V],
               (instregex "VTOSLS", "VTOUHS", "VTOULS")>;
  def : SchedAlias<WriteFPCVT, A57Write_5cyc_1V>;
  def : InstRW<[A57Write_5cyc_1V], (instregex "VJCVT")>;

  // FP round to integral
  def : InstRW<[A57Write_5cyc_1V],
               (instregex "VRINT(A|N|P|M|Z|R|X)(H|S|D)$")>;

  // FP divide, FP square root: 17cyc (32-bit) / 32cyc (64-bit) on W.
  def : SchedAlias<WriteFPDIV32,  A57Write_17cyc_1W>;
  def : SchedAlias<WriteFPDIV64,  A57Write_32cyc_1W>;
  def : SchedAlias<WriteFPSQRT32, A57Write_17cyc_1W>;
  def : SchedAlias<WriteFPSQRT64, A57Write_32cyc_1W>;
  def : InstRW<[A57Write_17cyc_1W], (instregex "VSQRTH")>;

  // FP max/min
  def : InstRW<[A57Write_5cyc_1V], (instregex "VMAX", "VMIN")>;
  // FP multiply-accumulate pipelines support late forwarding of the result
  // from FP multiply μops to the accumulate operands of an
  // FP multiply-accumulate μop. The latter can potentially be issued 1 cycle
  // after the FP multiply μop has been issued.

  // FP multiply, FZ: 5cyc on V.
  def A57WriteVMUL : SchedWriteRes<[A57UnitV]> {
    let Latency = 5;
  }
  def : SchedAlias<WriteFPMUL32, A57WriteVMUL>;
  def : SchedAlias<WriteFPMUL64, A57WriteVMUL>;
  def : ReadAdvance<ReadFPMUL, 0>;

  // FP multiply accumulate, FZ: 9cyc "F0/F1" or 4 cyc for sequenced accumulate
  // VFMA, VFMS, VFNMA, VFNMS, VMLA, VMLS, VNMLA, VNMLS
  def A57WriteVFMA : SchedWriteRes<[A57UnitV]> {
    let Latency = 9;
  }

  // VFMA takes 9 cyc for common case and 4 cyc for VFMA->VFMA chain (5 read adv.)
  // VMUL takes 5 cyc for common case and 1 cyc for VMUL->VFMA chain (4 read adv.)
  // Currently, there is no way to define different read advances for a VFMA
  // operand coming from VFMA versus from VMUL, so a read advance of 5 is used
  // for both. Zero latency (instead of one) for VMUL->VFMA shouldn't break
  // anything. The same situation applies to the ASIMD VMUL/VFMA instructions.
  // def A57ReadVFMA : SchedRead;
  // def : ReadAdvance<A57ReadVFMA, 5, [A57WriteVFMA]>;
  // def : ReadAdvance<A57ReadVFMA, 4, [A57WriteVMUL]>;
  def A57ReadVFMA5 : SchedReadAdvance<5, [A57WriteVFMA, A57WriteVMUL]>;

  def : SchedAlias<WriteFPMAC32, A57WriteVFMA>;
  def : SchedAlias<WriteFPMAC64, A57WriteVFMA>;
  def : SchedAlias<ReadFPMAC, A57ReadVFMA5>;

  // VMLAH/VMLSH are not bound to scheduling classes by default, so bind them
  // explicitly here:
  def : InstRW<[A57WriteVFMA, A57ReadVFMA5, ReadFPMUL, ReadFPMUL],
               (instregex "VMLAH", "VMLSH", "VNMLAH", "VNMLSH")>;

  def : InstRW<[A57WriteVMUL],
               (instregex "VUDOTD", "VSDOTD", "VUDOTQ", "VSDOTQ")>;

  def : InstRW<[A57Write_3cyc_1V], (instregex "VNEG")>;
  def : InstRW<[A57Write_3cyc_1V], (instregex "VSEL")>;
  // --- 3.11 FP Miscellaneous Instructions ---

  // VMOV: 3cyc "F0/F1" for imm/reg
  def : InstRW<[A57Write_3cyc_1V], (instregex "FCONST(D|S|H)")>;
  def : InstRW<[A57Write_3cyc_1V], (instregex "VMOV(D|S|H)(cc)?$")>;
  def : InstRW<[A57Write_3cyc_1V], (instregex "VINSH")>;

  // 5cyc L for FP transfer, vfp to core reg,
  // 5cyc L for FP transfer, core reg to vfp
  def : SchedAlias<WriteFPMOV, A57Write_5cyc_1L>;

  // VMOVRRS/VMOVRRD are declared in common code with a single WriteFPMOV
  // (instead of 2), so give them two writes here.
  def : InstRW<[A57Write_5cyc_1L, A57Write_5cyc_1L],
               (instregex "VMOV(RRS|RRD)")>;
  // 8cyc "L, F0/F1" for FP transfer, core reg to upper or lower half of vfp
  // D-reg. "F0/F1" is modelled with the V unit elsewhere in this file (e.g. the
  // 8cyc "L, F0/F1" VDUP and VSETLN entries use A57Write_8cyc_1L_1V), so use the
  // L+V write here rather than the L+I write, which would occupy an integer
  // pipe instead of an FP/ASIMD pipe.
  def : InstRW<[A57Write_8cyc_1L_1V], (instregex "VMOVDRR")>;
  // --- 3.12 FP Load Instructions ---

  // VLDR and VLDMQIA: 5cyc on L.
  def : InstRW<[A57Write_5cyc_1L], (instregex "VLDR(D|S|H)")>;
  def : InstRW<[A57Write_5cyc_1L], (instregex "VLDMQIA$")>;
  // FP load multiple (VLDM), unconditional form.
  // Write list: latency steps by one cycle every two entries (5cyc .. 12cyc).
  def A57VLDMOpsListUncond : A57WriteLMOpsListType<[
    A57Write_5cyc_1L,  A57Write_5cyc_1L,
    A57Write_6cyc_1L,  A57Write_6cyc_1L,
    A57Write_7cyc_1L,  A57Write_7cyc_1L,
    A57Write_8cyc_1L,  A57Write_8cyc_1L,
    A57Write_9cyc_1L,  A57Write_9cyc_1L,
    A57Write_10cyc_1L, A57Write_10cyc_1L,
    A57Write_11cyc_1L, A57Write_11cyc_1L,
    A57Write_12cyc_1L, A57Write_12cyc_1L
  ]>;

  // Each A57LMAddrPred<N> selects the first 2*N writes; the fallback takes
  // all 16.
  def A57WriteVLDMuncond : SchedWriteVariant<[
    SchedVar<A57LMAddrPred1, A57VLDMOpsListUncond.Writes[0-1]>,
    SchedVar<A57LMAddrPred2, A57VLDMOpsListUncond.Writes[0-3]>,
    SchedVar<A57LMAddrPred3, A57VLDMOpsListUncond.Writes[0-5]>,
    SchedVar<A57LMAddrPred4, A57VLDMOpsListUncond.Writes[0-7]>,
    SchedVar<A57LMAddrPred5, A57VLDMOpsListUncond.Writes[0-9]>,
    SchedVar<A57LMAddrPred6, A57VLDMOpsListUncond.Writes[0-11]>,
    SchedVar<A57LMAddrPred7, A57VLDMOpsListUncond.Writes[0-13]>,
    SchedVar<NoSchedPred,    A57VLDMOpsListUncond.Writes[0-15]>
  ]> {
    let Variadic = 1;
  }
  // FP load multiple (VLDM), conditional form.
  // Write list: latency grows by one cycle per entry (5cyc .. 20cyc).
  def A57VLDMOpsListCond : A57WriteLMOpsListType<[
    A57Write_5cyc_1L,  A57Write_6cyc_1L,
    A57Write_7cyc_1L,  A57Write_8cyc_1L,
    A57Write_9cyc_1L,  A57Write_10cyc_1L,
    A57Write_11cyc_1L, A57Write_12cyc_1L,
    A57Write_13cyc_1L, A57Write_14cyc_1L,
    A57Write_15cyc_1L, A57Write_16cyc_1L,
    A57Write_17cyc_1L, A57Write_18cyc_1L,
    A57Write_19cyc_1L, A57Write_20cyc_1L
  ]>;

  // Each A57LMAddrPred<N> selects the first 2*N writes; the fallback takes
  // all 16.
  def A57WriteVLDMcond : SchedWriteVariant<[
    SchedVar<A57LMAddrPred1, A57VLDMOpsListCond.Writes[0-1]>,
    SchedVar<A57LMAddrPred2, A57VLDMOpsListCond.Writes[0-3]>,
    SchedVar<A57LMAddrPred3, A57VLDMOpsListCond.Writes[0-5]>,
    SchedVar<A57LMAddrPred4, A57VLDMOpsListCond.Writes[0-7]>,
    SchedVar<A57LMAddrPred5, A57VLDMOpsListCond.Writes[0-9]>,
    SchedVar<A57LMAddrPred6, A57VLDMOpsListCond.Writes[0-11]>,
    SchedVar<A57LMAddrPred7, A57VLDMOpsListCond.Writes[0-13]>,
    SchedVar<NoSchedPred,    A57VLDMOpsListCond.Writes[0-15]>
  ]> {
    let Variadic = 1;
  }
  // VLDM without writeback: predicated instructions use the conditional
  // write sequence, everything else the unconditional one.
  def A57WriteVLDM : SchedWriteVariant<[
    SchedVar<IsPredicatedPred, [A57WriteVLDMcond]>,
    SchedVar<NoSchedPred,      [A57WriteVLDMuncond]>
  ]> {
    let Variadic = 1;
  }
  def : InstRW<[A57WriteVLDM], (instregex "VLDM(DIA|SIA)$")>;
  // VLDM with writeback, unconditional form.
  // Same latency ladder as the non-writeback list (5cyc .. 12cyc, stepping
  // every two entries), with the I unit added to each write.
  def A57VLDMOpsListUncond_Upd : A57WriteLMOpsListType<[
    A57Write_5cyc_1L_1I,  A57Write_5cyc_1L_1I,
    A57Write_6cyc_1L_1I,  A57Write_6cyc_1L_1I,
    A57Write_7cyc_1L_1I,  A57Write_7cyc_1L_1I,
    A57Write_8cyc_1L_1I,  A57Write_8cyc_1L_1I,
    A57Write_9cyc_1L_1I,  A57Write_9cyc_1L_1I,
    A57Write_10cyc_1L_1I, A57Write_10cyc_1L_1I,
    A57Write_11cyc_1L_1I, A57Write_11cyc_1L_1I,
    A57Write_12cyc_1L_1I, A57Write_12cyc_1L_1I
  ]>;

  // Each A57LMAddrPred<N> selects the first 2*N writes; the fallback takes
  // all 16.
  def A57WriteVLDMuncond_UPD : SchedWriteVariant<[
    SchedVar<A57LMAddrPred1, A57VLDMOpsListUncond_Upd.Writes[0-1]>,
    SchedVar<A57LMAddrPred2, A57VLDMOpsListUncond_Upd.Writes[0-3]>,
    SchedVar<A57LMAddrPred3, A57VLDMOpsListUncond_Upd.Writes[0-5]>,
    SchedVar<A57LMAddrPred4, A57VLDMOpsListUncond_Upd.Writes[0-7]>,
    SchedVar<A57LMAddrPred5, A57VLDMOpsListUncond_Upd.Writes[0-9]>,
    SchedVar<A57LMAddrPred6, A57VLDMOpsListUncond_Upd.Writes[0-11]>,
    SchedVar<A57LMAddrPred7, A57VLDMOpsListUncond_Upd.Writes[0-13]>,
    SchedVar<NoSchedPred,    A57VLDMOpsListUncond_Upd.Writes[0-15]>
  ]> {
    let Variadic = 1;
  }
  // VLDM with writeback, conditional form.
  // Latency grows by one cycle per entry (5cyc .. 20cyc), each on L + I.
  def A57VLDMOpsListCond_Upd : A57WriteLMOpsListType<[
    A57Write_5cyc_1L_1I,  A57Write_6cyc_1L_1I,
    A57Write_7cyc_1L_1I,  A57Write_8cyc_1L_1I,
    A57Write_9cyc_1L_1I,  A57Write_10cyc_1L_1I,
    A57Write_11cyc_1L_1I, A57Write_12cyc_1L_1I,
    A57Write_13cyc_1L_1I, A57Write_14cyc_1L_1I,
    A57Write_15cyc_1L_1I, A57Write_16cyc_1L_1I,
    A57Write_17cyc_1L_1I, A57Write_18cyc_1L_1I,
    A57Write_19cyc_1L_1I, A57Write_20cyc_1L_1I
  ]>;

  // Each A57LMAddrPred<N> selects the first 2*N writes; the fallback takes
  // all 16.
  def A57WriteVLDMcond_UPD : SchedWriteVariant<[
    SchedVar<A57LMAddrPred1, A57VLDMOpsListCond_Upd.Writes[0-1]>,
    SchedVar<A57LMAddrPred2, A57VLDMOpsListCond_Upd.Writes[0-3]>,
    SchedVar<A57LMAddrPred3, A57VLDMOpsListCond_Upd.Writes[0-5]>,
    SchedVar<A57LMAddrPred4, A57VLDMOpsListCond_Upd.Writes[0-7]>,
    SchedVar<A57LMAddrPred5, A57VLDMOpsListCond_Upd.Writes[0-9]>,
    SchedVar<A57LMAddrPred6, A57VLDMOpsListCond_Upd.Writes[0-11]>,
    SchedVar<A57LMAddrPred7, A57VLDMOpsListCond_Upd.Writes[0-13]>,
    SchedVar<NoSchedPred,    A57VLDMOpsListCond_Upd.Writes[0-15]>
  ]> {
    let Variadic = 1;
  }
  // VLDM with writeback: predicated instructions use the conditional write
  // sequence, everything else the unconditional one.
  def A57WriteVLDM_UPD : SchedWriteVariant<[
    SchedVar<IsPredicatedPred, [A57WriteVLDMcond_UPD]>,
    SchedVar<NoSchedPred,      [A57WriteVLDMuncond_UPD]>
  ]> {
    let Variadic = 1;
  }
  // The writeback write (A57WrBackOne) is listed first.
  def : InstRW<[A57WrBackOne, A57WriteVLDM_UPD],
               (instregex "VLDM(DIA_UPD|DDB_UPD|SIA_UPD|SDB_UPD)")>;
  // --- 3.13 FP Store Instructions ---

  // VSTR: 1cyc on S. VSTMQIA: 2cyc on S.
  def : InstRW<[A57Write_1cyc_1S], (instregex "VSTR(D|S|H)")>;
  def : InstRW<[A57Write_2cyc_1S], (instregex "VSTMQIA$")>;
  // VSTM (bound to VSTMSIA below in this file), no writeback: N cycles on S
  // when A57LMAddrPred<N> matches; 2 cycles when no predicate matches.
  def A57WriteVSTMs : SchedWriteVariant<[
    SchedVar<A57LMAddrPred1, [A57Write_1cyc_1S]>,
    SchedVar<A57LMAddrPred2, [A57Write_2cyc_1S]>,
    SchedVar<A57LMAddrPred3, [A57Write_3cyc_1S]>,
    SchedVar<A57LMAddrPred4, [A57Write_4cyc_1S]>,
    SchedVar<A57LMAddrPred5, [A57Write_5cyc_1S]>,
    SchedVar<A57LMAddrPred6, [A57Write_6cyc_1S]>,
    SchedVar<A57LMAddrPred7, [A57Write_7cyc_1S]>,
    SchedVar<A57LMAddrPred8, [A57Write_8cyc_1S]>,
    SchedVar<NoSchedPred,    [A57Write_2cyc_1S]>
  ]>;
  // VSTM (bound to VSTMDIA below in this file), no writeback: 2*N cycles on S
  // when A57LMAddrPred<N> matches; 4 cycles when no predicate matches.
  def A57WriteVSTMd : SchedWriteVariant<[
    SchedVar<A57LMAddrPred1, [A57Write_2cyc_1S]>,
    SchedVar<A57LMAddrPred2, [A57Write_4cyc_1S]>,
    SchedVar<A57LMAddrPred3, [A57Write_6cyc_1S]>,
    SchedVar<A57LMAddrPred4, [A57Write_8cyc_1S]>,
    SchedVar<A57LMAddrPred5, [A57Write_10cyc_1S]>,
    SchedVar<A57LMAddrPred6, [A57Write_12cyc_1S]>,
    SchedVar<A57LMAddrPred7, [A57Write_14cyc_1S]>,
    SchedVar<A57LMAddrPred8, [A57Write_16cyc_1S]>,
    SchedVar<NoSchedPred,    [A57Write_4cyc_1S]>
  ]>;
  // VSTM with writeback (bound to VSTMSIA_UPD/VSTMSDB_UPD below in this file):
  // N cycles on S + I when A57LMAddrPred<N> matches; 2 cycles otherwise.
  def A57WriteVSTMs_Upd : SchedWriteVariant<[
    SchedVar<A57LMAddrPred1, [A57Write_1cyc_1S_1I]>,
    SchedVar<A57LMAddrPred2, [A57Write_2cyc_1S_1I]>,
    SchedVar<A57LMAddrPred3, [A57Write_3cyc_1S_1I]>,
    SchedVar<A57LMAddrPred4, [A57Write_4cyc_1S_1I]>,
    SchedVar<A57LMAddrPred5, [A57Write_5cyc_1S_1I]>,
    SchedVar<A57LMAddrPred6, [A57Write_6cyc_1S_1I]>,
    SchedVar<A57LMAddrPred7, [A57Write_7cyc_1S_1I]>,
    SchedVar<A57LMAddrPred8, [A57Write_8cyc_1S_1I]>,
    SchedVar<NoSchedPred,    [A57Write_2cyc_1S_1I]>
  ]>;
  // VSTM with writeback (bound to VSTMDIA_UPD/VSTMDDB_UPD below in this file):
  // 2*N cycles on S + I when A57LMAddrPred<N> matches.
  //
  // Fallback: the sibling tables (A57WriteVSTMs, A57WriteVSTMd,
  // A57WriteVSTMs_Upd) all fall back to the same write as their A57LMAddrPred2
  // entry. This table previously fell back to A57Write_2cyc_1S_1I, i.e. its
  // A57LMAddrPred1 entry, which looks like a copy-paste from the VSTMs_Upd
  // table. Use the 4cyc write so the default matches A57WriteVSTMd.
  def A57WriteVSTMd_Upd : SchedWriteVariant<[
    SchedVar<A57LMAddrPred1, [A57Write_2cyc_1S_1I]>,
    SchedVar<A57LMAddrPred2, [A57Write_4cyc_1S_1I]>,
    SchedVar<A57LMAddrPred3, [A57Write_6cyc_1S_1I]>,
    SchedVar<A57LMAddrPred4, [A57Write_8cyc_1S_1I]>,
    SchedVar<A57LMAddrPred5, [A57Write_10cyc_1S_1I]>,
    SchedVar<A57LMAddrPred6, [A57Write_12cyc_1S_1I]>,
    SchedVar<A57LMAddrPred7, [A57Write_14cyc_1S_1I]>,
    SchedVar<A57LMAddrPred8, [A57Write_16cyc_1S_1I]>,
    SchedVar<NoSchedPred,    [A57Write_4cyc_1S_1I]>
  ]>;
  // Bind the VSTM opcodes to the variants above. The writeback forms list the
  // writeback write (A57WrBackOne) first.
  def : InstRW<[A57WriteVSTMs], (instregex "VSTMSIA$")>;
  def : InstRW<[A57WriteVSTMd], (instregex "VSTMDIA$")>;
  def : InstRW<[A57WrBackOne, A57WriteVSTMs_Upd],
               (instregex "VSTM(SIA_UPD|SDB_UPD)")>;
  def : InstRW<[A57WrBackOne, A57WriteVSTMd_Upd],
               (instregex "VSTM(DIA_UPD|DDB_UPD)")>;
  // --- 3.14 ASIMD Integer Instructions ---

  // ASIMD absolute diff, 3cyc F0/F1 for integer VABD
  def : InstRW<[A57Write_3cyc_1V], (instregex "VABD(s|u)")>;

  // ASIMD absolute diff accum: 4(1) F1 for D-form, 5(2) F1 for Q-form.
  // The accumulate-chain latency comes from a read advance of 3 on the
  // matching write (4 - 3 = 1, 5 - 3 = 2).
  def A57WriteVABAD : SchedWriteRes<[A57UnitX]> {
    let Latency = 4;
  }
  def A57ReadVABAD : SchedReadAdvance<3, [A57WriteVABAD]>;
  def : InstRW<[A57WriteVABAD, A57ReadVABAD],
               (instregex "VABA(s|u)(v8i8|v4i16|v2i32)")>;

  def A57WriteVABAQ : SchedWriteRes<[A57UnitX]> {
    let Latency = 5;
  }
  def A57ReadVABAQ : SchedReadAdvance<3, [A57WriteVABAQ]>;
  def : InstRW<[A57WriteVABAQ, A57ReadVABAQ],
               (instregex "VABA(s|u)(v16i8|v8i16|v4i32)")>;

  // ASIMD absolute diff accum long: 4(1) F1 for VABAL
  def A57WriteVABAL : SchedWriteRes<[A57UnitX]> {
    let Latency = 4;
  }
  def A57ReadVABAL : SchedReadAdvance<3, [A57WriteVABAL]>;
  def : InstRW<[A57WriteVABAL, A57ReadVABAL],
               (instregex "VABAL(s|u)")>;

  // ASIMD absolute diff long: 3cyc F0/F1 for VABDL
  def : InstRW<[A57Write_3cyc_1V], (instregex "VABDL(s|u)")>;
  // ASIMD arith, basic: 3cyc on V.
  def : InstRW<[A57Write_3cyc_1V],
               (instregex "VADDv",
                          "VADDL",
                          "VADDW",
                          "VNEG(s8d|s16d|s32d|s8q|s16q|s32q|d|q)",
                          "VPADDi",
                          "VPADDL",
                          "VSUBv",
                          "VSUBL",
                          "VSUBW")>;

  // ASIMD arith, complex: 3cyc on V.
  def : InstRW<[A57Write_3cyc_1V],
               (instregex "VABS",
                          "VADDHN",
                          "VHADD",
                          "VHSUB",
                          "VQABS",
                          "VQADD",
                          "VQNEG",
                          "VQSUB",
                          "VRADDHN",
                          "VRHADD",
                          "VRSUBHN",
                          "VSUBHN")>;

  // ASIMD compare
  def : InstRW<[A57Write_3cyc_1V],
               (instregex "VCEQ", "VCGE", "VCGT", "VCLE", "VTST", "VCLT")>;

  // ASIMD logical
  def : InstRW<[A57Write_3cyc_1V],
               (instregex "VAND", "VBIC", "VMVN", "VORR", "VORN", "VEOR")>;

  // ASIMD max/min
  def : InstRW<[A57Write_3cyc_1V],
               (instregex "(VMAX|VMIN)(s|u)",
                          "(VPMAX|VPMIN)(s8|s16|s32|u8|u16|u32)")>;
  // ASIMD multiply, D-form: 5cyc F0 for r0px, 4cyc F0 for r1p0 and later.
  // Cortex-A57 r1p0 and later reduce the latency of ASIMD multiply
  // and multiply-with-accumulate instructions relative to r0pX.
  def A57WriteVMULD_VecInt : SchedWriteVariant<[
    SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
    SchedVar<NoSchedPred,        [A57Write_5cyc_1W]>
  ]>;
  def : InstRW<[A57WriteVMULD_VecInt],
               (instregex "VMUL(v8i8|v4i16|v2i32|pd)",
                          "VMULsl(v4i16|v2i32)",
                          "VQDMULH(sl)?(v4i16|v2i32)",
                          "VQRDMULH(sl)?(v4i16|v2i32)")>;

  // ASIMD multiply, Q-form: 6cyc F0 for r0px, 5cyc F0 for r1p0 and later
  def A57WriteVMULQ_VecInt : SchedWriteVariant<[
    SchedVar<IsR1P0AndLaterPred, [A57Write_5cyc_1W]>,
    SchedVar<NoSchedPred,        [A57Write_6cyc_1W]>
  ]>;
  def : InstRW<[A57WriteVMULQ_VecInt],
               (instregex "VMUL(v16i8|v8i16|v4i32|pq)",
                          "VMULsl(v8i16|v4i32)",
                          "VQDMULH(sl)?(v8i16|v4i32)",
                          "VQRDMULH(sl)?(v8i16|v4i32)")>;
  // ASIMD multiply accumulate, D-form
  // 5cyc F0 for r0px, 4cyc F0 for r1p0 and later, 1cyc for accumulate sequence
  // (4 or 3 ReadAdvance)
  def A57WriteVMLAD_VecInt : SchedWriteVariant<[
    SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
    SchedVar<NoSchedPred,        [A57Write_5cyc_1W]>
  ]>;
  def A57ReadVMLAD_VecInt : SchedReadVariant<[
    SchedVar<IsR1P0AndLaterPred,
             [SchedReadAdvance<3, [A57WriteVMLAD_VecInt]>]>,
    SchedVar<NoSchedPred,
             [SchedReadAdvance<4, [A57WriteVMLAD_VecInt]>]>
  ]>;
  def : InstRW<[A57WriteVMLAD_VecInt, A57ReadVMLAD_VecInt],
               (instregex "VMLA(sl)?(v8i8|v4i16|v2i32)",
                          "VMLS(sl)?(v8i8|v4i16|v2i32)")>;

  // ASIMD multiply accumulate, Q-form
  // 6cyc F0 for r0px, 5cyc F0 for r1p0 and later, 2cyc for accumulate sequence
  // (4 or 3 ReadAdvance)
  def A57WriteVMLAQ_VecInt : SchedWriteVariant<[
    SchedVar<IsR1P0AndLaterPred, [A57Write_5cyc_1W]>,
    SchedVar<NoSchedPred,        [A57Write_6cyc_1W]>
  ]>;
  def A57ReadVMLAQ_VecInt : SchedReadVariant<[
    SchedVar<IsR1P0AndLaterPred,
             [SchedReadAdvance<3, [A57WriteVMLAQ_VecInt]>]>,
    SchedVar<NoSchedPred,
             [SchedReadAdvance<4, [A57WriteVMLAQ_VecInt]>]>
  ]>;
  def : InstRW<[A57WriteVMLAQ_VecInt, A57ReadVMLAQ_VecInt],
               (instregex "VMLA(sl)?(v16i8|v8i16|v4i32)",
                          "VMLS(sl)?(v16i8|v8i16|v4i32)")>;

  // ASIMD multiply accumulate long
  // 5cyc F0 for r0px, 4cyc F0 for r1p0 and later, 1cyc for accumulate sequence
  // (4 or 3 ReadAdvance)
  def A57WriteVMLAL_VecInt : SchedWriteVariant<[
    SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
    SchedVar<NoSchedPred,        [A57Write_5cyc_1W]>
  ]>;
  def A57ReadVMLAL_VecInt : SchedReadVariant<[
    SchedVar<IsR1P0AndLaterPred,
             [SchedReadAdvance<3, [A57WriteVMLAL_VecInt]>]>,
    SchedVar<NoSchedPred,
             [SchedReadAdvance<4, [A57WriteVMLAL_VecInt]>]>
  ]>;
  def : InstRW<[A57WriteVMLAL_VecInt, A57ReadVMLAL_VecInt],
               (instregex "VMLAL(s|u)", "VMLSL(s|u)")>;
  // ASIMD multiply accumulate saturating long
  // 5cyc F0 for r0px, 4cyc F0 for r1p0 and later, 2cyc for accumulate sequence
  // (3 or 2 ReadAdvance)
  def A57WriteVQDMLAL_VecInt : SchedWriteVariant<[
    SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
    SchedVar<NoSchedPred,        [A57Write_5cyc_1W]>
  ]>;
  def A57ReadVQDMLAL_VecInt : SchedReadVariant<[
    SchedVar<IsR1P0AndLaterPred,
             [SchedReadAdvance<2, [A57WriteVQDMLAL_VecInt]>]>,
    SchedVar<NoSchedPred,
             [SchedReadAdvance<3, [A57WriteVQDMLAL_VecInt]>]>
  ]>;
  def : InstRW<[A57WriteVQDMLAL_VecInt, A57ReadVQDMLAL_VecInt],
               (instregex "VQDMLAL", "VQDMLSL")>;

  // Vector Saturating Rounding Doubling Multiply Accumulate/Subtract Long.
  // Reuses the VQDMLAL/VQDMLSL scheduling info.
  def : InstRW<[A57WriteVQDMLAL_VecInt, A57ReadVQDMLAL_VecInt],
               (instregex "VQRDMLAH", "VQRDMLSH")>;

  // ASIMD multiply long
  // 5cyc F0 for r0px, 4cyc F0 for r1p0 and later
  def A57WriteVMULL_VecInt : SchedWriteVariant<[
    SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
    SchedVar<NoSchedPred,        [A57Write_5cyc_1W]>
  ]>;
  def : InstRW<[A57WriteVMULL_VecInt],
               (instregex "VMULL(s|u|p8|sls|slu)", "VQDMULL")>;
  // ASIMD pairwise add and accumulate
  // 4cyc F1, 1cyc for accumulate sequence (3cyc ReadAdvance)
  def A57WriteVPADAL : SchedWriteRes<[A57UnitX]> {
    let Latency = 4;
  }
  def A57ReadVPADAL : SchedReadAdvance<3, [A57WriteVPADAL]>;
  def : InstRW<[A57WriteVPADAL, A57ReadVPADAL],
               (instregex "VPADAL(s|u)")>;

  // ASIMD shift accumulate
  // 4cyc F1, 1cyc for accumulate sequence (3cyc ReadAdvance)
  def A57WriteVSRA : SchedWriteRes<[A57UnitX]> {
    let Latency = 4;
  }
  def A57ReadVSRA : SchedReadAdvance<3, [A57WriteVSRA]>;
  def : InstRW<[A57WriteVSRA, A57ReadVSRA],
               (instregex "VSRA", "VRSRA")>;
  // ASIMD shift by immed, basic: 3cyc on X.
  def : InstRW<[A57Write_3cyc_1X],
               (instregex "VMOVL", "VSHLi", "VSHLL", "VSHR(s|u)", "VSHRN")>;

  // ASIMD shift by immed, complex: 4cyc on X.
  def : InstRW<[A57Write_4cyc_1X],
               (instregex "VQRSHRN",
                          "VQRSHRUN",
                          "VQSHL(si|ui|su)",
                          "VQSHRN",
                          "VQSHRUN",
                          "VRSHR(s|u)",
                          "VRSHRN")>;

  // ASIMD shift by immed and insert, basic, D-form
  def : InstRW<[A57Write_4cyc_1X],
               (instregex "VSLI(v8i8|v4i16|v2i32|v1i64)",
                          "VSRI(v8i8|v4i16|v2i32|v1i64)")>;

  // ASIMD shift by immed and insert, basic, Q-form
  def : InstRW<[A57Write_5cyc_1X],
               (instregex "VSLI(v16i8|v8i16|v4i32|v2i64)",
                          "VSRI(v16i8|v8i16|v4i32|v2i64)")>;

  // ASIMD shift by register, basic, D-form
  def : InstRW<[A57Write_3cyc_1X],
               (instregex "VSHL(s|u)(v8i8|v4i16|v2i32|v1i64)")>;

  // ASIMD shift by register, basic, Q-form
  def : InstRW<[A57Write_4cyc_1X],
               (instregex "VSHL(s|u)(v16i8|v8i16|v4i32|v2i64)")>;

  // ASIMD shift by register, complex, D-form
  // VQRSHL, VQSHL, VRSHL
  def : InstRW<[A57Write_4cyc_1X],
               (instregex "VQRSHL(s|u)(v8i8|v4i16|v2i32|v1i64)",
                          "VQSHL(s|u)(v8i8|v4i16|v2i32|v1i64)",
                          "VRSHL(s|u)(v8i8|v4i16|v2i32|v1i64)")>;

  // ASIMD shift by register, complex, Q-form
  def : InstRW<[A57Write_5cyc_1X],
               (instregex "VQRSHL(s|u)(v16i8|v8i16|v4i32|v2i64)",
                          "VQSHL(s|u)(v16i8|v8i16|v4i32|v2i64)",
                          "VRSHL(s|u)(v16i8|v8i16|v4i32|v2i64)")>;
  // --- 3.15 ASIMD Floating-Point Instructions ---

  // ASIMD FP absolute value
  def : InstRW<[A57Write_3cyc_1V], (instregex "VABS(fd|fq|hd|hq)")>;

  // ASIMD FP arith
  def : InstRW<[A57Write_5cyc_1V],
               (instregex "VABD(fd|fq|hd|hq)",
                          "VADD(fd|fq|hd|hq)",
                          "VPADD(f|h)",
                          "VSUB(fd|fq|hd|hq)")>;
  def : InstRW<[A57Write_5cyc_1V], (instregex "VCADD", "VCMLA")>;

  // ASIMD FP compare
  def : InstRW<[A57Write_5cyc_1V],
               (instregex "VAC(GE|GT|LE|LT)",
                          "VC(EQ|GE|GT|LE)(fd|fq|hd|hq)")>;

  // ASIMD FP convert, integer
  def : InstRW<[A57Write_5cyc_1V],
               (instregex "VCVT(f2sd|f2ud|s2fd|u2fd|f2sq|f2uq|s2fq|u2fq|f2xsd|f2xud|xs2fd|xu2fd)",
                          "VCVT(f2xsq|f2xuq|xs2fq|xu2fq)",
                          "VCVT(AN|MN|NN|PN)(SDf|SQf|UDf|UQf|SDh|SQh|UDh|UQh)")>;

  // ASIMD FP convert, half-precision: 8cyc F0/F1
  def : InstRW<[A57Write_8cyc_1V],
               (instregex "VCVT(h2sd|h2ud|s2hd|u2hd|h2sq|h2uq|s2hq|u2hq|h2xsd|h2xud|xs2hd|xu2hd)",
                          "VCVT(h2xsq|h2xuq|xs2hq|xu2hq)",
                          "VCVT(f2h|h2f)")>;

  // ASIMD FP max/min
  def : InstRW<[A57Write_5cyc_1V],
               (instregex "(VMAX|VMIN)(fd|fq|hd|hq)",
                          "(VPMAX|VPMIN)(f|h)",
                          "(NEON|VFP)_VMAXNM",
                          "(NEON|VFP)_VMINNM")>;

  // ASIMD FP multiply: 5cyc on V.
  def A57WriteVMUL_VecFP : SchedWriteRes<[A57UnitV]> {
    let Latency = 5;
  }
  def : InstRW<[A57WriteVMUL_VecFP],
               (instregex "VMUL(sl)?(fd|fq|hd|hq)")>;

  // ASIMD FP multiply accumulate: 9cyc F0/F1, 4cyc for accumulate sequence.
  // A single read advance of 5 is applied to both the MLA and the MUL write.
  def A57WriteVMLA_VecFP : SchedWriteRes<[A57UnitV]> {
    let Latency = 9;
  }
  def A57ReadVMLA_VecFP :
    SchedReadAdvance<5, [A57WriteVMLA_VecFP, A57WriteVMUL_VecFP]>;
  def : InstRW<[A57WriteVMLA_VecFP, A57ReadVMLA_VecFP],
               (instregex "(VMLA|VMLS)(sl)?(fd|fq|hd|hq)",
                          "(VFMA|VFMS)(fd|fq|hd|hq)")>;

  // ASIMD FP negate
  def : InstRW<[A57Write_3cyc_1V], (instregex "VNEG(fd|f32q|hd|hq)")>;

  // ASIMD FP round to integral
  def : InstRW<[A57Write_5cyc_1V],
               (instregex "VRINT(AN|MN|NN|PN|XN|ZN)(Df|Qf|Dh|Qh)")>;
  // --- 3.16 ASIMD Miscellaneous Instructions ---

  // ASIMD bitwise insert
  def : InstRW<[A57Write_3cyc_1V],
               (instregex "VBIF", "VBIT", "VBSL", "VBSP")>;

  // ASIMD count
  def : InstRW<[A57Write_3cyc_1V],
               (instregex "VCLS", "VCLZ", "VCNT")>;

  // ASIMD duplicate, core reg: 8cyc "L, F0/F1"
  def : InstRW<[A57Write_8cyc_1L_1V],
               (instregex "VDUP(8|16|32)(d|q)")>;

  // ASIMD duplicate, scalar: 3cyc "F0/F1"
  def : InstRW<[A57Write_3cyc_1V],
               (instregex "VDUPLN(8|16|32)(d|q)")>;

  // ASIMD extract
  def : InstRW<[A57Write_3cyc_1V],
               (instregex "VEXT(d|q)(8|16|32|64)")>;

  // ASIMD move, immed
  def : InstRW<[A57Write_3cyc_1V],
               (instregex "VMOV(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v1i64|v2i64|v2f32|v4f32)",
                          "VMOVD0",
                          "VMOVQ0")>;

  // ASIMD move, narrowing
  def : InstRW<[A57Write_3cyc_1V], (instregex "VMOVN")>;

  // ASIMD move, saturating
  def : InstRW<[A57Write_4cyc_1X], (instregex "VQMOVN")>;

  // ASIMD reciprocal estimate
  def : InstRW<[A57Write_5cyc_1V], (instregex "VRECPE", "VRSQRTE")>;

  // ASIMD reciprocal step, FZ
  def : InstRW<[A57Write_9cyc_1V], (instregex "VRECPS", "VRSQRTS")>;

  // ASIMD reverse, swap, table lookup (1-2 reg)
  def : InstRW<[A57Write_3cyc_1V],
               (instregex "VREV", "VSWP", "VTB(L|X)(1|2)")>;

  // ASIMD table lookup (3-4 reg)
  def : InstRW<[A57Write_6cyc_1V],
               (instregex "VTBL(3|4)", "VTBX(3|4)")>;

  // ASIMD transfer, scalar to core reg: 6cyc "L, I0/I1"
  def : InstRW<[A57Write_6cyc_1L_1I], (instregex "VGETLN")>;

  // ASIMD transfer, core reg to scalar: 8cyc "L, F0/F1"
  def : InstRW<[A57Write_8cyc_1L_1V], (instregex "VSETLN")>;

  // ASIMD transpose (two writes listed)
  def : InstRW<[A57Write_3cyc_1V, A57Write_3cyc_1V],
               (instregex "VTRN")>;

  // ASIMD unzip/zip, D-form
  def : InstRW<[A57Write_3cyc_1V, A57Write_3cyc_1V],
               (instregex "VUZPd", "VZIPd")>;

  // ASIMD unzip/zip, Q-form
  def : InstRW<[A57Write_6cyc_1V, A57Write_6cyc_1V],
               (instregex "VUZPq", "VZIPq")>;
  // --- 3.17 ASIMD Load Instructions ---

  // The generic VLD/VST writes are given empty resource lists here; they are
  // overridden via InstRW for this processor.
  def : WriteRes<WriteVLD1, []>;
  def : WriteRes<WriteVLD2, []>;
  def : WriteRes<WriteVLD3, []>;
  def : WriteRes<WriteVLD4, []>;

  def : WriteRes<WriteVST1, []>;
  def : WriteRes<WriteVST2, []>;
  def : WriteRes<WriteVST3, []>;
  def : WriteRes<WriteVST4, []>;
  // 1-2 reg: 5cyc L, +I for writeback, 1 cyc wb latency
  def : InstRW<[A57Write_5cyc_1L],
               (instregex "VLD1(d|q)(8|16|32|64)$")>;
  def : InstRW<[A57Write_5cyc_1L_1I, A57WrBackOne],
               (instregex "VLD1(d|q)(8|16|32|64)wb")>;

  // 3-4 reg: 6cyc L, +I for writeback, 1 cyc wb latency
  def : InstRW<[A57Write_6cyc_1L],
               (instregex "VLD1(d|q)(8|16|32|64)(T|Q)$",
                          "VLD1d64(T|Q)Pseudo")>;
  def : InstRW<[A57Write_6cyc_1L_1I, A57WrBackOne],
               (instregex "VLD1(d|q)(8|16|32|64)(T|Q)wb")>;

  // ASIMD load, 1 element, one lane and all lanes: 8cyc "L, F0/F1"
  def : InstRW<[A57Write_8cyc_1L_1V],
               (instregex "VLD1(LN|DUP)(d|q)(8|16|32)$",
                          "VLD1(LN|DUP)(d|q)(8|16|32)Pseudo$")>;
  def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
               (instregex "VLD1(LN|DUP)(d|q)(8|16|32)(wb|_UPD)",
                          "VLD1LNq(8|16|32)Pseudo_UPD")>;
  // ASIMD load, 2 element, multiple, 2 reg: 8cyc "L, F0/F1"
  def : InstRW<[A57Write_8cyc_1L_1V],
               (instregex "VLD2(d|q)(8|16|32)$",
                          "VLD2q(8|16|32)Pseudo$")>;
  def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
               (instregex "VLD2(d|q)(8|16|32)wb",
                          "VLD2q(8|16|32)PseudoWB")>;

  // ASIMD load, 2 element, multiple, 4 reg: 9cyc "L, F0/F1"
  def : InstRW<[A57Write_9cyc_1L_1V],
               (instregex "VLD2b(8|16|32)$")>;
  def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne],
               (instregex "VLD2b(8|16|32)wb")>;

  // ASIMD load, 2 element, one lane and all lanes: 8cyc "L, F0/F1"
  def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V],
               (instregex "VLD2(DUP|LN)(d|q)(8|16|32|8x2|16x2|32x2)$",
                          "VLD2LN(d|q)(8|16|32)Pseudo$")>;

  // 2 results + wb result
  def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V, A57WrBackOne],
               (instregex "VLD2LN(d|q)(8|16|32)_UPD$")>;

  // 1 result + wb result
  def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
               (instregex "VLD2DUPd(8|16|32|8x2|16x2|32x2)wb",
                          "VLD2LN(d|q)(8|16|32)Pseudo_UPD")>;
  // ASIMD load, 3 element, multiple, 3 reg: 9cyc "L, F0/F1"
  // Non-pseudo forms: 3 results.
  def : InstRW<[A57Write_9cyc_1L_1V,
                A57Write_9cyc_1L_1V,
                A57Write_9cyc_1L_1V],
               (instregex "VLD3(d|q)(8|16|32)$")>;
  // Pseudo forms: 1 result.
  def : InstRW<[A57Write_9cyc_1L_1V],
               (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo$")>;
  // Writeback, non-pseudo forms: 3 results + wb.
  def : InstRW<[A57Write_9cyc_1L_1V_1I,
                A57Write_9cyc_1L_1V_1I,
                A57Write_9cyc_1L_1V_1I,
                A57WrBackOne],
               (instregex "VLD3(d|q)(8|16|32)_UPD$")>;
  // Writeback, pseudo forms: 1 result + wb.
  def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne],
               (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo_UPD")>;

  // ASIMD load, 3 element, one lane, size 32: 8cyc "L, F0/F1"
  def : InstRW<[A57Write_8cyc_1L_1V,
                A57Write_8cyc_1L_1V,
                A57Write_8cyc_1L_1V],
               (instregex "VLD3LN(d|q)32$",
                          "VLD3LN(d|q)32Pseudo$")>;
  def : InstRW<[A57Write_8cyc_1L_1V_1I,
                A57Write_8cyc_1L_1V_1I,
                A57Write_8cyc_1L_1V_1I,
                A57WrBackOne],
               (instregex "VLD3LN(d|q)32_UPD")>;
  def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
               (instregex "VLD3LN(d|q)32Pseudo_UPD")>;

  // ASIMD load, 3 element, one lane, size 8/16: 9cyc "L, F0/F1"
  def : InstRW<[A57Write_9cyc_1L_1V,
                A57Write_9cyc_1L_1V,
                A57Write_9cyc_1L_1V],
               (instregex "VLD3LN(d|q)(8|16)$",
                          "VLD3LN(d|q)(8|16)Pseudo$")>;
  def : InstRW<[A57Write_9cyc_1L_1V_1I,
                A57Write_9cyc_1L_1V_1I,
                A57Write_9cyc_1L_1V_1I,
                A57WrBackOne],
               (instregex "VLD3LN(d|q)(8|16)_UPD")>;
  def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne],
               (instregex "VLD3LN(d|q)(8|16)Pseudo_UPD")>;

  // ASIMD load, 3 element, all lanes: 8cyc "L, F0/F1"
  def : InstRW<[A57Write_8cyc_1L_1V,
                A57Write_8cyc_1L_1V,
                A57Write_8cyc_1L_1V],
               (instregex "VLD3DUP(d|q)(8|16|32)$",
                          "VLD3DUP(d|q)(8|16|32)Pseudo$")>;
  def : InstRW<[A57Write_8cyc_1L_1V_1I,
                A57Write_8cyc_1L_1V_1I,
                A57Write_8cyc_1L_1V_1I,
                A57WrBackOne],
               (instregex "VLD3DUP(d|q)(8|16|32)_UPD")>;
  def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
               (instregex "VLD3DUP(d|q)(8|16|32)Pseudo_UPD")>;
  // ASIMD load, 4 element, multiple, 4 reg: 9cyc "L, F0/F1"
  // Non-pseudo forms list four result writes; pseudo forms list one.
  def : InstRW<[A57Write_9cyc_1L_1V,
                A57Write_9cyc_1L_1V,
                A57Write_9cyc_1L_1V,
                A57Write_9cyc_1L_1V],
               (instregex "VLD4(d|q)(8|16|32)$")>;
  def : InstRW<[A57Write_9cyc_1L_1V],
               (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo$")>;
  // Writeback forms append A57WrBackOne after the result writes.
  def : InstRW<[A57Write_9cyc_1L_1V_1I,
                A57Write_9cyc_1L_1V_1I,
                A57Write_9cyc_1L_1V_1I,
                A57Write_9cyc_1L_1V_1I,
                A57WrBackOne],
               (instregex "VLD4(d|q)(8|16|32)_UPD")>;
  def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne],
               (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;

  // ASIMD load, 4 element, one lane, size 32: 8cyc "L, F0/F1"
  def : InstRW<[A57Write_8cyc_1L_1V,
                A57Write_8cyc_1L_1V,
                A57Write_8cyc_1L_1V,
                A57Write_8cyc_1L_1V],
               (instregex "VLD4LN(d|q)32$",
                          "VLD4LN(d|q)32Pseudo$")>;
  def : InstRW<[A57Write_8cyc_1L_1V_1I,
                A57Write_8cyc_1L_1V_1I,
                A57Write_8cyc_1L_1V_1I,
                A57Write_8cyc_1L_1V_1I,
                A57WrBackOne],
               (instregex "VLD4LN(d|q)32_UPD")>;
  def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
               (instregex "VLD4LN(d|q)32Pseudo_UPD")>;

  // ASIMD load, 4 element, one lane, size 8/16: 9cyc "L, F0/F1"
  def : InstRW<[A57Write_9cyc_1L_1V,
                A57Write_9cyc_1L_1V,
                A57Write_9cyc_1L_1V,
                A57Write_9cyc_1L_1V],
               (instregex "VLD4LN(d|q)(8|16)$",
                          "VLD4LN(d|q)(8|16)Pseudo$")>;
  def : InstRW<[A57Write_9cyc_1L_1V_1I,
                A57Write_9cyc_1L_1V_1I,
                A57Write_9cyc_1L_1V_1I,
                A57Write_9cyc_1L_1V_1I,
                A57WrBackOne],
               (instregex "VLD4LN(d|q)(8|16)_UPD")>;
  def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne],
               (instregex "VLD4LN(d|q)(8|16)Pseudo_UPD")>;

  // ASIMD load, 4 element, all lanes: 8cyc "L, F0/F1"
  def : InstRW<[A57Write_8cyc_1L_1V,
                A57Write_8cyc_1L_1V,
                A57Write_8cyc_1L_1V,
                A57Write_8cyc_1L_1V],
               (instregex "VLD4DUP(d|q)(8|16|32)$",
                          "VLD4DUP(d|q)(8|16|32)Pseudo$")>;
  def : InstRW<[A57Write_8cyc_1L_1V_1I,
                A57Write_8cyc_1L_1V_1I,
                A57Write_8cyc_1L_1V_1I,
                A57Write_8cyc_1L_1V_1I,
                A57WrBackOne],
               (instregex "VLD4DUP(d|q)(8|16|32)_UPD")>;
  def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
               (instregex "VLD4DUP(d|q)(8|16|32)Pseudo_UPD")>;
  // --- 3.18 ASIMD Store Instructions ---
  // In the writeback forms below, A57WrBackOne is listed first, followed by
  // the store write with an added I.

  // ASIMD store, 1 element, multiple, 1 reg: 1cyc S
  def : InstRW<[A57Write_1cyc_1S],
               (instregex "VST1d(8|16|32|64)$")>;
  def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I],
               (instregex "VST1d(8|16|32|64)wb")>;

  // ASIMD store, 1 element, multiple, 2 reg: 2cyc S
  def : InstRW<[A57Write_2cyc_1S],
               (instregex "VST1q(8|16|32|64)$")>;
  def : InstRW<[A57WrBackOne, A57Write_2cyc_1S_1I],
               (instregex "VST1q(8|16|32|64)wb")>;

  // ASIMD store, 1 element, multiple, 3 reg: 3cyc S
  def : InstRW<[A57Write_3cyc_1S],
               (instregex "VST1d(8|16|32|64)T$",
                          "VST1d64TPseudo$")>;
  def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1I],
               (instregex "VST1d(8|16|32|64)Twb",
                          "VST1d64TPseudoWB")>;

  // ASIMD store, 1 element, multiple, 4 reg: 4cyc S
  def : InstRW<[A57Write_4cyc_1S],
               (instregex "VST1d(8|16|32|64)(Q|QPseudo)$")>;
  def : InstRW<[A57WrBackOne, A57Write_4cyc_1S_1I],
               (instregex "VST1d(8|16|32|64)(Qwb|QPseudoWB)")>;

  // ASIMD store, 1 element, one lane: 3cyc "F0/F1, S"
  def : InstRW<[A57Write_3cyc_1S_1V],
               (instregex "VST1LNd(8|16|32)$",
                          "VST1LNq(8|16|32)Pseudo$")>;
  def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
               (instregex "VST1LNd(8|16|32)_UPD",
                          "VST1LNq(8|16|32)Pseudo_UPD")>;
  // ASIMD store, 2 element, multiple, 2 reg: 3cyc "F0/F1, S"
  def : InstRW<[A57Write_3cyc_1S_1V],
               (instregex "VST2(d|b)(8|16|32)$")>;
  def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
               (instregex "VST2(b|d)(8|16|32)wb")>;

  // ASIMD store, 2 element, multiple, 4 reg: 4cyc "F0/F1, S"
  def : InstRW<[A57Write_4cyc_1S_1V],
               (instregex "VST2q(8|16|32)$",
                          "VST2q(8|16|32)Pseudo$")>;
  def : InstRW<[A57WrBackOne, A57Write_4cyc_1S_1V_1I],
               (instregex "VST2q(8|16|32)wb",
                          "VST2q(8|16|32)PseudoWB")>;

  // ASIMD store, 2 element, one lane: 3cyc "F0/F1, S"
  def : InstRW<[A57Write_3cyc_1S_1V],
               (instregex "VST2LN(d|q)(8|16|32)$",
                          "VST2LN(d|q)(8|16|32)Pseudo$")>;
  def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
               (instregex "VST2LN(d|q)(8|16|32)_UPD",
                          "VST2LN(d|q)(8|16|32)Pseudo_UPD")>;
  // ASIMD store, 3 element, multiple, 3 reg
  // (modeled with A57Write_3cyc_1S_1V; writeback forms use the _1I variant).
  def : InstRW<[A57Write_3cyc_1S_1V],
               (instregex "VST3(d|q)(8|16|32)$",
                          "VST3(d|q)(8|16|32)(oddP|P)seudo$")>;
  def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
               (instregex "VST3(d|q)(8|16|32)_UPD",
                          "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;

  // ASIMD store, 3 element, one lane
  // (same write classes as the multiple-register forms above).
  def : InstRW<[A57Write_3cyc_1S_1V],
               (instregex "VST3LN(d|q)(8|16|32)$",
                          "VST3LN(d|q)(8|16|32)Pseudo$")>;
  def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
               (instregex "VST3LN(d|q)(8|16|32)_UPD",
                          "VST3LN(d|q)(8|16|32)Pseudo_UPD")>;
  // ASIMD store, 4 element, multiple, 4 reg
  // (modeled with A57Write_4cyc_1S_1V; writeback forms use the _1I variant).
  def : InstRW<[A57Write_4cyc_1S_1V],
               (instregex "VST4(d|q)(8|16|32)$",
                          "VST4(d|q)(8|16|32)(oddP|P)seudo$")>;
  def : InstRW<[A57WrBackOne, A57Write_4cyc_1S_1V_1I],
               (instregex "VST4(d|q)(8|16|32)_UPD",
                          "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;

  // ASIMD store, 4 element, one lane
  // (modeled with A57Write_3cyc_1S_1V; writeback forms use the _1I variant).
  def : InstRW<[A57Write_3cyc_1S_1V],
               (instregex "VST4LN(d|q)(8|16|32)$",
                          "VST4LN(d|q)(8|16|32)Pseudo$")>;
  def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
               (instregex "VST4LN(d|q)(8|16|32)_UPD",
                          "VST4LN(d|q)(8|16|32)Pseudo_UPD")>;
  // --- 3.19 Cryptography Extensions ---

  // Crypto AES ops (AESD, AESE, AESIMC, AESMC): 3cyc F0
  def : InstRW<[A57Write_3cyc_1W],
               (instregex "^AES")>;

  // Crypto polynomial (64x64) multiply long (VMULL.P64): 3cyc F0
  def : InstRW<[A57Write_3cyc_1W],
               (instregex "^VMULLp64")>;

  // Crypto SHA1 xor ops: 6cyc F0/F1
  def : InstRW<[A57Write_6cyc_2V],
               (instregex "^SHA1SU0")>;

  // Crypto SHA1 fast ops: 3cyc F0
  def : InstRW<[A57Write_3cyc_1W],
               (instregex "^SHA1(H|SU1)")>;

  // Crypto SHA1 slow ops: 6cyc F0
  def : InstRW<[A57Write_6cyc_2W],
               (instregex "^SHA1[CMP]")>;

  // Crypto SHA256 fast ops: 3cyc F0
  def : InstRW<[A57Write_3cyc_1W],
               (instregex "^SHA256SU0")>;

  // Crypto SHA256 slow ops: 6cyc F0
  def : InstRW<[A57Write_6cyc_2W],
               (instregex "^SHA256(H|H2|SU1)")>;
  // --- 3.20 CRC ---
  // CRC32 instructions, with or without the "t2" name prefix, use
  // A57Write_3cyc_1W.
  def : InstRW<[A57Write_3cyc_1W],
               (instregex "^(t2)?CRC32")>;
  // -----------------------------------------------------------------------------
  // Common definitions

  // WriteNoop: no resources, zero latency, zero micro-ops.
  def : WriteRes<WriteNoop, []> {
    let Latency = 0;
    let NumMicroOps = 0;
  }

  // Generic ALU write, resolved through the branch-form check wrappers.
  def : SchedAlias<WriteALU,
                   CheckBranchForm<0, A57BranchForm<A57Write_1cyc_1I>>>;

  // Branches.
  def : SchedAlias<WriteBr,    A57Write_1cyc_1B>;
  def : SchedAlias<WriteBrL,   A57Write_1cyc_1B_1I>;
  def : SchedAlias<WriteBrTbl, A57Write_1cyc_1B_1I>;

  // Generic preload, load and store.
  def : SchedAlias<WritePreLd, A57Write_4cyc_1L>;
  def : SchedAlias<WriteLd,    A57Write_4cyc_1L>;
  def : SchedAlias<WriteST,    A57Write_1cyc_1S>;

  // ReadALU is given a read advance of 0 cycles.
  def : ReadAdvance<ReadALU, 0>;
  1272. } // SchedModel = CortexA57Model