ARMScheduleM55.td 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478
  1. //==- ARMScheduleM55.td - Arm Cortex-M55 Scheduling Definitions -*- tablegen -*-=//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file defines the scheduling model for the Arm Cortex-M55 processors.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. // ===---------------------------------------------------------------------===//
  // Cortex-M55 is a lot like the M4/M33 in terms of scheduling. It technically
  // has an extra pipeline stage but that is unimportant for scheduling, just
  // starting our model a stage later. The main points of interest over a
  // Cortex-M4 are MVE instructions and the ability to dual issue thumb1
  // instructions.
  18. //
  19. //
  20. // MVE
  21. //
  // The EPU pipelines now include both MVE and FP instructions. It has four
  // pipelines across 4 stages (E1-E4). These pipelines are "control",
  // "load/store", "integer" and "float/mul". We start the schedule at E2 to line
  // up with the rest of the pipeline we model, and take the latency as the time
  // between reading registers (almost always in E2) and register write (or
  // forward, if it allows it). This means that a lot of instructions (including
  // loads) actually take 1 cycle (amazingly).
  29. //
  // Each MVE instruction needs to take 2 beats, each performing 64bits of the
  // 128bit vector operation. So long as the beats are to different pipelines,
  // the execution of the first-beat-of-the-second-instruction can overlap with
  // the second-beat-of-the-first. For example a sequence of VLDR;VADD;VMUL;VSTR
  // can look like this in a pipeline:
  //            1    2    3    4    5
  //   LD/ST  : VLDR VLDR           VSTR VSTR
  //   INTEGER:      VADD VADD
  //   FP/MUL :           VMUL VMUL
  39. //
  // But in a sequence of VLDR;VLDRB;VADD;VSTR the loads cannot overlap, so it
  // looks like:
  //            1    2    3     4     5    6
  //   LD/ST  : VLDR VLDR VLDRB VLDRB VSTR VSTR
  //   INTEGER:                 VADD  VADD
  45. //
  // For this schedule, we currently model latencies and pipelines well for each
  // instruction. MVE instructions take two beats, modelled using
  // ResourceCycles=[2].
  49. //
  50. //
  51. // Dual Issue
  52. //
  53. // Cortex-M55 can dual issue two 16-bit T1 instructions providing one is one of
  54. // NOPs, ITs, Brs, ADDri/SUBri, UXTB/H, SXTB/H and MOVri's. NOPs and IT's are
  55. // not relevant (they will not appear when scheduling), Brs are only at the end
  56. // of the block. The others are more useful, and where the problems arise.
  57. //
  // The first problem comes from the fact that we will only be seeing Thumb2
  // instructions at the point in the pipeline where we do the scheduling. The
  // Thumb2SizeReductionPass has not been run yet. Especially in pre-RA
  // scheduling (where the scheduler has the most freedom) we can only really
  // guess at which instructions will become thumb1 instructions. We are quite
  // optimistic, and may get some things wrong as a result.
  64. //
  // The other problem is one of telling llvm what to do exactly. The way we
  // attempt to model this is:
  //  - Set IssueWidth to 2 to allow 2 instructions per cycle.
  //  - All instructions we cannot dual issue are "SingleIssue=1" (MVE/FP and
  //    T2 instructions).
  //  - We guess at another set of instructions that will become T1
  //    instructions. These become the primary instruction in a dual issue
  //    pair (the normal one). These use normal resources and latencies, but
  //    set SingleIssue = 0.
  //  - We guess at another set of instructions that will be shrunk down into
  //    T1 DI instructions (add, sub, mov's, etc), which become the secondary.
  //    These don't use a resource, and set SingleIssue = 0.
  76. //
  // So our guessing is a bit rough. It may be possible to improve this by moving
  // the T2SizeReduction pass earlier in the pipeline, for example, so that at
  // least Post-RA scheduling sees what is T1/T2. It may also be possible to write
  // a custom instruction matcher to more accurately guess at T1 instructions.
  // Machine model record for the Cortex-M55.
  def CortexM55Model : SchedMachineModel {
    // M55 is in-order, so the micro-op buffer is explicitly set to zero.
    let MicroOpBufferSize = 0;
    // There is some dual-issue support in M55.
    let IssueWidth = 2;
    // Same value as the default (4).
    let LoadLatency = 4;
    // The default is 10.
    let MispredictPenalty = 3;

    let CompleteModel = 0;
    let FullInstRWOverlapCheck = 1;
    let PostRAScheduler = 1;

    let UnsupportedFeatures = [
      IsARM, HasNEON, HasDotProd, HasMatMulInt8, HasZCZ, IsNotMClass, HasV8,
      HasV8_3a, HasTrustZone, HasDFB, IsWindows
    ];
  }
  93. let SchedModel = CortexM55Model in {
  //===----------------------------------------------------------------------===//
  // Processor resources.
  //
  // Each pipeline is modelled as its own ProcResource with a single unit. All
  // of them use BufferSize = 0 since M55 is in-order.
  let BufferSize = 0 in {
    def M55UnitALU       : ProcResource<1>; // Int ALU
    def M55UnitVecALU    : ProcResource<1>; // MVE integer pipe
    def M55UnitVecFPALU  : ProcResource<1>; // MVE float pipe
    def M55UnitLoadStore : ProcResource<1>; // MVE load/store pipe
    def M55UnitVecSys    : ProcResource<1>; // MVE control/sys pipe
  }

  // Some VMOV's can go down either pipeline, so the integer and float pipes
  // are also available as a group.
  // FIXME: M55Write2IntFPE2 (which uses this group) is intended to model the
  // VMOV taking either Int or FP for 2 cycles. It is not clear if the llvm
  // scheduler is using it like we want though.
  def M55UnitVecIntFP : ProcResGroup<[M55UnitVecALU, M55UnitVecFPALU]>;
  //===----------------------------------------------------------------------===//
  // Subtarget-specific SchedWrite types. Each one both maps onto the
  // ProcResources it uses and sets the latency.

  //=====//
  // ALU //
  //=====//

  // Generic writes that use no resource and no micro-ops and only carry a
  // latency. They are used for flags, GPRs and other extra operands (e.g.
  // post-inc, vadc flags, vaddlv etc).
  let NumMicroOps = 0 in {
    def M55WriteLat0 : SchedWriteRes<[]> { let Latency = 0; }
    def M55WriteLat1 : SchedWriteRes<[]> { let Latency = 1; }
    def M55WriteLat2 : SchedWriteRes<[]> { let Latency = 2; }
  }
  // DX instructions are ALU instructions that take a single cycle. Those that
  // may be shrunk to T1 (and can then be dual issued) are SingleIssue = 0; the
  // others are SingleIssue = 1.
  let Latency = 1 in {
    // Possibly dual-issued.
    let SingleIssue = 0 in {
      def : WriteRes<WriteALU,   [M55UnitALU]>;
      def : WriteRes<WriteCMP,   [M55UnitALU]>;
      def : WriteRes<WriteBr,    [M55UnitALU]>;
      def : WriteRes<WriteBrL,   [M55UnitALU]>;
      def : WriteRes<WriteBrTbl, [M55UnitALU]>;
      def : WriteRes<WriteST,    [M55UnitALU]>;
      def M55WriteDX_DI : SchedWriteRes<[M55UnitALU]>;
    }

    // Always issued alone.
    let SingleIssue = 1 in {
      def : WriteRes<WritePreLd, [M55UnitALU]>;
      def M55WriteDX_SI : SchedWriteRes<[M55UnitALU]>;
    }
  }
  // Instructions mapped to the single-cycle, single-issue ALU write
  // (M55WriteDX_SI).
  def : InstRW<[M55WriteDX_SI],
               (instregex "t2BF[CI]", "t2CPS", "t2DBG", "t2MRS", "t2MSR",
                          "t2SEL", "t2SG", "t2TT")>;
  def : InstRW<[M55WriteDX_SI],
               (instregex "t2SUBS_PC_LR", "COPY")>;
  def : InstRW<[M55WriteDX_SI],
               (instregex "t2CS(EL|INC|INV|NEG)")>;
  // Thumb 2 instructions that could be reduced to a Thumb 1 instruction, and
  // could then be dual issued. This list is optimistic.
  // NOTE(review): "t2ADDC?rr$" already matches t2ADDrr, which makes the
  // following "t2ADDrr$" entry redundant; possibly "t2ADCrr$" was intended --
  // confirm before changing.
  def : InstRW<[M55WriteDX_DI],
               (instregex
                  // ALU, shift and compare forms.
                  "t2ADDC?rr$", "t2ADDrr$", "t2ADDSrr$", "t2ANDrr$",
                  "t2ASRr[ir]$", "t2BICrr$", "t2CMNzrr$", "t2CMPr[ir]$",
                  "t2EORrr$", "t2LSLr[ir]$", "t2LSRr[ir]$", "t2MVNr$",
                  "t2ORRrr$", "t2REV(16|SH)?$", "t2RORrr$", "t2RSBr[ir]$",
                  "t2RSBSri$", "t2SBCrr$", "t2SUBS?rr$", "t2TEQrr$",
                  "t2TSTrr$",
                  // Stores.
                  "t2STRi12$", "t2STRs$", "t2STRBi12$", "t2STRBs$",
                  "t2STRHi12$", "t2STRHs$", "t2STR_POST$", "t2STMIA$",
                  "t2STMIA_UPD$", "t2STMDB$", "t2STMDB_UPD$")>;

  // Instructions that already are Thumb 1 (plus t2SETPAN).
  def : InstRW<[M55WriteDX_DI],
               (instregex
                  "t2SETPAN$", "tADC$", "tADDhirr$", "tADDrSP$", "tADDrSPi$",
                  "tADDrr$", "tADDspi$", "tADDspr$", "tADR$", "tAND$",
                  "tASRri$", "tASRrr$", "tBIC$", "tBKPT$", "tCBNZ$", "tCBZ$",
                  "tCMNz$", "tCMPhir$", "tCMPi8$", "tCMPr$", "tCPS$", "tEOR$",
                  "tHINT$", "tHLT$", "tLSLri$", "tLSLrr$", "tLSRri$",
                  "tLSRrr$", "tMOVSr$", "tMUL$", "tMVN$", "tORR$", "tPICADD$",
                  "tPOP$", "tPUSH$", "tREV$", "tREV16$", "tREVSH$", "tROR$",
                  "tRSB$", "tSBC$", "tSETEND$", "tSTMIA_UPD$", "tSTRBi$",
                  "tSTRBr$", "tSTRHi$", "tSTRHr$", "tSTRi$", "tSTRr$",
                  "tSTRspi$", "tSUBrr$", "tSUBspi$", "tSVC$", "tTRAP$",
                  "tTST$", "tUDF$")>;

  // Thumb 1 branches.
  def : InstRW<[M55WriteDX_DI],
               (instregex "tB$", "tBLXNSr$", "tBLXr$", "tBX$", "tBXNS$",
                          "tBcc$")>;
  // CX instructions take 2 (or more) cycles. Again, T1 instructions may be
  // dual issued (SingleIssue = 0).
  let Latency = 2 in {
    // Possibly dual-issued.
    let SingleIssue = 0 in {
      def : WriteRes<WriteLd, [M55UnitALU]>;
      def M55WriteCX_DI : SchedWriteRes<[M55UnitALU]>;
    }

    // Always issued alone.
    let SingleIssue = 1 in {
      def : WriteRes<WriteALUsi,  [M55UnitALU]>;
      def : WriteRes<WriteALUsr,  [M55UnitALU]>;
      def : WriteRes<WriteALUSsr, [M55UnitALU]>;
      def : WriteRes<WriteCMPsi,  [M55UnitALU]>;
      def : WriteRes<WriteCMPsr,  [M55UnitALU]>;
      def : WriteRes<WriteDIV,    [M55UnitALU]>;
      def M55WriteCX_SI : SchedWriteRes<[M55UnitALU]>;
    }
  }
  // Multiplies and multiply-accumulates all alias the two-cycle, single-issue
  // ALU write.
  def : SchedAlias<WriteMUL16,   M55WriteCX_SI>;
  def : SchedAlias<WriteMUL32,   M55WriteCX_SI>;
  def : SchedAlias<WriteMUL64Lo, M55WriteCX_SI>;
  def : SchedAlias<WriteMAC16,   M55WriteCX_SI>;
  def : SchedAlias<WriteMAC32,   M55WriteCX_SI>;
  def : SchedAlias<WriteMAC64Lo, M55WriteCX_SI>;

  // The high halves of the 64-bit results take no resource of their own and
  // just carry the 2 cycle latency.
  let Latency = 2 in {
    def : WriteRes<WriteMUL64Hi, []>;
    def : WriteRes<WriteMAC64Hi, []>;
  }
  // Instructions mapped to the two-cycle, single-issue ALU write
  // (M55WriteCX_SI).
  def : InstRW<[M55WriteCX_SI],
               (instregex "t2CDP", "t2CLREX", "t2[DI][MS]B", "t2MCR",
                          "t2MOVSs[ir]", "t2MRC", "t2MUL", "t2STC")>;
  def : InstRW<[M55WriteCX_SI],
               (instregex "t2Q", "t2[SU](ADD|ASX|BFX|DIV)",
                          "t2[SU]H(ADD|ASX|SUB|SAX)", "t2SM[LM]",
                          "t2S(SAT|SUB|SAX)", "t2UQ", "t2USA", "t2USUB",
                          "t2UXTA[BH]")>;
  def : InstRW<[M55WriteCX_SI],
               (instregex "t2LD[AC]", "t2STL", "t2STRD")>;
  def : InstRW<[M55WriteCX_SI],
               (instregex "MVE_[SU]Q?R?SH[LR]$")>;

  // The long shifts carry an additional latency-2 write (M55WriteLat2).
  def : InstRW<[M55WriteCX_SI, M55WriteLat2],
               (instregex "MVE_ASRL", "MVE_LSLL", "MVE_LSRL",
                          "MVE_[SU]Q?R?SH[LR]L")>;

  // This may be higher in practice, but that likely doesn't make a difference
  // for scheduling.
  def : InstRW<[M55WriteCX_SI],
               (instregex "t2CLRM")>;
  // Loads mapped to the two-cycle write that may be dual issued
  // (M55WriteCX_DI): first the Thumb 2 forms, then the Thumb 1 forms.
  def : InstRW<[M55WriteCX_DI],
               (instregex "t2LDR[BH]?i12$", "t2LDRS?[BH]?s$", "t2LDM")>;
  def : InstRW<[M55WriteCX_DI],
               (instregex "tLDM", "tLDRBi$", "tLDRBr$", "tLDRHi$", "tLDRHr$",
                          "tLDRSB$", "tLDRSH$", "tLDRi$", "tLDRpci$",
                          "tLDRr$", "tLDRspi$")>;
  // Dual Issue instructions. These writes use no processor resource.
  let SingleIssue = 0, Latency = 1 in {
    def : WriteRes<WriteNoop, []>;
    def M55WriteDI : SchedWriteRes<[]>;
  }

  // Thumb 1 instructions using the dual-issue write.
  def : InstRW<[M55WriteDI],
               (instregex "tADDi[38]$", "tSUBi[38]$", "tMOVi8$", "tMOVr$",
                          "tUXT[BH]$", "tSXT[BH]$")>;

  // Thumb 2 instructions that could be reduced to a dual issuable Thumb 1
  // instruction above.
  // NOTE(review): "t2MOV[ir]$" already matches t2MOVr, so the "t2MOVr$" entry
  // is redundant.
  def : InstRW<[M55WriteDI],
               (instregex "t2ADDS?ri$", "t2MOV[ir]$", "t2MOVi16$", "t2MOVr$",
                          "t2SUBS?ri$", "t2[US]XT[BH]$")>;

  def : InstRW<[M55WriteDI],
               (instregex "t2IT", "IT")>;

  // t2LoopDec uses the zero-latency, resource-free write.
  def : InstRW<[M55WriteLat0],
               (instregex "t2LoopDec")>;
  // Forwarding.
  // There is normally no forwarding in the ALU, so every generic ALU, MUL and
  // MAC read gets a read advance of 0.
  def : ReadAdvance<ReadALU,   0>;
  def : ReadAdvance<ReadALUsr, 0>;
  def : ReadAdvance<ReadMUL,   0>;
  def : ReadAdvance<ReadMAC,   0>;
  //=============//
  // MVE and VFP //
  //=============//

  // All of these writes are SingleIssue = 1. The writes that take
  // ResourceCycles = [2] are for MVE instructions, the others are for VFP.
  let SingleIssue = 1 in {
    let Latency = 1 in {
      def M55WriteLSE2    : SchedWriteRes<[M55UnitLoadStore]>;
      def M55WriteIntE2   : SchedWriteRes<[M55UnitVecALU]>;
      def M55WriteFloatE2 : SchedWriteRes<[M55UnitVecFPALU]>;
      def M55WriteSysE2   : SchedWriteRes<[M55UnitVecSys]>;
      let ResourceCycles = [2] in {
        def M55Write2LSE2    : SchedWriteRes<[M55UnitLoadStore]>;
        def M55Write2IntE2   : SchedWriteRes<[M55UnitVecALU]>;
        def M55Write2FloatE2 : SchedWriteRes<[M55UnitVecFPALU]>;
        def M55Write2IntFPE2 : SchedWriteRes<[M55UnitVecIntFP]>;
      }
    }

    let Latency = 2 in {
      def M55WriteLSE3    : SchedWriteRes<[M55UnitLoadStore]>;
      def M55WriteIntE3   : SchedWriteRes<[M55UnitVecALU]>;
      def M55WriteFloatE3 : SchedWriteRes<[M55UnitVecFPALU]>;
      let ResourceCycles = [2] in {
        def M55Write2LSE3    : SchedWriteRes<[M55UnitLoadStore]>;
        def M55Write2IntE3   : SchedWriteRes<[M55UnitVecALU]>;
        def M55Write2FloatE3 : SchedWriteRes<[M55UnitVecFPALU]>;
      }
    }

    let Latency = 3, ResourceCycles = [2] in {
      def M55Write2IntE3Plus1 : SchedWriteRes<[M55UnitVecALU]>;
      // Same as M55Write2IntE3/M55Write2FloatE3 above, but with a longer
      // latency and no forwarding into stores.
      def M55Write2IntE4NoFwd   : SchedWriteRes<[M55UnitVecALU]>;
      def M55Write2FloatE4NoFwd : SchedWriteRes<[M55UnitVecFPALU]>;
    }

    let Latency = 4 in {
      let ResourceCycles = [2] in
      def M55Write2IntE3Plus2  : SchedWriteRes<[M55UnitVecALU]>;
      def M55WriteFloatE3Plus2 : SchedWriteRes<[M55UnitVecFPALU]>;
    }

    // Long-latency writes on the float pipe.
    let Latency = 9 in
    def M55WriteFloatE3Plus7  : SchedWriteRes<[M55UnitVecFPALU]>;
    let Latency = 15 in
    def M55WriteFloatE3Plus13 : SchedWriteRes<[M55UnitVecFPALU]>;
    let Latency = 16 in
    def M55WriteFloatE3Plus14 : SchedWriteRes<[M55UnitVecFPALU]>;
    let Latency = 21 in
    def M55WriteFloatE3Plus19 : SchedWriteRes<[M55UnitVecFPALU]>;
    // VMUL (Double precision) + VADD (Double precision)
    let Latency = 24 in
    def M55WriteFloatE3Plus22 : SchedWriteRes<[M55UnitVecFPALU]>;
    let Latency = 30 in
    def M55WriteFloatE3Plus28 : SchedWriteRes<[M55UnitVecFPALU]>;
    let Latency = 36 in
    def M55WriteFloatE3Plus34 : SchedWriteRes<[M55UnitVecFPALU]>;
  }
  // Read types used by the MVE load/store InstRW entries.
  // M55Read0: read advance of 0.
  def M55Read0 : SchedReadAdvance<0>;
  // M55Read1: read advance of 1, restricted to the three listed writes.
  def M55Read1 : SchedReadAdvance<1, [M55Write2LSE3,
                                      M55Write2IntE3,
                                      M55Write2FloatE3]>;
  // M55GatherQRead: negative read advance of -4. It is attached to the
  // gather/scatter (_rq / _qi) forms.
  def M55GatherQRead : SchedReadAdvance<-4>;
  // MVE instructions

  // Loads and Stores of different kinds

  // Normal loads
  def : InstRW<[M55Write2LSE2],
               (instregex "MVE_VLDR(B|H|W)(S|U)(8|16|32)$")>;
  // Pre/post inc loads
  def : InstRW<[M55WriteLat1, M55Write2LSE2],
               (instregex "MVE_VLDR(B|H|W)(S|U)(8|16|32)_(post|pre)$")>;
  // Gather loads
  def : InstRW<[M55Write2LSE3, M55Read0, M55GatherQRead],
               (instregex "MVE_VLDR(B|H|W|D)(S|U)(8|16|32|64)_rq")>;
  def : InstRW<[M55Write2LSE3, M55GatherQRead],
               (instregex "MVE_VLDR(B|H|W|D)(S|U)(8|16|32|64)_qi$")>;
  def : InstRW<[M55WriteLat1, M55Write2LSE3, M55GatherQRead],
               (instregex "MVE_VLDR(W|D)U(32|64)_qi_pre$")>;
  // Interleaving loads
  def : InstRW<[M55Write2LSE2],
               (instregex "MVE_VLD[24][0-3]_(8|16|32)$")>;
  // Interleaving loads with wb
  def : InstRW<[M55Write2LSE2, M55WriteLat1],
               (instregex "MVE_VLD[24][0-3]_(8|16|32)_wb$")>;

  // Normal stores
  def : InstRW<[M55Write2LSE2, M55Read1],
               (instregex "MVE_VSTR(B|H|W)U?(8|16|32)$")>;
  // Pre/post inc stores
  def : InstRW<[M55Write2LSE2, M55Read1],
               (instregex "MVE_VSTR(B|H|W)U?(8|16|32)_(post|pre)$")>;
  // Scatter stores
  def : InstRW<[M55Write2LSE2, M55Read0, M55Read0, M55GatherQRead],
               (instregex "MVE_VSTR(B|H|W|D)(8|16|32|64)_rq")>;
  def : InstRW<[M55Write2LSE2, M55Read0, M55GatherQRead],
               (instregex "MVE_VSTR(B|H|W|D)(8|16|32|64)_qi")>;
  // Interleaving stores
  def : InstRW<[M55Write2LSE2],
               (instregex "MVE_VST(2|4)")>;
  // Integer pipe operations. Every entry uses one of the two-beat writes on
  // M55UnitVecALU; the write name gives the latency class.
  def : InstRW<[M55Write2IntE3Plus1], (instregex "MVE_VABAV")>;
  def : InstRW<[M55Write2IntE2],      (instregex "MVE_VABD(u|s)")>;
  def : InstRW<[M55Write2IntE2],      (instregex "MVE_VABS(u|s)")>;
  def : InstRW<[M55Write2IntE3],      (instregex "MVE_VADC")>;
  def : InstRW<[M55Write2IntE2],      (instregex "MVE_VADD(_qr_)?i")>;
  def : InstRW<[M55Write2IntE2],      (instregex "MVE_VAND")>;
  def : InstRW<[M55Write2IntE2],      (instregex "MVE_VBIC")>;
  def : InstRW<[M55Write2IntE2],      (instregex "MVE_VBRSR")>;
  def : InstRW<[M55Write2IntE2],      (instregex "MVE_VCADDi")>;
  def : InstRW<[M55Write2IntE2],      (instregex "MVE_VCLS")>;
  def : InstRW<[M55Write2IntE2],      (instregex "MVE_VCLZ")>;
  def : InstRW<[M55Write2IntE2],      (instregex "MVE_V(D|I)?W?DUP")>;
  def : InstRW<[M55Write2IntE2],      (instregex "MVE_VEOR")>;
  def : InstRW<[M55Write2IntE2],      (instregex "MVE_VHADD")>;
  def : InstRW<[M55Write2IntE2],      (instregex "MVE_VHCADD")>;
  def : InstRW<[M55Write2IntE2],      (instregex "MVE_VHSUB")>;
  def : InstRW<[M55Write2IntE2],      (instregex "MVE_V(MAX|MIN)A?(s|u)")>;
  def : InstRW<[M55Write2IntE3],      (instregex "MVE_V(MAX|MIN)A?V(s|u)8")>;
  def : InstRW<[M55Write2IntE3Plus1], (instregex "MVE_V(MAX|MIN)A?V(s|u)16")>;
  def : InstRW<[M55Write2IntE3Plus2], (instregex "MVE_V(MAX|MIN)A?V(s|u)32")>;
  def : InstRW<[M55Write2IntE4NoFwd], (instregex "MVE_VMOVN")>;
  def : InstRW<[M55Write2IntE2],      (instregex "MVE_VMOVL")>;
  def : InstRW<[M55Write2IntE3],      (instregex "MVE_VMULL[BT]p")>;
  def : InstRW<[M55Write2IntE2],      (instregex "MVE_VMVN")>;
  def : InstRW<[M55Write2IntE2],      (instregex "MVE_VNEG(u|s)")>;
  def : InstRW<[M55Write2IntE2],      (instregex "MVE_VORN")>;
  def : InstRW<[M55Write2IntE2],      (instregex "MVE_VORR")>;
  def : InstRW<[M55Write2IntE2],      (instregex "MVE_VPSEL")>;
  def : InstRW<[M55Write2IntE2],      (instregex "MQPRCopy")>;
  def : InstRW<[M55Write2IntE2],      (instregex "MVE_VQABS")>;
  def : InstRW<[M55Write2IntE2],      (instregex "MVE_VQADD")>;
  def : InstRW<[M55Write2IntE4NoFwd], (instregex "MVE_VQMOV")>;
  def : InstRW<[M55Write2IntE2],      (instregex "MVE_VQNEG")>;
  def : InstRW<[M55Write2IntE2],      (instregex "MVE_VSHL")>;
  def : InstRW<[M55Write2IntE3],      (instregex "MVE_V[QR]SHL")>;
  def : InstRW<[M55Write2IntE3],      (instregex "MVE_VQRSHL")>;
  def : InstRW<[M55Write2IntE4NoFwd], (instregex "MVE_VQ?R?SHRU?N")>;
  def : InstRW<[M55Write2IntE2],      (instregex "MVE_VSHR_")>;
  def : InstRW<[M55Write2IntE3],      (instregex "MVE_VRSHR_")>;
  def : InstRW<[M55Write2IntE2],      (instregex "MVE_VQSUB")>;
  def : InstRW<[M55Write2IntE2],      (instregex "MVE_VREV")>;
  def : InstRW<[M55Write2IntE2],      (instregex "MVE_VRHADD")>;
  def : InstRW<[M55Write2IntE3],      (instregex "MVE_VSBC")>;
  def : InstRW<[M55Write2IntE2],      (instregex "MVE_VSLI")>;
  def : InstRW<[M55Write2IntE2],      (instregex "MVE_VSRI")>;
  def : InstRW<[M55Write2IntE2],      (instregex "MVE_VSUB(_qr_)?i")>;
  // FP/Mul pipe operations. Every entry uses one of the two-beat writes on
  // M55UnitVecFPALU; some carry an extra M55WriteLat1 write.
  def : InstRW<[M55Write2FloatE2],
               (instregex "MVE_VABDf")>;
  def : InstRW<[M55Write2FloatE2],
               (instregex "MVE_VABSf")>;
  def : InstRW<[M55Write2FloatE2],
               (instregex "MVE_VADDf")>;
  def : InstRW<[M55Write2FloatE3],
               (instregex "MVE_VADD_qr_f")>;
  def : InstRW<[M55Write2FloatE3, M55WriteLat1],
               (instregex "MVE_VADDLV")>;
  def : InstRW<[M55Write2FloatE3],
               (instregex "MVE_VADDV")>;
  def : InstRW<[M55Write2FloatE2],
               (instregex "MVE_VCADDf")>;
  def : InstRW<[M55Write2FloatE3],
               (instregex "MVE_VCMLA")>;
  def : InstRW<[M55Write2FloatE3],
               (instregex "MVE_VCMUL")>;
  def : InstRW<[M55Write2FloatE2],
               (instregex "MVE_VCMP(i|s|u)", "MVE_VPTv(4|8|16)(i|s|u)")>;
  def : InstRW<[M55Write2FloatE2],
               (instregex "MVE_VCMPf", "MVE_VPTv(4|8)f")>;
  def : InstRW<[M55Write2FloatE3],
               (instregex "MVE_VCVTf16(u|s)16")>;
  def : InstRW<[M55Write2FloatE3],
               (instregex "MVE_VCVTf32(u|s)32")>;
  def : InstRW<[M55Write2FloatE3],
               (instregex "MVE_VCVT(u|s)16f16")>;
  def : InstRW<[M55Write2FloatE3],
               (instregex "MVE_VCVT(u|s)32f32")>;
  def : InstRW<[M55Write2FloatE4NoFwd],
               (instregex "MVE_VCVTf16f32")>;
  def : InstRW<[M55Write2FloatE3],
               (instregex "MVE_VCVTf32f16")>;
  def : InstRW<[M55Write2FloatE3],
               (instregex "MVE_VFM(A|S)")>;
  def : InstRW<[M55Write2FloatE2],
               (instregex "MVE_V(MIN|MAX)NM")>;
  def : InstRW<[M55Write2FloatE2],
               (instregex "MVE_VMOV_from_lane")>;
  def : InstRW<[M55Write2FloatE2],
               (instregex "MVE_VMOV_rr_q")>;
  def : InstRW<[M55Write2FloatE3],
               (instregex "MVE_VMOVi")>;
  def : InstRW<[M55Write2FloatE3],
               (instregex "MVE_VMUL(_qr_)?[if]")>;
  def : InstRW<[M55Write2FloatE3],
               (instregex "MVE_VQ?R?D?MULH")>;
  def : InstRW<[M55Write2FloatE3],
               (instregex "MVE_VQ?D?MULL[TB]?[su]")>;
  def : InstRW<[M55Write2FloatE3],
               (instregex "MVE_VQDMULL_qr_")>;
  def : InstRW<[M55Write2FloatE3],
               (instregex "MVE_VQ?R?D?ML(A|S)[^L]")>;
  def : InstRW<[M55Write2FloatE3, M55WriteLat1],
               (instregex "MVE_VR?ML(A|S)L")>;
  def : InstRW<[M55Write2FloatE2],
               (instregex "MVE_VNEGf")>;
  def : InstRW<[M55Write2FloatE3],
               (instregex "MVE_VRINTf")>;
  def : InstRW<[M55Write2FloatE2],
               (instregex "MVE_VSUBf")>;
  def : InstRW<[M55Write2FloatE3],
               (instregex "MVE_VSUB_qr_f")>;

  // Some VMOV's can go down either pipeline.
  def : InstRW<[M55Write2IntFPE2],
               (instregex "MVE_VMOV_to_lane", "MVE_VMOV_q_rr")>;

  // Entries that use the control/sys pipe write.
  def : InstRW<[M55WriteSysE2],
               (instregex "MVE_VCTP")>;
  def : InstRW<[M55WriteSysE2],
               (instregex "MVE_VPNOT")>;
  def : InstRW<[M55WriteSysE2],
               (instregex "MVE_VPST")>;
  // VFP instructions

  // Map the generic FP writes onto the M55 float-pipe writes.
  def : SchedAlias<WriteFPCVT,    M55WriteFloatE3>;
  def : SchedAlias<WriteFPMOV,    M55WriteFloatE3>;
  def : SchedAlias<WriteFPALU32,  M55WriteFloatE3>;
  def : SchedAlias<WriteFPALU64,  M55WriteFloatE3Plus13>;
  def : SchedAlias<WriteFPMUL32,  M55WriteFloatE3>;
  def : SchedAlias<WriteFPMUL64,  M55WriteFloatE3Plus19>;
  def : SchedAlias<WriteFPMAC32,  M55WriteFloatE3Plus2>;
  def : SchedAlias<WriteFPMAC64,  M55WriteFloatE3Plus34>;
  def : SchedAlias<WriteFPDIV32,  M55WriteFloatE3Plus14>;
  def : SchedAlias<WriteFPDIV64,  M55WriteFloatE3Plus28>;
  def : SchedAlias<WriteFPSQRT32, M55WriteFloatE3Plus14>;
  def : SchedAlias<WriteFPSQRT64, M55WriteFloatE3Plus28>;

  // The generic FP multiply and multiply-accumulate reads get a read advance
  // of 0.
  def : ReadAdvance<ReadFPMUL, 0>;
  def : ReadAdvance<ReadFPMAC, 0>;
  // Per-instruction overrides for VFP instructions.

  // Loads and stores use the load/store pipe.
  def : InstRW<[M55WriteLSE3],
               (instregex "VLD")>;
  def : InstRW<[M55WriteLSE2],
               (instregex "VST")>;
  def : InstRW<[M55WriteLSE3],
               (instregex "VLLD", "VLST")>;

  // Everything below uses the float pipe.
  def : InstRW<[M55WriteFloatE3],
               (instregex "VABS(H|S|D)")>;
  def : InstRW<[M55WriteFloatE3],
               (instregex "VCVT(A|M|N|P|R|X|Z)(S|U)(H|S|D)")>;
  def : InstRW<[M55WriteFloatE3],
               (instregex "VCVT(B|T)(DH|HD)")>;
  def : InstRW<[M55WriteFloatE2],
               (instregex "VCMPZ?(E|H|S|D)")>;
  def : InstRW<[M55WriteFloatE3Plus7],
               (instregex "VDIVH")>;
  // VFMA
  def : InstRW<[M55WriteFloatE3],
               (instregex "VFN?M(A|S)(H|S)")>;
  // VFMA
  def : InstRW<[M55WriteFloatE3Plus22],
               (instregex "VFN?M(A|S)D")>;
  def : InstRW<[M55WriteFloatE3],
               (instregex "VFP_V(MAX|MIN)NM")>;
  // VINS, VMOVX, to-FP reg movs
  def : InstRW<[M55WriteFloatE3],
               (instregex "VINSH$", "VMOVH$", "VMOVHR$", "VMOVSR$",
                          "VMOVDRR$")>;
  // Other VMOV's
  def : InstRW<[M55WriteFloatE2],
               (instregex "VMOVD$", "VMOVS$", "VMOVR")>;
  def : InstRW<[M55WriteFloatE2],
               (instregex "FCONSTH", "FCONSTS", "FCONSTD")>;
  def : InstRW<[M55WriteFloatE2],
               (instregex "VGETLNi32", "VSETLNi32")>;
  def : InstRW<[M55WriteFloatE2],
               (instregex "VMSR", "VMRS")>;
  // VMLA
  def : InstRW<[M55WriteFloatE3Plus2],
               (instregex "VN?ML(A|S)H")>;
  def : InstRW<[M55WriteFloatE3],
               (instregex "VNEG(H|S|D)")>;
  def : InstRW<[M55WriteFloatE3],
               (instregex "VRINT(A|M|N|P|R|X|Z)(H|S|D)")>;
  def : InstRW<[M55WriteFloatE3],
               (instregex "VSEL..(H|S|D)")>;
  def : InstRW<[M55WriteFloatE3Plus7],
               (instregex "VSQRTH")>;
  // The generic WriteVLD1-4 / WriteVST1-4 writes are given no processor
  // resources in this model.
  // NOTE(review): presumably these are only defined so that the generic writes
  // have an entry -- HasNEON is listed in UnsupportedFeatures; confirm.
  def : WriteRes<WriteVLD1, []>;
  def : WriteRes<WriteVLD2, []>;
  def : WriteRes<WriteVLD3, []>;
  def : WriteRes<WriteVLD4, []>;

  def : WriteRes<WriteVST1, []>;
  def : WriteRes<WriteVST2, []>;
  def : WriteRes<WriteVST3, []>;
  def : WriteRes<WriteVST4, []>;
  426. }