ARMScheduleA8.td 50 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074
  1. //=- ARMScheduleA8.td - ARM Cortex-A8 Scheduling Definitions -*- tablegen -*-=//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file defines the itinerary class data for the ARM Cortex A8 processors.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. //
  13. // Scheduling information derived from "Cortex-A8 Technical Reference Manual".
  14. // Functional Units.
  15. def A8_Pipe0 : FuncUnit; // integer pipeline 0 (dual-issues with A8_Pipe1; the only pipe used by integer multiplies)
  16. def A8_Pipe1 : FuncUnit; // integer pipeline 1 (dual-issues with A8_Pipe0)
  17. def A8_LSPipe : FuncUnit; // Load / store pipeline
  18. def A8_NPipe : FuncUnit; // NEON ALU/MUL pipe
  19. def A8_NLSPipe : FuncUnit; // NEON load / store pipe
  20. //
  21. // Dual issue pipeline represented by A8_Pipe0 | A8_Pipe1
  22. //
  23. def CortexA8Itineraries : ProcessorItineraries<
  24. [A8_Pipe0, A8_Pipe1, A8_LSPipe, A8_NPipe, A8_NLSPipe],
  25. [], [
  26. // Two fully-pipelined integer ALU pipelines
  27. //
  28. // No operand cycles
  29. InstrItinData<IIC_iALUx , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>,
  30. //
  31. // Binary Instructions that produce a result
  32. InstrItinData<IIC_iALUi ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
  33. InstrItinData<IIC_iALUr ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>,
  34. InstrItinData<IIC_iALUsi,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
  35. InstrItinData<IIC_iALUsir,[InstrStage<1,[A8_Pipe0, A8_Pipe1]>], [2, 1, 2]>,
  36. InstrItinData<IIC_iALUsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>,
  37. //
  38. // Bitwise Instructions that produce a result
  39. InstrItinData<IIC_iBITi ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
  40. InstrItinData<IIC_iBITr ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>,
  41. InstrItinData<IIC_iBITsi,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
  42. InstrItinData<IIC_iBITsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>,
  43. //
  44. // Unary Instructions that produce a result
  45. InstrItinData<IIC_iUNAr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
  46. InstrItinData<IIC_iUNAsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
  47. //
  48. // Zero and sign extension instructions
  49. InstrItinData<IIC_iEXTr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
  50. InstrItinData<IIC_iEXTAr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
  51. InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>],[2, 2, 1, 1]>,
  52. //
  53. // Compare instructions
  54. InstrItinData<IIC_iCMPi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
  55. InstrItinData<IIC_iCMPr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
  56. InstrItinData<IIC_iCMPsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
  57. InstrItinData<IIC_iCMPsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
  58. //
  59. // Test instructions
  60. InstrItinData<IIC_iTSTi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
  61. InstrItinData<IIC_iTSTr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
  62. InstrItinData<IIC_iTSTsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
  63. InstrItinData<IIC_iTSTsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
  64. //
  65. // Move instructions, unconditional
  66. InstrItinData<IIC_iMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>,
  67. InstrItinData<IIC_iMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
  68. InstrItinData<IIC_iMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
  69. InstrItinData<IIC_iMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>,
  70. InstrItinData<IIC_iMOVix2,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
  71. InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
  72. InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
  73. InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
  74. InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [3]>,
  75. InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
  76. InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
  77. InstrStage<1, [A8_LSPipe]>], [5]>,
  78. //
  79. // Move instructions, conditional
  80. InstrItinData<IIC_iCMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
  81. InstrItinData<IIC_iCMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
  82. InstrItinData<IIC_iCMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
  83. InstrItinData<IIC_iCMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
  84. InstrItinData<IIC_iCMOVix2,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
  85. InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [3, 1]>,
  86. //
  87. // MVN instructions
  88. InstrItinData<IIC_iMVNi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>,
  89. InstrItinData<IIC_iMVNr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
  90. InstrItinData<IIC_iMVNsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
  91. InstrItinData<IIC_iMVNsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>,
  92. // Integer multiply pipeline
  93. // Result written in E5, but that is relative to the last cycle of a
  94. // multicycle instruction, so we use 6 for those cases
  95. //
  96. InstrItinData<IIC_iMUL16 , [InstrStage<1, [A8_Pipe0]>], [5, 1, 1]>,
  97. InstrItinData<IIC_iMAC16 , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
  98. InstrItinData<IIC_iMUL32 , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1]>,
  99. InstrItinData<IIC_iMAC32 , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
  100. InstrItinData<IIC_iMUL64 , [InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
  101. InstrItinData<IIC_iMAC64 , [InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
  102. // Integer load pipeline
  103. //
  104. // Immediate offset
  105. InstrItinData<IIC_iLoad_i , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  106. InstrStage<1, [A8_LSPipe]>], [3, 1]>,
  107. InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  108. InstrStage<1, [A8_LSPipe]>], [3, 1]>,
  109. InstrItinData<IIC_iLoad_d_i, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  110. InstrStage<1, [A8_LSPipe]>], [3, 1]>,
  111. //
  112. // Register offset
  113. InstrItinData<IIC_iLoad_r , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  114. InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
  115. InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  116. InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
  117. InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  118. InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
  119. //
  120. // Scaled register offset, issues over 2 cycles
  121. // FIXME: lsl by 2 takes 1 cycle.
  122. InstrItinData<IIC_iLoad_si , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
  123. InstrStage<1, [A8_LSPipe]>], [4, 1, 1]>,
  124. InstrItinData<IIC_iLoad_bh_si,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
  125. InstrStage<1, [A8_LSPipe]>], [4, 1, 1]>,
  126. //
  127. // Immediate offset with update
  128. InstrItinData<IIC_iLoad_iu , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  129. InstrStage<1, [A8_LSPipe]>], [3, 2, 1]>,
  130. InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  131. InstrStage<1, [A8_LSPipe]>], [3, 2, 1]>,
  132. //
  133. // Register offset with update
  134. InstrItinData<IIC_iLoad_ru , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  135. InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>,
  136. InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  137. InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>,
  138. InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  139. InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>,
  140. //
  141. // Scaled register offset with update, issues over 2 cycles
  142. InstrItinData<IIC_iLoad_siu , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
  143. InstrStage<2, [A8_LSPipe]>], [4, 3, 1, 1]>,
  144. InstrItinData<IIC_iLoad_bh_siu,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
  145. InstrStage<2, [A8_LSPipe]>], [4, 3, 1, 1]>,
  146. //
  147. // Load multiple, def is the 5th operand. Pipeline 0 only.
  148. // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
  149. InstrItinData<IIC_iLoad_m , [InstrStage<2, [A8_Pipe0], 0>,
  150. InstrStage<2, [A8_LSPipe]>],
  151. [1, 1, 1, 1, 3], [], -1>, // dynamic uops
  152. //
  153. // Load multiple + update, defs are the 1st and 5th operands.
  154. InstrItinData<IIC_iLoad_mu , [InstrStage<3, [A8_Pipe0], 0>,
  155. InstrStage<3, [A8_LSPipe]>],
  156. [2, 1, 1, 1, 3], [], -1>, // dynamic uops
  157. //
  158. // Load multiple plus branch
  159. InstrItinData<IIC_iLoad_mBr, [InstrStage<3, [A8_Pipe0], 0>,
  160. InstrStage<3, [A8_LSPipe]>,
  161. InstrStage<1, [A8_Pipe0, A8_Pipe1]>],
  162. [1, 2, 1, 1, 3], [], -1>, // dynamic uops
  163. //
  164. // Pop, def is the 3rd operand.
  165. InstrItinData<IIC_iPop , [InstrStage<3, [A8_Pipe0], 0>,
  166. InstrStage<3, [A8_LSPipe]>],
  167. [1, 1, 3], [], -1>, // dynamic uops
  168. //
  169. // Pop plus branch, def is the 3rd operand.
  170. InstrItinData<IIC_iPop_Br, [InstrStage<3, [A8_Pipe0], 0>,
  171. InstrStage<3, [A8_LSPipe]>,
  172. InstrStage<1, [A8_Pipe0, A8_Pipe1]>],
  173. [1, 1, 3], [], -1>, // dynamic uops
  174. //
  175. // iLoadi + iALUr for t2LDRpci_pic.
  176. InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  177. InstrStage<1, [A8_LSPipe]>,
  178. InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [4, 1]>,
  179. // Integer store pipeline
  180. //
  181. // Immediate offset
  182. InstrItinData<IIC_iStore_i , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  183. InstrStage<1, [A8_LSPipe]>], [3, 1]>,
  184. InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  185. InstrStage<1, [A8_LSPipe]>], [3, 1]>,
  186. InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  187. InstrStage<1, [A8_LSPipe]>], [3, 1]>,
  188. //
  189. // Register offset
  190. InstrItinData<IIC_iStore_r , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  191. InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
  192. InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  193. InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
  194. InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  195. InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
  196. //
  197. // Scaled register offset, issues over 2 cycles
  198. InstrItinData<IIC_iStore_si , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
  199. InstrStage<2, [A8_LSPipe]>], [3, 1, 1]>,
  200. InstrItinData<IIC_iStore_bh_si,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
  201. InstrStage<2, [A8_LSPipe]>], [3, 1, 1]>,
  202. //
  203. // Immediate offset with update
  204. InstrItinData<IIC_iStore_iu , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  205. InstrStage<1, [A8_LSPipe]>], [2, 3, 1]>,
  206. InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  207. InstrStage<1, [A8_LSPipe]>], [2, 3, 1]>,
  208. //
  209. // Register offset with update
  210. InstrItinData<IIC_iStore_ru , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  211. InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>,
  212. InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  213. InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>,
  214. InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  215. InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>,
  216. //
  217. // Scaled register offset with update, issues over 2 cycles
  218. InstrItinData<IIC_iStore_siu, [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
  219. InstrStage<2, [A8_LSPipe]>], [3, 3, 1, 1]>,
  220. InstrItinData<IIC_iStore_bh_siu,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
  221. InstrStage<2, [A8_LSPipe]>], [3, 3, 1, 1]>,
  222. //
  223. // Store multiple. Pipeline 0 only.
  224. // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
  225. InstrItinData<IIC_iStore_m , [InstrStage<2, [A8_Pipe0], 0>,
  226. InstrStage<2, [A8_LSPipe]>],
  227. [], [], -1>, // dynamic uops
  228. //
  229. // Store multiple + update
  230. InstrItinData<IIC_iStore_mu, [InstrStage<2, [A8_Pipe0], 0>,
  231. InstrStage<2, [A8_LSPipe]>],
  232. [2], [], -1>, // dynamic uops
  233. //
  234. // Preload
  235. InstrItinData<IIC_Preload, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
  236. // Branch
  237. //
  238. // no delay slots, so the latency of a branch is unimportant
  239. InstrItinData<IIC_Br , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>,
  240. // VFP
  241. // Issue through integer pipeline, and execute in NEON unit. We assume
  242. // RunFast mode so that NFP pipeline is used for single-precision when
  243. // possible.
  244. //
  245. // FP Special Register to Integer Register File Move
  246. InstrItinData<IIC_fpSTAT , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  247. InstrStage<1, [A8_NLSPipe]>], [20]>,
  248. //
  249. // Single-precision FP Unary
  250. InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  251. InstrStage<1, [A8_NPipe]>], [7, 1]>,
  252. //
  253. // Double-precision FP Unary
  254. InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  255. InstrStage<4, [A8_NPipe], 0>,
  256. InstrStage<4, [A8_NLSPipe]>], [4, 1]>,
  257. //
  258. // Single-precision FP Compare
  259. InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  260. InstrStage<1, [A8_NPipe]>], [1, 1]>,
  261. //
  262. // Double-precision FP Compare
  263. InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  264. InstrStage<4, [A8_NPipe], 0>,
  265. InstrStage<4, [A8_NLSPipe]>], [4, 1]>,
  266. //
  267. // Single to Double FP Convert
  268. InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  269. InstrStage<7, [A8_NPipe], 0>,
  270. InstrStage<7, [A8_NLSPipe]>], [7, 1]>,
  271. //
  272. // Double to Single FP Convert
  273. InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  274. InstrStage<5, [A8_NPipe], 0>,
  275. InstrStage<5, [A8_NLSPipe]>], [5, 1]>,
  276. //
  277. // Single-Precision FP to Integer Convert
  278. InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  279. InstrStage<1, [A8_NPipe]>], [7, 1]>,
  280. //
  281. // Double-Precision FP to Integer Convert
  282. InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  283. InstrStage<8, [A8_NPipe], 0>,
  284. InstrStage<8, [A8_NLSPipe]>], [8, 1]>,
  285. //
  286. // Integer to Single-Precision FP Convert
  287. InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  288. InstrStage<1, [A8_NPipe]>], [7, 1]>,
  289. //
  290. // Integer to Double-Precision FP Convert
  291. InstrItinData<IIC_fpCVTID , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  292. InstrStage<8, [A8_NPipe], 0>,
  293. InstrStage<8, [A8_NLSPipe]>], [8, 1]>,
  294. //
  295. // Single-precision FP ALU
  296. InstrItinData<IIC_fpALU32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  297. InstrStage<1, [A8_NPipe]>], [7, 1, 1]>,
  298. //
  299. // Double-precision FP ALU
  300. InstrItinData<IIC_fpALU64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  301. InstrStage<9, [A8_NPipe], 0>,
  302. InstrStage<9, [A8_NLSPipe]>], [9, 1, 1]>,
  303. //
  304. // Single-precision FP Multiply
  305. InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  306. InstrStage<1, [A8_NPipe]>], [7, 1, 1]>,
  307. //
  308. // Double-precision FP Multiply
  309. InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  310. InstrStage<11, [A8_NPipe], 0>,
  311. InstrStage<11, [A8_NLSPipe]>], [11, 1, 1]>,
  312. //
  313. // Single-precision FP MAC
  314. InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  315. InstrStage<1, [A8_NPipe]>], [7, 2, 1, 1]>,
  316. //
  317. // Double-precision FP MAC
  318. InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  319. InstrStage<19, [A8_NPipe], 0>,
  320. InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>,
  321. //
  322. // Single-precision Fused FP MAC
  323. InstrItinData<IIC_fpFMAC32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  324. InstrStage<1, [A8_NPipe]>], [7, 2, 1, 1]>,
  325. //
  326. // Double-precision Fused FP MAC
  327. InstrItinData<IIC_fpFMAC64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  328. InstrStage<19, [A8_NPipe], 0>,
  329. InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>,
  330. //
  331. // Single-precision FP DIV
  332. InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  333. InstrStage<20, [A8_NPipe], 0>,
  334. InstrStage<20, [A8_NLSPipe]>], [20, 1, 1]>,
  335. //
  336. // Double-precision FP DIV
  337. InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  338. InstrStage<29, [A8_NPipe], 0>,
  339. InstrStage<29, [A8_NLSPipe]>], [29, 1, 1]>,
  340. //
  341. // Single-precision FP SQRT
  342. InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  343. InstrStage<19, [A8_NPipe], 0>,
  344. InstrStage<19, [A8_NLSPipe]>], [19, 1]>,
  345. //
  346. // Double-precision FP SQRT
  347. InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  348. InstrStage<29, [A8_NPipe], 0>,
  349. InstrStage<29, [A8_NLSPipe]>], [29, 1]>,
  350. //
  351. // Integer to Single-precision Move
  352. InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  353. InstrStage<1, [A8_NPipe]>],
  354. [2, 1]>,
  355. //
  356. // Integer to Double-precision Move
  357. InstrItinData<IIC_fpMOVID, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  358. InstrStage<1, [A8_NPipe]>],
  359. [2, 1, 1]>,
  360. //
  361. // Single-precision to Integer Move
  362. InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  363. InstrStage<1, [A8_NPipe]>],
  364. [20, 1]>,
  365. //
  366. // Double-precision to Integer Move
  367. InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  368. InstrStage<1, [A8_NPipe]>],
  369. [20, 20, 1]>,
  370. //
  371. // Single-precision FP Load
  372. InstrItinData<IIC_fpLoad32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  373. InstrStage<1, [A8_NLSPipe], 0>,
  374. InstrStage<1, [A8_LSPipe]>],
  375. [2, 1]>,
  376. //
  377. // Double-precision FP Load
  378. InstrItinData<IIC_fpLoad64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  379. InstrStage<1, [A8_NLSPipe], 0>,
  380. InstrStage<1, [A8_LSPipe]>],
  381. [2, 1]>,
  382. //
  383. // FP Load Multiple
  384. // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
  385. InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  386. InstrStage<1, [A8_NLSPipe], 0>,
  387. InstrStage<1, [A8_LSPipe]>,
  388. InstrStage<1, [A8_NLSPipe], 0>,
  389. InstrStage<1, [A8_LSPipe]>],
  390. [1, 1, 1, 2], [], -1>, // dynamic uops
  391. //
  392. // FP Load Multiple + update
  393. InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  394. InstrStage<1, [A8_NLSPipe], 0>,
  395. InstrStage<1, [A8_LSPipe]>,
  396. InstrStage<1, [A8_NLSPipe], 0>,
  397. InstrStage<1, [A8_LSPipe]>],
  398. [2, 1, 1, 1, 2], [], -1>, // dynamic uops
  399. //
  400. // Single-precision FP Store
  401. InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  402. InstrStage<1, [A8_NLSPipe], 0>,
  403. InstrStage<1, [A8_LSPipe]>],
  404. [1, 1]>,
  405. //
  406. // Double-precision FP Store
  407. InstrItinData<IIC_fpStore64,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  408. InstrStage<1, [A8_NLSPipe], 0>,
  409. InstrStage<1, [A8_LSPipe]>],
  410. [1, 1]>,
  411. //
  412. // FP Store Multiple
  413. InstrItinData<IIC_fpStore_m,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  414. InstrStage<1, [A8_NLSPipe], 0>,
  415. InstrStage<1, [A8_LSPipe]>,
  416. InstrStage<1, [A8_NLSPipe], 0>,
  417. InstrStage<1, [A8_LSPipe]>],
  418. [1, 1, 1, 1], [], -1>, // dynamic uops
  419. //
  420. // FP Store Multiple + update
  421. InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  422. InstrStage<1, [A8_NLSPipe], 0>,
  423. InstrStage<1, [A8_LSPipe]>,
  424. InstrStage<1, [A8_NLSPipe], 0>,
  425. InstrStage<1, [A8_LSPipe]>],
  426. [2, 1, 1, 1, 1], [], -1>, // dynamic uops
  427. // NEON
  428. // Issue through integer pipeline, and execute in NEON unit.
  429. //
  430. // VLD1
  431. InstrItinData<IIC_VLD1, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  432. InstrStage<2, [A8_NLSPipe], 0>,
  433. InstrStage<2, [A8_LSPipe]>],
  434. [2, 1]>,
  435. // VLD1x2
  436. InstrItinData<IIC_VLD1x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  437. InstrStage<2, [A8_NLSPipe], 0>,
  438. InstrStage<2, [A8_LSPipe]>],
  439. [2, 2, 1]>,
  440. //
  441. // VLD1x3
  442. InstrItinData<IIC_VLD1x3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  443. InstrStage<3, [A8_NLSPipe], 0>,
  444. InstrStage<3, [A8_LSPipe]>],
  445. [2, 2, 3, 1]>,
  446. //
  447. // VLD1x4
  448. InstrItinData<IIC_VLD1x4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  449. InstrStage<3, [A8_NLSPipe], 0>,
  450. InstrStage<3, [A8_LSPipe]>],
  451. [2, 2, 3, 3, 1]>,
  452. //
  453. // VLD1u
  454. InstrItinData<IIC_VLD1u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  455. InstrStage<2, [A8_NLSPipe], 0>,
  456. InstrStage<2, [A8_LSPipe]>],
  457. [2, 2, 1]>,
  458. //
  459. // VLD1x2u
  460. InstrItinData<IIC_VLD1x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  461. InstrStage<2, [A8_NLSPipe], 0>,
  462. InstrStage<2, [A8_LSPipe]>],
  463. [2, 2, 2, 1]>,
  464. //
  465. // VLD1x3u
  466. InstrItinData<IIC_VLD1x3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  467. InstrStage<3, [A8_NLSPipe], 0>,
  468. InstrStage<3, [A8_LSPipe]>],
  469. [2, 2, 3, 2, 1]>,
  470. //
  471. // VLD1x4u
  472. InstrItinData<IIC_VLD1x4u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  473. InstrStage<3, [A8_NLSPipe], 0>,
  474. InstrStage<3, [A8_LSPipe]>],
  475. [2, 2, 3, 3, 2, 1]>,
  476. //
  477. // VLD1ln
  478. InstrItinData<IIC_VLD1ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
  479. InstrStage<3, [A8_NLSPipe], 0>,
  480. InstrStage<3, [A8_LSPipe]>],
  481. [3, 1, 1, 1]>,
  482. //
  483. // VLD1lnu
  484. InstrItinData<IIC_VLD1lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
  485. InstrStage<3, [A8_NLSPipe], 0>,
  486. InstrStage<3, [A8_LSPipe]>],
  487. [3, 2, 1, 1, 1, 1]>,
  488. //
  489. // VLD1dup
  490. InstrItinData<IIC_VLD1dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
  491. InstrStage<2, [A8_NLSPipe], 0>,
  492. InstrStage<2, [A8_LSPipe]>],
  493. [2, 1]>,
  494. //
  495. // VLD1dupu
  496. InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
  497. InstrStage<2, [A8_NLSPipe], 0>,
  498. InstrStage<2, [A8_LSPipe]>],
  499. [2, 2, 1, 1]>,
  500. //
  501. // VLD2
  502. InstrItinData<IIC_VLD2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  503. InstrStage<2, [A8_NLSPipe], 0>,
  504. InstrStage<2, [A8_LSPipe]>],
  505. [2, 2, 1]>,
  506. //
  507. // VLD2x2
  508. InstrItinData<IIC_VLD2x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  509. InstrStage<3, [A8_NLSPipe], 0>,
  510. InstrStage<3, [A8_LSPipe]>],
  511. [2, 2, 3, 3, 1]>,
  512. //
  513. // VLD2ln
  514. InstrItinData<IIC_VLD2ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  515. InstrStage<3, [A8_NLSPipe], 0>,
  516. InstrStage<3, [A8_LSPipe]>],
  517. [3, 3, 1, 1, 1, 1]>,
  518. //
  519. // VLD2u
  520. InstrItinData<IIC_VLD2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  521. InstrStage<2, [A8_NLSPipe], 0>,
  522. InstrStage<2, [A8_LSPipe]>],
  523. [2, 2, 2, 1, 1, 1]>,
  524. //
  525. // VLD2x2u
  526. InstrItinData<IIC_VLD2x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  527. InstrStage<3, [A8_NLSPipe], 0>,
  528. InstrStage<3, [A8_LSPipe]>],
  529. [2, 2, 3, 3, 2, 1]>,
  530. //
  531. // VLD2lnu
  532. InstrItinData<IIC_VLD2lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  533. InstrStage<3, [A8_NLSPipe], 0>,
  534. InstrStage<3, [A8_LSPipe]>],
  535. [3, 3, 2, 1, 1, 1, 1, 1]>,
  536. //
  537. // VLD2dup
  538. InstrItinData<IIC_VLD2dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
  539. InstrStage<2, [A8_NLSPipe], 0>,
  540. InstrStage<2, [A8_LSPipe]>],
  541. [2, 2, 1]>,
  542. //
  543. // VLD2dupu
  544. InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
  545. InstrStage<2, [A8_NLSPipe], 0>,
  546. InstrStage<2, [A8_LSPipe]>],
  547. [2, 2, 2, 1, 1]>,
  548. //
  549. // VLD3
  550. InstrItinData<IIC_VLD3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  551. InstrStage<4, [A8_NLSPipe], 0>,
  552. InstrStage<4, [A8_LSPipe]>],
  553. [3, 3, 4, 1]>,
  554. //
  555. // VLD3ln
  556. InstrItinData<IIC_VLD3ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  557. InstrStage<5, [A8_NLSPipe], 0>,
  558. InstrStage<5, [A8_LSPipe]>],
  559. [4, 4, 5, 1, 1, 1, 1, 2]>,
  560. //
  561. // VLD3u
  562. InstrItinData<IIC_VLD3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  563. InstrStage<4, [A8_NLSPipe], 0>,
  564. InstrStage<4, [A8_LSPipe]>],
  565. [3, 3, 4, 2, 1]>,
  566. //
  567. // VLD3lnu
  568. InstrItinData<IIC_VLD3lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  569. InstrStage<5, [A8_NLSPipe], 0>,
  570. InstrStage<5, [A8_LSPipe]>],
  571. [4, 4, 5, 2, 1, 1, 1, 1, 1, 2]>,
  572. //
  573. // VLD3dup
  574. InstrItinData<IIC_VLD3dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
  575. InstrStage<3, [A8_NLSPipe], 0>,
  576. InstrStage<3, [A8_LSPipe]>],
  577. [2, 2, 3, 1]>,
  578. //
  579. // VLD3dupu
  580. InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
  581. InstrStage<3, [A8_NLSPipe], 0>,
  582. InstrStage<3, [A8_LSPipe]>],
  583. [2, 2, 3, 2, 1, 1]>,
  584. //
  585. // VLD4
  586. InstrItinData<IIC_VLD4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  587. InstrStage<4, [A8_NLSPipe], 0>,
  588. InstrStage<4, [A8_LSPipe]>],
  589. [3, 3, 4, 4, 1]>,
  590. //
  591. // VLD4ln
  592. InstrItinData<IIC_VLD4ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  593. InstrStage<5, [A8_NLSPipe], 0>,
  594. InstrStage<5, [A8_LSPipe]>],
  595. [4, 4, 5, 5, 1, 1, 1, 1, 2, 2]>,
  596. //
  597. // VLD4u
  598. InstrItinData<IIC_VLD4u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  599. InstrStage<4, [A8_NLSPipe], 0>,
  600. InstrStage<4, [A8_LSPipe]>],
  601. [3, 3, 4, 4, 2, 1]>,
  602. //
  603. // VLD4lnu
  604. InstrItinData<IIC_VLD4lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  605. InstrStage<5, [A8_NLSPipe], 0>,
  606. InstrStage<5, [A8_LSPipe]>],
  607. [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>,
  608. //
  609. // VLD4dup
  610. InstrItinData<IIC_VLD4dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
  611. InstrStage<3, [A8_NLSPipe], 0>,
  612. InstrStage<3, [A8_LSPipe]>],
  613. [2, 2, 3, 3, 1]>,
  614. //
  615. // VLD4dupu
  616. InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
  617. InstrStage<3, [A8_NLSPipe], 0>,
  618. InstrStage<3, [A8_LSPipe]>],
  619. [2, 2, 3, 3, 2, 1, 1]>,
  620. //
  621. // VST1
  622. InstrItinData<IIC_VST1, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  623. InstrStage<2, [A8_NLSPipe], 0>,
  624. InstrStage<2, [A8_LSPipe]>],
  625. [1, 1, 1]>,
  626. //
  627. // VST1x2
  628. InstrItinData<IIC_VST1x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  629. InstrStage<2, [A8_NLSPipe], 0>,
  630. InstrStage<2, [A8_LSPipe]>],
  631. [1, 1, 1, 1]>,
  632. //
  633. // VST1x3
  634. InstrItinData<IIC_VST1x3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  635. InstrStage<3, [A8_NLSPipe], 0>,
  636. InstrStage<3, [A8_LSPipe]>],
  637. [1, 1, 1, 1, 2]>,
  638. //
  639. // VST1x4
  640. InstrItinData<IIC_VST1x4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  641. InstrStage<3, [A8_NLSPipe], 0>,
  642. InstrStage<3, [A8_LSPipe]>],
  643. [1, 1, 1, 1, 2, 2]>,
  644. //
  645. // VST1u
  646. InstrItinData<IIC_VST1u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  647. InstrStage<2, [A8_NLSPipe], 0>,
  648. InstrStage<2, [A8_LSPipe]>],
  649. [2, 1, 1, 1, 1]>,
  650. //
  651. // VST1x2u
  652. InstrItinData<IIC_VST1x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  653. InstrStage<2, [A8_NLSPipe], 0>,
  654. InstrStage<2, [A8_LSPipe]>],
  655. [2, 1, 1, 1, 1, 1]>,
  656. //
  657. // VST1x3u
  658. InstrItinData<IIC_VST1x3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  659. InstrStage<3, [A8_NLSPipe], 0>,
  660. InstrStage<3, [A8_LSPipe]>],
  661. [2, 1, 1, 1, 1, 1, 2]>,
  662. //
  663. // VST1x4u
  664. InstrItinData<IIC_VST1x4u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  665. InstrStage<3, [A8_NLSPipe], 0>,
  666. InstrStage<3, [A8_LSPipe]>],
  667. [2, 1, 1, 1, 1, 1, 2, 2]>,
  668. //
  669. // VST1ln
  670. InstrItinData<IIC_VST1ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
  671. InstrStage<2, [A8_NLSPipe], 0>,
  672. InstrStage<2, [A8_LSPipe]>],
  673. [1, 1, 1]>,
  674. //
  675. // VST1lnu
  676. InstrItinData<IIC_VST1lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
  677. InstrStage<2, [A8_NLSPipe], 0>,
  678. InstrStage<2, [A8_LSPipe]>],
  679. [2, 1, 1, 1, 1]>,
  680. //
  681. // VST2
  682. InstrItinData<IIC_VST2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  683. InstrStage<2, [A8_NLSPipe], 0>,
  684. InstrStage<2, [A8_LSPipe]>],
  685. [1, 1, 1, 1]>,
  686. //
  687. // VST2x2
  688. InstrItinData<IIC_VST2x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  689. InstrStage<4, [A8_NLSPipe], 0>,
  690. InstrStage<4, [A8_LSPipe]>],
  691. [1, 1, 1, 1, 2, 2]>,
  692. //
  693. // VST2u
  694. InstrItinData<IIC_VST2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  695. InstrStage<2, [A8_NLSPipe], 0>,
  696. InstrStage<2, [A8_LSPipe]>],
  697. [2, 1, 1, 1, 1, 1]>,
  698. //
  699. // VST2x2u
  700. InstrItinData<IIC_VST2x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  701. InstrStage<4, [A8_NLSPipe], 0>,
  702. InstrStage<4, [A8_LSPipe]>],
  703. [2, 1, 1, 1, 1, 1, 2, 2]>,
  704. //
  705. // VST2ln
  706. InstrItinData<IIC_VST2ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  707. InstrStage<2, [A8_NLSPipe], 0>,
  708. InstrStage<2, [A8_LSPipe]>],
  709. [1, 1, 1, 1]>,
  710. //
  711. // VST2lnu
  712. InstrItinData<IIC_VST2lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  713. InstrStage<2, [A8_NLSPipe], 0>,
  714. InstrStage<2, [A8_LSPipe]>],
  715. [2, 1, 1, 1, 1, 1]>,
  716. //
  717. // VST3
  718. InstrItinData<IIC_VST3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  719. InstrStage<3, [A8_NLSPipe], 0>,
  720. InstrStage<3, [A8_LSPipe]>],
  721. [1, 1, 1, 1, 2]>,
  722. //
  723. // VST3u
  724. InstrItinData<IIC_VST3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  725. InstrStage<3, [A8_NLSPipe], 0>,
  726. InstrStage<3, [A8_LSPipe]>],
  727. [2, 1, 1, 1, 1, 1, 2]>,
  728. //
  729. // VST3ln
  730. InstrItinData<IIC_VST3ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  731. InstrStage<3, [A8_NLSPipe], 0>,
  732. InstrStage<3, [A8_LSPipe]>],
  733. [1, 1, 1, 1, 2]>,
  734. //
  735. // VST3lnu
  736. InstrItinData<IIC_VST3lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  737. InstrStage<3, [A8_NLSPipe], 0>,
  738. InstrStage<3, [A8_LSPipe]>],
  739. [2, 1, 1, 1, 1, 1, 2]>,
  740. //
  741. // VST4
  742. InstrItinData<IIC_VST4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  743. InstrStage<4, [A8_NLSPipe], 0>,
  744. InstrStage<4, [A8_LSPipe]>],
  745. [1, 1, 1, 1, 2, 2]>,
  746. //
  747. // VST4u
  748. InstrItinData<IIC_VST4u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  749. InstrStage<4, [A8_NLSPipe], 0>,
  750. InstrStage<4, [A8_LSPipe]>],
  751. [2, 1, 1, 1, 1, 1, 2, 2]>,
  752. //
  753. // VST4ln
  754. InstrItinData<IIC_VST4ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  755. InstrStage<4, [A8_NLSPipe], 0>,
  756. InstrStage<4, [A8_LSPipe]>],
  757. [1, 1, 1, 1, 2, 2]>,
  758. //
  759. // VST4lnu
  760. InstrItinData<IIC_VST4lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  761. InstrStage<4, [A8_NLSPipe], 0>,
  762. InstrStage<4, [A8_LSPipe]>],
  763. [2, 1, 1, 1, 1, 1, 2, 2]>,
  764. //
  765. // Double-register FP Unary
  766. InstrItinData<IIC_VUNAD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  767. InstrStage<1, [A8_NPipe]>], [5, 2]>,
  768. //
  769. // Quad-register FP Unary
  770. // Result written in N5, but that is relative to the last cycle of multicycle,
  771. // so we use 6 for those cases
  772. InstrItinData<IIC_VUNAQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  773. InstrStage<2, [A8_NPipe]>], [6, 2]>,
  774. //
  775. // Double-register FP Binary
  776. InstrItinData<IIC_VBIND, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  777. InstrStage<1, [A8_NPipe]>], [5, 2, 2]>,
  778. //
  779. // VPADD, etc.
  780. InstrItinData<IIC_VPBIND, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  781. InstrStage<1, [A8_NPipe]>], [5, 2, 2]>,
  782. //
  783. // Double-register FP VMUL
  784. InstrItinData<IIC_VFMULD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  785. InstrStage<1, [A8_NPipe]>], [5, 2, 1]>,
  786. //
  787. // Quad-register FP Binary
  788. // Result written in N5, but that is relative to the last cycle of multicycle,
  789. // so we use 6 for those cases
  790. InstrItinData<IIC_VBINQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  791. InstrStage<2, [A8_NPipe]>], [6, 2, 2]>,
  792. //
  793. // Quad-register FP VMUL
  794. InstrItinData<IIC_VFMULQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  795. InstrStage<1, [A8_NPipe]>], [6, 2, 1]>,
  796. //
  797. // Move
  798. InstrItinData<IIC_VMOV, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  799. InstrStage<1, [A8_NPipe]>], [1, 1]>,
  800. //
  801. // Move Immediate
  802. InstrItinData<IIC_VMOVImm, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  803. InstrStage<1, [A8_NPipe]>], [3]>,
  804. //
  805. // Double-register Permute Move
  806. InstrItinData<IIC_VMOVD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  807. InstrStage<1, [A8_NLSPipe]>], [2, 1]>,
  808. //
  809. // Quad-register Permute Move
  810. // Result written in N2, but that is relative to the last cycle of multicycle,
  811. // so we use 3 for those cases
  812. InstrItinData<IIC_VMOVQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  813. InstrStage<2, [A8_NLSPipe]>], [3, 1]>,
  814. //
  815. // Integer to Single-precision Move
  816. InstrItinData<IIC_VMOVIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  817. InstrStage<1, [A8_NLSPipe]>], [2, 1]>,
  818. //
  819. // Integer to Double-precision Move
  820. InstrItinData<IIC_VMOVID , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  821. InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>,
  822. //
  823. // Single-precision to Integer Move
  824. InstrItinData<IIC_VMOVSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  825. InstrStage<1, [A8_NLSPipe]>], [20, 1]>,
  826. //
  827. // Double-precision to Integer Move
  828. InstrItinData<IIC_VMOVDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  829. InstrStage<1, [A8_NLSPipe]>], [20, 20, 1]>,
  830. //
  831. // Integer to Lane Move
  832. InstrItinData<IIC_VMOVISL , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  833. InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>,
  834. //
  835. // Vector narrow move
  836. InstrItinData<IIC_VMOVN , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  837. InstrStage<1, [A8_NPipe]>], [2, 1]>,
  838. //
  839. // Double-register Permute
  840. InstrItinData<IIC_VPERMD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  841. InstrStage<1, [A8_NLSPipe]>], [2, 2, 1, 1]>,
  842. //
  843. // Quad-register Permute
  844. // Result written in N2, but that is relative to the last cycle of multicycle,
  845. // so we use 3 for those cases
  846. InstrItinData<IIC_VPERMQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  847. InstrStage<2, [A8_NLSPipe]>], [3, 3, 1, 1]>,
  848. //
  849. // Quad-register Permute (3 cycle issue)
  850. // Result written in N2, but that is relative to the last cycle of multicycle,
  851. // so we use 4 for those cases
  852. InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  853. InstrStage<1, [A8_NLSPipe]>,
  854. InstrStage<1, [A8_NPipe], 0>,
  855. InstrStage<2, [A8_NLSPipe]>], [4, 4, 1, 1]>,
  856. //
  857. // Double-register FP Multiply-Accumulate
  858. InstrItinData<IIC_VMACD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  859. InstrStage<1, [A8_NPipe]>], [9, 3, 2, 2]>,
  860. //
  861. // Quad-register FP Multiply-Accumulate
  862. // Result written in N9, but that is relative to the last cycle of multicycle,
  863. // so we use 10 for those cases
  864. InstrItinData<IIC_VMACQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  865. InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>,
  866. //
  867. // Double-register Fused FP Multiply-Accumulate
  868. InstrItinData<IIC_VFMACD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  869. InstrStage<1, [A8_NPipe]>], [9, 3, 2, 2]>,
  870. //
  871. // Quad-register Fused FP Multiply-Accumulate
  872. // Result written in N9, but that is relative to the last cycle of multicycle,
  873. // so we use 10 for those cases
  874. InstrItinData<IIC_VFMACQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  875. InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>,
  876. //
  877. // Double-register Reciprocal Step
  878. InstrItinData<IIC_VRECSD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  879. InstrStage<1, [A8_NPipe]>], [9, 2, 2]>,
  880. //
  881. // Quad-register Reciprocal Step
  882. InstrItinData<IIC_VRECSQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  883. InstrStage<2, [A8_NPipe]>], [10, 2, 2]>,
  884. //
  885. // Double-register Integer Count
  886. InstrItinData<IIC_VCNTiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  887. InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
  888. //
  889. // Quad-register Integer Count
  890. // Result written in N3, but that is relative to the last cycle of multicycle,
  891. // so we use 4 for those cases
  892. InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  893. InstrStage<2, [A8_NPipe]>], [4, 2, 2]>,
  894. //
  895. // Double-register Integer Unary
  896. InstrItinData<IIC_VUNAiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  897. InstrStage<1, [A8_NPipe]>], [4, 2]>,
  898. //
  899. // Quad-register Integer Unary
  900. InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  901. InstrStage<1, [A8_NPipe]>], [4, 2]>,
  902. //
  903. // Double-register Integer Q-Unary
  904. InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  905. InstrStage<1, [A8_NPipe]>], [4, 1]>,
  906. //
  907. // Quad-register Integer Q-Unary
  908. InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  909. InstrStage<1, [A8_NPipe]>], [4, 1]>,
  910. //
  911. // Double-register Integer Binary
  912. InstrItinData<IIC_VBINiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  913. InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
  914. //
  915. // Quad-register Integer Binary
  916. InstrItinData<IIC_VBINiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  917. InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
  918. //
  919. // Double-register Integer Binary (4 cycle)
  920. InstrItinData<IIC_VBINi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  921. InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
  922. //
  923. // Quad-register Integer Binary (4 cycle)
  924. InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  925. InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
  926. //
  927. // Double-register Integer Subtract
  928. InstrItinData<IIC_VSUBiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  929. InstrStage<1, [A8_NPipe]>], [3, 2, 1]>,
  930. //
  931. // Quad-register Integer Subtract
  932. InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  933. InstrStage<1, [A8_NPipe]>], [3, 2, 1]>,
  934. //
  935. // Double-register Integer Subtract (4 cycle)
  936. InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  937. InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
  938. //
  939. // Quad-register Integer Subtract (4 cycle)
  940. InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  941. InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
  942. //
  943. // Double-register Integer Shift
  944. InstrItinData<IIC_VSHLiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  945. InstrStage<1, [A8_NPipe]>], [3, 1, 1]>,
  946. //
  947. // Quad-register Integer Shift
  948. InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  949. InstrStage<2, [A8_NPipe]>], [4, 1, 1]>,
  950. //
  951. // Double-register Integer Shift (4 cycle)
  952. InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  953. InstrStage<1, [A8_NPipe]>], [4, 1, 1]>,
  954. //
  955. // Quad-register Integer Shift (4 cycle)
  956. InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  957. InstrStage<2, [A8_NPipe]>], [5, 1, 1]>,
  958. //
  959. // Double-register Integer Pair Add Long
  960. InstrItinData<IIC_VPALiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  961. InstrStage<1, [A8_NPipe]>], [6, 3, 1]>,
  962. //
  963. // Quad-register Integer Pair Add Long
  964. InstrItinData<IIC_VPALiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  965. InstrStage<2, [A8_NPipe]>], [7, 3, 1]>,
  966. //
  967. // Double-register Absolute Difference and Accumulate
  968. InstrItinData<IIC_VABAD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  969. InstrStage<1, [A8_NPipe]>], [6, 3, 2, 1]>,
  970. //
  971. // Quad-register Absolute Difference and Accumulate
  972. InstrItinData<IIC_VABAQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  973. InstrStage<2, [A8_NPipe]>], [6, 3, 2, 1]>,
  974. //
  975. // Double-register Integer Multiply (.8, .16)
  976. InstrItinData<IIC_VMULi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  977. InstrStage<1, [A8_NPipe]>], [6, 2, 2]>,
  978. //
  979. // Double-register Integer Multiply (.32)
  980. InstrItinData<IIC_VMULi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  981. InstrStage<2, [A8_NPipe]>], [7, 2, 1]>,
  982. //
  983. // Quad-register Integer Multiply (.8, .16)
  984. InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  985. InstrStage<2, [A8_NPipe]>], [7, 2, 2]>,
  986. //
  987. // Quad-register Integer Multiply (.32)
  988. InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  989. InstrStage<1, [A8_NPipe]>,
  990. InstrStage<2, [A8_NLSPipe], 0>,
  991. InstrStage<3, [A8_NPipe]>], [9, 2, 1]>,
  992. //
  993. // Double-register Integer Multiply-Accumulate (.8, .16)
  994. InstrItinData<IIC_VMACi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  995. InstrStage<1, [A8_NPipe]>], [6, 3, 2, 2]>,
  996. //
  997. // Double-register Integer Multiply-Accumulate (.32)
  998. InstrItinData<IIC_VMACi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  999. InstrStage<2, [A8_NPipe]>], [7, 3, 2, 1]>,
  1000. //
  1001. // Quad-register Integer Multiply-Accumulate (.8, .16)
  1002. InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  1003. InstrStage<2, [A8_NPipe]>], [7, 3, 2, 2]>,
  1004. //
  1005. // Quad-register Integer Multiply-Accumulate (.32)
  1006. InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  1007. InstrStage<1, [A8_NPipe]>,
  1008. InstrStage<2, [A8_NLSPipe], 0>,
  1009. InstrStage<3, [A8_NPipe]>], [9, 3, 2, 1]>,
  1010. //
  1011. // Double-register VEXT
  1012. InstrItinData<IIC_VEXTD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  1013. InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>,
  1014. //
  1015. // Quad-register VEXT
  1016. InstrItinData<IIC_VEXTQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  1017. InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>,
  1018. //
  1019. // VTB
  1020. InstrItinData<IIC_VTB1, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  1021. InstrStage<2, [A8_NLSPipe]>], [3, 2, 1]>,
  1022. InstrItinData<IIC_VTB2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  1023. InstrStage<2, [A8_NLSPipe]>], [3, 2, 2, 1]>,
  1024. InstrItinData<IIC_VTB3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  1025. InstrStage<1, [A8_NLSPipe]>,
  1026. InstrStage<1, [A8_NPipe], 0>,
  1027. InstrStage<2, [A8_NLSPipe]>], [4, 2, 2, 3, 1]>,
  1028. InstrItinData<IIC_VTB4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  1029. InstrStage<1, [A8_NLSPipe]>,
  1030. InstrStage<1, [A8_NPipe], 0>,
  1031. InstrStage<2, [A8_NLSPipe]>],[4, 2, 2, 3, 3, 1]>,
  1032. //
  1033. // VTBX
  1034. InstrItinData<IIC_VTBX1, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  1035. InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 1]>,
  1036. InstrItinData<IIC_VTBX2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  1037. InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 2, 1]>,
  1038. InstrItinData<IIC_VTBX3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  1039. InstrStage<1, [A8_NLSPipe]>,
  1040. InstrStage<1, [A8_NPipe], 0>,
  1041. InstrStage<2, [A8_NLSPipe]>],[4, 1, 2, 2, 3, 1]>,
  1042. InstrItinData<IIC_VTBX4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
  1043. InstrStage<1, [A8_NLSPipe]>,
  1044. InstrStage<1, [A8_NPipe], 0>,
  1045. InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]>
  1046. ]>;
  1047. // ===---------------------------------------------------------------------===//
  1048. // The following definitions describe the simple machine model which
  1049. // will replace itineraries.
  1050. // Cortex-A8 machine model for scheduling and other instruction cost heuristics.
  def CortexA8Model : SchedMachineModel {
    // Itinerary table this model is bound to.
    let Itineraries = CortexA8Itineraries;

    // 2 micro-ops are dispatched per cycle.
    let IssueWidth = 2;

    // Optimistic load latency assuming bypass. This value is overridden by
    // OperandCycles if the itineraries are queried instead.
    let LoadLatency = 2;

    // Based on an estimate of the pipeline depth.
    let MispredictPenalty = 13;

    // The model is not declared complete.
    let CompleteModel = 0;
  }