ARMScheduleA9.td 130 KB


  1. //=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file defines the itinerary class data for the ARM Cortex A9 processors.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. // ===---------------------------------------------------------------------===//
  13. // This section contains legacy support for itineraries. This is
  14. // required until SD and PostRA schedulers are replaced by MachineScheduler.
  15. //
  16. // Ad-hoc scheduling information derived from the rather vague "Cortex-A9
  17. // Technical Reference Manual".
  18. //
  19. // Functional units (the resources occupied by InstrStage entries below)
  20. def A9_Issue0 : FuncUnit; // Issue slot 0
  21. def A9_Issue1 : FuncUnit; // Issue slot 1
  22. def A9_Branch : FuncUnit; // Branch unit
  23. def A9_ALU0 : FuncUnit; // ALU / MUL pipeline 0 (multiply itineraries use only this pipeline)
  24. def A9_ALU1 : FuncUnit; // ALU pipeline 1
  25. def A9_AGU : FuncUnit; // Address generation unit for ld / st
  26. def A9_NPipe : FuncUnit; // NEON pipeline (VFP itineraries also execute here)
  27. def A9_MUX0 : FuncUnit; // AGU + NEON/FPU multiplexer
  28. def A9_LSUnit : FuncUnit; // Load/store unit
  29. def A9_DRegsVFP: FuncUnit; // FP register set, VFP side (artificial unit for cross-domain stalls)
  30. def A9_DRegsN : FuncUnit; // FP register set, NEON side (artificial unit for cross-domain stalls)
  31. // Bypasses
  32. def A9_LdBypass : Bypass; // Bypass tagged on load results and on ALU/compare source operands
  33. def CortexA9Itineraries : ProcessorItineraries<
  34. [A9_Issue0, A9_Issue1, A9_Branch, A9_ALU0, A9_ALU1, A9_AGU, A9_NPipe, A9_MUX0,
  35. A9_LSUnit, A9_DRegsVFP, A9_DRegsN],
  36. [A9_LdBypass], [
  37. // Two fully-pipelined integer ALU pipelines
  38. //
  39. // Move instructions, unconditional
  40. InstrItinData<IIC_iMOVi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  41. InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
  42. InstrItinData<IIC_iMOVr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  43. InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
  44. InstrItinData<IIC_iMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  45. InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
  46. InstrItinData<IIC_iMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  47. InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
  48. InstrItinData<IIC_iMOVix2 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  49. InstrStage<1, [A9_ALU0, A9_ALU1]>,
  50. InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
  51. InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  52. InstrStage<1, [A9_ALU0, A9_ALU1]>,
  53. InstrStage<1, [A9_ALU0, A9_ALU1]>,
  54. InstrStage<1, [A9_ALU0, A9_ALU1]>], [3]>,
  55. InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  56. InstrStage<1, [A9_ALU0, A9_ALU1]>,
  57. InstrStage<1, [A9_ALU0, A9_ALU1]>,
  58. InstrStage<1, [A9_MUX0], 0>,
  59. InstrStage<1, [A9_AGU], 0>,
  60. InstrStage<1, [A9_LSUnit]>], [5]>,
  61. //
  62. // MVN instructions
  63. InstrItinData<IIC_iMVNi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  64. InstrStage<1, [A9_ALU0, A9_ALU1]>],
  65. [1]>,
  66. InstrItinData<IIC_iMVNr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  67. InstrStage<1, [A9_ALU0, A9_ALU1]>],
  68. [1, 1], [NoBypass, A9_LdBypass]>,
  69. InstrItinData<IIC_iMVNsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  70. InstrStage<2, [A9_ALU0, A9_ALU1]>],
  71. [2, 1]>,
  72. InstrItinData<IIC_iMVNsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  73. InstrStage<3, [A9_ALU0, A9_ALU1]>],
  74. [3, 1, 1]>,
  75. //
  76. // No operand cycles
  77. InstrItinData<IIC_iALUx , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  78. InstrStage<1, [A9_ALU0, A9_ALU1]>]>,
  79. //
  80. // Binary Instructions that produce a result
  81. InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  82. InstrStage<1, [A9_ALU0, A9_ALU1]>],
  83. [1, 1], [NoBypass, A9_LdBypass]>,
  84. InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  85. InstrStage<1, [A9_ALU0, A9_ALU1]>],
  86. [1, 1, 1], [NoBypass, A9_LdBypass, A9_LdBypass]>,
  87. InstrItinData<IIC_iALUsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  88. InstrStage<2, [A9_ALU0, A9_ALU1]>],
  89. [2, 1, 1], [NoBypass, A9_LdBypass, NoBypass]>,
  90. InstrItinData<IIC_iALUsir,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  91. InstrStage<2, [A9_ALU0, A9_ALU1]>],
  92. [2, 1, 1], [NoBypass, NoBypass, A9_LdBypass]>,
  93. InstrItinData<IIC_iALUsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  94. InstrStage<3, [A9_ALU0, A9_ALU1]>],
  95. [3, 1, 1, 1],
  96. [NoBypass, A9_LdBypass, NoBypass, NoBypass]>,
  97. //
  98. // Bitwise Instructions that produce a result
  99. InstrItinData<IIC_iBITi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  100. InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
  101. InstrItinData<IIC_iBITr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  102. InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
  103. InstrItinData<IIC_iBITsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  104. InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
  105. InstrItinData<IIC_iBITsr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  106. InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
  107. //
  108. // Unary Instructions that produce a result
  109. // CLZ, RBIT, etc.
  110. InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  111. InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
  112. // BFC, BFI, UBFX, SBFX
  113. InstrItinData<IIC_iUNAsi, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  114. InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1]>,
  115. //
  116. // Zero and sign extension instructions
  117. InstrItinData<IIC_iEXTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  118. InstrStage<1, [A9_ALU0, A9_ALU1]>], [2, 1]>,
  119. InstrItinData<IIC_iEXTAr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  120. InstrStage<2, [A9_ALU0, A9_ALU1]>], [3, 1, 1]>,
  121. InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  122. InstrStage<3, [A9_ALU0, A9_ALU1]>], [3, 1, 1, 1]>,
  123. //
  124. // Compare instructions
  125. InstrItinData<IIC_iCMPi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  126. InstrStage<1, [A9_ALU0, A9_ALU1]>],
  127. [1], [A9_LdBypass]>,
  128. InstrItinData<IIC_iCMPr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  129. InstrStage<1, [A9_ALU0, A9_ALU1]>],
  130. [1, 1], [A9_LdBypass, A9_LdBypass]>,
  131. InstrItinData<IIC_iCMPsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  132. InstrStage<2, [A9_ALU0, A9_ALU1]>],
  133. [1, 1], [A9_LdBypass, NoBypass]>,
  134. InstrItinData<IIC_iCMPsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  135. InstrStage<3, [A9_ALU0, A9_ALU1]>],
  136. [1, 1, 1], [A9_LdBypass, NoBypass, NoBypass]>,
  137. //
  138. // Test instructions
  139. InstrItinData<IIC_iTSTi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  140. InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
  141. InstrItinData<IIC_iTSTr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  142. InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
  143. InstrItinData<IIC_iTSTsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  144. InstrStage<2, [A9_ALU0, A9_ALU1]>], [1, 1]>,
  145. InstrItinData<IIC_iTSTsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  146. InstrStage<3, [A9_ALU0, A9_ALU1]>], [1, 1, 1]>,
  147. //
  148. // Move instructions, conditional
  149. // FIXME: Correctly model the extra input dep on the destination.
  150. InstrItinData<IIC_iCMOVi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  151. InstrStage<1, [A9_ALU0, A9_ALU1]>], [1]>,
  152. InstrItinData<IIC_iCMOVr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  153. InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
  154. InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  155. InstrStage<1, [A9_ALU0, A9_ALU1]>], [1, 1]>,
  156. InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  157. InstrStage<2, [A9_ALU0, A9_ALU1]>], [2, 1, 1]>,
  158. InstrItinData<IIC_iCMOVix2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  159. InstrStage<1, [A9_ALU0, A9_ALU1]>,
  160. InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  161. InstrStage<1, [A9_ALU0, A9_ALU1]>], [2]>,
  162. // Integer multiply pipeline
  163. //
  164. InstrItinData<IIC_iMUL16 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  165. InstrStage<2, [A9_ALU0]>], [3, 1, 1]>,
  166. InstrItinData<IIC_iMAC16 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  167. InstrStage<2, [A9_ALU0]>],
  168. [3, 1, 1, 1]>,
  169. InstrItinData<IIC_iMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  170. InstrStage<2, [A9_ALU0]>], [4, 1, 1]>,
  171. InstrItinData<IIC_iMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  172. InstrStage<2, [A9_ALU0]>],
  173. [4, 1, 1, 1]>,
  174. InstrItinData<IIC_iMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  175. InstrStage<3, [A9_ALU0]>], [4, 5, 1, 1]>,
  176. InstrItinData<IIC_iMAC64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  177. InstrStage<3, [A9_ALU0]>],
  178. [4, 5, 1, 1]>,
  179. // Integer load pipeline
  180. // FIXME: The timings are some rough approximations
  181. //
  182. // Immediate offset
  183. InstrItinData<IIC_iLoad_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  184. InstrStage<1, [A9_MUX0], 0>,
  185. InstrStage<1, [A9_AGU], 0>,
  186. InstrStage<1, [A9_LSUnit]>],
  187. [3, 1], [A9_LdBypass]>,
  188. InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  189. InstrStage<1, [A9_MUX0], 0>,
  190. InstrStage<2, [A9_AGU], 0>,
  191. InstrStage<1, [A9_LSUnit]>],
  192. [4, 1], [A9_LdBypass]>,
  193. // FIXME: If address is 64-bit aligned, AGU cycles is 1.
  194. InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  195. InstrStage<1, [A9_MUX0], 0>,
  196. InstrStage<2, [A9_AGU], 0>,
  197. InstrStage<1, [A9_LSUnit]>],
  198. [3, 3, 1], [A9_LdBypass]>,
  199. //
  200. // Register offset
  201. InstrItinData<IIC_iLoad_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  202. InstrStage<1, [A9_MUX0], 0>,
  203. InstrStage<1, [A9_AGU], 0>,
  204. InstrStage<1, [A9_LSUnit]>],
  205. [3, 1, 1], [A9_LdBypass]>,
  206. InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  207. InstrStage<1, [A9_MUX0], 0>,
  208. InstrStage<2, [A9_AGU], 0>,
  209. InstrStage<1, [A9_LSUnit]>],
  210. [4, 1, 1], [A9_LdBypass]>,
  211. InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  212. InstrStage<1, [A9_MUX0], 0>,
  213. InstrStage<2, [A9_AGU], 0>,
  214. InstrStage<1, [A9_LSUnit]>],
  215. [3, 3, 1, 1], [A9_LdBypass]>,
  216. //
  217. // Scaled register offset
  218. InstrItinData<IIC_iLoad_si , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  219. InstrStage<1, [A9_MUX0], 0>,
  220. InstrStage<1, [A9_AGU], 0>,
  221. InstrStage<1, [A9_LSUnit], 0>],
  222. [4, 1, 1], [A9_LdBypass]>,
  223. InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  224. InstrStage<1, [A9_MUX0], 0>,
  225. InstrStage<2, [A9_AGU], 0>,
  226. InstrStage<1, [A9_LSUnit]>],
  227. [5, 1, 1], [A9_LdBypass]>,
  228. //
  229. // Immediate offset with update
  230. InstrItinData<IIC_iLoad_iu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  231. InstrStage<1, [A9_MUX0], 0>,
  232. InstrStage<1, [A9_AGU], 0>,
  233. InstrStage<1, [A9_LSUnit]>],
  234. [3, 2, 1], [A9_LdBypass]>,
  235. InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  236. InstrStage<1, [A9_MUX0], 0>,
  237. InstrStage<2, [A9_AGU], 0>,
  238. InstrStage<1, [A9_LSUnit]>],
  239. [4, 3, 1], [A9_LdBypass]>,
  240. //
  241. // Register offset with update
  242. InstrItinData<IIC_iLoad_ru , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  243. InstrStage<1, [A9_MUX0], 0>,
  244. InstrStage<1, [A9_AGU], 0>,
  245. InstrStage<1, [A9_LSUnit]>],
  246. [3, 2, 1, 1], [A9_LdBypass]>,
  247. InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  248. InstrStage<1, [A9_MUX0], 0>,
  249. InstrStage<2, [A9_AGU], 0>,
  250. InstrStage<1, [A9_LSUnit]>],
  251. [4, 3, 1, 1], [A9_LdBypass]>,
  252. InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  253. InstrStage<1, [A9_MUX0], 0>,
  254. InstrStage<2, [A9_AGU], 0>,
  255. InstrStage<1, [A9_LSUnit]>],
  256. [3, 3, 1, 1], [A9_LdBypass]>,
  257. //
  258. // Scaled register offset with update
  259. InstrItinData<IIC_iLoad_siu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  260. InstrStage<1, [A9_MUX0], 0>,
  261. InstrStage<1, [A9_AGU], 0>,
  262. InstrStage<1, [A9_LSUnit]>],
  263. [4, 3, 1, 1], [A9_LdBypass]>,
  264. InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  265. InstrStage<1, [A9_MUX0], 0>,
  266. InstrStage<2, [A9_AGU], 0>,
  267. InstrStage<1, [A9_LSUnit]>],
  268. [5, 4, 1, 1], [A9_LdBypass]>,
  269. //
  270. // Load multiple, def is the 5th operand.
  271. // FIXME: This assumes 3 to 4 registers.
  272. InstrItinData<IIC_iLoad_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  273. InstrStage<1, [A9_MUX0], 0>,
  274. InstrStage<2, [A9_AGU], 1>,
  275. InstrStage<2, [A9_LSUnit]>],
  276. [1, 1, 1, 1, 3],
  277. [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass],
  278. -1>, // dynamic uops
  279. //
  280. // Load multiple + update, defs are the 1st and 5th operands.
  281. InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  282. InstrStage<1, [A9_MUX0], 0>,
  283. InstrStage<2, [A9_AGU], 1>,
  284. InstrStage<2, [A9_LSUnit]>],
  285. [2, 1, 1, 1, 3],
  286. [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass],
  287. -1>, // dynamic uops
  288. //
  289. // Load multiple plus branch
  290. InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  291. InstrStage<1, [A9_MUX0], 0>,
  292. InstrStage<1, [A9_AGU], 1>,
  293. InstrStage<2, [A9_LSUnit]>,
  294. InstrStage<1, [A9_Branch]>],
  295. [1, 2, 1, 1, 3],
  296. [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass],
  297. -1>, // dynamic uops
  298. //
  299. // Pop, def is the 3rd operand.
  300. InstrItinData<IIC_iPop , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  301. InstrStage<1, [A9_MUX0], 0>,
  302. InstrStage<2, [A9_AGU], 1>,
  303. InstrStage<2, [A9_LSUnit]>],
  304. [1, 1, 3],
  305. [NoBypass, NoBypass, A9_LdBypass],
  306. -1>, // dynamic uops
  307. //
  308. // Pop + branch, def is the 3rd operand.
  309. InstrItinData<IIC_iPop_Br, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  310. InstrStage<1, [A9_MUX0], 0>,
  311. InstrStage<2, [A9_AGU], 1>,
  312. InstrStage<2, [A9_LSUnit]>,
  313. InstrStage<1, [A9_Branch]>],
  314. [1, 1, 3],
  315. [NoBypass, NoBypass, A9_LdBypass],
  316. -1>, // dynamic uops
  317. //
  318. // iLoadi + iALUr for t2LDRpci_pic.
  319. InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  320. InstrStage<1, [A9_MUX0], 0>,
  321. InstrStage<1, [A9_AGU], 0>,
  322. InstrStage<1, [A9_LSUnit]>,
  323. InstrStage<1, [A9_ALU0, A9_ALU1]>],
  324. [2, 1]>,
  325. // Integer store pipeline
  326. //
  327. // Immediate offset
  328. InstrItinData<IIC_iStore_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  329. InstrStage<1, [A9_MUX0], 0>,
  330. InstrStage<1, [A9_AGU], 0>,
  331. InstrStage<1, [A9_LSUnit]>], [1, 1]>,
  332. InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  333. InstrStage<1, [A9_MUX0], 0>,
  334. InstrStage<2, [A9_AGU], 1>,
  335. InstrStage<1, [A9_LSUnit]>], [1, 1]>,
  336. // FIXME: If address is 64-bit aligned, AGU cycles is 1.
  337. InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  338. InstrStage<1, [A9_MUX0], 0>,
  339. InstrStage<2, [A9_AGU], 1>,
  340. InstrStage<1, [A9_LSUnit]>], [1, 1]>,
  341. //
  342. // Register offset
  343. InstrItinData<IIC_iStore_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  344. InstrStage<1, [A9_MUX0], 0>,
  345. InstrStage<1, [A9_AGU], 0>,
  346. InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
  347. InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  348. InstrStage<1, [A9_MUX0], 0>,
  349. InstrStage<2, [A9_AGU], 1>,
  350. InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
  351. InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  352. InstrStage<1, [A9_MUX0], 0>,
  353. InstrStage<2, [A9_AGU], 1>,
  354. InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
  355. //
  356. // Scaled register offset
  357. InstrItinData<IIC_iStore_si , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  358. InstrStage<1, [A9_MUX0], 0>,
  359. InstrStage<1, [A9_AGU], 0>,
  360. InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
  361. InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  362. InstrStage<1, [A9_MUX0], 0>,
  363. InstrStage<2, [A9_AGU], 1>,
  364. InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>,
  365. //
  366. // Immediate offset with update
  367. InstrItinData<IIC_iStore_iu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  368. InstrStage<1, [A9_MUX0], 0>,
  369. InstrStage<1, [A9_AGU], 0>,
  370. InstrStage<1, [A9_LSUnit]>], [2, 1, 1]>,
  371. InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  372. InstrStage<1, [A9_MUX0], 0>,
  373. InstrStage<2, [A9_AGU], 1>,
  374. InstrStage<1, [A9_LSUnit]>], [3, 1, 1]>,
  375. //
  376. // Register offset with update
  377. InstrItinData<IIC_iStore_ru , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  378. InstrStage<1, [A9_MUX0], 0>,
  379. InstrStage<1, [A9_AGU], 0>,
  380. InstrStage<1, [A9_LSUnit]>],
  381. [2, 1, 1, 1]>,
  382. InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  383. InstrStage<1, [A9_MUX0], 0>,
  384. InstrStage<2, [A9_AGU], 1>,
  385. InstrStage<1, [A9_LSUnit]>],
  386. [3, 1, 1, 1]>,
  387. InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  388. InstrStage<1, [A9_MUX0], 0>,
  389. InstrStage<2, [A9_AGU], 1>,
  390. InstrStage<1, [A9_LSUnit]>],
  391. [3, 1, 1, 1]>,
  392. //
  393. // Scaled register offset with update
  394. InstrItinData<IIC_iStore_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  395. InstrStage<1, [A9_MUX0], 0>,
  396. InstrStage<1, [A9_AGU], 0>,
  397. InstrStage<1, [A9_LSUnit]>],
  398. [2, 1, 1, 1]>,
  399. InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  400. InstrStage<1, [A9_MUX0], 0>,
  401. InstrStage<2, [A9_AGU], 1>,
  402. InstrStage<1, [A9_LSUnit]>],
  403. [3, 1, 1, 1]>,
  404. //
  405. // Store multiple
  406. InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  407. InstrStage<1, [A9_MUX0], 0>,
  408. InstrStage<1, [A9_AGU], 0>,
  409. InstrStage<2, [A9_LSUnit]>],
  410. [], [], -1>, // dynamic uops
  411. //
  412. // Store multiple + update
  413. InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  414. InstrStage<1, [A9_MUX0], 0>,
  415. InstrStage<1, [A9_AGU], 0>,
  416. InstrStage<2, [A9_LSUnit]>],
  417. [2], [], -1>, // dynamic uops
  418. //
  419. // Preload
  420. InstrItinData<IIC_Preload, [InstrStage<1, [A9_Issue0, A9_Issue1]>], [1, 1]>,
  421. // Branch
  422. //
  423. // no delay slots, so the latency of a branch is unimportant
  424. InstrItinData<IIC_Br , [InstrStage<1, [A9_Issue0], 0>,
  425. InstrStage<1, [A9_Issue1], 0>,
  426. InstrStage<1, [A9_Branch]>]>,
  427. // VFP and NEON share the same register file. This means that every VFP
  428. // instruction should wait for full completion of the consecutive NEON
  429. // instruction and vice-versa. We model this behavior with two artificial FUs:
  430. // DRegsVFP and DRegsN.
  431. //
  432. // Every VFP instruction:
  433. // - Acquires DRegsVFP resource for 1 cycle
  434. // - Reserves DRegsN resource for the whole duration (including time to
  435. // register file writeback!).
  436. // Every NEON instruction does the same but with FUs swapped.
  437. //
  438. // Since the reserved FU cannot be acquired, this models precisely
  439. // "cross-domain" stalls.
  440. // VFP
  441. // Issue through integer pipeline, and execute in NEON unit.
  442. // FP Special Register to Integer Register File Move
  443. InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  444. InstrStage<1, [A9_MUX0], 0>,
  445. InstrStage<1, [A9_DRegsVFP], 0, Required>,
  446. InstrStage<2, [A9_DRegsN], 0, Reserved>,
  447. InstrStage<1, [A9_NPipe]>],
  448. [1]>,
  449. //
  450. // Single-precision FP Unary
  451. InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  452. InstrStage<1, [A9_MUX0], 0>,
  453. InstrStage<1, [A9_DRegsVFP], 0, Required>,
  454. // Extra latency cycles since writeback is 2 cycles
  455. InstrStage<3, [A9_DRegsN], 0, Reserved>,
  456. InstrStage<1, [A9_NPipe]>],
  457. [1, 1]>,
  458. //
  459. // Double-precision FP Unary
  460. InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  461. InstrStage<1, [A9_MUX0], 0>,
  462. InstrStage<1, [A9_DRegsVFP], 0, Required>,
  463. // Extra latency cycles since writeback is 2 cycles
  464. InstrStage<3, [A9_DRegsN], 0, Reserved>,
  465. InstrStage<1, [A9_NPipe]>],
  466. [1, 1]>,
  467. //
  468. // Single-precision FP Compare
  469. InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  470. InstrStage<1, [A9_MUX0], 0>,
  471. InstrStage<1, [A9_DRegsVFP], 0, Required>,
  472. // Extra latency cycles since writeback is 4 cycles
  473. InstrStage<5, [A9_DRegsN], 0, Reserved>,
  474. InstrStage<1, [A9_NPipe]>],
  475. [1, 1]>,
  476. //
  477. // Double-precision FP Compare
  478. InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  479. InstrStage<1, [A9_MUX0], 0>,
  480. InstrStage<1, [A9_DRegsVFP], 0, Required>,
  481. // Extra latency cycles since writeback is 4 cycles
  482. InstrStage<5, [A9_DRegsN], 0, Reserved>,
  483. InstrStage<1, [A9_NPipe]>],
  484. [1, 1]>,
  485. //
  486. // Single to Double FP Convert
  487. InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  488. InstrStage<1, [A9_MUX0], 0>,
  489. InstrStage<1, [A9_DRegsVFP], 0, Required>,
  490. InstrStage<5, [A9_DRegsN], 0, Reserved>,
  491. InstrStage<1, [A9_NPipe]>],
  492. [4, 1]>,
  493. //
  494. // Double to Single FP Convert
  495. InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  496. InstrStage<1, [A9_MUX0], 0>,
  497. InstrStage<1, [A9_DRegsVFP], 0, Required>,
  498. InstrStage<5, [A9_DRegsN], 0, Reserved>,
  499. InstrStage<1, [A9_NPipe]>],
  500. [4, 1]>,
  501. //
  502. // Single to Half FP Convert
  503. InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  504. InstrStage<1, [A9_MUX0], 0>,
  505. InstrStage<1, [A9_DRegsVFP], 0, Required>,
  506. InstrStage<5, [A9_DRegsN], 0, Reserved>,
  507. InstrStage<1, [A9_NPipe]>],
  508. [4, 1]>,
  509. //
  510. // Half to Single FP Convert
  511. InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  512. InstrStage<1, [A9_MUX0], 0>,
  513. InstrStage<1, [A9_DRegsVFP], 0, Required>,
  514. InstrStage<3, [A9_DRegsN], 0, Reserved>,
  515. InstrStage<1, [A9_NPipe]>],
  516. [2, 1]>,
  517. //
  518. // Single-Precision FP to Integer Convert
  519. InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  520. InstrStage<1, [A9_MUX0], 0>,
  521. InstrStage<1, [A9_DRegsVFP], 0, Required>,
  522. InstrStage<5, [A9_DRegsN], 0, Reserved>,
  523. InstrStage<1, [A9_NPipe]>],
  524. [4, 1]>,
  525. //
  526. // Double-Precision FP to Integer Convert
  527. InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  528. InstrStage<1, [A9_MUX0], 0>,
  529. InstrStage<1, [A9_DRegsVFP], 0, Required>,
  530. InstrStage<5, [A9_DRegsN], 0, Reserved>,
  531. InstrStage<1, [A9_NPipe]>],
  532. [4, 1]>,
  533. //
  534. // Integer to Single-Precision FP Convert
  535. InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  536. InstrStage<1, [A9_MUX0], 0>,
  537. InstrStage<1, [A9_DRegsVFP], 0, Required>,
  538. InstrStage<5, [A9_DRegsN], 0, Reserved>,
  539. InstrStage<1, [A9_NPipe]>],
  540. [4, 1]>,
  541. //
  542. // Integer to Double-Precision FP Convert
  543. InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  544. InstrStage<1, [A9_MUX0], 0>,
  545. InstrStage<1, [A9_DRegsVFP], 0, Required>,
  546. InstrStage<5, [A9_DRegsN], 0, Reserved>,
  547. InstrStage<1, [A9_NPipe]>],
  548. [4, 1]>,
  549. //
  550. // Single-precision FP ALU
  551. InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  552. InstrStage<1, [A9_MUX0], 0>,
  553. InstrStage<1, [A9_DRegsVFP], 0, Required>,
  554. InstrStage<5, [A9_DRegsN], 0, Reserved>,
  555. InstrStage<1, [A9_NPipe]>],
  556. [4, 1, 1]>,
  557. //
  558. // Double-precision FP ALU
  559. InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  560. InstrStage<1, [A9_MUX0], 0>,
  561. InstrStage<1, [A9_DRegsVFP], 0, Required>,
  562. InstrStage<5, [A9_DRegsN], 0, Reserved>,
  563. InstrStage<1, [A9_NPipe]>],
  564. [4, 1, 1]>,
  565. //
  566. // Single-precision FP Multiply
  567. InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  568. InstrStage<1, [A9_MUX0], 0>,
  569. InstrStage<1, [A9_DRegsVFP], 0, Required>,
  570. InstrStage<6, [A9_DRegsN], 0, Reserved>,
  571. InstrStage<1, [A9_NPipe]>],
  572. [5, 1, 1]>,
  573. //
  574. // Double-precision FP Multiply
  575. InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  576. InstrStage<1, [A9_MUX0], 0>,
  577. InstrStage<1, [A9_DRegsVFP], 0, Required>,
  578. InstrStage<7, [A9_DRegsN], 0, Reserved>,
  579. InstrStage<2, [A9_NPipe]>],
  580. [6, 1, 1]>,
  581. //
  582. // Single-precision FP MAC
  583. InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  584. InstrStage<1, [A9_MUX0], 0>,
  585. InstrStage<1, [A9_DRegsVFP], 0, Required>,
  586. InstrStage<9, [A9_DRegsN], 0, Reserved>,
  587. InstrStage<1, [A9_NPipe]>],
  588. [8, 1, 1, 1]>,
  589. //
  590. // Double-precision FP MAC
  591. InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  592. InstrStage<1, [A9_MUX0], 0>,
  593. InstrStage<1, [A9_DRegsVFP], 0, Required>,
  594. InstrStage<10, [A9_DRegsN], 0, Reserved>,
  595. InstrStage<2, [A9_NPipe]>],
  596. [9, 1, 1, 1]>,
  597. //
  598. // Single-precision Fused FP MAC
  599. InstrItinData<IIC_fpFMAC32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  600. InstrStage<1, [A9_MUX0], 0>,
  601. InstrStage<1, [A9_DRegsVFP], 0, Required>,
  602. InstrStage<9, [A9_DRegsN], 0, Reserved>,
  603. InstrStage<1, [A9_NPipe]>],
  604. [8, 1, 1, 1]>,
  605. //
  606. // Double-precision Fused FP MAC
  607. InstrItinData<IIC_fpFMAC64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  608. InstrStage<1, [A9_MUX0], 0>,
  609. InstrStage<1, [A9_DRegsVFP], 0, Required>,
  610. InstrStage<10, [A9_DRegsN], 0, Reserved>,
  611. InstrStage<2, [A9_NPipe]>],
  612. [9, 1, 1, 1]>,
  613. //
  614. // Single-precision FP DIV
  615. InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  616. InstrStage<1, [A9_MUX0], 0>,
  617. InstrStage<1, [A9_DRegsVFP], 0, Required>,
  618. InstrStage<16, [A9_DRegsN], 0, Reserved>,
  619. InstrStage<10, [A9_NPipe]>],
  620. [15, 1, 1]>,
  621. //
  622. // Double-precision FP DIV
  623. InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  624. InstrStage<1, [A9_MUX0], 0>,
  625. InstrStage<1, [A9_DRegsVFP], 0, Required>,
  626. InstrStage<26, [A9_DRegsN], 0, Reserved>,
  627. InstrStage<20, [A9_NPipe]>],
  628. [25, 1, 1]>,
  629. //
  630. // Single-precision FP SQRT
  631. InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  632. InstrStage<1, [A9_MUX0], 0>,
  633. InstrStage<1, [A9_DRegsVFP], 0, Required>,
  634. InstrStage<18, [A9_DRegsN], 0, Reserved>,
  635. InstrStage<13, [A9_NPipe]>],
  636. [17, 1]>,
  637. //
  638. // Double-precision FP SQRT
  639. InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  640. InstrStage<1, [A9_MUX0], 0>,
  641. InstrStage<1, [A9_DRegsVFP], 0, Required>,
  642. InstrStage<33, [A9_DRegsN], 0, Reserved>,
  643. InstrStage<28, [A9_NPipe]>],
  644. [32, 1]>,
  645. //
  646. // Integer to Single-precision Move
  647. InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  648. InstrStage<1, [A9_MUX0], 0>,
  649. InstrStage<1, [A9_DRegsVFP], 0, Required>,
  650. // Extra 1 latency cycle since writeback is 2 cycles
  651. InstrStage<3, [A9_DRegsN], 0, Reserved>,
  652. InstrStage<1, [A9_NPipe]>],
  653. [1, 1]>,
  654. //
  655. // Integer to Double-precision Move
  656. InstrItinData<IIC_fpMOVID, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  657. InstrStage<1, [A9_MUX0], 0>,
  658. InstrStage<1, [A9_DRegsVFP], 0, Required>,
  659. // Extra 1 latency cycle since writeback is 2 cycles
  660. InstrStage<3, [A9_DRegsN], 0, Reserved>,
  661. InstrStage<1, [A9_NPipe]>],
  662. [1, 1, 1]>,
  663. //
  664. // Single-precision to Integer Move
  665. //
  666. // On A9 move-from-VFP is free to issue with no stall if other VFP
  667. // operations are in flight. I assume it still can't dual-issue though.
  668. InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  669. InstrStage<1, [A9_MUX0], 0>],
  670. [2, 1]>,
  671. //
  672. // Double-precision to Integer Move
  673. //
  674. // On A9 move-from-VFP is free to issue with no stall if other VFP
  675. // operations are in flight. I assume it still can't dual-issue though.
  676. InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  677. InstrStage<1, [A9_MUX0], 0>],
  678. [2, 1, 1]>,
  679. //
  680. // Single-precision FP Load
  681. InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  682. InstrStage<1, [A9_MUX0], 0>,
  683. InstrStage<1, [A9_DRegsVFP], 0, Required>,
  684. InstrStage<2, [A9_DRegsN], 0, Reserved>,
  685. InstrStage<1, [A9_NPipe], 0>,
  686. InstrStage<1, [A9_LSUnit]>],
  687. [1, 1]>,
  688. //
  689. // Double-precision FP Load
  690. // FIXME: Result latency is 1 if address is 64-bit aligned.
  691. InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  692. InstrStage<1, [A9_MUX0], 0>,
  693. InstrStage<1, [A9_DRegsVFP], 0, Required>,
  694. InstrStage<2, [A9_DRegsN], 0, Reserved>,
  695. InstrStage<1, [A9_NPipe], 0>,
  696. InstrStage<1, [A9_LSUnit]>],
  697. [2, 1]>,
  698. //
  699. // FP Load Multiple
  700. // FIXME: assumes 2 doubles which requires 2 LS cycles.
  701. InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  702. InstrStage<1, [A9_MUX0], 0>,
  703. InstrStage<1, [A9_DRegsVFP], 0, Required>,
  704. InstrStage<2, [A9_DRegsN], 0, Reserved>,
  705. InstrStage<1, [A9_NPipe], 0>,
  706. InstrStage<2, [A9_LSUnit]>],
  707. [1, 1, 1, 1], [], -1>, // dynamic uops
  708. //
  709. // FP Load Multiple + update
  710. // FIXME: assumes 2 doubles which requires 2 LS cycles.
  711. InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  712. InstrStage<1, [A9_MUX0], 0>,
  713. InstrStage<1, [A9_DRegsVFP], 0, Required>,
  714. InstrStage<2, [A9_DRegsN], 0, Reserved>,
  715. InstrStage<1, [A9_NPipe], 0>,
  716. InstrStage<2, [A9_LSUnit]>],
  717. [2, 1, 1, 1], [], -1>, // dynamic uops
  718. //
  719. // Single-precision FP Store
  720. InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  721. InstrStage<1, [A9_MUX0], 0>,
  722. InstrStage<1, [A9_DRegsVFP], 0, Required>,
  723. InstrStage<2, [A9_DRegsN], 0, Reserved>,
  724. InstrStage<1, [A9_NPipe], 0>,
  725. InstrStage<1, [A9_LSUnit]>],
  726. [1, 1]>,
  727. //
  728. // Double-precision FP Store
  729. InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  730. InstrStage<1, [A9_MUX0], 0>,
  731. InstrStage<1, [A9_DRegsVFP], 0, Required>,
  732. InstrStage<2, [A9_DRegsN], 0, Reserved>,
  733. InstrStage<1, [A9_NPipe], 0>,
  734. InstrStage<1, [A9_LSUnit]>],
  735. [1, 1]>,
  736. //
  737. // FP Store Multiple
  738. // FIXME: assumes 2 doubles which requires 2 LS cycles.
  739. InstrItinData<IIC_fpStore_m,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  740. InstrStage<1, [A9_MUX0], 0>,
  741. InstrStage<1, [A9_DRegsVFP], 0, Required>,
  742. InstrStage<2, [A9_DRegsN], 0, Reserved>,
  743. InstrStage<1, [A9_NPipe], 0>,
  744. InstrStage<2, [A9_LSUnit]>],
  745. [1, 1, 1, 1], [], -1>, // dynamic uops
  746. //
  747. // FP Store Multiple + update
  748. // FIXME: assumes 2 doubles which requires 2 LS cycles.
  749. InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  750. InstrStage<1, [A9_MUX0], 0>,
  751. InstrStage<1, [A9_DRegsVFP], 0, Required>,
  752. InstrStage<2, [A9_DRegsN], 0, Reserved>,
  753. InstrStage<1, [A9_NPipe], 0>,
  754. InstrStage<2, [A9_LSUnit]>],
  755. [2, 1, 1, 1], [], -1>, // dynamic uops
  756. // NEON
  757. // VLD1
  758. InstrItinData<IIC_VLD1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  759. InstrStage<1, [A9_MUX0], 0>,
  760. InstrStage<1, [A9_DRegsN], 0, Required>,
  761. InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
  762. InstrStage<1, [A9_NPipe], 0>,
  763. InstrStage<1, [A9_LSUnit]>],
  764. [1, 1]>,
  765. // VLD1x2
  766. InstrItinData<IIC_VLD1x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  767. InstrStage<1, [A9_MUX0], 0>,
  768. InstrStage<1, [A9_DRegsN], 0, Required>,
  769. InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
  770. InstrStage<1, [A9_NPipe], 0>,
  771. InstrStage<1, [A9_LSUnit]>],
  772. [1, 1, 1]>,
  773. // VLD1x3
  774. InstrItinData<IIC_VLD1x3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  775. InstrStage<1, [A9_MUX0], 0>,
  776. InstrStage<1, [A9_DRegsN], 0, Required>,
  777. InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
  778. InstrStage<2, [A9_NPipe], 0>,
  779. InstrStage<2, [A9_LSUnit]>],
  780. [1, 1, 2, 1]>,
  781. // VLD1x4
  782. InstrItinData<IIC_VLD1x4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  783. InstrStage<1, [A9_MUX0], 0>,
  784. InstrStage<1, [A9_DRegsN], 0, Required>,
  785. InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
  786. InstrStage<2, [A9_NPipe], 0>,
  787. InstrStage<2, [A9_LSUnit]>],
  788. [1, 1, 2, 2, 1]>,
  789. // VLD1u
  790. InstrItinData<IIC_VLD1u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  791. InstrStage<1, [A9_MUX0], 0>,
  792. InstrStage<1, [A9_DRegsN], 0, Required>,
  793. InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
  794. InstrStage<1, [A9_NPipe], 0>,
  795. InstrStage<1, [A9_LSUnit]>],
  796. [1, 2, 1]>,
  797. // VLD1x2u
  798. InstrItinData<IIC_VLD1x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  799. InstrStage<1, [A9_MUX0], 0>,
  800. InstrStage<1, [A9_DRegsN], 0, Required>,
  801. InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
  802. InstrStage<1, [A9_NPipe], 0>,
  803. InstrStage<1, [A9_LSUnit]>],
  804. [1, 1, 2, 1]>,
  805. // VLD1x3u
  806. InstrItinData<IIC_VLD1x3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  807. InstrStage<1, [A9_MUX0], 0>,
  808. InstrStage<1, [A9_DRegsN], 0, Required>,
  809. InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
  810. InstrStage<2, [A9_NPipe], 0>,
  811. InstrStage<2, [A9_LSUnit]>],
  812. [1, 1, 2, 2, 1]>,
  813. // VLD1x4u
  814. InstrItinData<IIC_VLD1x4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  815. InstrStage<1, [A9_MUX0], 0>,
  816. InstrStage<1, [A9_DRegsN], 0, Required>,
  817. InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
  818. InstrStage<2, [A9_NPipe], 0>,
  819. InstrStage<2, [A9_LSUnit]>],
  820. [1, 1, 2, 2, 2, 1]>,
  821. //
  822. // VLD1ln
  823. InstrItinData<IIC_VLD1ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  824. InstrStage<1, [A9_MUX0], 0>,
  825. InstrStage<1, [A9_DRegsN], 0, Required>,
  826. InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
  827. InstrStage<2, [A9_NPipe], 0>,
  828. InstrStage<2, [A9_LSUnit]>],
  829. [3, 1, 1, 1]>,
  830. //
  831. // VLD1lnu
  832. InstrItinData<IIC_VLD1lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  833. InstrStage<1, [A9_MUX0], 0>,
  834. InstrStage<1, [A9_DRegsN], 0, Required>,
  835. InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
  836. InstrStage<2, [A9_NPipe], 0>,
  837. InstrStage<2, [A9_LSUnit]>],
  838. [3, 2, 1, 1, 1, 1]>,
  839. //
  840. // VLD1dup
  841. InstrItinData<IIC_VLD1dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  842. InstrStage<1, [A9_MUX0], 0>,
  843. InstrStage<1, [A9_DRegsN], 0, Required>,
  844. InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
  845. InstrStage<1, [A9_NPipe], 0>,
  846. InstrStage<1, [A9_LSUnit]>],
  847. [2, 1]>,
  848. //
  849. // VLD1dupu
  850. InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  851. InstrStage<1, [A9_MUX0], 0>,
  852. InstrStage<1, [A9_DRegsN], 0, Required>,
  853. InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
  854. InstrStage<1, [A9_NPipe], 0>,
  855. InstrStage<1, [A9_LSUnit]>],
  856. [2, 2, 1, 1]>,
  857. //
  858. // VLD2
  859. InstrItinData<IIC_VLD2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  860. InstrStage<1, [A9_MUX0], 0>,
  861. InstrStage<1, [A9_DRegsN], 0, Required>,
  862. // Extra latency cycles since wbck is 6 cycles
  863. InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
  864. InstrStage<1, [A9_NPipe], 0>,
  865. InstrStage<1, [A9_LSUnit]>],
  866. [2, 2, 1]>,
  867. //
  868. // VLD2x2
  869. InstrItinData<IIC_VLD2x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  870. InstrStage<1, [A9_MUX0], 0>,
  871. InstrStage<1, [A9_DRegsN], 0, Required>,
  872. InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
  873. InstrStage<2, [A9_NPipe], 0>,
  874. InstrStage<2, [A9_LSUnit]>],
  875. [2, 3, 2, 3, 1]>,
  876. //
  877. // VLD2ln
  878. InstrItinData<IIC_VLD2ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  879. InstrStage<1, [A9_MUX0], 0>,
  880. InstrStage<1, [A9_DRegsN], 0, Required>,
  881. InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
  882. InstrStage<2, [A9_NPipe], 0>,
  883. InstrStage<2, [A9_LSUnit]>],
  884. [3, 3, 1, 1, 1, 1]>,
  885. //
  886. // VLD2u
  887. InstrItinData<IIC_VLD2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  888. InstrStage<1, [A9_MUX0], 0>,
  889. InstrStage<1, [A9_DRegsN], 0, Required>,
  890. // Extra latency cycles since wbck is 6 cycles
  891. InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
  892. InstrStage<1, [A9_NPipe], 0>,
  893. InstrStage<1, [A9_LSUnit]>],
  894. [2, 2, 2, 1, 1, 1]>,
  895. //
  896. // VLD2x2u
  897. InstrItinData<IIC_VLD2x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  898. InstrStage<1, [A9_MUX0], 0>,
  899. InstrStage<1, [A9_DRegsN], 0, Required>,
  900. InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
  901. InstrStage<2, [A9_NPipe], 0>,
  902. InstrStage<2, [A9_LSUnit]>],
  903. [2, 3, 2, 3, 2, 1]>,
  904. //
  905. // VLD2lnu
  906. InstrItinData<IIC_VLD2lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  907. InstrStage<1, [A9_MUX0], 0>,
  908. InstrStage<1, [A9_DRegsN], 0, Required>,
  909. InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
  910. InstrStage<2, [A9_NPipe], 0>,
  911. InstrStage<2, [A9_LSUnit]>],
  912. [3, 3, 2, 1, 1, 1, 1, 1]>,
  913. //
  914. // VLD2dup
  915. InstrItinData<IIC_VLD2dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  916. InstrStage<1, [A9_MUX0], 0>,
  917. InstrStage<1, [A9_DRegsN], 0, Required>,
  918. InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
  919. InstrStage<1, [A9_NPipe], 0>,
  920. InstrStage<1, [A9_LSUnit]>],
  921. [2, 2, 1]>,
  922. //
  923. // VLD2dupu
  924. InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  925. InstrStage<1, [A9_MUX0], 0>,
  926. InstrStage<1, [A9_DRegsN], 0, Required>,
  927. InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
  928. InstrStage<1, [A9_NPipe], 0>,
  929. InstrStage<1, [A9_LSUnit]>],
  930. [2, 2, 2, 1, 1]>,
  931. //
  932. // VLD3
  933. InstrItinData<IIC_VLD3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  934. InstrStage<1, [A9_MUX0], 0>,
  935. InstrStage<1, [A9_DRegsN], 0, Required>,
  936. InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
  937. InstrStage<3, [A9_NPipe], 0>,
  938. InstrStage<3, [A9_LSUnit]>],
  939. [3, 3, 4, 1]>,
  940. //
  941. // VLD3ln
  942. InstrItinData<IIC_VLD3ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  943. InstrStage<1, [A9_MUX0], 0>,
  944. InstrStage<1, [A9_DRegsN], 0, Required>,
  945. InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
  946. InstrStage<5, [A9_NPipe], 0>,
  947. InstrStage<5, [A9_LSUnit]>],
  948. [5, 5, 6, 1, 1, 1, 1, 2]>,
  949. //
  950. // VLD3u
  951. InstrItinData<IIC_VLD3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  952. InstrStage<1, [A9_MUX0], 0>,
  953. InstrStage<1, [A9_DRegsN], 0, Required>,
  954. InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
  955. InstrStage<3, [A9_NPipe], 0>,
  956. InstrStage<3, [A9_LSUnit]>],
  957. [3, 3, 4, 2, 1]>,
  958. //
  959. // VLD3lnu
  960. InstrItinData<IIC_VLD3lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  961. InstrStage<1, [A9_MUX0], 0>,
  962. InstrStage<1, [A9_DRegsN], 0, Required>,
  963. InstrStage<11,[A9_DRegsVFP], 0, Reserved>,
  964. InstrStage<5, [A9_NPipe], 0>,
  965. InstrStage<5, [A9_LSUnit]>],
  966. [5, 5, 6, 2, 1, 1, 1, 1, 1, 2]>,
  967. //
  968. // VLD3dup
  969. InstrItinData<IIC_VLD3dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  970. InstrStage<1, [A9_MUX0], 0>,
  971. InstrStage<1, [A9_DRegsN], 0, Required>,
  972. InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
  973. InstrStage<3, [A9_NPipe], 0>,
  974. InstrStage<3, [A9_LSUnit]>],
  975. [3, 3, 4, 1]>,
  976. //
  977. // VLD3dupu
  978. InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  979. InstrStage<1, [A9_MUX0], 0>,
  980. InstrStage<1, [A9_DRegsN], 0, Required>,
  981. InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
  982. InstrStage<3, [A9_NPipe], 0>,
  983. InstrStage<3, [A9_LSUnit]>],
  984. [3, 3, 4, 2, 1, 1]>,
  985. //
  986. // VLD4
  987. InstrItinData<IIC_VLD4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  988. InstrStage<1, [A9_MUX0], 0>,
  989. InstrStage<1, [A9_DRegsN], 0, Required>,
  990. InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
  991. InstrStage<3, [A9_NPipe], 0>,
  992. InstrStage<3, [A9_LSUnit]>],
  993. [3, 3, 4, 4, 1]>,
  994. //
  995. // VLD4ln
  996. InstrItinData<IIC_VLD4ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  997. InstrStage<1, [A9_MUX0], 0>,
  998. InstrStage<1, [A9_DRegsN], 0, Required>,
  999. InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
  1000. InstrStage<4, [A9_NPipe], 0>,
  1001. InstrStage<4, [A9_LSUnit]>],
  1002. [4, 4, 5, 5, 1, 1, 1, 1, 2, 2]>,
  1003. //
  1004. // VLD4u
  1005. InstrItinData<IIC_VLD4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1006. InstrStage<1, [A9_MUX0], 0>,
  1007. InstrStage<1, [A9_DRegsN], 0, Required>,
  1008. InstrStage<9,[A9_DRegsVFP], 0, Reserved>,
  1009. InstrStage<3, [A9_NPipe], 0>,
  1010. InstrStage<3, [A9_LSUnit]>],
  1011. [3, 3, 4, 4, 2, 1]>,
  1012. //
  1013. // VLD4lnu
  1014. InstrItinData<IIC_VLD4lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1015. InstrStage<1, [A9_MUX0], 0>,
  1016. InstrStage<1, [A9_DRegsN], 0, Required>,
  1017. InstrStage<10,[A9_DRegsVFP], 0, Reserved>,
  1018. InstrStage<4, [A9_NPipe], 0>,
  1019. InstrStage<4, [A9_LSUnit]>],
  1020. [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>,
  1021. //
  1022. // VLD4dup
  1023. InstrItinData<IIC_VLD4dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1024. InstrStage<1, [A9_MUX0], 0>,
  1025. InstrStage<1, [A9_DRegsN], 0, Required>,
  1026. InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
  1027. InstrStage<2, [A9_NPipe], 0>,
  1028. InstrStage<2, [A9_LSUnit]>],
  1029. [2, 2, 3, 3, 1]>,
  1030. //
  1031. // VLD4dupu
  1032. InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1033. InstrStage<1, [A9_MUX0], 0>,
  1034. InstrStage<1, [A9_DRegsN], 0, Required>,
  1035. InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
  1036. InstrStage<2, [A9_NPipe], 0>,
  1037. InstrStage<2, [A9_LSUnit]>],
  1038. [2, 2, 3, 3, 2, 1, 1]>,
  1039. //
  1040. // VST1
  1041. InstrItinData<IIC_VST1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1042. InstrStage<1, [A9_MUX0], 0>,
  1043. InstrStage<1, [A9_DRegsN], 0, Required>,
  1044. InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
  1045. InstrStage<1, [A9_NPipe], 0>,
  1046. InstrStage<1, [A9_LSUnit]>],
  1047. [1, 1, 1]>,
  1048. //
  1049. // VST1x2
  1050. InstrItinData<IIC_VST1x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1051. InstrStage<1, [A9_MUX0], 0>,
  1052. InstrStage<1, [A9_DRegsN], 0, Required>,
  1053. InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
  1054. InstrStage<1, [A9_NPipe], 0>,
  1055. InstrStage<1, [A9_LSUnit]>],
  1056. [1, 1, 1, 1]>,
  1057. //
  1058. // VST1x3
  1059. InstrItinData<IIC_VST1x3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1060. InstrStage<1, [A9_MUX0], 0>,
  1061. InstrStage<1, [A9_DRegsN], 0, Required>,
  1062. InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
  1063. InstrStage<2, [A9_NPipe], 0>,
  1064. InstrStage<2, [A9_LSUnit]>],
  1065. [1, 1, 1, 1, 2]>,
  1066. //
  1067. // VST1x4
  1068. InstrItinData<IIC_VST1x4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1069. InstrStage<1, [A9_MUX0], 0>,
  1070. InstrStage<1, [A9_DRegsN], 0, Required>,
  1071. InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
  1072. InstrStage<2, [A9_NPipe], 0>,
  1073. InstrStage<2, [A9_LSUnit]>],
  1074. [1, 1, 1, 1, 2, 2]>,
  1075. //
  1076. // VST1u
  1077. InstrItinData<IIC_VST1u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1078. InstrStage<1, [A9_MUX0], 0>,
  1079. InstrStage<1, [A9_DRegsN], 0, Required>,
  1080. InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
  1081. InstrStage<1, [A9_NPipe], 0>,
  1082. InstrStage<1, [A9_LSUnit]>],
  1083. [2, 1, 1, 1, 1]>,
  1084. //
  1085. // VST1x2u
  1086. InstrItinData<IIC_VST1x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1087. InstrStage<1, [A9_MUX0], 0>,
  1088. InstrStage<1, [A9_DRegsN], 0, Required>,
  1089. InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
  1090. InstrStage<1, [A9_NPipe], 0>,
  1091. InstrStage<1, [A9_LSUnit]>],
  1092. [2, 1, 1, 1, 1, 1]>,
  1093. //
  1094. // VST1x3u
  1095. InstrItinData<IIC_VST1x3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1096. InstrStage<1, [A9_MUX0], 0>,
  1097. InstrStage<1, [A9_DRegsN], 0, Required>,
  1098. InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
  1099. InstrStage<2, [A9_NPipe], 0>,
  1100. InstrStage<2, [A9_LSUnit]>],
  1101. [2, 1, 1, 1, 1, 1, 2]>,
  1102. //
  1103. // VST1x4u
  1104. InstrItinData<IIC_VST1x4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1105. InstrStage<1, [A9_MUX0], 0>,
  1106. InstrStage<1, [A9_DRegsN], 0, Required>,
  1107. InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
  1108. InstrStage<2, [A9_NPipe], 0>,
  1109. InstrStage<2, [A9_LSUnit]>],
  1110. [2, 1, 1, 1, 1, 1, 2, 2]>,
  1111. //
  1112. // VST1ln
  1113. InstrItinData<IIC_VST1ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1114. InstrStage<1, [A9_MUX0], 0>,
  1115. InstrStage<1, [A9_DRegsN], 0, Required>,
  1116. InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
  1117. InstrStage<1, [A9_NPipe], 0>,
  1118. InstrStage<1, [A9_LSUnit]>],
  1119. [1, 1, 1]>,
  1120. //
  1121. // VST1lnu
  1122. InstrItinData<IIC_VST1lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1123. InstrStage<1, [A9_MUX0], 0>,
  1124. InstrStage<1, [A9_DRegsN], 0, Required>,
  1125. InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
  1126. InstrStage<1, [A9_NPipe], 0>,
  1127. InstrStage<1, [A9_LSUnit]>],
  1128. [2, 1, 1, 1, 1]>,
  1129. //
  1130. // VST2
  1131. InstrItinData<IIC_VST2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1132. InstrStage<1, [A9_MUX0], 0>,
  1133. InstrStage<1, [A9_DRegsN], 0, Required>,
  1134. InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
  1135. InstrStage<1, [A9_NPipe], 0>,
  1136. InstrStage<1, [A9_LSUnit]>],
  1137. [1, 1, 1, 1]>,
  1138. //
  1139. // VST2x2
  1140. InstrItinData<IIC_VST2x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1141. InstrStage<1, [A9_MUX0], 0>,
  1142. InstrStage<1, [A9_DRegsN], 0, Required>,
  1143. InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
  1144. InstrStage<3, [A9_NPipe], 0>,
  1145. InstrStage<3, [A9_LSUnit]>],
  1146. [1, 1, 1, 1, 2, 2]>,
  1147. //
  1148. // VST2u
  1149. InstrItinData<IIC_VST2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1150. InstrStage<1, [A9_MUX0], 0>,
  1151. InstrStage<1, [A9_DRegsN], 0, Required>,
  1152. InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
  1153. InstrStage<1, [A9_NPipe], 0>,
  1154. InstrStage<1, [A9_LSUnit]>],
  1155. [2, 1, 1, 1, 1, 1]>,
  1156. //
  1157. // VST2x2u
  1158. InstrItinData<IIC_VST2x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1159. InstrStage<1, [A9_MUX0], 0>,
  1160. InstrStage<1, [A9_DRegsN], 0, Required>,
  1161. InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
  1162. InstrStage<3, [A9_NPipe], 0>,
  1163. InstrStage<3, [A9_LSUnit]>],
  1164. [2, 1, 1, 1, 1, 1, 2, 2]>,
  1165. //
  1166. // VST2ln
  1167. InstrItinData<IIC_VST2ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1168. InstrStage<1, [A9_MUX0], 0>,
  1169. InstrStage<1, [A9_DRegsN], 0, Required>,
  1170. InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
  1171. InstrStage<1, [A9_NPipe], 0>,
  1172. InstrStage<1, [A9_LSUnit]>],
  1173. [1, 1, 1, 1]>,
  1174. //
  1175. // VST2lnu
  1176. InstrItinData<IIC_VST2lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1177. InstrStage<1, [A9_MUX0], 0>,
  1178. InstrStage<1, [A9_DRegsN], 0, Required>,
  1179. InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
  1180. InstrStage<1, [A9_NPipe], 0>,
  1181. InstrStage<1, [A9_LSUnit]>],
  1182. [2, 1, 1, 1, 1, 1]>,
  1183. //
  1184. // VST3
  1185. InstrItinData<IIC_VST3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1186. InstrStage<1, [A9_MUX0], 0>,
  1187. InstrStage<1, [A9_DRegsN], 0, Required>,
  1188. InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
  1189. InstrStage<2, [A9_NPipe], 0>,
  1190. InstrStage<2, [A9_LSUnit]>],
  1191. [1, 1, 1, 1, 2]>,
  1192. //
  1193. // VST3u
  1194. InstrItinData<IIC_VST3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1195. InstrStage<1, [A9_MUX0], 0>,
  1196. InstrStage<1, [A9_DRegsN], 0, Required>,
  1197. InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
  1198. InstrStage<2, [A9_NPipe], 0>,
  1199. InstrStage<2, [A9_LSUnit]>],
  1200. [2, 1, 1, 1, 1, 1, 2]>,
  1201. //
  1202. // VST3ln
  1203. InstrItinData<IIC_VST3ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1204. InstrStage<1, [A9_MUX0], 0>,
  1205. InstrStage<1, [A9_DRegsN], 0, Required>,
  1206. InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
  1207. InstrStage<3, [A9_NPipe], 0>,
  1208. InstrStage<3, [A9_LSUnit]>],
  1209. [1, 1, 1, 1, 2]>,
  1210. //
  1211. // VST3lnu
  1212. InstrItinData<IIC_VST3lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1213. InstrStage<1, [A9_MUX0], 0>,
  1214. InstrStage<1, [A9_DRegsN], 0, Required>,
  1215. InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
  1216. InstrStage<3, [A9_NPipe], 0>,
  1217. InstrStage<3, [A9_LSUnit]>],
  1218. [2, 1, 1, 1, 1, 1, 2]>,
  1219. //
  1220. // VST4
  1221. InstrItinData<IIC_VST4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1222. InstrStage<1, [A9_MUX0], 0>,
  1223. InstrStage<1, [A9_DRegsN], 0, Required>,
  1224. InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
  1225. InstrStage<2, [A9_NPipe], 0>,
  1226. InstrStage<2, [A9_LSUnit]>],
  1227. [1, 1, 1, 1, 2, 2]>,
  1228. //
  1229. // VST4u
  1230. InstrItinData<IIC_VST4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1231. InstrStage<1, [A9_MUX0], 0>,
  1232. InstrStage<1, [A9_DRegsN], 0, Required>,
  1233. InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
  1234. InstrStage<2, [A9_NPipe], 0>,
  1235. InstrStage<2, [A9_LSUnit]>],
  1236. [2, 1, 1, 1, 1, 1, 2, 2]>,
  1237. //
  1238. // VST4ln
  1239. InstrItinData<IIC_VST4ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1240. InstrStage<1, [A9_MUX0], 0>,
  1241. InstrStage<1, [A9_DRegsN], 0, Required>,
  1242. InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
  1243. InstrStage<2, [A9_NPipe], 0>,
  1244. InstrStage<2, [A9_LSUnit]>],
  1245. [1, 1, 1, 1, 2, 2]>,
  1246. //
  1247. // VST4lnu
  1248. InstrItinData<IIC_VST4lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1249. InstrStage<1, [A9_MUX0], 0>,
  1250. InstrStage<1, [A9_DRegsN], 0, Required>,
  1251. InstrStage<2, [A9_DRegsVFP], 0, Reserved>,
  1252. InstrStage<2, [A9_NPipe], 0>,
  1253. InstrStage<2, [A9_LSUnit]>],
  1254. [2, 1, 1, 1, 1, 1, 2, 2]>,
  1255. //
  1256. // Double-register Integer Unary
  1257. InstrItinData<IIC_VUNAiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1258. InstrStage<1, [A9_MUX0], 0>,
  1259. InstrStage<1, [A9_DRegsN], 0, Required>,
  1260. // Extra latency cycles since wbck is 6 cycles
  1261. InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
  1262. InstrStage<1, [A9_NPipe]>],
  1263. [4, 2]>,
  1264. //
  1265. // Quad-register Integer Unary
  1266. InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1267. InstrStage<1, [A9_MUX0], 0>,
  1268. InstrStage<1, [A9_DRegsN], 0, Required>,
  1269. // Extra latency cycles since wbck is 6 cycles
  1270. InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
  1271. InstrStage<1, [A9_NPipe]>],
  1272. [4, 2]>,
  1273. //
  1274. // Double-register Integer Q-Unary
  1275. InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1276. InstrStage<1, [A9_MUX0], 0>,
  1277. InstrStage<1, [A9_DRegsN], 0, Required>,
  1278. // Extra latency cycles since wbck is 6 cycles
  1279. InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
  1280. InstrStage<1, [A9_NPipe]>],
  1281. [4, 1]>,
  1282. //
  1283. // Quad-register Integer Q-Unary
  1284. InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1285. InstrStage<1, [A9_MUX0], 0>,
  1286. InstrStage<1, [A9_DRegsN], 0, Required>,
  1287. // Extra latency cycles since wbck is 6 cycles
  1288. InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
  1289. InstrStage<1, [A9_NPipe]>],
  1290. [4, 1]>,
  1291. //
  1292. // Double-register Integer Binary
  1293. InstrItinData<IIC_VBINiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1294. InstrStage<1, [A9_MUX0], 0>,
  1295. InstrStage<1, [A9_DRegsN], 0, Required>,
  1296. // Extra latency cycles since wbck is 6 cycles
  1297. InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
  1298. InstrStage<1, [A9_NPipe]>],
  1299. [3, 2, 2]>,
  1300. //
  1301. // Quad-register Integer Binary
  1302. InstrItinData<IIC_VBINiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1303. InstrStage<1, [A9_MUX0], 0>,
  1304. InstrStage<1, [A9_DRegsN], 0, Required>,
  1305. // Extra latency cycles since wbck is 6 cycles
  1306. InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
  1307. InstrStage<1, [A9_NPipe]>],
  1308. [3, 2, 2]>,
  1309. //
  1310. // Double-register Integer Subtract
  1311. InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1312. InstrStage<1, [A9_MUX0], 0>,
  1313. InstrStage<1, [A9_DRegsN], 0, Required>,
  1314. // Extra latency cycles since wbck is 6 cycles
  1315. InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
  1316. InstrStage<1, [A9_NPipe]>],
  1317. [3, 2, 1]>,
  1318. //
  1319. // Quad-register Integer Subtract
  1320. InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1321. InstrStage<1, [A9_MUX0], 0>,
  1322. InstrStage<1, [A9_DRegsN], 0, Required>,
  1323. // Extra latency cycles since wbck is 6 cycles
  1324. InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
  1325. InstrStage<1, [A9_NPipe]>],
  1326. [3, 2, 1]>,
  1327. //
  1328. // Double-register Integer Shift
  1329. InstrItinData<IIC_VSHLiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1330. InstrStage<1, [A9_MUX0], 0>,
  1331. InstrStage<1, [A9_DRegsN], 0, Required>,
  1332. // Extra latency cycles since wbck is 6 cycles
  1333. InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
  1334. InstrStage<1, [A9_NPipe]>],
  1335. [3, 1, 1]>,
  1336. //
  1337. // Quad-register Integer Shift
  1338. InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1339. InstrStage<1, [A9_MUX0], 0>,
  1340. InstrStage<1, [A9_DRegsN], 0, Required>,
  1341. // Extra latency cycles since wbck is 6 cycles
  1342. InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
  1343. InstrStage<1, [A9_NPipe]>],
  1344. [3, 1, 1]>,
  1345. //
  1346. // Double-register Integer Shift (4 cycle)
  1347. InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1348. InstrStage<1, [A9_MUX0], 0>,
  1349. InstrStage<1, [A9_DRegsN], 0, Required>,
  1350. // Extra latency cycles since wbck is 6 cycles
  1351. InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
  1352. InstrStage<1, [A9_NPipe]>],
  1353. [4, 1, 1]>,
  1354. //
  1355. // Quad-register Integer Shift (4 cycle)
  1356. InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1357. InstrStage<1, [A9_MUX0], 0>,
  1358. InstrStage<1, [A9_DRegsN], 0, Required>,
  1359. // Extra latency cycles since wbck is 6 cycles
  1360. InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
  1361. InstrStage<1, [A9_NPipe]>],
  1362. [4, 1, 1]>,
  1363. //
  1364. // Double-register Integer Binary (4 cycle)
  1365. InstrItinData<IIC_VBINi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1366. InstrStage<1, [A9_MUX0], 0>,
  1367. InstrStage<1, [A9_DRegsN], 0, Required>,
  1368. // Extra latency cycles since wbck is 6 cycles
  1369. InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
  1370. InstrStage<1, [A9_NPipe]>],
  1371. [4, 2, 2]>,
  1372. //
  1373. // Quad-register Integer Binary (4 cycle)
  1374. InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1375. InstrStage<1, [A9_MUX0], 0>,
  1376. InstrStage<1, [A9_DRegsN], 0, Required>,
  1377. // Extra latency cycles since wbck is 6 cycles
  1378. InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
  1379. InstrStage<1, [A9_NPipe]>],
  1380. [4, 2, 2]>,
  1381. //
  1382. // Double-register Integer Subtract (4 cycle)
  1383. InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1384. InstrStage<1, [A9_MUX0], 0>,
  1385. InstrStage<1, [A9_DRegsN], 0, Required>,
  1386. // Extra latency cycles since wbck is 6 cycles
  1387. InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
  1388. InstrStage<1, [A9_NPipe]>],
  1389. [4, 2, 1]>,
  1390. //
  1391. // Quad-register Integer Subtract (4 cycle)
  1392. InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1393. InstrStage<1, [A9_MUX0], 0>,
  1394. InstrStage<1, [A9_DRegsN], 0, Required>,
  1395. // Extra latency cycles since wbck is 6 cycles
  1396. InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
  1397. InstrStage<1, [A9_NPipe]>],
  1398. [4, 2, 1]>,
  1399. //
  1400. // Double-register Integer Count
  1401. InstrItinData<IIC_VCNTiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1402. InstrStage<1, [A9_MUX0], 0>,
  1403. InstrStage<1, [A9_DRegsN], 0, Required>,
  1404. // Extra latency cycles since wbck is 6 cycles
  1405. InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
  1406. InstrStage<1, [A9_NPipe]>],
  1407. [3, 2, 2]>,
  1408. //
  1409. // Quad-register Integer Count
  1410. // Result written in N3, but that is relative to the last cycle of a multicycle
  1411. // instruction, so we use 4 for those cases
  1412. InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1413. InstrStage<1, [A9_MUX0], 0>,
  1414. InstrStage<1, [A9_DRegsN], 0, Required>,
  1415. // Extra latency cycles since wbck is 7 cycles
  1416. InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
  1417. InstrStage<2, [A9_NPipe]>],
  1418. [4, 2, 2]>,
  1419. //
  1420. // Double-register Absolute Difference and Accumulate
  1421. InstrItinData<IIC_VABAD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1422. InstrStage<1, [A9_MUX0], 0>,
  1423. InstrStage<1, [A9_DRegsN], 0, Required>,
  1424. // Extra latency cycles since wbck is 6 cycles
  1425. InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
  1426. InstrStage<1, [A9_NPipe]>],
  1427. [6, 3, 2, 1]>,
  1428. //
  1429. // Quad-register Absolute Difference and Accumulate
  1430. InstrItinData<IIC_VABAQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1431. InstrStage<1, [A9_MUX0], 0>,
  1432. InstrStage<1, [A9_DRegsN], 0, Required>,
  1433. // Extra latency cycles since wbck is 6 cycles
  1434. InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
  1435. InstrStage<2, [A9_NPipe]>],
  1436. [6, 3, 2, 1]>,
  1437. //
  1438. // Double-register Integer Pair Add Long
  1439. InstrItinData<IIC_VPALiD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1440. InstrStage<1, [A9_MUX0], 0>,
  1441. InstrStage<1, [A9_DRegsN], 0, Required>,
  1442. // Extra latency cycles since wbck is 6 cycles
  1443. InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
  1444. InstrStage<1, [A9_NPipe]>],
  1445. [6, 3, 1]>,
  1446. //
  1447. // Quad-register Integer Pair Add Long
  1448. InstrItinData<IIC_VPALiQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1449. InstrStage<1, [A9_MUX0], 0>,
  1450. InstrStage<1, [A9_DRegsN], 0, Required>,
  1451. // Extra latency cycles since wbck is 6 cycles
  1452. InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
  1453. InstrStage<2, [A9_NPipe]>],
  1454. [6, 3, 1]>,
  1455. //
  1456. // Double-register Integer Multiply (.8, .16)
  1457. InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1458. InstrStage<1, [A9_MUX0], 0>,
  1459. InstrStage<1, [A9_DRegsN], 0, Required>,
  1460. // Extra latency cycles since wbck is 6 cycles
  1461. InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
  1462. InstrStage<1, [A9_NPipe]>],
  1463. [6, 2, 2]>,
  1464. //
  1465. // Quad-register Integer Multiply (.8, .16)
  1466. InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1467. InstrStage<1, [A9_MUX0], 0>,
  1468. InstrStage<1, [A9_DRegsN], 0, Required>,
  1469. // Extra latency cycles since wbck is 7 cycles
  1470. InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
  1471. InstrStage<2, [A9_NPipe]>],
  1472. [7, 2, 2]>,
  1473. //
  1474. // Double-register Integer Multiply (.32)
  1475. InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1476. InstrStage<1, [A9_MUX0], 0>,
  1477. InstrStage<1, [A9_DRegsN], 0, Required>,
  1478. // Extra latency cycles since wbck is 7 cycles
  1479. InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
  1480. InstrStage<2, [A9_NPipe]>],
  1481. [7, 2, 1]>,
  1482. //
  1483. // Quad-register Integer Multiply (.32)
  1484. InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1485. InstrStage<1, [A9_MUX0], 0>,
  1486. InstrStage<1, [A9_DRegsN], 0, Required>,
  1487. // Extra latency cycles since wbck is 9 cycles
  1488. InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
  1489. InstrStage<4, [A9_NPipe]>],
  1490. [9, 2, 1]>,
  1491. //
  1492. // Double-register Integer Multiply-Accumulate (.8, .16)
  1493. InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1494. InstrStage<1, [A9_MUX0], 0>,
  1495. InstrStage<1, [A9_DRegsN], 0, Required>,
  1496. // Extra latency cycles since wbck is 6 cycles
  1497. InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
  1498. InstrStage<1, [A9_NPipe]>],
  1499. [6, 3, 2, 2]>,
  1500. //
  1501. // Double-register Integer Multiply-Accumulate (.32)
  1502. InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1503. InstrStage<1, [A9_MUX0], 0>,
  1504. InstrStage<1, [A9_DRegsN], 0, Required>,
  1505. // Extra latency cycles since wbck is 7 cycles
  1506. InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
  1507. InstrStage<2, [A9_NPipe]>],
  1508. [7, 3, 2, 1]>,
  1509. //
  1510. // Quad-register Integer Multiply-Accumulate (.8, .16)
  1511. InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1512. InstrStage<1, [A9_MUX0], 0>,
  1513. InstrStage<1, [A9_DRegsN], 0, Required>,
  1514. // Extra latency cycles since wbck is 7 cycles
  1515. InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
  1516. InstrStage<2, [A9_NPipe]>],
  1517. [7, 3, 2, 2]>,
  1518. //
  1519. // Quad-register Integer Multiply-Accumulate (.32)
  1520. InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1521. InstrStage<1, [A9_MUX0], 0>,
  1522. InstrStage<1, [A9_DRegsN], 0, Required>,
  1523. // Extra latency cycles since wbck is 9 cycles
  1524. InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
  1525. InstrStage<4, [A9_NPipe]>],
  1526. [9, 3, 2, 1]>,
  1527. //
  1528. // Move
  1529. InstrItinData<IIC_VMOV, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1530. InstrStage<1, [A9_MUX0], 0>,
  1531. InstrStage<1, [A9_DRegsN], 0, Required>,
  1532. InstrStage<1, [A9_DRegsVFP], 0, Reserved>,
  1533. InstrStage<1, [A9_NPipe]>],
  1534. [1,1]>,
  1535. //
  1536. // Move Immediate
  1537. InstrItinData<IIC_VMOVImm, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1538. InstrStage<1, [A9_MUX0], 0>,
  1539. InstrStage<1, [A9_DRegsN], 0, Required>,
  1540. // Extra latency cycles since wbck is 6 cycles
  1541. InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
  1542. InstrStage<1, [A9_NPipe]>],
  1543. [3]>,
  1544. //
  1545. // Double-register Permute Move
  1546. InstrItinData<IIC_VMOVD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1547. InstrStage<1, [A9_MUX0], 0>,
  1548. InstrStage<1, [A9_DRegsN], 0, Required>,
  1549. // Extra latency cycles since wbck is 6 cycles
  1550. InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
  1551. InstrStage<1, [A9_NPipe]>],
  1552. [2, 1]>,
  1553. //
  1554. // Quad-register Permute Move
  1555. InstrItinData<IIC_VMOVQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1556. InstrStage<1, [A9_MUX0], 0>,
  1557. InstrStage<1, [A9_DRegsN], 0, Required>,
  1558. // Extra latency cycles since wbck is 6 cycles
  1559. InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
  1560. InstrStage<1, [A9_NPipe]>],
  1561. [2, 1]>,
  1562. //
  1563. // Integer to Single-precision Move
  1564. InstrItinData<IIC_VMOVIS , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1565. InstrStage<1, [A9_MUX0], 0>,
  1566. InstrStage<1, [A9_DRegsN], 0, Required>,
  1567. InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
  1568. InstrStage<1, [A9_NPipe]>],
  1569. [1, 1]>,
  1570. //
  1571. // Integer to Double-precision Move
  1572. InstrItinData<IIC_VMOVID , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1573. InstrStage<1, [A9_MUX0], 0>,
  1574. InstrStage<1, [A9_DRegsN], 0, Required>,
  1575. InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
  1576. InstrStage<1, [A9_NPipe]>],
  1577. [1, 1, 1]>,
  1578. //
  1579. // Single-precision to Integer Move
  1580. InstrItinData<IIC_VMOVSI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1581. InstrStage<1, [A9_MUX0], 0>,
  1582. InstrStage<1, [A9_DRegsN], 0, Required>,
  1583. InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
  1584. InstrStage<1, [A9_NPipe]>],
  1585. [2, 1]>,
  1586. //
  1587. // Double-precision to Integer Move
  1588. InstrItinData<IIC_VMOVDI , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1589. InstrStage<1, [A9_MUX0], 0>,
  1590. InstrStage<1, [A9_DRegsN], 0, Required>,
  1591. InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
  1592. InstrStage<1, [A9_NPipe]>],
  1593. [2, 2, 1]>,
  1594. //
  1595. // Integer to Lane Move
  1596. InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1597. InstrStage<1, [A9_MUX0], 0>,
  1598. InstrStage<1, [A9_DRegsN], 0, Required>,
  1599. InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
  1600. InstrStage<2, [A9_NPipe]>],
  1601. [3, 1, 1]>,
  1602. //
  1603. // Vector narrow move
  1604. InstrItinData<IIC_VMOVN, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1605. InstrStage<1, [A9_MUX0], 0>,
  1606. InstrStage<1, [A9_DRegsN], 0, Required>,
  1607. // Extra latency cycles since wbck is 6 cycles
  1608. InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
  1609. InstrStage<1, [A9_NPipe]>],
  1610. [3, 1]>,
  1611. //
  1612. // Double-register FP Unary
  1613. InstrItinData<IIC_VUNAD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1614. InstrStage<1, [A9_MUX0], 0>,
  1615. InstrStage<1, [A9_DRegsN], 0, Required>,
  1616. // Extra latency cycles since wbck is 6 cycles
  1617. InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
  1618. InstrStage<1, [A9_NPipe]>],
  1619. [5, 2]>,
  1620. //
  1621. // Quad-register FP Unary
  1622. // Result written in N5, but that is relative to the last cycle of multicycle,
  1623. // so we use 6 for those cases
  1624. InstrItinData<IIC_VUNAQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1625. InstrStage<1, [A9_MUX0], 0>,
  1626. InstrStage<1, [A9_DRegsN], 0, Required>,
  1627. // Extra latency cycles since wbck is 7 cycles
  1628. InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
  1629. InstrStage<2, [A9_NPipe]>],
  1630. [6, 2]>,
  1631. //
  1632. // Double-register FP Binary
  1633. // FIXME: We're using this itin for many instructions and [2, 2] here is too
  1634. // optimistic.
  1635. InstrItinData<IIC_VBIND, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1636. InstrStage<1, [A9_MUX0], 0>,
  1637. InstrStage<1, [A9_DRegsN], 0, Required>,
  1638. // Extra latency cycles since wbck is 6 cycles
  1639. InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
  1640. InstrStage<1, [A9_NPipe]>],
  1641. [5, 2, 2]>,
  1642. //
  1643. // VPADD, etc.
  1644. InstrItinData<IIC_VPBIND, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1645. InstrStage<1, [A9_MUX0], 0>,
  1646. InstrStage<1, [A9_DRegsN], 0, Required>,
  1647. // Extra latency cycles since wbck is 6 cycles
  1648. InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
  1649. InstrStage<1, [A9_NPipe]>],
  1650. [5, 1, 1]>,
  1651. //
  1652. // Double-register FP VMUL
  1653. InstrItinData<IIC_VFMULD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1654. InstrStage<1, [A9_MUX0], 0>,
  1655. InstrStage<1, [A9_DRegsN], 0, Required>,
  1656. // Extra latency cycles since wbck is 6 cycles
  1657. InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
  1658. InstrStage<1, [A9_NPipe]>],
  1659. [5, 2, 1]>,
  1660. //
  1661. // Quad-register FP Binary
  1662. // Result written in N5, but that is relative to the last cycle of multicycle,
  1663. // so we use 6 for those cases
  1664. // FIXME: We're using this itin for many instructions and [2, 2] here is too
  1665. // optimistic.
  1666. InstrItinData<IIC_VBINQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1667. InstrStage<1, [A9_MUX0], 0>,
  1668. InstrStage<1, [A9_DRegsN], 0, Required>,
  1669. // Extra latency cycles since wbck is 7 cycles
  1670. InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
  1671. InstrStage<2, [A9_NPipe]>],
  1672. [6, 2, 2]>,
  1673. //
  1674. // Quad-register FP VMUL
  1675. InstrItinData<IIC_VFMULQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1676. InstrStage<1, [A9_MUX0], 0>,
  1677. InstrStage<1, [A9_DRegsN], 0, Required>,
  1678. // Extra latency cycles since wbck is 7 cycles
  1679. InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
  1680. InstrStage<1, [A9_NPipe]>],
  1681. [6, 2, 1]>,
  1682. //
  1683. // Double-register FP Multiply-Accumulate
  1684. InstrItinData<IIC_VMACD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1685. InstrStage<1, [A9_MUX0], 0>,
  1686. InstrStage<1, [A9_DRegsN], 0, Required>,
  1687. // Extra latency cycles since wbck is 7 cycles
  1688. InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
  1689. InstrStage<2, [A9_NPipe]>],
  1690. [6, 3, 2, 1]>,
  1691. //
  1692. // Quad-register FP Multiply-Accumulate
  1693. // Result written in N9, but that is relative to the last cycle of multicycle,
  1694. // so we use 10 for those cases
  1695. InstrItinData<IIC_VMACQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1696. InstrStage<1, [A9_MUX0], 0>,
  1697. InstrStage<1, [A9_DRegsN], 0, Required>,
  1698. // Extra latency cycles since wbck is 9 cycles
  1699. InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
  1700. InstrStage<4, [A9_NPipe]>],
  1701. [8, 4, 2, 1]>,
  1702. //
  1703. // Double-register Fused FP Multiply-Accumulate
  1704. InstrItinData<IIC_VFMACD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1705. InstrStage<1, [A9_MUX0], 0>,
  1706. InstrStage<1, [A9_DRegsN], 0, Required>,
  1707. // Extra latency cycles since wbck is 7 cycles
  1708. InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
  1709. InstrStage<2, [A9_NPipe]>],
  1710. [6, 3, 2, 1]>,
  1711. //
  1712. // Quad-register Fused FP Multiply-Accumulate
  1713. // Result written in N9, but that is relative to the last cycle of multicycle,
  1714. // so we use 10 for those cases
  1715. InstrItinData<IIC_VFMACQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1716. InstrStage<1, [A9_MUX0], 0>,
  1717. InstrStage<1, [A9_DRegsN], 0, Required>,
  1718. // Extra latency cycles since wbck is 9 cycles
  1719. InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
  1720. InstrStage<4, [A9_NPipe]>],
  1721. [8, 4, 2, 1]>,
  1722. //
  1723. // Double-register Reciprocal Step
  1724. InstrItinData<IIC_VRECSD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1725. InstrStage<1, [A9_MUX0], 0>,
  1726. InstrStage<1, [A9_DRegsN], 0, Required>,
  1727. // Extra latency cycles since wbck is 10 cycles
  1728. InstrStage<11, [A9_DRegsVFP], 0, Reserved>,
  1729. InstrStage<1, [A9_NPipe]>],
  1730. [9, 2, 2]>,
  1731. //
  1732. // Quad-register Reciprocal Step
  1733. InstrItinData<IIC_VRECSQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1734. InstrStage<1, [A9_MUX0], 0>,
  1735. InstrStage<1, [A9_DRegsN], 0, Required>,
  1736. // Extra latency cycles since wbck is 11 cycles
  1737. InstrStage<12, [A9_DRegsVFP], 0, Reserved>,
  1738. InstrStage<2, [A9_NPipe]>],
  1739. [10, 2, 2]>,
  1740. //
  1741. // Double-register Permute
  1742. InstrItinData<IIC_VPERMD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1743. InstrStage<1, [A9_MUX0], 0>,
  1744. InstrStage<1, [A9_DRegsN], 0, Required>,
  1745. // Extra latency cycles since wbck is 6 cycles
  1746. InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
  1747. InstrStage<1, [A9_NPipe]>],
  1748. [2, 2, 1, 1]>,
  1749. //
  1750. // Quad-register Permute
  1751. // Result written in N2, but that is relative to the last cycle of multicycle,
  1752. // so we use 3 for those cases
  1753. InstrItinData<IIC_VPERMQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1754. InstrStage<1, [A9_MUX0], 0>,
  1755. InstrStage<1, [A9_DRegsN], 0, Required>,
  1756. // Extra latency cycles since wbck is 7 cycles
  1757. InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
  1758. InstrStage<2, [A9_NPipe]>],
  1759. [3, 3, 1, 1]>,
  1760. //
  1761. // Quad-register Permute (3 cycle issue)
  1762. // Result written in N2, but that is relative to the last cycle of multicycle,
  1763. // so we use 4 for those cases
  1764. InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1765. InstrStage<1, [A9_MUX0], 0>,
  1766. InstrStage<1, [A9_DRegsN], 0, Required>,
  1767. // Extra latency cycles since wbck is 8 cycles
  1768. InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
  1769. InstrStage<3, [A9_NPipe]>],
  1770. [4, 4, 1, 1]>,
  1771. //
  1772. // Double-register VEXT
  1773. InstrItinData<IIC_VEXTD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1774. InstrStage<1, [A9_MUX0], 0>,
  1775. InstrStage<1, [A9_DRegsN], 0, Required>,
  1776. // Extra latency cycles since wbck is 6 cycles
  1777. InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
  1778. InstrStage<1, [A9_NPipe]>],
  1779. [2, 1, 1]>,
  1780. //
  1781. // Quad-register VEXT
  1782. InstrItinData<IIC_VEXTQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1783. InstrStage<1, [A9_MUX0], 0>,
  1784. InstrStage<1, [A9_DRegsN], 0, Required>,
  1785. // Extra latency cycles since wbck is 7 cycles
  1786. InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
  1787. InstrStage<2, [A9_NPipe]>],
  1788. [3, 1, 2]>,
  1789. //
  1790. // VTB
  1791. InstrItinData<IIC_VTB1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1792. InstrStage<1, [A9_MUX0], 0>,
  1793. InstrStage<1, [A9_DRegsN], 0, Required>,
  1794. // Extra latency cycles since wbck is 7 cycles
  1795. InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
  1796. InstrStage<2, [A9_NPipe]>],
  1797. [3, 2, 1]>,
  1798. InstrItinData<IIC_VTB2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1799. InstrStage<1, [A9_MUX0], 0>,
  1800. InstrStage<2, [A9_DRegsN], 0, Required>,
  1801. // Extra latency cycles since wbck is 7 cycles
  1802. InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
  1803. InstrStage<2, [A9_NPipe]>],
  1804. [3, 2, 2, 1]>,
  1805. InstrItinData<IIC_VTB3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1806. InstrStage<1, [A9_MUX0], 0>,
  1807. InstrStage<2, [A9_DRegsN], 0, Required>,
  1808. // Extra latency cycles since wbck is 8 cycles
  1809. InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
  1810. InstrStage<3, [A9_NPipe]>],
  1811. [4, 2, 2, 3, 1]>,
  1812. InstrItinData<IIC_VTB4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1813. InstrStage<1, [A9_MUX0], 0>,
  1814. InstrStage<1, [A9_DRegsN], 0, Required>,
  1815. // Extra latency cycles since wbck is 8 cycles
  1816. InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
  1817. InstrStage<3, [A9_NPipe]>],
  1818. [4, 2, 2, 3, 3, 1]>,
  1819. //
  1820. // VTBX
  1821. InstrItinData<IIC_VTBX1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1822. InstrStage<1, [A9_MUX0], 0>,
  1823. InstrStage<1, [A9_DRegsN], 0, Required>,
  1824. // Extra latency cycles since wbck is 7 cycles
  1825. InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
  1826. InstrStage<2, [A9_NPipe]>],
  1827. [3, 1, 2, 1]>,
  1828. InstrItinData<IIC_VTBX2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1829. InstrStage<1, [A9_MUX0], 0>,
  1830. InstrStage<1, [A9_DRegsN], 0, Required>,
  1831. // Extra latency cycles since wbck is 7 cycles
  1832. InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
  1833. InstrStage<2, [A9_NPipe]>],
  1834. [3, 1, 2, 2, 1]>,
  1835. InstrItinData<IIC_VTBX3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1836. InstrStage<1, [A9_MUX0], 0>,
  1837. InstrStage<1, [A9_DRegsN], 0, Required>,
  1838. // Extra latency cycles since wbck is 8 cycles
  1839. InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
  1840. InstrStage<3, [A9_NPipe]>],
  1841. [4, 1, 2, 2, 3, 1]>,
  1842. InstrItinData<IIC_VTBX4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
  1843. InstrStage<1, [A9_MUX0], 0>,
  1844. InstrStage<1, [A9_DRegsN], 0, Required>,
  1845. // Extra latency cycles since wbck is 8 cycles
  1846. InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
  1847. InstrStage<2, [A9_NPipe]>],
  1848. [4, 1, 2, 2, 3, 3, 1]>
  1849. ]>;
  1850. // ===---------------------------------------------------------------------===//
  1851. // The following definitions describe the simpler per-operand machine model.
  1852. // This works with MachineScheduler and will eventually replace itineraries.
  1853. class A9WriteLMOpsListType<list<WriteSequence> writes> { // Holder for a list of WriteSequences.
  1854. list <WriteSequence> Writes = writes; // Sliced by index range in the LDM/VLDM SchedVar entries.
  1855. SchedMachineModel SchedModel = ?; // Unset here. NOTE(review): presumably bound by an enclosing `let SchedModel = ... in` -- confirm.
  1856. }
  1857. // Cortex-A9 machine model for scheduling and other instruction cost heuristics.
  1858. def CortexA9Model : SchedMachineModel {
  1859. let IssueWidth = 2; // 2 micro-ops are dispatched per cycle.
  1860. let MicroOpBufferSize = 56; // Based on available renamed registers.
  1861. let LoadLatency = 2; // Optimistic load latency assuming bypass.
  1862. // This is overridden by OperandCycles if the
  1863. // Itineraries are queried instead.
  1864. let MispredictPenalty = 8; // Based on estimate of pipeline depth.
  1865. let Itineraries = CortexA9Itineraries;
  1866. // FIXME: Many vector operations were never given an itinerary. We
  1867. // haven't mapped these to the new model either.
  1868. let CompleteModel = 0;
  1869. // FIXME: Remove when all errors have been fixed.
  1870. let FullInstRWOverlapCheck = 0;
  1871. }
  1872. //===----------------------------------------------------------------------===//
  1873. // Define each kind of processor resource and number available.
  1874. //
  1875. // The AGU unit has BufferSize=1 so that the latency between operations
  1876. // that use it is considered to stall other operations.
  1877. //
  1878. // The FP unit has BufferSize=0 so that it is a hard dispatch
  1879. // hazard. No instruction may be dispatched while the unit is reserved.
  1880. let SchedModel = CortexA9Model in {
  1881. def A9UnitALU : ProcResource<2>;
  1882. def A9UnitMul : ProcResource<1> { let Super = A9UnitALU; }
  1883. def A9UnitAGU : ProcResource<1> { let BufferSize = 1; }
  1884. def A9UnitLS : ProcResource<1>;
  1885. def A9UnitFP : ProcResource<1> { let BufferSize = 0; }
  1886. def A9UnitB : ProcResource<1>;
  1887. //===----------------------------------------------------------------------===//
  1888. // Define scheduler read/write types with their resources and latency on A9.
  1889. // Consume an issue slot, but no processor resources. This is useful when all
  1890. // other writes associated with the operand have NumMicroOps = 0.
  1891. def A9WriteIssue : SchedWriteRes<[]> { let Latency = 0; }
  1892. // Write an integer register.
  1893. def A9WriteI : SchedWriteRes<[A9UnitALU]>;
  1894. // Write an integer shifted-by register
  1895. def A9WriteIsr : SchedWriteRes<[A9UnitALU]> { let Latency = 2; }
  1896. // Basic ALU.
  1897. def A9WriteALU : SchedWriteRes<[A9UnitALU]>;
  1898. // ALU with operand shifted by immediate.
  1899. def : WriteRes<WriteALUsi, [A9UnitALU]> { let Latency = 2; }
  1900. // ALU with operand shifted by register.
  1901. def A9WriteALUsr : SchedWriteRes<[A9UnitALU]> { let Latency = 3; }
  1902. // Multiplication
  1903. def A9WriteM : SchedWriteRes<[A9UnitMul, A9UnitMul]> { let Latency = 4; }
  1904. def A9WriteMHi : SchedWriteRes<[A9UnitMul]> { let Latency = 5;
  1905. let NumMicroOps = 0; }
  1906. def A9WriteM16 : SchedWriteRes<[A9UnitMul]> { let Latency = 3; }
  1907. def A9WriteM16Hi : SchedWriteRes<[A9UnitMul]> { let Latency = 4;
  1908. let NumMicroOps = 0; }
  1909. def : SchedAlias<WriteMUL16, A9WriteM16>;
  1910. def : SchedAlias<WriteMUL32, A9WriteM>;
  1911. def : SchedAlias<WriteMUL64Lo, A9WriteM>;
  1912. def : SchedAlias<WriteMUL64Hi, A9WriteMHi>;
  1913. def : SchedAlias<WriteMAC16, A9WriteM16>;
  1914. def : SchedAlias<WriteMAC32, A9WriteM>;
  1915. def : SchedAlias<WriteMAC64Lo, A9WriteM>;
  1916. def : SchedAlias<WriteMAC64Hi, A9WriteMHi>;
  1917. def : ReadAdvance<ReadMUL, 0>;
  1918. def : ReadAdvance<ReadMAC, 0>;
  1919. // Floating-point
  1920. // Only one FP or AGU instruction may issue per cycle. We model this
  1921. // by having FP instructions consume the AGU resource.
  1922. def A9WriteF : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 4; }
  1923. def A9WriteFMov : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 1; }
  1924. def A9WriteFMulS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 5; }
  1925. def A9WriteFMulD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 6; }
  1926. def A9WriteFMAS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 8; }
  1927. def A9WriteFMAD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 9; }
  1928. def A9WriteFDivS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 15; }
  1929. def A9WriteFDivD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 25; }
  1930. def A9WriteFSqrtS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 17; }
  1931. def A9WriteFSqrtD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 32; }
  1932. // NEON has an odd mix of latencies. Simply name the write types by latency.
  1933. def A9WriteV1 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 1; }
  1934. def A9WriteV2 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 2; }
  1935. def A9WriteV3 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 3; }
  1936. def A9WriteV4 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 4; }
  1937. def A9WriteV5 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 5; }
  1938. def A9WriteV6 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 6; }
  1939. def A9WriteV7 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 7; }
  1940. def A9WriteV9 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 9; }
  1941. def A9WriteV10 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 10; }
  1942. def : WriteRes<WriteVLD1, []>;
  1943. def : WriteRes<WriteVLD2, []>;
  1944. def : WriteRes<WriteVLD3, []>;
  1945. def : WriteRes<WriteVLD4, []>;
  1946. def : WriteRes<WriteVST1, []>;
  1947. def : WriteRes<WriteVST2, []>;
  1948. def : WriteRes<WriteVST3, []>;
  1949. def : WriteRes<WriteVST4, []>;
  1950. // Reserve A9UnitFP for 2 consecutive cycles and A9UnitAGU for 1 (ResourceCycles = [2, 1]).
  1951. def A9Write2V4 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  1952. let Latency = 4;
  1953. let ResourceCycles = [2, 1];
  1954. }
  1955. def A9Write2V7 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  1956. let Latency = 7;
  1957. let ResourceCycles = [2, 1];
  1958. }
  1959. def A9Write2V9 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> {
  1960. let Latency = 9;
  1961. let ResourceCycles = [2, 1];
  1962. }
  1963. // Branches don't have a def operand but still consume resources.
  1964. def A9WriteB : SchedWriteRes<[A9UnitB]>;
  1965. // Address generation.
  1966. def A9WriteAdr : SchedWriteRes<[A9UnitAGU]> { let NumMicroOps = 0; }
  1967. // Load Integer.
  1968. def A9WriteL : SchedWriteRes<[A9UnitLS]> { let Latency = 3; }
  1969. def : SchedAlias<WriteLd, A9WriteL>;
  1970. // Load the upper 32-bits using the same micro-op.
  1971. def A9WriteLHi : SchedWriteRes<[]> { let Latency = 3;
  1972. let NumMicroOps = 0; }
  1973. // Offset shifted by register.
  1974. def A9WriteLsi : SchedWriteRes<[A9UnitLS]> { let Latency = 4; }
  1975. // Load (and zero extend) a byte.
  1976. def A9WriteLb : SchedWriteRes<[A9UnitLS]> { let Latency = 4; }
  1977. def A9WriteLbsi : SchedWriteRes<[A9UnitLS]> { let Latency = 5; }
  1978. // Load or Store Float, aligned.
  1979. def A9WriteLSfp : SchedWriteRes<[A9UnitLS, A9UnitFP]> { let Latency = 1; }
  1980. // Store Integer.
  1981. def A9WriteS : SchedWriteRes<[A9UnitLS]>;
  1982. //===----------------------------------------------------------------------===//
  1983. // Define resources dynamically for load multiple variants.
  1984. // Define helpers for extra latency without consuming resources.
  1985. def A9WriteCycle1 : SchedWriteRes<[]> { let Latency = 1; let NumMicroOps = 0; }
  1986. foreach NumCycles = 2-8 in {
  1987. def A9WriteCycle#NumCycles : WriteSequence<[A9WriteCycle1], NumCycles>;
  1988. } // foreach NumCycles
  1989. // Define address generation sequences and predicates for 8 flavors of LDMs.
  1990. foreach NumAddr = 1-8 in {
  1991. // Define A9WriteAdr1-8 as a sequence of A9WriteAdr with additive
  1992. // latency for instructions that generate multiple loads or stores.
  1993. def A9WriteAdr#NumAddr : WriteSequence<[A9WriteAdr], NumAddr>;
  1994. // Define a predicate to select the LDM based on number of memory addresses.
  1995. def A9LMAdr#NumAddr#Pred :
  1996. SchedPredicate<"(TII->getNumLDMAddresses(*MI)+1)/2 == "#NumAddr>;
  1997. } // foreach NumAddr
  1998. // Fall-back for unknown LDMs.
  1999. def A9LMUnknownPred : SchedPredicate<"TII->getNumLDMAddresses(*MI) == 0">;
  2000. // LDM/VLDM/VLDn address generation latency & resources.
  2001. // Dynamically select the A9WriteAdrN sequence using a predicate.
  2002. def A9WriteLMAdr : SchedWriteVariant<[
  2003. SchedVar<A9LMAdr1Pred, [A9WriteAdr1]>,
  2004. SchedVar<A9LMAdr2Pred, [A9WriteAdr2]>,
  2005. SchedVar<A9LMAdr3Pred, [A9WriteAdr3]>,
  2006. SchedVar<A9LMAdr4Pred, [A9WriteAdr4]>,
  2007. SchedVar<A9LMAdr5Pred, [A9WriteAdr5]>,
  2008. SchedVar<A9LMAdr6Pred, [A9WriteAdr6]>,
  2009. SchedVar<A9LMAdr7Pred, [A9WriteAdr7]>,
  2010. SchedVar<A9LMAdr8Pred, [A9WriteAdr8]>,
  2011. // For unknown LDM/VLDM/VSTM, assume 2 32-bit registers.
  2012. SchedVar<A9LMUnknownPred, [A9WriteAdr2]>]>;
  2013. // Define LDM Resources.
  2014. // These take no issue resource, so they can be combined with other
  2015. // writes like WriteB.
  2016. // A9WriteLMLo takes a single LS resource and 2 cycles.
  2017. def A9WriteLMLo : SchedWriteRes<[A9UnitLS]> { let Latency = 2;
  2018. let NumMicroOps = 0; }
  2019. // Assuming aligned access, the upper half of each pair is free with
  2020. // the same latency.
  2021. def A9WriteLMHi : SchedWriteRes<[]> { let Latency = 2;
  2022. let NumMicroOps = 0; }
  2023. // Each A9WriteL#N variant adds N cycles of latency without consuming
  2024. // additional resources.
  2025. foreach NumAddr = 1-8 in {
  2026. def A9WriteL#NumAddr : WriteSequence<
  2027. [A9WriteLMLo, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
  2028. def A9WriteL#NumAddr#Hi : WriteSequence<
  2029. [A9WriteLMHi, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
  2030. }
  2031. //===----------------------------------------------------------------------===//
  2032. // LDM: Load multiple into 32-bit integer registers.
  2033. def A9WriteLMOpsList : A9WriteLMOpsListType<
  2034. [A9WriteL1, A9WriteL1Hi,
  2035. A9WriteL2, A9WriteL2Hi,
  2036. A9WriteL3, A9WriteL3Hi,
  2037. A9WriteL4, A9WriteL4Hi,
  2038. A9WriteL5, A9WriteL5Hi,
  2039. A9WriteL6, A9WriteL6Hi,
  2040. A9WriteL7, A9WriteL7Hi,
  2041. A9WriteL8, A9WriteL8Hi]>;
  2042. // A9WriteLM variants expand into a pair of writes for each 64-bit
  2043. // value loaded. When the number of registers is odd, the last
  2044. // A9WriteLnHi is naturally ignored because the instruction has no
  2045. // following def operands. These variants take no issue resource, so
  2046. // they may need to be part of a WriteSequence that includes A9WriteIssue.
  2047. def A9WriteLM : SchedWriteVariant<[
  2048. SchedVar<A9LMAdr1Pred, A9WriteLMOpsList.Writes[0-1]>,
  2049. SchedVar<A9LMAdr2Pred, A9WriteLMOpsList.Writes[0-3]>,
  2050. SchedVar<A9LMAdr3Pred, A9WriteLMOpsList.Writes[0-5]>,
  2051. SchedVar<A9LMAdr4Pred, A9WriteLMOpsList.Writes[0-7]>,
  2052. SchedVar<A9LMAdr5Pred, A9WriteLMOpsList.Writes[0-9]>,
  2053. SchedVar<A9LMAdr6Pred, A9WriteLMOpsList.Writes[0-11]>,
  2054. SchedVar<A9LMAdr7Pred, A9WriteLMOpsList.Writes[0-13]>,
  2055. SchedVar<A9LMAdr8Pred, A9WriteLMOpsList.Writes[0-15]>,
  2056. // For unknown LDMs, define the maximum number of writes, but only
  2057. // make the first two consume resources.
  2058. SchedVar<A9LMUnknownPred, [A9WriteL1, A9WriteL1Hi,
  2059. A9WriteL2, A9WriteL2Hi,
  2060. A9WriteL3Hi, A9WriteL3Hi,
  2061. A9WriteL4Hi, A9WriteL4Hi,
  2062. A9WriteL5Hi, A9WriteL5Hi,
  2063. A9WriteL6Hi, A9WriteL6Hi,
  2064. A9WriteL7Hi, A9WriteL7Hi,
  2065. A9WriteL8Hi, A9WriteL8Hi]>]> {
  2066. let Variadic = 1;
  2067. }
  2068. //===----------------------------------------------------------------------===//
  2069. // VFP Load/Store Multiple Variants, and NEON VLDn/VSTn support.
  2070. // A9WriteLfpOp is the same as A9WriteLSfp but takes no issue resources
  2071. // so can be used in WriteSequences for single-issue instructions that
  2072. // encapsulate multiple loads.
  2073. def A9WriteLfpOp : SchedWriteRes<[A9UnitLS, A9UnitFP]> {
  2074. let Latency = 1;
  2075. let NumMicroOps = 0;
  2076. }
  2077. foreach NumAddr = 1-8 in {
  2078. // Helper for A9WriteLfp1-8: A sequence of fp loads with no micro-ops.
  2079. def A9WriteLfp#NumAddr#Seq : WriteSequence<[A9WriteLfpOp], NumAddr>;
  2080. // A9WriteLfp1-8 definitions are statically expanded into a sequence of
  2081. // A9WriteLfpOps with additive latency that takes a single issue slot.
  2082. // Used directly to describe NEON VLDn.
  2083. def A9WriteLfp#NumAddr : WriteSequence<
  2084. [A9WriteIssue, !cast<SchedWrite>("A9WriteLfp"#NumAddr#Seq)]>;
  2085. // A9WriteLfp1-8Mov adds a cycle of latency and FP resource for
  2086. // permuting loaded values.
  2087. def A9WriteLfp#NumAddr#Mov : WriteSequence<
  2088. [A9WriteF, !cast<SchedWrite>("A9WriteLfp"#NumAddr#Seq)]>;
  2089. } // foreach NumAddr
  2090. // Define VLDM/VSTM PreRA resources.
  2091. // A9WriteLMfpPreRA are dynamically expanded into the correct
  2092. // A9WriteLfp1-8 sequence based on a predicate. This supports the
  2093. // preRA VLDM variants in which all 64-bit loads are written to the
  2094. // same tuple of either single or double precision registers.
  2095. def A9WriteLMfpPreRA : SchedWriteVariant<[
  2096. SchedVar<A9LMAdr1Pred, [A9WriteLfp1]>,
  2097. SchedVar<A9LMAdr2Pred, [A9WriteLfp2]>,
  2098. SchedVar<A9LMAdr3Pred, [A9WriteLfp3]>,
  2099. SchedVar<A9LMAdr4Pred, [A9WriteLfp4]>,
  2100. SchedVar<A9LMAdr5Pred, [A9WriteLfp5]>,
  2101. SchedVar<A9LMAdr6Pred, [A9WriteLfp6]>,
  2102. SchedVar<A9LMAdr7Pred, [A9WriteLfp7]>,
  2103. SchedVar<A9LMAdr8Pred, [A9WriteLfp8]>,
  2104. // For unknown VLDM/VSTM PreRA, assume 2xS registers.
  2105. SchedVar<A9LMUnknownPred, [A9WriteLfp2]>]>;
  2106. // Define VLDM/VSTM PostRA Resources.
  2107. // A9WriteLMfpLo takes a LS and FP resource and one issue slot but no latency.
  2108. def A9WriteLMfpLo : SchedWriteRes<[A9UnitLS, A9UnitFP]> { let Latency = 0; }
  2109. foreach NumAddr = 1-8 in {
  2110. // Each A9WriteLMfp#N variant adds N cycles of latency without consuming
  2111. // additional resources.
  2112. def A9WriteLMfp#NumAddr : WriteSequence<
  2113. [A9WriteLMfpLo, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
  2114. // Assuming aligned access, the upper half of each pair is free with
  2115. // the same latency.
  2116. def A9WriteLMfp#NumAddr#Hi : WriteSequence<
  2117. [A9WriteLMHi, !cast<SchedWrite>("A9WriteCycle"#NumAddr)]>;
  2118. } // foreach NumAddr
  2119. // VLDM PostRA Variants. These variants expand A9WriteLMfpPostRA into a
  2120. // pair of writes for each 64-bit data loaded. When the number of
  2121. // registers is odd, the last WriteLMfpnHi is naturally ignored because
  2122. // the instruction has no following def operands.
  2123. def A9WriteLMfpPostRAOpsList : A9WriteLMOpsListType<
  2124. [A9WriteLMfp1, A9WriteLMfp2, // 0-1
  2125. A9WriteLMfp3, A9WriteLMfp4, // 2-3
  2126. A9WriteLMfp5, A9WriteLMfp6, // 4-5
  2127. A9WriteLMfp7, A9WriteLMfp8, // 6-7
  2128. A9WriteLMfp1Hi, // 8-8
  2129. A9WriteLMfp2Hi, A9WriteLMfp2Hi, // 9-10
  2130. A9WriteLMfp3Hi, A9WriteLMfp3Hi, // 11-12
  2131. A9WriteLMfp4Hi, A9WriteLMfp4Hi, // 13-14
  2132. A9WriteLMfp5Hi, A9WriteLMfp5Hi, // 15-16
  2133. A9WriteLMfp6Hi, A9WriteLMfp6Hi, // 17-18
  2134. A9WriteLMfp7Hi, A9WriteLMfp7Hi, // 19-20
  2135. A9WriteLMfp8Hi, A9WriteLMfp8Hi]>; // 21-22
  2136. def A9WriteLMfpPostRA : SchedWriteVariant<[
  2137. SchedVar<A9LMAdr1Pred, A9WriteLMfpPostRAOpsList.Writes[0-0, 8-8]>,
  2138. SchedVar<A9LMAdr2Pred, A9WriteLMfpPostRAOpsList.Writes[0-1, 9-10]>,
  2139. SchedVar<A9LMAdr3Pred, A9WriteLMfpPostRAOpsList.Writes[0-2, 10-12]>,
  2140. SchedVar<A9LMAdr4Pred, A9WriteLMfpPostRAOpsList.Writes[0-3, 11-14]>,
  2141. SchedVar<A9LMAdr5Pred, A9WriteLMfpPostRAOpsList.Writes[0-4, 12-16]>,
  2142. SchedVar<A9LMAdr6Pred, A9WriteLMfpPostRAOpsList.Writes[0-5, 13-18]>,
  2143. SchedVar<A9LMAdr7Pred, A9WriteLMfpPostRAOpsList.Writes[0-6, 14-20]>,
  2144. SchedVar<A9LMAdr8Pred, A9WriteLMfpPostRAOpsList.Writes[0-7, 15-22]>,
  2145. // For unknown LDMs, define the maximum number of writes, but only
  2146. // make the first two consume resources. We are optimizing for the case
  2147. // where the operands are DPRs, and this determines the first eight
  2148. // types. The remaining eight types are filled to cover the case
  2149. // where the operands are SPRs.
  2150. SchedVar<A9LMUnknownPred, [A9WriteLMfp1, A9WriteLMfp2,
  2151. A9WriteLMfp3Hi, A9WriteLMfp4Hi,
  2152. A9WriteLMfp5Hi, A9WriteLMfp6Hi,
  2153. A9WriteLMfp7Hi, A9WriteLMfp8Hi,
  2154. A9WriteLMfp5Hi, A9WriteLMfp5Hi,
  2155. A9WriteLMfp6Hi, A9WriteLMfp6Hi,
  2156. A9WriteLMfp7Hi, A9WriteLMfp7Hi,
  2157. A9WriteLMfp8Hi, A9WriteLMfp8Hi]>]> {
  2158. let Variadic = 1;
  2159. }
  2160. // Distinguish between our multiple MI-level forms of the same
  2161. // VLDM/VSTM instructions.
  2162. def A9PreRA : SchedPredicate<
  2163. "MI->getOperand(0).getReg().isVirtual()">;
  2164. def A9PostRA : SchedPredicate<
  2165. "MI->getOperand(0).getReg().isPhysical()">;
  2166. // VLDM represents all destination registers as a single register
  2167. // tuple, unlike LDM. So the number of write operands is not variadic.
  2168. def A9WriteLMfp : SchedWriteVariant<[
  2169. SchedVar<A9PreRA, [A9WriteLMfpPreRA]>,
  2170. SchedVar<A9PostRA, [A9WriteLMfpPostRA]>]>;
  2171. //===----------------------------------------------------------------------===//
  2172. // Resources for other (non-LDM/VLDM) Variants.
  2173. // These mov immediate writers are unconditionally expanded with
  2174. // additive latency.
  2175. def A9WriteI2 : WriteSequence<[A9WriteI, A9WriteI]>;
  2176. def A9WriteI2pc : WriteSequence<[A9WriteI, A9WriteI, WriteALU]>;
  2177. def A9WriteI2ld : WriteSequence<[A9WriteI, A9WriteI, A9WriteL]>;
  2178. // Some ALU operations can read loaded integer values one cycle early.
  2179. def A9ReadALU : SchedReadAdvance<1, // advance of 1 cycle, restricted to the load writes listed below
  2180. [A9WriteL, A9WriteLHi, A9WriteLsi, A9WriteLb, A9WriteLbsi,
  2181. A9WriteL1, A9WriteL2, A9WriteL3, A9WriteL4,
  2182. A9WriteL5, A9WriteL6, A9WriteL7, A9WriteL8,
  2183. A9WriteL1Hi, A9WriteL2Hi, A9WriteL3Hi, A9WriteL4Hi,
  2184. A9WriteL5Hi, A9WriteL6Hi, A9WriteL7Hi, A9WriteL8Hi]>;
  2185. // Read types for operands that are unconditionally read in cycle N
  2186. // after the instruction issues; each decreases producer latency by N-1.
  2187. def A9Read2 : SchedReadAdvance<1>; // N = 2, advance of 1
  2188. def A9Read3 : SchedReadAdvance<2>; // N = 3, advance of 2
  2189. def A9Read4 : SchedReadAdvance<3>; // N = 4, advance of 3
  2190. //===----------------------------------------------------------------------===//
  2191. // Map itinerary classes to scheduler read/write resources per operand.
  2192. // Each ItinRW below lists its write types first, followed by any read types.
  2193. // For ARM, we piggyback scheduler resources on the Itinerary classes
  2194. // to avoid perturbing the existing instruction definitions.
  2195. // This table follows the ARM Cortex-A9 Technical Reference Manuals,
  2196. // mostly in order.
  2197. def :ItinRW<[WriteALU], [IIC_iMOVi,IIC_iMOVr,IIC_iMOVsi,
  2198. IIC_iMVNi,IIC_iMVNsi,
  2199. IIC_iCMOVi,IIC_iCMOVr,IIC_iCMOVsi]>; // write only, no read type listed
  2200. def :ItinRW<[WriteALU, A9ReadALU],[IIC_iMVNr]>; // one read uses A9ReadALU
  2201. def :ItinRW<[A9WriteIsr], [IIC_iMOVsr,IIC_iMVNsr,IIC_iCMOVsr]>;
  2202. def :ItinRW<[A9WriteI2], [IIC_iMOVix2,IIC_iCMOVix2]>; // the A9WriteI2* writes are WriteSequences
  2203. def :ItinRW<[A9WriteI2pc], [IIC_iMOVix2addpc]>;
  2204. def :ItinRW<[A9WriteI2ld], [IIC_iMOVix2ld]>;
  2205. def :ItinRW<[WriteALU], [IIC_iBITi,IIC_iBITr,IIC_iUNAr,IIC_iTSTi,IIC_iTSTr]>;
  2206. def :ItinRW<[WriteALU, A9ReadALU], [IIC_iALUi, IIC_iCMPi, IIC_iCMPsi]>;
  2207. def :ItinRW<[WriteALU, A9ReadALU, A9ReadALU],[IIC_iALUr,IIC_iCMPr]>; // two reads, both A9ReadALU
  2208. def :ItinRW<[WriteALUsi], [IIC_iBITsi,IIC_iUNAsi,IIC_iEXTr,IIC_iTSTsi]>;
  2209. def :ItinRW<[WriteALUsi, A9ReadALU], [IIC_iALUsi]>;
  2210. def :ItinRW<[WriteALUsi, ReadDefault, A9ReadALU], [IIC_iALUsir]>; // RSB: first read is ReadDefault, second is A9ReadALU
  2211. def :ItinRW<[A9WriteALUsr], [IIC_iBITsr,IIC_iTSTsr,IIC_iEXTAr,IIC_iEXTAsr]>;
  2212. def :ItinRW<[A9WriteALUsr, A9ReadALU], [IIC_iALUsr,IIC_iCMPsr]>;
  2213. // A9WriteMHi is ignored for MUL32.
  2214. def :ItinRW<[A9WriteM, A9WriteMHi], [IIC_iMUL32,IIC_iMAC32, // 32- and 64-bit MUL/MAC classes share this entry
  2215. IIC_iMUL64,IIC_iMAC64]>;
  2216. // FIXME: SMLALxx needs itin classes
  2217. def :ItinRW<[A9WriteM16, A9WriteM16Hi], [IIC_iMUL16,IIC_iMAC16]>; // the 16-bit MUL/MAC classes use the M16 writes
  2218. // TODO: For floating-point ops, we model the pipeline forwarding
  2219. // latencies here. WAW latencies are sometimes longer.
  2220. def :ItinRW<[A9WriteFMov], [IIC_fpSTAT, IIC_fpMOVIS, IIC_fpMOVID, IIC_fpMOVSI,
  2221. IIC_fpUNA32, IIC_fpUNA64,
  2222. IIC_fpCMP32, IIC_fpCMP64]>;
  2223. def :ItinRW<[A9WriteFMov, A9WriteFMov], [IIC_fpMOVDI]>; // two A9WriteFMov writes, presumably one per def operand
  2224. def :ItinRW<[A9WriteF], [IIC_fpCVTSD, IIC_fpCVTDS, IIC_fpCVTSH, IIC_fpCVTHS,
  2225. IIC_fpCVTIS, IIC_fpCVTID, IIC_fpCVTSI, IIC_fpCVTDI,
  2226. IIC_fpALU32, IIC_fpALU64]>; // the CVT and ALU classes share A9WriteF
  2227. def :ItinRW<[A9WriteFMulS], [IIC_fpMUL32]>; // from here on, the 32- and 64-bit classes get separate writes
  2228. def :ItinRW<[A9WriteFMulD], [IIC_fpMUL64]>;
  2229. def :ItinRW<[A9WriteFMAS], [IIC_fpMAC32]>;
  2230. def :ItinRW<[A9WriteFMAD], [IIC_fpMAC64]>;
  2231. def :ItinRW<[A9WriteFDivS], [IIC_fpDIV32]>;
  2232. def :ItinRW<[A9WriteFDivD], [IIC_fpDIV64]>;
  2233. def :ItinRW<[A9WriteFSqrtS], [IIC_fpSQRT32]>;
  2234. def :ItinRW<[A9WriteFSqrtD], [IIC_fpSQRT64]>;
  2235. def :ItinRW<[A9WriteB], [IIC_Br]>;
  2236. // A9 PLD is processed in a dedicated unit.
  2237. def :ItinRW<[], [IIC_Preload]>; // empty list: no read/write types are attached to preload
  2238. // Note: We must assume that loads are aligned, since the machine
  2239. // model cannot know this statically and A9 ignores alignment hints.
  2240. // A9WriteAdr consumes the AGU regardless of address writeback, but its
  2241. // latency is only relevant for users of an updated address.
  2242. def :ItinRW<[A9WriteL, A9WriteAdr], [IIC_iLoad_i,IIC_iLoad_r, // loads: data write first, then the address write
  2243. IIC_iLoad_iu,IIC_iLoad_ru]>;
  2244. def :ItinRW<[A9WriteLsi, A9WriteAdr], [IIC_iLoad_si,IIC_iLoad_siu]>;
  2245. def :ItinRW<[A9WriteLb, A9WriteAdr2], [IIC_iLoad_bh_i,IIC_iLoad_bh_r, // the bh classes use A9WriteAdr2 instead of A9WriteAdr
  2246. IIC_iLoad_bh_iu,IIC_iLoad_bh_ru]>;
  2247. def :ItinRW<[A9WriteLbsi, A9WriteAdr2], [IIC_iLoad_bh_si,IIC_iLoad_bh_siu]>;
  2248. def :ItinRW<[A9WriteL, A9WriteLHi, A9WriteAdr], [IIC_iLoad_d_i,IIC_iLoad_d_r, // two data writes (A9WriteL, A9WriteLHi) before the address write
  2249. IIC_iLoad_d_ru]>;
  2250. // Store either has no def operands, or the one def for address writeback.
  2251. def :ItinRW<[A9WriteAdr, A9WriteS], [IIC_iStore_i, IIC_iStore_r, // stores: address write first, then A9WriteS
  2252. IIC_iStore_iu, IIC_iStore_ru,
  2253. IIC_iStore_d_i, IIC_iStore_d_r,
  2254. IIC_iStore_d_ru]>;
  2255. def :ItinRW<[A9WriteAdr2, A9WriteS], [IIC_iStore_si, IIC_iStore_siu,
  2256. IIC_iStore_bh_i, IIC_iStore_bh_r,
  2257. IIC_iStore_bh_iu, IIC_iStore_bh_ru]>;
  2258. def :ItinRW<[A9WriteAdr3, A9WriteS], [IIC_iStore_bh_si, IIC_iStore_bh_siu]>;
  2259. // A9WriteLM will be expanded into a separate write for each def
  2260. // operand. Address generation consumes resources, but A9WriteLMAdr
  2261. // is listed after all def operands, so has no effective latency.
  2262. //
  2263. // Note: A9WriteLM expands into an even number of def operands. The
  2264. // actual number of def operands may be less by one.
  2265. def :ItinRW<[A9WriteLM, A9WriteLMAdr, A9WriteIssue], [IIC_iLoad_m, IIC_iPop]>; // A9WriteLMAdr follows the loaded-register writes
  2266. // Load multiple with address writeback has an extra def operand in
  2267. // front of the loaded registers.
  2268. //
  2269. // Reuse the load-multiple variants for store-multiple because the
  2270. // resources are identical. For stores, only the address writeback
  2271. // has a def operand so the WriteL latencies are unused.
  2272. def :ItinRW<[A9WriteLMAdr, A9WriteLM, A9WriteIssue], [IIC_iLoad_mu, // address write first, ahead of the A9WriteLM expansion
  2273. IIC_iStore_m,
  2274. IIC_iStore_mu]>;
  2275. def :ItinRW<[A9WriteLM, A9WriteLMAdr, A9WriteB], [IIC_iLoad_mBr, IIC_iPop_Br]>; // ends with A9WriteB where the non-branch entries use A9WriteIssue
  2276. def :ItinRW<[A9WriteL, A9WriteAdr, WriteALU], [IIC_iLoadiALU]>;
  2277. def :ItinRW<[A9WriteLSfp, A9WriteAdr], [IIC_fpLoad32, IIC_fpLoad64]>;
  2278. def :ItinRW<[A9WriteLMfp, A9WriteLMAdr], [IIC_fpLoad_m]>; // A9WriteLMfp is a SchedWriteVariant selected by predicate
  2279. def :ItinRW<[A9WriteLMAdr, A9WriteLMfp], [IIC_fpLoad_mu]>; // writeback form: address write listed first
  2280. def :ItinRW<[A9WriteAdr, A9WriteLSfp], [IIC_fpStore32, IIC_fpStore64, // FP stores, single and multiple, share one entry
  2281. IIC_fpStore_m, IIC_fpStore_mu]>;
  2282. // Note: Unlike VLDM, VLD1 expects the writeback operand after the
  2283. // normal writes, so every entry below lists the load write before the address write.
  2284. def :ItinRW<[A9WriteLfp1, A9WriteAdr1], [IIC_VLD1, IIC_VLD1u,
  2285. IIC_VLD1x2, IIC_VLD1x2u]>;
  2286. def :ItinRW<[A9WriteLfp2, A9WriteAdr2], [IIC_VLD1x3, IIC_VLD1x3u,
  2287. IIC_VLD1x4, IIC_VLD1x4u,
  2288. IIC_VLD4dup, IIC_VLD4dupu]>;
  2289. def :ItinRW<[A9WriteLfp1Mov, A9WriteAdr1], [IIC_VLD1dup, IIC_VLD1dupu,
  2290. IIC_VLD2, IIC_VLD2u,
  2291. IIC_VLD2dup, IIC_VLD2dupu]>;
  2292. def :ItinRW<[A9WriteLfp2Mov, A9WriteAdr1], [IIC_VLD1ln, IIC_VLD1lnu, // NOTE(review): Lfp2Mov is paired with Adr1 here, not Adr2 - confirm intended
  2293. IIC_VLD2x2, IIC_VLD2x2u,
  2294. IIC_VLD2ln, IIC_VLD2lnu]>;
  2295. def :ItinRW<[A9WriteLfp3Mov, A9WriteAdr3], [IIC_VLD3, IIC_VLD3u,
  2296. IIC_VLD3dup, IIC_VLD3dupu]>;
  2297. def :ItinRW<[A9WriteLfp4Mov, A9WriteAdr4], [IIC_VLD4, IIC_VLD4u,
  2298. IIC_VLD4ln, IIC_VLD4lnu]>;
  2299. def :ItinRW<[A9WriteLfp5Mov, A9WriteAdr5], [IIC_VLD3ln, IIC_VLD3lnu]>;
  2300. // Vector stores use similar resources to vector loads, so use the
  2301. // same write types (A9WriteLfp1/A9WriteLfp2). The address write must be first for stores with
  2302. // address writeback.
  2303. def :ItinRW<[A9WriteAdr1, A9WriteLfp1], [IIC_VST1, IIC_VST1u,
  2304. IIC_VST1x2, IIC_VST1x2u,
  2305. IIC_VST1ln, IIC_VST1lnu,
  2306. IIC_VST2, IIC_VST2u,
  2307. IIC_VST2x2, IIC_VST2x2u,
  2308. IIC_VST2ln, IIC_VST2lnu]>;
  2309. def :ItinRW<[A9WriteAdr2, A9WriteLfp2], [IIC_VST1x3, IIC_VST1x3u,
  2310. IIC_VST1x4, IIC_VST1x4u,
  2311. IIC_VST3, IIC_VST3u,
  2312. IIC_VST3ln, IIC_VST3lnu,
  2313. IIC_VST4, IIC_VST4u,
  2314. IIC_VST4ln, IIC_VST4lnu]>;
  2315. // NEON moves. These entries list a write type only.
  2316. def :ItinRW<[A9WriteV2], [IIC_VMOVSI, IIC_VMOVDI, IIC_VMOVD, IIC_VMOVQ]>;
  2317. def :ItinRW<[A9WriteV1], [IIC_VMOV, IIC_VMOVIS, IIC_VMOVID]>;
  2318. def :ItinRW<[A9WriteV3], [IIC_VMOVISL, IIC_VMOVN]>;
  2319. // NEON integer arithmetic
  2320. // (A9Read2/A9Read3 are the SchedReadAdvance<1>/<2> read types defined earlier in this file.)
  2321. // VADD/VAND/VORR/VEOR/VBIC/VORN/VBIT/VBIF/VBSL
  2322. def :ItinRW<[A9WriteV3, A9Read2, A9Read2], [IIC_VBINiD, IIC_VBINiQ]>;
  2323. // VSUB/VMVN/VCLSD/VCLZD/VCNTD
  2324. def :ItinRW<[A9WriteV3, A9Read2], [IIC_VSUBiD, IIC_VSUBiQ, IIC_VCNTiD]>;
  2325. // VADDL/VSUBL/VNEG are mapped later under IIC_SHLi.
  2326. // ...
  2327. // VHADD/VRHADD/VQADD/VTST/VADH/VRADH
  2328. def :ItinRW<[A9WriteV4, A9Read2, A9Read2], [IIC_VBINi4D, IIC_VBINi4Q]>;
  2329. // VSBH/VRSBH/VHSUB/VQSUB/VABD/VCEQ/VCGE/VCGT/VMAX/VMIN/VPMAX/VPMIN/VABDL
  2330. def :ItinRW<[A9WriteV4, A9Read2], [IIC_VSUBi4D, IIC_VSUBi4Q]>;
  2331. // VQNEG/VQABS
  2332. def :ItinRW<[A9WriteV4], [IIC_VQUNAiD, IIC_VQUNAiQ]>;
  2333. // VABS
  2334. def :ItinRW<[A9WriteV4, A9Read2], [IIC_VUNAiD, IIC_VUNAiQ]>;
  2335. // VPADD/VPADDL are mapped later under IIC_SHLi.
  2336. // ...
  2337. // VCLSQ/VCLZQ/VCNTQ, takes two cycles.
  2338. def :ItinRW<[A9Write2V4, A9Read3], [IIC_VCNTiQ]>;
  2339. // VMOVimm/VMVNimm/VORRimm/VBICimm
  2340. def :ItinRW<[A9WriteV3], [IIC_VMOVImm]>;
  2341. def :ItinRW<[A9WriteV6, A9Read3, A9Read2], [IIC_VABAD, IIC_VABAQ]>; // first read is A9Read3, second is A9Read2
  2342. def :ItinRW<[A9WriteV6, A9Read3], [IIC_VPALiD, IIC_VPALiQ]>;
  2343. // NEON integer multiply
  2344. //
  2345. // Note: these don't quite match the timing docs, but they do match
  2346. // the original A9 itinerary.
  2347. def :ItinRW<[A9WriteV6, A9Read2, A9Read2], [IIC_VMULi16D]>;
  2348. def :ItinRW<[A9WriteV7, A9Read2, A9Read2], [IIC_VMULi16Q]>;
  2349. def :ItinRW<[A9Write2V7, A9Read2], [IIC_VMULi32D]>; // the i32 forms use the A9Write2V* writes and list one read fewer
  2350. def :ItinRW<[A9Write2V9, A9Read2], [IIC_VMULi32Q]>;
  2351. def :ItinRW<[A9WriteV6, A9Read3, A9Read2, A9Read2], [IIC_VMACi16D]>; // MAC forms: same write as the MUL form, with a leading A9Read3
  2352. def :ItinRW<[A9WriteV7, A9Read3, A9Read2, A9Read2], [IIC_VMACi16Q]>;
  2353. def :ItinRW<[A9Write2V7, A9Read3, A9Read2], [IIC_VMACi32D]>;
  2354. def :ItinRW<[A9Write2V9, A9Read3, A9Read2], [IIC_VMACi32Q]>;
  2355. // NEON integer shift
  2356. // TODO: Q,Q,Q shifts should actually reserve FP for 2 cycles.
  2357. def :ItinRW<[A9WriteV3], [IIC_VSHLiD, IIC_VSHLiQ]>;
  2358. def :ItinRW<[A9WriteV4], [IIC_VSHLi4D, IIC_VSHLi4Q]>;
  2359. // NEON permute
  2360. def :ItinRW<[A9WriteV2, A9WriteV2], [IIC_VPERMD, IIC_VPERMQ, IIC_VEXTD]>; // two write types listed
  2361. def :ItinRW<[A9WriteV3, A9WriteV4, ReadDefault, A9Read2], // two different writes, then ReadDefault and A9Read2
  2362. [IIC_VPERMQ3, IIC_VEXTQ]>;
  2363. def :ItinRW<[A9WriteV3, A9Read2], [IIC_VTB1]>; // VTB1..VTB4: one more read type per step
  2364. def :ItinRW<[A9WriteV3, A9Read2, A9Read2], [IIC_VTB2]>;
  2365. def :ItinRW<[A9WriteV4, A9Read2, A9Read2, A9Read3], [IIC_VTB3]>;
  2366. def :ItinRW<[A9WriteV4, A9Read2, A9Read2, A9Read3, A9Read3], [IIC_VTB4]>;
  2367. def :ItinRW<[A9WriteV3, ReadDefault, A9Read2], [IIC_VTBX1]>; // VTBXn: the VTBn list with ReadDefault inserted as the first read
  2368. def :ItinRW<[A9WriteV3, ReadDefault, A9Read2, A9Read2], [IIC_VTBX2]>;
  2369. def :ItinRW<[A9WriteV4, ReadDefault, A9Read2, A9Read2, A9Read3], [IIC_VTBX3]>;
  2370. def :ItinRW<[A9WriteV4, ReadDefault, A9Read2, A9Read2, A9Read3, A9Read3],
  2371. [IIC_VTBX4]>;
  2372. // NEON floating-point
  2373. def :ItinRW<[A9WriteV5, A9Read2, A9Read2], [IIC_VBIND]>; // in each D/Q pair below the Q class uses the next-higher A9WriteV* write
  2374. def :ItinRW<[A9WriteV6, A9Read2, A9Read2], [IIC_VBINQ]>;
  2375. def :ItinRW<[A9WriteV5, A9Read2], [IIC_VUNAD, IIC_VFMULD]>;
  2376. def :ItinRW<[A9WriteV6, A9Read2], [IIC_VUNAQ, IIC_VFMULQ]>;
  2377. def :ItinRW<[A9WriteV9, A9Read3, A9Read2], [IIC_VMACD, IIC_VFMACD]>;
  2378. def :ItinRW<[A9WriteV10, A9Read3, A9Read2], [IIC_VMACQ, IIC_VFMACQ]>;
  2379. def :ItinRW<[A9WriteV9, A9Read2, A9Read2], [IIC_VRECSD]>;
  2380. def :ItinRW<[A9WriteV10, A9Read2, A9Read2], [IIC_VRECSQ]>;
  2381. // Map SchedRWs that are identical for cortexa9 to existing resources.
  2382. def : SchedAlias<WriteALU, A9WriteALU>;
  2383. def : SchedAlias<WriteALUsr, A9WriteALUsr>;
  2384. def : SchedAlias<WriteALUSsr, A9WriteALUsr>; // shares A9WriteALUsr with WriteALUsr
  2385. def : SchedAlias<ReadALU, A9ReadALU>;
  2386. def : SchedAlias<ReadALUsr, A9ReadALU>; // shares A9ReadALU with ReadALU
  2387. def : SchedAlias<WriteST, A9WriteS>;
  2388. // ===---------------------------------------------------------------------===//
  2389. // Floating-point. Map target defined SchedReadWrite to processor specific ones
  2390. //
  2391. def : WriteRes<WriteFPCVT, [A9UnitFP, A9UnitAGU]> { let Latency = 4; } // the only FP write defined directly here instead of via an alias
  2392. def : SchedAlias<WriteFPMOV, A9WriteFMov>;
  2393. def : SchedAlias<WriteFPALU32, A9WriteF>; // 32- and 64-bit FP ALU both alias A9WriteF
  2394. def : SchedAlias<WriteFPALU64, A9WriteF>;
  2395. def : SchedAlias<WriteFPMUL32, A9WriteFMulS>;
  2396. def : SchedAlias<WriteFPMUL64, A9WriteFMulD>;
  2397. def : SchedAlias<WriteFPMAC32, A9WriteFMAS>;
  2398. def : SchedAlias<WriteFPMAC64, A9WriteFMAD>;
  2399. def : SchedAlias<WriteFPDIV32, A9WriteFDivS>;
  2400. def : SchedAlias<WriteFPDIV64, A9WriteFDivD>;
  2401. def : SchedAlias<WriteFPSQRT32, A9WriteFSqrtS>;
  2402. def : SchedAlias<WriteFPSQRT64, A9WriteFSqrtD>;
  2403. def : ReadAdvance<ReadFPMUL, 0>; // advance of 0 cycles
  2404. def : ReadAdvance<ReadFPMAC, 0>; // advance of 0 cycles
  2405. // ===---------------------------------------------------------------------===//
  2406. // Subtarget-specific overrides. Map opcodes to list of SchedReadWrite types.
  2407. // Opcodes are selected by regex (instregex) or by explicit name (instrs).
  2408. def : InstRW< [WriteALU],
  2409. (instregex "ANDri", "ORRri", "EORri", "BICri", "ANDrr", "ORRrr", "EORrr",
  2410. "BICrr")>;
  2411. def : InstRW< [WriteALUsi], (instrs ANDrsi, ORRrsi, EORrsi, BICrsi)>;
  2412. def : InstRW< [WriteALUsr], (instrs ANDrsr, ORRrsr, EORrsr, BICrsr)>;
  2413. def : SchedAlias<WriteCMP, A9WriteALU>; // all three compare writes alias A9WriteALU
  2414. def : SchedAlias<WriteCMPsi, A9WriteALU>;
  2415. def : SchedAlias<WriteCMPsr, A9WriteALU>;
  2416. def : InstRW< [A9WriteIsr], (instregex "MOVsr", "MOVsi", "MVNsr", "MOVCCsi",
  2417. "MOVCCsr")>;
  2418. def : InstRW< [WriteALU, A9ReadALU], (instregex "MVNr")>;
  2419. def : InstRW< [A9WriteI2], (instregex "MOVCCi32imm", "MOVi32imm")>;
  2420. def : InstRW< [A9WriteI2pc], (instregex "MOV_ga_pcrel")>; // NOTE(review): this pattern is a prefix of "MOV_ga_pcrel_ldr" on the next line - confirm the overlap is intended
  2421. def : InstRW< [A9WriteI2ld], (instregex "MOV_ga_pcrel_ldr")>;
  2422. def : InstRW< [WriteALU], (instregex "SEL")>;
  2423. def : InstRW< [WriteALUsi], (instregex "BFC", "BFI", "UBFX", "SBFX")>;
  2424. def : InstRW< [A9WriteM], // single write: no A9WriteMHi for these patterns
  2425. (instregex "MUL", "MULv5", "SMMUL", "SMMULR", "MLA", "MLAv5", "MLS",
  2426. "SMMLA", "SMMLAR", "SMMLS", "SMMLSR")>;
  2427. def : InstRW< [A9WriteM, A9WriteMHi], // two writes (A9WriteM, A9WriteMHi) for these patterns
  2428. (instregex "SMULL", "SMULLv5", "UMULL", "UMULLv5", "SMLAL$", "UMLAL",
  2429. "UMAAL", "SMLALv5", "UMLALv5", "SMLALBB", "SMLALBT", "SMLALTB",
  2430. "SMLALTT")>;
  2431. // FIXME: These instructions used to have NoItinerary. Just copied the one from above.
  2432. def : InstRW< [A9WriteM, A9WriteMHi],
  2433. (instregex "SMLAD", "SMLADX", "SMLALD", "SMLALDX", "SMLSD", "SMLSDX",
  2434. "SMLSLD", "SMLSLDX", "SMUAD", "SMUADX", "SMUSD", "SMUSDX")>;
  2435. def : InstRW<[A9WriteM16, A9WriteM16Hi], // the two entries below use the same M16 write pair
  2436. (instregex "SMULBB", "SMULBT", "SMULTB", "SMULTT", "SMULWB", "SMULWT")>;
  2437. def : InstRW<[A9WriteM16, A9WriteM16Hi],
  2438. (instregex "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLAWB", "SMLAWT")>;
  2439. def : InstRW<[A9WriteL], (instregex "LDRi12", "PICLDR$")>; // load overrides: one write type per pattern group
  2440. def : InstRW<[A9WriteLsi], (instregex "LDRrs")>; // LDRrs uses the "si" load write
  2441. def : InstRW<[A9WriteLb], // A9WriteLb covers the byte/halfword patterns below
  2442. (instregex "LDRBi12", "PICLDRH", "PICLDRB", "PICLDRSH", "PICLDRSB",
  2443. "LDRH", "LDRSH", "LDRSB")>;
  2444. def : InstRW<[A9WriteLbsi], (instregex "LDRBrs")>; // LDRBrs, not LDRrs: LDRrs is already mapped to A9WriteLsi above
  2445. def : WriteRes<WriteDIV, []> { let Latency = 0; } // no resources, latency 0
  2446. def : WriteRes<WriteBr, [A9UnitB]>; // all three branch writes use A9UnitB
  2447. def : WriteRes<WriteBrL, [A9UnitB]>;
  2448. def : WriteRes<WriteBrTbl, [A9UnitB]>;
  2449. def : WriteRes<WritePreLd, []>; // no resources listed for preload
  2450. def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; } // no resources, latency 0, zero micro-ops
  2451. } // SchedModel = CortexA9Model