AArch64SchedFalkorDetails.td 68 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292
  1. //==- AArch64SchedFalkorDetails.td - Falkor Scheduling Defs -*- tablegen -*-==//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file defines the uop and latency details for the machine model for the
  10. // Qualcomm Falkor subtarget.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. // Contains all of the Falkor specific SchedWriteRes types. The approach
  14. // below is to define a generic SchedWriteRes for every combination of
  15. // latency and microOps. The naming convention is to use a prefix, one field
  16. // for latency, and one or more microOp count/type designators.
  17. // Prefix: FalkorWr
  18. // MicroOp Count/Types: #(B|X|Y|Z|LD|ST|SD|VX|VY|VSD)
  19. // Latency: #cyc
  20. //
  21. // e.g. FalkorWr_1Z_6SD_4VX_6cyc means there are 11 micro-ops to be issued
  22. // down one Z pipe, six SD pipes, four VX pipes and the total latency is
  23. // six cycles.
  24. //
  25. // Contains all of the Falkor specific ReadAdvance types for forwarding logic.
  26. //
  27. // Contains all of the Falkor specific WriteVariant types for immediate zero
  28. // and LSLFast.
  29. //===----------------------------------------------------------------------===//
  30. //===----------------------------------------------------------------------===//
  31. // Define 0 micro-op types
  // Writes that issue no micro-ops and list no processor resources; the four
  // records differ only in the latency they report.
  let NumMicroOps = 0 in {
    def FalkorWr_LdInc_none_2cyc : SchedWriteRes<[]> { let Latency = 2; }
    def FalkorWr_StInc_none_2cyc : SchedWriteRes<[]> { let Latency = 2; }
    def FalkorWr_none_3cyc       : SchedWriteRes<[]> { let Latency = 3; }
    def FalkorWr_none_4cyc       : SchedWriteRes<[]> { let Latency = 4; }
  }
  48. //===----------------------------------------------------------------------===//
  49. // Define 1 micro-op types
  // Single-resource writes on the X, Z, ZB, LD, XYZ and XYZB units. Each record
  // names one resource and overrides only the latency.

  // X unit.
  let Latency = 2 in def FalkorWr_1X_2cyc        : SchedWriteRes<[FalkorUnitX]>;
  // NOTE(review): the record name says 2cyc but the latency is 4; kept as-is
  // because other records refer to it by this name -- confirm which is intended.
  let Latency = 4 in def FalkorWr_IMUL32_1X_2cyc : SchedWriteRes<[FalkorUnitX]>;
  let Latency = 4 in def FalkorWr_IMUL64_1X_4cyc : SchedWriteRes<[FalkorUnitX]>;
  let Latency = 5 in def FalkorWr_IMUL64_1X_5cyc : SchedWriteRes<[FalkorUnitX]>;

  // Z and ZB units.
  let Latency = 0 in def FalkorWr_1Z_0cyc  : SchedWriteRes<[FalkorUnitZ]>;
  let Latency = 0 in def FalkorWr_1ZB_0cyc : SchedWriteRes<[FalkorUnitZB]>;

  // LD unit.
  let Latency = 3 in def FalkorWr_1LD_3cyc : SchedWriteRes<[FalkorUnitLD]>;
  let Latency = 4 in def FalkorWr_1LD_4cyc : SchedWriteRes<[FalkorUnitLD]>;

  // XYZ and XYZB groups.
  let Latency = 0 in def FalkorWr_1XYZ_0cyc  : SchedWriteRes<[FalkorUnitXYZ]>;
  let Latency = 1 in def FalkorWr_1XYZ_1cyc  : SchedWriteRes<[FalkorUnitXYZ]>;
  let Latency = 2 in def FalkorWr_1XYZ_2cyc  : SchedWriteRes<[FalkorUnitXYZ]>;
  let Latency = 0 in def FalkorWr_1XYZB_0cyc : SchedWriteRes<[FalkorUnitXYZB]>;
  let Latency = 1 in def FalkorWr_1XYZB_1cyc : SchedWriteRes<[FalkorUnitXYZB]>;

  // No resource listed, zero latency.
  let Latency = 0 in def FalkorWr_1none_0cyc : SchedWriteRes<[]>;
  // Single-resource writes on the VXVY group. The VMUL32/FMUL32/FMUL64 records
  // duplicate the plain 4/5/6-cycle records under distinct names so that the
  // SchedReadAdvance definitions in this file can refer to them specifically.
  let Latency = 0 in def FalkorWr_1VXVY_0cyc        : SchedWriteRes<[FalkorUnitVXVY]>;
  let Latency = 1 in def FalkorWr_1VXVY_1cyc        : SchedWriteRes<[FalkorUnitVXVY]>;
  let Latency = 2 in def FalkorWr_1VXVY_2cyc        : SchedWriteRes<[FalkorUnitVXVY]>;
  let Latency = 3 in def FalkorWr_1VXVY_3cyc        : SchedWriteRes<[FalkorUnitVXVY]>;
  let Latency = 4 in def FalkorWr_1VXVY_4cyc        : SchedWriteRes<[FalkorUnitVXVY]>;
  let Latency = 4 in def FalkorWr_VMUL32_1VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY]>;
  let Latency = 5 in def FalkorWr_1VXVY_5cyc        : SchedWriteRes<[FalkorUnitVXVY]>;
  let Latency = 5 in def FalkorWr_FMUL32_1VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY]>;
  let Latency = 6 in def FalkorWr_1VXVY_6cyc        : SchedWriteRes<[FalkorUnitVXVY]>;
  let Latency = 6 in def FalkorWr_FMUL64_1VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY]>;

  // Single-resource writes on the LD and ST units.
  let Latency = 0 in def FalkorWr_1LD_0cyc : SchedWriteRes<[FalkorUnitLD]>;
  let Latency = 0 in def FalkorWr_1ST_0cyc : SchedWriteRes<[FalkorUnitST]>;
  let Latency = 3 in def FalkorWr_1ST_3cyc : SchedWriteRes<[FalkorUnitST]>;

  // Single-resource writes on the GTOV and VTOG units.
  let Latency = 0 in def FalkorWr_1GTOV_0cyc : SchedWriteRes<[FalkorUnitGTOV]>;
  let Latency = 1 in def FalkorWr_1GTOV_1cyc : SchedWriteRes<[FalkorUnitGTOV]>;
  let Latency = 4 in def FalkorWr_1GTOV_4cyc : SchedWriteRes<[FalkorUnitGTOV]>;
  let Latency = 1 in def FalkorWr_1VTOG_1cyc : SchedWriteRes<[FalkorUnitVTOG]>;
  81. //===----------------------------------------------------------------------===//
  82. // Define 2 micro-op types
  // Two micro-ops, both on the VXVY group; the records differ only in latency.
  // The VMUL32/FMUL32/FMUL64 names are separate records with the same values
  // as the plain 4/5/6-cycle ones.
  let NumMicroOps = 2 in {
    def FalkorWr_2VXVY_0cyc
        : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 0; }
    def FalkorWr_2VXVY_1cyc
        : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 1; }
    def FalkorWr_2VXVY_2cyc
        : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 2; }
    def FalkorWr_2VXVY_3cyc
        : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 3; }
    def FalkorWr_2VXVY_4cyc
        : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 4; }
    def FalkorWr_VMUL32_2VXVY_4cyc
        : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 4; }
    def FalkorWr_2VXVY_5cyc
        : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 5; }
    def FalkorWr_FMUL32_2VXVY_5cyc
        : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 5; }
    def FalkorWr_2VXVY_6cyc
        : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 6; }
    def FalkorWr_FMUL64_2VXVY_6cyc
        : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 6; }
  }
  // Two-micro-op writes that pair two resources. Every record in this group
  // sets NumMicroOps to 2 and lists exactly two resources.
  let NumMicroOps = 2 in {
    def FalkorWr_1LD_1VXVY_4cyc
        : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY]> { let Latency = 4; }
    def FalkorWr_1XYZ_1LD_4cyc
        : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitLD]> { let Latency = 4; }
    def FalkorWr_2LD_3cyc
        : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> { let Latency = 3; }

    // One VX plus one VY resource, at assorted latencies.
    def FalkorWr_1VX_1VY_5cyc
        : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { let Latency = 5; }
    def FalkorWr_1VX_1VY_2cyc
        : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { let Latency = 2; }
    def FalkorWr_1VX_1VY_4cyc
        : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { let Latency = 4; }
    def FalkorWr_1VX_1VY_10cyc
        : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { let Latency = 10; }
    def FalkorWr_1VX_1VY_12cyc
        : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { let Latency = 12; }
    def FalkorWr_1VX_1VY_14cyc
        : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { let Latency = 14; }
    def FalkorWr_1VX_1VY_21cyc
        : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { let Latency = 21; }

    def FalkorWr_1GTOV_1VXVY_2cyc
        : SchedWriteRes<[FalkorUnitGTOV, FalkorUnitVXVY]> { let Latency = 2; }
    def FalkorWr_2GTOV_1cyc
        : SchedWriteRes<[FalkorUnitGTOV, FalkorUnitGTOV]> { let Latency = 1; }
    def FalkorWr_1XYZ_1ST_4cyc
        : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST]> { let Latency = 4; }
    def FalkorWr_1XYZ_1LD_5cyc
        : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitLD]> { let Latency = 5; }
  }
  // Remaining two-micro-op writes.
  let NumMicroOps = 2 in {
    def FalkorWr_2XYZ_2cyc
        : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitXYZ]> { let Latency = 2; }
    def FalkorWr_1Z_1XY_0cyc
        : SchedWriteRes<[FalkorUnitZ, FalkorUnitXY]> { let Latency = 0; }

    // These two also override ResourceCycles: 2 for the X resource, and the
    // full latency (8 or 11) for the Z resource.
    def FalkorWr_1X_1Z_8cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> {
      let Latency = 8;
      let ResourceCycles = [2, 8];
    }
    def FalkorWr_1X_1Z_11cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> {
      let Latency = 11;
      let ResourceCycles = [2, 11];
    }

    def FalkorWr_1LD_1Z_3cyc
        : SchedWriteRes<[FalkorUnitLD, FalkorUnitZ]> { let Latency = 3; }
    // The "none" micro-op has no resource entry, so only LD is listed.
    def FalkorWr_1LD_1none_3cyc
        : SchedWriteRes<[FalkorUnitLD]> { let Latency = 3; }
    def FalkorWr_1SD_1ST_0cyc
        : SchedWriteRes<[FalkorUnitSD, FalkorUnitST]> { let Latency = 0; }
    def FalkorWr_1VSD_1ST_0cyc
        : SchedWriteRes<[FalkorUnitVSD, FalkorUnitST]> { let Latency = 0; }
  }
  213. //===----------------------------------------------------------------------===//
  214. // Define 3 micro-op types
  // Three micro-ops: one each on ST, SD and LD, at latency 0 and 3.
  let NumMicroOps = 3 in {
    def FalkorWr_1ST_1SD_1LD_0cyc
        : SchedWriteRes<[FalkorUnitST, FalkorUnitSD, FalkorUnitLD]> {
      let Latency = 0;
    }
    def FalkorWr_1ST_1SD_1LD_3cyc
        : SchedWriteRes<[FalkorUnitST, FalkorUnitSD, FalkorUnitLD]> {
      let Latency = 3;
    }
  }
  // Three-micro-op writes that use the VXVY group.
  //
  // The resource list now carries one entry per micro-op named in the record
  // (three VXVY entries for 3VXVY; LD plus two VXVY entries for 1LD_2VXVY).
  // Previously each list was one FalkorUnitVXVY short of what the name and
  // NumMicroOps describe, unlike the 2VXVY/4VXVY/5VXVY records in this file,
  // which list one resource per micro-op.
  def FalkorWr_3VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
                                           FalkorUnitVXVY]> {
    let Latency = 3;
    let NumMicroOps = 3;
  }
  def FalkorWr_3VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
                                           FalkorUnitVXVY]> {
    let Latency = 4;
    let NumMicroOps = 3;
  }
  def FalkorWr_3VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
                                           FalkorUnitVXVY]> {
    let Latency = 5;
    let NumMicroOps = 3;
  }
  def FalkorWr_3VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
                                           FalkorUnitVXVY]> {
    let Latency = 6;
    let NumMicroOps = 3;
  }
  def FalkorWr_1LD_2VXVY_4cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY,
                                               FalkorUnitVXVY]> {
    let Latency = 4;
    let NumMicroOps = 3;
  }
  // Remaining three-micro-op writes.
  let NumMicroOps = 3 in {
    // The "none" micro-op has no resource entry, so only the two LDs are listed.
    def FalkorWr_2LD_1none_3cyc
        : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> { let Latency = 3; }
    def FalkorWr_3LD_3cyc
        : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, FalkorUnitLD]> {
      let Latency = 3;
    }
    def FalkorWr_2LD_1Z_3cyc
        : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, FalkorUnitZ]> {
      let Latency = 3;
    }
    def FalkorWr_1XYZ_1SD_1ST_0cyc
        : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitSD, FalkorUnitST]> {
      let Latency = 0;
    }
    def FalkorWr_1XYZ_1VSD_1ST_0cyc
        : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitVSD, FalkorUnitST]> {
      let Latency = 0;
    }
  }
  267. //===----------------------------------------------------------------------===//
  268. // Define 4 micro-op types
  // Four-micro-op vector writes.
  let NumMicroOps = 4 in {
    // Two VX and two VY resources, interleaved VX, VY, VX, VY.
    def FalkorWr_2VX_2VY_14cyc
        : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY,
                         FalkorUnitVX, FalkorUnitVY]> { let Latency = 14; }
    def FalkorWr_2VX_2VY_20cyc
        : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY,
                         FalkorUnitVX, FalkorUnitVY]> { let Latency = 20; }
    def FalkorWr_2VX_2VY_21cyc
        : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY,
                         FalkorUnitVX, FalkorUnitVY]> { let Latency = 21; }
    def FalkorWr_2VX_2VY_24cyc
        : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY,
                         FalkorUnitVX, FalkorUnitVY]> { let Latency = 24; }

    // Four entries of the VXVY group.
    def FalkorWr_4VXVY_2cyc
        : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
                         FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 2; }
    def FalkorWr_4VXVY_3cyc
        : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
                         FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 3; }
    def FalkorWr_4VXVY_4cyc
        : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
                         FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 4; }
    def FalkorWr_4VXVY_6cyc
        : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
                         FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 6; }
  }
  // Four-micro-op load/store writes.
  let NumMicroOps = 4 in {
    def FalkorWr_4LD_3cyc
        : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
                         FalkorUnitLD, FalkorUnitLD]> { let Latency = 3; }
    def FalkorWr_1LD_3VXVY_4cyc
        : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY,
                         FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 4; }
    // The two "none" micro-ops have no resource entry.
    def FalkorWr_2LD_2none_3cyc
        : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> { let Latency = 3; }
    def FalkorWr_2LD_1ST_1SD_3cyc
        : SchedWriteRes<[FalkorUnitLD, FalkorUnitST,
                         FalkorUnitSD, FalkorUnitLD]> { let Latency = 3; }
    def FalkorWr_2VSD_2ST_0cyc
        : SchedWriteRes<[FalkorUnitST, FalkorUnitVSD,
                         FalkorUnitST, FalkorUnitVSD]> { let Latency = 0; }
  }
  333. //===----------------------------------------------------------------------===//
  334. // Define 5 micro-op types
  // Five-micro-op writes.
  let NumMicroOps = 5 in {
    def FalkorWr_1LD_4VXVY_4cyc
        : SchedWriteRes<[FalkorUnitLD,
                         FalkorUnitVXVY, FalkorUnitVXVY,
                         FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 4; }
    // The "none" micro-op has no resource entry, hence four resources.
    def FalkorWr_2LD_2VXVY_1none_4cyc
        : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
                         FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 4; }
    def FalkorWr_5VXVY_7cyc
        : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, FalkorUnitVXVY,
                         FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 7; }
    def FalkorWr_1XYZ_2ST_2VSD_0cyc
        : SchedWriteRes<[FalkorUnitXYZ,
                         FalkorUnitST, FalkorUnitVSD,
                         FalkorUnitST, FalkorUnitVSD]> { let Latency = 0; }
    def FalkorWr_1VXVY_2ST_2VSD_0cyc
        : SchedWriteRes<[FalkorUnitVXVY,
                         FalkorUnitST, FalkorUnitVSD,
                         FalkorUnitST, FalkorUnitVSD]> { let Latency = 0; }
  }
  364. //===----------------------------------------------------------------------===//
  365. // Define 6 micro-op types
  // Six-micro-op writes.
  let NumMicroOps = 6 in {
    // The two "none" micro-ops have no resource entry, hence four resources.
    def FalkorWr_2LD_2VXVY_2none_4cyc
        : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
                         FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 4; }
    def FalkorWr_2XYZ_2ST_2VSD_0cyc
        : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST, FalkorUnitVSD,
                         FalkorUnitXYZ, FalkorUnitST, FalkorUnitVSD]> {
      let Latency = 0;
    }
    def FalkorWr_2VXVY_2ST_2VSD_0cyc
        : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST, FalkorUnitVSD,
                         FalkorUnitVXVY, FalkorUnitST, FalkorUnitVSD]> {
      let Latency = 0;
    }
    def FalkorWr_3VSD_3ST_0cyc
        : SchedWriteRes<[FalkorUnitST, FalkorUnitVSD,
                         FalkorUnitST, FalkorUnitVSD,
                         FalkorUnitST, FalkorUnitVSD]> {
      let Latency = 0;
    }
  }
  389. //===----------------------------------------------------------------------===//
  390. // Define 8 micro-op types
  // Eight-micro-op writes.
  let NumMicroOps = 8 in {
    // Two repetitions of (LD, LD, VXVY, VXVY).
    def FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc
        : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
                         FalkorUnitVXVY, FalkorUnitVXVY,
                         FalkorUnitLD, FalkorUnitLD,
                         FalkorUnitVXVY, FalkorUnitVXVY]> {
      let Latency = 4;
    }
    // Four repetitions of (ST, VSD).
    def FalkorWr_4VSD_4ST_0cyc
        : SchedWriteRes<[FalkorUnitST, FalkorUnitVSD,
                         FalkorUnitST, FalkorUnitVSD,
                         FalkorUnitST, FalkorUnitVSD,
                         FalkorUnitST, FalkorUnitVSD]> {
      let Latency = 0;
    }
  }
  405. //===----------------------------------------------------------------------===//
  406. // Define 9 micro-op types
  // Nine-micro-op writes. The two records list the same resources and differ
  // only in where the XYZ entry sits in the list.
  let NumMicroOps = 9 in {
    def FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc
        : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
                         FalkorUnitVXVY, FalkorUnitVXVY,
                         FalkorUnitLD, FalkorUnitLD,
                         FalkorUnitXYZ,
                         FalkorUnitVXVY, FalkorUnitVXVY]> {
      let Latency = 4;
    }
    def FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc
        : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
                         FalkorUnitVXVY, FalkorUnitVXVY,
                         FalkorUnitXYZ,
                         FalkorUnitLD, FalkorUnitLD,
                         FalkorUnitVXVY, FalkorUnitVXVY]> {
      let Latency = 4;
    }
  }
  423. //===----------------------------------------------------------------------===//
  424. // Define 10 micro-op types
  // Ten micro-ops: two (VXVY, ST, VSD) triples followed by two (ST, VSD) pairs.
  let NumMicroOps = 10 in
  def FalkorWr_2VXVY_4ST_4VSD_0cyc
      : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST, FalkorUnitVSD,
                       FalkorUnitVXVY, FalkorUnitST, FalkorUnitVSD,
                       FalkorUnitST, FalkorUnitVSD,
                       FalkorUnitST, FalkorUnitVSD]> {
    let Latency = 0;
  }
  433. //===----------------------------------------------------------------------===//
  434. // Define 12 micro-op types
  // Twelve micro-ops: four repetitions of (VXVY, ST, VSD).
  let NumMicroOps = 12 in
  def FalkorWr_4VXVY_4ST_4VSD_0cyc
      : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST, FalkorUnitVSD,
                       FalkorUnitVXVY, FalkorUnitST, FalkorUnitVSD,
                       FalkorUnitVXVY, FalkorUnitST, FalkorUnitVSD,
                       FalkorUnitVXVY, FalkorUnitST, FalkorUnitVSD]> {
    let Latency = 0;
  }
  444. // Forwarding logic is modeled for multiply add/accumulate and
  445. // load/store base register increment.
  446. // -----------------------------------------------------------------------------
  // Read-advance records: each pairs a cycle count with the list of writes it
  // applies to.

  // Integer multiply-accumulate.
  def FalkorReadIMA32
      : SchedReadAdvance<3, [FalkorWr_IMUL32_1X_2cyc]>;
  def FalkorReadIMA64
      : SchedReadAdvance<4, [FalkorWr_IMUL64_1X_4cyc,
                             FalkorWr_IMUL64_1X_5cyc]>;

  // Vector integer and floating-point multiply-accumulate.
  def FalkorReadVMA
      : SchedReadAdvance<3, [FalkorWr_VMUL32_1VXVY_4cyc,
                             FalkorWr_VMUL32_2VXVY_4cyc]>;
  def FalkorReadFMA32
      : SchedReadAdvance<1, [FalkorWr_FMUL32_1VXVY_5cyc,
                             FalkorWr_FMUL32_2VXVY_5cyc]>;
  def FalkorReadFMA64
      : SchedReadAdvance<2, [FalkorWr_FMUL64_1VXVY_6cyc,
                             FalkorWr_FMUL64_2VXVY_6cyc]>;

  // Load/store base-register increment.
  def FalkorReadIncLd
      : SchedReadAdvance<1, [FalkorWr_LdInc_none_2cyc]>;
  def FalkorReadIncSt
      : SchedReadAdvance<1, [FalkorWr_StInc_none_2cyc]>;
  454. // SchedPredicates and WriteVariants for Immediate Zero and LSLFast/ASRFast
  455. // -----------------------------------------------------------------------------
  // True when operand 1 is an immediate whose value is zero.
  def FalkorImmZPred : SchedPredicate<[{MI->getOperand(1).isImm() &&
                                        MI->getOperand(1).getImm() == 0}]>;

  // True when operand 1 is the WZR or XZR register. The isReg() check comes
  // first, in the same way FalkorImmZPred checks isImm() before getImm(), so
  // that getReg() is never called on an operand that is not a register.
  def FalkorOp1ZrReg : SchedPredicate<[{MI->getOperand(1).isReg() &&
                                        (MI->getOperand(1).getReg() == AArch64::WZR ||
                                         MI->getOperand(1).getReg() == AArch64::XZR)}]>;

  // Defers to the target instruction info's isFalkorShiftExtFast() query.
  def FalkorShiftExtFastPred : SchedPredicate<[{TII->isFalkorShiftExtFast(*MI)}]>;
  // Write variants. Each one lists a predicated SchedVar first and a
  // NoSchedPred SchedVar second.

  // Selected on the operand-1 zero-register / zero-immediate predicates.
  def FalkorWr_FMOV : SchedWriteVariant<[
      SchedVar<FalkorOp1ZrReg, [FalkorWr_1none_0cyc]>,
      SchedVar<NoSchedPred,    [FalkorWr_1GTOV_1cyc]>
  ]>;
  def FalkorWr_MOVZ : SchedWriteVariant<[
      SchedVar<FalkorImmZPred, [FalkorWr_1none_0cyc]>,
      SchedVar<NoSchedPred,    [FalkorWr_1XYZB_0cyc]>  // imm fwd
  ]>;

  // Selected on FalkorShiftExtFastPred; the NoSchedPred alternative carries an
  // additional XYZ micro-op in each case.
  def FalkorWr_ADDSUBsx : SchedWriteVariant<[
      SchedVar<FalkorShiftExtFastPred, [FalkorWr_1XYZ_1cyc]>,
      SchedVar<NoSchedPred,            [FalkorWr_2XYZ_2cyc]>
  ]>;
  def FalkorWr_LDRro : SchedWriteVariant<[
      SchedVar<FalkorShiftExtFastPred, [FalkorWr_1LD_3cyc]>,
      SchedVar<NoSchedPred,            [FalkorWr_1XYZ_1LD_4cyc]>
  ]>;
  def FalkorWr_LDRSro : SchedWriteVariant<[
      SchedVar<FalkorShiftExtFastPred, [FalkorWr_1LD_4cyc]>,
      SchedVar<NoSchedPred,            [FalkorWr_1XYZ_1LD_5cyc]>
  ]>;

  def FalkorWr_ORRi : SchedWriteVariant<[
      SchedVar<FalkorOp1ZrReg, [FalkorWr_1XYZ_0cyc]>,  // imm fwd
      SchedVar<NoSchedPred,    [FalkorWr_1XYZ_1cyc]>
  ]>;

  def FalkorWr_PRFMro : SchedWriteVariant<[
      SchedVar<FalkorShiftExtFastPred, [FalkorWr_1ST_3cyc]>,
      SchedVar<NoSchedPred,            [FalkorWr_1XYZ_1ST_4cyc]>
  ]>;
  def FalkorWr_STRVro : SchedWriteVariant<[
      SchedVar<FalkorShiftExtFastPred, [FalkorWr_1VSD_1ST_0cyc]>,
      SchedVar<NoSchedPred,            [FalkorWr_1XYZ_1VSD_1ST_0cyc]>
  ]>;
  def FalkorWr_STRQro : SchedWriteVariant<[
      SchedVar<FalkorShiftExtFastPred, [FalkorWr_1XYZ_2ST_2VSD_0cyc]>,
      SchedVar<NoSchedPred,            [FalkorWr_2XYZ_2ST_2VSD_0cyc]>
  ]>;
  def FalkorWr_STRro : SchedWriteVariant<[
      SchedVar<FalkorShiftExtFastPred, [FalkorWr_1SD_1ST_0cyc]>,
      SchedVar<NoSchedPred,            [FalkorWr_1XYZ_1SD_1ST_0cyc]>
  ]>;
  491. //===----------------------------------------------------------------------===//
  492. // Specialize the coarse model by associating instruction groups with the
  493. // subtarget-defined types. As the model is refined, this will override most
  494. // of the earlier mappings.
  495. // Miscellaneous
  496. // -----------------------------------------------------------------------------
  // COPY is mapped to a single one-cycle XYZ write.
  // FIXME: This could be better modeled by looking at the regclasses of the
  // operands.
  def : InstRW<[FalkorWr_1XYZ_1cyc],
               (instrs COPY)>;
  499. // SIMD Floating-point Instructions
  500. // -----------------------------------------------------------------------------
  // SIMD floating-point instructions mapped to single-VXVY writes, in order of
  // increasing latency.

  // 1 cycle.
  def : InstRW<[FalkorWr_1VXVY_1cyc],
               (instregex "^(FABS|FNEG)v2f32$")>;

  // 2 cycles.
  def : InstRW<[FalkorWr_1VXVY_2cyc],
               (instregex "^(F(MAX|MIN)(NM)?P?|FAC(GE|GT))(v2f32|v2i32p)$")>;
  def : InstRW<[FalkorWr_1VXVY_2cyc],
               (instregex "^FAC(GE|GT)(32|64)$")>;
  def : InstRW<[FalkorWr_1VXVY_2cyc],
               (instregex "^FCM(EQ|GE|GT)(32|64|v2f32|v2i32)$")>;
  def : InstRW<[FalkorWr_1VXVY_2cyc],
               (instregex "^FCM(EQ|LE|GE|GT|LT)(v1i32|v1i64|v2i32)rz$")>;
  def : InstRW<[FalkorWr_1VXVY_2cyc],
               (instregex "^FRINT(A|I|M|N|P|X|Z)v2f32$")>;

  // 3 cycles.
  def : InstRW<[FalkorWr_1VXVY_3cyc],
               (instregex "^F(MAX|MIN)(NM)?Vv4i32v$")>;
  def : InstRW<[FalkorWr_1VXVY_3cyc],
               (instregex "^(FABD|FADD|FSUB)v2f32$")>;
  def : InstRW<[FalkorWr_1VXVY_3cyc],
               (instregex "^FADDP(v2i32p|v2i64p|v2f32)$")>;

  // 4 cycles.
  def : InstRW<[FalkorWr_1VXVY_4cyc],
               (instregex "^FCVT(N|M|P|Z|A)(S|U)(v1i32|v1i64|v2f32)$")>;
  def : InstRW<[FalkorWr_1VXVY_4cyc],
               (instrs FCVTXNv1i64)>;
  def : InstRW<[FalkorWr_1VXVY_4cyc],
               (instregex "^FCVTZ(S|U)v2i32(_shift)?$")>;

  // Multiplies use the FMUL32 (5-cycle) and FMUL64 (6-cycle) writes.
  def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc],
               (instregex "^(FMUL|FMULX)(v2f32|(v1i32_indexed|v2i32_indexed))$")>;
  def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc],
               (instrs FMULX32)>;
  def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc],
               (instregex "^(FMUL|FMULX)v1i64_indexed$")>;
  def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc],
               (instrs FMULX64)>;
  // SIMD floating-point instructions mapped to multi-micro-op writes.

  // Two VXVY micro-ops, 1 and 2 cycles.
  def : InstRW<[FalkorWr_2VXVY_1cyc],
               (instregex "^(FABS|FNEG)(v2f64|v4f32)$")>;
  def : InstRW<[FalkorWr_2VXVY_2cyc],
               (instregex "^(F(MAX|MIN)(NM)?P?|FAC(GE|GT)|FCM(EQ|GE|GT))(v2f64|v4f32|v2i64p)$")>;
  def : InstRW<[FalkorWr_2VXVY_2cyc],
               (instregex "^FCM(EQ|LE|GE|GT|LT)(v2i64|v4i32)rz$")>;
  def : InstRW<[FalkorWr_2VXVY_2cyc],
               (instrs FCVTLv4i16, FCVTLv2i32)>;
  def : InstRW<[FalkorWr_2VXVY_2cyc],
               (instregex "^FRINT(A|I|M|N|P|X|Z)(v2f64|v4f32)$")>;

  // v2f32 divide and square root: one VX plus one VY micro-op.
  def : InstRW<[FalkorWr_1VX_1VY_10cyc],
               (instrs FDIVv2f32)>;
  def : InstRW<[FalkorWr_1VX_1VY_12cyc],
               (instrs FSQRTv2f32)>;

  // Two VXVY micro-ops, 3 and 4 cycles.
  def : InstRW<[FalkorWr_2VXVY_3cyc],
               (instregex "^(FABD|FADD(P)?|FSUB)(v2f64|v4f32)$")>;
  def : InstRW<[FalkorWr_2VXVY_4cyc],
               (instregex "^FCVT(N|M|P|Z|A)(S|U)(v2f64|v4f32)$")>;
  def : InstRW<[FalkorWr_2VXVY_4cyc],
               (instrs FCVTLv8i16, FCVTLv4i32)>;
  def : InstRW<[FalkorWr_2VXVY_4cyc],
               (instregex "^FCVTZ(S|U)(v2i64|v4i32)(_shift)?$")>;

  // Multiplies use the two-micro-op FMUL32 / FMUL64 writes.
  def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc],
               (instregex "^(FMUL|FMULX)(v2f64|v4f32|v4i32_indexed)$")>;
  def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc],
               (instregex "^(FMUL|FMULX)v2i64_indexed$")>;

  // Narrowing converts: three VXVY micro-ops.
  def : InstRW<[FalkorWr_3VXVY_4cyc],
               (instrs FCVTNv4i16, FCVTNv2i32, FCVTXNv2f32)>;
  def : InstRW<[FalkorWr_3VXVY_5cyc],
               (instrs FCVTNv8i16, FCVTNv4i32, FCVTXNv4f32)>;

  // v2f64 / v4f32 divide and square root: two VX plus two VY micro-ops.
  def : InstRW<[FalkorWr_2VX_2VY_14cyc],
               (instrs FDIVv2f64)>;
  def : InstRW<[FalkorWr_2VX_2VY_20cyc],
               (instrs FDIVv4f32)>;
  def : InstRW<[FalkorWr_2VX_2VY_21cyc],
               (instrs FSQRTv2f64)>;
  def : InstRW<[FalkorWr_2VX_2VY_24cyc],
               (instrs FSQRTv4f32)>;
  // Multiply-accumulate forms: each entry pairs a multiply write with the
  // matching read-advance record (FalkorReadVMA / FalkorReadFMA32 /
  // FalkorReadFMA64).

  // Integer MLA/MLS.
  def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA],
               (instregex "^ML(A|S)(v8i8|v4i16|v2i32)(_indexed)?$")>;
  def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA],
               (instregex "^ML(A|S)(v16i8|v8i16|v4i32|v2i64)(_indexed)?$")>;

  // Floating-point FMLA/FMLS, single-micro-op writes.
  def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc, FalkorReadFMA32],
               (instregex "^FML(A|S)(v2f32|(v1i32_indexed|v2i32_indexed))$")>;
  def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc, FalkorReadFMA64],
               (instregex "^FML(A|S)v1i64_indexed$")>;

  // Floating-point FMLA/FMLS, two-micro-op writes.
  def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc, FalkorReadFMA32],
               (instregex "^FML(A|S)(v4f32|v4i32_indexed)$")>;
  def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc, FalkorReadFMA64],
               (instregex "^FML(A|S)(v2f64|v2i64_indexed)$")>;
  554. // SIMD Integer Instructions
  555. // -----------------------------------------------------------------------------
  // SIMD integer instructions mapped to the one-cycle single-VXVY write.
  def : InstRW<[FalkorWr_1VXVY_1cyc],
               (instregex "^ADD(v1i64|v2i32|v4i16|v8i8)$")>;
  def : InstRW<[FalkorWr_1VXVY_1cyc],
               (instrs ADDPv2i64p)>;
  def : InstRW<[FalkorWr_1VXVY_1cyc],
               (instregex "^(AND|ORR|ORN|BIC|EOR)v8i8$")>;
  def : InstRW<[FalkorWr_1VXVY_1cyc],
               (instregex "^(BIC|ORR)(v2i32|v4i16)$")>;
  def : InstRW<[FalkorWr_1VXVY_1cyc],
               (instregex "^NEG(v1i64|v2i32|v4i16|v8i8)$")>;
  def : InstRW<[FalkorWr_1VXVY_1cyc],
               (instregex "^SUB(v1i64|v2i32|v4i16|v8i8)$")>;
  // SIMD integer instructions mapped to the two-cycle single-VXVY write.
  def : InstRW<[FalkorWr_1VXVY_2cyc],
               (instregex "^(S|U)(ADDLP|HADD|HSUB|SHL)(v2i32|v4i16|v8i8)(_v.*)?$")>;
  def : InstRW<[FalkorWr_1VXVY_2cyc],
               (instregex "^(S|U)SHLv1i64$")>;
  def : InstRW<[FalkorWr_1VXVY_2cyc],
               (instregex "^(S|U)SHR(v2i32|v4i16|v8i8)_shift$")>;
  def : InstRW<[FalkorWr_1VXVY_2cyc],
               (instregex "^(S|U)SHRd$")>;
  def : InstRW<[FalkorWr_1VXVY_2cyc],
               (instregex "^((S|U)?(MAX|MIN)P?|ABS|ADDP|CM(EQ|GE|HS|GT|HI))(v1i64|v2i32|v4i16|v8i8)$")>;
  // NOTE(review): the CM(EQ|GE|HS|GT|HI) alternative of the pattern above
  // already covers these names with the same write; kept as a separate record
  // to leave the set of InstRW definitions unchanged.
  def : InstRW<[FalkorWr_1VXVY_2cyc],
               (instregex "^CM(EQ|GE|HS|GT|HI)(v1i64|v2i32|v4i16|v8i8)$")>;
  def : InstRW<[FalkorWr_1VXVY_2cyc],
               (instregex "^CM(EQ|LE|GE|GT|LT)(v1i64|v2i32|v4i16|v8i8)rz$")>;
  def : InstRW<[FalkorWr_1VXVY_2cyc],
               (instregex "^CMTST(v1i64|v2i32|v4i16|v8i8)$")>;
  def : InstRW<[FalkorWr_1VXVY_2cyc],
               (instrs PMULv8i8)>;
  def : InstRW<[FalkorWr_1VXVY_2cyc],
               (instregex "^SHL(v2i32|v4i16|v8i8)_shift$")>;
  def : InstRW<[FalkorWr_1VXVY_2cyc],
               (instregex "^SHLd$")>;
  // SIMD integer instructions mapped to the three-cycle single-VXVY write.
  def : InstRW<[FalkorWr_1VXVY_3cyc],
               (instregex "^SQNEG(v2i32|v4i16|v8i8)$")>;
  def : InstRW<[FalkorWr_1VXVY_3cyc],
               (instregex "^(S|U)R?SRA(d|(v2i32|v4i16|v8i8)_shift)$")>;
  def : InstRW<[FalkorWr_1VXVY_3cyc],
               (instregex "^(S|U)(ABD|ADALP)(v8i8|v4i16|v2i32)(_v.*)?$")>;
  def : InstRW<[FalkorWr_1VXVY_3cyc],
               (instregex "^(S|U)ADDLVv4i16v$")>;
  def : InstRW<[FalkorWr_1VXVY_3cyc],
               (instregex "^(S|U)QADD(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
  def : InstRW<[FalkorWr_1VXVY_3cyc],
               (instregex "^(S|U)QSHLU?(d|s|h|b|(v8i8|v4i16|v2i32)_shift)$")>;
  def : InstRW<[FalkorWr_1VXVY_3cyc],
               (instregex "^(S|U)(QSHL|RSHL|QRSHL)(v1i8|v1i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
  def : InstRW<[FalkorWr_1VXVY_3cyc],
               (instregex "^(SQR?SHRN|UQR?SHRN|SQR?SHRUN)(s|h|b)$")>;
  def : InstRW<[FalkorWr_1VXVY_3cyc],
               (instregex "^(S|U)QSUB(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
  def : InstRW<[FalkorWr_1VXVY_3cyc],
               (instregex "^(S|U)RHADD(v2i32|v4i16|v8i8)$")>;
  def : InstRW<[FalkorWr_1VXVY_3cyc],
               (instregex "^(S|U)RSHR(v2i32|v4i16|v8i8)_shift$")>;
  def : InstRW<[FalkorWr_1VXVY_3cyc],
               (instregex "^(S|U)RSHRd$")>;
  def : InstRW<[FalkorWr_1VXVY_3cyc],
               (instregex "^R?SHRN(v2i32|v4i16|v8i8)_shift$")>;
  def : InstRW<[FalkorWr_1VXVY_3cyc],
               (instregex "^(SU|US)QADD(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
  def : InstRW<[FalkorWr_1VXVY_3cyc],
               (instregex "^(S|U)?(MAX|MIN)V(v4i16v|v4i32v)$")>;
  def : InstRW<[FalkorWr_1VXVY_3cyc],
               (instrs ADDVv4i16v)>;
  def : InstRW<[FalkorWr_1VXVY_3cyc],
               (instregex "^S(L|R)I(d|(v8i8|v4i16|v2i32)_shift)$")>;
  def : InstRW<[FalkorWr_1VXVY_3cyc],
               (instregex "^SQABS(v1i8|v1i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
  def : InstRW<[FalkorWr_1VXVY_3cyc],
               (instregex "^SQNEG(v1i8|v1i16|v1i32|v1i64)$")>;
  592. def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)ADDLVv8i8v$")>;
  593. def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)?(MAX|MIN)V(v8i8v|v8i16v)$")>;
  594. def : InstRW<[FalkorWr_1VXVY_4cyc], (instrs ADDVv8i8v)>;
  595. def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc],
  596. (instregex "^MUL(v2i32|v4i16|v8i8)(_indexed)?$")>;
  597. def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc],
  598. (instregex "^SQR?DMULH(v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>;
  599. def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc],
  600. (instregex "^SQDMULL(i16|i32)$")>;
  601. def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA],
  602. (instregex "^SQRDML(A|S)H(i16|i32|v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>;
  603. def : InstRW<[FalkorWr_1VXVY_5cyc], (instregex "^(S|U)?(MAX|MIN)Vv16i8v$")>;
  604. def : InstRW<[FalkorWr_2VXVY_3cyc], (instrs ADDVv4i32v)>;
  605. def : InstRW<[FalkorWr_2VXVY_4cyc], (instrs ADDVv8i16v)>;
  606. def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(ADD|SUB)HNv.*$")>;
  607. def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(S|U)ABA(v2i32|v4i16|v8i8)$")>;
  608. def : InstRW<[FalkorWr_2VXVY_5cyc], (instrs ADDVv16i8v)>;
  609. def : InstRW<[FalkorWr_2VXVY_6cyc], (instregex "^(SQR?SHRN|UQR?SHRN|SQR?SHRUN)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32)_shift?$")>;
  610. def : InstRW<[FalkorWr_2VXVY_6cyc], (instregex "^R(ADD|SUB)HNv.*$")>;
  611. def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^ADD(v16i8|v8i16|v4i32|v2i64)$")>;
  612. def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs ADDPv2i64)>; // sz==11
  613. def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(AND|ORR|ORN|BIC|EOR)v16i8$")>;
  614. def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(BIC|ORR)(v8i16|v4i32)$")>;
  615. def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(NEG|SUB)(v16i8|v8i16|v4i32|v2i64)$")>;
  616. def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(S|U)ADDLv.*$")>;
  617. def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(S|U)(ADDLP|HADD|HSUB|SHL)(v16i8|v2i64|v4i32|v8i16)(_v.*)?$")>;
  618. def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(S|U)SHLL(v16i8|v8i16|v4i32|v8i8|v4i16|v2i32)(_shift)?$")>;
  619. def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(S|U)SHR(v16i8|v8i16|v4i32|v2i64)_shift$")>;
  620. def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(S|U)SUBLv.*$")>;
  621. def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^((S|U)?(MAX|MIN)P?|ABS)(v16i8|v2i64|v4i32|v8i16)$")>;
  622. def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^ADDP(v4i32|v8i16|v16i8)$")>; // sz!=11
  623. def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^CM(EQ|GE|HS|GT|HI)(v16i8|v2i64|v4i32|v8i16)$")>;
  624. def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^CM(EQ|LE|GE|GT|LT)(v16i8|v2i64|v4i32|v8i16)rz$")>;
  625. def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(CMTST|PMUL)(v16i8|v2i64|v4i32|v8i16)$")>;
  626. def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^PMULL(v8i8|v16i8)$")>;
  627. def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^SHL(v16i8|v8i16|v4i32|v2i64)_shift$")>;
  628. def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^SHLL(v16i8|v8i16|v4i32|v8i8|v4i16|v2i32)(_shift)?$")>;
  629. def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)R?SRA(v2i64|v4i32|v8i16|v16i8)_shift$")>;
  630. def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)ABD(v16i8|v8i16|v4i32|v2i64)$")>;
  631. def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)ABDLv.*$")>;
  632. def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)(ADALP|QADD)(v16i8|v8i16|v4i32|v2i64)(_v.*)?$")>;
  633. def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)QSHLU?(v2i64|v4i32|v8i16|v16i8)_shift$")>;
  634. def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)(QSHL|RSHL|QRSHL|QSUB|RHADD)(v16i8|v8i16|v4i32|v2i64)$")>;
  635. def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)RSHR(v2i64|v4i32|v8i16|v16i8)_shift$")>;
  636. def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^R?SHRN(v2i64|v4i32|v8i16|v16i8)_shift$")>;
  637. def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(SU|US)QADD(v16i8|v8i16|v4i32|v2i64)$")>;
  638. def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^PMULL(v1i64|v2i64)$")>;
  639. def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^S(L|R)I(v16i8|v8i16|v4i32|v2i64)_shift$")>;
  640. def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^SQ(ABS|NEG)(v16i8|v8i16|v4i32|v2i64)$")>;
  641. def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc],
  642. (instregex "^(MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>;
  643. def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc],
  644. (instregex "^SQDMULLv.*$")>;
  645. def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA],
  646. (instregex "^SQRDML(A|S)H(v16i8|v8i16|v4i32)(_indexed)?$")>;
  647. def : InstRW<[FalkorWr_3VXVY_3cyc], (instregex "^(S|U)ADDLVv4i32v$")>;
  648. def : InstRW<[FalkorWr_3VXVY_5cyc], (instregex "^(S|U)ADDLVv8i16v$")>;
  649. def : InstRW<[FalkorWr_3VXVY_6cyc], (instregex "^(S|U)ADDLVv16i8v$")>;
  650. def : InstRW<[FalkorWr_4VXVY_2cyc], (instregex "^(S|U)(ADD|SUB)Wv.*$")>;
  651. def : InstRW<[FalkorWr_4VXVY_3cyc], (instregex "^(S|U)ABALv.*$")>;
  652. def : InstRW<[FalkorWr_4VXVY_4cyc], (instregex "^(S|U)ABA(v16i8|v8i16|v4i32)$")>;
  653. def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA],
  654. (instregex "^SQD(MLAL|MLSL)(i16|i32|v1i32_indexed|v1i64_indexed)$")>;
  655. def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA],
  656. (instregex "^SQD(MLAL|MLSL)v[248].*$")>;
  657. // SIMD Load Instructions
  658. // -----------------------------------------------------------------------------
  659. def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))$")>;
  660. def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd],
  661. (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))_POST$")>;
  662. def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
  663. def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd],
  664. (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
  665. def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd], (instrs LD2i64)>;
  666. def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd],
  667. (instrs LD2i64_POST)>;
  668. def : InstRW<[FalkorWr_1LD_1VXVY_4cyc, FalkorReadIncLd], (instregex "^LD1i(8|16|32)$")>;
  669. def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_1VXVY_4cyc, FalkorReadIncLd],
  670. (instregex "^LD1i(8|16|32)_POST$")>;
  671. def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD1Twov(8b|4h|2s|1d)$")>;
  672. def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_1none_3cyc, FalkorReadIncLd],
  673. (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>;
  674. def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD2Twov(8b|4h|2s)$")>;
  675. def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_1none_3cyc, FalkorReadIncLd],
  676. (instregex "^LD2Twov(8b|4h|2s)_POST$")>;
  677. def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD2Rv(8b|4h|2s|1d)$")>;
  678. def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_1none_3cyc, FalkorReadIncLd],
  679. (instregex "^LD2Rv(8b|4h|2s|1d)_POST$")>;
  680. def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd], (instregex "^LD1Twov(16b|8h|4s|2d)$")>;
  681. def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd],
  682. (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>;
  683. def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd], (instregex "^LD2Twov(16b|8h|4s|2d)$")>;
  684. def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd],
  685. (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>;
  686. def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd], (instregex "^LD2Rv(16b|8h|4s|2d)$")>;
  687. def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd],
  688. (instregex "^LD2Rv(16b|8h|4s|2d)_POST$")>;
  689. def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd], (instrs LD3i64)>;
  690. def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd],
  691. (instrs LD3i64_POST)>;
  692. def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd], (instrs LD4i64)>;
  693. def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd],
  694. (instrs LD4i64_POST)>;
  695. def : InstRW<[FalkorWr_1LD_2VXVY_4cyc, FalkorReadIncLd], (instregex "^LD2i(8|16|32)$")>;
  696. def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_2VXVY_4cyc, FalkorReadIncLd],
  697. (instregex "^LD2i(8|16|32)_POST$")>;
  698. def : InstRW<[FalkorWr_2LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD1Threev(8b|4h|2s|1d)$")>;
  699. def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_1none_3cyc, FalkorReadIncLd],
  700. (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>;
  701. def : InstRW<[FalkorWr_2LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD3Rv(8b|4h|2s|1d)$")>;
  702. def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_1none_3cyc, FalkorReadIncLd],
  703. (instregex "^LD3Rv(8b|4h|2s|1d)_POST$")>;
  704. def : InstRW<[FalkorWr_3LD_3cyc, FalkorReadIncLd], (instregex "^LD1Threev(16b|8h|4s|2d)$")>;
  705. def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_3LD_3cyc, FalkorReadIncLd],
  706. (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>;
  707. def : InstRW<[FalkorWr_3LD_3cyc, FalkorReadIncLd], (instrs LD3Threev2d)>;
  708. def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_3LD_3cyc, FalkorReadIncLd],
  709. (instrs LD3Threev2d_POST)>;
  710. def : InstRW<[FalkorWr_3LD_3cyc, FalkorReadIncLd], (instregex "^LD3Rv(16b|8h|4s|2d)$")>;
  711. def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_3LD_3cyc, FalkorReadIncLd],
  712. (instregex "^LD3Rv(16b|8h|4s|2d)_POST$")>;
  713. def : InstRW<[FalkorWr_1LD_3VXVY_4cyc, FalkorReadIncLd], (instregex "^LD3i(8|16|32)$")>;
  714. def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3VXVY_4cyc, FalkorReadIncLd],
  715. (instregex "^LD3i(8|16|32)_POST$")>;
  716. def : InstRW<[FalkorWr_2LD_2none_3cyc, FalkorReadIncLd], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
  717. def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_2none_3cyc, FalkorReadIncLd],
  718. (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;
  719. def : InstRW<[FalkorWr_2LD_2none_3cyc, FalkorReadIncLd], (instregex "^LD4Rv(8b|4h|2s|1d)$")>;
  720. def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_2none_3cyc, FalkorReadIncLd],
  721. (instregex "^LD4Rv(8b|4h|2s|1d)_POST$")>;
  722. def : InstRW<[FalkorWr_4LD_3cyc, FalkorReadIncLd], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
  723. def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_4LD_3cyc, FalkorReadIncLd],
  724. (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;
  725. def : InstRW<[FalkorWr_4LD_3cyc, FalkorReadIncLd], (instrs LD4Fourv2d)>;
  726. def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_4LD_3cyc, FalkorReadIncLd],
  727. (instrs LD4Fourv2d_POST)>;
  728. def : InstRW<[FalkorWr_4LD_3cyc, FalkorReadIncLd], (instregex "^LD4Rv(16b|8h|4s|2d)$")>;
  729. def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_4LD_3cyc, FalkorReadIncLd],
  730. (instregex "^LD4Rv(16b|8h|4s|2d)_POST$")>;
  731. def : InstRW<[FalkorWr_1LD_4VXVY_4cyc, FalkorReadIncLd], (instregex "^LD4i(8|16|32)$")>;
  732. def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_4VXVY_4cyc, FalkorReadIncLd],
  733. (instregex "^LD4i(8|16|32)_POST$")>;
  734. def : InstRW<[FalkorWr_2LD_2VXVY_1none_4cyc, FalkorReadIncLd],
  735. (instregex "^LD3Threev(8b|4h|2s)$")>;
  736. def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_2VXVY_1none_4cyc, FalkorReadIncLd],
  737. (instregex "^LD3Threev(8b|4h|2s)_POST$")>;
  738. def : InstRW<[FalkorWr_2LD_2VXVY_2none_4cyc, FalkorReadIncLd],
  739. (instregex "^LD4Fourv(8b|4h|2s)$")>;
  740. def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_2VXVY_2none_4cyc, FalkorReadIncLd],
  741. (instregex "^LD4Fourv(8b|4h|2s)_POST$")>;
  742. def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc, FalkorReadIncLd],
  743. (instregex "^LD3Threev(16b|8h|4s)$")>;
  744. def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc, FalkorReadIncLd],
  745. (instregex "^LD4Fourv(16b|8h|4s)$")>;
  746. def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc, FalkorReadIncLd],
  747. (instregex "^LD3Threev(16b|8h|4s)_POST$")>;
  748. def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc, FalkorReadIncLd],
  749. (instregex "^LD4Fourv(16b|8h|4s)_POST$")>;
  750. // Arithmetic and Logical Instructions
  751. // -----------------------------------------------------------------------------
  752. def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(CCMN|CCMP)(W|X)(r|i)$")>;
  753. def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ADC(S)?(W|X)r$")>;
  754. def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ADD(S)?(W|X)r(r|i)$")>;
  755. def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(CSEL|CSINC|CSINV|CSNEG)(W|X)r$")>;
  756. def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^AND(S)?(W|X)r(i|r|s)$")>;
  757. def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^BIC(S)?(W|X)r(r|s)$")>;
  758. def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^EON(W|X)r(r|s)$")>;
  759. def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^EOR(W|X)r(i|r|s)$")>;
  760. def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ORN(W|X)r(r|s)$")>;
  761. def : InstRW<[FalkorWr_ORRi], (instregex "^ORR(W|X)ri$")>;
  762. def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ORR(W|X)r(r|s)$")>;
  763. def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^SBC(S)?(W|X)r$")>;
  764. def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^SUB(S)?(W|X)r(r|i)$")>;
  765. def : InstRW<[FalkorWr_ADDSUBsx], (instregex "^ADD(S)?(W|X)r(s|x|x64)$")>;
  766. def : InstRW<[FalkorWr_ADDSUBsx], (instregex "^SUB(S)?(W|X)r(s|x|x64)$")>;
  767. // SIMD Miscellaneous Instructions
  768. // -----------------------------------------------------------------------------
  769. def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^DUP(v8i8|v4i16|v2i32)(gpr|lane)$")>;
  770. def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^DUP(v16i8|v8i16)(gpr|lane)$")>;
  771. def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^DUP(i8|i16|i32|i64)$")>;
  772. def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^INSv(i8|i16)(gpr|lane)$")>;
  773. def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^(S|U)MOVv.*$")>;
  774. def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(BIF|BIT|BSL|BSP)v8i8$")>;
  775. def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs EXTv8i8)>;
  776. def : InstRW<[FalkorWr_1VXVY_0cyc], (instregex "(MOVI|MVNI)(D|v8b_ns|v2i32|v4i16|v2s_msl)$")>; // imm fwd
  777. def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs TBLv8i8One)>;
  778. def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs NOTv8i8)>;
  779. def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^REV(16|32|64)v.*$")>;
  780. def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(TRN1|TRN2|ZIP1|UZP1|UZP2|ZIP2|XTN)(v2i32|v2i64|v4i16|v4i32|v8i8|v8i16|v16i8)$")>;
  781. def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(CLS|CLZ|CNT|RBIT)(v2i32|v4i16|v8i8)$")>;
  782. def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "(S|U)QXTU?Nv.*$")>;
  783. def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs FRECPEv1i32, FRECPEv1i64, FRSQRTEv1i32, FRSQRTEv1i64, FRECPEv2f32, FRSQRTEv2f32)>;
  784. def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs FRECPXv1i32, FRECPXv1i64)>;
  785. def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs URECPEv2i32, URSQRTEv2i32)>;
  786. def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc],
  787. (instrs FRECPS32, FRSQRTS32, FRECPSv2f32, FRSQRTSv2f32)>;
  788. def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc],
  789. (instrs FRECPS64, FRSQRTS64)>;
  790. def : InstRW<[FalkorWr_1GTOV_1VXVY_2cyc],
  791. (instregex "^INSv(i32|i64)(gpr|lane)$")>;
  792. def : InstRW<[FalkorWr_2GTOV_1cyc], (instregex "^DUP(v4i32|v2i64)(gpr|lane)$")>;
  793. def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(BIF|BIT|BSL|BSP)v16i8$")>;
  794. def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs EXTv16i8)>;
  795. def : InstRW<[FalkorWr_2VXVY_0cyc], (instregex "(MOVI|MVNI)(v2d_ns|v16b_ns|v4i32|v8i16|v4s_msl)$")>; // imm fwd
  796. def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs NOTv16i8)>;
  797. def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs TBLv16i8One)>;
  798. def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(CLS|CLZ|CNT|RBIT)(v4i32|v8i16|v16i8)$")>;
  799. def : InstRW<[FalkorWr_2VXVY_3cyc], (instrs FRECPEv2f64, FRECPEv4f32, FRSQRTEv2f64, FRSQRTEv4f32)>;
  800. def : InstRW<[FalkorWr_2VXVY_3cyc], (instrs URECPEv4i32, URSQRTEv4i32)>;
  801. def : InstRW<[FalkorWr_2VXVY_4cyc], (instrs TBLv8i8Two)>;
  802. def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^TBX(v8|v16)i8One$")>;
  803. def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc],
  804. (instrs FRECPSv4f32, FRSQRTSv4f32)>;
  805. def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc],
  806. (instrs FRECPSv2f64, FRSQRTSv2f64)>;
  807. def : InstRW<[FalkorWr_3VXVY_5cyc], (instregex "^TBL(v8i8Three|v16i8Two)$")>;
  808. def : InstRW<[FalkorWr_3VXVY_5cyc], (instregex "^TBX(v8i8Two|v16i8Two)$")>;
  809. def : InstRW<[FalkorWr_4VXVY_6cyc], (instregex "^TBL(v8i8Four|v16i8Three)$")>;
  810. def : InstRW<[FalkorWr_4VXVY_6cyc], (instregex "^TBX(v8i8Three|v16i8Three)$")>;
  811. def : InstRW<[FalkorWr_5VXVY_7cyc], (instrs TBLv16i8Four)>;
  812. def : InstRW<[FalkorWr_5VXVY_7cyc], (instregex "^TBX(v8i8Four|v16i8Four)$")>;
  813. // SIMD Store Instructions
  814. // -----------------------------------------------------------------------------
  815. def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
  816. (instregex "^STR(Q|D|S|H|B)ui$")>;
  817. def : InstRW<[FalkorWr_StInc_none_2cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
  818. (instregex "^STR(Q|D|S|H|B)(post|pre)$")>;
  819. def : InstRW<[FalkorWr_STRVro, ReadDefault, FalkorReadIncSt],
  820. (instregex "^STR(D|S|H|B)ro(W|X)$")>;
  821. def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
  822. (instregex "^STPQi$")>;
  823. def : InstRW<[FalkorWr_StInc_none_2cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
  824. (instregex "^STPQ(post|pre)$")>;
  825. def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
  826. (instregex "^STP(D|S)(i)$")>;
  827. def : InstRW<[FalkorWr_StInc_none_2cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
  828. (instregex "^STP(D|S)(post|pre)$")>;
  829. def : InstRW<[FalkorWr_STRQro, ReadDefault, FalkorReadIncSt],
  830. (instregex "^STRQro(W|X)$")>;
  831. def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
  832. (instregex "^STUR(Q|D|S|B|H)i$")>;
  833. def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
  834. (instrs STNPDi, STNPSi)>;
  835. def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
  836. (instrs STNPQi)>;
  837. def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
  838. (instregex "^ST1(One(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64)|One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))$")>;
  839. def : InstRW<[FalkorWr_StInc_none_2cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
  840. (instregex "^ST1(One(v8b|v4h|v2s|v1d)_POST|(i8|i16|i32|i64)_POST)$")>;
  841. def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
  842. (instregex "^ST2(Two(v8b|v4h|v2s)|(i8|i16|i32|i64))$")>;
  843. def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
  844. (instregex "^ST1(One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))_POST$")>;
  845. def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
  846. (instregex "^ST2(Two(v8b|v4h|v2s)|(i8|i16|i32|i64))_POST$")>;
  847. def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
  848. (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))$")>;
  849. def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
  850. (instregex "^ST2Two(v16b|v8h|v4s|v2d)$")>;
  851. def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
  852. (instregex "^ST3(i8|i16|i32|i64)$")>;
  853. def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
  854. (instregex "^ST4(i8|i16|i32|i64)$")>;
  855. // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
  856. def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
  857. (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))_POST$")>;
  858. // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
  859. def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
  860. (instregex "^ST2Two(v16b|v8h|v4s|v2d)_POST$")>;
  861. // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
  862. def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
  863. (instregex "^ST3(i8|i16|i32|i64)_POST$")>;
  864. // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
  865. def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
  866. (instregex "^ST4(i8|i16|i32|i64)_POST$")>;
  867. def : InstRW<[FalkorWr_1VXVY_2ST_2VSD_0cyc, ReadDefault, FalkorReadIncSt],
  868. (instregex "^ST3Three(v8b|v4h|v2s)$")>;
  869. // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
  870. def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VXVY_2ST_2VSD_0cyc, ReadDefault, FalkorReadIncSt],
  871. (instregex "^ST3Three(v8b|v4h|v2s)_POST$")>;
  872. def : InstRW<[FalkorWr_3VSD_3ST_0cyc, ReadDefault, FalkorReadIncSt],
  873. (instregex "^ST1Three(v16b|v8h|v4s|v2d)$")>;
  874. def : InstRW<[FalkorWr_3VSD_3ST_0cyc, ReadDefault, FalkorReadIncSt],
  875. (instrs ST3Threev2d)>;
  876. // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
  877. def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_3VSD_3ST_0cyc, ReadDefault, FalkorReadIncSt],
  878. (instregex "^ST1Three(v16b|v8h|v4s|v2d)_POST$")>;
  879. // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
  880. def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_3VSD_3ST_0cyc, ReadDefault, FalkorReadIncSt],
  881. (instrs ST3Threev2d_POST)>;
  882. def : InstRW<[FalkorWr_2VXVY_2ST_2VSD_0cyc, ReadDefault, FalkorReadIncSt],
  883. (instregex "^ST4Four(v8b|v4h|v2s)$")>;
  884. // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
  885. def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VXVY_2ST_2VSD_0cyc, ReadDefault, FalkorReadIncSt],
  886. (instregex "^ST4Four(v8b|v4h|v2s)_POST$")>;
  887. def : InstRW<[FalkorWr_4VSD_4ST_0cyc, ReadDefault, FalkorReadIncSt],
  888. (instregex "^ST1Four(v16b|v8h|v4s|v2d)$")>;
  889. def : InstRW<[FalkorWr_4VSD_4ST_0cyc, ReadDefault, FalkorReadIncSt],
  890. (instrs ST4Fourv2d)>;
  891. // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
  892. def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VSD_4ST_0cyc, ReadDefault, FalkorReadIncSt],
  893. (instregex "^ST1Four(v16b|v8h|v4s|v2d)_POST$")>;
  894. // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
  895. def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VSD_4ST_0cyc, ReadDefault, FalkorReadIncSt],
  896. (instrs ST4Fourv2d_POST)>;
  897. def : InstRW<[FalkorWr_2VXVY_4ST_4VSD_0cyc, ReadDefault, FalkorReadIncSt],
  898. (instregex "^ST3Three(v16b|v8h|v4s)$")>;
  899. // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
  900. def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VXVY_4ST_4VSD_0cyc, ReadDefault, FalkorReadIncSt],
  901. (instregex "^ST3Three(v16b|v8h|v4s)_POST$")>;
  902. def : InstRW<[FalkorWr_4VXVY_4ST_4VSD_0cyc, ReadDefault, FalkorReadIncSt],
  903. (instregex "^ST4Four(v16b|v8h|v4s)$")>;
  904. // FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
  905. def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VXVY_4ST_4VSD_0cyc, ReadDefault, FalkorReadIncSt],
  906. (instregex "^ST4Four(v16b|v8h|v4s)_POST$")>;
  907. // Branch Instructions
  908. // -----------------------------------------------------------------------------
  909. def : InstRW<[FalkorWr_1none_0cyc], (instrs B, TCRETURNdi)>;
  910. def : InstRW<[FalkorWr_1Z_0cyc], (instregex "^(BR|RET|(CBZ|CBNZ|TBZ|TBNZ)(W|X))$")>;
  911. def : InstRW<[FalkorWr_1Z_0cyc], (instrs RET_ReallyLR, TCRETURNri)>;
  912. def : InstRW<[FalkorWr_1ZB_0cyc], (instrs Bcc)>;
  913. def : InstRW<[FalkorWr_1XYZB_0cyc], (instrs BL)>;
  914. def : InstRW<[FalkorWr_1Z_1XY_0cyc], (instrs BLR)>;
  915. // Cryptography Extensions
  916. // -----------------------------------------------------------------------------
  917. def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs SHA1Hrr)>;
  918. def : InstRW<[FalkorWr_1VXVY_2cyc], (instrs AESIMCrr, AESMCrr)>;
  919. def : InstRW<[FalkorWr_2VXVY_3cyc], (instrs AESDrr, AESErr)>;
  920. def : InstRW<[FalkorWr_2VXVY_2cyc], (instrs SHA1SU0rrr, SHA1SU1rr, SHA256SU0rr)>;
  921. def : InstRW<[FalkorWr_1VX_1VY_4cyc], (instregex "^SHA1(C|M|P)rrr$")>;
  922. def : InstRW<[FalkorWr_1VX_1VY_5cyc], (instrs SHA256H2rrr, SHA256Hrrr)>;
  923. def : InstRW<[FalkorWr_4VXVY_3cyc], (instrs SHA256SU1rrr)>;
  924. // FP Load Instructions
  925. // -----------------------------------------------------------------------------
  926. def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
  927. (instregex "^LDR((Q|D|S|H|B)ui|(Q|D|S)l)$")>;
  928. def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd],
  929. (instregex "^LDR(Q|D|S|H|B)(post|pre)$")>;
  930. def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
  931. (instregex "^LDUR(Q|D|S|H|B)i$")>;
  932. def : InstRW<[FalkorWr_LDRro, FalkorReadIncLd],
  933. (instregex "^LDR(Q|D|H|S|B)ro(W|X)$")>;
  934. def : InstRW<[FalkorWr_2LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
  935. (instrs LDNPQi)>;
  936. def : InstRW<[FalkorWr_2LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
  937. (instrs LDPQi)>;
  938. def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
  939. (instregex "LDNP(D|S)i$")>;
  940. def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
  941. (instregex "LDP(D|S)i$")>;
  942. def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
  943. (instregex "LDP(D|S)(pre|post)$")>;
  944. def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
  945. (instregex "^LDPQ(pre|post)$")>;
  946. // FP Data Processing Instructions
  947. // -----------------------------------------------------------------------------
  948. def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCCMP(E)?(S|D)rr$")>;
  949. def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCMP(E)?(S|D)r(r|i)$")>;
  950. def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVT(A|M|N|P|Z)(S|U)U(W|X)(S|D)r$")>;
  951. def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(FABS|FNEG)(S|D)r$")>;
  952. def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCSEL(S|D)rrr$")>;
  953. def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^F(MAX|MIN)(NM)?(S|D)rr$")>;
  954. def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^F(MAX|MIN)(NM)?Pv2i(32|64)p$")>;
  955. def : InstRW<[FalkorWr_1VXVY_2cyc], (instrs FCVTSHr, FCVTDHr)>;
  956. def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)(S|D)r$")>;
  957. def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^FABD(32|64)$")>;
  958. def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(FADD|FSUB)(S|D)rr$")>;
  959. def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs FCVTHSr, FCVTHDr)>;
  960. def : InstRW<[FalkorWr_1VXVY_4cyc], (instrs FCVTSDr, FCVTDSr)>;
  961. def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc],
  962. (instregex "^F(N)?MULSrr$")>;
  963. def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc],
  964. (instregex "^F(N)?MULDrr$")>;
  965. def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instrs FDIVSrr)>;
  966. def : InstRW<[FalkorWr_1VX_1VY_14cyc],(instrs FDIVDrr)>;
  967. def : InstRW<[FalkorWr_1VX_1VY_12cyc],(instrs FSQRTSr)>;
  968. def : InstRW<[FalkorWr_1VX_1VY_21cyc],(instrs FSQRTDr)>;
  969. def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc, ReadDefault, ReadDefault, FalkorReadFMA32],
  970. (instregex "^F(N)?M(ADD|SUB)Srrr$")>;
  971. def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc, ReadDefault, ReadDefault, FalkorReadFMA64],
  972. (instregex "^F(N)?M(ADD|SUB)Drrr$")>;
  973. // FP Miscellaneous Instructions
  974. // -----------------------------------------------------------------------------
  975. def : InstRW<[FalkorWr_FMOV], (instregex "^FMOV(WS|XD|XDHigh)r$")>;
  976. def : InstRW<[FalkorWr_1GTOV_0cyc], (instregex "^FMOV(S|D)i$")>; // imm fwd
  977. def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVTZ(S|U)S(W|X)(D|S)ri$")>;
  978. def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVTZ(S|U)(d|s)$")>;
  979. def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FMOV(SW|DX|DXHigh)r$")>;
  980. def : InstRW<[FalkorWr_1VXVY_0cyc], (instregex "^FMOV(Sr|Dr|v.*_ns)$")>; // imm fwd
  981. // FIXME: We are currently generating movi v0.2d, #0 for these, which is worse than fmov wzr/xzr
  982. def : InstRW<[FalkorWr_2VXVY_0cyc], (instrs FMOVD0, FMOVS0)>; // imm fwd
  983. def : InstRW<[FalkorWr_1GTOV_4cyc], (instregex "^(S|U)CVTF(S|U)(W|X)(D|S)ri$")>;
  984. def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)CVTF(v1i32|v2i32|v1i64|v2f32|d|s)(_shift)?")>;
  985. def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(S|U)CVTF(v2i64|v4i32|v2f64|v4f32)(_shift)?")>;
  986. // Load Instructions
  987. // -----------------------------------------------------------------------------
  988. def : InstRW<[FalkorWr_1ST_0cyc], (instrs PRFMui, PRFMl)>;
  989. def : InstRW<[FalkorWr_1ST_0cyc], (instrs PRFUMi)>;
  990. def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
  991. (instregex "^LDNP(W|X)i$")>;
  992. def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
  993. (instregex "^LDP(W|X)i$")>;
  994. def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
  995. (instregex "^LDP(W|X)(post|pre)$")>;
  996. def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
  997. (instregex "^LDR(BB|HH|W|X)ui$")>;
  998. def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd],
  999. (instregex "^LDR(BB|HH|W|X)(post|pre)$")>;
  1000. def : InstRW<[FalkorWr_LDRro, FalkorReadIncLd],
  1001. (instregex "^LDR(BB|HH|W|X)ro(W|X)$")>;
  1002. def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
  1003. (instregex "^LDR(W|X)l$")>;
  1004. def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
  1005. (instregex "^LDTR(B|H|W|X)i$")>;
  1006. def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
  1007. (instregex "^LDUR(BB|HH|W|X)i$")>;
  1008. def : InstRW<[FalkorWr_PRFMro], (instregex "^PRFMro(W|X)$")>;
  // Sign-extending loads (LDPSW*, LDRS*, LDTRS*, LDURS*). The entries mirror the
  // shape of the zero-extending load entries (pair = two writes, pre/post =
  // leading FalkorWr_LdInc_none_2cyc, trailing FalkorReadIncLd) but reference
  // the `_4cyc` writes instead of the `_3cyc` ones; the register-offset form
  // uses its own FalkorWr_LDRSro record (defined outside this excerpt).
  1009. def : InstRW<[FalkorWr_1LD_4cyc, FalkorWr_none_4cyc, FalkorReadIncLd],
  1010. (instrs LDPSWi)>;
  1011. def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_4cyc, FalkorWr_none_4cyc, FalkorReadIncLd],
  1012. (instregex "^LDPSW(post|pre)$")>;
  1013. def : InstRW<[FalkorWr_1LD_4cyc, FalkorReadIncLd],
  1014. (instregex "^LDRS(BW|BX|HW|HX|W)ui$")>;
  1015. def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_4cyc, FalkorReadIncLd],
  1016. (instregex "^LDRS(BW|BX|HW|HX|W)(post|pre)$")>;
  1017. def : InstRW<[FalkorWr_LDRSro, FalkorReadIncLd],
  1018. (instregex "^LDRS(BW|BX|HW|HX|W)ro(W|X)$")>;
  1019. def : InstRW<[FalkorWr_1LD_4cyc, FalkorReadIncLd],
  1020. (instrs LDRSWl)>;
  1021. def : InstRW<[FalkorWr_1LD_4cyc, FalkorReadIncLd],
  1022. (instregex "^LDTRS(BW|BX|HW|HX|W)i$")>;
  1023. def : InstRW<[FalkorWr_1LD_4cyc, FalkorReadIncLd],
  1024. (instregex "^LDURS(BW|BX|HW|HX|W)i$")>;
  1025. // Miscellaneous Data-Processing Instructions
  1026. // -----------------------------------------------------------------------------
  // Bitfield moves (BFM/SBFM/UBFM), CRC32*, bit-count/reverse ops and EXTR.
  // Each entry has a single write and no explicit reads. CRC32 is the only one
  // tied to an X-only write; EXTR is the only one using a 2-uop (2XYZ) write.
  // NOTE(review): pipe/latency meaning is read off the FalkorWr_* names, whose
  // definitions are outside this excerpt -- confirm there.
  1027. def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(S|U)?BFM(W|X)ri$")>;
  1028. def : InstRW<[FalkorWr_1X_2cyc], (instregex "^CRC32.*$")>;
  1029. def : InstRW<[FalkorWr_1XYZ_2cyc], (instregex "^(CLS|CLZ|RBIT|REV|REV16|REV32)(W|X)r$")>;
  1030. def : InstRW<[FalkorWr_2XYZ_2cyc], (instregex "^EXTR(W|X)rri$")>;
  1031. // Divide and Multiply Instructions
  1032. // -----------------------------------------------------------------------------
  // Scalar multiply-accumulate entries list three reads after the write:
  // ReadDefault, ReadDefault, then FalkorReadIMA32/FalkorReadIMA64.
  // NOTE(review): the third read presumably lands on the accumulator operand so
  // that chained multiply-adds can forward into it -- confirm against the
  // FalkorReadIMA* definitions and the instruction operand order.
  1033. def : InstRW<[FalkorWr_IMUL64_1X_4cyc, ReadDefault, ReadDefault, FalkorReadIMA64],
  1034. (instregex "^(S|U)M(ADD|SUB)Lrrr$")>;
  1035. def : InstRW<[FalkorWr_IMUL32_1X_2cyc, ReadDefault, ReadDefault, FalkorReadIMA32],
  1036. (instregex "^M(ADD|SUB)Wrrr$")>;
  // SMULH/UMULH take the 64-bit multiply write but list no reads.
  1037. def : InstRW<[FalkorWr_IMUL64_1X_5cyc], (instregex "^(S|U)MULHrr$")>;
  1038. def : InstRW<[FalkorWr_IMUL64_1X_5cyc, ReadDefault, ReadDefault, FalkorReadIMA64],
  1039. (instregex "^M(ADD|SUB)Xrrr$")>;
  // Integer divides: separate writes for the W (8cyc) and X (11cyc) forms.
  1040. def : InstRW<[FalkorWr_1X_1Z_8cyc], (instregex "^(S|U)DIVWr$")>;
  1041. def : InstRW<[FalkorWr_1X_1Z_11cyc], (instregex "^(S|U)DIVXr$")>;
  // Vector widening multiplies (names matching `...v.*`); only the
  // accumulating MLAL/MLSL forms add a read, FalkorReadVMA.
  1042. def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc],
  1043. (instregex "^(S|U)MULLv.*$")>;
  1044. def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA],
  1045. (instregex "^(S|U)(MLAL|MLSL)v.*$")>;
  1046. // Move and Shift Instructions
  1047. // -----------------------------------------------------------------------------
  // Variable shifts/rotates, immediate moves, address materialisation and the
  // GOT-load pseudo. Entries tagged `imm fwd` all reference a 0-cycle write.
  1048. def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(LSLV|LSRV|ASRV|RORV)(W|X)r$")>;
  1049. def : InstRW<[FalkorWr_1XYZ_0cyc], (instregex "^MOVK(W|X)i$")>; // imm fwd
  // `^ADRP?$` covers both ADR and ADRP.
  1050. def : InstRW<[FalkorWr_1XYZB_0cyc], (instregex "^ADRP?$")>; // imm fwd
  1051. def : InstRW<[FalkorWr_1XYZB_0cyc], (instregex "^MOVN(W|X)i$")>; // imm fwd
  // MOVZ has its own FalkorWr_MOVZ record (defined outside this excerpt).
  1052. def : InstRW<[FalkorWr_MOVZ], (instregex "^MOVZ(W|X)i$")>;
  1053. def : InstRW<[FalkorWr_1XYZ_0cyc], (instrs MOVi32imm, MOVi64imm)>; // imm fwd (approximation)
  // The MOVaddr* and LOADgot entries are modelled with WriteSequence, which
  // combines the listed writes into a single write: two 1XYZ 1-cycle writes for
  // MOVaddr*, and a 3-cycle load write followed by a 1XYZ 1-cycle write for LOADgot.
  1054. def : InstRW<[WriteSequence<[FalkorWr_1XYZ_1cyc, FalkorWr_1XYZ_1cyc]>],
  1055. (instrs MOVaddr, MOVaddrBA, MOVaddrCP, MOVaddrEXT, MOVaddrJT, MOVaddrTLS)>;
  1056. def : InstRW<[WriteSequence<[FalkorWr_1LD_3cyc, FalkorWr_1XYZ_1cyc]>],
  1057. (instrs LOADgot)>;
  1058. // Other Instructions
  1059. // -----------------------------------------------------------------------------
  // Barriers, exception/system instructions, exclusive and acquire/release
  // memory accesses, and non-temporal pair stores.
  1060. def : InstRW<[FalkorWr_1LD_0cyc], (instrs CLREX, DMB, DSB)>;
  1061. def : InstRW<[FalkorWr_1none_0cyc], (instrs BRK, DCPS1, DCPS2, DCPS3, HINT, HLT, HVC, ISB, SMC, SVC)>;
  1062. def : InstRW<[FalkorWr_1ST_0cyc], (instrs SYSxt, SYSLxt)>;
  1063. def : InstRW<[FalkorWr_1Z_0cyc], (instrs MSRpstateImm1, MSRpstateImm4)>;
  // Acquire/exclusive loads reuse the ordinary 3-cycle load write and the
  // FalkorReadIncLd read; the pair forms add a second `none` write.
  1064. def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
  1065. (instregex "^(LDAR(B|H|W|X)|LDAXR(B|H|W|X)|LDXR(B|H|W|X))$")>;
  1066. def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
  1067. (instregex "^(LDAXP(W|X)|LDXP(W|X))$")>;
  1068. def : InstRW<[FalkorWr_1LD_3cyc], (instrs MRS, MOVbaseTLS)>;
  1069. def : InstRW<[FalkorWr_1LD_1Z_3cyc], (instrs DRPS)>;
  1070. def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instrs MSR)>;
  1071. def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
  1072. (instrs STNPWi, STNPXi)>;
  1073. def : InstRW<[FalkorWr_2LD_1Z_3cyc], (instrs ERET)>;
  // NOTE(review): the size group here is only `(B|H)?`, whereas the other
  // patterns in this section spell `(B|H|W|X)` -- confirm whether W/X-suffixed
  // LDCLR forms are meant to be covered by this entry.
  1074. def : InstRW<[FalkorWr_1ST_1SD_1LD_3cyc], (instregex "^LDCLR(A|AL|L)?(B|H)?$")>;
  // Release/exclusive stores. The non-release forms (STLR, STXP, STXR) use the
  // 0-cycle 1ST_1SD_1LD write; STLXP/STLXR use the 3-cycle 2LD_1ST_1SD write.
  // NOTE(review): the number of ReadDefault entries ahead of FalkorReadIncSt is
  // not uniform: STXP lists two while STLXP lists three, and STLR lists one
  // while STXR/STLXR list two. Reads are matched to use operands by position,
  // so verify each count against the instruction's operand list; an extra
  // ReadDefault would keep FalkorReadIncSt from landing on the base register.
  1075. def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc, ReadDefault, FalkorReadIncSt],
  1076. (instregex "^STLR(B|H|W|X)$")>;
  1077. def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
  1078. (instregex "^STXP(W|X)$")>;
  1079. def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
  1080. (instregex "^STXR(B|H|W|X)$")>;
  1081. def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc, ReadDefault, ReadDefault, ReadDefault, FalkorReadIncSt],
  1082. (instregex "^STLXP(W|X)$")>;
  1083. def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
  1084. (instregex "^STLXR(B|H|W|X)$")>;
  1085. // Store Instructions
  1086. // -----------------------------------------------------------------------------
  // Integer store entries. Layout used throughout this group:
  //   * the store itself is FalkorWr_1SD_1ST_0cyc (register-offset forms use a
  //     dedicated FalkorWr_STRro record defined outside this excerpt);
  //   * pre/post-indexed forms put FalkorWr_StInc_none_2cyc first (presumably
  //     for the written-back base register);
  //   * the read list is one ReadDefault per stored register (two for STP, one
  //     for STR/STTR/STUR) followed by FalkorReadIncSt, which presumably
  //     applies to the base-address operand -- confirm against its definition.
  1087. def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
  1088. (instregex "^STP(W|X)i$")>;
  1089. def : InstRW<[FalkorWr_StInc_none_2cyc, FalkorWr_1SD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
  1090. (instregex "^STP(W|X)(post|pre)$")>;
  1091. def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
  1092. (instregex "^STR(BB|HH|W|X)ui$")>;
  1093. def : InstRW<[FalkorWr_StInc_none_2cyc, FalkorWr_1SD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
  1094. (instregex "^STR(BB|HH|W|X)(post|pre)$")>;
  1095. def : InstRW<[FalkorWr_STRro, ReadDefault, FalkorReadIncSt],
  1096. (instregex "^STR(BB|HH|W|X)ro(W|X)$")>;
  1097. def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
  1098. (instregex "^STTR(B|H|W|X)i$")>;
  1099. def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
  1100. (instregex "^STUR(BB|HH|W|X)i$")>;