arm_neon.td 94 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511
661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542
0552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095
  1. //===--- arm_neon.td - ARM NEON compiler interface ------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file defines the TableGen definitions from which the ARM NEON header
  10. // file will be generated. See ARM document DUI0348B.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. include "arm_neon_incl.td"
  14. def OP_ADD : Op<(op "+", $p0, $p1)>;
  15. def OP_ADDL : Op<(op "+", (call "vmovl", $p0), (call "vmovl", $p1))>;
  16. def OP_ADDLHi : Op<(op "+", (call "vmovl_high", $p0),
  17. (call "vmovl_high", $p1))>;
  18. def OP_ADDW : Op<(op "+", $p0, (call "vmovl", $p1))>;
  19. def OP_ADDWHi : Op<(op "+", $p0, (call "vmovl_high", $p1))>;
  20. def OP_SUB : Op<(op "-", $p0, $p1)>;
  21. def OP_SUBL : Op<(op "-", (call "vmovl", $p0), (call "vmovl", $p1))>;
  22. def OP_SUBLHi : Op<(op "-", (call "vmovl_high", $p0),
  23. (call "vmovl_high", $p1))>;
  24. def OP_SUBW : Op<(op "-", $p0, (call "vmovl", $p1))>;
  25. def OP_SUBWHi : Op<(op "-", $p0, (call "vmovl_high", $p1))>;
  26. def OP_MUL : Op<(op "*", $p0, $p1)>;
  27. def OP_MLA : Op<(op "+", $p0, (op "*", $p1, $p2))>;
  28. def OP_MLAL : Op<(op "+", $p0, (call "vmull", $p1, $p2))>;
  29. def OP_MULLHi : Op<(call "vmull", (call "vget_high", $p0),
  30. (call "vget_high", $p1))>;
  31. def OP_MULLHi_P64 : Op<(call "vmull",
  32. (cast "poly64_t", (call "vget_high", $p0)),
  33. (cast "poly64_t", (call "vget_high", $p1)))>;
  34. def OP_MULLHi_N : Op<(call "vmull_n", (call "vget_high", $p0), $p1)>;
  35. def OP_MLALHi : Op<(call "vmlal", $p0, (call "vget_high", $p1),
  36. (call "vget_high", $p2))>;
  37. def OP_MLALHi_N : Op<(call "vmlal_n", $p0, (call "vget_high", $p1), $p2)>;
  38. def OP_MLS : Op<(op "-", $p0, (op "*", $p1, $p2))>;
  39. def OP_FMLS : Op<(call "vfma", $p0, (op "-", $p1), $p2)>;
  40. def OP_MLSL : Op<(op "-", $p0, (call "vmull", $p1, $p2))>;
  41. def OP_MLSLHi : Op<(call "vmlsl", $p0, (call "vget_high", $p1),
  42. (call "vget_high", $p2))>;
  43. def OP_MLSLHi_N : Op<(call "vmlsl_n", $p0, (call "vget_high", $p1), $p2)>;
  44. def OP_MUL_N : Op<(op "*", $p0, (dup $p1))>;
  45. def OP_MULX_N : Op<(call "vmulx", $p0, (dup $p1))>;
  46. def OP_MLA_N : Op<(op "+", $p0, (op "*", $p1, (dup $p2)))>;
  47. def OP_MLS_N : Op<(op "-", $p0, (op "*", $p1, (dup $p2)))>;
  48. def OP_FMLA_N : Op<(call "vfma", $p0, $p1, (dup $p2))>;
  49. def OP_FMLS_N : Op<(call "vfma", $p0, (op "-", $p1), (dup $p2))>;
  50. def OP_MLAL_N : Op<(op "+", $p0, (call "vmull", $p1, (dup $p2)))>;
  51. def OP_MLSL_N : Op<(op "-", $p0, (call "vmull", $p1, (dup $p2)))>;
  52. def OP_MUL_LN : Op<(op "*", $p0, (call_mangled "splat_lane", $p1, $p2))>;
  53. def OP_MULX_LN : Op<(call "vmulx", $p0, (call_mangled "splat_lane", $p1, $p2))>;
  54. def OP_MULL_N : Op<(call "vmull", $p0, (dup $p1))>;
  55. def OP_MULL_LN : Op<(call "vmull", $p0, (call_mangled "splat_lane", $p1, $p2))>;
  56. def OP_MULLHi_LN: Op<(call "vmull", (call "vget_high", $p0), (call_mangled "splat_lane", $p1, $p2))>;
  57. def OP_MLA_LN : Op<(op "+", $p0, (op "*", $p1, (call_mangled "splat_lane", $p2, $p3)))>;
  58. def OP_MLS_LN : Op<(op "-", $p0, (op "*", $p1, (call_mangled "splat_lane", $p2, $p3)))>;
  59. def OP_MLAL_LN : Op<(op "+", $p0, (call "vmull", $p1, (call_mangled "splat_lane", $p2, $p3)))>;
  60. def OP_MLALHi_LN: Op<(op "+", $p0, (call "vmull", (call "vget_high", $p1),
  61. (call_mangled "splat_lane", $p2, $p3)))>;
  62. def OP_MLSL_LN : Op<(op "-", $p0, (call "vmull", $p1, (call_mangled "splat_lane", $p2, $p3)))>;
  63. def OP_MLSLHi_LN : Op<(op "-", $p0, (call "vmull", (call "vget_high", $p1),
  64. (call_mangled "splat_lane", $p2, $p3)))>;
  65. def OP_QDMULL_N : Op<(call "vqdmull", $p0, (dup $p1))>;
  66. def OP_QDMULL_LN : Op<(call "vqdmull", $p0, (call_mangled "splat_lane", $p1, $p2))>;
  67. def OP_QDMULLHi_LN : Op<(call "vqdmull", (call "vget_high", $p0),
  68. (call_mangled "splat_lane", $p1, $p2))>;
  69. def OP_QDMLAL_N : Op<(call "vqdmlal", $p0, $p1, (dup $p2))>;
  70. def OP_QDMLAL_LN : Op<(call "vqdmlal", $p0, $p1, (call_mangled "splat_lane", $p2, $p3))>;
  71. def OP_QDMLALHi_LN : Op<(call "vqdmlal", $p0, (call "vget_high", $p1),
  72. (call_mangled "splat_lane", $p2, $p3))>;
  73. def OP_QDMLSL_N : Op<(call "vqdmlsl", $p0, $p1, (dup $p2))>;
  74. def OP_QDMLSL_LN : Op<(call "vqdmlsl", $p0, $p1, (call_mangled "splat_lane", $p2, $p3))>;
  75. def OP_QDMLSLHi_LN : Op<(call "vqdmlsl", $p0, (call "vget_high", $p1),
  76. (call_mangled "splat_lane", $p2, $p3))>;
  77. def OP_QDMULH_N : Op<(call "vqdmulh", $p0, (dup $p1))>;
  78. def OP_QDMULH_LN : Op<(call "vqdmulh", $p0, (call_mangled "splat_lane", $p1, $p2))>;
  79. def OP_QRDMULH_LN : Op<(call "vqrdmulh", $p0, (call_mangled "splat_lane", $p1, $p2))>;
  80. def OP_QRDMULH_N : Op<(call "vqrdmulh", $p0, (dup $p1))>;
  81. def OP_QRDMLAH_LN : Op<(call "vqrdmlah", $p0, $p1, (call_mangled "splat_lane", $p2, $p3))>;
  82. def OP_QRDMLSH_LN : Op<(call "vqrdmlsh", $p0, $p1, (call_mangled "splat_lane", $p2, $p3))>;
  83. def OP_FMS_LN : Op<(call "vfma_lane", $p0, (op "-", $p1), $p2, $p3)>;
  84. def OP_FMS_LNQ : Op<(call "vfma_laneq", $p0, (op "-", $p1), $p2, $p3)>;
  85. def OP_TRN1 : Op<(shuffle $p0, $p1, (interleave (decimate mask0, 2),
  86. (decimate mask1, 2)))>;
  87. def OP_ZIP1 : Op<(shuffle $p0, $p1, (lowhalf (interleave mask0, mask1)))>;
  88. def OP_UZP1 : Op<(shuffle $p0, $p1, (add (decimate mask0, 2),
  89. (decimate mask1, 2)))>;
  90. def OP_TRN2 : Op<(shuffle $p0, $p1, (interleave
  91. (decimate (rotl mask0, 1), 2),
  92. (decimate (rotl mask1, 1), 2)))>;
  93. def OP_ZIP2 : Op<(shuffle $p0, $p1, (highhalf (interleave mask0, mask1)))>;
  94. def OP_UZP2 : Op<(shuffle $p0, $p1, (add (decimate (rotl mask0, 1), 2),
  95. (decimate (rotl mask1, 1), 2)))>;
  96. def OP_EQ : Op<(cast "R", (op "==", $p0, $p1))>;
  97. def OP_GE : Op<(cast "R", (op ">=", $p0, $p1))>;
  98. def OP_LE : Op<(cast "R", (op "<=", $p0, $p1))>;
  99. def OP_GT : Op<(cast "R", (op ">", $p0, $p1))>;
  100. def OP_LT : Op<(cast "R", (op "<", $p0, $p1))>;
  101. def OP_NEG : Op<(op "-", $p0)>;
  102. def OP_NOT : Op<(op "~", $p0)>;
  103. def OP_AND : Op<(op "&", $p0, $p1)>;
  104. def OP_OR : Op<(op "|", $p0, $p1)>;
  105. def OP_XOR : Op<(op "^", $p0, $p1)>;
  106. def OP_ANDN : Op<(op "&", $p0, (op "~", $p1))>;
  107. def OP_ORN : Op<(op "|", $p0, (op "~", $p1))>;
  108. def OP_CAST : LOp<[(save_temp $promote, $p0),
  109. (cast "R", $promote)]>;
  110. def OP_HI : Op<(shuffle $p0, $p0, (highhalf mask0))>;
  111. def OP_LO : Op<(shuffle $p0, $p0, (lowhalf mask0))>;
  112. def OP_CONC : Op<(shuffle $p0, $p1, (add mask0, mask1))>;
  113. def OP_DUP : Op<(dup $p0)>;
  114. def OP_DUP_LN : Op<(call_mangled "splat_lane", $p0, $p1)>;
  115. def OP_SEL : Op<(cast "R", (op "|",
  116. (op "&", $p0, (cast $p0, $p1)),
  117. (op "&", (op "~", $p0), (cast $p0, $p2))))>;
  118. def OP_REV16 : Op<(shuffle $p0, $p0, (rev 16, mask0))>;
  119. def OP_REV32 : Op<(shuffle $p0, $p0, (rev 32, mask0))>;
  120. def OP_REV64 : Op<(shuffle $p0, $p0, (rev 64, mask0))>;
  121. def OP_XTN : Op<(call "vcombine", $p0, (call "vmovn", $p1))>;
  122. def OP_SQXTUN : Op<(call "vcombine", (cast $p0, "U", $p0),
  123. (call "vqmovun", $p1))>;
  124. def OP_QXTN : Op<(call "vcombine", $p0, (call "vqmovn", $p1))>;
  125. def OP_VCVT_NA_HI_F16 : Op<(call "vcombine", $p0, (call "vcvt_f16_f32", $p1))>;
  126. def OP_VCVT_NA_HI_F32 : Op<(call "vcombine", $p0, (call "vcvt_f32_f64", $p1))>;
  127. def OP_VCVT_EX_HI_F32 : Op<(call "vcvt_f32_f16", (call "vget_high", $p0))>;
  128. def OP_VCVT_EX_HI_F64 : Op<(call "vcvt_f64_f32", (call "vget_high", $p0))>;
  129. def OP_VCVTX_HI : Op<(call "vcombine", $p0, (call "vcvtx_f32", $p1))>;
  130. def OP_REINT : Op<(cast "R", $p0)>;
  131. def OP_ADDHNHi : Op<(call "vcombine", $p0, (call "vaddhn", $p1, $p2))>;
  132. def OP_RADDHNHi : Op<(call "vcombine", $p0, (call "vraddhn", $p1, $p2))>;
  133. def OP_SUBHNHi : Op<(call "vcombine", $p0, (call "vsubhn", $p1, $p2))>;
  134. def OP_RSUBHNHi : Op<(call "vcombine", $p0, (call "vrsubhn", $p1, $p2))>;
  135. def OP_ABDL : Op<(cast "R", (call "vmovl", (cast $p0, "U",
  136. (call "vabd", $p0, $p1))))>;
  137. def OP_ABDLHi : Op<(call "vabdl", (call "vget_high", $p0),
  138. (call "vget_high", $p1))>;
  139. def OP_ABA : Op<(op "+", $p0, (call "vabd", $p1, $p2))>;
  140. def OP_ABAL : Op<(op "+", $p0, (call "vabdl", $p1, $p2))>;
  141. def OP_ABALHi : Op<(call "vabal", $p0, (call "vget_high", $p1),
  142. (call "vget_high", $p2))>;
  143. def OP_QDMULLHi : Op<(call "vqdmull", (call "vget_high", $p0),
  144. (call "vget_high", $p1))>;
  145. def OP_QDMULLHi_N : Op<(call "vqdmull_n", (call "vget_high", $p0), $p1)>;
  146. def OP_QDMLALHi : Op<(call "vqdmlal", $p0, (call "vget_high", $p1),
  147. (call "vget_high", $p2))>;
  148. def OP_QDMLALHi_N : Op<(call "vqdmlal_n", $p0, (call "vget_high", $p1), $p2)>;
  149. def OP_QDMLSLHi : Op<(call "vqdmlsl", $p0, (call "vget_high", $p1),
  150. (call "vget_high", $p2))>;
  151. def OP_QDMLSLHi_N : Op<(call "vqdmlsl_n", $p0, (call "vget_high", $p1), $p2)>;
  152. def OP_DIV : Op<(op "/", $p0, $p1)>;
  153. def OP_LONG_HI : Op<(cast "R", (call (name_replace "_high_", "_"),
  154. (call "vget_high", $p0), $p1))>;
  155. def OP_NARROW_HI : Op<(cast "R", (call "vcombine",
  156. (cast "R", "H", $p0),
  157. (cast "R", "H",
  158. (call (name_replace "_high_", "_"),
  159. $p1, $p2))))>;
  160. def OP_MOVL_HI : LOp<[(save_temp $a1, (call "vget_high", $p0)),
  161. (cast "R",
  162. (call "vshll_n", $a1, (literal "int32_t", "0")))]>;
  163. def OP_COPY_LN : Op<(call "vset_lane", (call "vget_lane", $p2, $p3), $p0, $p1)>;
  164. def OP_SCALAR_MUL_LN : Op<(op "*", $p0, (call "vget_lane", $p1, $p2))>;
  165. def OP_SCALAR_MULX_LN : Op<(call "vmulx", $p0, (call "vget_lane", $p1, $p2))>;
  166. def OP_SCALAR_VMULX_LN : LOp<[(save_temp $x, (call "vget_lane", $p0,
  167. (literal "int32_t", "0"))),
  168. (save_temp $y, (call "vget_lane", $p1, $p2)),
  169. (save_temp $z, (call "vmulx", $x, $y)),
  170. (call "vset_lane", $z, $p0, $p2)]>;
  171. def OP_SCALAR_VMULX_LNQ : LOp<[(save_temp $x, (call "vget_lane", $p0,
  172. (literal "int32_t", "0"))),
  173. (save_temp $y, (call "vget_lane", $p1, $p2)),
  174. (save_temp $z, (call "vmulx", $x, $y)),
  175. (call "vset_lane", $z, $p0, (literal "int32_t",
  176. "0"))]>;
  177. class ScalarMulOp<string opname> :
  178. Op<(call opname, $p0, (call "vget_lane", $p1, $p2))>;
  179. def OP_SCALAR_QDMULL_LN : ScalarMulOp<"vqdmull">;
  180. def OP_SCALAR_QDMULH_LN : ScalarMulOp<"vqdmulh">;
  181. def OP_SCALAR_QRDMULH_LN : ScalarMulOp<"vqrdmulh">;
  182. def OP_SCALAR_QRDMLAH_LN : Op<(call "vqrdmlah", $p0, $p1,
  183. (call "vget_lane", $p2, $p3))>;
  184. def OP_SCALAR_QRDMLSH_LN : Op<(call "vqrdmlsh", $p0, $p1,
  185. (call "vget_lane", $p2, $p3))>;
  186. def OP_SCALAR_HALF_GET_LN : Op<(bitcast "float16_t",
  187. (call "vget_lane",
  188. (bitcast "int16x4_t", $p0), $p1))>;
  189. def OP_SCALAR_HALF_GET_LNQ : Op<(bitcast "float16_t",
  190. (call "vget_lane",
  191. (bitcast "int16x8_t", $p0), $p1))>;
  192. def OP_SCALAR_HALF_SET_LN : Op<(bitcast "float16x4_t",
  193. (call "vset_lane",
  194. (bitcast "int16_t", $p0),
  195. (bitcast "int16x4_t", $p1), $p2))>;
  196. def OP_SCALAR_HALF_SET_LNQ : Op<(bitcast "float16x8_t",
  197. (call "vset_lane",
  198. (bitcast "int16_t", $p0),
  199. (bitcast "int16x8_t", $p1), $p2))>;
  200. def OP_DOT_LN
  201. : Op<(call "vdot", $p0, $p1,
  202. (bitcast $p1, (call_mangled "splat_lane", (bitcast "32", $p2), $p3)))>;
  203. def OP_DOT_LNQ
  204. : Op<(call "vdot", $p0, $p1,
  205. (bitcast $p1, (call_mangled "splat_lane", (bitcast "32", $p2), $p3)))>;
  206. def OP_FMLAL_LN : Op<(call "vfmlal_low", $p0, $p1,
  207. (dup_typed $p1, (call "vget_lane", $p2, $p3)))>;
  208. def OP_FMLSL_LN : Op<(call "vfmlsl_low", $p0, $p1,
  209. (dup_typed $p1, (call "vget_lane", $p2, $p3)))>;
  210. def OP_FMLAL_LN_Hi : Op<(call "vfmlal_high", $p0, $p1,
  211. (dup_typed $p1, (call "vget_lane", $p2, $p3)))>;
  212. def OP_FMLSL_LN_Hi : Op<(call "vfmlsl_high", $p0, $p1,
  213. (dup_typed $p1, (call "vget_lane", $p2, $p3)))>;
  214. def OP_USDOT_LN
  215. : Op<(call "vusdot", $p0, $p1,
  216. (cast "8", "S", (call_mangled "splat_lane", (bitcast "int32x2_t", $p2), $p3)))>;
  217. def OP_USDOT_LNQ
  218. : Op<(call "vusdot", $p0, $p1,
  219. (cast "8", "S", (call_mangled "splat_lane", (bitcast "int32x4_t", $p2), $p3)))>;
  220. // sudot splats the second vector and then calls vusdot
  221. def OP_SUDOT_LN
  222. : Op<(call "vusdot", $p0,
  223. (cast "8", "U", (call_mangled "splat_lane", (bitcast "int32x2_t", $p2), $p3)), $p1)>;
  224. def OP_SUDOT_LNQ
  225. : Op<(call "vusdot", $p0,
  226. (cast "8", "U", (call_mangled "splat_lane", (bitcast "int32x4_t", $p2), $p3)), $p1)>;
  227. def OP_BFDOT_LN
  228. : Op<(call "vbfdot", $p0, $p1,
  229. (bitcast $p1, (call_mangled "splat_lane", (bitcast "float32x2_t", $p2), $p3)))>;
  230. def OP_BFDOT_LNQ
  231. : Op<(call "vbfdot", $p0, $p1,
  232. (bitcast $p1, (call_mangled "splat_lane", (bitcast "float32x4_t", $p2), $p3)))>;
  233. def OP_BFMLALB_LN
  234. : Op<(call "vbfmlalb", $p0, $p1,
  235. (dup_typed $p1, (call "vget_lane", $p2, $p3)))>;
  236. def OP_BFMLALT_LN
  237. : Op<(call "vbfmlalt", $p0, $p1,
  238. (dup_typed $p1, (call "vget_lane", $p2, $p3)))>;
  239. def OP_VCVT_F32_BF16
  240. : Op<(bitcast "R",
  241. (call "vshll_n", (bitcast "int16x4_t", $p0),
  242. (literal "int32_t", "16")))>;
  243. def OP_VCVT_F32_BF16_LO
  244. : Op<(call "vcvt_f32_bf16", (call "vget_low", $p0))>;
  245. def OP_VCVT_F32_BF16_HI
  246. : Op<(call "vcvt_f32_bf16", (call "vget_high", $p0))>;
  247. def OP_VCVT_BF16_F32_LO_A64
  248. : Op<(call "__a64_vcvtq_low_bf16", $p0)>;
  249. def OP_VCVT_BF16_F32_A64
  250. : Op<(call "vget_low", (call "__a64_vcvtq_low_bf16", $p0))>;
  251. def OP_VCVT_BF16_F32_A32
  252. : Op<(call "__a32_vcvt_bf16", $p0)>;
  253. def OP_VCVT_BF16_F32_LO_A32
  254. : Op<(call "vcombine", (cast "bfloat16x4_t", (literal "uint64_t", "0ULL")),
  255. (call "__a32_vcvt_bf16", $p0))>;
  256. def OP_VCVT_BF16_F32_HI_A32
  257. : Op<(call "vcombine", (call "__a32_vcvt_bf16", $p1),
  258. (call "vget_low", $p0))>;
  259. def OP_CVT_F32_BF16
  260. : Op<(bitcast "R", (op "<<", (bitcast "int32_t", $p0),
  261. (literal "int32_t", "16")))>;
  262. //===----------------------------------------------------------------------===//
  263. // Auxiliary Instructions
  264. //===----------------------------------------------------------------------===//
  265. // Splat operation - performs a range-checked splat over a vector
  266. def SPLAT : WInst<"splat_lane", ".(!q)I",
  267. "UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPl">;
  268. def SPLATQ : WInst<"splat_laneq", ".(!Q)I",
  269. "UcUsUicsilPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUlhdQhQdPlQPl"> {
  270. let isLaneQ = 1;
  271. }
  272. let ArchGuard = "defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC)" in {
  273. def SPLAT_BF : WInst<"splat_lane", ".(!q)I", "bQb">;
  274. def SPLATQ_BF : WInst<"splat_laneq", ".(!Q)I", "bQb"> {
  275. let isLaneQ = 1;
  276. }
  277. }
  278. //===----------------------------------------------------------------------===//
  279. // Intrinsics
  280. //===----------------------------------------------------------------------===//
  281. ////////////////////////////////////////////////////////////////////////////////
  282. // E.3.1 Addition
  283. def VADD : IOpInst<"vadd", "...",
  284. "csilfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUl", OP_ADD>;
  285. def VADDL : SOpInst<"vaddl", "(>Q)..", "csiUcUsUi", OP_ADDL>;
  286. def VADDW : SOpInst<"vaddw", "(>Q)(>Q).", "csiUcUsUi", OP_ADDW>;
  287. def VHADD : SInst<"vhadd", "...", "csiUcUsUiQcQsQiQUcQUsQUi">;
  288. def VRHADD : SInst<"vrhadd", "...", "csiUcUsUiQcQsQiQUcQUsQUi">;
  289. def VQADD : SInst<"vqadd", "...", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
  290. def VADDHN : IInst<"vaddhn", "<QQ", "silUsUiUl">;
  291. def VRADDHN : IInst<"vraddhn", "<QQ", "silUsUiUl">;
  292. ////////////////////////////////////////////////////////////////////////////////
  293. // E.3.2 Multiplication
  294. def VMUL : IOpInst<"vmul", "...", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_MUL>;
  295. def VMULP : SInst<"vmul", "...", "PcQPc">;
  296. def VMLA : IOpInst<"vmla", "....", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_MLA>;
  297. def VMLAL : SOpInst<"vmlal", "(>Q)(>Q)..", "csiUcUsUi", OP_MLAL>;
  298. def VMLS : IOpInst<"vmls", "....", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_MLS>;
  299. def VMLSL : SOpInst<"vmlsl", "(>Q)(>Q)..", "csiUcUsUi", OP_MLSL>;
  300. def VQDMULH : SInst<"vqdmulh", "...", "siQsQi">;
  301. def VQRDMULH : SInst<"vqrdmulh", "...", "siQsQi">;
  302. let ArchGuard = "defined(__ARM_FEATURE_QRDMX)" in {
  303. def VQRDMLAH : SInst<"vqrdmlah", "....", "siQsQi">;
  304. def VQRDMLSH : SInst<"vqrdmlsh", "....", "siQsQi">;
  305. }
  306. def VQDMLAL : SInst<"vqdmlal", "(>Q)(>Q)..", "si">;
  307. def VQDMLSL : SInst<"vqdmlsl", "(>Q)(>Q)..", "si">;
  308. def VMULL : SInst<"vmull", "(>Q)..", "csiUcUsUiPc">;
  309. def VQDMULL : SInst<"vqdmull", "(>Q)..", "si">;
  310. ////////////////////////////////////////////////////////////////////////////////
  311. // E.3.3 Subtraction
  312. def VSUB : IOpInst<"vsub", "...",
  313. "csilfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUl", OP_SUB>;
  314. def VSUBL : SOpInst<"vsubl", "(>Q)..", "csiUcUsUi", OP_SUBL>;
  315. def VSUBW : SOpInst<"vsubw", "(>Q)(>Q).", "csiUcUsUi", OP_SUBW>;
  316. def VQSUB : SInst<"vqsub", "...", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
  317. def VHSUB : SInst<"vhsub", "...", "csiUcUsUiQcQsQiQUcQUsQUi">;
  318. def VSUBHN : IInst<"vsubhn", "<QQ", "silUsUiUl">;
  319. def VRSUBHN : IInst<"vrsubhn", "<QQ", "silUsUiUl">;
  320. ////////////////////////////////////////////////////////////////////////////////
  321. // E.3.4 Comparison
  322. def VCEQ : IOpInst<"vceq", "U..", "csifUcUsUiPcQcQsQiQfQUcQUsQUiQPc", OP_EQ>;
  323. def VCGE : SOpInst<"vcge", "U..", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_GE>;
  324. let InstName = "vcge" in
  325. def VCLE : SOpInst<"vcle", "U..", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_LE>;
  326. def VCGT : SOpInst<"vcgt", "U..", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_GT>;
  327. let InstName = "vcgt" in
  328. def VCLT : SOpInst<"vclt", "U..", "csifUcUsUiQcQsQiQfQUcQUsQUi", OP_LT>;
  329. let InstName = "vacge" in {
  330. def VCAGE : IInst<"vcage", "U..", "fQf">;
  331. def VCALE : IInst<"vcale", "U..", "fQf">;
  332. }
  333. let InstName = "vacgt" in {
  334. def VCAGT : IInst<"vcagt", "U..", "fQf">;
  335. def VCALT : IInst<"vcalt", "U..", "fQf">;
  336. }
  337. def VTST : WInst<"vtst", "U..", "csiUcUsUiPcPsQcQsQiQUcQUsQUiQPcQPs">;
  338. ////////////////////////////////////////////////////////////////////////////////
  339. // E.3.5 Absolute Difference
  340. def VABD : SInst<"vabd", "...", "csiUcUsUifQcQsQiQUcQUsQUiQf">;
  341. def VABDL : SOpInst<"vabdl", "(>Q)..", "csiUcUsUi", OP_ABDL>;
  342. def VABA : SOpInst<"vaba", "....", "csiUcUsUiQcQsQiQUcQUsQUi", OP_ABA>;
  343. def VABAL : SOpInst<"vabal", "(>Q)(>Q)..", "csiUcUsUi", OP_ABAL>;
  344. ////////////////////////////////////////////////////////////////////////////////
  345. // E.3.6 Max/Min
  346. def VMAX : SInst<"vmax", "...", "csiUcUsUifQcQsQiQUcQUsQUiQf">;
  347. def VMIN : SInst<"vmin", "...", "csiUcUsUifQcQsQiQUcQUsQUiQf">;
  348. ////////////////////////////////////////////////////////////////////////////////
  349. // E.3.7 Pairwise Addition
  350. def VPADD : IInst<"vpadd", "...", "csiUcUsUif">;
  351. def VPADDL : SInst<"vpaddl", ">.", "csiUcUsUiQcQsQiQUcQUsQUi">;
  352. def VPADAL : SInst<"vpadal", ">>.", "csiUcUsUiQcQsQiQUcQUsQUi">;
  353. ////////////////////////////////////////////////////////////////////////////////
  354. // E.3.8-9 Folding Max/Min
  355. def VPMAX : SInst<"vpmax", "...", "csiUcUsUif">;
  356. def VPMIN : SInst<"vpmin", "...", "csiUcUsUif">;
  357. ////////////////////////////////////////////////////////////////////////////////
  358. // E.3.10 Reciprocal/Sqrt
  359. def VRECPS : IInst<"vrecps", "...", "fQf">;
  360. def VRSQRTS : IInst<"vrsqrts", "...", "fQf">;
  361. ////////////////////////////////////////////////////////////////////////////////
  362. // E.3.11 Shifts by signed variable
  363. def VSHL : SInst<"vshl", "..S", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
  364. def VQSHL : SInst<"vqshl", "..S", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
  365. def VRSHL : SInst<"vrshl", "..S", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
  366. def VQRSHL : SInst<"vqrshl", "..S", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
  367. ////////////////////////////////////////////////////////////////////////////////
  368. // E.3.12 Shifts by constant
  369. let isShift = 1 in {
  370. def VSHR_N : SInst<"vshr_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
  371. def VSHL_N : IInst<"vshl_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
  372. def VRSHR_N : SInst<"vrshr_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
  373. def VSRA_N : SInst<"vsra_n", "...I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
  374. def VRSRA_N : SInst<"vrsra_n", "...I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
  375. def VQSHL_N : SInst<"vqshl_n", "..I", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl">;
  376. def VQSHLU_N : SInst<"vqshlu_n", "U.I", "csilQcQsQiQl">;
  377. def VSHRN_N : IInst<"vshrn_n", "<QI", "silUsUiUl">;
  378. def VQSHRUN_N : SInst<"vqshrun_n", "(<U)QI", "sil">;
  379. def VQRSHRUN_N : SInst<"vqrshrun_n", "(<U)QI", "sil">;
  380. def VQSHRN_N : SInst<"vqshrn_n", "<QI", "silUsUiUl">;
  381. def VRSHRN_N : IInst<"vrshrn_n", "<QI", "silUsUiUl">;
  382. def VQRSHRN_N : SInst<"vqrshrn_n", "<QI", "silUsUiUl">;
  383. def VSHLL_N : SInst<"vshll_n", "(>Q).I", "csiUcUsUi">;
  384. ////////////////////////////////////////////////////////////////////////////////
  385. // E.3.13 Shifts with insert
  386. def VSRI_N : WInst<"vsri_n", "...I",
  387. "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPs">;
  388. def VSLI_N : WInst<"vsli_n", "...I",
  389. "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPs">;
  390. }
  391. ////////////////////////////////////////////////////////////////////////////////
  392. // E.3.14 Loads and stores of a single vector
  393. def VLD1 : WInst<"vld1", ".(c*!)",
  394. "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs">;
  395. def VLD1_X2 : WInst<"vld1_x2", "2(c*!)",
  396. "cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPs">;
  397. def VLD1_X3 : WInst<"vld1_x3", "3(c*!)",
  398. "cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPs">;
  399. def VLD1_X4 : WInst<"vld1_x4", "4(c*!)",
  400. "cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPs">;
  401. def VLD1_LANE : WInst<"vld1_lane", ".(c*!).I",
  402. "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs">;
  403. def VLD1_DUP : WInst<"vld1_dup", ".(c*!)",
  404. "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs">;
  405. def VST1 : WInst<"vst1", "v*(.!)",
  406. "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs">;
  407. def VST1_X2 : WInst<"vst1_x2", "v*(2!)",
  408. "cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPs">;
  409. def VST1_X3 : WInst<"vst1_x3", "v*(3!)",
  410. "cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPs">;
  411. def VST1_X4 : WInst<"vst1_x4", "v*(4!)",
  412. "cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPs">;
  413. def VST1_LANE : WInst<"vst1_lane", "v*(.!)I",
  414. "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs">;
  415. let ArchGuard = "(__ARM_FP & 2)" in {
  416. def VLD1_F16 : WInst<"vld1", ".(c*!)", "hQh">;
  417. def VLD1_X2_F16 : WInst<"vld1_x2", "2(c*!)", "hQh">;
  418. def VLD1_X3_F16 : WInst<"vld1_x3", "3(c*!)", "hQh">;
  419. def VLD1_X4_F16 : WInst<"vld1_x4", "4(c*!)", "hQh">;
  420. def VLD1_LANE_F16 : WInst<"vld1_lane", ".(c*!).I", "hQh">;
  421. def VLD1_DUP_F16 : WInst<"vld1_dup", ".(c*!)", "hQh">;
  422. def VST1_F16 : WInst<"vst1", "v*(.!)", "hQh">;
  423. def VST1_X2_F16 : WInst<"vst1_x2", "v*(2!)", "hQh">;
  424. def VST1_X3_F16 : WInst<"vst1_x3", "v*(3!)", "hQh">;
  425. def VST1_X4_F16 : WInst<"vst1_x4", "v*(4!)", "hQh">;
  426. def VST1_LANE_F16 : WInst<"vst1_lane", "v*(.!)I", "hQh">;
  427. }
  428. ////////////////////////////////////////////////////////////////////////////////
  429. // E.3.15 Loads and stores of an N-element structure
  430. def VLD2 : WInst<"vld2", "2(c*!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">;
  431. def VLD3 : WInst<"vld3", "3(c*!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">;
  432. def VLD4 : WInst<"vld4", "4(c*!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">;
  433. def VLD2_DUP : WInst<"vld2_dup", "2(c*!)",
  434. "UcUsUiUlcsilfPcPsQcQfQiQlQsQPcQPsQUcQUiQUlQUs">;
  435. def VLD3_DUP : WInst<"vld3_dup", "3(c*!)",
  436. "UcUsUiUlcsilfPcPsQcQfQiQlQsQPcQPsQUcQUiQUlQUs">;
  437. def VLD4_DUP : WInst<"vld4_dup", "4(c*!)",
  438. "UcUsUiUlcsilfPcPsQcQfQiQlQsQPcQPsQUcQUiQUlQUs">;
  439. def VLD2_LANE : WInst<"vld2_lane", "2(c*!)2I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">;
  440. def VLD3_LANE : WInst<"vld3_lane", "3(c*!)3I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">;
  441. def VLD4_LANE : WInst<"vld4_lane", "4(c*!)4I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">;
  442. def VST2 : WInst<"vst2", "v*(2!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">;
  443. def VST3 : WInst<"vst3", "v*(3!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">;
  444. def VST4 : WInst<"vst4", "v*(4!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">;
  445. def VST2_LANE : WInst<"vst2_lane", "v*(2!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">;
  446. def VST3_LANE : WInst<"vst3_lane", "v*(3!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">;
  447. def VST4_LANE : WInst<"vst4_lane", "v*(4!)I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs">;
  448. let ArchGuard = "(__ARM_FP & 2)" in {
  449. def VLD2_F16 : WInst<"vld2", "2(c*!)", "hQh">;
  450. def VLD3_F16 : WInst<"vld3", "3(c*!)", "hQh">;
  451. def VLD4_F16 : WInst<"vld4", "4(c*!)", "hQh">;
  452. def VLD2_DUP_F16 : WInst<"vld2_dup", "2(c*!)", "hQh">;
  453. def VLD3_DUP_F16 : WInst<"vld3_dup", "3(c*!)", "hQh">;
  454. def VLD4_DUP_F16 : WInst<"vld4_dup", "4(c*!)", "hQh">;
  455. def VLD2_LANE_F16 : WInst<"vld2_lane", "2(c*!)2I", "hQh">;
  456. def VLD3_LANE_F16 : WInst<"vld3_lane", "3(c*!)3I", "hQh">;
  457. def VLD4_LANE_F16 : WInst<"vld4_lane", "4(c*!)4I", "hQh">;
  458. def VST2_F16 : WInst<"vst2", "v*(2!)", "hQh">;
  459. def VST3_F16 : WInst<"vst3", "v*(3!)", "hQh">;
  460. def VST4_F16 : WInst<"vst4", "v*(4!)", "hQh">;
  461. def VST2_LANE_F16 : WInst<"vst2_lane", "v*(2!)I", "hQh">;
  462. def VST3_LANE_F16 : WInst<"vst3_lane", "v*(3!)I", "hQh">;
  463. def VST4_LANE_F16 : WInst<"vst4_lane", "v*(4!)I", "hQh">;
  464. }
  465. ////////////////////////////////////////////////////////////////////////////////
  466. // E.3.16 Extract lanes from a vector
  467. let InstName = "vmov" in
  468. def VGET_LANE : IInst<"vget_lane", "1.I",
  469. "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl">;
  470. ////////////////////////////////////////////////////////////////////////////////
  471. // E.3.17 Set lanes within a vector
  472. let InstName = "vmov" in
  473. def VSET_LANE : IInst<"vset_lane", ".1.I",
  474. "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl">;
  475. ////////////////////////////////////////////////////////////////////////////////
  476. // E.3.18 Initialize a vector from bit pattern
  477. def VCREATE : NoTestOpInst<"vcreate", ".(IU>)", "csihfUcUsUiUlPcPsl", OP_CAST> {
  478. let BigEndianSafe = 1;
  479. }
  480. ////////////////////////////////////////////////////////////////////////////////
  481. // E.3.19 Set all lanes to same value
  482. let InstName = "vmov" in {
  483. def VDUP_N : WOpInst<"vdup_n", ".1",
  484. "UcUsUicsiPcPshfQUcQUsQUiQcQsQiQPcQPsQhQflUlQlQUl",
  485. OP_DUP>;
  486. def VMOV_N : WOpInst<"vmov_n", ".1",
  487. "UcUsUicsiPcPshfQUcQUsQUiQcQsQiQPcQPsQhQflUlQlQUl",
  488. OP_DUP>;
  489. }
  490. let InstName = "" in
  491. def VDUP_LANE: WOpInst<"vdup_lane", ".qI",
  492. "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl",
  493. OP_DUP_LN>;
  494. ////////////////////////////////////////////////////////////////////////////////
  495. // E.3.20 Combining vectors
  496. def VCOMBINE : NoTestOpInst<"vcombine", "Q..", "csilhfUcUsUiUlPcPs", OP_CONC>;
  497. ////////////////////////////////////////////////////////////////////////////////
  498. // E.3.21 Splitting vectors
  499. // Note that the ARM NEON Reference 2.0 mistakenly document the vget_high_f16()
  500. // and vget_low_f16() intrinsics as AArch64-only. We (and GCC) support all
  501. // versions of these intrinsics in both AArch32 and AArch64 architectures. See
  502. // D45668 for more details.
  503. let InstName = "vmov" in {
  504. def VGET_HIGH : NoTestOpInst<"vget_high", ".Q", "csilhfUcUsUiUlPcPs", OP_HI>;
  505. def VGET_LOW : NoTestOpInst<"vget_low", ".Q", "csilhfUcUsUiUlPcPs", OP_LO>;
  506. }
  507. ////////////////////////////////////////////////////////////////////////////////
  508. // E.3.22 Converting vectors
  509. let ArchGuard = "(__ARM_FP & 2)" in {
  510. def VCVT_F16_F32 : SInst<"vcvt_f16_f32", "(<q)(.!)", "Hf">;
  511. def VCVT_F32_F16 : SInst<"vcvt_f32_f16", "(>Q)(.!)", "h">;
  512. }
  513. def VCVT_S32 : SInst<"vcvt_s32", "S.", "fQf">;
  514. def VCVT_U32 : SInst<"vcvt_u32", "U.", "fQf">;
  515. def VCVT_F32 : SInst<"vcvt_f32", "F(.!)", "iUiQiQUi">;
  516. let isVCVT_N = 1 in {
  517. def VCVT_N_S32 : SInst<"vcvt_n_s32", "S.I", "fQf">;
  518. def VCVT_N_U32 : SInst<"vcvt_n_u32", "U.I", "fQf">;
  519. def VCVT_N_F32 : SInst<"vcvt_n_f32", "F(.!)I", "iUiQiQUi">;
  520. }
  521. def VMOVN : IInst<"vmovn", "<Q", "silUsUiUl">;
  522. def VMOVL : SInst<"vmovl", "(>Q).", "csiUcUsUi">;
  523. def VQMOVN : SInst<"vqmovn", "<Q", "silUsUiUl">;
  524. def VQMOVUN : SInst<"vqmovun", "(<U)Q", "sil">;
  525. ////////////////////////////////////////////////////////////////////////////////
  526. // E.3.23-24 Table lookup, Extended table lookup
  527. let InstName = "vtbl" in {
  528. def VTBL1 : WInst<"vtbl1", "..p", "UccPc">;
  529. def VTBL2 : WInst<"vtbl2", ".2p", "UccPc">;
  530. def VTBL3 : WInst<"vtbl3", ".3p", "UccPc">;
  531. def VTBL4 : WInst<"vtbl4", ".4p", "UccPc">;
  532. }
  533. let InstName = "vtbx" in {
  534. def VTBX1 : WInst<"vtbx1", "...p", "UccPc">;
  535. def VTBX2 : WInst<"vtbx2", "..2p", "UccPc">;
  536. def VTBX3 : WInst<"vtbx3", "..3p", "UccPc">;
  537. def VTBX4 : WInst<"vtbx4", "..4p", "UccPc">;
  538. }
  539. ////////////////////////////////////////////////////////////////////////////////
  540. // E.3.25 Operations with a scalar value
  541. def VMLA_LANE : IOpInst<"vmla_lane", "...qI",
  542. "siUsUifQsQiQUsQUiQf", OP_MLA_LN>;
  543. def VMLAL_LANE : SOpInst<"vmlal_lane", "(>Q)(>Q)..I", "siUsUi", OP_MLAL_LN>;
  544. def VQDMLAL_LANE : SOpInst<"vqdmlal_lane", "(>Q)(>Q)..I", "si", OP_QDMLAL_LN>;
  545. def VMLS_LANE : IOpInst<"vmls_lane", "...qI",
  546. "siUsUifQsQiQUsQUiQf", OP_MLS_LN>;
  547. def VMLSL_LANE : SOpInst<"vmlsl_lane", "(>Q)(>Q)..I", "siUsUi", OP_MLSL_LN>;
  548. def VQDMLSL_LANE : SOpInst<"vqdmlsl_lane", "(>Q)(>Q)..I", "si", OP_QDMLSL_LN>;
  549. def VMUL_N : IOpInst<"vmul_n", "..1", "sifUsUiQsQiQfQUsQUi", OP_MUL_N>;
  550. def VMUL_LANE : IOpInst<"vmul_lane", "..qI",
  551. "sifUsUiQsQiQfQUsQUi", OP_MUL_LN>;
  552. def VMULL_N : SOpInst<"vmull_n", "(>Q).1", "siUsUi", OP_MULL_N>;
  553. def VMULL_LANE : SOpInst<"vmull_lane", "(>Q)..I", "siUsUi", OP_MULL_LN>;
  554. def VQDMULL_N : SOpInst<"vqdmull_n", "(>Q).1", "si", OP_QDMULL_N>;
  555. def VQDMULL_LANE : SOpInst<"vqdmull_lane", "(>Q)..I", "si", OP_QDMULL_LN>;
  556. def VQDMULH_N : SOpInst<"vqdmulh_n", "..1", "siQsQi", OP_QDMULH_N>;
  557. def VQRDMULH_N : SOpInst<"vqrdmulh_n", "..1", "siQsQi", OP_QRDMULH_N>;
  558. let ArchGuard = "!defined(__aarch64__)" in {
  559. def VQDMULH_LANE : SOpInst<"vqdmulh_lane", "..qI", "siQsQi", OP_QDMULH_LN>;
  560. def VQRDMULH_LANE : SOpInst<"vqrdmulh_lane", "..qI", "siQsQi", OP_QRDMULH_LN>;
  561. }
  562. let ArchGuard = "defined(__aarch64__)" in {
  563. def A64_VQDMULH_LANE : SInst<"vqdmulh_lane", "..(!q)I", "siQsQi">;
  564. def A64_VQRDMULH_LANE : SInst<"vqrdmulh_lane", "..(!q)I", "siQsQi">;
  565. }
  566. let ArchGuard = "defined(__ARM_FEATURE_QRDMX)" in {
  567. def VQRDMLAH_LANE : SOpInst<"vqrdmlah_lane", "...qI", "siQsQi", OP_QRDMLAH_LN>;
  568. def VQRDMLSH_LANE : SOpInst<"vqrdmlsh_lane", "...qI", "siQsQi", OP_QRDMLSH_LN>;
  569. }
  570. def VMLA_N : IOpInst<"vmla_n", "...1", "siUsUifQsQiQUsQUiQf", OP_MLA_N>;
  571. def VMLAL_N : SOpInst<"vmlal_n", "(>Q)(>Q).1", "siUsUi", OP_MLAL_N>;
  572. def VQDMLAL_N : SOpInst<"vqdmlal_n", "(>Q)(>Q).1", "si", OP_QDMLAL_N>;
  573. def VMLS_N : IOpInst<"vmls_n", "...1", "siUsUifQsQiQUsQUiQf", OP_MLS_N>;
  574. def VMLSL_N : SOpInst<"vmlsl_n", "(>Q)(>Q).1", "siUsUi", OP_MLSL_N>;
  575. def VQDMLSL_N : SOpInst<"vqdmlsl_n", "(>Q)(>Q).1", "si", OP_QDMLSL_N>;
  576. ////////////////////////////////////////////////////////////////////////////////
  577. // E.3.26 Vector Extract
  578. def VEXT : WInst<"vext", "...I",
  579. "cUcPcsUsPsiUilUlfQcQUcQPcQsQUsQPsQiQUiQlQUlQf">;
  580. ////////////////////////////////////////////////////////////////////////////////
  581. // E.3.27 Reverse vector elements
  582. def VREV64 : WOpInst<"vrev64", "..", "csiUcUsUiPcPsfQcQsQiQUcQUsQUiQPcQPsQf",
  583. OP_REV64>;
  584. def VREV32 : WOpInst<"vrev32", "..", "csUcUsPcPsQcQsQUcQUsQPcQPs", OP_REV32>;
  585. def VREV16 : WOpInst<"vrev16", "..", "cUcPcQcQUcQPc", OP_REV16>;
  586. ////////////////////////////////////////////////////////////////////////////////
  587. // E.3.28 Other single operand arithmetic
  588. def VABS : SInst<"vabs", "..", "csifQcQsQiQf">;
  589. def VQABS : SInst<"vqabs", "..", "csiQcQsQi">;
  590. def VNEG : SOpInst<"vneg", "..", "csifQcQsQiQf", OP_NEG>;
  591. def VQNEG : SInst<"vqneg", "..", "csiQcQsQi">;
  592. def VCLS : SInst<"vcls", "S.", "csiUcUsUiQcQsQiQUcQUsQUi">;
  593. def VCLZ : IInst<"vclz", "..", "csiUcUsUiQcQsQiQUcQUsQUi">;
  594. def VCNT : WInst<"vcnt", "..", "UccPcQUcQcQPc">;
  595. def VRECPE : SInst<"vrecpe", "..", "fUiQfQUi">;
  596. def VRSQRTE : SInst<"vrsqrte", "..", "fUiQfQUi">;
  597. ////////////////////////////////////////////////////////////////////////////////
  598. // E.3.29 Logical operations
  599. def VMVN : LOpInst<"vmvn", "..", "csiUcUsUiPcQcQsQiQUcQUsQUiQPc", OP_NOT>;
  600. def VAND : LOpInst<"vand", "...", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_AND>;
  601. def VORR : LOpInst<"vorr", "...", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_OR>;
  602. def VEOR : LOpInst<"veor", "...", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_XOR>;
  603. def VBIC : LOpInst<"vbic", "...", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_ANDN>;
  604. def VORN : LOpInst<"vorn", "...", "csilUcUsUiUlQcQsQiQlQUcQUsQUiQUl", OP_ORN>;
  605. let isHiddenLInst = 1 in
  606. def VBSL : SInst<"vbsl", ".U..",
  607. "csilUcUsUiUlfPcPsQcQsQiQlQUcQUsQUiQUlQfQPcQPs">;
  608. ////////////////////////////////////////////////////////////////////////////////
  609. // E.3.30 Transposition operations
  610. def VTRN : WInst<"vtrn", "2..", "csiUcUsUifPcPsQcQsQiQUcQUsQUiQfQPcQPs">;
  611. def VZIP : WInst<"vzip", "2..", "csiUcUsUifPcPsQcQsQiQUcQUsQUiQfQPcQPs">;
  612. def VUZP : WInst<"vuzp", "2..", "csiUcUsUifPcPsQcQsQiQUcQUsQUiQfQPcQPs">;
  613. ////////////////////////////////////////////////////////////////////////////////
  614. class REINTERPRET_CROSS_SELF<string Types> :
  615. NoTestOpInst<"vreinterpret", "..", Types, OP_REINT> {
  616. let CartesianProductWith = Types;
  617. }
  618. multiclass REINTERPRET_CROSS_TYPES<string TypesA, string TypesB> {
  619. def AXB: NoTestOpInst<"vreinterpret", "..", TypesA, OP_REINT> {
  620. let CartesianProductWith = TypesB;
  621. }
  622. def BXA: NoTestOpInst<"vreinterpret", "..", TypesB, OP_REINT> {
  623. let CartesianProductWith = TypesA;
  624. }
  625. }
  626. // E.3.31 Vector reinterpret cast operations
  627. def VREINTERPRET : REINTERPRET_CROSS_SELF<"csilUcUsUiUlhfPcPsQcQsQiQlQUcQUsQUiQUlQhQfQPcQPs"> {
  628. let ArchGuard = "!defined(__aarch64__)";
  629. let BigEndianSafe = 1;
  630. }
  631. ////////////////////////////////////////////////////////////////////////////////
  632. // Vector fused multiply-add operations
  633. let ArchGuard = "defined(__ARM_FEATURE_FMA)" in {
  634. def VFMA : SInst<"vfma", "....", "fQf">;
  635. def VFMS : SOpInst<"vfms", "....", "fQf", OP_FMLS>;
  636. def FMLA_N_F32 : SOpInst<"vfma_n", "...1", "fQf", OP_FMLA_N>;
  637. }
  638. ////////////////////////////////////////////////////////////////////////////////
  639. // fp16 vector operations
  640. def SCALAR_HALF_GET_LANE : IOpInst<"vget_lane", "1.I", "h", OP_SCALAR_HALF_GET_LN>;
  641. def SCALAR_HALF_SET_LANE : IOpInst<"vset_lane", ".1.I", "h", OP_SCALAR_HALF_SET_LN>;
  642. def SCALAR_HALF_GET_LANEQ : IOpInst<"vget_lane", "1.I", "Qh", OP_SCALAR_HALF_GET_LNQ>;
  643. def SCALAR_HALF_SET_LANEQ : IOpInst<"vset_lane", ".1.I", "Qh", OP_SCALAR_HALF_SET_LNQ>;
  644. ////////////////////////////////////////////////////////////////////////////////
  645. // Non poly128_t vaddp for Arm and AArch64
  646. // TODO: poly128_t not implemented on arm32
  647. def VADDP : WInst<"vadd", "...", "PcPsPlQPcQPsQPl">;
  648. ////////////////////////////////////////////////////////////////////////////////
  649. // AArch64 Intrinsics
  650. let ArchGuard = "defined(__aarch64__)" in {
  651. ////////////////////////////////////////////////////////////////////////////////
  652. // Load/Store
  653. def LD1 : WInst<"vld1", ".(c*!)", "dQdPlQPl">;
  654. def LD2 : WInst<"vld2", "2(c*!)", "QUlQldQdPlQPl">;
  655. def LD3 : WInst<"vld3", "3(c*!)", "QUlQldQdPlQPl">;
  656. def LD4 : WInst<"vld4", "4(c*!)", "QUlQldQdPlQPl">;
  657. def ST1 : WInst<"vst1", "v*(.!)", "dQdPlQPl">;
  658. def ST2 : WInst<"vst2", "v*(2!)", "QUlQldQdPlQPl">;
  659. def ST3 : WInst<"vst3", "v*(3!)", "QUlQldQdPlQPl">;
  660. def ST4 : WInst<"vst4", "v*(4!)", "QUlQldQdPlQPl">;
  661. def LD1_X2 : WInst<"vld1_x2", "2(c*!)",
  662. "dQdPlQPl">;
  663. def LD1_X3 : WInst<"vld1_x3", "3(c*!)",
  664. "dQdPlQPl">;
  665. def LD1_X4 : WInst<"vld1_x4", "4(c*!)",
  666. "dQdPlQPl">;
  667. def ST1_X2 : WInst<"vst1_x2", "v*(2!)", "dQdPlQPl">;
  668. def ST1_X3 : WInst<"vst1_x3", "v*(3!)", "dQdPlQPl">;
  669. def ST1_X4 : WInst<"vst1_x4", "v*(4!)", "dQdPlQPl">;
  670. def LD1_LANE : WInst<"vld1_lane", ".(c*!).I", "dQdPlQPl">;
  671. def LD2_LANE : WInst<"vld2_lane", "2(c*!)2I", "lUlQcQUcQPcQlQUldQdPlQPl">;
  672. def LD3_LANE : WInst<"vld3_lane", "3(c*!)3I", "lUlQcQUcQPcQlQUldQdPlQPl">;
  673. def LD4_LANE : WInst<"vld4_lane", "4(c*!)4I", "lUlQcQUcQPcQlQUldQdPlQPl">;
  674. def ST1_LANE : WInst<"vst1_lane", "v*(.!)I", "dQdPlQPl">;
  675. def ST2_LANE : WInst<"vst2_lane", "v*(2!)I", "lUlQcQUcQPcQlQUldQdPlQPl">;
  676. def ST3_LANE : WInst<"vst3_lane", "v*(3!)I", "lUlQcQUcQPcQlQUldQdPlQPl">;
  677. def ST4_LANE : WInst<"vst4_lane", "v*(4!)I", "lUlQcQUcQPcQlQUldQdPlQPl">;
  678. def LD1_DUP : WInst<"vld1_dup", ".(c*!)", "dQdPlQPl">;
  679. def LD2_DUP : WInst<"vld2_dup", "2(c*!)", "dQdPlQPl">;
  680. def LD3_DUP : WInst<"vld3_dup", "3(c*!)", "dQdPlQPl">;
  681. def LD4_DUP : WInst<"vld4_dup", "4(c*!)", "dQdPlQPl">;
  682. def VLDRQ : WInst<"vldrq", "1(c*!)", "Pk">;
  683. def VSTRQ : WInst<"vstrq", "v*(1!)", "Pk">;
  684. ////////////////////////////////////////////////////////////////////////////////
  685. // Addition
  686. def ADD : IOpInst<"vadd", "...", "dQd", OP_ADD>;
  687. ////////////////////////////////////////////////////////////////////////////////
  688. // Subtraction
  689. def SUB : IOpInst<"vsub", "...", "dQd", OP_SUB>;
  690. ////////////////////////////////////////////////////////////////////////////////
  691. // Multiplication
  692. def MUL : IOpInst<"vmul", "...", "dQd", OP_MUL>;
  693. def MLA : IOpInst<"vmla", "....", "dQd", OP_MLA>;
  694. def MLS : IOpInst<"vmls", "....", "dQd", OP_MLS>;
  695. ////////////////////////////////////////////////////////////////////////////////
  696. // Multiplication Extended
  697. def MULX : SInst<"vmulx", "...", "fdQfQd">;
  698. ////////////////////////////////////////////////////////////////////////////////
  699. // Division
  700. def FDIV : IOpInst<"vdiv", "...", "fdQfQd", OP_DIV>;
  701. ////////////////////////////////////////////////////////////////////////////////
  702. // Vector fused multiply-add operations
  703. def FMLA : SInst<"vfma", "....", "dQd">;
  704. def FMLS : SOpInst<"vfms", "....", "dQd", OP_FMLS>;
  705. ////////////////////////////////////////////////////////////////////////////////
  706. // MUL, MLA, MLS, FMA, FMS definitions with scalar argument
  707. def VMUL_N_A64 : IOpInst<"vmul_n", "..1", "Qd", OP_MUL_N>;
  708. def FMLA_N : SOpInst<"vfma_n", "...1", "dQd", OP_FMLA_N>;
  709. def FMLS_N : SOpInst<"vfms_n", "...1", "fdQfQd", OP_FMLS_N>;
  710. ////////////////////////////////////////////////////////////////////////////////
  711. // Logical operations
  712. def BSL : SInst<"vbsl", ".U..", "dPlQdQPl">;
  713. ////////////////////////////////////////////////////////////////////////////////
  714. // Absolute Difference
  715. def ABD : SInst<"vabd", "...", "dQd">;
  716. ////////////////////////////////////////////////////////////////////////////////
  717. // saturating absolute/negate
  718. def ABS : SInst<"vabs", "..", "dQdlQl">;
  719. def QABS : SInst<"vqabs", "..", "lQl">;
  720. def NEG : SOpInst<"vneg", "..", "dlQdQl", OP_NEG>;
  721. def QNEG : SInst<"vqneg", "..", "lQl">;
  722. ////////////////////////////////////////////////////////////////////////////////
  723. // Signed Saturating Accumulated of Unsigned Value
  724. def SUQADD : SInst<"vuqadd", "..U", "csilQcQsQiQl">;
  725. ////////////////////////////////////////////////////////////////////////////////
  726. // Unsigned Saturating Accumulated of Signed Value
  727. def USQADD : SInst<"vsqadd", "..S", "UcUsUiUlQUcQUsQUiQUl">;
  728. ////////////////////////////////////////////////////////////////////////////////
  729. // Reciprocal/Sqrt
  730. def FRECPS : IInst<"vrecps", "...", "dQd">;
  731. def FRSQRTS : IInst<"vrsqrts", "...", "dQd">;
  732. def FRECPE : SInst<"vrecpe", "..", "dQd">;
  733. def FRSQRTE : SInst<"vrsqrte", "..", "dQd">;
  734. def FSQRT : SInst<"vsqrt", "..", "fdQfQd">;
  735. ////////////////////////////////////////////////////////////////////////////////
  736. // bitwise reverse
  737. def RBIT : IInst<"vrbit", "..", "cUcPcQcQUcQPc">;
  738. ////////////////////////////////////////////////////////////////////////////////
  739. // Integer extract and narrow to high
  740. def XTN2 : SOpInst<"vmovn_high", "(<Q)<Q", "silUsUiUl", OP_XTN>;
  741. ////////////////////////////////////////////////////////////////////////////////
  742. // Signed integer saturating extract and unsigned narrow to high
  743. def SQXTUN2 : SOpInst<"vqmovun_high", "(<U)(<Uq).", "HsHiHl", OP_SQXTUN>;
  744. ////////////////////////////////////////////////////////////////////////////////
  745. // Integer saturating extract and narrow to high
  746. def QXTN2 : SOpInst<"vqmovn_high", "(<Q)<Q", "silUsUiUl", OP_QXTN>;
  747. ////////////////////////////////////////////////////////////////////////////////
  748. // Converting vectors
  749. def VCVT_F32_F64 : SInst<"vcvt_f32_f64", "(<q).", "Qd">;
  750. def VCVT_F64_F32 : SInst<"vcvt_f64_f32", "(>Q).", "f">;
  751. def VCVT_S64 : SInst<"vcvt_s64", "S.", "dQd">;
  752. def VCVT_U64 : SInst<"vcvt_u64", "U.", "dQd">;
  753. def VCVT_F64 : SInst<"vcvt_f64", "F(.!)", "lUlQlQUl">;
  754. def VCVT_HIGH_F16_F32 : SOpInst<"vcvt_high_f16", "<(<q!)Q", "Hf", OP_VCVT_NA_HI_F16>;
  755. def VCVT_HIGH_F32_F16 : SOpInst<"vcvt_high_f32", "(>Q)(Q!)", "h", OP_VCVT_EX_HI_F32>;
  756. def VCVT_HIGH_F32_F64 : SOpInst<"vcvt_high_f32", "(<Q)(F<!)Q", "d", OP_VCVT_NA_HI_F32>;
  757. def VCVT_HIGH_F64_F32 : SOpInst<"vcvt_high_f64", "(>Q)(Q!)", "f", OP_VCVT_EX_HI_F64>;
  758. def VCVTX_F32_F64 : SInst<"vcvtx_f32", "(F<)(Q!)", "d">;
  759. def VCVTX_HIGH_F32_F64 : SOpInst<"vcvtx_high_f32", "(<Q)(F<!)Q", "d", OP_VCVTX_HI>;
  760. ////////////////////////////////////////////////////////////////////////////////
  761. // Comparison
  762. def FCAGE : IInst<"vcage", "U..", "dQd">;
  763. def FCAGT : IInst<"vcagt", "U..", "dQd">;
  764. def FCALE : IInst<"vcale", "U..", "dQd">;
  765. def FCALT : IInst<"vcalt", "U..", "dQd">;
  766. def CMTST : WInst<"vtst", "U..", "lUlPlQlQUlQPl">;
  767. def CFMEQ : SOpInst<"vceq", "U..", "lUldQdQlQUlPlQPl", OP_EQ>;
  768. def CFMGE : SOpInst<"vcge", "U..", "lUldQdQlQUl", OP_GE>;
  769. def CFMLE : SOpInst<"vcle", "U..", "lUldQdQlQUl", OP_LE>;
  770. def CFMGT : SOpInst<"vcgt", "U..", "lUldQdQlQUl", OP_GT>;
  771. def CFMLT : SOpInst<"vclt", "U..", "lUldQdQlQUl", OP_LT>;
  772. def CMEQ : SInst<"vceqz", "U.",
  773. "csilfUcUsUiUlPcPlQcQsQiQlQfQUcQUsQUiQUlQPcdQdQPl">;
  774. def CMGE : SInst<"vcgez", "U.", "csilfdQcQsQiQlQfQd">;
  775. def CMLE : SInst<"vclez", "U.", "csilfdQcQsQiQlQfQd">;
  776. def CMGT : SInst<"vcgtz", "U.", "csilfdQcQsQiQlQfQd">;
  777. def CMLT : SInst<"vcltz", "U.", "csilfdQcQsQiQlQfQd">;
  778. ////////////////////////////////////////////////////////////////////////////////
  779. // Max/Min Integer
  780. def MAX : SInst<"vmax", "...", "dQd">;
  781. def MIN : SInst<"vmin", "...", "dQd">;
  782. ////////////////////////////////////////////////////////////////////////////////
  783. // Pairwise Max/Min
  784. def MAXP : SInst<"vpmax", "...", "QcQsQiQUcQUsQUiQfQd">;
  785. def MINP : SInst<"vpmin", "...", "QcQsQiQUcQUsQUiQfQd">;
  786. ////////////////////////////////////////////////////////////////////////////////
  787. // Pairwise MaxNum/MinNum Floating Point
  788. def FMAXNMP : SInst<"vpmaxnm", "...", "fQfQd">;
  789. def FMINNMP : SInst<"vpminnm", "...", "fQfQd">;
  790. ////////////////////////////////////////////////////////////////////////////////
  791. // Pairwise Addition
  792. def ADDP : IInst<"vpadd", "...", "QcQsQiQlQUcQUsQUiQUlQfQd">;
  793. ////////////////////////////////////////////////////////////////////////////////
  794. // Shifts by constant
  795. let isShift = 1 in {
  796. // Left shift long high
  797. def SHLL_HIGH_N : SOpInst<"vshll_high_n", ">.I", "HcHsHiHUcHUsHUi",
  798. OP_LONG_HI>;
  799. ////////////////////////////////////////////////////////////////////////////////
  800. def SRI_N : WInst<"vsri_n", "...I", "PlQPl">;
  801. def SLI_N : WInst<"vsli_n", "...I", "PlQPl">;
  802. // Right shift narrow high
  803. def SHRN_HIGH_N : IOpInst<"vshrn_high_n", "<(<q).I",
  804. "HsHiHlHUsHUiHUl", OP_NARROW_HI>;
  805. def QSHRUN_HIGH_N : SOpInst<"vqshrun_high_n", "<(<q).I",
  806. "HsHiHl", OP_NARROW_HI>;
  807. def RSHRN_HIGH_N : IOpInst<"vrshrn_high_n", "<(<q).I",
  808. "HsHiHlHUsHUiHUl", OP_NARROW_HI>;
  809. def QRSHRUN_HIGH_N : SOpInst<"vqrshrun_high_n", "<(<q).I",
  810. "HsHiHl", OP_NARROW_HI>;
  811. def QSHRN_HIGH_N : SOpInst<"vqshrn_high_n", "<(<q).I",
  812. "HsHiHlHUsHUiHUl", OP_NARROW_HI>;
  813. def QRSHRN_HIGH_N : SOpInst<"vqrshrn_high_n", "<(<q).I",
  814. "HsHiHlHUsHUiHUl", OP_NARROW_HI>;
  815. }
  816. ////////////////////////////////////////////////////////////////////////////////
  817. // Converting vectors
  818. def VMOVL_HIGH : SOpInst<"vmovl_high", ">.", "HcHsHiHUcHUsHUi", OP_MOVL_HI>;
  819. let isVCVT_N = 1 in {
  820. def CVTF_N_F64 : SInst<"vcvt_n_f64", "F(.!)I", "lUlQlQUl">;
  821. def FCVTZS_N_S64 : SInst<"vcvt_n_s64", "S.I", "dQd">;
  822. def FCVTZS_N_U64 : SInst<"vcvt_n_u64", "U.I", "dQd">;
  823. }
  824. ////////////////////////////////////////////////////////////////////////////////
  825. // 3VDiff class using high 64-bit in operands
def VADDL_HIGH : SOpInst<"vaddl_high", "(>Q)QQ", "csiUcUsUi", OP_ADDLHi>;
def VADDW_HIGH : SOpInst<"vaddw_high", "(>Q)(>Q)Q", "csiUcUsUi", OP_ADDWHi>;
def VSUBL_HIGH : SOpInst<"vsubl_high", "(>Q)QQ", "csiUcUsUi", OP_SUBLHi>;
def VSUBW_HIGH : SOpInst<"vsubw_high", "(>Q)(>Q)Q", "csiUcUsUi", OP_SUBWHi>;
def VABDL_HIGH : SOpInst<"vabdl_high", "(>Q)QQ", "csiUcUsUi", OP_ABDLHi>;
def VABAL_HIGH : SOpInst<"vabal_high", "(>Q)(>Q)QQ", "csiUcUsUi", OP_ABALHi>;
def VMULL_HIGH : SOpInst<"vmull_high", "(>Q)QQ", "csiUcUsUiPc", OP_MULLHi>;
def VMULL_HIGH_N : SOpInst<"vmull_high_n", "(>Q)Q1", "siUsUi", OP_MULLHi_N>;
def VMLAL_HIGH : SOpInst<"vmlal_high", "(>Q)(>Q)QQ", "csiUcUsUi", OP_MLALHi>;
def VMLAL_HIGH_N : SOpInst<"vmlal_high_n", "(>Q)(>Q)Q1", "siUsUi", OP_MLALHi_N>;
def VMLSL_HIGH : SOpInst<"vmlsl_high", "(>Q)(>Q)QQ", "csiUcUsUi", OP_MLSLHi>;
def VMLSL_HIGH_N : SOpInst<"vmlsl_high_n", "(>Q)(>Q)Q1", "siUsUi", OP_MLSLHi_N>;
def VADDHN_HIGH : SOpInst<"vaddhn_high", "(<Q)<QQ", "silUsUiUl", OP_ADDHNHi>;
def VRADDHN_HIGH : SOpInst<"vraddhn_high", "(<Q)<QQ", "silUsUiUl", OP_RADDHNHi>;
def VSUBHN_HIGH : SOpInst<"vsubhn_high", "(<Q)<QQ", "silUsUiUl", OP_SUBHNHi>;
def VRSUBHN_HIGH : SOpInst<"vrsubhn_high", "(<Q)<QQ", "silUsUiUl", OP_RSUBHNHi>;
def VQDMULL_HIGH : SOpInst<"vqdmull_high", "(>Q)QQ", "si", OP_QDMULLHi>;
def VQDMULL_HIGH_N : SOpInst<"vqdmull_high_n", "(>Q)Q1", "si", OP_QDMULLHi_N>;
def VQDMLAL_HIGH : SOpInst<"vqdmlal_high", "(>Q)(>Q)QQ", "si", OP_QDMLALHi>;
def VQDMLAL_HIGH_N : SOpInst<"vqdmlal_high_n", "(>Q)(>Q)Q1", "si", OP_QDMLALHi_N>;
def VQDMLSL_HIGH : SOpInst<"vqdmlsl_high", "(>Q)(>Q)QQ", "si", OP_QDMLSLHi>;
def VQDMLSL_HIGH_N : SOpInst<"vqdmlsl_high_n", "(>Q)(>Q)Q1", "si", OP_QDMLSLHi_N>;
def VMULL_P64 : SInst<"vmull", "(1>)11", "Pl">;
def VMULL_HIGH_P64 : SOpInst<"vmull_high", "(1>)..", "HPl", OP_MULLHi_P64>;
////////////////////////////////////////////////////////////////////////////////
// Extract or insert element from vector
def GET_LANE : IInst<"vget_lane", "1.I", "dQdPlQPl">;
def SET_LANE : IInst<"vset_lane", ".1.I", "dQdPlQPl">;
def COPY_LANE : IOpInst<"vcopy_lane", "..I.I",
                        "csilUcUsUiUlPcPsPlfd", OP_COPY_LN>;
def COPYQ_LANE : IOpInst<"vcopy_lane", "..IqI",
                         "QcQsQiQlQUcQUsQUiQUlQPcQPsQfQdQPl", OP_COPY_LN>;
def COPY_LANEQ : IOpInst<"vcopy_laneq", "..IQI",
                         "csilPcPsPlUcUsUiUlfd", OP_COPY_LN> {
  let isLaneQ = 1;
}
def COPYQ_LANEQ : IOpInst<"vcopy_laneq", "..I.I",
                          "QcQsQiQlQUcQUsQUiQUlQPcQPsQfQdQPl", OP_COPY_LN> {
  let isLaneQ = 1;
}
////////////////////////////////////////////////////////////////////////////////
// Set all lanes to same value
def VDUP_LANE1: WOpInst<"vdup_lane", ".qI", "hdQhQdPlQPl", OP_DUP_LN>;
def VDUP_LANE2: WOpInst<"vdup_laneq", ".QI",
                        "csilUcUsUiUlPcPshfdQcQsQiQlQPcQPsQUcQUsQUiQUlQhQfQdPlQPl",
                        OP_DUP_LN> {
  let isLaneQ = 1;
}
def DUP_N : WOpInst<"vdup_n", ".1", "dQdPlQPl", OP_DUP>;
def MOV_N : WOpInst<"vmov_n", ".1", "dQdPlQPl", OP_DUP>;
////////////////////////////////////////////////////////////////////////////////
def COMBINE : NoTestOpInst<"vcombine", "Q..", "dPl", OP_CONC>;
////////////////////////////////////////////////////////////////////////////////
// Initialize a vector from bit pattern
def CREATE : NoTestOpInst<"vcreate", ".(IU>)", "dPl", OP_CAST> {
  let BigEndianSafe = 1;
}
////////////////////////////////////////////////////////////////////////////////
def VMLA_LANEQ : IOpInst<"vmla_laneq", "...QI",
                         "siUsUifQsQiQUsQUiQf", OP_MLA_LN> {
  let isLaneQ = 1;
}
def VMLS_LANEQ : IOpInst<"vmls_laneq", "...QI",
                         "siUsUifQsQiQUsQUiQf", OP_MLS_LN> {
  let isLaneQ = 1;
}
def VFMA_LANE : IInst<"vfma_lane", "...qI", "fdQfQd">;
def VFMA_LANEQ : IInst<"vfma_laneq", "...QI", "fdQfQd"> {
  let isLaneQ = 1;
}
def VFMS_LANE : IOpInst<"vfms_lane", "...qI", "fdQfQd", OP_FMS_LN>;
def VFMS_LANEQ : IOpInst<"vfms_laneq", "...QI", "fdQfQd", OP_FMS_LNQ> {
  let isLaneQ = 1;
}
def VMLAL_LANEQ : SOpInst<"vmlal_laneq", "(>Q)(>Q).QI", "siUsUi", OP_MLAL_LN> {
  let isLaneQ = 1;
}
def VMLAL_HIGH_LANE : SOpInst<"vmlal_high_lane", "(>Q)(>Q)Q.I", "siUsUi",
                              OP_MLALHi_LN>;
def VMLAL_HIGH_LANEQ : SOpInst<"vmlal_high_laneq", "(>Q)(>Q)QQI", "siUsUi",
                               OP_MLALHi_LN> {
  let isLaneQ = 1;
}
def VMLSL_LANEQ : SOpInst<"vmlsl_laneq", "(>Q)(>Q).QI", "siUsUi", OP_MLSL_LN> {
  let isLaneQ = 1;
}
def VMLSL_HIGH_LANE : SOpInst<"vmlsl_high_lane", "(>Q)(>Q)Q.I", "siUsUi",
                              OP_MLSLHi_LN>;
def VMLSL_HIGH_LANEQ : SOpInst<"vmlsl_high_laneq", "(>Q)(>Q)QQI", "siUsUi",
                               OP_MLSLHi_LN> {
  let isLaneQ = 1;
}
def VQDMLAL_LANEQ : SOpInst<"vqdmlal_laneq", "(>Q)(>Q).QI", "si", OP_QDMLAL_LN> {
  let isLaneQ = 1;
}
def VQDMLAL_HIGH_LANE : SOpInst<"vqdmlal_high_lane", "(>Q)(>Q)Q.I", "si",
                                OP_QDMLALHi_LN>;
def VQDMLAL_HIGH_LANEQ : SOpInst<"vqdmlal_high_laneq", "(>Q)(>Q)QQI", "si",
                                 OP_QDMLALHi_LN> {
  let isLaneQ = 1;
}
def VQDMLSL_LANEQ : SOpInst<"vqdmlsl_laneq", "(>Q)(>Q).QI", "si", OP_QDMLSL_LN> {
  let isLaneQ = 1;
}
def VQDMLSL_HIGH_LANE : SOpInst<"vqdmlsl_high_lane", "(>Q)(>Q)Q.I", "si",
                                OP_QDMLSLHi_LN>;
def VQDMLSL_HIGH_LANEQ : SOpInst<"vqdmlsl_high_laneq", "(>Q)(>Q)QQI", "si",
                                 OP_QDMLSLHi_LN> {
  let isLaneQ = 1;
}
// Newly added double type for vmul_lane on AArch64
// Note: d type is handled by SCALAR_VMUL_LANE
def VMUL_LANE_A64 : IOpInst<"vmul_lane", "..qI", "Qd", OP_MUL_LN>;
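// A sketch of the resulting split for f64 (prototypes assume the usual ACLE
// mangling): VMUL_LANE_A64 covers the Q form, while the 64-bit form comes from
// SCALAR_VMUL_LANE further below:
//   float64x2_t vmulq_lane_f64(float64x2_t a, float64x1_t v, const int lane);
//   float64x1_t vmul_lane_f64(float64x1_t a, float64x1_t v, const int lane);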
// Note: d type is handled by SCALAR_VMUL_LANEQ
def VMUL_LANEQ : IOpInst<"vmul_laneq", "..QI",
                         "sifUsUiQsQiQUsQUiQfQd", OP_MUL_LN> {
  let isLaneQ = 1;
}
def VMULL_LANEQ : SOpInst<"vmull_laneq", "(>Q).QI", "siUsUi", OP_MULL_LN> {
  let isLaneQ = 1;
}
def VMULL_HIGH_LANE : SOpInst<"vmull_high_lane", "(>Q)Q.I", "siUsUi",
                              OP_MULLHi_LN>;
def VMULL_HIGH_LANEQ : SOpInst<"vmull_high_laneq", "(>Q)QQI", "siUsUi",
                               OP_MULLHi_LN> {
  let isLaneQ = 1;
}
def VQDMULL_LANEQ : SOpInst<"vqdmull_laneq", "(>Q).QI", "si", OP_QDMULL_LN> {
  let isLaneQ = 1;
}
def VQDMULL_HIGH_LANE : SOpInst<"vqdmull_high_lane", "(>Q)Q.I", "si",
                                OP_QDMULLHi_LN>;
def VQDMULL_HIGH_LANEQ : SOpInst<"vqdmull_high_laneq", "(>Q)QQI", "si",
                                 OP_QDMULLHi_LN> {
  let isLaneQ = 1;
}
let isLaneQ = 1 in {
def VQDMULH_LANEQ : SInst<"vqdmulh_laneq", "..QI", "siQsQi">;
def VQRDMULH_LANEQ : SInst<"vqrdmulh_laneq", "..QI", "siQsQi">;
}
let ArchGuard = "defined(__ARM_FEATURE_QRDMX) && defined(__aarch64__)" in {
def VQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "...QI", "siQsQi", OP_QRDMLAH_LN> {
  let isLaneQ = 1;
}
def VQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "...QI", "siQsQi", OP_QRDMLSH_LN> {
  let isLaneQ = 1;
}
}
// Note: d type implemented by SCALAR_VMULX_LANE
def VMULX_LANE : IOpInst<"vmulx_lane", "..qI", "fQfQd", OP_MULX_LN>;
// Note: d type is implemented by SCALAR_VMULX_LANEQ
def VMULX_LANEQ : IOpInst<"vmulx_laneq", "..QI", "fQfQd", OP_MULX_LN> {
  let isLaneQ = 1;
}
////////////////////////////////////////////////////////////////////////////////
// Across vectors class
def VADDLV : SInst<"vaddlv", "(1>).", "csiUcUsUiQcQsQiQUcQUsQUi">;
def VMAXV : SInst<"vmaxv", "1.", "csifUcUsUiQcQsQiQUcQUsQUiQfQd">;
def VMINV : SInst<"vminv", "1.", "csifUcUsUiQcQsQiQUcQUsQUiQfQd">;
def VADDV : SInst<"vaddv", "1.", "csifUcUsUiQcQsQiQUcQUsQUiQfQdQlQUl">;
def FMAXNMV : SInst<"vmaxnmv", "1.", "fQfQd">;
def FMINNMV : SInst<"vminnmv", "1.", "fQfQd">;
////////////////////////////////////////////////////////////////////////////////
// Newly added Vector Extract for f64
def VEXT_A64 : WInst<"vext", "...I", "dQdPlQPl">;
////////////////////////////////////////////////////////////////////////////////
// Crypto
let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_AES)" in {
def AESE : SInst<"vaese", "...", "QUc">;
def AESD : SInst<"vaesd", "...", "QUc">;
def AESMC : SInst<"vaesmc", "..", "QUc">;
def AESIMC : SInst<"vaesimc", "..", "QUc">;
}
let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_SHA2)" in {
def SHA1H : SInst<"vsha1h", "11", "Ui">;
def SHA1SU1 : SInst<"vsha1su1", "...", "QUi">;
def SHA256SU0 : SInst<"vsha256su0", "...", "QUi">;
def SHA1C : SInst<"vsha1c", "..1.", "QUi">;
def SHA1P : SInst<"vsha1p", "..1.", "QUi">;
def SHA1M : SInst<"vsha1m", "..1.", "QUi">;
def SHA1SU0 : SInst<"vsha1su0", "....", "QUi">;
def SHA256H : SInst<"vsha256h", "....", "QUi">;
def SHA256H2 : SInst<"vsha256h2", "....", "QUi">;
def SHA256SU1 : SInst<"vsha256su1", "....", "QUi">;
}
let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_SHA3) && defined(__aarch64__)" in {
def BCAX : SInst<"vbcax", "....", "QUcQUsQUiQUlQcQsQiQl">;
def EOR3 : SInst<"veor3", "....", "QUcQUsQUiQUlQcQsQiQl">;
def RAX1 : SInst<"vrax1", "...", "QUl">;
let isVXAR = 1 in {
def XAR : SInst<"vxar", "...I", "QUl">;
}
}
let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_SHA512) && defined(__aarch64__)" in {
def SHA512SU0 : SInst<"vsha512su0", "...", "QUl">;
def SHA512su1 : SInst<"vsha512su1", "....", "QUl">;
def SHA512H : SInst<"vsha512h", "....", "QUl">;
def SHA512H2 : SInst<"vsha512h2", "....", "QUl">;
}
let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_SM3) && defined(__aarch64__)" in {
def SM3SS1 : SInst<"vsm3ss1", "....", "QUi">;
def SM3TT1A : SInst<"vsm3tt1a", "....I", "QUi">;
def SM3TT1B : SInst<"vsm3tt1b", "....I", "QUi">;
def SM3TT2A : SInst<"vsm3tt2a", "....I", "QUi">;
def SM3TT2B : SInst<"vsm3tt2b", "....I", "QUi">;
def SM3PARTW1 : SInst<"vsm3partw1", "....", "QUi">;
def SM3PARTW2 : SInst<"vsm3partw2", "....", "QUi">;
}
let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_SM4) && defined(__aarch64__)" in {
def SM4E : SInst<"vsm4e", "...", "QUi">;
def SM4EKEY : SInst<"vsm4ekey", "...", "QUi">;
}
////////////////////////////////////////////////////////////////////////////////
// poly128_t vadd for AArch64 only; see VADDP for the rest
def VADDP_Q : WInst<"vadd", "...", "QPk">;
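// As a reference sketch (the name assumes the usual ACLE mangling for the
// poly128 "Pk" type), this is expected to yield roughly:
//   poly128_t vaddq_p128(poly128_t a, poly128_t b);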
////////////////////////////////////////////////////////////////////////////////
// Float -> Int conversions with explicit rounding mode
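// Representative prototypes (a sketch, assuming the usual ACLE mangling):
//   int32x4_t vcvtnq_s32_f32(float32x4_t a);  // round to nearest, ties to even
//   int32x4_t vcvtpq_s32_f32(float32x4_t a);  // round towards +Inf
//   int32x4_t vcvtmq_s32_f32(float32x4_t a);  // round towards -Inf
//   int32x4_t vcvtaq_s32_f32(float32x4_t a);  // round to nearest, ties away from zero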
let ArchGuard = "__ARM_ARCH >= 8" in {
def FCVTNS_S32 : SInst<"vcvtn_s32", "S.", "fQf">;
def FCVTNU_S32 : SInst<"vcvtn_u32", "U.", "fQf">;
def FCVTPS_S32 : SInst<"vcvtp_s32", "S.", "fQf">;
def FCVTPU_S32 : SInst<"vcvtp_u32", "U.", "fQf">;
def FCVTMS_S32 : SInst<"vcvtm_s32", "S.", "fQf">;
def FCVTMU_S32 : SInst<"vcvtm_u32", "U.", "fQf">;
def FCVTAS_S32 : SInst<"vcvta_s32", "S.", "fQf">;
def FCVTAU_S32 : SInst<"vcvta_u32", "U.", "fQf">;
}
let ArchGuard = "__ARM_ARCH >= 8 && defined(__aarch64__)" in {
def FCVTNS_S64 : SInst<"vcvtn_s64", "S.", "dQd">;
def FCVTNU_S64 : SInst<"vcvtn_u64", "U.", "dQd">;
def FCVTPS_S64 : SInst<"vcvtp_s64", "S.", "dQd">;
def FCVTPU_S64 : SInst<"vcvtp_u64", "U.", "dQd">;
def FCVTMS_S64 : SInst<"vcvtm_s64", "S.", "dQd">;
def FCVTMU_S64 : SInst<"vcvtm_u64", "U.", "dQd">;
def FCVTAS_S64 : SInst<"vcvta_s64", "S.", "dQd">;
def FCVTAU_S64 : SInst<"vcvta_u64", "U.", "dQd">;
}
////////////////////////////////////////////////////////////////////////////////
// Round to Integral
let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_DIRECTED_ROUNDING)" in {
def FRINTN_S32 : SInst<"vrndn", "..", "fQf">;
def FRINTA_S32 : SInst<"vrnda", "..", "fQf">;
def FRINTP_S32 : SInst<"vrndp", "..", "fQf">;
def FRINTM_S32 : SInst<"vrndm", "..", "fQf">;
def FRINTX_S32 : SInst<"vrndx", "..", "fQf">;
def FRINTZ_S32 : SInst<"vrnd", "..", "fQf">;
def FRINTI_S32 : SInst<"vrndi", "..", "fQf">;
}
let ArchGuard = "__ARM_ARCH >= 8 && defined(__aarch64__) && defined(__ARM_FEATURE_DIRECTED_ROUNDING)" in {
def FRINTN_S64 : SInst<"vrndn", "..", "dQd">;
def FRINTA_S64 : SInst<"vrnda", "..", "dQd">;
def FRINTP_S64 : SInst<"vrndp", "..", "dQd">;
def FRINTM_S64 : SInst<"vrndm", "..", "dQd">;
def FRINTX_S64 : SInst<"vrndx", "..", "dQd">;
def FRINTZ_S64 : SInst<"vrnd", "..", "dQd">;
def FRINTI_S64 : SInst<"vrndi", "..", "dQd">;
}
let ArchGuard = "__ARM_ARCH >= 8 && defined(__aarch64__) && defined(__ARM_FEATURE_FRINT)" in {
def FRINT32X_S32 : SInst<"vrnd32x", "..", "fQf">;
def FRINT32Z_S32 : SInst<"vrnd32z", "..", "fQf">;
def FRINT64X_S32 : SInst<"vrnd64x", "..", "fQf">;
def FRINT64Z_S32 : SInst<"vrnd64z", "..", "fQf">;
}
////////////////////////////////////////////////////////////////////////////////
// MaxNum/MinNum Floating Point
let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_NUMERIC_MAXMIN)" in {
def FMAXNM_S32 : SInst<"vmaxnm", "...", "fQf">;
def FMINNM_S32 : SInst<"vminnm", "...", "fQf">;
}
let ArchGuard = "__ARM_ARCH >= 8 && defined(__aarch64__) && defined(__ARM_FEATURE_NUMERIC_MAXMIN)" in {
def FMAXNM_S64 : SInst<"vmaxnm", "...", "dQd">;
def FMINNM_S64 : SInst<"vminnm", "...", "dQd">;
}
////////////////////////////////////////////////////////////////////////////////
// Permutation
def VTRN1 : SOpInst<"vtrn1", "...",
                    "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_TRN1>;
def VZIP1 : SOpInst<"vzip1", "...",
                    "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_ZIP1>;
def VUZP1 : SOpInst<"vuzp1", "...",
                    "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_UZP1>;
def VTRN2 : SOpInst<"vtrn2", "...",
                    "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_TRN2>;
def VZIP2 : SOpInst<"vzip2", "...",
                    "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_ZIP2>;
def VUZP2 : SOpInst<"vuzp2", "...",
                    "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_UZP2>;
////////////////////////////////////////////////////////////////////////////////
// Table lookup
let InstName = "vtbl" in {
def VQTBL1_A64 : WInst<"vqtbl1", ".QU", "UccPcQUcQcQPc">;
def VQTBL2_A64 : WInst<"vqtbl2", ".(2Q)U", "UccPcQUcQcQPc">;
def VQTBL3_A64 : WInst<"vqtbl3", ".(3Q)U", "UccPcQUcQcQPc">;
def VQTBL4_A64 : WInst<"vqtbl4", ".(4Q)U", "UccPcQUcQcQPc">;
}
let InstName = "vtbx" in {
def VQTBX1_A64 : WInst<"vqtbx1", "..QU", "UccPcQUcQcQPc">;
def VQTBX2_A64 : WInst<"vqtbx2", "..(2Q)U", "UccPcQUcQcQPc">;
def VQTBX3_A64 : WInst<"vqtbx3", "..(3Q)U", "UccPcQUcQcQPc">;
def VQTBX4_A64 : WInst<"vqtbx4", "..(4Q)U", "UccPcQUcQcQPc">;
}
////////////////////////////////////////////////////////////////////////////////
// Vector reinterpret cast operations
// NeonEmitter implicitly takes the cartesian product of the type string with
// itself during generation so, unlike all other intrinsics, this one should
// include *all* types, not just additional ones.
def VVREINTERPRET : REINTERPRET_CROSS_SELF<"csilUcUsUiUlhfdPcPsPlQcQsQiQlQUcQUsQUiQUlQhQfQdQPcQPsQPlQPk"> {
  let ArchGuard = "__ARM_ARCH >= 8 && defined(__aarch64__)";
  let BigEndianSafe = 1;
}
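// For illustration (a sketch, assuming the usual vreinterpret naming scheme),
// the cross product yields one intrinsic per ordered pair of types, e.g.:
//   float64x2_t vreinterpretq_f64_u8(uint8x16_t a);
//   uint8x16_t  vreinterpretq_u8_f64(float64x2_t a);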
////////////////////////////////////////////////////////////////////////////////
// Scalar Intrinsics
// Scalar Arithmetic
// Scalar Addition
def SCALAR_ADD : SInst<"vadd", "111", "SlSUl">;
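// For example (a sketch; the "d" suffix below follows the usual mangling for
// 64-bit scalar intrinsics):
//   int64_t  vaddd_s64(int64_t a, int64_t b);
//   uint64_t vaddd_u64(uint64_t a, uint64_t b);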
// Scalar Saturating Add
def SCALAR_QADD : SInst<"vqadd", "111", "ScSsSiSlSUcSUsSUiSUl">;
// Scalar Subtraction
def SCALAR_SUB : SInst<"vsub", "111", "SlSUl">;
// Scalar Saturating Sub
def SCALAR_QSUB : SInst<"vqsub", "111", "ScSsSiSlSUcSUsSUiSUl">;
let InstName = "vmov" in {
def VGET_HIGH_A64 : NoTestOpInst<"vget_high", ".Q", "dPl", OP_HI>;
def VGET_LOW_A64 : NoTestOpInst<"vget_low", ".Q", "dPl", OP_LO>;
}
////////////////////////////////////////////////////////////////////////////////
// Scalar Shift
// Scalar Shift Left
def SCALAR_SHL: SInst<"vshl", "11(S1)", "SlSUl">;
// Scalar Saturating Shift Left
def SCALAR_QSHL: SInst<"vqshl", "11(S1)", "ScSsSiSlSUcSUsSUiSUl">;
// Scalar Saturating Rounding Shift Left
def SCALAR_QRSHL: SInst<"vqrshl", "11(S1)", "ScSsSiSlSUcSUsSUiSUl">;
// Scalar Shift Rounding Left
def SCALAR_RSHL: SInst<"vrshl", "11(S1)", "SlSUl">;
////////////////////////////////////////////////////////////////////////////////
// Scalar Shift (Immediate)
let isScalarShift = 1 in {
// Signed/Unsigned Shift Right (Immediate)
def SCALAR_SSHR_N: SInst<"vshr_n", "11I", "SlSUl">;
// Signed/Unsigned Rounding Shift Right (Immediate)
def SCALAR_SRSHR_N: SInst<"vrshr_n", "11I", "SlSUl">;
// Signed/Unsigned Shift Right and Accumulate (Immediate)
def SCALAR_SSRA_N: SInst<"vsra_n", "111I", "SlSUl">;
// Signed/Unsigned Rounding Shift Right and Accumulate (Immediate)
def SCALAR_SRSRA_N: SInst<"vrsra_n", "111I", "SlSUl">;
// Shift Left (Immediate)
def SCALAR_SHL_N: SInst<"vshl_n", "11I", "SlSUl">;
// Signed/Unsigned Saturating Shift Left (Immediate)
def SCALAR_SQSHL_N: SInst<"vqshl_n", "11I", "ScSsSiSlSUcSUsSUiSUl">;
// Signed Saturating Shift Left Unsigned (Immediate)
def SCALAR_SQSHLU_N: SInst<"vqshlu_n", "11I", "ScSsSiSl">;
// Shift Right And Insert (Immediate)
def SCALAR_SRI_N: SInst<"vsri_n", "111I", "SlSUl">;
// Shift Left And Insert (Immediate)
def SCALAR_SLI_N: SInst<"vsli_n", "111I", "SlSUl">;
let isScalarNarrowShift = 1 in {
  // Signed/Unsigned Saturating Shift Right Narrow (Immediate)
  def SCALAR_SQSHRN_N: SInst<"vqshrn_n", "(1<)1I", "SsSiSlSUsSUiSUl">;
  // Signed/Unsigned Saturating Rounded Shift Right Narrow (Immediate)
  def SCALAR_SQRSHRN_N: SInst<"vqrshrn_n", "(1<)1I", "SsSiSlSUsSUiSUl">;
  // Signed Saturating Shift Right Unsigned Narrow (Immediate)
  def SCALAR_SQSHRUN_N: SInst<"vqshrun_n", "(1<)1I", "SsSiSl">;
  // Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate)
  def SCALAR_SQRSHRUN_N: SInst<"vqrshrun_n", "(1<)1I", "SsSiSl">;
}
////////////////////////////////////////////////////////////////////////////////
// Scalar Signed/Unsigned Fixed-point Convert To Floating-Point (Immediate)
def SCALAR_SCVTF_N_F32: SInst<"vcvt_n_f32", "(1F)(1!)I", "SiSUi">;
def SCALAR_SCVTF_N_F64: SInst<"vcvt_n_f64", "(1F)(1!)I", "SlSUl">;
////////////////////////////////////////////////////////////////////////////////
// Scalar Floating-point Convert To Signed/Unsigned Fixed-point (Immediate)
def SCALAR_FCVTZS_N_S32 : SInst<"vcvt_n_s32", "(1S)1I", "Sf">;
def SCALAR_FCVTZU_N_U32 : SInst<"vcvt_n_u32", "(1U)1I", "Sf">;
def SCALAR_FCVTZS_N_S64 : SInst<"vcvt_n_s64", "(1S)1I", "Sd">;
def SCALAR_FCVTZU_N_U64 : SInst<"vcvt_n_u64", "(1U)1I", "Sd">;
}
////////////////////////////////////////////////////////////////////////////////
// Scalar Floating-point Round to Integral
let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_DIRECTED_ROUNDING)" in {
def SCALAR_FRINTN_S32 : SInst<"vrndn", "11", "Sf">;
}
////////////////////////////////////////////////////////////////////////////////
// Scalar Reduce Pairwise Addition (Scalar and Floating Point)
def SCALAR_ADDP : SInst<"vpadd", "1.", "SfSHlSHdSHUl">;
////////////////////////////////////////////////////////////////////////////////
// Scalar Reduce Floating Point Pairwise Max/Min
def SCALAR_FMAXP : SInst<"vpmax", "1.", "SfSQd">;
def SCALAR_FMINP : SInst<"vpmin", "1.", "SfSQd">;
////////////////////////////////////////////////////////////////////////////////
// Scalar Reduce Floating Point Pairwise maxNum/minNum
def SCALAR_FMAXNMP : SInst<"vpmaxnm", "1.", "SfSQd">;
def SCALAR_FMINNMP : SInst<"vpminnm", "1.", "SfSQd">;
////////////////////////////////////////////////////////////////////////////////
// Scalar Integer Saturating Doubling Multiply Half High
def SCALAR_SQDMULH : SInst<"vqdmulh", "111", "SsSi">;
////////////////////////////////////////////////////////////////////////////////
// Scalar Integer Saturating Rounding Doubling Multiply Half High
def SCALAR_SQRDMULH : SInst<"vqrdmulh", "111", "SsSi">;
let ArchGuard = "defined(__ARM_FEATURE_QRDMX) && defined(__aarch64__)" in {
////////////////////////////////////////////////////////////////////////////////
// Signed Saturating Rounding Doubling Multiply Accumulate Returning High Half
def SCALAR_SQRDMLAH : SInst<"vqrdmlah", "1111", "SsSi">;
////////////////////////////////////////////////////////////////////////////////
// Signed Saturating Rounding Doubling Multiply Subtract Returning High Half
def SCALAR_SQRDMLSH : SInst<"vqrdmlsh", "1111", "SsSi">;
}
////////////////////////////////////////////////////////////////////////////////
// Scalar Floating-point Multiply Extended
def SCALAR_FMULX : IInst<"vmulx", "111", "SfSd">;
////////////////////////////////////////////////////////////////////////////////
// Scalar Floating-point Reciprocal Step
def SCALAR_FRECPS : IInst<"vrecps", "111", "SfSd">;
////////////////////////////////////////////////////////////////////////////////
// Scalar Floating-point Reciprocal Square Root Step
def SCALAR_FRSQRTS : IInst<"vrsqrts", "111", "SfSd">;
////////////////////////////////////////////////////////////////////////////////
// Scalar Signed Integer Convert To Floating-point
def SCALAR_SCVTFS : SInst<"vcvt_f32", "(1F)(1!)", "Si">;
def SCALAR_SCVTFD : SInst<"vcvt_f64", "(1F)(1!)", "Sl">;
////////////////////////////////////////////////////////////////////////////////
// Scalar Unsigned Integer Convert To Floating-point
def SCALAR_UCVTFS : SInst<"vcvt_f32", "(1F)(1!)", "SUi">;
def SCALAR_UCVTFD : SInst<"vcvt_f64", "(1F)(1!)", "SUl">;
////////////////////////////////////////////////////////////////////////////////
// Scalar Floating-point Converts
def SCALAR_FCVTXN : IInst<"vcvtx_f32", "(1F<)(1!)", "Sd">;
def SCALAR_FCVTNSS : SInst<"vcvtn_s32", "(1S)1", "Sf">;
def SCALAR_FCVTNUS : SInst<"vcvtn_u32", "(1U)1", "Sf">;
def SCALAR_FCVTNSD : SInst<"vcvtn_s64", "(1S)1", "Sd">;
def SCALAR_FCVTNUD : SInst<"vcvtn_u64", "(1U)1", "Sd">;
def SCALAR_FCVTMSS : SInst<"vcvtm_s32", "(1S)1", "Sf">;
def SCALAR_FCVTMUS : SInst<"vcvtm_u32", "(1U)1", "Sf">;
def SCALAR_FCVTMSD : SInst<"vcvtm_s64", "(1S)1", "Sd">;
def SCALAR_FCVTMUD : SInst<"vcvtm_u64", "(1U)1", "Sd">;
def SCALAR_FCVTASS : SInst<"vcvta_s32", "(1S)1", "Sf">;
def SCALAR_FCVTAUS : SInst<"vcvta_u32", "(1U)1", "Sf">;
def SCALAR_FCVTASD : SInst<"vcvta_s64", "(1S)1", "Sd">;
def SCALAR_FCVTAUD : SInst<"vcvta_u64", "(1U)1", "Sd">;
def SCALAR_FCVTPSS : SInst<"vcvtp_s32", "(1S)1", "Sf">;
def SCALAR_FCVTPUS : SInst<"vcvtp_u32", "(1U)1", "Sf">;
def SCALAR_FCVTPSD : SInst<"vcvtp_s64", "(1S)1", "Sd">;
def SCALAR_FCVTPUD : SInst<"vcvtp_u64", "(1U)1", "Sd">;
def SCALAR_FCVTZSS : SInst<"vcvt_s32", "(1S)1", "Sf">;
def SCALAR_FCVTZUS : SInst<"vcvt_u32", "(1U)1", "Sf">;
def SCALAR_FCVTZSD : SInst<"vcvt_s64", "(1S)1", "Sd">;
def SCALAR_FCVTZUD : SInst<"vcvt_u64", "(1U)1", "Sd">;
////////////////////////////////////////////////////////////////////////////////
// Scalar Floating-point Reciprocal Estimate
def SCALAR_FRECPE : IInst<"vrecpe", "11", "SfSd">;
////////////////////////////////////////////////////////////////////////////////
// Scalar Floating-point Reciprocal Exponent
def SCALAR_FRECPX : IInst<"vrecpx", "11", "SfSd">;
////////////////////////////////////////////////////////////////////////////////
// Scalar Floating-point Reciprocal Square Root Estimate
def SCALAR_FRSQRTE : IInst<"vrsqrte", "11", "SfSd">;
////////////////////////////////////////////////////////////////////////////////
// Scalar Integer Comparison
def SCALAR_CMEQ : SInst<"vceq", "(U1)11", "SlSUl">;
def SCALAR_CMEQZ : SInst<"vceqz", "(U1)1", "SlSUl">;
def SCALAR_CMGE : SInst<"vcge", "(U1)11", "Sl">;
def SCALAR_CMGEZ : SInst<"vcgez", "(U1)1", "Sl">;
def SCALAR_CMHS : SInst<"vcge", "(U1)11", "SUl">;
def SCALAR_CMLE : SInst<"vcle", "(U1)11", "SlSUl">;
def SCALAR_CMLEZ : SInst<"vclez", "(U1)1", "Sl">;
def SCALAR_CMLT : SInst<"vclt", "(U1)11", "SlSUl">;
def SCALAR_CMLTZ : SInst<"vcltz", "(U1)1", "Sl">;
def SCALAR_CMGT : SInst<"vcgt", "(U1)11", "Sl">;
def SCALAR_CMGTZ : SInst<"vcgtz", "(U1)1", "Sl">;
def SCALAR_CMHI : SInst<"vcgt", "(U1)11", "SUl">;
def SCALAR_CMTST : SInst<"vtst", "(U1)11", "SlSUl">;
////////////////////////////////////////////////////////////////////////////////
// Scalar Floating-point Comparison
def SCALAR_FCMEQ : IInst<"vceq", "(1U)11", "SfSd">;
def SCALAR_FCMEQZ : IInst<"vceqz", "(1U)1", "SfSd">;
def SCALAR_FCMGE : IInst<"vcge", "(1U)11", "SfSd">;
def SCALAR_FCMGEZ : IInst<"vcgez", "(1U)1", "SfSd">;
def SCALAR_FCMGT : IInst<"vcgt", "(1U)11", "SfSd">;
def SCALAR_FCMGTZ : IInst<"vcgtz", "(1U)1", "SfSd">;
def SCALAR_FCMLE : IInst<"vcle", "(1U)11", "SfSd">;
def SCALAR_FCMLEZ : IInst<"vclez", "(1U)1", "SfSd">;
def SCALAR_FCMLT : IInst<"vclt", "(1U)11", "SfSd">;
def SCALAR_FCMLTZ : IInst<"vcltz", "(1U)1", "SfSd">;
////////////////////////////////////////////////////////////////////////////////
// Scalar Floating-point Absolute Compare Mask Greater Than Or Equal
def SCALAR_FACGE : IInst<"vcage", "(1U)11", "SfSd">;
def SCALAR_FACLE : IInst<"vcale", "(1U)11", "SfSd">;
////////////////////////////////////////////////////////////////////////////////
// Scalar Floating-point Absolute Compare Mask Greater Than
def SCALAR_FACGT : IInst<"vcagt", "(1U)11", "SfSd">;
def SCALAR_FACLT : IInst<"vcalt", "(1U)11", "SfSd">;
////////////////////////////////////////////////////////////////////////////////
// Scalar Absolute Value
def SCALAR_ABS : SInst<"vabs", "11", "Sl">;
////////////////////////////////////////////////////////////////////////////////
// Scalar Absolute Difference
def SCALAR_ABD : IInst<"vabd", "111", "SfSd">;
////////////////////////////////////////////////////////////////////////////////
// Scalar Signed Saturating Absolute Value
def SCALAR_SQABS : SInst<"vqabs", "11", "ScSsSiSl">;
////////////////////////////////////////////////////////////////////////////////
// Scalar Negate
def SCALAR_NEG : SInst<"vneg", "11", "Sl">;
////////////////////////////////////////////////////////////////////////////////
// Scalar Signed Saturating Negate
def SCALAR_SQNEG : SInst<"vqneg", "11", "ScSsSiSl">;
////////////////////////////////////////////////////////////////////////////////
// Scalar Signed Saturating Accumulate of Unsigned Value
def SCALAR_SUQADD : SInst<"vuqadd", "11(1U)", "ScSsSiSl">;
////////////////////////////////////////////////////////////////////////////////
// Scalar Unsigned Saturating Accumulate of Signed Value
def SCALAR_USQADD : SInst<"vsqadd", "11(1S)", "SUcSUsSUiSUl">;
////////////////////////////////////////////////////////////////////////////////
// Signed Saturating Doubling Multiply-Add Long
def SCALAR_SQDMLAL : SInst<"vqdmlal", "(1>)(1>)11", "SsSi">;
////////////////////////////////////////////////////////////////////////////////
// Signed Saturating Doubling Multiply-Subtract Long
def SCALAR_SQDMLSL : SInst<"vqdmlsl", "(1>)(1>)11", "SsSi">;
////////////////////////////////////////////////////////////////////////////////
// Signed Saturating Doubling Multiply Long
def SCALAR_SQDMULL : SInst<"vqdmull", "(1>)11", "SsSi">;
////////////////////////////////////////////////////////////////////////////////
// Scalar Signed Saturating Extract Unsigned Narrow
def SCALAR_SQXTUN : SInst<"vqmovun", "(U1<)1", "SsSiSl">;
////////////////////////////////////////////////////////////////////////////////
// Scalar Signed Saturating Extract Narrow
def SCALAR_SQXTN : SInst<"vqmovn", "(1<)1", "SsSiSl">;
////////////////////////////////////////////////////////////////////////////////
// Scalar Unsigned Saturating Extract Narrow
def SCALAR_UQXTN : SInst<"vqmovn", "(1<)1", "SUsSUiSUl">;
// Scalar Floating Point multiply (scalar, by element)
def SCALAR_FMUL_LANE : IOpInst<"vmul_lane", "11.I", "SfSd", OP_SCALAR_MUL_LN>;
def SCALAR_FMUL_LANEQ : IOpInst<"vmul_laneq", "11QI", "SfSd", OP_SCALAR_MUL_LN> {
  let isLaneQ = 1;
}
// Scalar Floating Point multiply extended (scalar, by element)
def SCALAR_FMULX_LANE : IOpInst<"vmulx_lane", "11.I", "SfSd", OP_SCALAR_MULX_LN>;
def SCALAR_FMULX_LANEQ : IOpInst<"vmulx_laneq", "11QI", "SfSd", OP_SCALAR_MULX_LN> {
  let isLaneQ = 1;
}
def SCALAR_VMUL_N : IInst<"vmul_n", "..1", "d">;
// VMUL_LANE_A64 d type implemented using scalar mul lane
def SCALAR_VMUL_LANE : IInst<"vmul_lane", "..qI", "d">;
// VMUL_LANEQ d type implemented using scalar mul lane
def SCALAR_VMUL_LANEQ : IInst<"vmul_laneq", "..QI", "d"> {
  let isLaneQ = 1;
}
// VMULX_LANE d type implemented using scalar vmulx_lane
def SCALAR_VMULX_LANE : IOpInst<"vmulx_lane", "..qI", "d", OP_SCALAR_VMULX_LN>;
// VMULX_LANEQ d type implemented using scalar vmulx_laneq
def SCALAR_VMULX_LANEQ : IOpInst<"vmulx_laneq", "..QI", "d", OP_SCALAR_VMULX_LNQ> {
  let isLaneQ = 1;
}
// Scalar Floating Point fused multiply-add (scalar, by element)
def SCALAR_FMLA_LANE : IInst<"vfma_lane", "111.I", "SfSd">;
def SCALAR_FMLA_LANEQ : IInst<"vfma_laneq", "111QI", "SfSd"> {
  let isLaneQ = 1;
}
// Scalar Floating Point fused multiply-subtract (scalar, by element)
def SCALAR_FMLS_LANE : IOpInst<"vfms_lane", "111.I", "SfSd", OP_FMS_LN>;
def SCALAR_FMLS_LANEQ : IOpInst<"vfms_laneq", "111QI", "SfSd", OP_FMS_LNQ> {
  let isLaneQ = 1;
}
// Signed Saturating Doubling Multiply Long (scalar by element)
def SCALAR_SQDMULL_LANE : SOpInst<"vqdmull_lane", "(1>)1.I", "SsSi", OP_SCALAR_QDMULL_LN>;
def SCALAR_SQDMULL_LANEQ : SOpInst<"vqdmull_laneq", "(1>)1QI", "SsSi", OP_SCALAR_QDMULL_LN> {
  let isLaneQ = 1;
}
// Signed Saturating Doubling Multiply-Add Long (scalar by element)
def SCALAR_SQDMLAL_LANE : SInst<"vqdmlal_lane", "(1>)(1>)1.I", "SsSi">;
def SCALAR_SQDMLAL_LANEQ : SInst<"vqdmlal_laneq", "(1>)(1>)1QI", "SsSi"> {
  let isLaneQ = 1;
}
// Signed Saturating Doubling Multiply-Subtract Long (scalar by element)
def SCALAR_SQDMLS_LANE : SInst<"vqdmlsl_lane", "(1>)(1>)1.I", "SsSi">;
def SCALAR_SQDMLS_LANEQ : SInst<"vqdmlsl_laneq", "(1>)(1>)1QI", "SsSi"> {
  let isLaneQ = 1;
}
// Scalar Integer Saturating Doubling Multiply Half High (scalar by element)
def SCALAR_SQDMULH_LANE : SOpInst<"vqdmulh_lane", "11.I", "SsSi", OP_SCALAR_QDMULH_LN>;
def SCALAR_SQDMULH_LANEQ : SOpInst<"vqdmulh_laneq", "11QI", "SsSi", OP_SCALAR_QDMULH_LN> {
  let isLaneQ = 1;
}
// Scalar Integer Saturating Rounding Doubling Multiply Half High
def SCALAR_SQRDMULH_LANE : SOpInst<"vqrdmulh_lane", "11.I", "SsSi", OP_SCALAR_QRDMULH_LN>;
def SCALAR_SQRDMULH_LANEQ : SOpInst<"vqrdmulh_laneq", "11QI", "SsSi", OP_SCALAR_QRDMULH_LN> {
  let isLaneQ = 1;
}
let ArchGuard = "defined(__ARM_FEATURE_QRDMX) && defined(__aarch64__)" in {
// Signed Saturating Rounding Doubling Multiply Accumulate Returning High Half
def SCALAR_SQRDMLAH_LANE : SOpInst<"vqrdmlah_lane", "111.I", "SsSi", OP_SCALAR_QRDMLAH_LN>;
def SCALAR_SQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "111QI", "SsSi", OP_SCALAR_QRDMLAH_LN> {
  let isLaneQ = 1;
}
// Signed Saturating Rounding Doubling Multiply Subtract Returning High Half
def SCALAR_SQRDMLSH_LANE : SOpInst<"vqrdmlsh_lane", "111.I", "SsSi", OP_SCALAR_QRDMLSH_LN>;
def SCALAR_SQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "111QI", "SsSi", OP_SCALAR_QRDMLSH_LN> {
  let isLaneQ = 1;
}
}
def SCALAR_VDUP_LANE : IInst<"vdup_lane", "1.I", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs">;
def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "1QI", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs"> {
  let isLaneQ = 1;
}
}
// ARMv8.2-A FP16 vector intrinsics for A32/A64.
let ArchGuard = "defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)" in {
// ARMv8.2-A FP16 one-operand vector intrinsics.
// Comparison
def CMEQH : SInst<"vceqz", "U.", "hQh">;
def CMGEH : SInst<"vcgez", "U.", "hQh">;
def CMGTH : SInst<"vcgtz", "U.", "hQh">;
def CMLEH : SInst<"vclez", "U.", "hQh">;
def CMLTH : SInst<"vcltz", "U.", "hQh">;
// Vector conversion
def VCVT_F16 : SInst<"vcvt_f16", "F(.!)", "sUsQsQUs">;
def VCVT_S16 : SInst<"vcvt_s16", "S.", "hQh">;
def VCVT_U16 : SInst<"vcvt_u16", "U.", "hQh">;
def VCVTA_S16 : SInst<"vcvta_s16", "S.", "hQh">;
def VCVTA_U16 : SInst<"vcvta_u16", "U.", "hQh">;
def VCVTM_S16 : SInst<"vcvtm_s16", "S.", "hQh">;
def VCVTM_U16 : SInst<"vcvtm_u16", "U.", "hQh">;
def VCVTN_S16 : SInst<"vcvtn_s16", "S.", "hQh">;
def VCVTN_U16 : SInst<"vcvtn_u16", "U.", "hQh">;
def VCVTP_S16 : SInst<"vcvtp_s16", "S.", "hQh">;
def VCVTP_U16 : SInst<"vcvtp_u16", "U.", "hQh">;
// Vector rounding
let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_DIRECTED_ROUNDING) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)" in {
def FRINTZH : SInst<"vrnd", "..", "hQh">;
def FRINTNH : SInst<"vrndn", "..", "hQh">;
def FRINTAH : SInst<"vrnda", "..", "hQh">;
def FRINTPH : SInst<"vrndp", "..", "hQh">;
def FRINTMH : SInst<"vrndm", "..", "hQh">;
def FRINTXH : SInst<"vrndx", "..", "hQh">;
}
// Misc.
def VABSH : SInst<"vabs", "..", "hQh">;
def VNEGH : SOpInst<"vneg", "..", "hQh", OP_NEG>;
def VRECPEH : SInst<"vrecpe", "..", "hQh">;
def FRSQRTEH : SInst<"vrsqrte", "..", "hQh">;
// ARMv8.2-A FP16 two-operands vector intrinsics.
// Misc.
def VADDH : SOpInst<"vadd", "...", "hQh", OP_ADD>;
def VABDH : SInst<"vabd", "...", "hQh">;
def VSUBH : SOpInst<"vsub", "...", "hQh", OP_SUB>;
// Comparison
let InstName = "vacge" in {
def VCAGEH : SInst<"vcage", "U..", "hQh">;
def VCALEH : SInst<"vcale", "U..", "hQh">;
}
let InstName = "vacgt" in {
def VCAGTH : SInst<"vcagt", "U..", "hQh">;
def VCALTH : SInst<"vcalt", "U..", "hQh">;
}
def VCEQH : SOpInst<"vceq", "U..", "hQh", OP_EQ>;
def VCGEH : SOpInst<"vcge", "U..", "hQh", OP_GE>;
def VCGTH : SOpInst<"vcgt", "U..", "hQh", OP_GT>;
let InstName = "vcge" in
  def VCLEH : SOpInst<"vcle", "U..", "hQh", OP_LE>;
let InstName = "vcgt" in
  def VCLTH : SOpInst<"vclt", "U..", "hQh", OP_LT>;
// Vector conversion
let isVCVT_N = 1 in {
def VCVT_N_F16 : SInst<"vcvt_n_f16", "F(.!)I", "sUsQsQUs">;
def VCVT_N_S16 : SInst<"vcvt_n_s16", "S.I", "hQh">;
def VCVT_N_U16 : SInst<"vcvt_n_u16", "U.I", "hQh">;
}
// Max/Min
def VMAXH : SInst<"vmax", "...", "hQh">;
def VMINH : SInst<"vmin", "...", "hQh">;
let ArchGuard = "__ARM_ARCH >= 8 && defined(__ARM_FEATURE_NUMERIC_MAXMIN) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)" in {
def FMAXNMH : SInst<"vmaxnm", "...", "hQh">;
def FMINNMH : SInst<"vminnm", "...", "hQh">;
}
// Multiplication/Division
def VMULH : SOpInst<"vmul", "...", "hQh", OP_MUL>;
// Pairwise addition
def VPADDH : SInst<"vpadd", "...", "h">;
// Pairwise Max/Min
def VPMAXH : SInst<"vpmax", "...", "h">;
def VPMINH : SInst<"vpmin", "...", "h">;
// Reciprocal/Sqrt
def VRECPSH : SInst<"vrecps", "...", "hQh">;
def VRSQRTSH : SInst<"vrsqrts", "...", "hQh">;
// ARMv8.2-A FP16 three-operands vector intrinsics.
// Vector fused multiply-add operations
def VFMAH : SInst<"vfma", "....", "hQh">;
def VFMSH : SOpInst<"vfms", "....", "hQh", OP_FMLS>;
// ARMv8.2-A FP16 lane vector intrinsics.
// Mul lane
def VMUL_LANEH : IOpInst<"vmul_lane", "..qI", "hQh", OP_MUL_LN>;
def VMUL_NH : IOpInst<"vmul_n", "..1", "hQh", OP_MUL_N>;
// Data processing intrinsics - section 5
// Logical operations
let isHiddenLInst = 1 in
  def VBSLH : SInst<"vbsl", ".U..", "hQh">;
// Transposition operations
def VZIPH : WInst<"vzip", "2..", "hQh">;
def VUZPH : WInst<"vuzp", "2..", "hQh">;
def VTRNH : WInst<"vtrn", "2..", "hQh">;
let ArchGuard = "!defined(__aarch64__)" in {
// Set all lanes to same value.
// Already implemented prior to ARMv8.2-A.
def VMOV_NH : WOpInst<"vmov_n", ".1", "hQh", OP_DUP>;
def VDUP_NH : WOpInst<"vdup_n", ".1", "hQh", OP_DUP>;
def VDUP_LANE1H : WOpInst<"vdup_lane", ".qI", "hQh", OP_DUP_LN>;
}
// Vector Extract
def VEXTH : WInst<"vext", "...I", "hQh">;
// Reverse vector elements
def VREV64H : WOpInst<"vrev64", "..", "hQh", OP_REV64>;
}
// ARMv8.2-A FP16 vector intrinsics for A64 only.
let ArchGuard = "defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(__aarch64__)" in {
// Vector rounding
def FRINTIH : SInst<"vrndi", "..", "hQh">;
// Misc.
def FSQRTH : SInst<"vsqrt", "..", "hQh">;
// Multiplication/Division
def MULXH : SInst<"vmulx", "...", "hQh">;
def FDIVH : IOpInst<"vdiv", "...", "hQh", OP_DIV>;
// Pairwise addition
def VPADDH1 : SInst<"vpadd", "...", "Qh">;
// Pairwise Max/Min
def VPMAXH1 : SInst<"vpmax", "...", "Qh">;
def VPMINH1 : SInst<"vpmin", "...", "Qh">;
// Pairwise MaxNum/MinNum
def FMAXNMPH : SInst<"vpmaxnm", "...", "hQh">;
def FMINNMPH : SInst<"vpminnm", "...", "hQh">;
// ARMv8.2-A FP16 lane vector intrinsics.
// FMA lane
def VFMA_LANEH : IInst<"vfma_lane", "...qI", "hQh">;
def VFMA_LANEQH : IInst<"vfma_laneq", "...QI", "hQh"> {
  let isLaneQ = 1;
}
// FMA lane with scalar argument
def FMLA_NH : SOpInst<"vfma_n", "...1", "hQh", OP_FMLA_N>;
// Scalar floating point fused multiply-add (scalar, by element)
def SCALAR_FMLA_LANEH : IInst<"vfma_lane", "111.I", "Sh">;
def SCALAR_FMLA_LANEQH : IInst<"vfma_laneq", "111QI", "Sh"> {
  let isLaneQ = 1;
}
// FMS lane
def VFMS_LANEH : IOpInst<"vfms_lane", "...qI", "hQh", OP_FMS_LN>;
def VFMS_LANEQH : IOpInst<"vfms_laneq", "...QI", "hQh", OP_FMS_LNQ> {
  let isLaneQ = 1;
}
// FMS lane with scalar argument
def FMLS_NH : SOpInst<"vfms_n", "...1", "hQh", OP_FMLS_N>;
// Scalar floating point fused multiply-subtract (scalar, by element)
def SCALAR_FMLS_LANEH : IOpInst<"vfms_lane", "111.I", "Sh", OP_FMS_LN>;
def SCALAR_FMLS_LANEQH : IOpInst<"vfms_laneq", "111QI", "Sh", OP_FMS_LNQ> {
  let isLaneQ = 1;
}
// Mul lane
def VMUL_LANEQH : IOpInst<"vmul_laneq", "..QI", "hQh", OP_MUL_LN> {
  let isLaneQ = 1;
}
// Scalar floating point multiply (scalar, by element)
def SCALAR_FMUL_LANEH : IOpInst<"vmul_lane", "11.I", "Sh", OP_SCALAR_MUL_LN>;
def SCALAR_FMUL_LANEQH : IOpInst<"vmul_laneq", "11QI", "Sh", OP_SCALAR_MUL_LN> {
  let isLaneQ = 1;
}
// Mulx lane
def VMULX_LANEH : IOpInst<"vmulx_lane", "..qI", "hQh", OP_MULX_LN>;
def VMULX_LANEQH : IOpInst<"vmulx_laneq", "..QI", "hQh", OP_MULX_LN> {
  let isLaneQ = 1;
}
def VMULX_NH : IOpInst<"vmulx_n", "..1", "hQh", OP_MULX_N>;
// Scalar floating point mulx (scalar, by element)
def SCALAR_FMULX_LANEH : IInst<"vmulx_lane", "11.I", "Sh">;
def SCALAR_FMULX_LANEQH : IInst<"vmulx_laneq", "11QI", "Sh"> {
  let isLaneQ = 1;
}
// ARMv8.2-A FP16 reduction vector intrinsics.
def VMAXVH : SInst<"vmaxv", "1.", "hQh">;
def VMINVH : SInst<"vminv", "1.", "hQh">;
def FMAXNMVH : SInst<"vmaxnmv", "1.", "hQh">;
def FMINNMVH : SInst<"vminnmv", "1.", "hQh">;
// Permutation
def VTRN1H : SOpInst<"vtrn1", "...", "hQh", OP_TRN1>;
def VZIP1H : SOpInst<"vzip1", "...", "hQh", OP_ZIP1>;
def VUZP1H : SOpInst<"vuzp1", "...", "hQh", OP_UZP1>;
def VTRN2H : SOpInst<"vtrn2", "...", "hQh", OP_TRN2>;
def VZIP2H : SOpInst<"vzip2", "...", "hQh", OP_ZIP2>;
def VUZP2H : SOpInst<"vuzp2", "...", "hQh", OP_UZP2>;
def SCALAR_VDUP_LANEH : IInst<"vdup_lane", "1.I", "Sh">;
def SCALAR_VDUP_LANEQH : IInst<"vdup_laneq", "1QI", "Sh"> {
  let isLaneQ = 1;
}
}
// v8.2-A dot product instructions.
let ArchGuard = "defined(__ARM_FEATURE_DOTPROD)" in {
def DOT : SInst<"vdot", "..(<<)(<<)", "iQiUiQUi">;
def DOT_LANE : SOpInst<"vdot_lane", "..(<<)(<<q)I", "iUiQiQUi", OP_DOT_LN>;
}
let ArchGuard = "defined(__ARM_FEATURE_DOTPROD) && defined(__aarch64__)" in {
// Variants indexing into a 128-bit vector are A64 only.
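// E.g. (a sketch of one expected prototype, assuming the usual ACLE mangling):
//   uint32x2_t vdot_laneq_u32(uint32x2_t r, uint8x8_t a, uint8x16_t b, const int lane);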
def UDOT_LANEQ : SOpInst<"vdot_laneq", "..(<<)(<<Q)I", "iUiQiQUi", OP_DOT_LNQ> {
  let isLaneQ = 1;
}
}
// v8.2-A FP16 fused multiply-add long instructions.
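// These widen f16 inputs and accumulate into an f32 vector; a representative
// prototype (a sketch, assuming the usual ACLE mangling) would be:
//   float32x2_t vfmlal_low_f16(float32x2_t r, float16x4_t a, float16x4_t b);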
let ArchGuard = "defined(__ARM_FEATURE_FP16_FML) && defined(__aarch64__)" in {
def VFMLAL_LOW : SInst<"vfmlal_low", ">>..", "hQh">;
def VFMLSL_LOW : SInst<"vfmlsl_low", ">>..", "hQh">;
def VFMLAL_HIGH : SInst<"vfmlal_high", ">>..", "hQh">;
def VFMLSL_HIGH : SInst<"vfmlsl_high", ">>..", "hQh">;
def VFMLAL_LANE_LOW : SOpInst<"vfmlal_lane_low", "(F>)(F>)F(Fq)I", "hQh", OP_FMLAL_LN>;
def VFMLSL_LANE_LOW : SOpInst<"vfmlsl_lane_low", "(F>)(F>)F(Fq)I", "hQh", OP_FMLSL_LN>;
def VFMLAL_LANE_HIGH : SOpInst<"vfmlal_lane_high", "(F>)(F>)F(Fq)I", "hQh", OP_FMLAL_LN_Hi>;
def VFMLSL_LANE_HIGH : SOpInst<"vfmlsl_lane_high", "(F>)(F>)F(Fq)I", "hQh", OP_FMLSL_LN_Hi>;
def VFMLAL_LANEQ_LOW : SOpInst<"vfmlal_laneq_low", "(F>)(F>)F(FQ)I", "hQh", OP_FMLAL_LN> {
  let isLaneQ = 1;
}
def VFMLSL_LANEQ_LOW : SOpInst<"vfmlsl_laneq_low", "(F>)(F>)F(FQ)I", "hQh", OP_FMLSL_LN> {
  let isLaneQ = 1;
}
def VFMLAL_LANEQ_HIGH : SOpInst<"vfmlal_laneq_high", "(F>)(F>)F(FQ)I", "hQh", OP_FMLAL_LN_Hi> {
  let isLaneQ = 1;
}
def VFMLSL_LANEQ_HIGH : SOpInst<"vfmlsl_laneq_high", "(F>)(F>)F(FQ)I", "hQh", OP_FMLSL_LN_Hi> {
  let isLaneQ = 1;
}
}
let ArchGuard = "defined(__ARM_FEATURE_MATMUL_INT8)" in {
def VMMLA : SInst<"vmmla", "..(<<)(<<)", "QUiQi">;
def VUSMMLA : SInst<"vusmmla", "..(<<U)(<<)", "Qi">;
def VUSDOT : SInst<"vusdot", "..(<<U)(<<)", "iQi">;
def VUSDOT_LANE : SOpInst<"vusdot_lane", "..(<<U)(<<q)I", "iQi", OP_USDOT_LN>;
def VSUDOT_LANE : SOpInst<"vsudot_lane", "..(<<)(<<qU)I", "iQi", OP_SUDOT_LN>;
let ArchGuard = "defined(__aarch64__)" in {
  let isLaneQ = 1 in {
    def VUSDOT_LANEQ : SOpInst<"vusdot_laneq", "..(<<U)(<<Q)I", "iQi", OP_USDOT_LNQ>;
    def VSUDOT_LANEQ : SOpInst<"vsudot_laneq", "..(<<)(<<QU)I", "iQi", OP_SUDOT_LNQ>;
  }
}
}
let ArchGuard = "defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC)" in {
def VDOT_BF : SInst<"vbfdot", "..BB", "fQf">;
def VDOT_LANE_BF : SOpInst<"vbfdot_lane", "..B(Bq)I", "fQf", OP_BFDOT_LN>;
def VDOT_LANEQ_BF : SOpInst<"vbfdot_laneq", "..B(BQ)I", "fQf", OP_BFDOT_LNQ> {
  let isLaneQ = 1;
}
def VFMMLA_BF : SInst<"vbfmmla", "..BB", "Qf">;
def VFMLALB_BF : SInst<"vbfmlalb", "..BB", "Qf">;
def VFMLALT_BF : SInst<"vbfmlalt", "..BB", "Qf">;
def VFMLALB_LANE_BF : SOpInst<"vbfmlalb_lane", "..B(Bq)I", "Qf", OP_BFMLALB_LN>;
def VFMLALB_LANEQ_BF : SOpInst<"vbfmlalb_laneq", "..B(BQ)I", "Qf", OP_BFMLALB_LN>;
def VFMLALT_LANE_BF : SOpInst<"vbfmlalt_lane", "..B(Bq)I", "Qf", OP_BFMLALT_LN>;
def VFMLALT_LANEQ_BF : SOpInst<"vbfmlalt_laneq", "..B(BQ)I", "Qf", OP_BFMLALT_LN>;
}
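// The multiclass below expands, for each rotation, into the plain vcmla{ROT}
// form plus _lane and _laneq variants for both the 64-bit and Q register
// widths; e.g. for the "f" type it is expected to yield intrinsics along the
// lines of vcmlaq_rot90_f32 and vcmlaq_rot90_laneq_f32 (names assume the usual
// ACLE mangling).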
multiclass VCMLA_ROTS<string type, string lanety, string laneqty> {
  foreach ROT = ["", "_rot90", "_rot180", "_rot270" ] in {
    def : SInst<"vcmla" # ROT, "....", type # "Q" # type>;
    // vcmla{ROT}_lane
    def : SOpInst<"vcmla" # ROT # "_lane", "...qI", type, Op<(call "vcmla" # ROT, $p0, $p1,
          (bitcast $p0, (dup_typed lanety, (call "vget_lane", (bitcast lanety, $p2), $p3))))>>;
    // vcmlaq{ROT}_lane
    def : SOpInst<"vcmla" # ROT # "_lane", "...qI", "Q" # type, Op<(call "vcmla" # ROT, $p0, $p1,
          (bitcast $p0, (dup_typed laneqty, (call "vget_lane", (bitcast lanety, $p2), $p3))))>>;
    let isLaneQ = 1 in {
      // vcmla{ROT}_laneq
      def : SOpInst<"vcmla" # ROT # "_laneq", "...QI", type, Op<(call "vcmla" # ROT, $p0, $p1,
            (bitcast $p0, (dup_typed lanety, (call "vget_lane", (bitcast laneqty, $p2), $p3))))>>;
      // vcmlaq{ROT}_laneq
      def : SOpInst<"vcmla" # ROT # "_laneq", "...QI", "Q" # type, Op<(call "vcmla" # ROT, $p0, $p1,
            (bitcast $p0, (dup_typed laneqty, (call "vget_lane", (bitcast laneqty, $p2), $p3))))>>;
    }
  }
}
// v8.3-A Vector complex addition intrinsics
let ArchGuard = "defined(__ARM_FEATURE_COMPLEX) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)" in {
def VCADD_ROT90_FP16 : SInst<"vcadd_rot90", "...", "h">;
def VCADD_ROT270_FP16 : SInst<"vcadd_rot270", "...", "h">;
def VCADDQ_ROT90_FP16 : SInst<"vcaddq_rot90", "QQQ", "h">;
def VCADDQ_ROT270_FP16 : SInst<"vcaddq_rot270", "QQQ", "h">;
defm VCMLA_FP16 : VCMLA_ROTS<"h", "uint32x2_t", "uint32x4_t">;
}
let ArchGuard = "defined(__ARM_FEATURE_COMPLEX)" in {
def VCADD_ROT90 : SInst<"vcadd_rot90", "...", "f">;
def VCADD_ROT270 : SInst<"vcadd_rot270", "...", "f">;
def VCADDQ_ROT90 : SInst<"vcaddq_rot90", "QQQ", "f">;
def VCADDQ_ROT270 : SInst<"vcaddq_rot270", "QQQ", "f">;
defm VCMLA_F32 : VCMLA_ROTS<"f", "uint64x1_t", "uint64x2_t">;
}
let ArchGuard = "defined(__ARM_FEATURE_COMPLEX) && defined(__aarch64__)" in {
def VCADDQ_ROT90_FP64 : SInst<"vcaddq_rot90", "QQQ", "d">;
def VCADDQ_ROT270_FP64 : SInst<"vcaddq_rot270", "QQQ", "d">;
defm VCMLA_FP64 : VCMLA_ROTS<"d", "uint64x2_t", "uint64x2_t">;
}
// V8.2-A BFloat intrinsics
let ArchGuard = "defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC)" in {
def VCREATE_BF : NoTestOpInst<"vcreate", ".(IU>)", "b", OP_CAST> {
  let BigEndianSafe = 1;
}
def VDUP_N_BF : WOpInst<"vdup_n", ".1", "bQb", OP_DUP>;
def VDUP_LANE_BF : WOpInst<"vdup_lane", ".qI", "bQb", OP_DUP_LN>;
def VDUP_LANEQ_BF: WOpInst<"vdup_laneq", ".QI", "bQb", OP_DUP_LN> {
  let isLaneQ = 1;
}
def VCOMBINE_BF : NoTestOpInst<"vcombine", "Q..", "b", OP_CONC>;
def VGET_HIGH_BF : NoTestOpInst<"vget_high", ".Q", "b", OP_HI>;
def VGET_LOW_BF : NoTestOpInst<"vget_low", ".Q", "b", OP_LO>;
def VGET_LANE_BF : IInst<"vget_lane", "1.I", "bQb">;
def VSET_LANE_BF : IInst<"vset_lane", ".1.I", "bQb">;
def SCALAR_VDUP_LANE_BF : IInst<"vdup_lane", "1.I", "Sb">;
def SCALAR_VDUP_LANEQ_BF : IInst<"vdup_laneq", "1QI", "Sb"> {
  let isLaneQ = 1;
}
def VLD1_BF : WInst<"vld1", ".(c*!)", "bQb">;
def VLD2_BF : WInst<"vld2", "2(c*!)", "bQb">;
def VLD3_BF : WInst<"vld3", "3(c*!)", "bQb">;
def VLD4_BF : WInst<"vld4", "4(c*!)", "bQb">;
def VST1_BF : WInst<"vst1", "v*(.!)", "bQb">;
def VST2_BF : WInst<"vst2", "v*(2!)", "bQb">;
def VST3_BF : WInst<"vst3", "v*(3!)", "bQb">;
def VST4_BF : WInst<"vst4", "v*(4!)", "bQb">;
def VLD1_X2_BF : WInst<"vld1_x2", "2(c*!)", "bQb">;
def VLD1_X3_BF : WInst<"vld1_x3", "3(c*!)", "bQb">;
def VLD1_X4_BF : WInst<"vld1_x4", "4(c*!)", "bQb">;
def VST1_X2_BF : WInst<"vst1_x2", "v*(2!)", "bQb">;
def VST1_X3_BF : WInst<"vst1_x3", "v*(3!)", "bQb">;
def VST1_X4_BF : WInst<"vst1_x4", "v*(4!)", "bQb">;
def VLD1_LANE_BF : WInst<"vld1_lane", ".(c*!).I", "bQb">;
def VLD2_LANE_BF : WInst<"vld2_lane", "2(c*!)2I", "bQb">;
def VLD3_LANE_BF : WInst<"vld3_lane", "3(c*!)3I", "bQb">;
def VLD4_LANE_BF : WInst<"vld4_lane", "4(c*!)4I", "bQb">;
def VST1_LANE_BF : WInst<"vst1_lane", "v*(.!)I", "bQb">;
def VST2_LANE_BF : WInst<"vst2_lane", "v*(2!)I", "bQb">;
def VST3_LANE_BF : WInst<"vst3_lane", "v*(3!)I", "bQb">;
def VST4_LANE_BF : WInst<"vst4_lane", "v*(4!)I", "bQb">;
def VLD1_DUP_BF : WInst<"vld1_dup", ".(c*!)", "bQb">;
def VLD2_DUP_BF : WInst<"vld2_dup", "2(c*!)", "bQb">;
def VLD3_DUP_BF : WInst<"vld3_dup", "3(c*!)", "bQb">;
def VLD4_DUP_BF : WInst<"vld4_dup", "4(c*!)", "bQb">;
def VCVT_F32_BF16 : SOpInst<"vcvt_f32_bf16", "(F>)(Bq!)", "Qb", OP_VCVT_F32_BF16>;
def VCVT_LOW_F32_BF16 : SOpInst<"vcvt_low_f32", "(F>)(BQ!)", "Qb", OP_VCVT_F32_BF16_LO>;
def VCVT_HIGH_F32_BF16 : SOpInst<"vcvt_high_f32", "(F>)(BQ!)", "Qb", OP_VCVT_F32_BF16_HI>;
def SCALAR_CVT_BF16_F32 : SInst<"vcvth_bf16", "(1B)1", "f">;
def SCALAR_CVT_F32_BF16 : SOpInst<"vcvtah_f32", "(1F>)(1!)", "b", OP_CVT_F32_BF16>;
}
let ArchGuard = "defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) && !defined(__aarch64__)" in {
def VCVT_BF16_F32_A32_INTERNAL : WInst<"__a32_vcvt_bf16", "BQ", "f">;
def VCVT_BF16_F32_A32 : SOpInst<"vcvt_bf16", "BQ", "f", OP_VCVT_BF16_F32_A32>;
def VCVT_LOW_BF16_F32_A32 : SOpInst<"vcvt_low_bf16", "BQ", "Qf", OP_VCVT_BF16_F32_LO_A32>;
def VCVT_HIGH_BF16_F32_A32 : SOpInst<"vcvt_high_bf16", "BBQ", "Qf", OP_VCVT_BF16_F32_HI_A32>;
}
let ArchGuard = "defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) && defined(__aarch64__)" in {
def VCVT_LOW_BF16_F32_A64_INTERNAL : WInst<"__a64_vcvtq_low_bf16", "BQ", "Hf">;
def VCVT_LOW_BF16_F32_A64 : SOpInst<"vcvt_low_bf16", "BQ", "Qf", OP_VCVT_BF16_F32_LO_A64>;
def VCVT_HIGH_BF16_F32_A64 : SInst<"vcvt_high_bf16", "BBQ", "Qf">;
def VCVT_BF16_F32 : SOpInst<"vcvt_bf16", "BQ", "f", OP_VCVT_BF16_F32_A64>;
def COPY_LANE_BF16 : IOpInst<"vcopy_lane", "..I.I", "b", OP_COPY_LN>;
def COPYQ_LANE_BF16 : IOpInst<"vcopy_lane", "..IqI", "Qb", OP_COPY_LN>;
def COPY_LANEQ_BF16 : IOpInst<"vcopy_laneq", "..IQI", "b", OP_COPY_LN>;
def COPYQ_LANEQ_BF16 : IOpInst<"vcopy_laneq", "..I.I", "Qb", OP_COPY_LN>;
}
let ArchGuard = "defined(__ARM_FEATURE_BF16) && !defined(__aarch64__)" in {
let BigEndianSafe = 1 in {
defm VREINTERPRET_BF : REINTERPRET_CROSS_TYPES<
    "csilUcUsUiUlhfPcPsPlQcQsQiQlQUcQUsQUiQUlQhQfQPcQPsQPl", "bQb">;
}
}
let ArchGuard = "defined(__ARM_FEATURE_BF16) && defined(__aarch64__)" in {
let BigEndianSafe = 1 in {
defm VVREINTERPRET_BF : REINTERPRET_CROSS_TYPES<
    "csilUcUsUiUlhfdPcPsPlQcQsQiQlQUcQUsQUiQUlQhQfQdQPcQPsQPlQPk", "bQb">;
}
}