WebAssemblyInstrSIMD.td 59 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467
  1. // WebAssemblyInstrSIMD.td - WebAssembly SIMD codegen support -*- tablegen -*-//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. ///
  9. /// \file
  10. /// WebAssembly SIMD operand code-gen constructs.
  11. ///
  12. //===----------------------------------------------------------------------===//
  13. // Instructions using the SIMD opcode prefix and requiring one of the SIMD
  14. // feature predicates. Forwards to `I` with 0xfd prepended to the opcode.
  15. multiclass ABSTRACT_SIMD_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
  16. list<dag> pattern_r, string asmstr_r,
  17. string asmstr_s, bits<32> simdop, // simdop: opcode without the 0xfd prefix
  18. Predicate simd_level> { // simd_level: predicate placed in Requires<[...]>
  19. defm "" : I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, asmstr_s,
  20. !if(!ge(simdop, 0x100), // opcodes >= 0x100 keep 16 bits, smaller ones 8
  21. !or(0xfd0000, !and(0xffff, simdop)),
  22. !or(0xfd00, !and(0xff, simdop)))>,
  23. Requires<[simd_level]>;
  24. }
  25. multiclass SIMD_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s, // SIMD128 instruction
  26. list<dag> pattern_r, string asmstr_r = "",
  27. string asmstr_s = "", bits<32> simdop = -1> { // asm strings default to "", simdop to -1
  28. defm "" : ABSTRACT_SIMD_I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r,
  29. asmstr_s, simdop, HasSIMD128>; // same arguments, gated on HasSIMD128
  30. }
  31. multiclass RELAXED_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s, // relaxed-SIMD instruction
  32. list<dag> pattern_r, string asmstr_r = "",
  33. string asmstr_s = "", bits<32> simdop = -1> { // same defaults as SIMD_I
  34. defm "" : ABSTRACT_SIMD_I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r,
  35. asmstr_s, simdop, HasRelaxedSIMD>; // same arguments, gated on HasRelaxedSIMD
  36. }
  37. defm "" : ARGUMENT<V128, v16i8>;
  38. defm "" : ARGUMENT<V128, v8i16>;
  39. defm "" : ARGUMENT<V128, v4i32>;
  40. defm "" : ARGUMENT<V128, v2i64>;
  41. defm "" : ARGUMENT<V128, v4f32>;
  42. defm "" : ARGUMENT<V128, v2f64>;
  43. // Constrained immediate argument types
  44. foreach SIZE = [8, 16] in // defines ImmI8 and ImmI16
  45. def ImmI#SIZE : ImmLeaf<i32, // i32 immediate within the signed SIZE-bit range
  46. "return -(1 << ("#SIZE#" - 1)) <= Imm && Imm < (1 << ("#SIZE#" - 1));"
  47. >;
  48. foreach SIZE = [2, 4, 8, 16, 32] in // defines LaneIdx2 .. LaneIdx32
  49. def LaneIdx#SIZE : ImmLeaf<i32, "return 0 <= Imm && Imm < "#SIZE#";">; // accepts 0 <= Imm < SIZE
  50. class Vec { // Describes one 128-bit vector shape; instantiated as I8x16 .. F64x2 below
  51. ValueType vt; // vector value type, e.g. v16i8
  52. ValueType int_vt; // integer vector of the same shape (v4i32 for F32x4); equals vt for int vectors
  53. ValueType lane_vt; // scalar type used for one lane in patterns (i32 for 8- and 16-bit lanes)
  54. WebAssemblyRegClass lane_rc; // register class holding a lane_vt scalar
  55. int lane_bits; // width of one lane in bits
  56. ImmLeaf lane_idx; // LaneIdxN leaf accepting the valid lane indices
  57. SDPatternOperator lane_load; // load operator yielding one lane (extloadi8/extloadi16/load)
  58. PatFrag splat; // fragment matching a splat_vector of this type
  59. string prefix; // assembly mnemonic prefix, e.g. "i8x16"
  60. Vec split; // shape with half-width lanes (I16x8 -> I8x16); left unset for I8x16 and FP shapes
  61. }
  62. def I8x16 : Vec {
  63. let vt = v16i8;
  64. let int_vt = vt;
  65. let lane_vt = i32;
  66. let lane_rc = I32;
  67. let lane_bits = 8;
  68. let lane_idx = LaneIdx16;
  69. let lane_load = extloadi8;
  70. let splat = PatFrag<(ops node:$x), (v16i8 (splat_vector (i8 $x)))>;
  71. let prefix = "i8x16";
  72. }
  73. def I16x8 : Vec {
  74. let vt = v8i16;
  75. let int_vt = vt;
  76. let lane_vt = i32;
  77. let lane_rc = I32;
  78. let lane_bits = 16;
  79. let lane_idx = LaneIdx8;
  80. let lane_load = extloadi16;
  81. let splat = PatFrag<(ops node:$x), (v8i16 (splat_vector (i16 $x)))>;
  82. let prefix = "i16x8";
  83. let split = I8x16;
  84. }
  85. def I32x4 : Vec {
  86. let vt = v4i32;
  87. let int_vt = vt;
  88. let lane_vt = i32;
  89. let lane_rc = I32;
  90. let lane_bits = 32;
  91. let lane_idx = LaneIdx4;
  92. let lane_load = load;
  93. let splat = PatFrag<(ops node:$x), (v4i32 (splat_vector (i32 $x)))>;
  94. let prefix = "i32x4";
  95. let split = I16x8;
  96. }
  97. def I64x2 : Vec {
  98. let vt = v2i64;
  99. let int_vt = vt;
  100. let lane_vt = i64;
  101. let lane_rc = I64;
  102. let lane_bits = 64;
  103. let lane_idx = LaneIdx2;
  104. let lane_load = load;
  105. let splat = PatFrag<(ops node:$x), (v2i64 (splat_vector (i64 $x)))>;
  106. let prefix = "i64x2";
  107. let split = I32x4;
  108. }
  109. def F32x4 : Vec {
  110. let vt = v4f32;
  111. let int_vt = v4i32;
  112. let lane_vt = f32;
  113. let lane_rc = F32;
  114. let lane_bits = 32;
  115. let lane_idx = LaneIdx4;
  116. let lane_load = load;
  117. let splat = PatFrag<(ops node:$x), (v4f32 (splat_vector (f32 $x)))>;
  118. let prefix = "f32x4";
  119. }
  120. def F64x2 : Vec {
  121. let vt = v2f64;
  122. let int_vt = v2i64;
  123. let lane_vt = f64;
  124. let lane_rc = F64;
  125. let lane_bits = 64;
  126. let lane_idx = LaneIdx2;
  127. let lane_load = load;
  128. let splat = PatFrag<(ops node:$x), (v2f64 (splat_vector (f64 $x)))>;
  129. let prefix = "f64x2";
  130. }
  131. defvar AllVecs = [I8x16, I16x8, I32x4, I64x2, F32x4, F64x2];
  132. defvar IntVecs = [I8x16, I16x8, I32x4, I64x2];
  133. //===----------------------------------------------------------------------===//
  134. // Load and store
  135. //===----------------------------------------------------------------------===//
  136. // Load: v128.load
  137. let mayLoad = 1, UseNamedOperandTable = 1 in {
  138. defm LOAD_V128_A32 :
  139. SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
  140. (outs), (ins P2Align:$p2align, offset32_op:$off), [],
  141. "v128.load\t$dst, ${off}(${addr})$p2align",
  142. "v128.load\t$off$p2align", 0>;
  143. defm LOAD_V128_A64 :
  144. SIMD_I<(outs V128:$dst), (ins P2Align:$p2align, offset64_op:$off, I64:$addr),
  145. (outs), (ins P2Align:$p2align, offset64_op:$off), [],
  146. "v128.load\t$dst, ${off}(${addr})$p2align",
  147. "v128.load\t$off$p2align", 0>;
  148. }
  149. // Instantiate the load patterns from WebAssemblyInstrMemory.td for each vector type
  150. foreach vec = AllVecs in {
  151. defm : LoadPat<vec.vt, load, "LOAD_V128">;
  152. }
  153. // v128.loadX_splat: defines LOAD<size>_SPLAT_A32 and LOAD<size>_SPLAT_A64
  154. multiclass SIMDLoadSplat<int size, bits<32> simdop> { // size is spliced into record and asm names
  155. let mayLoad = 1, UseNamedOperandTable = 1 in {
  156. defm LOAD#size#_SPLAT_A32 : // I32 address, offset32_op offset
  157. SIMD_I<(outs V128:$dst),
  158. (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
  159. (outs),
  160. (ins P2Align:$p2align, offset32_op:$off), [],
  161. "v128.load"#size#"_splat\t$dst, ${off}(${addr})$p2align",
  162. "v128.load"#size#"_splat\t$off$p2align", simdop>;
  163. defm LOAD#size#_SPLAT_A64 : // I64 address, offset64_op offset, same opcode
  164. SIMD_I<(outs V128:$dst),
  165. (ins P2Align:$p2align, offset64_op:$off, I64:$addr),
  166. (outs),
  167. (ins P2Align:$p2align, offset64_op:$off), [],
  168. "v128.load"#size#"_splat\t$dst, ${off}(${addr})$p2align",
  169. "v128.load"#size#"_splat\t$off$p2align", simdop>;
  170. }
  171. }
  172. defm "" : SIMDLoadSplat<8, 7>;
  173. defm "" : SIMDLoadSplat<16, 8>;
  174. defm "" : SIMDLoadSplat<32, 9>;
  175. defm "" : SIMDLoadSplat<64, 10>;
  176. foreach vec = AllVecs in {
  177. defvar inst = "LOAD"#vec.lane_bits#"_SPLAT";
  178. defm : LoadPat<vec.vt,
  179. PatFrag<(ops node:$addr), (splat_vector (vec.lane_vt (vec.lane_load node:$addr)))>,
  180. inst>;
  181. }
  182. // Load and extend: defines signed (_S) and unsigned (_U) variants for A32 and A64
  183. multiclass SIMDLoadExtend<Vec vec, string loadPat, bits<32> simdop> { // loadPat e.g. "8x8"
  184. defvar signed = vec.prefix#".load"#loadPat#"_s"; // mnemonic of the signed variant
  185. defvar unsigned = vec.prefix#".load"#loadPat#"_u"; // mnemonic of the unsigned variant
  186. let mayLoad = 1, UseNamedOperandTable = 1 in {
  187. defm LOAD_EXTEND_S_#vec#_A32 : // signed variant uses opcode simdop
  188. SIMD_I<(outs V128:$dst),
  189. (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
  190. (outs), (ins P2Align:$p2align, offset32_op:$off), [],
  191. signed#"\t$dst, ${off}(${addr})$p2align",
  192. signed#"\t$off$p2align", simdop>;
  193. defm LOAD_EXTEND_U_#vec#_A32 : // unsigned variant uses opcode simdop + 1
  194. SIMD_I<(outs V128:$dst),
  195. (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
  196. (outs), (ins P2Align:$p2align, offset32_op:$off), [],
  197. unsigned#"\t$dst, ${off}(${addr})$p2align",
  198. unsigned#"\t$off$p2align", !add(simdop, 1)>;
  199. defm LOAD_EXTEND_S_#vec#_A64 : // 64-bit address form, same opcode as _S_A32
  200. SIMD_I<(outs V128:$dst),
  201. (ins P2Align:$p2align, offset64_op:$off, I64:$addr),
  202. (outs), (ins P2Align:$p2align, offset64_op:$off), [],
  203. signed#"\t$dst, ${off}(${addr})$p2align",
  204. signed#"\t$off$p2align", simdop>;
  205. defm LOAD_EXTEND_U_#vec#_A64 : // 64-bit address form, same opcode as _U_A32
  206. SIMD_I<(outs V128:$dst),
  207. (ins P2Align:$p2align, offset64_op:$off, I64:$addr),
  208. (outs), (ins P2Align:$p2align, offset64_op:$off), [],
  209. unsigned#"\t$dst, ${off}(${addr})$p2align",
  210. unsigned#"\t$off$p2align", !add(simdop, 1)>;
  211. }
  212. }
  213. defm "" : SIMDLoadExtend<I16x8, "8x8", 1>;
  214. defm "" : SIMDLoadExtend<I32x4, "16x4", 3>;
  215. defm "" : SIMDLoadExtend<I64x2, "32x2", 5>;
  216. foreach vec = [I16x8, I32x4, I64x2] in
  217. foreach exts = [["sextloadvi", "_S"],
  218. ["zextloadvi", "_U"],
  219. ["extloadvi", "_U"]] in {
  220. defvar loadpat = !cast<PatFrag>(exts[0]#vec.split.lane_bits);
  221. defvar inst = "LOAD_EXTEND"#exts[1]#"_"#vec;
  222. defm : LoadPat<vec.vt, loadpat, inst>;
  223. }
  224. // Load lane into zero vector: defines LOAD_ZERO_<vec>_A32 and LOAD_ZERO_<vec>_A64
  225. multiclass SIMDLoadZero<Vec vec, bits<32> simdop> {
  226. defvar name = "v128.load"#vec.lane_bits#"_zero"; // mnemonic built from the lane width
  227. let mayLoad = 1, UseNamedOperandTable = 1 in {
  228. defm LOAD_ZERO_#vec#_A32 : // I32 address, offset32_op offset
  229. SIMD_I<(outs V128:$dst),
  230. (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
  231. (outs), (ins P2Align:$p2align, offset32_op:$off), [],
  232. name#"\t$dst, ${off}(${addr})$p2align",
  233. name#"\t$off$p2align", simdop>;
  234. defm LOAD_ZERO_#vec#_A64 : // I64 address, offset64_op offset, same opcode
  235. SIMD_I<(outs V128:$dst),
  236. (ins P2Align:$p2align, offset64_op:$off, I64:$addr),
  237. (outs), (ins P2Align:$p2align, offset64_op:$off), [],
  238. name#"\t$dst, ${off}(${addr})$p2align",
  239. name#"\t$off$p2align", simdop>;
  240. } // mayLoad = 1, UseNamedOperandTable = 1
  241. }
  242. defm "" : SIMDLoadZero<I32x4, 0x5c>;
  243. defm "" : SIMDLoadZero<I64x2, 0x5d>;
  244. // Use load_zero to load scalars into vectors as well where possible.
  245. // TODO: i16 and i8 scalars
  246. foreach vec = [I32x4, I64x2] in {
  247. defvar inst = "LOAD_ZERO_"#vec;
  248. defvar pat = PatFrag<(ops node:$addr), (scalar_to_vector (vec.lane_vt (load $addr)))>;
  249. defm : LoadPat<vec.vt, pat, inst>;
  250. }
  251. // TODO: f32x4 and f64x2 as well
  252. foreach vec = [I32x4, I64x2] in {
  253. defvar inst = "LOAD_ZERO_"#vec;
  254. defvar pat = PatFrag<(ops node:$ptr),
  255. (vector_insert (vec.splat (vec.lane_vt 0)), (vec.lane_vt (load $ptr)), 0)>;
  256. defm : LoadPat<vec.vt, pat, inst>;
  257. }
  258. // Load lane: defines LOAD_LANE_<vec>_A32 and LOAD_LANE_<vec>_A64
  259. multiclass SIMDLoadLane<Vec vec, bits<32> simdop> {
  260. defvar name = "v128.load"#vec.lane_bits#"_lane"; // mnemonic built from the lane width
  261. let mayLoad = 1, UseNamedOperandTable = 1 in {
  262. defm LOAD_LANE_#vec#_A32 : // register-form operands: p2align, off, idx, addr, vec
  263. SIMD_I<(outs V128:$dst),
  264. (ins P2Align:$p2align, offset32_op:$off, vec_i8imm_op:$idx,
  265. I32:$addr, V128:$vec),
  266. (outs), (ins P2Align:$p2align, offset32_op:$off, vec_i8imm_op:$idx),
  267. [], name#"\t$dst, ${off}(${addr})$p2align, $vec, $idx",
  268. name#"\t$off$p2align, $idx", simdop>;
  269. defm LOAD_LANE_#vec#_A64 : // same layout with I64 address and offset64_op
  270. SIMD_I<(outs V128:$dst),
  271. (ins P2Align:$p2align, offset64_op:$off, vec_i8imm_op:$idx,
  272. I64:$addr, V128:$vec),
  273. (outs), (ins P2Align:$p2align, offset64_op:$off, vec_i8imm_op:$idx),
  274. [], name#"\t$dst, ${off}(${addr})$p2align, $vec, $idx",
  275. name#"\t$off$p2align, $idx", simdop>;
  276. } // mayLoad = 1, UseNamedOperandTable = 1
  277. }
  278. defm "" : SIMDLoadLane<I8x16, 0x54>;
  279. defm "" : SIMDLoadLane<I16x8, 0x55>;
  280. defm "" : SIMDLoadLane<I32x4, 0x56>;
  281. defm "" : SIMDLoadLane<I64x2, 0x57>;
  282. // Select loads with no constant offset: `kind` is one of the loadN_lane fragments.
  283. multiclass LoadLanePatNoOffset<Vec vec, SDPatternOperator kind> {
  284. defvar load_lane_a32 = !cast<NI>("LOAD_LANE_"#vec#"_A32"); // look up instruction record by name
  285. defvar load_lane_a64 = !cast<NI>("LOAD_LANE_"#vec#"_A64");
  286. def : Pat<(vec.vt (kind (i32 I32:$addr), // 32-bit address form
  287. (vec.vt V128:$vec), (i32 vec.lane_idx:$idx))),
  288. (load_lane_a32 0, 0, imm:$idx, $addr, $vec)>, // p2align = 0, off = 0
  289. Requires<[HasAddr32]>;
  290. def : Pat<(vec.vt (kind (i64 I64:$addr), // 64-bit address form
  291. (vec.vt V128:$vec), (i32 vec.lane_idx:$idx))),
  292. (load_lane_a64 0, 0, imm:$idx, $addr, $vec)>, // p2align = 0, off = 0
  293. Requires<[HasAddr64]>;
  294. }
  295. def load8_lane :
  296. PatFrag<(ops node:$ptr, node:$vec, node:$idx),
  297. (vector_insert $vec, (i32 (extloadi8 $ptr)), $idx)>;
  298. def load16_lane :
  299. PatFrag<(ops node:$ptr, node:$vec, node:$idx),
  300. (vector_insert $vec, (i32 (extloadi16 $ptr)), $idx)>;
  301. def load32_lane :
  302. PatFrag<(ops node:$ptr, node:$vec, node:$idx),
  303. (vector_insert $vec, (i32 (load $ptr)), $idx)>;
  304. def load64_lane :
  305. PatFrag<(ops node:$ptr, node:$vec, node:$idx),
  306. (vector_insert $vec, (i64 (load $ptr)), $idx)>;
  307. // TODO: floating point lanes as well
  308. defm : LoadLanePatNoOffset<I8x16, load8_lane>;
  309. defm : LoadLanePatNoOffset<I16x8, load16_lane>;
  310. defm : LoadLanePatNoOffset<I32x4, load32_lane>;
  311. defm : LoadLanePatNoOffset<I64x2, load64_lane>;
  312. // TODO: Also support the other load patterns for load_lane once the instructions
  313. // are merged to the proposal.
  314. // Store: v128.store
  315. let mayStore = 1, UseNamedOperandTable = 1 in {
  316. defm STORE_V128_A32 :
  317. SIMD_I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr, V128:$vec),
  318. (outs), (ins P2Align:$p2align, offset32_op:$off), [],
  319. "v128.store\t${off}(${addr})$p2align, $vec",
  320. "v128.store\t$off$p2align", 11>;
  321. defm STORE_V128_A64 :
  322. SIMD_I<(outs), (ins P2Align:$p2align, offset64_op:$off, I64:$addr, V128:$vec),
  323. (outs), (ins P2Align:$p2align, offset64_op:$off), [],
  324. "v128.store\t${off}(${addr})$p2align, $vec",
  325. "v128.store\t$off$p2align", 11>;
  326. }
  327. // Instantiate the store patterns from WebAssemblyInstrMemory.td for each vector type
  328. foreach vec = AllVecs in {
  329. defm : StorePat<vec.vt, store, "STORE_V128">;
  330. }
  331. // Store lane: defines STORE_LANE_<vec>_A32 and STORE_LANE_<vec>_A64
  332. multiclass SIMDStoreLane<Vec vec, bits<32> simdop> {
  333. defvar name = "v128.store"#vec.lane_bits#"_lane"; // mnemonic built from the lane width
  334. let mayStore = 1, UseNamedOperandTable = 1 in {
  335. defm STORE_LANE_#vec#_A32 : // register-form operands: p2align, off, idx, addr, vec
  336. SIMD_I<(outs),
  337. (ins P2Align:$p2align, offset32_op:$off, vec_i8imm_op:$idx,
  338. I32:$addr, V128:$vec),
  339. (outs), (ins P2Align:$p2align, offset32_op:$off, vec_i8imm_op:$idx),
  340. [], name#"\t${off}(${addr})$p2align, $vec, $idx",
  341. name#"\t$off$p2align, $idx", simdop>;
  342. defm STORE_LANE_#vec#_A64 : // same layout with I64 address and offset64_op
  343. SIMD_I<(outs), // a store defines no value, matching the _A32 form
  344. (ins P2Align:$p2align, offset64_op:$off, vec_i8imm_op:$idx,
  345. I64:$addr, V128:$vec),
  346. (outs), (ins P2Align:$p2align, offset64_op:$off, vec_i8imm_op:$idx),
  347. [], name#"\t${off}(${addr})$p2align, $vec, $idx",
  348. name#"\t$off$p2align, $idx", simdop>;
  349. } // mayStore = 1, UseNamedOperandTable = 1
  350. }
  351. defm "" : SIMDStoreLane<I8x16, 0x58>;
  352. defm "" : SIMDStoreLane<I16x8, 0x59>;
  353. defm "" : SIMDStoreLane<I32x4, 0x5a>;
  354. defm "" : SIMDStoreLane<I64x2, 0x5b>;
  355. multiclass StoreLanePat<Vec vec, SDPatternOperator kind> { // kind: one of the storeN_lane fragments
  356. def : Pat<(kind (AddrOps32 offset32_op:$offset, I32:$addr), // 32-bit address with offset
  357. (vec.vt V128:$vec),
  358. (i32 vec.lane_idx:$idx)),
  359. (!cast<NI>("STORE_LANE_"#vec#"_A32") 0, $offset, imm:$idx, $addr, $vec)>, // p2align = 0
  360. Requires<[HasAddr32]>;
  361. def : Pat<(kind (AddrOps64 offset64_op:$offset, I64:$addr), // 64-bit address with offset
  362. (vec.vt V128:$vec),
  363. (i32 vec.lane_idx:$idx)),
  364. (!cast<NI>("STORE_LANE_"#vec#"_A64") 0, $offset, imm:$idx, $addr, $vec)>, // p2align = 0
  365. Requires<[HasAddr64]>;
  366. }
  367. def store8_lane :
  368. PatFrag<(ops node:$ptr, node:$vec, node:$idx),
  369. (truncstorei8 (i32 (vector_extract $vec, $idx)), $ptr)>;
  370. def store16_lane :
  371. PatFrag<(ops node:$ptr, node:$vec, node:$idx),
  372. (truncstorei16 (i32 (vector_extract $vec, $idx)), $ptr)>;
  373. def store32_lane :
  374. PatFrag<(ops node:$ptr, node:$vec, node:$idx),
  375. (store (i32 (vector_extract $vec, $idx)), $ptr)>;
  376. def store64_lane :
  377. PatFrag<(ops node:$ptr, node:$vec, node:$idx),
  378. (store (i64 (vector_extract $vec, $idx)), $ptr)>;
  379. // TODO: floating point lanes as well
  380. let AddedComplexity = 1 in {
  381. defm : StoreLanePat<I8x16, store8_lane>;
  382. defm : StoreLanePat<I16x8, store16_lane>;
  383. defm : StoreLanePat<I32x4, store32_lane>;
  384. defm : StoreLanePat<I64x2, store64_lane>;
  385. }
  386. //===----------------------------------------------------------------------===//
  387. // Constructing SIMD values
  388. //===----------------------------------------------------------------------===//
  389. // Constant: v128.const, one CONST_V128_<vec> record per vector shape
  390. multiclass ConstVec<Vec vec, dag ops, dag pat, string args> { // args: asm operand list text
  391. let isMoveImm = 1, isReMaterializable = 1 in
  392. defm CONST_V128_#vec : SIMD_I<(outs V128:$dst), ops, (outs), ops, // ops used as inputs of both forms
  393. [(set V128:$dst, (vec.vt pat))],
  394. "v128.const\t$dst, "#args,
  395. "v128.const\t"#args, 12>; // every shape shares opcode 12
  396. }
  397. defm "" : ConstVec<I8x16,
  398. (ins vec_i8imm_op:$i0, vec_i8imm_op:$i1,
  399. vec_i8imm_op:$i2, vec_i8imm_op:$i3,
  400. vec_i8imm_op:$i4, vec_i8imm_op:$i5,
  401. vec_i8imm_op:$i6, vec_i8imm_op:$i7,
  402. vec_i8imm_op:$i8, vec_i8imm_op:$i9,
  403. vec_i8imm_op:$iA, vec_i8imm_op:$iB,
  404. vec_i8imm_op:$iC, vec_i8imm_op:$iD,
  405. vec_i8imm_op:$iE, vec_i8imm_op:$iF),
  406. (build_vector ImmI8:$i0, ImmI8:$i1, ImmI8:$i2, ImmI8:$i3,
  407. ImmI8:$i4, ImmI8:$i5, ImmI8:$i6, ImmI8:$i7,
  408. ImmI8:$i8, ImmI8:$i9, ImmI8:$iA, ImmI8:$iB,
  409. ImmI8:$iC, ImmI8:$iD, ImmI8:$iE, ImmI8:$iF),
  410. !strconcat("$i0, $i1, $i2, $i3, $i4, $i5, $i6, $i7, ",
  411. "$i8, $i9, $iA, $iB, $iC, $iD, $iE, $iF")>;
  412. defm "" : ConstVec<I16x8,
  413. (ins vec_i16imm_op:$i0, vec_i16imm_op:$i1,
  414. vec_i16imm_op:$i2, vec_i16imm_op:$i3,
  415. vec_i16imm_op:$i4, vec_i16imm_op:$i5,
  416. vec_i16imm_op:$i6, vec_i16imm_op:$i7),
  417. (build_vector
  418. ImmI16:$i0, ImmI16:$i1, ImmI16:$i2, ImmI16:$i3,
  419. ImmI16:$i4, ImmI16:$i5, ImmI16:$i6, ImmI16:$i7),
  420. "$i0, $i1, $i2, $i3, $i4, $i5, $i6, $i7">;
  421. let IsCanonical = 1 in
  422. defm "" : ConstVec<I32x4,
  423. (ins vec_i32imm_op:$i0, vec_i32imm_op:$i1,
  424. vec_i32imm_op:$i2, vec_i32imm_op:$i3),
  425. (build_vector (i32 imm:$i0), (i32 imm:$i1),
  426. (i32 imm:$i2), (i32 imm:$i3)),
  427. "$i0, $i1, $i2, $i3">;
  428. defm "" : ConstVec<I64x2,
  429. (ins vec_i64imm_op:$i0, vec_i64imm_op:$i1),
  430. (build_vector (i64 imm:$i0), (i64 imm:$i1)),
  431. "$i0, $i1">;
  432. defm "" : ConstVec<F32x4,
  433. (ins f32imm_op:$i0, f32imm_op:$i1,
  434. f32imm_op:$i2, f32imm_op:$i3),
  435. (build_vector (f32 fpimm:$i0), (f32 fpimm:$i1),
  436. (f32 fpimm:$i2), (f32 fpimm:$i3)),
  437. "$i0, $i1, $i2, $i3">;
  438. defm "" : ConstVec<F64x2,
  439. (ins f64imm_op:$i0, f64imm_op:$i1),
  440. (build_vector (f64 fpimm:$i0), (f64 fpimm:$i1)),
  441. "$i0, $i1">;
  442. // Match splat(x) of an immediate -> v128.const(x, ..., x)
  443. foreach vec = AllVecs in {
  444. defvar numEls = !div(vec.vt.Size, vec.lane_bits); // number of lanes in the vector
  445. defvar isFloat = !or(!eq(vec.lane_vt, f32), !eq(vec.lane_vt, f64));
  446. defvar immKind = !if(isFloat, fpimm, imm); // FP lanes match fpimm, integer lanes imm
  447. def : Pat<(vec.splat (vec.lane_vt immKind:$x)),
  448. !dag(!cast<NI>("CONST_V128_"#vec), // result: CONST_V128_<vec> with $x repeated numEls times
  449. !listsplat((vec.lane_vt immKind:$x), numEls),
  450. ?)>;
  451. }
  452. // Shuffle lanes: shuffle
  453. defm SHUFFLE :
  454. SIMD_I<(outs V128:$dst),
  455. (ins V128:$x, V128:$y,
  456. vec_i8imm_op:$m0, vec_i8imm_op:$m1,
  457. vec_i8imm_op:$m2, vec_i8imm_op:$m3,
  458. vec_i8imm_op:$m4, vec_i8imm_op:$m5,
  459. vec_i8imm_op:$m6, vec_i8imm_op:$m7,
  460. vec_i8imm_op:$m8, vec_i8imm_op:$m9,
  461. vec_i8imm_op:$mA, vec_i8imm_op:$mB,
  462. vec_i8imm_op:$mC, vec_i8imm_op:$mD,
  463. vec_i8imm_op:$mE, vec_i8imm_op:$mF),
  464. (outs),
  465. (ins
  466. vec_i8imm_op:$m0, vec_i8imm_op:$m1,
  467. vec_i8imm_op:$m2, vec_i8imm_op:$m3,
  468. vec_i8imm_op:$m4, vec_i8imm_op:$m5,
  469. vec_i8imm_op:$m6, vec_i8imm_op:$m7,
  470. vec_i8imm_op:$m8, vec_i8imm_op:$m9,
  471. vec_i8imm_op:$mA, vec_i8imm_op:$mB,
  472. vec_i8imm_op:$mC, vec_i8imm_op:$mD,
  473. vec_i8imm_op:$mE, vec_i8imm_op:$mF),
  474. [],
  475. "i8x16.shuffle\t$dst, $x, $y, "#
  476. "$m0, $m1, $m2, $m3, $m4, $m5, $m6, $m7, "#
  477. "$m8, $m9, $mA, $mB, $mC, $mD, $mE, $mF",
  478. "i8x16.shuffle\t"#
  479. "$m0, $m1, $m2, $m3, $m4, $m5, $m6, $m7, "#
  480. "$m8, $m9, $mA, $mB, $mC, $mD, $mE, $mF",
  481. 13>;
  482. // Shuffles after custom lowering
  483. def wasm_shuffle_t : SDTypeProfile<1, 18, []>;
  484. def wasm_shuffle : SDNode<"WebAssemblyISD::SHUFFLE", wasm_shuffle_t>;
  485. foreach vec = AllVecs in {
  486. def : Pat<(vec.vt (wasm_shuffle (vec.vt V128:$x), (vec.vt V128:$y),
  487. (i32 LaneIdx32:$m0), (i32 LaneIdx32:$m1),
  488. (i32 LaneIdx32:$m2), (i32 LaneIdx32:$m3),
  489. (i32 LaneIdx32:$m4), (i32 LaneIdx32:$m5),
  490. (i32 LaneIdx32:$m6), (i32 LaneIdx32:$m7),
  491. (i32 LaneIdx32:$m8), (i32 LaneIdx32:$m9),
  492. (i32 LaneIdx32:$mA), (i32 LaneIdx32:$mB),
  493. (i32 LaneIdx32:$mC), (i32 LaneIdx32:$mD),
  494. (i32 LaneIdx32:$mE), (i32 LaneIdx32:$mF))),
  495. (SHUFFLE $x, $y,
  496. imm:$m0, imm:$m1, imm:$m2, imm:$m3,
  497. imm:$m4, imm:$m5, imm:$m6, imm:$m7,
  498. imm:$m8, imm:$m9, imm:$mA, imm:$mB,
  499. imm:$mC, imm:$mD, imm:$mE, imm:$mF)>;
  500. }
  501. // Swizzle lanes: i8x16.swizzle
  502. def wasm_swizzle_t : SDTypeProfile<1, 2, []>;
  503. def wasm_swizzle : SDNode<"WebAssemblyISD::SWIZZLE", wasm_swizzle_t>;
  504. defm SWIZZLE :
  505. SIMD_I<(outs V128:$dst), (ins V128:$src, V128:$mask), (outs), (ins),
  506. [(set (v16i8 V128:$dst),
  507. (wasm_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)))],
  508. "i8x16.swizzle\t$dst, $src, $mask", "i8x16.swizzle", 14>;
  509. def : Pat<(int_wasm_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)),
  510. (SWIZZLE $src, $mask)>;
  511. multiclass Splat<Vec vec, bits<32> simdop> { // defines SPLAT_<vec>, asm "<prefix>.splat"
  512. defm SPLAT_#vec : SIMD_I<(outs V128:$dst), (ins vec.lane_rc:$x), // one scalar in, one vector out
  513. (outs), (ins),
  514. [(set (vec.vt V128:$dst),
  515. (vec.splat vec.lane_rc:$x))], // selected from the shape's splat fragment
  516. vec.prefix#".splat\t$dst, $x", vec.prefix#".splat",
  517. simdop>;
  518. }
  519. defm "" : Splat<I8x16, 15>;
  520. defm "" : Splat<I16x8, 16>;
  521. defm "" : Splat<I32x4, 17>;
  522. defm "" : Splat<I64x2, 18>;
  523. defm "" : Splat<F32x4, 19>;
  524. defm "" : Splat<F64x2, 20>;
  525. // scalar_to_vector leaves high lanes undefined, so can be a splat
  526. foreach vec = AllVecs in
  527. def : Pat<(vec.vt (scalar_to_vector (vec.lane_vt vec.lane_rc:$x))),
  528. (!cast<Instruction>("SPLAT_"#vec) $x)>;
  529. //===----------------------------------------------------------------------===//
  530. // Accessing lanes
  531. //===----------------------------------------------------------------------===//
  532. // Extract lane as a scalar: extract_lane / extract_lane_s / extract_lane_u
  533. multiclass ExtractLane<Vec vec, bits<32> simdop, string suffix = ""> { // suffix: "", "_s" or "_u"
  534. defm EXTRACT_LANE_#vec#suffix : // suffix is part of both the record name and the mnemonic
  535. SIMD_I<(outs vec.lane_rc:$dst), (ins V128:$vec, vec_i8imm_op:$idx),
  536. (outs), (ins vec_i8imm_op:$idx), [], // no inline pattern; Pat records are defined separately
  537. vec.prefix#".extract_lane"#suffix#"\t$dst, $vec, $idx",
  538. vec.prefix#".extract_lane"#suffix#"\t$idx", simdop>;
  539. }
  540. defm "" : ExtractLane<I8x16, 21, "_s">;
  541. defm "" : ExtractLane<I8x16, 22, "_u">;
  542. defm "" : ExtractLane<I16x8, 24, "_s">;
  543. defm "" : ExtractLane<I16x8, 25, "_u">;
  544. defm "" : ExtractLane<I32x4, 27>;
  545. defm "" : ExtractLane<I64x2, 29>;
  546. defm "" : ExtractLane<F32x4, 31>;
  547. defm "" : ExtractLane<F64x2, 33>;
  548. def : Pat<(vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)),
  549. (EXTRACT_LANE_I8x16_u $vec, imm:$idx)>;
  550. def : Pat<(vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)),
  551. (EXTRACT_LANE_I16x8_u $vec, imm:$idx)>;
  552. def : Pat<(vector_extract (v4i32 V128:$vec), (i32 LaneIdx4:$idx)),
  553. (EXTRACT_LANE_I32x4 $vec, imm:$idx)>;
  554. def : Pat<(vector_extract (v4f32 V128:$vec), (i32 LaneIdx4:$idx)),
  555. (EXTRACT_LANE_F32x4 $vec, imm:$idx)>;
  556. def : Pat<(vector_extract (v2i64 V128:$vec), (i32 LaneIdx2:$idx)),
  557. (EXTRACT_LANE_I64x2 $vec, imm:$idx)>;
  558. def : Pat<(vector_extract (v2f64 V128:$vec), (i32 LaneIdx2:$idx)),
  559. (EXTRACT_LANE_F64x2 $vec, imm:$idx)>;
  560. def : Pat<
  561. (sext_inreg (vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)), i8),
  562. (EXTRACT_LANE_I8x16_s $vec, imm:$idx)>;
  563. def : Pat<
  564. (and (vector_extract (v16i8 V128:$vec), (i32 LaneIdx16:$idx)), (i32 0xff)),
  565. (EXTRACT_LANE_I8x16_u $vec, imm:$idx)>;
  566. def : Pat<
  567. (sext_inreg (vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)), i16),
  568. (EXTRACT_LANE_I16x8_s $vec, imm:$idx)>;
  569. def : Pat<
  570. (and (vector_extract (v8i16 V128:$vec), (i32 LaneIdx8:$idx)), (i32 0xffff)),
  571. (EXTRACT_LANE_I16x8_u $vec, imm:$idx)>;
  572. // Replace lane value: replace_lane, defines REPLACE_LANE_<vec>
  573. multiclass ReplaceLane<Vec vec, bits<32> simdop> {
  574. defm REPLACE_LANE_#vec : // register-form operands: vec, idx, x
  575. SIMD_I<(outs V128:$dst), (ins V128:$vec, vec_i8imm_op:$idx, vec.lane_rc:$x),
  576. (outs), (ins vec_i8imm_op:$idx),
  577. [(set V128:$dst, (vector_insert // selected from vector_insert with an in-range index
  578. (vec.vt V128:$vec),
  579. (vec.lane_vt vec.lane_rc:$x),
  580. (i32 vec.lane_idx:$idx)))],
  581. vec.prefix#".replace_lane\t$dst, $vec, $idx, $x",
  582. vec.prefix#".replace_lane\t$idx", simdop>;
  583. }
  584. defm "" : ReplaceLane<I8x16, 23>;
  585. defm "" : ReplaceLane<I16x8, 26>;
  586. defm "" : ReplaceLane<I32x4, 28>;
  587. defm "" : ReplaceLane<I64x2, 30>;
  588. defm "" : ReplaceLane<F32x4, 32>;
  589. defm "" : ReplaceLane<F64x2, 34>;
  590. // Lower undef lane indices to zero
  591. def : Pat<(vector_insert (v16i8 V128:$vec), I32:$x, undef),
  592. (REPLACE_LANE_I8x16 $vec, 0, $x)>;
  593. def : Pat<(vector_insert (v8i16 V128:$vec), I32:$x, undef),
  594. (REPLACE_LANE_I16x8 $vec, 0, $x)>;
  595. def : Pat<(vector_insert (v4i32 V128:$vec), I32:$x, undef),
  596. (REPLACE_LANE_I32x4 $vec, 0, $x)>;
  597. def : Pat<(vector_insert (v2i64 V128:$vec), I64:$x, undef),
  598. (REPLACE_LANE_I64x2 $vec, 0, $x)>;
  599. def : Pat<(vector_insert (v4f32 V128:$vec), F32:$x, undef),
  600. (REPLACE_LANE_F32x4 $vec, 0, $x)>;
  601. def : Pat<(vector_insert (v2f64 V128:$vec), F64:$x, undef),
  602. (REPLACE_LANE_F64x2 $vec, 0, $x)>;
  603. //===----------------------------------------------------------------------===//
  604. // Comparisons
  605. //===----------------------------------------------------------------------===//
  606. multiclass SIMDCondition<Vec vec, string name, CondCode cond, bits<32> simdop> { // one lane-wise compare
  607. defm _#vec : // record name is <defm prefix>_<vec>, e.g. EQ_I8x16
  608. SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins),
  609. [(set (vec.int_vt V128:$dst), // result is the integer vector of the same shape
  610. (setcc (vec.vt V128:$lhs), (vec.vt V128:$rhs), cond))],
  611. vec.prefix#"."#name#"\t$dst, $lhs, $rhs",
  612. vec.prefix#"."#name, simdop>;
  613. }
  614. multiclass SIMDConditionInt<string name, CondCode cond, bits<32> baseInst> { // I8x16, I16x8, I32x4 only
  615. defm "" : SIMDCondition<I8x16, name, cond, baseInst>; // opcode baseInst
  616. defm "" : SIMDCondition<I16x8, name, cond, !add(baseInst, 10)>; // opcode baseInst + 10
  617. defm "" : SIMDCondition<I32x4, name, cond, !add(baseInst, 20)>; // opcode baseInst + 20
  618. }
  619. multiclass SIMDConditionFP<string name, CondCode cond, bits<32> baseInst> { // F32x4 and F64x2
  620. defm "" : SIMDCondition<F32x4, name, cond, baseInst>; // opcode baseInst
  621. defm "" : SIMDCondition<F64x2, name, cond, !add(baseInst, 6)>; // opcode baseInst + 6
  622. }
  // Equality (eq) and non-equality (ne). All of these are marked commutable.
  // The i64x2 forms are instantiated separately with their own opcodes, and
  // the floating-point forms use the SETOEQ / SETUNE condition codes.
  let isCommutable = 1 in {
    // eq
    defm EQ : SIMDConditionInt<"eq", SETEQ, 35>;
    defm EQ : SIMDCondition<I64x2, "eq", SETEQ, 214>;
    defm EQ : SIMDConditionFP<"eq", SETOEQ, 65>;

    // ne
    defm NE : SIMDConditionInt<"ne", SETNE, 36>;
    defm NE : SIMDCondition<I64x2, "ne", SETNE, 215>;
    defm NE : SIMDConditionFP<"ne", SETUNE, 66>;
  } // isCommutable = 1
  // Ordering comparisons. Signed integer forms exist for all four integer
  // shapes (i64x2 is instantiated separately); unsigned forms exist only for
  // the shapes covered by SIMDConditionInt. Floating-point forms use the
  // ordered condition codes.

  // lt_s / lt_u / lt
  defm LT_S : SIMDConditionInt<"lt_s", SETLT, 37>;
  defm LT_S : SIMDCondition<I64x2, "lt_s", SETLT, 216>;
  defm LT_U : SIMDConditionInt<"lt_u", SETULT, 38>;
  defm LT   : SIMDConditionFP<"lt", SETOLT, 67>;

  // gt_s / gt_u / gt
  defm GT_S : SIMDConditionInt<"gt_s", SETGT, 39>;
  defm GT_S : SIMDCondition<I64x2, "gt_s", SETGT, 217>;
  defm GT_U : SIMDConditionInt<"gt_u", SETUGT, 40>;
  defm GT   : SIMDConditionFP<"gt", SETOGT, 68>;

  // le_s / le_u / le
  defm LE_S : SIMDConditionInt<"le_s", SETLE, 41>;
  defm LE_S : SIMDCondition<I64x2, "le_s", SETLE, 218>;
  defm LE_U : SIMDConditionInt<"le_u", SETULE, 42>;
  defm LE   : SIMDConditionFP<"le", SETOLE, 69>;

  // ge_s / ge_u / ge
  defm GE_S : SIMDConditionInt<"ge_s", SETGE, 43>;
  defm GE_S : SIMDCondition<I64x2, "ge_s", SETGE, 219>;
  defm GE_U : SIMDConditionInt<"ge_u", SETUGE, 44>;
  defm GE   : SIMDConditionFP<"ge", SETOGE, 70>;
  // Float comparisons that do not care about NaN are mapped onto the standard
  // WebAssembly float comparisons. Such nodes are produced under nnan and by
  // the target-independent expansion of unordered comparisons and ordered ne.
  // Each entry pairs a comparison fragment with the instruction it selects.
  foreach pair = [[seteq, EQ_F32x4],
                  [setne, NE_F32x4],
                  [setlt, LT_F32x4],
                  [setgt, GT_F32x4],
                  [setle, LE_F32x4],
                  [setge, GE_F32x4]] in {
    def : Pat<(v4i32 (pair[0] (v4f32 V128:$a), (v4f32 V128:$b))),
              (pair[1] $a, $b)>;
  }

  foreach pair = [[seteq, EQ_F64x2],
                  [setne, NE_F64x2],
                  [setlt, LT_F64x2],
                  [setgt, GT_F64x2],
                  [setle, LE_F64x2],
                  [setge, GE_F64x2]] in {
    def : Pat<(v2i64 (pair[0] (v2f64 V128:$a), (v2f64 V128:$b))),
              (pair[1] $a, $b)>;
  }
  666. //===----------------------------------------------------------------------===//
  667. // Bitwise operations
  668. //===----------------------------------------------------------------------===//
  // Generic two-operand SIMD instruction on `vec`-shaped vectors. It is
  // selected from `node` applied to two vec.vt operands and printed as
  // "<vec.prefix>.<name>". The defined record is suffixed with "_<vec>".
  multiclass SIMDBinary<Vec vec, SDPatternOperator node, string name, bits<32> simdop> {
    defvar opname = vec.prefix#"."#name;
    defm _#vec :
      SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins),
             [(set (vec.vt V128:$dst),
                   (node (vec.vt V128:$lhs), (vec.vt V128:$rhs)))],
             opname#"\t$dst, $lhs, $rhs",
             opname, simdop>;
  }
  // Shape-agnostic "v128.<name>" instruction. The instruction itself carries
  // no selection pattern; a separate Pat is emitted for each integer vector
  // type in IntVecs, all selecting the single instruction named by NAME.
  // `commutable` sets isCommutable on the instruction and defaults to false.
  multiclass SIMDBitwise<SDPatternOperator node, string name, bits<32> simdop,
                         bit commutable = false> {
    defvar opname = "v128."#name;
    let isCommutable = commutable in {
      defm "" : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs),
                       (outs), (ins), [],
                       opname#"\t$dst, $lhs, $rhs", opname, simdop>;
    }
    foreach vec = IntVecs in {
      def : Pat<(node (vec.vt V128:$a), (vec.vt V128:$b)),
                (!cast<NI>(NAME) $a, $b)>;
    }
  }
  // Generic one-operand SIMD instruction on a `vec`-shaped vector. It is
  // selected from `node` applied to a vec.vt operand and printed as
  // "<vec.prefix>.<name>". The defined record is suffixed with "_<vec>".
  multiclass SIMDUnary<Vec vec, SDPatternOperator node, string name, bits<32> simdop> {
    defvar opname = vec.prefix#"."#name;
    defm _#vec :
      SIMD_I<(outs V128:$dst), (ins V128:$v), (outs), (ins),
             [(set (vec.vt V128:$dst),
                   (vec.vt (node (vec.vt V128:$v))))],
             opname#"\t$dst, $v",
             opname, simdop>;
  }
  // Bitwise complement: v128.not. The instruction has no pattern of its own;
  // vnot on each integer vector type selects it.
  defm NOT :
    SIMD_I<(outs V128:$dst), (ins V128:$v), (outs), (ins), [],
           "v128.not\t$dst, $v", "v128.not", 77>;

  foreach vec = IntVecs in {
    def : Pat<(vnot (vec.vt V128:$x)), (NOT $x)>;
  }
  // v128.and / v128.or / v128.xor, all commutable.
  defm AND : SIMDBitwise<and, "and", 78, true>;
  defm OR  : SIMDBitwise<or, "or", 80, true>;
  defm XOR : SIMDBitwise<xor, "xor", 81, true>;

  // v128.andnot computes left & ~right. It is left non-commutable, which is
  // the SIMDBitwise default.
  def andnot : PatFrag<(ops node:$left, node:$right),
                       (and $left, (vnot $right))>;
  defm ANDNOT : SIMDBitwise<andnot, "andnot", 79>;
  // Bitwise select: v128.bitselect. The instruction carries no pattern; the
  // Pats below select it from the intrinsic, from equivalent and/or/xor
  // expressions, and from vselect.
  defm BITSELECT :
    SIMD_I<(outs V128:$dst), (ins V128:$v1, V128:$v2, V128:$c), (outs), (ins),
           [], "v128.bitselect\t$dst, $v1, $v2, $c", "v128.bitselect", 82>;

  // The bitselect intrinsic, for every vector type.
  foreach vec = AllVecs in {
    def : Pat<(vec.vt (int_wasm_bitselect
                (vec.vt V128:$x), (vec.vt V128:$y), (vec.vt V128:$mask))),
              (BITSELECT $x, $y, $mask)>;
  }

  // (mask & x) | (~mask & y)
  foreach vec = IntVecs in {
    def : Pat<(vec.vt (or (and (vec.vt V128:$mask), (vec.vt V128:$x)),
                          (and (vnot V128:$mask), (vec.vt V128:$y)))),
              (BITSELECT $x, $y, $mask)>;
  }

  // ((x ^ y) & mask) ^ y
  foreach vec = IntVecs in {
    def : Pat<(vec.vt (xor (and (xor (vec.vt V128:$x), (vec.vt V128:$y)),
                                (vec.vt V128:$mask)),
                           (vec.vt V128:$y))),
              (BITSELECT $x, $y, $mask)>;
  }

  // The same xor form with the mask negated, so the two value operands are
  // passed to the instruction in swapped order.
  foreach vec = IntVecs in {
    def : Pat<(vec.vt (xor (and (xor (vec.vt V128:$x), (vec.vt V128:$y)),
                                (vnot (vec.vt V128:$mask))),
                           (vec.vt V128:$y))),
              (BITSELECT $y, $x, $mask)>;
  }

  // vselect with an integer-vector condition is implemented with bitselect
  // too.
  foreach vec = AllVecs in {
    def : Pat<(vec.vt (vselect
                (vec.int_vt V128:$mask), (vec.vt V128:$x), (vec.vt V128:$y))),
              (BITSELECT $x, $y, $mask)>;
  }
  // MVP select on v128 values, driven by a scalar i32 condition.
  defm SELECT_V128 :
    I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs, I32:$cond), (outs), (ins),
      [], "v128.select\t$dst, $lhs, $rhs, $cond", "v128.select", 0x1b>;

  foreach vec = AllVecs in {
    // Plain select.
    def : Pat<(select I32:$c, (vec.vt V128:$t), (vec.vt V128:$f)),
              (SELECT_V128 $t, $f, $c)>;

    // ISD::SELECT requires its operand to conform to getBooleanContents, but
    // WebAssembly's select treats any non-zero value as true, so a setne
    // against 0 can be folded into the select.
    def : Pat<(select
                (i32 (setne I32:$c, 0)), (vec.vt V128:$t), (vec.vt V128:$f)),
              (SELECT_V128 $t, $f, $c)>;

    // Likewise for seteq against 0, with the two arms exchanged.
    def : Pat<(select
                (i32 (seteq I32:$c, 0)), (vec.vt V128:$t), (vec.vt V128:$f)),
              (SELECT_V128 $f, $t, $c)>;
  } // foreach vec
  754. //===----------------------------------------------------------------------===//
  755. // Integer unary arithmetic
  756. //===----------------------------------------------------------------------===//
  // Instantiate SIMDUnary for all four integer shapes. Opcodes step by 32 per
  // shape, starting at baseInst for i8x16.
  multiclass SIMDUnaryInt<SDPatternOperator node, string name, bits<32> baseInst> {
    defm "" : SIMDUnary<I8x16, node, name, baseInst>;
    defm "" : SIMDUnary<I16x8, node, name, !add(baseInst, 32)>;
    defm "" : SIMDUnary<I32x4, node, name, !add(baseInst, 64)>;
    defm "" : SIMDUnary<I64x2, node, name, !add(baseInst, 96)>;
  }
  // Integer vector negation, expressed as subtraction from the all-zeros
  // vector.
  def ivneg : PatFrag<(ops node:$in), (sub immAllZerosV, $in)>;

  // Integer absolute value: abs
  defm ABS : SIMDUnaryInt<abs, "abs", 96>;

  // Integer negation: neg
  defm NEG : SIMDUnaryInt<ivneg, "neg", 97>;

  // Population count: popcnt (defined for i8x16 only)
  defm POPCNT : SIMDUnary<I8x16, ctpop, "popcnt", 0x62>;
  // Any lane true: v128.any_true. One shape-agnostic instruction; the
  // any-true intrinsic on every integer vector type selects it.
  defm ANYTRUE :
    SIMD_I<(outs I32:$dst), (ins V128:$vec), (outs), (ins), [],
           "v128.any_true\t$dst, $vec", "v128.any_true", 0x53>;

  foreach vec = IntVecs in {
    def : Pat<(int_wasm_anytrue (vec.vt V128:$v)), (ANYTRUE V128:$v)>;
  }
  // All lanes true: <shape>.all_true. Unlike any_true there is one
  // instruction per integer shape, named ALLTRUE_<vec>.
  multiclass SIMDAllTrue<Vec vec, bits<32> simdop> {
    defvar opname = vec.prefix#".all_true";
    defm ALLTRUE_#vec :
      SIMD_I<(outs I32:$dst), (ins V128:$vec), (outs), (ins),
             [(set I32:$dst,
                   (i32 (int_wasm_alltrue (vec.vt V128:$vec))))],
             opname#"\t$dst, $vec",
             opname, simdop>;
  }

  defm "" : SIMDAllTrue<I8x16, 0x63>;
  defm "" : SIMDAllTrue<I16x8, 0x83>;
  defm "" : SIMDAllTrue<I32x4, 0xa3>;
  defm "" : SIMDAllTrue<I64x2, 0xc3>;
  // The reductions already produce 0 or 1, so `and 1`, `setne 0` and
  // `seteq 1` applied to their result can be folded away.
  // Each row is [intrinsic name, instruction name, vector shape name].
  foreach row =
    [["int_wasm_anytrue", "ANYTRUE", "I8x16"],
     ["int_wasm_anytrue", "ANYTRUE", "I16x8"],
     ["int_wasm_anytrue", "ANYTRUE", "I32x4"],
     ["int_wasm_anytrue", "ANYTRUE", "I64x2"],
     ["int_wasm_alltrue", "ALLTRUE_I8x16", "I8x16"],
     ["int_wasm_alltrue", "ALLTRUE_I16x8", "I16x8"],
     ["int_wasm_alltrue", "ALLTRUE_I32x4", "I32x4"],
     ["int_wasm_alltrue", "ALLTRUE_I64x2", "I64x2"]] in {
    defvar intr  = !cast<Intrinsic>(row[0]);
    defvar insn  = !cast<NI>(row[1]);
    defvar shape = !cast<Vec>(row[2]);

    // reduction & 1
    def : Pat<(i32 (and (i32 (intr (shape.vt V128:$v))), (i32 1))),
              (insn $v)>;
    // reduction != 0
    def : Pat<(i32 (setne (i32 (intr (shape.vt V128:$v))), (i32 0))),
              (insn $v)>;
    // reduction == 1
    def : Pat<(i32 (seteq (i32 (intr (shape.vt V128:$v))), (i32 1))),
              (insn $v)>;
  }
  // <shape>.bitmask: produces an i32 from a vector, selected from the bitmask
  // intrinsic on vec.vt. The defined record is suffixed with "_<vec>".
  multiclass SIMDBitmask<Vec vec, bits<32> simdop> {
    defvar opname = vec.prefix#".bitmask";
    defm _#vec :
      SIMD_I<(outs I32:$dst), (ins V128:$vec), (outs), (ins),
             [(set I32:$dst,
                   (i32 (int_wasm_bitmask (vec.vt V128:$vec))))],
             opname#"\t$dst, $vec", opname,
             simdop>;
  }

  defm BITMASK : SIMDBitmask<I8x16, 100>;
  defm BITMASK : SIMDBitmask<I16x8, 132>;
  defm BITMASK : SIMDBitmask<I32x4, 164>;
  defm BITMASK : SIMDBitmask<I64x2, 196>;
  817. //===----------------------------------------------------------------------===//
  818. // Bit shifts
  819. //===----------------------------------------------------------------------===//
  // Shift of a `vec`-shaped vector by a scalar i32 amount, selected from
  // `node` and printed as "<vec.prefix>.<name>".
  multiclass SIMDShift<Vec vec, SDNode node, string name, bits<32> simdop> {
    defvar opname = vec.prefix#"."#name;
    defm _#vec :
      SIMD_I<(outs V128:$dst), (ins V128:$vec, I32:$x), (outs), (ins),
             [(set (vec.vt V128:$dst), (node V128:$vec, I32:$x))],
             opname#"\t$dst, $vec, $x",
             opname, simdop>;
  }

  // Instantiate SIMDShift for all four integer shapes. Opcodes step by 32 per
  // shape, starting at baseInst for i8x16.
  multiclass SIMDShiftInt<SDNode node, string name, bits<32> baseInst> {
    defm "" : SIMDShift<I8x16, node, name, baseInst>;
    defm "" : SIMDShift<I16x8, node, name, !add(baseInst, 32)>;
    defm "" : SIMDShift<I32x4, node, name, !add(baseInst, 64)>;
    defm "" : SIMDShift<I64x2, node, name, !add(baseInst, 96)>;
  }
  // WebAssembly SIMD shifts are nonstandard: the shift amount is a scalar i32
  // rather than a vector, so they need custom nodes. The profile is one vector
  // result, a first operand of the same type, and an i32 second operand.
  def wasm_shift_t :
    SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>;

  def wasm_shl   : SDNode<"WebAssemblyISD::VEC_SHL", wasm_shift_t>;
  def wasm_shr_s : SDNode<"WebAssemblyISD::VEC_SHR_S", wasm_shift_t>;
  def wasm_shr_u : SDNode<"WebAssemblyISD::VEC_SHR_U", wasm_shift_t>;

  // Left shift by scalar: shl
  defm SHL : SIMDShiftInt<wasm_shl, "shl", 107>;

  // Right shift by scalar: shr_s / shr_u
  defm SHR_S : SIMDShiftInt<wasm_shr_s, "shr_s", 108>;
  defm SHR_U : SIMDShiftInt<wasm_shr_u, "shr_u", 109>;
  // Optimize away an explicit mask on a shift count. When the count is ANDed
  // with (lane width - 1), i.e. 7, 15, 31 or 63, the bare shift is selected
  // and the `and` is dropped.

  // i8x16: mask 7
  def : Pat<(wasm_shl (v16i8 V128:$v), (and I32:$amt, 7)),
            (SHL_I8x16 V128:$v, I32:$amt)>;
  def : Pat<(wasm_shr_s (v16i8 V128:$v), (and I32:$amt, 7)),
            (SHR_S_I8x16 V128:$v, I32:$amt)>;
  def : Pat<(wasm_shr_u (v16i8 V128:$v), (and I32:$amt, 7)),
            (SHR_U_I8x16 V128:$v, I32:$amt)>;

  // i16x8: mask 15
  def : Pat<(wasm_shl (v8i16 V128:$v), (and I32:$amt, 15)),
            (SHL_I16x8 V128:$v, I32:$amt)>;
  def : Pat<(wasm_shr_s (v8i16 V128:$v), (and I32:$amt, 15)),
            (SHR_S_I16x8 V128:$v, I32:$amt)>;
  def : Pat<(wasm_shr_u (v8i16 V128:$v), (and I32:$amt, 15)),
            (SHR_U_I16x8 V128:$v, I32:$amt)>;

  // i32x4: mask 31
  def : Pat<(wasm_shl (v4i32 V128:$v), (and I32:$amt, 31)),
            (SHL_I32x4 V128:$v, I32:$amt)>;
  def : Pat<(wasm_shr_s (v4i32 V128:$v), (and I32:$amt, 31)),
            (SHR_S_I32x4 V128:$v, I32:$amt)>;
  def : Pat<(wasm_shr_u (v4i32 V128:$v), (and I32:$amt, 31)),
            (SHR_U_I32x4 V128:$v, I32:$amt)>;

  // i64x2: mask 63. Here the count is an i64 that is masked and then
  // truncated, so the selected shift takes the wrapped i64 instead.
  def : Pat<(wasm_shl (v2i64 V128:$v), (trunc (and I64:$amt, 63))),
            (SHL_I64x2 V128:$v, (I32_WRAP_I64 I64:$amt))>;
  def : Pat<(wasm_shr_s (v2i64 V128:$v), (trunc (and I64:$amt, 63))),
            (SHR_S_I64x2 V128:$v, (I32_WRAP_I64 I64:$amt))>;
  def : Pat<(wasm_shr_u (v2i64 V128:$v), (trunc (and I64:$amt, 63))),
            (SHR_U_I64x2 V128:$v, (I32_WRAP_I64 I64:$amt))>;
  869. //===----------------------------------------------------------------------===//
  870. // Integer binary arithmetic
  871. //===----------------------------------------------------------------------===//
  // Helper multiclasses that instantiate SIMDBinary over subsets of the
  // integer shapes. In all of them the opcode for a shape is baseInst plus
  // 0 (i8x16), 32 (i16x8), 64 (i32x4) or 96 (i64x2).

  // i16x8, i32x4 and i64x2, skipping i8x16.
  multiclass SIMDBinaryIntNoI8x16<SDPatternOperator node, string name, bits<32> baseInst> {
    defm "" : SIMDBinary<I16x8, node, name, !add(baseInst, 32)>;
    defm "" : SIMDBinary<I32x4, node, name, !add(baseInst, 64)>;
    defm "" : SIMDBinary<I64x2, node, name, !add(baseInst, 96)>;
  }

  // i8x16 and i16x8 only.
  multiclass SIMDBinaryIntSmall<SDPatternOperator node, string name, bits<32> baseInst> {
    defm "" : SIMDBinary<I8x16, node, name, baseInst>;
    defm "" : SIMDBinary<I16x8, node, name, !add(baseInst, 32)>;
  }

  // i8x16, i16x8 and i32x4, skipping i64x2.
  multiclass SIMDBinaryIntNoI64x2<SDPatternOperator node, string name, bits<32> baseInst> {
    defm "" : SIMDBinaryIntSmall<node, name, baseInst>;
    defm "" : SIMDBinary<I32x4, node, name, !add(baseInst, 64)>;
  }

  // All four integer shapes.
  multiclass SIMDBinaryInt<SDPatternOperator node, string name, bits<32> baseInst> {
    defm "" : SIMDBinaryIntNoI64x2<node, name, baseInst>;
    defm "" : SIMDBinary<I64x2, node, name, !add(baseInst, 96)>;
  }
  // Integer addition: add / add_sat_s / add_sat_u (commutable). The
  // saturating forms exist only for i8x16 and i16x8.
  let isCommutable = 1 in {
    defm ADD       : SIMDBinaryInt<add, "add", 110>;
    defm ADD_SAT_S : SIMDBinaryIntSmall<saddsat, "add_sat_s", 111>;
    defm ADD_SAT_U : SIMDBinaryIntSmall<uaddsat, "add_sat_u", 112>;
  } // isCommutable = 1

  // Integer subtraction: sub / sub_sat_s / sub_sat_u (not commutable). The
  // saturating forms are selected from intrinsics.
  defm SUB : SIMDBinaryInt<sub, "sub", 113>;
  defm SUB_SAT_S :
    SIMDBinaryIntSmall<int_wasm_sub_sat_signed, "sub_sat_s", 114>;
  defm SUB_SAT_U :
    SIMDBinaryIntSmall<int_wasm_sub_sat_unsigned, "sub_sat_u", 115>;

  let isCommutable = 1 in {
    // Integer multiplication: mul (no i8x16 form)
    defm MUL : SIMDBinaryIntNoI8x16<mul, "mul", 117>;

    // Integer min_s / min_u / max_s / max_u (no i64x2 forms)
    defm MIN_S : SIMDBinaryIntNoI64x2<smin, "min_s", 118>;
    defm MIN_U : SIMDBinaryIntNoI64x2<umin, "min_u", 119>;
    defm MAX_S : SIMDBinaryIntNoI64x2<smax, "max_s", 120>;
    defm MAX_U : SIMDBinaryIntNoI64x2<umax, "max_u", 121>;

    // Integer unsigned rounding average: avgr_u (i8x16 and i16x8 only)
    defm AVGR_U : SIMDBinary<I8x16, int_wasm_avgr_unsigned, "avgr_u", 123>;
    defm AVGR_U : SIMDBinary<I16x8, int_wasm_avgr_unsigned, "avgr_u", 155>;
  } // isCommutable = 1
  // An `add` that carries the no-unsigned-wrap flag.
  def add_nuw : PatFrag<(ops node:$lhs, node:$rhs), (add $lhs, $rhs),
                        "return N->getFlags().hasNoUnsignedWrap();">;

  // Recognise an open-coded rounding average, ((a + b) + splat(1)) >> 1 with
  // both additions nuw and an unsigned shift, and select avgr_u for it.
  foreach vec = [I8x16, I16x8] in {
    defvar avgr = !cast<NI>("AVGR_U_"#vec);
    def : Pat<(wasm_shr_u
                (add_nuw
                  (add_nuw (vec.vt V128:$a), (vec.vt V128:$b)),
                  (vec.splat (i32 1))),
                (i32 1)),
              (avgr $a, $b)>;
  }
  // Widening dot product: i32x4.dot_i16x8_s. Commutable, and selected
  // directly from the dot intrinsic.
  let isCommutable = 1 in {
    defm DOT :
      SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins),
             [(set V128:$dst, (int_wasm_dot V128:$lhs, V128:$rhs))],
             "i32x4.dot_i16x8_s\t$dst, $lhs, $rhs", "i32x4.dot_i16x8_s",
             186>;
  }
  // Extending multiplication: extmul_{low,high}_{s,u}.
  // The four extend nodes share one profile: vector in, vector out.
  def extend_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
  def extend_low_s  : SDNode<"WebAssemblyISD::EXTEND_LOW_S", extend_t>;
  def extend_high_s : SDNode<"WebAssemblyISD::EXTEND_HIGH_S", extend_t>;
  def extend_low_u  : SDNode<"WebAssemblyISD::EXTEND_LOW_U", extend_t>;
  def extend_high_u : SDNode<"WebAssemblyISD::EXTEND_HIGH_U", extend_t>;

  // Binary instruction whose result has shape `vec` but whose two operands
  // have the shape given by vec.split.
  multiclass SIMDExtBinary<Vec vec, SDPatternOperator node, string name,
                           bits<32> simdop> {
    defvar opname = vec.prefix#"."#name;
    defm _#vec :
      SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins),
             [(set (vec.vt V128:$dst),
                   (node (vec.split.vt V128:$lhs), (vec.split.vt V128:$rhs)))],
             opname#"\t$dst, $lhs, $rhs",
             opname, simdop>;
  }

  // Fragment matching a multiply whose two inputs are both extended by the
  // same `extend` node.
  class ExtMulPat<SDNode extend> :
    PatFrag<(ops node:$lhs, node:$rhs),
            (mul (extend $lhs), (extend $rhs))>;

  def extmul_low_s  : ExtMulPat<extend_low_s>;
  def extmul_high_s : ExtMulPat<extend_high_s>;
  def extmul_low_u  : ExtMulPat<extend_low_u>;
  def extmul_high_u : ExtMulPat<extend_high_u>;
  // extmul producing i16x8 from i8x16 inputs.
  defm EXTMUL_LOW_S :
    SIMDExtBinary<I16x8, extmul_low_s, "extmul_low_i8x16_s", 0x9c>;
  defm EXTMUL_HIGH_S :
    SIMDExtBinary<I16x8, extmul_high_s, "extmul_high_i8x16_s", 0x9d>;
  defm EXTMUL_LOW_U :
    SIMDExtBinary<I16x8, extmul_low_u, "extmul_low_i8x16_u", 0x9e>;
  defm EXTMUL_HIGH_U :
    SIMDExtBinary<I16x8, extmul_high_u, "extmul_high_i8x16_u", 0x9f>;

  // extmul producing i32x4 from i16x8 inputs.
  defm EXTMUL_LOW_S :
    SIMDExtBinary<I32x4, extmul_low_s, "extmul_low_i16x8_s", 0xbc>;
  defm EXTMUL_HIGH_S :
    SIMDExtBinary<I32x4, extmul_high_s, "extmul_high_i16x8_s", 0xbd>;
  defm EXTMUL_LOW_U :
    SIMDExtBinary<I32x4, extmul_low_u, "extmul_low_i16x8_u", 0xbe>;
  defm EXTMUL_HIGH_U :
    SIMDExtBinary<I32x4, extmul_high_u, "extmul_high_i16x8_u", 0xbf>;

  // extmul producing i64x2 from i32x4 inputs.
  defm EXTMUL_LOW_S :
    SIMDExtBinary<I64x2, extmul_low_s, "extmul_low_i32x4_s", 0xdc>;
  defm EXTMUL_HIGH_S :
    SIMDExtBinary<I64x2, extmul_high_s, "extmul_high_i32x4_s", 0xdd>;
  defm EXTMUL_LOW_U :
    SIMDExtBinary<I64x2, extmul_low_u, "extmul_low_i32x4_u", 0xde>;
  defm EXTMUL_HIGH_U :
    SIMDExtBinary<I64x2, extmul_high_u, "extmul_high_i32x4_u", 0xdf>;
  979. //===----------------------------------------------------------------------===//
  980. // Floating-point unary arithmetic
  981. //===----------------------------------------------------------------------===//
  // Instantiate SIMDUnary for both floating-point shapes. f32x4 uses baseInst
  // and f64x2 uses baseInst + 12.
  multiclass SIMDUnaryFP<SDNode node, string name, bits<32> baseInst> {
    defm "" : SIMDUnary<F32x4, node, name, baseInst>;
    defm "" : SIMDUnary<F64x2, node, name, !add(baseInst, 12)>;
  }

  // Absolute value: abs
  defm ABS : SIMDUnaryFP<fabs, "abs", 224>;

  // Negation: neg
  defm NEG : SIMDUnaryFP<fneg, "neg", 225>;

  // Square root: sqrt
  defm SQRT : SIMDUnaryFP<fsqrt, "sqrt", 227>;

  // Rounding: ceil, floor, trunc, nearest. These are instantiated per shape
  // because the f32x4 and f64x2 opcodes are not a fixed distance apart.
  defm CEIL    : SIMDUnary<F32x4, fceil, "ceil", 0x67>;
  defm FLOOR   : SIMDUnary<F32x4, ffloor, "floor", 0x68>;
  defm TRUNC   : SIMDUnary<F32x4, ftrunc, "trunc", 0x69>;
  defm NEAREST : SIMDUnary<F32x4, fnearbyint, "nearest", 0x6a>;

  defm CEIL    : SIMDUnary<F64x2, fceil, "ceil", 0x74>;
  defm FLOOR   : SIMDUnary<F64x2, ffloor, "floor", 0x75>;
  defm TRUNC   : SIMDUnary<F64x2, ftrunc, "trunc", 0x7a>;
  defm NEAREST : SIMDUnary<F64x2, fnearbyint, "nearest", 0x94>;
  1001. //===----------------------------------------------------------------------===//
  1002. // Floating-point binary arithmetic
  1003. //===----------------------------------------------------------------------===//
  // Instantiate SIMDBinary for both floating-point shapes. f32x4 uses
  // baseInst and f64x2 uses baseInst + 12.
  multiclass SIMDBinaryFP<SDPatternOperator node, string name, bits<32> baseInst> {
    defm "" : SIMDBinary<F32x4, node, name, baseInst>;
    defm "" : SIMDBinary<F64x2, node, name, !add(baseInst, 12)>;
  }

  // Addition: add (commutable)
  let isCommutable = 1 in {
    defm ADD : SIMDBinaryFP<fadd, "add", 228>;
  }

  // Subtraction: sub
  defm SUB : SIMDBinaryFP<fsub, "sub", 229>;

  // Multiplication: mul (commutable)
  let isCommutable = 1 in {
    defm MUL : SIMDBinaryFP<fmul, "mul", 230>;
  }

  // Division: div
  defm DIV : SIMDBinaryFP<fdiv, "div", 231>;

  // NaN-propagating minimum: min
  defm MIN : SIMDBinaryFP<fminimum, "min", 232>;

  // NaN-propagating maximum: max
  defm MAX : SIMDBinaryFP<fmaximum, "max", 233>;

  // Pseudo-minimum: pmin, i.e. (rhs < lhs) ? rhs : lhs, ordered compare.
  def pmin : PatFrag<(ops node:$lhs, node:$rhs),
                     (vselect (setolt $rhs, $lhs), $rhs, $lhs)>;
  defm PMIN : SIMDBinaryFP<pmin, "pmin", 234>;

  // Pseudo-maximum: pmax, i.e. (lhs < rhs) ? rhs : lhs, ordered compare.
  def pmax : PatFrag<(ops node:$lhs, node:$rhs),
                     (vselect (setolt $lhs, $rhs), $rhs, $lhs)>;
  defm PMAX : SIMDBinaryFP<pmax, "pmax", 235>;
  // Also match pmin/pmax when the selected operands are integer vectors but
  // the comparison is still a floating-point one. This can happen with the
  // wasm_simd128.h intrinsics because v128_t is an integer vector.
  foreach vec = [F32x4, F64x2] in {
    defvar pmin_inst = !cast<NI>("PMIN_"#vec);
    defvar pmax_inst = !cast<NI>("PMAX_"#vec);

    // (b < a) ? b : a
    def : Pat<(vec.int_vt (vselect
                (setolt (vec.vt (bitconvert V128:$b)),
                        (vec.vt (bitconvert V128:$a))),
                V128:$b, V128:$a)),
              (pmin_inst $a, $b)>;

    // (a < b) ? b : a
    def : Pat<(vec.int_vt (vselect
                (setolt (vec.vt (bitconvert V128:$a)),
                        (vec.vt (bitconvert V128:$b))),
                V128:$b, V128:$a)),
              (pmax_inst $a, $b)>;
  }

  // And match the pmin/pmax LLVM intrinsics as well.
  def : Pat<(v4f32 (int_wasm_pmin (v4f32 V128:$a), (v4f32 V128:$b))),
            (PMIN_F32x4 V128:$a, V128:$b)>;
  def : Pat<(v4f32 (int_wasm_pmax (v4f32 V128:$a), (v4f32 V128:$b))),
            (PMAX_F32x4 V128:$a, V128:$b)>;
  def : Pat<(v2f64 (int_wasm_pmin (v2f64 V128:$a), (v2f64 V128:$b))),
            (PMIN_F64x2 V128:$a, V128:$b)>;
  def : Pat<(v2f64 (int_wasm_pmax (v2f64 V128:$a), (v2f64 V128:$b))),
            (PMAX_F64x2 V128:$a, V128:$b)>;
  1056. //===----------------------------------------------------------------------===//
  1057. // Conversions
  1058. //===----------------------------------------------------------------------===//
  // One-operand conversion from shape `arg` to shape `vec`, selected from
  // `op`. The record is named "<op>_<vec>" and the assembly name is
  // "<vec.prefix>.<name>".
  multiclass SIMDConvert<Vec vec, Vec arg, SDPatternOperator op, string name,
                         bits<32> simdop> {
    defvar opname = vec.prefix#"."#name;
    defm op#_#vec :
      SIMD_I<(outs V128:$dst), (ins V128:$vec), (outs), (ins),
             [(set (vec.vt V128:$dst), (vec.vt (op (arg.vt V128:$vec))))],
             opname#"\t$dst, $vec", opname, simdop>;
  }
  // Floating point to integer with saturation: trunc_sat
  defm "" : SIMDConvert<I32x4, F32x4, fp_to_sint, "trunc_sat_f32x4_s", 248>;
  defm "" : SIMDConvert<I32x4, F32x4, fp_to_uint, "trunc_sat_f32x4_u", 249>;

  // The explicitly saturating nodes, saturating to i32, select the same
  // instructions.
  def trunc_s_sat32 : PatFrag<(ops node:$x), (fp_to_sint_sat $x, i32)>;
  def trunc_u_sat32 : PatFrag<(ops node:$x), (fp_to_uint_sat $x, i32)>;
  def : Pat<(v4i32 (trunc_s_sat32 (v4f32 V128:$v))), (fp_to_sint_I32x4 $v)>;
  def : Pat<(v4i32 (trunc_u_sat32 (v4f32 V128:$v))), (fp_to_uint_I32x4 $v)>;

  // f64x2 -> i32x4 "_zero" truncations, selected from custom nodes.
  def trunc_sat_zero_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
  def trunc_sat_zero_s :
    SDNode<"WebAssemblyISD::TRUNC_SAT_ZERO_S", trunc_sat_zero_t>;
  def trunc_sat_zero_u :
    SDNode<"WebAssemblyISD::TRUNC_SAT_ZERO_U", trunc_sat_zero_t>;
  defm "" : SIMDConvert<I32x4, F64x2, trunc_sat_zero_s,
                        "trunc_sat_f64x2_s_zero", 0xfc>;
  defm "" : SIMDConvert<I32x4, F64x2, trunc_sat_zero_u,
                        "trunc_sat_f64x2_u_zero", 0xfd>;

  // Integer to floating point: convert
  def convert_low_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
  def convert_low_s : SDNode<"WebAssemblyISD::CONVERT_LOW_S", convert_low_t>;
  def convert_low_u : SDNode<"WebAssemblyISD::CONVERT_LOW_U", convert_low_t>;
  defm "" : SIMDConvert<F32x4, I32x4, sint_to_fp, "convert_i32x4_s", 250>;
  defm "" : SIMDConvert<F32x4, I32x4, uint_to_fp, "convert_i32x4_u", 251>;
  defm "" : SIMDConvert<F64x2, I32x4, convert_low_s,
                        "convert_low_i32x4_s", 0xfe>;
  defm "" : SIMDConvert<F64x2, I32x4, convert_low_u,
                        "convert_low_i32x4_u", 0xff>;
  // Extending operations: extend_{low,high}_<source shape>_{s,u}. The four
  // variants take consecutive opcodes starting at baseInst, in the order
  // low_s, high_s, low_u, high_u. The source shape is vec.split.
  // TODO: refactor this to be uniform for i64x2 if the numbering is not changed.
  multiclass SIMDExtend<Vec vec, bits<32> baseInst> {
    defvar src = vec.split.prefix;
    defm "" : SIMDConvert<vec, vec.split, extend_low_s,
                          "extend_low_"#src#"_s", baseInst>;
    defm "" : SIMDConvert<vec, vec.split, extend_high_s,
                          "extend_high_"#src#"_s", !add(baseInst, 1)>;
    defm "" : SIMDConvert<vec, vec.split, extend_low_u,
                          "extend_low_"#src#"_u", !add(baseInst, 2)>;
    defm "" : SIMDConvert<vec, vec.split, extend_high_u,
                          "extend_high_"#src#"_u", !add(baseInst, 3)>;
  }

  defm "" : SIMDExtend<I16x8, 0x87>;
  defm "" : SIMDExtend<I32x4, 0xa7>;
  defm "" : SIMDExtend<I64x2, 0xc7>;
  // Narrowing operations: <vec.split.prefix>.narrow_<vec.prefix>_{s,u}. The
  // two inputs have shape `vec` and the result has shape vec.split. The
  // signed form uses baseInst and the unsigned form baseInst + 1.
  multiclass SIMDNarrow<Vec vec, bits<32> baseInst> {
    defvar base = vec.split.prefix#".narrow_"#vec.prefix;

    defm NARROW_S_#vec.split :
      SIMD_I<(outs V128:$dst), (ins V128:$low, V128:$high), (outs), (ins),
             [(set (vec.split.vt V128:$dst),
                   (vec.split.vt (int_wasm_narrow_signed
                     (vec.vt V128:$low), (vec.vt V128:$high))))],
             base#"_s\t$dst, $low, $high", base#"_s", baseInst>;

    defm NARROW_U_#vec.split :
      SIMD_I<(outs V128:$dst), (ins V128:$low, V128:$high), (outs), (ins),
             [(set (vec.split.vt V128:$dst),
                   (vec.split.vt (int_wasm_narrow_unsigned
                     (vec.vt V128:$low), (vec.vt V128:$high))))],
             base#"_u\t$dst, $low, $high", base#"_u", !add(baseInst, 1)>;
  }

  defm "" : SIMDNarrow<I16x8, 101>;
  defm "" : SIMDNarrow<I32x4, 133>;
  // WebAssemblyISD::NARROW_U: a custom node with one result, two operands and
  // no type constraints in its profile. It selects the unsigned narrow
  // instructions.
  def wasm_narrow_t : SDTypeProfile<1, 2, []>;
  def wasm_narrow_u : SDNode<"WebAssemblyISD::NARROW_U", wasm_narrow_t>;

  def : Pat<(v16i8 (wasm_narrow_u (v8i16 V128:$lo), (v8i16 V128:$hi))),
            (NARROW_U_I8x16 $lo, $hi)>;
  def : Pat<(v8i16 (wasm_narrow_u (v4i32 V128:$lo), (v4i32 V128:$hi))),
            (NARROW_U_I16x8 $lo, $hi)>;
  // Bitcasts between vector types are no-ops: the operand is reused under the
  // destination type. A pattern from a type to itself causes strange errors,
  // so identical type pairs are skipped.
  foreach dstTy = AllVecs in {
    foreach srcTy = AllVecs in {
      if !ne(dstTy, srcTy) then {
        def : Pat<(dstTy.vt (bitconvert (srcTy.vt V128:$v))),
                  (dstTy.vt V128:$v)>;
      }
    }
  }
  // Extended pairwise addition, selected from intrinsics:
  // i8x16 -> i16x8 and i16x8 -> i32x4, signed and unsigned.
  defm "" : SIMDConvert<I16x8, I8x16, int_wasm_extadd_pairwise_signed,
                        "extadd_pairwise_i8x16_s", 0x7c>;
  defm "" : SIMDConvert<I16x8, I8x16, int_wasm_extadd_pairwise_unsigned,
                        "extadd_pairwise_i8x16_u", 0x7d>;
  defm "" : SIMDConvert<I32x4, I16x8, int_wasm_extadd_pairwise_signed,
                        "extadd_pairwise_i16x8_s", 0x7e>;
  defm "" : SIMDConvert<I32x4, I16x8, int_wasm_extadd_pairwise_unsigned,
                        "extadd_pairwise_i16x8_u", 0x7f>;
  // f64x2 <-> f32x4 conversions, selected from custom nodes.
  def demote_t    : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
  def demote_zero : SDNode<"WebAssemblyISD::DEMOTE_ZERO", demote_t>;
  defm "" : SIMDConvert<F32x4, F64x2, demote_zero,
                        "demote_f64x2_zero", 0x5e>;

  def promote_t   : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
  def promote_low : SDNode<"WebAssemblyISD::PROMOTE_LOW", promote_t>;
  defm "" : SIMDConvert<F64x2, F32x4, promote_low,
                        "promote_low_f32x4", 0x5f>;

  // Lower extending loads to load64_zero + promote_low.
  // The fragment matches an extload whose memory type is v2f32.
  def extloadv2f32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
    let MemoryVT = v2f32;
  }

  // Adapted from the body of LoadPatNoOffset. One pattern per address width.
  // TODO: other addressing patterns
  def : Pat<(v2f64 (extloadv2f32 (i32 I32:$ptr))),
            (promote_low_F64x2 (LOAD_ZERO_I64x2_A32 0, 0, I32:$ptr))>,
        Requires<[HasAddr32]>;
  def : Pat<(v2f64 (extloadv2f32 (i64 I64:$ptr))),
            (promote_low_F64x2 (LOAD_ZERO_I64x2_A64 0, 0, I64:$ptr))>,
        Requires<[HasAddr64]>;
  1164. //===----------------------------------------------------------------------===//
  1165. // Saturating Rounding Q-Format Multiplication
  1166. //===----------------------------------------------------------------------===//
  // i16x8.q15mulr_sat_s, selected from its intrinsic.
  defm Q15MULR_SAT_S : SIMDBinary<I16x8, int_wasm_q15mulr_sat_signed,
                                  "q15mulr_sat_s", 0x82>;
  1169. //===----------------------------------------------------------------------===//
  1170. // Relaxed swizzle
  1171. //===----------------------------------------------------------------------===//
  // i8x16.relaxed_swizzle, selected from its intrinsic on v16i8 operands.
  defm RELAXED_SWIZZLE :
    RELAXED_I<(outs V128:$dst), (ins V128:$src, V128:$mask), (outs), (ins),
              [(set (v16i8 V128:$dst),
                    (int_wasm_relaxed_swizzle (v16i8 V128:$src),
                                              (v16i8 V128:$mask)))],
              "i8x16.relaxed_swizzle\t$dst, $src, $mask",
              "i8x16.relaxed_swizzle", 0x100>;
  1177. //===----------------------------------------------------------------------===//
  1178. // Relaxed floating-point to int conversions
  1179. //===----------------------------------------------------------------------===//
  // Relaxed counterpart of SIMDConvert: same shape, but built on RELAXED_I.
  // Converts from shape `arg` to shape `vec`; the record is named
  // "<op>_<vec>".
  multiclass RelaxedConvert<Vec vec, Vec arg, SDPatternOperator op, string name, bits<32> simdop> {
    defvar opname = vec.prefix#"."#name;
    defm op#_#vec :
      RELAXED_I<(outs V128:$dst), (ins V128:$vec), (outs), (ins),
                [(set (vec.vt V128:$dst), (vec.vt (op (arg.vt V128:$vec))))],
                opname#"\t$dst, $vec", opname, simdop>;
  }

  // f32x4 -> i32x4
  defm "" : RelaxedConvert<I32x4, F32x4, int_wasm_relaxed_trunc_signed,
                           "relaxed_trunc_f32x4_s", 0x101>;
  defm "" : RelaxedConvert<I32x4, F32x4, int_wasm_relaxed_trunc_unsigned,
                           "relaxed_trunc_f32x4_u", 0x102>;

  // f64x2 -> i32x4 ("_zero" forms)
  defm "" : RelaxedConvert<I32x4, F64x2, int_wasm_relaxed_trunc_signed_zero,
                           "relaxed_trunc_f64x2_s_zero", 0x103>;
  defm "" : RelaxedConvert<I32x4, F64x2, int_wasm_relaxed_trunc_unsigned_zero,
                           "relaxed_trunc_f64x2_u_zero", 0x104>;
  1194. //===----------------------------------------------------------------------===//
  1195. // Relaxed (Negative) Multiply-Add (madd/nmadd)
  1196. //===----------------------------------------------------------------------===//
  // Three-operand relaxed_madd / relaxed_nmadd for shape `vec`, each selected
  // from its intrinsic. simdopA is the madd opcode and simdopS the nmadd
  // opcode.
  multiclass SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS> {
    defvar madd  = vec.prefix#".relaxed_madd";
    defvar nmadd = vec.prefix#".relaxed_nmadd";

    defm MADD_#vec :
      RELAXED_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c),
                (outs), (ins),
                [(set (vec.vt V128:$dst),
                      (int_wasm_relaxed_madd
                        (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))],
                madd#"\t$dst, $a, $b, $c",
                madd, simdopA>;

    defm NMADD_#vec :
      RELAXED_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c),
                (outs), (ins),
                [(set (vec.vt V128:$dst),
                      (int_wasm_relaxed_nmadd
                        (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))],
                nmadd#"\t$dst, $a, $b, $c",
                nmadd, simdopS>;
  }

  defm "" : SIMDMADD<F32x4, 0x105, 0x106>;
  defm "" : SIMDMADD<F64x2, 0x107, 0x108>;
  1213. //===----------------------------------------------------------------------===//
  1214. // Laneselect
  1215. //===----------------------------------------------------------------------===//
  // Emits LANESELECT_<vec>, a three-operand instruction selected for
  // int_wasm_relaxed_laneselect.
  //   vec - Vec record supplying the value type (vec.vt) used for the result
  //         and all three operands, and the mnemonic prefix.
  //   op  - forwarded unchanged as the final RELAXED_I argument.
  multiclass SIMDLANESELECT<Vec vec, bits<32> op> {
    // Mnemonic shared by both assembly strings handed to RELAXED_I.
    defvar mnemonic = vec.prefix#".relaxed_laneselect";

    defm LANESELECT_#vec :
      RELAXED_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c),
                (outs), (ins),
                [(set (vec.vt V128:$dst),
                      (int_wasm_relaxed_laneselect (vec.vt V128:$a),
                                                   (vec.vt V128:$b),
                                                   (vec.vt V128:$c)))],
                mnemonic#"\t$dst, $a, $b, $c", mnemonic, op>;
  }
  // One laneselect instruction per integer vector shape, with consecutive
  // values passed as the `op` argument.
  defm "" : SIMDLANESELECT<I8x16,
                           0x109>;
  defm "" : SIMDLANESELECT<I16x8,
                           0x10a>;
  defm "" : SIMDLANESELECT<I32x4,
                           0x10b>;
  defm "" : SIMDLANESELECT<I64x2,
                           0x10c>;
  1228. //===----------------------------------------------------------------------===//
  1229. // Relaxed floating-point min and max.
  1230. //===----------------------------------------------------------------------===//
  // Emits one two-operand relaxed instruction via RELAXED_I. The generated
  // record name is the instantiating defm's name followed by "_<vec>".
  //   vec    - Vec record supplying the value type (vec.vt) used for the
  //            result and both operands, and the mnemonic prefix.
  //   node   - pattern operator applied to ($lhs, $rhs) in the selection
  //            pattern.
  //   name   - text that follows "<vec.prefix>." in the mnemonic.
  //   simdop - forwarded unchanged as the final RELAXED_I argument.
  multiclass RelaxedBinary<Vec vec, SDPatternOperator node, string name,
                           bits<32> simdop> {
    // Mnemonic shared by both assembly strings handed to RELAXED_I.
    defvar mnemonic = vec.prefix#"."#name;

    defm _#vec :
      RELAXED_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins),
                [(set (vec.vt V128:$dst),
                      (node (vec.vt V128:$lhs), (vec.vt V128:$rhs)))],
                mnemonic#"\t$dst, $lhs, $rhs", mnemonic, simdop>;
  }
  // relaxed_min / relaxed_max for F32x4, then for F64x2. Each defm name is
  // completed with "_<vec>" by RelaxedBinary, so reusing the same prefix for
  // the two shapes yields distinct records.
  defm SIMD_RELAXED_FMIN : RelaxedBinary<F32x4, int_wasm_relaxed_min,
                                         "relaxed_min", 0x10d>;
  defm SIMD_RELAXED_FMAX : RelaxedBinary<F32x4, int_wasm_relaxed_max,
                                         "relaxed_max", 0x10e>;
  defm SIMD_RELAXED_FMIN : RelaxedBinary<F64x2, int_wasm_relaxed_min,
                                         "relaxed_min", 0x10f>;
  defm SIMD_RELAXED_FMAX : RelaxedBinary<F64x2, int_wasm_relaxed_max,
                                         "relaxed_max", 0x110>;
  1248. //===----------------------------------------------------------------------===//
  1249. // Relaxed rounding q15 multiplication
  1250. //===----------------------------------------------------------------------===//
  // I16x8 two-operand instruction selected for
  // int_wasm_relaxed_q15mulr_signed, built with the RelaxedBinary multiclass.
  defm RELAXED_Q15MULR_S : RelaxedBinary<I16x8,
                                         int_wasm_relaxed_q15mulr_signed,
                                         "relaxed_q15mulr_s", 0x111>;
  1254. //===----------------------------------------------------------------------===//
  1255. // Relaxed integer dot product
  1256. //===----------------------------------------------------------------------===//
  // Two-operand dot product: both inputs are typed v16i8 in the pattern and
  // the result is typed v8i16.
  defm RELAXED_DOT :
    RELAXED_I<(outs V128:$dst),
              (ins V128:$lhs, V128:$rhs),
              (outs), (ins),
              [(set (v8i16 V128:$dst),
                    (int_wasm_relaxed_dot_i8x16_i7x16_signed
                       (v16i8 V128:$lhs),
                       (v16i8 V128:$rhs)))],
              "i16x8.relaxed_dot_i8x16_i7x16_s\t$dst, $lhs, $rhs",
              "i16x8.relaxed_dot_i8x16_i7x16_s",
              0x112>;
  // Three-operand dot product with accumulator: $lhs and $rhs are typed v16i8,
  // while $acc and the result are typed v4i32.
  defm RELAXED_DOT_ADD :
    RELAXED_I<(outs V128:$dst),
              (ins V128:$lhs, V128:$rhs, V128:$acc),
              (outs), (ins),
              [(set (v4i32 V128:$dst),
                    (int_wasm_relaxed_dot_i8x16_i7x16_add_signed
                       (v16i8 V128:$lhs),
                       (v16i8 V128:$rhs),
                       (v4i32 V128:$acc)))],
              "i32x4.relaxed_dot_i8x16_i7x16_add_s\t$dst, $lhs, $rhs, $acc",
              "i32x4.relaxed_dot_i8x16_i7x16_add_s",
              0x113>;
  1270. //===----------------------------------------------------------------------===//
  1271. // Relaxed BFloat16 dot product
  1272. //===----------------------------------------------------------------------===//
  // Three-operand dot product with accumulator: $lhs and $rhs are typed v8i16
  // in the pattern, while $acc and the result are typed v4f32.
  defm RELAXED_DOT_BFLOAT :
    RELAXED_I<(outs V128:$dst),
              (ins V128:$lhs, V128:$rhs, V128:$acc),
              (outs), (ins),
              [(set (v4f32 V128:$dst),
                    (int_wasm_relaxed_dot_bf16x8_add_f32
                       (v8i16 V128:$lhs),
                       (v8i16 V128:$rhs),
                       (v4f32 V128:$acc)))],
              "f32x4.relaxed_dot_bf16x8_add_f32\t$dst, $lhs, $rhs, $acc",
              "f32x4.relaxed_dot_bf16x8_add_f32",
              0x114>;