
  1. //===- README_P9.txt - Notes for improving Power9 code gen ----------------===//
  2. TODO: Instructions that need intrinsics implemented or need mapping to LLVM IR
  3. Altivec:
  4. - Vector Compare Not Equal (Zero):
  5. vcmpneb(.) vcmpneh(.) vcmpnew(.)
  6. vcmpnezb(.) vcmpnezh(.) vcmpnezw(.)
  7. . Same as other VCMP*, use VCMP/VCMPo form (support intrinsic)
  8. - Vector Extract Unsigned: vextractub vextractuh vextractuw vextractd
  9. . Don't use llvm extractelement because they have different semantics
  10. . Use intrinsics:
  11. (set v2i64:$vD, (int_ppc_altivec_vextractub v16i8:$vA, imm:$UIMM))
  12. (set v2i64:$vD, (int_ppc_altivec_vextractuh v8i16:$vA, imm:$UIMM))
  13. (set v2i64:$vD, (int_ppc_altivec_vextractuw v4i32:$vA, imm:$UIMM))
  14. (set v2i64:$vD, (int_ppc_altivec_vextractd v2i64:$vA, imm:$UIMM))
  15. - Vector Extract Unsigned Byte Left/Right-Indexed:
  16. vextublx vextubrx vextuhlx vextuhrx vextuwlx vextuwrx
  17. . Use intrinsics:
  18. // Left-Indexed
  19. (set i64:$rD, (int_ppc_altivec_vextublx i64:$rA, v16i8:$vB))
  20. (set i64:$rD, (int_ppc_altivec_vextuhlx i64:$rA, v8i16:$vB))
  21. (set i64:$rD, (int_ppc_altivec_vextuwlx i64:$rA, v4i32:$vB))
  22. // Right-Indexed
  23. (set i64:$rD, (int_ppc_altivec_vextubrx i64:$rA, v16i8:$vB))
  24. (set i64:$rD, (int_ppc_altivec_vextuhrx i64:$rA, v8i16:$vB))
  25. (set i64:$rD, (int_ppc_altivec_vextuwrx i64:$rA, v4i32:$vB))
  26. - Vector Insert Element Instructions: vinsertb vinserth vinsertw vinsertd
  27. (set v16i8:$vD, (int_ppc_altivec_vinsertb v16i8:$vA, imm:$UIMM))
  28. (set v8i16:$vD, (int_ppc_altivec_vinserth v8i16:$vA, imm:$UIMM))
  29. (set v4i32:$vD, (int_ppc_altivec_vinsertw v4i32:$vA, imm:$UIMM))
  30. (set v2i64:$vD, (int_ppc_altivec_vinsertd v2i64:$vA, imm:$UIMM))
  31. - Vector Count Leading/Trailing Zero LSB. Result is placed into GPR[rD]:
  32. vclzlsbb vctzlsbb
  33. . Use intrinsic:
  34. (set i64:$rD, (int_ppc_altivec_vclzlsbb v16i8:$vB))
  35. (set i64:$rD, (int_ppc_altivec_vctzlsbb v16i8:$vB))
  36. - Vector Count Trailing Zeros: vctzb vctzh vctzw vctzd
  37. . Map to llvm cttz
  38. (set v16i8:$vD, (cttz v16i8:$vB)) // vctzb
  39. (set v8i16:$vD, (cttz v8i16:$vB)) // vctzh
  40. (set v4i32:$vD, (cttz v4i32:$vB)) // vctzw
  41. (set v2i64:$vD, (cttz v2i64:$vB)) // vctzd
  42. - Vector Extend Sign: vextsb2w vextsh2w vextsb2d vextsh2d vextsw2d
  43. . vextsb2w:
  44. (set v4i32:$vD, (sext v4i8:$vB))
  45. // PowerISA_V3.0:
  46. do i = 0 to 3
  47. VR[VRT].word[i] ← EXTS32(VR[VRB].word[i].byte[3])
  48. end
  49. . vextsh2w:
  50. (set v4i32:$vD, (sext v4i16:$vB))
  51. // PowerISA_V3.0:
  52. do i = 0 to 3
  53. VR[VRT].word[i] ← EXTS32(VR[VRB].word[i].hword[1])
  54. end
  55. . vextsb2d
  56. (set v2i64:$vD, (sext v2i8:$vB))
  57. // PowerISA_V3.0:
  58. do i = 0 to 1
  59. VR[VRT].dword[i] ← EXTS64(VR[VRB].dword[i].byte[7])
  60. end
  61. . vextsh2d
  62. (set v2i64:$vD, (sext v2i16:$vB))
  63. // PowerISA_V3.0:
  64. do i = 0 to 1
  65. VR[VRT].dword[i] ← EXTS64(VR[VRB].dword[i].hword[3])
  66. end
  67. . vextsw2d
  68. (set v2i64:$vD, (sext v2i32:$vB))
  69. // PowerISA_V3.0:
  70. do i = 0 to 1
  71. VR[VRT].dword[i] ← EXTS64(VR[VRB].dword[i].word[1])
  72. end
  73. - Vector Integer Negate: vnegw vnegd
  74. . Map to llvm ineg
  75. (set v4i32:$rT, (ineg v4i32:$rA)) // vnegw
  76. (set v2i64:$rT, (ineg v2i64:$rA)) // vnegd
  77. - Vector Parity Byte: vprtybw vprtybd vprtybq
  78. . Use intrinsic:
  79. (set v4i32:$rD, (int_ppc_altivec_vprtybw v4i32:$vB))
  80. (set v2i64:$rD, (int_ppc_altivec_vprtybd v2i64:$vB))
  81. (set v1i128:$rD, (int_ppc_altivec_vprtybq v1i128:$vB))
  82. - Vector (Bit) Permute (Right-indexed):
  83. . vbpermd: Same as "vbpermq", use VX1_Int_Ty2:
  84. VX1_Int_Ty2<1484, "vbpermd", int_ppc_altivec_vbpermd, v2i64, v2i64>;
  85. . vpermr: use VA1a_Int_Ty3
  86. VA1a_Int_Ty3<59, "vpermr", int_ppc_altivec_vpermr, v16i8, v16i8, v16i8>;
  87. - Vector Rotate Left Mask/Mask-Insert: vrlwnm vrlwmi vrldnm vrldmi
  88. . Use intrinsic:
  89. VX1_Int_Ty<389, "vrlwnm", int_ppc_altivec_vrlwnm, v4i32>;
  90. VX1_Int_Ty<133, "vrlwmi", int_ppc_altivec_vrlwmi, v4i32>;
  91. VX1_Int_Ty<453, "vrldnm", int_ppc_altivec_vrldnm, v2i64>;
  92. VX1_Int_Ty<197, "vrldmi", int_ppc_altivec_vrldmi, v2i64>;
  93. - Vector Shift Left/Right: vslv vsrv
  94. . Use intrinsic, don't map to llvm shl and lshr, because they have different
  95. semantics, e.g. vslv:
  96. do i = 0 to 15
  97. sh ← VR[VRB].byte[i].bit[5:7]
  98. VR[VRT].byte[i] ← src.byte[i:i+1].bit[sh:sh+7]
  99. end
  100. VR[VRT].byte[i] is composed of 2 bytes from src.byte[i:i+1]
  101. . VX1_Int_Ty<1860, "vslv", int_ppc_altivec_vslv, v16i8>;
  102. VX1_Int_Ty<1796, "vsrv", int_ppc_altivec_vsrv, v16i8>;
  103. - Vector Multiply-by-10 (& Write Carry) Unsigned Quadword:
  104. vmul10uq vmul10cuq
  105. . Use intrinsic:
  106. VX1_Int_Ty<513, "vmul10uq", int_ppc_altivec_vmul10uq, v1i128>;
  107. VX1_Int_Ty< 1, "vmul10cuq", int_ppc_altivec_vmul10cuq, v1i128>;
  108. - Vector Multiply-by-10 Extended (& Write Carry) Unsigned Quadword:
  109. vmul10euq vmul10ecuq
  110. . Use intrinsic:
  111. VX1_Int_Ty<577, "vmul10euq", int_ppc_altivec_vmul10euq, v1i128>;
  112. VX1_Int_Ty< 65, "vmul10ecuq", int_ppc_altivec_vmul10ecuq, v1i128>;
  113. - Decimal Convert From/to National/Zoned/Signed-QWord:
  114. bcdcfn. bcdcfz. bcdctn. bcdctz. bcdcfsq. bcdctsq.
  115. . Use intrinsics:
  116. (set v1i128:$vD, (int_ppc_altivec_bcdcfno v1i128:$vB, i1:$PS))
  117. (set v1i128:$vD, (int_ppc_altivec_bcdcfzo v1i128:$vB, i1:$PS))
  118. (set v1i128:$vD, (int_ppc_altivec_bcdctno v1i128:$vB))
  119. (set v1i128:$vD, (int_ppc_altivec_bcdctzo v1i128:$vB, i1:$PS))
  120. (set v1i128:$vD, (int_ppc_altivec_bcdcfsqo v1i128:$vB, i1:$PS))
  121. (set v1i128:$vD, (int_ppc_altivec_bcdctsqo v1i128:$vB))
  122. - Decimal Copy-Sign/Set-Sign: bcdcpsgn. bcdsetsgn.
  123. . Use intrinsics:
  124. (set v1i128:$vD, (int_ppc_altivec_bcdcpsgno v1i128:$vA, v1i128:$vB))
  125. (set v1i128:$vD, (int_ppc_altivec_bcdsetsgno v1i128:$vB, i1:$PS))
  126. - Decimal Shift/Unsigned-Shift/Shift-and-Round: bcds. bcdus. bcdsr.
  127. . Use intrinsics:
  128. (set v1i128:$vD, (int_ppc_altivec_bcdso v1i128:$vA, v1i128:$vB, i1:$PS))
  129. (set v1i128:$vD, (int_ppc_altivec_bcduso v1i128:$vA, v1i128:$vB))
  130. (set v1i128:$vD, (int_ppc_altivec_bcdsro v1i128:$vA, v1i128:$vB, i1:$PS))
  131. . Note! Only 1 byte of their VA is accessed, i.e. VA.byte[7]
  132. - Decimal (Unsigned) Truncate: bcdtrunc. bcdutrunc.
  133. . Use intrinsics:
  134. (set v1i128:$vD, (int_ppc_altivec_bcdtrunco v1i128:$vA, v1i128:$vB, i1:$PS))
  135. (set v1i128:$vD, (int_ppc_altivec_bcdutrunco v1i128:$vA, v1i128:$vB))
  136. . Note! Only 2 bytes of their VA are accessed, i.e. VA.hword[3] (VA.bit[48:63])
  137. VSX:
  138. - QP Copy Sign: xscpsgnqp
  139. . Similar to xscpsgndp
  140. . (set f128:$vT, (fcopysign f128:$vB, f128:$vA))
  141. - QP Absolute/Negative-Absolute/Negate: xsabsqp xsnabsqp xsnegqp
  142. . Similar to xsabsdp/xsnabsdp/xsnegdp
  143. . (set f128:$vT, (fabs f128:$vB)) // xsabsqp
  144. (set f128:$vT, (fneg (fabs f128:$vB))) // xsnabsqp
  145. (set f128:$vT, (fneg f128:$vB)) // xsnegqp
  146. - QP Add/Divide/Multiply/Subtract/Square-Root:
  147. xsaddqp xsdivqp xsmulqp xssubqp xssqrtqp
  148. . Similar to xsadddp
  149. . isCommutable = 1
  150. (set f128:$vT, (fadd f128:$vA, f128:$vB)) // xsaddqp
  151. (set f128:$vT, (fmul f128:$vA, f128:$vB)) // xsmulqp
  152. . isCommutable = 0
  153. (set f128:$vT, (fdiv f128:$vA, f128:$vB)) // xsdivqp
  154. (set f128:$vT, (fsub f128:$vA, f128:$vB)) // xssubqp
  155. (set f128:$vT, (fsqrt f128:$vB)) // xssqrtqp
  156. - Round to Odd of QP Add/Divide/Multiply/Subtract/Square-Root:
  157. xsaddqpo xsdivqpo xsmulqpo xssubqpo xssqrtqpo
  158. . Similar to xsrsqrtedp??
  159. def XSRSQRTEDP : XX2Form<60, 74,
  160. (outs vsfrc:$XT), (ins vsfrc:$XB),
  161. "xsrsqrtedp $XT, $XB", IIC_VecFP,
  162. [(set f64:$XT, (PPCfrsqrte f64:$XB))]>;
  163. . Define DAG Node in PPCInstrInfo.td:
  164. def PPCfaddrto: SDNode<"PPCISD::FADDRTO", SDTFPBinOp, []>;
  165. def PPCfdivrto: SDNode<"PPCISD::FDIVRTO", SDTFPBinOp, []>;
  166. def PPCfmulrto: SDNode<"PPCISD::FMULRTO", SDTFPBinOp, []>;
  167. def PPCfsubrto: SDNode<"PPCISD::FSUBRTO", SDTFPBinOp, []>;
  168. def PPCfsqrtrto: SDNode<"PPCISD::FSQRTRTO", SDTFPUnaryOp, []>;
  169. DAG patterns of each instruction (PPCInstrVSX.td):
  170. . isCommutable = 1
  171. (set f128:$vT, (PPCfaddrto f128:$vA, f128:$vB)) // xsaddqpo
  172. (set f128:$vT, (PPCfmulrto f128:$vA, f128:$vB)) // xsmulqpo
  173. . isCommutable = 0
  174. (set f128:$vT, (PPCfdivrto f128:$vA, f128:$vB)) // xsdivqpo
  175. (set f128:$vT, (PPCfsubrto f128:$vA, f128:$vB)) // xssubqpo
  176. (set f128:$vT, (PPCfsqrtrto f128:$vB)) // xssqrtqpo
  177. - QP (Negative) Multiply-{Add/Subtract}: xsmaddqp xsmsubqp xsnmaddqp xsnmsubqp
  178. . Ref: xsmaddadp/xsmsubadp/xsnmaddadp/xsnmsubadp
  179. . isCommutable = 1
  180. // xsmaddqp
  181. [(set f128:$vT, (fma f128:$vA, f128:$vB, f128:$vTi))]>,
  182. RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
  183. AltVSXFMARel;
  184. // xsmsubqp
  185. [(set f128:$vT, (fma f128:$vA, f128:$vB, (fneg f128:$vTi)))]>,
  186. RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
  187. AltVSXFMARel;
  188. // xsnmaddqp
  189. [(set f128:$vT, (fneg (fma f128:$vA, f128:$vB, f128:$vTi)))]>,
  190. RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
  191. AltVSXFMARel;
  192. // xsnmsubqp
  193. [(set f128:$vT, (fneg (fma f128:$vA, f128:$vB, (fneg f128:$vTi))))]>,
  194. RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
  195. AltVSXFMARel;
  196. - Round to Odd of QP (Negative) Multiply-{Add/Subtract}:
  197. xsmaddqpo xsmsubqpo xsnmaddqpo xsnmsubqpo
  198. . Similar to xsrsqrtedp??
  199. . Define DAG Node in PPCInstrInfo.td:
  200. def PPCfmarto: SDNode<"PPCISD::FMARTO", SDTFPTernaryOp, []>;
  201. It looks like we only need to define "PPCfmarto" for these instructions,
  202. because according to PowerISA_V3.0, these instructions perform RTO on
  203. fma's result:
  204. xsmaddqp(o)
  205. v ← bfp_MULTIPLY_ADD(src1, src3, src2)
  206. rnd ← bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v)
  207. result ← bfp_CONVERT_TO_BFP128(rnd)
  208. xsmsubqp(o)
  209. v ← bfp_MULTIPLY_ADD(src1, src3, bfp_NEGATE(src2))
  210. rnd ← bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v)
  211. result ← bfp_CONVERT_TO_BFP128(rnd)
  212. xsnmaddqp(o)
  213. v ← bfp_MULTIPLY_ADD(src1,src3,src2)
  214. rnd ← bfp_NEGATE(bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v))
  215. result ← bfp_CONVERT_TO_BFP128(rnd)
  216. xsnmsubqp(o)
  217. v ← bfp_MULTIPLY_ADD(src1, src3, bfp_NEGATE(src2))
  218. rnd ← bfp_NEGATE(bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v))
  219. result ← bfp_CONVERT_TO_BFP128(rnd)
  220. DAG patterns of each instruction (PPCInstrVSX.td):
  221. . isCommutable = 1
  222. // xsmaddqpo
  223. [(set f128:$vT, (PPCfmarto f128:$vA, f128:$vB, f128:$vTi))]>,
  224. RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
  225. AltVSXFMARel;
  226. // xsmsubqpo
  227. [(set f128:$vT, (PPCfmarto f128:$vA, f128:$vB, (fneg f128:$vTi)))]>,
  228. RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
  229. AltVSXFMARel;
  230. // xsnmaddqpo
  231. [(set f128:$vT, (fneg (PPCfmarto f128:$vA, f128:$vB, f128:$vTi)))]>,
  232. RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
  233. AltVSXFMARel;
  234. // xsnmsubqpo
  235. [(set f128:$vT, (fneg (PPCfmarto f128:$vA, f128:$vB, (fneg f128:$vTi))))]>,
  236. RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
  237. AltVSXFMARel;
  238. - QP Compare Ordered/Unordered: xscmpoqp xscmpuqp
  239. . ref: XSCMPUDP
  240. def XSCMPUDP : XX3Form_1<60, 35,
  241. (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
  242. "xscmpudp $crD, $XA, $XB", IIC_FPCompare, []>;
  243. . No SDAG, intrinsic, builtin are required??
  244. Or llvm fcmp order/unorder compare??
  245. - DP/QP Compare Exponents: xscmpexpdp xscmpexpqp
  246. . No SDAG, intrinsic, builtin are required?
  247. - DP Compare ==, >=, >, !=: xscmpeqdp xscmpgedp xscmpgtdp xscmpnedp
  248. . I checked existing instruction "XSCMPUDP". They are different in target
  249. register. "XSCMPUDP" writes to a CR field, xscmp*dp writes to a VSX register
  250. . Use intrinsic:
  251. (set i128:$XT, (int_ppc_vsx_xscmpeqdp f64:$XA, f64:$XB))
  252. (set i128:$XT, (int_ppc_vsx_xscmpgedp f64:$XA, f64:$XB))
  253. (set i128:$XT, (int_ppc_vsx_xscmpgtdp f64:$XA, f64:$XB))
  254. (set i128:$XT, (int_ppc_vsx_xscmpnedp f64:$XA, f64:$XB))
  255. - Vector Compare Not Equal: xvcmpnedp xvcmpnedp. xvcmpnesp xvcmpnesp.
  256. . Similar to xvcmpeqdp:
  257. defm XVCMPEQDP : XX3Form_Rcr<60, 99,
  258. "xvcmpeqdp", "$XT, $XA, $XB", IIC_VecFPCompare,
  259. int_ppc_vsx_xvcmpeqdp, v2i64, v2f64>;
  260. . So we should use "XX3Form_Rcr" to implement intrinsic
  261. - Convert DP -> QP: xscvdpqp
  262. . Similar to XSCVDPSP:
  263. def XSCVDPSP : XX2Form<60, 265,
  264. (outs vsfrc:$XT), (ins vsfrc:$XB),
  265. "xscvdpsp $XT, $XB", IIC_VecFP, []>;
  266. . So, No SDAG, intrinsic, builtin are required??
  267. - Round & Convert QP -> DP (dword[1] is set to zero): xscvqpdp xscvqpdpo
  268. . Similar to XSCVDPSP
  269. . No SDAG, intrinsic, builtin are required??
  270. - Truncate & Convert QP -> (Un)Signed (D)Word (dword[1] is set to zero):
  271. xscvqpsdz xscvqpswz xscvqpudz xscvqpuwz
  272. . According to PowerISA_V3.0, these are similar to "XSCVDPSXDS", "XSCVDPSXWS",
  273. "XSCVDPUXDS", "XSCVDPUXWS"
  274. . DAG patterns:
  275. (set f128:$XT, (PPCfctidz f128:$XB)) // xscvqpsdz
  276. (set f128:$XT, (PPCfctiwz f128:$XB)) // xscvqpswz
  277. (set f128:$XT, (PPCfctiduz f128:$XB)) // xscvqpudz
  278. (set f128:$XT, (PPCfctiwuz f128:$XB)) // xscvqpuwz
  279. - Convert (Un)Signed DWord -> QP: xscvsdqp xscvudqp
  280. . Similar to XSCVSXDSP
  281. . (set f128:$XT, (PPCfcfids f64:$XB)) // xscvsdqp
  282. (set f128:$XT, (PPCfcfidus f64:$XB)) // xscvudqp
  283. - (Round &) Convert DP <-> HP: xscvdphp xscvhpdp
  284. . Similar to XSCVDPSP
  285. . No SDAG, intrinsic, builtin are required??
  286. - Vector HP -> SP: xvcvhpsp xvcvsphp
  287. . Similar to XVCVDPSP:
  288. def XVCVDPSP : XX2Form<60, 393,
  289. (outs vsrc:$XT), (ins vsrc:$XB),
  290. "xvcvdpsp $XT, $XB", IIC_VecFP, []>;
  291. . No SDAG, intrinsic, builtin are required??
  292. - Round to Quad-Precision Integer: xsrqpi xsrqpix
  293. . These are combination of "XSRDPI", "XSRDPIC", "XSRDPIM", .., because you
  294. need to assign rounding mode in instruction
  295. . Provide builtin?
  296. (set f128:$vT, (int_ppc_vsx_xsrqpi f128:$vB))
  297. (set f128:$vT, (int_ppc_vsx_xsrqpix f128:$vB))
  298. - Round Quad-Precision to Double-Extended Precision (fp80): xsrqpxp
  299. . Provide builtin?
  300. (set f128:$vT, (int_ppc_vsx_xsrqpxp f128:$vB))
  301. Fixed Point Facility:
  302. - Exploit cmprb and cmpeqb (perhaps for something like
  303. isalpha/isdigit/isupper/islower and isspace respectively). This can
  304. perhaps be done through a builtin.
  305. - Provide testing for cnttz[dw]
  306. - Insert Exponent DP/QP: xsiexpdp xsiexpqp
  307. . Use intrinsic?
  308. . xsiexpdp:
  309. // Note: rA and rB are the unsigned integer value.
  310. (set f128:$XT, (int_ppc_vsx_xsiexpdp i64:$rA, i64:$rB))
  311. . xsiexpqp:
  312. (set f128:$vT, (int_ppc_vsx_xsiexpqp f128:$vA, f64:$vB))
  313. - Extract Exponent/Significand DP/QP: xsxexpdp xsxsigdp xsxexpqp xsxsigqp
  314. . Use intrinsic?
  315. . (set i64:$rT, (int_ppc_vsx_xsxexpdp f64:$XB)) // xsxexpdp
  316. (set i64:$rT, (int_ppc_vsx_xsxsigdp f64:$XB)) // xsxsigdp
  317. (set f128:$vT, (int_ppc_vsx_xsxexpqp f128:$vB)) // xsxexpqp
  318. (set f128:$vT, (int_ppc_vsx_xsxsigqp f128:$vB)) // xsxsigqp
  319. - Vector Insert Word: xxinsertw
  320. - Useful for inserting f32/i32 elements into vectors (the element to be
  321. inserted needs to be prepared)
  322. . Note: llvm has insertelem in "Vector Operations"
  323. ; yields <n x <ty>>
  324. <result> = insertelement <n x <ty>> <val>, <ty> <elt>, <ty2> <idx>
  325. But how to map to it??
  326. [(set v1f128:$XT, (insertelement v1f128:$XTi, f128:$XB, i4:$UIMM))]>,
  327. RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
  328. . Or use intrinsic?
  329. (set v1f128:$XT, (int_ppc_vsx_xxinsertw v1f128:$XTi, f128:$XB, i4:$UIMM))
  330. - Vector Extract Unsigned Word: xxextractuw
  331. - Not useful for extraction of f32 from v4f32 (the current pattern is better -
  332. shift->convert)
  333. - It is useful for (uint_to_fp (vector_extract v4i32, N))
  334. - Unfortunately, it can't be used for (sint_to_fp (vector_extract v4i32, N))
  335. . Note: llvm has extractelement in "Vector Operations"
  336. ; yields <ty>
  337. <result> = extractelement <n x <ty>> <val>, <ty2> <idx>
  338. How to map to it??
  339. [(set f128:$XT, (extractelement v1f128:$XB, i4:$UIMM))]
  340. . Or use intrinsic?
  341. (set f128:$XT, (int_ppc_vsx_xxextractuw v1f128:$XB, i4:$UIMM))
  342. - Vector Insert Exponent DP/SP: xviexpdp xviexpsp
  343. . Use intrinsic
  344. (set v2f64:$XT, (int_ppc_vsx_xviexpdp v2f64:$XA, v2f64:$XB))
  345. (set v4f32:$XT, (int_ppc_vsx_xviexpsp v4f32:$XA, v4f32:$XB))
  346. - Vector Extract Exponent/Significand DP/SP: xvxexpdp xvxexpsp xvxsigdp xvxsigsp
  347. . Use intrinsic
  348. (set v2f64:$XT, (int_ppc_vsx_xvxexpdp v2f64:$XB))
  349. (set v4f32:$XT, (int_ppc_vsx_xvxexpsp v4f32:$XB))
  350. (set v2f64:$XT, (int_ppc_vsx_xvxsigdp v2f64:$XB))
  351. (set v4f32:$XT, (int_ppc_vsx_xvxsigsp v4f32:$XB))
  352. - Test Data Class SP/DP/QP: xststdcsp xststdcdp xststdcqp
  353. . No SDAG, intrinsic, builtin are required?
  354. Because it seems that we have no way to map BF field?
  355. Instruction Form: [PO T XO B XO BX TX]
  356. Asm: xststd* BF,XB,DCMX
  357. BF is an index to CR register field.
  358. - Vector Test Data Class SP/DP: xvtstdcsp xvtstdcdp
  359. . Use intrinsic
  360. (set v4f32:$XT, (int_ppc_vsx_xvtstdcsp v4f32:$XB, i7:$DCMX))
  361. (set v2f64:$XT, (int_ppc_vsx_xvtstdcdp v2f64:$XB, i7:$DCMX))
  362. - Maximum/Minimum Type-C/Type-J DP: xsmaxcdp xsmaxjdp xsmincdp xsminjdp
  363. . PowerISA_V3.0:
  364. "xsmaxcdp can be used to implement the C/C++/Java conditional operation
  365. (x>y)?x:y for single-precision and double-precision arguments."
  366. Note! c type and j type have different behavior when:
  367. 1. Either input is NaN
  368. 2. Both input are +-Infinity, +-Zero
  369. . c type maps to llvm fmaxnum/fminnum
  370. j type uses an intrinsic
  371. . xsmaxcdp xsmincdp
  372. (set f64:$XT, (fmaxnum f64:$XA, f64:$XB))
  373. (set f64:$XT, (fminnum f64:$XA, f64:$XB))
  374. . xsmaxjdp xsminjdp
  375. (set f64:$XT, (int_ppc_vsx_xsmaxjdp f64:$XA, f64:$XB))
  376. (set f64:$XT, (int_ppc_vsx_xsminjdp f64:$XA, f64:$XB))
  377. - Vector Byte-Reverse H/W/D/Q Word: xxbrh xxbrw xxbrd xxbrq
  378. . Use intrinsic
  379. (set v8i16:$XT, (int_ppc_vsx_xxbrh v8i16:$XB))
  380. (set v4i32:$XT, (int_ppc_vsx_xxbrw v4i32:$XB))
  381. (set v2i64:$XT, (int_ppc_vsx_xxbrd v2i64:$XB))
  382. (set v1i128:$XT, (int_ppc_vsx_xxbrq v1i128:$XB))
  383. - Vector Permute: xxperm xxpermr
  384. . I have checked "PPCxxswapd" in PPCInstrVSX.td, but they are different
  385. . Use intrinsic
  386. (set v16i8:$XT, (int_ppc_vsx_xxperm v16i8:$XA, v16i8:$XB))
  387. (set v16i8:$XT, (int_ppc_vsx_xxpermr v16i8:$XA, v16i8:$XB))
  388. - Vector Splat Immediate Byte: xxspltib
  389. . Similar to XXSPLTW:
  390. def XXSPLTW : XX2Form_2<60, 164,
  391. (outs vsrc:$XT), (ins vsrc:$XB, u2imm:$UIM),
  392. "xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>;
  393. . No SDAG, intrinsic, builtin are required?
  394. - Load/Store Vector: lxv stxv
  395. . Has likely SDAG match:
  396. (set v?:$XT, (load ix16addr:$src))
  397. (store v?:$XT, ix16addr:$dst)
  398. . Need define ix16addr in PPCInstrInfo.td
  399. ix16addr: 16-byte aligned, see "def memrix16" in PPCInstrInfo.td
  400. - Load/Store Vector Indexed: lxvx stxvx
  401. . Has likely SDAG match:
  402. (set v?:$XT, (load xoaddr:$src))
  403. (store v?:$XT, xoaddr:$dst)
  404. - Load/Store DWord: lxsd stxsd
  405. . Similar to lxsdx/stxsdx:
  406. def LXSDX : XX1Form<31, 588,
  407. (outs vsfrc:$XT), (ins memrr:$src),
  408. "lxsdx $XT, $src", IIC_LdStLFD,
  409. [(set f64:$XT, (load xoaddr:$src))]>;
  410. . (set f64:$XT, (load iaddrX4:$src))
  411. (store f64:$XT, iaddrX4:$dst)
  412. - Load/Store SP, with conversion from/to DP: lxssp stxssp
  413. . Similar to lxsspx/stxsspx:
  414. def LXSSPX : XX1Form<31, 524, (outs vssrc:$XT), (ins memrr:$src),
  415. "lxsspx $XT, $src", IIC_LdStLFD,
  416. [(set f32:$XT, (load xoaddr:$src))]>;
  417. . (set f32:$XT, (load iaddrX4:$src))
  418. (store f32:$XT, iaddrX4:$dst)
  419. - Load as Integer Byte/Halfword & Zero Indexed: lxsibzx lxsihzx
  420. . Similar to lxsiwzx:
  421. def LXSIWZX : XX1Form<31, 12, (outs vsfrc:$XT), (ins memrr:$src),
  422. "lxsiwzx $XT, $src", IIC_LdStLFD,
  423. [(set f64:$XT, (PPClfiwzx xoaddr:$src))]>;
  424. . (set f64:$XT, (PPClfiwzx xoaddr:$src))
  425. - Store as Integer Byte/Halfword Indexed: stxsibx stxsihx
  426. . Similar to stxsiwx:
  427. def STXSIWX : XX1Form<31, 140, (outs), (ins vsfrc:$XT, memrr:$dst),
  428. "stxsiwx $XT, $dst", IIC_LdStSTFD,
  429. [(PPCstfiwx f64:$XT, xoaddr:$dst)]>;
  430. . (PPCstfiwx f64:$XT, xoaddr:$dst)
  431. - Load Vector Halfword*8/Byte*16 Indexed: lxvh8x lxvb16x
  432. . Similar to lxvd2x/lxvw4x:
  433. def LXVD2X : XX1Form<31, 844,
  434. (outs vsrc:$XT), (ins memrr:$src),
  435. "lxvd2x $XT, $src", IIC_LdStLFD,
  436. [(set v2f64:$XT, (int_ppc_vsx_lxvd2x xoaddr:$src))]>;
  437. . (set v8i16:$XT, (int_ppc_vsx_lxvh8x xoaddr:$src))
  438. (set v16i8:$XT, (int_ppc_vsx_lxvb16x xoaddr:$src))
  439. - Store Vector Halfword*8/Byte*16 Indexed: stxvh8x stxvb16x
  440. . Similar to stxvd2x/stxvw4x:
  441. def STXVD2X : XX1Form<31, 972,
  442. (outs), (ins vsrc:$XT, memrr:$dst),
  443. "stxvd2x $XT, $dst", IIC_LdStSTFD,
  444. [(store v2f64:$XT, xoaddr:$dst)]>;
  445. . (store v8i16:$XT, xoaddr:$dst)
  446. (store v16i8:$XT, xoaddr:$dst)
  447. - Load/Store Vector (Left-justified) with Length: lxvl lxvll stxvl stxvll
  448. . Likely needs an intrinsic
  449. . (set v?:$XT, (int_ppc_vsx_lxvl xoaddr:$src))
  450. (set v?:$XT, (int_ppc_vsx_lxvll xoaddr:$src))
  451. . (int_ppc_vsx_stxvl xoaddr:$dst)
  452. (int_ppc_vsx_stxvll xoaddr:$dst)
  453. - Load Vector Word & Splat Indexed: lxvwsx
  454. . Likely needs an intrinsic
  455. . (set v?:$XT, (int_ppc_vsx_lxvwsx xoaddr:$src))
  456. Atomic operations (l[dw]at, st[dw]at):
  457. - Provide custom lowering for common atomic operations to use these
  458. instructions with the correct Function Code
  459. - Ensure the operands are in the correct register (i.e. RT+1, RT+2)
  460. - Provide builtins since not all FC's necessarily have an existing LLVM
  461. atomic operation
  462. Move to CR from XER Extended (mcrxrx):
  463. - Is there a use for this in LLVM?
  464. Fixed Point Facility:
  465. - Copy-Paste Facility: copy copy_first cp_abort paste paste. paste_last
  466. . Use intrinsics:
  467. (int_ppc_copy_first i32:$rA, i32:$rB)
  468. (int_ppc_copy i32:$rA, i32:$rB)
  469. (int_ppc_paste i32:$rA, i32:$rB)
  470. (int_ppc_paste_last i32:$rA, i32:$rB)
  471. (int_cp_abort)
  472. - Message Synchronize: msgsync
  473. - SLB*: slbieg slbsync
  474. - stop
  475. . No intrinsics