LegalizeVectorOps.cpp

  1. //===- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ----===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file implements the SelectionDAG::LegalizeVectors method.
  10. //
  11. // The vector legalizer looks for vector operations which might need to be
  12. // scalarized and legalizes them. This is a separate step from Legalize because
  13. // scalarizing can introduce illegal types. For example, suppose we have an
  14. // ISD::SDIV of type v2i64 on x86-32. The type is legal (for example, addition
  15. // on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the
  16. // operation, which introduces nodes with the illegal type i64 which must be
  17. // expanded. Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC;
  18. // the operation must be unrolled, which introduces nodes with the illegal
  19. // type i8 which must be promoted.
  20. //
  21. // This does not legalize vector manipulations like ISD::BUILD_VECTOR,
  22. // or operations that happen to take a vector which are custom-lowered;
  23. // the legalization for such operations never produces nodes
  24. // with illegal types, so it's okay to put off legalizing them until
  25. // SelectionDAG::Legalize runs.
  26. //
  27. //===----------------------------------------------------------------------===//
  28. #include "llvm/ADT/DenseMap.h"
  29. #include "llvm/ADT/SmallVector.h"
  30. #include "llvm/CodeGen/ISDOpcodes.h"
  31. #include "llvm/CodeGen/SelectionDAG.h"
  32. #include "llvm/CodeGen/SelectionDAGNodes.h"
  33. #include "llvm/CodeGen/TargetLowering.h"
  34. #include "llvm/CodeGen/ValueTypes.h"
  35. #include "llvm/IR/DataLayout.h"
  36. #include "llvm/Support/Casting.h"
  37. #include "llvm/Support/Compiler.h"
  38. #include "llvm/Support/Debug.h"
  39. #include "llvm/Support/ErrorHandling.h"
  40. #include "llvm/Support/MachineValueType.h"
  41. #include <cassert>
  42. #include <cstdint>
  43. #include <iterator>
  44. #include <utility>
  45. using namespace llvm;
  46. #define DEBUG_TYPE "legalizevectorops"
  47. namespace {
  48. class VectorLegalizer {
  49. SelectionDAG& DAG;
  50. const TargetLowering &TLI;
  51. bool Changed = false; // Keep track of whether anything changed
  52. /// For nodes that are of legal width, and that have more than one use, this
  53. /// map indicates what legalized operand to use. This allows us to avoid
  54. /// legalizing the same thing more than once.
  55. SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes;
  56. /// Adds a node to the translation cache.
  57. void AddLegalizedOperand(SDValue From, SDValue To) {
  58. LegalizedNodes.insert(std::make_pair(From, To));
  59. // If someone requests legalization of the new node, return itself.
  60. if (From != To)
  61. LegalizedNodes.insert(std::make_pair(To, To));
  62. }
  63. /// Legalizes the given node.
  64. SDValue LegalizeOp(SDValue Op);
  65. /// Assuming the node is legal, "legalize" the results.
  66. SDValue TranslateLegalizeResults(SDValue Op, SDNode *Result);
  67. /// Make sure Results are legal and update the translation cache.
  68. SDValue RecursivelyLegalizeResults(SDValue Op,
  69. MutableArrayRef<SDValue> Results);
  70. /// Wrapper to interface LowerOperation with a vector of Results.
  71. /// Returns false if the target wants to use default expansion. Otherwise
  72. /// returns true. If the return value is true and Results is empty, then the
  73. /// target wants to keep the input node as is.
  74. bool LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results);
  75. /// Implements unrolling a VSETCC.
  76. SDValue UnrollVSETCC(SDNode *Node);
  77. /// Implement expand-based legalization of vector operations.
  78. ///
  79. /// This is just a high-level routine to dispatch to specific code paths for
  80. /// operations to legalize them.
  81. void Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  82. /// Implements expansion for FP_TO_UINT; falls back to UnrollVectorOp if
  83. /// FP_TO_SINT isn't legal.
  84. void ExpandFP_TO_UINT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  85. /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
  86. /// SINT_TO_FLOAT and SHR on vectors aren't legal.
  87. void ExpandUINT_TO_FLOAT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  88. /// Implement expansion for SIGN_EXTEND_INREG using SHL and SRA.
  89. SDValue ExpandSEXTINREG(SDNode *Node);
  90. /// Implement expansion for ANY_EXTEND_VECTOR_INREG.
  91. ///
  92. /// Shuffles the low lanes of the operand into place and bitcasts to the proper
  93. /// type. The contents of the bits in the extended part of each element are
  94. /// undef.
  95. SDValue ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node);
  96. /// Implement expansion for SIGN_EXTEND_VECTOR_INREG.
  97. ///
  98. /// Shuffles the low lanes of the operand into place, bitcasts to the proper
  99. /// type, then shifts left and arithmetic shifts right to introduce a sign
  100. /// extension.
  101. SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node);
  102. /// Implement expansion for ZERO_EXTEND_VECTOR_INREG.
  103. ///
  104. /// Shuffles the low lanes of the operand into place and blends zeros into
  105. /// the remaining lanes, finally bitcasting to the proper type.
  106. SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node);
  107. /// Expand bswap of vectors into a shuffle if legal.
  108. SDValue ExpandBSWAP(SDNode *Node);
  109. /// Implement vselect in terms of XOR, AND, OR when blend is not
  110. /// supported by the target.
  111. SDValue ExpandVSELECT(SDNode *Node);
  112. SDValue ExpandVP_SELECT(SDNode *Node);
  113. SDValue ExpandVP_MERGE(SDNode *Node);
  114. SDValue ExpandVP_REM(SDNode *Node);
  115. SDValue ExpandSELECT(SDNode *Node);
  116. std::pair<SDValue, SDValue> ExpandLoad(SDNode *N);
  117. SDValue ExpandStore(SDNode *N);
  118. SDValue ExpandFNEG(SDNode *Node);
  119. void ExpandFSUB(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  120. void ExpandSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  121. void ExpandBITREVERSE(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  122. void ExpandUADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  123. void ExpandSADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  124. void ExpandMULO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  125. void ExpandFixedPointDiv(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  126. void ExpandStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  127. void ExpandREM(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  128. void UnrollStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  129. /// Implements vector promotion.
  130. ///
  131. /// This is essentially just bitcasting the operands to a different type and
  132. /// bitcasting the result back to the original type.
  133. void Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  134. /// Implements [SU]INT_TO_FP vector promotion.
  135. ///
  136. /// This is a [zs]ext of the input operand to a larger integer type.
  137. void PromoteINT_TO_FP(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  138. /// Implements FP_TO_[SU]INT vector promotion of the result type.
  139. ///
  140. /// It is promoted to a larger integer type. The result is then
  141. /// truncated back to the original type.
  142. void PromoteFP_TO_INT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  143. public:
  144. VectorLegalizer(SelectionDAG& dag) :
  145. DAG(dag), TLI(dag.getTargetLoweringInfo()) {}
  146. /// Legalize the vector operations in the DAG.
  147. bool Run();
  148. };
  149. } // end anonymous namespace
  150. bool VectorLegalizer::Run() {
  151. // Before we start legalizing vector nodes, check if there are any vectors.
  152. bool HasVectors = false;
  153. for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
  154. E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) {
  155. // Check if the values of the nodes contain vectors. We don't need to check
  156. // the operands because we are going to check their values at some point.
  157. HasVectors = llvm::any_of(I->values(), [](EVT T) { return T.isVector(); });
  158. // If we found a vector node we can start the legalization.
  159. if (HasVectors)
  160. break;
  161. }
  162. // If this basic block has no vectors then no need to legalize vectors.
  163. if (!HasVectors)
  164. return false;
  165. // The legalize process is inherently a bottom-up recursive process (users
  166. // legalize their uses before themselves). Given infinite stack space, we
  167. // could just start legalizing on the root and traverse the whole graph. In
  168. // practice however, this causes us to run out of stack space on large basic
  169. // blocks. To avoid this problem, compute an ordering of the nodes where each
  170. // node is only legalized after all of its operands are legalized.
  171. DAG.AssignTopologicalOrder();
  172. for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
  173. E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I)
  174. LegalizeOp(SDValue(&*I, 0));
  175. // Finally, it's possible the root changed. Get the new root.
  176. SDValue OldRoot = DAG.getRoot();
  177. assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
  178. DAG.setRoot(LegalizedNodes[OldRoot]);
  179. LegalizedNodes.clear();
  180. // Remove dead nodes now.
  181. DAG.RemoveDeadNodes();
  182. return Changed;
  183. }
  184. SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDNode *Result) {
  185. assert(Op->getNumValues() == Result->getNumValues() &&
  186. "Unexpected number of results");
  187. // Generic legalization: just pass the operand through.
  188. for (unsigned i = 0, e = Op->getNumValues(); i != e; ++i)
  189. AddLegalizedOperand(Op.getValue(i), SDValue(Result, i));
  190. return SDValue(Result, Op.getResNo());
  191. }
  192. SDValue
  193. VectorLegalizer::RecursivelyLegalizeResults(SDValue Op,
  194. MutableArrayRef<SDValue> Results) {
  195. assert(Results.size() == Op->getNumValues() &&
  196. "Unexpected number of results");
  197. // Make sure that the generated code is itself legal.
  198. for (unsigned i = 0, e = Results.size(); i != e; ++i) {
  199. Results[i] = LegalizeOp(Results[i]);
  200. AddLegalizedOperand(Op.getValue(i), Results[i]);
  201. }
  202. return Results[Op.getResNo()];
  203. }
  204. SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
  205. // Note that LegalizeOp may be reentered even from single-use nodes, which
  206. // means that we always must cache transformed nodes.
  207. DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
  208. if (I != LegalizedNodes.end()) return I->second;
  209. // Legalize the operands
  210. SmallVector<SDValue, 8> Ops;
  211. for (const SDValue &Oper : Op->op_values())
  212. Ops.push_back(LegalizeOp(Oper));
  213. SDNode *Node = DAG.UpdateNodeOperands(Op.getNode(), Ops);
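// Only nodes that produce or consume a vector value need vector-specific
// legalization; everything else is simply passed through below.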
  214. bool HasVectorValueOrOp =
  215. llvm::any_of(Node->values(), [](EVT T) { return T.isVector(); }) ||
  216. llvm::any_of(Node->op_values(),
  217. [](SDValue O) { return O.getValueType().isVector(); });
  218. if (!HasVectorValueOrOp)
  219. return TranslateLegalizeResults(Op, Node);
  220. TargetLowering::LegalizeAction Action = TargetLowering::Legal;
  221. EVT ValVT;
  222. switch (Op.getOpcode()) {
  223. default:
  224. return TranslateLegalizeResults(Op, Node);
  225. case ISD::LOAD: {
  226. LoadSDNode *LD = cast<LoadSDNode>(Node);
  227. ISD::LoadExtType ExtType = LD->getExtensionType();
  228. EVT LoadedVT = LD->getMemoryVT();
  229. if (LoadedVT.isVector() && ExtType != ISD::NON_EXTLOAD)
  230. Action = TLI.getLoadExtAction(ExtType, LD->getValueType(0), LoadedVT);
  231. break;
  232. }
  233. case ISD::STORE: {
  234. StoreSDNode *ST = cast<StoreSDNode>(Node);
  235. EVT StVT = ST->getMemoryVT();
  236. MVT ValVT = ST->getValue().getSimpleValueType();
  237. if (StVT.isVector() && ST->isTruncatingStore())
  238. Action = TLI.getTruncStoreAction(ValVT, StVT);
  239. break;
  240. }
  241. case ISD::MERGE_VALUES:
  242. Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
  243. // This operation lies about being legal: when it claims to be legal,
  244. // it should actually be expanded.
  245. if (Action == TargetLowering::Legal)
  246. Action = TargetLowering::Expand;
  247. break;
  248. #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
  249. case ISD::STRICT_##DAGN:
  250. #include "llvm/IR/ConstrainedOps.def"
  251. ValVT = Node->getValueType(0);
  252. if (Op.getOpcode() == ISD::STRICT_SINT_TO_FP ||
  253. Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
  254. ValVT = Node->getOperand(1).getValueType();
  255. Action = TLI.getOperationAction(Node->getOpcode(), ValVT);
  256. // If we're asked to expand a strict vector floating-point operation,
  257. // by default we're going to simply unroll it. That is usually the
  258. // best approach, except in the case where the resulting strict (scalar)
  259. // operations would themselves use the fallback mutation to non-strict.
  260. // In that specific case, just do the fallback on the vector op.
  261. if (Action == TargetLowering::Expand && !TLI.isStrictFPEnabled() &&
  262. TLI.getStrictFPOperationAction(Node->getOpcode(), ValVT) ==
  263. TargetLowering::Legal) {
  264. EVT EltVT = ValVT.getVectorElementType();
  265. if (TLI.getOperationAction(Node->getOpcode(), EltVT)
  266. == TargetLowering::Expand &&
  267. TLI.getStrictFPOperationAction(Node->getOpcode(), EltVT)
  268. == TargetLowering::Legal)
  269. Action = TargetLowering::Legal;
  270. }
  271. break;
  272. case ISD::ADD:
  273. case ISD::SUB:
  274. case ISD::MUL:
  275. case ISD::MULHS:
  276. case ISD::MULHU:
  277. case ISD::SDIV:
  278. case ISD::UDIV:
  279. case ISD::SREM:
  280. case ISD::UREM:
  281. case ISD::SDIVREM:
  282. case ISD::UDIVREM:
  283. case ISD::FADD:
  284. case ISD::FSUB:
  285. case ISD::FMUL:
  286. case ISD::FDIV:
  287. case ISD::FREM:
  288. case ISD::AND:
  289. case ISD::OR:
  290. case ISD::XOR:
  291. case ISD::SHL:
  292. case ISD::SRA:
  293. case ISD::SRL:
  294. case ISD::FSHL:
  295. case ISD::FSHR:
  296. case ISD::ROTL:
  297. case ISD::ROTR:
  298. case ISD::ABS:
  299. case ISD::BSWAP:
  300. case ISD::BITREVERSE:
  301. case ISD::CTLZ:
  302. case ISD::CTTZ:
  303. case ISD::CTLZ_ZERO_UNDEF:
  304. case ISD::CTTZ_ZERO_UNDEF:
  305. case ISD::CTPOP:
  306. case ISD::SELECT:
  307. case ISD::VSELECT:
  308. case ISD::SELECT_CC:
  309. case ISD::ZERO_EXTEND:
  310. case ISD::ANY_EXTEND:
  311. case ISD::TRUNCATE:
  312. case ISD::SIGN_EXTEND:
  313. case ISD::FP_TO_SINT:
  314. case ISD::FP_TO_UINT:
  315. case ISD::FNEG:
  316. case ISD::FABS:
  317. case ISD::FMINNUM:
  318. case ISD::FMAXNUM:
  319. case ISD::FMINNUM_IEEE:
  320. case ISD::FMAXNUM_IEEE:
  321. case ISD::FMINIMUM:
  322. case ISD::FMAXIMUM:
  323. case ISD::FCOPYSIGN:
  324. case ISD::FSQRT:
  325. case ISD::FSIN:
  326. case ISD::FCOS:
  327. case ISD::FPOWI:
  328. case ISD::FPOW:
  329. case ISD::FLOG:
  330. case ISD::FLOG2:
  331. case ISD::FLOG10:
  332. case ISD::FEXP:
  333. case ISD::FEXP2:
  334. case ISD::FCEIL:
  335. case ISD::FTRUNC:
  336. case ISD::FRINT:
  337. case ISD::FNEARBYINT:
  338. case ISD::FROUND:
  339. case ISD::FROUNDEVEN:
  340. case ISD::FFLOOR:
  341. case ISD::FP_ROUND:
  342. case ISD::FP_EXTEND:
  343. case ISD::FMA:
  344. case ISD::SIGN_EXTEND_INREG:
  345. case ISD::ANY_EXTEND_VECTOR_INREG:
  346. case ISD::SIGN_EXTEND_VECTOR_INREG:
  347. case ISD::ZERO_EXTEND_VECTOR_INREG:
  348. case ISD::SMIN:
  349. case ISD::SMAX:
  350. case ISD::UMIN:
  351. case ISD::UMAX:
  352. case ISD::SMUL_LOHI:
  353. case ISD::UMUL_LOHI:
  354. case ISD::SADDO:
  355. case ISD::UADDO:
  356. case ISD::SSUBO:
  357. case ISD::USUBO:
  358. case ISD::SMULO:
  359. case ISD::UMULO:
  360. case ISD::FCANONICALIZE:
  361. case ISD::SADDSAT:
  362. case ISD::UADDSAT:
  363. case ISD::SSUBSAT:
  364. case ISD::USUBSAT:
  365. case ISD::SSHLSAT:
  366. case ISD::USHLSAT:
  367. case ISD::FP_TO_SINT_SAT:
  368. case ISD::FP_TO_UINT_SAT:
  369. case ISD::MGATHER:
  370. Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
  371. break;
  372. case ISD::SMULFIX:
  373. case ISD::SMULFIXSAT:
  374. case ISD::UMULFIX:
  375. case ISD::UMULFIXSAT:
  376. case ISD::SDIVFIX:
  377. case ISD::SDIVFIXSAT:
  378. case ISD::UDIVFIX:
  379. case ISD::UDIVFIXSAT: {
  380. unsigned Scale = Node->getConstantOperandVal(2);
  381. Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
  382. Node->getValueType(0), Scale);
  383. break;
  384. }
  385. case ISD::SINT_TO_FP:
  386. case ISD::UINT_TO_FP:
  387. case ISD::VECREDUCE_ADD:
  388. case ISD::VECREDUCE_MUL:
  389. case ISD::VECREDUCE_AND:
  390. case ISD::VECREDUCE_OR:
  391. case ISD::VECREDUCE_XOR:
  392. case ISD::VECREDUCE_SMAX:
  393. case ISD::VECREDUCE_SMIN:
  394. case ISD::VECREDUCE_UMAX:
  395. case ISD::VECREDUCE_UMIN:
  396. case ISD::VECREDUCE_FADD:
  397. case ISD::VECREDUCE_FMUL:
  398. case ISD::VECREDUCE_FMAX:
  399. case ISD::VECREDUCE_FMIN:
  400. Action = TLI.getOperationAction(Node->getOpcode(),
  401. Node->getOperand(0).getValueType());
  402. break;
  403. case ISD::VECREDUCE_SEQ_FADD:
  404. case ISD::VECREDUCE_SEQ_FMUL:
  405. Action = TLI.getOperationAction(Node->getOpcode(),
  406. Node->getOperand(1).getValueType());
  407. break;
  408. case ISD::SETCC: {
  409. MVT OpVT = Node->getOperand(0).getSimpleValueType();
  410. ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get();
  411. Action = TLI.getCondCodeAction(CCCode, OpVT);
  412. if (Action == TargetLowering::Legal)
  413. Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
  414. break;
  415. }
  416. #define BEGIN_REGISTER_VP_SDNODE(VPID, LEGALPOS, ...) \
  417. case ISD::VPID: { \
  418. EVT LegalizeVT = LEGALPOS < 0 ? Node->getValueType(-(1 + LEGALPOS)) \
  419. : Node->getOperand(LEGALPOS).getValueType(); \
  420. if (ISD::VPID == ISD::VP_SETCC) { \
  421. ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get(); \
  422. Action = TLI.getCondCodeAction(CCCode, LegalizeVT.getSimpleVT()); \
  423. if (Action != TargetLowering::Legal) \
  424. break; \
  425. } \
  426. Action = TLI.getOperationAction(Node->getOpcode(), LegalizeVT); \
  427. } break;
  428. #include "llvm/IR/VPIntrinsics.def"
  429. }
  430. LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));
  431. SmallVector<SDValue, 8> ResultVals;
  432. switch (Action) {
  433. default: llvm_unreachable("This action is not supported yet!");
  434. case TargetLowering::Promote:
  435. assert((Op.getOpcode() != ISD::LOAD && Op.getOpcode() != ISD::STORE) &&
  436. "This action is not supported yet!");
  437. LLVM_DEBUG(dbgs() << "Promoting\n");
  438. Promote(Node, ResultVals);
  439. assert(!ResultVals.empty() && "No results for promotion?");
  440. break;
  441. case TargetLowering::Legal:
  442. LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n");
  443. break;
  444. case TargetLowering::Custom:
  445. LLVM_DEBUG(dbgs() << "Trying custom legalization\n");
  446. if (LowerOperationWrapper(Node, ResultVals))
  447. break;
  448. LLVM_DEBUG(dbgs() << "Could not custom legalize node\n");
  449. [[fallthrough]];
  450. case TargetLowering::Expand:
  451. LLVM_DEBUG(dbgs() << "Expanding\n");
  452. Expand(Node, ResultVals);
  453. break;
  454. }
  455. if (ResultVals.empty())
  456. return TranslateLegalizeResults(Op, Node);
  457. Changed = true;
  458. return RecursivelyLegalizeResults(Op, ResultVals);
  459. }
  460. // FIXME: This is very similar to TargetLowering::LowerOperationWrapper. Can we
  461. // merge them somehow?
  462. bool VectorLegalizer::LowerOperationWrapper(SDNode *Node,
  463. SmallVectorImpl<SDValue> &Results) {
  464. SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
  465. if (!Res.getNode())
  466. return false;
  467. if (Res == SDValue(Node, 0))
  468. return true;
  469. // If the original node has one result, take the return value from
  470. // LowerOperation as is. It might not be result number 0.
  471. if (Node->getNumValues() == 1) {
  472. Results.push_back(Res);
  473. return true;
  474. }
  475. // If the original node has multiple results, then the return node should
  476. // have the same number of results.
  477. assert((Node->getNumValues() == Res->getNumValues()) &&
  478. "Lowering returned the wrong number of results!");
  479. // Place the new result values based on N's result numbers.
  480. for (unsigned I = 0, E = Node->getNumValues(); I != E; ++I)
  481. Results.push_back(Res.getValue(I));
  482. return true;
  483. }
  484. void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
  485. // For a few operations there is a specific concept for promotion based on
  486. // the operand's type.
  487. switch (Node->getOpcode()) {
  488. case ISD::SINT_TO_FP:
  489. case ISD::UINT_TO_FP:
  490. case ISD::STRICT_SINT_TO_FP:
  491. case ISD::STRICT_UINT_TO_FP:
  492. // "Promote" the operation by extending the operand.
  493. PromoteINT_TO_FP(Node, Results);
  494. return;
  495. case ISD::FP_TO_UINT:
  496. case ISD::FP_TO_SINT:
  497. case ISD::STRICT_FP_TO_UINT:
  498. case ISD::STRICT_FP_TO_SINT:
  499. // Promote the operation by extending the operand.
  500. PromoteFP_TO_INT(Node, Results);
  501. return;
  502. case ISD::FP_ROUND:
  503. case ISD::FP_EXTEND:
  504. // These operations are used to do promotion so they can't be promoted
  505. // themselves.
  506. llvm_unreachable("Don't know how to promote this operation!");
  507. }
  508. // There are currently two cases of vector promotion:
  509. // 1) Bitcasting a vector of integers to a vector of a different type with the
  510. // same overall bit width. For example, x86 promotes ISD::AND v2i32 to v1i64.
  511. // 2) Extending a vector of floats to a vector of the same number of larger
  512. // floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32.
  513. assert(Node->getNumValues() == 1 &&
  514. "Can't promote a vector with multiple results!");
  515. MVT VT = Node->getSimpleValueType(0);
  516. MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
  517. SDLoc dl(Node);
  518. SmallVector<SDValue, 4> Operands(Node->getNumOperands());
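// Convert each vector operand to the promoted type: use FP_EXTEND when both
// the operand and the promoted type are FP vectors, otherwise reinterpret the
// bits with a BITCAST. Non-vector operands are passed through unchanged.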
  519. for (unsigned j = 0; j != Node->getNumOperands(); ++j) {
  520. if (Node->getOperand(j).getValueType().isVector())
  521. if (Node->getOperand(j)
  522. .getValueType()
  523. .getVectorElementType()
  524. .isFloatingPoint() &&
  525. NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())
  526. Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(j));
  527. else
  528. Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(j));
  529. else
  530. Operands[j] = Node->getOperand(j);
  531. }
  532. SDValue Res =
  533. DAG.getNode(Node->getOpcode(), dl, NVT, Operands, Node->getFlags());
  534. if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) ||
  535. (VT.isVector() && VT.getVectorElementType().isFloatingPoint() &&
  536. NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()))
  537. Res = DAG.getNode(ISD::FP_ROUND, dl, VT, Res,
  538. DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
  539. else
  540. Res = DAG.getNode(ISD::BITCAST, dl, VT, Res);
  541. Results.push_back(Res);
  542. }
  543. void VectorLegalizer::PromoteINT_TO_FP(SDNode *Node,
  544. SmallVectorImpl<SDValue> &Results) {
  545. // INT_TO_FP operations may require the input operand be promoted even
  546. // when the type is otherwise legal.
  547. bool IsStrict = Node->isStrictFPOpcode();
  548. MVT VT = Node->getOperand(IsStrict ? 1 : 0).getSimpleValueType();
  549. MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
  550. assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
  551. "Vectors have different number of elements!");
  552. SDLoc dl(Node);
  553. SmallVector<SDValue, 4> Operands(Node->getNumOperands());
  554. unsigned Opc = (Node->getOpcode() == ISD::UINT_TO_FP ||
  555. Node->getOpcode() == ISD::STRICT_UINT_TO_FP)
  556. ? ISD::ZERO_EXTEND
  557. : ISD::SIGN_EXTEND;
  558. for (unsigned j = 0; j != Node->getNumOperands(); ++j) {
  559. if (Node->getOperand(j).getValueType().isVector())
  560. Operands[j] = DAG.getNode(Opc, dl, NVT, Node->getOperand(j));
  561. else
  562. Operands[j] = Node->getOperand(j);
  563. }
  564. if (IsStrict) {
  565. SDValue Res = DAG.getNode(Node->getOpcode(), dl,
  566. {Node->getValueType(0), MVT::Other}, Operands);
  567. Results.push_back(Res);
  568. Results.push_back(Res.getValue(1));
  569. return;
  570. }
  571. SDValue Res =
  572. DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Operands);
  573. Results.push_back(Res);
  574. }
  575. // For FP_TO_INT we promote the result type to a vector type with wider
  576. // elements and then truncate the result. This is different from the default
  577. // PromoteVector, which uses a bitcast to promote and thus assumes that the
  578. // promoted vector type has the same overall size.
  579. void VectorLegalizer::PromoteFP_TO_INT(SDNode *Node,
  580. SmallVectorImpl<SDValue> &Results) {
  581. MVT VT = Node->getSimpleValueType(0);
  582. MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
  583. bool IsStrict = Node->isStrictFPOpcode();
  584. assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
  585. "Vectors have different number of elements!");
  586. unsigned NewOpc = Node->getOpcode();
  587. // Change FP_TO_UINT to FP_TO_SINT if possible.
  588. // TODO: Should we only do this if FP_TO_UINT itself isn't legal?
  589. if (NewOpc == ISD::FP_TO_UINT &&
  590. TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
  591. NewOpc = ISD::FP_TO_SINT;
  592. if (NewOpc == ISD::STRICT_FP_TO_UINT &&
  593. TLI.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, NVT))
  594. NewOpc = ISD::STRICT_FP_TO_SINT;
  595. SDLoc dl(Node);
  596. SDValue Promoted, Chain;
  597. if (IsStrict) {
  598. Promoted = DAG.getNode(NewOpc, dl, {NVT, MVT::Other},
  599. {Node->getOperand(0), Node->getOperand(1)});
  600. Chain = Promoted.getValue(1);
  601. } else
  602. Promoted = DAG.getNode(NewOpc, dl, NVT, Node->getOperand(0));
  603. // Assert that the converted value fits in the original type. If it doesn't
  604. // (e.g., because the value being converted is too big), then the result of the
  605. // original operation was undefined anyway, so the assert is still correct.
  606. if (Node->getOpcode() == ISD::FP_TO_UINT ||
  607. Node->getOpcode() == ISD::STRICT_FP_TO_UINT)
  608. NewOpc = ISD::AssertZext;
  609. else
  610. NewOpc = ISD::AssertSext;
  611. Promoted = DAG.getNode(NewOpc, dl, NVT, Promoted,
  612. DAG.getValueType(VT.getScalarType()));
  613. Promoted = DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted);
  614. Results.push_back(Promoted);
  615. if (IsStrict)
  616. Results.push_back(Chain);
  617. }
  618. std::pair<SDValue, SDValue> VectorLegalizer::ExpandLoad(SDNode *N) {
  619. LoadSDNode *LD = cast<LoadSDNode>(N);
  620. return TLI.scalarizeVectorLoad(LD, DAG);
  621. }
  622. SDValue VectorLegalizer::ExpandStore(SDNode *N) {
  623. StoreSDNode *ST = cast<StoreSDNode>(N);
  624. SDValue TF = TLI.scalarizeVectorStore(ST, DAG);
  625. return TF;
  626. }
  627. void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
  628. switch (Node->getOpcode()) {
  629. case ISD::LOAD: {
  630. std::pair<SDValue, SDValue> Tmp = ExpandLoad(Node);
  631. Results.push_back(Tmp.first);
  632. Results.push_back(Tmp.second);
  633. return;
  634. }
  635. case ISD::STORE:
  636. Results.push_back(ExpandStore(Node));
  637. return;
  638. case ISD::MERGE_VALUES:
  639. for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
  640. Results.push_back(Node->getOperand(i));
  641. return;
  642. case ISD::SIGN_EXTEND_INREG:
  643. Results.push_back(ExpandSEXTINREG(Node));
  644. return;
  645. case ISD::ANY_EXTEND_VECTOR_INREG:
  646. Results.push_back(ExpandANY_EXTEND_VECTOR_INREG(Node));
  647. return;
  648. case ISD::SIGN_EXTEND_VECTOR_INREG:
  649. Results.push_back(ExpandSIGN_EXTEND_VECTOR_INREG(Node));
  650. return;
  651. case ISD::ZERO_EXTEND_VECTOR_INREG:
  652. Results.push_back(ExpandZERO_EXTEND_VECTOR_INREG(Node));
  653. return;
  654. case ISD::BSWAP:
  655. Results.push_back(ExpandBSWAP(Node));
  656. return;
  657. case ISD::VP_BSWAP:
  658. Results.push_back(TLI.expandVPBSWAP(Node, DAG));
  659. return;
  660. case ISD::VSELECT:
  661. Results.push_back(ExpandVSELECT(Node));
  662. return;
  663. case ISD::VP_SELECT:
  664. Results.push_back(ExpandVP_SELECT(Node));
  665. return;
  666. case ISD::VP_SREM:
  667. case ISD::VP_UREM:
  668. if (SDValue Expanded = ExpandVP_REM(Node)) {
  669. Results.push_back(Expanded);
  670. return;
  671. }
  672. break;
  673. case ISD::SELECT:
  674. Results.push_back(ExpandSELECT(Node));
  675. return;
  676. case ISD::SELECT_CC: {
  677. if (Node->getValueType(0).isScalableVector()) {
  678. EVT CondVT = TLI.getSetCCResultType(
  679. DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
  680. SDValue SetCC =
  681. DAG.getNode(ISD::SETCC, SDLoc(Node), CondVT, Node->getOperand(0),
  682. Node->getOperand(1), Node->getOperand(4));
  683. Results.push_back(DAG.getSelect(SDLoc(Node), Node->getValueType(0), SetCC,
  684. Node->getOperand(2),
  685. Node->getOperand(3)));
  686. return;
  687. }
  688. break;
  689. }
  690. case ISD::FP_TO_UINT:
  691. ExpandFP_TO_UINT(Node, Results);
  692. return;
  693. case ISD::UINT_TO_FP:
  694. ExpandUINT_TO_FLOAT(Node, Results);
  695. return;
  696. case ISD::FNEG:
  697. Results.push_back(ExpandFNEG(Node));
  698. return;
  699. case ISD::FSUB:
  700. ExpandFSUB(Node, Results);
  701. return;
  702. case ISD::SETCC:
  703. case ISD::VP_SETCC:
  704. ExpandSETCC(Node, Results);
  705. return;
  706. case ISD::ABS:
  707. if (SDValue Expanded = TLI.expandABS(Node, DAG)) {
  708. Results.push_back(Expanded);
  709. return;
  710. }
  711. break;
  712. case ISD::BITREVERSE:
  713. ExpandBITREVERSE(Node, Results);
  714. return;
  715. case ISD::VP_BITREVERSE:
  716. if (SDValue Expanded = TLI.expandVPBITREVERSE(Node, DAG)) {
  717. Results.push_back(Expanded);
  718. return;
  719. }
  720. break;
  721. case ISD::CTPOP:
  722. if (SDValue Expanded = TLI.expandCTPOP(Node, DAG)) {
  723. Results.push_back(Expanded);
  724. return;
  725. }
  726. break;
  727. case ISD::VP_CTPOP:
  728. if (SDValue Expanded = TLI.expandVPCTPOP(Node, DAG)) {
  729. Results.push_back(Expanded);
  730. return;
  731. }
  732. break;
  733. case ISD::CTLZ:
  734. case ISD::CTLZ_ZERO_UNDEF:
  735. if (SDValue Expanded = TLI.expandCTLZ(Node, DAG)) {
  736. Results.push_back(Expanded);
  737. return;
  738. }
  739. break;
  740. case ISD::VP_CTLZ:
  741. case ISD::VP_CTLZ_ZERO_UNDEF:
  742. if (SDValue Expanded = TLI.expandVPCTLZ(Node, DAG)) {
  743. Results.push_back(Expanded);
  744. return;
  745. }
  746. break;
  747. case ISD::CTTZ:
  748. case ISD::CTTZ_ZERO_UNDEF:
  749. if (SDValue Expanded = TLI.expandCTTZ(Node, DAG)) {
  750. Results.push_back(Expanded);
  751. return;
  752. }
  753. break;
  754. case ISD::VP_CTTZ:
  755. case ISD::VP_CTTZ_ZERO_UNDEF:
  756. if (SDValue Expanded = TLI.expandVPCTTZ(Node, DAG)) {
  757. Results.push_back(Expanded);
  758. return;
  759. }
  760. break;
  761. case ISD::FSHL:
  762. case ISD::VP_FSHL:
  763. case ISD::FSHR:
  764. case ISD::VP_FSHR:
  765. if (SDValue Expanded = TLI.expandFunnelShift(Node, DAG)) {
  766. Results.push_back(Expanded);
  767. return;
  768. }
  769. break;
  770. case ISD::ROTL:
  771. case ISD::ROTR:
  772. if (SDValue Expanded = TLI.expandROT(Node, false /*AllowVectorOps*/, DAG)) {
  773. Results.push_back(Expanded);
  774. return;
  775. }
  776. break;
  777. case ISD::FMINNUM:
  778. case ISD::FMAXNUM:
  779. if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(Node, DAG)) {
  780. Results.push_back(Expanded);
  781. return;
  782. }
  783. break;
  784. case ISD::SMIN:
  785. case ISD::SMAX:
  786. case ISD::UMIN:
  787. case ISD::UMAX:
  788. if (SDValue Expanded = TLI.expandIntMINMAX(Node, DAG)) {
  789. Results.push_back(Expanded);
  790. return;
  791. }
  792. break;
  793. case ISD::UADDO:
  794. case ISD::USUBO:
  795. ExpandUADDSUBO(Node, Results);
  796. return;
  797. case ISD::SADDO:
  798. case ISD::SSUBO:
  799. ExpandSADDSUBO(Node, Results);
  800. return;
  801. case ISD::UMULO:
  802. case ISD::SMULO:
  803. ExpandMULO(Node, Results);
  804. return;
  805. case ISD::USUBSAT:
  806. case ISD::SSUBSAT:
  807. case ISD::UADDSAT:
  808. case ISD::SADDSAT:
  809. if (SDValue Expanded = TLI.expandAddSubSat(Node, DAG)) {
  810. Results.push_back(Expanded);
  811. return;
  812. }
  813. break;
  814. case ISD::USHLSAT:
  815. case ISD::SSHLSAT:
  816. if (SDValue Expanded = TLI.expandShlSat(Node, DAG)) {
  817. Results.push_back(Expanded);
  818. return;
  819. }
  820. break;
  821. case ISD::FP_TO_SINT_SAT:
  822. case ISD::FP_TO_UINT_SAT:
  823. // Expand FP_TO_[SU]INT_SAT for scalable vectors to prevent the unroll fallback below.
  824. if (Node->getValueType(0).isScalableVector()) {
  825. if (SDValue Expanded = TLI.expandFP_TO_INT_SAT(Node, DAG)) {
  826. Results.push_back(Expanded);
  827. return;
  828. }
  829. }
  830. break;
  831. case ISD::SMULFIX:
  832. case ISD::UMULFIX:
  833. if (SDValue Expanded = TLI.expandFixedPointMul(Node, DAG)) {
  834. Results.push_back(Expanded);
  835. return;
  836. }
  837. break;
  838. case ISD::SMULFIXSAT:
  839. case ISD::UMULFIXSAT:
  840. // FIXME: We do not expand SMULFIXSAT/UMULFIXSAT here yet, not sure exactly
  841. // why. Maybe it results in worse codegen compared to the unroll for some
  842. // targets? This should probably be investigated, and if we still prefer to
  843. // unroll, an explanation could be helpful.
  844. break;
  845. case ISD::SDIVFIX:
  846. case ISD::UDIVFIX:
  847. ExpandFixedPointDiv(Node, Results);
  848. return;
  849. case ISD::SDIVFIXSAT:
  850. case ISD::UDIVFIXSAT:
  851. break;
  852. #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
  853. case ISD::STRICT_##DAGN:
  854. #include "llvm/IR/ConstrainedOps.def"
  855. ExpandStrictFPOp(Node, Results);
  856. return;
  857. case ISD::VECREDUCE_ADD:
  858. case ISD::VECREDUCE_MUL:
  859. case ISD::VECREDUCE_AND:
  860. case ISD::VECREDUCE_OR:
  861. case ISD::VECREDUCE_XOR:
  862. case ISD::VECREDUCE_SMAX:
  863. case ISD::VECREDUCE_SMIN:
  864. case ISD::VECREDUCE_UMAX:
  865. case ISD::VECREDUCE_UMIN:
  866. case ISD::VECREDUCE_FADD:
  867. case ISD::VECREDUCE_FMUL:
  868. case ISD::VECREDUCE_FMAX:
  869. case ISD::VECREDUCE_FMIN:
  870. Results.push_back(TLI.expandVecReduce(Node, DAG));
  871. return;
  872. case ISD::VECREDUCE_SEQ_FADD:
  873. case ISD::VECREDUCE_SEQ_FMUL:
  874. Results.push_back(TLI.expandVecReduceSeq(Node, DAG));
  875. return;
  876. case ISD::SREM:
  877. case ISD::UREM:
  878. ExpandREM(Node, Results);
  879. return;
  880. case ISD::VP_MERGE:
  881. Results.push_back(ExpandVP_MERGE(Node));
  882. return;
  883. }
  884. Results.push_back(DAG.UnrollVectorOp(Node));
  885. }
  886. SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) {
  887. // Lower a select instruction where the condition is a scalar and the
  888. // operands are vectors. Lower this select to VSELECT and implement it
  889. // using XOR, AND, and OR. The selector bit is broadcast.
  890. EVT VT = Node->getValueType(0);
  891. SDLoc DL(Node);
  892. SDValue Mask = Node->getOperand(0);
  893. SDValue Op1 = Node->getOperand(1);
  894. SDValue Op2 = Node->getOperand(2);
  895. assert(VT.isVector() && !Mask.getValueType().isVector()
  896. && Op1.getValueType() == Op2.getValueType() && "Invalid type");
  897. // If we can't even use the basic vector operations of
  898. // AND,OR,XOR, we will have to scalarize the op.
  899. // Notice that the operation may be 'promoted', which means that it is
  900. // 'bitcasted' to another type, which is handled.
  901. // Also, we need to be able to construct a splat vector using either
  902. // BUILD_VECTOR or SPLAT_VECTOR.
  903. // FIXME: Should we also permit fixed-length SPLAT_VECTOR as a fallback to
  904. // BUILD_VECTOR?
  905. if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
  906. TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
  907. TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
  908. TLI.getOperationAction(VT.isFixedLengthVector() ? ISD::BUILD_VECTOR
  909. : ISD::SPLAT_VECTOR,
  910. VT) == TargetLowering::Expand)
  911. return DAG.UnrollVectorOp(Node);
  912. // Generate a mask operand.
  913. EVT MaskTy = VT.changeVectorElementTypeToInteger();
  914. // What is the size of each element in the vector mask.
  915. EVT BitTy = MaskTy.getScalarType();
  916. Mask = DAG.getSelect(DL, BitTy, Mask, DAG.getAllOnesConstant(DL, BitTy),
  917. DAG.getConstant(0, DL, BitTy));
  918. // Broadcast the mask so that the entire vector is all one or all zero.
  919. Mask = DAG.getSplat(MaskTy, DL, Mask);
  920. // Bitcast the operands to be the same type as the mask.
  921. // This is needed when we select between FP types because
  922. // the mask is a vector of integers.
  923. Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1);
  924. Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2);
  925. SDValue NotMask = DAG.getNOT(DL, Mask, MaskTy);
  926. Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask);
  927. Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask);
  928. SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2);
  929. return DAG.getNode(ISD::BITCAST, DL, Node->getValueType(0), Val);
  930. }
  931. SDValue VectorLegalizer::ExpandSEXTINREG(SDNode *Node) {
  932. EVT VT = Node->getValueType(0);
  933. // Make sure that the SRA and SHL instructions are available.
  934. if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand ||
  935. TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand)
  936. return DAG.UnrollVectorOp(Node);
  937. SDLoc DL(Node);
  938. EVT OrigTy = cast<VTSDNode>(Node->getOperand(1))->getVT();
  939. unsigned BW = VT.getScalarSizeInBits();
  940. unsigned OrigBW = OrigTy.getScalarSizeInBits();
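// Shift the original low bits up to the top of the element, then use an
// arithmetic shift right to replicate the sign bit back down, giving the
// in-register sign extension.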
  941. SDValue ShiftSz = DAG.getConstant(BW - OrigBW, DL, VT);
  942. SDValue Op = DAG.getNode(ISD::SHL, DL, VT, Node->getOperand(0), ShiftSz);
  943. return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz);
  944. }
  945. // Generically expand a vector anyext in register to a shuffle of the relevant
  946. // lanes into the appropriate locations, with other lanes left undef.
  947. SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node) {
  948. SDLoc DL(Node);
  949. EVT VT = Node->getValueType(0);
  950. int NumElements = VT.getVectorNumElements();
  951. SDValue Src = Node->getOperand(0);
  952. EVT SrcVT = Src.getValueType();
  953. int NumSrcElements = SrcVT.getVectorNumElements();
  954. // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector
  955. // into a larger vector type.
  956. if (SrcVT.bitsLE(VT)) {
  957. assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
  958. "ANY_EXTEND_VECTOR_INREG vector size mismatch");
  959. NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
  960. SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
  961. NumSrcElements);
  962. Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT),
  963. Src, DAG.getVectorIdxConstant(0, DL));
  964. }
  965. // Build a base mask of undef shuffles.
  966. SmallVector<int, 16> ShuffleMask;
  967. ShuffleMask.resize(NumSrcElements, -1);
  968. // Place the extended lanes into the correct locations.
  969. int ExtLaneScale = NumSrcElements / NumElements;
  970. int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
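// Each wide result element covers ExtLaneScale narrow source lanes; place
// source lane i in the sub-lane that maps to the low-order bits of the wide
// element (the last sub-lane on big-endian targets).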
  971. for (int i = 0; i < NumElements; ++i)
  972. ShuffleMask[i * ExtLaneScale + EndianOffset] = i;
  973. return DAG.getNode(
  974. ISD::BITCAST, DL, VT,
  975. DAG.getVectorShuffle(SrcVT, DL, Src, DAG.getUNDEF(SrcVT), ShuffleMask));
  976. }
  977. SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node) {
  978. SDLoc DL(Node);
  979. EVT VT = Node->getValueType(0);
  980. SDValue Src = Node->getOperand(0);
  981. EVT SrcVT = Src.getValueType();
  982. // First build an any-extend node which can be legalized above when we
  983. // recurse through it.
  984. SDValue Op = DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Src);
  985. // Now we need to sign extend. Do this by shifting the elements. Even if these
  986. // aren't legal operations, they have a better chance of being legalized
  987. // without full scalarization than the sign extension does.
  988. unsigned EltWidth = VT.getScalarSizeInBits();
  989. unsigned SrcEltWidth = SrcVT.getScalarSizeInBits();
  990. SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, DL, VT);
  991. return DAG.getNode(ISD::SRA, DL, VT,
  992. DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount),
  993. ShiftAmount);
  994. }
  995. // Generically expand a vector zext in register to a shuffle of the relevant
  996. // lanes into the appropriate locations, a blend of zero into the high bits,
  997. // and a bitcast to the wider element type.
  998. SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node) {
  999. SDLoc DL(Node);
  1000. EVT VT = Node->getValueType(0);
  1001. int NumElements = VT.getVectorNumElements();
  1002. SDValue Src = Node->getOperand(0);
  1003. EVT SrcVT = Src.getValueType();
  1004. int NumSrcElements = SrcVT.getVectorNumElements();
  1005. // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector
  1006. // into a larger vector type.
  1007. if (SrcVT.bitsLE(VT)) {
  1008. assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
  1009. "ZERO_EXTEND_VECTOR_INREG vector size mismatch");
  1010. NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
  1011. SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
  1012. NumSrcElements);
  1013. Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT),
  1014. Src, DAG.getVectorIdxConstant(0, DL));
  1015. }
  1016. // Build up a zero vector to blend into this one.
  1017. SDValue Zero = DAG.getConstant(0, DL, SrcVT);
  1018. // Shuffle the incoming lanes into the correct position, and pull all other
  1019. // lanes from the zero vector.
  1020. auto ShuffleMask = llvm::to_vector<16>(llvm::seq<int>(0, NumSrcElements));
  1021. int ExtLaneScale = NumSrcElements / NumElements;
  1022. int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
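// Mask indices below NumSrcElements select lanes from Zero (the first shuffle
// operand); indices of NumSrcElements + i select lane i of Src (the second
// operand), so every lane not overwritten below remains zero.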
  1023. for (int i = 0; i < NumElements; ++i)
  1024. ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i;
  1025. return DAG.getNode(ISD::BITCAST, DL, VT,
  1026. DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask));
  1027. }
  1028. static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
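// Emit the byte indices of each element in reverse order, so shuffling an
// i8-cast of the vector with this mask swaps the bytes within every element.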
  1029. int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8;
  1030. for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I)
  1031. for (int J = ScalarSizeInBytes - 1; J >= 0; --J)
  1032. ShuffleMask.push_back((I * ScalarSizeInBytes) + J);
  1033. }
  1034. SDValue VectorLegalizer::ExpandBSWAP(SDNode *Node) {
  1035. EVT VT = Node->getValueType(0);
  1036. // Scalable vectors can't use shuffle expansion.
  1037. if (VT.isScalableVector())
  1038. return TLI.expandBSWAP(Node, DAG);
  1039. // Generate a byte wise shuffle mask for the BSWAP.
  1040. SmallVector<int, 16> ShuffleMask;
  1041. createBSWAPShuffleMask(VT, ShuffleMask);
  1042. EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size());
  1043. // Only emit a shuffle if the mask is legal.
  1044. if (TLI.isShuffleMaskLegal(ShuffleMask, ByteVT)) {
  1045. SDLoc DL(Node);
  1046. SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0));
  1047. Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask);
  1048. return DAG.getNode(ISD::BITCAST, DL, VT, Op);
  1049. }
  1050. // If we have the appropriate vector bit operations, it is better to use them
  1051. // than unrolling and expanding each component.
  1052. if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) &&
  1053. TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
  1054. TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) &&
  1055. TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
  1056. return TLI.expandBSWAP(Node, DAG);
  1057. // Otherwise unroll.
  1058. return DAG.UnrollVectorOp(Node);
  1059. }
  1060. void VectorLegalizer::ExpandBITREVERSE(SDNode *Node,
  1061. SmallVectorImpl<SDValue> &Results) {
  1062. EVT VT = Node->getValueType(0);
  1063. // We can't unroll or use shuffles for scalable vectors.
  1064. if (VT.isScalableVector()) {
  1065. Results.push_back(TLI.expandBITREVERSE(Node, DAG));
  1066. return;
  1067. }
  1068. // If we have the scalar operation, it's probably cheaper to unroll it.
  1069. if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType())) {
  1070. SDValue Tmp = DAG.UnrollVectorOp(Node);
  1071. Results.push_back(Tmp);
  1072. return;
  1073. }
  1074. // If the vector element width is a whole number of bytes, test if it's legal
  1075. // to BSWAP shuffle the bytes and then perform the BITREVERSE on the byte
  1076. // vector. This greatly reduces the number of bit shifts necessary.
  1077. unsigned ScalarSizeInBits = VT.getScalarSizeInBits();
  1078. if (ScalarSizeInBits > 8 && (ScalarSizeInBits % 8) == 0) {
  1079. SmallVector<int, 16> BSWAPMask;
  1080. createBSWAPShuffleMask(VT, BSWAPMask);
  1081. EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, BSWAPMask.size());
  1082. if (TLI.isShuffleMaskLegal(BSWAPMask, ByteVT) &&
  1083. (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, ByteVT) ||
  1084. (TLI.isOperationLegalOrCustom(ISD::SHL, ByteVT) &&
  1085. TLI.isOperationLegalOrCustom(ISD::SRL, ByteVT) &&
  1086. TLI.isOperationLegalOrCustomOrPromote(ISD::AND, ByteVT) &&
  1087. TLI.isOperationLegalOrCustomOrPromote(ISD::OR, ByteVT)))) {
  1088. SDLoc DL(Node);
  1089. SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0));
  1090. Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT),
  1091. BSWAPMask);
  1092. Op = DAG.getNode(ISD::BITREVERSE, DL, ByteVT, Op);
  1093. Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
  1094. Results.push_back(Op);
  1095. return;
  1096. }
  1097. }
  1098. // If we have the appropriate vector bit operations, it is better to use them
  1099. // than unrolling and expanding each component.
  1100. if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) &&
  1101. TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
  1102. TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) &&
  1103. TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT)) {
  1104. Results.push_back(TLI.expandBITREVERSE(Node, DAG));
  1105. return;
  1106. }
  1107. // Otherwise unroll.
  1108. SDValue Tmp = DAG.UnrollVectorOp(Node);
  1109. Results.push_back(Tmp);
  1110. }
  1111. SDValue VectorLegalizer::ExpandVSELECT(SDNode *Node) {
  1112. // Implement VSELECT in terms of XOR, AND, OR
  1113. // on platforms which do not support blend natively.
  1114. SDLoc DL(Node);
  1115. SDValue Mask = Node->getOperand(0);
  1116. SDValue Op1 = Node->getOperand(1);
  1117. SDValue Op2 = Node->getOperand(2);
  1118. EVT VT = Mask.getValueType();
  1119. // If we can't even use the basic vector operations of
  1120. // AND,OR,XOR, we will have to scalarize the op.
  1121. // Notice that the operation may be 'promoted', which means that it is
  1122. // 'bitcasted' to another type, which is handled.
  1123. if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
  1124. TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
  1125. TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand)
  1126. return DAG.UnrollVectorOp(Node);
  1127. // This operation also isn't safe with AND, OR, XOR when the boolean type is
  1128. // 0/1 and the select operands aren't also booleans, as we need an all-ones
  1129. // vector constant to mask with.
  1130. // FIXME: Sign extend 1 to all ones if that's legal on the target.
  1131. auto BoolContents = TLI.getBooleanContents(Op1.getValueType());
  1132. if (BoolContents != TargetLowering::ZeroOrNegativeOneBooleanContent &&
  1133. !(BoolContents == TargetLowering::ZeroOrOneBooleanContent &&
  1134. Op1.getValueType().getVectorElementType() == MVT::i1))
  1135. return DAG.UnrollVectorOp(Node);
  1136. // If the mask and the type are different sizes, unroll the vector op. This
  1137. // can occur when getSetCCResultType returns something that is different in
  1138. // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8.
  1139. if (VT.getSizeInBits() != Op1.getValueSizeInBits())
  1140. return DAG.UnrollVectorOp(Node);
  1141. // Bitcast the operands to be the same type as the mask.
  1142. // This is needed when we select between FP types because
  1143. // the mask is a vector of integers.
  1144. Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1);
  1145. Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2);
  1146. SDValue NotMask = DAG.getNOT(DL, Mask, VT);
  1147. Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask);
  1148. Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask);
  1149. SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2);
  1150. return DAG.getNode(ISD::BITCAST, DL, Node->getValueType(0), Val);
  1151. }
  1152. SDValue VectorLegalizer::ExpandVP_SELECT(SDNode *Node) {
  1153. // Implement VP_SELECT in terms of VP_XOR, VP_AND and VP_OR on platforms which
  1154. // do not support it natively.
  1155. SDLoc DL(Node);
  1156. SDValue Mask = Node->getOperand(0);
  1157. SDValue Op1 = Node->getOperand(1);
  1158. SDValue Op2 = Node->getOperand(2);
  1159. SDValue EVL = Node->getOperand(3);
  1160. EVT VT = Mask.getValueType();
  1161. // If we can't even use the basic vector operations of
  1162. // VP_AND,VP_OR,VP_XOR, we will have to scalarize the op.
  1163. if (TLI.getOperationAction(ISD::VP_AND, VT) == TargetLowering::Expand ||
  1164. TLI.getOperationAction(ISD::VP_XOR, VT) == TargetLowering::Expand ||
  1165. TLI.getOperationAction(ISD::VP_OR, VT) == TargetLowering::Expand)
  1166. return DAG.UnrollVectorOp(Node);
  1167. // This operation also isn't safe when the operands aren't also booleans.
  1168. if (Op1.getValueType().getVectorElementType() != MVT::i1)
  1169. return DAG.UnrollVectorOp(Node);
  SDValue Ones = DAG.getAllOnesConstant(DL, VT);
  SDValue NotMask = DAG.getNode(ISD::VP_XOR, DL, VT, Mask, Ones, Mask, EVL);

  Op1 = DAG.getNode(ISD::VP_AND, DL, VT, Op1, Mask, Mask, EVL);
  Op2 = DAG.getNode(ISD::VP_AND, DL, VT, Op2, NotMask, Mask, EVL);
  return DAG.getNode(ISD::VP_OR, DL, VT, Op1, Op2, Mask, EVL);
}

SDValue VectorLegalizer::ExpandVP_MERGE(SDNode *Node) {
  // Implement VP_MERGE in terms of VSELECT. Construct a mask where vector
  // indices less than the EVL/pivot are true. Combine that with the original
  // mask for a full-length mask. Use a full-length VSELECT to select between
  // the true and false values.
  SDLoc DL(Node);

  SDValue Mask = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  SDValue Op2 = Node->getOperand(2);
  SDValue EVL = Node->getOperand(3);

  EVT MaskVT = Mask.getValueType();
  bool IsFixedLen = MaskVT.isFixedLengthVector();

  EVT EVLVecVT = EVT::getVectorVT(*DAG.getContext(), EVL.getValueType(),
                                  MaskVT.getVectorElementCount());

  // If we can't construct the EVL mask efficiently, it's better to unroll.
  if ((IsFixedLen &&
       !TLI.isOperationLegalOrCustom(ISD::BUILD_VECTOR, EVLVecVT)) ||
      (!IsFixedLen &&
       (!TLI.isOperationLegalOrCustom(ISD::STEP_VECTOR, EVLVecVT) ||
        !TLI.isOperationLegalOrCustom(ISD::SPLAT_VECTOR, EVLVecVT))))
    return DAG.UnrollVectorOp(Node);

  // If using a SETCC would result in a different type than the mask type,
  // unroll.
  if (TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
                             EVLVecVT) != MaskVT)
    return DAG.UnrollVectorOp(Node);
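
  // Build the EVL mask by comparing a step vector against a splat of EVL; e.g.
  // with EVL = 3 on four lanes: <0,1,2,3> setult <3,3,3,3> gives <1,1,1,0>.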
  SDValue StepVec = DAG.getStepVector(DL, EVLVecVT);
  SDValue SplatEVL = DAG.getSplat(EVLVecVT, DL, EVL);
  SDValue EVLMask =
      DAG.getSetCC(DL, MaskVT, StepVec, SplatEVL, ISD::CondCode::SETULT);

  SDValue FullMask = DAG.getNode(ISD::AND, DL, MaskVT, Mask, EVLMask);
  return DAG.getSelect(DL, Node->getValueType(0), FullMask, Op1, Op2);
}

SDValue VectorLegalizer::ExpandVP_REM(SDNode *Node) {
  // Implement VP_SREM/UREM in terms of VP_SDIV/VP_UDIV, VP_MUL, VP_SUB.
  EVT VT = Node->getValueType(0);

  unsigned DivOpc =
      Node->getOpcode() == ISD::VP_SREM ? ISD::VP_SDIV : ISD::VP_UDIV;

  if (!TLI.isOperationLegalOrCustom(DivOpc, VT) ||
      !TLI.isOperationLegalOrCustom(ISD::VP_MUL, VT) ||
      !TLI.isOperationLegalOrCustom(ISD::VP_SUB, VT))
    return SDValue();

  SDLoc DL(Node);

  SDValue Dividend = Node->getOperand(0);
  SDValue Divisor = Node->getOperand(1);
  SDValue Mask = Node->getOperand(2);
  SDValue EVL = Node->getOperand(3);

  // X % Y -> X - (X / Y) * Y
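  // e.g. 7 % 3 == 7 - (7 / 3) * 3 == 7 - 2 * 3 == 1.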
  SDValue Div = DAG.getNode(DivOpc, DL, VT, Dividend, Divisor, Mask, EVL);
  SDValue Mul = DAG.getNode(ISD::VP_MUL, DL, VT, Divisor, Div, Mask, EVL);
  return DAG.getNode(ISD::VP_SUB, DL, VT, Dividend, Mul, Mask, EVL);
}

void VectorLegalizer::ExpandFP_TO_UINT(SDNode *Node,
                                       SmallVectorImpl<SDValue> &Results) {
  // Attempt to expand using TargetLowering.
  SDValue Result, Chain;
  if (TLI.expandFP_TO_UINT(Node, Result, Chain, DAG)) {
    Results.push_back(Result);
    if (Node->isStrictFPOpcode())
      Results.push_back(Chain);
    return;
  }

  // Otherwise go ahead and unroll.
  if (Node->isStrictFPOpcode()) {
    UnrollStrictFPOp(Node, Results);
    return;
  }

  Results.push_back(DAG.UnrollVectorOp(Node));
}

void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node,
                                          SmallVectorImpl<SDValue> &Results) {
  bool IsStrict = Node->isStrictFPOpcode();
  unsigned OpNo = IsStrict ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);
  EVT VT = Src.getValueType();
  SDLoc DL(Node);

  // Attempt to expand using TargetLowering.
  SDValue Result;
  SDValue Chain;
  if (TLI.expandUINT_TO_FP(Node, Result, Chain, DAG)) {
    Results.push_back(Result);
    if (IsStrict)
      Results.push_back(Chain);
    return;
  }

  // Make sure that the SINT_TO_FP and SRL instructions are available.
  if (((!IsStrict && TLI.getOperationAction(ISD::SINT_TO_FP, VT) ==
                         TargetLowering::Expand) ||
       (IsStrict && TLI.getOperationAction(ISD::STRICT_SINT_TO_FP, VT) ==
                        TargetLowering::Expand)) ||
      TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand) {
    if (IsStrict) {
      UnrollStrictFPOp(Node, Results);
      return;
    }

    Results.push_back(DAG.UnrollVectorOp(Node));
    return;
  }
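
  // Split the unsigned value into two halves and convert each half with a
  // signed conversion (both halves are non-negative when read as signed):
  //   uint_to_fp(Src) == sint_to_fp(Src >> BW/2) * 2^(BW/2)
  //                      + sint_to_fp(Src & ((1 << BW/2) - 1))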
  unsigned BW = VT.getScalarSizeInBits();
  assert((BW == 64 || BW == 32) &&
         "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");

  SDValue HalfWord = DAG.getConstant(BW / 2, DL, VT);

  // Constants to clear the upper part of the word.
  // Notice that we can also use SHL+SHR, but using a constant is slightly
  // faster on x86.
  uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF;
  SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT);

  // Two to the power of half-word-size.
  SDValue TWOHW =
      DAG.getConstantFP(1ULL << (BW / 2), DL, Node->getValueType(0));

  // HI holds the upper half of Src shifted down; LO holds the lower half with
  // the upper bits cleared.
  SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Src, HalfWord);
  SDValue LO = DAG.getNode(ISD::AND, DL, VT, Src, HalfWordMask);

  if (IsStrict) {
    // Convert hi and lo to floats
    // Convert the hi part back to the upper values
    // TODO: Can any fast-math-flags be set on these nodes?
    SDValue fHI = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL,
                              {Node->getValueType(0), MVT::Other},
                              {Node->getOperand(0), HI});
    fHI = DAG.getNode(ISD::STRICT_FMUL, DL,
                      {Node->getValueType(0), MVT::Other},
                      {fHI.getValue(1), fHI, TWOHW});
    SDValue fLO = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL,
                              {Node->getValueType(0), MVT::Other},
                              {Node->getOperand(0), LO});

    SDValue TF = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, fHI.getValue(1),
                             fLO.getValue(1));

    // Add the two halves
    SDValue Result =
        DAG.getNode(ISD::STRICT_FADD, DL, {Node->getValueType(0), MVT::Other},
                    {TF, fHI, fLO});

    Results.push_back(Result);
    Results.push_back(Result.getValue(1));
    return;
  }

  // Convert hi and lo to floats
  // Convert the hi part back to the upper values
  // TODO: Can any fast-math-flags be set on these nodes?
  SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Node->getValueType(0), HI);
  fHI = DAG.getNode(ISD::FMUL, DL, Node->getValueType(0), fHI, TWOHW);
  SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Node->getValueType(0), LO);

  // Add the two halves
  Results.push_back(
      DAG.getNode(ISD::FADD, DL, Node->getValueType(0), fHI, fLO));
}

SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) {
  if (TLI.isOperationLegalOrCustom(ISD::FSUB, Node->getValueType(0))) {
    SDLoc DL(Node);
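    // Subtracting from -0.0 rather than +0.0 keeps the sign of zero correct:
    // (-0.0) - (+0.0) == -0.0, whereas (+0.0) - (+0.0) == +0.0.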
    SDValue Zero = DAG.getConstantFP(-0.0, DL, Node->getValueType(0));
    // TODO: If FNEG had fast-math-flags, they'd get propagated to this FSUB.
    return DAG.getNode(ISD::FSUB, DL, Node->getValueType(0), Zero,
                       Node->getOperand(0));
  }
  return DAG.UnrollVectorOp(Node);
}

void VectorLegalizer::ExpandFSUB(SDNode *Node,
                                 SmallVectorImpl<SDValue> &Results) {
  // For floating-point values, (a-b) is the same as a+(-b). If FNEG is legal,
  // we can defer this to operation legalization where it will be lowered as
  // a+(-b).
  EVT VT = Node->getValueType(0);
  if (TLI.isOperationLegalOrCustom(ISD::FNEG, VT) &&
      TLI.isOperationLegalOrCustom(ISD::FADD, VT))
    return; // Defer to LegalizeDAG

  SDValue Tmp = DAG.UnrollVectorOp(Node);
  Results.push_back(Tmp);
}

void VectorLegalizer::ExpandSETCC(SDNode *Node,
                                  SmallVectorImpl<SDValue> &Results) {
  bool NeedInvert = false;
  bool IsVP = Node->getOpcode() == ISD::VP_SETCC;
  SDLoc dl(Node);
  MVT OpVT = Node->getOperand(0).getSimpleValueType();
  ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get();

  if (TLI.getCondCodeAction(CCCode, OpVT) != TargetLowering::Expand) {
    Results.push_back(UnrollVSETCC(Node));
    return;
  }

  SDValue Chain;
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  SDValue CC = Node->getOperand(2);
  SDValue Mask, EVL;
  if (IsVP) {
    Mask = Node->getOperand(3);
    EVL = Node->getOperand(4);
  }

  bool Legalized =
      TLI.LegalizeSetCCCondCode(DAG, Node->getValueType(0), LHS, RHS, CC, Mask,
                                EVL, NeedInvert, dl, Chain);

  if (Legalized) {
    // If we expanded the SETCC by swapping LHS and RHS, or by inverting the
    // condition code, create a new SETCC node.
    if (CC.getNode()) {
      if (!IsVP)
        LHS = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), LHS, RHS, CC,
                          Node->getFlags());
      else
        LHS = DAG.getNode(ISD::VP_SETCC, dl, Node->getValueType(0),
                          {LHS, RHS, CC, Mask, EVL}, Node->getFlags());
    }

    // If we expanded the SETCC by inverting the condition code, then wrap
    // the existing SETCC in a NOT to restore the intended condition.
    if (NeedInvert) {
      if (!IsVP)
        LHS = DAG.getLogicalNOT(dl, LHS, LHS->getValueType(0));
      else
        LHS = DAG.getVPLogicalNOT(dl, LHS, Mask, EVL, LHS->getValueType(0));
    }
  } else {
    // Otherwise, SETCC for the given comparison type must be completely
    // illegal; expand it into a SELECT_CC.
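    // i.e. (setcc lhs, rhs, cc) becomes
    // (select_cc lhs, rhs, true-bool, false-bool, cc), where the boolean
    // constants are chosen to match the result type's boolean contents.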
    EVT VT = Node->getValueType(0);
    LHS =
        DAG.getNode(ISD::SELECT_CC, dl, VT, LHS, RHS,
                    DAG.getBoolConstant(true, dl, VT, LHS.getValueType()),
                    DAG.getBoolConstant(false, dl, VT, LHS.getValueType()), CC);
    LHS->setFlags(Node->getFlags());
  }

  Results.push_back(LHS);
}

void VectorLegalizer::ExpandUADDSUBO(SDNode *Node,
                                     SmallVectorImpl<SDValue> &Results) {
  SDValue Result, Overflow;
  TLI.expandUADDSUBO(Node, Result, Overflow, DAG);
  Results.push_back(Result);
  Results.push_back(Overflow);
}

void VectorLegalizer::ExpandSADDSUBO(SDNode *Node,
                                     SmallVectorImpl<SDValue> &Results) {
  SDValue Result, Overflow;
  TLI.expandSADDSUBO(Node, Result, Overflow, DAG);
  Results.push_back(Result);
  Results.push_back(Overflow);
}

void VectorLegalizer::ExpandMULO(SDNode *Node,
                                 SmallVectorImpl<SDValue> &Results) {
  SDValue Result, Overflow;
  if (!TLI.expandMULO(Node, Result, Overflow, DAG))
    std::tie(Result, Overflow) = DAG.UnrollVectorOverflowOp(Node);

  Results.push_back(Result);
  Results.push_back(Overflow);
}

void VectorLegalizer::ExpandFixedPointDiv(SDNode *Node,
                                          SmallVectorImpl<SDValue> &Results) {
  SDNode *N = Node;
  if (SDValue Expanded = TLI.expandFixedPointDiv(
          N->getOpcode(), SDLoc(N), N->getOperand(0), N->getOperand(1),
          N->getConstantOperandVal(2), DAG))
    Results.push_back(Expanded);
}

void VectorLegalizer::ExpandStrictFPOp(SDNode *Node,
                                       SmallVectorImpl<SDValue> &Results) {
  if (Node->getOpcode() == ISD::STRICT_UINT_TO_FP) {
    ExpandUINT_TO_FLOAT(Node, Results);
    return;
  }
  if (Node->getOpcode() == ISD::STRICT_FP_TO_UINT) {
    ExpandFP_TO_UINT(Node, Results);
    return;
  }

  UnrollStrictFPOp(Node, Results);
}

void VectorLegalizer::ExpandREM(SDNode *Node,
                                SmallVectorImpl<SDValue> &Results) {
  assert((Node->getOpcode() == ISD::SREM || Node->getOpcode() == ISD::UREM) &&
         "Expected REM node");

  SDValue Result;
  if (!TLI.expandREM(Node, Result, DAG))
    Result = DAG.UnrollVectorOp(Node);
  Results.push_back(Result);
}

void VectorLegalizer::UnrollStrictFPOp(SDNode *Node,
                                       SmallVectorImpl<SDValue> &Results) {
  EVT VT = Node->getValueType(0);
  EVT EltVT = VT.getVectorElementType();
  unsigned NumElems = VT.getVectorNumElements();
  unsigned NumOpers = Node->getNumOperands();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  EVT TmpEltVT = EltVT;
  if (Node->getOpcode() == ISD::STRICT_FSETCC ||
      Node->getOpcode() == ISD::STRICT_FSETCCS)
    TmpEltVT = TLI.getSetCCResultType(DAG.getDataLayout(),
                                      *DAG.getContext(), TmpEltVT);

  EVT ValueVTs[] = {TmpEltVT, MVT::Other};
  SDValue Chain = Node->getOperand(0);
  SDLoc dl(Node);

  SmallVector<SDValue, 32> OpValues;
  SmallVector<SDValue, 32> OpChains;
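
  // Scalarize: for each element, extract the vector operands, emit the scalar
  // strict op off the incoming chain, and collect the value results and the
  // output chains separately.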
  for (unsigned i = 0; i < NumElems; ++i) {
    SmallVector<SDValue, 4> Opers;
    SDValue Idx = DAG.getVectorIdxConstant(i, dl);

    // The Chain is the first operand.
    Opers.push_back(Chain);

    // Now process the remaining operands.
    for (unsigned j = 1; j < NumOpers; ++j) {
      SDValue Oper = Node->getOperand(j);
      EVT OperVT = Oper.getValueType();

      if (OperVT.isVector())
        Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
                           OperVT.getVectorElementType(), Oper, Idx);

      Opers.push_back(Oper);
    }

    SDValue ScalarOp = DAG.getNode(Node->getOpcode(), dl, ValueVTs, Opers);
    SDValue ScalarResult = ScalarOp.getValue(0);
    SDValue ScalarChain = ScalarOp.getValue(1);

    if (Node->getOpcode() == ISD::STRICT_FSETCC ||
        Node->getOpcode() == ISD::STRICT_FSETCCS)
      ScalarResult = DAG.getSelect(dl, EltVT, ScalarResult,
                                   DAG.getAllOnesConstant(dl, EltVT),
                                   DAG.getConstant(0, dl, EltVT));

    OpValues.push_back(ScalarResult);
    OpChains.push_back(ScalarChain);
  }

  SDValue Result = DAG.getBuildVector(VT, dl, OpValues);
  SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains);

  Results.push_back(Result);
  Results.push_back(NewChain);
}

SDValue VectorLegalizer::UnrollVSETCC(SDNode *Node) {
  EVT VT = Node->getValueType(0);
  unsigned NumElems = VT.getVectorNumElements();
  EVT EltVT = VT.getVectorElementType();
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  SDValue CC = Node->getOperand(2);
  EVT TmpEltVT = LHS.getValueType().getVectorElementType();
  SDLoc dl(Node);
  SmallVector<SDValue, 8> Ops(NumElems);
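  // Compare each pair of elements with a scalar SETCC, then widen the boolean
  // result to an all-ones/zero element of the result's element type.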
  for (unsigned i = 0; i < NumElems; ++i) {
    SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
                                  DAG.getVectorIdxConstant(i, dl));
    SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
                                  DAG.getVectorIdxConstant(i, dl));
    Ops[i] = DAG.getNode(ISD::SETCC, dl,
                         TLI.getSetCCResultType(DAG.getDataLayout(),
                                                *DAG.getContext(), TmpEltVT),
                         LHSElem, RHSElem, CC);
    Ops[i] = DAG.getSelect(dl, EltVT, Ops[i], DAG.getAllOnesConstant(dl, EltVT),
                           DAG.getConstant(0, dl, EltVT));
  }
  return DAG.getBuildVector(VT, dl, Ops);
}

bool SelectionDAG::LegalizeVectors() {
  return VectorLegalizer(*this).Run();
}