//==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that AArch64 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H

#include "AArch64.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Instruction.h"

namespace llvm {

namespace AArch64ISD {

// For predicated nodes where the result is a vector, the operation is
// controlled by a governing predicate, and the inactive lanes are explicitly
// defined with a value, please stick to the following naming convention:
//
// _MERGE_OP<n>     The result value is a vector with inactive lanes equal
//                  to source operand OP<n>.
//
// _MERGE_ZERO      The result value is a vector with inactive lanes
//                  actively zeroed.
//
// _MERGE_PASSTHRU  The result value is a vector with inactive lanes equal
//                  to the last source operand, whose only purpose is being
//                  a passthru value.
//
// For other cases where no explicit action is needed to set the inactive
// lanes, or when the result is not a vector and it is needed or helpful to
// distinguish a node from similar unpredicated nodes, use:
//
// _PRED
//
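// As an illustration of the convention, the operand shapes below are only a
// sketch (see the lowering code for the authoritative node definitions), with
// Pg denoting the governing predicate:
//
//   FADD_PRED            Pg, Op1, Op2         inactive lanes are undefined
//   FABS_MERGE_PASSTHRU  Pg, Op1, Passthru    inactive lanes take Passthru
//   SETCC_MERGE_ZERO     Pg, Op1, Op2, CC     inactive lanes are zeroed
//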
enum NodeType : unsigned {
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
  CALL,         // Function call.
  // Pseudo for an Objective-C call that gets emitted together with a special
  // `mov x29, x29` marker instruction.
  CALL_RVMARKER,
  CALL_BTI, // Function call followed by a BTI instruction.
  // Produces the full sequence of instructions for getting the thread pointer
  // offset of a variable into X0, using the TLSDesc model.
  TLSDESC_CALLSEQ,
  ADRP,     // Page address of a TargetGlobalAddress operand.
  ADR,      // ADR: PC-relative address computation.
  ADDlow,   // Add the low 12 bits of a TargetGlobalAddress operand.
  LOADgot,  // Load from automatically generated descriptor (e.g. Global
            // Offset Table, TLS record).
  RET_FLAG, // Return with a flag operand. Operand 0 is the chain operand.
  BRCOND,   // Conditional branch instruction; "b.cond".
  CSEL,
  CSINV, // Conditional select invert.
  CSNEG, // Conditional select negate.
  CSINC, // Conditional select increment.
  // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
  // ELF.
  THREAD_POINTER,
  ADC,
  SBC, // adc, sbc instructions
  // Predicated instructions where inactive lanes produce undefined results.
  ABDS_PRED,
  ABDU_PRED,
  ADD_PRED,
  FADD_PRED,
  FDIV_PRED,
  FMA_PRED,
  FMAX_PRED,
  FMAXNM_PRED,
  FMIN_PRED,
  FMINNM_PRED,
  FMUL_PRED,
  FSUB_PRED,
  MUL_PRED,
  MULHS_PRED,
  MULHU_PRED,
  SDIV_PRED,
  SHL_PRED,
  SMAX_PRED,
  SMIN_PRED,
  SRA_PRED,
  SRL_PRED,
  SUB_PRED,
  UDIV_PRED,
  UMAX_PRED,
  UMIN_PRED,
  // Unpredicated vector instructions
  BIC,
  SRAD_MERGE_OP1,
  // Predicated instructions with the result of inactive lanes provided by the
  // last operand.
  FABS_MERGE_PASSTHRU,
  FCEIL_MERGE_PASSTHRU,
  FFLOOR_MERGE_PASSTHRU,
  FNEARBYINT_MERGE_PASSTHRU,
  FNEG_MERGE_PASSTHRU,
  FRECPX_MERGE_PASSTHRU,
  FRINT_MERGE_PASSTHRU,
  FROUND_MERGE_PASSTHRU,
  FROUNDEVEN_MERGE_PASSTHRU,
  FSQRT_MERGE_PASSTHRU,
  FTRUNC_MERGE_PASSTHRU,
  FP_ROUND_MERGE_PASSTHRU,
  FP_EXTEND_MERGE_PASSTHRU,
  UINT_TO_FP_MERGE_PASSTHRU,
  SINT_TO_FP_MERGE_PASSTHRU,
  FCVTZU_MERGE_PASSTHRU,
  FCVTZS_MERGE_PASSTHRU,
  SIGN_EXTEND_INREG_MERGE_PASSTHRU,
  ZERO_EXTEND_INREG_MERGE_PASSTHRU,
  ABS_MERGE_PASSTHRU,
  NEG_MERGE_PASSTHRU,
  SETCC_MERGE_ZERO,
  // Arithmetic instructions which write flags.
  ADDS,
  SUBS,
  ADCS,
  SBCS,
  ANDS,
  // Conditional compares. Operands: left, right, falsecc, cc, flags.
  CCMP,
  CCMN,
  FCCMP,
  // Floating point comparison
  FCMP,
  // Scalar extract
  EXTR,
  // Scalar-to-vector duplication
  DUP,
  DUPLANE8,
  DUPLANE16,
  DUPLANE32,
  DUPLANE64,
  // Vector immediate moves
  MOVI,
  MOVIshift,
  MOVIedit,
  MOVImsl,
  FMOV,
  MVNIshift,
  MVNImsl,
  // Vector immediate ops
  BICi,
  ORRi,
  // Vector bitwise select: similar to ISD::VSELECT but not all bits within an
  // element must be identical.
  BSP,
  // Vector shuffles
  ZIP1,
  ZIP2,
  UZP1,
  UZP2,
  TRN1,
  TRN2,
  REV16,
  REV32,
  REV64,
  EXT,
  SPLICE,
  // Vector shift by scalar
  VSHL,
  VLSHR,
  VASHR,
  // Vector saturating/rounding shift by immediate
  SQSHL_I,
  UQSHL_I,
  SQSHLU_I,
  SRSHR_I,
  URSHR_I,
  // Vector shift by constant and insert
  VSLI,
  VSRI,
  // Vector comparisons
  CMEQ,
  CMGE,
  CMGT,
  CMHI,
  CMHS,
  FCMEQ,
  FCMGE,
  FCMGT,
  // Vector zero comparisons
  CMEQz,
  CMGEz,
  CMGTz,
  CMLEz,
  CMLTz,
  FCMEQz,
  FCMGEz,
  FCMGTz,
  FCMLEz,
  FCMLTz,
  // Vector across-lanes addition
  // Only the lower result lane is defined.
  SADDV,
  UADDV,
  // Vector halving addition
  SHADD,
  UHADD,
  // Vector rounding halving addition
  SRHADD,
  URHADD,
  // Unsigned Add Long Pairwise
  UADDLP,
  // udot/sdot instructions
  UDOT,
  SDOT,
  // Vector across-lanes min/max
  // Only the lower result lane is defined.
  SMINV,
  UMINV,
  SMAXV,
  UMAXV,
  SADDV_PRED,
  UADDV_PRED,
  SMAXV_PRED,
  UMAXV_PRED,
  SMINV_PRED,
  UMINV_PRED,
  ORV_PRED,
  EORV_PRED,
  ANDV_PRED,
  // Vector bitwise insertion
  BIT,
  // Compare-and-branch
  CBZ,
  CBNZ,
  TBZ,
  TBNZ,
  // Tail calls
  TC_RETURN,
  // Custom prefetch handling
  PREFETCH,
  // {s|u}int to FP within a FP register.
  SITOF,
  UITOF,
  /// Natural vector cast. ISD::BITCAST is not natural in the big-endian
  /// world w.r.t. vectors, which causes additional REV instructions to be
  /// generated to compensate for the byte-swapping. But sometimes we do
  /// need to re-interpret the data in SIMD vector registers in big-endian
  /// mode without emitting such REV instructions.
  NVCAST,
  MRS, // MRS, also sets the flags via a glue.
  SMULL,
  UMULL,
  // Reciprocal estimates and steps.
  FRECPE,
  FRECPS,
  FRSQRTE,
  FRSQRTS,
  SUNPKHI,
  SUNPKLO,
  UUNPKHI,
  UUNPKLO,
  CLASTA_N,
  CLASTB_N,
  LASTA,
  LASTB,
  TBL,
  // Floating-point reductions.
  FADDA_PRED,
  FADDV_PRED,
  FMAXV_PRED,
  FMAXNMV_PRED,
  FMINV_PRED,
  FMINNMV_PRED,
  INSR,
  PTEST,
  PTRUE,
  BITREVERSE_MERGE_PASSTHRU,
  BSWAP_MERGE_PASSTHRU,
  REVH_MERGE_PASSTHRU,
  REVW_MERGE_PASSTHRU,
  CTLZ_MERGE_PASSTHRU,
  CTPOP_MERGE_PASSTHRU,
  DUP_MERGE_PASSTHRU,
  INDEX_VECTOR,
  // Cast between vectors of the same element type that differ in length.
  REINTERPRET_CAST,
  // Nodes to build an LD64B / ST64B 64-byte quantity out of i64 values, and
  // vice versa.
  LS64_BUILD,
  LS64_EXTRACT,
  LD1_MERGE_ZERO,
  LD1S_MERGE_ZERO,
  LDNF1_MERGE_ZERO,
  LDNF1S_MERGE_ZERO,
  LDFF1_MERGE_ZERO,
  LDFF1S_MERGE_ZERO,
  LD1RQ_MERGE_ZERO,
  LD1RO_MERGE_ZERO,
  // Structured loads.
  SVE_LD2_MERGE_ZERO,
  SVE_LD3_MERGE_ZERO,
  SVE_LD4_MERGE_ZERO,
  // Unsigned gather loads.
  GLD1_MERGE_ZERO,
  GLD1_SCALED_MERGE_ZERO,
  GLD1_UXTW_MERGE_ZERO,
  GLD1_SXTW_MERGE_ZERO,
  GLD1_UXTW_SCALED_MERGE_ZERO,
  GLD1_SXTW_SCALED_MERGE_ZERO,
  GLD1_IMM_MERGE_ZERO,
  // Signed gather loads.
  GLD1S_MERGE_ZERO,
  GLD1S_SCALED_MERGE_ZERO,
  GLD1S_UXTW_MERGE_ZERO,
  GLD1S_SXTW_MERGE_ZERO,
  GLD1S_UXTW_SCALED_MERGE_ZERO,
  GLD1S_SXTW_SCALED_MERGE_ZERO,
  GLD1S_IMM_MERGE_ZERO,
  // Unsigned first-faulting gather loads.
  GLDFF1_MERGE_ZERO,
  GLDFF1_SCALED_MERGE_ZERO,
  GLDFF1_UXTW_MERGE_ZERO,
  GLDFF1_SXTW_MERGE_ZERO,
  GLDFF1_UXTW_SCALED_MERGE_ZERO,
  GLDFF1_SXTW_SCALED_MERGE_ZERO,
  GLDFF1_IMM_MERGE_ZERO,
  // Signed first-faulting gather loads.
  GLDFF1S_MERGE_ZERO,
  GLDFF1S_SCALED_MERGE_ZERO,
  GLDFF1S_UXTW_MERGE_ZERO,
  GLDFF1S_SXTW_MERGE_ZERO,
  GLDFF1S_UXTW_SCALED_MERGE_ZERO,
  GLDFF1S_SXTW_SCALED_MERGE_ZERO,
  GLDFF1S_IMM_MERGE_ZERO,
  // Non-temporal gather loads
  GLDNT1_MERGE_ZERO,
  GLDNT1_INDEX_MERGE_ZERO,
  GLDNT1S_MERGE_ZERO,
  // Contiguous masked store.
  ST1_PRED,
  // Scatter store
  SST1_PRED,
  SST1_SCALED_PRED,
  SST1_UXTW_PRED,
  SST1_SXTW_PRED,
  SST1_UXTW_SCALED_PRED,
  SST1_SXTW_SCALED_PRED,
  SST1_IMM_PRED,
  // Non-temporal scatter store
  SSTNT1_PRED,
  SSTNT1_INDEX_PRED,
  // Asserts that a function argument (i32) is zero-extended to i8 by
  // the caller.
  ASSERT_ZEXT_BOOL,
  // Strict (exception-raising) floating point comparison
  STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
  STRICT_FCMPE,
  // NEON Load/Store with post-increment base updates
  LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
  LD3post,
  LD4post,
  ST2post,
  ST3post,
  ST4post,
  LD1x2post,
  LD1x3post,
  LD1x4post,
  ST1x2post,
  ST1x3post,
  ST1x4post,
  LD1DUPpost,
  LD2DUPpost,
  LD3DUPpost,
  LD4DUPpost,
  LD1LANEpost,
  LD2LANEpost,
  LD3LANEpost,
  LD4LANEpost,
  ST2LANEpost,
  ST3LANEpost,
  ST4LANEpost,
  STG,
  STZG,
  ST2G,
  STZ2G,
  LDP,
  STP,
  STNP,
  // Memory Operations
  MOPS_MEMSET,
  MOPS_MEMSET_TAGGING,
  MOPS_MEMCOPY,
  MOPS_MEMMOVE,
};
} // end namespace AArch64ISD

namespace {

// Any instruction that defines a 32-bit result zeros out the high half of the
// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
// be copying from a truncate. But any other 32-bit operation will zero-extend
// up to 64 bits. AssertSext/AssertZext aren't saying anything about the upper
// 32 bits; they're probably just qualifying a CopyFromReg.
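// For example, a zero-extend of the result of a plain 32-bit ADD can simply
// be folded away, because writing the 32-bit result already cleared bits
// 63:32 of the underlying 64-bit register. A value arriving via CopyFromReg,
// or one qualified only by AssertZext (which constrains bits within the
// 32-bit value, not bits 63:32), carries no such guarantee and still needs an
// explicit zero-extension.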
static inline bool isDef32(const SDNode &N) {
  unsigned Opc = N.getOpcode();
  return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
         Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
         Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
         Opc != ISD::FREEZE;
}
} // end anonymous namespace

namespace AArch64 {

/// Possible values of current rounding mode, which is specified in bits
/// 23:22 of FPCR.
enum Rounding {
  RN = 0,    // Round to Nearest
  RP = 1,    // Round towards Plus infinity
  RM = 2,    // Round towards Minus infinity
  RZ = 3,    // Round towards Zero
  rmMask = 3 // Bit mask selecting rounding mode
};

// Bit position of rounding mode bits in FPCR.
const unsigned RoundingBitsPos = 22;
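// For example, the rounding mode selected in an FPCR value can be recovered
// as (FPCR >> RoundingBitsPos) & rmMask, i.e. by extracting bits 23:22.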
} // namespace AArch64

class AArch64Subtarget;

class AArch64TargetLowering : public TargetLowering {
public:
  explicit AArch64TargetLowering(const TargetMachine &TM,
                                 const AArch64Subtarget &STI);
  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const;
  /// Determine which of the bits specified in Mask are known to be either zero
  /// or one and return them in the KnownZero/KnownOne bitsets.
  void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
                                     const APInt &DemandedElts,
                                     const SelectionDAG &DAG,
                                     unsigned Depth = 0) const override;
  MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
    // Returning i64 unconditionally here (i.e. even for ILP32) means that the
    // *DAG* representation of pointers will always be 64-bits. They will be
    // truncated and extended when transferred to memory, but the 64-bit DAG
    // allows us to use AArch64's addressing modes much more easily.
    return MVT::getIntegerVT(64);
  }
  bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
                                    const APInt &DemandedElts,
                                    TargetLoweringOpt &TLO) const override;
  MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;
  /// Returns true if the target allows unaligned memory accesses of the
  /// specified type.
  bool allowsMisalignedMemoryAccesses(
      EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
      bool *Fast = nullptr) const override;
  /// LLT variant.
  bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace,
                                      Align Alignment,
                                      MachineMemOperand::Flags Flags,
                                      bool *Fast = nullptr) const override;
  /// Provide custom lowering hooks for some operations.
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
  const char *getTargetNodeName(unsigned Opcode) const override;
  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
  /// This method returns a target specific FastISel object, or null if the
  /// target does not support "fast" ISel.
  FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                           const TargetLibraryInfo *libInfo) const override;
  bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
  bool isFPImmLegal(const APFloat &Imm, EVT VT,
                    bool ForCodeSize) const override;
  /// Return true if the given shuffle mask can be codegen'd directly, or if it
  /// should be stack expanded.
  bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;
  /// Return the ISD::SETCC ValueType.
  EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                         EVT VT) const override;
  SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
  MachineBasicBlock *EmitF128CSEL(MachineInstr &MI,
                                  MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                         MachineBasicBlock *BB) const;
  MachineBasicBlock *
  EmitInstrWithCustomInserter(MachineInstr &MI,
                              MachineBasicBlock *MBB) const override;
  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                          MachineFunction &MF,
                          unsigned Intrinsic) const override;
  bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
                             EVT NewVT) const override;
  bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
  bool isTruncateFree(EVT VT1, EVT VT2) const override;
  bool isProfitableToHoist(Instruction *I) const override;
  bool isZExtFree(Type *Ty1, Type *Ty2) const override;
  bool isZExtFree(EVT VT1, EVT VT2) const override;
  bool isZExtFree(SDValue Val, EVT VT2) const override;
  bool shouldSinkOperands(Instruction *I,
                          SmallVectorImpl<Use *> &Ops) const override;
  bool hasPairedLoad(EVT LoadedType, Align &RequiredAlignment) const override;
  unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
  bool lowerInterleavedLoad(LoadInst *LI,
                            ArrayRef<ShuffleVectorInst *> Shuffles,
                            ArrayRef<unsigned> Indices,
                            unsigned Factor) const override;
  bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                             unsigned Factor) const override;
  bool isLegalAddImmediate(int64_t) const override;
  bool isLegalICmpImmediate(int64_t) const override;
  bool isMulAddWithConstProfitable(const SDValue &AddNode,
                                   const SDValue &ConstNode) const override;
  bool shouldConsiderGEPOffsetSplit() const override;
  EVT getOptimalMemOpType(const MemOp &Op,
                          const AttributeList &FuncAttributes) const override;
  LLT getOptimalMemOpLLT(const MemOp &Op,
                         const AttributeList &FuncAttributes) const override;
  /// Return true if the addressing mode represented by AM is legal for this
  /// target, for a load/store of the specified type.
  bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                             unsigned AS,
                             Instruction *I = nullptr) const override;
  /// Return the cost of the scaling factor used in the addressing
  /// mode represented by AM for this target, for a load/store
  /// of the specified type.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, it returns a negative value.
  InstructionCost getScalingFactorCost(const DataLayout &DL, const AddrMode &AM,
                                       Type *Ty, unsigned AS) const override;
  /// Return true if an FMA operation is faster than a pair of fmul and fadd
  /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
  /// returns true, otherwise fmuladd is expanded to fmul + fadd.
  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                  EVT VT) const override;
  bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;
  bool generateFMAsInMachineCombiner(EVT VT,
                                     CodeGenOpt::Level OptLevel) const override;
  const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
  /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
  bool isDesirableToCommuteWithShift(const SDNode *N,
                                     CombineLevel Level) const override;
  /// Returns true if it is beneficial to convert a load of a constant
  /// to just the constant itself.
  bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                         Type *Ty) const override;
  /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
  /// with this index.
  bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                               unsigned Index) const override;
  bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
                            bool MathUsed) const override {
    // Using overflow ops for overflow checks only should be beneficial on
    // AArch64.
    return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
  }
  Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
                        AtomicOrdering Ord) const override;
  Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr,
                              AtomicOrdering Ord) const override;
  void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override;
  bool isOpSuitableForLDPSTP(const Instruction *I) const;
  bool shouldInsertFencesForAtomic(const Instruction *I) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
  bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
  bool useLoadStackGuardNode() const override;
  TargetLoweringBase::LegalizeTypeAction
  getPreferredVectorAction(MVT VT) const override;
  /// If the target has a standard location for the stack protector cookie,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getIRStackGuard(IRBuilderBase &IRB) const override;
  void insertSSPDeclarations(Module &M) const override;
  Value *getSDagStackGuard(const Module &M) const override;
  Function *getSSPStackGuardCheck(const Module &M) const override;
  /// If the target has a standard location for the unsafe stack pointer,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;
  /// If a physical register, this returns the register that receives the
  /// exception address on entry to an EH pad.
  Register
  getExceptionPointerRegister(const Constant *PersonalityFn) const override {
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X0;
  }
  /// If a physical register, this returns the register that receives the
  /// exception typeid on entry to a landing pad.
  Register
  getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X1;
  }
  bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
  bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                        const MachineFunction &MF) const override {
    // Do not merge to float value size (128 bits) if no implicit
    // float attribute is set.
    bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat);
    if (NoFloat)
      return (MemVT.getSizeInBits() <= 64);
    return true;
  }
  bool isCheapToSpeculateCttz() const override {
    return true;
  }
  bool isCheapToSpeculateCtlz() const override {
    return true;
  }
  bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
  bool hasAndNotCompare(SDValue V) const override {
    // We can use bics for any scalar.
    return V.getValueType().isScalarInteger();
  }
  bool hasAndNot(SDValue Y) const override {
    EVT VT = Y.getValueType();
    if (!VT.isVector())
      return hasAndNotCompare(Y);
    TypeSize TS = VT.getSizeInBits();
    // TODO: We should be able to use bic/bif too for SVE.
    return !TS.isScalable() && TS.getFixedValue() >= 64; // vector 'bic'
  }
  bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
      SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
      unsigned OldShiftOpcode, unsigned NewShiftOpcode,
      SelectionDAG &DAG) const override;
  bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;
  bool shouldTransformSignedTruncationCheck(EVT XVT,
                                            unsigned KeptBits) const override {
    // For vectors, we don't have a preference.
    if (XVT.isVector())
      return false;
    auto VTIsOk = [](EVT VT) -> bool {
      return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
             VT == MVT::i64;
    };
    // We are OK with KeptBitsVT being byte/word/dword, which is what SXT
    // supports. XVT will be larger than KeptBitsVT.
    MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
    return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
  }
  bool preferIncOfAddToSubOfNot(EVT VT) const override;
  bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;
  bool hasBitPreservingFPLogic(EVT VT) const override {
    // FIXME: Is this always true? It should be true for vectors at least.
    return VT == MVT::f32 || VT == MVT::f64;
  }
  bool supportSplitCSR(MachineFunction *MF) const override {
    return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
  }
  void initializeSplitCSR(MachineBasicBlock *Entry) const override;
  void insertCopiesSplitCSR(
      MachineBasicBlock *Entry,
      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
  bool supportSwiftError() const override {
    return true;
  }
  /// Enable aggressive FMA fusion on targets that want it.
  bool enableAggressiveFMAFusion(EVT VT) const override;
  /// Returns the size of the platform's va_list object.
  unsigned getVaListSizeInBits(const DataLayout &DL) const override;
  /// Returns true if \p VecTy is a legal interleaved access type. This
  /// function checks the vector element type and the overall width of the
  /// vector.
  bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL,
                                    bool &UseScalable) const;
  /// Returns the number of interleaved accesses that will be generated when
  /// lowering accesses of the given type.
  unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL,
                                     bool UseScalable) const;
  MachineMemOperand::Flags getTargetMMOFlags(
      const Instruction &I) const override;
  bool functionArgumentNeedsConsecutiveRegisters(
      Type *Ty, CallingConv::ID CallConv, bool isVarArg,
      const DataLayout &DL) const override;
  /// Used for exception handling on Win64.
  bool needsFixedCatchObjects() const override;
  bool fallBackToDAGISel(const Instruction &Inst) const override;
  /// SVE code generation for fixed length vectors does not custom lower
  /// BUILD_VECTOR. This makes BUILD_VECTOR legalisation a source of stores to
  /// merge. However, merging them creates a BUILD_VECTOR that is just as
  /// illegal as the original, thus leading to an infinite legalisation loop.
  /// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal
  /// vector types this override can be removed.
  bool mergeStoresAfterLegalization(EVT VT) const override;
  // If the platform/function should have a redzone, return the size in bytes.
  unsigned getRedZoneSize(const Function &F) const {
    if (F.hasFnAttribute(Attribute::NoRedZone))
      return 0;
    return 128;
  }
  bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const;
  EVT getPromotedVTForPredicate(EVT VT) const;
  EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty,
                             bool AllowUnknown = false) const override;
  bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const override;
private:
  /// Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;
  bool isExtFreeImpl(const Instruction *Ext) const override;
  void addTypeForNEON(MVT VT);
  void addTypeForFixedLengthSVE(MVT VT);
  void addDRTypeForNEON(MVT VT);
  void addQRTypeForNEON(MVT VT);
  SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               const SDLoc &DL, SelectionDAG &DAG,
                               SmallVectorImpl<SDValue> &InVals) const override;
  SDValue LowerCall(CallLoweringInfo & /*CLI*/,
                    SmallVectorImpl<SDValue> &InVals) const override;
  SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
                          CallingConv::ID CallConv, bool isVarArg,
                          const SmallVectorImpl<ISD::InputArg> &Ins,
                          const SDLoc &DL, SelectionDAG &DAG,
                          SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
                          SDValue ThisVal) const;
  SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerStore128(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  bool
  isEligibleForTailCallOptimization(const CallLoweringInfo &CLI) const;
  /// Finds the incoming stack arguments which overlap the given fixed stack
  /// object and incorporates their load into the current chain. This prevents
  /// an upcoming store from clobbering the stack argument before it's used.
  SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
                              MachineFrameInfo &MFI, int ClobberedFI) const;
  bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;
  void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL,
                           SDValue &Chain) const;
  bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                      bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      LLVMContext &Context) const override;
  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
                      SelectionDAG &DAG) const override;
  SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
                        unsigned Flag) const;
  template <class NodeTy>
  SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFTLSLocalExec(const GlobalValue *GV, SDValue ThreadBase,
                               const SDLoc &DL, SelectionDAG &DAG) const;
  SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL,
                                 SelectionDAG &DAG) const;
  SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
                         SDValue TVal, SDValue FVal, const SDLoc &dl,
                         SelectionDAG &DAG) const;
  SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG, unsigned NewOp,
                              bool OverrideNEON = false) const;
  SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SDValue Chain,
                                         SDValue &Size,
                                         SelectionDAG &DAG) const;
  SDValue LowerSVEStructLoad(unsigned Intrinsic, ArrayRef<SDValue> LoadOps,
                             EVT VT, SelectionDAG &DAG, const SDLoc &DL) const;
  SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op,
                                               SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorMLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp,
                              SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorMStoreToSVE(SDValue Op,
                                            SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op,
                                              SelectionDAG &DAG) const;
  SDValue LowerFixedLengthExtractVectorElt(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthInsertVectorElt(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthBitcastToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthConcatVectorsToSVE(SDValue Op,
                                             SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPExtendToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPRoundToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthIntToFPToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPToIntToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVECTOR_SHUFFLEToSVE(SDValue Op,
                                              SelectionDAG &DAG) const;
  SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                        SmallVectorImpl<SDNode *> &Created) const override;
  SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                          int &ExtraSteps, bool &UseOneConst,
                          bool Reciprocal) const override;
  SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                           int &ExtraSteps) const override;
  SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
                           const DenormalMode &Mode) const override;
  SDValue getSqrtResultForDenormInput(SDValue Operand,
                                      SelectionDAG &DAG) const override;
  unsigned combineRepeatedFPDivisors() const override;
  ConstraintType getConstraintType(StringRef Constraint) const override;
  Register getRegisterByName(const char *RegName, LLT VT,
                             const MachineFunction &MF) const override;
  /// Examine constraint string and operand type and determine a weight value.
  /// The operand object must already have been set up with the operand type.
  ConstraintWeight
  getSingleConstraintMatchWeight(AsmOperandInfo &info,
                                 const char *constraint) const override;
  std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                               StringRef Constraint, MVT VT) const override;
  const char *LowerXConstraint(EVT ConstraintVT) const override;
  void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
                                    std::vector<SDValue> &Ops,
                                    SelectionDAG &DAG) const override;
  unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
    if (ConstraintCode == "Q")
      return InlineAsm::Constraint_Q;
    // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
    // followed by llvm_unreachable so we'll leave them unimplemented in
    // the backend for now.
    return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
  }
  bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const override;
  bool shouldRemoveExtendFromGSIndex(EVT VT) const override;
  bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
  bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
  bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
  bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
                              ISD::MemIndexedMode &AM, bool &IsInc,
                              SelectionDAG &DAG) const;
  bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
                                 ISD::MemIndexedMode &AM,
                                 SelectionDAG &DAG) const override;
  bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
                                  SDValue &Offset, ISD::MemIndexedMode &AM,
                                  SelectionDAG &DAG) const override;
  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                          SelectionDAG &DAG) const override;
  void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                             SelectionDAG &DAG) const;
  void ReplaceExtractSubVectorResults(SDNode *N,
                                      SmallVectorImpl<SDValue> &Results,
                                      SelectionDAG &DAG) const;
  bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;
  void finalizeLowering(MachineFunction &MF) const override;
  bool shouldLocalize(const MachineInstr &MI,
                      const TargetTransformInfo *TTI) const override;
  bool SimplifyDemandedBitsForTargetNode(SDValue Op,
                                         const APInt &OriginalDemandedBits,
                                         const APInt &OriginalDemandedElts,
                                         KnownBits &Known,
                                         TargetLoweringOpt &TLO,
                                         unsigned Depth) const override;
  // Normally SVE is only used for vectors that do not fit within a NEON
  // vector. This changes when OverrideNEON is true, allowing SVE to be used
  // for 64-bit and 128-bit vectors as well.
  bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const;
  // With the exception of data-predicate transitions, no instructions are
  // required to cast between legal scalable vector types. However:
  //  1. Packed and unpacked types have different bit lengths, meaning BITCAST
  //     is not universally usable.
  //  2. Most unpacked integer types are not legal and thus integer extends
  //     cannot be used to convert between unpacked and packed types.
  // These can make "bitcasting" a multiphase process. REINTERPRET_CAST is used
  // to transition between unpacked and packed types of the same element type,
  // with BITCAST used otherwise.
  SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const;
  bool isConstantUnsignedBitfieldExtractLegal(unsigned Opc, LLT Ty1,
                                              LLT Ty2) const override;
};

namespace AArch64 {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                         const TargetLibraryInfo *libInfo);
} // end namespace AArch64

} // end namespace llvm

#endif