//==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that AArch64 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H

#include "AArch64.h"
#include "Utils/AArch64SMEAttributes.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Instruction.h"

namespace llvm {

namespace AArch64ISD {
// For predicated nodes where the result is a vector, the operation is
// controlled by a governing predicate and the inactive lanes are explicitly
// defined with a value, please stick to the following naming convention:
//
// _MERGE_OP<n>       The result value is a vector with inactive lanes equal
//                    to source operand OP<n>.
//
// _MERGE_ZERO        The result value is a vector with inactive lanes
//                    actively zeroed.
//
// _MERGE_PASSTHRU    The result value is a vector with inactive lanes equal
//                    to the last source operand, whose only purpose is being
//                    a passthru value.
//
// For other cases where no explicit action is needed to set the inactive
// lanes, or when the result is not a vector and it is needed or helpful to
// distinguish a node from similar unpredicated nodes, use:
//
// _PRED
//
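// As an illustrative sketch of the convention (the operand lists below are
// simplified and not normative for the node definitions that follow):
//
//   FADD_PRED(pg, a, b)             inactive lanes are undefined (_PRED)
//   SRAD_MERGE_OP1(pg, a, shift)    inactive lanes take the value of 'a'
//   SETCC_MERGE_ZERO(pg, a, b, cc)  inactive lanes are zeroed
//   FABS_MERGE_PASSTHRU(pg, a, pt)  inactive lanes take the passthru value 'pt'
//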
enum NodeType : unsigned {
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
  CALL,         // Function call.
  // Pseudo for an ObjC call that gets emitted together with a special `mov
  // x29, x29` marker instruction.
  CALL_RVMARKER,
  CALL_BTI, // Function call followed by a BTI instruction.
  // Essentially like a normal COPY that works on GPRs, but cannot be
  // rematerialised by passes like the simple register coalescer. It's
  // required for SME when lowering calls because we cannot allow frame
  // index calculations using addvl to slip in between the smstart/smstop
  // and the bl instruction. The scalable vector length may change across
  // the smstart/smstop boundary.
  OBSCURE_COPY,
  SMSTART,
  SMSTOP,
  RESTORE_ZA,
  // Produces the full sequence of instructions for getting the thread pointer
  // offset of a variable into X0, using the TLSDesc model.
  TLSDESC_CALLSEQ,
  ADRP,     // Page address of a TargetGlobalAddress operand.
  ADR,      // ADR
  ADDlow,   // Add the low 12 bits of a TargetGlobalAddress operand.
  LOADgot,  // Load from automatically generated descriptor (e.g. Global
            // Offset Table, TLS record).
  RET_FLAG, // Return with a flag operand. Operand 0 is the chain operand.
  BRCOND,   // Conditional branch instruction; "b.cond".
  CSEL,
  CSINV, // Conditional select invert.
  CSNEG, // Conditional select negate.
  CSINC, // Conditional select increment.
  // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
  // ELF.
  THREAD_POINTER,
  ADC,
  SBC, // adc, sbc instructions
  // Predicated instructions where inactive lanes produce undefined results.
  ABDS_PRED,
  ABDU_PRED,
  FADD_PRED,
  FDIV_PRED,
  FMA_PRED,
  FMAX_PRED,
  FMAXNM_PRED,
  FMIN_PRED,
  FMINNM_PRED,
  FMUL_PRED,
  FSUB_PRED,
  HADDS_PRED,
  HADDU_PRED,
  MUL_PRED,
  MULHS_PRED,
  MULHU_PRED,
  RHADDS_PRED,
  RHADDU_PRED,
  SDIV_PRED,
  SHL_PRED,
  SMAX_PRED,
  SMIN_PRED,
  SRA_PRED,
  SRL_PRED,
  UDIV_PRED,
  UMAX_PRED,
  UMIN_PRED,
  // Unpredicated vector instructions
  BIC,
  SRAD_MERGE_OP1,
  // Predicated instructions with the result of inactive lanes provided by the
  // last operand.
  FABS_MERGE_PASSTHRU,
  FCEIL_MERGE_PASSTHRU,
  FFLOOR_MERGE_PASSTHRU,
  FNEARBYINT_MERGE_PASSTHRU,
  FNEG_MERGE_PASSTHRU,
  FRECPX_MERGE_PASSTHRU,
  FRINT_MERGE_PASSTHRU,
  FROUND_MERGE_PASSTHRU,
  FROUNDEVEN_MERGE_PASSTHRU,
  FSQRT_MERGE_PASSTHRU,
  FTRUNC_MERGE_PASSTHRU,
  FP_ROUND_MERGE_PASSTHRU,
  FP_EXTEND_MERGE_PASSTHRU,
  UINT_TO_FP_MERGE_PASSTHRU,
  SINT_TO_FP_MERGE_PASSTHRU,
  FCVTZU_MERGE_PASSTHRU,
  FCVTZS_MERGE_PASSTHRU,
  SIGN_EXTEND_INREG_MERGE_PASSTHRU,
  ZERO_EXTEND_INREG_MERGE_PASSTHRU,
  ABS_MERGE_PASSTHRU,
  NEG_MERGE_PASSTHRU,
  SETCC_MERGE_ZERO,
  // Arithmetic instructions which write flags.
  ADDS,
  SUBS,
  ADCS,
  SBCS,
  ANDS,
  // Conditional compares. Operands: left,right,falsecc,cc,flags
  CCMP,
  CCMN,
  FCCMP,
  // Floating point comparison
  FCMP,
  // Scalar extract
  EXTR,
  // Scalar-to-vector duplication
  DUP,
  DUPLANE8,
  DUPLANE16,
  DUPLANE32,
  DUPLANE64,
  DUPLANE128,
  // Vector immediate moves
  MOVI,
  MOVIshift,
  MOVIedit,
  MOVImsl,
  FMOV,
  MVNIshift,
  MVNImsl,
  // Vector immediate ops
  BICi,
  ORRi,
  // Vector bitwise select: similar to ISD::VSELECT but not all bits within an
  // element must be identical.
  BSP,
  // Vector shuffles
  ZIP1,
  ZIP2,
  UZP1,
  UZP2,
  TRN1,
  TRN2,
  REV16,
  REV32,
  REV64,
  EXT,
  SPLICE,
  // Vector shift by scalar
  VSHL,
  VLSHR,
  VASHR,
  // Vector shift by immediate
  SQSHL_I,
  UQSHL_I,
  SQSHLU_I,
  SRSHR_I,
  URSHR_I,
  // Vector shift by constant and insert
  VSLI,
  VSRI,
  // Vector comparisons
  CMEQ,
  CMGE,
  CMGT,
  CMHI,
  CMHS,
  FCMEQ,
  FCMGE,
  FCMGT,
  // Vector zero comparisons
  CMEQz,
  CMGEz,
  CMGTz,
  CMLEz,
  CMLTz,
  FCMEQz,
  FCMGEz,
  FCMGTz,
  FCMLEz,
  FCMLTz,
  // Vector across-lanes addition
  // Only the lower result lane is defined.
  SADDV,
  UADDV,
  // Add Pairwise of two vectors
  ADDP,
  // Add Long Pairwise
  SADDLP,
  UADDLP,
  // udot/sdot instructions
  UDOT,
  SDOT,
  // Vector across-lanes min/max
  // Only the lower result lane is defined.
  SMINV,
  UMINV,
  SMAXV,
  UMAXV,
  SADDV_PRED,
  UADDV_PRED,
  SMAXV_PRED,
  UMAXV_PRED,
  SMINV_PRED,
  UMINV_PRED,
  ORV_PRED,
  EORV_PRED,
  ANDV_PRED,
  // Vector bitwise insertion
  BIT,
  // Compare-and-branch
  CBZ,
  CBNZ,
  TBZ,
  TBNZ,
  // Tail calls
  TC_RETURN,
  // Custom prefetch handling
  PREFETCH,
  // {s|u}int to FP within a FP register.
  SITOF,
  UITOF,
  /// Natural vector cast. ISD::BITCAST is not natural in the big-endian
  /// world w.r.t. vectors, which causes additional REV instructions to be
  /// generated to compensate for the byte-swapping. But sometimes we do
  /// need to re-interpret the data in SIMD vector registers in big-endian
  /// mode without emitting such REV instructions.
  NVCAST,
  MRS, // MRS, also sets the flags via a glue.
  SMULL,
  UMULL,
  PMULL,
  // Reciprocal estimates and steps.
  FRECPE,
  FRECPS,
  FRSQRTE,
  FRSQRTS,
  SUNPKHI,
  SUNPKLO,
  UUNPKHI,
  UUNPKLO,
  CLASTA_N,
  CLASTB_N,
  LASTA,
  LASTB,
  TBL,
  // Floating-point reductions.
  FADDA_PRED,
  FADDV_PRED,
  FMAXV_PRED,
  FMAXNMV_PRED,
  FMINV_PRED,
  FMINNMV_PRED,
  INSR,
  PTEST,
  PTEST_ANY,
  PTRUE,
  BITREVERSE_MERGE_PASSTHRU,
  BSWAP_MERGE_PASSTHRU,
  REVH_MERGE_PASSTHRU,
  REVW_MERGE_PASSTHRU,
  CTLZ_MERGE_PASSTHRU,
  CTPOP_MERGE_PASSTHRU,
  DUP_MERGE_PASSTHRU,
  INDEX_VECTOR,
  // Cast between vectors of the same element type that differ in length.
  REINTERPRET_CAST,
  // Nodes to build an LD64B / ST64B 64-byte quantity out of i64 values, and
  // vice versa
  LS64_BUILD,
  LS64_EXTRACT,
  LD1_MERGE_ZERO,
  LD1S_MERGE_ZERO,
  LDNF1_MERGE_ZERO,
  LDNF1S_MERGE_ZERO,
  LDFF1_MERGE_ZERO,
  LDFF1S_MERGE_ZERO,
  LD1RQ_MERGE_ZERO,
  LD1RO_MERGE_ZERO,
  // Structured loads.
  SVE_LD2_MERGE_ZERO,
  SVE_LD3_MERGE_ZERO,
  SVE_LD4_MERGE_ZERO,
  // Unsigned gather loads.
  GLD1_MERGE_ZERO,
  GLD1_SCALED_MERGE_ZERO,
  GLD1_UXTW_MERGE_ZERO,
  GLD1_SXTW_MERGE_ZERO,
  GLD1_UXTW_SCALED_MERGE_ZERO,
  GLD1_SXTW_SCALED_MERGE_ZERO,
  GLD1_IMM_MERGE_ZERO,
  // Signed gather loads
  GLD1S_MERGE_ZERO,
  GLD1S_SCALED_MERGE_ZERO,
  GLD1S_UXTW_MERGE_ZERO,
  GLD1S_SXTW_MERGE_ZERO,
  GLD1S_UXTW_SCALED_MERGE_ZERO,
  GLD1S_SXTW_SCALED_MERGE_ZERO,
  GLD1S_IMM_MERGE_ZERO,
  // Unsigned first-faulting gather loads.
  GLDFF1_MERGE_ZERO,
  GLDFF1_SCALED_MERGE_ZERO,
  GLDFF1_UXTW_MERGE_ZERO,
  GLDFF1_SXTW_MERGE_ZERO,
  GLDFF1_UXTW_SCALED_MERGE_ZERO,
  GLDFF1_SXTW_SCALED_MERGE_ZERO,
  GLDFF1_IMM_MERGE_ZERO,
  // Signed first-faulting gather loads.
  GLDFF1S_MERGE_ZERO,
  GLDFF1S_SCALED_MERGE_ZERO,
  GLDFF1S_UXTW_MERGE_ZERO,
  GLDFF1S_SXTW_MERGE_ZERO,
  GLDFF1S_UXTW_SCALED_MERGE_ZERO,
  GLDFF1S_SXTW_SCALED_MERGE_ZERO,
  GLDFF1S_IMM_MERGE_ZERO,
  // Non-temporal gather loads
  GLDNT1_MERGE_ZERO,
  GLDNT1_INDEX_MERGE_ZERO,
  GLDNT1S_MERGE_ZERO,
  // Contiguous masked store.
  ST1_PRED,
  // Scatter store
  SST1_PRED,
  SST1_SCALED_PRED,
  SST1_UXTW_PRED,
  SST1_SXTW_PRED,
  SST1_UXTW_SCALED_PRED,
  SST1_SXTW_SCALED_PRED,
  SST1_IMM_PRED,
  // Non-temporal scatter store
  SSTNT1_PRED,
  SSTNT1_INDEX_PRED,
  // SME
  RDSVL,
  REVD_MERGE_PASSTHRU,
  // Asserts that a function argument (i32) is zero-extended to i8 by
  // the caller
  ASSERT_ZEXT_BOOL,
  // 128-bit system register accesses
  // lo64, hi64, chain = MRRS(chain, sysregname)
  MRRS,
  // chain = MSRR(chain, sysregname, lo64, hi64)
  MSRR,
  // Strict (exception-raising) floating point comparison
  STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
  STRICT_FCMPE,
  // NEON Load/Store with post-increment base updates
  LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
  LD3post,
  LD4post,
  ST2post,
  ST3post,
  ST4post,
  LD1x2post,
  LD1x3post,
  LD1x4post,
  ST1x2post,
  ST1x3post,
  ST1x4post,
  LD1DUPpost,
  LD2DUPpost,
  LD3DUPpost,
  LD4DUPpost,
  LD1LANEpost,
  LD2LANEpost,
  LD3LANEpost,
  LD4LANEpost,
  ST2LANEpost,
  ST3LANEpost,
  ST4LANEpost,
  STG,
  STZG,
  ST2G,
  STZ2G,
  LDP,
  LDNP,
  STP,
  STNP,
  // Memory Operations
  MOPS_MEMSET,
  MOPS_MEMSET_TAGGING,
  MOPS_MEMCOPY,
  MOPS_MEMMOVE,
};

} // end namespace AArch64ISD

namespace AArch64 {
/// Possible values of current rounding mode, which is specified in bits
/// 23:22 of FPCR.
enum Rounding {
  RN = 0,    // Round to Nearest
  RP = 1,    // Round towards Plus infinity
  RM = 2,    // Round towards Minus infinity
  RZ = 3,    // Round towards Zero
  rmMask = 3 // Bit mask selecting rounding mode
};

// Bit position of rounding mode bits in FPCR.
const unsigned RoundingBitsPos = 22;
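
// Illustrative sketch, not part of the upstream interface: the rounding-mode
// field of a raw FPCR value can be recovered with the constants above. The
// helper name below is hypothetical and exists only to show how
// RoundingBitsPos and rmMask compose.
inline Rounding getRoundingModeFromFPCR(uint64_t FPCRValue) {
  // Bits 23:22 hold the mode; shift them down and mask to get RN/RP/RM/RZ.
  return static_cast<Rounding>((FPCRValue >> RoundingBitsPos) & rmMask);
}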

} // namespace AArch64
class AArch64Subtarget;

class AArch64TargetLowering : public TargetLowering {
public:
  explicit AArch64TargetLowering(const TargetMachine &TM,
      const AArch64Subtarget &STI);
  /// Control the following reassociation of operands: (op (op x, c1), y) -> (op
  /// (op x, y), c1) where N0 is (op x, c1) and N1 is y.
  bool isReassocProfitable(SelectionDAG &DAG, SDValue N0,
      SDValue N1) const override;
  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const;
  /// Determine which of the bits specified in Mask are known to be either zero
  /// or one and return them in the KnownZero/KnownOne bitsets.
  void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
      const APInt &DemandedElts,
      const SelectionDAG &DAG,
      unsigned Depth = 0) const override;
  MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
    // Returning i64 unconditionally here (i.e. even for ILP32) means that the
    // *DAG* representation of pointers will always be 64-bits. They will be
    // truncated and extended when transferred to memory, but the 64-bit DAG
    // allows us to use AArch64's addressing modes much more easily.
    return MVT::getIntegerVT(64);
  }
  bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
      const APInt &DemandedElts,
      TargetLoweringOpt &TLO) const override;
  MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;
  /// Returns true if the target allows unaligned memory accesses of the
  /// specified type.
  bool allowsMisalignedMemoryAccesses(
      EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
      unsigned *Fast = nullptr) const override;
  /// LLT variant.
  bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace,
      Align Alignment,
      MachineMemOperand::Flags Flags,
      unsigned *Fast = nullptr) const override;
  /// Provide custom lowering hooks for some operations.
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
  const char *getTargetNodeName(unsigned Opcode) const override;
  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
  /// This method returns a target specific FastISel object, or null if the
  /// target does not support "fast" ISel.
  FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
      const TargetLibraryInfo *libInfo) const override;
  bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
  bool isFPImmLegal(const APFloat &Imm, EVT VT,
      bool ForCodeSize) const override;
  /// Return true if the given shuffle mask can be codegen'd directly, or if it
  /// should be stack expanded.
  bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;
  /// Similar to isShuffleMaskLegal. Return true if the given 'select with zero'
  /// shuffle mask can be codegen'd directly.
  bool isVectorClearMaskLegal(ArrayRef<int> M, EVT VT) const override;
  /// Return the ISD::SETCC ValueType.
  EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
      EVT VT) const override;
  SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
  MachineBasicBlock *EmitF128CSEL(MachineInstr &MI,
      MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
      MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitTileLoad(unsigned Opc, unsigned BaseReg,
      MachineInstr &MI,
      MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitZAInstr(unsigned Opc, unsigned BaseReg,
      MachineInstr &MI, MachineBasicBlock *BB,
      bool HasTile) const;
  MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const;
  MachineBasicBlock *
  EmitInstrWithCustomInserter(MachineInstr &MI,
      MachineBasicBlock *MBB) const override;
  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
      MachineFunction &MF,
      unsigned Intrinsic) const override;
  bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
      EVT NewVT) const override;
  bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
  bool isTruncateFree(EVT VT1, EVT VT2) const override;
  bool isProfitableToHoist(Instruction *I) const override;
  bool isZExtFree(Type *Ty1, Type *Ty2) const override;
  bool isZExtFree(EVT VT1, EVT VT2) const override;
  bool isZExtFree(SDValue Val, EVT VT2) const override;
  bool shouldSinkOperands(Instruction *I,
      SmallVectorImpl<Use *> &Ops) const override;
  bool optimizeExtendOrTruncateConversion(Instruction *I,
      Loop *L) const override;
  bool hasPairedLoad(EVT LoadedType, Align &RequiredAlignment) const override;
  unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
  bool lowerInterleavedLoad(LoadInst *LI,
      ArrayRef<ShuffleVectorInst *> Shuffles,
      ArrayRef<unsigned> Indices,
      unsigned Factor) const override;
  bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
      unsigned Factor) const override;
  bool isLegalAddImmediate(int64_t) const override;
  bool isLegalICmpImmediate(int64_t) const override;
  bool isMulAddWithConstProfitable(SDValue AddNode,
      SDValue ConstNode) const override;
  bool shouldConsiderGEPOffsetSplit() const override;
  EVT getOptimalMemOpType(const MemOp &Op,
      const AttributeList &FuncAttributes) const override;
  LLT getOptimalMemOpLLT(const MemOp &Op,
      const AttributeList &FuncAttributes) const override;
  /// Return true if the addressing mode represented by AM is legal for this
  /// target, for a load/store of the specified type.
  bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
      unsigned AS,
      Instruction *I = nullptr) const override;
  /// Return true if an FMA operation is faster than a pair of fmul and fadd
  /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
  /// returns true, otherwise fmuladd is expanded to fmul + fadd.
  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
      EVT VT) const override;
  bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;
  bool generateFMAsInMachineCombiner(EVT VT,
      CodeGenOpt::Level OptLevel) const override;
  const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
  /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
  bool isDesirableToCommuteWithShift(const SDNode *N,
      CombineLevel Level) const override;
  /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
  bool isDesirableToCommuteXorWithShift(const SDNode *N) const override;
  /// Return true if it is profitable to fold a pair of shifts into a mask.
  bool shouldFoldConstantShiftPairToMask(const SDNode *N,
      CombineLevel Level) const override;
  /// Returns true if it is beneficial to convert a load of a constant
  /// to just the constant itself.
  bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
      Type *Ty) const override;
  /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
  /// with this index.
  bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
      unsigned Index) const override;
  bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
      bool MathUsed) const override {
    // Using overflow ops for overflow checks only should be beneficial on
    // AArch64.
    return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
  }
  Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
      AtomicOrdering Ord) const override;
  Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr,
      AtomicOrdering Ord) const override;
  void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override;
  bool isOpSuitableForLDPSTP(const Instruction *I) const;
  bool shouldInsertFencesForAtomic(const Instruction *I) const override;
  bool
  shouldInsertTrailingFenceForAtomicStore(const Instruction *I) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
  bool useLoadStackGuardNode() const override;
  TargetLoweringBase::LegalizeTypeAction
  getPreferredVectorAction(MVT VT) const override;
  /// If the target has a standard location for the stack protector cookie,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getIRStackGuard(IRBuilderBase &IRB) const override;
  void insertSSPDeclarations(Module &M) const override;
  Value *getSDagStackGuard(const Module &M) const override;
  Function *getSSPStackGuardCheck(const Module &M) const override;
  /// If the target has a standard location for the unsafe stack pointer,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;
  /// If a physical register, this returns the register that receives the
  /// exception address on entry to an EH pad.
  Register
  getExceptionPointerRegister(const Constant *PersonalityFn) const override {
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X0;
  }
  /// If a physical register, this returns the register that receives the
  /// exception typeid on entry to a landing pad.
  Register
  getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X1;
  }
  bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
  bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
      const MachineFunction &MF) const override {
    // Do not merge to float value size (128 bits) if no implicit
    // float attribute is set.
    bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat);
    if (NoFloat)
      return (MemVT.getSizeInBits() <= 64);
    return true;
  }
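  // Worked example of the rule above (illustrative only): with the
  // noimplicitfloat attribute set, four consecutive i16 stores may still be
  // merged into a single 64-bit store, but eight of them will not be merged
  // into a 128-bit (FP/SIMD register) store.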
  bool isCheapToSpeculateCttz(Type *) const override {
    return true;
  }
  bool isCheapToSpeculateCtlz(Type *) const override {
    return true;
  }
  bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
  bool hasAndNotCompare(SDValue V) const override {
    // We can use bics for any scalar.
    return V.getValueType().isScalarInteger();
  }
  bool hasAndNot(SDValue Y) const override {
    EVT VT = Y.getValueType();
    if (!VT.isVector())
      return hasAndNotCompare(Y);
    TypeSize TS = VT.getSizeInBits();
    // TODO: We should be able to use bic/bif too for SVE.
    return !TS.isScalable() && TS.getFixedValue() >= 64; // vector 'bic'
  }
  bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
      SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
      unsigned OldShiftOpcode, unsigned NewShiftOpcode,
      SelectionDAG &DAG) const override;
  ShiftLegalizationStrategy
  preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
      unsigned ExpansionFactor) const override;
  bool shouldTransformSignedTruncationCheck(EVT XVT,
      unsigned KeptBits) const override {
    // For vectors, we don't have a preference.
    if (XVT.isVector())
      return false;
    auto VTIsOk = [](EVT VT) -> bool {
      return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
          VT == MVT::i64;
    };
    // We are ok with KeptBitsVT being byte/word/dword, which is what SXT
    // supports. XVT will be larger than KeptBitsVT.
    MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
    return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
  }
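  // Worked example (illustrative only): for XVT == MVT::i32 and KeptBits == 8,
  // MVT::getIntegerVT(8) is MVT::i8, both types pass VTIsOk, and the hook
  // returns true (the sign-extend instructions SXTB/SXTH/SXTW cover these
  // widths). For KeptBits == 13 no matching simple integer type exists, so
  // the hook returns false.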
  bool preferIncOfAddToSubOfNot(EVT VT) const override;
  bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;
  bool isComplexDeinterleavingSupported() const override;
  bool isComplexDeinterleavingOperationSupported(
      ComplexDeinterleavingOperation Operation, Type *Ty) const override;
  Value *createComplexDeinterleavingIR(
      Instruction *I, ComplexDeinterleavingOperation OperationType,
      ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
      Value *Accumulator = nullptr) const override;
  bool hasBitPreservingFPLogic(EVT VT) const override {
    // FIXME: Is this always true? It should be true for vectors at least.
    return VT == MVT::f32 || VT == MVT::f64;
  }
  bool supportSplitCSR(MachineFunction *MF) const override {
    return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
        MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
  }
  void initializeSplitCSR(MachineBasicBlock *Entry) const override;
  void insertCopiesSplitCSR(
      MachineBasicBlock *Entry,
      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
  bool supportSwiftError() const override {
    return true;
  }
  bool supportKCFIBundles() const override { return true; }
  /// Enable aggressive FMA fusion on targets that want it.
  bool enableAggressiveFMAFusion(EVT VT) const override;
  /// Returns the size of the platform's va_list object.
  unsigned getVaListSizeInBits(const DataLayout &DL) const override;
  /// Returns true if \p VecTy is a legal interleaved access type. This
  /// function checks the vector element type and the overall width of the
  /// vector.
  bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL,
      bool &UseScalable) const;
  /// Returns the number of interleaved accesses that will be generated when
  /// lowering accesses of the given type.
  unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL,
      bool UseScalable) const;
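  // Worked example under the assumption of 128-bit NEON registers
  // (illustrative only): a fixed <16 x i32> vector is 512 bits wide, so an
  // interleaved access of that type would be lowered as four 128-bit
  // accesses, i.e. getNumInterleavedAccesses would return 4.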
  MachineMemOperand::Flags getTargetMMOFlags(
      const Instruction &I) const override;
  bool functionArgumentNeedsConsecutiveRegisters(
      Type *Ty, CallingConv::ID CallConv, bool isVarArg,
      const DataLayout &DL) const override;
  /// Used for exception handling on Win64.
  bool needsFixedCatchObjects() const override;
  bool fallBackToDAGISel(const Instruction &Inst) const override;
  /// SVE code generation for fixed length vectors does not custom lower
  /// BUILD_VECTOR. This makes BUILD_VECTOR legalisation a source of stores to
  /// merge. However, merging them creates a BUILD_VECTOR that is just as
  /// illegal as the original, thus leading to an infinite legalisation loop.
  /// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal
  /// vector types this override can be removed.
  bool mergeStoresAfterLegalization(EVT VT) const override;
  // If the platform/function should have a redzone, return the size in bytes.
  unsigned getRedZoneSize(const Function &F) const {
    if (F.hasFnAttribute(Attribute::NoRedZone))
      return 0;
    return 128;
  }
  bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const;
  EVT getPromotedVTForPredicate(EVT VT) const;
  EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty,
      bool AllowUnknown = false) const override;
  bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const override;
  /// If a change in streaming mode is required on entry to/return from a
  /// function call, it emits and returns the corresponding SMSTART or SMSTOP
  /// node. \p Entry tells whether this is before/after the call, which is
  /// necessary because PSTATE.SM is only queried once.
  SDValue changeStreamingMode(SelectionDAG &DAG, SDLoc DL, bool Enable,
      SDValue Chain, SDValue InFlag,
      SDValue PStateSM, bool Entry) const;
  bool isVScaleKnownToBeAPowerOfTwo() const override;
  // Normally SVE is only used for byte size vectors that do not fit within a
  // NEON vector. This changes when OverrideNEON is true, allowing SVE to be
  // used for 64-bit and 128-bit vectors as well.
  bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const;

private:
  /// Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;
  bool isExtFreeImpl(const Instruction *Ext) const override;
  void addTypeForNEON(MVT VT);
  void addTypeForStreamingSVE(MVT VT);
  void addTypeForFixedLengthSVE(MVT VT);
  void addDRTypeForNEON(MVT VT);
  void addQRTypeForNEON(MVT VT);
  unsigned allocateLazySaveBuffer(SDValue &Chain, const SDLoc &DL,
      SelectionDAG &DAG) const;
  SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
      bool isVarArg,
      const SmallVectorImpl<ISD::InputArg> &Ins,
      const SDLoc &DL, SelectionDAG &DAG,
      SmallVectorImpl<SDValue> &InVals) const override;
  SDValue LowerCall(CallLoweringInfo & /*CLI*/,
      SmallVectorImpl<SDValue> &InVals) const override;
  SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
      CallingConv::ID CallConv, bool isVarArg,
      const SmallVectorImpl<CCValAssign> &RVLocs,
      const SDLoc &DL, SelectionDAG &DAG,
      SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
      SDValue ThisVal) const;
  SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerStore128(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
  bool
  isEligibleForTailCallOptimization(const CallLoweringInfo &CLI) const;
  /// Finds the incoming stack arguments which overlap the given fixed stack
  /// object and incorporates their load into the current chain. This prevents
  /// an upcoming store from clobbering the stack argument before it's used.
  SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
      MachineFrameInfo &MFI, int ClobberedFI) const;
  bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;
  void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL,
      SDValue &Chain) const;
  bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
      bool isVarArg,
      const SmallVectorImpl<ISD::OutputArg> &Outs,
      LLVMContext &Context) const override;
  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
      const SmallVectorImpl<ISD::OutputArg> &Outs,
      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
      SelectionDAG &DAG) const override;
  SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
      unsigned Flag) const;
  SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG,
      unsigned Flag) const;
  SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG,
      unsigned Flag) const;
  SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
      unsigned Flag) const;
  template <class NodeTy>
  SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFTLSLocalExec(const GlobalValue *GV, SDValue ThreadBase,
      const SDLoc &DL, SelectionDAG &DAG) const;
  SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL,
      SelectionDAG &DAG) const;
  SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
      SDValue TVal, SDValue FVal, const SDLoc &dl,
      SelectionDAG &DAG) const;
  SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG,
      unsigned NewOp) const;
  SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTPOP_PARITY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SDValue Chain,
      SDValue &Size,
      SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,
      SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op,
      SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorMLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp,
      SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorMStoreToSVE(SDValue Op,
      SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op,
      SelectionDAG &DAG) const;
  SDValue LowerFixedLengthExtractVectorElt(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthInsertVectorElt(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthBitcastToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthConcatVectorsToSVE(SDValue Op,
      SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPExtendToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPRoundToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthIntToFPToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPToIntToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVECTOR_SHUFFLEToSVE(SDValue Op,
      SelectionDAG &DAG) const;
  SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
      SmallVectorImpl<SDNode *> &Created) const override;
  SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
      SmallVectorImpl<SDNode *> &Created) const override;
  SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
      int &ExtraSteps, bool &UseOneConst,
      bool Reciprocal) const override;
  SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
      int &ExtraSteps) const override;
  SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
      const DenormalMode &Mode) const override;
  SDValue getSqrtResultForDenormInput(SDValue Operand,
      SelectionDAG &DAG) const override;
  unsigned combineRepeatedFPDivisors() const override;
  ConstraintType getConstraintType(StringRef Constraint) const override;
  Register getRegisterByName(const char* RegName, LLT VT,
      const MachineFunction &MF) const override;
  /// Examine constraint string and operand type and determine a weight value.
  /// The operand object must already have been set up with the operand type.
  ConstraintWeight
  getSingleConstraintMatchWeight(AsmOperandInfo &info,
      const char *constraint) const override;
  std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
      StringRef Constraint, MVT VT) const override;
  const char *LowerXConstraint(EVT ConstraintVT) const override;
  void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
      std::vector<SDValue> &Ops,
      SelectionDAG &DAG) const override;
  unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
    if (ConstraintCode == "Q")
      return InlineAsm::Constraint_Q;
    // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
    // followed by llvm_unreachable so we'll leave them unimplemented in
    // the backend for now.
    return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
  }
  bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const override;
  bool shouldRemoveExtendFromGSIndex(EVT IndexVT, EVT DataVT) const override;
  bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
  bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
  bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
  bool getIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
      SDValue &Offset, ISD::MemIndexedMode &AM,
      bool &IsInc, SelectionDAG &DAG) const;
  bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
      ISD::MemIndexedMode &AM,
      SelectionDAG &DAG) const override;
  bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
      SDValue &Offset, ISD::MemIndexedMode &AM,
      SelectionDAG &DAG) const override;
  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
      SelectionDAG &DAG) const override;
  void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
      SelectionDAG &DAG) const;
  void ReplaceExtractSubVectorResults(SDNode *N,
      SmallVectorImpl<SDValue> &Results,
      SelectionDAG &DAG) const;
  bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;
  void finalizeLowering(MachineFunction &MF) const override;
  bool shouldLocalize(const MachineInstr &MI,
      const TargetTransformInfo *TTI) const override;
  bool SimplifyDemandedBitsForTargetNode(SDValue Op,
      const APInt &OriginalDemandedBits,
      const APInt &OriginalDemandedElts,
      KnownBits &Known,
      TargetLoweringOpt &TLO,
      unsigned Depth) const override;
  bool isTargetCanonicalConstantNode(SDValue Op) const override;
  // With the exception of data-predicate transitions, no instructions are
  // required to cast between legal scalable vector types. However:
  //  1. Packed and unpacked types have different bit lengths, meaning BITCAST
  //     is not universally usable.
  //  2. Most unpacked integer types are not legal and thus integer extends
  //     cannot be used to convert between unpacked and packed types.
  // These can make "bitcasting" a multiphase process. REINTERPRET_CAST is used
  // to transition between unpacked and packed types of the same element type,
  // with BITCAST used otherwise.
  // This function does not handle predicate bitcasts.
  SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const;
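  // One possible shape of such a multiphase cast (illustrative only; the
  // concrete types are an assumption, not taken from this file): casting
  // nxv2f32 to nxv2i64 could first REINTERPRET_CAST the unpacked nxv2f32 into
  // the packed nxv4f32, then BITCAST nxv4f32 to nxv2i64.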
  // Returns the runtime value for PSTATE.SM. When the function is
  // streaming-compatible, this generates a call to __arm_sme_state.
  SDValue getPStateSM(SelectionDAG &DAG, SDValue Chain, SMEAttrs Attrs,
      SDLoc DL, EVT VT) const;
  bool isConstantUnsignedBitfieldExtractLegal(unsigned Opc, LLT Ty1,
      LLT Ty2) const override;
};

namespace AArch64 {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
    const TargetLibraryInfo *libInfo);
} // end namespace AArch64

} // end namespace llvm

#endif