//===- AArch64TargetTransformInfo.h - AArch64 specific TTI ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file defines a TargetTransformInfo::Concept conforming object specific
/// to the AArch64 target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target independent and default TTI implementations handle the rest.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H

#include "AArch64.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include <cstdint>
#include <optional>

namespace llvm {

class APInt;
class Instruction;
class IntrinsicInst;
class Loop;
class SCEV;
class ScalarEvolution;
class Type;
class Value;
class VectorType;

class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
  using BaseT = BasicTTIImplBase<AArch64TTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const AArch64Subtarget *ST;
  const AArch64TargetLowering *TLI;

  const AArch64Subtarget *getST() const { return ST; }
  const AArch64TargetLowering *getTLI() const { return TLI; }
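
  // Kinds of structured vector load/store memory intrinsics, named after the
  // number of vectors they transfer (cf. NEON/SVE ld2/st2, ld3/st3, ld4/st4).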
  enum MemIntrinsicType {
    VECTOR_LDST_TWO_ELEMENTS,
    VECTOR_LDST_THREE_ELEMENTS,
    VECTOR_LDST_FOUR_ELEMENTS
  };

  bool isWideningInstruction(Type *Ty, unsigned Opcode,
                             ArrayRef<const Value *> Args);

  // A helper function called by 'getVectorInstrCost'.
  //
  // 'Val' and 'Index' are forwarded from 'getVectorInstrCost'; 'HasRealUse'
  // indicates whether the vector instruction is available in the input IR or
  // just imaginary in vectorizer passes.
  InstructionCost getVectorInstrCostHelper(Type *Val, unsigned Index,
                                           bool HasRealUse);

public:
  explicit AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F)
      : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
        TLI(ST->getTargetLowering()) {}

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  /// \name Scalar TTI Implementations
  /// @{

  using BaseT::getIntImmCost;
  InstructionCost getIntImmCost(int64_t Val);
  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TTI::TargetCostKind CostKind);
  InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TTI::TargetCostKind CostKind,
                                    Instruction *Inst = nullptr);
  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                      const APInt &Imm, Type *Ty,
                                      TTI::TargetCostKind CostKind);
  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);

  /// @}

  /// \name Vector TTI Implementations
  /// @{

  bool enableInterleavedAccessVectorization() { return true; }
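
  // ClassID 1 selects the vector register file; anything else is treated as
  // the scalar register file. AArch64 has 32 vector registers (V0-V31) and
  // 31 general-purpose registers (X0-X30, excluding SP).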
  unsigned getNumberOfRegisters(unsigned ClassID) const {
    bool Vector = (ClassID == 1);
    if (Vector) {
      if (ST->hasNEON())
        return 32;
      return 0;
    }
    return 31;
  }

  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind);

  std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                                    IntrinsicInst &II) const;

  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const;

  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const;

  unsigned getMinVectorRegisterBitWidth() const {
    return ST->getMinVectorRegisterBitWidth();
  }

  std::optional<unsigned> getVScaleForTuning() const {
    return ST->getVScaleForTuning();
  }

  bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const;

  /// Try to return an estimate cost factor that can be used as a multiplier
  /// when scalarizing an operation for a vector with ElementCount \p VF.
  /// For scalable vectors this currently takes the most pessimistic view based
  /// upon the maximum possible value for vscale.
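  /// For example, for a scalable VF of <vscale x 4> on a subtarget where
  /// getVScaleForTuning() returns 2, this returns 4 * 2 = 8.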
  unsigned getMaxNumElements(ElementCount VF) const {
    if (!VF.isScalable())
      return VF.getFixedValue();
    return VF.getKnownMinValue() * ST->getVScaleForTuning();
  }

  unsigned getMaxInterleaveFactor(unsigned VF);

  bool prefersVectorizedAddressing() const;

  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                        Align Alignment, unsigned AddressSpace,
                                        TTI::TargetCostKind CostKind);

  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr);

  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   TTI::CastContextHint CCH,
                                   TTI::TargetCostKind CostKind,
                                   const Instruction *I = nullptr);

  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                           VectorType *VecTy, unsigned Index);

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr);

  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index, Value *Op0, Value *Op1);
  InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index);

  InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
                                         bool IsUnsigned,
                                         TTI::TargetCostKind CostKind);

  InstructionCost getArithmeticReductionCostSVE(unsigned Opcode,
                                                VectorType *ValTy,
                                                TTI::TargetCostKind CostKind);

  InstructionCost getSpliceCost(VectorType *Tp, int Index);

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr);

  InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                            const SCEV *Ptr);

  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                     CmpInst::Predicate VecPred,
                                     TTI::TargetCostKind CostKind,
                                     const Instruction *I = nullptr);

  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                    bool IsZeroCmp) const;

  bool useNeonVector(const Type *Ty) const;

  InstructionCost
  getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                  unsigned AddressSpace, TTI::TargetCostKind CostKind,
                  TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None},
                  const Instruction *I = nullptr);

  InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE);

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);

  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType);

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info);
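
  // Element types accepted by the SVE legality checks below (masked
  // loads/stores and gathers/scatters).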
  bool isElementTypeLegalForScalableVector(Type *Ty) const {
    if (Ty->isPointerTy())
      return true;

    if (Ty->isBFloatTy() && ST->hasBF16())
      return true;

    if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())
      return true;

    if (Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||
        Ty->isIntegerTy(32) || Ty->isIntegerTy(64))
      return true;

    return false;
  }

  bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) {
    if (!ST->hasSVE())
      return false;

    // For fixed vectors, avoid scalarization if using SVE for them.
    if (isa<FixedVectorType>(DataType) && !ST->useSVEForFixedLengthVectors())
      return false; // Fall back to scalarization of masked operations.

    return isElementTypeLegalForScalableVector(DataType->getScalarType());
  }

  bool isLegalMaskedLoad(Type *DataType, Align Alignment) {
    return isLegalMaskedLoadStore(DataType, Alignment);
  }

  bool isLegalMaskedStore(Type *DataType, Align Alignment) {
    return isLegalMaskedLoadStore(DataType, Alignment);
  }

  bool isLegalMaskedGatherScatter(Type *DataType) const {
    if (!ST->hasSVE() || ST->forceStreamingCompatibleSVE())
      return false;

    // For fixed vectors, scalarize if not using SVE for them.
    auto *DataTypeFVTy = dyn_cast<FixedVectorType>(DataType);
    if (DataTypeFVTy && (!ST->useSVEForFixedLengthVectors() ||
                         DataTypeFVTy->getNumElements() < 2))
      return false;

    return isElementTypeLegalForScalableVector(DataType->getScalarType());
  }

  bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
    return isLegalMaskedGatherScatter(DataType);
  }

  bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
    return isLegalMaskedGatherScatter(DataType);
  }

  bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const {
    // Return true if we can generate a `ld1r` splat load instruction.
    if (!ST->hasNEON() || NumElements.isScalable())
      return false;
    switch (unsigned ElementBits = ElementTy->getScalarSizeInBits()) {
    case 8:
    case 16:
    case 32:
    case 64: {
      // We accept vector bit-widths of at least 64 bits and element widths of
      // 8, 16, 32 or 64 bits.
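      // E.g. an <8 x i8> or <2 x i32> splat (64 bits) and a <4 x i32> splat
      // (128 bits) qualify, but a <2 x i8> splat (16 bits) does not.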
      unsigned VectorBits = NumElements.getFixedValue() * ElementBits;
      return VectorBits >= 64;
    }
    }
    return false;
  }

  bool isLegalNTStoreLoad(Type *DataType, Align Alignment) {
    // NOTE: The logic below is mostly geared towards LV, which calls it with
    //       vectors with 2 elements. We might want to improve that, if other
    //       users show up.
    // Nontemporal vector loads/stores can be directly lowered to LDNP/STNP, if
    // the vector can be halved so that each half fits into a register. That's
    // the case if the element type fits into a register and the number of
    // elements is a power of 2 > 1.
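    // E.g. a <4 x i64> (256 bits) splits into two 128-bit halves and can be
    // lowered to a single LDNP/STNP of Q registers.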
    if (auto *DataTypeTy = dyn_cast<FixedVectorType>(DataType)) {
      unsigned NumElements = DataTypeTy->getNumElements();
      unsigned EltSize = DataTypeTy->getElementType()->getScalarSizeInBits();
      return NumElements > 1 && isPowerOf2_64(NumElements) && EltSize >= 8 &&
             EltSize <= 128 && isPowerOf2_64(EltSize);
    }
    return BaseT::isLegalNTStore(DataType, Alignment);
  }

  bool isLegalNTStore(Type *DataType, Align Alignment) {
    return isLegalNTStoreLoad(DataType, Alignment);
  }

  bool isLegalNTLoad(Type *DataType, Align Alignment) {
    // Only supports little-endian targets.
    if (ST->isLittleEndian())
      return isLegalNTStoreLoad(DataType, Alignment);
    return BaseT::isLegalNTLoad(DataType, Alignment);
  }

  bool enableOrderedReductions() const { return true; }

  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond = false, bool UseMaskForGaps = false);

  bool
  shouldConsiderAddressTypePromotion(const Instruction &I,
                                     bool &AllowPromotionWithoutCommonHeader);

  bool shouldExpandReduction(const IntrinsicInst *II) const { return false; }
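
  // The cost of 2 likely reflects that materializing a global address on
  // AArch64 typically takes two instructions (e.g. ADRP + ADD).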
  unsigned getGISelRematGlobalCost() const {
    return 2;
  }

  unsigned getMinTripCountTailFoldingThreshold() const {
    return ST->hasSVE() ? 5 : 0;
  }
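
  // With SVE, llvm.get.active.lane.mask lowers to a `whilelo` instruction, so
  // the same predicate can drive both the masked (data) operations and the
  // loop's control flow.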
  PredicationStyle emitGetActiveLaneMask() const {
    if (ST->hasSVE())
      return PredicationStyle::DataAndControlFlow;
    return PredicationStyle::None;
  }

  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
                                   AssumptionCache &AC, TargetLibraryInfo *TLI,
                                   DominatorTree *DT,
                                   LoopVectorizationLegality *LVL,
                                   InterleavedAccessInfo *IAI);

  bool supportsScalableVectors() const { return ST->hasSVE(); }

  bool enableScalableVectorization() const { return ST->hasSVE(); }

  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                   ElementCount VF) const;

  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                       TTI::ReductionFlags Flags) const {
    return ST->hasSVE();
  }

  InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                                             std::optional<FastMathFlags> FMF,
                                             TTI::TargetCostKind CostKind);

  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
                                 ArrayRef<int> Mask,
                                 TTI::TargetCostKind CostKind, int Index,
                                 VectorType *SubTp,
                                 ArrayRef<const Value *> Args = std::nullopt);

  /// Return the cost of the scaling factor used in the addressing
  /// mode represented by AM for this target, for a load/store
  /// of the specified type.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, it returns a negative value.
  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       int64_t BaseOffset, bool HasBaseReg,
                                       int64_t Scale, unsigned AddrSpace) const;
  /// @}

  bool enableSelectOptimize() { return ST->enableSelectOptimize(); }
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H