//===- AArch64TargetTransformInfo.h - AArch64 specific TTI ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file defines a TargetTransformInfo::Concept conforming object specific
/// to the AArch64 target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target independent and default TTI implementations handle the rest.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H

#include "AArch64.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include <cstdint>

namespace llvm {

class APInt;
class Instruction;
class IntrinsicInst;
class Loop;
class SCEV;
class ScalarEvolution;
class Type;
class Value;
class VectorType;

class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
  using BaseT = BasicTTIImplBase<AArch64TTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const AArch64Subtarget *ST;
  const AArch64TargetLowering *TLI;

  const AArch64Subtarget *getST() const { return ST; }
  const AArch64TargetLowering *getTLI() const { return TLI; }
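
  /// Describes vector load/store intrinsics that access groups of two, three
  /// or four consecutive elements (the structured ldN/stN family).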
  enum MemIntrinsicType {
    VECTOR_LDST_TWO_ELEMENTS,
    VECTOR_LDST_THREE_ELEMENTS,
    VECTOR_LDST_FOUR_ELEMENTS
  };
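
  /// Returns true when an add/sub of type \p Ty whose operands \p Args are
  /// extends can be matched to a widening form (e.g. uaddl/saddw), so the
  /// operand extensions are effectively free.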
  bool isWideningInstruction(Type *Ty, unsigned Opcode,
                             ArrayRef<const Value *> Args);

public:
  explicit AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F)
      : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
        TLI(ST->getTargetLowering()) {}
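
  /// Returns true if \p Callee can be inlined into \p Caller, i.e. the callee
  /// does not rely on subtarget features that the caller lacks.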
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  /// \name Scalar TTI Implementations
  /// @{
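
  // The immediate-cost hooks below estimate how cheaply an integer constant
  // can be materialized, or folded into the instruction or intrinsic that
  // uses it.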

  using BaseT::getIntImmCost;
  InstructionCost getIntImmCost(int64_t Val);
  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TTI::TargetCostKind CostKind);
  InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TTI::TargetCostKind CostKind,
                                    Instruction *Inst = nullptr);
  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                      const APInt &Imm, Type *Ty,
                                      TTI::TargetCostKind CostKind);
  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);

  /// @}

  /// \name Vector TTI Implementations
  /// @{

  bool enableInterleavedAccessVectorization() { return true; }
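
  /// AArch64 exposes 31 allocatable general-purpose registers; with NEON there
  /// are additionally 32 vector registers (ClassID 1), otherwise no vector
  /// registers are reported.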
  unsigned getNumberOfRegisters(unsigned ClassID) const {
    bool Vector = (ClassID == 1);
    if (Vector) {
      if (ST->hasNEON())
        return 32;
      return 0;
    }
    return 31;
  }

  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind);

  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                               IntrinsicInst &II) const;

  Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const;
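
  /// Scalar registers are 64 bits wide. Fixed-width vectors use the 128-bit
  /// NEON registers unless SVE is available, in which case the configured
  /// minimum SVE vector length (at least 128 bits) is reported; scalable
  /// vectors have a known minimum size of 128 bits.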
  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
    switch (K) {
    case TargetTransformInfo::RGK_Scalar:
      return TypeSize::getFixed(64);
    case TargetTransformInfo::RGK_FixedWidthVector:
      if (ST->hasSVE())
        return TypeSize::getFixed(
            std::max(ST->getMinSVEVectorSizeInBits(), 128u));
      return TypeSize::getFixed(ST->hasNEON() ? 128 : 0);
    case TargetTransformInfo::RGK_ScalableVector:
      return TypeSize::getScalable(ST->hasSVE() ? 128 : 0);
    }
    llvm_unreachable("Unsupported register kind");
  }

  unsigned getMinVectorRegisterBitWidth() const {
    return ST->getMinVectorRegisterBitWidth();
  }

  Optional<unsigned> getVScaleForTuning() const {
    return ST->getVScaleForTuning();
  }

  /// Try to return an estimated cost factor that can be used as a multiplier
  /// when scalarizing an operation for a vector with ElementCount \p VF.
  /// For scalable vectors this scales the known minimum element count by the
  /// subtarget's vscale-for-tuning estimate.
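  /// For example, an element count of (vscale x 4) with a vscale-for-tuning
  /// of 2 yields an estimate of 8 elements.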
  unsigned getMaxNumElements(ElementCount VF) const {
    if (!VF.isScalable())
      return VF.getFixedValue();

    return VF.getKnownMinValue() * ST->getVScaleForTuning();
  }

  unsigned getMaxInterleaveFactor(unsigned VF);

  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                        Align Alignment, unsigned AddressSpace,
                                        TTI::TargetCostKind CostKind);

  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr);

  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   TTI::CastContextHint CCH,
                                   TTI::TargetCostKind CostKind,
                                   const Instruction *I = nullptr);

  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                           VectorType *VecTy, unsigned Index);

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr);

  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     unsigned Index);

  InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
                                         bool IsUnsigned,
                                         TTI::TargetCostKind CostKind);

  InstructionCost getArithmeticReductionCostSVE(unsigned Opcode,
                                                VectorType *ValTy,
                                                TTI::TargetCostKind CostKind);

  InstructionCost getSpliceCost(VectorType *Tp, int Index);

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr);

  InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                            const SCEV *Ptr);

  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                     CmpInst::Predicate VecPred,
                                     TTI::TargetCostKind CostKind,
                                     const Instruction *I = nullptr);

  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                    bool IsZeroCmp) const;

  bool useNeonVector(const Type *Ty) const;

  InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
                                  MaybeAlign Alignment, unsigned AddressSpace,
                                  TTI::TargetCostKind CostKind,
                                  const Instruction *I = nullptr);

  InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE);

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);

  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType);

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info);
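
  /// Element types supported by SVE masked loads/stores and gathers/scatters:
  /// pointers, half/float/double, bfloat when +bf16 is available, and the
  /// i8/i16/i32/i64 integer types.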
  bool isElementTypeLegalForScalableVector(Type *Ty) const {
    if (Ty->isPointerTy())
      return true;

    if (Ty->isBFloatTy() && ST->hasBF16())
      return true;

    if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())
      return true;

    if (Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||
        Ty->isIntegerTy(32) || Ty->isIntegerTy(64))
      return true;

    return false;
  }
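
  /// Masked loads and stores are legal when SVE is available; fixed-length
  /// vectors additionally require SVE codegen for fixed-length types, since
  /// otherwise the operation would be scalarized.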
  bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) {
    if (!ST->hasSVE())
      return false;

    // For fixed vectors, avoid scalarization if using SVE for them.
    if (isa<FixedVectorType>(DataType) && !ST->useSVEForFixedLengthVectors())
      return false; // Fall back to scalarization of masked operations.

    return isElementTypeLegalForScalableVector(DataType->getScalarType());
  }

  bool isLegalMaskedLoad(Type *DataType, Align Alignment) {
    return isLegalMaskedLoadStore(DataType, Alignment);
  }

  bool isLegalMaskedStore(Type *DataType, Align Alignment) {
    return isLegalMaskedLoadStore(DataType, Alignment);
  }
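
  /// Gathers and scatters follow the same rules as masked loads/stores, and
  /// additionally require fixed-length vectors to have at least two elements.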
  bool isLegalMaskedGatherScatter(Type *DataType) const {
    if (!ST->hasSVE())
      return false;

    // For fixed vectors, scalarize if not using SVE for them.
    auto *DataTypeFVTy = dyn_cast<FixedVectorType>(DataType);
    if (DataTypeFVTy && (!ST->useSVEForFixedLengthVectors() ||
                         DataTypeFVTy->getNumElements() < 2))
      return false;

    return isElementTypeLegalForScalableVector(DataType->getScalarType());
  }

  bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
    return isLegalMaskedGatherScatter(DataType);
  }

  bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
    return isLegalMaskedGatherScatter(DataType);
  }

  bool isLegalNTStore(Type *DataType, Align Alignment) {
    // NOTE: The logic below is mostly geared towards LV, which calls it with
    //       vectors with 2 elements. We might want to improve that, if other
    //       users show up.
    // Nontemporal vector stores can be directly lowered to STNP, if the vector
    // can be halved so that each half fits into a register. That's the case if
    // the element type fits into a register and the number of elements is a
    // power of 2 > 1.
    if (auto *DataTypeVTy = dyn_cast<VectorType>(DataType)) {
      unsigned NumElements =
          cast<FixedVectorType>(DataTypeVTy)->getNumElements();
      unsigned EltSize = DataTypeVTy->getElementType()->getScalarSizeInBits();
      return NumElements > 1 && isPowerOf2_64(NumElements) && EltSize >= 8 &&
             EltSize <= 128 && isPowerOf2_64(EltSize);
    }
    return BaseT::isLegalNTStore(DataType, Alignment);
  }

  bool enableOrderedReductions() const { return true; }
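
  /// Interleaved accesses with factors 2-4 can typically be lowered to the
  /// structured ldN/stN instructions, so this override costs them more
  /// accurately than the generic shuffle-based expansion.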
  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond = false, bool UseMaskForGaps = false);

  bool
  shouldConsiderAddressTypePromotion(const Instruction &I,
                                     bool &AllowPromotionWithoutCommonHeader);

  bool shouldExpandReduction(const IntrinsicInst *II) const { return false; }
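
  /// Cost used by GlobalISel when deciding whether to rematerialize a
  /// GlobalValue address instead of spilling and reloading it.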
  unsigned getGISelRematGlobalCost() const {
    return 2;
  }
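
  /// When SVE is available the loop vectorizer may emit the
  /// llvm.get.active.lane.mask intrinsic to build loop predicates, which maps
  /// naturally onto the SVE while instructions.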
  bool emitGetActiveLaneMask() const {
    return ST->hasSVE();
  }

  bool supportsScalableVectors() const { return ST->hasSVE(); }

  bool enableScalableVectorization() const { return ST->hasSVE(); }

  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                   ElementCount VF) const;

  InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                                             Optional<FastMathFlags> FMF,
                                             TTI::TargetCostKind CostKind);

  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
                                 ArrayRef<int> Mask, int Index,
                                 VectorType *SubTp);
  /// @}
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H