ARMTargetTransformInfo.h

//===- ARMTargetTransformInfo.h - ARM specific TTI --------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file defines a TargetTransformInfo::Concept conforming object specific
/// to the ARM target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target independent and default TTI implementations handle the rest.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H

#include "ARM.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/SubtargetFeature.h"
#include <optional>

namespace llvm {

class APInt;
class ARMTargetLowering;
class Instruction;
class Loop;
class SCEV;
class ScalarEvolution;
class Type;
class Value;

namespace TailPredication {
  enum Mode {
    Disabled = 0,
    EnabledNoReductions,
    Enabled,
    ForceEnabledNoReductions,
    ForceEnabled
  };
} // namespace TailPredication

// For controlling conversion of memcpy into Tail Predicated loop.
namespace TPLoop {
  enum MemTransfer { ForceDisabled = 0, ForceEnabled, Allow };
} // namespace TPLoop

class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
  using BaseT = BasicTTIImplBase<ARMTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const ARMSubtarget *ST;
  const ARMTargetLowering *TLI;

  // Currently the following features are excluded from InlineFeaturesAllowed:
  // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureFP64, FeatureD32.
  // Depending on whether they are set or unset, different
  // instructions/registers are available. For example, inlining a callee with
  // -thumb-mode into a caller with +thumb-mode may cause the assembler to
  // fail if the callee uses ARM-only instructions, e.g. in inline asm.
  const FeatureBitset InlineFeaturesAllowed = {
      ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2,
      ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8,
      ARM::FeatureFullFP16, ARM::FeatureFP16FML, ARM::FeatureHWDivThumb,
      ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex,
      ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc,
      ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt,
      ARM::FeatureCrypto, ARM::FeatureCRC, ARM::FeatureRAS,
      ARM::FeatureFPAO, ARM::FeatureFuseAES, ARM::FeatureZCZeroing,
      ARM::FeatureProfUnpredicate, ARM::FeatureSlowVGETLNi32,
      ARM::FeatureSlowVDUP32, ARM::FeaturePreferVMOVSR,
      ARM::FeaturePrefISHSTBarrier, ARM::FeatureMuxedUnits,
      ARM::FeatureSlowOddRegister, ARM::FeatureSlowLoadDSubreg,
      ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx,
      ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs,
      ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign,
      ARM::FeatureHasSlowFPVMLx, ARM::FeatureHasSlowFPVFMx,
      ARM::FeatureVMLxForwarding, ARM::FeaturePref32BitThumb,
      ARM::FeatureAvoidPartialCPSR, ARM::FeatureCheapPredicableCPSR,
      ARM::FeatureAvoidMOVsShOp, ARM::FeatureHasRetAddrStack,
      ARM::FeatureHasNoBranchPredictor, ARM::FeatureDSP, ARM::FeatureMP,
      ARM::FeatureVirtualization, ARM::FeatureMClass, ARM::FeatureRClass,
      ARM::FeatureAClass, ARM::FeatureNaClTrap, ARM::FeatureStrictAlign,
      ARM::FeatureLongCalls, ARM::FeatureExecuteOnly, ARM::FeatureReserveR9,
      ARM::FeatureNoMovt, ARM::FeatureNoNegativeImmediates
  };

  const ARMSubtarget *getST() const { return ST; }
  const ARMTargetLowering *getTLI() const { return TLI; }

public:
  explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, const Function &F)
      : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
        TLI(ST->getTargetLowering()) {}

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;
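
  // A minimal sketch (an assumption about the out-of-line definition in
  // ARMTargetTransformInfo.cpp, not part of this header): conceptually, a
  // caller and callee are inline-compatible when their subtarget feature bits
  // agree once the bits in InlineFeaturesAllowed are masked out, e.g.
  //
  //   FeatureBitset CallerBits =
  //       CallerST->getFeatureBits() & ~InlineFeaturesAllowed;
  //   FeatureBitset CalleeBits =
  //       CalleeST->getFeatureBits() & ~InlineFeaturesAllowed;
  //   bool Compatible = CallerBits == CalleeBits;
  //
  // where CallerST and CalleeST stand for the ARMSubtargets of the two
  // functions (hypothetical names used only for this sketch).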

  bool enableInterleavedAccessVectorization() { return true; }

  TTI::AddressingModeKind
  getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const;

  /// Floating-point computation using ARMv8 AArch32 Advanced
  /// SIMD instructions remains unchanged from ARMv7. Only AArch64 SIMD
  /// and Arm MVE are IEEE-754 compliant.
  bool isFPVectorizationPotentiallyUnsafe() {
    return !ST->isTargetDarwin() && !ST->hasMVEFloatOps();
  }

  std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                                    IntrinsicInst &II) const;
  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const;

  /// \name Scalar TTI Implementations
  /// @{

  InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
                                        const APInt &Imm, Type *Ty);

  using BaseT::getIntImmCost;
  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TTI::TargetCostKind CostKind);

  InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TTI::TargetCostKind CostKind,
                                    Instruction *Inst = nullptr);

  /// @}

  /// \name Vector TTI Implementations
  /// @{

  unsigned getNumberOfRegisters(unsigned ClassID) const {
    bool Vector = (ClassID == 1);
    if (Vector) {
      if (ST->hasNEON())
        return 16;
      if (ST->hasMVEIntegerOps())
        return 8;
      return 0;
    }

    if (ST->isThumb1Only())
      return 8;
    return 13;
  }

  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
    switch (K) {
    case TargetTransformInfo::RGK_Scalar:
      return TypeSize::getFixed(32);
    case TargetTransformInfo::RGK_FixedWidthVector:
      if (ST->hasNEON())
        return TypeSize::getFixed(128);
      if (ST->hasMVEIntegerOps())
        return TypeSize::getFixed(128);
      return TypeSize::getFixed(0);
    case TargetTransformInfo::RGK_ScalableVector:
      return TypeSize::getScalable(0);
    }
    llvm_unreachable("Unsupported register kind");
  }
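
  // Illustrative usage (names and setup are assumptions for exposition, not
  // part of this header): on an MVE (Armv8.1-M) subtarget the two queries
  // above report 8 vector registers of 128 bits each.
  //
  //   TargetTransformInfo TTI = TM->getTargetTransformInfo(F);
  //   unsigned NumVecRegs = TTI.getNumberOfRegisters(/*ClassID=*/1);  // 8
  //   TypeSize VecWidth =
  //       TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector);
  //   // VecWidth.getFixedValue() == 128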

  unsigned getMaxInterleaveFactor(unsigned VF) {
    return ST->getMaxInterleaveFactor();
  }

  bool isProfitableLSRChainElement(Instruction *I);

  bool isLegalMaskedLoad(Type *DataTy, Align Alignment);

  bool isLegalMaskedStore(Type *DataTy, Align Alignment) {
    return isLegalMaskedLoad(DataTy, Alignment);
  }

  bool forceScalarizeMaskedGather(VectorType *VTy, Align Alignment) {
    // For MVE, we have a custom lowering pass that will already have custom
    // legalised any gathers that we can lower to MVE intrinsics, and want to
    // expand all the rest. The pass runs before the masked intrinsic lowering
    // pass.
    return true;
  }

  bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment) {
    return forceScalarizeMaskedGather(VTy, Alignment);
  }

  bool isLegalMaskedGather(Type *Ty, Align Alignment);

  bool isLegalMaskedScatter(Type *Ty, Align Alignment) {
    return isLegalMaskedGather(Ty, Alignment);
  }

  InstructionCost getMemcpyCost(const Instruction *I);

  int getNumMemOps(const IntrinsicInst *I) const;

  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
                                 ArrayRef<int> Mask,
                                 TTI::TargetCostKind CostKind, int Index,
                                 VectorType *SubTp,
                                 ArrayRef<const Value *> Args = std::nullopt);

  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
                             TTI::ReductionFlags Flags) const;

  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                       TTI::ReductionFlags Flags) const;

  bool shouldExpandReduction(const IntrinsicInst *II) const { return false; }

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr);

  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   TTI::CastContextHint CCH,
                                   TTI::TargetCostKind CostKind,
                                   const Instruction *I = nullptr);

  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                     CmpInst::Predicate VecPred,
                                     TTI::TargetCostKind CostKind,
                                     const Instruction *I = nullptr);

  using BaseT::getVectorInstrCost;
  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index, Value *Op0, Value *Op1);

  InstructionCost getAddressComputationCost(Type *Val, ScalarEvolution *SE,
                                            const SCEV *Ptr);

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr);

  InstructionCost
  getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                  unsigned AddressSpace, TTI::TargetCostKind CostKind,
                  TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None},
                  const Instruction *I = nullptr);

  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                        Align Alignment, unsigned AddressSpace,
                                        TTI::TargetCostKind CostKind);

  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond = false, bool UseMaskForGaps = false);

  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr);

  InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
                                             std::optional<FastMathFlags> FMF,
                                             TTI::TargetCostKind CostKind);
  InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned,
                                           Type *ResTy, VectorType *ValTy,
                                           std::optional<FastMathFlags> FMF,
                                           TTI::TargetCostKind CostKind);
  InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy,
                                         VectorType *ValTy,
                                         TTI::TargetCostKind CostKind);

  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind);

  /// getScalingFactorCost - Return the cost of the scaling used in the
  /// addressing mode represented by AM.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, the return value must be negative.
  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       int64_t BaseOffset, bool HasBaseReg,
                                       int64_t Scale, unsigned AddrSpace) const;

  bool maybeLoweredToCall(Instruction &I);
  bool isLoweredToCall(const Function *F);
  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC,
                                TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo);
  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
                                   AssumptionCache &AC, TargetLibraryInfo *TLI,
                                   DominatorTree *DT,
                                   LoopVectorizationLegality *LVL,
                                   InterleavedAccessInfo *IAI);
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE);

  PredicationStyle emitGetActiveLaneMask() const;

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);

  bool shouldBuildLookupTablesForConstant(Constant *C) const {
    // In the ROPI and RWPI relocation models we can't have pointers to global
    // variables or functions in constant data, so don't convert switches to
    // lookup tables if any of the values would need relocation.
    if (ST->isROPI() || ST->isRWPI())
      return !C->needsDynamicRelocation();
    return true;
  }
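
  // Illustrative example (an assumption for exposition, not part of this
  // header): under ROPI/RWPI a switch that selects between function pointers,
  // such as
  //
  //   switch (Kind) {
  //   case 0:  return &handler0;
  //   case 1:  return &handler1;
  //   default: return &handlerDefault;
  //   }
  //
  // would otherwise be turned into a constant table of code addresses, each
  // needing a dynamic relocation, so the conversion is suppressed here.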

  /// @}
};

/// isVREVMask - Check if a vector shuffle corresponds to a VREV
/// instruction with the specified blocksize. (The order of the elements
/// within each block of the vector is reversed.)
inline bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
  assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
         "Only possible block sizes for VREV are: 16, 32, 64");

  unsigned EltSz = VT.getScalarSizeInBits();
  if (EltSz != 8 && EltSz != 16 && EltSz != 32)
    return false;

  unsigned BlockElts = M[0] + 1;
  // If the first shuffle index is UNDEF, be optimistic.
  if (M[0] < 0)
    BlockElts = BlockSize / EltSz;

  if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
    return false;

  for (unsigned i = 0, e = M.size(); i < e; ++i) {
    if (M[i] < 0)
      continue; // ignore UNDEF indices
    if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
      return false;
  }

  return true;
}
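
// Worked example (added for exposition, not part of the original header): the
// mask <3,2,1,0,7,6,5,4> on an <8 x i8> shuffle reverses each 32-bit block of
// four byte-sized elements, so it matches VREV32.8 but not VREV64.8:
//
//   int Mask[] = {3, 2, 1, 0, 7, 6, 5, 4};
//   bool Rev32 = isVREVMask(Mask, MVT::v8i8, 32); // true
//   bool Rev64 = isVREVMask(Mask, MVT::v8i8, 64); // false: 64 != BlockElts * 8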

} // end namespace llvm

#endif // LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H