//===- ARMTargetTransformInfo.h - ARM specific TTI --------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file defines a TargetTransformInfo::Concept conforming object specific
/// to the ARM target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target independent and default TTI implementations handle the rest.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H

#include "ARM.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/SubtargetFeature.h"

namespace llvm {

class APInt;
class ARMTargetLowering;
class Instruction;
class Loop;
class SCEV;
class ScalarEvolution;
class Type;
class Value;
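
// Selects how aggressively MVE tail-predicated (low-overhead) loops are
// formed. Roughly: the "NoReductions" variants leave loops containing
// reductions alone, and the "Force*" variants bypass the usual checks on
// whether tail-predication is considered safe and profitable.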
namespace TailPredication {
enum Mode {
  Disabled = 0,
  EnabledNoReductions,
  Enabled,
  ForceEnabledNoReductions,
  ForceEnabled
};
} // namespace TailPredication

// For controlling conversion of memcpy into Tail Predicated loop.
namespace TPLoop {
enum MemTransfer { ForceDisabled = 0, ForceEnabled, Allow };
} // namespace TPLoop

class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
  using BaseT = BasicTTIImplBase<ARMTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const ARMSubtarget *ST;
  const ARMTargetLowering *TLI;

  // Currently the following features are excluded from InlineFeaturesAllowed.
  // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureFP64, FeatureD32
  // Depending on whether they are set or unset, different
  // instructions/registers are available. For example, inlining a callee with
  // -thumb-mode in a caller with +thumb-mode may cause the assembler to
  // fail if the callee uses ARM-only instructions, e.g. in inline asm.
  const FeatureBitset InlineFeaturesAllowed = {
      ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2,
      ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8,
      ARM::FeatureFullFP16, ARM::FeatureFP16FML, ARM::FeatureHWDivThumb,
      ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex,
      ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc,
      ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt,
      ARM::FeatureCrypto, ARM::FeatureCRC, ARM::FeatureRAS,
      ARM::FeatureFPAO, ARM::FeatureFuseAES, ARM::FeatureZCZeroing,
      ARM::FeatureProfUnpredicate, ARM::FeatureSlowVGETLNi32,
      ARM::FeatureSlowVDUP32, ARM::FeaturePreferVMOVSR,
      ARM::FeaturePrefISHSTBarrier, ARM::FeatureMuxedUnits,
      ARM::FeatureSlowOddRegister, ARM::FeatureSlowLoadDSubreg,
      ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx,
      ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs,
      ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign,
      ARM::FeatureHasSlowFPVMLx, ARM::FeatureHasSlowFPVFMx,
      ARM::FeatureVMLxForwarding, ARM::FeaturePref32BitThumb,
      ARM::FeatureAvoidPartialCPSR, ARM::FeatureCheapPredicableCPSR,
      ARM::FeatureAvoidMOVsShOp, ARM::FeatureHasRetAddrStack,
      ARM::FeatureHasNoBranchPredictor, ARM::FeatureDSP, ARM::FeatureMP,
      ARM::FeatureVirtualization, ARM::FeatureMClass, ARM::FeatureRClass,
      ARM::FeatureAClass, ARM::FeatureNaClTrap, ARM::FeatureStrictAlign,
      ARM::FeatureLongCalls, ARM::FeatureExecuteOnly, ARM::FeatureReserveR9,
      ARM::FeatureNoMovt, ARM::FeatureNoNegativeImmediates
  };

  const ARMSubtarget *getST() const { return ST; }
  const ARMTargetLowering *getTLI() const { return TLI; }

public:
  explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, const Function &F)
      : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
        TLI(ST->getTargetLowering()) {}
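
  // Sketch of the contract (implemented in ARMTargetTransformInfo.cpp):
  // inlining is allowed when caller and callee agree exactly on every
  // subtarget feature outside InlineFeaturesAllowed, and the callee's allowed
  // features are a subset of the caller's.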
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  bool enableInterleavedAccessVectorization() { return true; }

  TTI::AddressingModeKind
  getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const;

  /// Floating-point computation using ARMv8 AArch32 Advanced
  /// SIMD instructions remains unchanged from ARMv7. Only AArch64 SIMD
  /// and Arm MVE are IEEE-754 compliant.
  bool isFPVectorizationPotentiallyUnsafe() {
    return !ST->isTargetDarwin() && !ST->hasMVEFloatOps();
  }

  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                               IntrinsicInst &II) const;
  Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const;

  /// \name Scalar TTI Implementations
  /// @{

  InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
                                        const APInt &Imm, Type *Ty);

  using BaseT::getIntImmCost;
  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TTI::TargetCostKind CostKind);
  InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TTI::TargetCostKind CostKind,
                                    Instruction *Inst = nullptr);

  /// @}

  /// \name Vector TTI Implementations
  /// @{

  unsigned getNumberOfRegisters(unsigned ClassID) const {
    bool Vector = (ClassID == 1);
    if (Vector) {
      if (ST->hasNEON())
        return 16;
      if (ST->hasMVEIntegerOps())
        return 8;
      return 0;
    }

    if (ST->isThumb1Only())
      return 8;
    return 13;
  }
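
  // Concretely: ClassID == 1 selects the vector register class, so a
  // subtarget with MVE integer ops reports 8 Q registers, one with NEON
  // reports 16, and targets without vector support report 0; the scalar
  // class reports 13 GPRs (8 on Thumb1-only targets).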

  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
    switch (K) {
    case TargetTransformInfo::RGK_Scalar:
      return TypeSize::getFixed(32);
    case TargetTransformInfo::RGK_FixedWidthVector:
      if (ST->hasNEON())
        return TypeSize::getFixed(128);
      if (ST->hasMVEIntegerOps())
        return TypeSize::getFixed(128);
      return TypeSize::getFixed(0);
    case TargetTransformInfo::RGK_ScalableVector:
      return TypeSize::getScalable(0);
    }
    llvm_unreachable("Unsupported register kind");
  }

  unsigned getMaxInterleaveFactor(unsigned VF) {
    return ST->getMaxInterleaveFactor();
  }

  bool isProfitableLSRChainElement(Instruction *I);

  bool isLegalMaskedLoad(Type *DataTy, Align Alignment);

  bool isLegalMaskedStore(Type *DataTy, Align Alignment) {
    return isLegalMaskedLoad(DataTy, Alignment);
  }

  bool forceScalarizeMaskedGather(VectorType *VTy, Align Alignment) {
    // For MVE, we have a custom lowering pass that will already have custom
    // legalised any gathers that we can lower to MVE intrinsics, and want to
    // expand all the rest. The pass runs before the masked intrinsic lowering
    // pass.
    return true;
  }

  bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment) {
    return forceScalarizeMaskedGather(VTy, Alignment);
  }

  bool isLegalMaskedGather(Type *Ty, Align Alignment);

  bool isLegalMaskedScatter(Type *Ty, Align Alignment) {
    return isLegalMaskedGather(Ty, Alignment);
  }

  InstructionCost getMemcpyCost(const Instruction *I);

  int getNumMemOps(const IntrinsicInst *I) const;

  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
                                 ArrayRef<int> Mask, int Index,
                                 VectorType *SubTp);

  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
                             TTI::ReductionFlags Flags) const;

  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                       TTI::ReductionFlags Flags) const;

  bool shouldExpandReduction(const IntrinsicInst *II) const { return false; }

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr);

  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   TTI::CastContextHint CCH,
                                   TTI::TargetCostKind CostKind,
                                   const Instruction *I = nullptr);

  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                     CmpInst::Predicate VecPred,
                                     TTI::TargetCostKind CostKind,
                                     const Instruction *I = nullptr);

  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     unsigned Index);

  InstructionCost getAddressComputationCost(Type *Val, ScalarEvolution *SE,
                                            const SCEV *Ptr);

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueKind Op1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Op2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr);

  InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
                                  MaybeAlign Alignment, unsigned AddressSpace,
                                  TTI::TargetCostKind CostKind,
                                  const Instruction *I = nullptr);

  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                        Align Alignment, unsigned AddressSpace,
                                        TTI::TargetCostKind CostKind);

  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond = false, bool UseMaskForGaps = false);

  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr);

  InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
                                             Optional<FastMathFlags> FMF,
                                             TTI::TargetCostKind CostKind);
  InstructionCost getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned,
                                              Type *ResTy, VectorType *ValTy,
                                              TTI::TargetCostKind CostKind);

  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind);

  bool maybeLoweredToCall(Instruction &I);
  bool isLoweredToCall(const Function *F);
  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC,
                                TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo);
  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI,
                                   ScalarEvolution &SE,
                                   AssumptionCache &AC,
                                   TargetLibraryInfo *TLI,
                                   DominatorTree *DT,
                                   const LoopAccessInfo *LAI);
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE);

  bool emitGetActiveLaneMask() const;

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);

  bool shouldBuildLookupTablesForConstant(Constant *C) const {
    // In the ROPI and RWPI relocation models we can't have pointers to global
    // variables or functions in constant data, so don't convert switches to
    // lookup tables if any of the values would need relocation.
    if (ST->isROPI() || ST->isRWPI())
      return !C->needsDynamicRelocation();
    return true;
  }

  /// @}
};
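
// Illustrative usage (a sketch, not part of the in-tree sources): passes do
// not normally construct ARMTTIImpl directly; they query it through the
// TargetTransformInfo analysis. With a FunctionAnalysisManager FAM, a
// Function F, and some vector Type *DataTy (placeholder names):
//
//   TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
//   if (TTI.isLegalMaskedLoad(DataTy, Align(4)))
//     ...; // Dispatches to ARMTTIImpl::isLegalMaskedLoad when targeting ARM.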

/// isVREVMask - Check if a vector shuffle corresponds to a VREV
/// instruction with the specified blocksize. (The order of the elements
/// within each block of the vector is reversed.)
inline bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
  assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
         "Only possible block sizes for VREV are: 16, 32, 64");

  unsigned EltSz = VT.getScalarSizeInBits();
  if (EltSz != 8 && EltSz != 16 && EltSz != 32)
    return false;

  unsigned BlockElts = M[0] + 1;
  // If the first shuffle index is UNDEF, be optimistic.
  if (M[0] < 0)
    BlockElts = BlockSize / EltSz;

  if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
    return false;

  for (unsigned i = 0, e = M.size(); i < e; ++i) {
    if (M[i] < 0)
      continue; // ignore UNDEF indices
    if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
      return false;
  }

  return true;
}
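
// Worked example: a VREV64 shuffle of <8 x i16> reverses the 16-bit elements
// within each 64-bit block, so BlockElts == 4 and the expected mask is
// <3, 2, 1, 0, 7, 6, 5, 4>; isVREVMask returns true for that mask with
// BlockSize == 64.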

} // end namespace llvm

#endif // LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H