//===-- X86TargetTransformInfo.h - X86 specific TTI -------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file defines a TargetTransformInfo::Concept conforming object specific
/// to the X86 target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target independent and default TTI implementations handle the rest.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_X86_X86TARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_X86_X86TARGETTRANSFORMINFO_H

#include "X86TargetMachine.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"

#include <optional>

namespace llvm {

class InstCombiner;
/// X86-specific TargetTransformInfo implementation.
///
/// Answers cost-model and legality queries from the optimizer using details
/// of the selected X86 subtarget (via \c ST and \c TLI); queries not
/// overridden here fall through to BasicTTIImplBase and the
/// target-independent defaults.
class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
  typedef BasicTTIImplBase<X86TTIImpl> BaseT;
  typedef TargetTransformInfo TTI;
  // BaseT is a CRTP base class; it needs access to the private getST()/getTLI()
  // hooks below.
  friend BaseT;

  const X86Subtarget *ST;
  const X86TargetLowering *TLI;

  // CRTP accessors through which BasicTTIImplBase reaches the target details.
  const X86Subtarget *getST() const { return ST; }
  const X86TargetLowering *getTLI() const { return TLI; }

  /// Subtarget features that should be ignored when checking whether a
  /// callee's feature set is compatible with its caller's for inlining.
  /// NOTE(review): presumably consulted by areInlineCompatible() below —
  /// confirm against the implementation in X86TargetTransformInfo.cpp.
  const FeatureBitset InlineFeatureIgnoreList = {
      // This indicates the CPU is 64 bit capable not that we are in 64-bit
      // mode.
      X86::FeatureX86_64,

      // These features don't have any intrinsics or ABI effect.
      X86::FeatureNOPL,
      X86::FeatureCX16,
      X86::FeatureLAHFSAHF64,

      // Some older targets can be setup to fold unaligned loads.
      X86::FeatureSSEUnalignedMem,

      // Codegen control options.
      X86::TuningFast11ByteNOP,
      X86::TuningFast15ByteNOP,
      X86::TuningFastBEXTR,
      X86::TuningFastHorizontalOps,
      X86::TuningFastLZCNT,
      X86::TuningFastScalarFSQRT,
      X86::TuningFastSHLDRotate,
      X86::TuningFastScalarShiftMasks,
      X86::TuningFastVectorShiftMasks,
      X86::TuningFastVariableCrossLaneShuffle,
      X86::TuningFastVariablePerLaneShuffle,
      X86::TuningFastVectorFSQRT,
      X86::TuningLEAForSP,
      X86::TuningLEAUsesAG,
      X86::TuningLZCNTFalseDeps,
      X86::TuningBranchFusion,
      X86::TuningMacroFusion,
      X86::TuningPadShortFunctions,
      X86::TuningPOPCNTFalseDeps,
      X86::TuningMULCFalseDeps,
      X86::TuningPERMFalseDeps,
      X86::TuningRANGEFalseDeps,
      X86::TuningGETMANTFalseDeps,
      X86::TuningMULLQFalseDeps,
      X86::TuningSlow3OpsLEA,
      X86::TuningSlowDivide32,
      X86::TuningSlowDivide64,
      X86::TuningSlowIncDec,
      X86::TuningSlowLEA,
      X86::TuningSlowPMADDWD,
      X86::TuningSlowPMULLD,
      X86::TuningSlowSHLD,
      X86::TuningSlowTwoMemOps,
      X86::TuningSlowUAMem16,
      X86::TuningPreferMaskRegisters,
      X86::TuningInsertVZEROUPPER,
      X86::TuningUseSLMArithCosts,
      X86::TuningUseGLMDivSqrtCosts,

      // Perf-tuning flags.
      X86::TuningFastGather,
      X86::TuningSlowUAMem32,
      X86::TuningAllowLight256Bit,

      // Based on whether user set the -mprefer-vector-width command line.
      X86::TuningPrefer128Bit,
      X86::TuningPrefer256Bit,

      // CPU name enums. These just follow CPU string.
      X86::ProcIntelAtom
  };

public:
  /// Build a TTI object for function \p F compiled with target machine \p TM.
  /// The referenced subtarget and lowering objects are owned by \p TM, not
  /// by this class.
  explicit X86TTIImpl(const X86TargetMachine *TM, const Function &F)
      : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
        TLI(ST->getTargetLowering()) {}

  /// \name Scalar TTI Implementations
  /// @{
  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);

  /// @}

  /// \name Cache TTI Implementation
  /// @{
  std::optional<unsigned> getCacheSize(
      TargetTransformInfo::CacheLevel Level) const override;
  std::optional<unsigned> getCacheAssociativity(
      TargetTransformInfo::CacheLevel Level) const override;
  /// @}

  /// \name Vector TTI Implementations
  /// @{

  unsigned getNumberOfRegisters(unsigned ClassID) const;
  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AS) const;
  unsigned getMaxInterleaveFactor(unsigned VF);
  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr);
  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
                                 ArrayRef<int> Mask,
                                 TTI::TargetCostKind CostKind, int Index,
                                 VectorType *SubTp,
                                 ArrayRef<const Value *> Args = std::nullopt);
  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   TTI::CastContextHint CCH,
                                   TTI::TargetCostKind CostKind,
                                   const Instruction *I = nullptr);
  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                     CmpInst::Predicate VecPred,
                                     TTI::TargetCostKind CostKind,
                                     const Instruction *I = nullptr);
  // Bring in the base-class overloads that are not overridden here so this
  // overload does not hide them.
  using BaseT::getVectorInstrCost;
  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index, Value *Op0, Value *Op1);
  InstructionCost getScalarizationOverhead(VectorType *Ty,
                                           const APInt &DemandedElts,
                                           bool Insert, bool Extract,
                                           TTI::TargetCostKind CostKind);
  InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
                                            int VF,
                                            const APInt &DemandedDstElts,
                                            TTI::TargetCostKind CostKind);
  InstructionCost
  getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                  unsigned AddressSpace, TTI::TargetCostKind CostKind,
                  TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None},
                  const Instruction *I = nullptr);
  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                        Align Alignment, unsigned AddressSpace,
                                        TTI::TargetCostKind CostKind);
  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I);
  InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE,
                                            const SCEV *Ptr);

  // InstCombine hooks: allow target-specific simplification of X86 intrinsics.
  std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                                    IntrinsicInst &II) const;
  std::optional<Value *>
  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
                                   APInt DemandedMask, KnownBits &Known,
                                   bool &KnownBitsComputed) const;
  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const;

  unsigned getAtomicMemIntrinsicMaxElementSize() const;

  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind);

  InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                                             std::optional<FastMathFlags> FMF,
                                             TTI::TargetCostKind CostKind);

  InstructionCost getMinMaxCost(Type *Ty, Type *CondTy, bool IsUnsigned);

  InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
                                         bool IsUnsigned,
                                         TTI::TargetCostKind CostKind);

  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond = false, bool UseMaskForGaps = false);
  InstructionCost getInterleavedMemoryOpCostAVX512(
      unsigned Opcode, FixedVectorType *VecTy, unsigned Factor,
      ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace,
      TTI::TargetCostKind CostKind, bool UseMaskForCond = false,
      bool UseMaskForGaps = false);

  InstructionCost getIntImmCost(int64_t);

  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TTI::TargetCostKind CostKind);

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr);

  InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TTI::TargetCostKind CostKind,
                                    Instruction *Inst = nullptr);
  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                      const APInt &Imm, Type *Ty,
                                      TTI::TargetCostKind CostKind);
  /// Return the cost of the scaling factor used in the addressing
  /// mode represented by AM for this target, for a load/store
  /// of the specified type.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, it returns a negative value.
  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       int64_t BaseOffset, bool HasBaseReg,
                                       int64_t Scale, unsigned AddrSpace) const;

  bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
                     const TargetTransformInfo::LSRCost &C2);
  bool canMacroFuseCmp();

  // Legality predicates for masked / non-temporal / gather-scatter memory ops.
  bool isLegalMaskedLoad(Type *DataType, Align Alignment);
  bool isLegalMaskedStore(Type *DataType, Align Alignment);
  bool isLegalNTLoad(Type *DataType, Align Alignment);
  bool isLegalNTStore(Type *DataType, Align Alignment);
  bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const;
  bool forceScalarizeMaskedGather(VectorType *VTy, Align Alignment);
  // Scatter scalarization is forced under exactly the same conditions as
  // gather scalarization.
  bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment) {
    return forceScalarizeMaskedGather(VTy, Alignment);
  }
  bool isLegalMaskedGather(Type *DataType, Align Alignment);
  bool isLegalMaskedScatter(Type *DataType, Align Alignment);
  bool isLegalMaskedExpandLoad(Type *DataType);
  bool isLegalMaskedCompressStore(Type *DataType);
  bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
                       const SmallBitVector &OpcodeMask) const;

  bool hasDivRemOp(Type *DataType, bool IsSigned);
  bool isExpensiveToSpeculativelyExecute(const Instruction *I);
  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty);

  // Inlining compatibility queries between caller and callee.
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;
  bool areTypesABICompatible(const Function *Caller, const Function *Callee,
                             const ArrayRef<Type *> &Type) const;

  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                    bool IsZeroCmp) const;
  bool prefersVectorizedAddressing() const;
  bool supportsEfficientVectorElementLoadStore() const;
  bool enableInterleavedAccessVectorization();

private:
  bool supportsGather() const;
  // Cost helpers for gather/scatter lowered as scalarized vs. native vector
  // operations.
  InstructionCost getGSScalarCost(unsigned Opcode, Type *DataTy,
                                  bool VariableMask, Align Alignment,
                                  unsigned AddressSpace);
  InstructionCost getGSVectorCost(unsigned Opcode, Type *DataTy,
                                  const Value *Ptr, Align Alignment,
                                  unsigned AddressSpace);

  int getGatherOverhead() const;
  int getScatterOverhead() const;

  /// @}
};
} // end namespace llvm

#endif