//===-- X86TargetTransformInfo.h - X86 specific TTI -------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file describes a TargetTransformInfo::Concept conforming object
/// specific to the X86 target machine. It uses the target's detailed
/// information to provide more precise answers to certain TTI queries, while
/// letting the target-independent and default TTI implementations handle the
/// rest.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_X86_X86TARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_X86_X86TARGETTRANSFORMINFO_H

#include "X86TargetMachine.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"

namespace llvm {

class InstCombiner;

class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
  typedef BasicTTIImplBase<X86TTIImpl> BaseT;
  typedef TargetTransformInfo TTI;
  friend BaseT;

  const X86Subtarget *ST;
  const X86TargetLowering *TLI;

  const X86Subtarget *getST() const { return ST; }
  const X86TargetLowering *getTLI() const { return TLI; }
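
  // Feature bits in this list are masked off before areInlineCompatible()
  // (declared below) compares caller and callee feature sets: they affect
  // tuning and code generation strategy rather than the ABI or the set of
  // available intrinsics, so a mismatch should not block inlining.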
  const FeatureBitset InlineFeatureIgnoreList = {
      // This indicates the CPU is 64-bit capable, not that we are in 64-bit
      // mode.
      X86::Feature64Bit,

      // These features don't have any intrinsics or ABI effect.
      X86::FeatureNOPL,
      X86::FeatureCMPXCHG16B,
      X86::FeatureLAHFSAHF,

      // Some older targets can be set up to fold unaligned loads.
      X86::FeatureSSEUnalignedMem,

      // Codegen control options.
      X86::TuningFast11ByteNOP,
      X86::TuningFast15ByteNOP,
      X86::TuningFastBEXTR,
      X86::TuningFastHorizontalOps,
      X86::TuningFastLZCNT,
      X86::TuningFastScalarFSQRT,
      X86::TuningFastSHLDRotate,
      X86::TuningFastScalarShiftMasks,
      X86::TuningFastVectorShiftMasks,
      X86::TuningFastVariableCrossLaneShuffle,
      X86::TuningFastVariablePerLaneShuffle,
      X86::TuningFastVectorFSQRT,
      X86::TuningLEAForSP,
      X86::TuningLEAUsesAG,
      X86::TuningLZCNTFalseDeps,
      X86::TuningBranchFusion,
      X86::TuningMacroFusion,
      X86::TuningPadShortFunctions,
      X86::TuningPOPCNTFalseDeps,
      X86::TuningSlow3OpsLEA,
      X86::TuningSlowDivide32,
      X86::TuningSlowDivide64,
      X86::TuningSlowIncDec,
      X86::TuningSlowLEA,
      X86::TuningSlowPMADDWD,
      X86::TuningSlowPMULLD,
      X86::TuningSlowSHLD,
      X86::TuningSlowTwoMemOps,
      X86::TuningSlowUAMem16,
      X86::TuningPreferMaskRegisters,
      X86::TuningInsertVZEROUPPER,
      X86::TuningUseSLMArithCosts,
      X86::TuningUseGLMDivSqrtCosts,

      // Perf-tuning flags.
      X86::TuningFastGather,
      X86::TuningSlowUAMem32,

      // Based on whether the user set the -mprefer-vector-width command-line
      // option.
      X86::TuningPrefer128Bit,
      X86::TuningPrefer256Bit,

      // CPU name enums. These just follow the CPU string.
      X86::ProcIntelAtom
  };

public:
  explicit X86TTIImpl(const X86TargetMachine *TM, const Function &F)
      : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
        TLI(ST->getTargetLowering()) {}
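
  // A minimal sketch of how this class is typically instantiated: the X86
  // target machine wraps it in a TargetTransformInfo, roughly as
  // X86TargetMachine::getTargetTransformInfo does:
  //
  //   TargetTransformInfo
  //   X86TargetMachine::getTargetTransformInfo(const Function &F) {
  //     return TargetTransformInfo(X86TTIImpl(this, F));
  //   }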

  /// \name Scalar TTI Implementations
  /// @{
  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
  /// @}

  /// \name Cache TTI Implementation
  /// @{
  llvm::Optional<unsigned> getCacheSize(
      TargetTransformInfo::CacheLevel Level) const override;
  llvm::Optional<unsigned> getCacheAssociativity(
      TargetTransformInfo::CacheLevel Level) const override;
  /// @}

  /// \name Vector TTI Implementations
  /// @{

  unsigned getNumberOfRegisters(unsigned ClassID) const;
  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AS) const;
  unsigned getMaxInterleaveFactor(unsigned VF);
  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr);
  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
                                 ArrayRef<int> Mask, int Index,
                                 VectorType *SubTp);
  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   TTI::CastContextHint CCH,
                                   TTI::TargetCostKind CostKind,
                                   const Instruction *I = nullptr);
  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                     CmpInst::Predicate VecPred,
                                     TTI::TargetCostKind CostKind,
                                     const Instruction *I = nullptr);
  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     unsigned Index);
  InstructionCost getScalarizationOverhead(VectorType *Ty,
                                           const APInt &DemandedElts,
                                           bool Insert, bool Extract);
  InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
                                            int VF,
                                            const APInt &DemandedDstElts,
                                            TTI::TargetCostKind CostKind);
  InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
                                  MaybeAlign Alignment, unsigned AddressSpace,
                                  TTI::TargetCostKind CostKind,
                                  const Instruction *I = nullptr);
  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                        Align Alignment, unsigned AddressSpace,
                                        TTI::TargetCostKind CostKind);
  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I);
  InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE,
                                            const SCEV *Ptr);

  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                               IntrinsicInst &II) const;
  Optional<Value *>
  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
                                   APInt DemandedMask, KnownBits &Known,
                                   bool &KnownBitsComputed) const;
  Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const;

  unsigned getAtomicMemIntrinsicMaxElementSize() const;

  InstructionCost
  getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                 TTI::TargetCostKind CostKind);
  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind);

  InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                                             Optional<FastMathFlags> FMF,
                                             TTI::TargetCostKind CostKind);

  InstructionCost getMinMaxCost(Type *Ty, Type *CondTy, bool IsUnsigned);

  InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
                                         bool IsUnsigned,
                                         TTI::TargetCostKind CostKind);

  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond = false, bool UseMaskForGaps = false);
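
  // When the subtarget supports AVX-512, getInterleavedMemoryOpCost defers to
  // this specialized variant; see the implementation for the exact dispatch
  // conditions.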
  InstructionCost getInterleavedMemoryOpCostAVX512(
      unsigned Opcode, FixedVectorType *VecTy, unsigned Factor,
      ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace,
      TTI::TargetCostKind CostKind, bool UseMaskForCond = false,
      bool UseMaskForGaps = false);
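
  /// Calculate the cost of materializing the given 64-bit immediate. This
  /// helper may only account for a fraction of a larger immediate, so a
  /// result of zero is valid.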
  InstructionCost getIntImmCost(int64_t);

  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TTI::TargetCostKind CostKind);

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr);

  InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TTI::TargetCostKind CostKind,
                                    Instruction *Inst = nullptr);
  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                      const APInt &Imm, Type *Ty,
                                      TTI::TargetCostKind CostKind);
  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                     TargetTransformInfo::LSRCost &C2);
  bool canMacroFuseCmp();
  bool isLegalMaskedLoad(Type *DataType, Align Alignment);
  bool isLegalMaskedStore(Type *DataType, Align Alignment);
  bool isLegalNTLoad(Type *DataType, Align Alignment);
  bool isLegalNTStore(Type *DataType, Align Alignment);
  bool forceScalarizeMaskedGather(VectorType *VTy, Align Alignment);
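
  // Scatter is forced to scalarize under the same conditions as gather; the
  // delegation below keeps the two paths behind one predicate.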
  bool forceScalarizeMaskedScatter(VectorType *VTy, Align Alignment) {
    return forceScalarizeMaskedGather(VTy, Alignment);
  }
  bool isLegalMaskedGather(Type *DataType, Align Alignment);
  bool isLegalMaskedScatter(Type *DataType, Align Alignment);
  bool isLegalMaskedExpandLoad(Type *DataType);
  bool isLegalMaskedCompressStore(Type *DataType);
  bool hasDivRemOp(Type *DataType, bool IsSigned);
  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty);
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;
  bool areTypesABICompatible(const Function *Caller, const Function *Callee,
                             const ArrayRef<Type *> &Types) const;
  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                    bool IsZeroCmp) const;
  bool prefersVectorizedAddressing() const;
  bool supportsEfficientVectorElementLoadStore() const;
  bool enableInterleavedAccessVectorization();

private:
  bool supportsGather() const;
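
  // Helpers for getGatherScatterOpCost: getGSScalarCost models emulating a
  // gather/scatter by scalarizing it into individual loads or stores, while
  // getGSVectorCost models issuing a native gather/scatter instruction.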
  InstructionCost getGSScalarCost(unsigned Opcode, Type *DataTy,
                                  bool VariableMask, Align Alignment,
                                  unsigned AddressSpace);
  InstructionCost getGSVectorCost(unsigned Opcode, Type *DataTy,
                                  const Value *Ptr, Align Alignment,
                                  unsigned AddressSpace);

  int getGatherOverhead() const;
  int getScatterOverhead() const;

  /// @}
};

} // end namespace llvm

#endif