TargetTransformInfoImpl.h

#pragma once
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#endif

//===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file provides helpers for the implementation of
/// a TargetTransformInfo-conforming class.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H

#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include <utility>

using namespace llvm::PatternMatch;

namespace llvm {

/// Base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
class TargetTransformInfoImplBase {
protected:
  typedef TargetTransformInfo TTI;

  const DataLayout &DL;

  explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {}

public:
  // Provide value semantics. MSVC requires that we spell all of these out.
  TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg) = default;
  TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {}

  const DataLayout &getDataLayout() const { return DL; }

  InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
                             ArrayRef<const Value *> Operands,
                             TTI::TargetCostKind CostKind) const {
    // In the basic model, we just assume that all-constant GEPs will be folded
    // into their uses via addressing modes.
    for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx)
      if (!isa<Constant>(Operands[Idx]))
        return TTI::TCC_Basic;

    return TTI::TCC_Free;
  }
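
  // For illustration: under this default model a GEP whose indices are all
  // constants, e.g.
  //   %p = getelementptr inbounds [8 x i32], [8 x i32]* @g, i64 0, i64 3
  // is costed as TCC_Free on the assumption that it folds into the addressing
  // mode of its users, while any GEP with a variable index costs TCC_Basic.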

  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize,
                                            ProfileSummaryInfo *PSI,
                                            BlockFrequencyInfo *BFI) const {
    (void)PSI;
    (void)BFI;
    JTSize = 0;
    return SI.getNumCases();
  }

  unsigned getInliningThresholdMultiplier() const { return 1; }
  unsigned adjustInliningThreshold(const CallBase *CB) const { return 0; }
  int getInlinerVectorBonusPercent() const { return 150; }

  InstructionCost getMemcpyCost(const Instruction *I) const {
    return TTI::TCC_Expensive;
  }

  // Although this default value is arbitrary, it is not random. It is assumed
  // that a condition that evaluates the same way by a higher percentage than
  // this is best represented as control flow. Therefore, the default value N
  // should be set such that the win from N% correct executions is greater than
  // the loss from (100 - N)% mispredicted executions for the majority of
  // intended targets.
  BranchProbability getPredictableBranchThreshold() const {
    return BranchProbability(99, 100);
  }
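
  // For illustration: with the default threshold of 99/100, a branch is only
  // treated as predictable when profile data shows it going the same way in
  // at least 99% of executions; a 95/100 branch would still be considered
  // unpredictable by passes that consult this hook.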

  bool hasBranchDivergence() const { return false; }
  bool useGPUDivergenceAnalysis() const { return false; }
  bool isSourceOfDivergence(const Value *V) const { return false; }
  bool isAlwaysUniform(const Value *V) const { return false; }

  unsigned getFlatAddressSpace() const { return -1; }

  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const {
    return false;
  }

  bool isNoopAddrSpaceCast(unsigned, unsigned) const { return false; }

  bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
    return AS == 0;
  }

  unsigned getAssumedAddrSpace(const Value *V) const { return -1; }

  std::pair<const Value *, unsigned>
  getPredicatedAddrSpace(const Value *V) const {
    return std::make_pair(nullptr, -1);
  }

  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const {
    return nullptr;
  }

  bool isLoweredToCall(const Function *F) const {
    assert(F && "A concrete function must be provided to this routine.");

    // FIXME: These should almost certainly not be handled here, and instead
    // handled with the help of TLI or the target itself. This was largely
    // ported from existing analysis heuristics here so that such refactorings
    // can take place in the future.
    if (F->isIntrinsic())
      return false;

    if (F->hasLocalLinkage() || !F->hasName())
      return true;

    StringRef Name = F->getName();

    // These will all likely lower to a single selection DAG node.
    if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
        Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" ||
        Name == "fmin" || Name == "fminf" || Name == "fminl" ||
        Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
        Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" ||
        Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
      return false;

    // These are all likely to be optimized into something smaller.
    if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
        Name == "exp2l" || Name == "exp2f" || Name == "floor" ||
        Name == "floorf" || Name == "ceil" || Name == "round" ||
        Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" ||
        Name == "llabs")
      return false;

    return true;
  }

  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC, TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) const {
    return false;
  }

  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
                                   AssumptionCache &AC, TargetLibraryInfo *TLI,
                                   DominatorTree *DT,
                                   const LoopAccessInfo *LAI) const {
    return false;
  }

  bool emitGetActiveLaneMask() const { return false; }

  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                               IntrinsicInst &II) const {
    return None;
  }

  Optional<Value *>
  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
                                   APInt DemandedMask, KnownBits &Known,
                                   bool &KnownBitsComputed) const {
    return None;
  }

  Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const {
    return None;
  }

  void getUnrollingPreferences(Loop *, ScalarEvolution &,
                               TTI::UnrollingPreferences &,
                               OptimizationRemarkEmitter *) const {}

  void getPeelingPreferences(Loop *, ScalarEvolution &,
                             TTI::PeelingPreferences &) const {}

  bool isLegalAddImmediate(int64_t Imm) const { return false; }
  bool isLegalICmpImmediate(int64_t Imm) const { return false; }

  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
                             Instruction *I = nullptr) const {
    // Guess that only reg and reg+reg addressing is allowed. This heuristic is
    // taken from the implementation of LSR.
    return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
  }
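
  // For illustration: this conservative default accepts only
  //   reg          (BaseGV == nullptr, BaseOffset == 0, Scale == 0)
  //   reg + reg    (BaseGV == nullptr, BaseOffset == 0, Scale == 1)
  // and rejects forms such as base + offset or base + scale*index with
  // Scale > 1, leaving those for targets to enable.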

  bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) const {
    return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
                    C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
           std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
                    C2.ScaleCost, C2.ImmCost, C2.SetupCost);
  }

  bool isNumRegsMajorCostOfLSR() const { return true; }
  bool isProfitableLSRChainElement(Instruction *I) const { return false; }
  bool canMacroFuseCmp() const { return false; }

  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
                  DominatorTree *DT, AssumptionCache *AC,
                  TargetLibraryInfo *LibInfo) const {
    return false;
  }

  TTI::AddressingModeKind
  getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const {
    return TTI::AMK_None;
  }

  bool isLegalMaskedStore(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalMaskedLoad(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalNTStore(Type *DataType, Align Alignment) const {
    // By default, assume nontemporal memory stores are available for stores
    // that are aligned and have a size that is a power of 2.
    unsigned DataSize = DL.getTypeStoreSize(DataType);
    return Alignment >= DataSize && isPowerOf2_32(DataSize);
  }

  bool isLegalNTLoad(Type *DataType, Align Alignment) const {
    // By default, assume nontemporal memory loads are available for loads that
    // are aligned and have a size that is a power of 2.
    unsigned DataSize = DL.getTypeStoreSize(DataType);
    return Alignment >= DataSize && isPowerOf2_32(DataSize);
  }
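
  // For illustration: under the two defaults above, a nontemporal load or
  // store of <4 x float> (16 bytes) is legal only with alignment >= 16, while
  // a 12-byte type is always rejected because 12 is not a power of 2.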

  bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
    return false;
  }

  bool forceScalarizeMaskedGather(VectorType *DataType, Align Alignment) const {
    return false;
  }

  bool forceScalarizeMaskedScatter(VectorType *DataType,
                                   Align Alignment) const {
    return false;
  }

  bool isLegalMaskedCompressStore(Type *DataType) const { return false; }
  bool isLegalMaskedExpandLoad(Type *DataType) const { return false; }

  bool enableOrderedReductions() const { return false; }

  bool hasDivRemOp(Type *DataType, bool IsSigned) const { return false; }

  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const {
    return false;
  }

  bool prefersVectorizedAddressing() const { return true; }

  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       int64_t BaseOffset, bool HasBaseReg,
                                       int64_t Scale,
                                       unsigned AddrSpace) const {
    // Guess that all legal addressing modes are free.
    if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
                              AddrSpace))
      return 0;
    return -1;
  }
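
  // For illustration: the convention here is that a non-negative return value
  // is the cost of using the scale factor and a negative value means the
  // base/offset/scale combination is not legal, so this default treats every
  // legal addressing mode as free and everything else as unsupported.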

  bool LSRWithInstrQueries() const { return false; }

  bool isTruncateFree(Type *Ty1, Type *Ty2) const { return false; }

  bool isProfitableToHoist(Instruction *I) const { return true; }

  bool useAA() const { return false; }

  bool isTypeLegal(Type *Ty) const { return false; }

  InstructionCost getRegUsageForType(Type *Ty) const { return 1; }

  bool shouldBuildLookupTables() const { return true; }
  bool shouldBuildLookupTablesForConstant(Constant *C) const { return true; }
  bool shouldBuildRelLookupTables() const { return false; }

  bool useColdCCForColdCall(Function &F) const { return false; }

  InstructionCost getScalarizationOverhead(VectorType *Ty,
                                           const APInt &DemandedElts,
                                           bool Insert, bool Extract) const {
    return 0;
  }

  InstructionCost getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                                   ArrayRef<Type *> Tys) const {
    return 0;
  }

  bool supportsEfficientVectorElementLoadStore() const { return false; }

  bool enableAggressiveInterleaving(bool LoopHasReductions) const {
    return false;
  }

  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                    bool IsZeroCmp) const {
    return {};
  }

  bool enableInterleavedAccessVectorization() const { return false; }

  bool enableMaskedInterleavedAccessVectorization() const { return false; }

  bool isFPVectorizationPotentiallyUnsafe() const { return false; }

  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
                                      unsigned AddressSpace, Align Alignment,
                                      bool *Fast) const {
    return false;
  }

  TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const {
    return TTI::PSK_Software;
  }

  bool haveFastSqrt(Type *Ty) const { return false; }

  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const { return true; }

  InstructionCost getFPOpCost(Type *Ty) const {
    return TargetTransformInfo::TCC_Basic;
  }

  InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
                                        const APInt &Imm, Type *Ty) const {
    return 0;
  }

  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TTI::TargetCostKind CostKind) const {
    return TTI::TCC_Basic;
  }

  InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TTI::TargetCostKind CostKind,
                                    Instruction *Inst = nullptr) const {
    return TTI::TCC_Free;
  }

  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                      const APInt &Imm, Type *Ty,
                                      TTI::TargetCostKind CostKind) const {
    return TTI::TCC_Free;
  }

  unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; }

  unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const {
    return Vector ? 1 : 0;
  }

  const char *getRegisterClassName(unsigned ClassID) const {
    switch (ClassID) {
    default:
      return "Generic::Unknown Register Class";
    case 0:
      return "Generic::ScalarRC";
    case 1:
      return "Generic::VectorRC";
    }
  }

  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
    return TypeSize::getFixed(32);
  }

  unsigned getMinVectorRegisterBitWidth() const { return 128; }

  Optional<unsigned> getMaxVScale() const { return None; }
  Optional<unsigned> getVScaleForTuning() const { return None; }

  bool shouldMaximizeVectorBandwidth() const { return false; }

  ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const {
    return ElementCount::get(0, IsScalable);
  }

  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const { return 0; }

  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
    AllowPromotionWithoutCommonHeader = false;
    return false;
  }

  unsigned getCacheLineSize() const { return 0; }

  llvm::Optional<unsigned>
  getCacheSize(TargetTransformInfo::CacheLevel Level) const {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      LLVM_FALLTHROUGH;
    case TargetTransformInfo::CacheLevel::L2D:
      return llvm::Optional<unsigned>();
    }
    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  llvm::Optional<unsigned>
  getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      LLVM_FALLTHROUGH;
    case TargetTransformInfo::CacheLevel::L2D:
      return llvm::Optional<unsigned>();
    }
    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }
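
  // Illustrative sketch (hypothetical target, not any in-tree backend): a
  // target that knows its cache hierarchy would override these hooks, e.g.
  //
  //   llvm::Optional<unsigned>
  //   getCacheSize(TargetTransformInfo::CacheLevel Level) const {
  //     if (Level == TargetTransformInfo::CacheLevel::L1D)
  //       return 32 * 1024;   // assume a 32 KiB L1D for illustration
  //     return 1024 * 1024;   // assume a 1 MiB L2
  //   }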

  unsigned getPrefetchDistance() const { return 0; }

  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                unsigned NumStridedMemAccesses,
                                unsigned NumPrefetches, bool HasCall) const {
    return 1;
  }

  unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; }

  bool enableWritePrefetching() const { return false; }

  unsigned getMaxInterleaveFactor(unsigned VF) const { return 1; }

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info,
      TTI::OperandValueProperties Opd1PropInfo,
      TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
      const Instruction *CxtI = nullptr) const {
    // FIXME: A number of transformation tests seem to require these values
    // which seems a little odd given how arbitrary they are.
    switch (Opcode) {
    default:
      break;
    case Instruction::FDiv:
    case Instruction::FRem:
    case Instruction::SDiv:
    case Instruction::SRem:
    case Instruction::UDiv:
    case Instruction::URem:
      // FIXME: Unlikely to be true for CodeSize.
      return TTI::TCC_Expensive;
    }
    return 1;
  }

  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty,
                                 ArrayRef<int> Mask, int Index,
                                 VectorType *SubTp) const {
    return 1;
  }

  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   TTI::CastContextHint CCH,
                                   TTI::TargetCostKind CostKind,
                                   const Instruction *I) const {
    switch (Opcode) {
    default:
      break;
    case Instruction::IntToPtr: {
      unsigned SrcSize = Src->getScalarSizeInBits();
      if (DL.isLegalInteger(SrcSize) &&
          SrcSize <= DL.getPointerTypeSizeInBits(Dst))
        return 0;
      break;
    }
    case Instruction::PtrToInt: {
      unsigned DstSize = Dst->getScalarSizeInBits();
      if (DL.isLegalInteger(DstSize) &&
          DstSize >= DL.getPointerTypeSizeInBits(Src))
        return 0;
      break;
    }
    case Instruction::BitCast:
      if (Dst == Src || (Dst->isPointerTy() && Src->isPointerTy()))
        // Identity and pointer-to-pointer casts are free.
        return 0;
      break;
    case Instruction::Trunc: {
      // trunc to a native type is free (assuming the target has compare and
      // shift-right of the same width).
      TypeSize DstSize = DL.getTypeSizeInBits(Dst);
      if (!DstSize.isScalable() && DL.isLegalInteger(DstSize.getFixedSize()))
        return 0;
      break;
    }
    }
    return 1;
  }
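
  // For illustration: on a target whose DataLayout declares i32/i64 legal and
  // uses 64-bit pointers, the rules above make `trunc i64 %x to i32`,
  // `ptrtoint i8* %p to i64` and pointer-to-pointer bitcasts free, while e.g.
  // `fptoui double %d to i32` falls through to the default cost of 1.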

  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                           VectorType *VecTy,
                                           unsigned Index) const {
    return 1;
  }

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr) const {
    // A phi would be free, unless we're costing the throughput because it
    // will require a register.
    if (Opcode == Instruction::PHI && CostKind != TTI::TCK_RecipThroughput)
      return 0;
    return 1;
  }

  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                     CmpInst::Predicate VecPred,
                                     TTI::TargetCostKind CostKind,
                                     const Instruction *I) const {
    return 1;
  }

  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     unsigned Index) const {
    return 1;
  }

  unsigned getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
                                     const APInt &DemandedDstElts,
                                     TTI::TargetCostKind CostKind) {
    return 1;
  }

  InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                  unsigned AddressSpace,
                                  TTI::TargetCostKind CostKind,
                                  const Instruction *I) const {
    return 1;
  }

  InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                    unsigned AddressSpace,
                                    TTI::TargetCostKind CostKind,
                                    const Instruction *I) const {
    return 1;
  }

  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                        Align Alignment, unsigned AddressSpace,
                                        TTI::TargetCostKind CostKind) const {
    return 1;
  }

  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr) const {
    return 1;
  }

  unsigned getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond, bool UseMaskForGaps) const {
    return 1;
  }

  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind) const {
    switch (ICA.getID()) {
    default:
      break;
    case Intrinsic::annotation:
    case Intrinsic::assume:
    case Intrinsic::sideeffect:
    case Intrinsic::pseudoprobe:
    case Intrinsic::arithmetic_fence:
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_value:
    case Intrinsic::dbg_label:
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::launder_invariant_group:
    case Intrinsic::strip_invariant_group:
    case Intrinsic::is_constant:
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::experimental_noalias_scope_decl:
    case Intrinsic::objectsize:
    case Intrinsic::ptr_annotation:
    case Intrinsic::var_annotation:
    case Intrinsic::experimental_gc_result:
    case Intrinsic::experimental_gc_relocate:
    case Intrinsic::coro_alloc:
    case Intrinsic::coro_begin:
    case Intrinsic::coro_free:
    case Intrinsic::coro_end:
    case Intrinsic::coro_frame:
    case Intrinsic::coro_size:
    case Intrinsic::coro_align:
    case Intrinsic::coro_suspend:
    case Intrinsic::coro_subfn_addr:
      // These intrinsics don't actually represent code after lowering.
      return 0;
    }
    return 1;
  }

  InstructionCost getCallInstrCost(Function *F, Type *RetTy,
                                   ArrayRef<Type *> Tys,
                                   TTI::TargetCostKind CostKind) const {
    return 1;
  }

  // Assume that we have a register of the right size for the type.
  unsigned getNumberOfParts(Type *Tp) const { return 1; }

  InstructionCost getAddressComputationCost(Type *Tp, ScalarEvolution *,
                                            const SCEV *) const {
    return 0;
  }

  InstructionCost getArithmeticReductionCost(unsigned, VectorType *,
                                             Optional<FastMathFlags> FMF,
                                             TTI::TargetCostKind) const {
    return 1;
  }

  InstructionCost getMinMaxReductionCost(VectorType *, VectorType *, bool,
                                         TTI::TargetCostKind) const {
    return 1;
  }

  InstructionCost
  getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned, Type *ResTy,
                              VectorType *Ty,
                              TTI::TargetCostKind CostKind) const {
    return 1;
  }

  InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
    return 0;
  }

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const {
    return false;
  }

  unsigned getAtomicMemIntrinsicMaxElementSize() const {
    // Note for overrides: You must ensure for all element unordered-atomic
    // memory intrinsics that all power-of-2 element sizes up to, and
    // including, the return value of this method have a corresponding
    // runtime lib call. These runtime lib call definitions can be found
    // in RuntimeLibcalls.h
    return 0;
  }
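
  // Illustrative sketch (hypothetical override, not any in-tree target): a
  // target with unordered-atomic element libcalls for 1/2/4/8/16-byte
  // elements could return
  //
  //   unsigned getAtomicMemIntrinsicMaxElementSize() const { return 16; }
  //
  // provided the matching runtime lib calls listed in RuntimeLibcalls.h are
  // actually available for every power-of-2 element size up to 16.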

  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) const {
    return nullptr;
  }

  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                                  unsigned SrcAddrSpace, unsigned DestAddrSpace,
                                  unsigned SrcAlign, unsigned DestAlign) const {
    return Type::getInt8Ty(Context);
  }

  void getMemcpyLoopResidualLoweringType(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      unsigned SrcAlign, unsigned DestAlign) const {
    for (unsigned i = 0; i != RemainingBytes; ++i)
      OpsOut.push_back(Type::getInt8Ty(Context));
  }

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }

  bool areTypesABICompatible(const Function *Caller, const Function *Callee,
                             const ArrayRef<Type *> &Types) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }

  bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty,
                          const DataLayout &DL) const {
    return false;
  }

  bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty,
                           const DataLayout &DL) const {
    return false;
  }

  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; }

  bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; }

  bool isLegalToVectorizeStore(StoreInst *SI) const { return true; }

  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const {
    return true;
  }

  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const {
    return true;
  }

  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                   ElementCount VF) const {
    return true;
  }

  bool isElementTypeLegalForScalableVector(Type *Ty) const { return true; }

  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const {
    return VF;
  }

  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const {
    return VF;
  }

  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
                             TTI::ReductionFlags Flags) const {
    return false;
  }

  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                       TTI::ReductionFlags Flags) const {
    return false;
  }

  bool shouldExpandReduction(const IntrinsicInst *II) const { return true; }

  unsigned getGISelRematGlobalCost() const { return 1; }

  bool supportsScalableVectors() const { return false; }

  bool enableScalableVectorization() const { return false; }

  bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
                             Align Alignment) const {
    return false;
  }

  TargetTransformInfo::VPLegalization
  getVPLegalizationStrategy(const VPIntrinsic &PI) const {
    return TargetTransformInfo::VPLegalization(
        /* EVLParamStrategy */ TargetTransformInfo::VPLegalization::Discard,
        /* OperatorStrategy */ TargetTransformInfo::VPLegalization::Convert);
  }

protected:
  // Obtain the minimum required size to hold the value (without the sign).
  // In the case of a vector, it returns the minimum required size for one
  // element.
  unsigned minRequiredElementSize(const Value *Val, bool &isSigned) const {
    if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
      const auto *VectorValue = cast<Constant>(Val);

      // For a vector, we need to pick the maximum of the minimum required
      // sizes of the elements.
      auto *VT = cast<FixedVectorType>(Val->getType());

      // Assume unsigned elements.
      isSigned = false;

      // The maximum required size is the size of the vector element type.
      unsigned MaxRequiredSize =
          VT->getElementType()->getPrimitiveSizeInBits().getFixedSize();

      unsigned MinRequiredSize = 0;
      for (unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
        if (auto *IntElement =
                dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
          bool signedElement = IntElement->getValue().isNegative();
          // Get the element's minimum required size.
          unsigned ElementMinRequiredSize =
              IntElement->getValue().getMinSignedBits() - 1;
          // If any element is signed, the whole vector is treated as signed.
          isSigned |= signedElement;
          // Keep the maximum required bit size among all the elements.
          MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
        } else {
          // Not an integer constant element; fall back to the element type's
          // size.
          return MaxRequiredSize;
        }
      }
      return MinRequiredSize;
    }

    if (const auto *CI = dyn_cast<ConstantInt>(Val)) {
      isSigned = CI->getValue().isNegative();
      return CI->getValue().getMinSignedBits() - 1;
    }

    if (const auto *Cast = dyn_cast<SExtInst>(Val)) {
      isSigned = true;
      return Cast->getSrcTy()->getScalarSizeInBits() - 1;
    }

    if (const auto *Cast = dyn_cast<ZExtInst>(Val)) {
      isSigned = false;
      return Cast->getSrcTy()->getScalarSizeInBits();
    }

    isSigned = false;
    return Val->getType()->getScalarSizeInBits();
  }
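
  // Worked example: for a constant vector such as <i16 3, i16 -2, i16 7> the
  // loop above computes per-element sizes of 2, 1 and 3 bits (magnitude
  // without the sign bit), marks the result signed because of the -2, and
  // returns 3; a plain i32 value with no sext/zext information simply reports
  // its full 32-bit scalar size.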

  bool isStridedAccess(const SCEV *Ptr) const {
    return Ptr && isa<SCEVAddRecExpr>(Ptr);
  }

  const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE,
                                            const SCEV *Ptr) const {
    if (!isStridedAccess(Ptr))
      return nullptr;
    const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr);
    return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE));
  }

  bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr,
                                       int64_t MergeDistance) const {
    const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);
    if (!Step)
      return false;
    APInt StrideVal = Step->getAPInt();
    if (StrideVal.getBitWidth() > 64)
      return false;
    // FIXME: Need to take absolute value for negative stride case.
    return StrideVal.getSExtValue() < MergeDistance;
  }
};
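
/// Illustrative usage sketch (hypothetical class, not an in-tree target):
/// backends normally go through BasicTTIImplBase, which itself derives from
/// the CRTP base below, but the mix-in pattern looks roughly like
/// \code
///   class MyTargetTTIImpl
///       : public TargetTransformInfoImplCRTPBase<MyTargetTTIImpl> {
///     using BaseT = TargetTransformInfoImplCRTPBase<MyTargetTTIImpl>;
///   public:
///     explicit MyTargetTTIImpl(const DataLayout &DL) : BaseT(DL) {}
///     // Override only the hooks where the target has better information.
///     unsigned getCacheLineSize() const { return 64; }
///   };
/// \endcode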

/// CRTP base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
template <typename T>
class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
private:
  typedef TargetTransformInfoImplBase BaseT;

protected:
  explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {}

public:
  using BaseT::getGEPCost;

  InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
                             ArrayRef<const Value *> Operands,
                             TTI::TargetCostKind CostKind) {
    assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
    assert(cast<PointerType>(Ptr->getType()->getScalarType())
               ->isOpaqueOrPointeeTypeMatches(PointeeType) &&
           "explicit pointee type doesn't match operand's pointee type");
    auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
    bool HasBaseReg = (BaseGV == nullptr);

    auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
    APInt BaseOffset(PtrSizeBits, 0);
    int64_t Scale = 0;

    auto GTI = gep_type_begin(PointeeType, Operands);
    Type *TargetType = nullptr;

    // Handle the case where the GEP instruction has a single operand (the base
    // pointer), in which case TargetType stays a nullptr.
    if (Operands.empty())
      return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic;

    for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
      TargetType = GTI.getIndexedType();
      // We assume that the cost of a scalar GEP with a constant index and the
      // cost of a vector GEP with a splat constant index are the same.
      const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
      if (!ConstIdx)
        if (auto Splat = getSplatValue(*I))
          ConstIdx = dyn_cast<ConstantInt>(Splat);
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        // For structures the index is always a splat or scalar constant.
        assert(ConstIdx && "Unexpected GEP index");
        uint64_t Field = ConstIdx->getZExtValue();
        BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
      } else {
        // If this operand is a scalable type, bail out early.
        // TODO: handle scalable vectors
        if (isa<ScalableVectorType>(TargetType))
          return TTI::TCC_Basic;
        int64_t ElementSize =
            DL.getTypeAllocSize(GTI.getIndexedType()).getFixedSize();
        if (ConstIdx) {
          BaseOffset +=
              ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
        } else {
          // Needs scale register.
          if (Scale != 0)
            // No addressing mode takes two scale registers.
            return TTI::TCC_Basic;
          Scale = ElementSize;
        }
      }
    }

    if (static_cast<T *>(this)->isLegalAddressingMode(
            TargetType, const_cast<GlobalValue *>(BaseGV),
            BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale,
            Ptr->getType()->getPointerAddressSpace()))
      return TTI::TCC_Free;
    return TTI::TCC_Basic;
  }

  InstructionCost getUserCost(const User *U, ArrayRef<const Value *> Operands,
                              TTI::TargetCostKind CostKind) {
    auto *TargetTTI = static_cast<T *>(this);
    // Handle non-intrinsic calls, invokes, and callbr.
    // FIXME: Unlikely to be true for anything but CodeSize.
    auto *CB = dyn_cast<CallBase>(U);
    if (CB && !isa<IntrinsicInst>(U)) {
      if (const Function *F = CB->getCalledFunction()) {
        if (!TargetTTI->isLoweredToCall(F))
          return TTI::TCC_Basic; // Give a basic cost if it will be lowered.
        return TTI::TCC_Basic * (F->getFunctionType()->getNumParams() + 1);
      }
      // For indirect or other calls, scale cost by number of arguments.
      return TTI::TCC_Basic * (CB->arg_size() + 1);
    }

    Type *Ty = U->getType();
    Type *OpTy =
        U->getNumOperands() == 1 ? U->getOperand(0)->getType() : nullptr;
    unsigned Opcode = Operator::getOpcode(U);
    auto *I = dyn_cast<Instruction>(U);
    switch (Opcode) {
    default:
      break;
    case Instruction::Call: {
      assert(isa<IntrinsicInst>(U) && "Unexpected non-intrinsic call");
      auto *Intrinsic = cast<IntrinsicInst>(U);
      IntrinsicCostAttributes CostAttrs(Intrinsic->getIntrinsicID(), *CB);
      return TargetTTI->getIntrinsicInstrCost(CostAttrs, CostKind);
    }
    case Instruction::Br:
    case Instruction::Ret:
    case Instruction::PHI:
    case Instruction::Switch:
      return TargetTTI->getCFInstrCost(Opcode, CostKind, I);
    case Instruction::ExtractValue:
    case Instruction::Freeze:
      return TTI::TCC_Free;
    case Instruction::Alloca:
      if (cast<AllocaInst>(U)->isStaticAlloca())
        return TTI::TCC_Free;
      break;
    case Instruction::GetElementPtr: {
      const auto *GEP = cast<GEPOperator>(U);
      return TargetTTI->getGEPCost(GEP->getSourceElementType(),
                                   GEP->getPointerOperand(),
                                   Operands.drop_front(), CostKind);
    }
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::FNeg: {
      TTI::OperandValueProperties Op1VP = TTI::OP_None;
      TTI::OperandValueProperties Op2VP = TTI::OP_None;
      TTI::OperandValueKind Op1VK =
          TTI::getOperandInfo(U->getOperand(0), Op1VP);
      TTI::OperandValueKind Op2VK = Opcode != Instruction::FNeg ?
          TTI::getOperandInfo(U->getOperand(1), Op2VP) : TTI::OK_AnyValue;
      SmallVector<const Value *, 2> Operands(U->operand_values());
      return TargetTTI->getArithmeticInstrCost(Opcode, Ty, CostKind,
                                               Op1VK, Op2VK,
                                               Op1VP, Op2VP, Operands, I);
    }
    case Instruction::IntToPtr:
    case Instruction::PtrToInt:
    case Instruction::SIToFP:
    case Instruction::UIToFP:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::Trunc:
    case Instruction::FPTrunc:
    case Instruction::BitCast:
    case Instruction::FPExt:
    case Instruction::SExt:
    case Instruction::ZExt:
    case Instruction::AddrSpaceCast:
      return TargetTTI->getCastInstrCost(
          Opcode, Ty, OpTy, TTI::getCastContextHint(I), CostKind, I);
    case Instruction::Store: {
      auto *SI = cast<StoreInst>(U);
      Type *ValTy = U->getOperand(0)->getType();
      return TargetTTI->getMemoryOpCost(Opcode, ValTy, SI->getAlign(),
                                        SI->getPointerAddressSpace(),
                                        CostKind, I);
    }
    case Instruction::Load: {
      auto *LI = cast<LoadInst>(U);
      Type *LoadType = U->getType();
      // If there is a non-register sized type, the cost estimation may expand
      // it to be several instructions to load into multiple registers on the
      // target. But, if the only use of the load is a trunc instruction to a
      // register sized type, the instruction selector can combine these
      // instructions to be a single load. So, in this case, we use the
      // destination type of the trunc instruction rather than the load to
      // accurately estimate the cost of this load instruction.
      if (CostKind == TTI::TCK_CodeSize && LI->hasOneUse() &&
          !LoadType->isVectorTy()) {
        if (const TruncInst *TI = dyn_cast<TruncInst>(*LI->user_begin()))
          LoadType = TI->getDestTy();
      }
      return TargetTTI->getMemoryOpCost(Opcode, LoadType, LI->getAlign(),
                                        LI->getPointerAddressSpace(),
                                        CostKind, I);
    }
    case Instruction::Select: {
      const Value *Op0, *Op1;
      if (match(U, m_LogicalAnd(m_Value(Op0), m_Value(Op1))) ||
          match(U, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) {
        // select x, y, false --> x & y
        // select x, true, y --> x | y
        TTI::OperandValueProperties Op1VP = TTI::OP_None;
        TTI::OperandValueProperties Op2VP = TTI::OP_None;
        TTI::OperandValueKind Op1VK = TTI::getOperandInfo(Op0, Op1VP);
        TTI::OperandValueKind Op2VK = TTI::getOperandInfo(Op1, Op2VP);
        assert(Op0->getType()->getScalarSizeInBits() == 1 &&
               Op1->getType()->getScalarSizeInBits() == 1);

        SmallVector<const Value *, 2> Operands{Op0, Op1};
        return TargetTTI->getArithmeticInstrCost(
            match(U, m_LogicalOr()) ? Instruction::Or : Instruction::And, Ty,
            CostKind, Op1VK, Op2VK, Op1VP, Op2VP, Operands, I);
      }
      Type *CondTy = U->getOperand(0)->getType();
      return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy,
                                           CmpInst::BAD_ICMP_PREDICATE,
                                           CostKind, I);
    }
    case Instruction::ICmp:
    case Instruction::FCmp: {
      Type *ValTy = U->getOperand(0)->getType();
      // TODO: Also handle ICmp/FCmp constant expressions.
      return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(),
                                           I ? cast<CmpInst>(I)->getPredicate()
                                             : CmpInst::BAD_ICMP_PREDICATE,
                                           CostKind, I);
    }
    case Instruction::InsertElement: {
      auto *IE = dyn_cast<InsertElementInst>(U);
      if (!IE)
        return TTI::TCC_Basic; // FIXME
      unsigned Idx = -1;
      if (auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2)))
        if (CI->getValue().getActiveBits() <= 32)
          Idx = CI->getZExtValue();
      return TargetTTI->getVectorInstrCost(Opcode, Ty, Idx);
    }
    case Instruction::ShuffleVector: {
      auto *Shuffle = dyn_cast<ShuffleVectorInst>(U);
      if (!Shuffle)
        return TTI::TCC_Basic; // FIXME

      auto *VecTy = cast<VectorType>(U->getType());
      auto *VecSrcTy = cast<VectorType>(U->getOperand(0)->getType());
      int NumSubElts, SubIndex;

      if (Shuffle->changesLength()) {
        // Treat a 'subvector widening' as a free shuffle.
        if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding())
          return 0;

        if (Shuffle->isExtractSubvectorMask(SubIndex))
          return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy,
                                           Shuffle->getShuffleMask(), SubIndex,
                                           VecTy);

        if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
          return TargetTTI->getShuffleCost(
              TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(),
              SubIndex,
              FixedVectorType::get(VecTy->getScalarType(), NumSubElts));

        int ReplicationFactor, VF;
        if (Shuffle->isReplicationMask(ReplicationFactor, VF)) {
          APInt DemandedDstElts =
              APInt::getNullValue(Shuffle->getShuffleMask().size());
          for (auto I : enumerate(Shuffle->getShuffleMask())) {
            if (I.value() != UndefMaskElem)
              DemandedDstElts.setBit(I.index());
          }
          return TargetTTI->getReplicationShuffleCost(
              VecSrcTy->getElementType(), ReplicationFactor, VF,
              DemandedDstElts, CostKind);
        }

        return CostKind == TTI::TCK_RecipThroughput ? -1 : 1;
      }

      if (Shuffle->isIdentity())
        return 0;

      if (Shuffle->isReverse())
        return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy,
                                         Shuffle->getShuffleMask(), 0, nullptr);

      if (Shuffle->isSelect())
        return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy,
                                         Shuffle->getShuffleMask(), 0, nullptr);

      if (Shuffle->isTranspose())
        return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy,
                                         Shuffle->getShuffleMask(), 0, nullptr);

      if (Shuffle->isZeroEltSplat())
        return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy,
                                         Shuffle->getShuffleMask(), 0, nullptr);

      if (Shuffle->isSingleSource())
        return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy,
                                         Shuffle->getShuffleMask(), 0, nullptr);

      if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
        return TargetTTI->getShuffleCost(
            TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(), SubIndex,
            FixedVectorType::get(VecTy->getScalarType(), NumSubElts));

      return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy,
                                       Shuffle->getShuffleMask(), 0, nullptr);
    }
    case Instruction::ExtractElement: {
      auto *EEI = dyn_cast<ExtractElementInst>(U);
      if (!EEI)
        return TTI::TCC_Basic; // FIXME
      unsigned Idx = -1;
      if (auto *CI = dyn_cast<ConstantInt>(EEI->getOperand(1)))
        if (CI->getValue().getActiveBits() <= 32)
          Idx = CI->getZExtValue();
      Type *DstTy = U->getOperand(0)->getType();
      return TargetTTI->getVectorInstrCost(Opcode, DstTy, Idx);
    }
    }
    // By default, just classify everything as 'basic'.
    return TTI::TCC_Basic;
  }

  InstructionCost getInstructionLatency(const Instruction *I) {
    SmallVector<const Value *, 4> Operands(I->operand_values());
    if (getUserCost(I, Operands, TTI::TCK_Latency) == TTI::TCC_Free)
      return 0;

    if (isa<LoadInst>(I))
      return 4;

    Type *DstTy = I->getType();

    // Usually an intrinsic is a simple instruction.
    // A real function call is much slower.
    if (auto *CI = dyn_cast<CallInst>(I)) {
      const Function *F = CI->getCalledFunction();
      if (!F || static_cast<T *>(this)->isLoweredToCall(F))
        return 40;
      // Some intrinsics return a value and a flag; we use the value type
      // to decide its latency.
      if (StructType *StructTy = dyn_cast<StructType>(DstTy))
        DstTy = StructTy->getElementType(0);
      // Fall through to simple instructions.
    }

    if (VectorType *VectorTy = dyn_cast<VectorType>(DstTy))
      DstTy = VectorTy->getElementType();
    if (DstTy->isFloatingPointTy())
      return 3;

    return 1;
  }
};
} // namespace llvm

#endif

#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif