  1. #pragma once
  2. #ifdef __GNUC__
  3. #pragma GCC diagnostic push
  4. #pragma GCC diagnostic ignored "-Wunused-parameter"
  5. #endif
  6. //===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===//
  7. //
  8. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  9. // See https://llvm.org/LICENSE.txt for license information.
  10. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  11. //
  12. //===----------------------------------------------------------------------===//
  13. /// \file
  14. /// This file provides helpers for the implementation of
  15. /// a TargetTransformInfo-conforming class.
  16. ///
  17. //===----------------------------------------------------------------------===//
  18. #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
  19. #define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
  20. #include "llvm/Analysis/ScalarEvolutionExpressions.h"
  21. #include "llvm/Analysis/TargetTransformInfo.h"
  22. #include "llvm/Analysis/VectorUtils.h"
  23. #include "llvm/IR/DataLayout.h"
  24. #include "llvm/IR/GetElementPtrTypeIterator.h"
  25. #include "llvm/IR/IntrinsicInst.h"
  26. #include "llvm/IR/Operator.h"
  27. #include "llvm/IR/PatternMatch.h"
  28. #include <optional>
  29. #include <utility>
  30. namespace llvm {
  31. class Function;
  32. /// Base class for use as a mix-in that aids implementing
  33. /// a TargetTransformInfo-compatible class.
  34. class TargetTransformInfoImplBase {
  35. protected:
  36. typedef TargetTransformInfo TTI;
  37. const DataLayout &DL;
  38. explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {}
  39. public:
  40. // Provide value semantics. MSVC requires that we spell all of these out.
  41. TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg) = default;
  42. TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {}
  43. const DataLayout &getDataLayout() const { return DL; }
  44. InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
  45. ArrayRef<const Value *> Operands,
  46. TTI::TargetCostKind CostKind) const {
  47. // In the basic model, we just assume that all-constant GEPs will be folded
  48. // into their uses via addressing modes.
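// For example (illustrative IR, not from the source): a GEP such as
//   %p = getelementptr inbounds i32, ptr @g, i64 4
// that feeds a load can usually be folded into the load's addressing mode, so
// it is costed as TCC_Free here; a GEP with a non-constant index falls back to
// TCC_Basic.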
  49. for (const Value *Operand : Operands)
  50. if (!isa<Constant>(Operand))
  51. return TTI::TCC_Basic;
  52. return TTI::TCC_Free;
  53. }
  54. unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
  55. unsigned &JTSize,
  56. ProfileSummaryInfo *PSI,
  57. BlockFrequencyInfo *BFI) const {
  58. (void)PSI;
  59. (void)BFI;
  60. JTSize = 0;
  61. return SI.getNumCases();
  62. }
  63. unsigned getInliningThresholdMultiplier() const { return 1; }
  64. unsigned adjustInliningThreshold(const CallBase *CB) const { return 0; }
  65. int getInlinerVectorBonusPercent() const { return 150; }
  66. InstructionCost getMemcpyCost(const Instruction *I) const {
  67. return TTI::TCC_Expensive;
  68. }
  69. // Although this default value is arbitrary, it is not random: it is assumed
  70. // that a condition which evaluates the same way more often than this
  71. // percentage is best represented as control flow. Therefore, the default
  72. // value N should be set such that the win from N% correctly predicted
  73. // executions outweighs the loss from (100 - N)% mispredicted executions for
  74. // the majority of intended targets.
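// With the default below (99/100), a branch is treated as predictable only
// when one outcome is expected at least 99% of the time.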
  75. BranchProbability getPredictableBranchThreshold() const {
  76. return BranchProbability(99, 100);
  77. }
  78. bool hasBranchDivergence() const { return false; }
  79. bool useGPUDivergenceAnalysis() const { return false; }
  80. bool isSourceOfDivergence(const Value *V) const { return false; }
  81. bool isAlwaysUniform(const Value *V) const { return false; }
  82. unsigned getFlatAddressSpace() const { return -1; }
  83. bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
  84. Intrinsic::ID IID) const {
  85. return false;
  86. }
  87. bool isNoopAddrSpaceCast(unsigned, unsigned) const { return false; }
  88. bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
  89. return AS == 0;
  90. }
  91. unsigned getAssumedAddrSpace(const Value *V) const { return -1; }
  92. bool isSingleThreaded() const { return false; }
  93. std::pair<const Value *, unsigned>
  94. getPredicatedAddrSpace(const Value *V) const {
  95. return std::make_pair(nullptr, -1);
  96. }
  97. Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
  98. Value *NewV) const {
  99. return nullptr;
  100. }
  101. bool isLoweredToCall(const Function *F) const {
  102. assert(F && "A concrete function must be provided to this routine.");
  103. // FIXME: These should almost certainly not be handled here, and instead
  104. // handled with the help of TLI or the target itself. This was largely
  105. // ported from existing analysis heuristics here so that such refactorings
  106. // can take place in the future.
  107. if (F->isIntrinsic())
  108. return false;
  109. if (F->hasLocalLinkage() || !F->hasName())
  110. return true;
  111. StringRef Name = F->getName();
  112. // These will all likely lower to a single selection DAG node.
  113. if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
  114. Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" ||
  115. Name == "fmin" || Name == "fminf" || Name == "fminl" ||
  116. Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
  117. Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" ||
  118. Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
  119. return false;
  120. // These are all likely to be optimized into something smaller.
  121. if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
  122. Name == "exp2l" || Name == "exp2f" || Name == "floor" ||
  123. Name == "floorf" || Name == "ceil" || Name == "round" ||
  124. Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" ||
  125. Name == "llabs")
  126. return false;
  127. return true;
  128. }
  129. bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
  130. AssumptionCache &AC, TargetLibraryInfo *LibInfo,
  131. HardwareLoopInfo &HWLoopInfo) const {
  132. return false;
  133. }
  134. bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
  135. AssumptionCache &AC, TargetLibraryInfo *TLI,
  136. DominatorTree *DT,
  137. LoopVectorizationLegality *LVL,
  138. InterleavedAccessInfo *IAI) const {
  139. return false;
  140. }
  141. PredicationStyle emitGetActiveLaneMask() const {
  142. return PredicationStyle::None;
  143. }
  144. std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
  145. IntrinsicInst &II) const {
  146. return std::nullopt;
  147. }
  148. std::optional<Value *>
  149. simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
  150. APInt DemandedMask, KnownBits &Known,
  151. bool &KnownBitsComputed) const {
  152. return std::nullopt;
  153. }
  154. std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
  155. InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
  156. APInt &UndefElts2, APInt &UndefElts3,
  157. std::function<void(Instruction *, unsigned, APInt, APInt &)>
  158. SimplifyAndSetOp) const {
  159. return std::nullopt;
  160. }
  161. void getUnrollingPreferences(Loop *, ScalarEvolution &,
  162. TTI::UnrollingPreferences &,
  163. OptimizationRemarkEmitter *) const {}
  164. void getPeelingPreferences(Loop *, ScalarEvolution &,
  165. TTI::PeelingPreferences &) const {}
  166. bool isLegalAddImmediate(int64_t Imm) const { return false; }
  167. bool isLegalICmpImmediate(int64_t Imm) const { return false; }
  168. bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
  169. bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
  170. Instruction *I = nullptr) const {
  171. // Guess that only reg and reg+reg addressing is allowed. This heuristic is
  172. // taken from the implementation of LSR.
  173. return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
  174. }
  175. bool isLSRCostLess(const TTI::LSRCost &C1, const TTI::LSRCost &C2) const {
  176. return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
  177. C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
  178. std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
  179. C2.ScaleCost, C2.ImmCost, C2.SetupCost);
  180. }
  181. bool isNumRegsMajorCostOfLSR() const { return true; }
  182. bool isProfitableLSRChainElement(Instruction *I) const { return false; }
  183. bool canMacroFuseCmp() const { return false; }
  184. bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
  185. DominatorTree *DT, AssumptionCache *AC,
  186. TargetLibraryInfo *LibInfo) const {
  187. return false;
  188. }
  189. TTI::AddressingModeKind
  190. getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const {
  191. return TTI::AMK_None;
  192. }
  193. bool isLegalMaskedStore(Type *DataType, Align Alignment) const {
  194. return false;
  195. }
  196. bool isLegalMaskedLoad(Type *DataType, Align Alignment) const {
  197. return false;
  198. }
  199. bool isLegalNTStore(Type *DataType, Align Alignment) const {
  200. // By default, assume nontemporal memory stores are available for stores
  201. // that are aligned and have a size that is a power of 2.
  202. unsigned DataSize = DL.getTypeStoreSize(DataType);
  203. return Alignment >= DataSize && isPowerOf2_32(DataSize);
  204. }
  205. bool isLegalNTLoad(Type *DataType, Align Alignment) const {
  206. // By default, assume nontemporal memory loads are available for loads that
  207. // are aligned and have a size that is a power of 2.
  208. unsigned DataSize = DL.getTypeStoreSize(DataType);
  209. return Alignment >= DataSize && isPowerOf2_32(DataSize);
  210. }
  211. bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const {
  212. return false;
  213. }
  214. bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
  215. return false;
  216. }
  217. bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
  218. return false;
  219. }
  220. bool forceScalarizeMaskedGather(VectorType *DataType, Align Alignment) const {
  221. return false;
  222. }
  223. bool forceScalarizeMaskedScatter(VectorType *DataType,
  224. Align Alignment) const {
  225. return false;
  226. }
  227. bool isLegalMaskedCompressStore(Type *DataType) const { return false; }
  228. bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
  229. const SmallBitVector &OpcodeMask) const {
  230. return false;
  231. }
  232. bool isLegalMaskedExpandLoad(Type *DataType) const { return false; }
  233. bool enableOrderedReductions() const { return false; }
  234. bool hasDivRemOp(Type *DataType, bool IsSigned) const { return false; }
  235. bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const {
  236. return false;
  237. }
  238. bool prefersVectorizedAddressing() const { return true; }
  239. InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
  240. int64_t BaseOffset, bool HasBaseReg,
  241. int64_t Scale,
  242. unsigned AddrSpace) const {
  243. // Guess that all legal addressing mode are free.
  244. if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
  245. AddrSpace))
  246. return 0;
  247. return -1;
  248. }
  249. bool LSRWithInstrQueries() const { return false; }
  250. bool isTruncateFree(Type *Ty1, Type *Ty2) const { return false; }
  251. bool isProfitableToHoist(Instruction *I) const { return true; }
  252. bool useAA() const { return false; }
  253. bool isTypeLegal(Type *Ty) const { return false; }
  254. unsigned getRegUsageForType(Type *Ty) const { return 1; }
  255. bool shouldBuildLookupTables() const { return true; }
  256. bool shouldBuildLookupTablesForConstant(Constant *C) const { return true; }
  257. bool shouldBuildRelLookupTables() const { return false; }
  258. bool useColdCCForColdCall(Function &F) const { return false; }
  259. InstructionCost getScalarizationOverhead(VectorType *Ty,
  260. const APInt &DemandedElts,
  261. bool Insert, bool Extract,
  262. TTI::TargetCostKind CostKind) const {
  263. return 0;
  264. }
  265. InstructionCost
  266. getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
  267. ArrayRef<Type *> Tys,
  268. TTI::TargetCostKind CostKind) const {
  269. return 0;
  270. }
  271. bool supportsEfficientVectorElementLoadStore() const { return false; }
  272. bool supportsTailCalls() const { return true; }
  273. bool supportsTailCallFor(const CallBase *CB) const {
  274. return supportsTailCalls();
  275. }
  276. bool enableAggressiveInterleaving(bool LoopHasReductions) const {
  277. return false;
  278. }
  279. TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
  280. bool IsZeroCmp) const {
  281. return {};
  282. }
  283. bool enableSelectOptimize() const { return true; }
  284. bool enableInterleavedAccessVectorization() const { return false; }
  285. bool enableMaskedInterleavedAccessVectorization() const { return false; }
  286. bool isFPVectorizationPotentiallyUnsafe() const { return false; }
  287. bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
  288. unsigned AddressSpace, Align Alignment,
  289. unsigned *Fast) const {
  290. return false;
  291. }
  292. TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const {
  293. return TTI::PSK_Software;
  294. }
  295. bool haveFastSqrt(Type *Ty) const { return false; }
  296. bool isExpensiveToSpeculativelyExecute(const Instruction *I) { return true; }
  297. bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const { return true; }
  298. InstructionCost getFPOpCost(Type *Ty) const {
  299. return TargetTransformInfo::TCC_Basic;
  300. }
  301. InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
  302. const APInt &Imm, Type *Ty) const {
  303. return 0;
  304. }
  305. InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
  306. TTI::TargetCostKind CostKind) const {
  307. return TTI::TCC_Basic;
  308. }
  309. InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
  310. const APInt &Imm, Type *Ty,
  311. TTI::TargetCostKind CostKind,
  312. Instruction *Inst = nullptr) const {
  313. return TTI::TCC_Free;
  314. }
  315. InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
  316. const APInt &Imm, Type *Ty,
  317. TTI::TargetCostKind CostKind) const {
  318. return TTI::TCC_Free;
  319. }
  320. unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; }
  321. unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const {
  322. return Vector ? 1 : 0;
  323. }
  324. const char *getRegisterClassName(unsigned ClassID) const {
  325. switch (ClassID) {
  326. default:
  327. return "Generic::Unknown Register Class";
  328. case 0:
  329. return "Generic::ScalarRC";
  330. case 1:
  331. return "Generic::VectorRC";
  332. }
  333. }
  334. TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
  335. return TypeSize::getFixed(32);
  336. }
  337. unsigned getMinVectorRegisterBitWidth() const { return 128; }
  338. std::optional<unsigned> getMaxVScale() const { return std::nullopt; }
  339. std::optional<unsigned> getVScaleForTuning() const { return std::nullopt; }
  340. bool
  341. shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const {
  342. return false;
  343. }
  344. ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const {
  345. return ElementCount::get(0, IsScalable);
  346. }
  347. unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const { return 0; }
  348. unsigned getStoreMinimumVF(unsigned VF, Type *, Type *) const { return VF; }
  349. bool shouldConsiderAddressTypePromotion(
  350. const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
  351. AllowPromotionWithoutCommonHeader = false;
  352. return false;
  353. }
  354. unsigned getCacheLineSize() const { return 0; }
  355. std::optional<unsigned>
  356. getCacheSize(TargetTransformInfo::CacheLevel Level) const {
  357. switch (Level) {
  358. case TargetTransformInfo::CacheLevel::L1D:
  359. [[fallthrough]];
  360. case TargetTransformInfo::CacheLevel::L2D:
  361. return std::nullopt;
  362. }
  363. llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  364. }
  365. std::optional<unsigned>
  366. getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
  367. switch (Level) {
  368. case TargetTransformInfo::CacheLevel::L1D:
  369. [[fallthrough]];
  370. case TargetTransformInfo::CacheLevel::L2D:
  371. return std::nullopt;
  372. }
  373. llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  374. }
  375. unsigned getPrefetchDistance() const { return 0; }
  376. unsigned getMinPrefetchStride(unsigned NumMemAccesses,
  377. unsigned NumStridedMemAccesses,
  378. unsigned NumPrefetches, bool HasCall) const {
  379. return 1;
  380. }
  381. unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; }
  382. bool enableWritePrefetching() const { return false; }
  383. bool shouldPrefetchAddressSpace(unsigned AS) const { return !AS; }
  384. unsigned getMaxInterleaveFactor(unsigned VF) const { return 1; }
  385. InstructionCost getArithmeticInstrCost(
  386. unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
  387. TTI::OperandValueInfo Opd1Info, TTI::OperandValueInfo Opd2Info,
  388. ArrayRef<const Value *> Args,
  389. const Instruction *CxtI = nullptr) const {
  390. // FIXME: A number of transformation tests seem to require these values,
  391. // which seems a little odd given how arbitrary they are.
  392. switch (Opcode) {
  393. default:
  394. break;
  395. case Instruction::FDiv:
  396. case Instruction::FRem:
  397. case Instruction::SDiv:
  398. case Instruction::SRem:
  399. case Instruction::UDiv:
  400. case Instruction::URem:
  401. // FIXME: Unlikely to be true for CodeSize.
  402. return TTI::TCC_Expensive;
  403. }
  404. // Assume a 3cy latency for fp arithmetic ops.
  405. if (CostKind == TTI::TCK_Latency)
  406. if (Ty->getScalarType()->isFloatingPointTy())
  407. return 3;
  408. return 1;
  409. }
  410. InstructionCost
  411. getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, ArrayRef<int> Mask,
  412. TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
  413. ArrayRef<const Value *> Args = std::nullopt) const {
  414. return 1;
  415. }
  416. InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
  417. TTI::CastContextHint CCH,
  418. TTI::TargetCostKind CostKind,
  419. const Instruction *I) const {
  420. switch (Opcode) {
  421. default:
  422. break;
  423. case Instruction::IntToPtr: {
  424. unsigned SrcSize = Src->getScalarSizeInBits();
  425. if (DL.isLegalInteger(SrcSize) &&
  426. SrcSize <= DL.getPointerTypeSizeInBits(Dst))
  427. return 0;
  428. break;
  429. }
  430. case Instruction::PtrToInt: {
  431. unsigned DstSize = Dst->getScalarSizeInBits();
  432. if (DL.isLegalInteger(DstSize) &&
  433. DstSize >= DL.getPointerTypeSizeInBits(Src))
  434. return 0;
  435. break;
  436. }
  437. case Instruction::BitCast:
  438. if (Dst == Src || (Dst->isPointerTy() && Src->isPointerTy()))
  439. // Identity and pointer-to-pointer casts are free.
  440. return 0;
  441. break;
  442. case Instruction::Trunc: {
  443. // trunc to a native type is free (assuming the target has compare and
  444. // shift-right of the same width).
  445. TypeSize DstSize = DL.getTypeSizeInBits(Dst);
  446. if (!DstSize.isScalable() && DL.isLegalInteger(DstSize.getFixedValue()))
  447. return 0;
  448. break;
  449. }
  450. }
  451. return 1;
  452. }
  453. InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
  454. VectorType *VecTy,
  455. unsigned Index) const {
  456. return 1;
  457. }
  458. InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
  459. const Instruction *I = nullptr) const {
  460. // A phi would be free, unless we're costing the throughput because it
  461. // will require a register.
  462. if (Opcode == Instruction::PHI && CostKind != TTI::TCK_RecipThroughput)
  463. return 0;
  464. return 1;
  465. }
  466. InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
  467. CmpInst::Predicate VecPred,
  468. TTI::TargetCostKind CostKind,
  469. const Instruction *I) const {
  470. return 1;
  471. }
  472. InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
  473. TTI::TargetCostKind CostKind,
  474. unsigned Index, Value *Op0,
  475. Value *Op1) const {
  476. return 1;
  477. }
  478. InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
  479. TTI::TargetCostKind CostKind,
  480. unsigned Index) const {
  481. return 1;
  482. }
  483. unsigned getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
  484. const APInt &DemandedDstElts,
  485. TTI::TargetCostKind CostKind) {
  486. return 1;
  487. }
  488. InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
  489. unsigned AddressSpace,
  490. TTI::TargetCostKind CostKind,
  491. TTI::OperandValueInfo OpInfo,
  492. const Instruction *I) const {
  493. return 1;
  494. }
  495. InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
  496. unsigned AddressSpace,
  497. TTI::TargetCostKind CostKind,
  498. const Instruction *I) const {
  499. return 1;
  500. }
  501. InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
  502. Align Alignment, unsigned AddressSpace,
  503. TTI::TargetCostKind CostKind) const {
  504. return 1;
  505. }
  506. InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
  507. const Value *Ptr, bool VariableMask,
  508. Align Alignment,
  509. TTI::TargetCostKind CostKind,
  510. const Instruction *I = nullptr) const {
  511. return 1;
  512. }
  513. unsigned getInterleavedMemoryOpCost(
  514. unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
  515. Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
  516. bool UseMaskForCond, bool UseMaskForGaps) const {
  517. return 1;
  518. }
  519. InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
  520. TTI::TargetCostKind CostKind) const {
  521. switch (ICA.getID()) {
  522. default:
  523. break;
  524. case Intrinsic::annotation:
  525. case Intrinsic::assume:
  526. case Intrinsic::sideeffect:
  527. case Intrinsic::pseudoprobe:
  528. case Intrinsic::arithmetic_fence:
  529. case Intrinsic::dbg_declare:
  530. case Intrinsic::dbg_value:
  531. case Intrinsic::dbg_label:
  532. case Intrinsic::invariant_start:
  533. case Intrinsic::invariant_end:
  534. case Intrinsic::launder_invariant_group:
  535. case Intrinsic::strip_invariant_group:
  536. case Intrinsic::is_constant:
  537. case Intrinsic::lifetime_start:
  538. case Intrinsic::lifetime_end:
  539. case Intrinsic::experimental_noalias_scope_decl:
  540. case Intrinsic::objectsize:
  541. case Intrinsic::ptr_annotation:
  542. case Intrinsic::var_annotation:
  543. case Intrinsic::experimental_gc_result:
  544. case Intrinsic::experimental_gc_relocate:
  545. case Intrinsic::coro_alloc:
  546. case Intrinsic::coro_begin:
  547. case Intrinsic::coro_free:
  548. case Intrinsic::coro_end:
  549. case Intrinsic::coro_frame:
  550. case Intrinsic::coro_size:
  551. case Intrinsic::coro_align:
  552. case Intrinsic::coro_suspend:
  553. case Intrinsic::coro_subfn_addr:
  554. case Intrinsic::threadlocal_address:
  555. // These intrinsics don't actually represent code after lowering.
  556. return 0;
  557. }
  558. return 1;
  559. }
  560. InstructionCost getCallInstrCost(Function *F, Type *RetTy,
  561. ArrayRef<Type *> Tys,
  562. TTI::TargetCostKind CostKind) const {
  563. return 1;
  564. }
  565. // Assume that we have a register of the right size for the type.
  566. unsigned getNumberOfParts(Type *Tp) const { return 1; }
  567. InstructionCost getAddressComputationCost(Type *Tp, ScalarEvolution *,
  568. const SCEV *) const {
  569. return 0;
  570. }
  571. InstructionCost getArithmeticReductionCost(unsigned, VectorType *,
  572. std::optional<FastMathFlags> FMF,
  573. TTI::TargetCostKind) const {
  574. return 1;
  575. }
  576. InstructionCost getMinMaxReductionCost(VectorType *, VectorType *, bool,
  577. TTI::TargetCostKind) const {
  578. return 1;
  579. }
  580. InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned,
  581. Type *ResTy, VectorType *Ty,
  582. std::optional<FastMathFlags> FMF,
  583. TTI::TargetCostKind CostKind) const {
  584. return 1;
  585. }
  586. InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy,
  587. VectorType *Ty,
  588. TTI::TargetCostKind CostKind) const {
  589. return 1;
  590. }
  591. InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
  592. return 0;
  593. }
  594. bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const {
  595. return false;
  596. }
  597. unsigned getAtomicMemIntrinsicMaxElementSize() const {
  598. // Note for overrides: You must ensure for all element unordered-atomic
  599. // memory intrinsics that all power-of-2 element sizes up to, and
  600. // including, the return value of this method have a corresponding
  601. // runtime lib call. These runtime lib call definitions can be found
  602. // in RuntimeLibcalls.h
  603. return 0;
  604. }
  605. Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
  606. Type *ExpectedType) const {
  607. return nullptr;
  608. }
  609. Type *
  610. getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
  611. unsigned SrcAddrSpace, unsigned DestAddrSpace,
  612. unsigned SrcAlign, unsigned DestAlign,
  613. std::optional<uint32_t> AtomicElementSize) const {
  614. return AtomicElementSize ? Type::getIntNTy(Context, *AtomicElementSize * 8)
  615. : Type::getInt8Ty(Context);
  616. }
  617. void getMemcpyLoopResidualLoweringType(
  618. SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
  619. unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
  620. unsigned SrcAlign, unsigned DestAlign,
  621. std::optional<uint32_t> AtomicCpySize) const {
  622. unsigned OpSizeInBytes = AtomicCpySize ? *AtomicCpySize : 1;
  623. Type *OpType = Type::getIntNTy(Context, OpSizeInBytes * 8);
  624. for (unsigned i = 0; i != RemainingBytes; i += OpSizeInBytes)
  625. OpsOut.push_back(OpType);
  626. }
  627. bool areInlineCompatible(const Function *Caller,
  628. const Function *Callee) const {
  629. return (Caller->getFnAttribute("target-cpu") ==
  630. Callee->getFnAttribute("target-cpu")) &&
  631. (Caller->getFnAttribute("target-features") ==
  632. Callee->getFnAttribute("target-features"));
  633. }
  634. bool areTypesABICompatible(const Function *Caller, const Function *Callee,
  635. const ArrayRef<Type *> &Types) const {
  636. return (Caller->getFnAttribute("target-cpu") ==
  637. Callee->getFnAttribute("target-cpu")) &&
  638. (Caller->getFnAttribute("target-features") ==
  639. Callee->getFnAttribute("target-features"));
  640. }
  641. bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty,
  642. const DataLayout &DL) const {
  643. return false;
  644. }
  645. bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty,
  646. const DataLayout &DL) const {
  647. return false;
  648. }
  649. unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; }
  650. bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; }
  651. bool isLegalToVectorizeStore(StoreInst *SI) const { return true; }
  652. bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
  653. unsigned AddrSpace) const {
  654. return true;
  655. }
  656. bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
  657. unsigned AddrSpace) const {
  658. return true;
  659. }
  660. bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
  661. ElementCount VF) const {
  662. return true;
  663. }
  664. bool isElementTypeLegalForScalableVector(Type *Ty) const { return true; }
  665. unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
  666. unsigned ChainSizeInBytes,
  667. VectorType *VecTy) const {
  668. return VF;
  669. }
  670. unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
  671. unsigned ChainSizeInBytes,
  672. VectorType *VecTy) const {
  673. return VF;
  674. }
  675. bool preferInLoopReduction(unsigned Opcode, Type *Ty,
  676. TTI::ReductionFlags Flags) const {
  677. return false;
  678. }
  679. bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
  680. TTI::ReductionFlags Flags) const {
  681. return false;
  682. }
  683. bool preferEpilogueVectorization() const {
  684. return true;
  685. }
  686. bool shouldExpandReduction(const IntrinsicInst *II) const { return true; }
  687. unsigned getGISelRematGlobalCost() const { return 1; }
  688. unsigned getMinTripCountTailFoldingThreshold() const { return 0; }
  689. bool supportsScalableVectors() const { return false; }
  690. bool enableScalableVectorization() const { return false; }
  691. bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
  692. Align Alignment) const {
  693. return false;
  694. }
  695. TargetTransformInfo::VPLegalization
  696. getVPLegalizationStrategy(const VPIntrinsic &PI) const {
  697. return TargetTransformInfo::VPLegalization(
  698. /* EVLParamStrategy */ TargetTransformInfo::VPLegalization::Discard,
  699. /* OperatorStrategy */ TargetTransformInfo::VPLegalization::Convert);
  700. }
  701. protected:
  702. // Obtain the minimum required size to hold the value (without the sign).
  703. // In the case of a vector, it returns the min required size for one element.
  704. unsigned minRequiredElementSize(const Value *Val, bool &isSigned) const {
  705. if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
  706. const auto *VectorValue = cast<Constant>(Val);
  707. // For a vector we need to pick the max among the min required
  708. // sizes of its elements.
  709. auto *VT = cast<FixedVectorType>(Val->getType());
  710. // Assume unsigned elements
  711. isSigned = false;
  712. // The max required size is the size of the vector element type
  713. unsigned MaxRequiredSize =
  714. VT->getElementType()->getPrimitiveSizeInBits().getFixedValue();
  715. unsigned MinRequiredSize = 0;
  716. for (unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
  717. if (auto *IntElement =
  718. dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
  719. bool signedElement = IntElement->getValue().isNegative();
  720. // Get the element min required size.
  721. unsigned ElementMinRequiredSize =
  722. IntElement->getValue().getMinSignedBits() - 1;
  723. // If one element is signed then the whole vector is signed.
  724. isSigned |= signedElement;
  725. // Save the max required bit size across all the elements.
  726. MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
  727. } else {
  728. // not an int constant element
  729. return MaxRequiredSize;
  730. }
  731. }
  732. return MinRequiredSize;
  733. }
  734. if (const auto *CI = dyn_cast<ConstantInt>(Val)) {
  735. isSigned = CI->getValue().isNegative();
  736. return CI->getValue().getMinSignedBits() - 1;
  737. }
  738. if (const auto *Cast = dyn_cast<SExtInst>(Val)) {
  739. isSigned = true;
  740. return Cast->getSrcTy()->getScalarSizeInBits() - 1;
  741. }
  742. if (const auto *Cast = dyn_cast<ZExtInst>(Val)) {
  743. isSigned = false;
  744. return Cast->getSrcTy()->getScalarSizeInBits();
  745. }
  746. isSigned = false;
  747. return Val->getType()->getScalarSizeInBits();
  748. }
  749. bool isStridedAccess(const SCEV *Ptr) const {
  750. return Ptr && isa<SCEVAddRecExpr>(Ptr);
  751. }
  752. const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE,
  753. const SCEV *Ptr) const {
  754. if (!isStridedAccess(Ptr))
  755. return nullptr;
  756. const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr);
  757. return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE));
  758. }
  759. bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr,
  760. int64_t MergeDistance) const {
  761. const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);
  762. if (!Step)
  763. return false;
  764. APInt StrideVal = Step->getAPInt();
  765. if (StrideVal.getBitWidth() > 64)
  766. return false;
  767. // FIXME: Need to take absolute value for negative stride case.
  768. return StrideVal.getSExtValue() < MergeDistance;
  769. }
  770. };
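// Illustrative sketch (not part of the original source): a target-specific TTI
// implementation usually derives from the CRTP helper below, either directly or
// via BasicTTIImplBase, and overrides only the hooks it needs; every other
// query falls back to the conservative defaults above. The class name
// "MyTargetTTIImpl" is hypothetical.
//
//   class MyTargetTTIImpl final
//       : public TargetTransformInfoImplCRTPBase<MyTargetTTIImpl> {
//     using BaseT = TargetTransformInfoImplCRTPBase<MyTargetTTIImpl>;
//
//   public:
//     explicit MyTargetTTIImpl(const DataLayout &DL) : BaseT(DL) {}
//
//     // Hooks that are not overridden keep the default behaviour.
//     unsigned getCacheLineSize() const { return 64; }
//     bool enableInterleavedAccessVectorization() const { return true; }
//   };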
  771. /// CRTP base class for use as a mix-in that aids implementing
  772. /// a TargetTransformInfo-compatible class.
  773. template <typename T>
  774. class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
  775. private:
  776. typedef TargetTransformInfoImplBase BaseT;
  777. protected:
  778. explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {}
  779. public:
  780. using BaseT::getGEPCost;
  781. InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
  782. ArrayRef<const Value *> Operands,
  783. TTI::TargetCostKind CostKind) {
  784. assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
  785. assert(cast<PointerType>(Ptr->getType()->getScalarType())
  786. ->isOpaqueOrPointeeTypeMatches(PointeeType) &&
  787. "explicit pointee type doesn't match operand's pointee type");
  788. auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
  789. bool HasBaseReg = (BaseGV == nullptr);
  790. auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
  791. APInt BaseOffset(PtrSizeBits, 0);
  792. int64_t Scale = 0;
  793. auto GTI = gep_type_begin(PointeeType, Operands);
  794. Type *TargetType = nullptr;
  795. // Handle the case where the GEP instruction has only a single operand,
  796. // the base pointer, in which case TargetType remains nullptr.
  797. if (Operands.empty())
  798. return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic;
  799. for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
  800. TargetType = GTI.getIndexedType();
  801. // We assume that the cost of Scalar GEP with constant index and the
  802. // cost of Vector GEP with splat constant index are the same.
  803. const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
  804. if (!ConstIdx)
  805. if (auto Splat = getSplatValue(*I))
  806. ConstIdx = dyn_cast<ConstantInt>(Splat);
  807. if (StructType *STy = GTI.getStructTypeOrNull()) {
  808. // For structures, the index is always a splat or scalar constant.
  809. assert(ConstIdx && "Unexpected GEP index");
  810. uint64_t Field = ConstIdx->getZExtValue();
  811. BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
  812. } else {
  813. // If this operand is a scalable type, bail out early.
  814. // TODO: handle scalable vectors
  815. if (isa<ScalableVectorType>(TargetType))
  816. return TTI::TCC_Basic;
  817. int64_t ElementSize =
  818. DL.getTypeAllocSize(GTI.getIndexedType()).getFixedValue();
  819. if (ConstIdx) {
  820. BaseOffset +=
  821. ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
  822. } else {
  823. // Needs scale register.
  824. if (Scale != 0)
  825. // No addressing mode takes two scale registers.
  826. return TTI::TCC_Basic;
  827. Scale = ElementSize;
  828. }
  829. }
  830. }
  831. if (static_cast<T *>(this)->isLegalAddressingMode(
  832. TargetType, const_cast<GlobalValue *>(BaseGV),
  833. BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale,
  834. Ptr->getType()->getPointerAddressSpace()))
  835. return TTI::TCC_Free;
  836. return TTI::TCC_Basic;
  837. }
  838. InstructionCost getInstructionCost(const User *U,
  839. ArrayRef<const Value *> Operands,
  840. TTI::TargetCostKind CostKind) {
  841. using namespace llvm::PatternMatch;
  842. auto *TargetTTI = static_cast<T *>(this);
  843. // Handle non-intrinsic calls, invokes, and callbr.
  844. // FIXME: Unlikely to be true for anything but CodeSize.
  845. auto *CB = dyn_cast<CallBase>(U);
  846. if (CB && !isa<IntrinsicInst>(U)) {
  847. if (const Function *F = CB->getCalledFunction()) {
  848. if (!TargetTTI->isLoweredToCall(F))
  849. return TTI::TCC_Basic; // Give a basic cost if it will be lowered
  850. return TTI::TCC_Basic * (F->getFunctionType()->getNumParams() + 1);
  851. }
  852. // For indirect or other calls, scale cost by number of arguments.
  853. return TTI::TCC_Basic * (CB->arg_size() + 1);
  854. }
  855. Type *Ty = U->getType();
  856. unsigned Opcode = Operator::getOpcode(U);
  857. auto *I = dyn_cast<Instruction>(U);
  858. switch (Opcode) {
  859. default:
  860. break;
  861. case Instruction::Call: {
  862. assert(isa<IntrinsicInst>(U) && "Unexpected non-intrinsic call");
  863. auto *Intrinsic = cast<IntrinsicInst>(U);
  864. IntrinsicCostAttributes CostAttrs(Intrinsic->getIntrinsicID(), *CB);
  865. return TargetTTI->getIntrinsicInstrCost(CostAttrs, CostKind);
  866. }
  867. case Instruction::Br:
  868. case Instruction::Ret:
  869. case Instruction::PHI:
  870. case Instruction::Switch:
  871. return TargetTTI->getCFInstrCost(Opcode, CostKind, I);
  872. case Instruction::ExtractValue:
  873. case Instruction::Freeze:
  874. return TTI::TCC_Free;
  875. case Instruction::Alloca:
  876. if (cast<AllocaInst>(U)->isStaticAlloca())
  877. return TTI::TCC_Free;
  878. break;
  879. case Instruction::GetElementPtr: {
  880. const auto *GEP = cast<GEPOperator>(U);
  881. return TargetTTI->getGEPCost(GEP->getSourceElementType(),
  882. GEP->getPointerOperand(),
  883. Operands.drop_front(), CostKind);
  884. }
  885. case Instruction::Add:
  886. case Instruction::FAdd:
  887. case Instruction::Sub:
  888. case Instruction::FSub:
  889. case Instruction::Mul:
  890. case Instruction::FMul:
  891. case Instruction::UDiv:
  892. case Instruction::SDiv:
  893. case Instruction::FDiv:
  894. case Instruction::URem:
  895. case Instruction::SRem:
  896. case Instruction::FRem:
  897. case Instruction::Shl:
  898. case Instruction::LShr:
  899. case Instruction::AShr:
  900. case Instruction::And:
  901. case Instruction::Or:
  902. case Instruction::Xor:
  903. case Instruction::FNeg: {
  904. const TTI::OperandValueInfo Op1Info = TTI::getOperandInfo(U->getOperand(0));
  905. TTI::OperandValueInfo Op2Info;
  906. if (Opcode != Instruction::FNeg)
  907. Op2Info = TTI::getOperandInfo(U->getOperand(1));
  908. SmallVector<const Value *, 2> Operands(U->operand_values());
  909. return TargetTTI->getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
  910. Op2Info, Operands, I);
  911. }
  912. case Instruction::IntToPtr:
  913. case Instruction::PtrToInt:
  914. case Instruction::SIToFP:
  915. case Instruction::UIToFP:
  916. case Instruction::FPToUI:
  917. case Instruction::FPToSI:
  918. case Instruction::Trunc:
  919. case Instruction::FPTrunc:
  920. case Instruction::BitCast:
  921. case Instruction::FPExt:
  922. case Instruction::SExt:
  923. case Instruction::ZExt:
  924. case Instruction::AddrSpaceCast: {
  925. Type *OpTy = U->getOperand(0)->getType();
  926. return TargetTTI->getCastInstrCost(
  927. Opcode, Ty, OpTy, TTI::getCastContextHint(I), CostKind, I);
  928. }
  929. case Instruction::Store: {
  930. auto *SI = cast<StoreInst>(U);
  931. Type *ValTy = U->getOperand(0)->getType();
  932. TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(U->getOperand(0));
  933. return TargetTTI->getMemoryOpCost(Opcode, ValTy, SI->getAlign(),
  934. SI->getPointerAddressSpace(), CostKind,
  935. OpInfo, I);
  936. }
  937. case Instruction::Load: {
  938. // FIXME: Arbitrary cost which could come from the backend.
  939. if (CostKind == TTI::TCK_Latency)
  940. return 4;
  941. auto *LI = cast<LoadInst>(U);
  942. Type *LoadType = U->getType();
  943. // If there is a non-register sized type, the cost estimation may expand
  944. // it to be several instructions to load into multiple registers on the
  945. // target. But, if the only use of the load is a trunc instruction to a
  946. // register sized type, the instruction selector can combine these
  947. // instructions to be a single load. So, in this case, we use the
  948. // destination type of the trunc instruction rather than the load to
  949. // accurately estimate the cost of this load instruction.
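// For example (illustrative IR):
//   %v = load i64, ptr %p        ; i64 may not be a register-sized type
//   %t = trunc i64 %v to i32     ; sole use of %v
// can be selected as a single 32-bit load, so the load is costed as i32.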
  950. if (CostKind == TTI::TCK_CodeSize && LI->hasOneUse() &&
  951. !LoadType->isVectorTy()) {
  952. if (const TruncInst *TI = dyn_cast<TruncInst>(*LI->user_begin()))
  953. LoadType = TI->getDestTy();
  954. }
  955. return TargetTTI->getMemoryOpCost(Opcode, LoadType, LI->getAlign(),
  956. LI->getPointerAddressSpace(), CostKind,
  957. {TTI::OK_AnyValue, TTI::OP_None}, I);
  958. }
  959. case Instruction::Select: {
  960. const Value *Op0, *Op1;
  961. if (match(U, m_LogicalAnd(m_Value(Op0), m_Value(Op1))) ||
  962. match(U, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) {
  963. // select x, y, false --> x & y
  964. // select x, true, y --> x | y
  965. const auto Op1Info = TTI::getOperandInfo(Op0);
  966. const auto Op2Info = TTI::getOperandInfo(Op1);
  967. assert(Op0->getType()->getScalarSizeInBits() == 1 &&
  968. Op1->getType()->getScalarSizeInBits() == 1);
  969. SmallVector<const Value *, 2> Operands{Op0, Op1};
  970. return TargetTTI->getArithmeticInstrCost(
  971. match(U, m_LogicalOr()) ? Instruction::Or : Instruction::And, Ty,
  972. CostKind, Op1Info, Op2Info, Operands, I);
  973. }
  974. Type *CondTy = U->getOperand(0)->getType();
  975. return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy,
  976. CmpInst::BAD_ICMP_PREDICATE,
  977. CostKind, I);
  978. }
  979. case Instruction::ICmp:
  980. case Instruction::FCmp: {
  981. Type *ValTy = U->getOperand(0)->getType();
  982. // TODO: Also handle ICmp/FCmp constant expressions.
  983. return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(),
  984. I ? cast<CmpInst>(I)->getPredicate()
  985. : CmpInst::BAD_ICMP_PREDICATE,
  986. CostKind, I);
  987. }
  988. case Instruction::InsertElement: {
  989. auto *IE = dyn_cast<InsertElementInst>(U);
  990. if (!IE)
  991. return TTI::TCC_Basic; // FIXME
  992. unsigned Idx = -1;
  993. if (auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2)))
  994. if (CI->getValue().getActiveBits() <= 32)
  995. Idx = CI->getZExtValue();
  996. return TargetTTI->getVectorInstrCost(*IE, Ty, CostKind, Idx);
  997. }
  998. case Instruction::ShuffleVector: {
  999. auto *Shuffle = dyn_cast<ShuffleVectorInst>(U);
  1000. if (!Shuffle)
  1001. return TTI::TCC_Basic; // FIXME
  1002. auto *VecTy = cast<VectorType>(U->getType());
  1003. auto *VecSrcTy = cast<VectorType>(U->getOperand(0)->getType());
  1004. int NumSubElts, SubIndex;
  1005. if (Shuffle->changesLength()) {
  1006. // Treat a 'subvector widening' as a free shuffle.
  1007. if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding())
  1008. return 0;
  1009. if (Shuffle->isExtractSubvectorMask(SubIndex))
  1010. return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy,
  1011. Shuffle->getShuffleMask(), CostKind,
  1012. SubIndex, VecTy, Operands);
  1013. if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
  1014. return TargetTTI->getShuffleCost(
  1015. TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(),
  1016. CostKind, SubIndex,
  1017. FixedVectorType::get(VecTy->getScalarType(), NumSubElts),
  1018. Operands);
  1019. int ReplicationFactor, VF;
  1020. if (Shuffle->isReplicationMask(ReplicationFactor, VF)) {
  1021. APInt DemandedDstElts =
  1022. APInt::getNullValue(Shuffle->getShuffleMask().size());
  1023. for (auto I : enumerate(Shuffle->getShuffleMask())) {
  1024. if (I.value() != UndefMaskElem)
  1025. DemandedDstElts.setBit(I.index());
  1026. }
  1027. return TargetTTI->getReplicationShuffleCost(
  1028. VecSrcTy->getElementType(), ReplicationFactor, VF,
  1029. DemandedDstElts, CostKind);
  1030. }
  1031. return CostKind == TTI::TCK_RecipThroughput ? -1 : 1;
  1032. }
  1033. if (Shuffle->isIdentity())
  1034. return 0;
  1035. if (Shuffle->isReverse())
  1036. return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy,
  1037. Shuffle->getShuffleMask(), CostKind, 0,
  1038. nullptr, Operands);
  1039. if (Shuffle->isSelect())
  1040. return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy,
  1041. Shuffle->getShuffleMask(), CostKind, 0,
  1042. nullptr, Operands);
  1043. if (Shuffle->isTranspose())
  1044. return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy,
  1045. Shuffle->getShuffleMask(), CostKind, 0,
  1046. nullptr, Operands);
  1047. if (Shuffle->isZeroEltSplat())
  1048. return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy,
  1049. Shuffle->getShuffleMask(), CostKind, 0,
  1050. nullptr, Operands);
  1051. if (Shuffle->isSingleSource())
  1052. return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy,
  1053. Shuffle->getShuffleMask(), CostKind, 0,
  1054. nullptr, Operands);
  1055. if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
  1056. return TargetTTI->getShuffleCost(
  1057. TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(), CostKind,
  1058. SubIndex, FixedVectorType::get(VecTy->getScalarType(), NumSubElts),
  1059. Operands);
  1060. if (Shuffle->isSplice(SubIndex))
  1061. return TargetTTI->getShuffleCost(TTI::SK_Splice, VecTy,
  1062. Shuffle->getShuffleMask(), CostKind,
  1063. SubIndex, nullptr, Operands);
  1064. return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy,
  1065. Shuffle->getShuffleMask(), CostKind, 0,
  1066. nullptr, Operands);
  1067. }
  1068. case Instruction::ExtractElement: {
  1069. auto *EEI = dyn_cast<ExtractElementInst>(U);
  1070. if (!EEI)
  1071. return TTI::TCC_Basic; // FIXME
  1072. unsigned Idx = -1;
  1073. if (auto *CI = dyn_cast<ConstantInt>(EEI->getOperand(1)))
  1074. if (CI->getValue().getActiveBits() <= 32)
  1075. Idx = CI->getZExtValue();
  1076. Type *DstTy = U->getOperand(0)->getType();
  1077. return TargetTTI->getVectorInstrCost(*EEI, DstTy, CostKind, Idx);
  1078. }
  1079. }
  1080. // By default, just classify everything as 'basic' (or -1 to indicate that
  1081. // we don't know the throughput cost).
  1082. return CostKind == TTI::TCK_RecipThroughput ? -1 : TTI::TCC_Basic;
  1083. }
  1084. bool isExpensiveToSpeculativelyExecute(const Instruction *I) {
  1085. auto *TargetTTI = static_cast<T *>(this);
  1086. SmallVector<const Value *, 4> Ops(I->operand_values());
  1087. InstructionCost Cost = TargetTTI->getInstructionCost(
  1088. I, Ops, TargetTransformInfo::TCK_SizeAndLatency);
  1089. return Cost >= TargetTransformInfo::TCC_Expensive;
  1090. }
  1091. };
  1092. } // namespace llvm
  1093. #endif
  1094. #ifdef __GNUC__
  1095. #pragma GCC diagnostic pop
  1096. #endif