#pragma once
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#endif
//===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file provides helpers for the implementation of
/// a TargetTransformInfo-conforming class.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H

#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"

namespace llvm {

/// Base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
class TargetTransformInfoImplBase {
protected:
  typedef TargetTransformInfo TTI;

  const DataLayout &DL;

  explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {}

public:
  // Provide value semantics. MSVC requires that we spell all of these out.
  TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg)
      : DL(Arg.DL) {}
  TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {}

  const DataLayout &getDataLayout() const { return DL; }

  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands,
                 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const {
    // In the basic model, we just assume that all-constant GEPs will be folded
    // into their uses via addressing modes.
    for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx)
      if (!isa<Constant>(Operands[Idx]))
        return TTI::TCC_Basic;

    return TTI::TCC_Free;
  }
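
  // Illustrative note (hypothetical IR, not in the upstream header): under this
  // default model, a GEP whose index operands are all constants, e.g.
  //   %p = getelementptr inbounds i32, i32* %base, i64 4
  // is costed TTI::TCC_Free on the assumption that it folds into the addressing
  // mode of its users, while any variable index makes it TTI::TCC_Basic.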

  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize,
                                            ProfileSummaryInfo *PSI,
                                            BlockFrequencyInfo *BFI) const {
    (void)PSI;
    (void)BFI;
    JTSize = 0;
    return SI.getNumCases();
  }

  unsigned getInliningThresholdMultiplier() const { return 1; }
  unsigned adjustInliningThreshold(const CallBase *CB) const { return 0; }
  int getInlinerVectorBonusPercent() const { return 150; }

  unsigned getMemcpyCost(const Instruction *I) const {
    return TTI::TCC_Expensive;
  }

  bool hasBranchDivergence() const { return false; }
  bool useGPUDivergenceAnalysis() const { return false; }
  bool isSourceOfDivergence(const Value *V) const { return false; }
  bool isAlwaysUniform(const Value *V) const { return false; }
  unsigned getFlatAddressSpace() const { return -1; }

  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const {
    return false;
  }

  bool isNoopAddrSpaceCast(unsigned, unsigned) const { return false; }
  unsigned getAssumedAddrSpace(const Value *V) const { return -1; }

  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const {
    return nullptr;
  }

  bool isLoweredToCall(const Function *F) const {
    assert(F && "A concrete function must be provided to this routine.");

    // FIXME: These should almost certainly not be handled here, and instead
    // handled with the help of TLI or the target itself. This was largely
    // ported from existing analysis heuristics here so that such refactorings
    // can take place in the future.
    if (F->isIntrinsic())
      return false;

    if (F->hasLocalLinkage() || !F->hasName())
      return true;

    StringRef Name = F->getName();

    // These will all likely lower to a single selection DAG node.
    if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
        Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" ||
        Name == "fmin" || Name == "fminf" || Name == "fminl" ||
        Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
        Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" ||
        Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
      return false;

    // These are all likely to be optimized into something smaller.
    if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
        Name == "exp2l" || Name == "exp2f" || Name == "floor" ||
        Name == "floorf" || Name == "ceil" || Name == "round" ||
        Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" ||
        Name == "llabs")
      return false;

    return true;
  }

  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC, TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) const {
    return false;
  }

  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
                                   AssumptionCache &AC, TargetLibraryInfo *TLI,
                                   DominatorTree *DT,
                                   const LoopAccessInfo *LAI) const {
    return false;
  }

  bool emitGetActiveLaneMask() const { return false; }

  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                               IntrinsicInst &II) const {
    return None;
  }

  Optional<Value *>
  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
                                   APInt DemandedMask, KnownBits &Known,
                                   bool &KnownBitsComputed) const {
    return None;
  }

  Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const {
    return None;
  }

  void getUnrollingPreferences(Loop *, ScalarEvolution &,
                               TTI::UnrollingPreferences &) const {}

  void getPeelingPreferences(Loop *, ScalarEvolution &,
                             TTI::PeelingPreferences &) const {}

  bool isLegalAddImmediate(int64_t Imm) const { return false; }
  bool isLegalICmpImmediate(int64_t Imm) const { return false; }

  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
                             Instruction *I = nullptr) const {
    // Guess that only reg and reg+reg addressing is allowed. This heuristic is
    // taken from the implementation of LSR.
    return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
  }

  bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) const {
    return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
                    C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
           std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
                    C2.ScaleCost, C2.ImmCost, C2.SetupCost);
  }
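
  // Illustrative note (hypothetical numbers, not in the upstream header): the
  // std::tie comparison above is lexicographic, so a candidate with
  // {NumRegs = 2, AddRecCost = 9, ...} is considered cheaper than one with
  // {NumRegs = 3, AddRecCost = 1, ...}; the later fields only break ties.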

  bool isNumRegsMajorCostOfLSR() const { return true; }
  bool isProfitableLSRChainElement(Instruction *I) const { return false; }
  bool canMacroFuseCmp() const { return false; }

  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
                  DominatorTree *DT, AssumptionCache *AC,
                  TargetLibraryInfo *LibInfo) const {
    return false;
  }

  bool shouldFavorPostInc() const { return false; }
  bool shouldFavorBackedgeIndex(const Loop *L) const { return false; }

  bool isLegalMaskedStore(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalMaskedLoad(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalNTStore(Type *DataType, Align Alignment) const {
    // By default, assume nontemporal memory stores are available for stores
    // that are aligned and have a size that is a power of 2.
    unsigned DataSize = DL.getTypeStoreSize(DataType);
    return Alignment >= DataSize && isPowerOf2_32(DataSize);
  }

  bool isLegalNTLoad(Type *DataType, Align Alignment) const {
    // By default, assume nontemporal memory loads are available for loads that
    // are aligned and have a size that is a power of 2.
    unsigned DataSize = DL.getTypeStoreSize(DataType);
    return Alignment >= DataSize && isPowerOf2_32(DataSize);
  }
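
  // Illustrative note (hypothetical types, not in the upstream header): with
  // the defaults above, a nontemporal access to a <4 x float> (16 bytes, a
  // power of two) is reported legal only when the alignment is at least 16,
  // while a 12-byte access is always rejected because 12 is not a power of
  // two.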

  bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalMaskedCompressStore(Type *DataType) const { return false; }
  bool isLegalMaskedExpandLoad(Type *DataType) const { return false; }
  bool hasDivRemOp(Type *DataType, bool IsSigned) const { return false; }

  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const {
    return false;
  }

  bool prefersVectorizedAddressing() const { return true; }

  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                           bool HasBaseReg, int64_t Scale,
                           unsigned AddrSpace) const {
    // Guess that all legal addressing modes are free.
    if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
                              AddrSpace))
      return 0;
    return -1;
  }

  bool LSRWithInstrQueries() const { return false; }
  bool isTruncateFree(Type *Ty1, Type *Ty2) const { return false; }
  bool isProfitableToHoist(Instruction *I) const { return true; }
  bool useAA() const { return false; }
  bool isTypeLegal(Type *Ty) const { return false; }
  unsigned getRegUsageForType(Type *Ty) const { return 1; }
  bool shouldBuildLookupTables() const { return true; }
  bool shouldBuildLookupTablesForConstant(Constant *C) const { return true; }
  bool useColdCCForColdCall(Function &F) const { return false; }

  unsigned getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts,
                                    bool Insert, bool Extract) const {
    return 0;
  }

  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                            unsigned VF) const {
    return 0;
  }

  bool supportsEfficientVectorElementLoadStore() const { return false; }

  bool enableAggressiveInterleaving(bool LoopHasReductions) const {
    return false;
  }

  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                    bool IsZeroCmp) const {
    return {};
  }

  bool enableInterleavedAccessVectorization() const { return false; }
  bool enableMaskedInterleavedAccessVectorization() const { return false; }
  bool isFPVectorizationPotentiallyUnsafe() const { return false; }

  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
                                      unsigned AddressSpace, unsigned Alignment,
                                      bool *Fast) const {
    return false;
  }

  TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const {
    return TTI::PSK_Software;
  }

  bool haveFastSqrt(Type *Ty) const { return false; }
  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const { return true; }

  unsigned getFPOpCost(Type *Ty) const {
    return TargetTransformInfo::TCC_Basic;
  }

  int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
                            Type *Ty) const {
    return 0;
  }

  unsigned getIntImmCost(const APInt &Imm, Type *Ty,
                         TTI::TargetCostKind CostKind) const {
    return TTI::TCC_Basic;
  }

  unsigned getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
                             Type *Ty, TTI::TargetCostKind CostKind,
                             Instruction *Inst = nullptr) const {
    return TTI::TCC_Free;
  }

  unsigned getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                               const APInt &Imm, Type *Ty,
                               TTI::TargetCostKind CostKind) const {
    return TTI::TCC_Free;
  }

  unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; }

  unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const {
    return Vector ? 1 : 0;
  }

  const char *getRegisterClassName(unsigned ClassID) const {
    switch (ClassID) {
    default:
      return "Generic::Unknown Register Class";
    case 0:
      return "Generic::ScalarRC";
    case 1:
      return "Generic::VectorRC";
    }
  }

  unsigned getRegisterBitWidth(bool Vector) const { return 32; }
  unsigned getMinVectorRegisterBitWidth() const { return 128; }
  Optional<unsigned> getMaxVScale() const { return None; }
  bool shouldMaximizeVectorBandwidth(bool OptSize) const { return false; }
  unsigned getMinimumVF(unsigned ElemWidth) const { return 0; }
  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const { return 0; }

  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
    AllowPromotionWithoutCommonHeader = false;
    return false;
  }

  unsigned getCacheLineSize() const { return 0; }

  llvm::Optional<unsigned>
  getCacheSize(TargetTransformInfo::CacheLevel Level) const {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      LLVM_FALLTHROUGH;
    case TargetTransformInfo::CacheLevel::L2D:
      return llvm::Optional<unsigned>();
    }
    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  llvm::Optional<unsigned>
  getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      LLVM_FALLTHROUGH;
    case TargetTransformInfo::CacheLevel::L2D:
      return llvm::Optional<unsigned>();
    }
    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  unsigned getPrefetchDistance() const { return 0; }

  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                unsigned NumStridedMemAccesses,
                                unsigned NumPrefetches, bool HasCall) const {
    return 1;
  }

  unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; }
  bool enableWritePrefetching() const { return false; }
  unsigned getMaxInterleaveFactor(unsigned VF) const { return 1; }

  unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
                                  TTI::TargetCostKind CostKind,
                                  TTI::OperandValueKind Opd1Info,
                                  TTI::OperandValueKind Opd2Info,
                                  TTI::OperandValueProperties Opd1PropInfo,
                                  TTI::OperandValueProperties Opd2PropInfo,
                                  ArrayRef<const Value *> Args,
                                  const Instruction *CxtI = nullptr) const {
    // FIXME: A number of transformation tests seem to require these values
    // which seems a little odd for how arbitrary they are.
    switch (Opcode) {
    default:
      break;
    case Instruction::FDiv:
    case Instruction::FRem:
    case Instruction::SDiv:
    case Instruction::SRem:
    case Instruction::UDiv:
    case Instruction::URem:
      // FIXME: Unlikely to be true for CodeSize.
      return TTI::TCC_Expensive;
    }
    return 1;
  }

  unsigned getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, int Index,
                          VectorType *SubTp) const {
    return 1;
  }

  unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                            TTI::CastContextHint CCH,
                            TTI::TargetCostKind CostKind,
                            const Instruction *I) const {
    switch (Opcode) {
    default:
      break;
    case Instruction::IntToPtr: {
      unsigned SrcSize = Src->getScalarSizeInBits();
      if (DL.isLegalInteger(SrcSize) &&
          SrcSize <= DL.getPointerTypeSizeInBits(Dst))
        return 0;
      break;
    }
    case Instruction::PtrToInt: {
      unsigned DstSize = Dst->getScalarSizeInBits();
      if (DL.isLegalInteger(DstSize) &&
          DstSize >= DL.getPointerTypeSizeInBits(Src))
        return 0;
      break;
    }
    case Instruction::BitCast:
      if (Dst == Src || (Dst->isPointerTy() && Src->isPointerTy()))
        // Identity and pointer-to-pointer casts are free.
        return 0;
      break;
    case Instruction::Trunc: {
      // trunc to a native type is free (assuming the target has compare and
      // shift-right of the same width).
      TypeSize DstSize = DL.getTypeSizeInBits(Dst);
      if (!DstSize.isScalable() && DL.isLegalInteger(DstSize.getFixedSize()))
        return 0;
      break;
    }
    }
    return 1;
  }
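
  // Illustrative note (hypothetical types, not in the upstream header): on a
  // target whose DataLayout declares i64 legal and uses 64-bit pointers, the
  // defaults above treat ptrtoint i8* -> i64, inttoptr i64 -> i8*, and
  // trunc i128 -> i64 as free, while most other casts fall through to the
  // default cost of 1.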

  unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                    VectorType *VecTy, unsigned Index) const {
    return 1;
  }

  unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) const {
    // A phi would be free, unless we're costing the throughput because it
    // will require a register.
    if (Opcode == Instruction::PHI && CostKind != TTI::TCK_RecipThroughput)
      return 0;
    return 1;
  }

  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                              CmpInst::Predicate VecPred,
                              TTI::TargetCostKind CostKind,
                              const Instruction *I) const {
    return 1;
  }

  unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
                              unsigned Index) const {
    return 1;
  }

  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                           unsigned AddressSpace, TTI::TargetCostKind CostKind,
                           const Instruction *I) const {
    return 1;
  }

  unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                 unsigned AddressSpace,
                                 TTI::TargetCostKind CostKind) const {
    return 1;
  }

  unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                  const Value *Ptr, bool VariableMask,
                                  Align Alignment, TTI::TargetCostKind CostKind,
                                  const Instruction *I = nullptr) const {
    return 1;
  }

  unsigned getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond, bool UseMaskForGaps) const {
    return 1;
  }

  unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                 TTI::TargetCostKind CostKind) const {
    switch (ICA.getID()) {
    default:
      break;
    case Intrinsic::annotation:
    case Intrinsic::assume:
    case Intrinsic::sideeffect:
    case Intrinsic::pseudoprobe:
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_value:
    case Intrinsic::dbg_label:
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::launder_invariant_group:
    case Intrinsic::strip_invariant_group:
    case Intrinsic::is_constant:
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::experimental_noalias_scope_decl:
    case Intrinsic::objectsize:
    case Intrinsic::ptr_annotation:
    case Intrinsic::var_annotation:
    case Intrinsic::experimental_gc_result:
    case Intrinsic::experimental_gc_relocate:
    case Intrinsic::coro_alloc:
    case Intrinsic::coro_begin:
    case Intrinsic::coro_free:
    case Intrinsic::coro_end:
    case Intrinsic::coro_frame:
    case Intrinsic::coro_size:
    case Intrinsic::coro_suspend:
    case Intrinsic::coro_param:
    case Intrinsic::coro_subfn_addr:
      // These intrinsics don't actually represent code after lowering.
      return 0;
    }
    return 1;
  }

  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys,
                            TTI::TargetCostKind CostKind) const {
    return 1;
  }

  unsigned getNumberOfParts(Type *Tp) const { return 0; }

  unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *,
                                     const SCEV *) const {
    return 0;
  }

  unsigned getArithmeticReductionCost(unsigned, VectorType *, bool,
                                      TTI::TargetCostKind) const {
    return 1;
  }

  unsigned getMinMaxReductionCost(VectorType *, VectorType *, bool, bool,
                                  TTI::TargetCostKind) const {
    return 1;
  }

  InstructionCost getExtendedAddReductionCost(
      bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const {
    return 1;
  }

  unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
    return 0;
  }

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const {
    return false;
  }

  unsigned getAtomicMemIntrinsicMaxElementSize() const {
    // Note for overrides: You must ensure for all element unordered-atomic
    // memory intrinsics that all power-of-2 element sizes up to, and
    // including, the return value of this method have a corresponding
    // runtime lib call. These runtime lib call definitions can be found
    // in RuntimeLibcalls.h
    return 0;
  }

  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) const {
    return nullptr;
  }

  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                                  unsigned SrcAddrSpace, unsigned DestAddrSpace,
                                  unsigned SrcAlign, unsigned DestAlign) const {
    return Type::getInt8Ty(Context);
  }

  void getMemcpyLoopResidualLoweringType(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      unsigned SrcAlign, unsigned DestAlign) const {
    for (unsigned i = 0; i != RemainingBytes; ++i)
      OpsOut.push_back(Type::getInt8Ty(Context));
  }

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }
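
  // Illustrative note (hypothetical attributes, not in the upstream header):
  // under this default, a callee compiled with "target-features"="+avx2" is
  // not inline-compatible with a caller compiled with
  // "target-features"="+sse2", because the attribute values must match
  // exactly; some targets override this with a feature-subset check instead.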

  bool areFunctionArgsABICompatible(const Function *Caller,
                                    const Function *Callee,
                                    SmallPtrSetImpl<Argument *> &Args) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }

  bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty,
                          const DataLayout &DL) const {
    return false;
  }

  bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty,
                           const DataLayout &DL) const {
    return false;
  }

  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; }
  bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; }
  bool isLegalToVectorizeStore(StoreInst *SI) const { return true; }

  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const {
    return true;
  }

  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const {
    return true;
  }

  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const {
    return VF;
  }

  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const {
    return VF;
  }

  bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                             TTI::ReductionFlags Flags) const {
    return false;
  }

  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
                             TTI::ReductionFlags Flags) const {
    return false;
  }

  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                       TTI::ReductionFlags Flags) const {
    return false;
  }

  bool shouldExpandReduction(const IntrinsicInst *II) const { return true; }

  unsigned getGISelRematGlobalCost() const { return 1; }

  bool supportsScalableVectors() const { return false; }

  bool hasActiveVectorLength() const { return false; }

protected:
  // Obtain the minimum required size to hold the value (without the sign).
  // In case of a vector it returns the min required size for one element.
  unsigned minRequiredElementSize(const Value *Val, bool &isSigned) const {
    if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
      const auto *VectorValue = cast<Constant>(Val);

      // For a vector we need to pick the max of the min required sizes of the
      // elements.
      auto *VT = cast<FixedVectorType>(Val->getType());

      // Assume unsigned elements.
      isSigned = false;

      // The max required size is the size of the vector element type.
      unsigned MaxRequiredSize =
          VT->getElementType()->getPrimitiveSizeInBits().getFixedSize();

      unsigned MinRequiredSize = 0;
      for (unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
        if (auto *IntElement =
                dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
          bool signedElement = IntElement->getValue().isNegative();
          // Get the element min required size.
          unsigned ElementMinRequiredSize =
              IntElement->getValue().getMinSignedBits() - 1;
          // If any element is signed then the whole vector is considered
          // signed.
          isSigned |= signedElement;
          // Save the max required bit size between all the elements.
          MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
        } else {
          // Not an integer constant element; be conservative.
          return MaxRequiredSize;
        }
      }
      return MinRequiredSize;
    }

    if (const auto *CI = dyn_cast<ConstantInt>(Val)) {
      isSigned = CI->getValue().isNegative();
      return CI->getValue().getMinSignedBits() - 1;
    }

    if (const auto *Cast = dyn_cast<SExtInst>(Val)) {
      isSigned = true;
      return Cast->getSrcTy()->getScalarSizeInBits() - 1;
    }

    if (const auto *Cast = dyn_cast<ZExtInst>(Val)) {
      isSigned = false;
      return Cast->getSrcTy()->getScalarSizeInBits();
    }

    isSigned = false;
    return Val->getType()->getScalarSizeInBits();
  }
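
  // Illustrative note (hypothetical inputs, not in the upstream header): for a
  // ConstantInt holding -4 this returns 2 with isSigned == true (two value
  // bits plus the sign bit are enough for -4); for a zext from i8 it returns 8
  // with isSigned == false; for an all-constant integer vector the result is
  // the largest per-element requirement.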

  bool isStridedAccess(const SCEV *Ptr) const {
    return Ptr && isa<SCEVAddRecExpr>(Ptr);
  }

  const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE,
                                            const SCEV *Ptr) const {
    if (!isStridedAccess(Ptr))
      return nullptr;
    const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr);
    return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE));
  }

  bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr,
                                       int64_t MergeDistance) const {
    const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);
    if (!Step)
      return false;
    APInt StrideVal = Step->getAPInt();
    if (StrideVal.getBitWidth() > 64)
      return false;
    // FIXME: Need to take absolute value for negative stride case.
    return StrideVal.getSExtValue() < MergeDistance;
  }
};

/// CRTP base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
template <typename T>
class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
private:
  typedef TargetTransformInfoImplBase BaseT;

protected:
  explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {}

public:
  using BaseT::getGEPCost;

  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands,
                 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) {
    assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
    // TODO: will remove this when pointers have an opaque type.
    assert(Ptr->getType()->getScalarType()->getPointerElementType() ==
               PointeeType &&
           "explicit pointee type doesn't match operand's pointee type");
    auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
    bool HasBaseReg = (BaseGV == nullptr);

    auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
    APInt BaseOffset(PtrSizeBits, 0);
    int64_t Scale = 0;

    auto GTI = gep_type_begin(PointeeType, Operands);
    Type *TargetType = nullptr;

    // Handle the case where the GEP has no index operands (only the base
    // pointer), in which case TargetType remains a nullptr.
    if (Operands.empty())
      return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic;

    for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
      TargetType = GTI.getIndexedType();
      // We assume that the cost of a scalar GEP with constant index and the
      // cost of a vector GEP with splat constant index are the same.
      const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
      if (!ConstIdx)
        if (auto Splat = getSplatValue(*I))
          ConstIdx = dyn_cast<ConstantInt>(Splat);
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        // For structures the index is always a splat or scalar constant.
        assert(ConstIdx && "Unexpected GEP index");
        uint64_t Field = ConstIdx->getZExtValue();
        BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
      } else {
        // If this operand is a scalable type, bail out early.
        // TODO: handle scalable vectors
        if (isa<ScalableVectorType>(TargetType))
          return TTI::TCC_Basic;
        int64_t ElementSize =
            DL.getTypeAllocSize(GTI.getIndexedType()).getFixedSize();
        if (ConstIdx) {
          BaseOffset +=
              ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
        } else {
          // Needs scale register.
          if (Scale != 0)
            // No addressing mode takes two scale registers.
            return TTI::TCC_Basic;
          Scale = ElementSize;
        }
      }
    }

    if (static_cast<T *>(this)->isLegalAddressingMode(
            TargetType, const_cast<GlobalValue *>(BaseGV),
            BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale,
            Ptr->getType()->getPointerAddressSpace()))
      return TTI::TCC_Free;
    return TTI::TCC_Basic;
  }
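
  // Illustrative note (hypothetical GEP, not in the upstream header): for
  //   getelementptr %struct.S, %struct.S* @g, i64 0, i32 1, i64 %i
  // the loop above folds the constant struct-field offset into BaseOffset,
  // records the variable index as Scale = sizeof(indexed element), and then
  // asks the target whether "@g + BaseOffset + %i * Scale" is a legal
  // addressing mode; if it is, the whole GEP is costed TCC_Free.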

  int getUserCost(const User *U, ArrayRef<const Value *> Operands,
                  TTI::TargetCostKind CostKind) {
    auto *TargetTTI = static_cast<T *>(this);
    // Handle non-intrinsic calls, invokes, and callbr.
    // FIXME: Unlikely to be true for anything but CodeSize.
    auto *CB = dyn_cast<CallBase>(U);
    if (CB && !isa<IntrinsicInst>(U)) {
      if (const Function *F = CB->getCalledFunction()) {
        if (!TargetTTI->isLoweredToCall(F))
          return TTI::TCC_Basic; // Give a basic cost if it will be lowered.
        return TTI::TCC_Basic * (F->getFunctionType()->getNumParams() + 1);
      }
      // For indirect or other calls, scale cost by number of arguments.
      return TTI::TCC_Basic * (CB->arg_size() + 1);
    }

    Type *Ty = U->getType();
    Type *OpTy =
        U->getNumOperands() == 1 ? U->getOperand(0)->getType() : nullptr;
    unsigned Opcode = Operator::getOpcode(U);
    auto *I = dyn_cast<Instruction>(U);
    switch (Opcode) {
    default:
      break;
    case Instruction::Call: {
      assert(isa<IntrinsicInst>(U) && "Unexpected non-intrinsic call");
      auto *Intrinsic = cast<IntrinsicInst>(U);
      IntrinsicCostAttributes CostAttrs(Intrinsic->getIntrinsicID(), *CB);
      return TargetTTI->getIntrinsicInstrCost(CostAttrs, CostKind);
    }
    case Instruction::Br:
    case Instruction::Ret:
    case Instruction::PHI:
      return TargetTTI->getCFInstrCost(Opcode, CostKind);
    case Instruction::ExtractValue:
    case Instruction::Freeze:
      return TTI::TCC_Free;
    case Instruction::Alloca:
      if (cast<AllocaInst>(U)->isStaticAlloca())
        return TTI::TCC_Free;
      break;
    case Instruction::GetElementPtr: {
      const GEPOperator *GEP = cast<GEPOperator>(U);
      return TargetTTI->getGEPCost(GEP->getSourceElementType(),
                                   GEP->getPointerOperand(),
                                   Operands.drop_front());
    }
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::FNeg: {
      TTI::OperandValueProperties Op1VP = TTI::OP_None;
      TTI::OperandValueProperties Op2VP = TTI::OP_None;
      TTI::OperandValueKind Op1VK =
          TTI::getOperandInfo(U->getOperand(0), Op1VP);
      TTI::OperandValueKind Op2VK = Opcode != Instruction::FNeg ?
          TTI::getOperandInfo(U->getOperand(1), Op2VP) : TTI::OK_AnyValue;
      SmallVector<const Value *, 2> Operands(U->operand_values());
      return TargetTTI->getArithmeticInstrCost(Opcode, Ty, CostKind,
                                               Op1VK, Op2VK,
                                               Op1VP, Op2VP, Operands, I);
    }
    case Instruction::IntToPtr:
    case Instruction::PtrToInt:
    case Instruction::SIToFP:
    case Instruction::UIToFP:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::Trunc:
    case Instruction::FPTrunc:
    case Instruction::BitCast:
    case Instruction::FPExt:
    case Instruction::SExt:
    case Instruction::ZExt:
    case Instruction::AddrSpaceCast:
      return TargetTTI->getCastInstrCost(
          Opcode, Ty, OpTy, TTI::getCastContextHint(I), CostKind, I);
    case Instruction::Store: {
      auto *SI = cast<StoreInst>(U);
      Type *ValTy = U->getOperand(0)->getType();
      return TargetTTI->getMemoryOpCost(Opcode, ValTy, SI->getAlign(),
                                        SI->getPointerAddressSpace(),
                                        CostKind, I);
    }
    case Instruction::Load: {
      auto *LI = cast<LoadInst>(U);
      return TargetTTI->getMemoryOpCost(Opcode, U->getType(), LI->getAlign(),
                                        LI->getPointerAddressSpace(),
                                        CostKind, I);
    }
    case Instruction::Select: {
      Type *CondTy = U->getOperand(0)->getType();
      return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy,
                                           CmpInst::BAD_ICMP_PREDICATE,
                                           CostKind, I);
    }
    case Instruction::ICmp:
    case Instruction::FCmp: {
      Type *ValTy = U->getOperand(0)->getType();
      // TODO: Also handle ICmp/FCmp constant expressions.
      return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(),
                                           I ? cast<CmpInst>(I)->getPredicate()
                                             : CmpInst::BAD_ICMP_PREDICATE,
                                           CostKind, I);
    }
    case Instruction::InsertElement: {
      auto *IE = dyn_cast<InsertElementInst>(U);
      if (!IE)
        return TTI::TCC_Basic; // FIXME
      auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
      unsigned Idx = CI ? CI->getZExtValue() : -1;
      return TargetTTI->getVectorInstrCost(Opcode, Ty, Idx);
    }
    case Instruction::ShuffleVector: {
      auto *Shuffle = dyn_cast<ShuffleVectorInst>(U);
      if (!Shuffle)
        return TTI::TCC_Basic; // FIXME
      auto *VecTy = cast<VectorType>(U->getType());
      auto *VecSrcTy = cast<VectorType>(U->getOperand(0)->getType());

      // TODO: Identify and add costs for insert subvector, etc.
      int SubIndex;
      if (Shuffle->isExtractSubvectorMask(SubIndex))
        return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy,
                                         SubIndex, VecTy);
      else if (Shuffle->changesLength())
        return CostKind == TTI::TCK_RecipThroughput ? -1 : 1;
      else if (Shuffle->isIdentity())
        return 0;
      else if (Shuffle->isReverse())
        return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy, 0, nullptr);
      else if (Shuffle->isSelect())
        return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy, 0, nullptr);
      else if (Shuffle->isTranspose())
        return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy, 0, nullptr);
      else if (Shuffle->isZeroEltSplat())
        return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy, 0, nullptr);
      else if (Shuffle->isSingleSource())
        return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy, 0,
                                         nullptr);

      return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, 0,
                                       nullptr);
    }
    case Instruction::ExtractElement: {
      unsigned Idx = -1;
      auto *EEI = dyn_cast<ExtractElementInst>(U);
      if (!EEI)
        return TTI::TCC_Basic; // FIXME
      auto *CI = dyn_cast<ConstantInt>(EEI->getOperand(1));
      if (CI)
        Idx = CI->getZExtValue();

      // Try to match a reduction (a series of shufflevector and vector ops
      // followed by an extractelement).
      unsigned RdxOpcode;
      VectorType *RdxType;
      bool IsPairwise;
      switch (TTI::matchVectorReduction(EEI, RdxOpcode, RdxType, IsPairwise)) {
      case TTI::RK_Arithmetic:
        return TargetTTI->getArithmeticReductionCost(RdxOpcode, RdxType,
                                                     IsPairwise, CostKind);
      case TTI::RK_MinMax:
        return TargetTTI->getMinMaxReductionCost(
            RdxType, cast<VectorType>(CmpInst::makeCmpResultType(RdxType)),
            IsPairwise, /*IsUnsigned=*/false, CostKind);
      case TTI::RK_UnsignedMinMax:
        return TargetTTI->getMinMaxReductionCost(
            RdxType, cast<VectorType>(CmpInst::makeCmpResultType(RdxType)),
            IsPairwise, /*IsUnsigned=*/true, CostKind);
      case TTI::RK_None:
        break;
      }
      return TargetTTI->getVectorInstrCost(Opcode, U->getOperand(0)->getType(),
                                           Idx);
    }
    }

    // By default, just classify everything as 'basic'.
    return TTI::TCC_Basic;
  }

  int getInstructionLatency(const Instruction *I) {
    SmallVector<const Value *, 4> Operands(I->operand_values());
    if (getUserCost(I, Operands, TTI::TCK_Latency) == TTI::TCC_Free)
      return 0;

    if (isa<LoadInst>(I))
      return 4;

    Type *DstTy = I->getType();

    // Usually an intrinsic is a simple instruction.
    // A real function call is much slower.
    if (auto *CI = dyn_cast<CallInst>(I)) {
      const Function *F = CI->getCalledFunction();
      if (!F || static_cast<T *>(this)->isLoweredToCall(F))
        return 40;
      // Some intrinsics return a value and a flag; we use the value type
      // to decide its latency.
      if (StructType *StructTy = dyn_cast<StructType>(DstTy))
        DstTy = StructTy->getElementType(0);
      // Fall through to simple instructions.
    }

    if (VectorType *VectorTy = dyn_cast<VectorType>(DstTy))
      DstTy = VectorTy->getElementType();
    if (DstTy->isFloatingPointTy())
      return 3;

    return 1;
  }
};
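
// A minimal sketch (hypothetical target, not part of the upstream header): a
// concrete TTI implementation mixes in the CRTP base, forwards the DataLayout,
// and overrides only the hooks it cares about; everything else falls back to
// the conservative defaults above. The class name and the overridden value
// are illustrative assumptions, not LLVM API.
class HypotheticalTTIImpl final
    : public TargetTransformInfoImplCRTPBase<HypotheticalTTIImpl> {
  using BaseT = TargetTransformInfoImplCRTPBase<HypotheticalTTIImpl>;

public:
  explicit HypotheticalTTIImpl(const DataLayout &DL) : BaseT(DL) {}

  // Advertise 16 registers per class instead of the generic default of 8.
  unsigned getNumberOfRegisters(unsigned ClassID) const { return 16; }
};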

} // namespace llvm

#endif

#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif