#pragma once
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#endif
//===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This pass exposes codegen information to IR-level passes. Every
/// transformation that uses codegen information is broken into three parts:
/// 1. The IR-level analysis pass.
/// 2. The IR-level transformation interface which provides the needed
///    information.
/// 3. Codegen-level implementation which uses target-specific hooks.
///
/// This file defines #2, which is the interface that IR-level transformations
/// use for querying the codegen.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H

#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/InstructionCost.h"
#include <functional>

namespace llvm {

namespace Intrinsic {
typedef unsigned ID;
}

class AssumptionCache;
class BlockFrequencyInfo;
class DominatorTree;
class BranchInst;
class CallBase;
class ExtractElementInst;
class Function;
class GlobalValue;
class InstCombiner;
class IntrinsicInst;
class LoadInst;
class LoopAccessInfo;
class Loop;
class LoopInfo;
class ProfileSummaryInfo;
class SCEV;
class ScalarEvolution;
class StoreInst;
class SwitchInst;
class TargetLibraryInfo;
class Type;
class User;
class Value;
struct KnownBits;
template <typename T> class Optional;

/// Information about a load/store intrinsic defined by the target.
struct MemIntrinsicInfo {
  /// This is the pointer that the intrinsic is loading from or storing to.
  /// If this is non-null, then analysis/optimization passes can assume that
  /// this intrinsic is functionally equivalent to a load/store from this
  /// pointer.
  Value *PtrVal = nullptr;

  // Ordering for atomic operations.
  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;

  // Same Id is set by the target for corresponding load/store intrinsics.
  unsigned short MatchingId = 0;

  bool ReadMem = false;
  bool WriteMem = false;
  bool IsVolatile = false;

  bool isUnordered() const {
    return (Ordering == AtomicOrdering::NotAtomic ||
            Ordering == AtomicOrdering::Unordered) &&
           !IsVolatile;
  }
};
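
// Illustrative sketch (hypothetical client code, not part of the upstream
// header): a pass can treat a recognized target load intrinsic like an
// ordinary load once a TTI hook (e.g. getTgtMemIntrinsic) has filled in a
// MemIntrinsicInfo for it.
//
//   MemIntrinsicInfo Info;
//   if (TTI.getTgtMemIntrinsic(II, Info) && Info.PtrVal && Info.ReadMem &&
//       !Info.WriteMem && Info.isUnordered()) {
//     // Reason about the intrinsic as a simple load from Info.PtrVal.
//   }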

/// Attributes of a target dependent hardware loop.
struct HardwareLoopInfo {
  HardwareLoopInfo() = delete;
  HardwareLoopInfo(Loop *L) : L(L) {}
  Loop *L = nullptr;
  BasicBlock *ExitBlock = nullptr;
  BranchInst *ExitBranch = nullptr;
  const SCEV *TripCount = nullptr;
  IntegerType *CountType = nullptr;
  Value *LoopDecrement = nullptr; // Decrement the loop counter by this
                                  // value in every iteration.
  bool IsNestingLegal = false;    // Can a hardware loop be a parent to
                                  // another hardware loop?
  bool CounterInReg = false;      // Should loop counter be updated in
                                  // the loop via a phi?
  bool PerformEntryTest = false;  // Generate the intrinsic which also performs
                                  // icmp ne zero on the loop counter value and
                                  // produces an i1 to guard the loop entry.
  bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI,
                               DominatorTree &DT, bool ForceNestedLoop = false,
                               bool ForceHardwareLoopPHI = false);
  bool canAnalyze(LoopInfo &LI);
};

class IntrinsicCostAttributes {
  const IntrinsicInst *II = nullptr;
  Type *RetTy = nullptr;
  Intrinsic::ID IID;
  SmallVector<Type *, 4> ParamTys;
  SmallVector<const Value *, 4> Arguments;
  FastMathFlags FMF;
  ElementCount VF = ElementCount::getFixed(1);
  // If ScalarizationCost is UINT_MAX, the cost of scalarizing the
  // arguments and the return value will be computed based on types.
  unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();

public:
  IntrinsicCostAttributes(const IntrinsicInst &I);

  IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI);

  IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI,
                          ElementCount Factor);

  IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI,
                          ElementCount Factor, unsigned ScalarCost);

  IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
                          ArrayRef<Type *> Tys, FastMathFlags Flags);

  IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
                          ArrayRef<Type *> Tys, FastMathFlags Flags,
                          unsigned ScalarCost);

  IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
                          ArrayRef<Type *> Tys, FastMathFlags Flags,
                          unsigned ScalarCost,
                          const IntrinsicInst *I);

  IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
                          ArrayRef<Type *> Tys);

  IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
                          ArrayRef<const Value *> Args);

  Intrinsic::ID getID() const { return IID; }
  const IntrinsicInst *getInst() const { return II; }
  Type *getReturnType() const { return RetTy; }
  ElementCount getVectorFactor() const { return VF; }
  FastMathFlags getFlags() const { return FMF; }
  unsigned getScalarizationCost() const { return ScalarizationCost; }
  const SmallVectorImpl<const Value *> &getArgs() const { return Arguments; }
  const SmallVectorImpl<Type *> &getArgTypes() const { return ParamTys; }

  bool isTypeBasedOnly() const {
    return Arguments.empty();
  }

  bool skipScalarizationCost() const {
    return ScalarizationCost != std::numeric_limits<unsigned>::max();
  }
};
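
// Illustrative sketch (hypothetical client code, not part of the upstream
// header; `VecTy` is an assumed vector type): a purely type-based cost query
// for an fma intrinsic. With no argument values supplied, isTypeBasedOnly()
// returns true and costing falls back to the types alone.
//
//   IntrinsicCostAttributes Attrs(Intrinsic::fma, VecTy, {VecTy, VecTy, VecTy});
//   assert(Attrs.isTypeBasedOnly());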

class TargetTransformInfo;
typedef TargetTransformInfo TTI;

/// This pass provides access to the codegen interfaces that are needed
/// for IR-level transformations.
class TargetTransformInfo {
public:
  /// Construct a TTI object using a type implementing the \c Concept
  /// API below.
  ///
  /// This is used by targets to construct a TTI wrapping their target-specific
  /// implementation that encodes appropriate costs for their target.
  template <typename T> TargetTransformInfo(T Impl);

  /// Construct a baseline TTI object using a minimal implementation of
  /// the \c Concept API below.
  ///
  /// The TTI implementation will reflect the information in the DataLayout
  /// provided if non-null.
  explicit TargetTransformInfo(const DataLayout &DL);

  // Provide move semantics.
  TargetTransformInfo(TargetTransformInfo &&Arg);
  TargetTransformInfo &operator=(TargetTransformInfo &&RHS);

  // We need to define the destructor out-of-line to define our sub-classes
  // out-of-line.
  ~TargetTransformInfo();

  /// Handle the invalidation of this information.
  ///
  /// When used as a result of \c TargetIRAnalysis this method will be called
  /// when the function this was computed for changes. When it returns false,
  /// the information is preserved across those changes.
  bool invalidate(Function &, const PreservedAnalyses &,
                  FunctionAnalysisManager::Invalidator &) {
    // FIXME: We should probably in some way ensure that the subtarget
    // information for a function hasn't changed.
    return false;
  }

  /// \name Generic Target Information
  /// @{

  /// The kind of cost model.
  ///
  /// There are several different cost models that can be customized by the
  /// target. The normalization of each cost model may be target specific.
  enum TargetCostKind {
    TCK_RecipThroughput, ///< Reciprocal throughput.
    TCK_Latency,         ///< The latency of an instruction.
    TCK_CodeSize,        ///< Instruction code size.
    TCK_SizeAndLatency   ///< The weighted sum of size and latency.
  };

  /// Query the cost of a specified instruction.
  ///
  /// Clients should use this interface to query the cost of an existing
  /// instruction. The instruction must have a valid parent (basic block).
  ///
  /// Note, this method does not cache the cost calculation and it
  /// can be expensive in some cases.
  InstructionCost getInstructionCost(const Instruction *I,
                                     enum TargetCostKind kind) const {
    InstructionCost Cost;
    switch (kind) {
    case TCK_RecipThroughput:
      Cost = getInstructionThroughput(I);
      break;
    case TCK_Latency:
      Cost = getInstructionLatency(I);
      break;
    case TCK_CodeSize:
    case TCK_SizeAndLatency:
      Cost = getUserCost(I, kind);
      break;
    }
    if (Cost == -1)
      Cost.setInvalid();
    return Cost;
  }
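
  // Illustrative sketch (hypothetical client code, not part of the upstream
  // header; `TTI` stands for a const TargetTransformInfo & and `I` for an
  // existing instruction): query the reciprocal-throughput cost and bail out
  // when the cost model cannot value the instruction.
  //
  //   InstructionCost Cost =
  //       TTI.getInstructionCost(&I, TargetTransformInfo::TCK_RecipThroughput);
  //   if (!Cost.isValid())
  //     return false; // The target could not model this instruction.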

  /// Underlying constants for 'cost' values in this interface.
  ///
  /// Many APIs in this interface return a cost. This enum defines the
  /// fundamental values that should be used to interpret (and produce) those
  /// costs. The costs are returned as an int rather than a member of this
  /// enumeration because it is expected that the cost of one IR instruction
  /// may have a multiplicative factor to it or otherwise won't fit directly
  /// into the enum. Moreover, it is common to sum or average costs which works
  /// better as simple integral values. Thus this enum only provides constants.
  /// Also note that the returned costs are signed integers to make it natural
  /// to add, subtract, and test with zero (a common boundary condition). It is
  /// not expected that 2^32 is a realistic cost to be modeling at any point.
  ///
  /// Note that these costs should usually reflect the intersection of code-size
  /// cost and execution cost. A free instruction is typically one that folds
  /// into another instruction. For example, reg-to-reg moves can often be
  /// skipped by renaming the registers in the CPU, but they still are encoded
  /// and thus wouldn't be considered 'free' here.
  enum TargetCostConstants {
    TCC_Free = 0,     ///< Expected to fold away in lowering.
    TCC_Basic = 1,    ///< The cost of a typical 'add' instruction.
    TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
  };

  /// Estimate the cost of a GEP operation when lowered.
  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands,
                 TargetCostKind CostKind = TCK_SizeAndLatency) const;

  /// \returns A value by which our inlining threshold should be multiplied.
  /// This is primarily used to bump up the inlining threshold wholesale on
  /// targets where calls are unusually expensive.
  ///
  /// TODO: This is a rather blunt instrument. Perhaps altering the costs of
  /// individual classes of instructions would be better.
  unsigned getInliningThresholdMultiplier() const;

  /// \returns A value to be added to the inlining threshold.
  unsigned adjustInliningThreshold(const CallBase *CB) const;

  /// \returns Vector bonus in percent.
  ///
  /// Vector bonuses: We want to more aggressively inline vector-dense kernels
  /// and apply this bonus based on the percentage of vector instructions. A
  /// bonus is applied if the vector instructions exceed 50% and half that
  /// amount is applied if it exceeds 10%. Note that these bonuses are somewhat
  /// arbitrary and evolved over time by accident as much as because they are
  /// principled bonuses.
  /// FIXME: It would be nice to base the bonus values on something more
  /// scientific. A target may have no bonus on vector instructions.
  int getInlinerVectorBonusPercent() const;

  /// \return the expected cost of a memcpy, which could e.g. depend on the
  /// source/destination type and alignment and the number of bytes copied.
  int getMemcpyCost(const Instruction *I) const;

  /// \return The estimated number of case clusters when lowering \p 'SI'.
  /// \p JTSize Set a jump table size only when \p SI is suitable for a jump
  /// table.
  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize,
                                            ProfileSummaryInfo *PSI,
                                            BlockFrequencyInfo *BFI) const;

  /// Estimate the cost of a given IR user when lowered.
  ///
  /// This can estimate the cost of either a ConstantExpr or Instruction when
  /// lowered.
  ///
  /// \p Operands is a list of operands which can be a result of transformations
  /// of the current operands. The number of operands on the list must be equal
  /// to the number of current operands the IR user has. Their order on the
  /// list must be the same as the order of the current operands the IR user
  /// has.
  ///
  /// The returned cost is defined in terms of \c TargetCostConstants, see its
  /// comments for a detailed explanation of the cost values.
  int getUserCost(const User *U, ArrayRef<const Value *> Operands,
                  TargetCostKind CostKind) const;

  /// This is a helper function which calls the two-argument getUserCost
  /// with \p Operands which are the current operands U has.
  int getUserCost(const User *U, TargetCostKind CostKind) const {
    SmallVector<const Value *, 4> Operands(U->operand_values());
    return getUserCost(U, Operands, CostKind);
  }
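
  // Illustrative sketch (hypothetical client code, not part of the upstream
  // header; `GEP` and `SimplifiedIndex` are assumed values): costing a user
  // against replacement operands without rewriting the IR first.
  //
  //   SmallVector<const Value *, 4> NewOps(GEP->operand_values());
  //   NewOps[1] = SimplifiedIndex; // speculative replacement operand
  //   int Cost = TTI.getUserCost(GEP, NewOps,
  //                              TargetTransformInfo::TCK_SizeAndLatency);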

  /// Return true if branch divergence exists.
  ///
  /// Branch divergence has a significantly negative impact on GPU performance
  /// when threads in the same wavefront take different paths due to conditional
  /// branches.
  bool hasBranchDivergence() const;

  /// Return true if the target prefers to use GPU divergence analysis to
  /// replace the legacy version.
  bool useGPUDivergenceAnalysis() const;

  /// Returns whether V is a source of divergence.
  ///
  /// This function provides the target-dependent information for
  /// the target-independent LegacyDivergenceAnalysis. LegacyDivergenceAnalysis
  /// first builds the dependency graph, and then runs the reachability
  /// algorithm starting with the sources of divergence.
  bool isSourceOfDivergence(const Value *V) const;

  // Returns true for the target-specific set of operations which produce a
  // uniform result even when taking non-uniform arguments.
  bool isAlwaysUniform(const Value *V) const;

  /// Returns the address space ID for a target's 'flat' address space. Note
  /// this is not necessarily the same as addrspace(0), which LLVM sometimes
  /// refers to as the generic address space. The flat address space is a
  /// generic address space that can be used to access multiple segments of
  /// memory with different address spaces. Access of a memory location through
  /// a pointer with this address space is expected to be legal but slower
  /// compared to the same memory location accessed through a pointer with a
  /// different address space.
  ///
  /// This is for targets with different pointer representations which can
  /// be converted with the addrspacecast instruction. If a pointer is converted
  /// to this address space, optimizations should attempt to replace the access
  /// with the source address space.
  ///
  /// \returns ~0u if the target does not have such a flat address space to
  /// optimize away.
  unsigned getFlatAddressSpace() const;

  /// Return any intrinsic address operand indexes which may be rewritten if
  /// they use a flat address space pointer.
  ///
  /// \returns true if the intrinsic was handled.
  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const;

  bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;

  unsigned getAssumedAddrSpace(const Value *V) const;

  /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p
  /// NewV, which has a different address space. This should happen for every
  /// operand index that collectFlatAddressOperands returned for the intrinsic.
  /// \returns nullptr if the intrinsic was not handled. Otherwise, returns the
  /// new value (which may be the original \p II with modified operands).
  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const;

  /// Test whether calls to a function lower to actual program function
  /// calls.
  ///
  /// The idea is to test whether the program is likely to require a 'call'
  /// instruction or equivalent in order to call the given function.
  ///
  /// FIXME: It's not clear that this is a good or useful query API. Clients
  /// should probably move to simpler cost metrics using the above.
  /// Alternatively, we could split the cost interface into distinct code-size
  /// and execution-speed costs. This would allow modelling the core of this
  /// query more accurately as a call is a single small instruction, but
  /// incurs significant execution cost.
  bool isLoweredToCall(const Function *F) const;

  struct LSRCost {
    /// TODO: Some of these could be merged. Also, a lexical ordering
    /// isn't always optimal.
    unsigned Insns;
    unsigned NumRegs;
    unsigned AddRecCost;
    unsigned NumIVMuls;
    unsigned NumBaseAdds;
    unsigned ImmCost;
    unsigned SetupCost;
    unsigned ScaleCost;
  };

  /// Parameters that control the generic loop unrolling transformation.
  struct UnrollingPreferences {
    /// The cost threshold for the unrolled loop. Should be relative to the
    /// getUserCost values returned by this API, and the expectation is that
    /// the unrolled loop's instructions when run through that interface should
    /// not exceed this cost. However, this is only an estimate. Also, specific
    /// loops may be unrolled even with a cost above this threshold if deemed
    /// profitable. Set this to UINT_MAX to disable the loop body cost
    /// restriction.
    unsigned Threshold;
    /// If complete unrolling will reduce the cost of the loop, we will boost
    /// the Threshold by a certain percent to allow more aggressive complete
    /// unrolling. This value provides the maximum boost percentage that we
    /// can apply to Threshold (The value should be no less than 100).
    /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
    ///                                    MaxPercentThresholdBoost / 100)
    /// E.g. if complete unrolling reduces the loop execution time by 50%
    /// then we boost the threshold by the factor of 2x. If unrolling is not
    /// expected to reduce the running time, then we do not increase the
    /// threshold.
    unsigned MaxPercentThresholdBoost;
    /// The cost threshold for the unrolled loop when optimizing for size (set
    /// to UINT_MAX to disable).
    unsigned OptSizeThreshold;
    /// The cost threshold for the unrolled loop, like Threshold, but used
    /// for partial/runtime unrolling (set to UINT_MAX to disable).
    unsigned PartialThreshold;
    /// The cost threshold for the unrolled loop when optimizing for size, like
    /// OptSizeThreshold, but used for partial/runtime unrolling (set to
    /// UINT_MAX to disable).
    unsigned PartialOptSizeThreshold;
    /// A forced unrolling factor (the number of concatenated bodies of the
    /// original loop in the unrolled loop body). When set to 0, the unrolling
    /// transformation will select an unrolling factor based on the current cost
    /// threshold and other factors.
    unsigned Count;
    /// Default unroll count for loops with run-time trip count.
    unsigned DefaultUnrollRuntimeCount;
    // Set the maximum unrolling factor. The unrolling factor may be selected
    // using the appropriate cost threshold, but may not exceed this number
    // (set to UINT_MAX to disable). This does not apply in cases where the
    // loop is being fully unrolled.
    unsigned MaxCount;
    /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
    /// applies even if full unrolling is selected. This allows a target to fall
    /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
    unsigned FullUnrollMaxCount;
    // Represents the number of instructions optimized when the "back edge"
    // becomes a "fall through" in the unrolled loop.
    // For now we count a conditional branch on a backedge and a comparison
    // feeding it.
    unsigned BEInsns;
    /// Allow partial unrolling (unrolling of loops to expand the size of the
    /// loop body, not only to eliminate small constant-trip-count loops).
    bool Partial;
    /// Allow runtime unrolling (unrolling of loops to expand the size of the
    /// loop body even when the number of loop iterations is not known at
    /// compile time).
    bool Runtime;
    /// Allow generation of a loop remainder (extra iterations after unroll).
    bool AllowRemainder;
    /// Allow emitting expensive instructions (such as divisions) when computing
    /// the trip count of a loop for runtime unrolling.
    bool AllowExpensiveTripCount;
    /// Apply loop unroll on any kind of loop
    /// (mainly to loops that fail runtime unrolling).
    bool Force;
    /// Allow using trip count upper bound to unroll loops.
    bool UpperBound;
    /// Allow unrolling of all the iterations of the runtime loop remainder.
    bool UnrollRemainder;
    /// Allow unroll and jam. Used to enable unroll and jam for the target.
    bool UnrollAndJam;
    /// Threshold for unroll and jam, for inner loop size. The 'Threshold'
    /// value above is used during unroll and jam for the outer loop size.
    /// This value is used in the same manner to limit the size of the inner
    /// loop.
    unsigned UnrollAndJamInnerLoopThreshold;
    /// Don't allow loop unrolling to simulate more than this number of
    /// iterations when checking full unroll profitability.
    unsigned MaxIterationsCountToAnalyze;
  };

  /// Get target-customized preferences for the generic loop unrolling
  /// transformation. The caller will initialize UP with the current
  /// target-independent defaults.
  void getUnrollingPreferences(Loop *L, ScalarEvolution &,
                               UnrollingPreferences &UP) const;
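
  // Illustrative sketch (hypothetical client code, not part of the upstream
  // header): a loop-unrolling pass typically fills UP with target-independent
  // defaults and then lets the target hook above adjust them.
  //
  //   TargetTransformInfo::UnrollingPreferences UP;
  //   /* ...populate UP with target-independent defaults... */
  //   TTI.getUnrollingPreferences(L, SE, UP);
  //   if (UP.Count != 0)
  //     /* respect the forced unroll factor */;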

  /// Query the target whether it would be profitable to convert the given loop
  /// into a hardware loop.
  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC, TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) const;

  /// Query the target whether it would be preferred to create a predicated
  /// vector loop, which can avoid the need to emit a scalar epilogue loop.
  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
                                   AssumptionCache &AC, TargetLibraryInfo *TLI,
                                   DominatorTree *DT,
                                   const LoopAccessInfo *LAI) const;

  /// Query the target whether lowering of the llvm.get.active.lane.mask
  /// intrinsic is supported.
  bool emitGetActiveLaneMask() const;

  // Parameters that control the loop peeling transformation.
  struct PeelingPreferences {
    /// A forced peeling factor (the number of bodies of the original loop
    /// that should be peeled off before the loop body). When set to 0, a
    /// peeling factor based on profile information and other factors is used.
    unsigned PeelCount;
    /// Allow peeling off loop iterations.
    bool AllowPeeling;
    /// Allow peeling off loop iterations for loop nests.
    bool AllowLoopNestsPeeling;
    /// Allow peeling based on profile. Used to enable peeling off all
    /// iterations based on the provided profile.
    /// If the value is true, the peeling cost model can decide to peel only
    /// some iterations, and in that case it will set this to false.
    bool PeelProfiledIterations;
  };

  /// Get target-customized preferences for the generic loop peeling
  /// transformation. The caller will initialize \p PP with the current
  /// target-independent defaults with information from \p L and \p SE.
  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             PeelingPreferences &PP) const;

  /// Targets can implement their own combinations for target-specific
  /// intrinsics. This function will be called from the InstCombine pass every
  /// time a target-specific intrinsic is encountered.
  ///
  /// \returns None to not do anything target specific, or a value that will be
  /// returned from the InstCombiner. It is also possible to stop further
  /// processing of the intrinsic by returning nullptr.
  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                               IntrinsicInst &II) const;

  /// Can be used to implement target-specific instruction combining.
  /// \see instCombineIntrinsic
  Optional<Value *>
  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
                                   APInt DemandedMask, KnownBits &Known,
                                   bool &KnownBitsComputed) const;

  /// Can be used to implement target-specific instruction combining.
  /// \see instCombineIntrinsic
  Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const;

  /// @}

  /// \name Scalar Target Information
  /// @{

  /// Flags indicating the kind of support for population count.
  ///
  /// Compared to the SW implementation, HW support is supposed to
  /// significantly boost the performance when the population is dense, and it
  /// may or may not degrade performance if the population is sparse. HW
  /// support is considered "Fast" if it can outperform, or is on a par with,
  /// the SW implementation when the population is sparse; otherwise, it is
  /// considered "Slow".
  enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware };

  /// Return true if the specified immediate is a legal add immediate, that
  /// is, the target has add instructions which can add a register with the
  /// immediate without having to materialize the immediate into a register.
  bool isLegalAddImmediate(int64_t Imm) const;

  /// Return true if the specified immediate is a legal icmp immediate,
  /// that is, the target has icmp instructions which can compare a register
  /// against the immediate without having to materialize the immediate into a
  /// register.
  bool isLegalICmpImmediate(int64_t Imm) const;

  /// Return true if the addressing mode represented by AM is legal for
  /// this target, for a load/store of the specified type.
  /// The type may be VoidTy, in which case only return true if the addressing
  /// mode is legal for a load/store of any legal type.
  /// If target returns true in LSRWithInstrQueries(), I may be valid.
  /// TODO: Handle pre/postinc as well.
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace = 0,
                             Instruction *I = nullptr) const;
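
  // Illustrative sketch (hypothetical client code, not part of the upstream
  // header; `AccessTy` is an assumed load/store type): ask whether a
  // "base register + 4 * index + 16" address is directly supported.
  //
  //   bool Legal = TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/nullptr,
  //                                          /*BaseOffset=*/16,
  //                                          /*HasBaseReg=*/true, /*Scale=*/4);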

  /// Return true if the LSR cost of C1 is lower than that of C2.
  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                     TargetTransformInfo::LSRCost &C2) const;

  /// Return true if LSR major cost is number of registers. Targets which
  /// implement their own isLSRCostLess and unset number of registers as major
  /// cost should return false, otherwise return true.
  bool isNumRegsMajorCostOfLSR() const;

  /// \returns true if LSR should not optimize a chain that includes \p I.
  bool isProfitableLSRChainElement(Instruction *I) const;

  /// Return true if the target can fuse a compare and branch.
  /// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost
  /// calculation for the instructions in a loop.
  bool canMacroFuseCmp() const;

  /// Return true if the target can save a compare for loop count, for example
  /// hardware loop saves a compare.
  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
                  DominatorTree *DT, AssumptionCache *AC,
                  TargetLibraryInfo *LibInfo) const;

  /// \return True if LSR should make efforts to create/preserve post-inc
  /// addressing mode expressions.
  bool shouldFavorPostInc() const;

  /// Return true if LSR should make efforts to generate indexed addressing
  /// modes that operate across loop iterations.
  bool shouldFavorBackedgeIndex(const Loop *L) const;

  /// Return true if the target supports masked store.
  bool isLegalMaskedStore(Type *DataType, Align Alignment) const;
  /// Return true if the target supports masked load.
  bool isLegalMaskedLoad(Type *DataType, Align Alignment) const;

  /// Return true if the target supports nontemporal store.
  bool isLegalNTStore(Type *DataType, Align Alignment) const;
  /// Return true if the target supports nontemporal load.
  bool isLegalNTLoad(Type *DataType, Align Alignment) const;

  /// Return true if the target supports masked scatter.
  bool isLegalMaskedScatter(Type *DataType, Align Alignment) const;
  /// Return true if the target supports masked gather.
  bool isLegalMaskedGather(Type *DataType, Align Alignment) const;

  /// Return true if the target supports masked compress store.
  bool isLegalMaskedCompressStore(Type *DataType) const;
  /// Return true if the target supports masked expand load.
  bool isLegalMaskedExpandLoad(Type *DataType) const;

  /// Return true if the target has a unified operation to calculate division
  /// and remainder. If so, the additional implicit multiplication and
  /// subtraction required to calculate a remainder from division are free. This
  /// can enable more aggressive transformations for division and remainder than
  /// would typically be allowed using throughput or size cost models.
  bool hasDivRemOp(Type *DataType, bool IsSigned) const;

  /// Return true if the given instruction (assumed to be a memory access
  /// instruction) has a volatile variant. If that's the case then we can avoid
  /// addrspacecast to generic AS for volatile loads/stores. Default
  /// implementation returns false, which prevents address space inference for
  /// volatile loads/stores.
  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;

  /// Return true if target doesn't mind addresses in vectors.
  bool prefersVectorizedAddressing() const;

  /// Return the cost of the scaling factor used in the addressing
  /// mode represented by AM for this target, for a load/store
  /// of the specified type.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, it returns a negative value.
  /// TODO: Handle pre/postinc as well.
  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                           bool HasBaseReg, int64_t Scale,
                           unsigned AddrSpace = 0) const;

  /// Return true if the loop strength reduce pass should make
  /// Instruction* based TTI queries to isLegalAddressingMode(). This is
  /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
  /// immediate offset and no index register.
  bool LSRWithInstrQueries() const;

  /// Return true if it's free to truncate a value of type Ty1 to type
  /// Ty2. e.g. On x86 it's free to truncate an i32 value in register EAX to
  /// i16 by referencing its sub-register AX.
  bool isTruncateFree(Type *Ty1, Type *Ty2) const;

  /// Return true if it is profitable to hoist an instruction in the
  /// then/else blocks to before the if.
  bool isProfitableToHoist(Instruction *I) const;

  bool useAA() const;

  /// Return true if this type is legal.
  bool isTypeLegal(Type *Ty) const;

  /// Returns the estimated number of registers required to represent \p Ty.
  unsigned getRegUsageForType(Type *Ty) const;

  /// Return true if switches should be turned into lookup tables for the
  /// target.
  bool shouldBuildLookupTables() const;

  /// Return true if switches should be turned into lookup tables
  /// containing this constant value for the target.
  bool shouldBuildLookupTablesForConstant(Constant *C) const;

  /// Return true if the input function, which is cold at all call sites,
  /// should use the coldcc calling convention.
  bool useColdCCForColdCall(Function &F) const;

  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
  /// are set if the demanded result elements need to be inserted and/or
  /// extracted from vectors.
  unsigned getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts,
                                    bool Insert, bool Extract) const;

  /// Estimate the overhead of scalarizing an instruction's unique
  /// non-constant operands. The types of the arguments are ordinarily
  /// scalar, in which case the costs are multiplied with VF.
  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                            unsigned VF) const;

  /// If target has efficient vector element load/store instructions, it can
  /// return true here so that insertion/extraction costs are not added to
  /// the scalarization cost of a load/store.
  bool supportsEfficientVectorElementLoadStore() const;

  /// Don't restrict interleaved unrolling to small loops.
  bool enableAggressiveInterleaving(bool LoopHasReductions) const;

  /// Returns options for expansion of memcmp. IsZeroCmp is
  /// true if this is the expansion of memcmp(p1, p2, s) == 0.
  struct MemCmpExpansionOptions {
    // Return true if memcmp expansion is enabled.
    operator bool() const { return MaxNumLoads > 0; }

    // Maximum number of load operations.
    unsigned MaxNumLoads = 0;
    // The list of available load sizes (in bytes), sorted in decreasing order.
    SmallVector<unsigned, 8> LoadSizes;

    // For memcmp expansion when the memcmp result is only compared equal or
    // not-equal to 0, allow up to this number of load pairs per block. As an
    // example, this may allow 'memcmp(a, b, 3) == 0' in a single block:
    //   a0 = load2bytes &a[0]
    //   b0 = load2bytes &b[0]
    //   a2 = load1byte  &a[2]
    //   b2 = load1byte  &b[2]
    //   r  = cmp eq (a0 ^ b0 | a2 ^ b2), 0
    unsigned NumLoadsPerBlock = 1;

    // Set to true to allow overlapping loads. For example, 7-byte compares can
    // be done with two 4-byte compares instead of 4+2+1-byte compares. This
    // requires all loads in LoadSizes to be doable in an unaligned way.
    bool AllowOverlappingLoads = false;
  };
  MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                               bool IsZeroCmp) const;
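
  // Illustrative sketch (hypothetical client code, not part of the upstream
  // header): a memcmp-expansion client only proceeds when the target opts in,
  // using the returned options to bound the number and sizes of loads.
  //
  //   if (auto Options = TTI.enableMemCmpExpansion(/*OptSize=*/false,
  //                                                /*IsZeroCmp=*/true)) {
  //     // Options.MaxNumLoads, Options.LoadSizes, etc. drive the expansion.
  //   }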

  /// Enable matching of interleaved access groups.
  bool enableInterleavedAccessVectorization() const;

  /// Enable matching of interleaved access groups that contain predicated
  /// accesses or gaps and therefore are vectorized using masked
  /// vector loads/stores.
  bool enableMaskedInterleavedAccessVectorization() const;

  /// Indicate that it is potentially unsafe to automatically vectorize
  /// floating-point operations because the semantics of vector and scalar
  /// floating-point operations may differ. For example, ARM NEON v7 SIMD math
  /// does not support IEEE-754 denormal numbers, while depending on the
  /// platform, scalar floating-point math does.
  /// This applies to floating-point math operations and calls, not memory
  /// operations, shuffles, or casts.
  bool isFPVectorizationPotentiallyUnsafe() const;

  /// Determine if the target supports unaligned memory accesses.
  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
                                      unsigned AddressSpace = 0,
                                      unsigned Alignment = 1,
                                      bool *Fast = nullptr) const;

  /// Return hardware support for population count.
  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;

  /// Return true if the hardware has a fast square-root instruction.
  bool haveFastSqrt(Type *Ty) const;

  /// Return true if it is faster to check if a floating-point value is NaN
  /// (or not-NaN) versus a comparison against a constant FP zero value.
  /// Targets should override this if materializing a 0.0 for comparison is
  /// generally as cheap as checking for ordered/unordered.
  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const;

  /// Return the expected cost of supporting the floating point operation
  /// of the specified type.
  int getFPOpCost(Type *Ty) const;

  /// Return the expected cost of materializing the given integer
  /// immediate of the specified type.
  int getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) const;

  /// Return the expected cost of materialization for the given integer
  /// immediate of the specified type for a given instruction. The cost can be
  /// zero if the immediate can be folded into the specified instruction.
  int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty,
                        TargetCostKind CostKind,
                        Instruction *Inst = nullptr) const;
  int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                          Type *Ty, TargetCostKind CostKind) const;

  /// Return the expected cost for the given integer immediate when optimising
  /// for size. This is different from the other integer immediate cost
  /// functions in that it is subtarget agnostic. This is useful when you e.g.
  /// target one ISA such as Aarch32 but smaller encodings could be possible
  /// with another such as Thumb. This return value is used as a penalty when
  /// the total cost for a constant is calculated (the bigger the cost, the
  /// more beneficial constant hoisting is).
  int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                            Type *Ty) const;
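
  // Illustrative sketch (hypothetical client code, not part of the upstream
  // header; `Imm` and `Int64Ty` are assumed): constant hoisting compares the
  // cost of an immediate folded into an 'add' against materializing it
  // separately.
  //
  //   int Folded = TTI.getIntImmCostInst(Instruction::Add, /*Idx=*/1, Imm,
  //                                      Int64Ty,
  //                                      TargetTransformInfo::TCK_SizeAndLatency);
  //   int Standalone = TTI.getIntImmCost(Imm, Int64Ty,
  //                                      TargetTransformInfo::TCK_SizeAndLatency);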

  /// @}

  /// \name Vector Target Information
  /// @{

  /// The various kinds of shuffle patterns for vector queries.
  enum ShuffleKind {
    SK_Broadcast,        ///< Broadcast element 0 to all other elements.
    SK_Reverse,          ///< Reverse the order of the vector.
    SK_Select,           ///< Selects elements from the corresponding lane of
                         ///< either source operand. This is equivalent to a
                         ///< vector select with a constant condition operand.
    SK_Transpose,        ///< Transpose two vectors.
    SK_InsertSubvector,  ///< InsertSubvector. Index indicates start offset.
    SK_ExtractSubvector, ///< ExtractSubvector. Index indicates start offset.
    SK_PermuteTwoSrc,    ///< Merge elements from two source vectors into one
                         ///< with any shuffle mask.
    SK_PermuteSingleSrc  ///< Shuffle elements of single source vector with any
                         ///< shuffle mask.
  };

  /// Kind of the reduction data.
  enum ReductionKind {
    RK_None,           /// Not a reduction.
    RK_Arithmetic,     /// Binary reduction data.
    RK_MinMax,         /// Min/max reduction data.
    RK_UnsignedMinMax, /// Unsigned min/max reduction data.
  };

  /// Contains opcode + LHS/RHS parts of the reduction operations.
  struct ReductionData {
    ReductionData() = delete;
    ReductionData(ReductionKind Kind, unsigned Opcode, Value *LHS, Value *RHS)
        : Opcode(Opcode), LHS(LHS), RHS(RHS), Kind(Kind) {
      assert(Kind != RK_None && "expected binary or min/max reduction only.");
    }
    unsigned Opcode = 0;
    Value *LHS = nullptr;
    Value *RHS = nullptr;
    ReductionKind Kind = RK_None;
    bool hasSameData(ReductionData &RD) const {
      return Kind == RD.Kind && Opcode == RD.Opcode;
    }
  };

  static ReductionKind matchPairwiseReduction(
      const ExtractElementInst *ReduxRoot, unsigned &Opcode, VectorType *&Ty);

  static ReductionKind matchVectorSplittingReduction(
      const ExtractElementInst *ReduxRoot, unsigned &Opcode, VectorType *&Ty);

  static ReductionKind matchVectorReduction(const ExtractElementInst *ReduxRoot,
                                            unsigned &Opcode, VectorType *&Ty,
                                            bool &IsPairwise);

  /// Additional information about an operand's possible values.
  enum OperandValueKind {
    OK_AnyValue,               // Operand can have any value.
    OK_UniformValue,           // Operand is uniform (splat of a value).
    OK_UniformConstantValue,   // Operand is uniform constant.
    OK_NonUniformConstantValue // Operand is a non uniform constant value.
  };

  /// Additional properties of an operand's values.
  enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };

  /// \return the number of registers in the target-provided register class.
  unsigned getNumberOfRegisters(unsigned ClassID) const;

  /// \return the target-provided register class ID for the provided type,
  /// accounting for type promotion and other type-legalization techniques that
  /// the target might apply. However, it specifically does not account for the
  /// scalarization or splitting of vector types. Should a vector type require
  /// scalarization or splitting into multiple underlying vector registers, that
  /// type should be mapped to a register class containing no registers.
  /// Specifically, this is designed to provide a simple, high-level view of the
  /// register allocation later performed by the backend. These register classes
  /// don't necessarily map onto the register classes used by the backend.
  /// FIXME: It's not currently possible to determine how many registers
  /// are used by the provided type.
  unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const;

  /// \return the target-provided register class name
  const char *getRegisterClassName(unsigned ClassID) const;

  /// \return The width of the largest scalar or vector register type.
  unsigned getRegisterBitWidth(bool Vector) const;

  /// \return The width of the smallest vector register type.
  unsigned getMinVectorRegisterBitWidth() const;

  /// \return The maximum value of vscale if the target specifies an
  /// architectural maximum vector length, and None otherwise.
  Optional<unsigned> getMaxVScale() const;

  /// \return True if the vectorization factor should be chosen to
  /// make the vector of the smallest element type match the size of a
  /// vector register. For wider element types, this could result in
  /// creating vectors that span multiple vector registers.
  /// If false, the vectorization factor will be chosen based on the
  /// size of the widest element type.
  bool shouldMaximizeVectorBandwidth(bool OptSize) const;

  /// \return The minimum vectorization factor for types of given element
  /// bit width, or 0 if there is no minimum VF. The returned value only
  /// applies when shouldMaximizeVectorBandwidth returns true.
  unsigned getMinimumVF(unsigned ElemWidth) const;

  /// \return The maximum vectorization factor for types of given element
  /// bit width and opcode, or 0 if there is no maximum VF.
  /// Currently only used by the SLP vectorizer.
  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;

  /// \return True if it should be considered for address type promotion.
  /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
  /// profitable without finding other extensions fed by the same input.
  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;

  /// \return The size of a cache line in bytes.
  unsigned getCacheLineSize() const;

  /// The possible cache levels
  enum class CacheLevel {
    L1D, // The L1 data cache
    L2D, // The L2 data cache

    // We currently do not model L3 caches, as their sizes differ widely between
    // microarchitectures. Also, we currently do not have a use for L3 cache
    // size modeling yet.
  };

  /// \return The size of the cache level in bytes, if available.
  Optional<unsigned> getCacheSize(CacheLevel Level) const;

  /// \return The associativity of the cache level, if available.
  Optional<unsigned> getCacheAssociativity(CacheLevel Level) const;
  850. /// \return How much before a load we should place the prefetch
  851. /// instruction. This is currently measured in number of
  852. /// instructions.
  853. unsigned getPrefetchDistance() const;
  854. /// Some HW prefetchers can handle accesses up to a certain constant stride.
  855. /// Sometimes prefetching is beneficial even below the HW prefetcher limit,
  856. /// and the arguments provided are meant to serve as a basis for deciding this
  857. /// for a particular loop.
  858. ///
  859. /// \param NumMemAccesses Number of memory accesses in the loop.
  860. /// \param NumStridedMemAccesses Number of the memory accesses that
  861. /// ScalarEvolution could find a known stride
  862. /// for.
  863. /// \param NumPrefetches Number of software prefetches that will be
  864. /// emitted as determined by the addresses
  865. /// involved and the cache line size.
  866. /// \param HasCall True if the loop contains a call.
  867. ///
  868. /// \return This is the minimum stride in bytes where it makes sense to start
  869. /// adding SW prefetches. The default is 1, i.e. prefetch with any
  870. /// stride.
  871. unsigned getMinPrefetchStride(unsigned NumMemAccesses,
  872. unsigned NumStridedMemAccesses,
  873. unsigned NumPrefetches, bool HasCall) const;
  874. /// \return The maximum number of iterations to prefetch ahead. If
  875. /// the required number of iterations is more than this number, no
  876. /// prefetching is performed.
  877. unsigned getMaxPrefetchIterationsAhead() const;
  878. /// \return True if prefetching should also be done for writes.
  879. bool enableWritePrefetching() const;
  880. /// \return The maximum interleave factor that any transform should try to
  881. /// perform for this target. This number depends on the level of parallelism
  882. /// and the number of execution units in the CPU.
  883. unsigned getMaxInterleaveFactor(unsigned VF) const;
  884. /// Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
  885. static OperandValueKind getOperandInfo(const Value *V,
  886. OperandValueProperties &OpProps);
  887. /// This is an approximation of reciprocal throughput of a math/logic op.
  888. /// A higher cost indicates less expected throughput.
  889. /// From Agner Fog's guides, reciprocal throughput is "the average number of
  890. /// clock cycles per instruction when the instructions are not part of a
  891. /// limiting dependency chain."
  892. /// Therefore, costs should be scaled to account for multiple execution units
  893. /// on the target that can process this type of instruction. For example, if
  894. /// there are 5 scalar integer units and 2 vector integer units that can
  895. /// calculate an 'add' in a single cycle, this model should indicate that the
  896. /// cost of the vector add instruction is 2.5 times the cost of the scalar
  897. /// add instruction.
  898. /// \p Args is an optional argument which holds the instruction operands
  899. /// values so the TTI can analyze those values searching for special
  900. /// cases or optimizations based on those values.
  901. /// \p CxtI is the optional original context instruction, if one exists, to
  902. /// provide even more information.
  903. int getArithmeticInstrCost(
  904. unsigned Opcode, Type *Ty,
  905. TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
  906. OperandValueKind Opd1Info = OK_AnyValue,
  907. OperandValueKind Opd2Info = OK_AnyValue,
  908. OperandValueProperties Opd1PropInfo = OP_None,
  909. OperandValueProperties Opd2PropInfo = OP_None,
  910. ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
  911. const Instruction *CxtI = nullptr) const;
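// A minimal sketch (not part of this interface) of a reciprocal-throughput
// query for a vector add whose second operand is a uniform constant power of
// two; `TTI` and `VecTy` are hypothetical caller-provided values.
//   int Cost = TTI.getArithmeticInstrCost(
//       Instruction::Add, VecTy, TargetTransformInfo::TCK_RecipThroughput,
//       TargetTransformInfo::OK_AnyValue,
//       TargetTransformInfo::OK_UniformConstantValue,
//       TargetTransformInfo::OP_None, TargetTransformInfo::OP_PowerOf2);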
  912. /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
  913. /// The index and subtype parameters are used by the subvector insertion and
  914. /// extraction shuffle kinds to show the insert/extract point and the type of
  915. /// the subvector being inserted/extracted.
  916. /// NOTE: For subvector extractions Tp represents the source type.
  917. int getShuffleCost(ShuffleKind Kind, VectorType *Tp, int Index = 0,
  918. VectorType *SubTp = nullptr) const;
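// A minimal sketch (not part of this interface) of costing a subvector
// extraction, where `WideTy` is the source vector type and `SubTy` the type
// being extracted; both are hypothetical caller-provided types.
//   int Cost = TTI.getShuffleCost(TargetTransformInfo::SK_ExtractSubvector,
//                                 WideTy, /*Index=*/4, SubTy);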
  919. /// Represents a hint about the context in which a cast is used.
  920. ///
  921. /// For zext/sext, the context of the cast is the operand, which must be a
922. /// load of some kind. For trunc, the context of the cast is the single
  923. /// user of the instruction, which must be a store of some kind.
  924. ///
  925. /// This enum allows the vectorizer to give getCastInstrCost an idea of the
  926. /// type of cast it's dealing with, as not every cast is equal. For instance,
927. /// the zext of a load may be free, but the zext of an interleaved load can
928. /// be (very) expensive!
  929. ///
  930. /// See \c getCastContextHint to compute a CastContextHint from a cast
  931. /// Instruction*. Callers can use it if they don't need to override the
  932. /// context and just want it to be calculated from the instruction.
  933. ///
  934. /// FIXME: This handles the types of load/store that the vectorizer can
  935. /// produce, which are the cases where the context instruction is most
  936. /// likely to be incorrect. There are other situations where that can happen
  937. /// too, which might be handled here but in the long run a more general
938. /// solution of costing multiple instructions at the same time may be better.
  939. enum class CastContextHint : uint8_t {
  940. None, ///< The cast is not used with a load/store of any kind.
  941. Normal, ///< The cast is used with a normal load/store.
  942. Masked, ///< The cast is used with a masked load/store.
  943. GatherScatter, ///< The cast is used with a gather/scatter.
  944. Interleave, ///< The cast is used with an interleaved load/store.
  945. Reversed, ///< The cast is used with a reversed load/store.
  946. };
  947. /// Calculates a CastContextHint from \p I.
  948. /// This should be used by callers of getCastInstrCost if they wish to
  949. /// determine the context from some instruction.
  950. /// \returns the CastContextHint for ZExt/SExt/Trunc, None if \p I is nullptr,
  951. /// or if it's another type of cast.
  952. static CastContextHint getCastContextHint(const Instruction *I);
  953. /// \return The expected cost of cast instructions, such as bitcast, trunc,
  954. /// zext, etc. If there is an existing instruction that holds Opcode, it
  955. /// may be passed in the 'I' parameter.
  956. int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
  957. TTI::CastContextHint CCH,
  958. TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
  959. const Instruction *I = nullptr) const;
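// A minimal sketch (not part of this interface) of deriving the context from
// an existing zext before costing it; `ZExt`, `DstTy`, and `SrcTy` are
// hypothetical caller-provided values.
//   TargetTransformInfo::CastContextHint CCH =
//       TargetTransformInfo::getCastContextHint(ZExt);
//   int Cost = TTI.getCastInstrCost(Instruction::ZExt, DstTy, SrcTy, CCH,
//                                   TargetTransformInfo::TCK_RecipThroughput,
//                                   ZExt);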
  960. /// \return The expected cost of a sign- or zero-extended vector extract. Use
  961. /// -1 to indicate that there is no information about the index value.
  962. int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
  963. unsigned Index = -1) const;
  964. /// \return The expected cost of control-flow related instructions such as
  965. /// Phi, Ret, Br.
  966. int getCFInstrCost(unsigned Opcode,
  967. TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const;
  968. /// \returns The expected cost of compare and select instructions. If there
  969. /// is an existing instruction that holds Opcode, it may be passed in the
  970. /// 'I' parameter. The \p VecPred parameter can be used to indicate the select
  971. /// is using a compare with the specified predicate as condition. When vector
  972. /// types are passed, \p VecPred must be used for all lanes.
  973. int getCmpSelInstrCost(
  974. unsigned Opcode, Type *ValTy, Type *CondTy = nullptr,
  975. CmpInst::Predicate VecPred = CmpInst::BAD_ICMP_PREDICATE,
  976. TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
  977. const Instruction *I = nullptr) const;
  978. /// \return The expected cost of vector Insert and Extract.
  979. /// Use -1 to indicate that there is no information on the index value.
  980. int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const;
  981. /// \return The cost of Load and Store instructions.
  982. int getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
  983. unsigned AddressSpace,
  984. TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
  985. const Instruction *I = nullptr) const;
  986. /// \return The cost of masked Load and Store instructions.
  987. int getMaskedMemoryOpCost(
  988. unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
  989. TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
  990. /// \return The cost of Gather or Scatter operation
  991. /// \p Opcode - is a type of memory access Load or Store
  992. /// \p DataTy - a vector type of the data to be loaded or stored
  993. /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
  994. /// \p VariableMask - true when the memory access is predicated with a mask
  995. /// that is not a compile-time constant
  996. /// \p Alignment - alignment of single element
  997. /// \p I - the optional original context instruction, if one exists, e.g. the
  998. /// load/store to transform or the call to the gather/scatter intrinsic
  999. int getGatherScatterOpCost(
  1000. unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
  1001. Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
  1002. const Instruction *I = nullptr) const;
  1003. /// \return The cost of the interleaved memory operation.
  1004. /// \p Opcode is the memory operation code
  1005. /// \p VecTy is the vector type of the interleaved access.
  1006. /// \p Factor is the interleave factor
1007. /// \p Indices is the list of indices of the interleaved load members that
1008. /// are actually used (interleaved loads allow gaps)
  1009. /// \p Alignment is the alignment of the memory operation
  1010. /// \p AddressSpace is address space of the pointer.
  1011. /// \p UseMaskForCond indicates if the memory access is predicated.
  1012. /// \p UseMaskForGaps indicates if gaps should be masked.
  1013. int getInterleavedMemoryOpCost(
  1014. unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
  1015. Align Alignment, unsigned AddressSpace,
  1016. TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
  1017. bool UseMaskForCond = false, bool UseMaskForGaps = false) const;
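// A minimal sketch (not part of this interface) of costing an interleaved
// load of factor 3 that only uses members 0 and 2; `WideVecTy`, the
// alignment, and the address space are hypothetical caller-provided values.
//   unsigned Indices[] = {0, 2};
//   int Cost = TTI.getInterleavedMemoryOpCost(
//       Instruction::Load, WideVecTy, /*Factor=*/3, Indices, Align(16),
//       /*AddressSpace=*/0);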
  1018. /// Calculate the cost of performing a vector reduction.
  1019. ///
  1020. /// This is the cost of reducing the vector value of type \p Ty to a scalar
  1021. /// value using the operation denoted by \p Opcode. The form of the reduction
  1022. /// can either be a pairwise reduction or a reduction that splits the vector
  1023. /// at every reduction level.
  1024. ///
  1025. /// Pairwise:
  1026. /// (v0, v1, v2, v3)
  1027. /// ((v0+v1), (v2+v3), undef, undef)
  1028. /// Split:
  1029. /// (v0, v1, v2, v3)
  1030. /// ((v0+v2), (v1+v3), undef, undef)
  1031. int getArithmeticReductionCost(
  1032. unsigned Opcode, VectorType *Ty, bool IsPairwiseForm,
  1033. TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
  1034. int getMinMaxReductionCost(
  1035. VectorType *Ty, VectorType *CondTy, bool IsPairwiseForm, bool IsUnsigned,
  1036. TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
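// A minimal sketch (not part of this interface) of costing a split-form
// integer add reduction and an unsigned max reduction; `VecTy` and `CondTy`
// are hypothetical caller-provided vector types.
//   int AddCost = TTI.getArithmeticReductionCost(Instruction::Add, VecTy,
//                                                /*IsPairwiseForm=*/false);
//   int MaxCost = TTI.getMinMaxReductionCost(VecTy, CondTy,
//                                            /*IsPairwiseForm=*/false,
//                                            /*IsUnsigned=*/true);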
  1037. /// Calculate the cost of an extended reduction pattern, similar to
  1038. /// getArithmeticReductionCost of an Add reduction with an extension and
1039. /// optional multiply. This models the cost of:
1040. /// ResTy vecreduce.add(ext(Ty A)), or, if the IsMLA flag is set:
1041. /// ResTy vecreduce.add(mul(ext(Ty A), ext(Ty B))). The reduction happens
  1042. /// on a VectorType with ResTy elements and Ty lanes.
  1043. InstructionCost getExtendedAddReductionCost(
  1044. bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
  1045. TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
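// For illustration only: the kind of IR pattern this hook prices when IsMLA
// is set and IsUnsigned is false (hence sext), shown for <8 x i16> inputs
// extended to i32; the types and value names are examples, not prescribed by
// this interface.
//   %ea = sext <8 x i16> %a to <8 x i32>
//   %eb = sext <8 x i16> %b to <8 x i32>
//   %m  = mul <8 x i32> %ea, %eb
//   %r  = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %m)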
  1046. /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
1047. /// Three cases are handled: 1. a scalar instruction, 2. a vector
1048. /// instruction, and 3. a scalar instruction that is to be vectorized.
  1049. int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
  1050. TTI::TargetCostKind CostKind) const;
  1051. /// \returns The cost of Call instructions.
  1052. int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys,
  1053. TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const;
  1054. /// \returns The number of pieces into which the provided type must be
  1055. /// split during legalization. Zero is returned when the answer is unknown.
  1056. unsigned getNumberOfParts(Type *Tp) const;
  1057. /// \returns The cost of the address computation. For most targets this can be
  1058. /// merged into the instruction indexing mode. Some targets might want to
  1059. /// distinguish between address computation for memory operations on vector
  1060. /// types and scalar types. Such targets should override this function.
1061. /// The 'SE' parameter holds a pointer to the scalar evolution object, which
1062. /// is used to get the step value of Ptr in the case of a constant stride.
1063. /// The 'Ptr' parameter holds the SCEV of the access pointer.
  1064. int getAddressComputationCost(Type *Ty, ScalarEvolution *SE = nullptr,
  1065. const SCEV *Ptr = nullptr) const;
  1066. /// \returns The cost, if any, of keeping values of the given types alive
  1067. /// over a callsite.
  1068. ///
  1069. /// Some types may require the use of register classes that do not have
1070. /// any callee-saved registers, so they would require a spill and fill.
  1071. unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const;
  1072. /// \returns True if the intrinsic is a supported memory intrinsic. Info
1073. /// will contain additional information: whether the intrinsic may read
1074. /// or write memory, its volatility, and the pointer. Info is undefined
  1075. /// if false is returned.
  1076. bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
  1077. /// \returns The maximum element size, in bytes, for an element
  1078. /// unordered-atomic memory intrinsic.
  1079. unsigned getAtomicMemIntrinsicMaxElementSize() const;
  1080. /// \returns A value which is the result of the given memory intrinsic. New
  1081. /// instructions may be created to extract the result from the given intrinsic
  1082. /// memory operation. Returns nullptr if the target cannot create a result
  1083. /// from the given intrinsic.
  1084. Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
  1085. Type *ExpectedType) const;
  1086. /// \returns The type to use in a loop expansion of a memcpy call.
  1087. Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
  1088. unsigned SrcAddrSpace, unsigned DestAddrSpace,
  1089. unsigned SrcAlign, unsigned DestAlign) const;
  1090. /// \param[out] OpsOut The operand types to copy RemainingBytes of memory.
  1091. /// \param RemainingBytes The number of bytes to copy.
  1092. ///
  1093. /// Calculates the operand types to use when copying \p RemainingBytes of
  1094. /// memory, where source and destination alignments are \p SrcAlign and
  1095. /// \p DestAlign respectively.
  1096. void getMemcpyLoopResidualLoweringType(
  1097. SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
  1098. unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
  1099. unsigned SrcAlign, unsigned DestAlign) const;
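// A minimal sketch (not part of this interface) of how a memcpy-expansion
// pass might combine these two hooks; all names here are hypothetical.
//   Type *LoopOpTy = TTI.getMemcpyLoopLoweringType(Ctx, Len, SrcAS, DstAS,
//                                                  SrcAlign, DstAlign);
//   SmallVector<Type *, 4> ResidualTys;
//   TTI.getMemcpyLoopResidualLoweringType(ResidualTys, Ctx, RemainingBytes,
//                                         SrcAS, DstAS, SrcAlign, DstAlign);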
  1100. /// \returns True if the two functions have compatible attributes for inlining
  1101. /// purposes.
  1102. bool areInlineCompatible(const Function *Caller,
  1103. const Function *Callee) const;
  1104. /// \returns True if the caller and callee agree on how \p Args will be passed
  1105. /// to the callee.
  1106. /// \param[out] Args The list of compatible arguments. The implementation may
  1107. /// filter out any incompatible args from this list.
  1108. bool areFunctionArgsABICompatible(const Function *Caller,
  1109. const Function *Callee,
  1110. SmallPtrSetImpl<Argument *> &Args) const;
  1111. /// The type of load/store indexing.
  1112. enum MemIndexedMode {
  1113. MIM_Unindexed, ///< No indexing.
  1114. MIM_PreInc, ///< Pre-incrementing.
  1115. MIM_PreDec, ///< Pre-decrementing.
  1116. MIM_PostInc, ///< Post-incrementing.
  1117. MIM_PostDec ///< Post-decrementing.
  1118. };
  1119. /// \returns True if the specified indexed load for the given type is legal.
  1120. bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const;
  1121. /// \returns True if the specified indexed store for the given type is legal.
  1122. bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const;
  1123. /// \returns The bitwidth of the largest vector type that should be used to
  1124. /// load/store in the given address space.
  1125. unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
  1126. /// \returns True if the load instruction is legal to vectorize.
  1127. bool isLegalToVectorizeLoad(LoadInst *LI) const;
  1128. /// \returns True if the store instruction is legal to vectorize.
  1129. bool isLegalToVectorizeStore(StoreInst *SI) const;
  1130. /// \returns True if it is legal to vectorize the given load chain.
  1131. bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
  1132. unsigned AddrSpace) const;
  1133. /// \returns True if it is legal to vectorize the given store chain.
  1134. bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
  1135. unsigned AddrSpace) const;
  1136. /// \returns The new vector factor value if the target doesn't support \p
1137. /// ChainSizeInBytes loads or has a better vector factor.
  1138. unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
  1139. unsigned ChainSizeInBytes,
  1140. VectorType *VecTy) const;
  1141. /// \returns The new vector factor value if the target doesn't support \p
1142. /// ChainSizeInBytes stores or has a better vector factor.
  1143. unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
  1144. unsigned ChainSizeInBytes,
  1145. VectorType *VecTy) const;
  1146. /// Flags describing the kind of vector reduction.
  1147. struct ReductionFlags {
  1148. ReductionFlags() : IsMaxOp(false), IsSigned(false), NoNaN(false) {}
1149. bool IsMaxOp; ///< If the op is a min/max kind, true if it's a max operation.
  1150. bool IsSigned; ///< Whether the operation is a signed int reduction.
  1151. bool NoNaN; ///< If op is an fp min/max, whether NaNs may be present.
  1152. };
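// A minimal sketch (not part of this interface) of filling in the flags for
// a signed integer max reduction before querying the hooks below; `VecTy` is
// a hypothetical vector type, and the opcode shown for a min/max reduction is
// only an illustration of a caller-side convention, not prescribed here.
//   TargetTransformInfo::ReductionFlags RF;
//   RF.IsMaxOp = true;
//   RF.IsSigned = true;
//   bool UseIntrinsic = TTI.useReductionIntrinsic(Instruction::ICmp, VecTy, RF);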
  1153. /// \returns True if the target wants to handle the given reduction idiom in
  1154. /// the intrinsics form instead of the shuffle form.
  1155. bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
  1156. ReductionFlags Flags) const;
1157. /// \returns True if the target prefers reductions to be performed in-loop.
  1158. bool preferInLoopReduction(unsigned Opcode, Type *Ty,
  1159. ReductionFlags Flags) const;
1160. /// \returns True if the target prefers the reduction select to be kept in
1161. /// the loop when tail folding, i.e.
  1162. /// loop:
  1163. /// p = phi (0, s)
  1164. /// a = add (p, x)
  1165. /// s = select (mask, a, p)
  1166. /// vecreduce.add(s)
  1167. ///
1168. /// This is as opposed to the normal scheme of p = phi (0, a), which allows
1169. /// the select to be pulled out of the loop. If the select(.., add, ..) can
1170. /// be predicated by the target, this can lead to cleaner code generation.
  1171. bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
  1172. ReductionFlags Flags) const;
  1173. /// \returns True if the target wants to expand the given reduction intrinsic
  1174. /// into a shuffle sequence.
  1175. bool shouldExpandReduction(const IntrinsicInst *II) const;
  1176. /// \returns the size cost of rematerializing a GlobalValue address relative
  1177. /// to a stack reload.
  1178. unsigned getGISelRematGlobalCost() const;
  1179. /// \returns True if the target supports scalable vectors.
  1180. bool supportsScalableVectors() const;
  1181. /// \name Vector Predication Information
  1182. /// @{
1183. /// Whether the target supports the %evl parameter of VP intrinsics
1184. /// efficiently in hardware (see LLVM Language Reference, "Vector Predication
1185. /// Intrinsics"). Use of %evl is discouraged when that is not the case.
  1186. bool hasActiveVectorLength() const;
  1187. /// @}
  1188. /// @}
  1189. private:
1190. /// Estimate the latency of the specified instruction.
  1191. /// Returns 1 as the default value.
  1192. int getInstructionLatency(const Instruction *I) const;
  1193. /// Returns the expected throughput cost of the instruction.
  1194. /// Returns -1 if the cost is unknown.
  1195. int getInstructionThroughput(const Instruction *I) const;
  1196. /// The abstract base class used to type erase specific TTI
  1197. /// implementations.
  1198. class Concept;
  1199. /// The template model for the base class which wraps a concrete
  1200. /// implementation in a type erased interface.
  1201. template <typename T> class Model;
  1202. std::unique_ptr<Concept> TTIImpl;
  1203. };
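// The Concept/Model pair below implements the classic type-erasure idiom:
// Concept is an abstract interface and Model<T> forwards every virtual call
// to a concrete implementation T. A minimal generic sketch of the pattern
// (hypothetical names, not part of this header):
//   struct Concept { virtual ~Concept() = default; virtual int cost() = 0; };
//   template <typename T> struct Model final : Concept {
//     T Impl;
//     Model(T I) : Impl(std::move(I)) {}
//     int cost() override { return Impl.cost(); }
//   };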
  1204. class TargetTransformInfo::Concept {
  1205. public:
  1206. virtual ~Concept() = 0;
  1207. virtual const DataLayout &getDataLayout() const = 0;
  1208. virtual int getGEPCost(Type *PointeeType, const Value *Ptr,
  1209. ArrayRef<const Value *> Operands,
  1210. TTI::TargetCostKind CostKind) = 0;
  1211. virtual unsigned getInliningThresholdMultiplier() = 0;
  1212. virtual unsigned adjustInliningThreshold(const CallBase *CB) = 0;
  1213. virtual int getInlinerVectorBonusPercent() = 0;
  1214. virtual int getMemcpyCost(const Instruction *I) = 0;
  1215. virtual unsigned
  1216. getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize,
  1217. ProfileSummaryInfo *PSI,
  1218. BlockFrequencyInfo *BFI) = 0;
  1219. virtual int getUserCost(const User *U, ArrayRef<const Value *> Operands,
  1220. TargetCostKind CostKind) = 0;
  1221. virtual bool hasBranchDivergence() = 0;
  1222. virtual bool useGPUDivergenceAnalysis() = 0;
  1223. virtual bool isSourceOfDivergence(const Value *V) = 0;
  1224. virtual bool isAlwaysUniform(const Value *V) = 0;
  1225. virtual unsigned getFlatAddressSpace() = 0;
  1226. virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
  1227. Intrinsic::ID IID) const = 0;
  1228. virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
  1229. virtual unsigned getAssumedAddrSpace(const Value *V) const = 0;
  1230. virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
  1231. Value *OldV,
  1232. Value *NewV) const = 0;
  1233. virtual bool isLoweredToCall(const Function *F) = 0;
  1234. virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
  1235. UnrollingPreferences &UP) = 0;
  1236. virtual void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
  1237. PeelingPreferences &PP) = 0;
  1238. virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
  1239. AssumptionCache &AC,
  1240. TargetLibraryInfo *LibInfo,
  1241. HardwareLoopInfo &HWLoopInfo) = 0;
  1242. virtual bool
  1243. preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
  1244. AssumptionCache &AC, TargetLibraryInfo *TLI,
  1245. DominatorTree *DT, const LoopAccessInfo *LAI) = 0;
  1246. virtual bool emitGetActiveLaneMask() = 0;
  1247. virtual Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
  1248. IntrinsicInst &II) = 0;
  1249. virtual Optional<Value *>
  1250. simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
  1251. APInt DemandedMask, KnownBits &Known,
  1252. bool &KnownBitsComputed) = 0;
  1253. virtual Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
  1254. InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
  1255. APInt &UndefElts2, APInt &UndefElts3,
  1256. std::function<void(Instruction *, unsigned, APInt, APInt &)>
  1257. SimplifyAndSetOp) = 0;
  1258. virtual bool isLegalAddImmediate(int64_t Imm) = 0;
  1259. virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
  1260. virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
  1261. int64_t BaseOffset, bool HasBaseReg,
  1262. int64_t Scale, unsigned AddrSpace,
  1263. Instruction *I) = 0;
  1264. virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
  1265. TargetTransformInfo::LSRCost &C2) = 0;
  1266. virtual bool isNumRegsMajorCostOfLSR() = 0;
  1267. virtual bool isProfitableLSRChainElement(Instruction *I) = 0;
  1268. virtual bool canMacroFuseCmp() = 0;
  1269. virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
  1270. LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
  1271. TargetLibraryInfo *LibInfo) = 0;
  1272. virtual bool shouldFavorPostInc() const = 0;
  1273. virtual bool shouldFavorBackedgeIndex(const Loop *L) const = 0;
  1274. virtual bool isLegalMaskedStore(Type *DataType, Align Alignment) = 0;
  1275. virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0;
  1276. virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0;
  1277. virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0;
  1278. virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) = 0;
  1279. virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0;
  1280. virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
  1281. virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;
  1282. virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
  1283. virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
  1284. virtual bool prefersVectorizedAddressing() = 0;
  1285. virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
  1286. int64_t BaseOffset, bool HasBaseReg,
  1287. int64_t Scale, unsigned AddrSpace) = 0;
  1288. virtual bool LSRWithInstrQueries() = 0;
  1289. virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
  1290. virtual bool isProfitableToHoist(Instruction *I) = 0;
  1291. virtual bool useAA() = 0;
  1292. virtual bool isTypeLegal(Type *Ty) = 0;
  1293. virtual unsigned getRegUsageForType(Type *Ty) = 0;
  1294. virtual bool shouldBuildLookupTables() = 0;
  1295. virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
  1296. virtual bool useColdCCForColdCall(Function &F) = 0;
  1297. virtual unsigned getScalarizationOverhead(VectorType *Ty,
  1298. const APInt &DemandedElts,
  1299. bool Insert, bool Extract) = 0;
  1300. virtual unsigned
  1301. getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
  1302. unsigned VF) = 0;
  1303. virtual bool supportsEfficientVectorElementLoadStore() = 0;
  1304. virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
  1305. virtual MemCmpExpansionOptions
  1306. enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0;
  1307. virtual bool enableInterleavedAccessVectorization() = 0;
  1308. virtual bool enableMaskedInterleavedAccessVectorization() = 0;
  1309. virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
  1310. virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
  1311. unsigned BitWidth,
  1312. unsigned AddressSpace,
  1313. unsigned Alignment,
  1314. bool *Fast) = 0;
  1315. virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
  1316. virtual bool haveFastSqrt(Type *Ty) = 0;
  1317. virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
  1318. virtual int getFPOpCost(Type *Ty) = 0;
  1319. virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
  1320. const APInt &Imm, Type *Ty) = 0;
  1321. virtual int getIntImmCost(const APInt &Imm, Type *Ty,
  1322. TargetCostKind CostKind) = 0;
  1323. virtual int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm,
  1324. Type *Ty, TargetCostKind CostKind,
  1325. Instruction *Inst = nullptr) = 0;
  1326. virtual int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
  1327. const APInt &Imm, Type *Ty,
  1328. TargetCostKind CostKind) = 0;
  1329. virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0;
  1330. virtual unsigned getRegisterClassForType(bool Vector,
  1331. Type *Ty = nullptr) const = 0;
  1332. virtual const char *getRegisterClassName(unsigned ClassID) const = 0;
  1333. virtual unsigned getRegisterBitWidth(bool Vector) const = 0;
  1334. virtual unsigned getMinVectorRegisterBitWidth() = 0;
  1335. virtual Optional<unsigned> getMaxVScale() const = 0;
  1336. virtual bool shouldMaximizeVectorBandwidth(bool OptSize) const = 0;
  1337. virtual unsigned getMinimumVF(unsigned ElemWidth) const = 0;
  1338. virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0;
  1339. virtual bool shouldConsiderAddressTypePromotion(
  1340. const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
  1341. virtual unsigned getCacheLineSize() const = 0;
  1342. virtual Optional<unsigned> getCacheSize(CacheLevel Level) const = 0;
  1343. virtual Optional<unsigned> getCacheAssociativity(CacheLevel Level) const = 0;
  1344. /// \return How much before a load we should place the prefetch
  1345. /// instruction. This is currently measured in number of
  1346. /// instructions.
  1347. virtual unsigned getPrefetchDistance() const = 0;
  1348. /// \return Some HW prefetchers can handle accesses up to a certain
  1349. /// constant stride. This is the minimum stride in bytes where it
  1350. /// makes sense to start adding SW prefetches. The default is 1,
  1351. /// i.e. prefetch with any stride. Sometimes prefetching is beneficial
  1352. /// even below the HW prefetcher limit, and the arguments provided are
  1353. /// meant to serve as a basis for deciding this for a particular loop.
  1354. virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses,
  1355. unsigned NumStridedMemAccesses,
  1356. unsigned NumPrefetches,
  1357. bool HasCall) const = 0;
  1358. /// \return The maximum number of iterations to prefetch ahead. If
  1359. /// the required number of iterations is more than this number, no
  1360. /// prefetching is performed.
  1361. virtual unsigned getMaxPrefetchIterationsAhead() const = 0;
  1362. /// \return True if prefetching should also be done for writes.
  1363. virtual bool enableWritePrefetching() const = 0;
  1364. virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
  1365. virtual unsigned getArithmeticInstrCost(
  1366. unsigned Opcode, Type *Ty,
  1367. TTI::TargetCostKind CostKind,
  1368. OperandValueKind Opd1Info,
  1369. OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
  1370. OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
  1371. const Instruction *CxtI = nullptr) = 0;
  1372. virtual int getShuffleCost(ShuffleKind Kind, VectorType *Tp, int Index,
  1373. VectorType *SubTp) = 0;
  1374. virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
  1375. CastContextHint CCH,
  1376. TTI::TargetCostKind CostKind,
  1377. const Instruction *I) = 0;
  1378. virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst,
  1379. VectorType *VecTy, unsigned Index) = 0;
  1380. virtual int getCFInstrCost(unsigned Opcode,
  1381. TTI::TargetCostKind CostKind) = 0;
  1382. virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
  1383. CmpInst::Predicate VecPred,
  1384. TTI::TargetCostKind CostKind,
  1385. const Instruction *I) = 0;
  1386. virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
  1387. unsigned Index) = 0;
  1388. virtual int getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
  1389. unsigned AddressSpace,
  1390. TTI::TargetCostKind CostKind,
  1391. const Instruction *I) = 0;
  1392. virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
  1393. unsigned AddressSpace,
  1394. TTI::TargetCostKind CostKind) = 0;
  1395. virtual int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
  1396. const Value *Ptr, bool VariableMask,
  1397. Align Alignment,
  1398. TTI::TargetCostKind CostKind,
  1399. const Instruction *I = nullptr) = 0;
  1400. virtual int getInterleavedMemoryOpCost(
  1401. unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
  1402. Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
  1403. bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0;
  1404. virtual int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
  1405. bool IsPairwiseForm,
  1406. TTI::TargetCostKind CostKind) = 0;
  1407. virtual int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
  1408. bool IsPairwiseForm, bool IsUnsigned,
  1409. TTI::TargetCostKind CostKind) = 0;
  1410. virtual InstructionCost getExtendedAddReductionCost(
  1411. bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
  1412. TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) = 0;
  1413. virtual int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
  1414. TTI::TargetCostKind CostKind) = 0;
  1415. virtual int getCallInstrCost(Function *F, Type *RetTy,
  1416. ArrayRef<Type *> Tys,
  1417. TTI::TargetCostKind CostKind) = 0;
  1418. virtual unsigned getNumberOfParts(Type *Tp) = 0;
  1419. virtual int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
  1420. const SCEV *Ptr) = 0;
  1421. virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
  1422. virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
  1423. MemIntrinsicInfo &Info) = 0;
  1424. virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
  1425. virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
  1426. Type *ExpectedType) = 0;
  1427. virtual Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
  1428. unsigned SrcAddrSpace,
  1429. unsigned DestAddrSpace,
  1430. unsigned SrcAlign,
  1431. unsigned DestAlign) const = 0;
  1432. virtual void getMemcpyLoopResidualLoweringType(
  1433. SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
  1434. unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
  1435. unsigned SrcAlign, unsigned DestAlign) const = 0;
  1436. virtual bool areInlineCompatible(const Function *Caller,
  1437. const Function *Callee) const = 0;
  1438. virtual bool
  1439. areFunctionArgsABICompatible(const Function *Caller, const Function *Callee,
  1440. SmallPtrSetImpl<Argument *> &Args) const = 0;
  1441. virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
  1442. virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0;
  1443. virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
  1444. virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
  1445. virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
  1446. virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
  1447. Align Alignment,
  1448. unsigned AddrSpace) const = 0;
  1449. virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
  1450. Align Alignment,
  1451. unsigned AddrSpace) const = 0;
  1452. virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
  1453. unsigned ChainSizeInBytes,
  1454. VectorType *VecTy) const = 0;
  1455. virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
  1456. unsigned ChainSizeInBytes,
  1457. VectorType *VecTy) const = 0;
  1458. virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
  1459. ReductionFlags) const = 0;
  1460. virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty,
  1461. ReductionFlags) const = 0;
  1462. virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
  1463. ReductionFlags) const = 0;
  1464. virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
  1465. virtual unsigned getGISelRematGlobalCost() const = 0;
  1466. virtual bool supportsScalableVectors() const = 0;
  1467. virtual bool hasActiveVectorLength() const = 0;
  1468. virtual int getInstructionLatency(const Instruction *I) = 0;
  1469. };
  1470. template <typename T>
  1471. class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
  1472. T Impl;
  1473. public:
  1474. Model(T Impl) : Impl(std::move(Impl)) {}
  1475. ~Model() override {}
  1476. const DataLayout &getDataLayout() const override {
  1477. return Impl.getDataLayout();
  1478. }
  1479. int getGEPCost(Type *PointeeType, const Value *Ptr,
  1480. ArrayRef<const Value *> Operands,
  1481. enum TargetTransformInfo::TargetCostKind CostKind) override {
  1482. return Impl.getGEPCost(PointeeType, Ptr, Operands);
  1483. }
  1484. unsigned getInliningThresholdMultiplier() override {
  1485. return Impl.getInliningThresholdMultiplier();
  1486. }
  1487. unsigned adjustInliningThreshold(const CallBase *CB) override {
  1488. return Impl.adjustInliningThreshold(CB);
  1489. }
  1490. int getInlinerVectorBonusPercent() override {
  1491. return Impl.getInlinerVectorBonusPercent();
  1492. }
  1493. int getMemcpyCost(const Instruction *I) override {
  1494. return Impl.getMemcpyCost(I);
  1495. }
  1496. int getUserCost(const User *U, ArrayRef<const Value *> Operands,
  1497. TargetCostKind CostKind) override {
  1498. return Impl.getUserCost(U, Operands, CostKind);
  1499. }
  1500. bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
  1501. bool useGPUDivergenceAnalysis() override {
  1502. return Impl.useGPUDivergenceAnalysis();
  1503. }
  1504. bool isSourceOfDivergence(const Value *V) override {
  1505. return Impl.isSourceOfDivergence(V);
  1506. }
  1507. bool isAlwaysUniform(const Value *V) override {
  1508. return Impl.isAlwaysUniform(V);
  1509. }
  1510. unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); }
  1511. bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
  1512. Intrinsic::ID IID) const override {
  1513. return Impl.collectFlatAddressOperands(OpIndexes, IID);
  1514. }
  1515. bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
  1516. return Impl.isNoopAddrSpaceCast(FromAS, ToAS);
  1517. }
  1518. unsigned getAssumedAddrSpace(const Value *V) const override {
  1519. return Impl.getAssumedAddrSpace(V);
  1520. }
  1521. Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
  1522. Value *NewV) const override {
  1523. return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
  1524. }
  1525. bool isLoweredToCall(const Function *F) override {
  1526. return Impl.isLoweredToCall(F);
  1527. }
  1528. void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
  1529. UnrollingPreferences &UP) override {
  1530. return Impl.getUnrollingPreferences(L, SE, UP);
  1531. }
  1532. void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
  1533. PeelingPreferences &PP) override {
  1534. return Impl.getPeelingPreferences(L, SE, PP);
  1535. }
  1536. bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
  1537. AssumptionCache &AC, TargetLibraryInfo *LibInfo,
  1538. HardwareLoopInfo &HWLoopInfo) override {
  1539. return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
  1540. }
  1541. bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
  1542. AssumptionCache &AC, TargetLibraryInfo *TLI,
  1543. DominatorTree *DT,
  1544. const LoopAccessInfo *LAI) override {
  1545. return Impl.preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI);
  1546. }
  1547. bool emitGetActiveLaneMask() override {
  1548. return Impl.emitGetActiveLaneMask();
  1549. }
  1550. Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
  1551. IntrinsicInst &II) override {
  1552. return Impl.instCombineIntrinsic(IC, II);
  1553. }
  1554. Optional<Value *>
  1555. simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
  1556. APInt DemandedMask, KnownBits &Known,
  1557. bool &KnownBitsComputed) override {
  1558. return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
  1559. KnownBitsComputed);
  1560. }
  1561. Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
  1562. InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
  1563. APInt &UndefElts2, APInt &UndefElts3,
  1564. std::function<void(Instruction *, unsigned, APInt, APInt &)>
  1565. SimplifyAndSetOp) override {
  1566. return Impl.simplifyDemandedVectorEltsIntrinsic(
  1567. IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
  1568. SimplifyAndSetOp);
  1569. }
  1570. bool isLegalAddImmediate(int64_t Imm) override {
  1571. return Impl.isLegalAddImmediate(Imm);
  1572. }
  1573. bool isLegalICmpImmediate(int64_t Imm) override {
  1574. return Impl.isLegalICmpImmediate(Imm);
  1575. }
  1576. bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
  1577. bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
  1578. Instruction *I) override {
  1579. return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
  1580. AddrSpace, I);
  1581. }
  1582. bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
  1583. TargetTransformInfo::LSRCost &C2) override {
  1584. return Impl.isLSRCostLess(C1, C2);
  1585. }
  1586. bool isNumRegsMajorCostOfLSR() override {
  1587. return Impl.isNumRegsMajorCostOfLSR();
  1588. }
  1589. bool isProfitableLSRChainElement(Instruction *I) override {
  1590. return Impl.isProfitableLSRChainElement(I);
  1591. }
  1592. bool canMacroFuseCmp() override { return Impl.canMacroFuseCmp(); }
  1593. bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
  1594. DominatorTree *DT, AssumptionCache *AC,
  1595. TargetLibraryInfo *LibInfo) override {
  1596. return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
  1597. }
  1598. bool shouldFavorPostInc() const override { return Impl.shouldFavorPostInc(); }
  1599. bool shouldFavorBackedgeIndex(const Loop *L) const override {
  1600. return Impl.shouldFavorBackedgeIndex(L);
  1601. }
  1602. bool isLegalMaskedStore(Type *DataType, Align Alignment) override {
  1603. return Impl.isLegalMaskedStore(DataType, Alignment);
  1604. }
  1605. bool isLegalMaskedLoad(Type *DataType, Align Alignment) override {
  1606. return Impl.isLegalMaskedLoad(DataType, Alignment);
  1607. }
  1608. bool isLegalNTStore(Type *DataType, Align Alignment) override {
  1609. return Impl.isLegalNTStore(DataType, Alignment);
  1610. }
  1611. bool isLegalNTLoad(Type *DataType, Align Alignment) override {
  1612. return Impl.isLegalNTLoad(DataType, Alignment);
  1613. }
  1614. bool isLegalMaskedScatter(Type *DataType, Align Alignment) override {
  1615. return Impl.isLegalMaskedScatter(DataType, Alignment);
  1616. }
  1617. bool isLegalMaskedGather(Type *DataType, Align Alignment) override {
  1618. return Impl.isLegalMaskedGather(DataType, Alignment);
  1619. }
  1620. bool isLegalMaskedCompressStore(Type *DataType) override {
  1621. return Impl.isLegalMaskedCompressStore(DataType);
  1622. }
  1623. bool isLegalMaskedExpandLoad(Type *DataType) override {
  1624. return Impl.isLegalMaskedExpandLoad(DataType);
  1625. }
  1626. bool hasDivRemOp(Type *DataType, bool IsSigned) override {
  1627. return Impl.hasDivRemOp(DataType, IsSigned);
  1628. }
  1629. bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
  1630. return Impl.hasVolatileVariant(I, AddrSpace);
  1631. }
  1632. bool prefersVectorizedAddressing() override {
  1633. return Impl.prefersVectorizedAddressing();
  1634. }
  1635. int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
  1636. bool HasBaseReg, int64_t Scale,
  1637. unsigned AddrSpace) override {
  1638. return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
  1639. AddrSpace);
  1640. }
  1641. bool LSRWithInstrQueries() override { return Impl.LSRWithInstrQueries(); }
  1642. bool isTruncateFree(Type *Ty1, Type *Ty2) override {
  1643. return Impl.isTruncateFree(Ty1, Ty2);
  1644. }
  1645. bool isProfitableToHoist(Instruction *I) override {
  1646. return Impl.isProfitableToHoist(I);
  1647. }
  1648. bool useAA() override { return Impl.useAA(); }
  1649. bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
  1650. unsigned getRegUsageForType(Type *Ty) override {
  1651. return Impl.getRegUsageForType(Ty);
  1652. }
  1653. bool shouldBuildLookupTables() override {
  1654. return Impl.shouldBuildLookupTables();
  1655. }
  1656. bool shouldBuildLookupTablesForConstant(Constant *C) override {
  1657. return Impl.shouldBuildLookupTablesForConstant(C);
  1658. }
  1659. bool useColdCCForColdCall(Function &F) override {
  1660. return Impl.useColdCCForColdCall(F);
  1661. }
  1662. unsigned getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts,
  1663. bool Insert, bool Extract) override {
  1664. return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract);
  1665. }
  1666. unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
  1667. unsigned VF) override {
  1668. return Impl.getOperandsScalarizationOverhead(Args, VF);
  1669. }
  1670. bool supportsEfficientVectorElementLoadStore() override {
  1671. return Impl.supportsEfficientVectorElementLoadStore();
  1672. }
  1673. bool enableAggressiveInterleaving(bool LoopHasReductions) override {
  1674. return Impl.enableAggressiveInterleaving(LoopHasReductions);
  1675. }
  1676. MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
  1677. bool IsZeroCmp) const override {
  1678. return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
  1679. }
  1680. bool enableInterleavedAccessVectorization() override {
  1681. return Impl.enableInterleavedAccessVectorization();
  1682. }
  1683. bool enableMaskedInterleavedAccessVectorization() override {
  1684. return Impl.enableMaskedInterleavedAccessVectorization();
  1685. }
  1686. bool isFPVectorizationPotentiallyUnsafe() override {
  1687. return Impl.isFPVectorizationPotentiallyUnsafe();
  1688. }
  1689. bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
  1690. unsigned AddressSpace, unsigned Alignment,
  1691. bool *Fast) override {
  1692. return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
  1693. Alignment, Fast);
  1694. }
  1695. PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
  1696. return Impl.getPopcntSupport(IntTyWidthInBit);
  1697. }
  1698. bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
  1699. bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
  1700. return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
  1701. }
  1702. int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); }
  1703. int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
  1704. Type *Ty) override {
  1705. return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
  1706. }
  1707. int getIntImmCost(const APInt &Imm, Type *Ty,
  1708. TargetCostKind CostKind) override {
  1709. return Impl.getIntImmCost(Imm, Ty, CostKind);
  1710. }
  1711. int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty,
  1712. TargetCostKind CostKind,
  1713. Instruction *Inst = nullptr) override {
  1714. return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst);
  1715. }
  1716. int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
  1717. Type *Ty, TargetCostKind CostKind) override {
  1718. return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
  1719. }
  1720. unsigned getNumberOfRegisters(unsigned ClassID) const override {
  1721. return Impl.getNumberOfRegisters(ClassID);
  1722. }
  1723. unsigned getRegisterClassForType(bool Vector,
  1724. Type *Ty = nullptr) const override {
  1725. return Impl.getRegisterClassForType(Vector, Ty);
  1726. }
  1727. const char *getRegisterClassName(unsigned ClassID) const override {
  1728. return Impl.getRegisterClassName(ClassID);
  1729. }
  1730. unsigned getRegisterBitWidth(bool Vector) const override {
  1731. return Impl.getRegisterBitWidth(Vector);
  1732. }
  1733. unsigned getMinVectorRegisterBitWidth() override {
  1734. return Impl.getMinVectorRegisterBitWidth();
  1735. }
  1736. Optional<unsigned> getMaxVScale() const override {
  1737. return Impl.getMaxVScale();
  1738. }
  1739. bool shouldMaximizeVectorBandwidth(bool OptSize) const override {
  1740. return Impl.shouldMaximizeVectorBandwidth(OptSize);
  1741. }
  1742. unsigned getMinimumVF(unsigned ElemWidth) const override {
  1743. return Impl.getMinimumVF(ElemWidth);
  1744. }
  1745. unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override {
  1746. return Impl.getMaximumVF(ElemWidth, Opcode);
  1747. }
  1748. bool shouldConsiderAddressTypePromotion(
  1749. const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
  1750. return Impl.shouldConsiderAddressTypePromotion(
  1751. I, AllowPromotionWithoutCommonHeader);
  1752. }
  1753. unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); }
  1754. Optional<unsigned> getCacheSize(CacheLevel Level) const override {
  1755. return Impl.getCacheSize(Level);
  1756. }
  1757. Optional<unsigned> getCacheAssociativity(CacheLevel Level) const override {
  1758. return Impl.getCacheAssociativity(Level);
  1759. }
  1760. /// Return the preferred prefetch distance in terms of instructions.
  1761. ///
  1762. unsigned getPrefetchDistance() const override {
  1763. return Impl.getPrefetchDistance();
  1764. }
  1765. /// Return the minimum stride necessary to trigger software
  1766. /// prefetching.
  1767. ///
  1768. unsigned getMinPrefetchStride(unsigned NumMemAccesses,
  1769. unsigned NumStridedMemAccesses,
  1770. unsigned NumPrefetches,
  1771. bool HasCall) const override {
  1772. return Impl.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
  1773. NumPrefetches, HasCall);
  1774. }
  1775. /// Return the maximum prefetch distance in terms of loop
  1776. /// iterations.
  1777. ///
  1778. unsigned getMaxPrefetchIterationsAhead() const override {
  1779. return Impl.getMaxPrefetchIterationsAhead();
  1780. }
  1781. /// \return True if prefetching should also be done for writes.
  1782. bool enableWritePrefetching() const override {
  1783. return Impl.enableWritePrefetching();
  1784. }
  1785. unsigned getMaxInterleaveFactor(unsigned VF) override {
  1786. return Impl.getMaxInterleaveFactor(VF);
  1787. }
  1788. unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
  1789. unsigned &JTSize,
  1790. ProfileSummaryInfo *PSI,
  1791. BlockFrequencyInfo *BFI) override {
  1792. return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
  1793. }
  1794. unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
  1795. TTI::TargetCostKind CostKind,
  1796. OperandValueKind Opd1Info,
  1797. OperandValueKind Opd2Info,
  1798. OperandValueProperties Opd1PropInfo,
  1799. OperandValueProperties Opd2PropInfo,
  1800. ArrayRef<const Value *> Args,
  1801. const Instruction *CxtI = nullptr) override {
  1802. return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
  1803. Opd1PropInfo, Opd2PropInfo, Args, CxtI);
  1804. }
  1805. int getShuffleCost(ShuffleKind Kind, VectorType *Tp, int Index,
  1806. VectorType *SubTp) override {
  1807. return Impl.getShuffleCost(Kind, Tp, Index, SubTp);
  1808. }
  1809. int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
  1810. CastContextHint CCH, TTI::TargetCostKind CostKind,
  1811. const Instruction *I) override {
  1812. return Impl.getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
  1813. }
  1814. int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
  1815. unsigned Index) override {
  1816. return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
  1817. }
  1818. int getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) override {
  1819. return Impl.getCFInstrCost(Opcode, CostKind);
  1820. }
  1821. int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
  1822. CmpInst::Predicate VecPred,
  1823. TTI::TargetCostKind CostKind,
  1824. const Instruction *I) override {
  1825. return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
  1826. }
  1827. int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
  1828. return Impl.getVectorInstrCost(Opcode, Val, Index);
  1829. }
  1830. int getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
  1831. unsigned AddressSpace, TTI::TargetCostKind CostKind,
  1832. const Instruction *I) override {
  1833. return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
  1834. CostKind, I);
  1835. }
  1836. int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
  1837. unsigned AddressSpace,
  1838. TTI::TargetCostKind CostKind) override {
  1839. return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
  1840. CostKind);
  1841. }
  1842. int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
  1843. bool VariableMask, Align Alignment,
  1844. TTI::TargetCostKind CostKind,
  1845. const Instruction *I = nullptr) override {
  1846. return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
  1847. Alignment, CostKind, I);
  1848. }
  1849. int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
  1850. ArrayRef<unsigned> Indices, Align Alignment,
  1851. unsigned AddressSpace,
  1852. TTI::TargetCostKind CostKind,
  1853. bool UseMaskForCond,
  1854. bool UseMaskForGaps) override {
  1855. return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
  1856. Alignment, AddressSpace, CostKind,
  1857. UseMaskForCond, UseMaskForGaps);
  1858. }
  1859. int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
  1860. bool IsPairwiseForm,
  1861. TTI::TargetCostKind CostKind) override {
  1862. return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm,
  1863. CostKind);
  1864. }
  1865. int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
  1866. bool IsPairwiseForm, bool IsUnsigned,
  1867. TTI::TargetCostKind CostKind) override {
  1868. return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned,
  1869. CostKind);
  1870. }
  1871. InstructionCost getExtendedAddReductionCost(
  1872. bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
  1873. TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) override {
  1874. return Impl.getExtendedAddReductionCost(IsMLA, IsUnsigned, ResTy, Ty,
  1875. CostKind);
  1876. }
  1877. int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
  1878. TTI::TargetCostKind CostKind) override {
  1879. return Impl.getIntrinsicInstrCost(ICA, CostKind);
  1880. }
  1881. int getCallInstrCost(Function *F, Type *RetTy,
  1882. ArrayRef<Type *> Tys,
  1883. TTI::TargetCostKind CostKind) override {
  1884. return Impl.getCallInstrCost(F, RetTy, Tys, CostKind);
  1885. }
  1886. unsigned getNumberOfParts(Type *Tp) override {
  1887. return Impl.getNumberOfParts(Tp);
  1888. }
  1889. int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
  1890. const SCEV *Ptr) override {
  1891. return Impl.getAddressComputationCost(Ty, SE, Ptr);
  1892. }
  1893. unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
  1894. return Impl.getCostOfKeepingLiveOverCall(Tys);
  1895. }
  bool getTgtMemIntrinsic(IntrinsicInst *Inst,
                          MemIntrinsicInfo &Info) override {
    return Impl.getTgtMemIntrinsic(Inst, Info);
  }
  unsigned getAtomicMemIntrinsicMaxElementSize() const override {
    return Impl.getAtomicMemIntrinsicMaxElementSize();
  }
  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) override {
    return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
  }
  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                                  unsigned SrcAddrSpace, unsigned DestAddrSpace,
                                  unsigned SrcAlign,
                                  unsigned DestAlign) const override {
    return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
                                          DestAddrSpace, SrcAlign, DestAlign);
  }
  void getMemcpyLoopResidualLoweringType(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      unsigned SrcAlign, unsigned DestAlign) const override {
    Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
                                           SrcAddrSpace, DestAddrSpace,
                                           SrcAlign, DestAlign);
  }
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const override {
    return Impl.areInlineCompatible(Caller, Callee);
  }
  bool areFunctionArgsABICompatible(
      const Function *Caller, const Function *Callee,
      SmallPtrSetImpl<Argument *> &Args) const override {
    return Impl.areFunctionArgsABICompatible(Caller, Callee, Args);
  }
  bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
    return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
  }
  bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
    return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
  }
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
    return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
  }
  bool isLegalToVectorizeLoad(LoadInst *LI) const override {
    return Impl.isLegalToVectorizeLoad(LI);
  }
  bool isLegalToVectorizeStore(StoreInst *SI) const override {
    return Impl.isLegalToVectorizeStore(SI);
  }
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const override {
    return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
                                            AddrSpace);
  }
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const override {
    return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
                                             AddrSpace);
  }
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const override {
    return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
  }
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const override {
    return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
  }
  bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                             ReductionFlags Flags) const override {
    return Impl.useReductionIntrinsic(Opcode, Ty, Flags);
  }
  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
                             ReductionFlags Flags) const override {
    return Impl.preferInLoopReduction(Opcode, Ty, Flags);
  }
  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                       ReductionFlags Flags) const override {
    return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags);
  }
  bool shouldExpandReduction(const IntrinsicInst *II) const override {
    return Impl.shouldExpandReduction(II);
  }
  unsigned getGISelRematGlobalCost() const override {
    return Impl.getGISelRematGlobalCost();
  }
  bool supportsScalableVectors() const override {
    return Impl.supportsScalableVectors();
  }
  bool hasActiveVectorLength() const override {
    return Impl.hasActiveVectorLength();
  }
  int getInstructionLatency(const Instruction *I) override {
    return Impl.getInstructionLatency(I);
  }
};

template <typename T>
TargetTransformInfo::TargetTransformInfo(T Impl)
    : TTIImpl(new Model<T>(Impl)) {}
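
// Note: the templated constructor above performs concept-based type erasure.
// The concrete implementation object is copied into a Model<T> and accessed
// only through the abstract Concept interface. A minimal sketch of handing an
// implementation to TargetTransformInfo is shown below; `MyTargetTTIImpl` is a
// hypothetical implementation type used purely for illustration:
//
//   MyTargetTTIImpl Impl(TM, F);
//   TargetTransformInfo TTI(std::move(Impl));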

/// Analysis pass providing the \c TargetTransformInfo.
///
/// The core idea of the TargetIRAnalysis is to expose an interface through
/// which LLVM targets can analyze and provide information about the middle
/// end's target-independent IR. This supports use cases such as target-aware
/// cost modeling of IR constructs.
///
/// This is a function analysis because much of the cost modeling for targets
/// is done in a subtarget-specific way and LLVM supports compiling different
/// functions targeting different subtargets in order to support runtime
/// dispatch according to the observed subtarget.
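///
/// Under the new pass manager the result is typically obtained from a
/// FunctionAnalysisManager. The sketch below is illustrative only; the
/// analysis manager `FAM` and the Function `F` are assumed to exist in the
/// surrounding code:
///
/// \code
///   TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
///   if (TTI.supportsScalableVectors()) {
///     // Drive target-aware decisions from further TTI queries.
///   }
/// \endcode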
class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
public:
  typedef TargetTransformInfo Result;

  /// Default construct a target IR analysis.
  ///
  /// This will use the module's datalayout to construct a baseline
  /// conservative TTI result.
  TargetIRAnalysis();

  /// Construct an IR analysis pass around a target-provided callback.
  ///
  /// The callback will be called with a particular function for which the TTI
  /// is needed and must return a TTI object for that function.
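  ///
  /// A target normally wraps construction of its own TTI in this callback.
  /// A minimal sketch, assuming a TargetMachine-like object `TM` that exposes
  /// a getTargetTransformInfo(const Function &) hook (these names are
  /// illustrative, not part of this interface):
  ///
  /// \code
  ///   TargetIRAnalysis TIRA(
  ///       [&TM](const Function &F) { return TM.getTargetTransformInfo(F); });
  /// \endcode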
  TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);

  // Value semantics. We spell out the constructors for MSVC.
  TargetIRAnalysis(const TargetIRAnalysis &Arg)
      : TTICallback(Arg.TTICallback) {}
  TargetIRAnalysis(TargetIRAnalysis &&Arg)
      : TTICallback(std::move(Arg.TTICallback)) {}
  TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
    TTICallback = RHS.TTICallback;
    return *this;
  }
  TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
    TTICallback = std::move(RHS.TTICallback);
    return *this;
  }

  Result run(const Function &F, FunctionAnalysisManager &);

private:
  friend AnalysisInfoMixin<TargetIRAnalysis>;
  static AnalysisKey Key;

  /// The callback used to produce a result.
  ///
  /// We use a completely opaque callback so that targets can provide whatever
  /// mechanism they desire for constructing the TTI for a given function.
  ///
  /// FIXME: Should we really use std::function? It's relatively inefficient.
  /// It might be possible to arrange for even stateful callbacks to outlive
  /// the analysis and thus use a function_ref, which would be lighter weight.
  /// This may also be less error-prone, as the callback is likely to reference
  /// the external TargetMachine and that reference must never dangle.
  std::function<Result(const Function &)> TTICallback;

  /// Helper function used as the callback in the default constructor.
  static Result getDefaultTTI(const Function &F);
};

/// Wrapper pass for TargetTransformInfo.
///
/// This pass can be constructed from a TTI object, which it stores internally
/// and makes available to passes that query it.
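///
/// Under the legacy pass manager, a transformation pass typically declares a
/// dependency on this wrapper and fetches the per-function TTI from it. The
/// sketch below is illustrative only; `MyPass` is a hypothetical legacy
/// FunctionPass:
///
/// \code
///   void MyPass::getAnalysisUsage(AnalysisUsage &AU) const {
///     AU.addRequired<TargetTransformInfoWrapperPass>();
///   }
///   bool MyPass::runOnFunction(Function &F) {
///     TargetTransformInfo &TTI =
///         getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
///     (void)TTI; // Queries against TTI would drive the transformation.
///     return false;
///   }
/// \endcode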
class TargetTransformInfoWrapperPass : public ImmutablePass {
  TargetIRAnalysis TIRA;
  Optional<TargetTransformInfo> TTI;

  virtual void anchor();

public:
  static char ID;

  /// We must provide a default constructor for the pass but it should
  /// never be used.
  ///
  /// Use the constructor below or call one of the creation routines.
  TargetTransformInfoWrapperPass();

  explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);

  TargetTransformInfo &getTTI(const Function &F);
};

/// Create an analysis pass wrapper around a TTI object.
///
/// This analysis pass just holds the TTI instance and makes it available to
/// clients.
ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
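
// A typical way to register the wrapper in a legacy pipeline is sketched
// below; it assumes a legacy PassManager `PM` and a TargetMachine `TM` are
// available in the surrounding setup code:
//
//   PM.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));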

} // namespace llvm

#endif

#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif