#pragma once
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#endif
//===- llvm/Analysis/LoopAccessAnalysis.h -----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interface for the loop memory dependence framework that
// was originally developed for the Loop Vectorizer.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_LOOPACCESSANALYSIS_H
#define LLVM_ANALYSIS_LOOPACCESSANALYSIS_H

#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/Pass.h"

namespace llvm {

class AAResults;
class DataLayout;
class Loop;
class LoopAccessInfo;
class OptimizationRemarkEmitter;
class raw_ostream;
class SCEV;
class SCEVUnionPredicate;
class Value;

/// Collection of parameters shared between the Loop Vectorizer and the
/// Loop Access Analysis.
struct VectorizerParams {
  /// Maximum SIMD width.
  static const unsigned MaxVectorWidth;

  /// VF as overridden by the user.
  static unsigned VectorizationFactor;

  /// Interleave factor as overridden by the user.
  static unsigned VectorizationInterleave;

  /// True if force-vector-interleave was specified by the user.
  static bool isInterleaveForced();

  /// When performing memory disambiguation checks at runtime do not
  /// make more than this number of comparisons.
  static unsigned RuntimeMemoryCheckThreshold;
};
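
// For reference: in the implementation these parameters are bound to
// command-line overrides (e.g. -force-vector-width feeds VectorizationFactor,
// -force-vector-interleave feeds VectorizationInterleave, and
// -runtime-memory-check-threshold feeds RuntimeMemoryCheckThreshold).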

/// Checks memory dependences among accesses to the same underlying
/// object to determine whether vectorization is legal or not (and at
/// which vectorization factor).
///
/// Note: This class will compute a conservative dependence for accesses to
/// different underlying pointers. Clients, such as the loop vectorizer, will
/// sometimes deal with these potential dependencies by emitting runtime checks.
///
/// We use the ScalarEvolution framework to symbolically evaluate pairs of
/// access functions. Since we currently don't restructure the loop we can rely
/// on the program order of memory accesses to determine their safety.
/// At the moment we will only deem accesses as safe for:
///  * A negative constant distance assuming program order.
///
///      Safe: tmp = a[i + 1];      OR     a[i + 1] = x;
///            a[i] = tmp;                 y = a[i];
///
///    The latter case is safe because later checks guarantee that there can't
///    be a cycle through a phi node (that is, we check that "x" and "y" are
///    not the same variable: a header phi can only be an induction or a
///    reduction, a reduction can't have a memory sink, an induction can't
///    have a memory source). This is important and must not be violated (or
///    we have to resort to checking for cycles through memory).
///
///  * A positive constant distance assuming program order that is bigger
///    than the biggest memory access.
///
///      tmp = a[i]       OR       b[i] = x
///      a[i+2] = tmp               y = b[i+2];
///
///    Safe distance: 2 x sizeof(a[0]), and 2 x sizeof(b[0]), respectively.
///
///  * Zero distances and all accesses have the same size.
///
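/// For illustration (an added sketch), the zero-distance case covers a load
/// and a store of the same location with equal access sizes:
///
///      a[i] = a[i] + x;   // read and write of a[i]: distance 0, same size
///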
class MemoryDepChecker {
public:
  typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
  typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList;
  /// Set of potential dependent memory accesses.
  typedef EquivalenceClasses<MemAccessInfo> DepCandidates;

  /// Type to keep track of the status of the dependence check. The order of
  /// the elements is important and has to be from most permissive to least
  /// permissive.
  enum class VectorizationSafetyStatus {
    // Can vectorize safely without RT checks. All dependences are known to be
    // safe.
    Safe,
    // Can possibly vectorize with RT checks to overcome unknown dependencies.
    PossiblySafeWithRtChecks,
    // Cannot vectorize due to known unsafe dependencies.
    Unsafe,
  };

  /// Dependence between memory access instructions.
  struct Dependence {
    /// The type of the dependence.
    enum DepType {
      // No dependence.
      NoDep,
      // We couldn't determine the direction or the distance.
      Unknown,
      // Lexically forward.
      //
      // FIXME: If we only have loop-independent forward dependences (e.g. a
      // read and write of A[i]), LAA will locally deem the dependence "safe"
      // without querying the MemoryDepChecker. Therefore we can miss
      // enumerating loop-independent forward dependences in
      // getDependences. Note that as soon as there are different
      // indices used to access the same array, the MemoryDepChecker *is*
      // queried and the dependence list is complete.
      Forward,
      // Forward, but if vectorized, is likely to prevent store-to-load
      // forwarding.
      ForwardButPreventsForwarding,
      // Lexically backward.
      Backward,
      // Backward, but the distance allows a vectorization factor of
      // MaxSafeDepDistBytes.
      BackwardVectorizable,
      // Same, but may prevent store-to-load forwarding.
      BackwardVectorizableButPreventsForwarding
    };

    /// String version of the types.
    static const char *DepName[];

    /// Index of the source of the dependence in the InstMap vector.
    unsigned Source;
    /// Index of the destination of the dependence in the InstMap vector.
    unsigned Destination;
    /// The type of the dependence.
    DepType Type;

    Dependence(unsigned Source, unsigned Destination, DepType Type)
        : Source(Source), Destination(Destination), Type(Type) {}

    /// Return the source instruction of the dependence.
    Instruction *getSource(const LoopAccessInfo &LAI) const;
    /// Return the destination instruction of the dependence.
    Instruction *getDestination(const LoopAccessInfo &LAI) const;

    /// Dependence types that don't prevent vectorization.
    static VectorizationSafetyStatus isSafeForVectorization(DepType Type);

    /// Lexically forward dependence.
    bool isForward() const;
    /// Lexically backward dependence.
    bool isBackward() const;
    /// May be a lexically backward dependence type (includes Unknown).
    bool isPossiblyBackward() const;

    /// Print the dependence. \p Instrs is used to map the instruction
    /// indices to instructions.
    void print(raw_ostream &OS, unsigned Depth,
               const SmallVectorImpl<Instruction *> &Instrs) const;
  };
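
  // Illustrative sketch (not part of the interface): given a computed
  // LoopAccessInfo `LAI`, a client can enumerate the recorded dependences,
  // e.g. to report the possibly-backward ones, provided the dependence list
  // is still available:
  //
  //   const MemoryDepChecker &DepChecker = LAI.getDepChecker();
  //   if (const auto *Deps = DepChecker.getDependences())
  //     for (const MemoryDepChecker::Dependence &Dep : *Deps)
  //       if (Dep.isPossiblyBackward())
  //         Dep.print(errs(), /*Depth=*/0,
  //                   DepChecker.getMemoryInstructions());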

  MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L)
      : PSE(PSE), InnermostLoop(L), AccessIdx(0), MaxSafeDepDistBytes(0),
        MaxSafeVectorWidthInBits(-1U),
        FoundNonConstantDistanceDependence(false),
        Status(VectorizationSafetyStatus::Safe), RecordDependences(true) {}

  /// Register the location (instructions are given increasing numbers)
  /// of a write access.
  void addAccess(StoreInst *SI) {
    Value *Ptr = SI->getPointerOperand();
    Accesses[MemAccessInfo(Ptr, true)].push_back(AccessIdx);
    InstMap.push_back(SI);
    ++AccessIdx;
  }

  /// Register the location (instructions are given increasing numbers)
  /// of a read access.
  void addAccess(LoadInst *LI) {
    Value *Ptr = LI->getPointerOperand();
    Accesses[MemAccessInfo(Ptr, false)].push_back(AccessIdx);
    InstMap.push_back(LI);
    ++AccessIdx;
  }
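
  // For example (hypothetical usage): registering accesses in program order
  // assigns them consecutive indices, which the Dependence struct later uses
  // as its Source/Destination fields:
  //
  //   MemoryDepChecker DepChecker(PSE, L);
  //   DepChecker.addAccess(Store); // index 0, a write
  //   DepChecker.addAccess(Load);  // index 1, a read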

  /// Check whether the dependencies between the accesses are safe.
  ///
  /// Only checks sets with elements in \p CheckDeps.
  bool areDepsSafe(DepCandidates &AccessSets, MemAccessInfoList &CheckDeps,
                   const ValueToValueMap &Strides);

  /// No memory dependence was encountered that would inhibit
  /// vectorization.
  bool isSafeForVectorization() const {
    return Status == VectorizationSafetyStatus::Safe;
  }

  /// Return true if the number of elements that are safe to operate on
  /// simultaneously is not bounded.
  bool isSafeForAnyVectorWidth() const {
    return MaxSafeVectorWidthInBits == UINT_MAX;
  }

  /// The maximum number of bytes of a vector register we can vectorize
  /// the accesses safely with.
  uint64_t getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; }

  /// Return the number of elements that are safe to operate on
  /// simultaneously, multiplied by the size of the element in bits.
  uint64_t getMaxSafeVectorWidthInBits() const {
    return MaxSafeVectorWidthInBits;
  }

  /// In some cases when the dependency check fails we can still
  /// vectorize the loop with a dynamic array access check.
  bool shouldRetryWithRuntimeCheck() const {
    return FoundNonConstantDistanceDependence &&
           Status == VectorizationSafetyStatus::PossiblySafeWithRtChecks;
  }

  /// Returns the memory dependences. If null is returned we exceeded
  /// the MaxDependences threshold and this information is not
  /// available.
  const SmallVectorImpl<Dependence> *getDependences() const {
    return RecordDependences ? &Dependences : nullptr;
  }

  void clearDependences() { Dependences.clear(); }

  /// The vector of memory access instructions. The indices are used as
  /// instruction identifiers in the Dependence class.
  const SmallVectorImpl<Instruction *> &getMemoryInstructions() const {
    return InstMap;
  }

  /// Generate a mapping between the memory instructions and their
  /// indices according to program order.
  DenseMap<Instruction *, unsigned> generateInstructionOrderMap() const {
    DenseMap<Instruction *, unsigned> OrderMap;
    for (unsigned I = 0; I < InstMap.size(); ++I)
      OrderMap[InstMap[I]] = I;
    return OrderMap;
  }

  /// Find the set of instructions that read or write via \p Ptr.
  SmallVector<Instruction *, 4> getInstructionsForAccess(Value *Ptr,
                                                         bool isWrite) const;

private:
  /// A wrapper around ScalarEvolution, used to add runtime SCEV checks, and
  /// applies dynamic knowledge to simplify SCEV expressions and convert them
  /// to a more usable form. We need this in case assumptions about SCEV
  /// expressions need to be made in order to avoid unknown dependences. For
  /// example we might assume a unit stride for a pointer in order to prove
  /// that a memory access is strided and doesn't wrap.
  PredicatedScalarEvolution &PSE;
  const Loop *InnermostLoop;

  /// Maps access locations (ptr, read/write) to program order.
  DenseMap<MemAccessInfo, std::vector<unsigned>> Accesses;

  /// Memory access instructions in program order.
  SmallVector<Instruction *, 16> InstMap;

  /// The program order index to be used for the next instruction.
  unsigned AccessIdx;

  // We can access this many bytes in parallel safely.
  uint64_t MaxSafeDepDistBytes;

  /// Number of elements (from consecutive iterations) that are safe to
  /// operate on simultaneously, multiplied by the size of the element in bits.
  /// The size of the element is taken from the memory access that is most
  /// restrictive.
  uint64_t MaxSafeVectorWidthInBits;

  /// If we see a non-constant dependence distance we can still try to
  /// vectorize this loop with runtime checks.
  bool FoundNonConstantDistanceDependence;

  /// Result of the dependence checks, indicating whether the checked
  /// dependences are safe for vectorization, require RT checks or are known to
  /// be unsafe.
  VectorizationSafetyStatus Status;

  /// True if Dependences reflects the dependences in the
  /// loop. If false we exceeded MaxDependences and
  /// Dependences is invalid.
  bool RecordDependences;

  /// Memory dependences collected during the analysis. Only valid if
  /// RecordDependences is true.
  SmallVector<Dependence, 8> Dependences;

  /// Check whether there is a plausible dependence between the two
  /// accesses.
  ///
  /// Access \p A must happen before \p B in program order. The two indices
  /// identify the index into the program order map.
  ///
  /// This function checks whether there is a plausible dependence (or the
  /// absence of such can't be proved) between the two accesses. If there is a
  /// plausible dependence but the dependence distance is bigger than one
  /// element access it records this distance in \p MaxSafeDepDistBytes (if this
  /// distance is smaller than any other distance encountered so far).
  /// Otherwise, this function returns a dependence type signaling a possible
  /// dependence.
  Dependence::DepType isDependent(const MemAccessInfo &A, unsigned AIdx,
                                  const MemAccessInfo &B, unsigned BIdx,
                                  const ValueToValueMap &Strides);

  /// Check whether the data dependence could prevent store-load
  /// forwarding.
  ///
  /// \return false if we shouldn't vectorize at all or avoid larger
  /// vectorization factors by limiting MaxSafeDepDistBytes.
  bool couldPreventStoreLoadForward(uint64_t Distance, uint64_t TypeByteSize);

  /// Updates the current safety status with \p S. We can go from Safe to
  /// either PossiblySafeWithRtChecks or Unsafe and from
  /// PossiblySafeWithRtChecks to Unsafe.
  void mergeInStatus(VectorizationSafetyStatus S);
};

class RuntimePointerChecking;

/// A grouping of pointers. A single memcheck is required between
/// two groups.
struct RuntimeCheckingPtrGroup {
  /// Create a new pointer checking group containing a single
  /// pointer, with index \p Index in RtCheck.
  RuntimeCheckingPtrGroup(unsigned Index, RuntimePointerChecking &RtCheck);

  /// Tries to add the pointer recorded in RtCheck at index
  /// \p Index to this pointer checking group. We can only add a pointer
  /// to a checking group if we will still be able to get
  /// the upper and lower bounds of the check. Returns true in case
  /// of success, false otherwise.
  bool addPointer(unsigned Index);

  /// Constitutes the context of this pointer checking group. For each
  /// pointer that is a member of this group we will retain the index
  /// at which it appears in RtCheck.
  RuntimePointerChecking &RtCheck;
  /// The SCEV expression which represents the upper bound of all the
  /// pointers in this group.
  const SCEV *High;
  /// The SCEV expression which represents the lower bound of all the
  /// pointers in this group.
  const SCEV *Low;
  /// Indices of all the pointers that constitute this grouping.
  SmallVector<unsigned, 2> Members;
};

/// A memcheck which is made up of a pair of grouped pointers.
typedef std::pair<const RuntimeCheckingPtrGroup *,
                  const RuntimeCheckingPtrGroup *>
    RuntimePointerCheck;
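
// Illustrative sketch: when such a check is expanded into IR (by the
// vectorizer's versioning utilities, not by this header), the two groups are
// independent exactly when their address ranges do not overlap, i.e. the
// transformed loop is guarded by the negation of
//
//   (First->Low < Second->High) && (Second->Low < First->High)
//
// evaluated on the materialized Low/High bound SCEVs.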

/// Holds information about the memory runtime legality checks to verify
/// that groups of pointers do not overlap.
class RuntimePointerChecking {
  friend struct RuntimeCheckingPtrGroup;

public:
  struct PointerInfo {
    /// Holds the pointer value that we need to check.
    TrackingVH<Value> PointerValue;
    /// Holds the smallest byte address accessed by the pointer throughout all
    /// iterations of the loop.
    const SCEV *Start;
    /// Holds the largest byte address accessed by the pointer throughout all
    /// iterations of the loop, plus 1.
    const SCEV *End;
    /// Holds whether this pointer is used for writing to memory.
    bool IsWritePtr;
    /// Holds the id of the set of pointers that could be dependent because of a
    /// shared underlying object.
    unsigned DependencySetId;
    /// Holds the id of the disjoint alias set to which this pointer belongs.
    unsigned AliasSetId;
    /// SCEV for the access.
    const SCEV *Expr;

    PointerInfo(Value *PointerValue, const SCEV *Start, const SCEV *End,
                bool IsWritePtr, unsigned DependencySetId, unsigned AliasSetId,
                const SCEV *Expr)
        : PointerValue(PointerValue), Start(Start), End(End),
          IsWritePtr(IsWritePtr), DependencySetId(DependencySetId),
          AliasSetId(AliasSetId), Expr(Expr) {}
  };

  RuntimePointerChecking(ScalarEvolution *SE) : Need(false), SE(SE) {}

  /// Reset the state of the pointer runtime information.
  void reset() {
    Need = false;
    Pointers.clear();
    Checks.clear();
  }

  /// Insert a pointer and calculate the start and end SCEVs.
  /// We need \p PSE in order to compute the SCEV expression of the pointer
  /// according to the assumptions that we've made during the analysis.
  /// The method might also version the pointer stride according to \p Strides,
  /// and add new predicates to \p PSE.
  void insert(Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId,
              unsigned ASId, const ValueToValueMap &Strides,
              PredicatedScalarEvolution &PSE);

  /// No run-time memory checking is necessary.
  bool empty() const { return Pointers.empty(); }

  /// Generate the checks and store them. This also performs the grouping
  /// of pointers to reduce the number of memchecks necessary.
  void generateChecks(MemoryDepChecker::DepCandidates &DepCands,
                      bool UseDependencies);

  /// Returns the checks that generateChecks created.
  const SmallVectorImpl<RuntimePointerCheck> &getChecks() const {
    return Checks;
  }

  /// Decide if we need to add a check between two groups of pointers,
  /// according to needsChecking.
  bool needsChecking(const RuntimeCheckingPtrGroup &M,
                     const RuntimeCheckingPtrGroup &N) const;

  /// Returns the number of run-time checks required according to
  /// needsChecking.
  unsigned getNumberOfChecks() const { return Checks.size(); }

  /// Print the list of run-time memory checks necessary.
  void print(raw_ostream &OS, unsigned Depth = 0) const;

  /// Print \p Checks.
  void printChecks(raw_ostream &OS,
                   const SmallVectorImpl<RuntimePointerCheck> &Checks,
                   unsigned Depth = 0) const;

  /// This flag indicates if we need to add the runtime check.
  bool Need;

  /// Information about the pointers that may require checking.
  SmallVector<PointerInfo, 2> Pointers;

  /// Holds a partitioning of pointers into "check groups".
  SmallVector<RuntimeCheckingPtrGroup, 2> CheckingGroups;

  /// Check if pointers are in the same partition.
  ///
  /// \p PtrToPartition contains the partition number for pointers (-1 if the
  /// pointer belongs to multiple partitions).
  static bool
  arePointersInSamePartition(const SmallVectorImpl<int> &PtrToPartition,
                             unsigned PtrIdx1, unsigned PtrIdx2);

  /// Decide whether we need to issue a run-time check for the pointers at
  /// indices \p I and \p J to prove their independence.
  bool needsChecking(unsigned I, unsigned J) const;

  /// Return PointerInfo for the pointer at index \p PtrIdx.
  const PointerInfo &getPointerInfo(unsigned PtrIdx) const {
    return Pointers[PtrIdx];
  }

  ScalarEvolution *getSE() const { return SE; }

private:
  /// Groups pointers such that a single memcheck is required
  /// between two different groups. This will clear the CheckingGroups vector
  /// and re-compute it. We will only group dependencies if \p UseDependencies
  /// is true, otherwise we will create a separate group for each pointer.
  void groupChecks(MemoryDepChecker::DepCandidates &DepCands,
                   bool UseDependencies);

  /// Generate the checks and return them.
  SmallVector<RuntimePointerCheck, 4> generateChecks() const;

  /// Holds a pointer to the ScalarEvolution analysis.
  ScalarEvolution *SE;

  /// Set of run-time checks required to establish independence of
  /// otherwise may-aliasing pointers in the loop.
  SmallVector<RuntimePointerCheck, 4> Checks;
};
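
// Hypothetical usage sketch (the surrounding values L, Ptr, Strides, PSE and
// DepCands are assumed to exist): the analysis inserts every pointer it could
// not disambiguate statically, then generates the grouped checks once:
//
//   RuntimePointerChecking RtChecking(SE);
//   RtChecking.insert(L, Ptr, /*WritePtr=*/true, /*DepSetId=*/0, /*ASId=*/0,
//                     Strides, PSE);
//   // ... insert the remaining pointers ...
//   RtChecking.generateChecks(DepCands, /*UseDependencies=*/true);
//   for (const RuntimePointerCheck &Check : RtChecking.getChecks())
//     ; // each Check names two RuntimeCheckingPtrGroups to compare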

/// Drive the analysis of memory accesses in the loop.
///
/// This class is responsible for analyzing the memory accesses of a loop. It
/// collects the accesses and then its main helper, the AccessAnalysis class,
/// finds and categorizes the dependences in buildDependenceSets.
///
/// For memory dependences that can be analyzed at compile time, it determines
/// whether the dependence is part of a cycle inhibiting vectorization. This
/// work is delegated to the MemoryDepChecker class.
///
/// For memory dependences that cannot be determined at compile time, it
/// generates run-time checks to prove independence. This is done by
/// AccessAnalysis::canCheckPtrAtRT and the checks are maintained by the
/// RuntimePointerCheck class.
///
/// If pointers can wrap or can't be expressed as affine AddRec expressions by
/// ScalarEvolution, we will generate run-time checks by emitting a
/// SCEVUnionPredicate.
///
/// Checks for both memory dependences and the SCEV predicates contained in the
/// PSE must be emitted in order for the results of this analysis to be valid.
class LoopAccessInfo {
public:
  LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetLibraryInfo *TLI,
                 AAResults *AA, DominatorTree *DT, LoopInfo *LI);

  /// Return true if we can analyze the memory accesses in the loop and there
  /// are no memory dependence cycles.
  bool canVectorizeMemory() const { return CanVecMem; }

  /// Return true if there is a convergent operation in the loop. There may
  /// still be reported runtime pointer checks that would be required, but it is
  /// not legal to insert them.
  bool hasConvergentOp() const { return HasConvergentOp; }

  const RuntimePointerChecking *getRuntimePointerChecking() const {
    return PtrRtChecking.get();
  }

  /// Number of memchecks required to prove independence of otherwise
  /// may-alias pointers.
  unsigned getNumRuntimePointerChecks() const {
    return PtrRtChecking->getNumberOfChecks();
  }

  /// Return true if the block BB needs to be predicated in order for the loop
  /// to be vectorized.
  static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
                                    DominatorTree *DT);

  /// Returns true if the value V is uniform within the loop.
  bool isUniform(Value *V) const;

  uint64_t getMaxSafeDepDistBytes() const { return MaxSafeDepDistBytes; }
  unsigned getNumStores() const { return NumStores; }
  unsigned getNumLoads() const { return NumLoads; }

  /// The diagnostics report generated for the analysis. E.g. why we
  /// couldn't analyze the loop.
  const OptimizationRemarkAnalysis *getReport() const { return Report.get(); }

  /// The Memory Dependence Checker which can determine the
  /// loop-independent and loop-carried dependences between memory accesses.
  const MemoryDepChecker &getDepChecker() const { return *DepChecker; }

  /// Return the list of instructions that use \p Ptr to read or write
  /// memory.
  SmallVector<Instruction *, 4> getInstructionsForAccess(Value *Ptr,
                                                         bool isWrite) const {
    return DepChecker->getInstructionsForAccess(Ptr, isWrite);
  }

  /// If an access has a symbolic stride, this maps the pointer value to
  /// the stride symbol.
  const ValueToValueMap &getSymbolicStrides() const { return SymbolicStrides; }

  /// Pointer has a symbolic stride.
  bool hasStride(Value *V) const { return StrideSet.count(V); }

  /// Print the information about the memory accesses in the loop.
  void print(raw_ostream &OS, unsigned Depth = 0) const;

  /// If the loop has a memory dependence involving an invariant address, i.e.
  /// two stores or a store and a load, then return true, else return false.
  bool hasDependenceInvolvingLoopInvariantAddress() const {
    return HasDependenceInvolvingLoopInvariantAddress;
  }

  /// Used to add runtime SCEV checks. Simplifies SCEV expressions and converts
  /// them to a more usable form. All SCEV expressions during the analysis
  /// should be re-written (and therefore simplified) according to PSE.
  /// A user of LoopAccessAnalysis will need to emit the runtime checks
  /// associated with this predicate.
  const PredicatedScalarEvolution &getPSE() const { return *PSE; }

private:
  /// Analyze the loop.
  void analyzeLoop(AAResults *AA, LoopInfo *LI,
                   const TargetLibraryInfo *TLI, DominatorTree *DT);

  /// Check if the structure of the loop allows it to be analyzed by this
  /// pass.
  bool canAnalyzeLoop();

  /// Save the analysis remark.
  ///
  /// LAA does not directly emit the remarks. Instead it stores them so that
  /// the client can retrieve and present them as its own analysis
  /// (e.g. -Rpass-analysis=loop-vectorize).
  OptimizationRemarkAnalysis &recordAnalysis(StringRef RemarkName,
                                             Instruction *Instr = nullptr);

  /// Collect memory accesses with loop invariant strides.
  ///
  /// Looks for accesses like "a[i * StrideA]" where "StrideA" is loop
  /// invariant.
  void collectStridedAccess(Value *LoadOrStoreInst);

  std::unique_ptr<PredicatedScalarEvolution> PSE;

  /// We need to check that all of the pointers in this list are disjoint
  /// at runtime. Using std::unique_ptr to make using the move ctor simpler.
  std::unique_ptr<RuntimePointerChecking> PtrRtChecking;

  /// The Memory Dependence Checker which can determine the
  /// loop-independent and loop-carried dependences between memory accesses.
  std::unique_ptr<MemoryDepChecker> DepChecker;

  Loop *TheLoop;

  unsigned NumLoads;
  unsigned NumStores;

  uint64_t MaxSafeDepDistBytes;

  /// Cache the result of analyzeLoop.
  bool CanVecMem;
  bool HasConvergentOp;

  /// Indicator that there are non-vectorizable stores to a uniform address.
  bool HasDependenceInvolvingLoopInvariantAddress;

  /// The diagnostics report generated for the analysis. E.g. why we
  /// couldn't analyze the loop.
  std::unique_ptr<OptimizationRemarkAnalysis> Report;

  /// If an access has a symbolic stride, this maps the pointer value to
  /// the stride symbol.
  ValueToValueMap SymbolicStrides;

  /// Set of symbolic stride values.
  SmallPtrSet<Value *, 8> StrideSet;
};
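
// Illustrative consumer sketch (LAI is an already-computed LoopAccessInfo):
// a transform such as the vectorizer typically gates itself on the result and
// then emits both kinds of checks before using the transformed loop:
//
//   if (LAI.canVectorizeMemory() && !LAI.hasConvergentOp()) {
//     if (LAI.getRuntimePointerChecking()->Need) {
//       // Version the loop: emit the memchecks from
//       // LAI.getRuntimePointerChecking() and the SCEV predicate from
//       // LAI.getPSE() on the guarded path.
//     }
//   }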

Value *stripIntegerCast(Value *V);

/// Return the SCEV corresponding to a pointer with the symbolic stride
/// replaced with constant one, assuming the SCEV predicate associated with
/// \p PSE is true.
///
/// If necessary this method will version the stride of the pointer according
/// to \p PtrToStride and therefore add further predicates to \p PSE.
///
/// If \p OrigPtr is not null, use it to look up the stride value instead of \p
/// Ptr. \p PtrToStride provides the mapping between the pointer value and its
/// stride as collected by LoopVectorizationLegality::collectStridedAccess.
const SCEV *replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
                                      const ValueToValueMap &PtrToStride,
                                      Value *Ptr, Value *OrigPtr = nullptr);
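
// For illustration (an added example): given an access "a[i * Stride]" whose
// pointer is mapped to the value %Stride in PtrToStride, the returned SCEV
// describes the access as if the stride were the constant 1 (a unit-stride
// AddRec), and the predicate "%Stride == 1" is added to PSE; the caller must
// later emit a runtime check for that predicate.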

/// If the pointer has a constant stride return it in units of its
/// element size. Otherwise return zero.
///
/// Ensure that it does not wrap in the address space, assuming the predicate
/// associated with \p PSE is true.
///
/// If necessary this method will version the stride of the pointer according
/// to \p StridesMap and therefore add further predicates to \p PSE.
/// The \p Assume parameter indicates if we are allowed to make additional
/// run-time assumptions.
int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp,
                     const ValueToValueMap &StridesMap = ValueToValueMap(),
                     bool Assume = false, bool ShouldCheckWrap = true);
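
// For example (illustrative): for a consecutive access "a[i]" this returns 1,
// for "a[2 * i]" it returns 2, for a reverse access "a[n - i]" it returns -1,
// and for an access whose stride cannot be proven constant it returns 0.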

/// Attempt to sort the pointers in \p VL and return the sorted indices
/// in \p SortedIndices, if reordering is required.
///
/// Returns 'true' if sorting is legal, otherwise returns 'false'.
///
/// For example, for a given \p VL of memory accesses in program order, a[i+4],
/// a[i+0], a[i+1] and a[i+7], this function reorders them by address as
/// a[i+0], a[i+1], a[i+4], a[i+7] and saves the corresponding mask over the
/// accesses in program order in \p SortedIndices as <1,2,0,3>.
bool sortPtrAccesses(ArrayRef<Value *> VL, const DataLayout &DL,
                     ScalarEvolution &SE,
                     SmallVectorImpl<unsigned> &SortedIndices);

/// Returns true if the memory operations \p A and \p B are consecutive.
/// This is a simple API that does not depend on the analysis pass.
bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
                         ScalarEvolution &SE, bool CheckType = true);
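
// For example (illustrative): with "int *A", accesses to &A[i] and &A[i + 1]
// are consecutive (the second address is exactly sizeof(int) past the first),
// whereas &A[i] and &A[i + 2] are not.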

/// This analysis provides dependence information for the memory accesses
/// of a loop.
///
/// It runs the analysis for a loop on demand. This can be initiated by
/// querying the loop access info via LAA::getInfo. getInfo returns a
/// LoopAccessInfo object. See this class for the specifics of what information
/// is provided.
class LoopAccessLegacyAnalysis : public FunctionPass {
public:
  static char ID;

  LoopAccessLegacyAnalysis();

  bool runOnFunction(Function &F) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override;

  /// Query the result of the loop access information for the loop \p L.
  ///
  /// If there is no cached result available run the analysis.
  const LoopAccessInfo &getInfo(Loop *L);

  void releaseMemory() override {
    // Invalidate the cache when the pass is freed.
    LoopAccessInfoMap.clear();
  }

  /// Print the result of the analysis when invoked with -analyze.
  void print(raw_ostream &OS, const Module *M = nullptr) const override;

private:
  /// The cache.
  DenseMap<Loop *, std::unique_ptr<LoopAccessInfo>> LoopAccessInfoMap;

  // The used analysis passes.
  ScalarEvolution *SE = nullptr;
  const TargetLibraryInfo *TLI = nullptr;
  AAResults *AA = nullptr;
  DominatorTree *DT = nullptr;
  LoopInfo *LI = nullptr;
};
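
// Hypothetical usage from another legacy pass that has declared a dependence
// on this pass in its getAnalysisUsage:
//
//   auto &LAA = getAnalysis<LoopAccessLegacyAnalysis>();
//   const LoopAccessInfo &LAI = LAA.getInfo(L);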

/// This analysis provides dependence information for the memory
/// accesses of a loop.
///
/// It runs the analysis for a loop on demand. This can be initiated by
/// querying the loop access info via AM.getResult<LoopAccessAnalysis>.
/// getResult returns a LoopAccessInfo object. See this class for the
/// specifics of what information is provided.
class LoopAccessAnalysis
    : public AnalysisInfoMixin<LoopAccessAnalysis> {
  friend AnalysisInfoMixin<LoopAccessAnalysis>;
  static AnalysisKey Key;

public:
  typedef LoopAccessInfo Result;

  Result run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR);
};
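
// Under the new pass manager, a loop pass obtains the result along these
// lines (illustrative sketch):
//
//   PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
//                         LoopStandardAnalysisResults &AR, LPMUpdater &U) {
//     const LoopAccessInfo &LAI = AM.getResult<LoopAccessAnalysis>(L, AR);
//     ...
//   }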

inline Instruction *MemoryDepChecker::Dependence::getSource(
    const LoopAccessInfo &LAI) const {
  return LAI.getDepChecker().getMemoryInstructions()[Source];
}

inline Instruction *MemoryDepChecker::Dependence::getDestination(
    const LoopAccessInfo &LAI) const {
  return LAI.getDepChecker().getMemoryInstructions()[Destination];
}

} // End llvm namespace

#endif
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif