LoopAccessAnalysis.h 30 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786
  1. #pragma once
  2. #ifdef __GNUC__
  3. #pragma GCC diagnostic push
  4. #pragma GCC diagnostic ignored "-Wunused-parameter"
  5. #endif
  6. //===- llvm/Analysis/LoopAccessAnalysis.h -----------------------*- C++ -*-===//
  7. //
  8. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  9. // See https://llvm.org/LICENSE.txt for license information.
  10. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  11. //
  12. //===----------------------------------------------------------------------===//
  13. //
  14. // This file defines the interface for the loop memory dependence framework that
  15. // was originally developed for the Loop Vectorizer.
  16. //
  17. //===----------------------------------------------------------------------===//
  18. #ifndef LLVM_ANALYSIS_LOOPACCESSANALYSIS_H
  19. #define LLVM_ANALYSIS_LOOPACCESSANALYSIS_H
  20. #include "llvm/ADT/EquivalenceClasses.h"
  21. #include "llvm/Analysis/LoopAnalysisManager.h"
  22. #include "llvm/Analysis/ScalarEvolutionExpressions.h"
  23. #include "llvm/IR/DiagnosticInfo.h"
  24. #include "llvm/Pass.h"
  25. namespace llvm {
  26. class AAResults;
  27. class DataLayout;
  28. class Loop;
  29. class LoopAccessInfo;
  30. class raw_ostream;
  31. class SCEV;
  32. class SCEVUnionPredicate;
  33. class Value;
  34. /// Collection of parameters shared beetween the Loop Vectorizer and the
  35. /// Loop Access Analysis.
  36. struct VectorizerParams {
  37. /// Maximum SIMD width.
  38. static const unsigned MaxVectorWidth;
  39. /// VF as overridden by the user.
  40. static unsigned VectorizationFactor;
  41. /// Interleave factor as overridden by the user.
  42. static unsigned VectorizationInterleave;
  43. /// True if force-vector-interleave was specified by the user.
  44. static bool isInterleaveForced();
  45. /// \When performing memory disambiguation checks at runtime do not
  46. /// make more than this number of comparisons.
  47. static unsigned RuntimeMemoryCheckThreshold;
  48. };
  49. /// Checks memory dependences among accesses to the same underlying
  50. /// object to determine whether there vectorization is legal or not (and at
  51. /// which vectorization factor).
  52. ///
  53. /// Note: This class will compute a conservative dependence for access to
  54. /// different underlying pointers. Clients, such as the loop vectorizer, will
  55. /// sometimes deal these potential dependencies by emitting runtime checks.
  56. ///
  57. /// We use the ScalarEvolution framework to symbolically evalutate access
  58. /// functions pairs. Since we currently don't restructure the loop we can rely
  59. /// on the program order of memory accesses to determine their safety.
  60. /// At the moment we will only deem accesses as safe for:
  61. /// * A negative constant distance assuming program order.
  62. ///
  63. /// Safe: tmp = a[i + 1]; OR a[i + 1] = x;
  64. /// a[i] = tmp; y = a[i];
  65. ///
  66. /// The latter case is safe because later checks guarantuee that there can't
  67. /// be a cycle through a phi node (that is, we check that "x" and "y" is not
  68. /// the same variable: a header phi can only be an induction or a reduction, a
  69. /// reduction can't have a memory sink, an induction can't have a memory
  70. /// source). This is important and must not be violated (or we have to
  71. /// resort to checking for cycles through memory).
  72. ///
  73. /// * A positive constant distance assuming program order that is bigger
  74. /// than the biggest memory access.
  75. ///
  76. /// tmp = a[i] OR b[i] = x
  77. /// a[i+2] = tmp y = b[i+2];
  78. ///
  79. /// Safe distance: 2 x sizeof(a[0]), and 2 x sizeof(b[0]), respectively.
  80. ///
  81. /// * Zero distances and all accesses have the same size.
  82. ///
  83. class MemoryDepChecker {
  84. public:
  85. typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
  86. typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList;
  87. /// Set of potential dependent memory accesses.
  88. typedef EquivalenceClasses<MemAccessInfo> DepCandidates;
  89. /// Type to keep track of the status of the dependence check. The order of
  90. /// the elements is important and has to be from most permissive to least
  91. /// permissive.
  92. enum class VectorizationSafetyStatus {
  93. // Can vectorize safely without RT checks. All dependences are known to be
  94. // safe.
  95. Safe,
  96. // Can possibly vectorize with RT checks to overcome unknown dependencies.
  97. PossiblySafeWithRtChecks,
  98. // Cannot vectorize due to known unsafe dependencies.
  99. Unsafe,
  100. };
  101. /// Dependece between memory access instructions.
  102. struct Dependence {
  103. /// The type of the dependence.
  104. enum DepType {
  105. // No dependence.
  106. NoDep,
  107. // We couldn't determine the direction or the distance.
  108. Unknown,
  109. // Lexically forward.
  110. //
  111. // FIXME: If we only have loop-independent forward dependences (e.g. a
  112. // read and write of A[i]), LAA will locally deem the dependence "safe"
  113. // without querying the MemoryDepChecker. Therefore we can miss
  114. // enumerating loop-independent forward dependences in
  115. // getDependences. Note that as soon as there are different
  116. // indices used to access the same array, the MemoryDepChecker *is*
  117. // queried and the dependence list is complete.
  118. Forward,
  119. // Forward, but if vectorized, is likely to prevent store-to-load
  120. // forwarding.
  121. ForwardButPreventsForwarding,
  122. // Lexically backward.
  123. Backward,
  124. // Backward, but the distance allows a vectorization factor of
  125. // MaxSafeDepDistBytes.
  126. BackwardVectorizable,
  127. // Same, but may prevent store-to-load forwarding.
  128. BackwardVectorizableButPreventsForwarding
  129. };
  130. /// String version of the types.
  131. static const char *DepName[];
  132. /// Index of the source of the dependence in the InstMap vector.
  133. unsigned Source;
  134. /// Index of the destination of the dependence in the InstMap vector.
  135. unsigned Destination;
  136. /// The type of the dependence.
  137. DepType Type;
  138. Dependence(unsigned Source, unsigned Destination, DepType Type)
  139. : Source(Source), Destination(Destination), Type(Type) {}
  140. /// Return the source instruction of the dependence.
  141. Instruction *getSource(const LoopAccessInfo &LAI) const;
  142. /// Return the destination instruction of the dependence.
  143. Instruction *getDestination(const LoopAccessInfo &LAI) const;
  144. /// Dependence types that don't prevent vectorization.
  145. static VectorizationSafetyStatus isSafeForVectorization(DepType Type);
  146. /// Lexically forward dependence.
  147. bool isForward() const;
  148. /// Lexically backward dependence.
  149. bool isBackward() const;
  150. /// May be a lexically backward dependence type (includes Unknown).
  151. bool isPossiblyBackward() const;
  152. /// Print the dependence. \p Instr is used to map the instruction
  153. /// indices to instructions.
  154. void print(raw_ostream &OS, unsigned Depth,
  155. const SmallVectorImpl<Instruction *> &Instrs) const;
  156. };
  157. MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L)
  158. : PSE(PSE), InnermostLoop(L) {}
  159. /// Register the location (instructions are given increasing numbers)
  160. /// of a write access.
  161. void addAccess(StoreInst *SI);
  162. /// Register the location (instructions are given increasing numbers)
  163. /// of a write access.
  164. void addAccess(LoadInst *LI);
  165. /// Check whether the dependencies between the accesses are safe.
  166. ///
  167. /// Only checks sets with elements in \p CheckDeps.
  168. bool areDepsSafe(DepCandidates &AccessSets, MemAccessInfoList &CheckDeps,
  169. const ValueToValueMap &Strides);
  170. /// No memory dependence was encountered that would inhibit
  171. /// vectorization.
  172. bool isSafeForVectorization() const {
  173. return Status == VectorizationSafetyStatus::Safe;
  174. }
  175. /// Return true if the number of elements that are safe to operate on
  176. /// simultaneously is not bounded.
  177. bool isSafeForAnyVectorWidth() const {
  178. return MaxSafeVectorWidthInBits == UINT_MAX;
  179. }
  180. /// The maximum number of bytes of a vector register we can vectorize
  181. /// the accesses safely with.
  182. uint64_t getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; }
  183. /// Return the number of elements that are safe to operate on
  184. /// simultaneously, multiplied by the size of the element in bits.
  185. uint64_t getMaxSafeVectorWidthInBits() const {
  186. return MaxSafeVectorWidthInBits;
  187. }
  188. /// In same cases when the dependency check fails we can still
  189. /// vectorize the loop with a dynamic array access check.
  190. bool shouldRetryWithRuntimeCheck() const {
  191. return FoundNonConstantDistanceDependence &&
  192. Status == VectorizationSafetyStatus::PossiblySafeWithRtChecks;
  193. }
  194. /// Returns the memory dependences. If null is returned we exceeded
  195. /// the MaxDependences threshold and this information is not
  196. /// available.
  197. const SmallVectorImpl<Dependence> *getDependences() const {
  198. return RecordDependences ? &Dependences : nullptr;
  199. }
  200. void clearDependences() { Dependences.clear(); }
  201. /// The vector of memory access instructions. The indices are used as
  202. /// instruction identifiers in the Dependence class.
  203. const SmallVectorImpl<Instruction *> &getMemoryInstructions() const {
  204. return InstMap;
  205. }
  206. /// Generate a mapping between the memory instructions and their
  207. /// indices according to program order.
  208. DenseMap<Instruction *, unsigned> generateInstructionOrderMap() const {
  209. DenseMap<Instruction *, unsigned> OrderMap;
  210. for (unsigned I = 0; I < InstMap.size(); ++I)
  211. OrderMap[InstMap[I]] = I;
  212. return OrderMap;
  213. }
  214. /// Find the set of instructions that read or write via \p Ptr.
  215. SmallVector<Instruction *, 4> getInstructionsForAccess(Value *Ptr,
  216. bool isWrite) const;
  217. private:
  218. /// A wrapper around ScalarEvolution, used to add runtime SCEV checks, and
  219. /// applies dynamic knowledge to simplify SCEV expressions and convert them
  220. /// to a more usable form. We need this in case assumptions about SCEV
  221. /// expressions need to be made in order to avoid unknown dependences. For
  222. /// example we might assume a unit stride for a pointer in order to prove
  223. /// that a memory access is strided and doesn't wrap.
  224. PredicatedScalarEvolution &PSE;
  225. const Loop *InnermostLoop;
  226. /// Maps access locations (ptr, read/write) to program order.
  227. DenseMap<MemAccessInfo, std::vector<unsigned> > Accesses;
  228. /// Memory access instructions in program order.
  229. SmallVector<Instruction *, 16> InstMap;
  230. /// The program order index to be used for the next instruction.
  231. unsigned AccessIdx = 0;
  232. // We can access this many bytes in parallel safely.
  233. uint64_t MaxSafeDepDistBytes = 0;
  234. /// Number of elements (from consecutive iterations) that are safe to
  235. /// operate on simultaneously, multiplied by the size of the element in bits.
  236. /// The size of the element is taken from the memory access that is most
  237. /// restrictive.
  238. uint64_t MaxSafeVectorWidthInBits = -1U;
  239. /// If we see a non-constant dependence distance we can still try to
  240. /// vectorize this loop with runtime checks.
  241. bool FoundNonConstantDistanceDependence = false;
  242. /// Result of the dependence checks, indicating whether the checked
  243. /// dependences are safe for vectorization, require RT checks or are known to
  244. /// be unsafe.
  245. VectorizationSafetyStatus Status = VectorizationSafetyStatus::Safe;
  246. //// True if Dependences reflects the dependences in the
  247. //// loop. If false we exceeded MaxDependences and
  248. //// Dependences is invalid.
  249. bool RecordDependences = true;
  250. /// Memory dependences collected during the analysis. Only valid if
  251. /// RecordDependences is true.
  252. SmallVector<Dependence, 8> Dependences;
  253. /// Check whether there is a plausible dependence between the two
  254. /// accesses.
  255. ///
  256. /// Access \p A must happen before \p B in program order. The two indices
  257. /// identify the index into the program order map.
  258. ///
  259. /// This function checks whether there is a plausible dependence (or the
  260. /// absence of such can't be proved) between the two accesses. If there is a
  261. /// plausible dependence but the dependence distance is bigger than one
  262. /// element access it records this distance in \p MaxSafeDepDistBytes (if this
  263. /// distance is smaller than any other distance encountered so far).
  264. /// Otherwise, this function returns true signaling a possible dependence.
  265. Dependence::DepType isDependent(const MemAccessInfo &A, unsigned AIdx,
  266. const MemAccessInfo &B, unsigned BIdx,
  267. const ValueToValueMap &Strides);
  268. /// Check whether the data dependence could prevent store-load
  269. /// forwarding.
  270. ///
  271. /// \return false if we shouldn't vectorize at all or avoid larger
  272. /// vectorization factors by limiting MaxSafeDepDistBytes.
  273. bool couldPreventStoreLoadForward(uint64_t Distance, uint64_t TypeByteSize);
  274. /// Updates the current safety status with \p S. We can go from Safe to
  275. /// either PossiblySafeWithRtChecks or Unsafe and from
  276. /// PossiblySafeWithRtChecks to Unsafe.
  277. void mergeInStatus(VectorizationSafetyStatus S);
  278. };
  279. class RuntimePointerChecking;
  280. /// A grouping of pointers. A single memcheck is required between
  281. /// two groups.
  282. struct RuntimeCheckingPtrGroup {
  283. /// Create a new pointer checking group containing a single
  284. /// pointer, with index \p Index in RtCheck.
  285. RuntimeCheckingPtrGroup(unsigned Index, RuntimePointerChecking &RtCheck);
  286. RuntimeCheckingPtrGroup(unsigned Index, const SCEV *Start, const SCEV *End,
  287. unsigned AS)
  288. : High(End), Low(Start), AddressSpace(AS) {
  289. Members.push_back(Index);
  290. }
  291. /// Tries to add the pointer recorded in RtCheck at index
  292. /// \p Index to this pointer checking group. We can only add a pointer
  293. /// to a checking group if we will still be able to get
  294. /// the upper and lower bounds of the check. Returns true in case
  295. /// of success, false otherwise.
  296. bool addPointer(unsigned Index, RuntimePointerChecking &RtCheck);
  297. bool addPointer(unsigned Index, const SCEV *Start, const SCEV *End,
  298. unsigned AS, ScalarEvolution &SE);
  299. /// The SCEV expression which represents the upper bound of all the
  300. /// pointers in this group.
  301. const SCEV *High;
  302. /// The SCEV expression which represents the lower bound of all the
  303. /// pointers in this group.
  304. const SCEV *Low;
  305. /// Indices of all the pointers that constitute this grouping.
  306. SmallVector<unsigned, 2> Members;
  307. /// Address space of the involved pointers.
  308. unsigned AddressSpace;
  309. };
  310. /// A memcheck which made up of a pair of grouped pointers.
  311. typedef std::pair<const RuntimeCheckingPtrGroup *,
  312. const RuntimeCheckingPtrGroup *>
  313. RuntimePointerCheck;
  314. /// Holds information about the memory runtime legality checks to verify
  315. /// that a group of pointers do not overlap.
  316. class RuntimePointerChecking {
  317. friend struct RuntimeCheckingPtrGroup;
  318. public:
  319. struct PointerInfo {
  320. /// Holds the pointer value that we need to check.
  321. TrackingVH<Value> PointerValue;
  322. /// Holds the smallest byte address accessed by the pointer throughout all
  323. /// iterations of the loop.
  324. const SCEV *Start;
  325. /// Holds the largest byte address accessed by the pointer throughout all
  326. /// iterations of the loop, plus 1.
  327. const SCEV *End;
  328. /// Holds the information if this pointer is used for writing to memory.
  329. bool IsWritePtr;
  330. /// Holds the id of the set of pointers that could be dependent because of a
  331. /// shared underlying object.
  332. unsigned DependencySetId;
  333. /// Holds the id of the disjoint alias set to which this pointer belongs.
  334. unsigned AliasSetId;
  335. /// SCEV for the access.
  336. const SCEV *Expr;
  337. PointerInfo(Value *PointerValue, const SCEV *Start, const SCEV *End,
  338. bool IsWritePtr, unsigned DependencySetId, unsigned AliasSetId,
  339. const SCEV *Expr)
  340. : PointerValue(PointerValue), Start(Start), End(End),
  341. IsWritePtr(IsWritePtr), DependencySetId(DependencySetId),
  342. AliasSetId(AliasSetId), Expr(Expr) {}
  343. };
  344. RuntimePointerChecking(ScalarEvolution *SE) : SE(SE) {}
  345. /// Reset the state of the pointer runtime information.
  346. void reset() {
  347. Need = false;
  348. Pointers.clear();
  349. Checks.clear();
  350. }
  351. /// Insert a pointer and calculate the start and end SCEVs.
  352. /// We need \p PSE in order to compute the SCEV expression of the pointer
  353. /// according to the assumptions that we've made during the analysis.
  354. /// The method might also version the pointer stride according to \p Strides,
  355. /// and add new predicates to \p PSE.
  356. void insert(Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId,
  357. unsigned ASId, const ValueToValueMap &Strides,
  358. PredicatedScalarEvolution &PSE);
  359. /// No run-time memory checking is necessary.
  360. bool empty() const { return Pointers.empty(); }
  361. /// Generate the checks and store it. This also performs the grouping
  362. /// of pointers to reduce the number of memchecks necessary.
  363. void generateChecks(MemoryDepChecker::DepCandidates &DepCands,
  364. bool UseDependencies);
  365. /// Returns the checks that generateChecks created.
  366. const SmallVectorImpl<RuntimePointerCheck> &getChecks() const {
  367. return Checks;
  368. }
  369. /// Decide if we need to add a check between two groups of pointers,
  370. /// according to needsChecking.
  371. bool needsChecking(const RuntimeCheckingPtrGroup &M,
  372. const RuntimeCheckingPtrGroup &N) const;
  373. /// Returns the number of run-time checks required according to
  374. /// needsChecking.
  375. unsigned getNumberOfChecks() const { return Checks.size(); }
  376. /// Print the list run-time memory checks necessary.
  377. void print(raw_ostream &OS, unsigned Depth = 0) const;
  378. /// Print \p Checks.
  379. void printChecks(raw_ostream &OS,
  380. const SmallVectorImpl<RuntimePointerCheck> &Checks,
  381. unsigned Depth = 0) const;
  382. /// This flag indicates if we need to add the runtime check.
  383. bool Need = false;
  384. /// Information about the pointers that may require checking.
  385. SmallVector<PointerInfo, 2> Pointers;
  386. /// Holds a partitioning of pointers into "check groups".
  387. SmallVector<RuntimeCheckingPtrGroup, 2> CheckingGroups;
  388. /// Check if pointers are in the same partition
  389. ///
  390. /// \p PtrToPartition contains the partition number for pointers (-1 if the
  391. /// pointer belongs to multiple partitions).
  392. static bool
  393. arePointersInSamePartition(const SmallVectorImpl<int> &PtrToPartition,
  394. unsigned PtrIdx1, unsigned PtrIdx2);
  395. /// Decide whether we need to issue a run-time check for pointer at
  396. /// index \p I and \p J to prove their independence.
  397. bool needsChecking(unsigned I, unsigned J) const;
  398. /// Return PointerInfo for pointer at index \p PtrIdx.
  399. const PointerInfo &getPointerInfo(unsigned PtrIdx) const {
  400. return Pointers[PtrIdx];
  401. }
  402. ScalarEvolution *getSE() const { return SE; }
  403. private:
  404. /// Groups pointers such that a single memcheck is required
  405. /// between two different groups. This will clear the CheckingGroups vector
  406. /// and re-compute it. We will only group dependecies if \p UseDependencies
  407. /// is true, otherwise we will create a separate group for each pointer.
  408. void groupChecks(MemoryDepChecker::DepCandidates &DepCands,
  409. bool UseDependencies);
  410. /// Generate the checks and return them.
  411. SmallVector<RuntimePointerCheck, 4> generateChecks() const;
  412. /// Holds a pointer to the ScalarEvolution analysis.
  413. ScalarEvolution *SE;
  414. /// Set of run-time checks required to establish independence of
  415. /// otherwise may-aliasing pointers in the loop.
  416. SmallVector<RuntimePointerCheck, 4> Checks;
  417. };
  418. /// Drive the analysis of memory accesses in the loop
  419. ///
  420. /// This class is responsible for analyzing the memory accesses of a loop. It
  421. /// collects the accesses and then its main helper the AccessAnalysis class
  422. /// finds and categorizes the dependences in buildDependenceSets.
  423. ///
  424. /// For memory dependences that can be analyzed at compile time, it determines
  425. /// whether the dependence is part of cycle inhibiting vectorization. This work
  426. /// is delegated to the MemoryDepChecker class.
  427. ///
  428. /// For memory dependences that cannot be determined at compile time, it
  429. /// generates run-time checks to prove independence. This is done by
  430. /// AccessAnalysis::canCheckPtrAtRT and the checks are maintained by the
  431. /// RuntimePointerCheck class.
  432. ///
  433. /// If pointers can wrap or can't be expressed as affine AddRec expressions by
  434. /// ScalarEvolution, we will generate run-time checks by emitting a
  435. /// SCEVUnionPredicate.
  436. ///
  437. /// Checks for both memory dependences and the SCEV predicates contained in the
  438. /// PSE must be emitted in order for the results of this analysis to be valid.
  439. class LoopAccessInfo {
  440. public:
  441. LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetLibraryInfo *TLI,
  442. AAResults *AA, DominatorTree *DT, LoopInfo *LI);
  443. /// Return true we can analyze the memory accesses in the loop and there are
  444. /// no memory dependence cycles.
  445. bool canVectorizeMemory() const { return CanVecMem; }
  446. /// Return true if there is a convergent operation in the loop. There may
  447. /// still be reported runtime pointer checks that would be required, but it is
  448. /// not legal to insert them.
  449. bool hasConvergentOp() const { return HasConvergentOp; }
  450. const RuntimePointerChecking *getRuntimePointerChecking() const {
  451. return PtrRtChecking.get();
  452. }
  453. /// Number of memchecks required to prove independence of otherwise
  454. /// may-alias pointers.
  455. unsigned getNumRuntimePointerChecks() const {
  456. return PtrRtChecking->getNumberOfChecks();
  457. }
  458. /// Return true if the block BB needs to be predicated in order for the loop
  459. /// to be vectorized.
  460. static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
  461. DominatorTree *DT);
  462. /// Returns true if the value V is uniform within the loop.
  463. bool isUniform(Value *V) const;
  464. uint64_t getMaxSafeDepDistBytes() const { return MaxSafeDepDistBytes; }
  465. unsigned getNumStores() const { return NumStores; }
  466. unsigned getNumLoads() const { return NumLoads;}
  467. /// The diagnostics report generated for the analysis. E.g. why we
  468. /// couldn't analyze the loop.
  469. const OptimizationRemarkAnalysis *getReport() const { return Report.get(); }
  470. /// the Memory Dependence Checker which can determine the
  471. /// loop-independent and loop-carried dependences between memory accesses.
  472. const MemoryDepChecker &getDepChecker() const { return *DepChecker; }
  473. /// Return the list of instructions that use \p Ptr to read or write
  474. /// memory.
  475. SmallVector<Instruction *, 4> getInstructionsForAccess(Value *Ptr,
  476. bool isWrite) const {
  477. return DepChecker->getInstructionsForAccess(Ptr, isWrite);
  478. }
  479. /// If an access has a symbolic strides, this maps the pointer value to
  480. /// the stride symbol.
  481. const ValueToValueMap &getSymbolicStrides() const { return SymbolicStrides; }
  482. /// Pointer has a symbolic stride.
  483. bool hasStride(Value *V) const { return StrideSet.count(V); }
  484. /// Print the information about the memory accesses in the loop.
  485. void print(raw_ostream &OS, unsigned Depth = 0) const;
  486. /// If the loop has memory dependence involving an invariant address, i.e. two
  487. /// stores or a store and a load, then return true, else return false.
  488. bool hasDependenceInvolvingLoopInvariantAddress() const {
  489. return HasDependenceInvolvingLoopInvariantAddress;
  490. }
  491. /// Used to add runtime SCEV checks. Simplifies SCEV expressions and converts
  492. /// them to a more usable form. All SCEV expressions during the analysis
  493. /// should be re-written (and therefore simplified) according to PSE.
  494. /// A user of LoopAccessAnalysis will need to emit the runtime checks
  495. /// associated with this predicate.
  496. const PredicatedScalarEvolution &getPSE() const { return *PSE; }
  497. private:
  498. /// Analyze the loop.
  499. void analyzeLoop(AAResults *AA, LoopInfo *LI,
  500. const TargetLibraryInfo *TLI, DominatorTree *DT);
  501. /// Check if the structure of the loop allows it to be analyzed by this
  502. /// pass.
  503. bool canAnalyzeLoop();
  504. /// Save the analysis remark.
  505. ///
  506. /// LAA does not directly emits the remarks. Instead it stores it which the
  507. /// client can retrieve and presents as its own analysis
  508. /// (e.g. -Rpass-analysis=loop-vectorize).
  509. OptimizationRemarkAnalysis &recordAnalysis(StringRef RemarkName,
  510. Instruction *Instr = nullptr);
  511. /// Collect memory access with loop invariant strides.
  512. ///
  513. /// Looks for accesses like "a[i * StrideA]" where "StrideA" is loop
  514. /// invariant.
  515. void collectStridedAccess(Value *LoadOrStoreInst);
  516. std::unique_ptr<PredicatedScalarEvolution> PSE;
  517. /// We need to check that all of the pointers in this list are disjoint
  518. /// at runtime. Using std::unique_ptr to make using move ctor simpler.
  519. std::unique_ptr<RuntimePointerChecking> PtrRtChecking;
  520. /// the Memory Dependence Checker which can determine the
  521. /// loop-independent and loop-carried dependences between memory accesses.
  522. std::unique_ptr<MemoryDepChecker> DepChecker;
  523. Loop *TheLoop;
  524. unsigned NumLoads = 0;
  525. unsigned NumStores = 0;
  526. uint64_t MaxSafeDepDistBytes = -1;
  527. /// Cache the result of analyzeLoop.
  528. bool CanVecMem = false;
  529. bool HasConvergentOp = false;
  530. /// Indicator that there are non vectorizable stores to a uniform address.
  531. bool HasDependenceInvolvingLoopInvariantAddress = false;
  532. /// The diagnostics report generated for the analysis. E.g. why we
  533. /// couldn't analyze the loop.
  534. std::unique_ptr<OptimizationRemarkAnalysis> Report;
  535. /// If an access has a symbolic strides, this maps the pointer value to
  536. /// the stride symbol.
  537. ValueToValueMap SymbolicStrides;
  538. /// Set of symbolic strides values.
  539. SmallPtrSet<Value *, 8> StrideSet;
  540. };
  541. Value *stripIntegerCast(Value *V);
  542. /// Return the SCEV corresponding to a pointer with the symbolic stride
  543. /// replaced with constant one, assuming the SCEV predicate associated with
  544. /// \p PSE is true.
  545. ///
  546. /// If necessary this method will version the stride of the pointer according
  547. /// to \p PtrToStride and therefore add further predicates to \p PSE.
  548. ///
  549. /// \p PtrToStride provides the mapping between the pointer value and its
  550. /// stride as collected by LoopVectorizationLegality::collectStridedAccess.
  551. const SCEV *replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
  552. const ValueToValueMap &PtrToStride,
  553. Value *Ptr);
  554. /// If the pointer has a constant stride return it in units of the access type
  555. /// size. Otherwise return zero.
  556. ///
  557. /// Ensure that it does not wrap in the address space, assuming the predicate
  558. /// associated with \p PSE is true.
  559. ///
  560. /// If necessary this method will version the stride of the pointer according
  561. /// to \p PtrToStride and therefore add further predicates to \p PSE.
  562. /// The \p Assume parameter indicates if we are allowed to make additional
  563. /// run-time assumptions.
  564. int64_t getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
  565. const Loop *Lp,
  566. const ValueToValueMap &StridesMap = ValueToValueMap(),
  567. bool Assume = false, bool ShouldCheckWrap = true);
  568. /// Returns the distance between the pointers \p PtrA and \p PtrB iff they are
  569. /// compatible and it is possible to calculate the distance between them. This
  570. /// is a simple API that does not depend on the analysis pass.
  571. /// \param StrictCheck Ensure that the calculated distance matches the
  572. /// type-based one after all the bitcasts removal in the provided pointers.
  573. Optional<int> getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB,
  574. Value *PtrB, const DataLayout &DL,
  575. ScalarEvolution &SE, bool StrictCheck = false,
  576. bool CheckType = true);
  577. /// Attempt to sort the pointers in \p VL and return the sorted indices
  578. /// in \p SortedIndices, if reordering is required.
  579. ///
  580. /// Returns 'true' if sorting is legal, otherwise returns 'false'.
  581. ///
  582. /// For example, for a given \p VL of memory accesses in program order, a[i+4],
  583. /// a[i+0], a[i+1] and a[i+7], this function will sort the \p VL and save the
  584. /// sorted indices in \p SortedIndices as a[i+0], a[i+1], a[i+4], a[i+7] and
  585. /// saves the mask for actual memory accesses in program order in
  586. /// \p SortedIndices as <1,2,0,3>
  587. bool sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy, const DataLayout &DL,
  588. ScalarEvolution &SE,
  589. SmallVectorImpl<unsigned> &SortedIndices);
  590. /// Returns true if the memory operations \p A and \p B are consecutive.
  591. /// This is a simple API that does not depend on the analysis pass.
  592. bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
  593. ScalarEvolution &SE, bool CheckType = true);
  594. /// This analysis provides dependence information for the memory accesses
  595. /// of a loop.
  596. ///
  597. /// It runs the analysis for a loop on demand. This can be initiated by
  598. /// querying the loop access info via LAA::getInfo. getInfo return a
  599. /// LoopAccessInfo object. See this class for the specifics of what information
  600. /// is provided.
  601. class LoopAccessLegacyAnalysis : public FunctionPass {
  602. public:
  603. static char ID;
  604. LoopAccessLegacyAnalysis();
  605. bool runOnFunction(Function &F) override;
  606. void getAnalysisUsage(AnalysisUsage &AU) const override;
  607. /// Query the result of the loop access information for the loop \p L.
  608. ///
  609. /// If there is no cached result available run the analysis.
  610. const LoopAccessInfo &getInfo(Loop *L);
  611. void releaseMemory() override {
  612. // Invalidate the cache when the pass is freed.
  613. LoopAccessInfoMap.clear();
  614. }
  615. /// Print the result of the analysis when invoked with -analyze.
  616. void print(raw_ostream &OS, const Module *M = nullptr) const override;
  617. private:
  618. /// The cache.
  619. DenseMap<Loop *, std::unique_ptr<LoopAccessInfo>> LoopAccessInfoMap;
  620. // The used analysis passes.
  621. ScalarEvolution *SE = nullptr;
  622. const TargetLibraryInfo *TLI = nullptr;
  623. AAResults *AA = nullptr;
  624. DominatorTree *DT = nullptr;
  625. LoopInfo *LI = nullptr;
  626. };
  627. /// This analysis provides dependence information for the memory
  628. /// accesses of a loop.
  629. ///
  630. /// It runs the analysis for a loop on demand. This can be initiated by
  631. /// querying the loop access info via AM.getResult<LoopAccessAnalysis>.
  632. /// getResult return a LoopAccessInfo object. See this class for the
  633. /// specifics of what information is provided.
  634. class LoopAccessAnalysis
  635. : public AnalysisInfoMixin<LoopAccessAnalysis> {
  636. friend AnalysisInfoMixin<LoopAccessAnalysis>;
  637. static AnalysisKey Key;
  638. public:
  639. typedef LoopAccessInfo Result;
  640. Result run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR);
  641. };
  642. inline Instruction *MemoryDepChecker::Dependence::getSource(
  643. const LoopAccessInfo &LAI) const {
  644. return LAI.getDepChecker().getMemoryInstructions()[Source];
  645. }
  646. inline Instruction *MemoryDepChecker::Dependence::getDestination(
  647. const LoopAccessInfo &LAI) const {
  648. return LAI.getDepChecker().getMemoryInstructions()[Destination];
  649. }
  650. } // End llvm namespace
  651. #endif
  652. #ifdef __GNUC__
  653. #pragma GCC diagnostic pop
  654. #endif