LoopAccessAnalysis.h 33 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863
  1. #pragma once
  2. #ifdef __GNUC__
  3. #pragma GCC diagnostic push
  4. #pragma GCC diagnostic ignored "-Wunused-parameter"
  5. #endif
  6. //===- llvm/Analysis/LoopAccessAnalysis.h -----------------------*- C++ -*-===//
  7. //
  8. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  9. // See https://llvm.org/LICENSE.txt for license information.
  10. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  11. //
  12. //===----------------------------------------------------------------------===//
  13. //
  14. // This file defines the interface for the loop memory dependence framework that
  15. // was originally developed for the Loop Vectorizer.
  16. //
  17. //===----------------------------------------------------------------------===//
  18. #ifndef LLVM_ANALYSIS_LOOPACCESSANALYSIS_H
  19. #define LLVM_ANALYSIS_LOOPACCESSANALYSIS_H
  20. #include "llvm/ADT/EquivalenceClasses.h"
  21. #include "llvm/Analysis/LoopAnalysisManager.h"
  22. #include "llvm/Analysis/ScalarEvolutionExpressions.h"
  23. #include "llvm/IR/DiagnosticInfo.h"
  24. #include "llvm/Pass.h"
  25. #include <optional>
  26. namespace llvm {
  27. class AAResults;
  28. class DataLayout;
  29. class Loop;
  30. class LoopAccessInfo;
  31. class raw_ostream;
  32. class SCEV;
  33. class SCEVUnionPredicate;
  34. class Value;
  35. /// Collection of parameters shared beetween the Loop Vectorizer and the
  36. /// Loop Access Analysis.
  37. struct VectorizerParams {
  38. /// Maximum SIMD width.
  39. static const unsigned MaxVectorWidth;
  40. /// VF as overridden by the user.
  41. static unsigned VectorizationFactor;
  42. /// Interleave factor as overridden by the user.
  43. static unsigned VectorizationInterleave;
  44. /// True if force-vector-interleave was specified by the user.
  45. static bool isInterleaveForced();
  46. /// \When performing memory disambiguation checks at runtime do not
  47. /// make more than this number of comparisons.
  48. static unsigned RuntimeMemoryCheckThreshold;
  49. };
  50. /// Checks memory dependences among accesses to the same underlying
  51. /// object to determine whether there vectorization is legal or not (and at
  52. /// which vectorization factor).
  53. ///
  54. /// Note: This class will compute a conservative dependence for access to
  55. /// different underlying pointers. Clients, such as the loop vectorizer, will
  56. /// sometimes deal these potential dependencies by emitting runtime checks.
  57. ///
  58. /// We use the ScalarEvolution framework to symbolically evalutate access
  59. /// functions pairs. Since we currently don't restructure the loop we can rely
  60. /// on the program order of memory accesses to determine their safety.
  61. /// At the moment we will only deem accesses as safe for:
  62. /// * A negative constant distance assuming program order.
  63. ///
  64. /// Safe: tmp = a[i + 1]; OR a[i + 1] = x;
  65. /// a[i] = tmp; y = a[i];
  66. ///
  67. /// The latter case is safe because later checks guarantuee that there can't
  68. /// be a cycle through a phi node (that is, we check that "x" and "y" is not
  69. /// the same variable: a header phi can only be an induction or a reduction, a
  70. /// reduction can't have a memory sink, an induction can't have a memory
  71. /// source). This is important and must not be violated (or we have to
  72. /// resort to checking for cycles through memory).
  73. ///
  74. /// * A positive constant distance assuming program order that is bigger
  75. /// than the biggest memory access.
  76. ///
  77. /// tmp = a[i] OR b[i] = x
  78. /// a[i+2] = tmp y = b[i+2];
  79. ///
  80. /// Safe distance: 2 x sizeof(a[0]), and 2 x sizeof(b[0]), respectively.
  81. ///
  82. /// * Zero distances and all accesses have the same size.
  83. ///
  84. class MemoryDepChecker {
  85. public:
  86. typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
  87. typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList;
  88. /// Set of potential dependent memory accesses.
  89. typedef EquivalenceClasses<MemAccessInfo> DepCandidates;
  90. /// Type to keep track of the status of the dependence check. The order of
  91. /// the elements is important and has to be from most permissive to least
  92. /// permissive.
  93. enum class VectorizationSafetyStatus {
  94. // Can vectorize safely without RT checks. All dependences are known to be
  95. // safe.
  96. Safe,
  97. // Can possibly vectorize with RT checks to overcome unknown dependencies.
  98. PossiblySafeWithRtChecks,
  99. // Cannot vectorize due to known unsafe dependencies.
  100. Unsafe,
  101. };
  102. /// Dependece between memory access instructions.
  103. struct Dependence {
  104. /// The type of the dependence.
  105. enum DepType {
  106. // No dependence.
  107. NoDep,
  108. // We couldn't determine the direction or the distance.
  109. Unknown,
  110. // Lexically forward.
  111. //
  112. // FIXME: If we only have loop-independent forward dependences (e.g. a
  113. // read and write of A[i]), LAA will locally deem the dependence "safe"
  114. // without querying the MemoryDepChecker. Therefore we can miss
  115. // enumerating loop-independent forward dependences in
  116. // getDependences. Note that as soon as there are different
  117. // indices used to access the same array, the MemoryDepChecker *is*
  118. // queried and the dependence list is complete.
  119. Forward,
  120. // Forward, but if vectorized, is likely to prevent store-to-load
  121. // forwarding.
  122. ForwardButPreventsForwarding,
  123. // Lexically backward.
  124. Backward,
  125. // Backward, but the distance allows a vectorization factor of
  126. // MaxSafeDepDistBytes.
  127. BackwardVectorizable,
  128. // Same, but may prevent store-to-load forwarding.
  129. BackwardVectorizableButPreventsForwarding
  130. };
  131. /// String version of the types.
  132. static const char *DepName[];
  133. /// Index of the source of the dependence in the InstMap vector.
  134. unsigned Source;
  135. /// Index of the destination of the dependence in the InstMap vector.
  136. unsigned Destination;
  137. /// The type of the dependence.
  138. DepType Type;
  139. Dependence(unsigned Source, unsigned Destination, DepType Type)
  140. : Source(Source), Destination(Destination), Type(Type) {}
  141. /// Return the source instruction of the dependence.
  142. Instruction *getSource(const LoopAccessInfo &LAI) const;
  143. /// Return the destination instruction of the dependence.
  144. Instruction *getDestination(const LoopAccessInfo &LAI) const;
  145. /// Dependence types that don't prevent vectorization.
  146. static VectorizationSafetyStatus isSafeForVectorization(DepType Type);
  147. /// Lexically forward dependence.
  148. bool isForward() const;
  149. /// Lexically backward dependence.
  150. bool isBackward() const;
  151. /// May be a lexically backward dependence type (includes Unknown).
  152. bool isPossiblyBackward() const;
  153. /// Print the dependence. \p Instr is used to map the instruction
  154. /// indices to instructions.
  155. void print(raw_ostream &OS, unsigned Depth,
  156. const SmallVectorImpl<Instruction *> &Instrs) const;
  157. };
  158. MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L)
  159. : PSE(PSE), InnermostLoop(L) {}
  160. /// Register the location (instructions are given increasing numbers)
  161. /// of a write access.
  162. void addAccess(StoreInst *SI);
  163. /// Register the location (instructions are given increasing numbers)
  164. /// of a write access.
  165. void addAccess(LoadInst *LI);
  166. /// Check whether the dependencies between the accesses are safe.
  167. ///
  168. /// Only checks sets with elements in \p CheckDeps.
  169. bool areDepsSafe(DepCandidates &AccessSets, MemAccessInfoList &CheckDeps,
  170. const ValueToValueMap &Strides);
  171. /// No memory dependence was encountered that would inhibit
  172. /// vectorization.
  173. bool isSafeForVectorization() const {
  174. return Status == VectorizationSafetyStatus::Safe;
  175. }
  176. /// Return true if the number of elements that are safe to operate on
  177. /// simultaneously is not bounded.
  178. bool isSafeForAnyVectorWidth() const {
  179. return MaxSafeVectorWidthInBits == UINT_MAX;
  180. }
  181. /// The maximum number of bytes of a vector register we can vectorize
  182. /// the accesses safely with.
  183. uint64_t getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; }
  184. /// Return the number of elements that are safe to operate on
  185. /// simultaneously, multiplied by the size of the element in bits.
  186. uint64_t getMaxSafeVectorWidthInBits() const {
  187. return MaxSafeVectorWidthInBits;
  188. }
  189. /// In same cases when the dependency check fails we can still
  190. /// vectorize the loop with a dynamic array access check.
  191. bool shouldRetryWithRuntimeCheck() const {
  192. return FoundNonConstantDistanceDependence &&
  193. Status == VectorizationSafetyStatus::PossiblySafeWithRtChecks;
  194. }
  195. /// Returns the memory dependences. If null is returned we exceeded
  196. /// the MaxDependences threshold and this information is not
  197. /// available.
  198. const SmallVectorImpl<Dependence> *getDependences() const {
  199. return RecordDependences ? &Dependences : nullptr;
  200. }
  201. void clearDependences() { Dependences.clear(); }
  202. /// The vector of memory access instructions. The indices are used as
  203. /// instruction identifiers in the Dependence class.
  204. const SmallVectorImpl<Instruction *> &getMemoryInstructions() const {
  205. return InstMap;
  206. }
  207. /// Generate a mapping between the memory instructions and their
  208. /// indices according to program order.
  209. DenseMap<Instruction *, unsigned> generateInstructionOrderMap() const {
  210. DenseMap<Instruction *, unsigned> OrderMap;
  211. for (unsigned I = 0; I < InstMap.size(); ++I)
  212. OrderMap[InstMap[I]] = I;
  213. return OrderMap;
  214. }
  215. /// Find the set of instructions that read or write via \p Ptr.
  216. SmallVector<Instruction *, 4> getInstructionsForAccess(Value *Ptr,
  217. bool isWrite) const;
  218. /// Return the program order indices for the access location (Ptr, IsWrite).
  219. /// Returns an empty ArrayRef if there are no accesses for the location.
  220. ArrayRef<unsigned> getOrderForAccess(Value *Ptr, bool IsWrite) const {
  221. auto I = Accesses.find({Ptr, IsWrite});
  222. if (I != Accesses.end())
  223. return I->second;
  224. return {};
  225. }
  226. const Loop *getInnermostLoop() const { return InnermostLoop; }
  227. private:
  228. /// A wrapper around ScalarEvolution, used to add runtime SCEV checks, and
  229. /// applies dynamic knowledge to simplify SCEV expressions and convert them
  230. /// to a more usable form. We need this in case assumptions about SCEV
  231. /// expressions need to be made in order to avoid unknown dependences. For
  232. /// example we might assume a unit stride for a pointer in order to prove
  233. /// that a memory access is strided and doesn't wrap.
  234. PredicatedScalarEvolution &PSE;
  235. const Loop *InnermostLoop;
  236. /// Maps access locations (ptr, read/write) to program order.
  237. DenseMap<MemAccessInfo, std::vector<unsigned> > Accesses;
  238. /// Memory access instructions in program order.
  239. SmallVector<Instruction *, 16> InstMap;
  240. /// The program order index to be used for the next instruction.
  241. unsigned AccessIdx = 0;
  242. // We can access this many bytes in parallel safely.
  243. uint64_t MaxSafeDepDistBytes = 0;
  244. /// Number of elements (from consecutive iterations) that are safe to
  245. /// operate on simultaneously, multiplied by the size of the element in bits.
  246. /// The size of the element is taken from the memory access that is most
  247. /// restrictive.
  248. uint64_t MaxSafeVectorWidthInBits = -1U;
  249. /// If we see a non-constant dependence distance we can still try to
  250. /// vectorize this loop with runtime checks.
  251. bool FoundNonConstantDistanceDependence = false;
  252. /// Result of the dependence checks, indicating whether the checked
  253. /// dependences are safe for vectorization, require RT checks or are known to
  254. /// be unsafe.
  255. VectorizationSafetyStatus Status = VectorizationSafetyStatus::Safe;
  256. //// True if Dependences reflects the dependences in the
  257. //// loop. If false we exceeded MaxDependences and
  258. //// Dependences is invalid.
  259. bool RecordDependences = true;
  260. /// Memory dependences collected during the analysis. Only valid if
  261. /// RecordDependences is true.
  262. SmallVector<Dependence, 8> Dependences;
  263. /// Check whether there is a plausible dependence between the two
  264. /// accesses.
  265. ///
  266. /// Access \p A must happen before \p B in program order. The two indices
  267. /// identify the index into the program order map.
  268. ///
  269. /// This function checks whether there is a plausible dependence (or the
  270. /// absence of such can't be proved) between the two accesses. If there is a
  271. /// plausible dependence but the dependence distance is bigger than one
  272. /// element access it records this distance in \p MaxSafeDepDistBytes (if this
  273. /// distance is smaller than any other distance encountered so far).
  274. /// Otherwise, this function returns true signaling a possible dependence.
  275. Dependence::DepType isDependent(const MemAccessInfo &A, unsigned AIdx,
  276. const MemAccessInfo &B, unsigned BIdx,
  277. const ValueToValueMap &Strides);
  278. /// Check whether the data dependence could prevent store-load
  279. /// forwarding.
  280. ///
  281. /// \return false if we shouldn't vectorize at all or avoid larger
  282. /// vectorization factors by limiting MaxSafeDepDistBytes.
  283. bool couldPreventStoreLoadForward(uint64_t Distance, uint64_t TypeByteSize);
  284. /// Updates the current safety status with \p S. We can go from Safe to
  285. /// either PossiblySafeWithRtChecks or Unsafe and from
  286. /// PossiblySafeWithRtChecks to Unsafe.
  287. void mergeInStatus(VectorizationSafetyStatus S);
  288. };
  289. class RuntimePointerChecking;
  290. /// A grouping of pointers. A single memcheck is required between
  291. /// two groups.
  292. struct RuntimeCheckingPtrGroup {
  293. /// Create a new pointer checking group containing a single
  294. /// pointer, with index \p Index in RtCheck.
  295. RuntimeCheckingPtrGroup(unsigned Index, RuntimePointerChecking &RtCheck);
  296. /// Tries to add the pointer recorded in RtCheck at index
  297. /// \p Index to this pointer checking group. We can only add a pointer
  298. /// to a checking group if we will still be able to get
  299. /// the upper and lower bounds of the check. Returns true in case
  300. /// of success, false otherwise.
  301. bool addPointer(unsigned Index, RuntimePointerChecking &RtCheck);
  302. bool addPointer(unsigned Index, const SCEV *Start, const SCEV *End,
  303. unsigned AS, bool NeedsFreeze, ScalarEvolution &SE);
  304. /// The SCEV expression which represents the upper bound of all the
  305. /// pointers in this group.
  306. const SCEV *High;
  307. /// The SCEV expression which represents the lower bound of all the
  308. /// pointers in this group.
  309. const SCEV *Low;
  310. /// Indices of all the pointers that constitute this grouping.
  311. SmallVector<unsigned, 2> Members;
  312. /// Address space of the involved pointers.
  313. unsigned AddressSpace;
  314. /// Whether the pointer needs to be frozen after expansion, e.g. because it
  315. /// may be poison outside the loop.
  316. bool NeedsFreeze = false;
  317. };
  318. /// A memcheck which made up of a pair of grouped pointers.
  319. typedef std::pair<const RuntimeCheckingPtrGroup *,
  320. const RuntimeCheckingPtrGroup *>
  321. RuntimePointerCheck;
  322. struct PointerDiffInfo {
  323. const SCEV *SrcStart;
  324. const SCEV *SinkStart;
  325. unsigned AccessSize;
  326. bool NeedsFreeze;
  327. PointerDiffInfo(const SCEV *SrcStart, const SCEV *SinkStart,
  328. unsigned AccessSize, bool NeedsFreeze)
  329. : SrcStart(SrcStart), SinkStart(SinkStart), AccessSize(AccessSize),
  330. NeedsFreeze(NeedsFreeze) {}
  331. };
  332. /// Holds information about the memory runtime legality checks to verify
  333. /// that a group of pointers do not overlap.
  334. class RuntimePointerChecking {
  335. friend struct RuntimeCheckingPtrGroup;
  336. public:
  337. struct PointerInfo {
  338. /// Holds the pointer value that we need to check.
  339. TrackingVH<Value> PointerValue;
  340. /// Holds the smallest byte address accessed by the pointer throughout all
  341. /// iterations of the loop.
  342. const SCEV *Start;
  343. /// Holds the largest byte address accessed by the pointer throughout all
  344. /// iterations of the loop, plus 1.
  345. const SCEV *End;
  346. /// Holds the information if this pointer is used for writing to memory.
  347. bool IsWritePtr;
  348. /// Holds the id of the set of pointers that could be dependent because of a
  349. /// shared underlying object.
  350. unsigned DependencySetId;
  351. /// Holds the id of the disjoint alias set to which this pointer belongs.
  352. unsigned AliasSetId;
  353. /// SCEV for the access.
  354. const SCEV *Expr;
  355. /// True if the pointer expressions needs to be frozen after expansion.
  356. bool NeedsFreeze;
  357. PointerInfo(Value *PointerValue, const SCEV *Start, const SCEV *End,
  358. bool IsWritePtr, unsigned DependencySetId, unsigned AliasSetId,
  359. const SCEV *Expr, bool NeedsFreeze)
  360. : PointerValue(PointerValue), Start(Start), End(End),
  361. IsWritePtr(IsWritePtr), DependencySetId(DependencySetId),
  362. AliasSetId(AliasSetId), Expr(Expr), NeedsFreeze(NeedsFreeze) {}
  363. };
  364. RuntimePointerChecking(MemoryDepChecker &DC, ScalarEvolution *SE)
  365. : DC(DC), SE(SE) {}
  366. /// Reset the state of the pointer runtime information.
  367. void reset() {
  368. Need = false;
  369. Pointers.clear();
  370. Checks.clear();
  371. }
  372. /// Insert a pointer and calculate the start and end SCEVs.
  373. /// We need \p PSE in order to compute the SCEV expression of the pointer
  374. /// according to the assumptions that we've made during the analysis.
  375. /// The method might also version the pointer stride according to \p Strides,
  376. /// and add new predicates to \p PSE.
  377. void insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr, Type *AccessTy,
  378. bool WritePtr, unsigned DepSetId, unsigned ASId,
  379. PredicatedScalarEvolution &PSE, bool NeedsFreeze);
  380. /// No run-time memory checking is necessary.
  381. bool empty() const { return Pointers.empty(); }
  382. /// Generate the checks and store it. This also performs the grouping
  383. /// of pointers to reduce the number of memchecks necessary.
  384. void generateChecks(MemoryDepChecker::DepCandidates &DepCands,
  385. bool UseDependencies);
  386. /// Returns the checks that generateChecks created. They can be used to ensure
  387. /// no read/write accesses overlap across all loop iterations.
  388. const SmallVectorImpl<RuntimePointerCheck> &getChecks() const {
  389. return Checks;
  390. }
  391. // Returns an optional list of (pointer-difference expressions, access size)
  392. // pairs that can be used to prove that there are no vectorization-preventing
  393. // dependencies at runtime. There are is a vectorization-preventing dependency
  394. // if any pointer-difference is <u VF * InterleaveCount * access size. Returns
  395. // std::nullopt if pointer-difference checks cannot be used.
  396. std::optional<ArrayRef<PointerDiffInfo>> getDiffChecks() const {
  397. if (!CanUseDiffCheck)
  398. return std::nullopt;
  399. return {DiffChecks};
  400. }
  401. /// Decide if we need to add a check between two groups of pointers,
  402. /// according to needsChecking.
  403. bool needsChecking(const RuntimeCheckingPtrGroup &M,
  404. const RuntimeCheckingPtrGroup &N) const;
  405. /// Returns the number of run-time checks required according to
  406. /// needsChecking.
  407. unsigned getNumberOfChecks() const { return Checks.size(); }
  408. /// Print the list run-time memory checks necessary.
  409. void print(raw_ostream &OS, unsigned Depth = 0) const;
  410. /// Print \p Checks.
  411. void printChecks(raw_ostream &OS,
  412. const SmallVectorImpl<RuntimePointerCheck> &Checks,
  413. unsigned Depth = 0) const;
  414. /// This flag indicates if we need to add the runtime check.
  415. bool Need = false;
  416. /// Information about the pointers that may require checking.
  417. SmallVector<PointerInfo, 2> Pointers;
  418. /// Holds a partitioning of pointers into "check groups".
  419. SmallVector<RuntimeCheckingPtrGroup, 2> CheckingGroups;
  420. /// Check if pointers are in the same partition
  421. ///
  422. /// \p PtrToPartition contains the partition number for pointers (-1 if the
  423. /// pointer belongs to multiple partitions).
  424. static bool
  425. arePointersInSamePartition(const SmallVectorImpl<int> &PtrToPartition,
  426. unsigned PtrIdx1, unsigned PtrIdx2);
  427. /// Decide whether we need to issue a run-time check for pointer at
  428. /// index \p I and \p J to prove their independence.
  429. bool needsChecking(unsigned I, unsigned J) const;
  430. /// Return PointerInfo for pointer at index \p PtrIdx.
  431. const PointerInfo &getPointerInfo(unsigned PtrIdx) const {
  432. return Pointers[PtrIdx];
  433. }
  434. ScalarEvolution *getSE() const { return SE; }
  435. private:
  436. /// Groups pointers such that a single memcheck is required
  437. /// between two different groups. This will clear the CheckingGroups vector
  438. /// and re-compute it. We will only group dependecies if \p UseDependencies
  439. /// is true, otherwise we will create a separate group for each pointer.
  440. void groupChecks(MemoryDepChecker::DepCandidates &DepCands,
  441. bool UseDependencies);
  442. /// Generate the checks and return them.
  443. SmallVector<RuntimePointerCheck, 4> generateChecks();
  444. /// Try to create add a new (pointer-difference, access size) pair to
  445. /// DiffCheck for checking groups \p CGI and \p CGJ. If pointer-difference
  446. /// checks cannot be used for the groups, set CanUseDiffCheck to false.
  447. void tryToCreateDiffCheck(const RuntimeCheckingPtrGroup &CGI,
  448. const RuntimeCheckingPtrGroup &CGJ);
  449. MemoryDepChecker &DC;
  450. /// Holds a pointer to the ScalarEvolution analysis.
  451. ScalarEvolution *SE;
  452. /// Set of run-time checks required to establish independence of
  453. /// otherwise may-aliasing pointers in the loop.
  454. SmallVector<RuntimePointerCheck, 4> Checks;
  455. /// Flag indicating if pointer-difference checks can be used
  456. bool CanUseDiffCheck = true;
  457. /// A list of (pointer-difference, access size) pairs that can be used to
  458. /// prove that there are no vectorization-preventing dependencies.
  459. SmallVector<PointerDiffInfo> DiffChecks;
  460. };
  461. /// Drive the analysis of memory accesses in the loop
  462. ///
  463. /// This class is responsible for analyzing the memory accesses of a loop. It
  464. /// collects the accesses and then its main helper the AccessAnalysis class
  465. /// finds and categorizes the dependences in buildDependenceSets.
  466. ///
  467. /// For memory dependences that can be analyzed at compile time, it determines
  468. /// whether the dependence is part of cycle inhibiting vectorization. This work
  469. /// is delegated to the MemoryDepChecker class.
  470. ///
  471. /// For memory dependences that cannot be determined at compile time, it
  472. /// generates run-time checks to prove independence. This is done by
  473. /// AccessAnalysis::canCheckPtrAtRT and the checks are maintained by the
  474. /// RuntimePointerCheck class.
  475. ///
  476. /// If pointers can wrap or can't be expressed as affine AddRec expressions by
  477. /// ScalarEvolution, we will generate run-time checks by emitting a
  478. /// SCEVUnionPredicate.
  479. ///
  480. /// Checks for both memory dependences and the SCEV predicates contained in the
  481. /// PSE must be emitted in order for the results of this analysis to be valid.
  482. class LoopAccessInfo {
  483. public:
  484. LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetLibraryInfo *TLI,
  485. AAResults *AA, DominatorTree *DT, LoopInfo *LI);
  486. /// Return true we can analyze the memory accesses in the loop and there are
  487. /// no memory dependence cycles.
  488. bool canVectorizeMemory() const { return CanVecMem; }
  489. /// Return true if there is a convergent operation in the loop. There may
  490. /// still be reported runtime pointer checks that would be required, but it is
  491. /// not legal to insert them.
  492. bool hasConvergentOp() const { return HasConvergentOp; }
  493. const RuntimePointerChecking *getRuntimePointerChecking() const {
  494. return PtrRtChecking.get();
  495. }
  496. /// Number of memchecks required to prove independence of otherwise
  497. /// may-alias pointers.
  498. unsigned getNumRuntimePointerChecks() const {
  499. return PtrRtChecking->getNumberOfChecks();
  500. }
  501. /// Return true if the block BB needs to be predicated in order for the loop
  502. /// to be vectorized.
  503. static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
  504. DominatorTree *DT);
  505. /// Returns true if the value V is uniform within the loop.
  506. bool isUniform(Value *V) const;
  507. uint64_t getMaxSafeDepDistBytes() const { return MaxSafeDepDistBytes; }
  508. unsigned getNumStores() const { return NumStores; }
  509. unsigned getNumLoads() const { return NumLoads;}
  510. /// The diagnostics report generated for the analysis. E.g. why we
  511. /// couldn't analyze the loop.
  512. const OptimizationRemarkAnalysis *getReport() const { return Report.get(); }
  513. /// the Memory Dependence Checker which can determine the
  514. /// loop-independent and loop-carried dependences between memory accesses.
  515. const MemoryDepChecker &getDepChecker() const { return *DepChecker; }
  516. /// Return the list of instructions that use \p Ptr to read or write
  517. /// memory.
  518. SmallVector<Instruction *, 4> getInstructionsForAccess(Value *Ptr,
  519. bool isWrite) const {
  520. return DepChecker->getInstructionsForAccess(Ptr, isWrite);
  521. }
  522. /// If an access has a symbolic strides, this maps the pointer value to
  523. /// the stride symbol.
  524. const ValueToValueMap &getSymbolicStrides() const { return SymbolicStrides; }
  525. /// Pointer has a symbolic stride.
  526. bool hasStride(Value *V) const { return StrideSet.count(V); }
  527. /// Print the information about the memory accesses in the loop.
  528. void print(raw_ostream &OS, unsigned Depth = 0) const;
  529. /// If the loop has memory dependence involving an invariant address, i.e. two
  530. /// stores or a store and a load, then return true, else return false.
  531. bool hasDependenceInvolvingLoopInvariantAddress() const {
  532. return HasDependenceInvolvingLoopInvariantAddress;
  533. }
  534. /// Return the list of stores to invariant addresses.
  535. ArrayRef<StoreInst *> getStoresToInvariantAddresses() const {
  536. return StoresToInvariantAddresses;
  537. }
  538. /// Used to add runtime SCEV checks. Simplifies SCEV expressions and converts
  539. /// them to a more usable form. All SCEV expressions during the analysis
  540. /// should be re-written (and therefore simplified) according to PSE.
  541. /// A user of LoopAccessAnalysis will need to emit the runtime checks
  542. /// associated with this predicate.
  543. const PredicatedScalarEvolution &getPSE() const { return *PSE; }
  544. private:
  545. /// Analyze the loop.
  546. void analyzeLoop(AAResults *AA, LoopInfo *LI,
  547. const TargetLibraryInfo *TLI, DominatorTree *DT);
  548. /// Check if the structure of the loop allows it to be analyzed by this
  549. /// pass.
  550. bool canAnalyzeLoop();
  551. /// Save the analysis remark.
  552. ///
  553. /// LAA does not directly emits the remarks. Instead it stores it which the
  554. /// client can retrieve and presents as its own analysis
  555. /// (e.g. -Rpass-analysis=loop-vectorize).
  556. OptimizationRemarkAnalysis &recordAnalysis(StringRef RemarkName,
  557. Instruction *Instr = nullptr);
  558. /// Collect memory access with loop invariant strides.
  559. ///
  560. /// Looks for accesses like "a[i * StrideA]" where "StrideA" is loop
  561. /// invariant.
  562. void collectStridedAccess(Value *LoadOrStoreInst);
  563. // Emits the first unsafe memory dependence in a loop.
  564. // Emits nothing if there are no unsafe dependences
  565. // or if the dependences were not recorded.
  566. void emitUnsafeDependenceRemark();
  567. std::unique_ptr<PredicatedScalarEvolution> PSE;
  568. /// We need to check that all of the pointers in this list are disjoint
  569. /// at runtime. Using std::unique_ptr to make using move ctor simpler.
  570. std::unique_ptr<RuntimePointerChecking> PtrRtChecking;
  571. /// the Memory Dependence Checker which can determine the
  572. /// loop-independent and loop-carried dependences between memory accesses.
  573. std::unique_ptr<MemoryDepChecker> DepChecker;
  574. Loop *TheLoop;
  575. unsigned NumLoads = 0;
  576. unsigned NumStores = 0;
  577. uint64_t MaxSafeDepDistBytes = -1;
  578. /// Cache the result of analyzeLoop.
  579. bool CanVecMem = false;
  580. bool HasConvergentOp = false;
  581. /// Indicator that there are non vectorizable stores to a uniform address.
  582. bool HasDependenceInvolvingLoopInvariantAddress = false;
  583. /// List of stores to invariant addresses.
  584. SmallVector<StoreInst *> StoresToInvariantAddresses;
  585. /// The diagnostics report generated for the analysis. E.g. why we
  586. /// couldn't analyze the loop.
  587. std::unique_ptr<OptimizationRemarkAnalysis> Report;
  588. /// If an access has a symbolic strides, this maps the pointer value to
  589. /// the stride symbol.
  590. ValueToValueMap SymbolicStrides;
  591. /// Set of symbolic strides values.
  592. SmallPtrSet<Value *, 8> StrideSet;
  593. };
  594. Value *stripIntegerCast(Value *V);
  595. /// Return the SCEV corresponding to a pointer with the symbolic stride
  596. /// replaced with constant one, assuming the SCEV predicate associated with
  597. /// \p PSE is true.
  598. ///
  599. /// If necessary this method will version the stride of the pointer according
  600. /// to \p PtrToStride and therefore add further predicates to \p PSE.
  601. ///
  602. /// \p PtrToStride provides the mapping between the pointer value and its
  603. /// stride as collected by LoopVectorizationLegality::collectStridedAccess.
  604. const SCEV *replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
  605. const ValueToValueMap &PtrToStride,
  606. Value *Ptr);
  607. /// If the pointer has a constant stride return it in units of the access type
  608. /// size. Otherwise return std::nullopt.
  609. ///
  610. /// Ensure that it does not wrap in the address space, assuming the predicate
  611. /// associated with \p PSE is true.
  612. ///
  613. /// If necessary this method will version the stride of the pointer according
  614. /// to \p PtrToStride and therefore add further predicates to \p PSE.
  615. /// The \p Assume parameter indicates if we are allowed to make additional
  616. /// run-time assumptions.
  617. std::optional<int64_t>
  618. getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
  619. const Loop *Lp,
  620. const ValueToValueMap &StridesMap = ValueToValueMap(),
  621. bool Assume = false, bool ShouldCheckWrap = true);
  622. /// Returns the distance between the pointers \p PtrA and \p PtrB iff they are
  623. /// compatible and it is possible to calculate the distance between them. This
  624. /// is a simple API that does not depend on the analysis pass.
  625. /// \param StrictCheck Ensure that the calculated distance matches the
  626. /// type-based one after all the bitcasts removal in the provided pointers.
  627. std::optional<int> getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB,
  628. Value *PtrB, const DataLayout &DL,
  629. ScalarEvolution &SE,
  630. bool StrictCheck = false,
  631. bool CheckType = true);
  632. /// Attempt to sort the pointers in \p VL and return the sorted indices
  633. /// in \p SortedIndices, if reordering is required.
  634. ///
  635. /// Returns 'true' if sorting is legal, otherwise returns 'false'.
  636. ///
  637. /// For example, for a given \p VL of memory accesses in program order, a[i+4],
  638. /// a[i+0], a[i+1] and a[i+7], this function will sort the \p VL and save the
  639. /// sorted indices in \p SortedIndices as a[i+0], a[i+1], a[i+4], a[i+7] and
  640. /// saves the mask for actual memory accesses in program order in
  641. /// \p SortedIndices as <1,2,0,3>
  642. bool sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy, const DataLayout &DL,
  643. ScalarEvolution &SE,
  644. SmallVectorImpl<unsigned> &SortedIndices);
  645. /// Returns true if the memory operations \p A and \p B are consecutive.
  646. /// This is a simple API that does not depend on the analysis pass.
  647. bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
  648. ScalarEvolution &SE, bool CheckType = true);
  649. class LoopAccessInfoManager {
  650. /// The cache.
  651. DenseMap<Loop *, std::unique_ptr<LoopAccessInfo>> LoopAccessInfoMap;
  652. // The used analysis passes.
  653. ScalarEvolution &SE;
  654. AAResults &AA;
  655. DominatorTree &DT;
  656. LoopInfo &LI;
  657. const TargetLibraryInfo *TLI = nullptr;
  658. public:
  659. LoopAccessInfoManager(ScalarEvolution &SE, AAResults &AA, DominatorTree &DT,
  660. LoopInfo &LI, const TargetLibraryInfo *TLI)
  661. : SE(SE), AA(AA), DT(DT), LI(LI), TLI(TLI) {}
  662. const LoopAccessInfo &getInfo(Loop &L);
  663. void clear() { LoopAccessInfoMap.clear(); }
  664. };
  665. /// This analysis provides dependence information for the memory accesses
  666. /// of a loop.
  667. ///
  668. /// It runs the analysis for a loop on demand. This can be initiated by
  669. /// querying the loop access info via LAA::getInfo. getInfo return a
  670. /// LoopAccessInfo object. See this class for the specifics of what information
  671. /// is provided.
  672. class LoopAccessLegacyAnalysis : public FunctionPass {
  673. public:
  674. static char ID;
  675. LoopAccessLegacyAnalysis();
  676. bool runOnFunction(Function &F) override;
  677. void getAnalysisUsage(AnalysisUsage &AU) const override;
  678. /// Return the proxy object for retrieving LoopAccessInfo for individual
  679. /// loops.
  680. ///
  681. /// If there is no cached result available run the analysis.
  682. LoopAccessInfoManager &getLAIs() { return *LAIs; }
  683. void releaseMemory() override {
  684. // Invalidate the cache when the pass is freed.
  685. LAIs->clear();
  686. }
  687. private:
  688. std::unique_ptr<LoopAccessInfoManager> LAIs;
  689. };
  690. /// This analysis provides dependence information for the memory
  691. /// accesses of a loop.
  692. ///
  693. /// It runs the analysis for a loop on demand. This can be initiated by
  694. /// querying the loop access info via AM.getResult<LoopAccessAnalysis>.
  695. /// getResult return a LoopAccessInfo object. See this class for the
  696. /// specifics of what information is provided.
  697. class LoopAccessAnalysis
  698. : public AnalysisInfoMixin<LoopAccessAnalysis> {
  699. friend AnalysisInfoMixin<LoopAccessAnalysis>;
  700. static AnalysisKey Key;
  701. public:
  702. typedef LoopAccessInfoManager Result;
  703. Result run(Function &F, FunctionAnalysisManager &AM);
  704. };
  705. inline Instruction *MemoryDepChecker::Dependence::getSource(
  706. const LoopAccessInfo &LAI) const {
  707. return LAI.getDepChecker().getMemoryInstructions()[Source];
  708. }
  709. inline Instruction *MemoryDepChecker::Dependence::getDestination(
  710. const LoopAccessInfo &LAI) const {
  711. return LAI.getDepChecker().getMemoryInstructions()[Destination];
  712. }
  713. } // End llvm namespace
  714. #endif
  715. #ifdef __GNUC__
  716. #pragma GCC diagnostic pop
  717. #endif