//===- Loads.cpp - Local load analysis ------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines simple local analyses for load instructions.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumeBundleQueries.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"

using namespace llvm;

static bool isAligned(const Value *Base, const APInt &Offset, Align Alignment,
                      const DataLayout &DL) {
  Align BA = Base->getPointerAlignment(DL);
  const APInt APAlign(Offset.getBitWidth(), Alignment.value());
  assert(APAlign.isPowerOf2() && "must be a power of 2!");
  return BA >= Alignment && !(Offset & (APAlign - 1));
}

/// Test if V is always a pointer to allocated and suitably aligned memory for
/// a simple load or store.
static bool isDereferenceableAndAlignedPointer(
    const Value *V, Align Alignment, const APInt &Size, const DataLayout &DL,
    const Instruction *CtxI, const DominatorTree *DT,
    const TargetLibraryInfo *TLI, SmallPtrSetImpl<const Value *> &Visited,
    unsigned MaxDepth) {
  assert(V->getType()->isPointerTy() && "Base must be pointer");

  // Recursion limit.
  if (MaxDepth-- == 0)
    return false;

  // Already visited? Bail out, we've likely hit unreachable code.
  if (!Visited.insert(V).second)
    return false;

  // Note that it is not safe to speculate into a malloc'd region because
  // malloc may return null.

  // Recurse into both hands of select.
  if (const SelectInst *Sel = dyn_cast<SelectInst>(V)) {
    return isDereferenceableAndAlignedPointer(Sel->getTrueValue(), Alignment,
                                              Size, DL, CtxI, DT, TLI, Visited,
                                              MaxDepth) &&
           isDereferenceableAndAlignedPointer(Sel->getFalseValue(), Alignment,
                                              Size, DL, CtxI, DT, TLI, Visited,
                                              MaxDepth);
  }

  // bitcast instructions are no-ops as far as dereferenceability is concerned.
  if (const BitCastOperator *BC = dyn_cast<BitCastOperator>(V)) {
    if (BC->getSrcTy()->isPointerTy())
      return isDereferenceableAndAlignedPointer(BC->getOperand(0), Alignment,
                                                Size, DL, CtxI, DT, TLI,
                                                Visited, MaxDepth);
  }

  bool CheckForNonNull, CheckForFreed;
  APInt KnownDerefBytes(Size.getBitWidth(),
                        V->getPointerDereferenceableBytes(DL, CheckForNonNull,
                                                          CheckForFreed));
  if (KnownDerefBytes.getBoolValue() && KnownDerefBytes.uge(Size) &&
      !CheckForFreed)
    if (!CheckForNonNull || isKnownNonZero(V, DL, 0, nullptr, CtxI, DT)) {
      // As we recursed through GEPs to get here, we've incrementally checked
      // that each step advanced by a multiple of the alignment. If our base is
      // properly aligned, then the original offset accessed must also be.
      Type *Ty = V->getType();
      assert(Ty->isSized() && "must be sized");
      APInt Offset(DL.getTypeStoreSizeInBits(Ty), 0);
      return isAligned(V, Offset, Alignment, DL);
    }

  if (CtxI) {
    /// Look through assumes to see if both dereferenceability and alignment
    /// can be proven by an assume.
    RetainedKnowledge AlignRK;
    RetainedKnowledge DerefRK;
    if (getKnowledgeForValue(
            V, {Attribute::Dereferenceable, Attribute::Alignment}, nullptr,
            [&](RetainedKnowledge RK, Instruction *Assume, auto) {
              if (!isValidAssumeForContext(Assume, CtxI))
                return false;
              if (RK.AttrKind == Attribute::Alignment)
                AlignRK = std::max(AlignRK, RK);
              if (RK.AttrKind == Attribute::Dereferenceable)
                DerefRK = std::max(DerefRK, RK);
              if (AlignRK && DerefRK && AlignRK.ArgValue >= Alignment.value() &&
                  DerefRK.ArgValue >= Size.getZExtValue())
                return true; // We have found what we needed so we stop looking.
              return false; // Other assumes may have better information, so
                            // keep looking.
            }))
      return true;
  }

  /// TODO: Refactor this function to be able to search independently for
  /// dereferenceability and alignment requirements.

  // For GEPs, determine if the indexing lands within the allocated object.
  if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
    const Value *Base = GEP->getPointerOperand();

    APInt Offset(DL.getIndexTypeSizeInBits(GEP->getType()), 0);
    if (!GEP->accumulateConstantOffset(DL, Offset) || Offset.isNegative() ||
        !Offset.urem(APInt(Offset.getBitWidth(), Alignment.value()))
             .isMinValue())
      return false;

    // If the base pointer is dereferenceable for Offset+Size bytes, then the
    // GEP (== Base + Offset) is dereferenceable for Size bytes. If the base
    // pointer is aligned to Align bytes, and the Offset is divisible by Align
    // then the GEP (== Base + Offset == k_0 * Align + k_1 * Align) is also
    // aligned to Align bytes.

    // Offset and Size may have different bit widths if we have visited an
    // addrspacecast, so we can't do arithmetic directly on the APInt values.
    return isDereferenceableAndAlignedPointer(
        Base, Alignment, Offset + Size.sextOrTrunc(Offset.getBitWidth()), DL,
        CtxI, DT, TLI, Visited, MaxDepth);
  }

  // For gc.relocate, look through relocations.
  if (const GCRelocateInst *RelocateInst = dyn_cast<GCRelocateInst>(V))
    return isDereferenceableAndAlignedPointer(RelocateInst->getDerivedPtr(),
                                              Alignment, Size, DL, CtxI, DT,
                                              TLI, Visited, MaxDepth);

  if (const AddrSpaceCastOperator *ASC = dyn_cast<AddrSpaceCastOperator>(V))
    return isDereferenceableAndAlignedPointer(ASC->getOperand(0), Alignment,
                                              Size, DL, CtxI, DT, TLI,
                                              Visited, MaxDepth);

  if (const auto *Call = dyn_cast<CallBase>(V)) {
    if (auto *RP = getArgumentAliasingToReturnedPointer(Call, true))
      return isDereferenceableAndAlignedPointer(RP, Alignment, Size, DL, CtxI,
                                                DT, TLI, Visited, MaxDepth);

    // If we have a call we can't recurse through, check to see if this is an
    // allocation function for which we can establish a minimum object size.
    // Such a minimum object size is analogous to a deref_or_null attribute in
    // that we still need to prove the result non-null at the point of use.
    // NOTE: We can only use the object size as a base fact as we a) need to
    // prove alignment too, and b) don't want the compile time impact of a
    // separate recursive walk.
    ObjectSizeOpts Opts;
    // TODO: It may be okay to round to align, but that would imply that
    // accessing slightly out of bounds was legal, and we're currently
    // inconsistent about that. For the moment, be conservative.
    Opts.RoundToAlign = false;
    Opts.NullIsUnknownSize = true;
    uint64_t ObjSize;
    if (getObjectSize(V, ObjSize, DL, TLI, Opts)) {
      APInt KnownDerefBytes(Size.getBitWidth(), ObjSize);
      if (KnownDerefBytes.getBoolValue() && KnownDerefBytes.uge(Size) &&
          isKnownNonZero(V, DL, 0, nullptr, CtxI, DT) && !V->canBeFreed()) {
        // As we recursed through GEPs to get here, we've incrementally
        // checked that each step advanced by a multiple of the alignment. If
        // our base is properly aligned, then the original offset accessed
        // must also be.
        Type *Ty = V->getType();
        assert(Ty->isSized() && "must be sized");
        APInt Offset(DL.getTypeStoreSizeInBits(Ty), 0);
        return isAligned(V, Offset, Alignment, DL);
      }
    }
  }

  // If we don't know, assume the worst.
  return false;
}

bool llvm::isDereferenceableAndAlignedPointer(const Value *V, Align Alignment,
                                              const APInt &Size,
                                              const DataLayout &DL,
                                              const Instruction *CtxI,
                                              const DominatorTree *DT,
                                              const TargetLibraryInfo *TLI) {
  // Note: At the moment, Size can be zero. This ends up being interpreted as
  // a query of whether [Base, V] is dereferenceable and V is aligned (since
  // that's what the implementation happened to do). It's unclear if this is
  // the desired semantic, but at least SelectionDAG does exercise this case.

  SmallPtrSet<const Value *, 32> Visited;
  return ::isDereferenceableAndAlignedPointer(V, Alignment, Size, DL, CtxI, DT,
                                              TLI, Visited, 16);
}

bool llvm::isDereferenceableAndAlignedPointer(const Value *V, Type *Ty,
                                              Align Alignment,
                                              const DataLayout &DL,
                                              const Instruction *CtxI,
                                              const DominatorTree *DT,
                                              const TargetLibraryInfo *TLI) {
  // For unsized types or scalable vectors we don't know exactly how many bytes
  // are dereferenced, so bail out.
  if (!Ty->isSized() || isa<ScalableVectorType>(Ty))
    return false;

  // When dereferenceability information is provided by a dereferenceable
  // attribute, we know exactly how many bytes are dereferenceable. If we can
  // determine the exact offset to the attributed variable, we can use that
  // information here.
  APInt AccessSize(DL.getPointerTypeSizeInBits(V->getType()),
                   DL.getTypeStoreSize(Ty));
  return isDereferenceableAndAlignedPointer(V, Alignment, AccessSize, DL, CtxI,
                                            DT, TLI);
}

bool llvm::isDereferenceablePointer(const Value *V, Type *Ty,
                                    const DataLayout &DL,
                                    const Instruction *CtxI,
                                    const DominatorTree *DT,
                                    const TargetLibraryInfo *TLI) {
  return isDereferenceableAndAlignedPointer(V, Ty, Align(1), DL, CtxI, DT, TLI);
}
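
// Illustrative sketch (not part of the original file, and compiled out so the
// translation unit is unchanged): how a hypothetical client pass might use the
// queries above to decide whether a guarded load can be speculated to a given
// program point. The helper name canSpeculateLoadSketch is invented for
// illustration only; the calls are to the public wrappers defined above.
#if 0
static bool canSpeculateLoadSketch(const LoadInst &LI, const Instruction &CtxI,
                                   const DominatorTree &DT,
                                   const TargetLibraryInfo &TLI) {
  // Only simple (non-volatile, non-atomic) loads are speculation candidates.
  if (!LI.isUnordered())
    return false;
  const DataLayout &DL = LI.getModule()->getDataLayout();
  // The load may be hoisted to CtxI only if its address is known to be
  // dereferenceable for the loaded type and at least as aligned as the load
  // requires at that point.
  return isDereferenceableAndAlignedPointer(LI.getPointerOperand(),
                                            LI.getType(), LI.getAlign(), DL,
                                            &CtxI, &DT, &TLI);
}
#endif
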
/// Test if A and B will obviously have the same value.
///
/// This includes recognizing that %t0 and %t1 will have the same
/// value in code like this:
/// \code
///   %t0 = getelementptr \@a, 0, 3
///   store i32 0, i32* %t0
///   %t1 = getelementptr \@a, 0, 3
///   %t2 = load i32* %t1
/// \endcode
///
static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
  // Test if the values are trivially equivalent.
  if (A == B)
    return true;

  // Test if the values come from identical arithmetic instructions.
  // Use isIdenticalToWhenDefined instead of isIdenticalTo because
  // this function is only used when one address use dominates the
  // other, which means that they'll always either have the same
  // value or one of them will have an undefined value.
  if (isa<BinaryOperator>(A) || isa<CastInst>(A) || isa<PHINode>(A) ||
      isa<GetElementPtrInst>(A))
    if (const Instruction *BI = dyn_cast<Instruction>(B))
      if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
        return true;

  // Otherwise they may not be equivalent.
  return false;
}

bool llvm::isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L,
                                             ScalarEvolution &SE,
                                             DominatorTree &DT) {
  auto &DL = LI->getModule()->getDataLayout();
  Value *Ptr = LI->getPointerOperand();

  APInt EltSize(DL.getIndexTypeSizeInBits(Ptr->getType()),
                DL.getTypeStoreSize(LI->getType()).getFixedSize());
  const Align Alignment = LI->getAlign();

  Instruction *HeaderFirstNonPHI = L->getHeader()->getFirstNonPHI();

  // If given a uniform (i.e. non-varying) address, see if we can prove the
  // access is safe within the loop w/o needing predication.
  if (L->isLoopInvariant(Ptr))
    return isDereferenceableAndAlignedPointer(Ptr, Alignment, EltSize, DL,
                                              HeaderFirstNonPHI, &DT);

  // Otherwise, check to see if we have a repeating access pattern where we can
  // prove that all accesses are well aligned and dereferenceable.
  auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Ptr));
  if (!AddRec || AddRec->getLoop() != L || !AddRec->isAffine())
    return false;
  auto *Step = dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(SE));
  if (!Step)
    return false;
  // TODO: generalize to access patterns which have gaps
  if (Step->getAPInt() != EltSize)
    return false;

  auto TC = SE.getSmallConstantMaxTripCount(L);
  if (!TC)
    return false;

  const APInt AccessSize = TC * EltSize;

  auto *StartS = dyn_cast<SCEVUnknown>(AddRec->getStart());
  if (!StartS)
    return false;
  assert(SE.isLoopInvariant(StartS, L) && "implied by addrec definition");
  Value *Base = StartS->getValue();

  // For the moment, restrict ourselves to the case where the access size is a
  // multiple of the requested alignment and the base is aligned.
  // TODO: generalize if a case is found which warrants it.
  if (EltSize.urem(Alignment.value()) != 0)
    return false;
  return isDereferenceableAndAlignedPointer(Base, Alignment, AccessSize, DL,
                                            HeaderFirstNonPHI, &DT);
}
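
// Illustrative sketch (not part of the original file, compiled out): a
// hypothetical loop transform asking whether the load executed on every
// iteration is safe to execute unconditionally, e.g. before turning a
// predicated load into an unconditional one. The helper name
// canDropLoadGuardInLoopSketch is invented for illustration.
#if 0
static bool canDropLoadGuardInLoopSketch(LoadInst &LI, Loop &L,
                                         ScalarEvolution &SE,
                                         DominatorTree &DT) {
  // Only simple (non-volatile, non-atomic) loads are candidates.
  if (!LI.isUnordered())
    return false;
  // Safe if the address is dereferenceable and aligned on every iteration,
  // which the query above proves for loop-invariant or affine (AddRec)
  // addresses with a known constant trip count bound.
  return isDereferenceableAndAlignedInLoop(&LI, &L, SE, DT);
}
#endif
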
/// Check if executing a load of this pointer value cannot trap.
///
/// If DT and ScanFrom are specified this method performs context-sensitive
/// analysis and returns true if it is safe to load immediately before ScanFrom.
///
/// If it is not obviously safe to load from the specified pointer, we do
/// a quick local scan of the basic block containing \c ScanFrom, to determine
/// if the address is already accessed.
///
/// This uses the pointee type to determine how many bytes need to be safe to
/// load from the pointer.
bool llvm::isSafeToLoadUnconditionally(Value *V, Align Alignment, APInt &Size,
                                       const DataLayout &DL,
                                       Instruction *ScanFrom,
                                       const DominatorTree *DT,
                                       const TargetLibraryInfo *TLI) {
  // If DT is not specified we can't make a context-sensitive query.
  const Instruction *CtxI = DT ? ScanFrom : nullptr;
  if (isDereferenceableAndAlignedPointer(V, Alignment, Size, DL, CtxI, DT, TLI))
    return true;

  if (!ScanFrom)
    return false;

  if (Size.getBitWidth() > 64)
    return false;
  const uint64_t LoadSize = Size.getZExtValue();

  // Otherwise, be a little bit aggressive by scanning the local block where we
  // want to check to see if the pointer is already being loaded or stored
  // from/to. If so, the previous load or store would have already trapped,
  // so there is no harm doing an extra load (also, CSE will later eliminate
  // the load entirely).
  BasicBlock::iterator BBI = ScanFrom->getIterator(),
                       E = ScanFrom->getParent()->begin();

  // We can at least always strip pointer casts even though we can't use the
  // base here.
  V = V->stripPointerCasts();

  while (BBI != E) {
    --BBI;

    // If we see a free or a call which may write to memory (i.e. which might
    // do a free), the pointer could be marked invalid.
    if (isa<CallInst>(BBI) && BBI->mayWriteToMemory() &&
        !isa<DbgInfoIntrinsic>(BBI))
      return false;

    Value *AccessedPtr;
    Type *AccessedTy;
    Align AccessedAlign;
    if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
      // Ignore volatile loads. The execution of a volatile load cannot
      // be used to prove an address is backed by regular memory; it can,
      // for example, point to an MMIO register.
      if (LI->isVolatile())
        continue;
      AccessedPtr = LI->getPointerOperand();
      AccessedTy = LI->getType();
      AccessedAlign = LI->getAlign();
    } else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
      // Ignore volatile stores (see comment for loads).
      if (SI->isVolatile())
        continue;
      AccessedPtr = SI->getPointerOperand();
      AccessedTy = SI->getValueOperand()->getType();
      AccessedAlign = SI->getAlign();
    } else
      continue;

    if (AccessedAlign < Alignment)
      continue;

    // Handle trivial cases.
    if (AccessedPtr == V &&
        LoadSize <= DL.getTypeStoreSize(AccessedTy))
      return true;

    if (AreEquivalentAddressValues(AccessedPtr->stripPointerCasts(), V) &&
        LoadSize <= DL.getTypeStoreSize(AccessedTy))
      return true;
  }
  return false;
}

bool llvm::isSafeToLoadUnconditionally(Value *V, Type *Ty, Align Alignment,
                                       const DataLayout &DL,
                                       Instruction *ScanFrom,
                                       const DominatorTree *DT,
                                       const TargetLibraryInfo *TLI) {
  APInt Size(DL.getIndexTypeSizeInBits(V->getType()), DL.getTypeStoreSize(Ty));
  return isSafeToLoadUnconditionally(V, Alignment, Size, DL, ScanFrom, DT, TLI);
}
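
// Illustrative sketch (not part of the original file, compiled out): how a
// hypothetical caller, e.g. one folding a select of two addresses into a
// single load, might check that loading right before the insertion point is
// trap-free. The helper name canLoadBeforeSketch is invented for illustration.
#if 0
static bool canLoadBeforeSketch(Value *Ptr, Type *Ty, Align Alignment,
                                Instruction *InsertPt,
                                const DominatorTree *DT,
                                const TargetLibraryInfo *TLI) {
  const DataLayout &DL = InsertPt->getModule()->getDataLayout();
  // Combines the dereferenceability query with a short backward scan of the
  // block for an existing load/store of the same address (see above).
  return isSafeToLoadUnconditionally(Ptr, Ty, Alignment, DL, InsertPt, DT, TLI);
}
#endif
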
/// DefMaxInstsToScan - the default number of maximum instructions
/// to scan in the block, used by FindAvailableLoadedValue().
/// FindAvailableLoadedValue() was introduced in r60148, to improve jump
/// threading in part by eliminating partially redundant loads.
/// At that point, the value of MaxInstsToScan was already set to '6'
/// without documented explanation.
cl::opt<unsigned>
llvm::DefMaxInstsToScan("available-load-scan-limit", cl::init(6), cl::Hidden,
  cl::desc("Use this to specify the default maximum number of instructions "
           "to scan backward from a given instruction, when searching for "
           "available loaded value"));

Value *llvm::FindAvailableLoadedValue(LoadInst *Load,
                                      BasicBlock *ScanBB,
                                      BasicBlock::iterator &ScanFrom,
                                      unsigned MaxInstsToScan,
                                      AAResults *AA, bool *IsLoad,
                                      unsigned *NumScanedInst) {
  // Don't CSE a load that is volatile or anything stronger than unordered.
  if (!Load->isUnordered())
    return nullptr;

  MemoryLocation Loc = MemoryLocation::get(Load);
  return findAvailablePtrLoadStore(Loc, Load->getType(), Load->isAtomic(),
                                   ScanBB, ScanFrom, MaxInstsToScan, AA, IsLoad,
                                   NumScanedInst);
}

// Check if the load and the store have the same base, constant offsets and
// non-overlapping access ranges.
static bool areNonOverlapSameBaseLoadAndStore(const Value *LoadPtr,
                                              Type *LoadTy,
                                              const Value *StorePtr,
                                              Type *StoreTy,
                                              const DataLayout &DL) {
  APInt LoadOffset(DL.getIndexTypeSizeInBits(LoadPtr->getType()), 0);
  APInt StoreOffset(DL.getIndexTypeSizeInBits(StorePtr->getType()), 0);
  const Value *LoadBase = LoadPtr->stripAndAccumulateConstantOffsets(
      DL, LoadOffset, /* AllowNonInbounds */ false);
  const Value *StoreBase = StorePtr->stripAndAccumulateConstantOffsets(
      DL, StoreOffset, /* AllowNonInbounds */ false);
  if (LoadBase != StoreBase)
    return false;
  auto LoadAccessSize = LocationSize::precise(DL.getTypeStoreSize(LoadTy));
  auto StoreAccessSize = LocationSize::precise(DL.getTypeStoreSize(StoreTy));
  ConstantRange LoadRange(LoadOffset,
                          LoadOffset + LoadAccessSize.toRaw());
  ConstantRange StoreRange(StoreOffset,
                           StoreOffset + StoreAccessSize.toRaw());
  return LoadRange.intersectWith(StoreRange).isEmptySet();
}

static Value *getAvailableLoadStore(Instruction *Inst, const Value *Ptr,
                                    Type *AccessTy, bool AtLeastAtomic,
                                    const DataLayout &DL, bool *IsLoadCSE) {
  // If this is a load of Ptr, the loaded value is available.
  // (This is true even if the load is volatile or atomic, although
  // those cases are unlikely.)
  if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
    // We can value forward from an atomic to a non-atomic, but not the
    // other way around.
    if (LI->isAtomic() < AtLeastAtomic)
      return nullptr;

    Value *LoadPtr = LI->getPointerOperand()->stripPointerCasts();
    if (!AreEquivalentAddressValues(LoadPtr, Ptr))
      return nullptr;

    if (CastInst::isBitOrNoopPointerCastable(LI->getType(), AccessTy, DL)) {
      if (IsLoadCSE)
        *IsLoadCSE = true;
      return LI;
    }
  }

  // If this is a store through Ptr, the value is available!
  // (This is true even if the store is volatile or atomic, although
  // those cases are unlikely.)
  if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    // We can value forward from an atomic to a non-atomic, but not the
    // other way around.
    if (SI->isAtomic() < AtLeastAtomic)
      return nullptr;

    Value *StorePtr = SI->getPointerOperand()->stripPointerCasts();
    if (!AreEquivalentAddressValues(StorePtr, Ptr))
      return nullptr;

    if (IsLoadCSE)
      *IsLoadCSE = false;

    Value *Val = SI->getValueOperand();
    if (CastInst::isBitOrNoopPointerCastable(Val->getType(), AccessTy, DL))
      return Val;

    TypeSize StoreSize = DL.getTypeStoreSize(Val->getType());
    TypeSize LoadSize = DL.getTypeStoreSize(AccessTy);
    if (TypeSize::isKnownLE(LoadSize, StoreSize))
      if (auto *C = dyn_cast<Constant>(Val))
        return ConstantFoldLoadFromConst(C, AccessTy, DL);
  }

  return nullptr;
}

Value *llvm::findAvailablePtrLoadStore(
    const MemoryLocation &Loc, Type *AccessTy, bool AtLeastAtomic,
    BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan,
    AAResults *AA, bool *IsLoadCSE, unsigned *NumScanedInst) {
  if (MaxInstsToScan == 0)
    MaxInstsToScan = ~0U;

  const DataLayout &DL = ScanBB->getModule()->getDataLayout();
  const Value *StrippedPtr = Loc.Ptr->stripPointerCasts();

  while (ScanFrom != ScanBB->begin()) {
    // We must ignore debug info directives when counting (otherwise they
    // would affect codegen).
    Instruction *Inst = &*--ScanFrom;
    if (Inst->isDebugOrPseudoInst())
      continue;

    // Restore ScanFrom to expected value in case next test succeeds.
    ScanFrom++;

    if (NumScanedInst)
      ++(*NumScanedInst);

    // Don't scan huge blocks.
    if (MaxInstsToScan-- == 0)
      return nullptr;

    --ScanFrom;

    if (Value *Available = getAvailableLoadStore(Inst, StrippedPtr, AccessTy,
                                                 AtLeastAtomic, DL, IsLoadCSE))
      return Available;

    // Try to get the store size for the type.
    if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
      Value *StorePtr = SI->getPointerOperand()->stripPointerCasts();

      // If both StrippedPtr and StorePtr reach all the way to an alloca or
      // global and they are different, ignore the store. This is a trivial
      // form of alias analysis that is important for reg2mem'd code.
      if ((isa<AllocaInst>(StrippedPtr) || isa<GlobalVariable>(StrippedPtr)) &&
          (isa<AllocaInst>(StorePtr) || isa<GlobalVariable>(StorePtr)) &&
          StrippedPtr != StorePtr)
        continue;

      if (!AA) {
        // When AA isn't available, but the load and the store have the same
        // base, constant offsets and non-overlapping access ranges, ignore the
        // store. This is a simple form of alias analysis that is used by the
        // inliner. FIXME: use BasicAA if possible.
        if (areNonOverlapSameBaseLoadAndStore(
                Loc.Ptr, AccessTy, SI->getPointerOperand(),
                SI->getValueOperand()->getType(), DL))
          continue;
      } else {
        // If we have alias analysis and it says the store won't modify the
        // loaded value, ignore the store.
        if (!isModSet(AA->getModRefInfo(SI, Loc)))
          continue;
      }

      // Otherwise the store may or may not alias the pointer; bail out.
      ++ScanFrom;
      return nullptr;
    }

    // If this is some other instruction that may clobber Ptr, bail out.
    if (Inst->mayWriteToMemory()) {
      // If alias analysis claims that it really won't modify the load,
      // ignore it.
      if (AA && !isModSet(AA->getModRefInfo(Inst, Loc)))
        continue;

      // May modify the pointer, bail out.
      ++ScanFrom;
      return nullptr;
    }
  }

  // Got to the start of the block, we didn't find it, but are done for this
  // block.
  return nullptr;
}

Value *llvm::FindAvailableLoadedValue(LoadInst *Load, AAResults &AA,
                                      bool *IsLoadCSE,
                                      unsigned MaxInstsToScan) {
  const DataLayout &DL = Load->getModule()->getDataLayout();
  Value *StrippedPtr = Load->getPointerOperand()->stripPointerCasts();
  BasicBlock *ScanBB = Load->getParent();
  Type *AccessTy = Load->getType();
  bool AtLeastAtomic = Load->isAtomic();

  if (!Load->isUnordered())
    return nullptr;

  // Try to find an available value first, and delay expensive alias analysis
  // queries until later.
  Value *Available = nullptr;
  SmallVector<Instruction *> MustNotAliasInsts;
  for (Instruction &Inst : make_range(++Load->getReverseIterator(),
                                      ScanBB->rend())) {
    if (Inst.isDebugOrPseudoInst())
      continue;

    if (MaxInstsToScan-- == 0)
      return nullptr;

    Available = getAvailableLoadStore(&Inst, StrippedPtr, AccessTy,
                                      AtLeastAtomic, DL, IsLoadCSE);
    if (Available)
      break;

    if (Inst.mayWriteToMemory())
      MustNotAliasInsts.push_back(&Inst);
  }

  // If we found an available value, ensure that the instructions in between
  // did not modify the memory location.
  if (Available) {
    MemoryLocation Loc = MemoryLocation::get(Load);
    for (Instruction *Inst : MustNotAliasInsts)
      if (isModSet(AA.getModRefInfo(Inst, Loc)))
        return nullptr;
  }

  return Available;
}
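
// Illustrative sketch (not part of the original file, compiled out): a
// hypothetical peephole that forwards a previously loaded or stored value to
// a redundant load in the same block, in the style of the scans above. The
// helper name forwardAvailableValueSketch is invented for illustration.
#if 0
static Value *forwardAvailableValueSketch(LoadInst &Redundant, AAResults &AA) {
  bool IsLoadCSE = false;
  // Scan backwards (up to DefMaxInstsToScan instructions) for a load of the
  // same address, or a store through it whose stored value can be reused.
  if (Value *V = FindAvailableLoadedValue(&Redundant, AA, &IsLoadCSE,
                                          DefMaxInstsToScan))
    return V; // The caller would then replace all uses of Redundant with V.
  return nullptr;
}
#endif
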
bool llvm::canReplacePointersIfEqual(Value *A, Value *B, const DataLayout &DL,
                                     Instruction *CtxI) {
  Type *Ty = A->getType();
  assert(Ty == B->getType() && Ty->isPointerTy() &&
         "values must have matching pointer types");

  // NOTE: The checks in the function are incomplete and currently miss illegal
  // cases! The current implementation is a starting point and the
  // implementation should be made stricter over time.
  if (auto *C = dyn_cast<Constant>(B)) {
    // Do not allow replacing a pointer with a constant pointer, unless it is
    // either null or at least one byte is dereferenceable.
    APInt OneByte(DL.getPointerTypeSizeInBits(Ty), 1);
    return C->isNullValue() ||
           isDereferenceableAndAlignedPointer(B, Align(1), OneByte, DL, CtxI);
  }

  return true;
}