CaptureTracking.cpp

//===--- CaptureTracking.cpp - Determine whether a pointer is captured ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains routines that help determine which pointers are captured.
// A pointer value is captured if the function makes a copy of any part of the
// pointer that outlives the call. Not being captured means, more or less, that
// the pointer is only dereferenced and not stored in a global. Returning part
// of the pointer as the function return value may or may not count as capturing
// the pointer, depending on the context.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/CommandLine.h"

using namespace llvm;

#define DEBUG_TYPE "capture-tracking"

STATISTIC(NumCaptured, "Number of pointers maybe captured");
STATISTIC(NumNotCaptured, "Number of pointers not captured");
STATISTIC(NumCapturedBefore, "Number of pointers maybe captured before");
STATISTIC(NumNotCapturedBefore, "Number of pointers not captured before");

/// The default value for MaxUsesToExplore argument. It's relatively small to
/// keep the cost of analysis reasonable for clients like BasicAliasAnalysis,
/// where the results can't be cached.
/// TODO: we should probably introduce a caching CaptureTracking analysis and
/// use it where possible. The caching version can use a much higher limit or
/// have no cap at all.
static cl::opt<unsigned>
    DefaultMaxUsesToExplore("capture-tracking-max-uses-to-explore", cl::Hidden,
                            cl::desc("Maximal number of uses to explore."),
                            cl::init(100));

unsigned llvm::getDefaultMaxUsesToExploreForCaptureTracking() {
  return DefaultMaxUsesToExplore;
}

CaptureTracker::~CaptureTracker() = default;

bool CaptureTracker::shouldExplore(const Use *U) { return true; }

bool CaptureTracker::isDereferenceableOrNull(Value *O, const DataLayout &DL) {
  // An inbounds GEP can either be a valid pointer (pointing into
  // or to the end of an allocation), or be null in the default
  // address space. So for an inbounds GEP there is no way to let
  // the pointer escape using clever GEP hacking because doing so
  // would make the pointer point outside of the allocated object
  // and thus make the GEP result a poison value. Similarly, other
  // dereferenceable pointers cannot be manipulated without producing
  // poison.
  if (auto *GEP = dyn_cast<GetElementPtrInst>(O))
    if (GEP->isInBounds())
      return true;
  bool CanBeNull, CanBeFreed;
  return O->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed);
}

namespace {

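/// Capture tracker that records whether the pointer may be captured anywhere
/// in the function. Uses by ephemeral values are ignored, and returns are
/// treated as captures only when ReturnCaptures is set.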
struct SimpleCaptureTracker : public CaptureTracker {
  explicit SimpleCaptureTracker(
      const SmallPtrSetImpl<const Value *> &EphValues, bool ReturnCaptures)
      : EphValues(EphValues), ReturnCaptures(ReturnCaptures) {}

  void tooManyUses() override { Captured = true; }

  bool captured(const Use *U) override {
    if (isa<ReturnInst>(U->getUser()) && !ReturnCaptures)
      return false;

    if (EphValues.contains(U->getUser()))
      return false;

    Captured = true;
    return true;
  }

  const SmallPtrSetImpl<const Value *> &EphValues;
  bool ReturnCaptures;
  bool Captured = false;
};

/// Only find pointer captures which happen before the given instruction. Uses
/// the dominator tree to determine whether one instruction is before another.
/// Only supports the case where the Value is defined in the same basic block
/// as the given instruction and the use.
struct CapturesBefore : public CaptureTracker {
  CapturesBefore(bool ReturnCaptures, const Instruction *I,
                 const DominatorTree *DT, bool IncludeI, const LoopInfo *LI)
      : BeforeHere(I), DT(DT), ReturnCaptures(ReturnCaptures),
        IncludeI(IncludeI), LI(LI) {}

  void tooManyUses() override { Captured = true; }

  bool isSafeToPrune(Instruction *I) {
    if (BeforeHere == I)
      return !IncludeI;

    // We explore this usage only if the usage can reach "BeforeHere".
    // If use is not reachable from entry, there is no need to explore.
    if (!DT->isReachableFromEntry(I->getParent()))
      return true;

    // Check whether there is a path from I to BeforeHere.
    return !isPotentiallyReachable(I, BeforeHere, nullptr, DT, LI);
  }

  bool captured(const Use *U) override {
    Instruction *I = cast<Instruction>(U->getUser());
    if (isa<ReturnInst>(I) && !ReturnCaptures)
      return false;

    // Check isSafeToPrune() here rather than in shouldExplore() to avoid
    // an expensive reachability query for every instruction we look at.
    // Instead we only do one for actual capturing candidates.
    if (isSafeToPrune(I))
      return false;

    Captured = true;
    return true;
  }

  const Instruction *BeforeHere;
  const DominatorTree *DT;
  bool ReturnCaptures;
  bool IncludeI;
  bool Captured = false;
  const LoopInfo *LI;
};

/// Find the 'earliest' instruction before which the pointer is known not to
/// be captured. Here an instruction A is considered earlier than instruction
/// B, if A dominates B. If 2 escapes do not dominate each other, the
/// terminator of the common dominator is chosen. If not all uses can be
/// analyzed, the earliest escape is set to the first instruction in the
/// function entry block.
// NOTE: Users have to make sure instructions compared against the earliest
// escape are not in a cycle.
struct EarliestCaptures : public CaptureTracker {
  EarliestCaptures(bool ReturnCaptures, Function &F, const DominatorTree &DT,
                   const SmallPtrSetImpl<const Value *> &EphValues)
      : EphValues(EphValues), DT(DT), ReturnCaptures(ReturnCaptures), F(F) {}

  void tooManyUses() override {
    Captured = true;
    EarliestCapture = &*F.getEntryBlock().begin();
  }

  bool captured(const Use *U) override {
    Instruction *I = cast<Instruction>(U->getUser());
    if (isa<ReturnInst>(I) && !ReturnCaptures)
      return false;

    if (EphValues.contains(I))
      return false;

    if (!EarliestCapture)
      EarliestCapture = I;
    else
      EarliestCapture = DT.findNearestCommonDominator(EarliestCapture, I);
    Captured = true;

    // Return false to continue analysis; we need to see all potential
    // captures.
    return false;
  }

  const SmallPtrSetImpl<const Value *> &EphValues;
  Instruction *EarliestCapture = nullptr;
  const DominatorTree &DT;
  bool ReturnCaptures;
  bool Captured = false;
  Function &F;
};

} // namespace

/// PointerMayBeCaptured - Return true if this pointer value may be captured
/// by the enclosing function (which is required to exist). This routine can
/// be expensive, so consider caching the results. The boolean ReturnCaptures
/// specifies whether returning the value (or part of it) from the function
/// counts as capturing it or not. The boolean StoreCaptures specifies whether
/// storing the value (or part of it) into memory anywhere automatically
/// counts as capturing it or not.
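// For example, a client could ask whether a local allocation escapes. This is
// an illustrative sketch only (`AI` stands for some AllocaInst visible to the
// caller); passing MaxUsesToExplore == 0 uses the default exploration limit:
//   if (!PointerMayBeCaptured(AI, /*ReturnCaptures=*/true,
//                             /*StoreCaptures=*/true, /*MaxUsesToExplore=*/0))
//     ; // The alloca's address does not escape the enclosing function.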
bool llvm::PointerMayBeCaptured(const Value *V, bool ReturnCaptures,
                                bool StoreCaptures, unsigned MaxUsesToExplore) {
  SmallPtrSet<const Value *, 1> Empty;
  return PointerMayBeCaptured(V, ReturnCaptures, StoreCaptures, Empty,
                              MaxUsesToExplore);
}

/// Variant of the above function which accepts a set of Values that are
/// ephemeral and cannot cause pointers to escape.
bool llvm::PointerMayBeCaptured(const Value *V, bool ReturnCaptures,
                                bool StoreCaptures,
                                const SmallPtrSetImpl<const Value *> &EphValues,
                                unsigned MaxUsesToExplore) {
  assert(!isa<GlobalValue>(V) &&
         "It doesn't make sense to ask whether a global is captured.");

  // TODO: If StoreCaptures is not true, we could do Fancy analysis
  // to determine whether this store is not actually an escape point.
  // In that case, BasicAliasAnalysis should be updated as well to
  // take advantage of this.
  (void)StoreCaptures;

  SimpleCaptureTracker SCT(EphValues, ReturnCaptures);
  PointerMayBeCaptured(V, &SCT, MaxUsesToExplore);
  if (SCT.Captured)
    ++NumCaptured;
  else
    ++NumNotCaptured;
  return SCT.Captured;
}

/// PointerMayBeCapturedBefore - Return true if this pointer value may be
/// captured by the enclosing function (which is required to exist). If a
/// DominatorTree is provided, only captures which happen before the given
/// instruction are considered. This routine can be expensive, so consider
/// caching the results. The boolean ReturnCaptures specifies whether
/// returning the value (or part of it) from the function counts as capturing
/// it or not. The boolean StoreCaptures specifies whether storing the value
/// (or part of it) into memory anywhere automatically counts as capturing it
/// or not.
bool llvm::PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures,
                                      bool StoreCaptures, const Instruction *I,
                                      const DominatorTree *DT, bool IncludeI,
                                      unsigned MaxUsesToExplore,
                                      const LoopInfo *LI) {
  assert(!isa<GlobalValue>(V) &&
         "It doesn't make sense to ask whether a global is captured.");

  if (!DT)
    return PointerMayBeCaptured(V, ReturnCaptures, StoreCaptures,
                                MaxUsesToExplore);

  // TODO: See comment in PointerMayBeCaptured regarding what could be done
  // with StoreCaptures.

  CapturesBefore CB(ReturnCaptures, I, DT, IncludeI, LI);
  PointerMayBeCaptured(V, &CB, MaxUsesToExplore);
  if (CB.Captured)
    ++NumCapturedBefore;
  else
    ++NumNotCapturedBefore;
  return CB.Captured;
}

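/// Return the earliest instruction before which \p V is known not to be
/// captured in function \p F, or nullptr if no capture was found. If not all
/// uses could be analyzed, the first instruction of the entry block is
/// returned (see EarliestCaptures::tooManyUses).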
Instruction *
llvm::FindEarliestCapture(const Value *V, Function &F, bool ReturnCaptures,
                          bool StoreCaptures, const DominatorTree &DT,
                          const SmallPtrSetImpl<const Value *> &EphValues,
                          unsigned MaxUsesToExplore) {
  assert(!isa<GlobalValue>(V) &&
         "It doesn't make sense to ask whether a global is captured.");

  EarliestCaptures CB(ReturnCaptures, F, DT, EphValues);
  PointerMayBeCaptured(V, &CB, MaxUsesToExplore);
  if (CB.Captured)
    ++NumCapturedBefore;
  else
    ++NumNotCapturedBefore;
  return CB.EarliestCapture;
}

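/// Classify how the use \p U affects capture of the pointer it refers to:
/// NO_CAPTURE if the use cannot capture it, MAY_CAPTURE if it might, and
/// PASSTHROUGH if the user merely propagates the pointer, in which case the
/// user's own uses must be examined instead.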
UseCaptureKind llvm::DetermineUseCaptureKind(
    const Use &U,
    function_ref<bool(Value *, const DataLayout &)> IsDereferenceableOrNull) {
  Instruction *I = cast<Instruction>(U.getUser());

  switch (I->getOpcode()) {
  case Instruction::Call:
  case Instruction::Invoke: {
    auto *Call = cast<CallBase>(I);
    // Not captured if the callee is readonly, doesn't return a copy through
    // its return value and doesn't unwind (a readonly function can leak bits
    // by throwing an exception or not depending on the input value).
    if (Call->onlyReadsMemory() && Call->doesNotThrow() &&
        Call->getType()->isVoidTy())
      return UseCaptureKind::NO_CAPTURE;

    // The pointer is not captured if returned pointer is not captured.
    // NOTE: CaptureTracking users should not assume that only functions
    // marked with nocapture do not capture. This means that places like
    // getUnderlyingObject in ValueTracking or DecomposeGEPExpression
    // in BasicAA also need to know about this property.
    if (isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(Call, true))
      return UseCaptureKind::PASSTHROUGH;

    // Volatile operations effectively capture the memory location that they
    // load and store to.
    if (auto *MI = dyn_cast<MemIntrinsic>(Call))
      if (MI->isVolatile())
        return UseCaptureKind::MAY_CAPTURE;

    // Calling a function pointer does not in itself cause the pointer to
    // be captured. This is a subtle point considering that (for example)
    // the callee might return its own address. It is analogous to saying
    // that loading a value from a pointer does not cause the pointer to be
    // captured, even though the loaded value might be the pointer itself
    // (think of self-referential objects).
    if (Call->isCallee(&U))
      return UseCaptureKind::NO_CAPTURE;

    // Not captured if only passed via 'nocapture' arguments.
    if (Call->isDataOperand(&U) &&
        !Call->doesNotCapture(Call->getDataOperandNo(&U))) {
      // The parameter is not marked 'nocapture' - captured.
      return UseCaptureKind::MAY_CAPTURE;
    }
    return UseCaptureKind::NO_CAPTURE;
  }
  case Instruction::Load:
    // Volatile loads make the address observable.
    if (cast<LoadInst>(I)->isVolatile())
      return UseCaptureKind::MAY_CAPTURE;
    return UseCaptureKind::NO_CAPTURE;
  case Instruction::VAArg:
    // "va-arg" from a pointer does not cause it to be captured.
    return UseCaptureKind::NO_CAPTURE;
  case Instruction::Store:
    // Stored the pointer - conservatively assume it may be captured.
    // Volatile stores make the address observable.
    if (U.getOperandNo() == 0 || cast<StoreInst>(I)->isVolatile())
      return UseCaptureKind::MAY_CAPTURE;
    return UseCaptureKind::NO_CAPTURE;
  case Instruction::AtomicRMW: {
    // atomicrmw conceptually includes both a load and store from
    // the same location.
    // As with a store, the location being accessed is not captured,
    // but the value being stored is.
    // Volatile stores make the address observable.
    auto *ARMWI = cast<AtomicRMWInst>(I);
    if (U.getOperandNo() == 1 || ARMWI->isVolatile())
      return UseCaptureKind::MAY_CAPTURE;
    return UseCaptureKind::NO_CAPTURE;
  }
  case Instruction::AtomicCmpXchg: {
    // cmpxchg conceptually includes both a load and store from
    // the same location.
    // As with a store, the location being accessed is not captured,
    // but the value being stored is.
    // Volatile stores make the address observable.
    auto *ACXI = cast<AtomicCmpXchgInst>(I);
    if (U.getOperandNo() == 1 || U.getOperandNo() == 2 || ACXI->isVolatile())
      return UseCaptureKind::MAY_CAPTURE;
    return UseCaptureKind::NO_CAPTURE;
  }
  case Instruction::BitCast:
  case Instruction::GetElementPtr:
  case Instruction::PHI:
  case Instruction::Select:
  case Instruction::AddrSpaceCast:
    // The original value is not captured via this if the new value isn't.
    return UseCaptureKind::PASSTHROUGH;
  case Instruction::ICmp: {
    unsigned Idx = U.getOperandNo();
    unsigned OtherIdx = 1 - Idx;
    if (auto *CPN = dyn_cast<ConstantPointerNull>(I->getOperand(OtherIdx))) {
      // Don't count comparisons of a no-alias return value against null as
      // captures. This allows us to ignore comparisons of malloc results
      // with null, for example.
      if (CPN->getType()->getAddressSpace() == 0)
        if (isNoAliasCall(U.get()->stripPointerCasts()))
          return UseCaptureKind::NO_CAPTURE;
      if (!I->getFunction()->nullPointerIsDefined()) {
        auto *O = I->getOperand(Idx)->stripPointerCastsSameRepresentation();
        // Comparing a dereferenceable_or_null pointer against null cannot
        // lead to pointer escapes, because if it is not null it must be a
        // valid (in-bounds) pointer.
        const DataLayout &DL = I->getModule()->getDataLayout();
        if (IsDereferenceableOrNull && IsDereferenceableOrNull(O, DL))
          return UseCaptureKind::NO_CAPTURE;
      }
    }

    // Comparison against value stored in global variable. Given the pointer
    // does not escape, its value cannot be guessed and stored separately in a
    // global variable.
    auto *LI = dyn_cast<LoadInst>(I->getOperand(OtherIdx));
    if (LI && isa<GlobalVariable>(LI->getPointerOperand()))
      return UseCaptureKind::NO_CAPTURE;

    // Otherwise, be conservative. There are crazy ways to capture pointers
    // using comparisons.
    return UseCaptureKind::MAY_CAPTURE;
  }
  default:
    // Something else - be conservative and say it is captured.
    return UseCaptureKind::MAY_CAPTURE;
  }
}

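/// Worklist-driven driver shared by the wrappers above: collect the uses of
/// \p V (up to \p MaxUsesToExplore of them), classify each use with
/// DetermineUseCaptureKind, report possibly-capturing uses to \p Tracker, and
/// follow pass-through users transitively.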
void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker,
                                unsigned MaxUsesToExplore) {
  assert(V->getType()->isPointerTy() && "Capture is for pointers only!");
  if (MaxUsesToExplore == 0)
    MaxUsesToExplore = DefaultMaxUsesToExplore;

  SmallVector<const Use *, 20> Worklist;
  Worklist.reserve(getDefaultMaxUsesToExploreForCaptureTracking());
  SmallSet<const Use *, 20> Visited;

  auto AddUses = [&](const Value *V) {
    for (const Use &U : V->uses()) {
      // If there are lots of uses, conservatively say that the value
      // is captured to avoid taking too much compile time.
      if (Visited.size() >= MaxUsesToExplore) {
        Tracker->tooManyUses();
        return false;
      }
      if (!Visited.insert(&U).second)
        continue;
      if (!Tracker->shouldExplore(&U))
        continue;
      Worklist.push_back(&U);
    }
    return true;
  };
  if (!AddUses(V))
    return;

  auto IsDereferenceableOrNull = [Tracker](Value *V, const DataLayout &DL) {
    return Tracker->isDereferenceableOrNull(V, DL);
  };
  while (!Worklist.empty()) {
    const Use *U = Worklist.pop_back_val();
    switch (DetermineUseCaptureKind(*U, IsDereferenceableOrNull)) {
    case UseCaptureKind::NO_CAPTURE:
      continue;
    case UseCaptureKind::MAY_CAPTURE:
      if (Tracker->captured(U))
        return;
      continue;
    case UseCaptureKind::PASSTHROUGH:
      if (!AddUses(U->getUser()))
        return;
      continue;
    }
  }

  // All uses examined.
}

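/// Return true if \p V is an identified function-local object (see
/// isIdentifiedFunctionLocal) whose address is never captured, i.e. the object
/// cannot escape the enclosing function. If \p IsCapturedCache is non-null,
/// results are memoized in it.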
bool llvm::isNonEscapingLocalObject(
    const Value *V, SmallDenseMap<const Value *, bool, 8> *IsCapturedCache) {
  SmallDenseMap<const Value *, bool, 8>::iterator CacheIt;
  if (IsCapturedCache) {
    bool Inserted;
    std::tie(CacheIt, Inserted) = IsCapturedCache->insert({V, false});
    if (!Inserted)
      // Found cached result, return it!
      return CacheIt->second;
  }

  // If this is an identified function-local object, check to see if it escapes.
  if (isIdentifiedFunctionLocal(V)) {
    // Set StoreCaptures to True so that we can assume in our callers that the
    // pointer is not the result of a load instruction. Currently
    // PointerMayBeCaptured doesn't have any special analysis for the
    // StoreCaptures=false case; if it did, our callers could be refined to be
    // more precise.
    auto Ret = !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true);
    if (IsCapturedCache)
      CacheIt->second = Ret;
    return Ret;
  }

  return false;
}