MemoryDependenceAnalysis.cpp

  1. //===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation -------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file implements an analysis that determines, for a given memory
  10. // operation, what preceding memory operations it depends on. It builds on
  11. // alias analysis information, and tries to provide a lazy, caching interface to
  12. // a common kind of alias information query.
  13. //
  14. //===----------------------------------------------------------------------===//
  15. #include "llvm/Analysis/MemoryDependenceAnalysis.h"
  16. #include "llvm/ADT/DenseMap.h"
  17. #include "llvm/ADT/STLExtras.h"
  18. #include "llvm/ADT/SmallPtrSet.h"
  19. #include "llvm/ADT/SmallVector.h"
  20. #include "llvm/ADT/Statistic.h"
  21. #include "llvm/Analysis/AliasAnalysis.h"
  22. #include "llvm/Analysis/AssumptionCache.h"
  23. #include "llvm/Analysis/MemoryBuiltins.h"
  24. #include "llvm/Analysis/MemoryLocation.h"
  25. #include "llvm/Analysis/PHITransAddr.h"
  26. #include "llvm/Analysis/PhiValues.h"
  27. #include "llvm/Analysis/TargetLibraryInfo.h"
  28. #include "llvm/Analysis/ValueTracking.h"
  29. #include "llvm/IR/Attributes.h"
  30. #include "llvm/IR/BasicBlock.h"
  31. #include "llvm/IR/Constants.h"
  32. #include "llvm/IR/DataLayout.h"
  33. #include "llvm/IR/DerivedTypes.h"
  34. #include "llvm/IR/Dominators.h"
  35. #include "llvm/IR/Function.h"
  36. #include "llvm/IR/InstrTypes.h"
  37. #include "llvm/IR/Instruction.h"
  38. #include "llvm/IR/Instructions.h"
  39. #include "llvm/IR/IntrinsicInst.h"
  40. #include "llvm/IR/LLVMContext.h"
  41. #include "llvm/IR/Metadata.h"
  42. #include "llvm/IR/Module.h"
  43. #include "llvm/IR/PredIteratorCache.h"
  44. #include "llvm/IR/Type.h"
  45. #include "llvm/IR/Use.h"
  46. #include "llvm/IR/User.h"
  47. #include "llvm/IR/Value.h"
  48. #include "llvm/InitializePasses.h"
  49. #include "llvm/Pass.h"
  50. #include "llvm/Support/AtomicOrdering.h"
  51. #include "llvm/Support/Casting.h"
  52. #include "llvm/Support/CommandLine.h"
  53. #include "llvm/Support/Compiler.h"
  54. #include "llvm/Support/Debug.h"
  55. #include "llvm/Support/MathExtras.h"
  56. #include <algorithm>
  57. #include <cassert>
  58. #include <cstdint>
  59. #include <iterator>
  60. #include <utility>
  61. using namespace llvm;
  62. #define DEBUG_TYPE "memdep"
  63. STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
  64. STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses");
  65. STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses");
  66. STATISTIC(NumCacheNonLocalPtr,
  67. "Number of fully cached non-local ptr responses");
  68. STATISTIC(NumCacheDirtyNonLocalPtr,
  69. "Number of cached, but dirty, non-local ptr responses");
  70. STATISTIC(NumUncacheNonLocalPtr, "Number of uncached non-local ptr responses");
  71. STATISTIC(NumCacheCompleteNonLocalPtr,
  72. "Number of block queries that were completely cached");
  73. // Limit for the number of instructions to scan in a block.
  74. static cl::opt<unsigned> BlockScanLimit(
  75. "memdep-block-scan-limit", cl::Hidden, cl::init(100),
  76. cl::desc("The number of instructions to scan in a block in memory "
  77. "dependency analysis (default = 100)"));
  78. static cl::opt<unsigned>
  79. BlockNumberLimit("memdep-block-number-limit", cl::Hidden, cl::init(1000),
  80. cl::desc("The number of blocks to scan during memory "
  81. "dependency analysis (default = 1000)"));
  82. // Limit on the number of memdep results to process.
  83. static const unsigned int NumResultsLimit = 100;
  84. /// This is a helper function that removes Val from 'Inst's set in ReverseMap.
  85. ///
  86. /// If the set becomes empty, remove Inst's entry.
  87. template <typename KeyTy>
  88. static void
  89. RemoveFromReverseMap(DenseMap<Instruction *, SmallPtrSet<KeyTy, 4>> &ReverseMap,
  90. Instruction *Inst, KeyTy Val) {
  91. typename DenseMap<Instruction *, SmallPtrSet<KeyTy, 4>>::iterator InstIt =
  92. ReverseMap.find(Inst);
  93. assert(InstIt != ReverseMap.end() && "Reverse map out of sync?");
  94. bool Found = InstIt->second.erase(Val);
  95. assert(Found && "Invalid reverse map!");
  96. (void)Found;
  97. if (InstIt->second.empty())
  98. ReverseMap.erase(InstIt);
  99. }
  100. /// If the given instruction references a specific memory location, fill in Loc
  101. /// with the details, otherwise set Loc.Ptr to null.
  102. ///
  103. /// Returns a ModRefInfo value describing the general behavior of the
  104. /// instruction.
  105. static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc,
  106. const TargetLibraryInfo &TLI) {
  107. if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
  108. if (LI->isUnordered()) {
  109. Loc = MemoryLocation::get(LI);
  110. return ModRefInfo::Ref;
  111. }
  112. if (LI->getOrdering() == AtomicOrdering::Monotonic) {
  113. Loc = MemoryLocation::get(LI);
  114. return ModRefInfo::ModRef;
  115. }
  116. Loc = MemoryLocation();
  117. return ModRefInfo::ModRef;
  118. }
  119. if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
  120. if (SI->isUnordered()) {
  121. Loc = MemoryLocation::get(SI);
  122. return ModRefInfo::Mod;
  123. }
  124. if (SI->getOrdering() == AtomicOrdering::Monotonic) {
  125. Loc = MemoryLocation::get(SI);
  126. return ModRefInfo::ModRef;
  127. }
  128. Loc = MemoryLocation();
  129. return ModRefInfo::ModRef;
  130. }
  131. if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
  132. Loc = MemoryLocation::get(V);
  133. return ModRefInfo::ModRef;
  134. }
  135. if (const CallInst *CI = isFreeCall(Inst, &TLI)) {
  136. // calls to free() deallocate the entire structure
  137. Loc = MemoryLocation::getAfter(CI->getArgOperand(0));
  138. return ModRefInfo::Mod;
  139. }
  140. if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
  141. switch (II->getIntrinsicID()) {
  142. case Intrinsic::lifetime_start:
  143. case Intrinsic::lifetime_end:
  144. case Intrinsic::invariant_start:
  145. Loc = MemoryLocation::getForArgument(II, 1, TLI);
  146. // These intrinsics don't really modify the memory, but returning Mod
  147. // will allow them to be handled conservatively.
  148. return ModRefInfo::Mod;
  149. case Intrinsic::invariant_end:
  150. Loc = MemoryLocation::getForArgument(II, 2, TLI);
  151. // These intrinsics don't really modify the memory, but returning Mod
  152. // will allow them to be handled conservatively.
  153. return ModRefInfo::Mod;
  154. case Intrinsic::masked_load:
  155. Loc = MemoryLocation::getForArgument(II, 0, TLI);
  156. return ModRefInfo::Ref;
  157. case Intrinsic::masked_store:
  158. Loc = MemoryLocation::getForArgument(II, 1, TLI);
  159. return ModRefInfo::Mod;
  160. default:
  161. break;
  162. }
  163. }
  164. // Otherwise, just do the coarse-grained thing that always works.
  165. if (Inst->mayWriteToMemory())
  166. return ModRefInfo::ModRef;
  167. if (Inst->mayReadFromMemory())
  168. return ModRefInfo::Ref;
  169. return ModRefInfo::NoModRef;
  170. }
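  // To summarize the cases in GetLocation above: a simple, unordered load
  // fills Loc with the loaded location and yields ModRefInfo::Ref; a simple
  // store yields ModRefInfo::Mod; monotonic atomic loads and stores still
  // fill Loc but are conservatively reported as ModRefInfo::ModRef; anything
  // unhandled falls back to the coarse mayWriteToMemory()/mayReadFromMemory()
  // classification.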
  171. /// Private helper for finding the local dependencies of a call site.
  172. MemDepResult MemoryDependenceResults::getCallDependencyFrom(
  173. CallBase *Call, bool isReadOnlyCall, BasicBlock::iterator ScanIt,
  174. BasicBlock *BB) {
  175. unsigned Limit = getDefaultBlockScanLimit();
  176. // Walk backwards through the block, looking for dependencies.
  177. while (ScanIt != BB->begin()) {
  178. Instruction *Inst = &*--ScanIt;
  179. // Debug intrinsics don't cause dependences and should not affect Limit
  180. if (isa<DbgInfoIntrinsic>(Inst))
  181. continue;
  182. // Limit the amount of scanning we do so we don't end up with quadratic
  183. // running time on extreme testcases.
  184. --Limit;
  185. if (!Limit)
  186. return MemDepResult::getUnknown();
  187. // If this inst is a memory op, get the pointer it accessed
  188. MemoryLocation Loc;
  189. ModRefInfo MR = GetLocation(Inst, Loc, TLI);
  190. if (Loc.Ptr) {
  191. // A simple instruction.
  192. if (isModOrRefSet(AA.getModRefInfo(Call, Loc)))
  193. return MemDepResult::getClobber(Inst);
  194. continue;
  195. }
  196. if (auto *CallB = dyn_cast<CallBase>(Inst)) {
  197. // If these two calls do not interfere, look past it.
  198. if (isNoModRef(AA.getModRefInfo(Call, CallB))) {
  199. // If the two calls are the same, return Inst as a Def, so that
  200. // Call can be found redundant and eliminated.
  201. if (isReadOnlyCall && !isModSet(MR) &&
  202. Call->isIdenticalToWhenDefined(CallB))
  203. return MemDepResult::getDef(Inst);
  204. // Otherwise if the two calls don't interact (e.g. CallB is readnone)
  205. // keep scanning.
  206. continue;
  207. } else
  208. return MemDepResult::getClobber(Inst);
  209. }
  210. // If we could not obtain a pointer for the instruction and the instruction
  211. // touches memory then assume that this is a dependency.
  212. if (isModOrRefSet(MR))
  213. return MemDepResult::getClobber(Inst);
  214. }
  215. // No dependence found. If this is the entry block of the function, it is
  216. // unknown, otherwise it is non-local.
  217. if (BB != &BB->getParent()->getEntryBlock())
  218. return MemDepResult::getNonLocal();
  219. return MemDepResult::getNonFuncLocal();
  220. }
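  /// Find the instruction that MemLoc locally depends on, scanning backwards
  /// from ScanIt within BB. For load queries this first looks for a definite
  /// (Def) !invariant.group dependency and otherwise falls back to the plain
  /// backwards scan in getSimplePointerDependencyFrom, preferring a non-local
  /// invariant.group Def over a local clobber.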
  221. MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
  222. const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
  223. BasicBlock *BB, Instruction *QueryInst, unsigned *Limit,
  224. BatchAAResults &BatchAA) {
  225. MemDepResult InvariantGroupDependency = MemDepResult::getUnknown();
  226. if (QueryInst != nullptr) {
  227. if (auto *LI = dyn_cast<LoadInst>(QueryInst)) {
  228. InvariantGroupDependency = getInvariantGroupPointerDependency(LI, BB);
  229. if (InvariantGroupDependency.isDef())
  230. return InvariantGroupDependency;
  231. }
  232. }
  233. MemDepResult SimpleDep = getSimplePointerDependencyFrom(
  234. MemLoc, isLoad, ScanIt, BB, QueryInst, Limit, BatchAA);
  235. if (SimpleDep.isDef())
  236. return SimpleDep;
  237. // A non-local invariant.group dependency indicates there is a non-local Def
  238. // (it only returns NonLocal if it finds a non-local Def), which is better
  239. // than a local clobber or anything else.
  240. if (InvariantGroupDependency.isNonLocal())
  241. return InvariantGroupDependency;
  242. assert(InvariantGroupDependency.isUnknown() &&
  243. "InvariantGroupDependency should be only unknown at this point");
  244. return SimpleDep;
  245. }
  246. MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
  247. const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
  248. BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
  249. BatchAAResults BatchAA(AA);
  250. return getPointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst, Limit,
  251. BatchAA);
  252. }
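  /// Look for a dominating load or store that carries !invariant.group
  /// metadata and uses the same pointer as LI (possibly reached through
  /// bitcasts or all-zero GEPs). A rough IR sketch of the pattern handled:
  ///
  ///   store i8 42, i8* %p, !invariant.group !0
  ///   ...
  ///   %v = load i8, i8* %p, !invariant.group !0  ; value known unchanged
  ///
  /// Returns a Def for a dominating access in the same block, NonLocal (with
  /// the Def stashed in NonLocalDefsCache) for one in another block, and
  /// Unknown otherwise.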
  253. MemDepResult
  254. MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
  255. BasicBlock *BB) {
  256. if (!LI->hasMetadata(LLVMContext::MD_invariant_group))
  257. return MemDepResult::getUnknown();
  258. // Take the pointer operand after stripping all casts and all-zero GEPs. This
  259. // way we only need to search the cast graph downwards.
  260. Value *LoadOperand = LI->getPointerOperand()->stripPointerCasts();
  261. // It is not safe to walk the use list of a global value, because function
  262. // passes aren't allowed to look outside their functions.
  263. // FIXME: this could be fixed by filtering instructions from outside
  264. // of current function.
  265. if (isa<GlobalValue>(LoadOperand))
  266. return MemDepResult::getUnknown();
  267. // Queue to process all pointers that are equivalent to load operand.
  268. SmallVector<const Value *, 8> LoadOperandsQueue;
  269. LoadOperandsQueue.push_back(LoadOperand);
  270. Instruction *ClosestDependency = nullptr;
  271. // The order of instructions in the use list is unpredictable. In order to
  272. // always get the same result, we look for the closest dominating dependency.
  273. auto GetClosestDependency = [this](Instruction *Best, Instruction *Other) {
  274. assert(Other && "Must be called with a non-null instruction");
  275. if (Best == nullptr || DT.dominates(Best, Other))
  276. return Other;
  277. return Best;
  278. };
  279. // FIXME: This loop is O(N^2) because dominates can be O(n) and in worst case
  280. // we will see all the instructions. This should be fixed in MSSA.
  281. while (!LoadOperandsQueue.empty()) {
  282. const Value *Ptr = LoadOperandsQueue.pop_back_val();
  283. assert(Ptr && !isa<GlobalValue>(Ptr) &&
  284. "Null or GlobalValue should not be inserted");
  285. for (const Use &Us : Ptr->uses()) {
  286. auto *U = dyn_cast<Instruction>(Us.getUser());
  287. if (!U || U == LI || !DT.dominates(U, LI))
  288. continue;
  289. // A bitcast or an all-zero GEP uses Ptr; add it to the queue to check its
  290. // users. U = bitcast Ptr
  291. if (isa<BitCastInst>(U)) {
  292. LoadOperandsQueue.push_back(U);
  293. continue;
  294. }
  295. // A GEP with all-zero indices is equivalent to a bitcast.
  296. // FIXME: we are not sure whether a bitcast should be canonicalized to a gep 0
  297. // or a gep 0 to a bitcast because of SROA, so both forms exist. Once typeless
  298. // pointers are ready, both cases will be gone (and this BFS won't be needed
  299. // either).
  300. if (auto *GEP = dyn_cast<GetElementPtrInst>(U))
  301. if (GEP->hasAllZeroIndices()) {
  302. LoadOperandsQueue.push_back(U);
  303. continue;
  304. }
  305. // If we hit a load/store with the same invariant.group metadata (and the
  306. // same pointer operand), we can assume that the value pointed to by the
  307. // pointer operand didn't change.
  308. if ((isa<LoadInst>(U) ||
  309. (isa<StoreInst>(U) &&
  310. cast<StoreInst>(U)->getPointerOperand() == Ptr)) &&
  311. U->hasMetadata(LLVMContext::MD_invariant_group))
  312. ClosestDependency = GetClosestDependency(ClosestDependency, U);
  313. }
  314. }
  315. if (!ClosestDependency)
  316. return MemDepResult::getUnknown();
  317. if (ClosestDependency->getParent() == BB)
  318. return MemDepResult::getDef(ClosestDependency);
  319. // Def(U) can't be returned here because it is non-local. If no local
  320. // dependency is found, return NonLocal, counting on the user to call
  321. // getNonLocalPointerDependency, which will return the cached
  322. // result.
  323. NonLocalDefsCache.try_emplace(
  324. LI, NonLocalDepResult(ClosestDependency->getParent(),
  325. MemDepResult::getDef(ClosestDependency), nullptr));
  326. ReverseNonLocalDefsCache[ClosestDependency].insert(LI);
  327. return MemDepResult::getNonLocal();
  328. }
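  /// Core backwards scan: walk the instructions of BB preceding ScanIt and
  /// return the first one that is a Def (must-alias) or Clobber (may
  /// interfere) for MemLoc, a NonLocal/NonFuncLocal result if the block is
  /// transparent to the access, or Unknown if the scan limit is exhausted.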
  329. MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
  330. const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
  331. BasicBlock *BB, Instruction *QueryInst, unsigned *Limit,
  332. BatchAAResults &BatchAA) {
  333. bool isInvariantLoad = false;
  334. unsigned DefaultLimit = getDefaultBlockScanLimit();
  335. if (!Limit)
  336. Limit = &DefaultLimit;
  337. // We must be careful with atomic accesses, as they may allow another thread
  338. // to touch this location, clobbering it. We are conservative: if the
  339. // QueryInst is not a simple (non-atomic) memory access, we automatically
  340. // return getClobber.
  341. // If it is simple, we know based on the results of
  342. // "Compiler testing via a theory of sound optimisations in the C11/C++11
  343. // memory model" in PLDI 2013, that a non-atomic location can only be
  344. // clobbered between a pair of a release and an acquire action, with no
  345. // access to the location in between.
  346. // Here is an example for giving the general intuition behind this rule.
  347. // In the following code:
  348. // store x 0;
  349. // release action; [1]
  350. // acquire action; [4]
  351. // %val = load x;
  352. // It is unsafe to replace %val by 0 because another thread may be running:
  353. // acquire action; [2]
  354. // store x 42;
  355. // release action; [3]
  356. // with synchronization from 1 to 2 and from 3 to 4, resulting in %val
  357. // being 42. A key property of this program, however, is that if either
  358. // 1 or 4 were missing, there would be a race between the store of 42 and
  359. // either the store of 0 or the load (making the whole program racy).
  360. // The paper mentioned above shows that the same property is respected
  361. // by every program that can detect any optimization of that kind: either
  362. // it is racy (undefined) or there is a release followed by an acquire
  363. // between the pair of accesses under consideration.
  364. // If the load is invariant, we "know" that it doesn't alias *any* write. We
  365. // do want to respect mustalias results since defs are useful for value
  366. // forwarding, but any mayalias write can be assumed to be noalias.
  367. // Arguably, this logic should be pushed inside AliasAnalysis itself.
  368. if (isLoad && QueryInst) {
  369. LoadInst *LI = dyn_cast<LoadInst>(QueryInst);
  370. if (LI && LI->hasMetadata(LLVMContext::MD_invariant_load))
  371. isInvariantLoad = true;
  372. }
  373. // Return "true" if and only if the instruction I is either a non-simple
  374. // load or a non-simple store.
  375. auto isNonSimpleLoadOrStore = [](Instruction *I) -> bool {
  376. if (auto *LI = dyn_cast<LoadInst>(I))
  377. return !LI->isSimple();
  378. if (auto *SI = dyn_cast<StoreInst>(I))
  379. return !SI->isSimple();
  380. return false;
  381. };
  382. // Return "true" if I is not a load and not a store, but it does access
  383. // memory.
  384. auto isOtherMemAccess = [](Instruction *I) -> bool {
  385. return !isa<LoadInst>(I) && !isa<StoreInst>(I) && I->mayReadOrWriteMemory();
  386. };
  387. // Walk backwards through the basic block, looking for dependencies.
  388. while (ScanIt != BB->begin()) {
  389. Instruction *Inst = &*--ScanIt;
  390. if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
  391. // Debug intrinsics don't (and can't) cause dependencies.
  392. if (isa<DbgInfoIntrinsic>(II))
  393. continue;
  394. // Limit the amount of scanning we do so we don't end up with quadratic
  395. // running time on extreme testcases.
  396. --*Limit;
  397. if (!*Limit)
  398. return MemDepResult::getUnknown();
  399. if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
  400. // If we reach a lifetime begin or end marker, then the query ends here
  401. // because the value is undefined.
  402. Intrinsic::ID ID = II->getIntrinsicID();
  403. switch (ID) {
  404. case Intrinsic::lifetime_start: {
  405. // FIXME: This only considers queries directly on the invariant-tagged
  406. // pointer, not on query pointers that are indexed off of them. It'd
  407. // be nice to handle that at some point (the right approach is to use
  408. // GetPointerBaseWithConstantOffset).
  409. MemoryLocation ArgLoc = MemoryLocation::getAfter(II->getArgOperand(1));
  410. if (BatchAA.isMustAlias(ArgLoc, MemLoc))
  411. return MemDepResult::getDef(II);
  412. continue;
  413. }
  414. case Intrinsic::masked_load:
  415. case Intrinsic::masked_store: {
  416. MemoryLocation Loc;
  417. /*ModRefInfo MR =*/ GetLocation(II, Loc, TLI);
  418. AliasResult R = BatchAA.alias(Loc, MemLoc);
  419. if (R == AliasResult::NoAlias)
  420. continue;
  421. if (R == AliasResult::MustAlias)
  422. return MemDepResult::getDef(II);
  423. if (ID == Intrinsic::masked_load)
  424. continue;
  425. return MemDepResult::getClobber(II);
  426. }
  427. }
  428. }
  429. // Values depend on loads if the pointers are must aliased. This means
  430. // that a load depends on another must aliased load from the same value.
  431. // One exception is atomic loads: a value can depend on an atomic load that
  432. // it does not alias with when this atomic load indicates that another
  433. // thread may be accessing the location.
  434. if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
  435. // While volatile accesses cannot be eliminated, they do not have to clobber
  436. // non-aliasing locations, as normal accesses, for example, can be safely
  437. // reordered with volatile accesses.
  438. if (LI->isVolatile()) {
  439. if (!QueryInst)
  440. // Original QueryInst *may* be volatile
  441. return MemDepResult::getClobber(LI);
  442. if (QueryInst->isVolatile())
  443. // Ordering required if QueryInst is itself volatile
  444. return MemDepResult::getClobber(LI);
  445. // Otherwise, volatile doesn't imply any special ordering
  446. }
  447. // Atomic loads have complications involved.
  448. // A Monotonic (or higher) load is OK if the query inst is itself not
  449. // atomic.
  450. // FIXME: This is overly conservative.
  451. if (LI->isAtomic() && isStrongerThanUnordered(LI->getOrdering())) {
  452. if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) ||
  453. isOtherMemAccess(QueryInst))
  454. return MemDepResult::getClobber(LI);
  455. if (LI->getOrdering() != AtomicOrdering::Monotonic)
  456. return MemDepResult::getClobber(LI);
  457. }
  458. MemoryLocation LoadLoc = MemoryLocation::get(LI);
  459. // If we found a pointer, check if it could be the same as our pointer.
  460. AliasResult R = BatchAA.alias(LoadLoc, MemLoc);
  461. if (isLoad) {
  462. if (R == AliasResult::NoAlias)
  463. continue;
  464. // Must aliased loads are defs of each other.
  465. if (R == AliasResult::MustAlias)
  466. return MemDepResult::getDef(Inst);
  467. // If we have a partial alias, then return this as a clobber for the
  468. // client to handle.
  469. if (R == AliasResult::PartialAlias && R.hasOffset()) {
  470. ClobberOffsets[LI] = R.getOffset();
  471. return MemDepResult::getClobber(Inst);
  472. }
  473. // Loads that merely may-alias each other do not constitute a
  474. // dependence.
  475. continue;
  476. }
  477. // Stores don't depend on accesses that don't alias them.
  478. if (R == AliasResult::NoAlias)
  479. continue;
  480. // Stores don't alias loads from read-only memory.
  481. if (BatchAA.pointsToConstantMemory(LoadLoc))
  482. continue;
  483. // Stores depend on may/must aliased loads.
  484. return MemDepResult::getDef(Inst);
  485. }
  486. if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
  487. // Atomic stores have complications involved.
  488. // A Monotonic store is OK if the query inst is itself not atomic.
  489. // FIXME: This is overly conservative.
  490. if (!SI->isUnordered() && SI->isAtomic()) {
  491. if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) ||
  492. isOtherMemAccess(QueryInst))
  493. return MemDepResult::getClobber(SI);
  494. if (SI->getOrdering() != AtomicOrdering::Monotonic)
  495. return MemDepResult::getClobber(SI);
  496. }
  497. // FIXME: this is overly conservative.
  498. // While volatile accesses cannot be eliminated, they do not have to clobber
  499. // non-aliasing locations, as normal accesses can, for example, be reordered
  500. // with volatile accesses.
  501. if (SI->isVolatile())
  502. if (!QueryInst || isNonSimpleLoadOrStore(QueryInst) ||
  503. isOtherMemAccess(QueryInst))
  504. return MemDepResult::getClobber(SI);
  505. // If alias analysis can tell that this store is guaranteed to not modify
  506. // the query pointer, ignore it. Use getModRefInfo to handle cases where
  507. // the query pointer points to constant memory etc.
  508. if (!isModOrRefSet(BatchAA.getModRefInfo(SI, MemLoc)))
  509. continue;
  510. // Ok, this store might clobber the query pointer. Check to see if it is
  511. // a must alias: in this case, we want to return this as a def.
  512. // FIXME: Use ModRefInfo::Must bit from getModRefInfo call above.
  513. MemoryLocation StoreLoc = MemoryLocation::get(SI);
  514. // If we found a pointer, check if it could be the same as our pointer.
  515. AliasResult R = BatchAA.alias(StoreLoc, MemLoc);
  516. if (R == AliasResult::NoAlias)
  517. continue;
  518. if (R == AliasResult::MustAlias)
  519. return MemDepResult::getDef(Inst);
  520. if (isInvariantLoad)
  521. continue;
  522. return MemDepResult::getClobber(Inst);
  523. }
  524. // If this is an allocation, and if we know that the accessed pointer is to
  525. // the allocation, return Def. This means that there is no dependence and
  526. // the access can be optimized based on that. For example, a load could
  527. // turn into undef. Note that we can bypass the allocation itself when
  528. // looking for a clobber in many cases; that's an alias property and is
  529. // handled by BasicAA.
  530. if (isa<AllocaInst>(Inst) || isNoAliasCall(Inst)) {
  531. const Value *AccessPtr = getUnderlyingObject(MemLoc.Ptr);
  532. if (AccessPtr == Inst || BatchAA.isMustAlias(Inst, AccessPtr))
  533. return MemDepResult::getDef(Inst);
  534. }
  535. if (isInvariantLoad)
  536. continue;
  537. // A release fence requires that all stores complete before it, but does
  538. // not prevent the reordering of following loads or stores 'before' the
  539. // fence. As a result, we look past it when finding a dependency for
  540. // loads. DSE uses this to find preceding stores to delete and thus we
  541. // can't bypass the fence if the query instruction is a store.
  542. if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
  543. if (isLoad && FI->getOrdering() == AtomicOrdering::Release)
  544. continue;
  545. // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
  546. ModRefInfo MR = BatchAA.getModRefInfo(Inst, MemLoc);
  547. // If necessary, perform additional analysis.
  548. if (isModAndRefSet(MR))
  549. MR = BatchAA.callCapturesBefore(Inst, MemLoc, &DT);
  550. switch (clearMust(MR)) {
  551. case ModRefInfo::NoModRef:
  552. // If the call has no effect on the queried pointer, just ignore it.
  553. continue;
  554. case ModRefInfo::Mod:
  555. return MemDepResult::getClobber(Inst);
  556. case ModRefInfo::Ref:
  557. // If the call is known to never store to the pointer, and if this is a
  558. // load query, we can safely ignore it (scan past it).
  559. if (isLoad)
  560. continue;
  561. LLVM_FALLTHROUGH;
  562. default:
  563. // Otherwise, there is a potential dependence. Return a clobber.
  564. return MemDepResult::getClobber(Inst);
  565. }
  566. }
  567. // No dependence found. If this is the entry block of the function, it is
  568. // unknown, otherwise it is non-local.
  569. if (BB != &BB->getParent()->getEntryBlock())
  570. return MemDepResult::getNonLocal();
  571. return MemDepResult::getNonFuncLocal();
  572. }
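  /// Return the local dependency of QueryInst, consulting the per-instruction
  /// LocalDeps cache and rescanning (from the cached dirty position when one
  /// is available) only when the cached entry is dirty.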
  573. MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst) {
  574. ClobberOffsets.clear();
  575. Instruction *ScanPos = QueryInst;
  576. // Check for a cached result
  577. MemDepResult &LocalCache = LocalDeps[QueryInst];
  578. // If the cached entry is non-dirty, just return it. Note that this depends
  579. // on MemDepResult's default constructing to 'dirty'.
  580. if (!LocalCache.isDirty())
  581. return LocalCache;
  582. // Otherwise, if we have a dirty entry, we know we can start the scan at that
  583. // instruction, which may save us some work.
  584. if (Instruction *Inst = LocalCache.getInst()) {
  585. ScanPos = Inst;
  586. RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst);
  587. }
  588. BasicBlock *QueryParent = QueryInst->getParent();
  589. // Do the scan.
  590. if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
  591. // No dependence found. If this is the entry block of the function, it is
  592. // unknown, otherwise it is non-local.
  593. if (QueryParent != &QueryParent->getParent()->getEntryBlock())
  594. LocalCache = MemDepResult::getNonLocal();
  595. else
  596. LocalCache = MemDepResult::getNonFuncLocal();
  597. } else {
  598. MemoryLocation MemLoc;
  599. ModRefInfo MR = GetLocation(QueryInst, MemLoc, TLI);
  600. if (MemLoc.Ptr) {
  601. // If we can do a pointer scan, make it happen.
  602. bool isLoad = !isModSet(MR);
  603. if (auto *II = dyn_cast<IntrinsicInst>(QueryInst))
  604. isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;
  605. LocalCache =
  606. getPointerDependencyFrom(MemLoc, isLoad, ScanPos->getIterator(),
  607. QueryParent, QueryInst, nullptr);
  608. } else if (auto *QueryCall = dyn_cast<CallBase>(QueryInst)) {
  609. bool isReadOnly = AA.onlyReadsMemory(QueryCall);
  610. LocalCache = getCallDependencyFrom(QueryCall, isReadOnly,
  611. ScanPos->getIterator(), QueryParent);
  612. } else
  613. // Non-memory instruction.
  614. LocalCache = MemDepResult::getUnknown();
  615. }
  616. // Remember the result!
  617. if (Instruction *I = LocalCache.getInst())
  618. ReverseLocalDeps[I].insert(QueryInst);
  619. return LocalCache;
  620. }
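  // A minimal usage sketch (hypothetical client code, not part of this file):
  //
  //   MemDepResult Dep = MD.getDependency(&I);
  //   if (Dep.isDef()) {
  //     // The value can be forwarded from Dep.getInst().
  //   } else if (Dep.isNonLocal()) {
  //     // Fall back to the non-local queries below.
  //   }
  //
  // Passes such as GVN and DSE have historically driven this analysis through
  // exactly this kind of query.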
  621. #ifndef NDEBUG
  622. /// This method is used when -debug is specified to verify that cache arrays
  623. /// are properly kept sorted.
  624. static void AssertSorted(MemoryDependenceResults::NonLocalDepInfo &Cache,
  625. int Count = -1) {
  626. if (Count == -1)
  627. Count = Cache.size();
  628. assert(std::is_sorted(Cache.begin(), Cache.begin() + Count) &&
  629. "Cache isn't sorted!");
  630. }
  631. #endif
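  /// Return the cached set of (block, result) entries describing the non-local
  /// dependencies of QueryCall, computing them from scratch on the first query
  /// and afterwards recomputing only the entries that have been marked dirty.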
  632. const MemoryDependenceResults::NonLocalDepInfo &
  633. MemoryDependenceResults::getNonLocalCallDependency(CallBase *QueryCall) {
  634. assert(getDependency(QueryCall).isNonLocal() &&
  635. "getNonLocalCallDependency should only be used on calls with "
  636. "non-local deps!");
  637. PerInstNLInfo &CacheP = NonLocalDepsMap[QueryCall];
  638. NonLocalDepInfo &Cache = CacheP.first;
  639. // This is the set of blocks that need to be recomputed. In the cached case,
  640. // this can happen due to instructions being deleted etc. In the uncached
  641. // case, this starts out as the set of predecessors we care about.
  642. SmallVector<BasicBlock *, 32> DirtyBlocks;
  643. if (!Cache.empty()) {
  644. // Okay, we have a cache entry. If we know it is not dirty, just return it
  645. // with no computation.
  646. if (!CacheP.second) {
  647. ++NumCacheNonLocal;
  648. return Cache;
  649. }
  650. // If we already have a partially computed set of results, scan them to
  651. // determine what is dirty, seeding our initial DirtyBlocks worklist.
  652. for (auto &Entry : Cache)
  653. if (Entry.getResult().isDirty())
  654. DirtyBlocks.push_back(Entry.getBB());
  655. // Sort the cache so that we can do fast binary search lookups below.
  656. llvm::sort(Cache);
  657. ++NumCacheDirtyNonLocal;
  658. // cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: "
  659. // << Cache.size() << " cached: " << *QueryInst;
  660. } else {
  661. // Seed DirtyBlocks with each of the preds of QueryInst's block.
  662. BasicBlock *QueryBB = QueryCall->getParent();
  663. append_range(DirtyBlocks, PredCache.get(QueryBB));
  664. ++NumUncacheNonLocal;
  665. }
  666. // isReadonlyCall - If this is a read-only call, we can be more aggressive.
  667. bool isReadonlyCall = AA.onlyReadsMemory(QueryCall);
  668. SmallPtrSet<BasicBlock *, 32> Visited;
  669. unsigned NumSortedEntries = Cache.size();
  670. LLVM_DEBUG(AssertSorted(Cache));
  671. // Iterate while we still have blocks to update.
  672. while (!DirtyBlocks.empty()) {
  673. BasicBlock *DirtyBB = DirtyBlocks.pop_back_val();
  674. // Already processed this block?
  675. if (!Visited.insert(DirtyBB).second)
  676. continue;
  677. // Do a binary search to see if we already have an entry for this block in
  678. // the cache set. If so, find it.
  679. LLVM_DEBUG(AssertSorted(Cache, NumSortedEntries));
  680. NonLocalDepInfo::iterator Entry =
  681. std::upper_bound(Cache.begin(), Cache.begin() + NumSortedEntries,
  682. NonLocalDepEntry(DirtyBB));
  683. if (Entry != Cache.begin() && std::prev(Entry)->getBB() == DirtyBB)
  684. --Entry;
  685. NonLocalDepEntry *ExistingResult = nullptr;
  686. if (Entry != Cache.begin() + NumSortedEntries &&
  687. Entry->getBB() == DirtyBB) {
  688. // If we already have an entry, and if it isn't already dirty, the block
  689. // is done.
  690. if (!Entry->getResult().isDirty())
  691. continue;
  692. // Otherwise, remember this slot so we can update the value.
  693. ExistingResult = &*Entry;
  694. }
  695. // If the dirty entry has a pointer, start scanning from it so we don't have
  696. // to rescan the entire block.
  697. BasicBlock::iterator ScanPos = DirtyBB->end();
  698. if (ExistingResult) {
  699. if (Instruction *Inst = ExistingResult->getResult().getInst()) {
  700. ScanPos = Inst->getIterator();
  701. // We're removing QueryInst's use of Inst.
  702. RemoveFromReverseMap<Instruction *>(ReverseNonLocalDeps, Inst,
  703. QueryCall);
  704. }
  705. }
  706. // Find out if this block has a local dependency for QueryCall.
  707. MemDepResult Dep;
  708. if (ScanPos != DirtyBB->begin()) {
  709. Dep = getCallDependencyFrom(QueryCall, isReadonlyCall, ScanPos, DirtyBB);
  710. } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
  711. // No dependence found. If this is the entry block of the function, it is
  712. // unknown, otherwise it is non-local.
  713. Dep = MemDepResult::getNonLocal();
  714. } else {
  715. Dep = MemDepResult::getNonFuncLocal();
  716. }
  717. // If we had a dirty entry for the block, update it. Otherwise, just add
  718. // a new entry.
  719. if (ExistingResult)
  720. ExistingResult->setResult(Dep);
  721. else
  722. Cache.push_back(NonLocalDepEntry(DirtyBB, Dep));
  723. // If the block has a dependency (i.e. it isn't completely transparent to
  724. // the value), remember the association!
  725. if (!Dep.isNonLocal()) {
  726. // Keep the ReverseNonLocalDeps map up to date so we can efficiently
  727. // update this when we remove instructions.
  728. if (Instruction *Inst = Dep.getInst())
  729. ReverseNonLocalDeps[Inst].insert(QueryCall);
  730. } else {
  731. // If the block *is* completely transparent to the load, we need to check
  732. // the predecessors of this block. Add them to our worklist.
  733. append_range(DirtyBlocks, PredCache.get(DirtyBB));
  734. }
  735. }
  736. return Cache;
  737. }
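  /// Populate Result with one entry per block in which the memory location
  /// accessed by QueryInst has a Def or Clobber dependency, PHI-translating
  /// the pointer into predecessor blocks where necessary. Ordered and volatile
  /// accesses are not handled and produce a single Unknown result.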
  738. void MemoryDependenceResults::getNonLocalPointerDependency(
  739. Instruction *QueryInst, SmallVectorImpl<NonLocalDepResult> &Result) {
  740. const MemoryLocation Loc = MemoryLocation::get(QueryInst);
  741. bool isLoad = isa<LoadInst>(QueryInst);
  742. BasicBlock *FromBB = QueryInst->getParent();
  743. assert(FromBB);
  744. assert(Loc.Ptr->getType()->isPointerTy() &&
  745. "Can't get pointer deps of a non-pointer!");
  746. Result.clear();
  747. {
  748. // Check if there is cached Def with invariant.group.
  749. auto NonLocalDefIt = NonLocalDefsCache.find(QueryInst);
  750. if (NonLocalDefIt != NonLocalDefsCache.end()) {
  751. Result.push_back(NonLocalDefIt->second);
  752. ReverseNonLocalDefsCache[NonLocalDefIt->second.getResult().getInst()]
  753. .erase(QueryInst);
  754. NonLocalDefsCache.erase(NonLocalDefIt);
  755. return;
  756. }
  757. }
  758. // This routine does not expect to deal with volatile instructions.
  759. // Doing so would require piping the QueryInst all the way through.
  760. // TODO: volatiles can't be elided, but they can be reordered with other
  761. // non-volatile accesses.
  762. // We currently give up on any instruction which is ordered, but we do handle
  763. // atomic instructions which are unordered.
  764. // TODO: Handle ordered instructions
  765. auto isOrdered = [](Instruction *Inst) {
  766. if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
  767. return !LI->isUnordered();
  768. } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
  769. return !SI->isUnordered();
  770. }
  771. return false;
  772. };
  773. if (QueryInst->isVolatile() || isOrdered(QueryInst)) {
  774. Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
  775. const_cast<Value *>(Loc.Ptr)));
  776. return;
  777. }
  778. const DataLayout &DL = FromBB->getModule()->getDataLayout();
  779. PHITransAddr Address(const_cast<Value *>(Loc.Ptr), DL, &AC);
  780. // This is the set of blocks we've inspected, and the pointer we consider in
  781. // each block. Because of critical edges, we currently bail out if querying
  782. // a block with multiple different pointers. This can happen during PHI
  783. // translation.
  784. DenseMap<BasicBlock *, Value *> Visited;
  785. if (getNonLocalPointerDepFromBB(QueryInst, Address, Loc, isLoad, FromBB,
  786. Result, Visited, true))
  787. return;
  788. Result.clear();
  789. Result.push_back(NonLocalDepResult(FromBB, MemDepResult::getUnknown(),
  790. const_cast<Value *>(Loc.Ptr)));
  791. }
  792. /// Compute the memdep value for BB with Pointer/PointeeSize using either
  793. /// cached information in Cache or by doing a lookup (which may use dirty cache
  794. /// info if available).
  795. ///
  796. /// If we do a lookup, add the result to the cache.
  797. MemDepResult MemoryDependenceResults::getNonLocalInfoForBlock(
  798. Instruction *QueryInst, const MemoryLocation &Loc, bool isLoad,
  799. BasicBlock *BB, NonLocalDepInfo *Cache, unsigned NumSortedEntries,
  800. BatchAAResults &BatchAA) {
  801. bool isInvariantLoad = false;
  802. if (LoadInst *LI = dyn_cast_or_null<LoadInst>(QueryInst))
  803. isInvariantLoad = LI->getMetadata(LLVMContext::MD_invariant_load);
  804. // Do a binary search to see if we already have an entry for this block in
  805. // the cache set. If so, find it.
  806. NonLocalDepInfo::iterator Entry = std::upper_bound(
  807. Cache->begin(), Cache->begin() + NumSortedEntries, NonLocalDepEntry(BB));
  808. if (Entry != Cache->begin() && (Entry - 1)->getBB() == BB)
  809. --Entry;
  810. NonLocalDepEntry *ExistingResult = nullptr;
  811. if (Entry != Cache->begin() + NumSortedEntries && Entry->getBB() == BB)
  812. ExistingResult = &*Entry;
  813. // Use the cached result for an invariant load only if there is no dependency
  814. // for the corresponding non-invariant load; in that case the invariant load
  815. // cannot have any dependency either.
  816. if (ExistingResult && isInvariantLoad &&
  817. !ExistingResult->getResult().isNonFuncLocal())
  818. ExistingResult = nullptr;
  819. // If we have a cached entry, and it is non-dirty, use it as the value for
  820. // this dependency.
  821. if (ExistingResult && !ExistingResult->getResult().isDirty()) {
  822. ++NumCacheNonLocalPtr;
  823. return ExistingResult->getResult();
  824. }
  825. // Otherwise, we have to scan for the value. If we have a dirty cache
  826. // entry, start scanning from its position, otherwise we scan from the end
  827. // of the block.
  828. BasicBlock::iterator ScanPos = BB->end();
  829. if (ExistingResult && ExistingResult->getResult().getInst()) {
  830. assert(ExistingResult->getResult().getInst()->getParent() == BB &&
  831. "Instruction invalidated?");
  832. ++NumCacheDirtyNonLocalPtr;
  833. ScanPos = ExistingResult->getResult().getInst()->getIterator();
  834. // Eliminating the dirty entry from 'Cache', so update the reverse info.
  835. ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
  836. RemoveFromReverseMap(ReverseNonLocalPtrDeps, &*ScanPos, CacheKey);
  837. } else {
  838. ++NumUncacheNonLocalPtr;
  839. }
  840. // Scan the block for the dependency.
  841. MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB,
  842. QueryInst, nullptr, BatchAA);
  843. // Don't cache results for invariant load.
  844. if (isInvariantLoad)
  845. return Dep;
  846. // If we had a dirty entry for the block, update it. Otherwise, just add
  847. // a new entry.
  848. if (ExistingResult)
  849. ExistingResult->setResult(Dep);
  850. else
  851. Cache->push_back(NonLocalDepEntry(BB, Dep));
  852. // If the block has a dependency (i.e. it isn't completely transparent to
  853. // the value), remember the reverse association because we just added it
  854. // to Cache!
  855. if (!Dep.isDef() && !Dep.isClobber())
  856. return Dep;
  857. // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
  858. // update MemDep when we remove instructions.
  859. Instruction *Inst = Dep.getInst();
  860. assert(Inst && "Didn't depend on anything?");
  861. ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
  862. ReverseNonLocalPtrDeps[Inst].insert(CacheKey);
  863. return Dep;
  864. }
  865. /// Sort the NonLocalDepInfo cache, given a certain number of elements in the
  866. /// array that are already properly ordered.
  867. ///
  868. /// This is optimized for the case when only a few entries are added.
  869. static void
  870. SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache,
  871. unsigned NumSortedEntries) {
  872. switch (Cache.size() - NumSortedEntries) {
  873. case 0:
  874. // done, no new entries.
  875. break;
  876. case 2: {
  877. // Two new entries, insert the last one into place.
  878. NonLocalDepEntry Val = Cache.back();
  879. Cache.pop_back();
  880. MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
  881. std::upper_bound(Cache.begin(), Cache.end() - 1, Val);
  882. Cache.insert(Entry, Val);
  883. LLVM_FALLTHROUGH;
  884. }
  885. case 1:
  886. // One new entry, just insert the new value at the appropriate position.
  887. if (Cache.size() != 1) {
  888. NonLocalDepEntry Val = Cache.back();
  889. Cache.pop_back();
  890. MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
  891. llvm::upper_bound(Cache, Val);
  892. Cache.insert(Entry, Val);
  893. }
  894. break;
  895. default:
  896. // Added many values, do a full scale sort.
  897. llvm::sort(Cache);
  898. break;
  899. }
  900. }
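  // As a rough cost sketch for SortNonLocalDepInfoCache above: with one or two
  // appended entries the existing sorted prefix is kept and each new entry is
  // placed with a single binary search and insertion, so the common case
  // avoids re-sorting the whole cache; only when three or more entries were
  // appended does the code fall back to a full llvm::sort.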
  901. /// Perform a dependency query based on pointer/pointeesize starting at the end
  902. /// of StartBB.
  903. ///
  904. /// Add any clobber/def results to the results vector and keep track of which
  905. /// blocks are visited in 'Visited'.
  906. ///
  907. /// This has special behavior for the first block queries (when SkipFirstBlock
  908. /// is true). In this special case, it ignores the contents of the specified
  909. /// block and starts returning dependence info for its predecessors.
  910. ///
  911. /// This function returns true on success, or false to indicate that it could
  912. /// not compute dependence information for some reason. This should be treated
  913. /// as a clobber dependence on the first instruction in the predecessor block.
  914. bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
  915. Instruction *QueryInst, const PHITransAddr &Pointer,
  916. const MemoryLocation &Loc, bool isLoad, BasicBlock *StartBB,
  917. SmallVectorImpl<NonLocalDepResult> &Result,
  918. DenseMap<BasicBlock *, Value *> &Visited, bool SkipFirstBlock,
  919. bool IsIncomplete) {
  920. // Look up the cached info for Pointer.
  921. ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);
  922. // Set up a temporary NLPI value. If the map doesn't yet have an entry for
  923. // CacheKey, this value will be inserted as the associated value. Otherwise,
  924. // it'll be ignored, and we'll have to check to see if the cached size and
  925. // aa tags are consistent with the current query.
  926. NonLocalPointerInfo InitialNLPI;
  927. InitialNLPI.Size = Loc.Size;
  928. InitialNLPI.AATags = Loc.AATags;
  929. bool isInvariantLoad = false;
  930. if (LoadInst *LI = dyn_cast_or_null<LoadInst>(QueryInst))
  931. isInvariantLoad = LI->getMetadata(LLVMContext::MD_invariant_load);
  932. // Get the NLPI for CacheKey, inserting one into the map if it doesn't
  933. // already have one.
  934. std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
  935. NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI));
  936. NonLocalPointerInfo *CacheInfo = &Pair.first->second;
  937. // If we already have a cache entry for this CacheKey, we may need to do some
  938. // work to reconcile the cache entry and the current query.
  939. // Invariant loads don't participate in caching. Thus no need to reconcile.
  940. if (!isInvariantLoad && !Pair.second) {
  941. if (CacheInfo->Size != Loc.Size) {
  942. bool ThrowOutEverything;
  943. if (CacheInfo->Size.hasValue() && Loc.Size.hasValue()) {
  944. // FIXME: We may be able to do better in the face of results with mixed
  945. // precision. We don't appear to get them in practice, though, so just
  946. // be conservative.
  947. ThrowOutEverything =
  948. CacheInfo->Size.isPrecise() != Loc.Size.isPrecise() ||
  949. CacheInfo->Size.getValue() < Loc.Size.getValue();
  950. } else {
  951. // For our purposes, unknown size > all others.
  952. ThrowOutEverything = !Loc.Size.hasValue();
  953. }
  954. if (ThrowOutEverything) {
  955. // The query's Size is greater than the cached one. Throw out the
  956. // cached data and proceed with the query at the greater size.
  957. CacheInfo->Pair = BBSkipFirstBlockPair();
  958. CacheInfo->Size = Loc.Size;
  959. for (auto &Entry : CacheInfo->NonLocalDeps)
  960. if (Instruction *Inst = Entry.getResult().getInst())
  961. RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
  962. CacheInfo->NonLocalDeps.clear();
  963. // The cache is cleared (in the above line) so we will have lost
  964. // information about blocks we have already visited. We therefore must
  965. // assume that the cache information is incomplete.
  966. IsIncomplete = true;
  967. } else {
  968. // This query's Size is less than the cached one. Conservatively restart
  969. // the query using the greater size.
  970. return getNonLocalPointerDepFromBB(
  971. QueryInst, Pointer, Loc.getWithNewSize(CacheInfo->Size), isLoad,
  972. StartBB, Result, Visited, SkipFirstBlock, IsIncomplete);
  973. }
  974. }
  975. // If the query's AATags are inconsistent with the cached one,
  976. // conservatively throw out the cached data and restart the query with
  977. // no tag if needed.
  978. if (CacheInfo->AATags != Loc.AATags) {
  979. if (CacheInfo->AATags) {
  980. CacheInfo->Pair = BBSkipFirstBlockPair();
  981. CacheInfo->AATags = AAMDNodes();
  982. for (auto &Entry : CacheInfo->NonLocalDeps)
  983. if (Instruction *Inst = Entry.getResult().getInst())
  984. RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
  985. CacheInfo->NonLocalDeps.clear();
  986. // The cache is cleared (in the above line) so we will have lost
  987. // information about blocks we have already visited. We therefore must
  988. // assume that the cache information is incomplete.
  989. IsIncomplete = true;
  990. }
  991. if (Loc.AATags)
  992. return getNonLocalPointerDepFromBB(
  993. QueryInst, Pointer, Loc.getWithoutAATags(), isLoad, StartBB, Result,
  994. Visited, SkipFirstBlock, IsIncomplete);
  995. }
  996. }
  997. NonLocalDepInfo *Cache = &CacheInfo->NonLocalDeps;
  998. // If we have valid cached information for exactly the block we are
  999. // investigating, just return it with no recomputation.
  1000. // Don't use cached information for invariant loads since it is valid for
  1001. // non-invariant loads only.
  1002. if (!IsIncomplete && !isInvariantLoad &&
  1003. CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
  1004. // We have a fully cached result for this query, so we can just return the
  1005. // cached results and populate the visited set. However, we have to verify
  1006. // that we don't already have conflicting results for these blocks. Check
  1007. // to ensure that if a block in the results set is in the visited set that
  1008. // it was for the same pointer query.
  1009. if (!Visited.empty()) {
  1010. for (auto &Entry : *Cache) {
  1011. DenseMap<BasicBlock *, Value *>::iterator VI =
  1012. Visited.find(Entry.getBB());
  1013. if (VI == Visited.end() || VI->second == Pointer.getAddr())
  1014. continue;
  1015. // We have a pointer mismatch in a block. Just return false, saying
  1016. // that something was clobbered in this result. We could also do a
  1017. // non-fully cached query, but there is little point in doing this.
  1018. return false;
  1019. }
  1020. }
  1021. Value *Addr = Pointer.getAddr();
  1022. for (auto &Entry : *Cache) {
  1023. Visited.insert(std::make_pair(Entry.getBB(), Addr));
  1024. if (Entry.getResult().isNonLocal()) {
  1025. continue;
  1026. }
  1027. if (DT.isReachableFromEntry(Entry.getBB())) {
  1028. Result.push_back(
  1029. NonLocalDepResult(Entry.getBB(), Entry.getResult(), Addr));
  1030. }
  1031. }
  1032. ++NumCacheCompleteNonLocalPtr;
  1033. return true;
  1034. }
  1035. // Otherwise, either this is a new block, a block with an invalid cache
  1036. // pointer, or one whose cache entry we are about to invalidate by adding
  1037. // more info than it describes. If empty and not explicitly marked as
  1038. // incomplete, the result will be valid cache info, otherwise it isn't.
  1039. //
  1040. // Invariant loads don't affect the cache in any way, so there is no need to
  1041. // update CacheInfo either.
  1042. if (!isInvariantLoad) {
  1043. if (!IsIncomplete && Cache->empty())
  1044. CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
  1045. else
  1046. CacheInfo->Pair = BBSkipFirstBlockPair();
  1047. }
  1048. SmallVector<BasicBlock *, 32> Worklist;
  1049. Worklist.push_back(StartBB);
  1050. // PredList used inside loop.
  1051. SmallVector<std::pair<BasicBlock *, PHITransAddr>, 16> PredList;
  1052. // Keep track of the entries that we know are sorted. Previously cached
  1053. // entries will all be sorted. The entries we add we only sort on demand (we
  1054. // don't insert every element into its sorted position). We know that we
  1055. // won't get any reuse from currently inserted values, because we don't
  1056. // revisit blocks after we insert info for them.
  1057. unsigned NumSortedEntries = Cache->size();
  1058. unsigned WorklistEntries = BlockNumberLimit;
  1059. bool GotWorklistLimit = false;
  1060. LLVM_DEBUG(AssertSorted(*Cache));
  1061. BatchAAResults BatchAA(AA);
  1062. while (!Worklist.empty()) {
  1063. BasicBlock *BB = Worklist.pop_back_val();
  1064. // If we do process a large number of blocks, it becomes very expensive and
  1065. // likely isn't worth worrying about.
  1066. if (Result.size() > NumResultsLimit) {
  1067. Worklist.clear();
  1068. // Sort it now (if needed) so that recursive invocations of
  1069. // getNonLocalPointerDepFromBB and other routines that could reuse the
  1070. // cache value will only see properly sorted cache arrays.
  1071. if (Cache && NumSortedEntries != Cache->size()) {
  1072. SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
  1073. }
  1074. // Since we bail out, the "Cache" set won't contain all of the
  1075. // results for the query. This is ok (we can still use it to accelerate
  1076. // specific block queries) but we can't do the fastpath "return all
  1077. // results from the set". Clear out the indicator for this.
  1078. CacheInfo->Pair = BBSkipFirstBlockPair();
  1079. return false;
  1080. }

    // Skip the first block if we have it.
    if (!SkipFirstBlock) {
      // Analyze the dependency of *Pointer in FromBB. See if we already have
      // been here.
      assert(Visited.count(BB) && "Should check 'visited' before adding to WL");

      // Get the dependency info for Pointer in BB. If we have cached
      // information, we will use it, otherwise we compute it.
      LLVM_DEBUG(AssertSorted(*Cache, NumSortedEntries));
      MemDepResult Dep = getNonLocalInfoForBlock(
          QueryInst, Loc, isLoad, BB, Cache, NumSortedEntries, BatchAA);

      // If we got a Def or Clobber, add this to the list of results.
      if (!Dep.isNonLocal()) {
        if (DT.isReachableFromEntry(BB)) {
          Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr()));
          continue;
        }
      }
    }

    // If 'Pointer' is an instruction defined in this block, then we need to do
    // phi translation to change it into a value live in the predecessor block.
    // If not, we just add the predecessors to the worklist and scan them with
    // the same Pointer.
    if (!Pointer.NeedsPHITranslationFromBlock(BB)) {
      SkipFirstBlock = false;
      SmallVector<BasicBlock *, 16> NewBlocks;
      for (BasicBlock *Pred : PredCache.get(BB)) {
        // Verify that we haven't looked at this block yet.
        std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> InsertRes =
            Visited.insert(std::make_pair(Pred, Pointer.getAddr()));
        if (InsertRes.second) {
          // First time we've looked at Pred.
          NewBlocks.push_back(Pred);
          continue;
        }

        // If we have seen this block before, but it was with a different
        // pointer then we have a phi translation failure and we have to treat
        // this as a clobber.
        if (InsertRes.first->second != Pointer.getAddr()) {
          // Make sure to clean up the Visited map before continuing on to
          // PredTranslationFailure.
          for (unsigned i = 0; i < NewBlocks.size(); i++)
            Visited.erase(NewBlocks[i]);
          goto PredTranslationFailure;
        }
      }
      if (NewBlocks.size() > WorklistEntries) {
        // Make sure to clean up the Visited map before continuing on to
        // PredTranslationFailure.
        for (unsigned i = 0; i < NewBlocks.size(); i++)
          Visited.erase(NewBlocks[i]);
        GotWorklistLimit = true;
        goto PredTranslationFailure;
      }
      WorklistEntries -= NewBlocks.size();
      Worklist.append(NewBlocks.begin(), NewBlocks.end());
      continue;
    }

    // We do need to do phi translation. If we know ahead of time that we can't
    // phi translate this value, don't even try.
    if (!Pointer.IsPotentiallyPHITranslatable())
      goto PredTranslationFailure;

    // We may have added values to the cache list before this PHI translation.
    // If so, we haven't done anything to ensure that the cache remains sorted.
    // Sort it now (if needed) so that recursive invocations of
    // getNonLocalPointerDepFromBB and other routines that could reuse the cache
    // value will only see properly sorted cache arrays.
    if (Cache && NumSortedEntries != Cache->size()) {
      SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
      NumSortedEntries = Cache->size();
    }
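
    // The recursive getNonLocalPointerDepFromBB calls below can grow
    // NonLocalPointerDeps and invalidate this pointer into it, so drop it here
    // and re-acquire it from the map afterwards.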
    Cache = nullptr;

    PredList.clear();
    for (BasicBlock *Pred : PredCache.get(BB)) {
      PredList.push_back(std::make_pair(Pred, Pointer));

      // Get the PHI translated pointer in this predecessor. This can fail if
      // not translatable, in which case the getAddr() returns null.
      PHITransAddr &PredPointer = PredList.back().second;
      PredPointer.PHITranslateValue(BB, Pred, &DT, /*MustDominate=*/false);
      Value *PredPtrVal = PredPointer.getAddr();

      // Check to see if we have already visited this pred block with another
      // pointer. If so, we can't do this lookup. This failure can occur
      // with PHI translation when a critical edge exists and the PHI node in
      // the successor translates to a pointer value different than the
      // pointer the block was first analyzed with.
      std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> InsertRes =
          Visited.insert(std::make_pair(Pred, PredPtrVal));

      if (!InsertRes.second) {
        // We found the pred; take it off the list of preds to visit.
        PredList.pop_back();

        // If the predecessor was visited with PredPtr, then we already did
        // the analysis and can ignore it.
        if (InsertRes.first->second == PredPtrVal)
          continue;

        // Otherwise, the block was previously analyzed with a different
        // pointer. We can't represent the result of this case, so we just
        // treat this as a phi translation failure.

        // Make sure to clean up the Visited map before continuing on to
        // PredTranslationFailure.
        for (unsigned i = 0, n = PredList.size(); i < n; ++i)
          Visited.erase(PredList[i].first);

        goto PredTranslationFailure;
      }
    }

    // Actually process results here; this needs to be a separate loop to avoid
    // calling getNonLocalPointerDepFromBB for blocks we don't want to return
    // any results for. (getNonLocalPointerDepFromBB will modify our
    // data structures in ways the code after the PredTranslationFailure label
    // doesn't expect.)
    for (unsigned i = 0, n = PredList.size(); i < n; ++i) {
      BasicBlock *Pred = PredList[i].first;
      PHITransAddr &PredPointer = PredList[i].second;
      Value *PredPtrVal = PredPointer.getAddr();

      bool CanTranslate = true;
      // If PHI translation was unable to find an available pointer in this
      // predecessor, then we have to assume that the pointer is clobbered in
      // that predecessor. We can still do PRE of the load, which would insert
      // a computation of the pointer in this predecessor.
      if (!PredPtrVal)
        CanTranslate = false;

      // FIXME: it is entirely possible that PHI translating will end up with
      // the same value. Consider PHI translating something like:
      // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need*
      // to recurse here, pedantically speaking.

      // If getNonLocalPointerDepFromBB fails here, that means the cached
      // result conflicted with the Visited list; we have to conservatively
      // assume it is unknown, but this also does not block PRE of the load.
      if (!CanTranslate ||
          !getNonLocalPointerDepFromBB(QueryInst, PredPointer,
                                       Loc.getWithNewPtr(PredPtrVal), isLoad,
                                       Pred, Result, Visited)) {
        // Add the entry to the Result list.
        NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal);
        Result.push_back(Entry);

        // Since we had a phi translation failure, the cache for CacheKey won't
        // include all of the entries that we need to immediately satisfy
        // future queries. Mark this in NonLocalPointerDeps by setting the
        // BBSkipFirstBlockPair pointer to null. This requires reuse of the
        // cached value to do more work but not miss the phi trans failure.
        NonLocalPointerInfo &NLPI = NonLocalPointerDeps[CacheKey];
        NLPI.Pair = BBSkipFirstBlockPair();
        continue;
      }
    }

    // Refresh the CacheInfo/Cache pointer so that it isn't invalidated.
    CacheInfo = &NonLocalPointerDeps[CacheKey];
    Cache = &CacheInfo->NonLocalDeps;
    NumSortedEntries = Cache->size();

    // Since we did phi translation, the "Cache" set won't contain all of the
    // results for the query. This is ok (we can still use it to accelerate
    // specific block queries) but we can't do the fastpath "return all
    // results from the set". Clear out the indicator for this.
    CacheInfo->Pair = BBSkipFirstBlockPair();
    SkipFirstBlock = false;
    continue;

  PredTranslationFailure:
    // The following code is "failure"; we can't produce a sane translation
    // for the given block. It assumes that we haven't modified any of
    // our data structures while processing the current block.

    if (!Cache) {
      // Refresh the CacheInfo/Cache pointer if it got invalidated.
      CacheInfo = &NonLocalPointerDeps[CacheKey];
      Cache = &CacheInfo->NonLocalDeps;
      NumSortedEntries = Cache->size();
    }

    // Since we failed phi translation, the "Cache" set won't contain all of
    // the results for the query. This is ok (we can still use it to accelerate
    // specific block queries) but we can't do the fastpath "return all
    // results from the set". Clear out the indicator for this.
    CacheInfo->Pair = BBSkipFirstBlockPair();

    // If *nothing* works, mark the pointer as unknown.
    //
    // If this is the magic first block, return this as a clobber of the whole
    // incoming value. Since we can't phi translate to one of the predecessors,
    // we have to bail out.
    if (SkipFirstBlock)
      return false;

    // Results of invariant loads are not cached, so there is no cached
    // information to update.
    if (!isInvariantLoad) {
      for (NonLocalDepEntry &I : llvm::reverse(*Cache)) {
        if (I.getBB() != BB)
          continue;

        assert((GotWorklistLimit || I.getResult().isNonLocal() ||
                !DT.isReachableFromEntry(BB)) &&
               "Should only be here with transparent block");

        I.setResult(MemDepResult::getUnknown());

        break;
      }
    }
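
    // GotWorklistLimit is only read by the assert above; the (void) cast below
    // keeps it referenced in builds where asserts are compiled out.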
    (void)GotWorklistLimit;
    // Go ahead and report unknown dependence.
    Result.push_back(
        NonLocalDepResult(BB, MemDepResult::getUnknown(), Pointer.getAddr()));
  }

  // Okay, we're done now. If we added new values to the cache, re-sort it.
  SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
  LLVM_DEBUG(AssertSorted(*Cache));
  return true;
}

/// If P exists in CachedNonLocalPointerInfo or NonLocalDefsCache, remove it.
void MemoryDependenceResults::removeCachedNonLocalPointerDependencies(
    ValueIsLoadPair P) {

  // Most of the time this cache is empty.
  if (!NonLocalDefsCache.empty()) {
    auto it = NonLocalDefsCache.find(P.getPointer());
    if (it != NonLocalDefsCache.end()) {
      RemoveFromReverseMap(ReverseNonLocalDefsCache,
                           it->second.getResult().getInst(), P.getPointer());
      NonLocalDefsCache.erase(it);
    }

    if (auto *I = dyn_cast<Instruction>(P.getPointer())) {
      auto toRemoveIt = ReverseNonLocalDefsCache.find(I);
      if (toRemoveIt != ReverseNonLocalDefsCache.end()) {
        for (const auto *entry : toRemoveIt->second)
          NonLocalDefsCache.erase(entry);
        ReverseNonLocalDefsCache.erase(toRemoveIt);
      }
    }
  }

  CachedNonLocalPointerInfo::iterator It = NonLocalPointerDeps.find(P);
  if (It == NonLocalPointerDeps.end())
    return;

  // Remove all of the entries in the BB->val map. This involves removing
  // instructions from the reverse map.
  NonLocalDepInfo &PInfo = It->second.NonLocalDeps;

  for (const NonLocalDepEntry &DE : PInfo) {
    Instruction *Target = DE.getResult().getInst();
    if (!Target)
      continue; // Ignore non-local dep results.
    assert(Target->getParent() == DE.getBB());

    // Eliminating the dirty entry from 'Cache', so update the reverse info.
    RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P);
  }

  // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo).
  NonLocalPointerDeps.erase(It);
}

void MemoryDependenceResults::invalidateCachedPointerInfo(Value *Ptr) {
  // If Ptr isn't really a pointer, just ignore it.
  if (!Ptr->getType()->isPointerTy())
    return;
  // Flush store info for the pointer.
  removeCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false));
  // Flush load info for the pointer.
  removeCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true));
  // Invalidate phis that use the pointer.
  PV.invalidateValue(Ptr);
}

void MemoryDependenceResults::invalidateCachedPredecessors() {
  PredCache.clear();
}

void MemoryDependenceResults::removeInstruction(Instruction *RemInst) {
  // Walk through the Non-local dependencies, removing this one as the value
  // for any cached queries.
  NonLocalDepMapType::iterator NLDI = NonLocalDepsMap.find(RemInst);
  if (NLDI != NonLocalDepsMap.end()) {
    NonLocalDepInfo &BlockMap = NLDI->second.first;
    for (auto &Entry : BlockMap)
      if (Instruction *Inst = Entry.getResult().getInst())
        RemoveFromReverseMap(ReverseNonLocalDeps, Inst, RemInst);
    NonLocalDepsMap.erase(NLDI);
  }

  // If we have a cached local dependence query for this instruction, remove it.
  LocalDepMapType::iterator LocalDepEntry = LocalDeps.find(RemInst);
  if (LocalDepEntry != LocalDeps.end()) {
    // Remove us from DepInst's reverse set now that the local dep info is gone.
    if (Instruction *Inst = LocalDepEntry->second.getInst())
      RemoveFromReverseMap(ReverseLocalDeps, Inst, RemInst);

    // Remove this local dependency info.
    LocalDeps.erase(LocalDepEntry);
  }

  // If we have any cached dependencies on this instruction, remove them.

  // If the instruction is a pointer, remove it from both the load info and the
  // store info.
  if (RemInst->getType()->isPointerTy()) {
    removeCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false));
    removeCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true));
  } else {
    // Otherwise, if the instruction is in the map directly, it must be a load.
    // Remove it.
    auto toRemoveIt = NonLocalDefsCache.find(RemInst);
    if (toRemoveIt != NonLocalDefsCache.end()) {
      assert(isa<LoadInst>(RemInst) &&
             "only load instructions should be added directly");
      const Instruction *DepV = toRemoveIt->second.getResult().getInst();
      ReverseNonLocalDefsCache.find(DepV)->second.erase(RemInst);
      NonLocalDefsCache.erase(toRemoveIt);
    }
  }

  // Loop over all of the things that depend on the instruction we're removing.
  SmallVector<std::pair<Instruction *, Instruction *>, 8> ReverseDepsToAdd;

  // If we find RemInst as a clobber or Def in any of the maps for other values,
  // we need to replace its entry with a dirty version of the instruction after
  // it. If RemInst is a terminator, we use a null dirty value.
  //
  // Using a dirty version of the instruction after RemInst saves having to scan
  // the entire block to get to this point.
  MemDepResult NewDirtyVal;
  if (!RemInst->isTerminator())
    NewDirtyVal = MemDepResult::getDirty(&*++RemInst->getIterator());

  ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);
  if (ReverseDepIt != ReverseLocalDeps.end()) {
    // RemInst can't be the terminator if it has local stuff depending on it.
    assert(!ReverseDepIt->second.empty() && !RemInst->isTerminator() &&
           "Nothing can locally depend on a terminator");

    for (Instruction *InstDependingOnRemInst : ReverseDepIt->second) {
      assert(InstDependingOnRemInst != RemInst &&
             "Already removed our local dep info");

      LocalDeps[InstDependingOnRemInst] = NewDirtyVal;

      // Make sure to remember that new things depend on NewDirtyVal's
      // instruction.
      assert(NewDirtyVal.getInst() &&
             "There is no way something else can have "
             "a local dep on this if it is a terminator!");
      ReverseDepsToAdd.push_back(
          std::make_pair(NewDirtyVal.getInst(), InstDependingOnRemInst));
    }

    ReverseLocalDeps.erase(ReverseDepIt);

    // Add new reverse deps after scanning the set, to avoid invalidating the
    // 'ReverseDeps' reference.
    while (!ReverseDepsToAdd.empty()) {
      ReverseLocalDeps[ReverseDepsToAdd.back().first].insert(
          ReverseDepsToAdd.back().second);
      ReverseDepsToAdd.pop_back();
    }
  }

  ReverseDepIt = ReverseNonLocalDeps.find(RemInst);
  if (ReverseDepIt != ReverseNonLocalDeps.end()) {
    for (Instruction *I : ReverseDepIt->second) {
      assert(I != RemInst && "Already removed NonLocalDep info for RemInst");

      PerInstNLInfo &INLD = NonLocalDepsMap[I];
      // The information is now dirty!
      INLD.second = true;

      for (auto &Entry : INLD.first) {
        if (Entry.getResult().getInst() != RemInst)
          continue;

        // Convert to a dirty entry for the subsequent instruction.
        Entry.setResult(NewDirtyVal);

        if (Instruction *NextI = NewDirtyVal.getInst())
          ReverseDepsToAdd.push_back(std::make_pair(NextI, I));
      }
    }

    ReverseNonLocalDeps.erase(ReverseDepIt);

    // Add new reverse deps after scanning the set, to avoid invalidating 'Set'.
    while (!ReverseDepsToAdd.empty()) {
      ReverseNonLocalDeps[ReverseDepsToAdd.back().first].insert(
          ReverseDepsToAdd.back().second);
      ReverseDepsToAdd.pop_back();
    }
  }

  // If the instruction is in ReverseNonLocalPtrDeps then it appears as a
  // value in the NonLocalPointerDeps info.
  ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt =
      ReverseNonLocalPtrDeps.find(RemInst);
  if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) {
    SmallVector<std::pair<Instruction *, ValueIsLoadPair>, 8>
        ReversePtrDepsToAdd;

    for (ValueIsLoadPair P : ReversePtrDepIt->second) {
      assert(P.getPointer() != RemInst &&
             "Already removed NonLocalPointerDeps info for RemInst");

      NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps;

      // The cache is not valid for any specific block anymore.
      NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair();

      // Update any entries for RemInst to use the instruction after it.
      for (auto &Entry : NLPDI) {
        if (Entry.getResult().getInst() != RemInst)
          continue;

        // Convert to a dirty entry for the subsequent instruction.
        Entry.setResult(NewDirtyVal);

        if (Instruction *NewDirtyInst = NewDirtyVal.getInst())
          ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P));
      }

      // Re-sort the NonLocalDepInfo. Changing the dirty entry to its
      // subsequent value may invalidate the sortedness.
      llvm::sort(NLPDI);
    }

    ReverseNonLocalPtrDeps.erase(ReversePtrDepIt);

    while (!ReversePtrDepsToAdd.empty()) {
      ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first].insert(
          ReversePtrDepsToAdd.back().second);
      ReversePtrDepsToAdd.pop_back();
    }
  }

  // Invalidate phis that use the removed instruction.
  PV.invalidateValue(RemInst);

  assert(!NonLocalDepsMap.count(RemInst) && "RemInst got reinserted?");
  LLVM_DEBUG(verifyRemoved(RemInst));
}

/// Verify that the specified instruction does not occur in our internal data
/// structures.
///
/// This function verifies by asserting in debug builds.
void MemoryDependenceResults::verifyRemoved(Instruction *D) const {
#ifndef NDEBUG
  for (const auto &DepKV : LocalDeps) {
    assert(DepKV.first != D && "Inst occurs in data structures");
    assert(DepKV.second.getInst() != D && "Inst occurs in data structures");
  }

  for (const auto &DepKV : NonLocalPointerDeps) {
    assert(DepKV.first.getPointer() != D && "Inst occurs in NLPD map key");
    for (const auto &Entry : DepKV.second.NonLocalDeps)
      assert(Entry.getResult().getInst() != D && "Inst occurs as NLPD value");
  }

  for (const auto &DepKV : NonLocalDepsMap) {
    assert(DepKV.first != D && "Inst occurs in data structures");
    const PerInstNLInfo &INLD = DepKV.second;
    for (const auto &Entry : INLD.first)
      assert(Entry.getResult().getInst() != D &&
             "Inst occurs in data structures");
  }

  for (const auto &DepKV : ReverseLocalDeps) {
    assert(DepKV.first != D && "Inst occurs in data structures");
    for (Instruction *Inst : DepKV.second)
      assert(Inst != D && "Inst occurs in data structures");
  }

  for (const auto &DepKV : ReverseNonLocalDeps) {
    assert(DepKV.first != D && "Inst occurs in data structures");
    for (Instruction *Inst : DepKV.second)
      assert(Inst != D && "Inst occurs in data structures");
  }

  for (const auto &DepKV : ReverseNonLocalPtrDeps) {
    assert(DepKV.first != D && "Inst occurs in rev NLPD map");
    for (ValueIsLoadPair P : DepKV.second)
      assert(P != ValueIsLoadPair(D, false) && P != ValueIsLoadPair(D, true) &&
             "Inst occurs in ReverseNonLocalPtrDeps map");
  }
#endif
}
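
// Opaque key the new pass manager uses to identify this analysis.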
AnalysisKey MemoryDependenceAnalysis::Key;

MemoryDependenceAnalysis::MemoryDependenceAnalysis()
    : DefaultBlockScanLimit(BlockScanLimit) {}

MemoryDependenceResults
MemoryDependenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
  auto &AA = AM.getResult<AAManager>(F);
  auto &AC = AM.getResult<AssumptionAnalysis>(F);
  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  auto &PV = AM.getResult<PhiValuesAnalysis>(F);
  return MemoryDependenceResults(AA, AC, TLI, DT, PV, DefaultBlockScanLimit);
}
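
// Legacy pass manager registration.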
char MemoryDependenceWrapperPass::ID = 0;

INITIALIZE_PASS_BEGIN(MemoryDependenceWrapperPass, "memdep",
                      "Memory Dependence Analysis", false, true)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PhiValuesWrapperPass)
INITIALIZE_PASS_END(MemoryDependenceWrapperPass, "memdep",
                    "Memory Dependence Analysis", false, true)

MemoryDependenceWrapperPass::MemoryDependenceWrapperPass() : FunctionPass(ID) {
  initializeMemoryDependenceWrapperPassPass(*PassRegistry::getPassRegistry());
}

MemoryDependenceWrapperPass::~MemoryDependenceWrapperPass() = default;

void MemoryDependenceWrapperPass::releaseMemory() {
  MemDep.reset();
}

void MemoryDependenceWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();
  AU.addRequired<AssumptionCacheTracker>();
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addRequired<PhiValuesWrapperPass>();
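  // Transitive: the returned MemoryDependenceResults keeps references to AA
  // and TLI, so those analyses must stay alive as long as our result is used.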
  AU.addRequiredTransitive<AAResultsWrapperPass>();
  AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
}

bool MemoryDependenceResults::invalidate(Function &F, const PreservedAnalyses &PA,
                                         FunctionAnalysisManager::Invalidator &Inv) {
  // Check whether our analysis is preserved.
  auto PAC = PA.getChecker<MemoryDependenceAnalysis>();
  if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
    // If not, give up now.
    return true;

  // Check whether the analyses we depend on became invalid for any reason.
  if (Inv.invalidate<AAManager>(F, PA) ||
      Inv.invalidate<AssumptionAnalysis>(F, PA) ||
      Inv.invalidate<DominatorTreeAnalysis>(F, PA) ||
      Inv.invalidate<PhiValuesAnalysis>(F, PA))
    return true;

  // Otherwise this analysis result remains valid.
  return false;
}

unsigned MemoryDependenceResults::getDefaultBlockScanLimit() const {
  return DefaultBlockScanLimit;
}

bool MemoryDependenceWrapperPass::runOnFunction(Function &F) {
  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &PV = getAnalysis<PhiValuesWrapperPass>().getResult();
  MemDep.emplace(AA, AC, TLI, DT, PV, BlockScanLimit);
  return false;
}