DeadStoreElimination.cpp

  1. //===- DeadStoreElimination.cpp - MemorySSA Backed Dead Store Elimination -===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // The code below implements dead store elimination using MemorySSA. It uses
  10. // the following general approach: given a MemoryDef, walk upwards to find
  11. // clobbering MemoryDefs that may be killed by the starting def. Then check
  12. // that there are no uses that may read the location of the original MemoryDef
  13. // in between both MemoryDefs. A bit more concretely:
  14. //
  15. // For all MemoryDefs StartDef:
  16. // 1. Get the next dominating clobbering MemoryDef (MaybeDeadAccess) by walking
  17. // upwards.
  18. // 2. Check that there are no reads between MaybeDeadAccess and the StartDef by
  19. // checking all uses starting at MaybeDeadAccess and walking until we see
  20. // StartDef.
  21. // 3. For each found CurrentDef, check that:
  22. // 1. There are no barrier instructions between CurrentDef and StartDef (like
  23. // throws or stores with ordering constraints).
  24. // 2. StartDef is executed whenever CurrentDef is executed.
  25. // 3. StartDef completely overwrites CurrentDef.
  26. // 4. Erase CurrentDef from the function and MemorySSA.
  27. //
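// For illustration only, consider a minimal hypothetical IR fragment:
//
//   store i32 0, i32* %p   ; MaybeDeadAccess: no reads of %p before the next store
//   store i32 1, i32* %p   ; StartDef: completely overwrites the store above
//
// Here the first store satisfies the conditions above and is erased.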
  28. //===----------------------------------------------------------------------===//
  29. #include "llvm/Transforms/Scalar/DeadStoreElimination.h"
  30. #include "llvm/ADT/APInt.h"
  31. #include "llvm/ADT/DenseMap.h"
  32. #include "llvm/ADT/MapVector.h"
  33. #include "llvm/ADT/PostOrderIterator.h"
  34. #include "llvm/ADT/SetVector.h"
  35. #include "llvm/ADT/SmallPtrSet.h"
  36. #include "llvm/ADT/SmallVector.h"
  37. #include "llvm/ADT/Statistic.h"
  38. #include "llvm/ADT/StringRef.h"
  39. #include "llvm/Analysis/AliasAnalysis.h"
  40. #include "llvm/Analysis/CaptureTracking.h"
  41. #include "llvm/Analysis/GlobalsModRef.h"
  42. #include "llvm/Analysis/LoopInfo.h"
  43. #include "llvm/Analysis/MemoryBuiltins.h"
  44. #include "llvm/Analysis/MemoryLocation.h"
  45. #include "llvm/Analysis/MemorySSA.h"
  46. #include "llvm/Analysis/MemorySSAUpdater.h"
  47. #include "llvm/Analysis/MustExecute.h"
  48. #include "llvm/Analysis/PostDominators.h"
  49. #include "llvm/Analysis/TargetLibraryInfo.h"
  50. #include "llvm/Analysis/ValueTracking.h"
  51. #include "llvm/IR/Argument.h"
  52. #include "llvm/IR/BasicBlock.h"
  53. #include "llvm/IR/Constant.h"
  54. #include "llvm/IR/Constants.h"
  55. #include "llvm/IR/DataLayout.h"
  56. #include "llvm/IR/Dominators.h"
  57. #include "llvm/IR/Function.h"
  58. #include "llvm/IR/IRBuilder.h"
  59. #include "llvm/IR/InstIterator.h"
  60. #include "llvm/IR/InstrTypes.h"
  61. #include "llvm/IR/Instruction.h"
  62. #include "llvm/IR/Instructions.h"
  63. #include "llvm/IR/IntrinsicInst.h"
  64. #include "llvm/IR/Intrinsics.h"
  65. #include "llvm/IR/LLVMContext.h"
  66. #include "llvm/IR/Module.h"
  67. #include "llvm/IR/PassManager.h"
  68. #include "llvm/IR/PatternMatch.h"
  69. #include "llvm/IR/Value.h"
  70. #include "llvm/InitializePasses.h"
  71. #include "llvm/Pass.h"
  72. #include "llvm/Support/Casting.h"
  73. #include "llvm/Support/CommandLine.h"
  74. #include "llvm/Support/Debug.h"
  75. #include "llvm/Support/DebugCounter.h"
  76. #include "llvm/Support/ErrorHandling.h"
  77. #include "llvm/Support/MathExtras.h"
  78. #include "llvm/Support/raw_ostream.h"
  79. #include "llvm/Transforms/Scalar.h"
  80. #include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
  81. #include "llvm/Transforms/Utils/BuildLibCalls.h"
  82. #include "llvm/Transforms/Utils/Local.h"
  83. #include <algorithm>
  84. #include <cassert>
  85. #include <cstddef>
  86. #include <cstdint>
  87. #include <iterator>
  88. #include <map>
  89. #include <utility>
  90. using namespace llvm;
  91. using namespace PatternMatch;
  92. #define DEBUG_TYPE "dse"
  93. STATISTIC(NumRemainingStores, "Number of stores remaining after DSE");
  94. STATISTIC(NumRedundantStores, "Number of redundant stores deleted");
  95. STATISTIC(NumFastStores, "Number of stores deleted");
  96. STATISTIC(NumFastOther, "Number of other instrs removed");
  97. STATISTIC(NumCompletePartials, "Number of stores dead by later partials");
  98. STATISTIC(NumModifiedStores, "Number of stores modified");
  99. STATISTIC(NumCFGChecks, "Number of blocks visited during CFG path checks");
  100. STATISTIC(NumCFGTries, "Number of CFG path checks attempted");
  101. STATISTIC(NumCFGSuccess, "Number of successful CFG path checks");
  102. STATISTIC(NumGetDomMemoryDefPassed,
  103. "Number of times a valid candidate is returned from getDomMemoryDef");
  104. STATISTIC(NumDomMemDefChecks,
  105. "Number iterations check for reads in getDomMemoryDef");
  106. DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa",
  107. "Controls which MemoryDefs are eliminated.");
  108. static cl::opt<bool>
  109. EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking",
  110. cl::init(true), cl::Hidden,
  111. cl::desc("Enable partial-overwrite tracking in DSE"));
  112. static cl::opt<bool>
  113. EnablePartialStoreMerging("enable-dse-partial-store-merging",
  114. cl::init(true), cl::Hidden,
  115. cl::desc("Enable partial store merging in DSE"));
  116. static cl::opt<unsigned>
  117. MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden,
  118. cl::desc("The number of memory instructions to scan for "
  119. "dead store elimination (default = 150)"));
  120. static cl::opt<unsigned> MemorySSAUpwardsStepLimit(
  121. "dse-memoryssa-walklimit", cl::init(90), cl::Hidden,
  122. cl::desc("The maximum number of steps while walking upwards to find "
  123. "MemoryDefs that may be killed (default = 90)"));
  124. static cl::opt<unsigned> MemorySSAPartialStoreLimit(
  125. "dse-memoryssa-partial-store-limit", cl::init(5), cl::Hidden,
  126. cl::desc("The maximum number candidates that only partially overwrite the "
  127. "killing MemoryDef to consider"
  128. " (default = 5)"));
  129. static cl::opt<unsigned> MemorySSADefsPerBlockLimit(
  130. "dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden,
  131. cl::desc("The number of MemoryDefs we consider as candidates to eliminated "
  132. "other stores per basic block (default = 5000)"));
  133. static cl::opt<unsigned> MemorySSASameBBStepCost(
  134. "dse-memoryssa-samebb-cost", cl::init(1), cl::Hidden,
  135. cl::desc(
  136. "The cost of a step in the same basic block as the killing MemoryDef"
  137. "(default = 1)"));
  138. static cl::opt<unsigned>
  139. MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(5),
  140. cl::Hidden,
  141. cl::desc("The cost of a step in a different basic "
  142. "block than the killing MemoryDef"
  143. "(default = 5)"));
  144. static cl::opt<unsigned> MemorySSAPathCheckLimit(
  145. "dse-memoryssa-path-check-limit", cl::init(50), cl::Hidden,
  146. cl::desc("The maximum number of blocks to check when trying to prove that "
  147. "all paths to an exit go through a killing block (default = 50)"));
  148. // This flag allows or disallows DSE to optimize MemorySSA during its
  149. // traversal. Note that DSE optimizing MemorySSA may impact other passes
  150. // downstream of the DSE invocation and can lead to issues not being
  151. // reproducible in isolation (i.e. when MemorySSA is built from scratch). In
  152. // those cases, the flag can be used to check if DSE's MemorySSA optimizations
  153. // impact follow-up passes.
  154. static cl::opt<bool>
  155. OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden,
  156. cl::desc("Allow DSE to optimize memory accesses."));
  157. //===----------------------------------------------------------------------===//
  158. // Helper functions
  159. //===----------------------------------------------------------------------===//
  160. using OverlapIntervalsTy = std::map<int64_t, int64_t>;
  161. using InstOverlapIntervalsTy = DenseMap<Instruction *, OverlapIntervalsTy>;
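// Note: as described in isPartialOverwrite below, each interval in
// OverlapIntervalsTy is keyed by its (half-open) end offset, with the start
// offset as the mapped value; e.g. a write covering bytes [4, 12) is recorded
// as IM[12] = 4.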
  162. /// Returns true if the end of this instruction can be safely shortened in
  163. /// length.
  164. static bool isShortenableAtTheEnd(Instruction *I) {
  165. // Don't shorten stores for now
  166. if (isa<StoreInst>(I))
  167. return false;
  168. if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
  169. switch (II->getIntrinsicID()) {
  170. default: return false;
  171. case Intrinsic::memset:
  172. case Intrinsic::memcpy:
  173. case Intrinsic::memcpy_element_unordered_atomic:
  174. case Intrinsic::memset_element_unordered_atomic:
  175. // Do shorten memory intrinsics.
  176. // FIXME: Add memmove if it's also safe to transform.
  177. return true;
  178. }
  179. }
  180. // Don't shorten libcalls for now.
  181. return false;
  182. }
  183. /// Returns true if the beginning of this instruction can be safely shortened
  184. /// in length.
  185. static bool isShortenableAtTheBeginning(Instruction *I) {
  186. // FIXME: Handle only memset for now. Supporting memcpy/memmove should be
  187. // easily done by offsetting the source address.
  188. return isa<AnyMemSetInst>(I);
  189. }
  190. static uint64_t getPointerSize(const Value *V, const DataLayout &DL,
  191. const TargetLibraryInfo &TLI,
  192. const Function *F) {
  193. uint64_t Size;
  194. ObjectSizeOpts Opts;
  195. Opts.NullIsUnknownSize = NullPointerIsDefined(F);
  196. if (getObjectSize(V, Size, DL, &TLI, Opts))
  197. return Size;
  198. return MemoryLocation::UnknownSize;
  199. }
  200. namespace {
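// Possible results of an overwrite query between a killing and a dead access
// (see isOverwrite and isPartialOverwrite below): OW_Complete means the
// killing access covers the dead one entirely; OW_Begin/OW_End mean only the
// beginning/end of the dead access is overwritten;
// OW_PartialEarlierWithFullLater means the killing store is fully contained
// within the dead one; OW_MaybePartial requires isPartialOverwrite to decide;
// OW_None means the accesses are known not to overlap; OW_Unknown means
// nothing could be determined.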
  201. enum OverwriteResult {
  202. OW_Begin,
  203. OW_Complete,
  204. OW_End,
  205. OW_PartialEarlierWithFullLater,
  206. OW_MaybePartial,
  207. OW_None,
  208. OW_Unknown
  209. };
  210. } // end anonymous namespace
  211. /// Check if two instructions are masked stores that completely
  212. /// overwrite one another. More specifically, \p KillingI has to
  213. /// overwrite \p DeadI.
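/// For illustration only (hypothetical IR, intrinsic mangling abbreviated),
/// the following pair is reported as OW_Complete because the pointers are
/// identical and the mask operands match:
///   call void @llvm.masked.store.v4i32(<4 x i32> %a, <4 x i32>* %p, i32 4, <4 x i1> %m)
///   call void @llvm.masked.store.v4i32(<4 x i32> %b, <4 x i32>* %p, i32 4, <4 x i1> %m)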
  214. static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI,
  215. const Instruction *DeadI,
  216. BatchAAResults &AA) {
  217. const auto *KillingII = dyn_cast<IntrinsicInst>(KillingI);
  218. const auto *DeadII = dyn_cast<IntrinsicInst>(DeadI);
  219. if (KillingII == nullptr || DeadII == nullptr)
  220. return OW_Unknown;
  221. if (KillingII->getIntrinsicID() != Intrinsic::masked_store ||
  222. DeadII->getIntrinsicID() != Intrinsic::masked_store)
  223. return OW_Unknown;
  224. // Pointers.
  225. Value *KillingPtr = KillingII->getArgOperand(1)->stripPointerCasts();
  226. Value *DeadPtr = DeadII->getArgOperand(1)->stripPointerCasts();
  227. if (KillingPtr != DeadPtr && !AA.isMustAlias(KillingPtr, DeadPtr))
  228. return OW_Unknown;
  229. // Masks.
  230. // TODO: check that KillingII's mask is a superset of the DeadII's mask.
  231. if (KillingII->getArgOperand(3) != DeadII->getArgOperand(3))
  232. return OW_Unknown;
  233. return OW_Complete;
  234. }
  235. /// Return 'OW_Complete' if a store to the 'KillingLoc' location completely
  236. /// overwrites a store to the 'DeadLoc' location, 'OW_End' if the end of the
  237. /// 'DeadLoc' location is completely overwritten by 'KillingLoc', 'OW_Begin'
  238. /// if the beginning of the 'DeadLoc' location is overwritten by 'KillingLoc'.
  239. /// 'OW_PartialEarlierWithFullLater' means that a dead (big) store was
  240. /// overwritten by a killing (smaller) store which doesn't write outside the big
  241. /// store's memory locations. Returns 'OW_Unknown' if nothing can be determined.
  242. /// NOTE: This function must only be called if both \p KillingLoc and \p
  243. /// DeadLoc belong to the same underlying object with valid \p KillingOff and
  244. /// \p DeadOff.
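/// For illustration only: if the dead store covers bytes [0, 16) and a killing
/// write of [0, 8) was already recorded in \p IOL by an earlier call, then a
/// killing write of [8, 16) merges into a composite [0, 16) interval and this
/// function returns OW_Complete.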
  245. static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc,
  246. const MemoryLocation &DeadLoc,
  247. int64_t KillingOff, int64_t DeadOff,
  248. Instruction *DeadI,
  249. InstOverlapIntervalsTy &IOL) {
  250. const uint64_t KillingSize = KillingLoc.Size.getValue();
  251. const uint64_t DeadSize = DeadLoc.Size.getValue();
  252. // The accesses may now overlap, although the overlap is not complete. There
  253. // might also be other incomplete overlaps, and together, they might cover the
  254. // complete dead store.
  255. // Note: The correctness of this logic depends on the fact that this function
  256. // is not called with a DepWrite when there are any intervening reads.
  257. if (EnablePartialOverwriteTracking &&
  258. KillingOff < int64_t(DeadOff + DeadSize) &&
  259. int64_t(KillingOff + KillingSize) >= DeadOff) {
  260. // Insert our part of the overlap into the map.
  261. auto &IM = IOL[DeadI];
  262. LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: DeadLoc [" << DeadOff << ", "
  263. << int64_t(DeadOff + DeadSize) << ") KillingLoc ["
  264. << KillingOff << ", " << int64_t(KillingOff + KillingSize)
  265. << ")\n");
  266. // Make sure that we only insert non-overlapping intervals and combine
  267. // adjacent intervals. The intervals are stored in the map with the ending
  268. // offset as the key (in the half-open sense) and the starting offset as
  269. // the value.
  270. int64_t KillingIntStart = KillingOff;
  271. int64_t KillingIntEnd = KillingOff + KillingSize;
  272. // Find any intervals ending at, or after, KillingIntStart which start
  273. // before KillingIntEnd.
  274. auto ILI = IM.lower_bound(KillingIntStart);
  275. if (ILI != IM.end() && ILI->second <= KillingIntEnd) {
  276. // This existing interval is overlapped with the current store somewhere
  277. // in [KillingIntStart, KillingIntEnd]. Merge them by erasing the existing
  278. // intervals and adjusting our start and end.
  279. KillingIntStart = std::min(KillingIntStart, ILI->second);
  280. KillingIntEnd = std::max(KillingIntEnd, ILI->first);
  281. ILI = IM.erase(ILI);
  282. // Continue erasing and adjusting our end in case other previous
  283. // intervals are also overlapped with the current store.
  284. //
  285. // |--- dead 1 ---| |--- dead 2 ---|
  286. // |------- killing---------|
  287. //
  288. while (ILI != IM.end() && ILI->second <= KillingIntEnd) {
  289. assert(ILI->second > KillingIntStart && "Unexpected interval");
  290. KillingIntEnd = std::max(KillingIntEnd, ILI->first);
  291. ILI = IM.erase(ILI);
  292. }
  293. }
  294. IM[KillingIntEnd] = KillingIntStart;
  295. ILI = IM.begin();
  296. if (ILI->second <= DeadOff && ILI->first >= int64_t(DeadOff + DeadSize)) {
  297. LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: DeadLoc ["
  298. << DeadOff << ", " << int64_t(DeadOff + DeadSize)
  299. << ") Composite KillingLoc [" << ILI->second << ", "
  300. << ILI->first << ")\n");
  301. ++NumCompletePartials;
  302. return OW_Complete;
  303. }
  304. }
  305. // Check for a dead store which writes to all the memory locations that
  306. // the killing store writes to.
  307. if (EnablePartialStoreMerging && KillingOff >= DeadOff &&
  308. int64_t(DeadOff + DeadSize) > KillingOff &&
  309. uint64_t(KillingOff - DeadOff) + KillingSize <= DeadSize) {
  310. LLVM_DEBUG(dbgs() << "DSE: Partial overwrite a dead load [" << DeadOff
  311. << ", " << int64_t(DeadOff + DeadSize)
  312. << ") by a killing store [" << KillingOff << ", "
  313. << int64_t(KillingOff + KillingSize) << ")\n");
  314. // TODO: Maybe come up with a better name?
  315. return OW_PartialEarlierWithFullLater;
  316. }
  317. // Another interesting case is if the killing store overwrites the end of the
  318. // dead store.
  319. //
  320. // |--dead--|
  321. // |-- killing --|
  322. //
  323. // In this case we may want to trim the size of the dead store to avoid
  324. // generating stores to addresses which will definitely be overwritten by the
  325. // killing store.
  326. if (!EnablePartialOverwriteTracking &&
  327. (KillingOff > DeadOff && KillingOff < int64_t(DeadOff + DeadSize) &&
  328. int64_t(KillingOff + KillingSize) >= int64_t(DeadOff + DeadSize)))
  329. return OW_End;
  330. // Finally, we also need to check if the killing store overwrites the
  331. // beginning of the dead store.
  332. //
  333. // |--dead--|
  334. // |-- killing --|
  335. //
  336. // In this case we may want to move the destination address and trim the size
  337. // of the dead store to avoid generating stores to addresses which will
  338. // definitely be overwritten by the killing store.
  339. if (!EnablePartialOverwriteTracking &&
  340. (KillingOff <= DeadOff && int64_t(KillingOff + KillingSize) > DeadOff)) {
  341. assert(int64_t(KillingOff + KillingSize) < int64_t(DeadOff + DeadSize) &&
  342. "Expect to be handled as OW_Complete");
  343. return OW_Begin;
  344. }
  345. // Otherwise, they don't completely overlap.
  346. return OW_Unknown;
  347. }
  348. /// Returns true if the memory which is accessed by the second instruction is not
  349. /// modified between the first and the second instruction.
  350. /// Precondition: Second instruction must be dominated by the first
  351. /// instruction.
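/// For illustration only: this is used, e.g., by
/// tryToMergePartialOverlappingStores to ensure no other write clobbers the
/// killing store's location on any path between the two stores being merged.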
  352. static bool
  353. memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI,
  354. BatchAAResults &AA, const DataLayout &DL,
  355. DominatorTree *DT) {
  356. // Do a backwards scan through the CFG from SecondI to FirstI. Look for
  357. // instructions which can modify the memory location accessed by SecondI.
  358. //
  359. // While doing the walk keep track of the address to check. It might be
  360. // different in different basic blocks due to PHI translation.
  361. using BlockAddressPair = std::pair<BasicBlock *, PHITransAddr>;
  362. SmallVector<BlockAddressPair, 16> WorkList;
  363. // Keep track of the address we visited each block with. Bail out if we
  364. // visit a block with different addresses.
  365. DenseMap<BasicBlock *, Value *> Visited;
  366. BasicBlock::iterator FirstBBI(FirstI);
  367. ++FirstBBI;
  368. BasicBlock::iterator SecondBBI(SecondI);
  369. BasicBlock *FirstBB = FirstI->getParent();
  370. BasicBlock *SecondBB = SecondI->getParent();
  371. MemoryLocation MemLoc;
  372. if (auto *MemSet = dyn_cast<MemSetInst>(SecondI))
  373. MemLoc = MemoryLocation::getForDest(MemSet);
  374. else
  375. MemLoc = MemoryLocation::get(SecondI);
  376. auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr);
  377. // Start checking the SecondBB.
  378. WorkList.push_back(
  379. std::make_pair(SecondBB, PHITransAddr(MemLocPtr, DL, nullptr)));
  380. bool isFirstBlock = true;
  381. // Check all blocks going backward until we reach the FirstBB.
  382. while (!WorkList.empty()) {
  383. BlockAddressPair Current = WorkList.pop_back_val();
  384. BasicBlock *B = Current.first;
  385. PHITransAddr &Addr = Current.second;
  386. Value *Ptr = Addr.getAddr();
  387. // Ignore instructions before FirstI if this is the FirstBB.
  388. BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin());
  389. BasicBlock::iterator EI;
  390. if (isFirstBlock) {
  391. // Ignore instructions after SecondI if this is the first visit of SecondBB.
  392. assert(B == SecondBB && "first block is not the store block");
  393. EI = SecondBBI;
  394. isFirstBlock = false;
  395. } else {
  396. // It's not SecondBB or (in case of a loop) the second visit of SecondBB.
  397. // In this case we also have to look at instructions after SecondI.
  398. EI = B->end();
  399. }
  400. for (; BI != EI; ++BI) {
  401. Instruction *I = &*BI;
  402. if (I->mayWriteToMemory() && I != SecondI)
  403. if (isModSet(AA.getModRefInfo(I, MemLoc.getWithNewPtr(Ptr))))
  404. return false;
  405. }
  406. if (B != FirstBB) {
  407. assert(B != &FirstBB->getParent()->getEntryBlock() &&
  408. "Should not hit the entry block because SI must be dominated by LI");
  409. for (BasicBlock *Pred : predecessors(B)) {
  410. PHITransAddr PredAddr = Addr;
  411. if (PredAddr.NeedsPHITranslationFromBlock(B)) {
  412. if (!PredAddr.IsPotentiallyPHITranslatable())
  413. return false;
  414. if (PredAddr.PHITranslateValue(B, Pred, DT, false))
  415. return false;
  416. }
  417. Value *TranslatedPtr = PredAddr.getAddr();
  418. auto Inserted = Visited.insert(std::make_pair(Pred, TranslatedPtr));
  419. if (!Inserted.second) {
  420. // We already visited this block before. If it was with a different
  421. // address - bail out!
  422. if (TranslatedPtr != Inserted.first->second)
  423. return false;
  424. // ... otherwise just skip it.
  425. continue;
  426. }
  427. WorkList.push_back(std::make_pair(Pred, PredAddr));
  428. }
  429. }
  430. }
  431. return true;
  432. }
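/// Try to shrink the dead memory intrinsic \p DeadI so that it no longer
/// writes the bytes the killing access overwrites. For illustration only (a
/// hypothetical case): a 32-byte memset whose last 8 bytes are overwritten by
/// a later store can be trimmed to a 24-byte memset, subject to the alignment
/// handling below.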
  433. static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
  434. uint64_t &DeadSize, int64_t KillingStart,
  435. uint64_t KillingSize, bool IsOverwriteEnd) {
  436. auto *DeadIntrinsic = cast<AnyMemIntrinsic>(DeadI);
  437. Align PrefAlign = DeadIntrinsic->getDestAlign().valueOrOne();
  438. // We assume that memset/memcpy operates in chunks of the "largest" native
  439. // type size and aligned on the same value. That means the optimal start and
  440. // size of memset/memcpy should be a multiple of the preferred alignment of
  441. // that type, so there is no sense in trying to reduce the store size any
  442. // further since any "extra" stores come for free anyway.
  443. // On the other hand, maximum alignment we can achieve is limited by alignment
  444. // of initial store.
  445. // TODO: Limit maximum alignment by preferred (or abi?) alignment of the
  446. // "largest" native type.
  447. // Note: What is the proper way to get that value?
  448. // Should TargetTransformInfo::getRegisterBitWidth be used or anything else?
  449. // PrefAlign = std::min(DL.getPrefTypeAlign(LargestType), PrefAlign);
  450. int64_t ToRemoveStart = 0;
  451. uint64_t ToRemoveSize = 0;
  452. // Compute start and size of the region to remove. Make sure 'PrefAlign' is
  453. // maintained on the remaining store.
  454. if (IsOverwriteEnd) {
  455. // Calculate required adjustment for 'KillingStart' in order to keep
  456. // remaining store size aligned on 'PrefAlign'.
  457. uint64_t Off =
  458. offsetToAlignment(uint64_t(KillingStart - DeadStart), PrefAlign);
  459. ToRemoveStart = KillingStart + Off;
  460. if (DeadSize <= uint64_t(ToRemoveStart - DeadStart))
  461. return false;
  462. ToRemoveSize = DeadSize - uint64_t(ToRemoveStart - DeadStart);
  463. } else {
  464. ToRemoveStart = DeadStart;
  465. assert(KillingSize >= uint64_t(DeadStart - KillingStart) &&
  466. "Not overlapping accesses?");
  467. ToRemoveSize = KillingSize - uint64_t(DeadStart - KillingStart);
  468. // Calculate the required adjustment for 'ToRemoveSize' in order to keep the
  469. // start of the remaining store aligned on 'PrefAlign'.
  470. uint64_t Off = offsetToAlignment(ToRemoveSize, PrefAlign);
  471. if (Off != 0) {
  472. if (ToRemoveSize <= (PrefAlign.value() - Off))
  473. return false;
  474. ToRemoveSize -= PrefAlign.value() - Off;
  475. }
  476. assert(isAligned(PrefAlign, ToRemoveSize) &&
  477. "Should preserve selected alignment");
  478. }
  479. assert(ToRemoveSize > 0 && "Shouldn't reach here if nothing to remove");
  480. assert(DeadSize > ToRemoveSize && "Can't remove more than original size");
  481. uint64_t NewSize = DeadSize - ToRemoveSize;
  482. if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(DeadI)) {
  483. // When shortening an atomic memory intrinsic, the newly shortened
  484. // length must remain an integer multiple of the element size.
  485. const uint32_t ElementSize = AMI->getElementSizeInBytes();
  486. if (0 != NewSize % ElementSize)
  487. return false;
  488. }
  489. LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW "
  490. << (IsOverwriteEnd ? "END" : "BEGIN") << ": " << *DeadI
  491. << "\n KILLER [" << ToRemoveStart << ", "
  492. << int64_t(ToRemoveStart + ToRemoveSize) << ")\n");
  493. Value *DeadWriteLength = DeadIntrinsic->getLength();
  494. Value *TrimmedLength = ConstantInt::get(DeadWriteLength->getType(), NewSize);
  495. DeadIntrinsic->setLength(TrimmedLength);
  496. DeadIntrinsic->setDestAlignment(PrefAlign);
  497. if (!IsOverwriteEnd) {
  498. Value *OrigDest = DeadIntrinsic->getRawDest();
  499. Type *Int8PtrTy =
  500. Type::getInt8PtrTy(DeadIntrinsic->getContext(),
  501. OrigDest->getType()->getPointerAddressSpace());
  502. Value *Dest = OrigDest;
  503. if (OrigDest->getType() != Int8PtrTy)
  504. Dest = CastInst::CreatePointerCast(OrigDest, Int8PtrTy, "", DeadI);
  505. Value *Indices[1] = {
  506. ConstantInt::get(DeadWriteLength->getType(), ToRemoveSize)};
  507. Instruction *NewDestGEP = GetElementPtrInst::CreateInBounds(
  508. Type::getInt8Ty(DeadIntrinsic->getContext()), Dest, Indices, "", DeadI);
  509. NewDestGEP->setDebugLoc(DeadIntrinsic->getDebugLoc());
  510. if (NewDestGEP->getType() != OrigDest->getType())
  511. NewDestGEP = CastInst::CreatePointerCast(NewDestGEP, OrigDest->getType(),
  512. "", DeadI);
  513. DeadIntrinsic->setDest(NewDestGEP);
  514. }
  515. // Finally update start and size of dead access.
  516. if (!IsOverwriteEnd)
  517. DeadStart += ToRemoveSize;
  518. DeadSize = NewSize;
  519. return true;
  520. }
  521. static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap,
  522. int64_t &DeadStart, uint64_t &DeadSize) {
  523. if (IntervalMap.empty() || !isShortenableAtTheEnd(DeadI))
  524. return false;
  525. OverlapIntervalsTy::iterator OII = --IntervalMap.end();
  526. int64_t KillingStart = OII->second;
  527. uint64_t KillingSize = OII->first - KillingStart;
  528. assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
  529. if (KillingStart > DeadStart &&
  530. // Note: "KillingStart - KillingStart" is known to be positive due to
  531. // preceding check.
  532. (uint64_t)(KillingStart - DeadStart) < DeadSize &&
  533. // Note: "DeadSize - (uint64_t)(KillingStart - DeadStart)" is known to
  534. // be non negative due to preceding checks.
  535. KillingSize >= DeadSize - (uint64_t)(KillingStart - DeadStart)) {
  536. if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
  537. true)) {
  538. IntervalMap.erase(OII);
  539. return true;
  540. }
  541. }
  542. return false;
  543. }
  544. static bool tryToShortenBegin(Instruction *DeadI,
  545. OverlapIntervalsTy &IntervalMap,
  546. int64_t &DeadStart, uint64_t &DeadSize) {
  547. if (IntervalMap.empty() || !isShortenableAtTheBeginning(DeadI))
  548. return false;
  549. OverlapIntervalsTy::iterator OII = IntervalMap.begin();
  550. int64_t KillingStart = OII->second;
  551. uint64_t KillingSize = OII->first - KillingStart;
  552. assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
  553. if (KillingStart <= DeadStart &&
  554. // Note: "DeadStart - KillingStart" is known to be non negative due to
  555. // preceding check.
  556. KillingSize > (uint64_t)(DeadStart - KillingStart)) {
  557. // Note: "KillingSize - (uint64_t)(DeadStart - DeadStart)" is known to
  558. // be positive due to preceding checks.
  559. assert(KillingSize - (uint64_t)(DeadStart - KillingStart) < DeadSize &&
  560. "Should have been handled as OW_Complete");
  561. if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
  562. false)) {
  563. IntervalMap.erase(OII);
  564. return true;
  565. }
  566. }
  567. return false;
  568. }
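/// Try to combine a dead store that is only partially overwritten by a
/// smaller, fully-contained killing store into a single merged constant for
/// the dead store. For illustration only (a hypothetical little-endian
/// example): a dead "store i32 0" followed by a killing "store i8 1" to the
/// same address yields the merged constant i32 1, which the caller then uses
/// to rewrite the dead store's value operand.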
  569. static Constant *
  570. tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI,
  571. int64_t KillingOffset, int64_t DeadOffset,
  572. const DataLayout &DL, BatchAAResults &AA,
  573. DominatorTree *DT) {
  574. if (DeadI && isa<ConstantInt>(DeadI->getValueOperand()) &&
  575. DL.typeSizeEqualsStoreSize(DeadI->getValueOperand()->getType()) &&
  576. KillingI && isa<ConstantInt>(KillingI->getValueOperand()) &&
  577. DL.typeSizeEqualsStoreSize(KillingI->getValueOperand()->getType()) &&
  578. memoryIsNotModifiedBetween(DeadI, KillingI, AA, DL, DT)) {
  579. // If the store we find is:
  580. // a) partially overwritten by the store to 'Loc'
  581. // b) the killing store is fully contained in the dead one and
  582. // c) they both have a constant value
  583. // d) none of the two stores need padding
  584. // Merge the two stores, replacing the dead store's value with a
  585. // merge of both values.
  586. // TODO: Deal with other constant types (vectors, etc), and probably
  587. // some mem intrinsics (if needed)
  588. APInt DeadValue = cast<ConstantInt>(DeadI->getValueOperand())->getValue();
  589. APInt KillingValue =
  590. cast<ConstantInt>(KillingI->getValueOperand())->getValue();
  591. unsigned KillingBits = KillingValue.getBitWidth();
  592. assert(DeadValue.getBitWidth() > KillingValue.getBitWidth());
  593. KillingValue = KillingValue.zext(DeadValue.getBitWidth());
  594. // Offset of the smaller store inside the larger store
  595. unsigned BitOffsetDiff = (KillingOffset - DeadOffset) * 8;
  596. unsigned LShiftAmount =
  597. DL.isBigEndian() ? DeadValue.getBitWidth() - BitOffsetDiff - KillingBits
  598. : BitOffsetDiff;
  599. APInt Mask = APInt::getBitsSet(DeadValue.getBitWidth(), LShiftAmount,
  600. LShiftAmount + KillingBits);
  601. // Clear the bits we'll be replacing, then OR with the smaller
  602. // store, shifted appropriately.
  603. APInt Merged = (DeadValue & ~Mask) | (KillingValue << LShiftAmount);
  604. LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n Dead: " << *DeadI
  605. << "\n Killing: " << *KillingI
  606. << "\n Merged Value: " << Merged << '\n');
  607. return ConstantInt::get(DeadI->getValueOperand()->getType(), Merged);
  608. }
  609. return nullptr;
  610. }
  611. namespace {
  612. // Returns true if \p I is an intrinsic that does not read or write memory.
  613. bool isNoopIntrinsic(Instruction *I) {
  614. if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
  615. switch (II->getIntrinsicID()) {
  616. case Intrinsic::lifetime_start:
  617. case Intrinsic::lifetime_end:
  618. case Intrinsic::invariant_end:
  619. case Intrinsic::launder_invariant_group:
  620. case Intrinsic::assume:
  621. return true;
  622. case Intrinsic::dbg_addr:
  623. case Intrinsic::dbg_declare:
  624. case Intrinsic::dbg_label:
  625. case Intrinsic::dbg_value:
  626. llvm_unreachable("Intrinsic should not be modeled in MemorySSA");
  627. default:
  628. return false;
  629. }
  630. }
  631. return false;
  632. }
  633. // Check if we can ignore \p D for DSE.
  634. bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
  635. Instruction *DI = D->getMemoryInst();
  636. // Calls that only access inaccessible memory cannot read or write any memory
  637. // locations we consider for elimination.
  638. if (auto *CB = dyn_cast<CallBase>(DI))
  639. if (CB->onlyAccessesInaccessibleMemory())
  640. return true;
  641. // We can eliminate stores to locations not visible to the caller across
  642. // throwing instructions.
  643. if (DI->mayThrow() && !DefVisibleToCaller)
  644. return true;
  645. // We can remove the dead stores, irrespective of the fence and its ordering
  646. // (release/acquire/seq_cst). Fences only constrain the ordering of
  647. // already visible stores; they do not make a store visible to other
  648. // threads. So, skipping over a fence does not change a store from being
  649. // dead.
  650. if (isa<FenceInst>(DI))
  651. return true;
  652. // Skip intrinsics that do not really read or modify memory.
  653. if (isNoopIntrinsic(DI))
  654. return true;
  655. return false;
  656. }
  657. struct DSEState {
  658. Function &F;
  659. AliasAnalysis &AA;
  660. EarliestEscapeInfo EI;
  661. /// The single BatchAA instance that is used to cache AA queries. It will
  662. /// not be invalidated over the whole run. This is safe, because:
  663. /// 1. Only memory writes are removed, so the alias cache for memory
  664. /// locations remains valid.
  665. /// 2. No new instructions are added (only instructions removed), so cached
  666. /// information for a deleted value cannot be accessed by a re-used new
  667. /// value pointer.
  668. BatchAAResults BatchAA;
  669. MemorySSA &MSSA;
  670. DominatorTree &DT;
  671. PostDominatorTree &PDT;
  672. const TargetLibraryInfo &TLI;
  673. const DataLayout &DL;
  674. const LoopInfo &LI;
  675. // Whether the function contains any irreducible control flow, useful for
  676. // accurately detecting loops.
  677. bool ContainsIrreducibleLoops;
  678. // All MemoryDefs that potentially could kill other MemDefs.
  679. SmallVector<MemoryDef *, 64> MemDefs;
  680. // Any that should be skipped as they are already deleted
  681. SmallPtrSet<MemoryAccess *, 4> SkipStores;
  682. // Keep track whether a given object is captured before return or not.
  683. DenseMap<const Value *, bool> CapturedBeforeReturn;
  684. // Keep track of all of the objects that are invisible to the caller after
  685. // the function returns.
  686. DenseMap<const Value *, bool> InvisibleToCallerAfterRet;
  687. // Keep track of blocks with throwing instructions not modeled in MemorySSA.
  688. SmallPtrSet<BasicBlock *, 16> ThrowingBlocks;
  689. // Post-order numbers for each basic block. Used to figure out if memory
  690. // accesses are executed before another access.
  691. DenseMap<BasicBlock *, unsigned> PostOrderNumbers;
  692. /// Keep track of instructions (partly) overlapping with killing MemoryDefs per
  693. /// basic block.
  694. MapVector<BasicBlock *, InstOverlapIntervalsTy> IOLs;
  695. // Check if there are root nodes that are terminated by UnreachableInst.
  696. // Those roots pessimize post-dominance queries. If there are such roots,
  697. // fall back to CFG scan starting from all non-unreachable roots.
  698. bool AnyUnreachableExit;
  699. // Class contains self-reference, make sure it's not copied/moved.
  700. DSEState(const DSEState &) = delete;
  701. DSEState &operator=(const DSEState &) = delete;
  702. DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
  703. PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
  704. const LoopInfo &LI)
  705. : F(F), AA(AA), EI(DT, LI), BatchAA(AA, &EI), MSSA(MSSA), DT(DT),
  706. PDT(PDT), TLI(TLI), DL(F.getParent()->getDataLayout()), LI(LI) {
  707. // Collect blocks with throwing instructions not modeled in MemorySSA and
  708. // alloc-like objects.
  709. unsigned PO = 0;
  710. for (BasicBlock *BB : post_order(&F)) {
  711. PostOrderNumbers[BB] = PO++;
  712. for (Instruction &I : *BB) {
  713. MemoryAccess *MA = MSSA.getMemoryAccess(&I);
  714. if (I.mayThrow() && !MA)
  715. ThrowingBlocks.insert(I.getParent());
  716. auto *MD = dyn_cast_or_null<MemoryDef>(MA);
  717. if (MD && MemDefs.size() < MemorySSADefsPerBlockLimit &&
  718. (getLocForWrite(&I) || isMemTerminatorInst(&I)))
  719. MemDefs.push_back(MD);
  720. }
  721. }
  722. // Treat byval or inalloca arguments the same as Allocas; stores to them are
  723. // dead at the end of the function.
  724. for (Argument &AI : F.args())
  725. if (AI.hasPassPointeeByValueCopyAttr())
  726. InvisibleToCallerAfterRet.insert({&AI, true});
  727. // Collect whether there is any irreducible control flow in the function.
  728. ContainsIrreducibleLoops = mayContainIrreducibleControl(F, &LI);
  729. AnyUnreachableExit = any_of(PDT.roots(), [](const BasicBlock *E) {
  730. return isa<UnreachableInst>(E->getTerminator());
  731. });
  732. }
  733. /// Return 'OW_Complete' if a store to the 'KillingLoc' location (by \p
  734. /// KillingI instruction) completely overwrites a store to the 'DeadLoc'
  735. /// location (by \p DeadI instruction).
  736. /// Return OW_MaybePartial if \p KillingI does not completely overwrite
  737. /// \p DeadI, but they both write to the same underlying object. In that
  738. /// case, use isPartialOverwrite to check if \p KillingI partially overwrites
  739. /// \p DeadI. Returns 'OW_None' if \p KillingI is known to not overwrite
  740. /// \p DeadI. Returns 'OW_Unknown' if nothing can be determined.
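/// For illustration only: if both pointers decompose to the same base with
/// DeadOff = 4, DeadSize = 4, KillingOff = 0 and KillingSize = 8, then the
/// dead bytes [4, 8) lie entirely within the killing bytes [0, 8) and
/// OW_Complete is returned.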
  741. OverwriteResult isOverwrite(const Instruction *KillingI,
  742. const Instruction *DeadI,
  743. const MemoryLocation &KillingLoc,
  744. const MemoryLocation &DeadLoc,
  745. int64_t &KillingOff, int64_t &DeadOff) {
  746. // AliasAnalysis does not always account for loops. Limit overwrite checks
  747. // to dependencies for which we can guarantee they are independent of any
  748. // loops they are in.
  749. if (!isGuaranteedLoopIndependent(DeadI, KillingI, DeadLoc))
  750. return OW_Unknown;
  751. const Value *DeadPtr = DeadLoc.Ptr->stripPointerCasts();
  752. const Value *KillingPtr = KillingLoc.Ptr->stripPointerCasts();
  753. const Value *DeadUndObj = getUnderlyingObject(DeadPtr);
  754. const Value *KillingUndObj = getUnderlyingObject(KillingPtr);
  755. // Check whether the killing store overwrites the whole object, in which
  756. // case the size/offset of the dead store does not matter.
  757. if (DeadUndObj == KillingUndObj && KillingLoc.Size.isPrecise()) {
  758. uint64_t KillingUndObjSize = getPointerSize(KillingUndObj, DL, TLI, &F);
  759. if (KillingUndObjSize != MemoryLocation::UnknownSize &&
  760. KillingUndObjSize == KillingLoc.Size.getValue())
  761. return OW_Complete;
  762. }
  763. // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
  764. // get imprecise values here, though (except for unknown sizes).
  765. if (!KillingLoc.Size.isPrecise() || !DeadLoc.Size.isPrecise()) {
  766. // In case no constant size is known, try to use the IR values for the number
  767. // of bytes written and check if they match.
  768. const auto *KillingMemI = dyn_cast<MemIntrinsic>(KillingI);
  769. const auto *DeadMemI = dyn_cast<MemIntrinsic>(DeadI);
  770. if (KillingMemI && DeadMemI) {
  771. const Value *KillingV = KillingMemI->getLength();
  772. const Value *DeadV = DeadMemI->getLength();
  773. if (KillingV == DeadV && BatchAA.isMustAlias(DeadLoc, KillingLoc))
  774. return OW_Complete;
  775. }
  776. // Masked stores have imprecise locations, but we can reason about them
  777. // to some extent.
  778. return isMaskedStoreOverwrite(KillingI, DeadI, BatchAA);
  779. }
  780. const uint64_t KillingSize = KillingLoc.Size.getValue();
  781. const uint64_t DeadSize = DeadLoc.Size.getValue();
  782. // Query the alias information
  783. AliasResult AAR = BatchAA.alias(KillingLoc, DeadLoc);
  784. // If the start pointers are the same, we just have to compare sizes to see if
  785. // the killing store was larger than the dead store.
  786. if (AAR == AliasResult::MustAlias) {
  787. // Make sure that KillingSize is >= DeadSize.
  788. if (KillingSize >= DeadSize)
  789. return OW_Complete;
  790. }
  791. // If we hit a partial alias we may have a full overwrite
  792. if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
  793. int32_t Off = AAR.getOffset();
  794. if (Off >= 0 && (uint64_t)Off + DeadSize <= KillingSize)
  795. return OW_Complete;
  796. }
  797. // If we can't resolve the same pointers to the same object, then we can't
  798. // analyze them at all.
  799. if (DeadUndObj != KillingUndObj) {
  800. // Non aliasing stores to different objects don't overlap. Note that
  801. // if the killing store is known to overwrite whole object (out of
  802. // bounds access overwrites whole object as well) then it is assumed to
  803. // completely overwrite any store to the same object even if they don't
  804. // actually alias (see next check).
  805. if (AAR == AliasResult::NoAlias)
  806. return OW_None;
  807. return OW_Unknown;
  808. }
  809. // Okay, we have stores to two completely different pointers. Try to
  810. // decompose the pointer into a "base + constant_offset" form. If the base
  811. // pointers are equal, then we can reason about the two stores.
  812. DeadOff = 0;
  813. KillingOff = 0;
  814. const Value *DeadBasePtr =
  815. GetPointerBaseWithConstantOffset(DeadPtr, DeadOff, DL);
  816. const Value *KillingBasePtr =
  817. GetPointerBaseWithConstantOffset(KillingPtr, KillingOff, DL);
  818. // If the base pointers still differ, we have two completely different
  819. // stores.
  820. if (DeadBasePtr != KillingBasePtr)
  821. return OW_Unknown;
  822. // The killing access completely overlaps the dead store if and only if
  823. // both start and end of the dead one is "inside" the killing one:
  824. // |<->|--dead--|<->|
  825. // |-----killing------|
  826. // Accesses may overlap if and only if start of one of them is "inside"
  827. // another one:
  828. // |<->|--dead--|<-------->|
  829. // |-------killing--------|
  830. // OR
  831. // |-------dead-------|
  832. // |<->|---killing---|<----->|
  833. //
  834. // We have to be careful here as *Off is signed while *.Size is unsigned.
  835. // Check if the dead access starts "not before" the killing one.
  836. if (DeadOff >= KillingOff) {
  837. // If the dead access ends "not after" the killing access then the
  838. // dead one is completely overwritten by the killing one.
  839. if (uint64_t(DeadOff - KillingOff) + DeadSize <= KillingSize)
  840. return OW_Complete;
  841. // If start of the dead access is "before" end of the killing access
  842. // then accesses overlap.
  843. else if ((uint64_t)(DeadOff - KillingOff) < KillingSize)
  844. return OW_MaybePartial;
  845. }
  846. // If start of the killing access is "before" end of the dead access then
  847. // accesses overlap.
  848. else if ((uint64_t)(KillingOff - DeadOff) < DeadSize) {
  849. return OW_MaybePartial;
  850. }
  851. // Can reach here only if accesses are known not to overlap.
  852. return OW_None;
  853. }
  854. bool isInvisibleToCallerAfterRet(const Value *V) {
  855. if (isa<AllocaInst>(V))
  856. return true;
  857. auto I = InvisibleToCallerAfterRet.insert({V, false});
  858. if (I.second) {
  859. if (!isInvisibleToCallerOnUnwind(V)) {
  860. I.first->second = false;
  861. } else if (isNoAliasCall(V)) {
  862. I.first->second = !PointerMayBeCaptured(V, true, false);
  863. }
  864. }
  865. return I.first->second;
  866. }
  867. bool isInvisibleToCallerOnUnwind(const Value *V) {
  868. bool RequiresNoCaptureBeforeUnwind;
  869. if (!isNotVisibleOnUnwind(V, RequiresNoCaptureBeforeUnwind))
  870. return false;
  871. if (!RequiresNoCaptureBeforeUnwind)
  872. return true;
  873. auto I = CapturedBeforeReturn.insert({V, true});
  874. if (I.second)
  875. // NOTE: This could be made more precise by PointerMayBeCapturedBefore
  876. // with the killing MemoryDef. But we refrain from doing so for now to
  877. // limit compile-time and this does not cause any changes to the number
  878. // of stores removed on a large test set in practice.
  879. I.first->second = PointerMayBeCaptured(V, false, true);
  880. return !I.first->second;
  881. }
  882. Optional<MemoryLocation> getLocForWrite(Instruction *I) const {
  883. if (!I->mayWriteToMemory())
  884. return None;
  885. if (auto *CB = dyn_cast<CallBase>(I))
  886. return MemoryLocation::getForDest(CB, TLI);
  887. return MemoryLocation::getOrNone(I);
  888. }
  889. /// Assuming this instruction has a dead analyzable write, can we delete
  890. /// this instruction?
  891. bool isRemovable(Instruction *I) {
  892. assert(getLocForWrite(I) && "Must have analyzable write");
  893. // Don't remove volatile/atomic stores.
  894. if (StoreInst *SI = dyn_cast<StoreInst>(I))
  895. return SI->isUnordered();
  896. if (auto *CB = dyn_cast<CallBase>(I)) {
  897. // Don't remove volatile memory intrinsics.
  898. if (auto *MI = dyn_cast<MemIntrinsic>(CB))
  899. return !MI->isVolatile();
  900. // Never remove dead lifetime intrinsics, e.g. because they are followed
  901. // by a free.
  902. if (CB->isLifetimeStartOrEnd())
  903. return false;
  904. return CB->use_empty() && CB->willReturn() && CB->doesNotThrow();
  905. }
  906. return false;
  907. }
  908. /// Returns true if \p UseInst completely overwrites \p DefLoc
  909. /// (stored by \p DefInst).
  910. bool isCompleteOverwrite(const MemoryLocation &DefLoc, Instruction *DefInst,
  911. Instruction *UseInst) {
  912. // UseInst has a MemoryDef associated in MemorySSA. It's possible for a
  913. // MemoryDef to not write to memory, e.g. a volatile load is modeled as a
  914. // MemoryDef.
  915. if (!UseInst->mayWriteToMemory())
  916. return false;
  917. if (auto *CB = dyn_cast<CallBase>(UseInst))
  918. if (CB->onlyAccessesInaccessibleMemory())
  919. return false;
  920. int64_t InstWriteOffset, DepWriteOffset;
  921. if (auto CC = getLocForWrite(UseInst))
  922. return isOverwrite(UseInst, DefInst, *CC, DefLoc, InstWriteOffset,
  923. DepWriteOffset) == OW_Complete;
  924. return false;
  925. }
  926. /// Returns true if \p Def is not read before returning from the function.
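/// For illustration only: a store to a non-escaping alloca that is never read
/// again on any path to a return is such a write; the caller can then remove
/// it.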
  927. bool isWriteAtEndOfFunction(MemoryDef *Def) {
  928. LLVM_DEBUG(dbgs() << " Check if def " << *Def << " ("
  929. << *Def->getMemoryInst()
  930. << ") is at the end the function \n");
  931. auto MaybeLoc = getLocForWrite(Def->getMemoryInst());
  932. if (!MaybeLoc) {
  933. LLVM_DEBUG(dbgs() << " ... could not get location for write.\n");
  934. return false;
  935. }
  936. SmallVector<MemoryAccess *, 4> WorkList;
  937. SmallPtrSet<MemoryAccess *, 8> Visited;
  938. auto PushMemUses = [&WorkList, &Visited](MemoryAccess *Acc) {
  939. if (!Visited.insert(Acc).second)
  940. return;
  941. for (Use &U : Acc->uses())
  942. WorkList.push_back(cast<MemoryAccess>(U.getUser()));
  943. };
  944. PushMemUses(Def);
  945. for (unsigned I = 0; I < WorkList.size(); I++) {
  946. if (WorkList.size() >= MemorySSAScanLimit) {
  947. LLVM_DEBUG(dbgs() << " ... hit exploration limit.\n");
  948. return false;
  949. }
  950. MemoryAccess *UseAccess = WorkList[I];
  951. // Simply adding the users of MemoryPhi to the worklist is not enough,
  952. // because we might miss read clobbers in different iterations of a loop,
  953. // for example.
  954. // TODO: Add support for phi translation to handle the loop case.
  955. if (isa<MemoryPhi>(UseAccess))
  956. return false;
  957. // TODO: Checking for aliasing is expensive. Consider reducing the amount
  958. // of times this is called and/or caching it.
  959. Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
  960. if (isReadClobber(*MaybeLoc, UseInst)) {
  961. LLVM_DEBUG(dbgs() << " ... hit read clobber " << *UseInst << ".\n");
  962. return false;
  963. }
  964. if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess))
  965. PushMemUses(UseDef);
  966. }
  967. return true;
  968. }
  969. /// If \p I is a memory terminator like llvm.lifetime.end or free, return a
  970. /// pair with the MemoryLocation terminated by \p I and a boolean flag
  971. /// indicating whether \p I is a free-like call.
  972. Optional<std::pair<MemoryLocation, bool>>
  973. getLocForTerminator(Instruction *I) const {
  974. uint64_t Len;
  975. Value *Ptr;
  976. if (match(I, m_Intrinsic<Intrinsic::lifetime_end>(m_ConstantInt(Len),
  977. m_Value(Ptr))))
  978. return {std::make_pair(MemoryLocation(Ptr, Len), false)};
  979. if (auto *CB = dyn_cast<CallBase>(I)) {
  980. if (isFreeCall(I, &TLI))
  981. return {std::make_pair(MemoryLocation::getAfter(CB->getArgOperand(0)),
  982. true)};
  983. }
  984. return None;
  985. }
  986. /// Returns true if \p I is a memory terminator instruction like
  987. /// llvm.lifetime.end or free.
  988. bool isMemTerminatorInst(Instruction *I) const {
  989. IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
  990. return (II && II->getIntrinsicID() == Intrinsic::lifetime_end) ||
  991. isFreeCall(I, &TLI);
  992. }
  993. /// Returns true if \p MaybeTerm is a memory terminator for \p Loc from
  994. /// instruction \p AccessI.
  995. bool isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI,
  996. Instruction *MaybeTerm) {
  997. Optional<std::pair<MemoryLocation, bool>> MaybeTermLoc =
  998. getLocForTerminator(MaybeTerm);
  999. if (!MaybeTermLoc)
  1000. return false;
  1001. // If the terminator is a free-like call, all accesses to the underlying
  1002. // object can be considered terminated.
  1003. if (getUnderlyingObject(Loc.Ptr) !=
  1004. getUnderlyingObject(MaybeTermLoc->first.Ptr))
  1005. return false;
  1006. auto TermLoc = MaybeTermLoc->first;
  1007. if (MaybeTermLoc->second) {
  1008. const Value *LocUO = getUnderlyingObject(Loc.Ptr);
  1009. return BatchAA.isMustAlias(TermLoc.Ptr, LocUO);
  1010. }
  1011. int64_t InstWriteOffset = 0;
  1012. int64_t DepWriteOffset = 0;
  1013. return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, InstWriteOffset,
  1014. DepWriteOffset) == OW_Complete;
  1015. }
  // Returns true if \p UseInst may read from \p DefLoc.
  bool isReadClobber(const MemoryLocation &DefLoc, Instruction *UseInst) {
    if (isNoopIntrinsic(UseInst))
      return false;

    // Monotonic or weaker atomic stores can be re-ordered and do not need to
    // be treated as read clobbers.
    if (auto SI = dyn_cast<StoreInst>(UseInst))
      return isStrongerThan(SI->getOrdering(), AtomicOrdering::Monotonic);

    if (!UseInst->mayReadFromMemory())
      return false;

    if (auto *CB = dyn_cast<CallBase>(UseInst))
      if (CB->onlyAccessesInaccessibleMemory())
        return false;

    return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc));
  }
  /// Returns true if a dependency between \p Current and \p KillingDef is
  /// guaranteed to be loop invariant for the loops that they are in, either
  /// because they are known to be in the same block or at the same loop level,
  /// or because \p CurrentLoc is guaranteed to only reference a single
  /// MemoryLocation during execution of the containing function.
  bool isGuaranteedLoopIndependent(const Instruction *Current,
                                   const Instruction *KillingDef,
                                   const MemoryLocation &CurrentLoc) {
    // If the dependency is within the same block or loop level (being careful
    // of irreducible loops), we know that AA will return a valid result for the
    // memory dependency. (Both at the function level, outside of any loop,
    // would also be valid but we currently disable that to limit compile time).
    if (Current->getParent() == KillingDef->getParent())
      return true;
    const Loop *CurrentLI = LI.getLoopFor(Current->getParent());
    if (!ContainsIrreducibleLoops && CurrentLI &&
        CurrentLI == LI.getLoopFor(KillingDef->getParent()))
      return true;
    // Otherwise check the memory location is invariant to any loops.
    return isGuaranteedLoopInvariant(CurrentLoc.Ptr);
  }
  /// Returns true if \p Ptr is guaranteed to be loop invariant for any possible
  /// loop. In particular, this guarantees that it only references a single
  /// MemoryLocation during execution of the containing function.
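  /// For example, a GEP with all-constant indices based on an alloca or other
  /// instruction in the entry block always names the same location, whereas a
  /// pointer computed inside a loop body may name a different location on each
  /// iteration and is rejected here.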
  bool isGuaranteedLoopInvariant(const Value *Ptr) {
    Ptr = Ptr->stripPointerCasts();
    if (auto *GEP = dyn_cast<GEPOperator>(Ptr))
      if (GEP->hasAllConstantIndices())
        Ptr = GEP->getPointerOperand()->stripPointerCasts();

    if (auto *I = dyn_cast<Instruction>(Ptr))
      return I->getParent()->isEntryBlock();
    return true;
  }
  // Find a MemoryDef writing to \p KillingLoc and dominating \p StartAccess,
  // with no read access between them or on any other path to a function exit
  // block if \p KillingLoc is not accessible after the function returns. If
  // there is no such MemoryDef, return None. The returned value may not
  // (completely) overwrite \p KillingLoc. Currently we bail out when we
  // encounter an aliasing MemoryUse (read).
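  // A returned MemoryPhi tells the caller to continue the search along each
  // incoming value; a returned MemoryDef is a candidate whose instruction may
  // be (partially) dead and is checked against KillingLoc by the caller.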
  Optional<MemoryAccess *>
  getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess,
                  const MemoryLocation &KillingLoc, const Value *KillingUndObj,
                  unsigned &ScanLimit, unsigned &WalkerStepLimit,
                  bool IsMemTerm, unsigned &PartialLimit) {
    if (ScanLimit == 0 || WalkerStepLimit == 0) {
      LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
      return None;
    }

    MemoryAccess *Current = StartAccess;
    Instruction *KillingI = KillingDef->getMemoryInst();
    LLVM_DEBUG(dbgs() << " trying to get dominating access\n");

    // Only optimize defining access of KillingDef when directly starting at its
    // defining access. The defining access also must only access KillingLoc. At
    // the moment we only support instructions with a single write location, so
    // it should be sufficient to disable optimizations for instructions that
    // also read from memory.
    bool CanOptimize = OptimizeMemorySSA &&
                       KillingDef->getDefiningAccess() == StartAccess &&
                       !KillingI->mayReadFromMemory();

    // Find the next clobbering Mod access for DefLoc, starting at StartAccess.
    Optional<MemoryLocation> CurrentLoc;
    for (;; Current = cast<MemoryDef>(Current)->getDefiningAccess()) {
      LLVM_DEBUG({
        dbgs() << " visiting " << *Current;
        if (!MSSA.isLiveOnEntryDef(Current) && isa<MemoryUseOrDef>(Current))
          dbgs() << " (" << *cast<MemoryUseOrDef>(Current)->getMemoryInst()
                 << ")";
        dbgs() << "\n";
      });

      // Reached TOP.
      if (MSSA.isLiveOnEntryDef(Current)) {
        LLVM_DEBUG(dbgs() << " ... found LiveOnEntryDef\n");
        return None;
      }

      // Cost of a step. Accesses in the same block are more likely to be valid
      // candidates for elimination, hence consider them cheaper.
      unsigned StepCost = KillingDef->getBlock() == Current->getBlock()
                              ? MemorySSASameBBStepCost
                              : MemorySSAOtherBBStepCost;
      if (WalkerStepLimit <= StepCost) {
        LLVM_DEBUG(dbgs() << " ... hit walker step limit\n");
        return None;
      }
      WalkerStepLimit -= StepCost;

      // Return for MemoryPhis. They cannot be eliminated directly and the
      // caller is responsible for traversing them.
      if (isa<MemoryPhi>(Current)) {
        LLVM_DEBUG(dbgs() << " ... found MemoryPhi\n");
        return Current;
      }

      // Below, check if CurrentDef is a valid candidate to be eliminated by
      // KillingDef. If it is not, check the next candidate.
      MemoryDef *CurrentDef = cast<MemoryDef>(Current);
      Instruction *CurrentI = CurrentDef->getMemoryInst();

      if (canSkipDef(CurrentDef, !isInvisibleToCallerOnUnwind(KillingUndObj))) {
        CanOptimize = false;
        continue;
      }

      // Before we try to remove anything, check for any extra throwing
      // instructions that block us from DSEing.
      if (mayThrowBetween(KillingI, CurrentI, KillingUndObj)) {
        LLVM_DEBUG(dbgs() << " ... skip, may throw!\n");
        return None;
      }

      // Check for anything that looks like it will be a barrier to further
      // removal.
      if (isDSEBarrier(KillingUndObj, CurrentI)) {
        LLVM_DEBUG(dbgs() << " ... skip, barrier\n");
        return None;
      }

      // If Current is known to be on a path that reads DefLoc or is a read
      // clobber, bail out, as the path is not profitable. We skip this check
      // for intrinsic calls, because the code knows how to handle memcpy
      // intrinsics.
      if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(KillingLoc, CurrentI))
        return None;
      // Quick check if there are direct uses that are read-clobbers.
      if (any_of(Current->uses(), [this, &KillingLoc, StartAccess](Use &U) {
            if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(U.getUser()))
              return !MSSA.dominates(StartAccess, UseOrDef) &&
                     isReadClobber(KillingLoc, UseOrDef->getMemoryInst());
            return false;
          })) {
        LLVM_DEBUG(dbgs() << " ... found a read clobber\n");
        return None;
      }

      // If Current does not have an analyzable write location or is not
      // removable, skip it.
      CurrentLoc = getLocForWrite(CurrentI);
      if (!CurrentLoc || !isRemovable(CurrentI)) {
        CanOptimize = false;
        continue;
      }

      // AliasAnalysis does not account for loops. Limit elimination to
      // candidates for which we can guarantee that they always store to the
      // same memory location and are not located in different loops.
      if (!isGuaranteedLoopIndependent(CurrentI, KillingI, *CurrentLoc)) {
        LLVM_DEBUG(dbgs() << " ... not guaranteed loop independent\n");
        WalkerStepLimit -= 1;
        CanOptimize = false;
        continue;
      }

      if (IsMemTerm) {
        // If the killing def is a memory terminator (e.g. lifetime.end), check
        // the next candidate if Current does not write to the same underlying
        // object as the terminator.
        if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI)) {
          CanOptimize = false;
          continue;
        }
      } else {
        int64_t KillingOffset = 0;
        int64_t DeadOffset = 0;
        auto OR = isOverwrite(KillingI, CurrentI, KillingLoc, *CurrentLoc,
                              KillingOffset, DeadOffset);
        if (CanOptimize) {
          // CurrentDef is the earliest write clobber of KillingDef. Use it as
          // the optimized access. Do not optimize if CurrentDef is already the
          // defining access of KillingDef.
          if (CurrentDef != KillingDef->getDefiningAccess() &&
              (OR == OW_Complete || OR == OW_MaybePartial))
            KillingDef->setOptimized(CurrentDef);

          // Once a may-aliasing def is encountered do not set an optimized
          // access.
          if (OR != OW_None)
            CanOptimize = false;
        }

        // If Current does not write to the same object as KillingDef, check
        // the next candidate.
        if (OR == OW_Unknown || OR == OW_None)
          continue;
        else if (OR == OW_MaybePartial) {
          // If KillingDef only partially overwrites Current, check the next
          // candidate if the partial step limit is exceeded. This aggressively
          // limits the number of candidates for partial store elimination,
          // which are less likely to be removable in the end.
          if (PartialLimit <= 1) {
            WalkerStepLimit -= 1;
            LLVM_DEBUG(dbgs() << " ... reached partial limit ... continue with next access\n");
            continue;
          }
          PartialLimit -= 1;
        }
      }
      break;
    }
    // Accesses to objects accessible after the function returns can only be
    // eliminated if the access is dead along all paths to the exit. Collect
    // the blocks with killing (=completely overwriting) MemoryDefs and check if
    // they cover all paths from MaybeDeadAccess to any function exit.
    SmallPtrSet<Instruction *, 16> KillingDefs;
    KillingDefs.insert(KillingDef->getMemoryInst());
    MemoryAccess *MaybeDeadAccess = Current;
    MemoryLocation MaybeDeadLoc = *CurrentLoc;
    Instruction *MaybeDeadI = cast<MemoryDef>(MaybeDeadAccess)->getMemoryInst();
    LLVM_DEBUG(dbgs() << " Checking for reads of " << *MaybeDeadAccess << " ("
                      << *MaybeDeadI << ")\n");

    SmallSetVector<MemoryAccess *, 32> WorkList;
    auto PushMemUses = [&WorkList](MemoryAccess *Acc) {
      for (Use &U : Acc->uses())
        WorkList.insert(cast<MemoryAccess>(U.getUser()));
    };
    PushMemUses(MaybeDeadAccess);

    // Check if DeadDef may be read.
    for (unsigned I = 0; I < WorkList.size(); I++) {
      MemoryAccess *UseAccess = WorkList[I];
      LLVM_DEBUG(dbgs() << " " << *UseAccess);

      // Bail out if the number of accesses to check exceeds the scan limit.
      if (ScanLimit < (WorkList.size() - I)) {
        LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
        return None;
      }
      --ScanLimit;
      NumDomMemDefChecks++;

      if (isa<MemoryPhi>(UseAccess)) {
        if (any_of(KillingDefs, [this, UseAccess](Instruction *KI) {
              return DT.properlyDominates(KI->getParent(),
                                          UseAccess->getBlock());
            })) {
          LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing block\n");
          continue;
        }
        LLVM_DEBUG(dbgs() << "\n ... adding PHI uses\n");
        PushMemUses(UseAccess);
        continue;
      }

      Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
      LLVM_DEBUG(dbgs() << " (" << *UseInst << ")\n");

      if (any_of(KillingDefs, [this, UseInst](Instruction *KI) {
            return DT.dominates(KI, UseInst);
          })) {
        LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing def\n");
        continue;
      }

      // A memory terminator kills all preceding MemoryDefs and all succeeding
      // MemoryAccesses. We do not have to check its users.
      if (isMemTerminator(MaybeDeadLoc, MaybeDeadI, UseInst)) {
        LLVM_DEBUG(
            dbgs()
            << " ... skipping, memterminator invalidates following accesses\n");
        continue;
      }

      if (isNoopIntrinsic(cast<MemoryUseOrDef>(UseAccess)->getMemoryInst())) {
        LLVM_DEBUG(dbgs() << " ... adding uses of intrinsic\n");
        PushMemUses(UseAccess);
        continue;
      }

      if (UseInst->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj)) {
        LLVM_DEBUG(dbgs() << " ... found throwing instruction\n");
        return None;
      }

      // Uses which may read the original MemoryDef mean we cannot eliminate the
      // original MD. Stop walk.
      if (isReadClobber(MaybeDeadLoc, UseInst)) {
        LLVM_DEBUG(dbgs() << " ... found read clobber\n");
        return None;
      }

      // If this worklist walks back to the original memory access (and the
      // pointer is not guaranteed loop invariant) then we cannot assume that a
      // store kills itself.
      if (MaybeDeadAccess == UseAccess &&
          !isGuaranteedLoopInvariant(MaybeDeadLoc.Ptr)) {
        LLVM_DEBUG(dbgs() << " ... found not loop invariant self access\n");
        return None;
      }

      // Otherwise, for the KillingDef and MaybeDeadAccess we only have to check
      // if it reads the memory location.
      // TODO: It would probably be better to check for self-reads before
      // calling the function.
      if (KillingDef == UseAccess || MaybeDeadAccess == UseAccess) {
        LLVM_DEBUG(dbgs() << " ... skipping killing def/dom access\n");
        continue;
      }

      // Check all uses for MemoryDefs, except for defs completely overwriting
      // the original location. We have to check uses of *all* MemoryDefs we
      // discover, including non-aliasing ones; otherwise we might miss cases
      // like the following:
      //   1 = Def(LoE) ; <----- DeadDef stores [0,1]
      //   2 = Def(1)   ; (2, 1) = NoAlias,   stores [2,3]
      //   Use(2)       ; MayAlias 2 *and* 1, loads [0, 3].
      //                  (The Use points to the *first* Def it may alias)
      //   3 = Def(1)   ; <---- Current  (3, 2) = NoAlias, (3, 1) = MayAlias,
      //                  stores [0,1]
      if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) {
        if (isCompleteOverwrite(MaybeDeadLoc, MaybeDeadI, UseInst)) {
          BasicBlock *MaybeKillingBlock = UseInst->getParent();
          if (PostOrderNumbers.find(MaybeKillingBlock)->second <
              PostOrderNumbers.find(MaybeDeadAccess->getBlock())->second) {
            if (!isInvisibleToCallerAfterRet(KillingUndObj)) {
              LLVM_DEBUG(dbgs()
                         << " ... found killing def " << *UseInst << "\n");
              KillingDefs.insert(UseInst);
            }
          } else {
            LLVM_DEBUG(dbgs()
                       << " ... found preceding def " << *UseInst << "\n");
            return None;
          }
        } else
          PushMemUses(UseDef);
      }
    }
    // For accesses to locations visible after the function returns, make sure
    // that the location is dead (=overwritten) along all paths from
    // MaybeDeadAccess to the exit.
    if (!isInvisibleToCallerAfterRet(KillingUndObj)) {
      SmallPtrSet<BasicBlock *, 16> KillingBlocks;
      for (Instruction *KD : KillingDefs)
        KillingBlocks.insert(KD->getParent());
      assert(!KillingBlocks.empty() &&
             "Expected at least a single killing block");

      // Find the common post-dominator of all killing blocks.
      BasicBlock *CommonPred = *KillingBlocks.begin();
      for (BasicBlock *BB : llvm::drop_begin(KillingBlocks)) {
        if (!CommonPred)
          break;
        CommonPred = PDT.findNearestCommonDominator(CommonPred, BB);
      }

      // If the common post-dominator does not post-dominate MaybeDeadAccess,
      // there is a path from MaybeDeadAccess to an exit not going through a
      // killing block.
      if (!PDT.dominates(CommonPred, MaybeDeadAccess->getBlock())) {
        if (!AnyUnreachableExit)
          return None;

        // Fall back to CFG scan starting at all non-unreachable roots if not
        // all paths to the exit go through CommonPred.
        CommonPred = nullptr;
      }

      // If CommonPred itself is in the set of killing blocks, we're done.
      if (KillingBlocks.count(CommonPred))
        return {MaybeDeadAccess};

      SetVector<BasicBlock *> WorkList;
      // If CommonPred is null, there are multiple exits from the function.
      // They all have to be added to the worklist.
      if (CommonPred)
        WorkList.insert(CommonPred);
      else
        for (BasicBlock *R : PDT.roots()) {
          if (!isa<UnreachableInst>(R->getTerminator()))
            WorkList.insert(R);
        }

      NumCFGTries++;
      // Check if all paths starting from an exit node go through one of the
      // killing blocks before reaching MaybeDeadAccess.
      for (unsigned I = 0; I < WorkList.size(); I++) {
        NumCFGChecks++;
        BasicBlock *Current = WorkList[I];
        if (KillingBlocks.count(Current))
          continue;
        if (Current == MaybeDeadAccess->getBlock())
          return None;

        // MaybeDeadAccess is reachable from the entry, so we don't have to
        // explore unreachable blocks further.
        if (!DT.isReachableFromEntry(Current))
          continue;

        for (BasicBlock *Pred : predecessors(Current))
          WorkList.insert(Pred);

        if (WorkList.size() >= MemorySSAPathCheckLimit)
          return None;
      }
      NumCFGSuccess++;
    }

    // No aliasing MemoryUses of MaybeDeadAccess found, MaybeDeadAccess is
    // potentially dead.
    return {MaybeDeadAccess};
  }
  // Delete dead memory defs.
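  // This also keeps MemorySSA up to date (removing the corresponding
  // MemoryAccesses via MemorySSAUpdater), records removed MemoryDefs in
  // SkipStores so later iterations skip them, and salvages debug info before
  // erasing each instruction.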
  void deleteDeadInstruction(Instruction *SI) {
    MemorySSAUpdater Updater(&MSSA);
    SmallVector<Instruction *, 32> NowDeadInsts;
    NowDeadInsts.push_back(SI);
    --NumFastOther;

    while (!NowDeadInsts.empty()) {
      Instruction *DeadInst = NowDeadInsts.pop_back_val();
      ++NumFastOther;

      // Try to preserve debug information attached to the dead instruction.
      salvageDebugInfo(*DeadInst);
      salvageKnowledge(DeadInst);

      // Remove the Instruction from MSSA.
      if (MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst)) {
        if (MemoryDef *MD = dyn_cast<MemoryDef>(MA)) {
          SkipStores.insert(MD);
        }
        Updater.removeMemoryAccess(MA);
      }

      auto I = IOLs.find(DeadInst->getParent());
      if (I != IOLs.end())
        I->second.erase(DeadInst);

      // Remove its operands.
      for (Use &O : DeadInst->operands())
        if (Instruction *OpI = dyn_cast<Instruction>(O)) {
          O = nullptr;
          if (isInstructionTriviallyDead(OpI, &TLI))
            NowDeadInsts.push_back(OpI);
        }

      EI.removeInstruction(DeadInst);
      DeadInst->eraseFromParent();
    }
  }
  // Check for any extra throws between \p KillingI and \p DeadI that block
  // DSE. This only checks extra maythrows (those that aren't MemoryDefs).
  // MemoryDefs that may throw are handled during the walk from one def to the
  // next.
  bool mayThrowBetween(Instruction *KillingI, Instruction *DeadI,
                       const Value *KillingUndObj) {
    // First see if we can ignore it by using the fact that KillingI is an
    // alloca/alloca like object that is not visible to the caller during
    // execution of the function.
    if (KillingUndObj && isInvisibleToCallerOnUnwind(KillingUndObj))
      return false;

    if (KillingI->getParent() == DeadI->getParent())
      return ThrowingBlocks.count(KillingI->getParent());
    return !ThrowingBlocks.empty();
  }
  // Check if \p DeadI acts as a DSE barrier for \p KillingI. The following
  // instructions act as barriers:
  //  * A memory instruction that may throw, when \p KillingI accesses a
  //    non-stack object.
  //  * Atomic stores stronger than monotonic.
  bool isDSEBarrier(const Value *KillingUndObj, Instruction *DeadI) {
    // If DeadI may throw it acts as a barrier, unless we are writing to an
    // alloca/alloca-like object that does not escape.
    if (DeadI->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj))
      return true;

    // If DeadI is an atomic load/store stronger than monotonic, do not try to
    // eliminate/reorder it.
    if (DeadI->isAtomic()) {
      if (auto *LI = dyn_cast<LoadInst>(DeadI))
        return isStrongerThanMonotonic(LI->getOrdering());
      if (auto *SI = dyn_cast<StoreInst>(DeadI))
        return isStrongerThanMonotonic(SI->getOrdering());
      if (auto *ARMW = dyn_cast<AtomicRMWInst>(DeadI))
        return isStrongerThanMonotonic(ARMW->getOrdering());
      if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(DeadI))
        return isStrongerThanMonotonic(CmpXchg->getSuccessOrdering()) ||
               isStrongerThanMonotonic(CmpXchg->getFailureOrdering());
      llvm_unreachable("other instructions should be skipped in MemorySSA");
    }
    return false;
  }
  /// Eliminate writes to objects that are not visible in the caller and are not
  /// accessed before returning from the function.
  bool eliminateDeadWritesAtEndOfFunction() {
    bool MadeChange = false;
    LLVM_DEBUG(
        dbgs()
        << "Trying to eliminate MemoryDefs at the end of the function\n");
    for (MemoryDef *Def : llvm::reverse(MemDefs)) {
      if (SkipStores.contains(Def))
        continue;

      Instruction *DefI = Def->getMemoryInst();
      auto DefLoc = getLocForWrite(DefI);
      if (!DefLoc || !isRemovable(DefI))
        continue;

      // NOTE: Currently eliminating writes at the end of a function is limited
      // to MemoryDefs with a single underlying object, to save compile-time. In
      // practice it appears the case with multiple underlying objects is very
      // uncommon. If it turns out to be important, we can use
      // getUnderlyingObjects here instead.
      const Value *UO = getUnderlyingObject(DefLoc->Ptr);
      if (!isInvisibleToCallerAfterRet(UO))
        continue;

      if (isWriteAtEndOfFunction(Def)) {
        LLVM_DEBUG(dbgs() << " ... MemoryDef is not accessed until the end "
                             "of the function\n");
        deleteDeadInstruction(DefI);
        ++NumFastStores;
        MadeChange = true;
      }
    }
    return MadeChange;
  }
  /// If we have a zero initializing memset following a call to malloc,
  /// try folding it into a call to calloc.
  bool tryFoldIntoCalloc(MemoryDef *Def, const Value *DefUO) {
    Instruction *DefI = Def->getMemoryInst();
    MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
    if (!MemSet)
      // TODO: Could handle zero store to small allocation as well.
      return false;
    Constant *StoredConstant = dyn_cast<Constant>(MemSet->getValue());
    if (!StoredConstant || !StoredConstant->isNullValue())
      return false;

    if (!isRemovable(DefI))
      // The memset might be volatile.
      return false;

    if (F.hasFnAttribute(Attribute::SanitizeMemory) ||
        F.hasFnAttribute(Attribute::SanitizeAddress) ||
        F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
        F.getName() == "calloc")
      return false;
    auto *Malloc = const_cast<CallInst *>(dyn_cast<CallInst>(DefUO));
    if (!Malloc)
      return false;
    auto *InnerCallee = Malloc->getCalledFunction();
    if (!InnerCallee)
      return false;
    LibFunc Func;
    if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) ||
        Func != LibFunc_malloc)
      return false;

    auto shouldCreateCalloc = [](CallInst *Malloc, CallInst *Memset) {
      // Check for a br(icmp eq ptr, null), truebb, falsebb pattern at the end
      // of the malloc block.
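      // Illustrative IR shape (names are placeholders):
      //   %p = call i8* @malloc(i64 %n)
      //   %cmp = icmp eq i8* %p, null
      //   br i1 %cmp, label %fail, label %memset.bb
      // The zero memset over %p must either be in the malloc block itself or
      // in the block taken when the pointer is non-null (the false successor).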
      auto *MallocBB = Malloc->getParent(),
           *MemsetBB = Memset->getParent();
      if (MallocBB == MemsetBB)
        return true;
      auto *Ptr = Memset->getArgOperand(0);
      auto *TI = MallocBB->getTerminator();
      ICmpInst::Predicate Pred;
      BasicBlock *TrueBB, *FalseBB;
      if (!match(TI, m_Br(m_ICmp(Pred, m_Specific(Ptr), m_Zero()), TrueBB,
                          FalseBB)))
        return false;
      if (Pred != ICmpInst::ICMP_EQ || MemsetBB != FalseBB)
        return false;
      return true;
    };

    if (Malloc->getOperand(0) != MemSet->getLength())
      return false;
    if (!shouldCreateCalloc(Malloc, MemSet) ||
        !DT.dominates(Malloc, MemSet) ||
        !memoryIsNotModifiedBetween(Malloc, MemSet, BatchAA, DL, &DT))
      return false;
    IRBuilder<> IRB(Malloc);
    const auto &DL = Malloc->getModule()->getDataLayout();
    auto *Calloc =
        emitCalloc(ConstantInt::get(IRB.getIntPtrTy(DL), 1),
                   Malloc->getArgOperand(0), IRB, TLI);
    if (!Calloc)
      return false;
    MemorySSAUpdater Updater(&MSSA);
    auto *LastDef =
        cast<MemoryDef>(Updater.getMemorySSA()->getMemoryAccess(Malloc));
    auto *NewAccess =
        Updater.createMemoryAccessAfter(cast<Instruction>(Calloc), LastDef,
                                        LastDef);
    auto *NewAccessMD = cast<MemoryDef>(NewAccess);
    Updater.insertDef(NewAccessMD, /*RenameUses=*/true);
    Updater.removeMemoryAccess(Malloc);
    Malloc->replaceAllUsesWith(Calloc);
    Malloc->eraseFromParent();
    return true;
  }
  /// \returns true if \p Def is a no-op store, either because it
  /// directly stores back a loaded value or stores zero to a calloced object.
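  /// For example, `%v = load i32, ptr %p` followed by `store i32 %v, ptr %p`
  /// with no intervening clobber of %p, or a zero store into memory returned
  /// by an allocation already known to be zero-initialized.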
  bool storeIsNoop(MemoryDef *Def, const Value *DefUO) {
    Instruction *DefI = Def->getMemoryInst();
    StoreInst *Store = dyn_cast<StoreInst>(DefI);
    MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
    Constant *StoredConstant = nullptr;
    if (Store)
      StoredConstant = dyn_cast<Constant>(Store->getOperand(0));
    else if (MemSet)
      StoredConstant = dyn_cast<Constant>(MemSet->getValue());
    else
      return false;

    if (!isRemovable(DefI))
      return false;

    if (StoredConstant && isAllocationFn(DefUO, &TLI)) {
      auto *CB = cast<CallBase>(DefUO);
      auto *InitC = getInitialValueOfAllocation(CB, &TLI,
                                                StoredConstant->getType());
      // If the clobbering access is LiveOnEntry, no instructions between them
      // can modify the memory location.
      if (InitC && InitC == StoredConstant)
        return MSSA.isLiveOnEntryDef(
            MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def));
    }

    if (!Store)
      return false;

    if (auto *LoadI = dyn_cast<LoadInst>(Store->getOperand(0))) {
      if (LoadI->getPointerOperand() == Store->getOperand(1)) {
        // Get the defining access for the load.
        auto *LoadAccess = MSSA.getMemoryAccess(LoadI)->getDefiningAccess();
        // Fast path: the defining accesses are the same.
        if (LoadAccess == Def->getDefiningAccess())
          return true;

        // Look through phi accesses. Recursively scan all phi accesses by
        // adding them to a worklist. Bail when we run into a memory def that
        // does not match LoadAccess.
        SetVector<MemoryAccess *> ToCheck;
        MemoryAccess *Current =
            MSSA.getWalker()->getClobberingMemoryAccess(Def);
        // We don't want to bail when we run into the store memory def. But,
        // the phi access may point to it. So, pretend like we've already
        // checked it.
        ToCheck.insert(Def);
        ToCheck.insert(Current);
        // Start at current (1) to simulate already having checked Def.
        for (unsigned I = 1; I < ToCheck.size(); ++I) {
          Current = ToCheck[I];
          if (auto PhiAccess = dyn_cast<MemoryPhi>(Current)) {
            // Check all the operands.
            for (auto &Use : PhiAccess->incoming_values())
              ToCheck.insert(cast<MemoryAccess>(&Use));
            continue;
          }
          // If we found a memory def, bail. This happens when we have an
          // unrelated write in between an otherwise noop store.
          assert(isa<MemoryDef>(Current) &&
                 "Only MemoryDefs should reach here.");
          // TODO: Skip no alias MemoryDefs that have no aliasing reads.
          // We are searching for the definition of the store's destination.
          // So, if that is the same definition as the load, then this is a
          // noop. Otherwise, fail.
          if (LoadAccess != Current)
            return false;
        }
        return true;
      }
    }
    return false;
  }
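  /// Try to shorten stores in \p IOL whose live range is only partially
  /// needed, trimming the overwritten bytes from the end and/or the beginning
  /// of each dead store.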
  bool removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL) {
    bool Changed = false;
    for (auto OI : IOL) {
      Instruction *DeadI = OI.first;
      MemoryLocation Loc = *getLocForWrite(DeadI);
      assert(isRemovable(DeadI) && "Expect only removable instruction");

      const Value *Ptr = Loc.Ptr->stripPointerCasts();
      int64_t DeadStart = 0;
      uint64_t DeadSize = Loc.Size.getValue();
      GetPointerBaseWithConstantOffset(Ptr, DeadStart, DL);
      OverlapIntervalsTy &IntervalMap = OI.second;
      Changed |= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize);
      if (IntervalMap.empty())
        continue;
      Changed |= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize);
    }
    return Changed;
  }
  /// Eliminates writes to locations where the value that is being written
  /// is already stored at the same location.
  bool eliminateRedundantStoresOfExistingValues() {
    bool MadeChange = false;
    LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs that write the "
                         "already existing value\n");
    for (auto *Def : MemDefs) {
      if (SkipStores.contains(Def) || MSSA.isLiveOnEntryDef(Def))
        continue;

      Instruction *DefInst = Def->getMemoryInst();
      auto MaybeDefLoc = getLocForWrite(DefInst);
      if (!MaybeDefLoc || !isRemovable(DefInst))
        continue;

      MemoryDef *UpperDef;
      // To conserve compile-time, we avoid walking to the next clobbering def.
      // Instead, we just try to get the optimized access, if it exists. DSE
      // will try to optimize defs during the earlier traversal.
      if (Def->isOptimized())
        UpperDef = dyn_cast<MemoryDef>(Def->getOptimized());
      else
        UpperDef = dyn_cast<MemoryDef>(Def->getDefiningAccess());
      if (!UpperDef || MSSA.isLiveOnEntryDef(UpperDef))
        continue;

      Instruction *UpperInst = UpperDef->getMemoryInst();
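      // A store is redundant if it is identical to the upper def, or if a
      // dominating memset already filled the stored-to bytes with the same
      // byte pattern (e.g. a zero store into a region fully covered by a zero
      // memset).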
      auto IsRedundantStore = [&]() {
        if (DefInst->isIdenticalTo(UpperInst))
          return true;
        if (auto *MemSetI = dyn_cast<MemSetInst>(UpperInst)) {
          if (auto *SI = dyn_cast<StoreInst>(DefInst)) {
            // MemSetInst must have a write location.
            MemoryLocation UpperLoc = *getLocForWrite(UpperInst);
            int64_t InstWriteOffset = 0;
            int64_t DepWriteOffset = 0;
            auto OR = isOverwrite(UpperInst, DefInst, UpperLoc, *MaybeDefLoc,
                                  InstWriteOffset, DepWriteOffset);
            Value *StoredByte = isBytewiseValue(SI->getValueOperand(), DL);
            return StoredByte && StoredByte == MemSetI->getOperand(1) &&
                   OR == OW_Complete;
          }
        }
        return false;
      };

      if (!IsRedundantStore() || isReadClobber(*MaybeDefLoc, DefInst))
        continue;
      LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *DefInst
                        << '\n');
      deleteDeadInstruction(DefInst);
      NumRedundantStores++;
      MadeChange = true;
    }
    return MadeChange;
  }
};
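// Top-level driver. For each MemoryDef (the "killing" def), walk upwards
// through MemorySSA collecting earlier MemoryDefs that are completely
// overwritten (or terminated) by it and delete them; partially overlapping
// stores may be shortened or merged. Afterwards, no-op stores, redundant
// stores of existing values, malloc+memset pairs (folded to calloc) and writes
// that are dead at the end of the function are handled.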
static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
                                DominatorTree &DT, PostDominatorTree &PDT,
                                const TargetLibraryInfo &TLI,
                                const LoopInfo &LI) {
  bool MadeChange = false;

  DSEState State(F, AA, MSSA, DT, PDT, TLI, LI);
  // For each store:
  for (unsigned I = 0; I < State.MemDefs.size(); I++) {
    MemoryDef *KillingDef = State.MemDefs[I];
    if (State.SkipStores.count(KillingDef))
      continue;

    Instruction *KillingI = KillingDef->getMemoryInst();

    Optional<MemoryLocation> MaybeKillingLoc;
    if (State.isMemTerminatorInst(KillingI))
      MaybeKillingLoc = State.getLocForTerminator(KillingI).map(
          [](const std::pair<MemoryLocation, bool> &P) { return P.first; });
    else
      MaybeKillingLoc = State.getLocForWrite(KillingI);

    if (!MaybeKillingLoc) {
      LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "
                        << *KillingI << "\n");
      continue;
    }
    MemoryLocation KillingLoc = *MaybeKillingLoc;
    assert(KillingLoc.Ptr && "KillingLoc should not be null");
    const Value *KillingUndObj = getUnderlyingObject(KillingLoc.Ptr);
    LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
                      << *KillingDef << " (" << *KillingI << ")\n");

    unsigned ScanLimit = MemorySSAScanLimit;
    unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit;
    unsigned PartialLimit = MemorySSAPartialStoreLimit;
    // Worklist of MemoryAccesses that may be killed by KillingDef.
    SetVector<MemoryAccess *> ToCheck;
    ToCheck.insert(KillingDef->getDefiningAccess());

    bool Shortend = false;
    bool IsMemTerm = State.isMemTerminatorInst(KillingI);
    // Check if MemoryAccesses in the worklist are killed by KillingDef.
    for (unsigned I = 0; I < ToCheck.size(); I++) {
      MemoryAccess *Current = ToCheck[I];
      if (State.SkipStores.count(Current))
        continue;

      Optional<MemoryAccess *> MaybeDeadAccess = State.getDomMemoryDef(
          KillingDef, Current, KillingLoc, KillingUndObj, ScanLimit,
          WalkerStepLimit, IsMemTerm, PartialLimit);

      if (!MaybeDeadAccess) {
        LLVM_DEBUG(dbgs() << " finished walk\n");
        continue;
      }

      MemoryAccess *DeadAccess = *MaybeDeadAccess;
      LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DeadAccess);
      if (isa<MemoryPhi>(DeadAccess)) {
        LLVM_DEBUG(dbgs() << "\n ... adding incoming values to worklist\n");
        for (Value *V : cast<MemoryPhi>(DeadAccess)->incoming_values()) {
          MemoryAccess *IncomingAccess = cast<MemoryAccess>(V);
          BasicBlock *IncomingBlock = IncomingAccess->getBlock();
          BasicBlock *PhiBlock = DeadAccess->getBlock();

          // We only consider incoming MemoryAccesses that come before the
          // MemoryPhi. Otherwise we could discover candidates that do not
          // strictly dominate our starting def.
          if (State.PostOrderNumbers[IncomingBlock] >
              State.PostOrderNumbers[PhiBlock])
            ToCheck.insert(IncomingAccess);
        }
        continue;
      }
      auto *DeadDefAccess = cast<MemoryDef>(DeadAccess);
      Instruction *DeadI = DeadDefAccess->getMemoryInst();
      LLVM_DEBUG(dbgs() << " (" << *DeadI << ")\n");
      ToCheck.insert(DeadDefAccess->getDefiningAccess());
      NumGetDomMemoryDefPassed++;

      if (!DebugCounter::shouldExecute(MemorySSACounter))
        continue;

      MemoryLocation DeadLoc = *State.getLocForWrite(DeadI);
      if (IsMemTerm) {
        const Value *DeadUndObj = getUnderlyingObject(DeadLoc.Ptr);
        if (KillingUndObj != DeadUndObj)
          continue;
        LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DeadI
                          << "\n KILLER: " << *KillingI << '\n');
        State.deleteDeadInstruction(DeadI);
        ++NumFastStores;
        MadeChange = true;
      } else {
        // Check if DeadI overwrites KillingI.
        int64_t KillingOffset = 0;
        int64_t DeadOffset = 0;
        OverwriteResult OR = State.isOverwrite(
            KillingI, DeadI, KillingLoc, DeadLoc, KillingOffset, DeadOffset);
        if (OR == OW_MaybePartial) {
          auto Iter = State.IOLs.insert(
              std::make_pair<BasicBlock *, InstOverlapIntervalsTy>(
                  DeadI->getParent(), InstOverlapIntervalsTy()));
          auto &IOL = Iter.first->second;
          OR = isPartialOverwrite(KillingLoc, DeadLoc, KillingOffset,
                                  DeadOffset, DeadI, IOL);
        }

        if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) {
          auto *DeadSI = dyn_cast<StoreInst>(DeadI);
          auto *KillingSI = dyn_cast<StoreInst>(KillingI);
          // We are re-using tryToMergePartialOverlappingStores, which requires
          // DeadSI to dominate KillingSI.
          // TODO: implement tryToMergePartialOverlappingStores using MemorySSA.
          if (DeadSI && KillingSI && DT.dominates(DeadSI, KillingSI)) {
            if (Constant *Merged = tryToMergePartialOverlappingStores(
                    KillingSI, DeadSI, KillingOffset, DeadOffset, State.DL,
                    State.BatchAA, &DT)) {

              // Update stored value of earlier store to merged constant.
              DeadSI->setOperand(0, Merged);
              ++NumModifiedStores;
              MadeChange = true;

              Shortend = true;
              // Remove killing store and remove any outstanding overlap
              // intervals for the updated store.
              State.deleteDeadInstruction(KillingSI);
              auto I = State.IOLs.find(DeadSI->getParent());
              if (I != State.IOLs.end())
                I->second.erase(DeadSI);
              break;
            }
          }
        }

        if (OR == OW_Complete) {
          LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DeadI
                            << "\n KILLER: " << *KillingI << '\n');
          State.deleteDeadInstruction(DeadI);
          ++NumFastStores;
          MadeChange = true;
        }
      }
    }

    // Check if the store is a no-op.
    if (!Shortend && State.storeIsNoop(KillingDef, KillingUndObj)) {
      LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *KillingI
                        << '\n');
      State.deleteDeadInstruction(KillingI);
      NumRedundantStores++;
      MadeChange = true;
      continue;
    }

    // Can we form a calloc from a memset/malloc pair?
    if (!Shortend && State.tryFoldIntoCalloc(KillingDef, KillingUndObj)) {
      LLVM_DEBUG(dbgs() << "DSE: Remove memset after forming calloc:\n"
                        << " DEAD: " << *KillingI << '\n');
      State.deleteDeadInstruction(KillingI);
      MadeChange = true;
      continue;
    }
  }

  if (EnablePartialOverwriteTracking)
    for (auto &KV : State.IOLs)
      MadeChange |= State.removePartiallyOverlappedStores(KV.second);

  MadeChange |= State.eliminateRedundantStoresOfExistingValues();
  MadeChange |= State.eliminateDeadWritesAtEndOfFunction();
  return MadeChange;
}
} // end anonymous namespace

//===----------------------------------------------------------------------===//
// DSE Pass
//===----------------------------------------------------------------------===//
PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
  AliasAnalysis &AA = AM.getResult<AAManager>(F);
  const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F);
  DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
  MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
  PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
  LoopInfo &LI = AM.getResult<LoopAnalysis>(F);

  bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);

#ifdef LLVM_ENABLE_STATS
  if (AreStatisticsEnabled())
    for (auto &I : instructions(F))
      NumRemainingStores += isa<StoreInst>(&I);
#endif

  if (!Changed)
    return PreservedAnalyses::all();
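  // DSE never changes the CFG, so CFG-based analyses remain valid; MemorySSA
  // and LoopInfo are kept up to date explicitly and can be preserved as well.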
  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  PA.preserve<MemorySSAAnalysis>();
  PA.preserve<LoopAnalysis>();
  return PA;
}

namespace {

/// A legacy pass for the legacy pass manager that wraps \c DSEPass.
class DSELegacyPass : public FunctionPass {
public:
  static char ID; // Pass identification, replacement for typeid

  DSELegacyPass() : FunctionPass(ID) {
    initializeDSELegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;

    AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
    DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    const TargetLibraryInfo &TLI =
        getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
    MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
    PostDominatorTree &PDT =
        getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
    LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();

    bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);

#ifdef LLVM_ENABLE_STATS
    if (AreStatisticsEnabled())
      for (auto &I : instructions(F))
        NumRemainingStores += isa<StoreInst>(&I);
#endif

    return Changed;
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addPreserved<GlobalsAAWrapperPass>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addPreserved<DominatorTreeWrapperPass>();
    AU.addRequired<PostDominatorTreeWrapperPass>();
    AU.addRequired<MemorySSAWrapperPass>();
    AU.addPreserved<PostDominatorTreeWrapperPass>();
    AU.addPreserved<MemorySSAWrapperPass>();
    AU.addRequired<LoopInfoWrapperPass>();
    AU.addPreserved<LoopInfoWrapperPass>();
  }
};

} // end anonymous namespace

char DSELegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false,
                    false)

FunctionPass *llvm::createDeadStoreEliminationPass() {
  return new DSELegacyPass();
}