DeadStoreElimination.cpp

  1. //===- DeadStoreElimination.cpp - MemorySSA Backed Dead Store Elimination -===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // The code below implements dead store elimination using MemorySSA. It uses
  10. // the following general approach: given a MemoryDef, walk upwards to find
  11. // clobbering MemoryDefs that may be killed by the starting def. Then check
  12. // that there are no uses that may read the location of the original MemoryDef
  13. // in between both MemoryDefs. A bit more concretely:
  14. //
  15. // For all MemoryDefs StartDef:
  16. // 1. Get the next dominating clobbering MemoryDef (MaybeDeadAccess) by walking
  17. // upwards.
  18. // 2. Check that there are no reads between MaybeDeadAccess and the StartDef by
  19. // checking all uses starting at MaybeDeadAccess and walking until we see
  20. // StartDef.
  21. // 3. For each found CurrentDef, check that:
  22. // 1. There are no barrier instructions between CurrentDef and StartDef (like
  23. // throws or stores with ordering constraints).
  24. // 2. StartDef is executed whenever CurrentDef is executed.
  25. // 3. StartDef completely overwrites CurrentDef.
  26. // 4. Erase CurrentDef from the function and MemorySSA.
  27. //
  28. //===----------------------------------------------------------------------===//
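// Illustrative example (a sketch added for clarity, not upstream LLVM code):
// given IR such as
//
//   define void @example(ptr %p) {
//     store i32 1, ptr %p        ; CurrentDef, never read before the next store
//     store i32 2, ptr %p        ; StartDef (the killing MemoryDef)
//     ret void
//   }
//
// walking upwards from the second store's MemoryDef reaches the first store
// with no intervening reads, so the first store is erased as dead.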
  29. #include "llvm/Transforms/Scalar/DeadStoreElimination.h"
  30. #include "llvm/ADT/APInt.h"
  31. #include "llvm/ADT/DenseMap.h"
  32. #include "llvm/ADT/MapVector.h"
  33. #include "llvm/ADT/PostOrderIterator.h"
  34. #include "llvm/ADT/SetVector.h"
  35. #include "llvm/ADT/SmallPtrSet.h"
  36. #include "llvm/ADT/SmallVector.h"
  37. #include "llvm/ADT/Statistic.h"
  38. #include "llvm/ADT/StringRef.h"
  39. #include "llvm/Analysis/AliasAnalysis.h"
  40. #include "llvm/Analysis/AssumptionCache.h"
  41. #include "llvm/Analysis/CaptureTracking.h"
  42. #include "llvm/Analysis/CodeMetrics.h"
  43. #include "llvm/Analysis/GlobalsModRef.h"
  44. #include "llvm/Analysis/LoopInfo.h"
  45. #include "llvm/Analysis/MemoryBuiltins.h"
  46. #include "llvm/Analysis/MemoryLocation.h"
  47. #include "llvm/Analysis/MemorySSA.h"
  48. #include "llvm/Analysis/MemorySSAUpdater.h"
  49. #include "llvm/Analysis/MustExecute.h"
  50. #include "llvm/Analysis/PostDominators.h"
  51. #include "llvm/Analysis/TargetLibraryInfo.h"
  52. #include "llvm/Analysis/ValueTracking.h"
  53. #include "llvm/IR/Argument.h"
  54. #include "llvm/IR/BasicBlock.h"
  55. #include "llvm/IR/Constant.h"
  56. #include "llvm/IR/Constants.h"
  57. #include "llvm/IR/DataLayout.h"
  58. #include "llvm/IR/DebugInfo.h"
  59. #include "llvm/IR/Dominators.h"
  60. #include "llvm/IR/Function.h"
  61. #include "llvm/IR/IRBuilder.h"
  62. #include "llvm/IR/InstIterator.h"
  63. #include "llvm/IR/InstrTypes.h"
  64. #include "llvm/IR/Instruction.h"
  65. #include "llvm/IR/Instructions.h"
  66. #include "llvm/IR/IntrinsicInst.h"
  67. #include "llvm/IR/Module.h"
  68. #include "llvm/IR/PassManager.h"
  69. #include "llvm/IR/PatternMatch.h"
  70. #include "llvm/IR/Value.h"
  71. #include "llvm/InitializePasses.h"
  72. #include "llvm/Pass.h"
  73. #include "llvm/Support/Casting.h"
  74. #include "llvm/Support/CommandLine.h"
  75. #include "llvm/Support/Debug.h"
  76. #include "llvm/Support/DebugCounter.h"
  77. #include "llvm/Support/ErrorHandling.h"
  78. #include "llvm/Support/raw_ostream.h"
  79. #include "llvm/Transforms/Scalar.h"
  80. #include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
  81. #include "llvm/Transforms/Utils/BuildLibCalls.h"
  82. #include "llvm/Transforms/Utils/Local.h"
  83. #include <algorithm>
  84. #include <cassert>
  85. #include <cstdint>
  86. #include <iterator>
  87. #include <map>
  88. #include <optional>
  89. #include <utility>
  90. using namespace llvm;
  91. using namespace PatternMatch;
  92. #define DEBUG_TYPE "dse"
  93. STATISTIC(NumRemainingStores, "Number of stores remaining after DSE");
  94. STATISTIC(NumRedundantStores, "Number of redundant stores deleted");
  95. STATISTIC(NumFastStores, "Number of stores deleted");
  96. STATISTIC(NumFastOther, "Number of other instrs removed");
  97. STATISTIC(NumCompletePartials, "Number of stores dead by later partials");
  98. STATISTIC(NumModifiedStores, "Number of stores modified");
  99. STATISTIC(NumCFGChecks, "Number of CFG checks performed");
  100. STATISTIC(NumCFGTries, "Number of CFG checks attempted");
  101. STATISTIC(NumCFGSuccess, "Number of CFG checks that succeeded");
  102. STATISTIC(NumGetDomMemoryDefPassed,
  103. "Number of times a valid candidate is returned from getDomMemoryDef");
  104. STATISTIC(NumDomMemDefChecks,
  105. "Number iterations check for reads in getDomMemoryDef");
  106. DEBUG_COUNTER(MemorySSACounter, "dse-memoryssa",
  107. "Controls which MemoryDefs are eliminated.");
  108. static cl::opt<bool>
  109. EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking",
  110. cl::init(true), cl::Hidden,
  111. cl::desc("Enable partial-overwrite tracking in DSE"));
  112. static cl::opt<bool>
  113. EnablePartialStoreMerging("enable-dse-partial-store-merging",
  114. cl::init(true), cl::Hidden,
  115. cl::desc("Enable partial store merging in DSE"));
  116. static cl::opt<unsigned>
  117. MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden,
  118. cl::desc("The number of memory instructions to scan for "
  119. "dead store elimination (default = 150)"));
  120. static cl::opt<unsigned> MemorySSAUpwardsStepLimit(
  121. "dse-memoryssa-walklimit", cl::init(90), cl::Hidden,
  122. cl::desc("The maximum number of steps while walking upwards to find "
  123. "MemoryDefs that may be killed (default = 90)"));
  124. static cl::opt<unsigned> MemorySSAPartialStoreLimit(
  125. "dse-memoryssa-partial-store-limit", cl::init(5), cl::Hidden,
  126. cl::desc("The maximum number candidates that only partially overwrite the "
  127. "killing MemoryDef to consider"
  128. " (default = 5)"));
  129. static cl::opt<unsigned> MemorySSADefsPerBlockLimit(
  130. "dse-memoryssa-defs-per-block-limit", cl::init(5000), cl::Hidden,
  131. cl::desc("The number of MemoryDefs we consider as candidates to eliminated "
  132. "other stores per basic block (default = 5000)"));
  133. static cl::opt<unsigned> MemorySSASameBBStepCost(
  134. "dse-memoryssa-samebb-cost", cl::init(1), cl::Hidden,
  135. cl::desc(
  136. "The cost of a step in the same basic block as the killing MemoryDef"
  137. "(default = 1)"));
  138. static cl::opt<unsigned>
  139. MemorySSAOtherBBStepCost("dse-memoryssa-otherbb-cost", cl::init(5),
  140. cl::Hidden,
  141. cl::desc("The cost of a step in a different basic "
  142. "block than the killing MemoryDef"
  143. "(default = 5)"));
  144. static cl::opt<unsigned> MemorySSAPathCheckLimit(
  145. "dse-memoryssa-path-check-limit", cl::init(50), cl::Hidden,
  146. cl::desc("The maximum number of blocks to check when trying to prove that "
  147. "all paths to an exit go through a killing block (default = 50)"));
  148. // This flag allows or disallows DSE to optimize MemorySSA during its
  149. // traversal. Note that DSE optimizing MemorySSA may impact other passes
  150. // downstream of the DSE invocation and can lead to issues not being
  151. // reproducible in isolation (i.e. when MemorySSA is built from scratch). In
  152. // those cases, the flag can be used to check if DSE's MemorySSA optimizations
  153. // impact follow-up passes.
  154. static cl::opt<bool>
  155. OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden,
  156. cl::desc("Allow DSE to optimize memory accesses."));
  157. //===----------------------------------------------------------------------===//
  158. // Helper functions
  159. //===----------------------------------------------------------------------===//
  160. using OverlapIntervalsTy = std::map<int64_t, int64_t>;
  161. using InstOverlapIntervalsTy = DenseMap<Instruction *, OverlapIntervalsTy>;
  162. /// Returns true if the end of this instruction can be safely shortened in
  163. /// length.
  164. static bool isShortenableAtTheEnd(Instruction *I) {
  165. // Don't shorten stores for now
  166. if (isa<StoreInst>(I))
  167. return false;
  168. if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
  169. switch (II->getIntrinsicID()) {
  170. default: return false;
  171. case Intrinsic::memset:
  172. case Intrinsic::memcpy:
  173. case Intrinsic::memcpy_element_unordered_atomic:
  174. case Intrinsic::memset_element_unordered_atomic:
  175. // Do shorten memory intrinsics.
  176. // FIXME: Add memmove if it's also safe to transform.
  177. return true;
  178. }
  179. }
  180. // Don't shorten libcalls for now.
  181. return false;
  182. }
  183. /// Returns true if the beginning of this instruction can be safely shortened
  184. /// in length.
  185. static bool isShortenableAtTheBeginning(Instruction *I) {
  186. // FIXME: Handle only memset for now. Supporting memcpy/memmove should be
  187. // easily done by offsetting the source address.
  188. return isa<AnyMemSetInst>(I);
  189. }
  190. static uint64_t getPointerSize(const Value *V, const DataLayout &DL,
  191. const TargetLibraryInfo &TLI,
  192. const Function *F) {
  193. uint64_t Size;
  194. ObjectSizeOpts Opts;
  195. Opts.NullIsUnknownSize = NullPointerIsDefined(F);
  196. if (getObjectSize(V, Size, DL, &TLI, Opts))
  197. return Size;
  198. return MemoryLocation::UnknownSize;
  199. }
  200. namespace {
  201. enum OverwriteResult {
  202. OW_Begin,
  203. OW_Complete,
  204. OW_End,
  205. OW_PartialEarlierWithFullLater,
  206. OW_MaybePartial,
  207. OW_None,
  208. OW_Unknown
  209. };
  210. } // end anonymous namespace
  211. /// Check if two instructions are masked stores that completely
  212. /// overwrite one another. More specifically, \p KillingI has to
  213. /// overwrite \p DeadI.
  214. static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI,
  215. const Instruction *DeadI,
  216. BatchAAResults &AA) {
  217. const auto *KillingII = dyn_cast<IntrinsicInst>(KillingI);
  218. const auto *DeadII = dyn_cast<IntrinsicInst>(DeadI);
  219. if (KillingII == nullptr || DeadII == nullptr)
  220. return OW_Unknown;
  221. if (KillingII->getIntrinsicID() != DeadII->getIntrinsicID())
  222. return OW_Unknown;
  223. if (KillingII->getIntrinsicID() == Intrinsic::masked_store) {
  224. // Type size.
  225. VectorType *KillingTy =
  226. cast<VectorType>(KillingII->getArgOperand(0)->getType());
  227. VectorType *DeadTy = cast<VectorType>(DeadII->getArgOperand(0)->getType());
  228. if (KillingTy->getScalarSizeInBits() != DeadTy->getScalarSizeInBits())
  229. return OW_Unknown;
  230. // Element count.
  231. if (KillingTy->getElementCount() != DeadTy->getElementCount())
  232. return OW_Unknown;
  233. // Pointers.
  234. Value *KillingPtr = KillingII->getArgOperand(1)->stripPointerCasts();
  235. Value *DeadPtr = DeadII->getArgOperand(1)->stripPointerCasts();
  236. if (KillingPtr != DeadPtr && !AA.isMustAlias(KillingPtr, DeadPtr))
  237. return OW_Unknown;
  238. // Masks.
  239. // TODO: check that KillingII's mask is a superset of the DeadII's mask.
  240. if (KillingII->getArgOperand(3) != DeadII->getArgOperand(3))
  241. return OW_Unknown;
  242. return OW_Complete;
  243. }
  244. return OW_Unknown;
  245. }
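// Illustrative example (a sketch added for clarity, not upstream LLVM code):
// two masked stores with identical element type, element count, pointer and
// mask, e.g.
//
//   call void @llvm.masked.store.v4i32.p0(<4 x i32> %a, ptr %p, i32 4, <4 x i1> %m)
//   call void @llvm.masked.store.v4i32.p0(<4 x i32> %b, ptr %p, i32 4, <4 x i1> %m)
//
// are classified as OW_Complete here, making the earlier store a dead-store
// candidate.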
  246. /// Return 'OW_Complete' if a store to the 'KillingLoc' location completely
  247. /// overwrites a store to the 'DeadLoc' location, 'OW_End' if the end of the
  248. /// 'DeadLoc' location is completely overwritten by 'KillingLoc', 'OW_Begin'
  249. /// if the beginning of the 'DeadLoc' location is overwritten by 'KillingLoc'.
  250. /// 'OW_PartialEarlierWithFullLater' means that a dead (big) store was
  251. /// overwritten by a killing (smaller) store which doesn't write outside the big
  252. /// store's memory locations. Returns 'OW_Unknown' if nothing can be determined.
  253. /// NOTE: This function must only be called if both \p KillingLoc and \p
  254. /// DeadLoc belong to the same underlying object with valid \p KillingOff and
  255. /// \p DeadOff.
  256. static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc,
  257. const MemoryLocation &DeadLoc,
  258. int64_t KillingOff, int64_t DeadOff,
  259. Instruction *DeadI,
  260. InstOverlapIntervalsTy &IOL) {
  261. const uint64_t KillingSize = KillingLoc.Size.getValue();
  262. const uint64_t DeadSize = DeadLoc.Size.getValue();
  263. // We may now overlap, although the overlap is not complete. There might also
  264. // be other incomplete overlaps, and together, they might cover the complete
  265. // dead store.
  266. // Note: The correctness of this logic depends on the fact that this function
  267. // is not called when there are any intervening reads of the dead store.
  268. if (EnablePartialOverwriteTracking &&
  269. KillingOff < int64_t(DeadOff + DeadSize) &&
  270. int64_t(KillingOff + KillingSize) >= DeadOff) {
  271. // Insert our part of the overlap into the map.
  272. auto &IM = IOL[DeadI];
  273. LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: DeadLoc [" << DeadOff << ", "
  274. << int64_t(DeadOff + DeadSize) << ") KillingLoc ["
  275. << KillingOff << ", " << int64_t(KillingOff + KillingSize)
  276. << ")\n");
  277. // Make sure that we only insert non-overlapping intervals and combine
  278. // adjacent intervals. The intervals are stored in the map with the ending
  279. // offset as the key (in the half-open sense) and the starting offset as
  280. // the value.
  281. int64_t KillingIntStart = KillingOff;
  282. int64_t KillingIntEnd = KillingOff + KillingSize;
  283. // Find any intervals ending at, or after, KillingIntStart which start
  284. // before KillingIntEnd.
  285. auto ILI = IM.lower_bound(KillingIntStart);
  286. if (ILI != IM.end() && ILI->second <= KillingIntEnd) {
  287. // This existing interval is overlapped with the current store somewhere
  288. // in [KillingIntStart, KillingIntEnd]. Merge them by erasing the existing
  289. // intervals and adjusting our start and end.
  290. KillingIntStart = std::min(KillingIntStart, ILI->second);
  291. KillingIntEnd = std::max(KillingIntEnd, ILI->first);
  292. ILI = IM.erase(ILI);
  293. // Continue erasing and adjusting our end in case other previous
  294. // intervals are also overlapped with the current store.
  295. //
  296. // |--- dead 1 ---| |--- dead 2 ---|
  297. // |------- killing---------|
  298. //
  299. while (ILI != IM.end() && ILI->second <= KillingIntEnd) {
  300. assert(ILI->second > KillingIntStart && "Unexpected interval");
  301. KillingIntEnd = std::max(KillingIntEnd, ILI->first);
  302. ILI = IM.erase(ILI);
  303. }
  304. }
  305. IM[KillingIntEnd] = KillingIntStart;
  306. ILI = IM.begin();
  307. if (ILI->second <= DeadOff && ILI->first >= int64_t(DeadOff + DeadSize)) {
  308. LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: DeadLoc ["
  309. << DeadOff << ", " << int64_t(DeadOff + DeadSize)
  310. << ") Composite KillingLoc [" << ILI->second << ", "
  311. << ILI->first << ")\n");
  312. ++NumCompletePartials;
  313. return OW_Complete;
  314. }
  315. }
  316. // Check for a dead store which writes to all the memory locations that
  317. // the killing store writes to.
  318. if (EnablePartialStoreMerging && KillingOff >= DeadOff &&
  319. int64_t(DeadOff + DeadSize) > KillingOff &&
  320. uint64_t(KillingOff - DeadOff) + KillingSize <= DeadSize) {
  321. LLVM_DEBUG(dbgs() << "DSE: Partial overwrite a dead load [" << DeadOff
  322. << ", " << int64_t(DeadOff + DeadSize)
  323. << ") by a killing store [" << KillingOff << ", "
  324. << int64_t(KillingOff + KillingSize) << ")\n");
  325. // TODO: Maybe come up with a better name?
  326. return OW_PartialEarlierWithFullLater;
  327. }
  328. // Another interesting case is if the killing store overwrites the end of the
  329. // dead store.
  330. //
  331. // |--dead--|
  332. // |-- killing --|
  333. //
  334. // In this case we may want to trim the size of the dead store to avoid
  335. // generating stores to addresses which will definitely be overwritten by
  336. // the killing store.
  337. if (!EnablePartialOverwriteTracking &&
  338. (KillingOff > DeadOff && KillingOff < int64_t(DeadOff + DeadSize) &&
  339. int64_t(KillingOff + KillingSize) >= int64_t(DeadOff + DeadSize)))
  340. return OW_End;
  341. // Finally, we also need to check if the killing store overwrites the
  342. // beginning of the dead store.
  343. //
  344. // |--dead--|
  345. // |-- killing --|
  346. //
  347. // In this case we may want to move the destination address and trim the size
  348. // of the dead store to avoid generating stores to addresses which will
  349. // definitely be overwritten by the killing store.
  350. if (!EnablePartialOverwriteTracking &&
  351. (KillingOff <= DeadOff && int64_t(KillingOff + KillingSize) > DeadOff)) {
  352. assert(int64_t(KillingOff + KillingSize) < int64_t(DeadOff + DeadSize) &&
  353. "Expect to be handled as OW_Complete");
  354. return OW_Begin;
  355. }
  356. // Otherwise, they don't completely overlap.
  357. return OW_Unknown;
  358. }
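// Worked example (added for clarity, not upstream LLVM code): a dead 8-byte
// store covering [0, 8) that is later overwritten by two 4-byte killing
// stores covering [0, 4) and [4, 8) is tracked as two intervals in IOL; when
// the second interval is inserted it merges with the first into [0, 8), which
// fully covers the dead store, so this function returns OW_Complete (counted
// in NumCompletePartials).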
  359. /// Returns true if the memory which is accessed by the second instruction is not
  360. /// modified between the first and the second instruction.
  361. /// Precondition: Second instruction must be dominated by the first
  362. /// instruction.
  363. static bool
  364. memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI,
  365. BatchAAResults &AA, const DataLayout &DL,
  366. DominatorTree *DT) {
  367. // Do a backwards scan through the CFG from SecondI to FirstI. Look for
  368. // instructions which can modify the memory location accessed by SecondI.
  369. //
  370. // While doing the walk keep track of the address to check. It might be
  371. // different in different basic blocks due to PHI translation.
  372. using BlockAddressPair = std::pair<BasicBlock *, PHITransAddr>;
  373. SmallVector<BlockAddressPair, 16> WorkList;
  374. // Keep track of the address we visited each block with. Bail out if we
  375. // visit a block with different addresses.
  376. DenseMap<BasicBlock *, Value *> Visited;
  377. BasicBlock::iterator FirstBBI(FirstI);
  378. ++FirstBBI;
  379. BasicBlock::iterator SecondBBI(SecondI);
  380. BasicBlock *FirstBB = FirstI->getParent();
  381. BasicBlock *SecondBB = SecondI->getParent();
  382. MemoryLocation MemLoc;
  383. if (auto *MemSet = dyn_cast<MemSetInst>(SecondI))
  384. MemLoc = MemoryLocation::getForDest(MemSet);
  385. else
  386. MemLoc = MemoryLocation::get(SecondI);
  387. auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr);
  388. // Start checking the SecondBB.
  389. WorkList.push_back(
  390. std::make_pair(SecondBB, PHITransAddr(MemLocPtr, DL, nullptr)));
  391. bool isFirstBlock = true;
  392. // Check all blocks going backward until we reach the FirstBB.
  393. while (!WorkList.empty()) {
  394. BlockAddressPair Current = WorkList.pop_back_val();
  395. BasicBlock *B = Current.first;
  396. PHITransAddr &Addr = Current.second;
  397. Value *Ptr = Addr.getAddr();
  398. // Ignore instructions before FirstI if this is the FirstBB.
  399. BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin());
  400. BasicBlock::iterator EI;
  401. if (isFirstBlock) {
  402. // Ignore instructions after SecondI if this is the first visit of SecondBB.
  403. assert(B == SecondBB && "first block is not the store block");
  404. EI = SecondBBI;
  405. isFirstBlock = false;
  406. } else {
  407. // It's not SecondBB or (in case of a loop) the second visit of SecondBB.
  408. // In this case we also have to look at instructions after SecondI.
  409. EI = B->end();
  410. }
  411. for (; BI != EI; ++BI) {
  412. Instruction *I = &*BI;
  413. if (I->mayWriteToMemory() && I != SecondI)
  414. if (isModSet(AA.getModRefInfo(I, MemLoc.getWithNewPtr(Ptr))))
  415. return false;
  416. }
  417. if (B != FirstBB) {
  418. assert(B != &FirstBB->getParent()->getEntryBlock() &&
  419. "Should not hit the entry block because SI must be dominated by LI");
  420. for (BasicBlock *Pred : predecessors(B)) {
  421. PHITransAddr PredAddr = Addr;
  422. if (PredAddr.NeedsPHITranslationFromBlock(B)) {
  423. if (!PredAddr.IsPotentiallyPHITranslatable())
  424. return false;
  425. if (PredAddr.PHITranslateValue(B, Pred, DT, false))
  426. return false;
  427. }
  428. Value *TranslatedPtr = PredAddr.getAddr();
  429. auto Inserted = Visited.insert(std::make_pair(Pred, TranslatedPtr));
  430. if (!Inserted.second) {
  431. // We already visited this block before. If it was with a different
  432. // address - bail out!
  433. if (TranslatedPtr != Inserted.first->second)
  434. return false;
  435. // ... otherwise just skip it.
  436. continue;
  437. }
  438. WorkList.push_back(std::make_pair(Pred, PredAddr));
  439. }
  440. }
  441. }
  442. return true;
  443. }
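// Illustrative example (a sketch added for clarity, not upstream LLVM code):
// for
//   store i32 0, ptr %p          ; FirstI
//   %v = load i32, ptr %q        ; does not write memory, ignored by the scan
//   store i32 1, ptr %p          ; SecondI
// the backwards walk from SecondI finds no instruction that may write to
// SecondI's location, so the function returns true; an intervening call that
// may write to %p would make it return false.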
  444. static void shortenAssignment(Instruction *Inst, uint64_t OldOffsetInBits,
  445. uint64_t OldSizeInBits, uint64_t NewSizeInBits,
  446. bool IsOverwriteEnd) {
  447. DIExpression::FragmentInfo DeadFragment;
  448. DeadFragment.SizeInBits = OldSizeInBits - NewSizeInBits;
  449. DeadFragment.OffsetInBits =
  450. OldOffsetInBits + (IsOverwriteEnd ? NewSizeInBits : 0);
  451. auto CreateDeadFragExpr = [Inst, DeadFragment]() {
  452. // FIXME: This should be using the DIExpression in the Alloca's dbg.assign
  453. // for the variable, since that could also contain a fragment?
  454. return *DIExpression::createFragmentExpression(
  455. DIExpression::get(Inst->getContext(), std::nullopt),
  456. DeadFragment.OffsetInBits, DeadFragment.SizeInBits);
  457. };
  458. // A DIAssignID to use so that the inserted dbg.assign intrinsics do not
  459. // link to any instructions. Created in the loop below (once).
  460. DIAssignID *LinkToNothing = nullptr;
  461. // Insert an unlinked dbg.assign intrinsic for the dead fragment after each
  462. // overlapping dbg.assign intrinsic.
  463. for (auto *DAI : at::getAssignmentMarkers(Inst)) {
  464. if (auto FragInfo = DAI->getExpression()->getFragmentInfo()) {
  465. if (!DIExpression::fragmentsOverlap(*FragInfo, DeadFragment))
  466. continue;
  467. }
  468. // Fragments overlap: insert a new dbg.assign for this dead part.
  469. auto *NewAssign = cast<DbgAssignIntrinsic>(DAI->clone());
  470. NewAssign->insertAfter(DAI);
  471. if (!LinkToNothing)
  472. LinkToNothing = DIAssignID::getDistinct(Inst->getContext());
  473. NewAssign->setAssignId(LinkToNothing);
  474. NewAssign->setExpression(CreateDeadFragExpr());
  475. NewAssign->setKillAddress();
  476. }
  477. }
  478. static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
  479. uint64_t &DeadSize, int64_t KillingStart,
  480. uint64_t KillingSize, bool IsOverwriteEnd) {
  481. auto *DeadIntrinsic = cast<AnyMemIntrinsic>(DeadI);
  482. Align PrefAlign = DeadIntrinsic->getDestAlign().valueOrOne();
  483. // We assume that memset/memcpy operates in chunks of the "largest" native
  484. // type size and aligned on the same value. That means optimal start and size
  485. // of memset/memcpy should be modulo of preferred alignment of that type. That
  486. // is, there is no sense in trying to reduce the store size any further
  487. // since any "extra" stores come for free anyway.
  488. // On the other hand, maximum alignment we can achieve is limited by alignment
  489. // of initial store.
  490. // TODO: Limit maximum alignment by preferred (or abi?) alignment of the
  491. // "largest" native type.
  492. // Note: What is the proper way to get that value?
  493. // Should TargetTransformInfo::getRegisterBitWidth be used or anything else?
  494. // PrefAlign = std::min(DL.getPrefTypeAlign(LargestType), PrefAlign);
  495. int64_t ToRemoveStart = 0;
  496. uint64_t ToRemoveSize = 0;
  497. // Compute start and size of the region to remove. Make sure 'PrefAlign' is
  498. // maintained on the remaining store.
  499. if (IsOverwriteEnd) {
  500. // Calculate required adjustment for 'KillingStart' in order to keep
  501. // remaining store size aligned on 'PrefAlign'.
  502. uint64_t Off =
  503. offsetToAlignment(uint64_t(KillingStart - DeadStart), PrefAlign);
  504. ToRemoveStart = KillingStart + Off;
  505. if (DeadSize <= uint64_t(ToRemoveStart - DeadStart))
  506. return false;
  507. ToRemoveSize = DeadSize - uint64_t(ToRemoveStart - DeadStart);
  508. } else {
  509. ToRemoveStart = DeadStart;
  510. assert(KillingSize >= uint64_t(DeadStart - KillingStart) &&
  511. "Not overlapping accesses?");
  512. ToRemoveSize = KillingSize - uint64_t(DeadStart - KillingStart);
  513. // Calculate required adjustment for 'ToRemoveSize' in order to keep
  514. // start of the remaining store aligned on 'PrefAlign'.
  515. uint64_t Off = offsetToAlignment(ToRemoveSize, PrefAlign);
  516. if (Off != 0) {
  517. if (ToRemoveSize <= (PrefAlign.value() - Off))
  518. return false;
  519. ToRemoveSize -= PrefAlign.value() - Off;
  520. }
  521. assert(isAligned(PrefAlign, ToRemoveSize) &&
  522. "Should preserve selected alignment");
  523. }
  524. assert(ToRemoveSize > 0 && "Shouldn't reach here if nothing to remove");
  525. assert(DeadSize > ToRemoveSize && "Can't remove more than original size");
  526. uint64_t NewSize = DeadSize - ToRemoveSize;
  527. if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(DeadI)) {
  528. // When shortening an atomic memory intrinsic, the newly shortened
  529. // length must remain an integer multiple of the element size.
  530. const uint32_t ElementSize = AMI->getElementSizeInBytes();
  531. if (0 != NewSize % ElementSize)
  532. return false;
  533. }
  534. LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW "
  535. << (IsOverwriteEnd ? "END" : "BEGIN") << ": " << *DeadI
  536. << "\n KILLER [" << ToRemoveStart << ", "
  537. << int64_t(ToRemoveStart + ToRemoveSize) << ")\n");
  538. Value *DeadWriteLength = DeadIntrinsic->getLength();
  539. Value *TrimmedLength = ConstantInt::get(DeadWriteLength->getType(), NewSize);
  540. DeadIntrinsic->setLength(TrimmedLength);
  541. DeadIntrinsic->setDestAlignment(PrefAlign);
  542. if (!IsOverwriteEnd) {
  543. Value *OrigDest = DeadIntrinsic->getRawDest();
  544. Type *Int8PtrTy =
  545. Type::getInt8PtrTy(DeadIntrinsic->getContext(),
  546. OrigDest->getType()->getPointerAddressSpace());
  547. Value *Dest = OrigDest;
  548. if (OrigDest->getType() != Int8PtrTy)
  549. Dest = CastInst::CreatePointerCast(OrigDest, Int8PtrTy, "", DeadI);
  550. Value *Indices[1] = {
  551. ConstantInt::get(DeadWriteLength->getType(), ToRemoveSize)};
  552. Instruction *NewDestGEP = GetElementPtrInst::CreateInBounds(
  553. Type::getInt8Ty(DeadIntrinsic->getContext()), Dest, Indices, "", DeadI);
  554. NewDestGEP->setDebugLoc(DeadIntrinsic->getDebugLoc());
  555. if (NewDestGEP->getType() != OrigDest->getType())
  556. NewDestGEP = CastInst::CreatePointerCast(NewDestGEP, OrigDest->getType(),
  557. "", DeadI);
  558. DeadIntrinsic->setDest(NewDestGEP);
  559. }
  560. // Update attached dbg.assign intrinsics. Assume 8-bit byte.
  561. shortenAssignment(DeadI, DeadStart * 8, DeadSize * 8, NewSize * 8,
  562. IsOverwriteEnd);
  563. // Finally update start and size of dead access.
  564. if (!IsOverwriteEnd)
  565. DeadStart += ToRemoveSize;
  566. DeadSize = NewSize;
  567. return true;
  568. }
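// Illustrative example (a sketch added for clarity, not upstream LLVM code):
// if a memset of 32 bytes at %p is later completely overwritten in bytes
// [16, 32), tryToShorten rewrites the intrinsic's length to 16
// (IsOverwriteEnd == true); when the leading bytes are overwritten instead,
// the destination is advanced with an inbounds i8 GEP and the length is
// reduced accordingly (IsOverwriteEnd == false).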
  569. static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap,
  570. int64_t &DeadStart, uint64_t &DeadSize) {
  571. if (IntervalMap.empty() || !isShortenableAtTheEnd(DeadI))
  572. return false;
  573. OverlapIntervalsTy::iterator OII = --IntervalMap.end();
  574. int64_t KillingStart = OII->second;
  575. uint64_t KillingSize = OII->first - KillingStart;
  576. assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
  577. if (KillingStart > DeadStart &&
  578. // Note: "KillingStart - KillingStart" is known to be positive due to
  579. // preceding check.
  580. (uint64_t)(KillingStart - DeadStart) < DeadSize &&
  581. // Note: "DeadSize - (uint64_t)(KillingStart - DeadStart)" is known to
  582. // be non negative due to preceding checks.
  583. KillingSize >= DeadSize - (uint64_t)(KillingStart - DeadStart)) {
  584. if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
  585. true)) {
  586. IntervalMap.erase(OII);
  587. return true;
  588. }
  589. }
  590. return false;
  591. }
  592. static bool tryToShortenBegin(Instruction *DeadI,
  593. OverlapIntervalsTy &IntervalMap,
  594. int64_t &DeadStart, uint64_t &DeadSize) {
  595. if (IntervalMap.empty() || !isShortenableAtTheBeginning(DeadI))
  596. return false;
  597. OverlapIntervalsTy::iterator OII = IntervalMap.begin();
  598. int64_t KillingStart = OII->second;
  599. uint64_t KillingSize = OII->first - KillingStart;
  600. assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
  601. if (KillingStart <= DeadStart &&
  602. // Note: "DeadStart - KillingStart" is known to be non negative due to
  603. // preceding check.
  604. KillingSize > (uint64_t)(DeadStart - KillingStart)) {
  605. // Note: "KillingSize - (uint64_t)(DeadStart - DeadStart)" is known to
  606. // be positive due to preceding checks.
  607. assert(KillingSize - (uint64_t)(DeadStart - KillingStart) < DeadSize &&
  608. "Should have been handled as OW_Complete");
  609. if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
  610. false)) {
  611. IntervalMap.erase(OII);
  612. return true;
  613. }
  614. }
  615. return false;
  616. }
  617. static Constant *
  618. tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI,
  619. int64_t KillingOffset, int64_t DeadOffset,
  620. const DataLayout &DL, BatchAAResults &AA,
  621. DominatorTree *DT) {
  622. if (DeadI && isa<ConstantInt>(DeadI->getValueOperand()) &&
  623. DL.typeSizeEqualsStoreSize(DeadI->getValueOperand()->getType()) &&
  624. KillingI && isa<ConstantInt>(KillingI->getValueOperand()) &&
  625. DL.typeSizeEqualsStoreSize(KillingI->getValueOperand()->getType()) &&
  626. memoryIsNotModifiedBetween(DeadI, KillingI, AA, DL, DT)) {
  627. // If the store we find is:
  628. // a) partially overwritten by the store to 'Loc'
  629. // b) the killing store is fully contained in the dead one and
  630. // c) they both have a constant value
  631. // d) neither of the two stores needs padding
  632. // Merge the two stores, replacing the dead store's value with a
  633. // merge of both values.
  634. // TODO: Deal with other constant types (vectors, etc), and probably
  635. // some mem intrinsics (if needed)
  636. APInt DeadValue = cast<ConstantInt>(DeadI->getValueOperand())->getValue();
  637. APInt KillingValue =
  638. cast<ConstantInt>(KillingI->getValueOperand())->getValue();
  639. unsigned KillingBits = KillingValue.getBitWidth();
  640. assert(DeadValue.getBitWidth() > KillingValue.getBitWidth());
  641. KillingValue = KillingValue.zext(DeadValue.getBitWidth());
  642. // Offset of the smaller store inside the larger store
  643. unsigned BitOffsetDiff = (KillingOffset - DeadOffset) * 8;
  644. unsigned LShiftAmount =
  645. DL.isBigEndian() ? DeadValue.getBitWidth() - BitOffsetDiff - KillingBits
  646. : BitOffsetDiff;
  647. APInt Mask = APInt::getBitsSet(DeadValue.getBitWidth(), LShiftAmount,
  648. LShiftAmount + KillingBits);
  649. // Clear the bits we'll be replacing, then OR with the smaller
  650. // store, shifted appropriately.
  651. APInt Merged = (DeadValue & ~Mask) | (KillingValue << LShiftAmount);
  652. LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n Dead: " << *DeadI
  653. << "\n Killing: " << *KillingI
  654. << "\n Merged Value: " << Merged << '\n');
  655. return ConstantInt::get(DeadI->getValueOperand()->getType(), Merged);
  656. }
  657. return nullptr;
  658. }
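// Worked example (added for clarity, not upstream LLVM code; little-endian):
//   store i32 0, ptr %p          ; dead store, DeadOffset = 0
//   store i8 7, ptr %p           ; killing store, KillingOffset = 0
// gives BitOffsetDiff = 0, LShiftAmount = 0, Mask = 0xff, and a merged value
// of (0 & ~0xff) | (7 << 0) = 7; the dead store is rewritten to
// 'store i32 7, ptr %p' and the killing i8 store can then be removed.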
  659. namespace {
  660. // Returns true if \p I is an intrinsic that does not read or write memory.
  661. bool isNoopIntrinsic(Instruction *I) {
  662. if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
  663. switch (II->getIntrinsicID()) {
  664. case Intrinsic::lifetime_start:
  665. case Intrinsic::lifetime_end:
  666. case Intrinsic::invariant_end:
  667. case Intrinsic::launder_invariant_group:
  668. case Intrinsic::assume:
  669. return true;
  670. case Intrinsic::dbg_addr:
  671. case Intrinsic::dbg_declare:
  672. case Intrinsic::dbg_label:
  673. case Intrinsic::dbg_value:
  674. llvm_unreachable("Intrinsic should not be modeled in MemorySSA");
  675. default:
  676. return false;
  677. }
  678. }
  679. return false;
  680. }
  681. // Check if we can ignore \p D for DSE.
  682. bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
  683. Instruction *DI = D->getMemoryInst();
  684. // Calls that only access inaccessible memory cannot read or write any memory
  685. // locations we consider for elimination.
  686. if (auto *CB = dyn_cast<CallBase>(DI))
  687. if (CB->onlyAccessesInaccessibleMemory())
  688. return true;
  689. // We can eliminate stores to locations not visible to the caller across
  690. // throwing instructions.
  691. if (DI->mayThrow() && !DefVisibleToCaller)
  692. return true;
  693. // We can remove the dead stores, irrespective of the fence and its ordering
  694. // (release/acquire/seq_cst). Fences only constrain the ordering of
  695. // already visible stores; they do not make a store visible to other
  696. // threads. So, skipping over a fence does not change a store from being
  697. // dead.
  698. if (isa<FenceInst>(DI))
  699. return true;
  700. // Skip intrinsics that do not really read or modify memory.
  701. if (isNoopIntrinsic(DI))
  702. return true;
  703. return false;
  704. }
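// Illustrative example (a sketch added for clarity, not upstream LLVM code):
// a 'fence seq_cst' or a 'call void @llvm.lifetime.start.p0(i64 4, ptr %a)'
// sitting between a dead store and its killing store creates a MemoryDef, but
// canSkipDef lets the walk step over it, since neither makes the dead store
// visible nor reads the stored location.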
  705. struct DSEState {
  706. Function &F;
  707. AliasAnalysis &AA;
  708. EarliestEscapeInfo EI;
  709. /// The single BatchAA instance that is used to cache AA queries. It will
  710. /// not be invalidated over the whole run. This is safe, because:
  711. /// 1. Only memory writes are removed, so the alias cache for memory
  712. /// locations remains valid.
  713. /// 2. No new instructions are added (only instructions removed), so cached
  714. /// information for a deleted value cannot be accessed by a re-used new
  715. /// value pointer.
  716. BatchAAResults BatchAA;
  717. MemorySSA &MSSA;
  718. DominatorTree &DT;
  719. PostDominatorTree &PDT;
  720. const TargetLibraryInfo &TLI;
  721. const DataLayout &DL;
  722. const LoopInfo &LI;
  723. // Whether the function contains any irreducible control flow, useful for
  724. // accurately detecting loops.
  725. bool ContainsIrreducibleLoops;
  726. // All MemoryDefs that potentially could kill other MemDefs.
  727. SmallVector<MemoryDef *, 64> MemDefs;
  728. // Any that should be skipped as they are already deleted
  729. SmallPtrSet<MemoryAccess *, 4> SkipStores;
  730. // Keep track whether a given object is captured before return or not.
  731. DenseMap<const Value *, bool> CapturedBeforeReturn;
  732. // Keep track of all of the objects that are invisible to the caller after
  733. // the function returns.
  734. DenseMap<const Value *, bool> InvisibleToCallerAfterRet;
  735. // Keep track of blocks with throwing instructions not modeled in MemorySSA.
  736. SmallPtrSet<BasicBlock *, 16> ThrowingBlocks;
  737. // Post-order numbers for each basic block. Used to figure out if memory
  738. // accesses are executed before another access.
  739. DenseMap<BasicBlock *, unsigned> PostOrderNumbers;
  740. // Values that are only used with assumes. Used to refine pointer escape
  741. // analysis.
  742. SmallPtrSet<const Value *, 32> EphValues;
  743. /// Keep track of instructions (partly) overlapping with killing MemoryDefs per
  744. /// basic block.
  745. MapVector<BasicBlock *, InstOverlapIntervalsTy> IOLs;
  746. // Check if there are root nodes that are terminated by UnreachableInst.
  747. // Those roots pessimize post-dominance queries. If there are such roots,
  748. // fall back to CFG scan starting from all non-unreachable roots.
  749. bool AnyUnreachableExit;
  750. // Whether or not we should iterate on removing dead stores at the end of the
  751. // function due to removing a store causing a previously captured pointer to
  752. // no longer be captured.
  753. bool ShouldIterateEndOfFunctionDSE;
  754. // Class contains self-reference, make sure it's not copied/moved.
  755. DSEState(const DSEState &) = delete;
  756. DSEState &operator=(const DSEState &) = delete;
  757. DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
  758. PostDominatorTree &PDT, AssumptionCache &AC,
  759. const TargetLibraryInfo &TLI, const LoopInfo &LI)
  760. : F(F), AA(AA), EI(DT, LI, EphValues), BatchAA(AA, &EI), MSSA(MSSA),
  761. DT(DT), PDT(PDT), TLI(TLI), DL(F.getParent()->getDataLayout()), LI(LI) {
  762. // Collect blocks with throwing instructions not modeled in MemorySSA and
  763. // alloc-like objects.
  764. unsigned PO = 0;
  765. for (BasicBlock *BB : post_order(&F)) {
  766. PostOrderNumbers[BB] = PO++;
  767. for (Instruction &I : *BB) {
  768. MemoryAccess *MA = MSSA.getMemoryAccess(&I);
  769. if (I.mayThrow() && !MA)
  770. ThrowingBlocks.insert(I.getParent());
  771. auto *MD = dyn_cast_or_null<MemoryDef>(MA);
  772. if (MD && MemDefs.size() < MemorySSADefsPerBlockLimit &&
  773. (getLocForWrite(&I) || isMemTerminatorInst(&I)))
  774. MemDefs.push_back(MD);
  775. }
  776. }
  777. // Treat byval or inalloca arguments the same as Allocas: stores to them are
  778. // dead at the end of the function.
  779. for (Argument &AI : F.args())
  780. if (AI.hasPassPointeeByValueCopyAttr())
  781. InvisibleToCallerAfterRet.insert({&AI, true});
  782. // Collect whether there is any irreducible control flow in the function.
  783. ContainsIrreducibleLoops = mayContainIrreducibleControl(F, &LI);
  784. AnyUnreachableExit = any_of(PDT.roots(), [](const BasicBlock *E) {
  785. return isa<UnreachableInst>(E->getTerminator());
  786. });
  787. CodeMetrics::collectEphemeralValues(&F, &AC, EphValues);
  788. }
  789. LocationSize strengthenLocationSize(const Instruction *I,
  790. LocationSize Size) const {
  791. if (auto *CB = dyn_cast<CallBase>(I)) {
  792. LibFunc F;
  793. if (TLI.getLibFunc(*CB, F) && TLI.has(F) &&
  794. (F == LibFunc_memset_chk || F == LibFunc_memcpy_chk)) {
  795. // Use the precise location size specified by the 3rd argument
  796. // for determining whether KillingI overwrites DeadLoc if it is a memset_chk
  797. // instruction. memset_chk will write either the amount specified as 3rd
  798. // argument or the function will immediately abort and exit the program.
  799. // NOTE: AA may determine NoAlias if it can prove that the access size
  800. // is larger than the allocation size due to that being UB. To avoid
  801. // returning potentially invalid NoAlias results by AA, limit the use of
  802. // the precise location size to isOverwrite.
  803. if (const auto *Len = dyn_cast<ConstantInt>(CB->getArgOperand(2)))
  804. return LocationSize::precise(Len->getZExtValue());
  805. }
  806. }
  807. return Size;
  808. }
  809. /// Return 'OW_Complete' if a store to the 'KillingLoc' location (by \p
  810. /// KillingI instruction) completely overwrites a store to the 'DeadLoc'
  811. /// location (by \p DeadI instruction).
  812. /// Return OW_MaybePartial if \p KillingI does not completely overwrite
  813. /// \p DeadI, but they both write to the same underlying object. In that
  814. /// case, use isPartialOverwrite to check if \p KillingI partially overwrites
  815. /// \p DeadI. Returns 'OW_None' if \p KillingI is known to not overwrite the
  816. /// \p DeadI. Returns 'OW_Unknown' if nothing can be determined.
  817. OverwriteResult isOverwrite(const Instruction *KillingI,
  818. const Instruction *DeadI,
  819. const MemoryLocation &KillingLoc,
  820. const MemoryLocation &DeadLoc,
  821. int64_t &KillingOff, int64_t &DeadOff) {
  822. // AliasAnalysis does not always account for loops. Limit overwrite checks
  823. // to dependencies for which we can guarantee they are independent of any
  824. // loops they are in.
  825. if (!isGuaranteedLoopIndependent(DeadI, KillingI, DeadLoc))
  826. return OW_Unknown;
  827. LocationSize KillingLocSize =
  828. strengthenLocationSize(KillingI, KillingLoc.Size);
  829. const Value *DeadPtr = DeadLoc.Ptr->stripPointerCasts();
  830. const Value *KillingPtr = KillingLoc.Ptr->stripPointerCasts();
  831. const Value *DeadUndObj = getUnderlyingObject(DeadPtr);
  832. const Value *KillingUndObj = getUnderlyingObject(KillingPtr);
  833. // Check whether the killing store overwrites the whole object, in which
  834. // case the size/offset of the dead store does not matter.
  835. if (DeadUndObj == KillingUndObj && KillingLocSize.isPrecise()) {
  836. uint64_t KillingUndObjSize = getPointerSize(KillingUndObj, DL, TLI, &F);
  837. if (KillingUndObjSize != MemoryLocation::UnknownSize &&
  838. KillingUndObjSize == KillingLocSize.getValue())
  839. return OW_Complete;
  840. }
  841. // FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
  842. // get imprecise values here, though (except for unknown sizes).
  843. if (!KillingLocSize.isPrecise() || !DeadLoc.Size.isPrecise()) {
  844. // In case no constant size is known, try to use the IR values for the number
  845. // of bytes written and check if they match.
  846. const auto *KillingMemI = dyn_cast<MemIntrinsic>(KillingI);
  847. const auto *DeadMemI = dyn_cast<MemIntrinsic>(DeadI);
  848. if (KillingMemI && DeadMemI) {
  849. const Value *KillingV = KillingMemI->getLength();
  850. const Value *DeadV = DeadMemI->getLength();
  851. if (KillingV == DeadV && BatchAA.isMustAlias(DeadLoc, KillingLoc))
  852. return OW_Complete;
  853. }
  854. // Masked stores have imprecise locations, but we can reason about them
  855. // to some extent.
  856. return isMaskedStoreOverwrite(KillingI, DeadI, BatchAA);
  857. }
  858. const uint64_t KillingSize = KillingLocSize.getValue();
  859. const uint64_t DeadSize = DeadLoc.Size.getValue();
  860. // Query the alias information
  861. AliasResult AAR = BatchAA.alias(KillingLoc, DeadLoc);
  862. // If the start pointers are the same, we just have to compare sizes to see if
  863. // the killing store was larger than the dead store.
  864. if (AAR == AliasResult::MustAlias) {
  865. // Make sure that the KillingSize size is >= the DeadSize size.
  866. if (KillingSize >= DeadSize)
  867. return OW_Complete;
  868. }
  869. // If we hit a partial alias we may have a full overwrite
  870. if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
  871. int32_t Off = AAR.getOffset();
  872. if (Off >= 0 && (uint64_t)Off + DeadSize <= KillingSize)
  873. return OW_Complete;
  874. }
  875. // If we can't resolve the same pointers to the same object, then we can't
  876. // analyze them at all.
  877. if (DeadUndObj != KillingUndObj) {
  878. // Non-aliasing stores to different objects don't overlap. Note that
  879. // if the killing store is known to overwrite whole object (out of
  880. // bounds access overwrites whole object as well) then it is assumed to
  881. // completely overwrite any store to the same object even if they don't
  882. // actually alias (see next check).
  883. if (AAR == AliasResult::NoAlias)
  884. return OW_None;
  885. return OW_Unknown;
  886. }
  887. // Okay, we have stores to the same underlying object through two different
  888. // pointers. Try to decompose each pointer into a "base + constant_offset"
  889. // form. If the base pointers are equal, then we can reason about the two stores.
  890. DeadOff = 0;
  891. KillingOff = 0;
  892. const Value *DeadBasePtr =
  893. GetPointerBaseWithConstantOffset(DeadPtr, DeadOff, DL);
  894. const Value *KillingBasePtr =
  895. GetPointerBaseWithConstantOffset(KillingPtr, KillingOff, DL);
  896. // If the base pointers still differ, we have two completely different
  897. // stores.
  898. if (DeadBasePtr != KillingBasePtr)
  899. return OW_Unknown;
  900. // The killing access completely overlaps the dead store if and only if
  901. // both start and end of the dead one are "inside" the killing one:
  902. // |<->|--dead--|<->|
  903. // |-----killing------|
  904. // Accesses may overlap if and only if start of one of them is "inside"
  905. // another one:
  906. // |<->|--dead--|<-------->|
  907. // |-------killing--------|
  908. // OR
  909. // |-------dead-------|
  910. // |<->|---killing---|<----->|
  911. //
  912. // We have to be careful here as *Off is signed while *.Size is unsigned.
  913. // Check if the dead access starts "not before" the killing one.
  914. if (DeadOff >= KillingOff) {
  915. // If the dead access ends "not after" the killing access then the
  916. // dead one is completely overwritten by the killing one.
  917. if (uint64_t(DeadOff - KillingOff) + DeadSize <= KillingSize)
  918. return OW_Complete;
  919. // If start of the dead access is "before" end of the killing access
  920. // then accesses overlap.
  921. else if ((uint64_t)(DeadOff - KillingOff) < KillingSize)
  922. return OW_MaybePartial;
  923. }
  924. // If start of the killing access is "before" end of the dead access then
  925. // accesses overlap.
  926. else if ((uint64_t)(KillingOff - DeadOff) < DeadSize) {
  927. return OW_MaybePartial;
  928. }
  929. // Can reach here only if accesses are known not to overlap.
  930. return OW_None;
  931. }
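// Illustrative example (a sketch added for clarity, not upstream LLVM code):
//   %q = getelementptr inbounds i8, ptr %p, i64 4
//   store i8 1, ptr %q           ; dead access, covers bytes [4, 5) of %p
//   store i64 0, ptr %p          ; killing access, covers bytes [0, 8) of %p
// the dead access lies entirely inside the killing one, so isOverwrite
// reports OW_Complete once both accesses are resolved to the same base
// pointer via the offset checks above.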
  932. bool isInvisibleToCallerAfterRet(const Value *V) {
  933. if (isa<AllocaInst>(V))
  934. return true;
  935. auto I = InvisibleToCallerAfterRet.insert({V, false});
  936. if (I.second) {
  937. if (!isInvisibleToCallerOnUnwind(V)) {
  938. I.first->second = false;
  939. } else if (isNoAliasCall(V)) {
  940. I.first->second = !PointerMayBeCaptured(V, true, false, EphValues);
  941. }
  942. }
  943. return I.first->second;
  944. }
  945. bool isInvisibleToCallerOnUnwind(const Value *V) {
  946. bool RequiresNoCaptureBeforeUnwind;
  947. if (!isNotVisibleOnUnwind(V, RequiresNoCaptureBeforeUnwind))
  948. return false;
  949. if (!RequiresNoCaptureBeforeUnwind)
  950. return true;
  951. auto I = CapturedBeforeReturn.insert({V, true});
  952. if (I.second)
  953. // NOTE: This could be made more precise by PointerMayBeCapturedBefore
  954. // with the killing MemoryDef. But we refrain from doing so for now to
  955. // limit compile-time and this does not cause any changes to the number
  956. // of stores removed on a large test set in practice.
  957. I.first->second = PointerMayBeCaptured(V, false, true, EphValues);
  958. return !I.first->second;
  959. }
  960. std::optional<MemoryLocation> getLocForWrite(Instruction *I) const {
  961. if (!I->mayWriteToMemory())
  962. return std::nullopt;
  963. if (auto *CB = dyn_cast<CallBase>(I))
  964. return MemoryLocation::getForDest(CB, TLI);
  965. return MemoryLocation::getOrNone(I);
  966. }
  967. /// Assuming this instruction has a dead analyzable write, can we delete
  968. /// this instruction?
  969. bool isRemovable(Instruction *I) {
  970. assert(getLocForWrite(I) && "Must have analyzable write");
  971. // Don't remove volatile/atomic stores.
  972. if (StoreInst *SI = dyn_cast<StoreInst>(I))
  973. return SI->isUnordered();
  974. if (auto *CB = dyn_cast<CallBase>(I)) {
  975. // Don't remove volatile memory intrinsics.
  976. if (auto *MI = dyn_cast<MemIntrinsic>(CB))
  977. return !MI->isVolatile();
  978. // Never remove dead lifetime intrinsics, e.g. because they are followed
  979. // by a free.
  980. if (CB->isLifetimeStartOrEnd())
  981. return false;
  982. return CB->use_empty() && CB->willReturn() && CB->doesNotThrow() &&
  983. !CB->isTerminator();
  984. }
  985. return false;
  986. }
  987. /// Returns true if \p UseInst completely overwrites \p DefLoc
  988. /// (stored by \p DefInst).
  989. bool isCompleteOverwrite(const MemoryLocation &DefLoc, Instruction *DefInst,
  990. Instruction *UseInst) {
  991. // UseInst has a MemoryDef associated in MemorySSA. It's possible for a
  992. // MemoryDef to not write to memory, e.g. a volatile load is modeled as a
  993. // MemoryDef.
  994. if (!UseInst->mayWriteToMemory())
  995. return false;
  996. if (auto *CB = dyn_cast<CallBase>(UseInst))
  997. if (CB->onlyAccessesInaccessibleMemory())
  998. return false;
  999. int64_t InstWriteOffset, DepWriteOffset;
  1000. if (auto CC = getLocForWrite(UseInst))
  1001. return isOverwrite(UseInst, DefInst, *CC, DefLoc, InstWriteOffset,
  1002. DepWriteOffset) == OW_Complete;
  1003. return false;
  1004. }
  1005. /// Returns true if \p Def is not read before returning from the function.
  1006. bool isWriteAtEndOfFunction(MemoryDef *Def) {
  1007. LLVM_DEBUG(dbgs() << " Check if def " << *Def << " ("
  1008. << *Def->getMemoryInst()
  1009. << ") is at the end the function \n");
  1010. auto MaybeLoc = getLocForWrite(Def->getMemoryInst());
  1011. if (!MaybeLoc) {
  1012. LLVM_DEBUG(dbgs() << " ... could not get location for write.\n");
  1013. return false;
  1014. }
  1015. SmallVector<MemoryAccess *, 4> WorkList;
  1016. SmallPtrSet<MemoryAccess *, 8> Visited;
  1017. auto PushMemUses = [&WorkList, &Visited](MemoryAccess *Acc) {
  1018. if (!Visited.insert(Acc).second)
  1019. return;
  1020. for (Use &U : Acc->uses())
  1021. WorkList.push_back(cast<MemoryAccess>(U.getUser()));
  1022. };
  1023. PushMemUses(Def);
  1024. for (unsigned I = 0; I < WorkList.size(); I++) {
  1025. if (WorkList.size() >= MemorySSAScanLimit) {
  1026. LLVM_DEBUG(dbgs() << " ... hit exploration limit.\n");
  1027. return false;
  1028. }
  1029. MemoryAccess *UseAccess = WorkList[I];
  1030. if (isa<MemoryPhi>(UseAccess)) {
  1031. // AliasAnalysis does not account for loops. Limit elimination to
  1032. // candidates for which we can guarantee they always store to the same
  1033. // memory location.
  1034. if (!isGuaranteedLoopInvariant(MaybeLoc->Ptr))
  1035. return false;
  1036. PushMemUses(cast<MemoryPhi>(UseAccess));
  1037. continue;
  1038. }
1039. // TODO: Checking for aliasing is expensive. Consider reducing the number
1040. // of times this is called and/or caching it.
  1041. Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
  1042. if (isReadClobber(*MaybeLoc, UseInst)) {
  1043. LLVM_DEBUG(dbgs() << " ... hit read clobber " << *UseInst << ".\n");
  1044. return false;
  1045. }
  1046. if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess))
  1047. PushMemUses(UseDef);
  1048. }
  1049. return true;
  1050. }
  1051. /// If \p I is a memory terminator like llvm.lifetime.end or free, return a
  1052. /// pair with the MemoryLocation terminated by \p I and a boolean flag
  1053. /// indicating whether \p I is a free-like call.
  1054. std::optional<std::pair<MemoryLocation, bool>>
  1055. getLocForTerminator(Instruction *I) const {
  1056. uint64_t Len;
  1057. Value *Ptr;
  1058. if (match(I, m_Intrinsic<Intrinsic::lifetime_end>(m_ConstantInt(Len),
  1059. m_Value(Ptr))))
  1060. return {std::make_pair(MemoryLocation(Ptr, Len), false)};
  1061. if (auto *CB = dyn_cast<CallBase>(I)) {
  1062. if (Value *FreedOp = getFreedOperand(CB, &TLI))
  1063. return {std::make_pair(MemoryLocation::getAfter(FreedOp), true)};
  1064. }
  1065. return std::nullopt;
  1066. }
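// Example of the two cases above (illustrative IR): for
// "call void @llvm.lifetime.end.p0(i64 16, ptr %p)" this returns
// {MemoryLocation(%p, 16), false}; for "call void @free(ptr %p)" it returns
// {MemoryLocation::getAfter(%p), true}, the bool flagging the free-like call.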
  1067. /// Returns true if \p I is a memory terminator instruction like
  1068. /// llvm.lifetime.end or free.
  1069. bool isMemTerminatorInst(Instruction *I) const {
  1070. auto *CB = dyn_cast<CallBase>(I);
  1071. return CB && (CB->getIntrinsicID() == Intrinsic::lifetime_end ||
  1072. getFreedOperand(CB, &TLI) != nullptr);
  1073. }
  1074. /// Returns true if \p MaybeTerm is a memory terminator for \p Loc from
  1075. /// instruction \p AccessI.
  1076. bool isMemTerminator(const MemoryLocation &Loc, Instruction *AccessI,
  1077. Instruction *MaybeTerm) {
  1078. std::optional<std::pair<MemoryLocation, bool>> MaybeTermLoc =
  1079. getLocForTerminator(MaybeTerm);
  1080. if (!MaybeTermLoc)
  1081. return false;
  1082. // If the terminator is a free-like call, all accesses to the underlying
  1083. // object can be considered terminated.
  1084. if (getUnderlyingObject(Loc.Ptr) !=
  1085. getUnderlyingObject(MaybeTermLoc->first.Ptr))
  1086. return false;
  1087. auto TermLoc = MaybeTermLoc->first;
  1088. if (MaybeTermLoc->second) {
  1089. const Value *LocUO = getUnderlyingObject(Loc.Ptr);
  1090. return BatchAA.isMustAlias(TermLoc.Ptr, LocUO);
  1091. }
  1092. int64_t InstWriteOffset = 0;
  1093. int64_t DepWriteOffset = 0;
  1094. return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, InstWriteOffset,
  1095. DepWriteOffset) == OW_Complete;
  1096. }
  1097. // Returns true if \p Use may read from \p DefLoc.
  1098. bool isReadClobber(const MemoryLocation &DefLoc, Instruction *UseInst) {
  1099. if (isNoopIntrinsic(UseInst))
  1100. return false;
  1101. // Monotonic or weaker atomic stores can be re-ordered and do not need to be
1102. // treated as read clobbers.
  1103. if (auto SI = dyn_cast<StoreInst>(UseInst))
  1104. return isStrongerThan(SI->getOrdering(), AtomicOrdering::Monotonic);
  1105. if (!UseInst->mayReadFromMemory())
  1106. return false;
  1107. if (auto *CB = dyn_cast<CallBase>(UseInst))
  1108. if (CB->onlyAccessesInaccessibleMemory())
  1109. return false;
  1110. return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc));
  1111. }
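// Intuition sketch: a monotonic-or-weaker atomic store is not treated as a
// read clobber even though it has a MemoryDef, while e.g. a plain
// "load i32, ptr %q" that BatchAA reports as Ref for DefLoc is a read
// clobber and blocks eliminating the earlier store.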
  1112. /// Returns true if a dependency between \p Current and \p KillingDef is
  1113. /// guaranteed to be loop invariant for the loops that they are in. Either
  1114. /// because they are known to be in the same block, in the same loop level or
  1115. /// by guaranteeing that \p CurrentLoc only references a single MemoryLocation
  1116. /// during execution of the containing function.
  1117. bool isGuaranteedLoopIndependent(const Instruction *Current,
  1118. const Instruction *KillingDef,
  1119. const MemoryLocation &CurrentLoc) {
  1120. // If the dependency is within the same block or loop level (being careful
  1121. // of irreducible loops), we know that AA will return a valid result for the
  1122. // memory dependency. (Both at the function level, outside of any loop,
  1123. // would also be valid but we currently disable that to limit compile time).
  1124. if (Current->getParent() == KillingDef->getParent())
  1125. return true;
  1126. const Loop *CurrentLI = LI.getLoopFor(Current->getParent());
  1127. if (!ContainsIrreducibleLoops && CurrentLI &&
  1128. CurrentLI == LI.getLoopFor(KillingDef->getParent()))
  1129. return true;
  1130. // Otherwise check the memory location is invariant to any loops.
  1131. return isGuaranteedLoopInvariant(CurrentLoc.Ptr);
  1132. }
  1133. /// Returns true if \p Ptr is guaranteed to be loop invariant for any possible
  1134. /// loop. In particular, this guarantees that it only references a single
  1135. /// MemoryLocation during execution of the containing function.
  1136. bool isGuaranteedLoopInvariant(const Value *Ptr) {
  1137. Ptr = Ptr->stripPointerCasts();
  1138. if (auto *GEP = dyn_cast<GEPOperator>(Ptr))
  1139. if (GEP->hasAllConstantIndices())
  1140. Ptr = GEP->getPointerOperand()->stripPointerCasts();
  1141. if (auto *I = dyn_cast<Instruction>(Ptr)) {
  1142. return I->getParent()->isEntryBlock() ||
  1143. (!ContainsIrreducibleLoops && !LI.getLoopFor(I->getParent()));
  1144. }
  1145. return true;
  1146. }
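// Illustrative (assumed IR): "%a = alloca i32" in the entry block, or a GEP
// with all-constant indices based on such a pointer, is treated as loop
// invariant; "%p = getelementptr i32, ptr %base, i64 %iv" defined inside a
// loop is not, since it may name a different location on every iteration.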
  1147. // Find a MemoryDef writing to \p KillingLoc and dominating \p StartAccess,
  1148. // with no read access between them or on any other path to a function exit
  1149. // block if \p KillingLoc is not accessible after the function returns. If
  1150. // there is no such MemoryDef, return std::nullopt. The returned value may not
  1151. // (completely) overwrite \p KillingLoc. Currently we bail out when we
  1152. // encounter an aliasing MemoryUse (read).
  1153. std::optional<MemoryAccess *>
  1154. getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess,
  1155. const MemoryLocation &KillingLoc, const Value *KillingUndObj,
  1156. unsigned &ScanLimit, unsigned &WalkerStepLimit,
  1157. bool IsMemTerm, unsigned &PartialLimit) {
  1158. if (ScanLimit == 0 || WalkerStepLimit == 0) {
  1159. LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
  1160. return std::nullopt;
  1161. }
  1162. MemoryAccess *Current = StartAccess;
  1163. Instruction *KillingI = KillingDef->getMemoryInst();
  1164. LLVM_DEBUG(dbgs() << " trying to get dominating access\n");
  1165. // Only optimize defining access of KillingDef when directly starting at its
  1166. // defining access. The defining access also must only access KillingLoc. At
  1167. // the moment we only support instructions with a single write location, so
  1168. // it should be sufficient to disable optimizations for instructions that
  1169. // also read from memory.
  1170. bool CanOptimize = OptimizeMemorySSA &&
  1171. KillingDef->getDefiningAccess() == StartAccess &&
  1172. !KillingI->mayReadFromMemory();
  1173. // Find the next clobbering Mod access for DefLoc, starting at StartAccess.
  1174. std::optional<MemoryLocation> CurrentLoc;
  1175. for (;; Current = cast<MemoryDef>(Current)->getDefiningAccess()) {
  1176. LLVM_DEBUG({
  1177. dbgs() << " visiting " << *Current;
  1178. if (!MSSA.isLiveOnEntryDef(Current) && isa<MemoryUseOrDef>(Current))
  1179. dbgs() << " (" << *cast<MemoryUseOrDef>(Current)->getMemoryInst()
  1180. << ")";
  1181. dbgs() << "\n";
  1182. });
  1183. // Reached TOP.
  1184. if (MSSA.isLiveOnEntryDef(Current)) {
  1185. LLVM_DEBUG(dbgs() << " ... found LiveOnEntryDef\n");
  1186. if (CanOptimize && Current != KillingDef->getDefiningAccess())
  1187. // The first clobbering def is... none.
  1188. KillingDef->setOptimized(Current);
  1189. return std::nullopt;
  1190. }
  1191. // Cost of a step. Accesses in the same block are more likely to be valid
  1192. // candidates for elimination, hence consider them cheaper.
  1193. unsigned StepCost = KillingDef->getBlock() == Current->getBlock()
  1194. ? MemorySSASameBBStepCost
  1195. : MemorySSAOtherBBStepCost;
  1196. if (WalkerStepLimit <= StepCost) {
  1197. LLVM_DEBUG(dbgs() << " ... hit walker step limit\n");
  1198. return std::nullopt;
  1199. }
  1200. WalkerStepLimit -= StepCost;
  1201. // Return for MemoryPhis. They cannot be eliminated directly and the
  1202. // caller is responsible for traversing them.
  1203. if (isa<MemoryPhi>(Current)) {
  1204. LLVM_DEBUG(dbgs() << " ... found MemoryPhi\n");
  1205. return Current;
  1206. }
  1207. // Below, check if CurrentDef is a valid candidate to be eliminated by
  1208. // KillingDef. If it is not, check the next candidate.
  1209. MemoryDef *CurrentDef = cast<MemoryDef>(Current);
  1210. Instruction *CurrentI = CurrentDef->getMemoryInst();
  1211. if (canSkipDef(CurrentDef, !isInvisibleToCallerOnUnwind(KillingUndObj))) {
  1212. CanOptimize = false;
  1213. continue;
  1214. }
  1215. // Before we try to remove anything, check for any extra throwing
1216. // instructions that block us from DSEing.
  1217. if (mayThrowBetween(KillingI, CurrentI, KillingUndObj)) {
  1218. LLVM_DEBUG(dbgs() << " ... skip, may throw!\n");
  1219. return std::nullopt;
  1220. }
  1221. // Check for anything that looks like it will be a barrier to further
1222. // removal.
  1223. if (isDSEBarrier(KillingUndObj, CurrentI)) {
  1224. LLVM_DEBUG(dbgs() << " ... skip, barrier\n");
  1225. return std::nullopt;
  1226. }
1227. // If Current is known to be on a path that reads DefLoc or is a read
  1228. // clobber, bail out, as the path is not profitable. We skip this check
  1229. // for intrinsic calls, because the code knows how to handle memcpy
  1230. // intrinsics.
  1231. if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(KillingLoc, CurrentI))
  1232. return std::nullopt;
  1233. // Quick check if there are direct uses that are read-clobbers.
  1234. if (any_of(Current->uses(), [this, &KillingLoc, StartAccess](Use &U) {
  1235. if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(U.getUser()))
  1236. return !MSSA.dominates(StartAccess, UseOrDef) &&
  1237. isReadClobber(KillingLoc, UseOrDef->getMemoryInst());
  1238. return false;
  1239. })) {
  1240. LLVM_DEBUG(dbgs() << " ... found a read clobber\n");
  1241. return std::nullopt;
  1242. }
  1243. // If Current does not have an analyzable write location or is not
  1244. // removable, skip it.
  1245. CurrentLoc = getLocForWrite(CurrentI);
  1246. if (!CurrentLoc || !isRemovable(CurrentI)) {
  1247. CanOptimize = false;
  1248. continue;
  1249. }
  1250. // AliasAnalysis does not account for loops. Limit elimination to
  1251. // candidates for which we can guarantee they always store to the same
1252. // memory location and are not located in different loops.
  1253. if (!isGuaranteedLoopIndependent(CurrentI, KillingI, *CurrentLoc)) {
  1254. LLVM_DEBUG(dbgs() << " ... not guaranteed loop independent\n");
  1255. CanOptimize = false;
  1256. continue;
  1257. }
  1258. if (IsMemTerm) {
  1259. // If the killing def is a memory terminator (e.g. lifetime.end), check
1260. // the next candidate if Current does not write the same
  1261. // underlying object as the terminator.
  1262. if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI)) {
  1263. CanOptimize = false;
  1264. continue;
  1265. }
  1266. } else {
  1267. int64_t KillingOffset = 0;
  1268. int64_t DeadOffset = 0;
  1269. auto OR = isOverwrite(KillingI, CurrentI, KillingLoc, *CurrentLoc,
  1270. KillingOffset, DeadOffset);
  1271. if (CanOptimize) {
  1272. // CurrentDef is the earliest write clobber of KillingDef. Use it as
  1273. // optimized access. Do not optimize if CurrentDef is already the
  1274. // defining access of KillingDef.
  1275. if (CurrentDef != KillingDef->getDefiningAccess() &&
  1276. (OR == OW_Complete || OR == OW_MaybePartial))
  1277. KillingDef->setOptimized(CurrentDef);
  1278. // Once a may-aliasing def is encountered do not set an optimized
  1279. // access.
  1280. if (OR != OW_None)
  1281. CanOptimize = false;
  1282. }
  1283. // If Current does not write to the same object as KillingDef, check
  1284. // the next candidate.
  1285. if (OR == OW_Unknown || OR == OW_None)
  1286. continue;
  1287. else if (OR == OW_MaybePartial) {
  1288. // If KillingDef only partially overwrites Current, check the next
  1289. // candidate if the partial step limit is exceeded. This aggressively
  1290. // limits the number of candidates for partial store elimination,
  1291. // which are less likely to be removable in the end.
  1292. if (PartialLimit <= 1) {
  1293. WalkerStepLimit -= 1;
  1294. LLVM_DEBUG(dbgs() << " ... reached partial limit ... continue with next access\n");
  1295. continue;
  1296. }
  1297. PartialLimit -= 1;
  1298. }
  1299. }
  1300. break;
1301. }
  1302. // Accesses to objects accessible after the function returns can only be
  1303. // eliminated if the access is dead along all paths to the exit. Collect
1304. // the blocks with killing (=completely overwriting) MemoryDefs and check if
  1305. // they cover all paths from MaybeDeadAccess to any function exit.
  1306. SmallPtrSet<Instruction *, 16> KillingDefs;
  1307. KillingDefs.insert(KillingDef->getMemoryInst());
  1308. MemoryAccess *MaybeDeadAccess = Current;
  1309. MemoryLocation MaybeDeadLoc = *CurrentLoc;
  1310. Instruction *MaybeDeadI = cast<MemoryDef>(MaybeDeadAccess)->getMemoryInst();
  1311. LLVM_DEBUG(dbgs() << " Checking for reads of " << *MaybeDeadAccess << " ("
  1312. << *MaybeDeadI << ")\n");
  1313. SmallSetVector<MemoryAccess *, 32> WorkList;
  1314. auto PushMemUses = [&WorkList](MemoryAccess *Acc) {
  1315. for (Use &U : Acc->uses())
  1316. WorkList.insert(cast<MemoryAccess>(U.getUser()));
  1317. };
  1318. PushMemUses(MaybeDeadAccess);
  1319. // Check if DeadDef may be read.
  1320. for (unsigned I = 0; I < WorkList.size(); I++) {
  1321. MemoryAccess *UseAccess = WorkList[I];
  1322. LLVM_DEBUG(dbgs() << " " << *UseAccess);
  1323. // Bail out if the number of accesses to check exceeds the scan limit.
  1324. if (ScanLimit < (WorkList.size() - I)) {
  1325. LLVM_DEBUG(dbgs() << "\n ... hit scan limit\n");
  1326. return std::nullopt;
  1327. }
  1328. --ScanLimit;
  1329. NumDomMemDefChecks++;
  1330. if (isa<MemoryPhi>(UseAccess)) {
  1331. if (any_of(KillingDefs, [this, UseAccess](Instruction *KI) {
  1332. return DT.properlyDominates(KI->getParent(),
  1333. UseAccess->getBlock());
  1334. })) {
  1335. LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing block\n");
  1336. continue;
  1337. }
  1338. LLVM_DEBUG(dbgs() << "\n ... adding PHI uses\n");
  1339. PushMemUses(UseAccess);
  1340. continue;
  1341. }
  1342. Instruction *UseInst = cast<MemoryUseOrDef>(UseAccess)->getMemoryInst();
  1343. LLVM_DEBUG(dbgs() << " (" << *UseInst << ")\n");
  1344. if (any_of(KillingDefs, [this, UseInst](Instruction *KI) {
  1345. return DT.dominates(KI, UseInst);
  1346. })) {
  1347. LLVM_DEBUG(dbgs() << " ... skipping, dominated by killing def\n");
  1348. continue;
  1349. }
1350. // A memory terminator kills all preceding MemoryDefs and all succeeding
1351. // MemoryAccesses. We do not have to check its users.
  1352. if (isMemTerminator(MaybeDeadLoc, MaybeDeadI, UseInst)) {
  1353. LLVM_DEBUG(
  1354. dbgs()
  1355. << " ... skipping, memterminator invalidates following accesses\n");
  1356. continue;
  1357. }
  1358. if (isNoopIntrinsic(cast<MemoryUseOrDef>(UseAccess)->getMemoryInst())) {
  1359. LLVM_DEBUG(dbgs() << " ... adding uses of intrinsic\n");
  1360. PushMemUses(UseAccess);
  1361. continue;
  1362. }
  1363. if (UseInst->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj)) {
  1364. LLVM_DEBUG(dbgs() << " ... found throwing instruction\n");
  1365. return std::nullopt;
  1366. }
  1367. // Uses which may read the original MemoryDef mean we cannot eliminate the
  1368. // original MD. Stop walk.
  1369. if (isReadClobber(MaybeDeadLoc, UseInst)) {
  1370. LLVM_DEBUG(dbgs() << " ... found read clobber\n");
  1371. return std::nullopt;
  1372. }
  1373. // If this worklist walks back to the original memory access (and the
1374. // pointer is not guaranteed loop invariant) then we cannot assume that a
  1375. // store kills itself.
  1376. if (MaybeDeadAccess == UseAccess &&
  1377. !isGuaranteedLoopInvariant(MaybeDeadLoc.Ptr)) {
  1378. LLVM_DEBUG(dbgs() << " ... found not loop invariant self access\n");
  1379. return std::nullopt;
  1380. }
  1381. // Otherwise, for the KillingDef and MaybeDeadAccess we only have to check
  1382. // if it reads the memory location.
  1383. // TODO: It would probably be better to check for self-reads before
  1384. // calling the function.
  1385. if (KillingDef == UseAccess || MaybeDeadAccess == UseAccess) {
  1386. LLVM_DEBUG(dbgs() << " ... skipping killing def/dom access\n");
  1387. continue;
  1388. }
  1389. // Check all uses for MemoryDefs, except for defs completely overwriting
  1390. // the original location. Otherwise we have to check uses of *all*
  1391. // MemoryDefs we discover, including non-aliasing ones. Otherwise we might
  1392. // miss cases like the following
  1393. // 1 = Def(LoE) ; <----- DeadDef stores [0,1]
  1394. // 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3]
  1395. // Use(2) ; MayAlias 2 *and* 1, loads [0, 3].
  1396. // (The Use points to the *first* Def it may alias)
  1397. // 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias,
  1398. // stores [0,1]
  1399. if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) {
  1400. if (isCompleteOverwrite(MaybeDeadLoc, MaybeDeadI, UseInst)) {
  1401. BasicBlock *MaybeKillingBlock = UseInst->getParent();
  1402. if (PostOrderNumbers.find(MaybeKillingBlock)->second <
  1403. PostOrderNumbers.find(MaybeDeadAccess->getBlock())->second) {
  1404. if (!isInvisibleToCallerAfterRet(KillingUndObj)) {
  1405. LLVM_DEBUG(dbgs()
  1406. << " ... found killing def " << *UseInst << "\n");
  1407. KillingDefs.insert(UseInst);
  1408. }
  1409. } else {
  1410. LLVM_DEBUG(dbgs()
  1411. << " ... found preceeding def " << *UseInst << "\n");
  1412. return std::nullopt;
  1413. }
  1414. } else
  1415. PushMemUses(UseDef);
  1416. }
  1417. }
  1418. // For accesses to locations visible after the function returns, make sure
  1419. // that the location is dead (=overwritten) along all paths from
  1420. // MaybeDeadAccess to the exit.
  1421. if (!isInvisibleToCallerAfterRet(KillingUndObj)) {
  1422. SmallPtrSet<BasicBlock *, 16> KillingBlocks;
  1423. for (Instruction *KD : KillingDefs)
  1424. KillingBlocks.insert(KD->getParent());
  1425. assert(!KillingBlocks.empty() &&
  1426. "Expected at least a single killing block");
  1427. // Find the common post-dominator of all killing blocks.
  1428. BasicBlock *CommonPred = *KillingBlocks.begin();
  1429. for (BasicBlock *BB : llvm::drop_begin(KillingBlocks)) {
  1430. if (!CommonPred)
  1431. break;
  1432. CommonPred = PDT.findNearestCommonDominator(CommonPred, BB);
  1433. }
  1434. // If the common post-dominator does not post-dominate MaybeDeadAccess,
  1435. // there is a path from MaybeDeadAccess to an exit not going through a
  1436. // killing block.
  1437. if (!PDT.dominates(CommonPred, MaybeDeadAccess->getBlock())) {
  1438. if (!AnyUnreachableExit)
  1439. return std::nullopt;
  1440. // Fall back to CFG scan starting at all non-unreachable roots if not
  1441. // all paths to the exit go through CommonPred.
  1442. CommonPred = nullptr;
  1443. }
  1444. // If CommonPred itself is in the set of killing blocks, we're done.
  1445. if (KillingBlocks.count(CommonPred))
  1446. return {MaybeDeadAccess};
  1447. SetVector<BasicBlock *> WorkList;
  1448. // If CommonPred is null, there are multiple exits from the function.
  1449. // They all have to be added to the worklist.
  1450. if (CommonPred)
  1451. WorkList.insert(CommonPred);
  1452. else
  1453. for (BasicBlock *R : PDT.roots()) {
  1454. if (!isa<UnreachableInst>(R->getTerminator()))
  1455. WorkList.insert(R);
  1456. }
  1457. NumCFGTries++;
  1458. // Check if all paths starting from an exit node go through one of the
  1459. // killing blocks before reaching MaybeDeadAccess.
  1460. for (unsigned I = 0; I < WorkList.size(); I++) {
  1461. NumCFGChecks++;
  1462. BasicBlock *Current = WorkList[I];
  1463. if (KillingBlocks.count(Current))
  1464. continue;
  1465. if (Current == MaybeDeadAccess->getBlock())
  1466. return std::nullopt;
  1467. // MaybeDeadAccess is reachable from the entry, so we don't have to
  1468. // explore unreachable blocks further.
  1469. if (!DT.isReachableFromEntry(Current))
  1470. continue;
  1471. for (BasicBlock *Pred : predecessors(Current))
  1472. WorkList.insert(Pred);
  1473. if (WorkList.size() >= MemorySSAPathCheckLimit)
  1474. return std::nullopt;
  1475. }
  1476. NumCFGSuccess++;
  1477. }
1478. // No aliasing MemoryUses of MaybeDeadAccess found; MaybeDeadAccess is
  1479. // potentially dead.
  1480. return {MaybeDeadAccess};
  1481. }
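// Walk summary (descriptive sketch of the function above): starting from
// KillingDef's defining access, the loop skips defs that cannot be
// eliminated (canSkipDef, no analyzable write, loop-variant locations),
// stops at MemoryPhis and read clobbers, and then verifies that the
// surviving candidate MaybeDeadAccess is not read on any path, falling back
// to a CFG scan for objects still visible after the function returns.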
  1482. // Delete dead memory defs
  1483. void deleteDeadInstruction(Instruction *SI) {
  1484. MemorySSAUpdater Updater(&MSSA);
  1485. SmallVector<Instruction *, 32> NowDeadInsts;
  1486. NowDeadInsts.push_back(SI);
  1487. --NumFastOther;
  1488. while (!NowDeadInsts.empty()) {
  1489. Instruction *DeadInst = NowDeadInsts.pop_back_val();
  1490. ++NumFastOther;
  1491. // Try to preserve debug information attached to the dead instruction.
  1492. salvageDebugInfo(*DeadInst);
  1493. salvageKnowledge(DeadInst);
  1494. // Remove the Instruction from MSSA.
  1495. if (MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst)) {
  1496. if (MemoryDef *MD = dyn_cast<MemoryDef>(MA)) {
  1497. SkipStores.insert(MD);
  1498. if (auto *SI = dyn_cast<StoreInst>(MD->getMemoryInst())) {
  1499. if (SI->getValueOperand()->getType()->isPointerTy()) {
  1500. const Value *UO = getUnderlyingObject(SI->getValueOperand());
  1501. if (CapturedBeforeReturn.erase(UO))
  1502. ShouldIterateEndOfFunctionDSE = true;
  1503. InvisibleToCallerAfterRet.erase(UO);
  1504. }
  1505. }
  1506. }
  1507. Updater.removeMemoryAccess(MA);
  1508. }
  1509. auto I = IOLs.find(DeadInst->getParent());
  1510. if (I != IOLs.end())
  1511. I->second.erase(DeadInst);
  1512. // Remove its operands
  1513. for (Use &O : DeadInst->operands())
  1514. if (Instruction *OpI = dyn_cast<Instruction>(O)) {
  1515. O = nullptr;
  1516. if (isInstructionTriviallyDead(OpI, &TLI))
  1517. NowDeadInsts.push_back(OpI);
  1518. }
  1519. EI.removeInstruction(DeadInst);
  1520. DeadInst->eraseFromParent();
  1521. }
  1522. }
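// Deletion sketch: erasing "store i32 %v, ptr %p" also removes its MemoryDef
// from MemorySSA and may leave %v or %p trivially dead, in which case the
// worklist above erases those producing instructions as well.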
  1523. // Check for any extra throws between \p KillingI and \p DeadI that block
1524. // DSE. This only checks extra maythrows (those that aren't MemoryDefs).
1525. // MemoryDefs that may throw are handled during the walk from one def to the
  1526. // next.
  1527. bool mayThrowBetween(Instruction *KillingI, Instruction *DeadI,
  1528. const Value *KillingUndObj) {
1529. // First see if we can ignore it by using the fact that the underlying
1530. // object (KillingUndObj) is an alloca/alloca-like object that is not
1531. // visible to the caller during execution of the function.
  1532. if (KillingUndObj && isInvisibleToCallerOnUnwind(KillingUndObj))
  1533. return false;
  1534. if (KillingI->getParent() == DeadI->getParent())
  1535. return ThrowingBlocks.count(KillingI->getParent());
  1536. return !ThrowingBlocks.empty();
  1537. }
  1538. // Check if \p DeadI acts as a DSE barrier for \p KillingI. The following
  1539. // instructions act as barriers:
  1540. // * A memory instruction that may throw and \p KillingI accesses a non-stack
  1541. // object.
1542. // * Atomic stores stronger than monotonic.
  1543. bool isDSEBarrier(const Value *KillingUndObj, Instruction *DeadI) {
1544. // If DeadI may throw it acts as a barrier, unless we are writing to an
1545. // alloca/alloca-like object that does not escape.
  1546. if (DeadI->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj))
  1547. return true;
  1548. // If DeadI is an atomic load/store stronger than monotonic, do not try to
  1549. // eliminate/reorder it.
  1550. if (DeadI->isAtomic()) {
  1551. if (auto *LI = dyn_cast<LoadInst>(DeadI))
  1552. return isStrongerThanMonotonic(LI->getOrdering());
  1553. if (auto *SI = dyn_cast<StoreInst>(DeadI))
  1554. return isStrongerThanMonotonic(SI->getOrdering());
  1555. if (auto *ARMW = dyn_cast<AtomicRMWInst>(DeadI))
  1556. return isStrongerThanMonotonic(ARMW->getOrdering());
  1557. if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(DeadI))
  1558. return isStrongerThanMonotonic(CmpXchg->getSuccessOrdering()) ||
  1559. isStrongerThanMonotonic(CmpXchg->getFailureOrdering());
  1560. llvm_unreachable("other instructions should be skipped in MemorySSA");
  1561. }
  1562. return false;
  1563. }
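// Example barriers (sketch): a may-throw call while KillingUndObj is a
// global, or a "store atomic i32 0, ptr %p seq_cst"; a monotonic atomic
// store, by contrast, is not a barrier.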
  1564. /// Eliminate writes to objects that are not visible in the caller and are not
  1565. /// accessed before returning from the function.
  1566. bool eliminateDeadWritesAtEndOfFunction() {
  1567. bool MadeChange = false;
  1568. LLVM_DEBUG(
  1569. dbgs()
  1570. << "Trying to eliminate MemoryDefs at the end of the function\n");
  1571. do {
  1572. ShouldIterateEndOfFunctionDSE = false;
  1573. for (MemoryDef *Def : llvm::reverse(MemDefs)) {
  1574. if (SkipStores.contains(Def))
  1575. continue;
  1576. Instruction *DefI = Def->getMemoryInst();
  1577. auto DefLoc = getLocForWrite(DefI);
  1578. if (!DefLoc || !isRemovable(DefI))
  1579. continue;
  1580. // NOTE: Currently eliminating writes at the end of a function is
  1581. // limited to MemoryDefs with a single underlying object, to save
  1582. // compile-time. In practice it appears the case with multiple
  1583. // underlying objects is very uncommon. If it turns out to be important,
  1584. // we can use getUnderlyingObjects here instead.
  1585. const Value *UO = getUnderlyingObject(DefLoc->Ptr);
  1586. if (!isInvisibleToCallerAfterRet(UO))
  1587. continue;
  1588. if (isWriteAtEndOfFunction(Def)) {
  1589. // See through pointer-to-pointer bitcasts
  1590. LLVM_DEBUG(dbgs() << " ... MemoryDef is not accessed until the end "
  1591. "of the function\n");
  1592. deleteDeadInstruction(DefI);
  1593. ++NumFastStores;
  1594. MadeChange = true;
  1595. }
  1596. }
  1597. } while (ShouldIterateEndOfFunctionDSE);
  1598. return MadeChange;
  1599. }
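// End-of-function sketch (assumed IR): a "store i32 1, ptr %buf" into a
// noalias allocation that never escapes and is never read again before the
// function returns is deleted here, because the caller cannot observe the
// write.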
1600. /// If we have a zero-initializing memset following a call to malloc,
  1601. /// try folding it into a call to calloc.
  1602. bool tryFoldIntoCalloc(MemoryDef *Def, const Value *DefUO) {
  1603. Instruction *DefI = Def->getMemoryInst();
  1604. MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
  1605. if (!MemSet)
  1606. // TODO: Could handle zero store to small allocation as well.
  1607. return false;
  1608. Constant *StoredConstant = dyn_cast<Constant>(MemSet->getValue());
  1609. if (!StoredConstant || !StoredConstant->isNullValue())
  1610. return false;
  1611. if (!isRemovable(DefI))
1612. // The memset might be volatile.
  1613. return false;
  1614. if (F.hasFnAttribute(Attribute::SanitizeMemory) ||
  1615. F.hasFnAttribute(Attribute::SanitizeAddress) ||
  1616. F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
  1617. F.getName() == "calloc")
  1618. return false;
  1619. auto *Malloc = const_cast<CallInst *>(dyn_cast<CallInst>(DefUO));
  1620. if (!Malloc)
  1621. return false;
  1622. auto *InnerCallee = Malloc->getCalledFunction();
  1623. if (!InnerCallee)
  1624. return false;
  1625. LibFunc Func;
  1626. if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) ||
  1627. Func != LibFunc_malloc)
  1628. return false;
  1629. auto shouldCreateCalloc = [](CallInst *Malloc, CallInst *Memset) {
1630. // Check for a br(icmp(ptr, null), truebb, falsebb) pattern at the end
1631. // of the malloc block.
  1632. auto *MallocBB = Malloc->getParent(),
  1633. *MemsetBB = Memset->getParent();
  1634. if (MallocBB == MemsetBB)
  1635. return true;
  1636. auto *Ptr = Memset->getArgOperand(0);
  1637. auto *TI = MallocBB->getTerminator();
  1638. ICmpInst::Predicate Pred;
  1639. BasicBlock *TrueBB, *FalseBB;
  1640. if (!match(TI, m_Br(m_ICmp(Pred, m_Specific(Ptr), m_Zero()), TrueBB,
  1641. FalseBB)))
  1642. return false;
  1643. if (Pred != ICmpInst::ICMP_EQ || MemsetBB != FalseBB)
  1644. return false;
  1645. return true;
  1646. };
  1647. if (Malloc->getOperand(0) != MemSet->getLength())
  1648. return false;
  1649. if (!shouldCreateCalloc(Malloc, MemSet) ||
  1650. !DT.dominates(Malloc, MemSet) ||
  1651. !memoryIsNotModifiedBetween(Malloc, MemSet, BatchAA, DL, &DT))
  1652. return false;
  1653. IRBuilder<> IRB(Malloc);
  1654. Type *SizeTTy = Malloc->getArgOperand(0)->getType();
  1655. auto *Calloc = emitCalloc(ConstantInt::get(SizeTTy, 1),
  1656. Malloc->getArgOperand(0), IRB, TLI);
  1657. if (!Calloc)
  1658. return false;
  1659. MemorySSAUpdater Updater(&MSSA);
  1660. auto *LastDef =
  1661. cast<MemoryDef>(Updater.getMemorySSA()->getMemoryAccess(Malloc));
  1662. auto *NewAccess =
  1663. Updater.createMemoryAccessAfter(cast<Instruction>(Calloc), LastDef,
  1664. LastDef);
  1665. auto *NewAccessMD = cast<MemoryDef>(NewAccess);
  1666. Updater.insertDef(NewAccessMD, /*RenameUses=*/true);
  1667. Updater.removeMemoryAccess(Malloc);
  1668. Malloc->replaceAllUsesWith(Calloc);
  1669. Malloc->eraseFromParent();
  1670. return true;
  1671. }
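// Illustrative transformation (assumed IR):
//   %p = call ptr @malloc(i64 %n)
//   call void @llvm.memset.p0.i64(ptr %p, i8 0, i64 %n, i1 false)
// becomes
//   %p = call ptr @calloc(i64 1, i64 %n)
// with the now-redundant memset deleted by the caller of this helper.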
  1672. /// \returns true if \p Def is a no-op store, either because it
  1673. /// directly stores back a loaded value or stores zero to a calloced object.
  1674. bool storeIsNoop(MemoryDef *Def, const Value *DefUO) {
  1675. Instruction *DefI = Def->getMemoryInst();
  1676. StoreInst *Store = dyn_cast<StoreInst>(DefI);
  1677. MemSetInst *MemSet = dyn_cast<MemSetInst>(DefI);
  1678. Constant *StoredConstant = nullptr;
  1679. if (Store)
  1680. StoredConstant = dyn_cast<Constant>(Store->getOperand(0));
  1681. else if (MemSet)
  1682. StoredConstant = dyn_cast<Constant>(MemSet->getValue());
  1683. else
  1684. return false;
  1685. if (!isRemovable(DefI))
  1686. return false;
  1687. if (StoredConstant) {
  1688. Constant *InitC =
  1689. getInitialValueOfAllocation(DefUO, &TLI, StoredConstant->getType());
  1690. // If the clobbering access is LiveOnEntry, no instructions between them
  1691. // can modify the memory location.
  1692. if (InitC && InitC == StoredConstant)
  1693. return MSSA.isLiveOnEntryDef(
  1694. MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def, BatchAA));
  1695. }
  1696. if (!Store)
  1697. return false;
  1698. if (auto *LoadI = dyn_cast<LoadInst>(Store->getOperand(0))) {
  1699. if (LoadI->getPointerOperand() == Store->getOperand(1)) {
  1700. // Get the defining access for the load.
  1701. auto *LoadAccess = MSSA.getMemoryAccess(LoadI)->getDefiningAccess();
  1702. // Fast path: the defining accesses are the same.
  1703. if (LoadAccess == Def->getDefiningAccess())
  1704. return true;
  1705. // Look through phi accesses. Recursively scan all phi accesses by
  1706. // adding them to a worklist. Bail when we run into a memory def that
  1707. // does not match LoadAccess.
  1708. SetVector<MemoryAccess *> ToCheck;
  1709. MemoryAccess *Current =
  1710. MSSA.getWalker()->getClobberingMemoryAccess(Def, BatchAA);
  1711. // We don't want to bail when we run into the store memory def. But,
  1712. // the phi access may point to it. So, pretend like we've already
  1713. // checked it.
  1714. ToCheck.insert(Def);
  1715. ToCheck.insert(Current);
  1716. // Start at current (1) to simulate already having checked Def.
  1717. for (unsigned I = 1; I < ToCheck.size(); ++I) {
  1718. Current = ToCheck[I];
  1719. if (auto PhiAccess = dyn_cast<MemoryPhi>(Current)) {
  1720. // Check all the operands.
  1721. for (auto &Use : PhiAccess->incoming_values())
  1722. ToCheck.insert(cast<MemoryAccess>(&Use));
  1723. continue;
  1724. }
1725. // If we found a MemoryDef it must match LoadAccess, otherwise we bail:
1726. // an unrelated write sits between the load and the otherwise no-op store.
  1727. assert(isa<MemoryDef>(Current) &&
  1728. "Only MemoryDefs should reach here.");
  1729. // TODO: Skip no alias MemoryDefs that have no aliasing reads.
  1730. // We are searching for the definition of the store's destination.
  1731. // So, if that is the same definition as the load, then this is a
  1732. // noop. Otherwise, fail.
  1733. if (LoadAccess != Current)
  1734. return false;
  1735. }
  1736. return true;
  1737. }
  1738. }
  1739. return false;
  1740. }
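// No-op store sketch (assumed IR):
//   %v = load i32, ptr %p
//   store i32 %v, ptr %p
// The store writes back the value already in memory, so it is removable
// provided no other MemoryDef clobbers %p between the load and the store.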
  1741. bool removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL) {
  1742. bool Changed = false;
  1743. for (auto OI : IOL) {
  1744. Instruction *DeadI = OI.first;
  1745. MemoryLocation Loc = *getLocForWrite(DeadI);
  1746. assert(isRemovable(DeadI) && "Expect only removable instruction");
  1747. const Value *Ptr = Loc.Ptr->stripPointerCasts();
  1748. int64_t DeadStart = 0;
  1749. uint64_t DeadSize = Loc.Size.getValue();
  1750. GetPointerBaseWithConstantOffset(Ptr, DeadStart, DL);
  1751. OverlapIntervalsTy &IntervalMap = OI.second;
  1752. Changed |= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize);
  1753. if (IntervalMap.empty())
  1754. continue;
  1755. Changed |= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize);
  1756. }
  1757. return Changed;
  1758. }
  1759. /// Eliminates writes to locations where the value that is being written
  1760. /// is already stored at the same location.
  1761. bool eliminateRedundantStoresOfExistingValues() {
  1762. bool MadeChange = false;
  1763. LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs that write the "
  1764. "already existing value\n");
  1765. for (auto *Def : MemDefs) {
  1766. if (SkipStores.contains(Def) || MSSA.isLiveOnEntryDef(Def))
  1767. continue;
  1768. Instruction *DefInst = Def->getMemoryInst();
  1769. auto MaybeDefLoc = getLocForWrite(DefInst);
  1770. if (!MaybeDefLoc || !isRemovable(DefInst))
  1771. continue;
  1772. MemoryDef *UpperDef;
  1773. // To conserve compile-time, we avoid walking to the next clobbering def.
  1774. // Instead, we just try to get the optimized access, if it exists. DSE
  1775. // will try to optimize defs during the earlier traversal.
  1776. if (Def->isOptimized())
  1777. UpperDef = dyn_cast<MemoryDef>(Def->getOptimized());
  1778. else
  1779. UpperDef = dyn_cast<MemoryDef>(Def->getDefiningAccess());
  1780. if (!UpperDef || MSSA.isLiveOnEntryDef(UpperDef))
  1781. continue;
  1782. Instruction *UpperInst = UpperDef->getMemoryInst();
  1783. auto IsRedundantStore = [&]() {
  1784. if (DefInst->isIdenticalTo(UpperInst))
  1785. return true;
  1786. if (auto *MemSetI = dyn_cast<MemSetInst>(UpperInst)) {
  1787. if (auto *SI = dyn_cast<StoreInst>(DefInst)) {
  1788. // MemSetInst must have a write location.
  1789. MemoryLocation UpperLoc = *getLocForWrite(UpperInst);
  1790. int64_t InstWriteOffset = 0;
  1791. int64_t DepWriteOffset = 0;
  1792. auto OR = isOverwrite(UpperInst, DefInst, UpperLoc, *MaybeDefLoc,
  1793. InstWriteOffset, DepWriteOffset);
  1794. Value *StoredByte = isBytewiseValue(SI->getValueOperand(), DL);
  1795. return StoredByte && StoredByte == MemSetI->getOperand(1) &&
  1796. OR == OW_Complete;
  1797. }
  1798. }
  1799. return false;
  1800. };
  1801. if (!IsRedundantStore() || isReadClobber(*MaybeDefLoc, DefInst))
  1802. continue;
  1803. LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *DefInst
  1804. << '\n');
  1805. deleteDeadInstruction(DefInst);
  1806. NumRedundantStores++;
  1807. MadeChange = true;
  1808. }
  1809. return MadeChange;
  1810. }
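// Redundant-store sketch (illustrative): a second, identical
// "store i32 1, ptr %p" whose optimized or defining access is the first such
// store, or a store of the very byte pattern a dominating memset already
// wrote, is deleted here without any further MemorySSA walking.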
  1811. };
  1812. static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
  1813. DominatorTree &DT, PostDominatorTree &PDT,
  1814. AssumptionCache &AC,
  1815. const TargetLibraryInfo &TLI,
  1816. const LoopInfo &LI) {
  1817. bool MadeChange = false;
  1818. MSSA.ensureOptimizedUses();
  1819. DSEState State(F, AA, MSSA, DT, PDT, AC, TLI, LI);
  1820. // For each store:
  1821. for (unsigned I = 0; I < State.MemDefs.size(); I++) {
  1822. MemoryDef *KillingDef = State.MemDefs[I];
  1823. if (State.SkipStores.count(KillingDef))
  1824. continue;
  1825. Instruction *KillingI = KillingDef->getMemoryInst();
  1826. std::optional<MemoryLocation> MaybeKillingLoc;
  1827. if (State.isMemTerminatorInst(KillingI)) {
  1828. if (auto KillingLoc = State.getLocForTerminator(KillingI))
  1829. MaybeKillingLoc = KillingLoc->first;
  1830. } else {
  1831. MaybeKillingLoc = State.getLocForWrite(KillingI);
  1832. }
  1833. if (!MaybeKillingLoc) {
  1834. LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "
  1835. << *KillingI << "\n");
  1836. continue;
  1837. }
  1838. MemoryLocation KillingLoc = *MaybeKillingLoc;
  1839. assert(KillingLoc.Ptr && "KillingLoc should not be null");
  1840. const Value *KillingUndObj = getUnderlyingObject(KillingLoc.Ptr);
  1841. LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
  1842. << *KillingDef << " (" << *KillingI << ")\n");
  1843. unsigned ScanLimit = MemorySSAScanLimit;
  1844. unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit;
  1845. unsigned PartialLimit = MemorySSAPartialStoreLimit;
  1846. // Worklist of MemoryAccesses that may be killed by KillingDef.
  1847. SetVector<MemoryAccess *> ToCheck;
  1848. ToCheck.insert(KillingDef->getDefiningAccess());
  1849. bool Shortend = false;
  1850. bool IsMemTerm = State.isMemTerminatorInst(KillingI);
  1851. // Check if MemoryAccesses in the worklist are killed by KillingDef.
  1852. for (unsigned I = 0; I < ToCheck.size(); I++) {
  1853. MemoryAccess *Current = ToCheck[I];
  1854. if (State.SkipStores.count(Current))
  1855. continue;
  1856. std::optional<MemoryAccess *> MaybeDeadAccess = State.getDomMemoryDef(
  1857. KillingDef, Current, KillingLoc, KillingUndObj, ScanLimit,
  1858. WalkerStepLimit, IsMemTerm, PartialLimit);
  1859. if (!MaybeDeadAccess) {
  1860. LLVM_DEBUG(dbgs() << " finished walk\n");
  1861. continue;
  1862. }
  1863. MemoryAccess *DeadAccess = *MaybeDeadAccess;
  1864. LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DeadAccess);
  1865. if (isa<MemoryPhi>(DeadAccess)) {
  1866. LLVM_DEBUG(dbgs() << "\n ... adding incoming values to worklist\n");
  1867. for (Value *V : cast<MemoryPhi>(DeadAccess)->incoming_values()) {
  1868. MemoryAccess *IncomingAccess = cast<MemoryAccess>(V);
  1869. BasicBlock *IncomingBlock = IncomingAccess->getBlock();
  1870. BasicBlock *PhiBlock = DeadAccess->getBlock();
  1871. // We only consider incoming MemoryAccesses that come before the
  1872. // MemoryPhi. Otherwise we could discover candidates that do not
  1873. // strictly dominate our starting def.
  1874. if (State.PostOrderNumbers[IncomingBlock] >
  1875. State.PostOrderNumbers[PhiBlock])
  1876. ToCheck.insert(IncomingAccess);
  1877. }
  1878. continue;
  1879. }
  1880. auto *DeadDefAccess = cast<MemoryDef>(DeadAccess);
  1881. Instruction *DeadI = DeadDefAccess->getMemoryInst();
  1882. LLVM_DEBUG(dbgs() << " (" << *DeadI << ")\n");
  1883. ToCheck.insert(DeadDefAccess->getDefiningAccess());
  1884. NumGetDomMemoryDefPassed++;
  1885. if (!DebugCounter::shouldExecute(MemorySSACounter))
  1886. continue;
  1887. MemoryLocation DeadLoc = *State.getLocForWrite(DeadI);
  1888. if (IsMemTerm) {
  1889. const Value *DeadUndObj = getUnderlyingObject(DeadLoc.Ptr);
  1890. if (KillingUndObj != DeadUndObj)
  1891. continue;
  1892. LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DeadI
  1893. << "\n KILLER: " << *KillingI << '\n');
  1894. State.deleteDeadInstruction(DeadI);
  1895. ++NumFastStores;
  1896. MadeChange = true;
  1897. } else {
1898. // Check if KillingI overwrites DeadI.
  1899. int64_t KillingOffset = 0;
  1900. int64_t DeadOffset = 0;
  1901. OverwriteResult OR = State.isOverwrite(
  1902. KillingI, DeadI, KillingLoc, DeadLoc, KillingOffset, DeadOffset);
  1903. if (OR == OW_MaybePartial) {
  1904. auto Iter = State.IOLs.insert(
  1905. std::make_pair<BasicBlock *, InstOverlapIntervalsTy>(
  1906. DeadI->getParent(), InstOverlapIntervalsTy()));
  1907. auto &IOL = Iter.first->second;
  1908. OR = isPartialOverwrite(KillingLoc, DeadLoc, KillingOffset,
  1909. DeadOffset, DeadI, IOL);
  1910. }
  1911. if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) {
  1912. auto *DeadSI = dyn_cast<StoreInst>(DeadI);
  1913. auto *KillingSI = dyn_cast<StoreInst>(KillingI);
  1914. // We are re-using tryToMergePartialOverlappingStores, which requires
1915. // DeadSI to dominate KillingSI.
1916. // TODO: implement tryToMergePartialOverlappingStores using MemorySSA.
  1917. if (DeadSI && KillingSI && DT.dominates(DeadSI, KillingSI)) {
  1918. if (Constant *Merged = tryToMergePartialOverlappingStores(
  1919. KillingSI, DeadSI, KillingOffset, DeadOffset, State.DL,
  1920. State.BatchAA, &DT)) {
  1921. // Update stored value of earlier store to merged constant.
  1922. DeadSI->setOperand(0, Merged);
  1923. ++NumModifiedStores;
  1924. MadeChange = true;
  1925. Shortend = true;
  1926. // Remove killing store and remove any outstanding overlap
  1927. // intervals for the updated store.
  1928. State.deleteDeadInstruction(KillingSI);
  1929. auto I = State.IOLs.find(DeadSI->getParent());
  1930. if (I != State.IOLs.end())
  1931. I->second.erase(DeadSI);
  1932. break;
  1933. }
  1934. }
  1935. }
  1936. if (OR == OW_Complete) {
  1937. LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DeadI
  1938. << "\n KILLER: " << *KillingI << '\n');
  1939. State.deleteDeadInstruction(DeadI);
  1940. ++NumFastStores;
  1941. MadeChange = true;
  1942. }
  1943. }
  1944. }
  1945. // Check if the store is a no-op.
  1946. if (!Shortend && State.storeIsNoop(KillingDef, KillingUndObj)) {
  1947. LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *KillingI
  1948. << '\n');
  1949. State.deleteDeadInstruction(KillingI);
  1950. NumRedundantStores++;
  1951. MadeChange = true;
  1952. continue;
  1953. }
  1954. // Can we form a calloc from a memset/malloc pair?
  1955. if (!Shortend && State.tryFoldIntoCalloc(KillingDef, KillingUndObj)) {
  1956. LLVM_DEBUG(dbgs() << "DSE: Remove memset after forming calloc:\n"
  1957. << " DEAD: " << *KillingI << '\n');
  1958. State.deleteDeadInstruction(KillingI);
  1959. MadeChange = true;
  1960. continue;
  1961. }
  1962. }
  1963. if (EnablePartialOverwriteTracking)
  1964. for (auto &KV : State.IOLs)
  1965. MadeChange |= State.removePartiallyOverlappedStores(KV.second);
  1966. MadeChange |= State.eliminateRedundantStoresOfExistingValues();
  1967. MadeChange |= State.eliminateDeadWritesAtEndOfFunction();
  1968. return MadeChange;
  1969. }
  1970. } // end anonymous namespace
  1971. //===----------------------------------------------------------------------===//
  1972. // DSE Pass
  1973. //===----------------------------------------------------------------------===//
  1974. PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
  1975. AliasAnalysis &AA = AM.getResult<AAManager>(F);
  1976. const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F);
  1977. DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
  1978. MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
  1979. PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
  1980. AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
  1981. LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
  1982. bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, AC, TLI, LI);
  1983. #ifdef LLVM_ENABLE_STATS
  1984. if (AreStatisticsEnabled())
  1985. for (auto &I : instructions(F))
  1986. NumRemainingStores += isa<StoreInst>(&I);
  1987. #endif
  1988. if (!Changed)
  1989. return PreservedAnalyses::all();
  1990. PreservedAnalyses PA;
  1991. PA.preserveSet<CFGAnalyses>();
  1992. PA.preserve<MemorySSAAnalysis>();
  1993. PA.preserve<LoopAnalysis>();
  1994. return PA;
  1995. }
  1996. namespace {
  1997. /// A legacy pass for the legacy pass manager that wraps \c DSEPass.
  1998. class DSELegacyPass : public FunctionPass {
  1999. public:
  2000. static char ID; // Pass identification, replacement for typeid
  2001. DSELegacyPass() : FunctionPass(ID) {
  2002. initializeDSELegacyPassPass(*PassRegistry::getPassRegistry());
  2003. }
  2004. bool runOnFunction(Function &F) override {
  2005. if (skipFunction(F))
  2006. return false;
  2007. AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  2008. DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  2009. const TargetLibraryInfo &TLI =
  2010. getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
  2011. MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
  2012. PostDominatorTree &PDT =
  2013. getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
  2014. AssumptionCache &AC =
  2015. getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  2016. LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  2017. bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, AC, TLI, LI);
  2018. #ifdef LLVM_ENABLE_STATS
  2019. if (AreStatisticsEnabled())
  2020. for (auto &I : instructions(F))
  2021. NumRemainingStores += isa<StoreInst>(&I);
  2022. #endif
  2023. return Changed;
  2024. }
  2025. void getAnalysisUsage(AnalysisUsage &AU) const override {
  2026. AU.setPreservesCFG();
  2027. AU.addRequired<AAResultsWrapperPass>();
  2028. AU.addRequired<TargetLibraryInfoWrapperPass>();
  2029. AU.addPreserved<GlobalsAAWrapperPass>();
  2030. AU.addRequired<DominatorTreeWrapperPass>();
  2031. AU.addPreserved<DominatorTreeWrapperPass>();
  2032. AU.addRequired<PostDominatorTreeWrapperPass>();
  2033. AU.addRequired<MemorySSAWrapperPass>();
  2034. AU.addPreserved<PostDominatorTreeWrapperPass>();
  2035. AU.addPreserved<MemorySSAWrapperPass>();
  2036. AU.addRequired<LoopInfoWrapperPass>();
  2037. AU.addPreserved<LoopInfoWrapperPass>();
  2038. AU.addRequired<AssumptionCacheTracker>();
  2039. }
  2040. };
  2041. } // end anonymous namespace
  2042. char DSELegacyPass::ID = 0;
  2043. INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false,
  2044. false)
  2045. INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
  2046. INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
  2047. INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
  2048. INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
  2049. INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
  2050. INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
  2051. INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
  2052. INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
  2053. INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
  2054. INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false,
  2055. false)
  2056. FunctionPass *llvm::createDeadStoreEliminationPass() {
  2057. return new DSELegacyPass();
  2058. }