MemCpyOptimizer.cpp 65 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681
  1. //===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This pass performs various transformations related to eliminating memcpy
  10. // calls, or transforming sets of stores into memset's.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
  14. #include "llvm/ADT/DenseSet.h"
  15. #include "llvm/ADT/None.h"
  16. #include "llvm/ADT/STLExtras.h"
  17. #include "llvm/ADT/SmallVector.h"
  18. #include "llvm/ADT/Statistic.h"
  19. #include "llvm/ADT/iterator_range.h"
  20. #include "llvm/Analysis/AliasAnalysis.h"
  21. #include "llvm/Analysis/AssumptionCache.h"
  22. #include "llvm/Analysis/CaptureTracking.h"
  23. #include "llvm/Analysis/GlobalsModRef.h"
  24. #include "llvm/Analysis/Loads.h"
  25. #include "llvm/Analysis/MemoryLocation.h"
  26. #include "llvm/Analysis/MemorySSA.h"
  27. #include "llvm/Analysis/MemorySSAUpdater.h"
  28. #include "llvm/Analysis/TargetLibraryInfo.h"
  29. #include "llvm/Analysis/ValueTracking.h"
  30. #include "llvm/IR/Argument.h"
  31. #include "llvm/IR/BasicBlock.h"
  32. #include "llvm/IR/Constants.h"
  33. #include "llvm/IR/DataLayout.h"
  34. #include "llvm/IR/DerivedTypes.h"
  35. #include "llvm/IR/Dominators.h"
  36. #include "llvm/IR/Function.h"
  37. #include "llvm/IR/GetElementPtrTypeIterator.h"
  38. #include "llvm/IR/GlobalVariable.h"
  39. #include "llvm/IR/IRBuilder.h"
  40. #include "llvm/IR/InstrTypes.h"
  41. #include "llvm/IR/Instruction.h"
  42. #include "llvm/IR/Instructions.h"
  43. #include "llvm/IR/IntrinsicInst.h"
  44. #include "llvm/IR/Intrinsics.h"
  45. #include "llvm/IR/LLVMContext.h"
  46. #include "llvm/IR/Module.h"
  47. #include "llvm/IR/Operator.h"
  48. #include "llvm/IR/PassManager.h"
  49. #include "llvm/IR/Type.h"
  50. #include "llvm/IR/User.h"
  51. #include "llvm/IR/Value.h"
  52. #include "llvm/InitializePasses.h"
  53. #include "llvm/Pass.h"
  54. #include "llvm/Support/Casting.h"
  55. #include "llvm/Support/Debug.h"
  56. #include "llvm/Support/MathExtras.h"
  57. #include "llvm/Support/raw_ostream.h"
  58. #include "llvm/Transforms/Scalar.h"
  59. #include "llvm/Transforms/Utils/Local.h"
  60. #include <algorithm>
  61. #include <cassert>
  62. #include <cstdint>
  63. #include <utility>
  64. using namespace llvm;
  65. #define DEBUG_TYPE "memcpyopt"
  66. static cl::opt<bool> EnableMemCpyOptWithoutLibcalls(
  67. "enable-memcpyopt-without-libcalls", cl::init(false), cl::Hidden,
  68. cl::ZeroOrMore,
  69. cl::desc("Enable memcpyopt even when libcalls are disabled"));
  70. STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
  71. STATISTIC(NumMemSetInfer, "Number of memsets inferred");
  72. STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
  73. STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
  74. STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
  75. namespace {
  76. /// Represents a range of memset'd bytes with the ByteVal value.
  77. /// This allows us to analyze stores like:
  78. /// store 0 -> P+1
  79. /// store 0 -> P+0
  80. /// store 0 -> P+3
  81. /// store 0 -> P+2
  82. /// which sometimes happens with stores to arrays of structs etc. When we see
  83. /// the first store, we make a range [1, 2). The second store extends the range
  84. /// to [0, 2). The third makes a new range [2, 3). The fourth store joins the
  85. /// two ranges into [0, 3) which is memset'able.
  86. struct MemsetRange {
  87. // Start/End - A semi range that describes the span that this range covers.
  88. // The range is closed at the start and open at the end: [Start, End).
  89. int64_t Start, End;
  90. /// StartPtr - The getelementptr instruction that points to the start of the
  91. /// range.
  92. Value *StartPtr;
  93. /// Alignment - The known alignment of the first store.
  94. unsigned Alignment;
  95. /// TheStores - The actual stores that make up this range.
  96. SmallVector<Instruction*, 16> TheStores;
  97. bool isProfitableToUseMemset(const DataLayout &DL) const;
  98. };
  99. } // end anonymous namespace
  100. bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
  101. // If we found more than 4 stores to merge or 16 bytes, use memset.
  102. if (TheStores.size() >= 4 || End-Start >= 16) return true;
  103. // If there is nothing to merge, don't do anything.
  104. if (TheStores.size() < 2) return false;
  105. // If any of the stores are a memset, then it is always good to extend the
  106. // memset.
  107. for (Instruction *SI : TheStores)
  108. if (!isa<StoreInst>(SI))
  109. return true;
  110. // Assume that the code generator is capable of merging pairs of stores
  111. // together if it wants to.
  112. if (TheStores.size() == 2) return false;
  113. // If we have fewer than 8 stores, it can still be worthwhile to do this.
  114. // For example, merging 4 i8 stores into an i32 store is useful almost always.
  115. // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the
  116. // memset will be split into 2 32-bit stores anyway) and doing so can
  117. // pessimize the llvm optimizer.
  118. //
  119. // Since we don't have perfect knowledge here, make some assumptions: assume
  120. // the maximum GPR width is the same size as the largest legal integer
  121. // size. If so, check to see whether we will end up actually reducing the
  122. // number of stores used.
  123. unsigned Bytes = unsigned(End-Start);
  124. unsigned MaxIntSize = DL.getLargestLegalIntTypeSizeInBits() / 8;
  125. if (MaxIntSize == 0)
  126. MaxIntSize = 1;
  127. unsigned NumPointerStores = Bytes / MaxIntSize;
  128. // Assume the remaining bytes if any are done a byte at a time.
  129. unsigned NumByteStores = Bytes % MaxIntSize;
  130. // If we will reduce the # stores (according to this heuristic), do the
  131. // transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
  132. // etc.
  133. return TheStores.size() > NumPointerStores+NumByteStores;
  134. }
  135. namespace {
  136. class MemsetRanges {
  137. using range_iterator = SmallVectorImpl<MemsetRange>::iterator;
  138. /// A sorted list of the memset ranges.
  139. SmallVector<MemsetRange, 8> Ranges;
  140. const DataLayout &DL;
  141. public:
  142. MemsetRanges(const DataLayout &DL) : DL(DL) {}
  143. using const_iterator = SmallVectorImpl<MemsetRange>::const_iterator;
  144. const_iterator begin() const { return Ranges.begin(); }
  145. const_iterator end() const { return Ranges.end(); }
  146. bool empty() const { return Ranges.empty(); }
  147. void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
  148. if (auto *SI = dyn_cast<StoreInst>(Inst))
  149. addStore(OffsetFromFirst, SI);
  150. else
  151. addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
  152. }
  153. void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
  154. TypeSize StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
  155. assert(!StoreSize.isScalable() && "Can't track scalable-typed stores");
  156. addRange(OffsetFromFirst, StoreSize.getFixedSize(), SI->getPointerOperand(),
  157. SI->getAlign().value(), SI);
  158. }
  159. void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
  160. int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
  161. addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlignment(), MSI);
  162. }
  163. void addRange(int64_t Start, int64_t Size, Value *Ptr,
  164. unsigned Alignment, Instruction *Inst);
  165. };
  166. } // end anonymous namespace
  167. /// Add a new store to the MemsetRanges data structure. This adds a
  168. /// new range for the specified store at the specified offset, merging into
  169. /// existing ranges as appropriate.
  170. void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
  171. unsigned Alignment, Instruction *Inst) {
  172. int64_t End = Start+Size;
  173. range_iterator I = partition_point(
  174. Ranges, [=](const MemsetRange &O) { return O.End < Start; });
  175. // We now know that I == E, in which case we didn't find anything to merge
  176. // with, or that Start <= I->End. If End < I->Start or I == E, then we need
  177. // to insert a new range. Handle this now.
  178. if (I == Ranges.end() || End < I->Start) {
  179. MemsetRange &R = *Ranges.insert(I, MemsetRange());
  180. R.Start = Start;
  181. R.End = End;
  182. R.StartPtr = Ptr;
  183. R.Alignment = Alignment;
  184. R.TheStores.push_back(Inst);
  185. return;
  186. }
  187. // This store overlaps with I, add it.
  188. I->TheStores.push_back(Inst);
  189. // At this point, we may have an interval that completely contains our store.
  190. // If so, just add it to the interval and return.
  191. if (I->Start <= Start && I->End >= End)
  192. return;
  193. // Now we know that Start <= I->End and End >= I->Start so the range overlaps
  194. // but is not entirely contained within the range.
  195. // See if the range extends the start of the range. In this case, it couldn't
  196. // possibly cause it to join the prior range, because otherwise we would have
  197. // stopped on *it*.
  198. if (Start < I->Start) {
  199. I->Start = Start;
  200. I->StartPtr = Ptr;
  201. I->Alignment = Alignment;
  202. }
  203. // Now we know that Start <= I->End and Start >= I->Start (so the startpoint
  204. // is in or right at the end of I), and that End >= I->Start. Extend I out to
  205. // End.
  206. if (End > I->End) {
  207. I->End = End;
  208. range_iterator NextI = I;
  209. while (++NextI != Ranges.end() && End >= NextI->Start) {
  210. // Merge the range in.
  211. I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
  212. if (NextI->End > I->End)
  213. I->End = NextI->End;
  214. Ranges.erase(NextI);
  215. NextI = I;
  216. }
  217. }
  218. }
  219. //===----------------------------------------------------------------------===//
  220. // MemCpyOptLegacyPass Pass
  221. //===----------------------------------------------------------------------===//
  222. namespace {
  223. class MemCpyOptLegacyPass : public FunctionPass {
  224. MemCpyOptPass Impl;
  225. public:
  226. static char ID; // Pass identification, replacement for typeid
  227. MemCpyOptLegacyPass() : FunctionPass(ID) {
  228. initializeMemCpyOptLegacyPassPass(*PassRegistry::getPassRegistry());
  229. }
  230. bool runOnFunction(Function &F) override;
  231. private:
  232. // This transformation requires dominator postdominator info
  233. void getAnalysisUsage(AnalysisUsage &AU) const override {
  234. AU.setPreservesCFG();
  235. AU.addRequired<AssumptionCacheTracker>();
  236. AU.addRequired<DominatorTreeWrapperPass>();
  237. AU.addPreserved<DominatorTreeWrapperPass>();
  238. AU.addPreserved<GlobalsAAWrapperPass>();
  239. AU.addRequired<TargetLibraryInfoWrapperPass>();
  240. AU.addRequired<AAResultsWrapperPass>();
  241. AU.addPreserved<AAResultsWrapperPass>();
  242. AU.addRequired<MemorySSAWrapperPass>();
  243. AU.addPreserved<MemorySSAWrapperPass>();
  244. }
  245. };
  246. } // end anonymous namespace
  247. char MemCpyOptLegacyPass::ID = 0;
  248. /// The public interface to this file...
  249. FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOptLegacyPass(); }
  250. INITIALIZE_PASS_BEGIN(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization",
  251. false, false)
  252. INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
  253. INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
  254. INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
  255. INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
  256. INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
  257. INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
  258. INITIALIZE_PASS_END(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization",
  259. false, false)
  260. // Check that V is either not accessible by the caller, or unwinding cannot
  261. // occur between Start and End.
  262. static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start,
  263. Instruction *End) {
  264. assert(Start->getParent() == End->getParent() && "Must be in same block");
  265. // Function can't unwind, so it also can't be visible through unwinding.
  266. if (Start->getFunction()->doesNotThrow())
  267. return false;
  268. // Object is not visible on unwind.
  269. // TODO: Support RequiresNoCaptureBeforeUnwind case.
  270. bool RequiresNoCaptureBeforeUnwind;
  271. if (isNotVisibleOnUnwind(getUnderlyingObject(V),
  272. RequiresNoCaptureBeforeUnwind) &&
  273. !RequiresNoCaptureBeforeUnwind)
  274. return false;
  275. // Check whether there are any unwinding instructions in the range.
  276. return any_of(make_range(Start->getIterator(), End->getIterator()),
  277. [](const Instruction &I) { return I.mayThrow(); });
  278. }
  279. void MemCpyOptPass::eraseInstruction(Instruction *I) {
  280. MSSAU->removeMemoryAccess(I);
  281. I->eraseFromParent();
  282. }
  283. // Check for mod or ref of Loc between Start and End, excluding both boundaries.
  284. // Start and End must be in the same block
  285. static bool accessedBetween(AliasAnalysis &AA, MemoryLocation Loc,
  286. const MemoryUseOrDef *Start,
  287. const MemoryUseOrDef *End) {
  288. assert(Start->getBlock() == End->getBlock() && "Only local supported");
  289. for (const MemoryAccess &MA :
  290. make_range(++Start->getIterator(), End->getIterator())) {
  291. if (isModOrRefSet(AA.getModRefInfo(cast<MemoryUseOrDef>(MA).getMemoryInst(),
  292. Loc)))
  293. return true;
  294. }
  295. return false;
  296. }
  297. // Check for mod of Loc between Start and End, excluding both boundaries.
  298. // Start and End can be in different blocks.
  299. static bool writtenBetween(MemorySSA *MSSA, MemoryLocation Loc,
  300. const MemoryUseOrDef *Start,
  301. const MemoryUseOrDef *End) {
  302. // TODO: Only walk until we hit Start.
  303. MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
  304. End->getDefiningAccess(), Loc);
  305. return !MSSA->dominates(Clobber, Start);
  306. }
  307. /// When scanning forward over instructions, we look for some other patterns to
  308. /// fold away. In particular, this looks for stores to neighboring locations of
  309. /// memory. If it sees enough consecutive ones, it attempts to merge them
  310. /// together into a memcpy/memset.
  311. Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
  312. Value *StartPtr,
  313. Value *ByteVal) {
  314. const DataLayout &DL = StartInst->getModule()->getDataLayout();
  315. // We can't track scalable types
  316. if (auto *SI = dyn_cast<StoreInst>(StartInst))
  317. if (DL.getTypeStoreSize(SI->getOperand(0)->getType()).isScalable())
  318. return nullptr;
  319. // Okay, so we now have a single store that can be splatable. Scan to find
  320. // all subsequent stores of the same value to offset from the same pointer.
  321. // Join these together into ranges, so we can decide whether contiguous blocks
  322. // are stored.
  323. MemsetRanges Ranges(DL);
  324. BasicBlock::iterator BI(StartInst);
  325. // Keeps track of the last memory use or def before the insertion point for
  326. // the new memset. The new MemoryDef for the inserted memsets will be inserted
  327. // after MemInsertPoint. It points to either LastMemDef or to the last user
  328. // before the insertion point of the memset, if there are any such users.
  329. MemoryUseOrDef *MemInsertPoint = nullptr;
  330. // Keeps track of the last MemoryDef between StartInst and the insertion point
  331. // for the new memset. This will become the defining access of the inserted
  332. // memsets.
  333. MemoryDef *LastMemDef = nullptr;
  334. for (++BI; !BI->isTerminator(); ++BI) {
  335. auto *CurrentAcc = cast_or_null<MemoryUseOrDef>(
  336. MSSAU->getMemorySSA()->getMemoryAccess(&*BI));
  337. if (CurrentAcc) {
  338. MemInsertPoint = CurrentAcc;
  339. if (auto *CurrentDef = dyn_cast<MemoryDef>(CurrentAcc))
  340. LastMemDef = CurrentDef;
  341. }
  342. // Calls that only access inaccessible memory do not block merging
  343. // accessible stores.
  344. if (auto *CB = dyn_cast<CallBase>(BI)) {
  345. if (CB->onlyAccessesInaccessibleMemory())
  346. continue;
  347. }
  348. if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
  349. // If the instruction is readnone, ignore it, otherwise bail out. We
  350. // don't even allow readonly here because we don't want something like:
  351. // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
  352. if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
  353. break;
  354. continue;
  355. }
  356. if (auto *NextStore = dyn_cast<StoreInst>(BI)) {
  357. // If this is a store, see if we can merge it in.
  358. if (!NextStore->isSimple()) break;
  359. Value *StoredVal = NextStore->getValueOperand();
  360. // Don't convert stores of non-integral pointer types to memsets (which
  361. // stores integers).
  362. if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
  363. break;
  364. // We can't track ranges involving scalable types.
  365. if (DL.getTypeStoreSize(StoredVal->getType()).isScalable())
  366. break;
  367. // Check to see if this stored value is of the same byte-splattable value.
  368. Value *StoredByte = isBytewiseValue(StoredVal, DL);
  369. if (isa<UndefValue>(ByteVal) && StoredByte)
  370. ByteVal = StoredByte;
  371. if (ByteVal != StoredByte)
  372. break;
  373. // Check to see if this store is to a constant offset from the start ptr.
  374. Optional<int64_t> Offset =
  375. isPointerOffset(StartPtr, NextStore->getPointerOperand(), DL);
  376. if (!Offset)
  377. break;
  378. Ranges.addStore(*Offset, NextStore);
  379. } else {
  380. auto *MSI = cast<MemSetInst>(BI);
  381. if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
  382. !isa<ConstantInt>(MSI->getLength()))
  383. break;
  384. // Check to see if this store is to a constant offset from the start ptr.
  385. Optional<int64_t> Offset = isPointerOffset(StartPtr, MSI->getDest(), DL);
  386. if (!Offset)
  387. break;
  388. Ranges.addMemSet(*Offset, MSI);
  389. }
  390. }
  391. // If we have no ranges, then we just had a single store with nothing that
  392. // could be merged in. This is a very common case of course.
  393. if (Ranges.empty())
  394. return nullptr;
  395. // If we had at least one store that could be merged in, add the starting
  396. // store as well. We try to avoid this unless there is at least something
  397. // interesting as a small compile-time optimization.
  398. Ranges.addInst(0, StartInst);
  399. // If we create any memsets, we put it right before the first instruction that
  400. // isn't part of the memset block. This ensure that the memset is dominated
  401. // by any addressing instruction needed by the start of the block.
  402. IRBuilder<> Builder(&*BI);
  403. // Now that we have full information about ranges, loop over the ranges and
  404. // emit memset's for anything big enough to be worthwhile.
  405. Instruction *AMemSet = nullptr;
  406. for (const MemsetRange &Range : Ranges) {
  407. if (Range.TheStores.size() == 1) continue;
  408. // If it is profitable to lower this range to memset, do so now.
  409. if (!Range.isProfitableToUseMemset(DL))
  410. continue;
  411. // Otherwise, we do want to transform this! Create a new memset.
  412. // Get the starting pointer of the block.
  413. StartPtr = Range.StartPtr;
  414. AMemSet = Builder.CreateMemSet(StartPtr, ByteVal, Range.End - Range.Start,
  415. MaybeAlign(Range.Alignment));
  416. LLVM_DEBUG(dbgs() << "Replace stores:\n"; for (Instruction *SI
  417. : Range.TheStores) dbgs()
  418. << *SI << '\n';
  419. dbgs() << "With: " << *AMemSet << '\n');
  420. if (!Range.TheStores.empty())
  421. AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
  422. assert(LastMemDef && MemInsertPoint &&
  423. "Both LastMemDef and MemInsertPoint need to be set");
  424. auto *NewDef =
  425. cast<MemoryDef>(MemInsertPoint->getMemoryInst() == &*BI
  426. ? MSSAU->createMemoryAccessBefore(
  427. AMemSet, LastMemDef, MemInsertPoint)
  428. : MSSAU->createMemoryAccessAfter(
  429. AMemSet, LastMemDef, MemInsertPoint));
  430. MSSAU->insertDef(NewDef, /*RenameUses=*/true);
  431. LastMemDef = NewDef;
  432. MemInsertPoint = NewDef;
  433. // Zap all the stores.
  434. for (Instruction *SI : Range.TheStores)
  435. eraseInstruction(SI);
  436. ++NumMemSetInfer;
  437. }
  438. return AMemSet;
  439. }
  440. // This method try to lift a store instruction before position P.
  441. // It will lift the store and its argument + that anything that
  442. // may alias with these.
  443. // The method returns true if it was successful.
  444. bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
  445. // If the store alias this position, early bail out.
  446. MemoryLocation StoreLoc = MemoryLocation::get(SI);
  447. if (isModOrRefSet(AA->getModRefInfo(P, StoreLoc)))
  448. return false;
  449. // Keep track of the arguments of all instruction we plan to lift
  450. // so we can make sure to lift them as well if appropriate.
  451. DenseSet<Instruction*> Args;
  452. if (auto *Ptr = dyn_cast<Instruction>(SI->getPointerOperand()))
  453. if (Ptr->getParent() == SI->getParent())
  454. Args.insert(Ptr);
  455. // Instruction to lift before P.
  456. SmallVector<Instruction *, 8> ToLift{SI};
  457. // Memory locations of lifted instructions.
  458. SmallVector<MemoryLocation, 8> MemLocs{StoreLoc};
  459. // Lifted calls.
  460. SmallVector<const CallBase *, 8> Calls;
  461. const MemoryLocation LoadLoc = MemoryLocation::get(LI);
  462. for (auto I = --SI->getIterator(), E = P->getIterator(); I != E; --I) {
  463. auto *C = &*I;
  464. // Make sure hoisting does not perform a store that was not guaranteed to
  465. // happen.
  466. if (!isGuaranteedToTransferExecutionToSuccessor(C))
  467. return false;
  468. bool MayAlias = isModOrRefSet(AA->getModRefInfo(C, None));
  469. bool NeedLift = false;
  470. if (Args.erase(C))
  471. NeedLift = true;
  472. else if (MayAlias) {
  473. NeedLift = llvm::any_of(MemLocs, [C, this](const MemoryLocation &ML) {
  474. return isModOrRefSet(AA->getModRefInfo(C, ML));
  475. });
  476. if (!NeedLift)
  477. NeedLift = llvm::any_of(Calls, [C, this](const CallBase *Call) {
  478. return isModOrRefSet(AA->getModRefInfo(C, Call));
  479. });
  480. }
  481. if (!NeedLift)
  482. continue;
  483. if (MayAlias) {
  484. // Since LI is implicitly moved downwards past the lifted instructions,
  485. // none of them may modify its source.
  486. if (isModSet(AA->getModRefInfo(C, LoadLoc)))
  487. return false;
  488. else if (const auto *Call = dyn_cast<CallBase>(C)) {
  489. // If we can't lift this before P, it's game over.
  490. if (isModOrRefSet(AA->getModRefInfo(P, Call)))
  491. return false;
  492. Calls.push_back(Call);
  493. } else if (isa<LoadInst>(C) || isa<StoreInst>(C) || isa<VAArgInst>(C)) {
  494. // If we can't lift this before P, it's game over.
  495. auto ML = MemoryLocation::get(C);
  496. if (isModOrRefSet(AA->getModRefInfo(P, ML)))
  497. return false;
  498. MemLocs.push_back(ML);
  499. } else
  500. // We don't know how to lift this instruction.
  501. return false;
  502. }
  503. ToLift.push_back(C);
  504. for (unsigned k = 0, e = C->getNumOperands(); k != e; ++k)
  505. if (auto *A = dyn_cast<Instruction>(C->getOperand(k))) {
  506. if (A->getParent() == SI->getParent()) {
  507. // Cannot hoist user of P above P
  508. if(A == P) return false;
  509. Args.insert(A);
  510. }
  511. }
  512. }
  513. // Find MSSA insertion point. Normally P will always have a corresponding
  514. // memory access before which we can insert. However, with non-standard AA
  515. // pipelines, there may be a mismatch between AA and MSSA, in which case we
  516. // will scan for a memory access before P. In either case, we know for sure
  517. // that at least the load will have a memory access.
  518. // TODO: Simplify this once P will be determined by MSSA, in which case the
  519. // discrepancy can no longer occur.
  520. MemoryUseOrDef *MemInsertPoint = nullptr;
  521. if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(P)) {
  522. MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator());
  523. } else {
  524. const Instruction *ConstP = P;
  525. for (const Instruction &I : make_range(++ConstP->getReverseIterator(),
  526. ++LI->getReverseIterator())) {
  527. if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(&I)) {
  528. MemInsertPoint = MA;
  529. break;
  530. }
  531. }
  532. }
  533. // We made it, we need to lift.
  534. for (auto *I : llvm::reverse(ToLift)) {
  535. LLVM_DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n");
  536. I->moveBefore(P);
  537. assert(MemInsertPoint && "Must have found insert point");
  538. if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(I)) {
  539. MSSAU->moveAfter(MA, MemInsertPoint);
  540. MemInsertPoint = MA;
  541. }
  542. }
  543. return true;
  544. }
  545. bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
  546. if (!SI->isSimple()) return false;
  547. // Avoid merging nontemporal stores since the resulting
  548. // memcpy/memset would not be able to preserve the nontemporal hint.
  549. // In theory we could teach how to propagate the !nontemporal metadata to
  550. // memset calls. However, that change would force the backend to
  551. // conservatively expand !nontemporal memset calls back to sequences of
  552. // store instructions (effectively undoing the merging).
  553. if (SI->getMetadata(LLVMContext::MD_nontemporal))
  554. return false;
  555. const DataLayout &DL = SI->getModule()->getDataLayout();
  556. Value *StoredVal = SI->getValueOperand();
  557. // Not all the transforms below are correct for non-integral pointers, bail
  558. // until we've audited the individual pieces.
  559. if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
  560. return false;
  561. // Load to store forwarding can be interpreted as memcpy.
  562. if (auto *LI = dyn_cast<LoadInst>(StoredVal)) {
  563. if (LI->isSimple() && LI->hasOneUse() &&
  564. LI->getParent() == SI->getParent()) {
  565. auto *T = LI->getType();
  566. // Don't introduce calls to memcpy/memmove intrinsics out of thin air if
  567. // the corresponding libcalls are not available.
  568. // TODO: We should really distinguish between libcall availability and
  569. // our ability to introduce intrinsics.
  570. if (T->isAggregateType() &&
  571. (EnableMemCpyOptWithoutLibcalls ||
  572. (TLI->has(LibFunc_memcpy) && TLI->has(LibFunc_memmove)))) {
  573. MemoryLocation LoadLoc = MemoryLocation::get(LI);
  574. // We use alias analysis to check if an instruction may store to
  575. // the memory we load from in between the load and the store. If
  576. // such an instruction is found, we try to promote there instead
  577. // of at the store position.
  578. // TODO: Can use MSSA for this.
  579. Instruction *P = SI;
  580. for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) {
  581. if (isModSet(AA->getModRefInfo(&I, LoadLoc))) {
  582. P = &I;
  583. break;
  584. }
  585. }
  586. // We found an instruction that may write to the loaded memory.
  587. // We can try to promote at this position instead of the store
  588. // position if nothing aliases the store memory after this and the store
  589. // destination is not in the range.
  590. if (P && P != SI) {
  591. if (!moveUp(SI, P, LI))
  592. P = nullptr;
  593. }
  594. // If a valid insertion position is found, then we can promote
  595. // the load/store pair to a memcpy.
  596. if (P) {
  597. // If we load from memory that may alias the memory we store to,
  598. // memmove must be used to preserve semantic. If not, memcpy can
  599. // be used. Also, if we load from constant memory, memcpy can be used
  600. // as the constant memory won't be modified.
  601. bool UseMemMove = false;
  602. if (isModSet(AA->getModRefInfo(SI, LoadLoc)))
  603. UseMemMove = true;
  604. uint64_t Size = DL.getTypeStoreSize(T);
  605. IRBuilder<> Builder(P);
  606. Instruction *M;
  607. if (UseMemMove)
  608. M = Builder.CreateMemMove(
  609. SI->getPointerOperand(), SI->getAlign(),
  610. LI->getPointerOperand(), LI->getAlign(), Size);
  611. else
  612. M = Builder.CreateMemCpy(
  613. SI->getPointerOperand(), SI->getAlign(),
  614. LI->getPointerOperand(), LI->getAlign(), Size);
  615. LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => "
  616. << *M << "\n");
  617. auto *LastDef =
  618. cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI));
  619. auto *NewAccess = MSSAU->createMemoryAccessAfter(M, LastDef, LastDef);
  620. MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
  621. eraseInstruction(SI);
  622. eraseInstruction(LI);
  623. ++NumMemCpyInstr;
  624. // Make sure we do not invalidate the iterator.
  625. BBI = M->getIterator();
  626. return true;
  627. }
  628. }
  629. // Detect cases where we're performing call slot forwarding, but
  630. // happen to be using a load-store pair to implement it, rather than
  631. // a memcpy.
  632. CallInst *C = nullptr;
  633. if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
  634. MSSA->getWalker()->getClobberingMemoryAccess(LI))) {
  635. // The load most post-dom the call. Limit to the same block for now.
  636. // TODO: Support non-local call-slot optimization?
  637. if (LoadClobber->getBlock() == SI->getParent())
  638. C = dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
  639. }
  640. if (C) {
  641. // Check that nothing touches the dest of the "copy" between
  642. // the call and the store.
  643. MemoryLocation StoreLoc = MemoryLocation::get(SI);
  644. if (accessedBetween(*AA, StoreLoc, MSSA->getMemoryAccess(C),
  645. MSSA->getMemoryAccess(SI)))
  646. C = nullptr;
  647. }
  648. if (C) {
  649. bool changed = performCallSlotOptzn(
  650. LI, SI, SI->getPointerOperand()->stripPointerCasts(),
  651. LI->getPointerOperand()->stripPointerCasts(),
  652. DL.getTypeStoreSize(SI->getOperand(0)->getType()),
  653. commonAlignment(SI->getAlign(), LI->getAlign()), C);
  654. if (changed) {
  655. eraseInstruction(SI);
  656. eraseInstruction(LI);
  657. ++NumMemCpyInstr;
  658. return true;
  659. }
  660. }
  661. }
  662. }
  663. // The following code creates memset intrinsics out of thin air. Don't do
  664. // this if the corresponding libfunc is not available.
  665. // TODO: We should really distinguish between libcall availability and
  666. // our ability to introduce intrinsics.
  667. if (!(TLI->has(LibFunc_memset) || EnableMemCpyOptWithoutLibcalls))
  668. return false;
  669. // There are two cases that are interesting for this code to handle: memcpy
  670. // and memset. Right now we only handle memset.
  671. // Ensure that the value being stored is something that can be memset'able a
  672. // byte at a time like "0" or "-1" or any width, as well as things like
  673. // 0xA0A0A0A0 and 0.0.
  674. auto *V = SI->getOperand(0);
  675. if (Value *ByteVal = isBytewiseValue(V, DL)) {
  676. if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
  677. ByteVal)) {
  678. BBI = I->getIterator(); // Don't invalidate iterator.
  679. return true;
  680. }
  681. // If we have an aggregate, we try to promote it to memset regardless
  682. // of opportunity for merging as it can expose optimization opportunities
  683. // in subsequent passes.
  684. auto *T = V->getType();
  685. if (T->isAggregateType()) {
  686. uint64_t Size = DL.getTypeStoreSize(T);
  687. IRBuilder<> Builder(SI);
  688. auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size,
  689. SI->getAlign());
  690. LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");
  691. // The newly inserted memset is immediately overwritten by the original
  692. // store, so we do not need to rename uses.
  693. auto *StoreDef = cast<MemoryDef>(MSSA->getMemoryAccess(SI));
  694. auto *NewAccess = MSSAU->createMemoryAccessBefore(
  695. M, StoreDef->getDefiningAccess(), StoreDef);
  696. MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/false);
  697. eraseInstruction(SI);
  698. NumMemSetInfer++;
  699. // Make sure we do not invalidate the iterator.
  700. BBI = M->getIterator();
  701. return true;
  702. }
  703. }
  704. return false;
  705. }
  706. bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
  707. // See if there is another memset or store neighboring this memset which
  708. // allows us to widen out the memset to do a single larger store.
  709. if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
  710. if (Instruction *I = tryMergingIntoMemset(MSI, MSI->getDest(),
  711. MSI->getValue())) {
  712. BBI = I->getIterator(); // Don't invalidate iterator.
  713. return true;
  714. }
  715. return false;
  716. }
  717. /// Takes a memcpy and a call that it depends on,
  718. /// and checks for the possibility of a call slot optimization by having
  719. /// the call write its result directly into the destination of the memcpy.
  720. bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
  721. Instruction *cpyStore, Value *cpyDest,
  722. Value *cpySrc, TypeSize cpySize,
  723. Align cpyAlign, CallInst *C) {
  724. // The general transformation to keep in mind is
  725. //
  726. // call @func(..., src, ...)
  727. // memcpy(dest, src, ...)
  728. //
  729. // ->
  730. //
  731. // memcpy(dest, src, ...)
  732. // call @func(..., dest, ...)
  733. //
  734. // Since moving the memcpy is technically awkward, we additionally check that
  735. // src only holds uninitialized values at the moment of the call, meaning that
  736. // the memcpy can be discarded rather than moved.
  737. // We can't optimize scalable types.
  738. if (cpySize.isScalable())
  739. return false;
  740. // Lifetime marks shouldn't be operated on.
  741. if (Function *F = C->getCalledFunction())
  742. if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start)
  743. return false;
  744. // Require that src be an alloca. This simplifies the reasoning considerably.
  745. auto *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
  746. if (!srcAlloca)
  747. return false;
  748. ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
  749. if (!srcArraySize)
  750. return false;
  751. const DataLayout &DL = cpyLoad->getModule()->getDataLayout();
  752. uint64_t srcSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType()) *
  753. srcArraySize->getZExtValue();
  754. if (cpySize < srcSize)
  755. return false;
  756. // Check that accessing the first srcSize bytes of dest will not cause a
  757. // trap. Otherwise the transform is invalid since it might cause a trap
  758. // to occur earlier than it otherwise would.
  759. if (!isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpySize),
  760. DL, C, DT)) {
  761. LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer not dereferenceable\n");
  762. return false;
  763. }
  764. // Make sure that nothing can observe cpyDest being written early. There are
  765. // a number of cases to consider:
  766. // 1. cpyDest cannot be accessed between C and cpyStore as a precondition of
  767. // the transform.
  768. // 2. C itself may not access cpyDest (prior to the transform). This is
  769. // checked further below.
  770. // 3. If cpyDest is accessible to the caller of this function (potentially
  771. // captured and not based on an alloca), we need to ensure that we cannot
  772. // unwind between C and cpyStore. This is checked here.
  773. // 4. If cpyDest is potentially captured, there may be accesses to it from
  774. // another thread. In this case, we need to check that cpyStore is
  775. // guaranteed to be executed if C is. As it is a non-atomic access, it
  776. // renders accesses from other threads undefined.
  777. // TODO: This is currently not checked.
  778. if (mayBeVisibleThroughUnwinding(cpyDest, C, cpyStore)) {
  779. LLVM_DEBUG(dbgs() << "Call Slot: Dest may be visible through unwinding");
  780. return false;
  781. }
  782. // Check that dest points to memory that is at least as aligned as src.
  783. Align srcAlign = srcAlloca->getAlign();
  784. bool isDestSufficientlyAligned = srcAlign <= cpyAlign;
  785. // If dest is not aligned enough and we can't increase its alignment then
  786. // bail out.
  787. if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest))
  788. return false;
  789. // Check that src is not accessed except via the call and the memcpy. This
  790. // guarantees that it holds only undefined values when passed in (so the final
  791. // memcpy can be dropped), that it is not read or written between the call and
  792. // the memcpy, and that writing beyond the end of it is undefined.
  793. SmallVector<User *, 8> srcUseList(srcAlloca->users());
  794. while (!srcUseList.empty()) {
  795. User *U = srcUseList.pop_back_val();
  796. if (isa<BitCastInst>(U) || isa<AddrSpaceCastInst>(U)) {
  797. append_range(srcUseList, U->users());
  798. continue;
  799. }
  800. if (const auto *G = dyn_cast<GetElementPtrInst>(U)) {
  801. if (!G->hasAllZeroIndices())
  802. return false;
  803. append_range(srcUseList, U->users());
  804. continue;
  805. }
  806. if (const auto *IT = dyn_cast<IntrinsicInst>(U))
  807. if (IT->isLifetimeStartOrEnd())
  808. continue;
  809. if (U != C && U != cpyLoad)
  810. return false;
  811. }
  812. // Check whether src is captured by the called function, in which case there
  813. // may be further indirect uses of src.
  814. bool SrcIsCaptured = any_of(C->args(), [&](Use &U) {
  815. return U->stripPointerCasts() == cpySrc &&
  816. !C->doesNotCapture(C->getArgOperandNo(&U));
  817. });
  818. // If src is captured, then check whether there are any potential uses of
  819. // src through the captured pointer before the lifetime of src ends, either
  820. // due to a lifetime.end or a return from the function.
  821. if (SrcIsCaptured) {
  822. // Check that dest is not captured before/at the call. We have already
  823. // checked that src is not captured before it. If either had been captured,
  824. // then the call might be comparing the argument against the captured dest
  825. // or src pointer.
  826. Value *DestObj = getUnderlyingObject(cpyDest);
  827. if (!isIdentifiedFunctionLocal(DestObj) ||
  828. PointerMayBeCapturedBefore(DestObj, /* ReturnCaptures */ true,
  829. /* StoreCaptures */ true, C, DT,
  830. /* IncludeI */ true))
  831. return false;
  832. MemoryLocation SrcLoc =
  833. MemoryLocation(srcAlloca, LocationSize::precise(srcSize));
  834. for (Instruction &I :
  835. make_range(++C->getIterator(), C->getParent()->end())) {
  836. // Lifetime of srcAlloca ends at lifetime.end.
  837. if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
  838. if (II->getIntrinsicID() == Intrinsic::lifetime_end &&
  839. II->getArgOperand(1)->stripPointerCasts() == srcAlloca &&
  840. cast<ConstantInt>(II->getArgOperand(0))->uge(srcSize))
  841. break;
  842. }
  843. // Lifetime of srcAlloca ends at return.
  844. if (isa<ReturnInst>(&I))
  845. break;
  846. // Ignore the direct read of src in the load.
  847. if (&I == cpyLoad)
  848. continue;
  849. // Check whether this instruction may mod/ref src through the captured
  850. // pointer (we have already any direct mod/refs in the loop above).
  851. // Also bail if we hit a terminator, as we don't want to scan into other
  852. // blocks.
  853. if (isModOrRefSet(AA->getModRefInfo(&I, SrcLoc)) || I.isTerminator())
  854. return false;
  855. }
  856. }
  857. // Since we're changing the parameter to the callsite, we need to make sure
  858. // that what would be the new parameter dominates the callsite.
  859. if (!DT->dominates(cpyDest, C)) {
  860. // Support moving a constant index GEP before the call.
  861. auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
  862. if (GEP && GEP->hasAllConstantIndices() &&
  863. DT->dominates(GEP->getPointerOperand(), C))
  864. GEP->moveBefore(C);
  865. else
  866. return false;
  867. }
  868. // In addition to knowing that the call does not access src in some
  869. // unexpected manner, for example via a global, which we deduce from
  870. // the use analysis, we also need to know that it does not sneakily
  871. // access dest. We rely on AA to figure this out for us.
  872. ModRefInfo MR = AA->getModRefInfo(C, cpyDest, LocationSize::precise(srcSize));
  873. // If necessary, perform additional analysis.
  874. if (isModOrRefSet(MR))
  875. MR = AA->callCapturesBefore(C, cpyDest, LocationSize::precise(srcSize), DT);
  876. if (isModOrRefSet(MR))
  877. return false;
  878. // We can't create address space casts here because we don't know if they're
  879. // safe for the target.
  880. if (cpySrc->getType()->getPointerAddressSpace() !=
  881. cpyDest->getType()->getPointerAddressSpace())
  882. return false;
  883. for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
  884. if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc &&
  885. cpySrc->getType()->getPointerAddressSpace() !=
  886. C->getArgOperand(ArgI)->getType()->getPointerAddressSpace())
  887. return false;
  888. // All the checks have passed, so do the transformation.
  889. bool changedArgument = false;
  890. for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
  891. if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc) {
  892. Value *Dest = cpySrc->getType() == cpyDest->getType() ? cpyDest
  893. : CastInst::CreatePointerCast(cpyDest, cpySrc->getType(),
  894. cpyDest->getName(), C);
  895. changedArgument = true;
  896. if (C->getArgOperand(ArgI)->getType() == Dest->getType())
  897. C->setArgOperand(ArgI, Dest);
  898. else
  899. C->setArgOperand(ArgI, CastInst::CreatePointerCast(
  900. Dest, C->getArgOperand(ArgI)->getType(),
  901. Dest->getName(), C));
  902. }
  903. if (!changedArgument)
  904. return false;
  905. // If the destination wasn't sufficiently aligned then increase its alignment.
  906. if (!isDestSufficientlyAligned) {
  907. assert(isa<AllocaInst>(cpyDest) && "Can only increase alloca alignment!");
  908. cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
  909. }
  910. // Update AA metadata
  911. // FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be
  912. // handled here, but combineMetadata doesn't support them yet
  913. unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
  914. LLVMContext::MD_noalias,
  915. LLVMContext::MD_invariant_group,
  916. LLVMContext::MD_access_group};
  917. combineMetadata(C, cpyLoad, KnownIDs, true);
  918. if (cpyLoad != cpyStore)
  919. combineMetadata(C, cpyStore, KnownIDs, true);
  920. ++NumCallSlot;
  921. return true;
  922. }
  923. /// We've found that the (upward scanning) memory dependence of memcpy 'M' is
  924. /// the memcpy 'MDep'. Try to simplify M to copy from MDep's input if we can.
  925. bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
  926. MemCpyInst *MDep) {
  927. // We can only transforms memcpy's where the dest of one is the source of the
  928. // other.
  929. if (M->getSource() != MDep->getDest() || MDep->isVolatile())
  930. return false;
  931. // If dep instruction is reading from our current input, then it is a noop
  932. // transfer and substituting the input won't change this instruction. Just
  933. // ignore the input and let someone else zap MDep. This handles cases like:
  934. // memcpy(a <- a)
  935. // memcpy(b <- a)
  936. if (M->getSource() == MDep->getSource())
  937. return false;
  938. // Second, the length of the memcpy's must be the same, or the preceding one
  939. // must be larger than the following one.
  940. if (MDep->getLength() != M->getLength()) {
  941. auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
  942. auto *MLen = dyn_cast<ConstantInt>(M->getLength());
  943. if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
  944. return false;
  945. }
  946. // Verify that the copied-from memory doesn't change in between the two
  947. // transfers. For example, in:
  948. // memcpy(a <- b)
  949. // *b = 42;
  950. // memcpy(c <- a)
  951. // It would be invalid to transform the second memcpy into memcpy(c <- b).
  952. //
  953. // TODO: If the code between M and MDep is transparent to the destination "c",
  954. // then we could still perform the xform by moving M up to the first memcpy.
  955. // TODO: It would be sufficient to check the MDep source up to the memcpy
  956. // size of M, rather than MDep.
  957. if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep),
  958. MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M)))
  959. return false;
  960. // If the dest of the second might alias the source of the first, then the
  961. // source and dest might overlap. In addition, if the source of the first
  962. // points to constant memory, they won't overlap by definition. Otherwise, we
  963. // still want to eliminate the intermediate value, but we have to generate a
  964. // memmove instead of memcpy.
  965. bool UseMemMove = false;
  966. if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(MDep))))
  967. UseMemMove = true;
  968. // If all checks passed, then we can transform M.
  969. LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy->memcpy src:\n"
  970. << *MDep << '\n' << *M << '\n');
  971. // TODO: Is this worth it if we're creating a less aligned memcpy? For
  972. // example we could be moving from movaps -> movq on x86.
  973. IRBuilder<> Builder(M);
  974. Instruction *NewM;
  975. if (UseMemMove)
  976. NewM = Builder.CreateMemMove(M->getRawDest(), M->getDestAlign(),
  977. MDep->getRawSource(), MDep->getSourceAlign(),
  978. M->getLength(), M->isVolatile());
  979. else if (isa<MemCpyInlineInst>(M)) {
  980. // llvm.memcpy may be promoted to llvm.memcpy.inline, but the converse is
  981. // never allowed since that would allow the latter to be lowered as a call
  982. // to an external function.
  983. NewM = Builder.CreateMemCpyInline(
  984. M->getRawDest(), M->getDestAlign(), MDep->getRawSource(),
  985. MDep->getSourceAlign(), M->getLength(), M->isVolatile());
  986. } else
  987. NewM = Builder.CreateMemCpy(M->getRawDest(), M->getDestAlign(),
  988. MDep->getRawSource(), MDep->getSourceAlign(),
  989. M->getLength(), M->isVolatile());
  990. assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)));
  991. auto *LastDef = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
  992. auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
  993. MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
  994. // Remove the instruction we're replacing.
  995. eraseInstruction(M);
  996. ++NumMemCpyInstr;
  997. return true;
  998. }
  999. /// We've found that the (upward scanning) memory dependence of \p MemCpy is
  1000. /// \p MemSet. Try to simplify \p MemSet to only set the trailing bytes that
  1001. /// weren't copied over by \p MemCpy.
  1002. ///
  1003. /// In other words, transform:
  1004. /// \code
  1005. /// memset(dst, c, dst_size);
  1006. /// memcpy(dst, src, src_size);
  1007. /// \endcode
  1008. /// into:
  1009. /// \code
  1010. /// memcpy(dst, src, src_size);
  1011. /// memset(dst + src_size, c, dst_size <= src_size ? 0 : dst_size - src_size);
  1012. /// \endcode
  1013. bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
  1014. MemSetInst *MemSet) {
  1015. // We can only transform memset/memcpy with the same destination.
  1016. if (!AA->isMustAlias(MemSet->getDest(), MemCpy->getDest()))
  1017. return false;
  1018. // Check that src and dst of the memcpy aren't the same. While memcpy
  1019. // operands cannot partially overlap, exact equality is allowed.
  1020. if (isModSet(AA->getModRefInfo(MemCpy, MemoryLocation::getForSource(MemCpy))))
  1021. return false;
  1022. // We know that dst up to src_size is not written. We now need to make sure
  1023. // that dst up to dst_size is not accessed. (If we did not move the memset,
  1024. // checking for reads would be sufficient.)
  1025. if (accessedBetween(*AA, MemoryLocation::getForDest(MemSet),
  1026. MSSA->getMemoryAccess(MemSet),
  1027. MSSA->getMemoryAccess(MemCpy)))
  1028. return false;
  1029. // Use the same i8* dest as the memcpy, killing the memset dest if different.
  1030. Value *Dest = MemCpy->getRawDest();
  1031. Value *DestSize = MemSet->getLength();
  1032. Value *SrcSize = MemCpy->getLength();
  1033. if (mayBeVisibleThroughUnwinding(Dest, MemSet, MemCpy))
  1034. return false;
  1035. // If the sizes are the same, simply drop the memset instead of generating
  1036. // a replacement with zero size.
  1037. if (DestSize == SrcSize) {
  1038. eraseInstruction(MemSet);
  1039. return true;
  1040. }
  1041. // By default, create an unaligned memset.
  1042. unsigned Align = 1;
  1043. // If Dest is aligned, and SrcSize is constant, use the minimum alignment
  1044. // of the sum.
  1045. const unsigned DestAlign =
  1046. std::max(MemSet->getDestAlignment(), MemCpy->getDestAlignment());
  1047. if (DestAlign > 1)
  1048. if (auto *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
  1049. Align = MinAlign(SrcSizeC->getZExtValue(), DestAlign);
  1050. IRBuilder<> Builder(MemCpy);
  1051. // If the sizes have different types, zext the smaller one.
  1052. if (DestSize->getType() != SrcSize->getType()) {
  1053. if (DestSize->getType()->getIntegerBitWidth() >
  1054. SrcSize->getType()->getIntegerBitWidth())
  1055. SrcSize = Builder.CreateZExt(SrcSize, DestSize->getType());
  1056. else
  1057. DestSize = Builder.CreateZExt(DestSize, SrcSize->getType());
  1058. }
  1059. Value *Ule = Builder.CreateICmpULE(DestSize, SrcSize);
  1060. Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize);
  1061. Value *MemsetLen = Builder.CreateSelect(
  1062. Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff);
  1063. unsigned DestAS = Dest->getType()->getPointerAddressSpace();
  1064. Instruction *NewMemSet = Builder.CreateMemSet(
  1065. Builder.CreateGEP(Builder.getInt8Ty(),
  1066. Builder.CreatePointerCast(Dest,
  1067. Builder.getInt8PtrTy(DestAS)),
  1068. SrcSize),
  1069. MemSet->getOperand(1), MemsetLen, MaybeAlign(Align));
  1070. assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) &&
  1071. "MemCpy must be a MemoryDef");
  1072. // The new memset is inserted after the memcpy, but it is known that its
  1073. // defining access is the memset about to be removed which immediately
  1074. // precedes the memcpy.
  1075. auto *LastDef =
  1076. cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
  1077. auto *NewAccess = MSSAU->createMemoryAccessBefore(
  1078. NewMemSet, LastDef->getDefiningAccess(), LastDef);
  1079. MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
  1080. eraseInstruction(MemSet);
  1081. return true;
  1082. }
  1083. /// Determine whether the instruction has undefined content for the given Size,
  1084. /// either because it was freshly alloca'd or started its lifetime.
  1085. static bool hasUndefContents(MemorySSA *MSSA, AliasAnalysis *AA, Value *V,
  1086. MemoryDef *Def, Value *Size) {
  1087. if (MSSA->isLiveOnEntryDef(Def))
  1088. return isa<AllocaInst>(getUnderlyingObject(V));
  1089. if (auto *II = dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) {
  1090. if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
  1091. auto *LTSize = cast<ConstantInt>(II->getArgOperand(0));
  1092. if (auto *CSize = dyn_cast<ConstantInt>(Size)) {
  1093. if (AA->isMustAlias(V, II->getArgOperand(1)) &&
  1094. LTSize->getZExtValue() >= CSize->getZExtValue())
  1095. return true;
  1096. }
  1097. // If the lifetime.start covers a whole alloca (as it almost always
  1098. // does) and we're querying a pointer based on that alloca, then we know
  1099. // the memory is definitely undef, regardless of how exactly we alias.
  1100. // The size also doesn't matter, as an out-of-bounds access would be UB.
  1101. if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(V))) {
  1102. if (getUnderlyingObject(II->getArgOperand(1)) == Alloca) {
  1103. const DataLayout &DL = Alloca->getModule()->getDataLayout();
  1104. if (Optional<TypeSize> AllocaSize =
  1105. Alloca->getAllocationSizeInBits(DL))
  1106. if (*AllocaSize == LTSize->getValue() * 8)
  1107. return true;
  1108. }
  1109. }
  1110. }
  1111. }
  1112. return false;
  1113. }
  1114. /// Transform memcpy to memset when its source was just memset.
  1115. /// In other words, turn:
  1116. /// \code
  1117. /// memset(dst1, c, dst1_size);
  1118. /// memcpy(dst2, dst1, dst2_size);
  1119. /// \endcode
  1120. /// into:
  1121. /// \code
  1122. /// memset(dst1, c, dst1_size);
  1123. /// memset(dst2, c, dst2_size);
  1124. /// \endcode
  1125. /// When dst2_size <= dst1_size.
  1126. bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
  1127. MemSetInst *MemSet) {
  1128. // Make sure that memcpy(..., memset(...), ...), that is we are memsetting and
  1129. // memcpying from the same address. Otherwise it is hard to reason about.
  1130. if (!AA->isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
  1131. return false;
  1132. Value *MemSetSize = MemSet->getLength();
  1133. Value *CopySize = MemCpy->getLength();
  1134. if (MemSetSize != CopySize) {
  1135. // Make sure the memcpy doesn't read any more than what the memset wrote.
  1136. // Don't worry about sizes larger than i64.
  1137. // A known memset size is required.
  1138. auto *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
  1139. if (!CMemSetSize)
  1140. return false;
  1141. // A known memcpy size is also required.
  1142. auto *CCopySize = dyn_cast<ConstantInt>(CopySize);
  1143. if (!CCopySize)
  1144. return false;
  1145. if (CCopySize->getZExtValue() > CMemSetSize->getZExtValue()) {
  1146. // If the memcpy is larger than the memset, but the memory was undef prior
  1147. // to the memset, we can just ignore the tail. Technically we're only
  1148. // interested in the bytes from MemSetSize..CopySize here, but as we can't
  1149. // easily represent this location, we use the full 0..CopySize range.
  1150. MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
  1151. bool CanReduceSize = false;
  1152. MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet);
  1153. MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
  1154. MemSetAccess->getDefiningAccess(), MemCpyLoc);
  1155. if (auto *MD = dyn_cast<MemoryDef>(Clobber))
  1156. if (hasUndefContents(MSSA, AA, MemCpy->getSource(), MD, CopySize))
  1157. CanReduceSize = true;
  1158. if (!CanReduceSize)
  1159. return false;
  1160. CopySize = MemSetSize;
  1161. }
  1162. }
  1163. IRBuilder<> Builder(MemCpy);
  1164. Instruction *NewM =
  1165. Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
  1166. CopySize, MaybeAlign(MemCpy->getDestAlignment()));
  1167. auto *LastDef =
  1168. cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
  1169. auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
  1170. MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
  1171. return true;
  1172. }
  1173. /// Perform simplification of memcpy's. If we have memcpy A
  1174. /// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
  1175. /// B to be a memcpy from X to Z (or potentially a memmove, depending on
  1176. /// circumstances). This allows later passes to remove the first memcpy
  1177. /// altogether.
  1178. bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
  1179. // We can only optimize non-volatile memcpy's.
  1180. if (M->isVolatile()) return false;
  1181. // If the source and destination of the memcpy are the same, then zap it.
  1182. if (M->getSource() == M->getDest()) {
  1183. ++BBI;
  1184. eraseInstruction(M);
  1185. return true;
  1186. }
  1187. // If copying from a constant, try to turn the memcpy into a memset.
  1188. if (auto *GV = dyn_cast<GlobalVariable>(M->getSource()))
  1189. if (GV->isConstant() && GV->hasDefinitiveInitializer())
  1190. if (Value *ByteVal = isBytewiseValue(GV->getInitializer(),
  1191. M->getModule()->getDataLayout())) {
  1192. IRBuilder<> Builder(M);
  1193. Instruction *NewM =
  1194. Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(),
  1195. MaybeAlign(M->getDestAlignment()), false);
  1196. auto *LastDef =
  1197. cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
  1198. auto *NewAccess =
  1199. MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
  1200. MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
  1201. eraseInstruction(M);
  1202. ++NumCpyToSet;
  1203. return true;
  1204. }
  1205. MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
  1206. MemoryAccess *AnyClobber = MSSA->getWalker()->getClobberingMemoryAccess(MA);
  1207. MemoryLocation DestLoc = MemoryLocation::getForDest(M);
  1208. const MemoryAccess *DestClobber =
  1209. MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc);
  1210. // Try to turn a partially redundant memset + memcpy into
  1211. // memcpy + smaller memset. We don't need the memcpy size for this.
  1212. // The memcpy most post-dom the memset, so limit this to the same basic
  1213. // block. A non-local generalization is likely not worthwhile.
  1214. if (auto *MD = dyn_cast<MemoryDef>(DestClobber))
  1215. if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst()))
  1216. if (DestClobber->getBlock() == M->getParent())
  1217. if (processMemSetMemCpyDependence(M, MDep))
  1218. return true;
  1219. MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
  1220. AnyClobber, MemoryLocation::getForSource(M));
  1221. // There are four possible optimizations we can do for memcpy:
  1222. // a) memcpy-memcpy xform which exposes redundance for DSE.
  1223. // b) call-memcpy xform for return slot optimization.
  1224. // c) memcpy from freshly alloca'd space or space that has just started
  1225. // its lifetime copies undefined data, and we can therefore eliminate
  1226. // the memcpy in favor of the data that was already at the destination.
  1227. // d) memcpy from a just-memset'd source can be turned into memset.
  1228. if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
  1229. if (Instruction *MI = MD->getMemoryInst()) {
  1230. if (auto *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
  1231. if (auto *C = dyn_cast<CallInst>(MI)) {
  1232. // The memcpy must post-dom the call. Limit to the same block for
  1233. // now. Additionally, we need to ensure that there are no accesses
  1234. // to dest between the call and the memcpy. Accesses to src will be
  1235. // checked by performCallSlotOptzn().
  1236. // TODO: Support non-local call-slot optimization?
  1237. if (C->getParent() == M->getParent() &&
  1238. !accessedBetween(*AA, DestLoc, MD, MA)) {
  1239. // FIXME: Can we pass in either of dest/src alignment here instead
  1240. // of conservatively taking the minimum?
  1241. Align Alignment = std::min(M->getDestAlign().valueOrOne(),
  1242. M->getSourceAlign().valueOrOne());
  1243. if (performCallSlotOptzn(
  1244. M, M, M->getDest(), M->getSource(),
  1245. TypeSize::getFixed(CopySize->getZExtValue()), Alignment,
  1246. C)) {
  1247. LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
  1248. << " call: " << *C << "\n"
  1249. << " memcpy: " << *M << "\n");
  1250. eraseInstruction(M);
  1251. ++NumMemCpyInstr;
  1252. return true;
  1253. }
  1254. }
  1255. }
  1256. }
  1257. if (auto *MDep = dyn_cast<MemCpyInst>(MI))
  1258. return processMemCpyMemCpyDependence(M, MDep);
  1259. if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
  1260. if (performMemCpyToMemSetOptzn(M, MDep)) {
  1261. LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
  1262. eraseInstruction(M);
  1263. ++NumCpyToSet;
  1264. return true;
  1265. }
  1266. }
  1267. }
  1268. if (hasUndefContents(MSSA, AA, M->getSource(), MD, M->getLength())) {
  1269. LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n");
  1270. eraseInstruction(M);
  1271. ++NumMemCpyInstr;
  1272. return true;
  1273. }
  1274. }
  1275. return false;
  1276. }
  1277. /// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
  1278. /// not to alias.
  1279. bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
  1280. // See if the source could be modified by this memmove potentially.
  1281. if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(M))))
  1282. return false;
  1283. LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
  1284. << "\n");
  1285. // If not, then we know we can transform this.
  1286. Type *ArgTys[3] = { M->getRawDest()->getType(),
  1287. M->getRawSource()->getType(),
  1288. M->getLength()->getType() };
  1289. M->setCalledFunction(Intrinsic::getDeclaration(M->getModule(),
  1290. Intrinsic::memcpy, ArgTys));
  1291. // For MemorySSA nothing really changes (except that memcpy may imply stricter
  1292. // aliasing guarantees).
  1293. ++NumMoveToCpy;
  1294. return true;
  1295. }
  1296. /// This is called on every byval argument in call sites.
  1297. bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
  1298. const DataLayout &DL = CB.getCaller()->getParent()->getDataLayout();
  1299. // Find out what feeds this byval argument.
  1300. Value *ByValArg = CB.getArgOperand(ArgNo);
  1301. Type *ByValTy = CB.getParamByValType(ArgNo);
  1302. TypeSize ByValSize = DL.getTypeAllocSize(ByValTy);
  1303. MemoryLocation Loc(ByValArg, LocationSize::precise(ByValSize));
  1304. MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
  1305. if (!CallAccess)
  1306. return false;
  1307. MemCpyInst *MDep = nullptr;
  1308. MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
  1309. CallAccess->getDefiningAccess(), Loc);
  1310. if (auto *MD = dyn_cast<MemoryDef>(Clobber))
  1311. MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
  1312. // If the byval argument isn't fed by a memcpy, ignore it. If it is fed by
  1313. // a memcpy, see if we can byval from the source of the memcpy instead of the
  1314. // result.
  1315. if (!MDep || MDep->isVolatile() ||
  1316. ByValArg->stripPointerCasts() != MDep->getDest())
  1317. return false;
  1318. // The length of the memcpy must be larger or equal to the size of the byval.
  1319. auto *C1 = dyn_cast<ConstantInt>(MDep->getLength());
  1320. if (!C1 || !TypeSize::isKnownGE(
  1321. TypeSize::getFixed(C1->getValue().getZExtValue()), ByValSize))
  1322. return false;
  1323. // Get the alignment of the byval. If the call doesn't specify the alignment,
  1324. // then it is some target specific value that we can't know.
  1325. MaybeAlign ByValAlign = CB.getParamAlign(ArgNo);
  1326. if (!ByValAlign) return false;
  1327. // If it is greater than the memcpy, then we check to see if we can force the
  1328. // source of the memcpy to the alignment we need. If we fail, we bail out.
  1329. MaybeAlign MemDepAlign = MDep->getSourceAlign();
  1330. if ((!MemDepAlign || *MemDepAlign < *ByValAlign) &&
  1331. getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &CB, AC,
  1332. DT) < *ByValAlign)
  1333. return false;
  1334. // The address space of the memcpy source must match the byval argument
  1335. if (MDep->getSource()->getType()->getPointerAddressSpace() !=
  1336. ByValArg->getType()->getPointerAddressSpace())
  1337. return false;
  1338. // Verify that the copied-from memory doesn't change in between the memcpy and
  1339. // the byval call.
  1340. // memcpy(a <- b)
  1341. // *b = 42;
  1342. // foo(*a)
  1343. // It would be invalid to transform the second memcpy into foo(*b).
  1344. if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep),
  1345. MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(&CB)))
  1346. return false;
  1347. Value *TmpCast = MDep->getSource();
  1348. if (MDep->getSource()->getType() != ByValArg->getType()) {
  1349. BitCastInst *TmpBitCast = new BitCastInst(MDep->getSource(), ByValArg->getType(),
  1350. "tmpcast", &CB);
  1351. // Set the tmpcast's DebugLoc to MDep's
  1352. TmpBitCast->setDebugLoc(MDep->getDebugLoc());
  1353. TmpCast = TmpBitCast;
  1354. }
  1355. LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n"
  1356. << " " << *MDep << "\n"
  1357. << " " << CB << "\n");
  1358. // Otherwise we're good! Update the byval argument.
  1359. CB.setArgOperand(ArgNo, TmpCast);
  1360. ++NumMemCpyInstr;
  1361. return true;
  1362. }
  1363. /// Executes one iteration of MemCpyOptPass.
  1364. bool MemCpyOptPass::iterateOnFunction(Function &F) {
  1365. bool MadeChange = false;
  1366. // Walk all instruction in the function.
  1367. for (BasicBlock &BB : F) {
  1368. // Skip unreachable blocks. For example processStore assumes that an
  1369. // instruction in a BB can't be dominated by a later instruction in the
  1370. // same BB (which is a scenario that can happen for an unreachable BB that
  1371. // has itself as a predecessor).
  1372. if (!DT->isReachableFromEntry(&BB))
  1373. continue;
  1374. for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
  1375. // Avoid invalidating the iterator.
  1376. Instruction *I = &*BI++;
  1377. bool RepeatInstruction = false;
  1378. if (auto *SI = dyn_cast<StoreInst>(I))
  1379. MadeChange |= processStore(SI, BI);
  1380. else if (auto *M = dyn_cast<MemSetInst>(I))
  1381. RepeatInstruction = processMemSet(M, BI);
  1382. else if (auto *M = dyn_cast<MemCpyInst>(I))
  1383. RepeatInstruction = processMemCpy(M, BI);
  1384. else if (auto *M = dyn_cast<MemMoveInst>(I))
  1385. RepeatInstruction = processMemMove(M);
  1386. else if (auto *CB = dyn_cast<CallBase>(I)) {
  1387. for (unsigned i = 0, e = CB->arg_size(); i != e; ++i)
  1388. if (CB->isByValArgument(i))
  1389. MadeChange |= processByValArgument(*CB, i);
  1390. }
  1391. // Reprocess the instruction if desired.
  1392. if (RepeatInstruction) {
  1393. if (BI != BB.begin())
  1394. --BI;
  1395. MadeChange = true;
  1396. }
  1397. }
  1398. }
  1399. return MadeChange;
  1400. }
  1401. PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
  1402. auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
  1403. auto *AA = &AM.getResult<AAManager>(F);
  1404. auto *AC = &AM.getResult<AssumptionAnalysis>(F);
  1405. auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
  1406. auto *MSSA = &AM.getResult<MemorySSAAnalysis>(F);
  1407. bool MadeChange = runImpl(F, &TLI, AA, AC, DT, &MSSA->getMSSA());
  1408. if (!MadeChange)
  1409. return PreservedAnalyses::all();
  1410. PreservedAnalyses PA;
  1411. PA.preserveSet<CFGAnalyses>();
  1412. PA.preserve<MemorySSAAnalysis>();
  1413. return PA;
  1414. }
  1415. bool MemCpyOptPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
  1416. AliasAnalysis *AA_, AssumptionCache *AC_,
  1417. DominatorTree *DT_, MemorySSA *MSSA_) {
  1418. bool MadeChange = false;
  1419. TLI = TLI_;
  1420. AA = AA_;
  1421. AC = AC_;
  1422. DT = DT_;
  1423. MSSA = MSSA_;
  1424. MemorySSAUpdater MSSAU_(MSSA_);
  1425. MSSAU = &MSSAU_;
  1426. while (true) {
  1427. if (!iterateOnFunction(F))
  1428. break;
  1429. MadeChange = true;
  1430. }
  1431. if (VerifyMemorySSA)
  1432. MSSA_->verifyMemorySSA();
  1433. return MadeChange;
  1434. }
  1435. /// This is the main transformation entry point for a function.
  1436. bool MemCpyOptLegacyPass::runOnFunction(Function &F) {
  1437. if (skipFunction(F))
  1438. return false;
  1439. auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
  1440. auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  1441. auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  1442. auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  1443. auto *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
  1444. return Impl.runImpl(F, TLI, AA, AC, DT, MSSA);
  1445. }